init
commit 418d6d044d
@ -0,0 +1,10 @@
*/__pycache__/*
__pycache__/*
piper-models/*
piper-models
models
index

models
index
history
@ -0,0 +1,40 @@
This is a simple chatbot project.
The aim is to recreate something similar to Neuro-sama, running on local hardware with a minimal amount of compute.

The bot is designed to be modular, with the ability to add new modules easily.

You need to supply a MediaWiki backup XML; it is used as the knowledge source for the chatbot.

A strong computer with CUDA and a fair amount of VRAM is advised to keep response times down.

Most settings are configured through environment variables set in the flake.nix file, as in the sketch below.
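For example, the modules read their configuration straight from the process environment. A minimal sketch; the variable names are the ones exported by flake.nix, and the fallback values are only illustrative:

```python
import os

# Values are exported by the dev shell in flake.nix; the fallbacks are illustrative defaults.
whisper_model = os.environ.get("WHISPER_MODEL_PATH", "models/ggml-tiny.bin")
piper_model = os.environ.get("PIPER_MODEL_PATH")
ollama_url = os.environ.get("OLLAMA_HOST", "http://localhost:11434")

print(whisper_model, piper_model, ollama_url)
```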
## Modules

### stt

The stt module is responsible for converting speech to text.
whisper-cpp-stream is used to stream audio through the whisper STT engine.
whisper-cpp-stream is a C++ program that reads audio from a microphone and sends it to the whisper STT engine.
It is run through a Python subprocess, as in the sketch below.
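A minimal sketch of that approach (the full implementation is in stt.py; the command line mirrors the one used there):

```python
import os
import subprocess

# whisper-cpp-stream prints transcribed text to stdout; read it line by line.
model = os.environ.get("WHISPER_MODEL_PATH", "models/ggml-tiny.bin")
device = os.environ.get("WHISPER_AUDIO_DEVICE", "-1")
proc = subprocess.Popen(
    ["whisper-cpp-stream", "-kc", "-m", model, "-c", device, "-t", "4"],
    stdout=subprocess.PIPE,
)
for line in proc.stdout:
    print(line.decode("utf-8").strip())
```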
### llm

The llm module is responsible for crafting a response to the user's input. It uses a RAG index built from a supplied MediaWiki XML file and, in the future, the stored chat history.

langchain is the Python module that interfaces with the RAG index and the LLM; see the sketch below.
ollama is used on the backend to interface with a llama model.

Future work will include giving a structured response that includes emotions and metadata for a future image module.
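A trimmed-down sketch of the pipeline (the full version lives in llm.py; model names and chunk sizes are the ones used there):

```python
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import MWDumpLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
from langchain_community.vectorstores import Chroma

# Index the wiki dump once, then answer questions against the vector store.
docs = MWDumpLoader("wiki/current.xml").load()
docs = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory="./index")

chain = ConversationalRetrievalChain.from_llm(
    Ollama(model="llama3"),
    retriever=vectordb.as_retriever(search_kwargs={"k": 2}),
)
print(chain({"question": "Hello!", "chat_history": []})["answer"])
```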
### tts

piper is used as the TTS engine.
It does not have proper Python bindings in nixpkgs, so it is run with subprocess.
Text is echoed into piper's stdin, and the output is played with aplay, as in the sketch below.
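A minimal sketch of that pipeline (the command matches the one in tts.py; the model paths come from the environment set up by flake.nix):

```python
import os
import subprocess

model = os.environ.get("PIPER_MODEL_PATH")
config = os.environ.get("PIPER_MODEL_JSON_PATH")
text = "Hello, world."

# piper reads text on stdin and writes raw PCM samples, which aplay plays back.
subprocess.run(
    f'echo "{text}" | piper -q -m {model} -c {config} --output-raw '
    "| aplay -q -r 22050 -f S16_LE -t raw -",
    shell=True,
    check=True,
)
```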
### image

The image module is responsible for processing images.
It captures an image with OpenCV, base64-encodes it and sends it to a multimodal model for a description, as sketched below.
Future work is to test out opencv (or something similar) for image tagging instead, as the multimodal model hallucinates a lot and is also far too slow.
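A condensed sketch of the capture-and-describe flow (image.py does the same with a little more pre-processing; llava is the default model used there):

```python
import base64
import cv2
from langchain_community.chat_models import ChatOllama
from langchain_core.messages import HumanMessage

# Grab one frame, JPEG-encode it, and ask the multimodal model what it sees.
cap = cv2.VideoCapture(0)
ok, frame = cap.read()
cap.release()
if not ok:
    raise RuntimeError("camera capture failed")

image_b64 = base64.b64encode(cv2.imencode(".jpg", frame)[1].tobytes()).decode("utf-8")

llava = ChatOllama(model="llava")
message = HumanMessage(content=[
    {"type": "image_url", "image_url": f"data:image/jpeg;base64,{image_b64}"},
    {"type": "text", "text": "Briefly describe this image."},
])
print(llava.invoke([message]).content)
```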
@ -0,0 +1,37 @@
import os
import llm
import image
import stt
import tts


def main():
    print("Waiting for STT input...")
    line = ""
    while not line.strip():
        line = stt.get_buffer()
        if not line.strip():
            continue

    print(f"STT buffer: {line}")

    # Get the response from the LLM chat module
    llm_output = llm.chat(question=line, image_description=image_description)
    print("LLM:", llm_output["response"])
    # Run the LLM output through the TTS (text-to-speech) module
    tts.speak(llm_output["response"].replace("*", ""))  # Remove asterisks for better TTS output


if __name__ == "__main__":
    # Initialize the LLM index if it doesn't exist
    if not os.path.exists("index"):
        llm.init_index()  # Initialize the LLM index module (only needed with updated inputs)

    # Initialize the LLM chat module
    llm.init_chat()
    # Describe the image and store the description
    image_description = ""  # image.describe()
    print("Image description:", image_description)
    stt.start()

    while True:
        main()
@ -0,0 +1,26 @@
{
  "nodes": {
    "nixpkgs": {
      "locked": {
        "lastModified": 1713714899,
        "narHash": "sha256-+z/XjO3QJs5rLE5UOf015gdVauVRQd2vZtsFkaXBq2Y=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "6143fc5eeb9c4f00163267708e26191d1e918932",
        "type": "github"
      },
      "original": {
        "id": "nixpkgs",
        "ref": "nixos-unstable",
        "type": "indirect"
      }
    },
    "root": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      }
    }
  },
  "root": "root",
  "version": 7
}
@ -0,0 +1,104 @@
{
  description = "A simple flake";

  inputs.nixpkgs.url = "nixpkgs/nixos-unstable";

  outputs = { self, nixpkgs }: {

    defaultPackage.x86_64-linux = let
      pkgs = nixpkgs.legacyPackages.x86_64-linux;
      python = pkgs.python311;
      pythonPackages = python.pkgs;

      piper_model_url = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/hfc_female/medium/en_US-hfc_female-medium.onnx?download=true";
      piper_model_json_url = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/hfc_female/medium/en_US-hfc_female-medium.onnx.json?download=true.json";
      whisper_model_tiny_url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin?download=true";
      whisper_model_base_url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin?download=true";
      whisper_model_small_url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin?download=true";
      whisper_model_small_tdrz_url = "https://huggingface.co/akashmjn/tinydiarize-whisper.cpp/resolve/main/ggml-small.en-tdrz.bin?download=true";
      haracascade_face_url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml";

      piper_model = pkgs.fetchurl {
        name = "hfc_female/medium/en_US-hfc_female-medium.onnx";
        url = piper_model_url;
        sha256 = "sha256-kUxHN4j8H6i2Os4c3NtEWI9K5SPTqzffFTZhaDWhQLc="; # replace with the correct sha256
      };

      piper_model_json = pkgs.fetchurl {
        name = "hfc_female/medium/en_US-hfc_female-medium.onnx.json";
        url = piper_model_json_url;
        sha256 = "sha256-A/H6BiK4BGMoNZLZesqfbomuw0WlxWtyV3I+AJPFi2w="; # replace with the correct sha256
      };

      whisper_model_tiny = pkgs.fetchurl {
        name = "ggml-tiny.bin";
        url = whisper_model_tiny_url;
        sha256 = "sha256-vgfgSOHlma1GNByNKhNWRQl6U4IhZ4t6zdGxkZxuGyE=";
      };

      whisper_model_base = pkgs.fetchurl {
        name = "ggml-base.bin";
        url = whisper_model_base_url;
        sha256 = "sha256-YO1bw90U7qhWST0zQ0m0BXgt3K8AKNS130CINF+6Lv4=";
      };

      whisper_model_small = pkgs.fetchurl {
        name = "ggml-small.bin";
        url = whisper_model_small_url;
        sha256 = "sha256-G+OpsgY4Z7k35k4ux0gzZKeZF+FX+pjF2UtcH//qmHs=";
      };

      whisper_model_small_tdrz = pkgs.fetchurl {
        name = "ggml-small.en-tdrz.bin";
        url = whisper_model_small_tdrz_url;
        sha256 = "sha256-G+OpsgY4Z7k35k4ux0gzZKeZF+FX+pjF2UtcH//qmHs="; # replace with the correct sha256 for the tdrz model
      };

      haracascade_face = pkgs.fetchurl {
        name = "haarcascade_frontalface_default.xml";
        url = haracascade_face_url;
        sha256 = "sha256-D31FJ4ROtRTUpJSOgi2pD7sWo0oLu7xq3GSYdHpar7A=";
      };

    in pkgs.mkShell {
      nativeBuildInputs = with pkgs; [
        python
        piper-tts
        alsa-utils # for aplay, which piper-tts streams to
        openai-whisper-cpp # for stt
        opencv

        (pythonPackages.numpy)
        (pythonPackages.pytorch)

        (pythonPackages.langchain)
        (pythonPackages.mwxml)
        (pythonPackages.mwparserfromhell) # dependency for the langchain document loader
        (pythonPackages.sentence-transformers) # dependency for langchain embeddings
        (pythonPackages.chromadb) # vector search
        (pythonPackages.opencv4)
        (pythonPackages.pillow)
      ];

      WHISPER_AUDIO_DEVICE = "1";
      WHISPER_MODEL_PATH = whisper_model_tiny;
      PIPER_MODEL_PATH = piper_model;
      PIPER_MODEL_JSON_PATH = piper_model_json;
      HARA_CASCADE_FACE_PATH = haracascade_face;
      OLLAMA_HOST = "http://localhost:11434";
      IMAGE_DESCRIPTION_CAMERA = "0";
      IMAGE_DESCRIPTION_MODEL = "llava";

      shellHook = ''
        # OLLAMA_HOST must be set before pulling the models needed for the project
        ollama pull llava
        ollama pull llama3
      '';
    };
  };
}
@ -0,0 +1,118 @@
import cv2
import os
import numpy as np
import base64
import face_recognition
from PIL import Image
from io import BytesIO
from langchain_community.chat_models import ChatOllama
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser

camera = int(os.environ.get("IMAGE_DESCRIPTION_CAMERA", "0"))  # Define your camera here, 0 is usually the built-in webcam
ollamaUrl = os.environ.get("OLLAMA_HOST", "http://localhost:11434")
imagemodel = os.environ.get("IMAGE_DESCRIPTION_MODEL", "llava")


def capture():
    """
    Capture an image from the webcam.
    :return: Captured image
    """
    cap = cv2.VideoCapture(camera)
    ret, frame = cap.read()
    cap.release()
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) if ret else None


def encode_image(image):
    """
    Encode the given image to base64.
    :param image: Image to encode
    :return: Base64 encoded image
    """
    pil_image = Image.fromarray(image)
    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue())


def resize_image(encoded_image, size=(672, 672)):
    """
    Resize the given image to the specified size.
    :param encoded_image: Base64 encoded image to resize
    :param size: New size for the image
    :return: Resized image as raw JPEG bytes
    """
    image_data = base64.b64decode(encoded_image)
    pil_image = Image.open(BytesIO(image_data))
    resized_image = pil_image.resize(size)
    buffered = BytesIO()
    resized_image.save(buffered, format="JPEG")
    return buffered.getvalue()


def capture_encoded_image():
    """
    Capture an image from the webcam, resize it, and encode it to base64.
    :return: Base64 encoded image
    """
    image = capture()
    if image is not None:
        encoded_image = encode_image(image)
        resized_image = resize_image(encoded_image)
        return base64.b64encode(resized_image).decode("utf-8")


def get_image_from_encoded(encoded_image):
    """
    Get an image from the given base64 encoded image.
    :param encoded_image: Base64 encoded image
    :return: Image
    """
    image_data = base64.b64decode(encoded_image)
    return cv2.imdecode(np.frombuffer(image_data, np.uint8), cv2.IMREAD_COLOR)


def get_faces(image):
    """
    Get the faces from the given image and assign a persistent ID to each face.
    :param image: Image to get faces from
    :return: List of dictionaries containing the ID and location of each face
    """
    face_cascade = cv2.CascadeClassifier(os.environ.get("HARA_CASCADE_FACE_PATH", "haarcascade_frontalface_default.xml"))
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    print(faces)  # TODO: remove this debug print
    face_data = []
    for (x, y, w, h) in faces:
        face_image = image[y:y+h, x:x+w]
        face_encoding = face_recognition.face_encodings(face_image)
        if face_encoding:
            face_id = hash(tuple(face_encoding[0]))
            face_data.append({
                "id": face_id,
                "location": (x, y, w, h)
            })
    return face_data


def describe(temperature=0, prompt="Briefly explain this image like it is your eyes. Use fewer words if possible. What is visible, and where are items located. Describe the people in the scene in some more detail. Refer to the camera as you."):
    image_b64 = capture_encoded_image()
    llava = ChatOllama(model=imagemodel, temperature=temperature, base_url=ollamaUrl)

    def prompt_func(data):
        text = data["text"]
        image = data["image"]
        image_part = {
            "type": "image_url",
            "image_url": f"data:image/jpeg;base64,{image}",
        }
        content_parts = []
        text_part = {"type": "text", "text": text}
        content_parts.append(image_part)
        content_parts.append(text_part)
        return [HumanMessage(content=content_parts)]

    chain = prompt_func | llava | StrOutputParser()
    query_chain = chain.invoke(
        {"text": prompt, "image": image_b64}
    )
    return query_chain


if __name__ == "__main__":
    # print(capture_encoded_image())
    print(describe())
@ -0,0 +1,228 @@
#!python3
import os
import shutil
import datetime
from langchain_community.chat_models import ChatOllama
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import MWDumpLoader
from langchain_core.prompts import ChatPromptTemplate

from langchain_community.llms import Ollama
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_transformers.embeddings_redundant_filter import EmbeddingsRedundantFilter
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_core.prompts import HumanMessagePromptTemplate
from langchain_core.prompts import SystemMessagePromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain.utils.html import (PREFIXES_TO_IGNORE_REGEX,
                                  SUFFIXES_TO_IGNORE_REGEX)

from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain_core.prompts import PromptTemplate
from enum import Enum

text_model = "llama3"  # ollama
embedding_model = "all-MiniLM-L6-v2"  # huggingface
wikilocation = os.environ.get("RAG_WIKI_LOCATION", "wiki/current.xml")  # mediawiki xml to index
index_dir = "./index"
date = datetime.datetime.now().strftime("%Y-%m-%d")

conversation = None


ollamaUrl = os.environ.get("OLLAMA_HOST", "http://localhost:11434")


class Emotion(str, Enum):
    NEUTRAL = "neutral"
    HAPPY = "happy"
    SAD = "sad"
    ANGRY = "angry"
    SURPRISED = "surprised"
    CONFUSED = "confused"
    EXCITED = "excited"
    CALM = "calm"


class Action(str, Enum):
    NOTHING = "nothing"
    STUTTER = "stutter"
    SQUEAL = "squeal"
    MEWOW = "mewow"
    SMUG = "smug"
    WAGS_TAIL = "wags_tail"
    WINK = "wink"
    NOD = "nod"
    LAUGH = "laugh"
    SIGH = "sigh"
    GLOOMY = "gloomy"
    LOOK_AWAY = "look_away"
    LOOK_TOWARDS_YOU = "look_towards_you"


def chat(question="Hello, how are you today?", image_description=""):
    # example with rag
    global conversation

    # TODO: implement chat history and memory, with storage to disk
    chat_history = []

    response = conversation({"question": question, "image_description": image_description, "chat_history": chat_history, "format_instructions": format_instructions, "date": date})
    # print(response)
    answer = response['answer']

    result = output_parser.parse(answer)

    # enforce keys
    try:
        result["emotion"] = Emotion(result["emotion"])
    except (KeyError, ValueError):
        print(f"Could not parse emotion: {result.get('emotion')}")
        result["emotion"] = Emotion.NEUTRAL

    for i in range(len(result["actions"])):
        try:
            result["actions"][i] = Action(result["actions"][i])
        except ValueError:
            print(f"Could not parse action: {result['actions'][i]}")
            result["actions"][i] = Action.NOTHING

    # print(result)

    return result


# some toy functions to interact with the llm
endstring = "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"  # the llama3 end tokens do not seem to be properly implemented in langchain yet


def simple_stream(prompt="A question to ask the model", temperature=0.5):
    llm = Ollama(model=text_model, temperature=temperature, base_url=ollamaUrl, stop=["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>", "<|reserved_special_token|>"])  # stop tokens need to be given manually for llama3 for now
    return llm.stream(prompt)


def simple(prompt="A question to ask the model", temperature=0.5):
    stream = simple_stream(prompt, temperature)
    result = ""
    for line in stream:
        result += line.rstrip("\n")
        if result.endswith(endstring):
            result = result.replace(endstring, "")
            return result
    return result


# https://scribe.rip/rahasak/build-rag-application-using-a-llm-running-on-local-computer-with-ollama-and-langchain-e6513853fda0
def init_index():
    # remove the current index
    if os.path.exists(index_dir):
        shutil.rmtree(index_dir)

    # Load data from MediaWiki dump
    documents = MWDumpLoader(wikilocation).load()
    # TODO: add chat history to the documents

    # Split text
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    documents = text_splitter.split_documents(documents)

    # Apply the redundant filter
    embeddings = HuggingFaceEmbeddings(
        model_name=embedding_model,
        multi_process=True,
        # encode_kwargs={"normalize_embeddings": True},
    )
    redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
    documents = redundant_filter.transform_documents(documents)
    vectordb = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=index_dir,
        collection_name="pvv-wiki"
    )
    vectordb.persist()


response_schemas = [
    ResponseSchema(
        name="response",
        description="reply to the user's question or statement.",
    ),
    ResponseSchema(
        name="emotion",
        description=f"emotion expressed in the response, selected from a set of possible options {list(str(e.value) for e in Emotion)}",
        type="Emotion",
    ),
    ResponseSchema(
        name="actions",
        description=f"List of actions to take at random in response to the user's question or statement from the set {list(str(a.value) for a in Action)}",
        type="List[Action]",
    ),
]

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()
general_system_template = r"""
Given a specific context, please give a short answer to the question, and use relevant context to try and find a possible outcome. If the data does not help, be uncertain in the final answer.
The current date is {date}
----
{context}
----
You may refer to what you can see in front of you in the description below. Any reference to the camera or image should be interpreted as "you", "your eyes" or what you can see:
{image_description}
____
Do not refer to yourself as an AI.
Avoid expressions in the response.

You are a cute anime character named pvv chan, you like programming, linux, opensource and board games.
Without referring to yourself, reply to the human talking to you.
{format_instructions}
"""
general_user_template = "Question:```{question}```"
messages = [
    SystemMessagePromptTemplate.from_template(general_system_template),
    HumanMessagePromptTemplate.from_template(general_user_template)
]
qa_prompt = ChatPromptTemplate.from_messages(messages)


def init_chat():
    global conversation

    # load index from local directory
    embeddings = HuggingFaceEmbeddings(
        model_name=embedding_model,
        multi_process=True,
        # encode_kwargs={"normalize_embeddings": True},
    )
    vectordb = Chroma(persist_directory=index_dir, embedding_function=embeddings)

    llm = Ollama(
        model=text_model,
        base_url=ollamaUrl,
        verbose=True,
    )

    # create conversation
    conversation = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vectordb.as_retriever(search_kwargs={"k": 2}),  # number of documents to use for the response
        # retriever=vectordb.as_retriever(),
        return_source_documents=True,
        verbose=True,
        combine_docs_chain_kwargs={"prompt": qa_prompt},
    )


if __name__ == "__main__":
    # print(simple(prompt="What is the meaning of life? (answer short)"))

    print("initialising index")
    # init_index()
    print("initialising chat")
    init_chat()

    print("chatting")
    print(chat(question="Hello, how are you today? What is our dns server named?"))
@ -0,0 +1,83 @@
import os
import subprocess
import multiprocessing
import atexit

audio_device = os.getenv("WHISPER_AUDIO_DEVICE", "-1")
whisper_model = os.getenv("WHISPER_MODEL_PATH", "models/ggml-tiny.bin")
command = ["whisper-cpp-stream", "-kc", "-m", whisper_model, "-c", audio_device, "-t", "4"]

filter_strings = ["*", "\r", "\n", "\t", "(inaudible)", "[BLANK_AUDIO]", "[Start speaking]", "(gunshot)", "(wind howling)", "[Music]", "(footsteps)"]  # Example strings to filter out


class SharedString:
    def __init__(self):
        manager = multiprocessing.Manager()
        self.namespace = manager.Namespace()
        self.namespace.value = ""
        self.lock = manager.Lock()  # shared lock so both processes see a consistent value

    def get_value(self):
        with self.lock:
            return self.namespace.value

    def set_value(self, new_value):
        with self.lock:
            self.namespace.value = new_value

    def append(self, append_value):
        with self.lock:
            self.namespace.value += append_value


buffer = SharedString()
process = None
process_thread = None


def read_output(proc, buffer):
    while True:
        output = proc.stdout.readline()
        if output == b"" and proc.poll() is not None:
            break
        if output:
            # print(output.decode("utf-8"))
            buffer.append(output.decode("utf-8"))


def start():
    global process, process_thread
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    process_thread = multiprocessing.Process(target=read_output, args=(process, buffer))
    process_thread.start()

    # Register cleanup function to be called when script exits
    atexit.register(stop)


def stop():
    global process, process_thread
    if process:
        process.terminate()
        process_thread.join()
        process = None
        process_thread = None


def filter_buffer(data):
    for f_str in filter_strings:
        data = data.replace(f_str, "")
    return data.strip()


def get_buffer():
    data = buffer.get_value()
    buffer.set_value("")
    return filter_buffer(data)
    # return data


def main():
    start()

    try:
        while process.poll() is None:
            data = get_buffer()
            if data:
                print(data)
    except KeyboardInterrupt:
        stop()


if __name__ == "__main__":
    main()
@ -0,0 +1,21 @@
import subprocess
import os

piper_model_path = os.getenv("PIPER_MODEL_PATH")
piper_model_json_path = os.getenv("PIPER_MODEL_JSON_PATH")


def speak(text):
    # some text cleanup: strip characters that confuse piper or the shell pipeline below
    illegal_chars = ["\n", "\r", "\t", "*", "`", "[", "]", "{", "}", "\"", "\'"]
    for char in illegal_chars:
        text = text.replace(char, "")

    # remove emojis and other non-ascii characters
    text = text.encode('ascii', 'ignore').decode('ascii')

    command = f"echo \"{text}. \" | piper -q -m {piper_model_path} -c {piper_model_json_path} --output-raw | aplay -q -r 22050 -f S16_LE -t raw -"
    subprocess.run(command, shell=True, check=True)


if __name__ == "__main__":
    speak("Hello, world. This is a tts test.")