38 lines
1.1 KiB
Python
38 lines
1.1 KiB
Python
|
import os
|
||
|
import llm
|
||
|
import image
|
||
|
import stt
|
||
|
import tts
|
||
|
|
||
|
def main(image_description=None):
    """Handle one voice interaction: listen, ask the LLM, speak the reply.

    Polls ``stt.get_buffer()`` until it returns text that is not blank,
    sends that text to ``llm.chat`` together with the image description,
    prints the ``"response"`` entry of the result and passes it to
    ``tts.speak`` with asterisks removed.

    Args:
        image_description: Description forwarded to ``llm.chat``. When
            omitted (``None``), the module-level ``image_description`` is
            used if it exists, otherwise an empty string. This keeps
            no-argument calls working and also lets the function be called
            after a plain import, where that global has not been assigned.
    """
    if image_description is None:
        # Resolve the script-level global lazily instead of failing with a
        # NameError when it was never assigned.
        image_description = globals().get("image_description", "")

    print("Waiting for STT input...")
    # NOTE(review): this loop polls without pausing; if stt.get_buffer() does
    # not block, it will spin a CPU core -- confirm against the stt module.
    line = ""
    while not line.strip():
        line = stt.get_buffer()

    print(f"STT buffer: {line}")

    # Get the response from the LLM chat module
    llm_output = llm.chat(question=line, image_description=image_description)
    response = llm_output["response"]
    print("LLM:", response)

    # Run the LLM output through the TTS (text-to-speech) module.
    # Asterisks are removed for better TTS understanding.
    tts.speak(response.replace("*", ""))
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # One-time setup, then serve voice interactions until the process is
    # stopped.

    # The LLM index is built only when no "index" path exists yet
    # (re-initialising is only needed with updated inputs).
    index_missing = not os.path.exists("index")
    if index_missing:
        llm.init_index()

    # Bring up the LLM chat module.
    llm.init_chat()

    # Image description that main() reads as a module-level global.
    # The image.describe() call is currently disabled, so it stays empty.
    image_description = ""
    print("Image description:", image_description)

    # Start the STT module before entering the interaction loop.
    stt.start()

    # Each main() call handles a single question/answer round.
    while True:
        main()
|