# pvv-chan/assistant.py

import os
import llm
import image
import stt
import tts

def main():
    """Run one listen -> answer -> speak turn of the assistant.

    Polls ``stt.get_buffer()`` until it returns text that is not blank after
    stripping, sends that text to ``llm.chat`` together with the module-level
    ``image_description``, prints the ``"response"`` entry of the result, and
    hands it to ``tts.speak`` with asterisks removed.

    Note: ``image_description`` is read as a global. In the visible file it is
    only assigned inside the ``if __name__ == "__main__"`` block, so it must be
    defined before this function is called.
    """
    print("Waiting for STT input...")

    # Keep asking the speech-to-text module for text until something
    # other than whitespace comes back.
    while True:
        utterance = stt.get_buffer()
        if utterance.strip():
            break

    print(f"STT buffer: {utterance}")

    # Ask the LLM chat module and keep only the text of its reply.
    reply = llm.chat(question=utterance, image_description=image_description)["response"]
    print("LLM:", reply)

    # Asterisks are stripped so the text-to-speech module reads the reply
    # more cleanly.
    spoken_text = reply.replace("*", "")
    tts.speak(spoken_text)


if __name__ == "__main__":
    # The LLM index is built only when no "index" path exists yet
    # (a rebuild is only needed when the inputs have been updated).
    if not os.path.exists("index"):
        llm.init_index()

    # Set up the LLM chat module.
    llm.init_chat()

    # The image description is a blank placeholder for now; the
    # image.describe() call that would fill it in is disabled.
    image_description = ""
    print("Image description:", image_description)

    # Start speech-to-text, then handle conversation turns indefinitely.
    stt.start()
    while True:
        main()