"""Stream live transcription text from a ``whisper-cpp-stream`` subprocess.

The module launches the external ``whisper-cpp-stream`` program, collects
everything it writes to stdout in a thread-safe buffer, and exposes the
filtered text through :func:`get_buffer`.

Configuration is read from the environment at import time:

* ``WHISPER_AUDIO_DEVICE`` -- value passed after ``-c`` (default ``"-1"``).
* ``WHISPER_MODEL_PATH`` -- value passed after ``-m``
  (default ``"models/ggml-tiny.bin"``).
"""

import atexit
import multiprocessing  # noqa: F401  (kept so existing imports are not removed)
import os
import subprocess
import threading
import time
import types

audio_device = os.getenv("WHISPER_AUDIO_DEVICE", "-1")
whisper_model = os.getenv("WHISPER_MODEL_PATH", "models/ggml-tiny.bin")
command = ["whisper-cpp-stream", "-kc", "-m", whisper_model, "-c", audio_device, "-t", "4"]

# Substrings stripped from the transcription before it is handed to callers.
filter_strings = ["", "*", "\r", "\n", "\t", "(inaudible)", "[BLANK_AUDIO]", "[Start speaking]", "(gunshot)", "(wind howling)", "[Music]", "(footsteps)"]  # Example strings to filter out

# Seconds main() sleeps between polls so it does not spin at 100% CPU.
_POLL_INTERVAL = 0.05
# Seconds stop() waits for the child / reader thread before escalating.
_STOP_TIMEOUT = 5.0


class SharedString:
    """A string buffer that can be updated safely from several threads.

    The text lives in ``self.namespace.value``.  All access goes through a
    single lock owned by the instance, so reads, writes and appends cannot
    interleave.
    """

    def __init__(self):
        # One lock for the lifetime of the object.  Creating a fresh lock on
        # every call (as `with Lock():` would) provides no mutual exclusion.
        self._lock = threading.Lock()
        self.namespace = types.SimpleNamespace(value="")

    def get_value(self):
        """Return the current contents of the buffer."""
        with self._lock:
            return self.namespace.value

    def set_value(self, new_value):
        """Replace the contents of the buffer with *new_value*."""
        with self._lock:
            self.namespace.value = new_value

    def append(self, append_value):
        """Append *append_value* to the end of the buffer."""
        with self._lock:
            self.namespace.value += append_value

    def pop_value(self):
        """Atomically return the contents of the buffer and reset it to ``""``.

        Doing the read and the reset under one lock acquisition guarantees
        that text appended concurrently is never dropped.
        """
        with self._lock:
            value = self.namespace.value
            self.namespace.value = ""
            return value


buffer = SharedString()
process = None
process_thread = None
_atexit_registered = False


def read_output(proc, buffer):
    """Copy every line from ``proc.stdout`` into *buffer* until end of file.

    Args:
        proc: Object whose ``stdout.readline()`` returns ``bytes``
            (``b""`` at end of file), e.g. a ``subprocess.Popen``.
        buffer: Object with an ``append(str)`` method, e.g. ``SharedString``.
    """
    # readline() returns b"" only at EOF, so the sentinel form stops exactly
    # when the child has closed its stdout instead of spinning on empty reads.
    for raw_line in iter(proc.stdout.readline, b""):
        # errors="replace": a stray invalid byte must not kill the reader.
        buffer.append(raw_line.decode("utf-8", errors="replace"))


def start():
    """Launch the transcription subprocess and its stdout reader thread.

    Calling this while a subprocess is already running is a no-op.  ``stop``
    is registered with ``atexit`` (once) so the child is cleaned up when the
    interpreter exits.

    Raises:
        OSError: If the ``whisper-cpp-stream`` executable cannot be started.
    """
    global process, process_thread, _atexit_registered

    if process is not None:
        if process.poll() is None:
            return  # already running
        stop()  # previous child has exited; reap it before starting anew

    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        # stderr is never consumed; an unread PIPE would eventually fill up
        # and block the child, so discard it instead.
        stderr=subprocess.DEVNULL,
    )
    # A thread (not a multiprocessing.Process) is used because a Popen object
    # cannot be pickled for the spawn/forkserver start methods.  It is a
    # daemon so a blocked readline() can never keep the interpreter alive
    # (non-daemon threads are joined before atexit handlers run).
    process_thread = threading.Thread(
        target=read_output, args=(process, buffer), daemon=True
    )
    process_thread.start()

    # Register cleanup function to be called when script exits
    if not _atexit_registered:
        atexit.register(stop)
        _atexit_registered = True


def stop():
    """Terminate the subprocess and wait for the reader thread to finish.

    Safe to call more than once and when nothing was started.
    """
    global process, process_thread

    proc, reader = process, process_thread
    process = None
    process_thread = None
    if proc is None:
        return

    if proc.poll() is None:
        proc.terminate()
        try:
            proc.wait(timeout=_STOP_TIMEOUT)
        except subprocess.TimeoutExpired:
            # The child ignored SIGTERM; force it down so we never hang.
            proc.kill()
            proc.wait()

    if reader is not None:
        reader.join(timeout=_STOP_TIMEOUT)

    # Only close the pipe once nobody is reading from it any more.
    if proc.stdout is not None and (reader is None or not reader.is_alive()):
        proc.stdout.close()


def filter_buffer(data):
    """Return *data* with every entry of ``filter_strings`` removed.

    Leading and trailing whitespace is stripped from the result.
    """
    for f_str in filter_strings:
        data = data.replace(f_str, "")
    return data.strip()


def get_buffer():
    """Return the filtered text captured since the previous call.

    The shared buffer is drained atomically, so output that arrives while
    this function runs is kept for the next call rather than lost.
    """
    return filter_buffer(buffer.pop_value())


def main():
    """Run the transcriber and print filtered text until it exits or Ctrl-C."""
    start()
    proc = process
    try:
        while proc.poll() is None:
            data = get_buffer()
            if data:
                print(data)
            time.sleep(_POLL_INTERVAL)
    except KeyboardInterrupt:
        pass
    finally:
        stop()

    # The reader thread has been joined; flush whatever arrived last.
    data = get_buffer()
    if data:
        print(data)


if __name__ == "__main__":
    main()