From 67439d615ca800189c7f82d136c6e8860bbdfec3 Mon Sep 17 00:00:00 2001 From: Adrian Gunnar Lauterer Date: Sat, 4 May 2024 16:58:14 +0200 Subject: [PATCH] init proof of concept --- .gitignore | 4 ++++ README.md | 12 ++++++++++++ flake.lock | 26 ++++++++++++++++++++++++++ flake.nix | 44 ++++++++++++++++++++++++++++++++++++++++++++ pdf-speak.py | 29 +++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 pdf-speak.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fc1cefe --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*/__pycache__/* +__pycache__/* +piper-models + diff --git a/README.md b/README.md new file mode 100644 index 0000000..a188e61 --- /dev/null +++ b/README.md @@ -0,0 +1,12 @@ +# pdf-speak + +This is a small side project I made to read a text PDF out loud, sentence by sentence, using piper-tts. + +The project is built with Nix and should be run inside the dev environment provided by flake.nix: + + nix develop + python pdf-speak.py -f YOURPDF -p PAGENUMBER + +Only tested on a Linux system. 
+No warranty provided + diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..83f6135 --- /dev/null +++ b/flake.lock @@ -0,0 +1,26 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1713714899, + "narHash": "sha256-+z/XjO3QJs5rLE5UOf015gdVauVRQd2vZtsFkaXBq2Y=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "6143fc5eeb9c4f00163267708e26191d1e918932", + "type": "github" + }, + "original": { + "id": "nixpkgs", + "ref": "nixos-unstable", + "type": "indirect" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..1271409 --- /dev/null +++ b/flake.nix @@ -0,0 +1,44 @@ +{ + description = "A simple flake"; + + inputs.nixpkgs.url = "nixpkgs/nixos-unstable"; + + outputs = { self, nixpkgs }: { + + defaultPackage.x86_64-linux = let + pkgs = nixpkgs.legacyPackages.x86_64-linux; + python = pkgs.python311; + pythonPackages = python.pkgs; + + modelUrl = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/hfc_female/medium/en_US-hfc_female-medium.onnx?download=true"; + modelJsonUrl = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/hfc_female/medium/en_US-hfc_female-medium.onnx.json?download=true.json"; + + model = pkgs.fetchurl { + name = "hfc_female/medium/en_US-hfc_female-medium.onnx"; + url = modelUrl; + sha256 = "sha256-kUxHN4j8H6i2Os4c3NtEWI9K5SPTqzffFTZhaDWhQLc="; # replace with the correct sha256 + }; + + modelJson = pkgs.fetchurl { + name = "hfc_female/medium/en_US-hfc_female-medium.onnx.json"; + url = modelJsonUrl; + sha256 = "sha256-A/H6BiK4BGMoNZLZesqfbomuw0WlxWtyV3I+AJPFi2w="; # replace with the correct sha256 + }; + in pkgs.mkShell { + nativeBuildInputs = with pkgs; [ + python + piper-tts + alsa-utils # for aplay for piper-tts to stream to + + (pythonPackages.pypdf) + + ]; + + shellHook = '' + mkdir -p piper-models + cp ${model} piper-models/ + cp ${modelJson} piper-models/ 
# NOTE: in the collapsed patch, this physical line also carried the final
# lines of flake.nix (closing the shellHook string and the attribute sets):
#       '';
#     };
#   };
# }
# followed by the diff header for pdf-speak.py, whose contents follow.
"""Read one page of a text PDF aloud, sentence by sentence, using piper-tts."""

import argparse
import subprocess
from pathlib import Path

# Directory the dev shell copies the voice model files into.
MODEL_DIR = "piper-models"

# File names as first produced by the dev shell. They embed store hashes, so
# they are only tried first; see _resolve_model_paths for the fallback.
DEFAULT_MODEL_PATH = (
    "piper-models/"
    "dy5s1ri7ixy1c27fg4adaf8ji3hmqiic-hfc_female-medium-en_US-hfc_female-medium.onnx"
)
DEFAULT_MODEL_JSON_PATH = (
    "piper-models/"
    "ihfq9facjxhl8b8z3afhn1kisq5wsghg-hfc_female-medium-en_US-hfc_female-medium.onnx.json"
)

# Raw-audio parameters handed to aplay (unchanged from the original command).
SAMPLE_RATE = "22050"
SAMPLE_FORMAT = "S16_LE"


def _resolve_model_paths(model_dir=MODEL_DIR):
    """Return ``(model_path, config_path)`` for the piper voice.

    The historical hard-coded file names are used when both exist. Otherwise
    the first ``*.onnx`` and first ``*.onnx.json`` file (sorted by name) in
    *model_dir* are used, so a changed store hash does not break the script.

    Raises:
        FileNotFoundError: if no model or no config file can be found.
    """
    if Path(DEFAULT_MODEL_PATH).is_file() and Path(DEFAULT_MODEL_JSON_PATH).is_file():
        return DEFAULT_MODEL_PATH, DEFAULT_MODEL_JSON_PATH

    directory = Path(model_dir)
    models = sorted(directory.glob("*.onnx"))
    configs = sorted(directory.glob("*.onnx.json"))
    if not models or not configs:
        raise FileNotFoundError(
            f"no piper model (*.onnx) with config (*.onnx.json) found in {model_dir!r}"
        )
    return str(models[0]), str(configs[0])


def split_sentences(text):
    """Split extracted page text into a list of non-empty sentences.

    Line breaks and runs of whitespace are collapsed to single spaces (so
    words at line ends are not glued together), the text is split on ". ",
    and empty fragments are dropped.
    """
    flattened = " ".join(text.split())
    fragments = (part.strip() for part in flattened.split(". "))
    return [fragment for fragment in fragments if fragment]


def tts(text):
    """Speak *text* by piping it through ``piper`` into ``aplay``.

    The text is written to piper's stdin rather than interpolated into a
    shell command, so quotes, ``$`` or backticks in the PDF text cannot break
    or inject into the command line. Blank text is ignored.

    Raises:
        FileNotFoundError: if the model files or an executable are missing.
        subprocess.CalledProcessError: if ``aplay`` or ``piper`` exits non-zero.
    """
    sentence = text.strip() if text else ""
    if not sentence:
        return

    # Splitting on ". " removed the period; restore it unless the fragment
    # already ends a sentence (as the last fragment of a page usually does).
    if not sentence.endswith((".", "!", "?")):
        sentence += "."
    payload = f"{sentence} \n".encode("utf-8")

    model_path, config_path = _resolve_model_paths()
    piper_cmd = ["piper", "-q", "-m", model_path, "-c", config_path, "--output-raw"]
    aplay_cmd = ["aplay", "-q", "-r", SAMPLE_RATE, "-f", SAMPLE_FORMAT, "-t", "raw", "-"]

    with subprocess.Popen(
        piper_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE
    ) as piper:
        with subprocess.Popen(aplay_cmd, stdin=piper.stdout) as aplay:
            # Drop our copy of the read end so piper sees a broken pipe if
            # aplay exits early instead of blocking forever.
            piper.stdout.close()
            try:
                piper.stdin.write(payload)
                piper.stdin.close()
            except BrokenPipeError:
                # piper died early; the return-code checks below report it.
                pass
            aplay.wait()
        piper.wait()

    if aplay.returncode:
        raise subprocess.CalledProcessError(aplay.returncode, aplay_cmd)
    if piper.returncode:
        raise subprocess.CalledProcessError(piper.returncode, piper_cmd)


def read_pdf(pdf_name, page_num):
    """Print and speak every sentence on page *page_num* of *pdf_name*.

    Args:
        pdf_name: path to the PDF file.
        page_num: 1-based page number.

    Raises:
        IndexError: if *page_num* is outside ``1..number_of_pages``.
    """
    # Imported here so the text and speech helpers stay importable on
    # systems where pypdf is not installed.
    from pypdf import PdfReader

    reader = PdfReader(pdf_name)
    page_count = len(reader.pages)
    # Reject 0 and negatives explicitly: index -1 would silently read the
    # last page instead of failing.
    if not 1 <= page_num <= page_count:
        raise IndexError(
            f"page {page_num} is out of range; the PDF has {page_count} page(s)"
        )

    page_text = reader.pages[page_num - 1].extract_text() or ""
    for sentence in split_sentences(page_text):
        print(sentence)
        tts(sentence)


def main():
    """Parse the command line and read the requested page aloud."""
    parser = argparse.ArgumentParser(description='Read PDF and convert to speech')
    parser.add_argument("-f", '--pdf-file', type=str, required=True,
                        help='Path to the PDF file')
    parser.add_argument("-p", '--page', type=int, default=1,
                        help='Page number to read from the PDF')
    args = parser.parse_args()
    read_pdf(args.pdf_file, args.page)


if __name__ == "__main__":
    main()