init proof of concept

This commit is contained in:
Adrian Gunnar Lauterer 2024-05-04 16:58:14 +02:00
commit 67439d615c
5 changed files with 115 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
*/__pycache__/*
__pycache__/*
piper-models

12
README.md Normal file
View File

@ -0,0 +1,12 @@
# pdf-speak
This is a small side project I made to read a text PDF out loud, sentence by sentence, using piper-tts.
The project is built with Nix and should be run inside the development environment provided by flake.nix:
nix develop
python pdf-speak.py -f YOURPDF -p PAGENUMBER
Only tested on a Linux system.
No warranty provided

26
flake.lock Normal file
View File

@ -0,0 +1,26 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1713714899,
"narHash": "sha256-+z/XjO3QJs5rLE5UOf015gdVauVRQd2vZtsFkaXBq2Y=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "6143fc5eeb9c4f00163267708e26191d1e918932",
"type": "github"
},
"original": {
"id": "nixpkgs",
"ref": "nixos-unstable",
"type": "indirect"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

44
flake.nix Normal file
View File

@ -0,0 +1,44 @@
{
  description = "A simple flake";
  inputs.nixpkgs.url = "nixpkgs/nixos-unstable";
  outputs = { self, nixpkgs }: {
    # Development shell for pdf-speak (x86_64-linux only), entered with
    # `nix develop`.  It provides Python with pypdf, piper-tts and aplay, and
    # drops the piper voice model into ./piper-models.
    defaultPackage.x86_64-linux = let
      pkgs = nixpkgs.legacyPackages.x86_64-linux;
      python = pkgs.python311;
      pythonPackages = python.pkgs;
      # Piper voice (en_US, hfc_female, medium) and its JSON config, pinned to
      # the v1.0.0 tag of the rhasspy/piper-voices repository.
      modelUrl = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/hfc_female/medium/en_US-hfc_female-medium.onnx?download=true";
      modelJsonUrl = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/hfc_female/medium/en_US-hfc_female-medium.onnx.json?download=true.json";
      model = pkgs.fetchurl {
        name = "hfc_female/medium/en_US-hfc_female-medium.onnx";
        url = modelUrl;
        sha256 = "sha256-kUxHN4j8H6i2Os4c3NtEWI9K5SPTqzffFTZhaDWhQLc="; # hash of the download; update together with modelUrl
      };
      modelJson = pkgs.fetchurl {
        name = "hfc_female/medium/en_US-hfc_female-medium.onnx.json";
        url = modelJsonUrl;
        sha256 = "sha256-A/H6BiK4BGMoNZLZesqfbomuw0WlxWtyV3I+AJPFi2w="; # hash of the download; update together with modelJsonUrl
      };
    in pkgs.mkShell {
      nativeBuildInputs = with pkgs; [
        python
        piper-tts
        alsa-utils # for aplay for piper-tts to stream to
        (pythonPackages.pypdf)
      ];
      # Copy the model files next to the script on shell entry.  The copies
      # keep their store basename (hash prefix included), which is the name
      # pdf-speak.py hard-codes.  Files copied out of the Nix store are
      # read-only, so a plain `cp` fails with "Permission denied" the second
      # time the shell is entered; `-f` removes the stale copy and retries.
      shellHook = ''
        mkdir -p piper-models
        cp -f ${model} piper-models/
        cp -f ${modelJson} piper-models/
      '';
    };
  };
}

29
pdf-speak.py Normal file
View File

@ -0,0 +1,29 @@
from pypdf import PdfReader
import subprocess
import argparse
def tts(text):
    """Speak ``text`` aloud by piping it through piper into aplay.

    The text is written to piper's standard input and piper's raw audio
    output is streamed straight into aplay.  No shell is involved, so quotes,
    ``$``, backticks and similar characters taken from a PDF are passed
    through verbatim instead of being interpreted as shell syntax.

    Args:
        text: The sentence to speak.  Blank or whitespace-only input is
            skipped without starting any process.

    Raises:
        subprocess.CalledProcessError: if piper or aplay exits with a
            non-zero status.
        FileNotFoundError: if the piper or aplay executable is not on PATH.
    """
    if not text or not text.strip():
        # Nothing to say; avoid spawning two processes for an empty fragment.
        return

    # File names carry the Nix store hash prefix because the flake's shellHook
    # copies the store paths into piper-models/ under their store basename.
    piper_model_json_path = "piper-models/ihfq9facjxhl8b8z3afhn1kisq5wsghg-hfc_female-medium-en_US-hfc_female-medium.onnx.json"
    piper_model_path = "piper-models/dy5s1ri7ixy1c27fg4adaf8ji3hmqiic-hfc_female-medium-en_US-hfc_female-medium.onnx"

    piper_cmd = ["piper", "-q", "-m", piper_model_path, "-c", piper_model_json_path, "--output-raw"]
    # Raw signed 16-bit little-endian samples at 22050 Hz, read from stdin.
    aplay_cmd = ["aplay", "-q", "-r", "22050", "-f", "S16_LE", "-t", "raw", "-"]

    with subprocess.Popen(piper_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) as piper:
        with subprocess.Popen(aplay_cmd, stdin=piper.stdout) as aplay:
            # Drop our copy of the read end so piper sees a broken pipe if
            # aplay exits early instead of blocking forever.
            piper.stdout.close()
            try:
                # Same payload the former `echo "<text>. "` produced.
                piper.stdin.write(f"{text}. \n".encode("utf-8"))
                piper.stdin.close()
            except BrokenPipeError:
                # piper died before reading its input; reported via its
                # exit status below.
                pass
            aplay_status = aplay.wait()
        piper_status = piper.wait()

    for status, cmd in ((piper_status, piper_cmd), (aplay_status, aplay_cmd)):
        if status != 0:
            raise subprocess.CalledProcessError(status, cmd)
def read_pdf(pdf_name, page_num):
    """Read one page of a PDF aloud, sentence by sentence.

    Each sentence is printed and then passed to ``tts``.

    Args:
        pdf_name: Path to the PDF file.
        page_num: 1-based number of the page to read.

    Raises:
        IndexError: if ``page_num`` is not between 1 and the number of pages.
            Values below 1 used to wrap around to pages counted from the end
            of the document through negative indexing.
    """
    reader = PdfReader(pdf_name)
    page_count = len(reader.pages)
    if not 1 <= page_num <= page_count:
        raise IndexError(
            f"page {page_num} is out of range; the PDF has {page_count} page(s)"
        )

    raw_text = reader.pages[page_num - 1].extract_text() or ""
    # Line breaks become single spaces.  Deleting them outright glued the last
    # word of one line to the first word of the next.
    flattened = " ".join(raw_text.split())

    for sentence in flattened.split(". "):
        sentence = sentence.strip()
        if not sentence:
            # Skip empty fragments (blank page, trailing separator).
            continue
        print(sentence)
        tts(sentence)
if __name__ == "__main__":
    # Command-line entry point: read one page of the given PDF out loud.
    parser = argparse.ArgumentParser(description='Read PDF and convert to speech')
    # The PDF path is mandatory.  Without required=True a missing -f silently
    # became None and only failed later, deep inside the PDF reader.
    parser.add_argument("-f", '--pdf-file', type=str, required=True, help='Path to the PDF file')
    parser.add_argument("-p", '--page', type=int, default=1, help='Page number to read from the PDF')
    args = parser.parse_args()
    read_pdf(args.pdf_file, args.page)