init proof of concept
This commit is contained in:
commit
67439d615c
|
@ -0,0 +1,4 @@
|
||||||
|
*/__pycache__/*
|
||||||
|
__pycache__/*
|
||||||
|
piper-models
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
# pdf-speak
|
||||||
|
|
||||||
|
This is a small side project I made to read a text PDF out loud, sentence by sentence, using piper-tts.
|
||||||
|
|
||||||
|
The project is built with Nix and should be run inside the development environment provided by flake.nix:
|
||||||
|
|
||||||
|
nix develop
|
||||||
|
python pdf-to-piper.py -f YOURPDF -p PAGENUMBER
|
||||||
|
|
||||||
|
Only tested on a Linux system.
|
||||||
|
No warranty provided
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
{
|
||||||
|
"nodes": {
|
||||||
|
"nixpkgs": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1713714899,
|
||||||
|
"narHash": "sha256-+z/XjO3QJs5rLE5UOf015gdVauVRQd2vZtsFkaXBq2Y=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "6143fc5eeb9c4f00163267708e26191d1e918932",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"id": "nixpkgs",
|
||||||
|
"ref": "nixos-unstable",
|
||||||
|
"type": "indirect"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": {
|
||||||
|
"inputs": {
|
||||||
|
"nixpkgs": "nixpkgs"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": "root",
|
||||||
|
"version": 7
|
||||||
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
# Flake for the pdf-speak development shell: Python 3.11 with pypdf, piper-tts,
# aplay, and a pinned Piper voice model that is copied into ./piper-models when
# the shell starts.
{
  description = "A simple flake";

  # Follows the nixos-unstable branch; the exact revision is pinned by flake.lock.
  inputs.nixpkgs.url = "nixpkgs/nixos-unstable";

  outputs = { self, nixpkgs }: {

    # Only x86_64-linux is defined. The "package" is a mkShell environment.
    defaultPackage.x86_64-linux = let
      pkgs = nixpkgs.legacyPackages.x86_64-linux;
      python = pkgs.python311;
      pythonPackages = python.pkgs;

      # Piper voice (en_US, hfc_female, medium) from the piper-voices v1.0.0 tag.
      modelUrl = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/hfc_female/medium/en_US-hfc_female-medium.onnx?download=true";
      # NOTE(review): the query string ends in "download=true.json", unlike
      # modelUrl above -- confirm whether the trailing ".json" is intentional.
      modelJsonUrl = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/hfc_female/medium/en_US-hfc_female-medium.onnx.json?download=true.json";

      # ONNX voice model, fetched as a fixed-output derivation.
      model = pkgs.fetchurl {
        name = "hfc_female/medium/en_US-hfc_female-medium.onnx";
        url = modelUrl;
        sha256 = "sha256-kUxHN4j8H6i2Os4c3NtEWI9K5SPTqzffFTZhaDWhQLc="; # expected hash of the downloaded model; update it if the URL changes
      };

      # JSON configuration that accompanies the voice model.
      modelJson = pkgs.fetchurl {
        name = "hfc_female/medium/en_US-hfc_female-medium.onnx.json";
        url = modelJsonUrl;
        sha256 = "sha256-A/H6BiK4BGMoNZLZesqfbomuw0WlxWtyV3I+AJPFi2w="; # expected hash of the downloaded config; update it if the URL changes
      };
    in pkgs.mkShell {
      # Tools made available inside the shell.
      nativeBuildInputs = with pkgs; [
        python
        piper-tts
        alsa-utils # provides aplay, which piper-tts streams its audio to

        (pythonPackages.pypdf)

      ];

      # Runs on shell entry: copies both model files out of the Nix store into
      # ./piper-models. No destination file name is given, so each copy keeps
      # the base name of its store path.
      shellHook = ''
        mkdir -p piper-models
        cp ${model} piper-models/
        cp ${modelJson} piper-models/
      '';
    };
  };
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
from pypdf import PdfReader
|
||||||
|
import subprocess
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
def tts(text):
    """Speak ``text`` aloud by streaming piper's raw audio into aplay.

    The text is written to piper's stdin instead of being spliced into a
    shell command line, so quotes, ``$``, backticks and other shell
    metacharacters found in the PDF can neither break the pipeline nor be
    executed as commands.

    Args:
        text: The sentence to speak. ``". "`` is appended before synthesis.

    Raises:
        subprocess.CalledProcessError: If aplay or piper exits with a
            non-zero status.
        FileNotFoundError: If ``piper`` or ``aplay`` is not on ``PATH``.
    """
    # The hash prefixes come from the Nix store paths the files were copied from.
    piper_model_json_path = "piper-models/ihfq9facjxhl8b8z3afhn1kisq5wsghg-hfc_female-medium-en_US-hfc_female-medium.onnx.json"
    piper_model_path = "piper-models/dy5s1ri7ixy1c27fg4adaf8ji3hmqiic-hfc_female-medium-en_US-hfc_female-medium.onnx"

    piper_cmd = [
        "piper", "-q",
        "-m", piper_model_path,
        "-c", piper_model_json_path,
        "--output-raw",
    ]
    # Raw signed 16-bit little-endian PCM at 22050 Hz, read from stdin ("-").
    aplay_cmd = ["aplay", "-q", "-r", "22050", "-f", "S16_LE", "-t", "raw", "-"]

    with subprocess.Popen(
        piper_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE
    ) as piper:
        with subprocess.Popen(aplay_cmd, stdin=piper.stdout) as aplay:
            # Drop our copy of the read end so aplay is the pipe's only
            # reader and piper is notified if aplay exits early.
            piper.stdout.close()
            piper.stdin.write(f"{text}. \n".encode("utf-8"))
            # Closing stdin signals end of input so piper can finish.
            piper.stdin.close()
            aplay_status = aplay.wait()
        piper_status = piper.wait()

    # Report aplay first: it is the last stage of the pipeline, whose status
    # is what a shell pipeline would have reported.
    for status, cmd in ((aplay_status, aplay_cmd), (piper_status, piper_cmd)):
        if status != 0:
            raise subprocess.CalledProcessError(status, cmd)
|
||||||
|
|
||||||
|
def read_pdf(pdf_name, page_num):
    """Read one page of a PDF aloud, sentence by sentence.

    Each sentence is printed and then passed to ``tts``.

    Args:
        pdf_name: Path to the PDF file.
        page_num: 1-based number of the page to read.

    Raises:
        IndexError: If ``page_num`` is not between 1 and the number of pages
            in the document.
    """
    reader = PdfReader(pdf_name)
    page_count = len(reader.pages)
    # Reject 0 and negative numbers explicitly; ``page_num - 1`` would
    # otherwise index from the end and silently read the wrong page.
    if not 1 <= page_num <= page_count:
        raise IndexError(
            f"page {page_num} is out of range; the document has "
            f"{page_count} page(s)"
        )

    raw_text = reader.pages[page_num - 1].extract_text() or ""
    # Line breaks in extracted text are layout, not content. Collapse every
    # run of whitespace to a single space so the last word of one line is not
    # fused with the first word of the next.
    flattened = " ".join(raw_text.split())

    for sentence in flattened.split(". "):
        sentence = sentence.strip()
        if not sentence:
            # Nothing to say, e.g. an empty page or consecutive separators.
            continue
        print(sentence)
        tts(sentence)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: parse the PDF path and page number, then read
    # that page aloud.
    parser = argparse.ArgumentParser(description='Read PDF and convert to speech')
    # The PDF path is mandatory: without it read_pdf would be called with None
    # and fail with an opaque error instead of a usage message.
    parser.add_argument("-f", '--pdf-file', type=str, required=True, help='Path to the PDF file')
    parser.add_argument("-p", '--page', type=int, default=1, help='Page number to read from the PDF')

    args = parser.parse_args()
    read_pdf(args.pdf_file, args.page)
|
Loading…
Reference in New Issue