init proof of concept
This commit is contained in:
commit
67439d615c
|
@ -0,0 +1,4 @@
|
|||
*/__pycache__/*
|
||||
__pycache__/*
|
||||
piper-models
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
# pdf-speak
|
||||
|
||||
This is a small side project I made to read a text PDF out loud, sentence by sentence, using piper-tts.
|
||||
|
||||
The project is built with Nix and should be run inside the development environment provided by flake.nix:
|
||||
|
||||
nix develop
|
||||
python pdf-to-piper.py -f YOURPDF -p PAGENUMBER
|
||||
|
||||
Only tested on a Linux system.
|
||||
No warranty provided
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"nodes": {
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1713714899,
|
||||
"narHash": "sha256-+z/XjO3QJs5rLE5UOf015gdVauVRQd2vZtsFkaXBq2Y=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "6143fc5eeb9c4f00163267708e26191d1e918932",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"id": "nixpkgs",
|
||||
"ref": "nixos-unstable",
|
||||
"type": "indirect"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
{
  description = "A simple flake";

  inputs.nixpkgs.url = "nixpkgs/nixos-unstable";

  outputs = { self, nixpkgs }:
    let
      system = "x86_64-linux";
      pkgs = nixpkgs.legacyPackages.${system};
      python = pkgs.python311;
      pythonPackages = python.pkgs;

      # Piper voice (ONNX weights) and its JSON metadata, pinned to the
      # v1.0.0 tag of the rhasspy/piper-voices repository.
      modelUrl = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/hfc_female/medium/en_US-hfc_female-medium.onnx?download=true";
      modelJsonUrl = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/hfc_female/medium/en_US-hfc_female-medium.onnx.json?download=true";

      model = pkgs.fetchurl {
        name = "hfc_female/medium/en_US-hfc_female-medium.onnx";
        url = modelUrl;
        # Pinned content hash; update it whenever modelUrl changes.
        sha256 = "sha256-kUxHN4j8H6i2Os4c3NtEWI9K5SPTqzffFTZhaDWhQLc=";
      };

      modelJson = pkgs.fetchurl {
        name = "hfc_female/medium/en_US-hfc_female-medium.onnx.json";
        url = modelJsonUrl;
        # Pinned content hash; update it whenever modelJsonUrl changes.
        sha256 = "sha256-A/H6BiK4BGMoNZLZesqfbomuw0WlxWtyV3I+AJPFi2w=";
      };

      # Development shell providing Python, pypdf, piper-tts and aplay.
      shell = pkgs.mkShell {
        nativeBuildInputs = with pkgs; [
          python
          piper-tts
          alsa-utils # for aplay for piper-tts to stream to

          (pythonPackages.pypdf)
        ];

        # Copy the voice files next to the script on shell entry.
        # -f: files copied out of the store are read-only, so a plain `cp`
        # cannot overwrite them the next time the shell is entered.
        shellHook = ''
          mkdir -p piper-models
          cp -f ${model} piper-models/
          cp -f ${modelJson} piper-models/
        '';
      };
    in
    {
      # Output that `nix develop` looks up first.
      devShells.${system}.default = shell;

      # Kept so existing references to the old output name keep working.
      defaultPackage.${system} = shell;
    };
}
|
|
@ -0,0 +1,29 @@
|
|||
from pypdf import PdfReader
|
||||
import subprocess
|
||||
import argparse
|
||||
|
||||
|
||||
def tts(text):
    """Speak *text* aloud by piping it through piper into aplay.

    The text is written to piper's standard input and piper's raw audio
    output is streamed straight into aplay. No shell is involved, so
    quotes, ``$``, backticks or other shell metacharacters in the text
    cannot break the command or be executed.

    Args:
        text: The sentence to speak. A trailing ". " is appended before it
            is handed to piper.

    Raises:
        subprocess.CalledProcessError: If piper or aplay exits with a
            non-zero status.
        OSError: If either program cannot be started.
    """
    piper_model_json_path = "piper-models/ihfq9facjxhl8b8z3afhn1kisq5wsghg-hfc_female-medium-en_US-hfc_female-medium.onnx.json"
    piper_model_path = "piper-models/dy5s1ri7ixy1c27fg4adaf8ji3hmqiic-hfc_female-medium-en_US-hfc_female-medium.onnx"

    piper_cmd = [
        "piper", "-q",
        "-m", piper_model_path,
        "-c", piper_model_json_path,
        "--output-raw",
    ]
    aplay_cmd = ["aplay", "-q", "-r", "22050", "-f", "S16_LE", "-t", "raw", "-"]

    with subprocess.Popen(
        piper_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE
    ) as piper:
        with subprocess.Popen(aplay_cmd, stdin=piper.stdout) as aplay:
            # Drop our copy of the read end so piper sees a broken pipe
            # (instead of blocking forever) if aplay exits early.
            piper.stdout.close()
            try:
                piper.stdin.write(f"{text}. \n".encode("utf-8"))
                piper.stdin.close()
            except BrokenPipeError:
                # piper died before reading its input; the exit-status
                # check below reports the failure.
                pass
            aplay.wait()
        piper.wait()

    for proc, cmd in ((piper, piper_cmd), (aplay, aplay_cmd)):
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, cmd)
|
||||
|
||||
def read_pdf(pdf_name, page_num):
    """Read one page of a PDF aloud, one sentence at a time.

    Each sentence is printed and then passed to ``tts``.

    Args:
        pdf_name: Path to the PDF file.
        page_num: 1-based number of the page to read.

    Raises:
        IndexError: If *page_num* is not between 1 and the number of pages.
    """
    reader = PdfReader(pdf_name)
    page_count = len(reader.pages)
    # Reject 0 and negative numbers explicitly: as list indices they would
    # silently wrap around and read a page from the end of the document.
    if not 1 <= page_num <= page_count:
        raise IndexError(
            f"page {page_num} is out of range; the PDF has {page_count} page(s)"
        )
    page = reader.pages[page_num - 1]

    # Collapse line breaks (and any other whitespace runs) to single spaces
    # so that words on either side of a line break are not fused together.
    flat_text = " ".join((page.extract_text() or "").split())

    for sentence in flat_text.split(". "):
        sentence = sentence.strip()
        if not sentence:
            # Nothing to say; skip instead of starting the synthesizer.
            continue
        print(sentence)
        tts(sentence)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the PDF path and page number, then
    # read that page aloud.
    parser = argparse.ArgumentParser(description='Read PDF and convert to speech')
    # The path is mandatory; without it there is nothing to open, so let
    # argparse report the problem with a usage message.
    parser.add_argument("-f", '--pdf-file', type=str, required=True, help='Path to the PDF file')
    parser.add_argument("-p", '--page', type=int, default=1, help='Page number to read from the PDF')

    args = parser.parse_args()
    read_pdf(args.pdf_file, args.page)
|
Loading…
Reference in New Issue