# Nix flake source (original listing: 83 lines, 2.3 KiB).
{
  # ---------------------------------------------------------------------------
  # Inputs
  #
  # Everything listed here is fetched by Nix and handed to `outputs` below.
  # Entries marked `flake = false` are fetched as plain sources; they are not
  # evaluated as flakes.
  # ---------------------------------------------------------------------------
  inputs = {
    # Package set used for every output of this flake.
    nixpkgs.url = "nixpkgs/nixos-22.11";

    # Gzip-compressed JMdict dictionary file (see the `jmdict` package below).
    JMdictSrc = {
      url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz";
      flake = false;
    };

    # Tatoeba export: Japanese indices archive.
    tatoeba_jpn_indices = {
      url = "https://downloads.tatoeba.org/exports/jpn_indices.tar.bz2";
      flake = false;
    };

    # Tatoeba export: English sentences.
    tatoeba_eng_sentences = {
      url = "https://downloads.tatoeba.org/exports/per_language/eng/eng_sentences.tsv.bz2";
      flake = false;
    };
  };

  # ---------------------------------------------------------------------------
  # Outputs
  #
  # Provides, for x86_64-linux only:
  #   packages.unidic        - built from ./nix/unidic.nix
  #   packages.mecab-unidic  - built from ./nix/mecab-unidic.nix
  #   packages.jmdict        - the JMdict XML, decompressed and re-formatted
  #   devShells.default      - Python development shell
  #
  # NOTE(review): `tatoeba_jpn_indices` and `tatoeba_eng_sentences` are accepted
  # as arguments but are not referenced by any output in this file.
  # ---------------------------------------------------------------------------
  outputs = { self, nixpkgs, JMdictSrc, tatoeba_jpn_indices, tatoeba_eng_sentences }: let
    # The only platform this flake exposes outputs for.
    system = "x86_64-linux";
    pkgs = nixpkgs.legacyPackages.${system};
    inherit (pkgs) lib;
  in {

    packages.${system} = {
      # The whole python3Packages set is passed as callPackage's explicit
      # argument set.
      # NOTE(review): this presumably requires ./nix/unidic.nix to accept extra
      # arguments (`...`); `pkgs.python3Packages.callPackage ./nix/unidic.nix { }`
      # may be the intended form - confirm against that file.
      unidic = pkgs.callPackage ./nix/unidic.nix pkgs.python3Packages;

      mecab-unidic = pkgs.callPackage ./nix/mecab-unidic.nix {
        # Imports the mecab base definition directly from the pinned nixpkgs
        # source tree.
        # NOTE(review): depends on the argument list of base.nix in the pinned
        # nixpkgs revision - re-check when bumping the nixpkgs input.
        mecab-base = import "${nixpkgs}/pkgs/tools/text/mecab/base.nix" { inherit (pkgs) fetchurl; };
        inherit (self.packages.${system}) unidic;
      };

      # Decompresses the JMdict input and runs it through xmlformat.
      # Result: $out/JMdict.xml
      jmdict = pkgs.stdenvNoCC.mkDerivation {
        name = "JMdict";

        src = JMdictSrc;
        # The source is a single gzip file, not an archive; it is decompressed
        # by hand in buildPhase instead of by the generic unpacker.
        dontUnpack = true;

        nativeBuildInputs = [ pkgs.xmlformat ];

        # runHook calls keep preBuild/postBuild overrides working even though
        # the phase body itself is replaced.
        buildPhase = ''
          runHook preBuild

          gzip -dc "$src" > JMdict.xml
          xmlformat -i JMdict.xml

          runHook postBuild
        '';

        installPhase = ''
          runHook preInstall

          mkdir -p "$out"
          cp JMdict.xml "$out"

          runHook postInstall
        '';

        meta = {
          description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words";
          homepage = "https://www.edrdg.org/jmdict/j_jmdict.html";
          # Custom license record, written inline in this flake.
          license = {
            shortName = "EDRDG";
            fullName = "Electronic Dictionary Research and Development Group General Dictionary Licence";
            url = "http://www.csse.monash.edu.au/~jwb/edrdg/licence.html";
          };
          # Maintainers are attribute sets (not strings) so the metadata also
          # passes nixpkgs' meta type checking.
          maintainers = [
            { name = "h7x4"; email = "h7x4@nani.wtf"; }
          ];
          platforms = lib.platforms.all;
        };
      };

    };

    # Development shell: the local unidic package plus Python tooling.
    devShells.${system}.default = pkgs.mkShell {
      packages = [
        self.packages.${system}.unidic
      ] ++ (with pkgs.python3Packages; [
        # flask
        # mecab-python3 rebuilt against this flake's mecab-unidic package.
        (mecab-python3.override { mecab = self.packages.${system}.mecab-unidic; })
        nltk
        pandas
        python
        requests
        scikit-learn
        spacy
        sqlalchemy
        wget
      ]);
    };
  };
}