diff --git a/flake.lock b/flake.lock index 204f575..6867cce 100644 --- a/flake.lock +++ b/flake.lock @@ -1,48 +1,32 @@ { "nodes": { - "jmdict-src": { - "flake": false, + "datasources": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, "locked": { - "narHash": "sha256-eOc3a/AYNRFF3w6lWhyf0Sh92xeXS7+9Qvn0tvvH6Ys=", - "type": "file", - "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz" + "lastModified": 1775550160, + "narHash": "sha256-bgvKrMGUPaDY4EZv+82z1ccYoxwaergdVw/3PZhc2Fc=", + "ref": "refs/heads/main", + "rev": "f46229af3678124c5ea7c8dff3292747d0274f69", + "revCount": 8, + "type": "git", + "url": "https://git.pvv.ntnu.no/Mugiten/datasources.git" }, "original": { - "type": "file", - "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz" - } - }, - "jmdict-with-examples-src": { - "flake": false, - "locked": { - "narHash": "sha256-nx+WMkscWvA/XImKM7NESYVmICwSgXWOO1KPXasHY94=", - "type": "file", - "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz" - }, - "original": { - "type": "file", - "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz" - } - }, - "kanjidic2-src": { - "flake": false, - "locked": { - "narHash": "sha256-2T/cAS/kZmVMURStgHVhz524+J9+v5onKs8eEYf2fY0=", - "type": "file", - "url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz" - }, - "original": { - "type": "file", - "url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz" + "type": "git", + "url": "https://git.pvv.ntnu.no/Mugiten/datasources.git" } }, "nixpkgs": { "locked": { - "lastModified": 1774386573, - "narHash": "sha256-4hAV26quOxdC6iyG7kYaZcM3VOskcPUrdCQd/nx8obc=", + "lastModified": 1775423009, + "narHash": "sha256-vPKLpjhIVWdDrfiUM8atW6YkIggCEKdSAlJPzzhkQlw=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9", + "rev": "68d8aa3d661f0e6bd5862291b5bb263b2a6595c9", "type": "github" }, "original": { @@ -51,25 +35,10 @@ "type": "indirect" } }, - "radkfile-src": { - "flake": false, - "locked": { - "narHash": "sha256-DHpMUE2Umje8PbzXUCS6pHZeXQ5+WTxbjSkGU3erDHQ=", - "type": "file", - "url": "http://ftp.edrdg.org/pub/Nihongo/radkfile.gz" - }, - "original": { - "type": "file", - "url": "http://ftp.edrdg.org/pub/Nihongo/radkfile.gz" - } - }, "root": { "inputs": { - "jmdict-src": "jmdict-src", - "jmdict-with-examples-src": "jmdict-with-examples-src", - "kanjidic2-src": "kanjidic2-src", - "nixpkgs": "nixpkgs", - "radkfile-src": "radkfile-src" + "datasources": "datasources", + "nixpkgs": "nixpkgs" } } }, diff --git a/flake.nix b/flake.nix index cfa0ea8..854ce68 100644 --- a/flake.nix +++ b/flake.nix @@ -4,35 +4,16 @@ inputs = { nixpkgs.url = "nixpkgs/nixos-unstable"; - jmdict-src = { - # url = "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"; - url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"; - flake = false; - }; - - jmdict-with-examples-src = { - url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"; - flake = false; - }; - - radkfile-src = { - url = "http://ftp.edrdg.org/pub/Nihongo/radkfile.gz"; - flake = false; - }; - - kanjidic2-src = { - url = "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"; - flake = false; + datasources = { + url = "git+https://git.pvv.ntnu.no/Mugiten/datasources.git"; + inputs.nixpkgs.follows = "nixpkgs"; }; }; outputs = { self, nixpkgs, - jmdict-src, - jmdict-with-examples-src, - radkfile-src, - kanjidic2-src + datasources, }: let inherit (nixpkgs) lib; systems = [ @@ -135,29 +116,21 @@ ln -s ${src} $out ''; - jmdict = pkgs.callPackage ./nix/jmdict.nix { - inherit jmdict-src jmdict-with-examples-src edrdgMetadata; - }; - - radkfile = pkgs.callPackage ./nix/radkfile.nix { - inherit radkfile-src edrdgMetadata; - }; - - kanjidic2 = pkgs.callPackage ./nix/kanjidic2.nix { - inherit kanjidic2-src edrdgMetadata; - }; + inherit (datasources.packages.${system}) jmdict radkfile kanjidic2; database-tool = pkgs.callPackage ./nix/database_tool.nix { inherit src; }; database = pkgs.callPackage ./nix/database.nix { - inherit (self.packages.${system}) database-tool jmdict radkfile kanjidic2; + inherit (datasources.packages.${system}) jmdict radkfile kanjidic2; + inherit (self.packages.${system}) database-tool; inherit src; }; database-wal = pkgs.callPackage ./nix/database.nix { - inherit (self.packages.${system}) database-tool jmdict radkfile kanjidic2; + inherit (datasources.packages.${system}) jmdict radkfile kanjidic2; + inherit (self.packages.${system}) database-tool; inherit src; wal = true; }; diff --git a/lib/_data_ingestion/radkfile/parser.dart b/lib/_data_ingestion/radkfile/parser.dart index 7f419d1..6d530af 100644 --- a/lib/_data_ingestion/radkfile/parser.dart +++ b/lib/_data_ingestion/radkfile/parser.dart @@ -1,7 +1,7 @@ import 'dart:io'; Iterable parseRADKFILEBlocks(File radkfile) { - final String content = File('data/tmp/radkfile_utf8').readAsStringSync(); + final String content = File('data/tmp/RADKFILE').readAsStringSync(); final Iterable blocks = content .replaceAll(RegExp(r'^#.*$'), '') diff --git a/nix/jmdict.nix b/nix/jmdict.nix deleted file mode 100644 index 5b4e743..0000000 --- a/nix/jmdict.nix +++ /dev/null @@ -1,46 +0,0 @@ -{ - stdenvNoCC, - jmdict-src, - jmdict-with-examples-src, - xmlformat, - gzip, - edrdgMetadata, -}: -stdenvNoCC.mkDerivation { - name = "jmdict"; - - dontUnpack = true; - srcs = [ - jmdict-src - jmdict-with-examples-src - ]; - - nativeBuildInputs = [ - gzip - xmlformat - ]; - - buildPhase = '' - runHook preBuild - - gzip -dkc "${jmdict-src}" > JMdict.xml - gzip -dkc "${jmdict-with-examples-src}" > JMdict_with_examples.xml - xmlformat -i JMdict.xml - xmlformat -i JMdict_with_examples.xml - - runHook postBuild - ''; - - installPhase = '' - runHook preInstall - - install -Dt "$out" JMdict.xml JMdict_with_examples.xml - - runHook postInstall - ''; - - meta = edrdgMetadata // { - description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words"; - homepage = "https://www.edrdg.org/jmdict/j_jmdict.html"; - }; -} diff --git a/nix/kanjidic2.nix b/nix/kanjidic2.nix deleted file mode 100644 index 62bed2a..0000000 --- a/nix/kanjidic2.nix +++ /dev/null @@ -1,40 +0,0 @@ -{ - stdenvNoCC, - kanjidic2-src, - xmlformat, - gzip, - edrdgMetadata, -}: -stdenvNoCC.mkDerivation { - name = "kanjidic2"; - - src = kanjidic2-src; - dontUnpack = true; - - nativeBuildInputs = [ - gzip - xmlformat - ]; - - buildPhase = '' - runHook preBuild - - gzip -dkc "${kanjidic2-src}" > kanjidic2.xml - xmlformat -i kanjidic2.xml - - runHook postBuild - ''; - - installPhase = '' - runHook preInstall - - install -Dt "$out" kanjidic2.xml - - runHook postInstall - ''; - - meta = edrdgMetadata // { - description = "A consolidated XML-format kanji database"; - homepage = "https://www.edrdg.org/kanjidic/kanjd2index_legacy.html"; - }; -} diff --git a/nix/radkfile.nix b/nix/radkfile.nix deleted file mode 100644 index 560c37e..0000000 --- a/nix/radkfile.nix +++ /dev/null @@ -1,40 +0,0 @@ -{ - stdenv, - radkfile-src, - gzip, - iconv, - edrdgMetadata, -}: -stdenv.mkDerivation { - name = "radkfile"; - - src = radkfile-src; - dontUnpack = true; - - nativeBuildInputs = [ - gzip - iconv - ]; - - buildPhase = '' - runHook preBuild - - gzip -dkc "$src" > radkfile - iconv -f EUC-JP -t UTF-8 -o radkfile_utf8 radkfile - - runHook postBuild - ''; - - installPhase = '' - runHook preInstall - - install -Dt "$out" radkfile_utf8 - - runHook postInstall - ''; - - meta = edrdgMetadata // { - description = "A file providing searchable decompositions of kanji characters"; - homepage = "https://www.edrdg.org/krad/kradinf.html"; - }; -}