Compare commits

..

2 Commits

10 changed files with 366 additions and 247 deletions

View File

@ -1,5 +1,9 @@
# ja_db # jadb
[![built with nix](https://builtwithnix.org/badge.svg)](https://builtwithnix.org) [![built with nix](https://builtwithnix.org/badge.svg)](https://builtwithnix.org)
An SQLite database containing open-source Japanese language translation data, combined from several sources:
- **JMDict:** https://edrdg.org/jmdict/j_jmdict.html
- **RADKFILE/KRADFILE:** https://www.edrdg.org/krad/kradinf.html
- **KANJIDIC2:** https://www.edrdg.org/kanjidic/kanjd2index_legacy.html

View File

@ -12,7 +12,7 @@ Future<void> main(List<String> arguments) async {
final db = await createDatabaseFactoryFfi(ffiInit: () { final db = await createDatabaseFactoryFfi(ffiInit: () {
open.overrideForAll(() => DynamicLibrary.open(arguments[0])); open.overrideForAll(() => DynamicLibrary.open(arguments[0]));
}) })
.openDatabase(Directory.current.uri.resolve('main.db').path); .openDatabase(Directory.current.uri.resolve('jadb.sqlite').path);
await addDataFromJMdict(db); await addDataFromJMdict(db);
await addDataFromRADKFILE(db); await addDataFromRADKFILE(db);
await addDataFromKANJIDIC(db); await addDataFromKANJIDIC(db);

View File

@ -1,9 +1,24 @@
{ {
"nodes": { "nodes": {
"JMdictSrc": { "flake-utils": {
"locked": {
"lastModified": 1649676176,
"narHash": "sha256-OWKJratjt2RW151VUlJPRALb7OU2S5s+f0vLj4o1bHM=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "a4b154ebbdc88c8498a5c7b01589addc9e9cb678",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"jmdict-src": {
"flake": false, "flake": false,
"locked": { "locked": {
"narHash": "sha256-7hGfayIXi8mH80025s7Pgqk7jwOvZlQMHsxHv0sq5ek=", "narHash": "sha256-997+CJDljSggj2Kh9+1Xd+yvLJUkleQ7b5iuYIEwswc=",
"type": "file", "type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz" "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"
}, },
@ -12,10 +27,10 @@
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz" "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"
} }
}, },
"JMdictWithExamplesSrc": { "jmdict-with-examples-src": {
"flake": false, "flake": false,
"locked": { "locked": {
"narHash": "sha256-OtxCfNaIbRVBuztTiNim7xjxGDFrV9MCPpymLLYBMnk=", "narHash": "sha256-6GU10LW5XO76ptXC+DNBRusuFI1E1i8Sda63sTXCsFU=",
"type": "file", "type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz" "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
}, },
@ -24,7 +39,7 @@
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz" "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
} }
}, },
"KANJIDIC2Src": { "kanjidic2-src": {
"flake": false, "flake": false,
"locked": { "locked": {
"narHash": "sha256-BaVQaAEisPC60ohz8Gyr5r0Fe4Qrjupj6VKQqMvq9Eo=", "narHash": "sha256-BaVQaAEisPC60ohz8Gyr5r0Fe4Qrjupj6VKQqMvq9Eo=",
@ -36,38 +51,9 @@
"url": "http://nihongo.monash.edu/kanjidic2/kanjidic2.xml.gz" "url": "http://nihongo.monash.edu/kanjidic2/kanjidic2.xml.gz"
} }
}, },
"RADKFILESrc": {
"flake": false,
"locked": {
"narHash": "sha256-rO2z5GPt3g6osZOlpyWysmIbRV2Gw4AR4XvngVTHNpk=",
"type": "file",
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
},
"original": {
"type": "file",
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
}
},
"flake-utils": {
"locked": {
"lastModified": 1659877975,
"narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nix-dart": { "nix-dart": {
"inputs": { "inputs": {
"flake-utils": [ "flake-utils": "flake-utils",
"flake-utils"
],
"nixpkgs": [ "nixpkgs": [
"nixpkgs" "nixpkgs"
], ],
@ -89,16 +75,16 @@
}, },
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1661187878, "lastModified": 1731386116,
"narHash": "sha256-/wCqoQB1BsaVi4nb8Iz0PreeBNMTim0p78NLtyWejFE=", "narHash": "sha256-lKA770aUmjPHdTaJWnP3yQ9OI1TigenUqVC3wweqZuI=",
"owner": "NixOS", "owner": "NixOS",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "52527082ea267fe486f0648582d57c85486b2031", "rev": "689fed12a013f56d4c4d3f612489634267d86529",
"type": "github" "type": "github"
}, },
"original": { "original": {
"id": "nixpkgs", "id": "nixpkgs",
"ref": "nixos-22.05", "ref": "nixos-24.05",
"type": "indirect" "type": "indirect"
} }
}, },
@ -118,15 +104,26 @@
"type": "github" "type": "github"
} }
}, },
"radkfile-src": {
"flake": false,
"locked": {
"narHash": "sha256-rO2z5GPt3g6osZOlpyWysmIbRV2Gw4AR4XvngVTHNpk=",
"type": "file",
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
},
"original": {
"type": "file",
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
}
},
"root": { "root": {
"inputs": { "inputs": {
"JMdictSrc": "JMdictSrc", "jmdict-src": "jmdict-src",
"JMdictWithExamplesSrc": "JMdictWithExamplesSrc", "jmdict-with-examples-src": "jmdict-with-examples-src",
"KANJIDIC2Src": "KANJIDIC2Src", "kanjidic2-src": "kanjidic2-src",
"RADKFILESrc": "RADKFILESrc",
"flake-utils": "flake-utils",
"nix-dart": "nix-dart", "nix-dart": "nix-dart",
"nixpkgs": "nixpkgs" "nixpkgs": "nixpkgs",
"radkfile-src": "radkfile-src"
} }
} }
}, },

272
flake.nix
View File

@ -2,37 +2,29 @@
description = "A SQLite database containing open source japanese language translation data"; description = "A SQLite database containing open source japanese language translation data";
inputs = { inputs = {
nixpkgs.url = "nixpkgs/nixos-22.05"; nixpkgs.url = "nixpkgs/nixos-24.05";
flake-utils = {
url = "github:numtide/flake-utils";
inputs.nixpkgs.follows = "nixpkgs";
};
nix-dart = { nix-dart = {
url = "github:tadfisher/nix-dart"; url = "github:tadfisher/nix-dart";
inputs = { inputs.nixpkgs.follows = "nixpkgs";
nixpkgs.follows = "nixpkgs";
flake-utils.follows = "flake-utils";
};
}; };
JMdictSrc = { jmdict-src = {
url = "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"; url = "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz";
flake = false; flake = false;
}; };
JMdictWithExamplesSrc = { jmdict-with-examples-src = {
url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"; url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz";
flake = false; flake = false;
}; };
RADKFILESrc = { radkfile-src = {
url = "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"; url = "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz";
flake = false; flake = false;
}; };
KANJIDIC2Src = { kanjidic2-src = {
url = "http://nihongo.monash.edu/kanjidic2/kanjidic2.xml.gz"; url = "http://nihongo.monash.edu/kanjidic2/kanjidic2.xml.gz";
flake = false; flake = false;
}; };
@ -42,216 +34,96 @@
outputs = { outputs = {
self, self,
nixpkgs, nixpkgs,
flake-utils,
nix-dart, nix-dart,
JMdictSrc, jmdict-src,
JMdictWithExamplesSrc, jmdict-with-examples-src,
RADKFILESrc, radkfile-src,
KANJIDIC2Src kanjidic2-src
}: let }: let
system = "x86_64-linux"; inherit (nixpkgs) lib;
pkgs = import nixpkgs { systems = [
inherit system; "x86_64-linux"
overlays = [ "aarch64-linux"
# (final: prev: { dart = nix-dart.packages.${system}.dart; }) "x86_64-darwin"
nix-dart.overlay "aarch64-darwin"
]; "armv7l-linux"
}; ];
inherit (pkgs) lib;
forAllSystems = f: lib.genAttrs systems (system: let
pkgs = import nixpkgs {
inherit system;
overlays = [
nix-dart.overlay
];
};
in f system pkgs);
in { in {
devShell.${system} = pkgs.mkShell { apps = forAllSystems (system: pkgs: {
buildInputs = with pkgs; [ default = let
nix-dart.packages.${system}.pub2nix-lock inherit (self.packages.${system}) docs;
dart in {
gnumake type = "app";
sqlite program = "${pkgs.writeShellScript "host-docs" ''
sqlite-web ${pkgs.python3} -m http.server -d ${docs}
sqlint ''}";
sqlfluff };
]; });
};
defaultPackage.${system} = self.packages.${system}.database; devShells = forAllSystems (system: pkgs: {
default = pkgs.mkShell {
buildInputs = with pkgs; [
nix-dart.packages.${system}.pub2nix-lock
dart
gnumake
sqlite-interactive
sqlite-web
sqlint
sqlfluff
];
};
});
packages.${system} = let # defaultPackage.${system} = self.packages.${system}.database;
inherit (pkgs.stdenv) mkDerivation;
dbName = "main.db";
packages = let
edrdgMetadata = { edrdgMetadata = {
license = { license = [{
shortName = "EDRDG"; shortName = "EDRDG";
fullName = "Electronic Dictionary Research and Development Group General Dictionary Licence"; fullName = "Electronic Dictionary Research and Development Group General Dictionary Licence";
url = "http://www.csse.monash.edu.au/~jwb/edrdg/licence.html"; url = "http://www.csse.monash.edu.au/~jwb/edrdg/licence.html";
}; }];
maintainers = [ "h7x4 <h7x4@nani.wtf>" ]; maintainers = [ lib.maintainers.h7x4 ];
platforms = lib.platforms.all; platforms = lib.platforms.all;
}; };
in {
JMdict = mkDerivation {
name = "JMdict";
srcs = [ src = lib.cleanSource ./.;
JMdictSrc in forAllSystems (system: pkgs: {
JMdictWithExamplesSrc default = self.packages.${system}.database;
];
dontUnpack = true;
nativeBuildInputs = with pkgs; [ xmlformat ]; jmdict = pkgs.callPackage ./nix/jmdict.nix {
buildPhase = '' inherit jmdict-src jmdict-with-examples-src edrdgMetadata;
gzip -dkc ${JMdictSrc} > JMdict.xml
gzip -dkc ${JMdictWithExamplesSrc} > JMdict_with_examples.xml
xmlformat -i JMdict.xml
xmlformat -i JMdict_with_examples.xml
'';
installPhase = ''
mkdir $out
cp JMdict.xml $out
cp JMdict_with_examples.xml $out
'';
meta = edrdgMetadata // {
description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words";
homepage = "https://www.edrdg.org/jmdict/j_jmdict.html";
};
}; };
RADKFILE = mkDerivation { radkfile = pkgs.callPackage ./nix/radkfile.nix {
name = "RADKFILE"; inherit radkfile-src edrdgMetadata;
src = RADKFILESrc;
dontUnpack = true;
buildPhase = ''
gzip -dkc $src > radkfile
iconv -f EUC-JP -t UTF-8 -o radkfile_utf8 radkfile
'';
installPhase = ''
mkdir $out
cp radkfile_utf8 $out
'';
meta = edrdgMetadata // {
description = "A file providing searchable decompositions of kanji characters";
homepage = "https://www.edrdg.org/krad/kradinf.html";
};
}; };
KANJIDIC2 = mkDerivation { kanjidic2 = pkgs.callPackage ./nix/kanjidic2.nix {
name = "KANJIDIC2"; inherit kanjidic2-src edrdgMetadata;
src = KANJIDIC2Src;
dontUnpack = true;
nativeBuildInputs = with pkgs; [ xmlformat ];
buildPhase = ''
gzip -dkc ${KANJIDIC2Src} > kanjidic2.xml
'';
# xmlformat -i kanjidic2.xml
installPhase = ''
mkdir $out
cp kanjidic2.xml $out
'';
meta = edrdgMetadata // {
# description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words";
# homepage = "https://www.edrdg.org/jmdict/j_jmdict.html";
};
}; };
database_generator = let database-tool = pkgs.callPackage ./nix/database_tool.nix {
buildDartPackage = nix-dart.builders.${system}.buildDartPackage.override { inherit nix-dart src;
dart = nix-dart.packages.${system}.dart-dev;
};
in (buildDartPackage {
pname = "database_generator";
version = "1.0";
src = builtins.filterSource (path: type: baseNameOf path != ".dart_tool") ./.;
preBuild = ''
dart --version
'';
specFile = ./pubspec.yaml;
lockFile = ./pub2nix.lock;
}).overrideAttrs(old: {
buildInputs = [nix-dart.packages.${system}.dart-dev];
buildPhase = builtins.replaceStrings ["pub"] ["dart pub"] old.buildPhase;
});
database = mkDerivation {
name = "database";
src = builtins.filterSource (path: type: baseNameOf path != dbName) ./.;
nativeBuildInputs = with pkgs; [
sqlite
self.packages.${system}.database_generator
];
buildPhase = ''
mkdir -p data
ln -s ${self.packages.${system}.JMdict}/* data
ln -s ${self.packages.${system}.RADKFILE}/* data
ln -s ${self.packages.${system}.KANJIDIC2}/* data
for migration in migrations/*.sql; do
sqlite3 ${dbName} < $migration
done
ja_db ${pkgs.sqlite.out}/lib/libsqlite3.so
'';
installPhase = ''
mkdir -p $out
cp ${dbName} $out/${dbName}
'';
}; };
docs = mkDerivation { database = pkgs.callPackage ./nix/database.nix {
name = "docs"; inherit (self.packages.${system}) database-tool jmdict radkfile kanjidic2;
src = self.packages.${system}.database; inherit src;
nativeBuildInputs = with pkgs; [
sqlite
schemaspy
sqlite-jdbc
];
buildPhase = let
properties = pkgs.writeText "sqlite.properties" ''
description=SQLite
driver=org.sqlite.JDBC
driverPath=${pkgs.sqlite-jdbc}/share/java/sqlite-jdbc-3.25.2.jar
connectionSpec=jdbc:sqlite:<db>
'';
args = pkgs.writeText "schemaspy.properties" ''
schemaspy.cat="%"
schemaspy.t=sqlite
schemaspy.sso=true
schemaspy.db=${dbName}
schemaspy.o=docs
schemaspy.s=schema.sql
'';
in ''
sqlite3 main.db ".schema" > schema.sql
cp ${args} ./schemaspy.properties
ls
schemaspy -t ${properties}
'';
installPhase = ''
cp -r docs $out
'';
}; };
};
hydraJobs = { docs = pkgs.callPackage ./nix/docs.nix {
inherit (self.packages.${system}) database docs; inherit (self.packages.${system}) database;
}; };
});
}; };
} }

43
nix/database.nix Normal file
View File

@ -0,0 +1,43 @@
# Builds the jadb SQLite database.
#
# Arguments:
#   src                        - project source tree; must contain `migrations/*.sql`.
#   database-tool              - the importer executable (run via `lib.getExe`).
#   jmdict, radkfile, kanjidic2 - derivations whose output files are linked into `data/`.
#   sqlite                     - provides the `sqlite3` CLI and the shared library
#                                whose path is handed to the importer.
#
# Output: `$out/jadb.sqlite`.
{
  lib,
  stdenvNoCC,
  src,
  database-tool,
  jmdict,
  radkfile,
  kanjidic2,
  sqlite,
}:
let
  # Shared-library suffix of the host platform (".so" on Linux, ".dylib" on
  # Darwin). Hard-coding ".so" breaks the Darwin systems exposed by the flake.
  sharedLibraryExt = stdenvNoCC.hostPlatform.extensions.sharedLibrary;
in
stdenvNoCC.mkDerivation {
  name = "jadb";
  inherit src;

  nativeBuildInputs = [
    database-tool
    sqlite
  ];

  # 1. Expose the dictionary source files under ./data.
  # 2. Create the schema by applying every migration, in glob order.
  # 3. Run the importer, passing the path of the SQLite shared library as its
  #    only argument.
  buildPhase = ''
    runHook preBuild

    mkdir -p data
    ln -s ${jmdict}/* data
    ln -s ${radkfile}/* data
    ln -s ${kanjidic2}/* data

    for migration in migrations/*.sql; do
      sqlite3 jadb.sqlite < "$migration"
    done

    ${lib.getExe database-tool} ${sqlite.out}/lib/libsqlite3${sharedLibraryExt}

    runHook postBuild
  '';

  installPhase = ''
    runHook preInstall

    install -Dm644 -t $out jadb.sqlite

    runHook postInstall
  '';
}

27
nix/database_tool.nix Normal file
View File

@ -0,0 +1,27 @@
# Packages the Dart database tool with nix-dart's `buildDartPackage`.
#
# Arguments:
#   nix-dart - the nix-dart flake; supplies the builder and the `dart-dev` SDK.
#   system   - platform string used to select nix-dart's per-system outputs.
#   src      - source tree of the Dart project.
{
  nix-dart,
  system,
  src,
}:
let
  # The same Dart SDK is used by the builder and as a build input.
  inherit (nix-dart.packages.${system}) dart-dev;

  buildDartPackage = nix-dart.builders.${system}.buildDartPackage.override {
    dart = dart-dev;
  };

  unpatched = buildDartPackage {
    pname = "database_tool";
    version = "1.0";
    inherit src;

    # Print the SDK version into the build log.
    preBuild = ''
      dart --version
    '';

    specFile = ../pubspec.yaml;
    lockFile = ../pub2nix.lock;

    # Name of the executable resolved by `lib.getExe`.
    meta.mainProgram = "ja_db";
  };
in
unpatched.overrideAttrs (previous: {
  buildInputs = [ dart-dev ];

  # Rewrites every occurrence of "pub" in the inherited build phase to
  # "dart pub".
  # NOTE(review): presumably needed because this SDK has no standalone `pub`
  # binary; note that the substitution is purely textual - confirm.
  buildPhase = builtins.replaceStrings [ "pub" ] [ "dart pub" ] previous.buildPhase;
})

53
nix/docs.nix Normal file
View File

@ -0,0 +1,53 @@
# Generates schema documentation for the jadb database with SchemaSpy.
#
# Arguments:
#   database    - derivation containing `jadb.sqlite`; used as the source tree.
#   sqlite      - provides the `sqlite3` CLI used to dump the schema.
#   schemaspy   - documentation generator.
#   sqlite-jdbc - JDBC driver jar referenced from the database-type properties.
#   writeText   - used to materialise the two properties files.
#
# Output: the generated `docs` directory, copied to `$out`.
{
  stdenvNoCC,
  database,
  sqlite,
  schemaspy,
  sqlite-jdbc,
  writeText,
}:
stdenvNoCC.mkDerivation {
  name = "docs";
  src = database;

  nativeBuildInputs = [
    sqlite
    schemaspy
    sqlite-jdbc
  ];

  buildPhase = let
    # Database-type definition handed to `schemaspy -t`.
    # The jar name follows the packaged driver version instead of a pinned
    # literal, so a nixpkgs bump does not leave a dangling driverPath.
    # NOTE(review): assumes the jar is installed as
    # share/java/sqlite-jdbc-<version>.jar - confirm against nixpkgs.
    properties = writeText "sqlite.properties" ''
      description=SQLite
      driver=org.sqlite.JDBC
      driverPath=${sqlite-jdbc}/share/java/sqlite-jdbc-${sqlite-jdbc.version}.jar
      connectionSpec=jdbc:sqlite:<db>
    '';

    # Run configuration, copied to ./schemaspy.properties before the run.
    args = writeText "schemaspy.properties" ''
      schemaspy.cat="%"
      schemaspy.t=sqlite
      schemaspy.sso=true
      schemaspy.db=jadb.sqlite
      schemaspy.o=docs
      schemaspy.s=schema.sql
    '';
  in ''
    runHook preBuild

    sqlite3 jadb.sqlite ".schema" > schema.sql
    cp ${args} ./schemaspy.properties
    schemaspy -t ${properties}

    runHook postBuild
  '';

  # The install phase must run the install hooks; it previously re-ran
  # preBuild/postBuild, so preInstall/postInstall never fired.
  installPhase = ''
    runHook preInstall

    cp -r docs $out

    runHook postInstall
  '';
}

45
nix/jmdict.nix Normal file
View File

@ -0,0 +1,45 @@
# Unpacks and pretty-prints the JMdict dictionary files.
#
# Arguments:
#   jmdict-src               - gzip-compressed JMdict XML.
#   jmdict-with-examples-src - gzip-compressed JMdict XML with examples.
#   xmlformat, gzip          - tools used during the build.
#   edrdgMetadata            - shared `meta` attributes (license, maintainers,
#                              platforms), extended below.
#
# Output: `$out/JMdict.xml` and `$out/JMdict_with_examples.xml`.
{
  stdenvNoCC,
  jmdict-src,
  jmdict-with-examples-src,
  xmlformat,
  gzip,
  edrdgMetadata,
}:
stdenvNoCC.mkDerivation {
  name = "jmdict";

  srcs = [
    jmdict-src
    jmdict-with-examples-src
  ];
  # The sources are single gzip files, decompressed by hand in buildPhase.
  dontUnpack = true;

  nativeBuildInputs = [
    gzip
    xmlformat
  ];

  buildPhase = ''
    runHook preBuild

    gzip -dkc ${jmdict-src} > JMdict.xml
    gzip -dkc ${jmdict-with-examples-src} > JMdict_with_examples.xml
    xmlformat -i JMdict.xml
    xmlformat -i JMdict_with_examples.xml

    runHook postBuild
  '';

  # -m644: `install` defaults to mode 0755, which would mark the XML data
  # files as executable.
  installPhase = ''
    runHook preInstall

    install -Dm644 -t $out JMdict.xml JMdict_with_examples.xml

    runHook postInstall
  '';

  meta = edrdgMetadata // {
    description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words";
    homepage = "https://www.edrdg.org/jmdict/j_jmdict.html";
  };
}

39
nix/kanjidic2.nix Normal file
View File

@ -0,0 +1,39 @@
# Unpacks the KANJIDIC2 kanji dictionary file.
#
# Arguments:
#   kanjidic2-src   - gzip-compressed KANJIDIC2 XML.
#   xmlformat, gzip - build tools (xmlformat is only needed if the formatting
#                     step below is re-enabled).
#   edrdgMetadata   - shared `meta` attributes (license, maintainers,
#                     platforms), extended below.
#
# Output: `$out/kanjidic2.xml`.
{
  stdenvNoCC,
  kanjidic2-src,
  xmlformat,
  gzip,
  edrdgMetadata,
}:
stdenvNoCC.mkDerivation {
  name = "kanjidic2";

  src = kanjidic2-src;
  # The source is a single gzip file, decompressed by hand in buildPhase.
  dontUnpack = true;

  nativeBuildInputs = [
    gzip
    xmlformat
  ];

  # The xmlformat step is intentionally left disabled, as in the original.
  buildPhase = ''
    runHook preBuild

    gzip -dkc ${kanjidic2-src} > kanjidic2.xml
    # xmlformat -i kanjidic2.xml

    runHook postBuild
  '';

  # -m644: `install` defaults to mode 0755, which would mark the XML data
  # file as executable.
  installPhase = ''
    runHook preInstall

    install -Dm644 -t $out kanjidic2.xml

    runHook postInstall
  '';

  meta = edrdgMetadata // {
    description = "An XML file providing information about kanji characters";
    homepage = "https://www.edrdg.org/kanjidic/kanjd2index_legacy.html";
  };
}

39
nix/radkfile.nix Normal file
View File

@ -0,0 +1,39 @@
# Unpacks RADKFILE and converts it from EUC-JP to UTF-8.
#
# Arguments:
#   radkfile-src  - gzip-compressed RADKFILE.
#   gzip, iconv   - tools used during the build.
#   edrdgMetadata - shared `meta` attributes (license, maintainers,
#                   platforms), extended below.
#
# Output: `$out/radkfile_utf8`.
#
# NOTE(review): the sibling data packages use `stdenvNoCC`; nothing here
# appears to need a C compiler, so `stdenv` could probably be swapped - confirm.
{
  stdenv,
  radkfile-src,
  gzip,
  iconv,
  edrdgMetadata,
}:
stdenv.mkDerivation {
  name = "radkfile";

  src = radkfile-src;
  # The source is a single gzip file, decompressed by hand in buildPhase.
  dontUnpack = true;

  nativeBuildInputs = [
    gzip
    iconv
  ];

  buildPhase = ''
    runHook preBuild

    gzip -dkc $src > radkfile
    iconv -f EUC-JP -t UTF-8 -o radkfile_utf8 radkfile

    runHook postBuild
  '';

  # -m644: `install` defaults to mode 0755, which would mark the data file as
  # executable.
  installPhase = ''
    runHook preInstall

    install -Dm644 -t $out radkfile_utf8

    runHook postInstall
  '';

  meta = edrdgMetadata // {
    description = "A file providing searchable decompositions of kanji characters";
    homepage = "https://www.edrdg.org/krad/kradinf.html";
  };
}