Compare commits

..

2 Commits

10 changed files with 366 additions and 247 deletions

View File

@ -1,5 +1,9 @@
# ja_db
# jadb
[![built with nix](https://builtwithnix.org/badge.svg)](https://builtwithnix.org)
An SQLite database containing open-source Japanese-language translation data combined from several sources:
- **JMDict:** https://edrdg.org/jmdict/j_jmdict.html
- **RADKFILE/KRADFILE:** https://www.edrdg.org/krad/kradinf.html
- **KANJIDIC2:** https://www.edrdg.org/kanjidic/kanjd2index_legacy.html

View File

@ -12,7 +12,7 @@ Future<void> main(List<String> arguments) async {
final db = await createDatabaseFactoryFfi(ffiInit: () {
open.overrideForAll(() => DynamicLibrary.open(arguments[0]));
})
.openDatabase(Directory.current.uri.resolve('main.db').path);
.openDatabase(Directory.current.uri.resolve('jadb.sqlite').path);
await addDataFromJMdict(db);
await addDataFromRADKFILE(db);
await addDataFromKANJIDIC(db);

View File

@ -1,9 +1,24 @@
{
"nodes": {
"JMdictSrc": {
"flake-utils": {
"locked": {
"lastModified": 1649676176,
"narHash": "sha256-OWKJratjt2RW151VUlJPRALb7OU2S5s+f0vLj4o1bHM=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "a4b154ebbdc88c8498a5c7b01589addc9e9cb678",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"jmdict-src": {
"flake": false,
"locked": {
"narHash": "sha256-7hGfayIXi8mH80025s7Pgqk7jwOvZlQMHsxHv0sq5ek=",
"narHash": "sha256-997+CJDljSggj2Kh9+1Xd+yvLJUkleQ7b5iuYIEwswc=",
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"
},
@ -12,10 +27,10 @@
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"
}
},
"JMdictWithExamplesSrc": {
"jmdict-with-examples-src": {
"flake": false,
"locked": {
"narHash": "sha256-OtxCfNaIbRVBuztTiNim7xjxGDFrV9MCPpymLLYBMnk=",
"narHash": "sha256-6GU10LW5XO76ptXC+DNBRusuFI1E1i8Sda63sTXCsFU=",
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
},
@ -24,7 +39,7 @@
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
}
},
"KANJIDIC2Src": {
"kanjidic2-src": {
"flake": false,
"locked": {
"narHash": "sha256-BaVQaAEisPC60ohz8Gyr5r0Fe4Qrjupj6VKQqMvq9Eo=",
@ -36,38 +51,9 @@
"url": "http://nihongo.monash.edu/kanjidic2/kanjidic2.xml.gz"
}
},
"RADKFILESrc": {
"flake": false,
"locked": {
"narHash": "sha256-rO2z5GPt3g6osZOlpyWysmIbRV2Gw4AR4XvngVTHNpk=",
"type": "file",
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
},
"original": {
"type": "file",
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
}
},
"flake-utils": {
"locked": {
"lastModified": 1659877975,
"narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nix-dart": {
"inputs": {
"flake-utils": [
"flake-utils"
],
"flake-utils": "flake-utils",
"nixpkgs": [
"nixpkgs"
],
@ -89,16 +75,16 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1661187878,
"narHash": "sha256-/wCqoQB1BsaVi4nb8Iz0PreeBNMTim0p78NLtyWejFE=",
"lastModified": 1731386116,
"narHash": "sha256-lKA770aUmjPHdTaJWnP3yQ9OI1TigenUqVC3wweqZuI=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "52527082ea267fe486f0648582d57c85486b2031",
"rev": "689fed12a013f56d4c4d3f612489634267d86529",
"type": "github"
},
"original": {
"id": "nixpkgs",
"ref": "nixos-22.05",
"ref": "nixos-24.05",
"type": "indirect"
}
},
@ -118,15 +104,26 @@
"type": "github"
}
},
"radkfile-src": {
"flake": false,
"locked": {
"narHash": "sha256-rO2z5GPt3g6osZOlpyWysmIbRV2Gw4AR4XvngVTHNpk=",
"type": "file",
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
},
"original": {
"type": "file",
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
}
},
"root": {
"inputs": {
"JMdictSrc": "JMdictSrc",
"JMdictWithExamplesSrc": "JMdictWithExamplesSrc",
"KANJIDIC2Src": "KANJIDIC2Src",
"RADKFILESrc": "RADKFILESrc",
"flake-utils": "flake-utils",
"jmdict-src": "jmdict-src",
"jmdict-with-examples-src": "jmdict-with-examples-src",
"kanjidic2-src": "kanjidic2-src",
"nix-dart": "nix-dart",
"nixpkgs": "nixpkgs"
"nixpkgs": "nixpkgs",
"radkfile-src": "radkfile-src"
}
}
},

272
flake.nix
View File

@ -2,37 +2,29 @@
description = "A SQLite database containing open source japanese language translation data";
inputs = {
nixpkgs.url = "nixpkgs/nixos-22.05";
flake-utils = {
url = "github:numtide/flake-utils";
inputs.nixpkgs.follows = "nixpkgs";
};
nixpkgs.url = "nixpkgs/nixos-24.05";
nix-dart = {
url = "github:tadfisher/nix-dart";
inputs = {
nixpkgs.follows = "nixpkgs";
flake-utils.follows = "flake-utils";
};
inputs.nixpkgs.follows = "nixpkgs";
};
JMdictSrc = {
jmdict-src = {
url = "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz";
flake = false;
};
JMdictWithExamplesSrc = {
jmdict-with-examples-src = {
url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz";
flake = false;
};
RADKFILESrc = {
radkfile-src = {
url = "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz";
flake = false;
};
KANJIDIC2Src = {
kanjidic2-src = {
url = "http://nihongo.monash.edu/kanjidic2/kanjidic2.xml.gz";
flake = false;
};
@ -42,216 +34,96 @@
outputs = {
self,
nixpkgs,
flake-utils,
nix-dart,
JMdictSrc,
JMdictWithExamplesSrc,
RADKFILESrc,
KANJIDIC2Src
jmdict-src,
jmdict-with-examples-src,
radkfile-src,
kanjidic2-src
}: let
system = "x86_64-linux";
pkgs = import nixpkgs {
inherit system;
overlays = [
# (final: prev: { dart = nix-dart.packages.${system}.dart; })
nix-dart.overlay
];
};
inherit (pkgs) lib;
inherit (nixpkgs) lib;
systems = [
"x86_64-linux"
"aarch64-linux"
"x86_64-darwin"
"aarch64-darwin"
"armv7l-linux"
];
forAllSystems = f: lib.genAttrs systems (system: let
pkgs = import nixpkgs {
inherit system;
overlays = [
nix-dart.overlay
];
};
in f system pkgs);
in {
devShell.${system} = pkgs.mkShell {
buildInputs = with pkgs; [
nix-dart.packages.${system}.pub2nix-lock
dart
gnumake
sqlite
sqlite-web
sqlint
sqlfluff
];
};
# Runnable flake apps, generated once per entry in `systems`.
# `default` serves the generated `docs` package over HTTP using Python's
# built-in `http.server` module.
apps = forAllSystems (system: pkgs: {
  default = let
    inherit (self.packages.${system}) docs;
  in {
    type = "app";
    # `${pkgs.python3}` on its own expands to the store *directory* of the
    # package, which cannot be executed; the interpreter lives in bin/python3.
    program = "${pkgs.writeShellScript "host-docs" ''
      ${pkgs.python3}/bin/python3 -m http.server -d ${docs}
    ''}";
  };
});
defaultPackage.${system} = self.packages.${system}.database;
# Development shells, generated once per entry in `systems`.
# The default shell provides the Dart toolchain, the pub2nix lock generator
# from nix-dart, and a handful of SQLite / SQL linting utilities.
devShells = forAllSystems (system: pkgs: let
  # Lock-file generator shipped by the nix-dart flake for this system.
  pub2nixLock = nix-dart.packages.${system}.pub2nix-lock;
in {
  default = pkgs.mkShell {
    buildInputs = [
      pub2nixLock
      pkgs.dart
      pkgs.gnumake
      pkgs.sqlite-interactive
      pkgs.sqlite-web
      pkgs.sqlint
      pkgs.sqlfluff
    ];
  };
});
packages.${system} = let
inherit (pkgs.stdenv) mkDerivation;
dbName = "main.db";
# defaultPackage.${system} = self.packages.${system}.database;
packages = let
edrdgMetadata = {
license = {
license = [{
shortName = "EDRDG";
fullName = "Electronic Dictionary Research and Development Group General Dictionary Licence";
url = "http://www.csse.monash.edu.au/~jwb/edrdg/licence.html";
};
maintainers = [ "h7x4 <h7x4@nani.wtf>" ];
}];
maintainers = [ lib.maintainers.h7x4 ];
platforms = lib.platforms.all;
};
in {
JMdict = mkDerivation {
name = "JMdict";
srcs = [
JMdictSrc
JMdictWithExamplesSrc
];
dontUnpack = true;
src = lib.cleanSource ./.;
in forAllSystems (system: pkgs: {
default = self.packages.${system}.database;
nativeBuildInputs = with pkgs; [ xmlformat ];
buildPhase = ''
gzip -dkc ${JMdictSrc} > JMdict.xml
gzip -dkc ${JMdictWithExamplesSrc} > JMdict_with_examples.xml
xmlformat -i JMdict.xml
xmlformat -i JMdict_with_examples.xml
'';
installPhase = ''
mkdir $out
cp JMdict.xml $out
cp JMdict_with_examples.xml $out
'';
meta = edrdgMetadata // {
description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words";
homepage = "https://www.edrdg.org/jmdict/j_jmdict.html";
};
jmdict = pkgs.callPackage ./nix/jmdict.nix {
inherit jmdict-src jmdict-with-examples-src edrdgMetadata;
};
RADKFILE = mkDerivation {
name = "RADKFILE";
src = RADKFILESrc;
dontUnpack = true;
buildPhase = ''
gzip -dkc $src > radkfile
iconv -f EUC-JP -t UTF-8 -o radkfile_utf8 radkfile
'';
installPhase = ''
mkdir $out
cp radkfile_utf8 $out
'';
meta = edrdgMetadata // {
description = "A file providing searchable decompositions of kanji characters";
homepage = "https://www.edrdg.org/krad/kradinf.html";
};
radkfile = pkgs.callPackage ./nix/radkfile.nix {
inherit radkfile-src edrdgMetadata;
};
KANJIDIC2 = mkDerivation {
name = "KANJIDIC2";
src = KANJIDIC2Src;
dontUnpack = true;
nativeBuildInputs = with pkgs; [ xmlformat ];
buildPhase = ''
gzip -dkc ${KANJIDIC2Src} > kanjidic2.xml
'';
# xmlformat -i kanjidic2.xml
installPhase = ''
mkdir $out
cp kanjidic2.xml $out
'';
meta = edrdgMetadata // {
# description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words";
# homepage = "https://www.edrdg.org/jmdict/j_jmdict.html";
};
kanjidic2 = pkgs.callPackage ./nix/kanjidic2.nix {
inherit kanjidic2-src edrdgMetadata;
};
database_generator = let
buildDartPackage = nix-dart.builders.${system}.buildDartPackage.override {
dart = nix-dart.packages.${system}.dart-dev;
};
in (buildDartPackage {
pname = "database_generator";
version = "1.0";
src = builtins.filterSource (path: type: baseNameOf path != ".dart_tool") ./.;
preBuild = ''
dart --version
'';
specFile = ./pubspec.yaml;
lockFile = ./pub2nix.lock;
}).overrideAttrs(old: {
buildInputs = [nix-dart.packages.${system}.dart-dev];
buildPhase = builtins.replaceStrings ["pub"] ["dart pub"] old.buildPhase;
});
database = mkDerivation {
name = "database";
src = builtins.filterSource (path: type: baseNameOf path != dbName) ./.;
nativeBuildInputs = with pkgs; [
sqlite
self.packages.${system}.database_generator
];
buildPhase = ''
mkdir -p data
ln -s ${self.packages.${system}.JMdict}/* data
ln -s ${self.packages.${system}.RADKFILE}/* data
ln -s ${self.packages.${system}.KANJIDIC2}/* data
for migration in migrations/*.sql; do
sqlite3 ${dbName} < $migration
done
ja_db ${pkgs.sqlite.out}/lib/libsqlite3.so
'';
installPhase = ''
mkdir -p $out
cp ${dbName} $out/${dbName}
'';
database-tool = pkgs.callPackage ./nix/database_tool.nix {
inherit nix-dart src;
};
docs = mkDerivation {
name = "docs";
src = self.packages.${system}.database;
nativeBuildInputs = with pkgs; [
sqlite
schemaspy
sqlite-jdbc
];
buildPhase = let
properties = pkgs.writeText "sqlite.properties" ''
description=SQLite
driver=org.sqlite.JDBC
driverPath=${pkgs.sqlite-jdbc}/share/java/sqlite-jdbc-3.25.2.jar
connectionSpec=jdbc:sqlite:<db>
'';
args = pkgs.writeText "schemaspy.properties" ''
schemaspy.cat="%"
schemaspy.t=sqlite
schemaspy.sso=true
schemaspy.db=${dbName}
schemaspy.o=docs
schemaspy.s=schema.sql
'';
in ''
sqlite3 main.db ".schema" > schema.sql
cp ${args} ./schemaspy.properties
ls
schemaspy -t ${properties}
'';
installPhase = ''
cp -r docs $out
'';
database = pkgs.callPackage ./nix/database.nix {
inherit (self.packages.${system}) database-tool jmdict radkfile kanjidic2;
inherit src;
};
};
hydraJobs = {
inherit (self.packages.${system}) database docs;
};
docs = pkgs.callPackage ./nix/docs.nix {
inherit (self.packages.${system}) database;
};
});
};
}

43
nix/database.nix Normal file
View File

@ -0,0 +1,43 @@
# Builds the `jadb.sqlite` database.
#
# Arguments:
#   src                        - project source tree; must contain `migrations/*.sql`
#   database-tool              - the Dart generator; its main program is run with the
#                                path to the SQLite shared library as its only argument
#   jmdict, radkfile, kanjidic2 - packages whose top-level files are linked into `data/`
#   sqlite                     - provides both the `sqlite3` CLI and the shared library
#
# Output: `$out/jadb.sqlite`
{
  lib,
  stdenvNoCC,
  src,
  database-tool,
  jmdict,
  radkfile,
  kanjidic2,
  sqlite,
}:
let
  # Shared-library suffix of the host platform (".so" on Linux, ".dylib" on
  # Darwin). The flake advertises Darwin systems, where a hard-coded
  # `libsqlite3.so` does not exist.
  sharedLibraryExt = stdenvNoCC.hostPlatform.extensions.sharedLibrary;
  sqliteLibrary = "${sqlite.out}/lib/libsqlite3${sharedLibraryExt}";
in
stdenvNoCC.mkDerivation {
  name = "jadb";
  inherit src;

  nativeBuildInputs = [
    database-tool
    sqlite
  ];

  # 1. expose the dictionary files under ./data
  # 2. create the schema by applying every migration in glob order
  # 3. run the generator tool against the freshly created database
  buildPhase = ''
    runHook preBuild

    mkdir -p data
    ln -s ${jmdict}/* data
    ln -s ${radkfile}/* data
    ln -s ${kanjidic2}/* data

    for migration in migrations/*.sql; do
      sqlite3 jadb.sqlite < "$migration"
    done

    ${lib.getExe database-tool} ${sqliteLibrary}

    runHook postBuild
  '';

  installPhase = ''
    runHook preInstall

    install -Dm644 -t "$out" jadb.sqlite

    runHook postInstall
  '';
}

27
nix/database_tool.nix Normal file
View File

@ -0,0 +1,27 @@
# Packages the Dart database generator with nix-dart's `buildDartPackage`.
#
# Arguments:
#   nix-dart - the nix-dart flake (provides the builder and the `dart-dev` SDK)
#   system   - system string used to select nix-dart's per-system outputs
#   src      - project source tree
#
# The resulting package declares `ja_db` as its main program.
{
  nix-dart,
  system,
  src,
}:
let
  inherit (nix-dart.packages.${system}) dart-dev;

  # Builder re-parameterised to use the `dart-dev` SDK.
  buildWithDartDev = nix-dart.builders.${system}.buildDartPackage.override {
    dart = dart-dev;
  };

  unpatched = buildWithDartDev {
    inherit src;
    pname = "database_tool";
    version = "1.0";

    specFile = ../pubspec.yaml;
    lockFile = ../pub2nix.lock;

    # Print the SDK version into the build log.
    preBuild = ''
      dart --version
    '';

    meta.mainProgram = "ja_db";
  };
in
unpatched.overrideAttrs (prev: {
  buildInputs = [ dart-dev ];
  # Rewrite every occurrence of "pub" in the builder's build script to
  # "dart pub".
  # NOTE(review): presumably because the standalone `pub` executable is not
  # shipped with this SDK - confirm against nix-dart.
  buildPhase = builtins.replaceStrings [ "pub" ] [ "dart pub" ] prev.buildPhase;
})

53
nix/docs.nix Normal file
View File

@ -0,0 +1,53 @@
# Generates schema documentation for the database with SchemaSpy.
#
# Arguments:
#   database    - package containing `jadb.sqlite`; used as the source tree
#   sqlite      - provides the `sqlite3` CLI used to dump the schema
#   schemaspy   - documentation generator
#   sqlite-jdbc - JDBC driver referenced from the SchemaSpy database-type file
#   writeText   - helper used to materialise the two properties files
#
# Output: `$out` is the `docs` directory written during the build.
{
  stdenvNoCC,
  database,
  sqlite,
  schemaspy,
  sqlite-jdbc,
  writeText,
}:
stdenvNoCC.mkDerivation {
  name = "docs";
  src = database;

  nativeBuildInputs = [
    sqlite
    schemaspy
    sqlite-jdbc
  ];

  buildPhase = let
    # Database-type description handed to `schemaspy -t`.
    # NOTE(review): the jar file name hard-codes version 3.25.2; verify that it
    # matches the sqlite-jdbc shipped by the pinned nixpkgs revision.
    properties = writeText "sqlite.properties" ''
      description=SQLite
      driver=org.sqlite.JDBC
      driverPath=${sqlite-jdbc}/share/java/sqlite-jdbc-3.25.2.jar
      connectionSpec=jdbc:sqlite:<db>
    '';

    # Copied to ./schemaspy.properties in the build directory before running
    # schemaspy.
    args = writeText "schemaspy.properties" ''
      schemaspy.cat="%"
      schemaspy.t=sqlite
      schemaspy.sso=true
      schemaspy.db=jadb.sqlite
      schemaspy.o=docs
      schemaspy.s=schema.sql
    '';
  in ''
    runHook preBuild

    sqlite3 jadb.sqlite ".schema" > schema.sql
    cp ${args} ./schemaspy.properties
    schemaspy -t ${properties}

    runHook postBuild
  '';

  # The install phase must run the *install* hooks; running preBuild/postBuild
  # here would execute build hooks a second time and skip any
  # preInstall/postInstall supplied through overrideAttrs.
  installPhase = ''
    runHook preInstall

    cp -r docs "$out"

    runHook postInstall
  '';
}

45
nix/jmdict.nix Normal file
View File

@ -0,0 +1,45 @@
# Unpacks and pretty-prints the two JMdict downloads.
#
# Arguments:
#   jmdict-src               - gzip-compressed JMdict file
#   jmdict-with-examples-src - gzip-compressed JMdict file with example sentences
#   xmlformat, gzip          - tools used during the build
#   edrdgMetadata            - shared `meta` attributes (license, maintainers, platforms)
#
# Output: `$out/JMdict.xml` and `$out/JMdict_with_examples.xml`
{
  stdenvNoCC,
  jmdict-src,
  jmdict-with-examples-src,
  xmlformat,
  gzip,
  edrdgMetadata,
}:
stdenvNoCC.mkDerivation {
  name = "jmdict";

  srcs = [
    jmdict-src
    jmdict-with-examples-src
  ];
  # The inputs are single gzip files, not archives; they are decompressed by
  # hand in buildPhase.
  dontUnpack = true;

  nativeBuildInputs = [
    gzip
    xmlformat
  ];

  buildPhase = ''
    runHook preBuild

    gzip -dkc ${jmdict-src} > JMdict.xml
    gzip -dkc ${jmdict-with-examples-src} > JMdict_with_examples.xml
    xmlformat -i JMdict.xml
    xmlformat -i JMdict_with_examples.xml

    runHook postBuild
  '';

  # `install` defaults to mode 0755; these are plain data files, so install
  # them non-executable, matching how the database file is installed.
  installPhase = ''
    runHook preInstall

    install -Dm644 -t "$out" JMdict.xml JMdict_with_examples.xml

    runHook postInstall
  '';

  meta = edrdgMetadata // {
    description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words";
    homepage = "https://www.edrdg.org/jmdict/j_jmdict.html";
  };
}

39
nix/kanjidic2.nix Normal file
View File

@ -0,0 +1,39 @@
# Unpacks the KANJIDIC2 download.
#
# Arguments:
#   kanjidic2-src   - gzip-compressed KANJIDIC2 XML file
#   xmlformat, gzip - build tools (xmlformat is only needed if the disabled
#                     formatting step below is re-enabled)
#   edrdgMetadata   - shared `meta` attributes (license, maintainers, platforms)
#
# Output: `$out/kanjidic2.xml`
{
  stdenvNoCC,
  kanjidic2-src,
  xmlformat,
  gzip,
  edrdgMetadata,
}:
stdenvNoCC.mkDerivation {
  name = "kanjidic2";

  src = kanjidic2-src;
  # The input is a single gzip file, not an archive; it is decompressed by
  # hand in buildPhase.
  dontUnpack = true;

  nativeBuildInputs = [
    gzip
    xmlformat
  ];

  buildPhase = ''
    runHook preBuild

    gzip -dkc ${kanjidic2-src} > kanjidic2.xml
    # xmlformat -i kanjidic2.xml

    runHook postBuild
  '';

  # `install` defaults to mode 0755; this is a plain data file, so install it
  # non-executable, matching how the database file is installed.
  installPhase = ''
    runHook preInstall

    install -Dm644 -t "$out" kanjidic2.xml

    runHook postInstall
  '';

  meta = edrdgMetadata // {
    description = "An XML file providing information about kanji characters";
    homepage = "https://www.edrdg.org/kanjidic/kanjd2index_legacy.html";
  };
}

39
nix/radkfile.nix Normal file
View File

@ -0,0 +1,39 @@
# Unpacks the RADKFILE download and converts it from EUC-JP to UTF-8.
#
# Arguments:
#   radkfile-src  - gzip-compressed RADKFILE
#   gzip, iconv   - tools used during the build
#   edrdgMetadata - shared `meta` attributes (license, maintainers, platforms)
#
# Output: `$out/radkfile_utf8`
{
  stdenv,
  radkfile-src,
  gzip,
  iconv,
  edrdgMetadata,
}:
stdenv.mkDerivation {
  name = "radkfile";

  src = radkfile-src;
  # The input is a single gzip file, not an archive; it is decompressed by
  # hand in buildPhase.
  dontUnpack = true;

  nativeBuildInputs = [
    gzip
    iconv
  ];

  buildPhase = ''
    runHook preBuild

    gzip -dkc "$src" > radkfile
    iconv -f EUC-JP -t UTF-8 -o radkfile_utf8 radkfile

    runHook postBuild
  '';

  # `install` defaults to mode 0755; this is a plain data file, so install it
  # non-executable, matching how the database file is installed.
  installPhase = ''
    runHook preInstall

    install -Dm644 -t "$out" radkfile_utf8

    runHook postInstall
  '';

  meta = edrdgMetadata // {
    description = "A file providing searchable decompositions of kanji characters";
    homepage = "https://www.edrdg.org/krad/kradinf.html";
  };
}