2 Commits

Author SHA1 Message Date
oysteikt e989655370 search/word_search: score matching kana higher than converted kana
Build and test / build (push) Failing after 3m9s
2026-06-04 02:39:12 +09:00
oysteikt ef491a0977 Add kana independent search with tamerye extension 2026-06-04 02:38:15 +09:00
12 changed files with 120 additions and 188 deletions
Generated
+77 -11
View File
@@ -1,5 +1,20 @@
{
"nodes": {
"crane": {
"locked": {
"lastModified": 1780099841,
"narHash": "sha256-EVZd2RsbpreRUDSi9rBwPY+ZxoyMaiEBbZxxhljbaS4=",
"owner": "ipetkov",
"repo": "crane",
"rev": "0532eb17955225173906d671fb36306bdeb1e2dc",
"type": "github"
},
"original": {
"owner": "ipetkov",
"repo": "crane",
"type": "github"
}
},
"datasources": {
"inputs": {
"nixpkgs": [
@@ -20,20 +35,26 @@
"url": "https://git.pvv.ntnu.no/Mugiten/datasources.git"
}
},
"kanjivg-src": {
"flake": false,
"nix-sqlite": {
"inputs": {
"nixpkgs": [
"tamerye",
"nixpkgs"
]
},
"locked": {
"lastModified": 1775218066,
"narHash": "sha256-iYv9xakgoGt/JwwdKDUCpSAF36hBtKlX9oN7xiLowjs=",
"ref": "refs/heads/master",
"rev": "544d319f79348c092d567b662f27f33dacfa60cd",
"revCount": 2215,
"lastModified": 1780224621,
"narHash": "sha256-mGAHKHEzh+J83GzYCyuM6xdfBPFYTBjNAmDmbHKRZ5U=",
"ref": "main",
"rev": "8afa09b948ec5ccbe488c21d14d458060720d313",
"revCount": 28,
"type": "git",
"url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git"
"url": "https://git.pvv.ntnu.no/mugiten/nix-custom-sqlite.git"
},
"original": {
"ref": "main",
"type": "git",
"url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git"
"url": "https://git.pvv.ntnu.no/mugiten/nix-custom-sqlite.git"
}
},
"nixpkgs": {
@@ -54,8 +75,53 @@
"root": {
"inputs": {
"datasources": "datasources",
"kanjivg-src": "kanjivg-src",
"nixpkgs": "nixpkgs"
"nixpkgs": "nixpkgs",
"tamerye": "tamerye"
}
},
"rust-overlay": {
"inputs": {
"nixpkgs": [
"tamerye",
"nixpkgs"
]
},
"locked": {
"lastModified": 1780197589,
"narHash": "sha256-FVCr2Ij/jKf59a4LW481eeOF6rJRreOBrVgW/aUBTrw=",
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "21632e942d89bf1cce4e5a63d7e58a215a0cbfcc",
"type": "github"
},
"original": {
"owner": "oxalica",
"repo": "rust-overlay",
"type": "github"
}
},
"tamerye": {
"inputs": {
"crane": "crane",
"nix-sqlite": "nix-sqlite",
"nixpkgs": [
"nixpkgs"
],
"rust-overlay": "rust-overlay"
},
"locked": {
"lastModified": 1780505561,
"narHash": "sha256-534k5H8k6GcFUGy7ENsOvuZ2Lf+6WcOf3vsqfrsnNlo=",
"ref": "main",
"rev": "f9789bb582218218e29a7ac674c5edf0d3609a5d",
"revCount": 17,
"type": "git",
"url": "https://git.pvv.ntnu.no/Mugiten/tamerye.git"
},
"original": {
"ref": "main",
"type": "git",
"url": "https://git.pvv.ntnu.no/Mugiten/tamerye.git"
}
}
},
+22 -12
View File
@@ -4,22 +4,22 @@
inputs = {
nixpkgs.url = "nixpkgs/nixos-unstable";
datasources = {
url = "git+https://git.pvv.ntnu.no/Mugiten/datasources.git";
tamerye = {
url = "git+https://git.pvv.ntnu.no/Mugiten/tamerye.git?ref=main";
inputs.nixpkgs.follows = "nixpkgs";
};
kanjivg-src = {
url = "git+https://git.pvv.ntnu.no/mugiten/kanjivg.git";
flake = false;
datasources = {
url = "git+https://git.pvv.ntnu.no/Mugiten/datasources.git";
inputs.nixpkgs.follows = "nixpkgs";
};
};
outputs = {
self,
nixpkgs,
tamerye,
datasources,
kanjivg-src,
}: let
inherit (nixpkgs) lib;
systems = [
@@ -30,7 +30,14 @@
"armv7l-linux"
];
forAllSystems = f: lib.genAttrs systems (system: f system nixpkgs.legacyPackages.${system});
forAllSystems = f: lib.genAttrs systems (system: let
pkgs = import nixpkgs {
inherit system;
overlays = [
tamerye.overlays.default
];
};
in f system pkgs);
in {
apps = forAllSystems (system: pkgs: {
default = {
@@ -41,7 +48,7 @@
runtimeEnv = {
JADB_PATH = "${self.packages.${system}.database}/jadb.sqlite";
LIBSQLITE_PATH = "${pkgs.sqlite.out}/lib/libsqlite3.so";
LIBSQLITE_PATH = "${pkgs.tamerye-sqlite}/lib/libsqlite3.so";
};
text = ''
@@ -69,18 +76,18 @@
gnumake
lcov
sqldiff
sqlite-interactive
tamerye-sqlite-cli
];
env = {
LIBSQLITE_PATH = "${pkgs.sqlite.out}/lib/libsqlite3.so";
LIBSQLITE_PATH = "${pkgs.tamerye-sqlite}/lib/libsqlite3.so";
JADB_PATH = "result/jadb.sqlite";
LD_LIBRARY_PATH = lib.makeLibraryPath [ pkgs.sqlite ];
LD_LIBRARY_PATH = lib.makeLibraryPath [ pkgs.tamerye-sqlite ];
};
};
sqlite-debugging = pkgs.mkShell {
packages = with pkgs; [
sqlite-interactive
tamerye-sqlite-cli
sqlite-analyzer
sqlite-web
# sqlint
@@ -125,16 +132,19 @@
inherit (datasources.packages.${system}) jmdict radkfile kanjidic2;
database-tool = pkgs.callPackage ./nix/database_tool.nix {
sqlite = pkgs.tamerye-sqlite;
inherit src;
};
database = pkgs.callPackage ./nix/database.nix {
sqlite = pkgs.tamerye-sqlite-cli;
inherit (datasources.packages.${system}) jmdict radkfile kanjidic2 tanos-jlpt;
inherit (self.packages.${system}) database-tool;
inherit src;
};
database-wal = pkgs.callPackage ./nix/database.nix {
sqlite = pkgs.tamerye-sqlite-cli;
inherit (datasources.packages.${system}) jmdict radkfile kanjidic2 tanos-jlpt;
inherit (self.packages.${system}) database-tool;
inherit src;
-92
View File
@@ -1,92 +0,0 @@
import 'package:jadb/_data_ingestion/sql_writable.dart';
/// Values of the `kvg:position` attribute, used by `<g>` elements in the
/// KanjiVG SVG files.
///
/// The value names are significant: `KanjiPathGroupTreeNode.sqlValue` emits
/// `position?.name`, so renaming a value changes the string that is produced.
enum KanjiPathGroupPosition {
bottom,
kamae,
kamaec,
left,
middle,
nyo,
nyoc,
right,
tare,
tarec,
top,
}
/// A single `<g>` element from a KanjiVG SVG file.
///
/// [children] holds further nodes of the same type; every attribute other
/// than [id] is optional.
class KanjiPathGroupTreeNode extends SQLWritable {
  final String id;
  final List<KanjiPathGroupTreeNode> children;
  final String? element;
  final String? original;
  final KanjiPathGroupPosition? position;
  final String? radical;
  final int? part;

  KanjiPathGroupTreeNode({
    required this.id,
    this.children = const [],
    this.element,
    this.original,
    this.position,
    this.radical,
    this.part,
  });

  /// Scalar attributes of this node keyed by name; [children] is not included.
  ///
  /// [position] is emitted as its enum value name, or `null` when unset.
  // NOTE(review): the key here is 'id' while the "KanjiVG_PathGroup" table
  // calls the column "groupId" - confirm how keys are mapped on insert.
  @override
  Map<String, Object?> get sqlValue {
    return <String, Object?>{
      'id': id,
      'element': element,
      'original': original,
      'position': position?.name,
      'radical': radical,
      'part': part,
    };
  }
}
/// A `<text>` element from the StrokeNumber group of a KanjiVG SVG file:
/// a stroke number together with an x/y pair.
class KanjiStrokeNumber extends SQLWritable {
  final int num;
  final double x;
  final double y;

  KanjiStrokeNumber(this.num, this.x, this.y);

  /// The three fields keyed by their own names.
  // NOTE(review): the "KanjiVG_StrokeNumber" table names the column
  // "strokeNum", not "num" - confirm how keys are mapped on insert.
  @override
  Map<String, Object?> get sqlValue {
    return <String, Object?>{'num': num, 'x': x, 'y': y};
  }
}
/// A `<path>` element from a KanjiVG SVG file: its id, its type and the
/// path text itself.
class KanjiVGPath extends SQLWritable {
  final String id;
  final String type;
  final String svgPath;

  KanjiVGPath({required this.id, required this.type, required this.svgPath});

  /// The three fields keyed by their own names.
  // NOTE(review): the "KanjiVG_Path" table names the id column "pathId",
  // not "id" - confirm how keys are mapped on insert.
  @override
  Map<String, Object?> get sqlValue {
    return <String, Object?>{'id': id, 'type': type, 'svgPath': svgPath};
  }
}
/// Everything collected for one KanjiVG character: the character itself plus
/// its paths, stroke numbers and path groups.
class KanjiVGItem extends SQLWritable {
  final String character;
  final List<KanjiVGPath> paths;
  final List<KanjiStrokeNumber> strokeNumbers;
  final List<KanjiPathGroupTreeNode> pathGroups;

  KanjiVGItem({
    required this.character,
    required this.paths,
    required this.strokeNumbers,
    required this.pathGroups,
  });

  /// Only [character] is emitted; the three lists are not part of this map.
  @override
  Map<String, Object?> get sqlValue {
    return <String, Object?>{'character': character};
  }
}
@@ -1,7 +0,0 @@
import 'package:sqflite_common/sqflite.dart';
/// Stub seeder for KanjiVG data.
///
/// Creates a batch on [db] and commits it without queuing any statements.
/// [xmlContents] is accepted but not read by this implementation.
Future<void> seedKanjiVGData(Iterable<String> xmlContents, Database db) async {
  await db.batch().commit(noResult: true);
}
-2
View File
@@ -1,6 +1,5 @@
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/table_names/kanjidic.dart';
import 'package:jadb/table_names/kanjivg.dart';
import 'package:jadb/table_names/radkfile.dart';
import 'package:jadb/table_names/tanos_jlpt.dart';
import 'package:sqflite_common/sqlite_api.dart';
@@ -22,7 +21,6 @@ Future<void> verifyTablesWithDbConnection(DatabaseExecutor db) async {
...KANJIDICTableNames.allTables,
...RADKFILETableNames.allTables,
...TanosJLPTTableNames.allTables,
...KanjiVGTableNames.allTables,
};
final missingTables = expectedTables.difference(tables);
+6 -5
View File
@@ -67,25 +67,28 @@ String _filterFTSSensitiveCharacters(String word) {
SELECT DISTINCT
"$tableName"."entryId",
100
+ (("${tableName}FTS"."reading" = ?) * 10000)
+ (("$tableName"."reading" = ?1) * 100000)
+ (("${tableName}FTS"."reading" = normalize_jp(?1)) * 10000)
+ (("$tableName"."reading" LIKE ?1 || '%') * 20)
+ (("$tableName"."orderNum" = 0) * 20)
+ COALESCE("JMdict_EntryScore"."score", 0)
AS "score"
FROM "${tableName}FTS"
JOIN "$tableName" USING ("elementId")
LEFT JOIN "JMdict_EntryScore" USING ("elementId")
WHERE "${tableName}FTS"."reading" MATCH ? || '*'
WHERE "${tableName}FTS"."reading" MATCH normalize_jp(?1) || '*'
),
non_fts_results AS (
SELECT DISTINCT
"$tableName"."entryId",
50
+ (("$tableName"."reading" LIKE '%' || ?1 || '%') * 20)
+ (("$tableName"."orderNum" = 0) * 20)
+ COALESCE("JMdict_EntryScore"."score", 0)
AS "score"
FROM "$tableName"
LEFT JOIN "JMdict_EntryScore" USING ("elementId")
WHERE "reading" LIKE '%' || ? || '%'
WHERE "$tableName"."reading" LIKE '%' || normalize_jp(?1) || '%'
AND "$tableName"."entryId" NOT IN (SELECT "entryId" FROM "fts_results")
)
@@ -102,8 +105,6 @@ String _filterFTSSensitiveCharacters(String word) {
'''
.trim(),
[
_filterFTSSensitiveCharacters(word),
_filterFTSSensitiveCharacters(word),
_filterFTSSensitiveCharacters(word),
?pageSize,
?offset,
-9
View File
@@ -1,9 +0,0 @@
/// String constants naming the KanjiVG SQL tables.
abstract class KanjiVGTableNames {
  static const String version = 'KanjiVG_Version';
  static const String entry = 'KanjiVG_Entry';
  static const String path = 'KanjiVG_Path';
  static const String strokeNumber = 'KanjiVG_StrokeNumber';
  static const String pathGroup = 'KanjiVG_PathGroup';

  /// All five table names above, collected into a set.
  static Set<String> get allTables {
    return <String>{version, entry, path, strokeNumber, pathGroup};
  }
}
+1 -1
View File
@@ -1 +1 @@
const int jadbSchemaVersion = 1;
const int jadbSchemaVersion = 2;
+2
View File
@@ -66,6 +66,7 @@ CREATE TABLE "JMdict_KanjiElement" (
) WITHOUT ROWID;
CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading");
CREATE INDEX "JMdict_KanjiElement_byNormalizedReading" ON "JMdict_KanjiElement"(normalize_jp("reading"));
CREATE TABLE "JMdict_KanjiElementInfo" (
"elementId" INTEGER NOT NULL REFERENCES "JMdict_KanjiElement"("elementId"),
@@ -91,6 +92,7 @@ CREATE TABLE "JMdict_ReadingElement" (
) WITHOUT ROWID;
CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading");
CREATE INDEX "JMdict_ReadingElement_byNormalizedReading" ON "JMdict_ReadingElement"(normalize_jp("reading"));
CREATE TABLE "JMdict_ReadingElementRestriction" (
"elementId" INTEGER NOT NULL REFERENCES "JMdict_ReadingElement"("elementId"),
+4 -4
View File
@@ -4,7 +4,7 @@ CREATE TRIGGER "JMdict_KanjiElement_InsertFTS"
AFTER INSERT ON "JMdict_KanjiElement"
BEGIN
INSERT INTO "JMdict_KanjiElementFTS"("elementId", "reading")
VALUES (NEW."elementId", NEW."reading");
VALUES (NEW."elementId", normalize_jp(NEW."reading"));
END;
CREATE TRIGGER "JMdict_KanjiElement_UpdateFTS"
@@ -14,7 +14,7 @@ BEGIN
UPDATE "JMdict_KanjiElementFTS"
SET
"elementId" = NEW."elementId",
"reading" = NEW."reading"
"reading" = normalize_jp(NEW."reading")
WHERE "elementId" = OLD."elementId";
END;
@@ -33,7 +33,7 @@ CREATE TRIGGER "JMdict_ReadingElement_InsertFTS"
AFTER INSERT ON "JMdict_ReadingElement"
BEGIN
INSERT INTO "JMdict_ReadingElementFTS"("elementId", "reading")
VALUES (NEW."elementId", NEW."reading");
VALUES (NEW."elementId", normalize_jp(NEW."reading"));
END;
CREATE TRIGGER "JMdict_ReadingElement_UpdateFTS"
@@ -43,7 +43,7 @@ BEGIN
UPDATE "JMdict_ReadingElementFTS"
SET
"elementId" = NEW."elementId",
"reading" = NEW."reading"
"reading" = normalize_jp(NEW."reading")
WHERE "elementId" = OLD."elementId";
END;
-45
View File
@@ -1,45 +0,0 @@
-- Version record for the KanjiVG data: a version string (primary key),
-- a date and a hash, all required.
CREATE TABLE "KanjiVG_Version" (
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
"date" DATE NOT NULL,
"hash" VARCHAR(64) NOT NULL
) WITHOUT ROWID;
-- Guards "KanjiVG_Version" against a second row: any INSERT attempted while
-- the table already contains at least one row fails with the message below.
CREATE TRIGGER "KanjiVG_Version_SingleRow"
BEFORE INSERT ON "KanjiVG_Version"
WHEN (SELECT COUNT(*) FROM "KanjiVG_Version") >= 1
BEGIN
SELECT RAISE(FAIL, 'Only one row allowed in KanjiVG_Version');
END;
-- One row per character. The stroke-number, path and path-group tables
-- reference "character" from here.
CREATE TABLE "KanjiVG_Entry" (
"character" CHAR(1) PRIMARY KEY NOT NULL
) WITHOUT ROWID;
-- An x/y pair stored per stroke number of a character.
-- Rows are keyed by ("character", "strokeNum").
CREATE TABLE "KanjiVG_StrokeNumber" (
"character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
"strokeNum" INTEGER NOT NULL,
"x" REAL NOT NULL,
"y" REAL NOT NULL,
PRIMARY KEY ("character", "strokeNum")
) WITHOUT ROWID;
-- The paths belonging to a character, each with a type and its path text.
-- Rows are keyed by ("character", "pathId").
CREATE TABLE "KanjiVG_Path" (
"character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
"pathId" TEXT NOT NULL,
"type" VARCHAR(10) NOT NULL,
"svgPath" TEXT NOT NULL,
PRIMARY KEY ("character", "pathId")
) WITHOUT ROWID;
-- Path groups of a character, keyed by ("character", "groupId").
-- "parentGroupId" is optional and, when set, points at another group row;
-- "position" is restricted to the listed values or NULL.
CREATE TABLE "KanjiVG_PathGroup" (
  "character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
  "groupId" TEXT NOT NULL,
  "parentGroupId" TEXT,
  "element" TEXT,
  "original" TEXT,
  "position" VARCHAR(10),
  "radical" TEXT,
  "part" INTEGER,
  PRIMARY KEY ("character", "groupId"),
  -- The parent key has to be the whole primary key: "groupId" on its own is
  -- neither PRIMARY KEY nor UNIQUE, and SQLite raises "foreign key mismatch"
  -- for such a reference once foreign keys are enforced.
  -- NOTE(review): this requires a parent group to belong to the same
  -- character as its child - confirm that matches the data.
  FOREIGN KEY ("character", "parentGroupId")
    REFERENCES "KanjiVG_PathGroup"("character", "groupId"),
  CHECK ("position" IN ('bottom', 'kamae', 'kamaec', 'left', 'middle', 'nyo', 'nyoc', 'right', 'tare', 'tarec', 'top') OR "position" IS NULL)
) WITHOUT ROWID;
+8
View File
@@ -1,6 +1,10 @@
{
src,
buildDartApplication,
sqlite,
callPackage,
path,
}:
buildDartApplication {
pname = "jadb-database-tool";
@@ -32,5 +36,9 @@ buildDartApplication {
autoPubspecLock = ../pubspec.lock;
customSourceBuilders.sqlite3 = callPackage "${path}/pkgs/development/compilers/dart/package-source-builders/sqlite3/default.nix" {
inherit sqlite;
};
meta.mainProgram = "jadb";
}