From 4f320e4ea9f0426d66c8309afe947fe1832563fc Mon Sep 17 00:00:00 2001 From: h7x4 Date: Wed, 8 Apr 2026 16:27:19 +0900 Subject: [PATCH] jmdict: embed `orderNum` in `elementId` for kanji and readings --- lib/_data_ingestion/jmdict/objects.dart | 13 ++++++------ lib/_data_ingestion/jmdict/seed_data.dart | 13 +++++------- lib/_data_ingestion/jmdict/xml_parser.dart | 20 ++++++++++++++++--- lib/search/word_search/data_query.dart | 4 ++-- migrations/0001_JMDict.sql | 12 ++++------- .../0005_JMDict_search_index_tables.sql | 8 ++++---- migrations/0010_Views.sql | 4 ++-- 7 files changed, 40 insertions(+), 34 deletions(-) diff --git a/lib/_data_ingestion/jmdict/objects.dart b/lib/_data_ingestion/jmdict/objects.dart index ea75d10..1db4c5b 100644 --- a/lib/_data_ingestion/jmdict/objects.dart +++ b/lib/_data_ingestion/jmdict/objects.dart @@ -1,13 +1,15 @@ import 'package:jadb/_data_ingestion/sql_writable.dart'; abstract class Element extends SQLWritable { + final int elementId; final String reading; final int? news; final int? ichi; final int? spec; final int? gai; final int? nf; - const Element({ + Element({ + required this.elementId, required this.reading, this.news, this.ichi, @@ -18,6 +20,7 @@ abstract class Element extends SQLWritable { @override Map get sqlValue => { + 'elementId': elementId, 'reading': reading, 'news': news, 'ichi': ichi, @@ -28,12 +31,11 @@ abstract class Element extends SQLWritable { } class KanjiElement extends Element { - int orderNum; List info; KanjiElement({ this.info = const [], - required this.orderNum, + required super.elementId, required super.reading, super.news, super.ichi, @@ -45,21 +47,19 @@ class KanjiElement extends Element { @override Map get sqlValue => { ...super.sqlValue, - 'orderNum': orderNum, }; } class ReadingElement extends Element { - int orderNum; bool readingDoesNotMatchKanji; List info; List restrictions; ReadingElement({ - required this.orderNum, required this.readingDoesNotMatchKanji, this.info = const [], this.restrictions = const [], + required super.elementId, required super.reading, super.news, super.ichi, @@ -71,7 +71,6 @@ class ReadingElement extends Element { @override Map get sqlValue => { ...super.sqlValue, - 'orderNum': orderNum, 'readingDoesNotMatchKanji': readingDoesNotMatchKanji, }; } diff --git a/lib/_data_ingestion/jmdict/seed_data.dart b/lib/_data_ingestion/jmdict/seed_data.dart index 03b3208..dde338c 100644 --- a/lib/_data_ingestion/jmdict/seed_data.dart +++ b/lib/_data_ingestion/jmdict/seed_data.dart @@ -84,41 +84,38 @@ Future seedJMDictData(List entries, Database db) async { print(' [JMdict] Batch 1 - Kanji and readings'); Batch b = db.batch(); - int elementId = 0; for (final e in entries) { b.insert(JMdictTableNames.entry, e.sqlValue); for (final k in e.kanji) { - elementId++; b.insert( JMdictTableNames.kanjiElement, - k.sqlValue..addAll({'entryId': e.entryId, 'elementId': elementId}), + k.sqlValue..addAll({'entryId': e.entryId}), ); for (final i in k.info) { b.insert(JMdictTableNames.kanjiInfo, { - 'elementId': elementId, + 'elementId': k.elementId, 'info': i, }); } } for (final r in e.readings) { - elementId++; b.insert( JMdictTableNames.readingElement, - r.sqlValue..addAll({'entryId': e.entryId, 'elementId': elementId}), + r.sqlValue..addAll({'entryId': e.entryId}), ); for (final i in r.info) { b.insert(JMdictTableNames.readingInfo, { - 'elementId': elementId, + 'elementId': r.elementId, 'info': i, }); } for (final res in r.restrictions) { b.insert(JMdictTableNames.readingRestriction, { - 'elementId': elementId, + 'elementId': r.elementId, 'restriction': res, }); } diff --git a/lib/_data_ingestion/jmdict/xml_parser.dart b/lib/_data_ingestion/jmdict/xml_parser.dart index 0941a7c..c19682f 100644 --- a/lib/_data_ingestion/jmdict/xml_parser.dart +++ b/lib/_data_ingestion/jmdict/xml_parser.dart @@ -80,11 +80,18 @@ List parseJMDictData(XmlElement root) { final List readingEls = []; final List senses = []; - for (final (kanjiNum, kEle) in entry.findElements('k_ele').indexed) { + for (final (orderNum, kEle) in entry.findElements('k_ele').indexed) { + assert( + orderNum < 100, + 'Entry $entryId has more than 100 kanji elements, which will break the elementId generation logic.', + ); + final elementId = entryId * 100 + orderNum; + final kePri = getPriorityValues(kEle, 'ke'); + kanjiEls.add( KanjiElement( - orderNum: kanjiNum + 1, + elementId: elementId, info: kEle .findElements('ke_inf') .map((e) => e.innerText.substring(1, e.innerText.length - 1)) @@ -100,13 +107,20 @@ List parseJMDictData(XmlElement root) { } for (final (orderNum, rEle) in entry.findElements('r_ele').indexed) { + assert( + orderNum < 100, + 'Entry $entryId has more than 100 readings, which will break the elementId generation logic.', + ); + final elementId = entryId * 100 + orderNum; + final rePri = getPriorityValues(rEle, 're'); final readingDoesNotMatchKanji = rEle .findElements('re_nokanji') .isNotEmpty; + readingEls.add( ReadingElement( - orderNum: orderNum + 1, + elementId: elementId, readingDoesNotMatchKanji: readingDoesNotMatchKanji, info: rEle .findElements('re_inf') diff --git a/lib/search/word_search/data_query.dart b/lib/search/word_search/data_query.dart index 461d553..ab8b0f7 100644 --- a/lib/search/word_search/data_query.dart +++ b/lib/search/word_search/data_query.dart @@ -69,7 +69,7 @@ Future>> _readingelementsQuery( JMdictTableNames.readingElement, where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})', whereArgs: entryIds, - orderBy: 'orderNum', + orderBy: 'elementId', ); Future>> _kanjielementsQuery( @@ -79,7 +79,7 @@ Future>> _kanjielementsQuery( JMdictTableNames.kanjiElement, where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})', whereArgs: entryIds, - orderBy: 'orderNum', + orderBy: 'elementId', ); Future>> _jlpttagsQuery( diff --git a/migrations/0001_JMDict.sql b/migrations/0001_JMDict.sql index 28d448e..2629c21 100644 --- a/migrations/0001_JMDict.sql +++ b/migrations/0001_JMDict.sql @@ -53,19 +53,17 @@ CREATE TABLE "JMdict_Entry" ( CREATE TABLE "JMdict_KanjiElement" ( "elementId" INTEGER PRIMARY KEY, + "orderNum" INTEGER GENERATED ALWAYS AS ("elementId" % 100) VIRTUAL, "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"), - "orderNum" INTEGER NOT NULL, "reading" TEXT NOT NULL, "news" INTEGER CHECK ("news" BETWEEN 1 AND 2), "ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2), "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2), "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2), "nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48), - UNIQUE("entryId", "reading"), - UNIQUE("entryId", "orderNum") + UNIQUE("entryId", "reading") ) WITHOUT ROWID; -CREATE INDEX "JMdict_KanjiElement_byEntryId_byOrderNum" ON "JMdict_KanjiElement"("entryId", "orderNum"); CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading"); CREATE TABLE "JMdict_KanjiElementInfo" ( @@ -78,8 +76,8 @@ CREATE TABLE "JMdict_KanjiElementInfo" ( CREATE TABLE "JMdict_ReadingElement" ( "elementId" INTEGER PRIMARY KEY, + "orderNum" INTEGER GENERATED ALWAYS AS ("elementId" % 100) VIRTUAL, "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"), - "orderNum" INTEGER NOT NULL, "reading" TEXT NOT NULL, "readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE, "news" INTEGER CHECK ("news" BETWEEN 1 AND 2), @@ -87,11 +85,9 @@ CREATE TABLE "JMdict_ReadingElement" ( "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2), "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2), "nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48), - UNIQUE("entryId", "reading"), - UNIQUE("entryId", "orderNum") + UNIQUE("entryId", "reading") ) WITHOUT ROWID; -CREATE INDEX "JMdict_ReadingElement_byEntryId_byOrderNum" ON "JMdict_ReadingElement"("entryId", "orderNum"); CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading"); CREATE TABLE "JMdict_ReadingElementRestriction" ( diff --git a/migrations/0005_JMDict_search_index_tables.sql b/migrations/0005_JMDict_search_index_tables.sql index 67eca9f..ea11436 100644 --- a/migrations/0005_JMDict_search_index_tables.sql +++ b/migrations/0005_JMDict_search_index_tables.sql @@ -44,7 +44,7 @@ SELECT + (("spec" IS 2) * 5) + (("gai" IS 1) * 10) + (("gai" IS 2) * 5) - + (("orderNum" IS 1) * 20) + + (("orderNum" IS 0) * 20) - (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5) AS "score" FROM "JMdict_ReadingElement" @@ -76,7 +76,7 @@ SELECT + (("spec" IS 2) * 5) + (("gai" IS 1) * 10) + (("gai" IS 2) * 5) - + (("orderNum" IS 1) * 20) + + (("orderNum" IS 0) * 20) - (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5) AS "score" FROM "JMdict_KanjiElement" @@ -108,7 +108,7 @@ BEGIN END; CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement" -AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum" +AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "elementId" ON "JMdict_ReadingElement" BEGIN UPDATE "JMdict_EntryScore" @@ -145,7 +145,7 @@ BEGIN END; CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement" -AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum" +AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "elementId" ON "JMdict_KanjiElement" BEGIN UPDATE "JMdict_EntryScore" diff --git a/migrations/0010_Views.sql b/migrations/0010_Views.sql index f7bd416..10e1d82 100644 --- a/migrations/0010_Views.sql +++ b/migrations/0010_Views.sql @@ -32,9 +32,9 @@ SELECT THEN "JMdict_ReadingElement"."reading" ELSE NULL END AS "furigana", - COALESCE("JMdict_KanjiElement"."orderNum", 1) + COALESCE("JMdict_KanjiElement"."orderNum", 0) + "JMdict_ReadingElement"."orderNum" - = 2 + = 0 AS "isFirst", "JMdict_KanjiElement"."orderNum" AS "kanjiOrderNum", "JMdict_ReadingElement"."orderNum" AS "readingOrderNum"