From c32775ce7ab8f51117f1b200fe8835f9fe9ac477 Mon Sep 17 00:00:00 2001 From: h7x4 Date: Tue, 24 Jun 2025 01:01:07 +0200 Subject: [PATCH] use ids for \{kanji,reading\}Element tables --- lib/_data_ingestion/jmdict/seed_data.dart | 23 +++++--- lib/search/word_search/data_query.dart | 19 +++---- lib/search/word_search/entry_id_query.dart | 10 ++-- lib/search/word_search/regrouping.dart | 27 ++++++++-- migrations/0001_JMDict.sql | 27 ++++------ migrations/0003_JMDict_FTS5_Ttables.sql | 28 +++++----- .../0005_JMDict_search_index_tables.sql | 54 +++++++++---------- migrations/0010_Views.sql | 9 +++- 8 files changed, 106 insertions(+), 91 deletions(-) diff --git a/lib/_data_ingestion/jmdict/seed_data.dart b/lib/_data_ingestion/jmdict/seed_data.dart index 25db0fb..b8a3404 100644 --- a/lib/_data_ingestion/jmdict/seed_data.dart +++ b/lib/_data_ingestion/jmdict/seed_data.dart @@ -72,20 +72,25 @@ Future seedJMDictData(List entries, Database db) async { print(' [JMdict] Batch 1 - Kanji and readings'); Batch b = db.batch(); + int elementId = 0; for (final e in entries) { b.insert(JMdictTableNames.entry, e.sqlValue); + for (final k in e.kanji) { + elementId++; b.insert( JMdictTableNames.kanjiElement, - k.sqlValue..addAll({'entryId': e.entryId}), + k.sqlValue..addAll({ + 'entryId': e.entryId, + 'elementId': elementId, + }), ); for (final i in k.info) { b.insert( JMdictTableNames.kanjiInfo, { - 'entryId': e.entryId, - 'reading': k.reading, + 'elementId': elementId, 'info': i, }, ); @@ -93,17 +98,20 @@ Future seedJMDictData(List entries, Database db) async { } for (final r in e.readings) { + elementId++; b.insert( JMdictTableNames.readingElement, - r.sqlValue..addAll({'entryId': e.entryId}), + r.sqlValue..addAll({ + 'entryId': e.entryId, + 'elementId': elementId, + }), ); for (final i in r.info) { b.insert( JMdictTableNames.readingInfo, { - 'entryId': e.entryId, - 'reading': r.reading, + 'elementId': elementId, 'info': i, }, ); @@ -112,8 +120,7 @@ Future seedJMDictData(List entries, Database db) async { b.insert( JMdictTableNames.readingRestriction, { - 'entryId': e.entryId, - 'reading': r.reading, + 'elementId': elementId, 'restriction': res, }, ); diff --git a/lib/search/word_search/data_query.dart b/lib/search/word_search/data_query.dart index 8990855..349ab82 100644 --- a/lib/search/word_search/data_query.dart +++ b/lib/search/word_search/data_query.dart @@ -230,40 +230,37 @@ Future fetchLinearWordQueryData( // Reading queries final readingIds = readingElements - .map((element) => ( - element['entryId'] as int, - escapeStringValue(element['reading'] as String) - )) + .map((element) => element['elementId'] as int) .toList(); late final List> readingElementInfos; final Future>> readingElementInfos_query = connection.query( JMdictTableNames.readingInfo, - where: '(entryId, reading) IN (${readingIds.join(',')})', + where: '(elementId) IN (${List.filled(readingIds.length, '?').join(',')})', + whereArgs: readingIds, ); late final List> readingElementRestrictions; final Future>> readingElementRestrictions_query = connection.query( JMdictTableNames.readingRestriction, - where: '(entryId, reading) IN (${readingIds.join(',')})', + where: '(elementId) IN (${List.filled(readingIds.length, '?').join(',')})', + whereArgs: readingIds, ); // Kanji queries final kanjiIds = kanjiElements - .map((element) => ( - element['entryId'] as int, - escapeStringValue(element['reading'] as String) - )) + .map((element) => element['elementId'] as int) .toList(); late final List> kanjiElementInfos; final Future>> kanjiElementInfos_query = connection.query( JMdictTableNames.kanjiInfo, - where: '(entryId, reading) IN (${kanjiIds.join(',')})', + where: '(elementId) IN (${List.filled(kanjiIds.length, '?').join(',')})', + whereArgs: kanjiIds, ); await Future.wait([ diff --git a/lib/search/word_search/entry_id_query.dart b/lib/search/word_search/entry_id_query.dart index e5b3dd5..8eec0ab 100644 --- a/lib/search/word_search/entry_id_query.dart +++ b/lib/search/word_search/entry_id_query.dart @@ -51,14 +51,14 @@ String _filterFTSSensitiveCharacters(String word) { WITH fts_results AS ( SELECT DISTINCT - "${tableName}FTS"."entryId", + "${tableName}"."entryId", 100 + (("${tableName}FTS"."reading" = ?) * 50) + "JMdict_EntryScore"."score" AS "score" FROM "${tableName}FTS" - JOIN "${tableName}" USING ("entryId", "reading") - JOIN "JMdict_EntryScore" USING ("entryId", "reading") + JOIN "${tableName}" USING ("elementId") + JOIN "JMdict_EntryScore" USING ("elementId") WHERE "${tableName}FTS"."reading" MATCH ? || '*' AND "JMdict_EntryScore"."type" = '${tableName == JMdictTableNames.kanjiElement ? 'kanji' : 'reading'}' ORDER BY @@ -72,9 +72,9 @@ String _filterFTSSensitiveCharacters(String word) { + "JMdict_EntryScore"."score" AS "score" FROM "${tableName}" - JOIN "JMdict_EntryScore" USING ("entryId", "reading") + JOIN "JMdict_EntryScore" USING ("elementId") WHERE "reading" LIKE '%' || ? || '%' - AND "entryId" NOT IN (SELECT "entryId" FROM "fts_results") + AND "${tableName}"."entryId" NOT IN (SELECT "entryId" FROM "fts_results") AND "JMdict_EntryScore"."type" = '${tableName == JMdictTableNames.kanjiElement ? 'kanji' : 'reading'}' ORDER BY "JMdict_EntryScore"."score" DESC, diff --git a/lib/search/word_search/regrouping.dart b/lib/search/word_search/regrouping.dart index e11165e..11c279e 100644 --- a/lib/search/word_search/regrouping.dart +++ b/lib/search/word_search/regrouping.dart @@ -182,16 +182,35 @@ GroupedWordResult _regroup_words({ 'No readings found for entryId: $entryId', ); + final Map readingElementIdsToReading = { + for (final element in readingElements_) + element['elementId'] as int: element['reading'] as String, + }; + + final Map kanjiElementIdsToReading = { + for (final element in kanjiElements_) + element['elementId'] as int: element['reading'] as String, + }; + + final readingElementInfos_ = readingElementInfos + .where((element) => element['entryId'] == entryId) + .toList(); + + final kanjiElementInfos_ = kanjiElementInfos + .where((element) => element['entryId'] == entryId) + .toList(); + return GroupedWordResult( rubys: rubys, readingInfos: { - for (final rei in readingElementInfos) - rei['reading'] as String: + for (final rei in readingElementInfos_) + readingElementIdsToReading[rei['elementId'] as int]!: JMdictReadingInfo.fromId(rei['info'] as String), }, kanjiInfos: { - for (final kei in kanjiElementInfos) - kei['reading'] as String: JMdictKanjiInfo.fromId(kei['info'] as String), + for (final kei in kanjiElementInfos_) + kanjiElementIdsToReading[kei['elementId'] as int]!: + JMdictKanjiInfo.fromId(kei['info'] as String), }, ); } diff --git a/migrations/0001_JMDict.sql b/migrations/0001_JMDict.sql index 1e60113..ba9e2cc 100644 --- a/migrations/0001_JMDict.sql +++ b/migrations/0001_JMDict.sql @@ -39,6 +39,7 @@ CREATE TABLE "JMdict_Entry" ( -- KanjiElement CREATE TABLE "JMdict_KanjiElement" ( + "elementId" INTEGER PRIMARY KEY, "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"), "orderNum" INTEGER NOT NULL, "reading" TEXT NOT NULL, @@ -47,7 +48,7 @@ CREATE TABLE "JMdict_KanjiElement" ( "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2), "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2), "nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48), - PRIMARY KEY ("entryId", "reading"), + UNIQUE("entryId", "reading"), UNIQUE("entryId", "orderNum") ) WITHOUT ROWID; @@ -55,17 +56,15 @@ CREATE INDEX "JMdict_KanjiElement_byEntryId_byOrderNum" ON "JMdict_KanjiElement" CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading"); CREATE TABLE "JMdict_KanjiElementInfo" ( - "entryId" INTEGER NOT NULL, - "reading" TEXT NOT NULL, + "elementId" INTEGER NOT NULL REFERENCES "JMdict_KanjiElement"("elementId"), "info" TEXT NOT NULL REFERENCES "JMdict_InfoKanji"("id"), - FOREIGN KEY ("entryId", "reading") - REFERENCES "JMdict_KanjiElement"("entryId", "reading"), - PRIMARY KEY ("entryId", "reading", "info") + PRIMARY KEY ("elementId", "info") ) WITHOUT ROWID; -- ReadingElement CREATE TABLE "JMdict_ReadingElement" ( + "elementId" INTEGER PRIMARY KEY, "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"), "orderNum" INTEGER NOT NULL, "reading" TEXT NOT NULL, @@ -75,7 +74,7 @@ CREATE TABLE "JMdict_ReadingElement" ( "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2), "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2), "nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48), - PRIMARY KEY ("entryId", "reading"), + UNIQUE("entryId", "reading"), UNIQUE("entryId", "orderNum") ) WITHOUT ROWID; @@ -83,21 +82,15 @@ CREATE INDEX "JMdict_ReadingElement_byEntryId_byOrderNum" ON "JMdict_ReadingElem CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading"); CREATE TABLE "JMdict_ReadingElementRestriction" ( - "entryId" INTEGER NOT NULL, - "reading" TEXT NOT NULL, + "elementId" INTEGER NOT NULL REFERENCES "JMdict_ReadingElement"("elementId"), "restriction" TEXT NOT NULL, - FOREIGN KEY ("entryId", "reading") - REFERENCES "JMdict_ReadingElement"("entryId", "reading"), - PRIMARY KEY ("entryId", "reading", "restriction") + PRIMARY KEY ("elementId", "restriction") ) WITHOUT ROWID; CREATE TABLE "JMdict_ReadingElementInfo" ( - "entryId" INTEGER NOT NULL, - "reading" TEXT NOT NULL, + "elementId" INTEGER NOT NULL REFERENCES "JMdict_ReadingElement"("elementId"), "info" TEXT NOT NULL REFERENCES "JMdict_InfoReading"("id"), - FOREIGN KEY ("entryId", "reading") - REFERENCES "JMdict_ReadingElement"("entryId", "reading"), - PRIMARY KEY ("entryId", "reading", "info") + PRIMARY KEY ("elementId", "info") ) WITHOUT ROWID; -- Sense diff --git a/migrations/0003_JMDict_FTS5_Ttables.sql b/migrations/0003_JMDict_FTS5_Ttables.sql index 33adf3c..1fcf540 100644 --- a/migrations/0003_JMDict_FTS5_Ttables.sql +++ b/migrations/0003_JMDict_FTS5_Ttables.sql @@ -1,55 +1,55 @@ -CREATE VIRTUAL TABLE "JMdict_KanjiElementFTS" USING FTS5("entryId" UNINDEXED, "reading"); +CREATE VIRTUAL TABLE "JMdict_KanjiElementFTS" USING FTS5("elementId" UNINDEXED, "reading"); CREATE TRIGGER "JMdict_KanjiElement_InsertFTS" AFTER INSERT ON "JMdict_KanjiElement" BEGIN - INSERT INTO "JMdict_KanjiElementFTS"("entryId", "reading") - VALUES (NEW."entryId", NEW."reading"); + INSERT INTO "JMdict_KanjiElementFTS"("elementId", "reading") + VALUES (NEW."elementId", NEW."reading"); END; CREATE TRIGGER "JMdict_KanjiElement_UpdateFTS" -AFTER UPDATE OF "entryId", "reading" +AFTER UPDATE OF "elementId", "reading" ON "JMdict_KanjiElement" BEGIN UPDATE "JMdict_KanjiElementFTS" SET - "entryId" = NEW."entryId", + "elementId" = NEW."elementId", "reading" = NEW."reading" - WHERE "entryId" = OLD."entryId"; + WHERE "elementId" = OLD."elementId"; END; CREATE TRIGGER "JMdict_KanjiElement_DeleteFTS" AFTER DELETE ON "JMdict_KanjiElement" BEGIN DELETE FROM "JMdict_KanjiElementFTS" - WHERE "entryId" = OLD."entryId"; + WHERE "elementId" = OLD."elementId"; END; -CREATE VIRTUAL TABLE "JMdict_ReadingElementFTS" USING FTS5("entryId" UNINDEXED, "reading"); +CREATE VIRTUAL TABLE "JMdict_ReadingElementFTS" USING FTS5("elementId" UNINDEXED, "reading"); CREATE TRIGGER "JMdict_ReadingElement_InsertFTS" AFTER INSERT ON "JMdict_ReadingElement" BEGIN - INSERT INTO "JMdict_ReadingElementFTS"("entryId", "reading") - VALUES (NEW."entryId", NEW."reading"); + INSERT INTO "JMdict_ReadingElementFTS"("elementId", "reading") + VALUES (NEW."elementId", NEW."reading"); END; CREATE TRIGGER "JMdict_ReadingElement_UpdateFTS" -AFTER UPDATE OF "entryId", "reading" +AFTER UPDATE OF "elementId", "reading" ON "JMdict_ReadingElement" BEGIN UPDATE "JMdict_ReadingElementFTS" SET - "entryId" = NEW."entryId", + "elementId" = NEW."elementId", "reading" = NEW."reading" - WHERE "entryId" = OLD."entryId"; + WHERE "elementId" = OLD."elementId"; END; CREATE TRIGGER "JMdict_ReadingElement_DeleteFTS" AFTER DELETE ON "JMdict_ReadingElement" BEGIN DELETE FROM "JMdict_ReadingElementFTS" - WHERE "entryId" = OLD."entryId"; + WHERE "elementId" = OLD."elementId"; END; diff --git a/migrations/0005_JMDict_search_index_tables.sql b/migrations/0005_JMDict_search_index_tables.sql index 909805e..c2575a9 100644 --- a/migrations/0005_JMDict_search_index_tables.sql +++ b/migrations/0005_JMDict_search_index_tables.sql @@ -1,17 +1,17 @@ CREATE TABLE "JMdict_EntryScore" ( "type" TEXT NOT NULL CHECK ("type" IN ('reading', 'kanji')), - "entryId" INTEGER NOT NULL, - "reading" TEXT NOT NULL, + "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"), + "elementId" INTEGER NOT NULL, "score" INTEGER NOT NULL DEFAULT 0, "common" BOOLEAN NOT NULL DEFAULT FALSE, - PRIMARY KEY ("type", "entryId", "reading") + PRIMARY KEY ("type", "elementId") ) WITHOUT ROWID; -CREATE INDEX "JMdict_EntryScore_byEntryId_byReading_byScore" ON "JMdict_EntryScore"("entryId", "reading", "score"); +CREATE INDEX "JMdict_EntryScore_byElementId_byScore" ON "JMdict_EntryScore"("elementId", "score"); CREATE INDEX "JMdict_EntryScore_byScore" ON "JMdict_EntryScore"("score"); CREATE INDEX "JMdict_EntryScore_byCommon" ON "JMdict_EntryScore"("common"); -CREATE INDEX "JMdict_EntryScore_byType_byEntryId_byReading_byScore" ON "JMdict_EntryScore"("type", "entryId", "reading", "score"); +CREATE INDEX "JMdict_EntryScore_byType_byElementId_byScore" ON "JMdict_EntryScore"("type", "elementId", "score"); CREATE INDEX "JMdict_EntryScore_byType_byScore" ON "JMdict_EntryScore"("type", "score"); CREATE INDEX "JMdict_EntryScore_byType_byCommon" ON "JMdict_EntryScore"("type", "common"); @@ -22,7 +22,7 @@ CREATE VIEW "JMdict_EntryScoreView_Reading" AS SELECT 'reading' AS "type", "JMdict_ReadingElement"."entryId", - "JMdict_ReadingElement"."reading", + "JMdict_ReadingElement"."elementId", ( "news" IS 1 OR "ichi" IS 1 @@ -54,7 +54,7 @@ CREATE VIEW "JMdict_EntryScoreView_Kanji" AS SELECT 'kanji' AS "type", "JMdict_KanjiElement"."entryId", - "JMdict_KanjiElement"."reading", + "JMdict_KanjiElement"."elementId", ( "news" IS 1 OR "ichi" IS 1 @@ -98,14 +98,13 @@ BEGIN INSERT INTO "JMdict_EntryScore" ( "type", "entryId", - "reading", + "elementId", "score", "common" ) - SELECT "type", "entryId", "reading", "score", "common" + SELECT "type", "entryId", "elementId", "score", "common" FROM "JMdict_EntryScoreView_Reading" - WHERE "entryId" = NEW."entryId" - AND "reading" = NEW."reading"; + WHERE "elementId" = NEW."elementId"; END; CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement" @@ -117,8 +116,7 @@ BEGIN "score" = "JMdict_EntryScoreView_Reading"."score", "common" = "JMdict_EntryScoreView_Reading"."common" FROM "JMdict_EntryScoreView_Reading" - WHERE "entryId" = NEW."entryId" - AND "reading" = NEW."reading"; + WHERE "elementId" = NEW."elementId"; END; CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_ReadingElement" @@ -126,8 +124,7 @@ AFTER DELETE ON "JMdict_ReadingElement" BEGIN DELETE FROM "JMdict_EntryScore" WHERE "type" = 'reading' - AND "entryId" = OLD."entryId" - AND "reading" = OLD."reading"; + AND "elementId" = OLD."elementId"; END; --- JMdict_KanjiElement triggers @@ -138,14 +135,13 @@ BEGIN INSERT INTO "JMdict_EntryScore" ( "type", "entryId", - "reading", + "elementId", "score", "common" ) - SELECT "type", "entryId", "reading", "score", "common" + SELECT "type", "entryId", "elementId", "score", "common" FROM "JMdict_EntryScoreView_Kanji" - WHERE "entryId" = NEW."entryId" - AND "reading" = NEW."reading"; + WHERE "elementId" = NEW."elementId"; END; CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement" @@ -157,8 +153,7 @@ BEGIN "score" = "JMdict_EntryScoreView_Kanji"."score", "common" = "JMdict_EntryScoreView_Kanji"."common" FROM "JMdict_EntryScoreView_Kanji" - WHERE "entryId" = NEW."entryId" - AND "reading" = NEW."reading"; + WHERE "elementId" = NEW."elementId"; END; CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_KanjiElement" @@ -166,8 +161,7 @@ AFTER DELETE ON "JMdict_KanjiElement" BEGIN DELETE FROM "JMdict_EntryScore" WHERE "type" = 'kanji' - AND "entryId" = OLD."entryId" - AND "reading" = OLD."reading"; + AND "elementId" = OLD."elementId"; END; --- JMdict_JLPTTag triggers @@ -181,8 +175,8 @@ BEGIN "common" = "JMdict_EntryScoreView"."common" FROM "JMdict_EntryScoreView" WHERE "JMdict_EntryScoreView"."entryId" = NEW."entryId" - AND "JMdict_EntryScoreView"."entryId" = "JMdict_EntryScore"."entryId" - AND "JMdict_EntryScoreView"."reading" = "JMdict_EntryScore"."reading"; + AND "JMdict_EntryScore"."entryId" = NEW."entryId" + AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId"; END; CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_JLPTTag" @@ -195,8 +189,8 @@ BEGIN "common" = "JMdict_EntryScoreView"."common" FROM "JMdict_EntryScoreView" WHERE "JMdict_EntryScoreView"."entryId" = NEW."entryId" - AND "JMdict_EntryScoreView"."entryId" = "JMdict_EntryScore"."entryId" - AND "JMdict_EntryScoreView"."reading" = "JMdict_EntryScore"."reading"; + AND "JMdict_EntryScore"."entryId" = NEW."entryId" + AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId"; END; CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_JLPTTag" @@ -207,7 +201,7 @@ BEGIN "score" = "JMdict_EntryScoreView"."score", "common" = "JMdict_EntryScoreView"."common" FROM "JMdict_EntryScoreView" - WHERE "JMdict_EntryScoreView"."entryId" = NEW."entryId" - AND "JMdict_EntryScoreView"."entryId" = "JMdict_EntryScore"."entryId" - AND "JMdict_EntryScoreView"."reading" = "JMdict_EntryScore"."reading"; + WHERE "JMdict_EntryScoreView"."entryId" = OLD."entryId" + AND "JMdict_EntryScore"."entryId" = OLD."entryId" + AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId"; END; diff --git a/migrations/0010_Views.sql b/migrations/0010_Views.sql index b0acf94..a3d0214 100644 --- a/migrations/0010_Views.sql +++ b/migrations/0010_Views.sql @@ -77,8 +77,13 @@ SELECT DISTINCT "radical" FROM "RADKFILE"; CREATE VIEW "JMdict_CombinedEntryScore" AS SELECT - "JMdict_EntryScore"."entryId", + CASE + WHEN "JMdict_EntryScore"."type" = 'kanji' + THEN (SELECT entryId FROM "JMdict_KanjiElement" WHERE "elementId" = "JMdict_EntryScore"."elementId") + WHEN "JMdict_EntryScore"."type" = 'reading' + THEN (SELECT entryId FROM "JMdict_ReadingElement" WHERE "elementId" = "JMdict_EntryScore"."elementId") + END AS "entryId", MAX("JMdict_EntryScore"."score") AS "score", MAX("JMdict_EntryScore"."common") AS "common" FROM "JMdict_EntryScore" -GROUP BY "JMdict_EntryScore"."entryId"; +GROUP BY "entryId";