Derive JMdict sense ids from the entry id instead of a running counter.

- xml_parser.dart: senseId is now entryId * 100 + orderNum, where orderNum is
  the 0-based index of the sense within its entry. The range guard is a real
  runtime check (StateError) rather than an assert, because assert() is
  skipped outside debug mode.
- objects.dart: Sense no longer carries or writes an orderNum field.
- seed_data.dart: ResolvedXref becomes immutable/const; the sense number
  written for see-also and antonym rows is xref.senseOrderNum - 1 (null stays
  null).
- entry_id_query.dart: the score bonuses now compare orderNum against 0 and 1
  instead of 1 and 2.
- 0001_JMDict.sql: JMdict_Sense.orderNum becomes a generated column
  (senseId % 100); the see-also/antonym tables gain generated *SenseKey
  columns (xrefEntryId * 100 + sense) that reference JMdict_Sense(senseId).

diff --git a/lib/_data_ingestion/jmdict/objects.dart b/lib/_data_ingestion/jmdict/objects.dart
index 1db4c5b..f0cb70f 100644
--- a/lib/_data_ingestion/jmdict/objects.dart
+++ b/lib/_data_ingestion/jmdict/objects.dart
@@ -141,7 +141,6 @@ class XRef {
 
 class Sense extends SQLWritable {
   final int senseId;
-  final int orderNum;
   final List antonyms;
   final List dialects;
   final List fields;
@@ -156,7 +155,6 @@ class Sense extends SQLWritable {
 
   const Sense({
     required this.senseId,
-    required this.orderNum,
     this.antonyms = const [],
     this.dialects = const [],
     this.fields = const [],
@@ -173,7 +171,6 @@ class Sense extends SQLWritable {
   @override
   Map get sqlValue => {
     'senseId': senseId,
-    'orderNum': orderNum,
   };
 
   bool get isEmpty =>
diff --git a/lib/_data_ingestion/jmdict/seed_data.dart b/lib/_data_ingestion/jmdict/seed_data.dart
index dde338c..433a61d 100644
--- a/lib/_data_ingestion/jmdict/seed_data.dart
+++ b/lib/_data_ingestion/jmdict/seed_data.dart
@@ -8,10 +8,10 @@ import 'package:sqflite_common/sqlite_api.dart';
 /// A wrapper for the result of resolving an xref, which includes the resolved entry and a flag
 /// indicating whether the xref was ambiguous (i.e. could refer to multiple entries).
 class ResolvedXref {
-  Entry entry;
-  bool ambiguous;
+  final Entry entry;
+  final bool ambiguous;
 
-  ResolvedXref(this.entry, this.ambiguous);
+  const ResolvedXref(this.entry, this.ambiguous);
 }
 
 /// Resolves an xref (pair of kanji, optionally reading, and optionally sense number) to an a specific
@@ -228,7 +228,9 @@ Future seedJMDictData(List entries, Database db) async {
             'xrefEntryId': resolvedEntry.entry.entryId,
             'seeAlsoKanji': xref.kanjiRef,
             'seeAlsoReading': xref.readingRef,
-            'seeAlsoSense': xref.senseOrderNum,
+            'seeAlsoSense': xref.senseOrderNum != null
+                ? xref.senseOrderNum! - 1
+                : null,
             'ambiguous': resolvedEntry.ambiguous,
           });
         }
@@ -255,7 +257,9 @@ Future seedJMDictData(List entries, Database db) async {
             'xrefEntryId': resolvedEntry.entry.entryId,
             'antonymKanji': ant.kanjiRef,
             'antonymReading': ant.readingRef,
-            'antonymSense': ant.senseOrderNum,
+            'antonymSense': ant.senseOrderNum != null
+                ? ant.senseOrderNum! - 1
+                : null,
             'ambiguous': resolvedEntry.ambiguous,
           });
         }
diff --git a/lib/_data_ingestion/jmdict/xml_parser.dart b/lib/_data_ingestion/jmdict/xml_parser.dart
index c19682f..85bb413 100644
--- a/lib/_data_ingestion/jmdict/xml_parser.dart
+++ b/lib/_data_ingestion/jmdict/xml_parser.dart
@@ -71,8 +71,6 @@ XRefParts parseXrefParts(String s) {
 List parseJMDictData(XmlElement root) {
   final List entries = [];
 
-  int senseId = 0;
-
   for (final entry in root.childElements) {
     final entryId = int.parse(entry.findElements('ent_seq').first.innerText);
 
@@ -141,10 +139,19 @@ List parseJMDictData(XmlElement root) {
     }
 
     for (final (orderNum, sense) in entry.findElements('sense').indexed) {
-      senseId++;
+      // senseId packs the entry id and the 0-based sense index as
+      // entryId * 100 + orderNum, so an index of 100 or more would land in
+      // the next entry's id range. assert() is skipped outside debug mode,
+      // so this is enforced with a real check.
+      if (orderNum >= 100) {
+        throw StateError(
+          'Entry $entryId has more than 100 senses, which will break the senseId generation logic.',
+        );
+      }
+      final senseId = entryId * 100 + orderNum;
+
       final result = Sense(
         senseId: senseId,
-        orderNum: orderNum + 1,
         restrictedToKanji: sense
             .findElements('stagk')
             .map((e) => e.innerText)
diff --git a/lib/search/word_search/entry_id_query.dart b/lib/search/word_search/entry_id_query.dart
index 4338803..14156cc 100644
--- a/lib/search/word_search/entry_id_query.dart
+++ b/lib/search/word_search/entry_id_query.dart
@@ -199,8 +199,8 @@ Future> _queryEnglish(
       SELECT
         "${JMdictTableNames.sense}"."entryId",
         MAX("JMdict_EntryScore"."score")
-          + (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
-          + (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
+          + (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 0) * 50)
+          + (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 30)
           + (("${JMdictTableNames.senseGlossary}"."phrase" = ?1) * 20)
         as "score"
       FROM "${JMdictTableNames.senseGlossary}"
diff --git a/migrations/0001_JMDict.sql b/migrations/0001_JMDict.sql
index 2629c21..b1bef6c 100644
--- a/migrations/0001_JMDict.sql
+++ b/migrations/0001_JMDict.sql
@@ -106,13 +106,11 @@ CREATE TABLE "JMdict_ReadingElementInfo" (
 
 CREATE TABLE "JMdict_Sense" (
   "senseId" INTEGER PRIMARY KEY,
+  "orderNum" INTEGER GENERATED ALWAYS AS ("senseId" % 100) VIRTUAL,
   "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
-  "orderNum" INTEGER NOT NULL,
   UNIQUE("entryId", "orderNum")
 );
 
-CREATE INDEX "JMdict_Sense_byEntryId_byOrderNum" ON "JMdict_Sense"("entryId", "orderNum");
-
 CREATE TABLE "JMdict_SenseRestrictedToKanji" (
   "entryId" INTEGER NOT NULL,
   "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
@@ -148,9 +146,18 @@ CREATE TABLE "JMdict_SenseSeeAlso" (
   -- For some entries, the cross reference is ambiguous. This means that while the ingestion
   -- has determined some xrefEntryId, it is not guaranteed to be the correct one.
   "ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
+
+  "seeAlsoSenseKey" INTEGER GENERATED ALWAYS AS (
+    CASE
+      WHEN "seeAlsoSense" IS NOT NULL THEN ("xrefEntryId" * 100) + "seeAlsoSense"
+      ELSE NULL
+    END
+  ) VIRTUAL,
+
   FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
   FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
-  FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
+  FOREIGN KEY ("seeAlsoSenseKey") REFERENCES "JMdict_Sense"("senseId"),
+
   UNIQUE("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
 );
 
@@ -163,9 +170,18 @@ CREATE TABLE "JMdict_SenseAntonym" (
   -- For some entries, the cross reference is ambiguous. This means that while the ingestion
   -- has determined some xrefEntryId, it is not guaranteed to be the correct one.
   "ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
+
+  "antonymSenseKey" INTEGER GENERATED ALWAYS AS (
+    CASE
+      WHEN "antonymSense" IS NOT NULL THEN ("xrefEntryId" * 100) + "antonymSense"
+      ELSE NULL
+    END
+  ) VIRTUAL,
+
   FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
   FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
-  FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
+  FOREIGN KEY ("antonymSenseKey") REFERENCES "JMdict_Sense"("senseId"),
+
   UNIQUE("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
 );
 