jmdict: embed orderNum in senseId for senses

This commit is contained in:
2026-04-08 16:38:42 +09:00
parent e8ee1ab944
commit 99218a6987
5 changed files with 38 additions and 19 deletions

View File

@@ -141,7 +141,6 @@ class XRef {
class Sense extends SQLWritable {
final int senseId;
final int orderNum;
final List<XRefParts> antonyms;
final List<String> dialects;
final List<String> fields;
@@ -156,7 +155,6 @@ class Sense extends SQLWritable {
const Sense({
required this.senseId,
required this.orderNum,
this.antonyms = const [],
this.dialects = const [],
this.fields = const [],
@@ -173,7 +171,6 @@ class Sense extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'senseId': senseId,
'orderNum': orderNum,
};
bool get isEmpty =>

View File

@@ -8,10 +8,10 @@ import 'package:sqflite_common/sqlite_api.dart';
/// A wrapper for the result of resolving an xref, which includes the resolved entry and a flag
/// indicating whether the xref was ambiguous (i.e. could refer to multiple entries).
class ResolvedXref {
Entry entry;
bool ambiguous;
final Entry entry;
final bool ambiguous;
ResolvedXref(this.entry, this.ambiguous);
const ResolvedXref(this.entry, this.ambiguous);
}
/// Resolves an xref (pair of kanji, optionally reading, and optionally sense number) to a specific
@@ -228,7 +228,9 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
'xrefEntryId': resolvedEntry.entry.entryId,
'seeAlsoKanji': xref.kanjiRef,
'seeAlsoReading': xref.readingRef,
'seeAlsoSense': xref.senseOrderNum,
'seeAlsoSense': xref.senseOrderNum != null
? xref.senseOrderNum! - 1
: null,
'ambiguous': resolvedEntry.ambiguous,
});
}
@@ -255,7 +257,9 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
'xrefEntryId': resolvedEntry.entry.entryId,
'antonymKanji': ant.kanjiRef,
'antonymReading': ant.readingRef,
'antonymSense': ant.senseOrderNum,
'antonymSense': ant.senseOrderNum != null
? ant.senseOrderNum! - 1
: null,
'ambiguous': resolvedEntry.ambiguous,
});
}

View File

@@ -71,8 +71,6 @@ XRefParts parseXrefParts(String s) {
List<Entry> parseJMDictData(XmlElement root) {
final List<Entry> entries = [];
int senseId = 0;
for (final entry in root.childElements) {
final entryId = int.parse(entry.findElements('ent_seq').first.innerText);
@@ -141,10 +139,14 @@ List<Entry> parseJMDictData(XmlElement root) {
}
for (final (orderNum, sense) in entry.findElements('sense').indexed) {
senseId++;
assert(
orderNum < 100,
'Entry $entryId has more than 100 senses, which will break the senseId generation logic.',
);
final senseId = entryId * 100 + orderNum;
final result = Sense(
senseId: senseId,
orderNum: orderNum + 1,
restrictedToKanji: sense
.findElements('stagk')
.map((e) => e.innerText)

View File

@@ -199,8 +199,8 @@ Future<List<ScoredEntryId>> _queryEnglish(
SELECT
"${JMdictTableNames.sense}"."entryId",
MAX("JMdict_EntryScore"."score")
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 0) * 50)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 30)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1) * 20)
as "score"
FROM "${JMdictTableNames.senseGlossary}"

View File

@@ -106,13 +106,11 @@ CREATE TABLE "JMdict_ReadingElementInfo" (
CREATE TABLE "JMdict_Sense" (
"senseId" INTEGER PRIMARY KEY,
"orderNum" INTEGER GENERATED ALWAYS AS ("senseId" % 100) VIRTUAL,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
"orderNum" INTEGER NOT NULL,
UNIQUE("entryId", "orderNum")
);
CREATE INDEX "JMdict_Sense_byEntryId_byOrderNum" ON "JMdict_Sense"("entryId", "orderNum");
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
"entryId" INTEGER NOT NULL,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
@@ -148,9 +146,18 @@ CREATE TABLE "JMdict_SenseSeeAlso" (
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
"seeAlsoSenseKey" INTEGER GENERATED ALWAYS AS (
CASE
WHEN "seeAlsoSense" IS NOT NULL THEN ("xrefEntryId" * 100) + "seeAlsoSense"
ELSE NULL
END
) VIRTUAL,
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
FOREIGN KEY ("seeAlsoSenseKey") REFERENCES "JMdict_Sense"("senseId"),
UNIQUE("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
);
@@ -163,9 +170,18 @@ CREATE TABLE "JMdict_SenseAntonym" (
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
"antonymSenseKey" INTEGER GENERATED ALWAYS AS (
CASE
WHEN "antonymSense" IS NOT NULL THEN ("xrefEntryId" * 100) + "antonymSense"
ELSE NULL
END
) VIRTUAL,
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
FOREIGN KEY ("antonymSenseKey") REFERENCES "JMdict_Sense"("senseId"),
UNIQUE("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
);