jmdict: embed orderNum in senseId for senses
This commit is contained in:
@@ -141,7 +141,6 @@ class XRef {
|
||||
|
||||
class Sense extends SQLWritable {
|
||||
final int senseId;
|
||||
final int orderNum;
|
||||
final List<XRefParts> antonyms;
|
||||
final List<String> dialects;
|
||||
final List<String> fields;
|
||||
@@ -156,7 +155,6 @@ class Sense extends SQLWritable {
|
||||
|
||||
const Sense({
|
||||
required this.senseId,
|
||||
required this.orderNum,
|
||||
this.antonyms = const [],
|
||||
this.dialects = const [],
|
||||
this.fields = const [],
|
||||
@@ -173,7 +171,6 @@ class Sense extends SQLWritable {
|
||||
@override
|
||||
Map<String, Object?> get sqlValue => {
|
||||
'senseId': senseId,
|
||||
'orderNum': orderNum,
|
||||
};
|
||||
|
||||
bool get isEmpty =>
|
||||
|
||||
@@ -8,10 +8,10 @@ import 'package:sqflite_common/sqlite_api.dart';
|
||||
/// A wrapper for the result of resolving an xref, which includes the resolved entry and a flag
|
||||
/// indicating whether the xref was ambiguous (i.e. could refer to multiple entries).
|
||||
class ResolvedXref {
|
||||
Entry entry;
|
||||
bool ambiguous;
|
||||
final Entry entry;
|
||||
final bool ambiguous;
|
||||
|
||||
ResolvedXref(this.entry, this.ambiguous);
|
||||
const ResolvedXref(this.entry, this.ambiguous);
|
||||
}
|
||||
|
||||
/// Resolves an xref (pair of kanji, optionally reading, and optionally sense number) to a specific
|
||||
@@ -228,7 +228,9 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
||||
'xrefEntryId': resolvedEntry.entry.entryId,
|
||||
'seeAlsoKanji': xref.kanjiRef,
|
||||
'seeAlsoReading': xref.readingRef,
|
||||
'seeAlsoSense': xref.senseOrderNum,
|
||||
'seeAlsoSense': xref.senseOrderNum != null
|
||||
? xref.senseOrderNum! - 1
|
||||
: null,
|
||||
'ambiguous': resolvedEntry.ambiguous,
|
||||
});
|
||||
}
|
||||
@@ -255,7 +257,9 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
||||
'xrefEntryId': resolvedEntry.entry.entryId,
|
||||
'antonymKanji': ant.kanjiRef,
|
||||
'antonymReading': ant.readingRef,
|
||||
'antonymSense': ant.senseOrderNum,
|
||||
'antonymSense': ant.senseOrderNum != null
|
||||
? ant.senseOrderNum! - 1
|
||||
: null,
|
||||
'ambiguous': resolvedEntry.ambiguous,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -71,8 +71,6 @@ XRefParts parseXrefParts(String s) {
|
||||
List<Entry> parseJMDictData(XmlElement root) {
|
||||
final List<Entry> entries = [];
|
||||
|
||||
int senseId = 0;
|
||||
|
||||
for (final entry in root.childElements) {
|
||||
final entryId = int.parse(entry.findElements('ent_seq').first.innerText);
|
||||
|
||||
@@ -141,10 +139,14 @@ List<Entry> parseJMDictData(XmlElement root) {
|
||||
}
|
||||
|
||||
for (final (orderNum, sense) in entry.findElements('sense').indexed) {
|
||||
senseId++;
|
||||
assert(
|
||||
orderNum < 100,
|
||||
'Entry $entryId has more than 100 senses, which will break the senseId generation logic.',
|
||||
);
|
||||
final senseId = entryId * 100 + orderNum;
|
||||
|
||||
final result = Sense(
|
||||
senseId: senseId,
|
||||
orderNum: orderNum + 1,
|
||||
restrictedToKanji: sense
|
||||
.findElements('stagk')
|
||||
.map((e) => e.innerText)
|
||||
|
||||
@@ -199,8 +199,8 @@ Future<List<ScoredEntryId>> _queryEnglish(
|
||||
SELECT
|
||||
"${JMdictTableNames.sense}"."entryId",
|
||||
MAX("JMdict_EntryScore"."score")
|
||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
|
||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
|
||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 0) * 50)
|
||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 30)
|
||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1) * 20)
|
||||
as "score"
|
||||
FROM "${JMdictTableNames.senseGlossary}"
|
||||
|
||||
@@ -106,13 +106,11 @@ CREATE TABLE "JMdict_ReadingElementInfo" (
|
||||
|
||||
CREATE TABLE "JMdict_Sense" (
|
||||
"senseId" INTEGER PRIMARY KEY,
|
||||
"orderNum" INTEGER GENERATED ALWAYS AS ("senseId" % 100) VIRTUAL,
|
||||
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||
"orderNum" INTEGER NOT NULL,
|
||||
UNIQUE("entryId", "orderNum")
|
||||
);
|
||||
|
||||
CREATE INDEX "JMdict_Sense_byEntryId_byOrderNum" ON "JMdict_Sense"("entryId", "orderNum");
|
||||
|
||||
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
|
||||
"entryId" INTEGER NOT NULL,
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
@@ -148,9 +146,18 @@ CREATE TABLE "JMdict_SenseSeeAlso" (
|
||||
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
|
||||
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
|
||||
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
|
||||
"seeAlsoSenseKey" INTEGER GENERATED ALWAYS AS (
|
||||
CASE
|
||||
WHEN "seeAlsoSense" IS NOT NULL THEN ("xrefEntryId" * 100) + "seeAlsoSense"
|
||||
ELSE NULL
|
||||
END
|
||||
) VIRTUAL,
|
||||
|
||||
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
|
||||
FOREIGN KEY ("seeAlsoSenseKey") REFERENCES "JMdict_Sense"("senseId"),
|
||||
|
||||
UNIQUE("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
|
||||
);
|
||||
|
||||
@@ -163,9 +170,18 @@ CREATE TABLE "JMdict_SenseAntonym" (
|
||||
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
|
||||
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
|
||||
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
|
||||
"antonymSenseKey" INTEGER GENERATED ALWAYS AS (
|
||||
CASE
|
||||
WHEN "antonymSense" IS NOT NULL THEN ("xrefEntryId" * 100) + "antonymSense"
|
||||
ELSE NULL
|
||||
END
|
||||
) VIRTUAL,
|
||||
|
||||
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
|
||||
FOREIGN KEY ("antonymSenseKey") REFERENCES "JMdict_Sense"("senseId"),
|
||||
|
||||
UNIQUE("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user