jmdict: embed orderNum in elementId for kanji and readings

This commit is contained in:
2026-04-08 16:27:19 +09:00
parent 9c9f5543c8
commit 4f320e4ea9
7 changed files with 40 additions and 34 deletions

View File

@@ -1,13 +1,15 @@
import 'package:jadb/_data_ingestion/sql_writable.dart';
abstract class Element extends SQLWritable {
final int elementId;
final String reading;
final int? news;
final int? ichi;
final int? spec;
final int? gai;
final int? nf;
const Element({
Element({
required this.elementId,
required this.reading,
this.news,
this.ichi,
@@ -18,6 +20,7 @@ abstract class Element extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'elementId': elementId,
'reading': reading,
'news': news,
'ichi': ichi,
@@ -28,12 +31,11 @@ abstract class Element extends SQLWritable {
}
class KanjiElement extends Element {
int orderNum;
List<String> info;
KanjiElement({
this.info = const [],
required this.orderNum,
required super.elementId,
required super.reading,
super.news,
super.ichi,
@@ -45,21 +47,19 @@ class KanjiElement extends Element {
@override
Map<String, Object?> get sqlValue => {
...super.sqlValue,
'orderNum': orderNum,
};
}
class ReadingElement extends Element {
int orderNum;
bool readingDoesNotMatchKanji;
List<String> info;
List<String> restrictions;
ReadingElement({
required this.orderNum,
required this.readingDoesNotMatchKanji,
this.info = const [],
this.restrictions = const [],
required super.elementId,
required super.reading,
super.news,
super.ichi,
@@ -71,7 +71,6 @@ class ReadingElement extends Element {
@override
Map<String, Object?> get sqlValue => {
...super.sqlValue,
'orderNum': orderNum,
'readingDoesNotMatchKanji': readingDoesNotMatchKanji,
};
}

View File

@@ -84,41 +84,38 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
print(' [JMdict] Batch 1 - Kanji and readings');
Batch b = db.batch();
int elementId = 0;
for (final e in entries) {
b.insert(JMdictTableNames.entry, e.sqlValue);
for (final k in e.kanji) {
elementId++;
b.insert(
JMdictTableNames.kanjiElement,
k.sqlValue..addAll({'entryId': e.entryId, 'elementId': elementId}),
k.sqlValue..addAll({'entryId': e.entryId}),
);
for (final i in k.info) {
b.insert(JMdictTableNames.kanjiInfo, {
'elementId': elementId,
'elementId': k.elementId,
'info': i,
});
}
}
for (final r in e.readings) {
elementId++;
b.insert(
JMdictTableNames.readingElement,
r.sqlValue..addAll({'entryId': e.entryId, 'elementId': elementId}),
r.sqlValue..addAll({'entryId': e.entryId}),
);
for (final i in r.info) {
b.insert(JMdictTableNames.readingInfo, {
'elementId': elementId,
'elementId': r.elementId,
'info': i,
});
}
for (final res in r.restrictions) {
b.insert(JMdictTableNames.readingRestriction, {
'elementId': elementId,
'elementId': r.elementId,
'restriction': res,
});
}

View File

@@ -80,11 +80,18 @@ List<Entry> parseJMDictData(XmlElement root) {
final List<ReadingElement> readingEls = [];
final List<Sense> senses = [];
for (final (kanjiNum, kEle) in entry.findElements('k_ele').indexed) {
for (final (orderNum, kEle) in entry.findElements('k_ele').indexed) {
assert(
orderNum < 100,
'Entry $entryId has more than 100 kanji elements, which will break the elementId generation logic.',
);
final elementId = entryId * 100 + orderNum;
final kePri = getPriorityValues(kEle, 'ke');
kanjiEls.add(
KanjiElement(
orderNum: kanjiNum + 1,
elementId: elementId,
info: kEle
.findElements('ke_inf')
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
@@ -100,13 +107,20 @@ List<Entry> parseJMDictData(XmlElement root) {
}
for (final (orderNum, rEle) in entry.findElements('r_ele').indexed) {
assert(
orderNum < 100,
'Entry $entryId has more than 100 readings, which will break the elementId generation logic.',
);
final elementId = entryId * 100 + orderNum;
final rePri = getPriorityValues(rEle, 're');
final readingDoesNotMatchKanji = rEle
.findElements('re_nokanji')
.isNotEmpty;
readingEls.add(
ReadingElement(
orderNum: orderNum + 1,
elementId: elementId,
readingDoesNotMatchKanji: readingDoesNotMatchKanji,
info: rEle
.findElements('re_inf')

View File

@@ -69,7 +69,7 @@ Future<List<Map<String, Object?>>> _readingelementsQuery(
JMdictTableNames.readingElement,
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
whereArgs: entryIds,
orderBy: 'orderNum',
orderBy: 'elementId',
);
Future<List<Map<String, Object?>>> _kanjielementsQuery(
@@ -79,7 +79,7 @@ Future<List<Map<String, Object?>>> _kanjielementsQuery(
JMdictTableNames.kanjiElement,
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
whereArgs: entryIds,
orderBy: 'orderNum',
orderBy: 'elementId',
);
Future<List<Map<String, Object?>>> _jlpttagsQuery(

View File

@@ -53,19 +53,17 @@ CREATE TABLE "JMdict_Entry" (
-- Table of kanji elements, each row tied to one "JMdict_Entry" through "entryId".
-- NOTE(review): this span lists "orderNum" twice (once generated, once NOT NULL)
-- and UNIQUE("entryId", "reading") twice. It looks like the before and after lines
-- of a change shown together; confirm which lines belong to the current schema.
CREATE TABLE "JMdict_KanjiElement" (
"elementId" INTEGER PRIMARY KEY,
-- Derived from the key rather than stored: the remainder of "elementId" divided by 100.
-- Presumably pairs with ingestion code that builds elementId as entryId * 100 + orderNum;
-- verify against the writer.
"orderNum" INTEGER GENERATED ALWAYS AS ("elementId" % 100) VIRTUAL,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
"orderNum" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
-- Optional priority markers; each is NULL or limited to the range in its CHECK.
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
-- The same "reading" may not appear twice within one entry.
UNIQUE("entryId", "reading"),
UNIQUE("entryId", "orderNum")
UNIQUE("entryId", "reading")
) WITHOUT ROWID;
CREATE INDEX "JMdict_KanjiElement_byEntryId_byOrderNum" ON "JMdict_KanjiElement"("entryId", "orderNum");
CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading");
CREATE TABLE "JMdict_KanjiElementInfo" (
@@ -78,8 +76,8 @@ CREATE TABLE "JMdict_KanjiElementInfo" (
CREATE TABLE "JMdict_ReadingElement" (
"elementId" INTEGER PRIMARY KEY,
"orderNum" INTEGER GENERATED ALWAYS AS ("elementId" % 100) VIRTUAL,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
"orderNum" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
@@ -87,11 +85,9 @@ CREATE TABLE "JMdict_ReadingElement" (
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
UNIQUE("entryId", "reading"),
UNIQUE("entryId", "orderNum")
UNIQUE("entryId", "reading")
) WITHOUT ROWID;
CREATE INDEX "JMdict_ReadingElement_byEntryId_byOrderNum" ON "JMdict_ReadingElement"("entryId", "orderNum");
CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading");
CREATE TABLE "JMdict_ReadingElementRestriction" (

View File

@@ -44,7 +44,7 @@ SELECT
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 1) * 20)
+ (("orderNum" IS 0) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_ReadingElement"
@@ -76,7 +76,7 @@ SELECT
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 1) * 20)
+ (("orderNum" IS 0) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_KanjiElement"
@@ -108,7 +108,7 @@ BEGIN
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement"
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "elementId"
ON "JMdict_ReadingElement"
BEGIN
UPDATE "JMdict_EntryScore"
@@ -145,7 +145,7 @@ BEGIN
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement"
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "elementId"
ON "JMdict_KanjiElement"
BEGIN
UPDATE "JMdict_EntryScore"

View File

@@ -32,9 +32,9 @@ SELECT
THEN "JMdict_ReadingElement"."reading"
ELSE NULL
END AS "furigana",
COALESCE("JMdict_KanjiElement"."orderNum", 1)
COALESCE("JMdict_KanjiElement"."orderNum", 0)
+ "JMdict_ReadingElement"."orderNum"
= 2
= 0
AS "isFirst",
"JMdict_KanjiElement"."orderNum" AS "kanjiOrderNum",
"JMdict_ReadingElement"."orderNum" AS "readingOrderNum"