jmdict: embed orderNum in elementId for kanji and readings
This commit is contained in:
@@ -1,13 +1,15 @@
|
||||
import 'package:jadb/_data_ingestion/sql_writable.dart';
|
||||
|
||||
abstract class Element extends SQLWritable {
|
||||
final int elementId;
|
||||
final String reading;
|
||||
final int? news;
|
||||
final int? ichi;
|
||||
final int? spec;
|
||||
final int? gai;
|
||||
final int? nf;
|
||||
const Element({
|
||||
Element({
|
||||
required this.elementId,
|
||||
required this.reading,
|
||||
this.news,
|
||||
this.ichi,
|
||||
@@ -18,6 +20,7 @@ abstract class Element extends SQLWritable {
|
||||
|
||||
@override
|
||||
Map<String, Object?> get sqlValue => {
|
||||
'elementId': elementId,
|
||||
'reading': reading,
|
||||
'news': news,
|
||||
'ichi': ichi,
|
||||
@@ -28,12 +31,11 @@ abstract class Element extends SQLWritable {
|
||||
}
|
||||
|
||||
class KanjiElement extends Element {
|
||||
int orderNum;
|
||||
List<String> info;
|
||||
|
||||
KanjiElement({
|
||||
this.info = const [],
|
||||
required this.orderNum,
|
||||
required super.elementId,
|
||||
required super.reading,
|
||||
super.news,
|
||||
super.ichi,
|
||||
@@ -45,21 +47,19 @@ class KanjiElement extends Element {
|
||||
@override
|
||||
Map<String, Object?> get sqlValue => {
|
||||
...super.sqlValue,
|
||||
'orderNum': orderNum,
|
||||
};
|
||||
}
|
||||
|
||||
class ReadingElement extends Element {
|
||||
int orderNum;
|
||||
bool readingDoesNotMatchKanji;
|
||||
List<String> info;
|
||||
List<String> restrictions;
|
||||
|
||||
ReadingElement({
|
||||
required this.orderNum,
|
||||
required this.readingDoesNotMatchKanji,
|
||||
this.info = const [],
|
||||
this.restrictions = const [],
|
||||
required super.elementId,
|
||||
required super.reading,
|
||||
super.news,
|
||||
super.ichi,
|
||||
@@ -71,7 +71,6 @@ class ReadingElement extends Element {
|
||||
@override
|
||||
Map<String, Object?> get sqlValue => {
|
||||
...super.sqlValue,
|
||||
'orderNum': orderNum,
|
||||
'readingDoesNotMatchKanji': readingDoesNotMatchKanji,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -84,41 +84,38 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
||||
print(' [JMdict] Batch 1 - Kanji and readings');
|
||||
Batch b = db.batch();
|
||||
|
||||
int elementId = 0;
|
||||
for (final e in entries) {
|
||||
b.insert(JMdictTableNames.entry, e.sqlValue);
|
||||
|
||||
for (final k in e.kanji) {
|
||||
elementId++;
|
||||
b.insert(
|
||||
JMdictTableNames.kanjiElement,
|
||||
k.sqlValue..addAll({'entryId': e.entryId, 'elementId': elementId}),
|
||||
k.sqlValue..addAll({'entryId': e.entryId}),
|
||||
);
|
||||
|
||||
for (final i in k.info) {
|
||||
b.insert(JMdictTableNames.kanjiInfo, {
|
||||
'elementId': elementId,
|
||||
'elementId': k.elementId,
|
||||
'info': i,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
for (final r in e.readings) {
|
||||
elementId++;
|
||||
b.insert(
|
||||
JMdictTableNames.readingElement,
|
||||
r.sqlValue..addAll({'entryId': e.entryId, 'elementId': elementId}),
|
||||
r.sqlValue..addAll({'entryId': e.entryId}),
|
||||
);
|
||||
|
||||
for (final i in r.info) {
|
||||
b.insert(JMdictTableNames.readingInfo, {
|
||||
'elementId': elementId,
|
||||
'elementId': r.elementId,
|
||||
'info': i,
|
||||
});
|
||||
}
|
||||
for (final res in r.restrictions) {
|
||||
b.insert(JMdictTableNames.readingRestriction, {
|
||||
'elementId': elementId,
|
||||
'elementId': r.elementId,
|
||||
'restriction': res,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -80,11 +80,18 @@ List<Entry> parseJMDictData(XmlElement root) {
|
||||
final List<ReadingElement> readingEls = [];
|
||||
final List<Sense> senses = [];
|
||||
|
||||
for (final (kanjiNum, kEle) in entry.findElements('k_ele').indexed) {
|
||||
for (final (orderNum, kEle) in entry.findElements('k_ele').indexed) {
|
||||
assert(
|
||||
orderNum < 100,
|
||||
'Entry $entryId has more than 100 kanji elements, which will break the elementId generation logic.',
|
||||
);
|
||||
final elementId = entryId * 100 + orderNum;
|
||||
|
||||
final kePri = getPriorityValues(kEle, 'ke');
|
||||
|
||||
kanjiEls.add(
|
||||
KanjiElement(
|
||||
orderNum: kanjiNum + 1,
|
||||
elementId: elementId,
|
||||
info: kEle
|
||||
.findElements('ke_inf')
|
||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||
@@ -100,13 +107,20 @@ List<Entry> parseJMDictData(XmlElement root) {
|
||||
}
|
||||
|
||||
for (final (orderNum, rEle) in entry.findElements('r_ele').indexed) {
|
||||
assert(
|
||||
orderNum < 100,
|
||||
'Entry $entryId has more than 100 readings, which will break the elementId generation logic.',
|
||||
);
|
||||
final elementId = entryId * 100 + orderNum;
|
||||
|
||||
final rePri = getPriorityValues(rEle, 're');
|
||||
final readingDoesNotMatchKanji = rEle
|
||||
.findElements('re_nokanji')
|
||||
.isNotEmpty;
|
||||
|
||||
readingEls.add(
|
||||
ReadingElement(
|
||||
orderNum: orderNum + 1,
|
||||
elementId: elementId,
|
||||
readingDoesNotMatchKanji: readingDoesNotMatchKanji,
|
||||
info: rEle
|
||||
.findElements('re_inf')
|
||||
|
||||
@@ -69,7 +69,7 @@ Future<List<Map<String, Object?>>> _readingelementsQuery(
|
||||
JMdictTableNames.readingElement,
|
||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
||||
whereArgs: entryIds,
|
||||
orderBy: 'orderNum',
|
||||
orderBy: 'elementId',
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _kanjielementsQuery(
|
||||
@@ -79,7 +79,7 @@ Future<List<Map<String, Object?>>> _kanjielementsQuery(
|
||||
JMdictTableNames.kanjiElement,
|
||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
||||
whereArgs: entryIds,
|
||||
orderBy: 'orderNum',
|
||||
orderBy: 'elementId',
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _jlpttagsQuery(
|
||||
|
||||
@@ -53,19 +53,17 @@ CREATE TABLE "JMdict_Entry" (
|
||||
|
||||
CREATE TABLE "JMdict_KanjiElement" (
|
||||
"elementId" INTEGER PRIMARY KEY,
|
||||
"orderNum" INTEGER GENERATED ALWAYS AS ("elementId" % 100) VIRTUAL,
|
||||
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||
"orderNum" INTEGER NOT NULL,
|
||||
"reading" TEXT NOT NULL,
|
||||
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
|
||||
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
|
||||
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
|
||||
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
|
||||
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
|
||||
UNIQUE("entryId", "reading"),
|
||||
UNIQUE("entryId", "orderNum")
|
||||
UNIQUE("entryId", "reading")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "JMdict_KanjiElement_byEntryId_byOrderNum" ON "JMdict_KanjiElement"("entryId", "orderNum");
|
||||
CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading");
|
||||
|
||||
CREATE TABLE "JMdict_KanjiElementInfo" (
|
||||
@@ -78,8 +76,8 @@ CREATE TABLE "JMdict_KanjiElementInfo" (
|
||||
|
||||
CREATE TABLE "JMdict_ReadingElement" (
|
||||
"elementId" INTEGER PRIMARY KEY,
|
||||
"orderNum" INTEGER GENERATED ALWAYS AS ("elementId" % 100) VIRTUAL,
|
||||
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||
"orderNum" INTEGER NOT NULL,
|
||||
"reading" TEXT NOT NULL,
|
||||
"readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
|
||||
@@ -87,11 +85,9 @@ CREATE TABLE "JMdict_ReadingElement" (
|
||||
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
|
||||
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
|
||||
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
|
||||
UNIQUE("entryId", "reading"),
|
||||
UNIQUE("entryId", "orderNum")
|
||||
UNIQUE("entryId", "reading")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "JMdict_ReadingElement_byEntryId_byOrderNum" ON "JMdict_ReadingElement"("entryId", "orderNum");
|
||||
CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading");
|
||||
|
||||
CREATE TABLE "JMdict_ReadingElementRestriction" (
|
||||
|
||||
@@ -44,7 +44,7 @@ SELECT
|
||||
+ (("spec" IS 2) * 5)
|
||||
+ (("gai" IS 1) * 10)
|
||||
+ (("gai" IS 2) * 5)
|
||||
+ (("orderNum" IS 1) * 20)
|
||||
+ (("orderNum" IS 0) * 20)
|
||||
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
|
||||
AS "score"
|
||||
FROM "JMdict_ReadingElement"
|
||||
@@ -76,7 +76,7 @@ SELECT
|
||||
+ (("spec" IS 2) * 5)
|
||||
+ (("gai" IS 1) * 10)
|
||||
+ (("gai" IS 2) * 5)
|
||||
+ (("orderNum" IS 1) * 20)
|
||||
+ (("orderNum" IS 0) * 20)
|
||||
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
|
||||
AS "score"
|
||||
FROM "JMdict_KanjiElement"
|
||||
@@ -108,7 +108,7 @@ BEGIN
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement"
|
||||
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
|
||||
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "elementId"
|
||||
ON "JMdict_ReadingElement"
|
||||
BEGIN
|
||||
UPDATE "JMdict_EntryScore"
|
||||
@@ -145,7 +145,7 @@ BEGIN
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement"
|
||||
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
|
||||
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "elementId"
|
||||
ON "JMdict_KanjiElement"
|
||||
BEGIN
|
||||
UPDATE "JMdict_EntryScore"
|
||||
|
||||
@@ -32,9 +32,9 @@ SELECT
|
||||
THEN "JMdict_ReadingElement"."reading"
|
||||
ELSE NULL
|
||||
END AS "furigana",
|
||||
COALESCE("JMdict_KanjiElement"."orderNum", 1)
|
||||
COALESCE("JMdict_KanjiElement"."orderNum", 0)
|
||||
+ "JMdict_ReadingElement"."orderNum"
|
||||
= 2
|
||||
= 0
|
||||
AS "isFirst",
|
||||
"JMdict_KanjiElement"."orderNum" AS "kanjiOrderNum",
|
||||
"JMdict_ReadingElement"."orderNum" AS "readingOrderNum"
|
||||
|
||||
Reference in New Issue
Block a user