From 31c3fb807e614ef5e0feaa10476203c7bad6ecdb Mon Sep 17 00:00:00 2001 From: h7x4 Date: Mon, 19 May 2025 16:40:34 +0200 Subject: [PATCH] treewide: redo handling of kanjidic radicals --- lib/_data_ingestion/kanjidic/objects.dart | 13 +++----- lib/_data_ingestion/kanjidic/seed_data.dart | 16 +++++++-- lib/_data_ingestion/kanjidic/xml_parser.dart | 9 +++-- .../kanji_search/kanji_search_radical.dart | 25 ++++++++++---- .../kanji_search/kanji_search_result.dart | 14 ++++---- lib/search/kanji_search.dart | 33 ++++++++++++++++--- migrations/0006_KANJIDIC2.sql | 27 +++++++++------ ...0007_XREF__KANJIDIC_Radical___RADKFILE.sql | 10 +++--- 8 files changed, 97 insertions(+), 50 deletions(-) diff --git a/lib/_data_ingestion/kanjidic/objects.dart b/lib/_data_ingestion/kanjidic/objects.dart index c759377..f205ab0 100644 --- a/lib/_data_ingestion/kanjidic/objects.dart +++ b/lib/_data_ingestion/kanjidic/objects.dart @@ -21,20 +21,17 @@ class CodePoint extends SQLWritable { class Radical extends SQLWritable { final String kanji; - final String type; - final String radical; + final int radicalId; const Radical({ required this.kanji, - required this.type, - required this.radical, + required this.radicalId, }); @override Map get sqlValue => { 'kanji': kanji, - 'type': type, - 'radical': radical, + 'radicalId': radicalId, }; } @@ -224,7 +221,7 @@ class Character extends SQLWritable { final List radicalName; final List codepoints; - final List radicals; + final Radical? radical; final List strokeMiscounts; final List variants; final List dictionaryReferences; @@ -244,7 +241,7 @@ class Character extends SQLWritable { this.jlpt, this.radicalName = const [], this.codepoints = const [], - this.radicals = const [], + required this.radical, this.strokeMiscounts = const [], this.variants = const [], this.dictionaryReferences = const [], diff --git a/lib/_data_ingestion/kanjidic/seed_data.dart b/lib/_data_ingestion/kanjidic/seed_data.dart index f953d4d..b5339d3 100644 --- a/lib/_data_ingestion/kanjidic/seed_data.dart +++ b/lib/_data_ingestion/kanjidic/seed_data.dart @@ -14,14 +14,24 @@ Future seedKANJIDICData(List characters, Database db) async { // print(c.dictionaryReferences.map((e) => e.sqlValue).toList()); // } b.insert(KANJIDICTableNames.character, c.sqlValue); + for (final n in c.radicalName) { - b.insert(KANJIDICTableNames.radicalName, {'kanji': c.literal, 'name': n}); + assert(c.radical != null, 'Radical name without radical'); + b.insert( + KANJIDICTableNames.radicalName, + { + 'radicalId': c.radical!.radicalId, + 'name': n, + }, + conflictAlgorithm: ConflictAlgorithm.ignore, + ); } + for (final cp in c.codepoints) { b.insert(KANJIDICTableNames.codepoint, cp.sqlValue); } - for (final r in c.radicals) { - b.insert(KANJIDICTableNames.radical, r.sqlValue); + if (c.radical != null) { + b.insert(KANJIDICTableNames.radical, c.radical!.sqlValue); } for (final sm in c.strokeMiscounts) { b.insert( diff --git a/lib/_data_ingestion/kanjidic/xml_parser.dart b/lib/_data_ingestion/kanjidic/xml_parser.dart index 74a5512..9f6313e 100644 --- a/lib/_data_ingestion/kanjidic/xml_parser.dart +++ b/lib/_data_ingestion/kanjidic/xml_parser.dart @@ -42,17 +42,16 @@ List parseKANJIDICData(XmlElement root) { ) .toList() ?? [], - radicals: radical + radical: radical ?.findElements('rad_value') + .where((e) => e.getAttribute('rad_type') == 'classical') .map( (e) => Radical( kanji: kanji, - type: e.getAttribute('rad_type')!, - radical: e.innerText, + radicalId: int.parse(e.innerText), ), ) - .toList() ?? - [], + .firstOrNull, strokeMiscounts: misc .findElements('stroke_count') .skip(1) diff --git a/lib/models/kanji_search/kanji_search_radical.dart b/lib/models/kanji_search/kanji_search_radical.dart index 56f8a7e..d324667 100644 --- a/lib/models/kanji_search/kanji_search_radical.dart +++ b/lib/models/kanji_search/kanji_search_radical.dart @@ -4,37 +4,48 @@ class KanjiSearchRadical extends Equatable { /// The radical symbol. final String symbol; + /// The names of this radical. + /// + /// Each name might refer to a specific form of the radical. + final List names; + /// The radical forms used in this kanji. + /// + /// (e.g. "亻" for "人", "氵" for "水") final List forms; - /// The meaning of the radical. - final String meaning; + /// The meanings of the radical. + final List meanings; // ignore: public_member_api_docs const KanjiSearchRadical({ required this.symbol, - this.forms = const [], - required this.meaning, + required this.names, + required this.forms, + required this.meanings, }); @override List get props => [ symbol, + this.names, forms, - meaning, + meanings, ]; Map toJson() => { 'symbol': symbol, + 'names': names, 'forms': forms, - 'meaning': meaning, + 'meanings': meanings, }; factory KanjiSearchRadical.fromJson(Map json) { return KanjiSearchRadical( symbol: json['symbol'] as String, + names: (json['names'] as List).map((e) => e as String).toList(), forms: (json['forms'] as List).map((e) => e as String).toList(), - meaning: json['meaning'] as String, + meanings: (json['meanings'] as List).map((e) => e as String).toList(), ); } } diff --git a/lib/models/kanji_search/kanji_search_result.dart b/lib/models/kanji_search/kanji_search_result.dart index 1094476..3bf582f 100644 --- a/lib/models/kanji_search/kanji_search_result.dart +++ b/lib/models/kanji_search/kanji_search_result.dart @@ -49,17 +49,17 @@ class KanjiSearchResult extends Equatable { const KanjiSearchResult({ required this.kanji, - this.taughtIn, - this.jlptLevel, - this.newspaperFrequencyRank, + required this.taughtIn, + required this.jlptLevel, + required this.newspaperFrequencyRank, required this.strokeCount, required this.meanings, - this.kunyomi = const [], - this.onyomi = const [], + required this.kunyomi, + required this.onyomi, // this.kunyomiExamples = const [], // this.onyomiExamples = const [], - this.radical, - this.parts = const [], + required this.radical, + required this.parts, required this.codepoints, }); diff --git a/lib/search/kanji_search.dart b/lib/search/kanji_search.dart index b2b0bd1..d5aa81f 100644 --- a/lib/search/kanji_search.dart +++ b/lib/search/kanji_search.dart @@ -1,3 +1,4 @@ +import 'package:jadb/models/kanji_search/kanji_search_radical.dart'; import 'package:jadb/models/kanji_search/kanji_search_result.dart'; import 'package:sqflite_common/sqflite.dart'; @@ -62,10 +63,21 @@ Future searchKanjiWithDbConnection( ); late final List> radicals; - final radicals_query = connection.query( - "KANJIDIC_Radical", - where: "KANJIDIC_Radical.kanji = ?", - whereArgs: [kanji], + final radicals_query = connection.rawQuery( + ''' + SELECT DISTINCT + "XREF__KANJIDIC_Radical__RADKFILE"."radicalSymbol" AS "symbol", + "names" + FROM "KANJIDIC_Radical" + JOIN "XREF__KANJIDIC_Radical__RADKFILE" USING ("radicalId") + LEFT JOIN ( + SELECT "radicalId", group_concat("name") AS "names" + FROM "KANJIDIC_RadicalName" + GROUP BY "radicalId" + ) USING ("radicalId") + WHERE "KANJIDIC_Radical"."kanji" = ? + ''', + [kanji], ); late final List> parts; @@ -124,6 +136,18 @@ Future searchKanjiWithDbConnection( final entry = characters.first; + assert(radicals.length <= 1, 'There should be at most one radical per kanji'); + final radical = radicals.isNotEmpty + ? KanjiSearchRadical( + symbol: radicals.first['symbol'] as String, + names: (radicals.first['names'] as String).split(','), + // TODO: add radical form data + forms: [], + // TODO: add radical meaning data + meanings: [], + ) + : null; + final String? grade = { 1: 'grade 1', 2: 'grade 2', @@ -155,6 +179,7 @@ Future searchKanjiWithDbConnection( kunyomi: kunyomis.map((item) => item['yomi'] as String).toList(), parts: parts.map((item) => item['radical'] as String).toList(), onyomi: onyomis.map((item) => item['yomi'] as String).toList(), + radical: radical, codepoints: { for (final codepoint in codepoints) codepoint['type'] as String: codepoint['codepoint'] as String, diff --git a/migrations/0006_KANJIDIC2.sql b/migrations/0006_KANJIDIC2.sql index 78884ca..16dcf61 100644 --- a/migrations/0006_KANJIDIC2.sql +++ b/migrations/0006_KANJIDIC2.sql @@ -6,12 +6,6 @@ CREATE TABLE "KANJIDIC_Character" ( "jlpt" INTEGER ) WITHOUT ROWID; -CREATE TABLE "KANJIDIC_RadicalName" ( - "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), - "name" TEXT NOT NULL, - PRIMARY KEY("kanji", "name") -) WITHOUT ROWID; - CREATE TABLE "KANJIDIC_Codepoint" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "type" VARCHAR(6) NOT NULL CHECK ("type" IN ('jis208', 'jis212', 'jis213', 'ucs')), @@ -22,12 +16,25 @@ CREATE TABLE "KANJIDIC_Codepoint" ( CREATE INDEX "KANJIDIC_Codepoint_byCharacter" ON "KANJIDIC_Codepoint"("kanji"); CREATE TABLE "KANJIDIC_Radical" ( - "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), - "type" VARCHAR(9) NOT NULL CHECK ("type" IN ('classical', 'nelson_c')), - "radical" INTEGER NOT NULL CHECK ("radical" BETWEEN 1 AND IIF("type" = 'classical', 214, 213)), - PRIMARY KEY("kanji", "type") + "kanji" CHAR(1) NOT NULL PRIMARY KEY REFERENCES "KANJIDIC_Character"("literal"), + "radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214) ) WITHOUT ROWID; +CREATE INDEX "KANJIDIC_Radical_byRadicalId" ON "KANJIDIC_Radical"("radicalId"); + +CREATE TABLE "KANJIDIC_RadicalNelsonCId" ( + "radicalId" INTEGER NOT NULL PRIMARY KEY CHECK ("radicalId" BETWEEN 1 AND 214), + "nelsonId" INTEGER UNIQUE NOT NULL CHECK ("nelsonId" BETWEEN 1 AND 213) +); + +CREATE TABLE "KANJIDIC_RadicalName" ( + "radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214), + "name" TEXT NOT NULL, + PRIMARY KEY("radicalId", "name") +) WITHOUT ROWID; + +CREATE INDEX "KANJIDIC_RadicalName_byRadicalId" ON "KANJIDIC_RadicalName"("radicalId"); + CREATE TABLE "KANJIDIC_StrokeMiscount" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "strokeCount" INTEGER NOT NULL, diff --git a/migrations/0007_XREF__KANJIDIC_Radical___RADKFILE.sql b/migrations/0007_XREF__KANJIDIC_Radical___RADKFILE.sql index a7fb3d4..cbf3950 100644 --- a/migrations/0007_XREF__KANJIDIC_Radical___RADKFILE.sql +++ b/migrations/0007_XREF__KANJIDIC_Radical___RADKFILE.sql @@ -1,13 +1,11 @@ CREATE TABLE "XREF__KANJIDIC_Radical__RADKFILE"( - "radicalId" INTEGER NOT NULL, - "radicalSymbol" CHAR(1) NOT NULL REFERENCES "RADKFILE"("radical"), - "radicalType" VARCHAR(9) NOT NULL CHECK ("radicalType" IN ('classical', 'nelson_c')) DEFAULT 'classical', - PRIMARY KEY ("radicalId", "radicalSymbol", "radicalType"), - FOREIGN KEY ("radicalId", "radicalType") REFERENCES "KANJIDIC_Radical"("radical", "type") + "radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214), + "radicalSymbol" CHAR(1) UNIQUE NOT NULL REFERENCES "RADKFILE"("radical"), + PRIMARY KEY ("radicalId", "radicalSymbol") ) WITHOUT ROWID; CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalId" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalId"); -CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalSymbol_byRadicalType" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalSymbol", "radicalType"); +CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalSymbol" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalSymbol"); /* Source: https://ctext.org/kangxi-zidian */ INSERT INTO "XREF__KANJIDIC_Radical__RADKFILE"("radicalId", "radicalSymbol") VALUES