From 505380d2cc79272a1b7c5460b1b7c9b566a05f92 Mon Sep 17 00:00:00 2001 From: h7x4 Date: Tue, 2 Jun 2026 03:19:52 +0900 Subject: [PATCH] lib/_data_ingestion: add xref exception list for jmdict --- lib/_data_ingestion/jmdict/objects.dart | 21 +++++++++++++----- lib/_data_ingestion/jmdict/seed_data.dart | 27 +++++++++++++++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/lib/_data_ingestion/jmdict/objects.dart b/lib/_data_ingestion/jmdict/objects.dart index bf71c43..fdad3e4 100644 --- a/lib/_data_ingestion/jmdict/objects.dart +++ b/lib/_data_ingestion/jmdict/objects.dart @@ -45,9 +45,7 @@ class KanjiElement extends Element { }); @override - Map get sqlValue => { - ...super.sqlValue, - }; + Map get sqlValue => {...super.sqlValue}; } class ReadingElement extends Element { @@ -129,6 +127,19 @@ class XRefParts { 'readingRef': readingRef, 'senseOrderNum': senseOrderNum, }; + + @override + bool operator ==(Object other) { + if (identical(this, other)) return true; + + return other is XRefParts && + other.kanjiRef == kanjiRef && + other.readingRef == readingRef && + other.senseOrderNum == senseOrderNum; + } + + @override + int get hashCode => Object.hash(kanjiRef, readingRef, senseOrderNum); } class XRef { @@ -168,9 +179,7 @@ class Sense extends SQLWritable { }); @override - Map get sqlValue => { - 'senseId': senseId, - }; + Map get sqlValue => {'senseId': senseId}; bool get isEmpty => antonyms.isEmpty && diff --git a/lib/_data_ingestion/jmdict/seed_data.dart b/lib/_data_ingestion/jmdict/seed_data.dart index ef07fd5..b939d16 100644 --- a/lib/_data_ingestion/jmdict/seed_data.dart +++ b/lib/_data_ingestion/jmdict/seed_data.dart @@ -15,6 +15,12 @@ class ResolvedXref { const ResolvedXref(this.entry, this.ambiguous); } +// A constant map of xref parts to jmdict id for unresolvable xrefs. +final xrefExceptions = { + // NOTE: see https://www.edrdg.org/jmwsgi/entr.py?svc=jmdict&g=2870981.1~2369718 for details + XRefParts(kanjiRef: 'プレストレスト', readingRef: 'コンクリート'): 2472380, +}; + /// Resolves an xref (pair of kanji, optionally reading, and optionally sense number) to an a specific /// JMdict entry, if possible. /// @@ -28,6 +34,27 @@ ResolvedXref resolveXref( XRefParts xref, ) { late List candidateEntries; + + if (xrefExceptions.containsKey(xref)) { + final exceptionEntryId = xrefExceptions[xref]!; + // NOTE: this is slow, but we have few exceptions. Let's wait for JMdict XML-NG to be released so we can delete this :) + final exceptionEntry = + entriesByKanji.values + .expand((set) => set) + .firstWhereOrNull((entry) => entry.entryId == exceptionEntryId) ?? + entriesByReading.values + .expand((set) => set) + .firstWhereOrNull((entry) => entry.entryId == exceptionEntryId); + + if (exceptionEntry != null) { + return ResolvedXref(exceptionEntry, false); + } else { + throw Exception( + 'Xref $xref matches an exception entry ID $exceptionEntryId, but that entry was not found among the candidates.', + ); + } + } + switch ((xref.kanjiRef, xref.readingRef)) { case (null, null): throw Exception('Xref $xref has no kanji or reading reference');