This commit is contained in:
Oystein Kristoffer Tveit 2025-04-25 22:47:06 +02:00
parent 1c2f90a617
commit c89c9a1b98
Signed by: oysteikt
GPG Key ID: 9F2F7D8250F35146
23 changed files with 1080 additions and 149 deletions

16
flake.lock generated

@ -3,19 +3,19 @@
"jmdict-src": {
"flake": false,
"locked": {
"narHash": "sha256-+hXdq4aUDuxpCoLJ4zYuKg6zQLHas+/M6+sEDA84RgQ=",
"narHash": "sha256-J7GywcE/5LD6YFhdIXxQ2kvAamYkMpannRQyj5yU/nA=",
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"
},
"original": {
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"
}
},
"jmdict-with-examples-src": {
"flake": false,
"locked": {
"narHash": "sha256-H3a5XoSJUvzTReP5g3jL7JGTIEsGyMrLopV4Yfxewig=",
"narHash": "sha256-cQtnDLqSHP1be6rkK2ceHL3HmX4YnfvUQi/af3uBQDc=",
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
},
@ -27,7 +27,7 @@
"kanjidic2-src": {
"flake": false,
"locked": {
"narHash": "sha256-sQS2knH7D/qqrn1BchAvFwjajp9HXv/5r4jx1OEuJWs=",
"narHash": "sha256-ml7ZfAXmjDD1sVKhHMS66ytFItIjXRIppqSaYu8IAws=",
"type": "file",
"url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"
},
@ -38,11 +38,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1745391562,
"narHash": "sha256-sPwcCYuiEopaafePqlG826tBhctuJsLx/mhKKM5Fmjo=",
"lastModified": 1745526057,
"narHash": "sha256-ITSpPDwvLBZBnPRS2bUcHY3gZSwis/uTe255QgMtTLA=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "8a2f738d9d1f1d986b5a4cd2fd2061a7127237d7",
"rev": "f771eb401a46846c1aebd20552521b233dd7e18b",
"type": "github"
},
"original": {

@ -5,7 +5,8 @@
nixpkgs.url = "nixpkgs/nixos-unstable";
jmdict-src = {
url = "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz";
# url = "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz";
url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz";
flake = false;
};

@ -15,9 +15,9 @@ abstract class TableNames {
static const String senseField = 'JMdict_SenseField';
static const String senseGlossary = 'JMdict_SenseGlossary';
static const String senseInfo = 'JMdict_SenseInfo';
static const String senseLanguageSource = 'JMdict_SenseLanguageSource';
static const String senseMisc = 'JMdict_SenseMisc';
static const String sensePOS = 'JMdict_SensePOS';
static const String senseLanguageSource = 'JMdict_SenseLanguageSource';
static const String senseRestrictedToKanji = 'JMdict_SenseRestrictedToKanji';
static const String senseRestrictedToReading =
'JMdict_SenseRestrictedToReading';
@ -216,7 +216,22 @@ class Sense extends SQLWritable {
});
@override
Map<String, Object?> get sqlValue => {};
Map<String, Object?> get sqlValue => {
'id': id,
'orderNum': orderNum,
};
bool get isEmpty => antonyms.isEmpty &&
dialects.isEmpty &&
fields.isEmpty &&
info.isEmpty &&
languageSource.isEmpty &&
glossary.isEmpty &&
misc.isEmpty &&
pos.isEmpty &&
restrictedToKanji.isEmpty &&
restrictedToReading.isEmpty &&
seeAlso.isEmpty;
}
class Entry extends SQLWritable {

@ -196,58 +196,65 @@ List<Entry> parseXML(XmlElement root) {
for (final (orderNum, sense) in entry.findElements('sense').indexed) {
senseId++;
senses.add(
Sense(
id: senseId,
orderNum: orderNum + 1,
restrictedToKanji:
sense.findElements('stagk').map((e) => e.innerText).toList(),
restrictedToReading:
sense.findElements('stagr').map((e) => e.innerText).toList(),
pos: sense
.findElements('pos')
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
.toList(),
misc: sense
.findElements('misc')
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
.toList(),
dialects: sense
.findElements('dial')
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
.toList(),
info: sense.findElements('s_inf').map((e) => e.innerText).toList(),
languageSource: sense
.findElements('lsource')
.map(
(e) => LanguageSource(
language: e.getAttribute('xml:lang') ?? 'eng',
fullyDescribesSense: e.getAttribute('ls_type') == 'part',
constructedFromSmallerWords:
e.getAttribute('ls_wasei') == 'y',
),
)
.toList(),
glossary: sense
.findElements('gloss')
.map(
(e) => Glossary(
language: e.getAttribute('xml:lang') ?? 'eng',
phrase: e.innerText,
type: e.getAttribute('g_type'),
),
)
.toList(),
antonyms: sense
.findElements('ant')
.map((e) => parseXrefParts(e.innerText))
.toList(),
seeAlso: sense
.findElements('xref')
.map((e) => parseXrefParts(e.innerText))
.toList(),
),
final result = Sense(
id: senseId,
orderNum: orderNum + 1,
restrictedToKanji:
sense.findElements('stagk').map((e) => e.innerText).toList(),
restrictedToReading:
sense.findElements('stagr').map((e) => e.innerText).toList(),
pos: sense
.findElements('pos')
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
.toList(),
misc: sense
.findElements('misc')
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
.toList(),
dialects: sense
.findElements('dial')
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
.toList(),
info: sense.findElements('s_inf').map((e) => e.innerText).toList(),
languageSource: sense
.findElements('lsource')
.map(
(e) => LanguageSource(
language: e.getAttribute('xml:lang') ?? 'eng',
fullyDescribesSense: e.getAttribute('ls_type') == 'part',
constructedFromSmallerWords: e.getAttribute('ls_wasei') == 'y',
),
)
.toList(),
glossary: sense
.findElements('gloss')
.map(
(e) => Glossary(
language: e.getAttribute('xml:lang') ?? 'eng',
phrase: e.innerText,
type: e.getAttribute('g_type'),
),
)
.toList(),
antonyms: sense
.findElements('ant')
.map((e) => parseXrefParts(e.innerText))
.toList(),
seeAlso: sense
.findElements('xref')
.map((e) => parseXrefParts(e.innerText))
.toList(),
);
if (result.isEmpty) {
print(
'WARNING: Sense $senseId for entry $entryId is empty, '
'kanji: ${kanjiEls.map((e) => e.reading).join(', ')}, '
'reading: ${readingEls.map((e) => e.reading).join(', ')}',
);
} else {
senses.add(result);
}
}
entries.add(
@ -264,7 +271,7 @@ List<Entry> parseXML(XmlElement root) {
}
Future<void> insertIntoDB(List<Entry> entries, Database db) async {
print(' [JMdict] Batch 1');
print(' [JMdict] Batch 1 - Kanji and readings');
Batch b = db.batch();
for (final e in entries) {
b.insert(TableNames.entry, e.sqlValue);
@ -326,32 +333,26 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
for (final s in e.senses) {
for (final g in s.glossary) {
if (g.language == "eng")
b.insert(
TableNames.entryByEnglish,
{
'entryId': e.id,
'english': g.phrase,
},
// Some entries have the same reading twice with difference in katakana and hiragana
conflictAlgorithm: ConflictAlgorithm.ignore,
);
b.insert(
TableNames.entryByEnglish,
{
'entryId': e.id,
'english': g.phrase,
},
// Some entries have the same reading twice with difference in katakana and hiragana
conflictAlgorithm: ConflictAlgorithm.ignore,
);
}
}
}
await b.commit();
print(' [JMdict] Batch 2');
print(' [JMdict] Batch 2 - Senses');
b = db.batch();
for (final e in entries) {
for (final s in e.senses) {
b.insert(
TableNames.sense,
s.sqlValue
..addAll({'id': s.id, 'entryId': e.id, 'orderNum': s.orderNum}));
b.insert(TableNames.sense, s.sqlValue..addAll({'entryId': e.id}));
for (final d in s.dialects) {
b.insert(TableNames.senseDialect, {'senseId': s.id, 'dialect': d});
}
@ -392,21 +393,17 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
);
}
for (final g in s.glossary) {
if (g.language == 'eng')
b.insert(
TableNames.senseGlossary,
g.sqlValue..addAll({'senseId': s.id}),
// There are some duplicate glossary, especially in
// the other languages.
conflictAlgorithm: ConflictAlgorithm.ignore,
);
b.insert(
TableNames.senseGlossary,
g.sqlValue..addAll({'senseId': s.id}),
);
}
}
}
await b.commit();
print(' [JMdict] Building trees');
print(' [JMdict] Building xref trees');
SplayTreeMap<String, Set<Entry>> entriesByKanji = SplayTreeMap();
for (final entry in entries) {
for (final kanji in entry.kanji) {
@ -428,7 +425,7 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
}
}
print(' [JMdict] Batch 3');
print(' [JMdict] Batch 3 - Xrefs');
b = db.batch();
for (final e in entries) {

@ -1,5 +1,11 @@
/// Interface for objects that are meant to be written to a table in a SQL
/// database.
abstract class SQLWritable {
/// Const constructor, so that implementing classes are able to declare const
/// constructors of their own.
const SQLWritable();
/// Returns a map of the object's properties and their values.
///
/// Note that the object may have properties that are meant to be inserted
/// into a different table. Those properties will/should be excluded from
/// this map.
Map<String, Object?> get sqlValue;
}

@ -0,0 +1,43 @@
/// Dialect tags from JMdict
///
/// See https://www.edrdg.org/jmwsgi/edhelp.py#kw_dial
enum JMdictDialect {
  brazilian(id: 'bra', description: 'Brazilian'),
  hokkaido(id: 'hob', description: 'Hokkaido-ben'),
  kansai(id: 'ksb', description: 'Kansai-ben'),
  kantou(id: 'ktb', description: 'Kantou-ben'),
  kyoto(id: 'kyb', description: 'Kyoto-ben'),
  kyushu(id: 'kyu', description: 'Kyuushuu-ben'),
  nagano(id: 'nab', description: 'Nagano-ben'),
  osaka(id: 'osb', description: 'Osaka-ben'),
  ryukyu(id: 'rkb', description: 'Ryuukyuu-ben'),
  tokyo(id: 'std', description: 'Tokyo-ben (std)'),
  tohoku(id: 'thb', description: 'Touhoku-ben'),
  tosa(id: 'tsb', description: 'Tosa-ben'),
  tsugaru(id: 'tsug', description: 'Tsugaru-ben');

  /// The short tag string identifying this dialect.
  final String id;

  /// Human-readable description of the tag.
  final String description;

  const JMdictDialect({required this.id, required this.description});

  /// Returns the value whose [id] equals the given [id].
  ///
  /// Throws an [Exception] if no value matches.
  static JMdictDialect fromId(String id) {
    for (final dialect in values) {
      if (dialect.id == id) return dialect;
    }
    throw Exception('Unknown id: $id');
  }

  /// Serializes this value as a map holding its [id] and [description].
  Map<String, Object?> toJson() {
    return <String, Object?>{'id': id, 'description': description};
  }

  /// Returns the value whose [id] equals `json['id']`.
  ///
  /// Only the `id` key is consulted. Throws an [Exception] if no value
  /// matches.
  static JMdictDialect fromJson(Map<String, Object?> json) {
    final wanted = json['id'];
    for (final dialect in values) {
      if (dialect.id == wanted) return dialect;
    }
    throw Exception('Unknown id: $wanted');
  }
}

@ -0,0 +1,126 @@
/// Domain / Field of expertise tags from JMdict
///
/// See https://www.edrdg.org/jmwsgi/edhelp.py#kw_fld
enum JMdictField {
  agriculture(id: 'agric', description: 'agriculture'),
  anatomy(id: 'anat', description: 'anatomy'),
  archeology(id: 'archeol', description: 'archeology'),
  architecture(id: 'archit', description: 'architecture'),
  art(id: 'art', description: 'art, aesthetics'),
  astronomy(id: 'astron', description: 'astronomy'),
  audiovisual(id: 'audvid', description: 'audiovisual'),
  aviation(id: 'aviat', description: 'aviation'),
  baseball(id: 'baseb', description: 'baseball'),
  biochemistry(id: 'biochem', description: 'biochemistry'),
  biology(id: 'biol', description: 'biology'),
  botany(id: 'bot', description: 'botany'),
  boxing(id: 'boxing', description: 'boxing'),
  buddhism(id: 'Buddh', description: 'Buddhism'),
  business(id: 'bus', description: 'business'),
  cardGames(id: 'cards', description: 'card games'),
  chemistry(id: 'chem', description: 'chemistry'),
  chineseMythology(id: 'chmyth', description: 'Chinese mythology'),
  christianity(id: 'Christn', description: 'Christianity'),
  civilEngineering(id: 'civeng', description: 'civil engineering'),
  clothing(id: 'cloth', description: 'clothing'),
  computing(id: 'comp', description: 'computing'),
  crystallography(id: 'cryst', description: 'crystallography'),
  dentistry(id: 'dent', description: 'dentistry'),
  ecology(id: 'ecol', description: 'ecology'),
  economics(id: 'econ', description: 'economics'),
  electricEngineering(id: 'elec', description: 'electricity, elec. eng.'),
  electronics(id: 'electr', description: 'electronics'),
  embryology(id: 'embryo', description: 'embryology'),
  engineering(id: 'engr', description: 'engineering'),
  entomology(id: 'ent', description: 'entomology'),
  figureSkating(id: 'figskt', description: 'figure skating'),
  film(id: 'film', description: 'film'),
  finance(id: 'finc', description: 'finance'),
  fishing(id: 'fish', description: 'fishing'),
  food(id: 'food', description: 'food, cooking'),
  gardening(id: 'gardn', description: 'gardening, horticulture'),
  genetics(id: 'genet', description: 'genetics'),
  geography(id: 'geogr', description: 'geography'),
  geology(id: 'geol', description: 'geology'),
  geometry(id: 'geom', description: 'geometry'),
  go(id: 'go', description: 'go (game)'),
  golf(id: 'golf', description: 'golf'),
  grammar(id: 'gramm', description: 'grammar'),
  greekMythology(id: 'grmyth', description: 'Greek mythology'),
  hanafuda(id: 'hanaf', description: 'hanafuda'),
  horseRacing(id: 'horse', description: 'horse racing'),
  internet(id: 'internet', description: 'Internet'),
  japaneseMythology(id: 'jpmyth', description: 'Japanese mythology'),
  kabuki(id: 'kabuki', description: 'kabuki'),
  law(id: 'law', description: 'law'),
  linguistics(id: 'ling', description: 'linguistics'),
  logic(id: 'logic', description: 'logic'),
  martialArts(id: 'MA', description: 'martial arts'),
  mahjong(id: 'mahj', description: 'mahjong'),
  manga(id: 'manga', description: 'manga'),
  mathematics(id: 'math', description: 'mathematics'),
  mechanicalEngineering(id: 'mech', description: 'mechanical engineering'),
  medicine(id: 'med', description: 'medicine'),
  meteorology(id: 'met', description: 'meteorology'),
  military(id: 'mil', description: 'military'),
  mineralogy(id: 'min', description: 'mineralogy'),
  mining(id: 'mining', description: 'mining'),
  motorsport(id: 'motor', description: 'motorsport'),
  music(id: 'music', description: 'music'),
  noh(id: 'noh', description: 'noh (theatre)'),
  ornithology(id: 'ornith', description: 'ornithology'),
  paleontology(id: 'paleo', description: 'paleontology'),
  pathology(id: 'pathol', description: 'pathology'),
  pharmacology(id: 'pharm', description: 'pharmacology'),
  philosophy(id: 'phil', description: 'philosophy'),
  photography(id: 'photo', description: 'photography'),
  physics(id: 'physics', description: 'physics'),
  physiology(id: 'physiol', description: 'physiology'),
  politics(id: 'politics', description: 'politics'),
  printing(id: 'print', description: 'printing (press)'),
  professionalWrestling(id: 'prowres', description: 'professional wrestling'),
  psychiatry(id: 'psy', description: 'psychiatry'),
  psychoanalysis(id: 'psyanal', description: 'psychoanalysis'),
  psychology(id: 'psych', description: 'psychology'),
  railway(id: 'rail', description: 'railway'),
  romanMythology(id: 'rommyth', description: 'Roman mythology'),
  shinto(id: 'Shinto', description: 'Shinto'),
  shogi(id: 'shogi', description: 'shogi (game)'),
  skiing(id: 'ski', description: 'skiing'),
  sports(id: 'sports', description: 'sports'),
  statistics(id: 'stat', description: 'statistics'),
  stockMarket(id: 'stockm', description: 'stock market'),
  sumo(id: 'sumo', description: 'sumo (wrestling)'),
  surgery(id: 'surg', description: 'surgery'),
  telecommunications(id: 'telec', description: 'telecommunications'),
  trademark(id: 'tradem', description: 'trademark'),
  television(id: 'tv', description: 'television'),
  veterinaryTerms(id: 'vet', description: 'veterinary terms'),
  videoGames(id: 'vidg', description: 'video games'),
  zoology(id: 'zool', description: 'zoology');

  /// The short tag string identifying this field.
  final String id;

  /// Human-readable description of the tag.
  final String description;

  const JMdictField({required this.id, required this.description});

  /// Returns the value whose [id] equals the given [id].
  ///
  /// The comparison is case-sensitive (some ids, such as `Buddh` and `MA`,
  /// contain capitals). Throws an [Exception] if no value matches.
  static JMdictField fromId(String id) {
    for (final field in values) {
      if (field.id == id) return field;
    }
    throw Exception('Unknown id: $id');
  }

  /// Serializes this value as a map holding its [id] and [description].
  Map<String, Object?> toJson() {
    return <String, Object?>{'id': id, 'description': description};
  }

  /// Returns the value whose [id] equals `json['id']`.
  ///
  /// Only the `id` key is consulted. Throws an [Exception] if no value
  /// matches.
  static JMdictField fromJson(Map<String, Object?> json) {
    final wanted = json['id'];
    for (final field in values) {
      if (field.id == wanted) return field;
    }
    throw Exception('Unknown id: $wanted');
  }
}

@ -0,0 +1,36 @@
/// Kanji info tags from JMdict
///
/// See https://www.edrdg.org/jmwsgi/edhelp.py#kw_kinf
enum JMdictKanjiInfo {
  ateji(id: 'ateji', description: 'ateji (phonetic) reading'),
  ik(id: 'ik', description: 'word containing irregular kana usage'),
  io(id: 'io', description: 'irregular okurigana usage'),
  ok(id: 'oK', description: 'word containing out-dated kanji or kanji usage'),
  rk(id: 'rK', description: 'rarely-used kanji form'),
  sk(id: 'sK', description: 'search-only kanji form'),

  // The `iK` tag differs from `ik` only by case, so it cannot follow the
  // lower-cased naming used above. It is appended last so that the indices of
  // the pre-existing values stay unchanged.
  irregularKanji(
    id: 'iK',
    description: 'word containing irregular kanji usage',
  );

  /// The short tag string identifying this kanji info value.
  ///
  /// Ids are case-sensitive: `ik` and `iK` are different tags.
  final String id;

  /// Human-readable description of the tag.
  final String description;

  const JMdictKanjiInfo({
    required this.id,
    required this.description,
  });

  /// Returns the value whose [id] equals the given [id].
  ///
  /// Throws an [Exception] if no value matches.
  static JMdictKanjiInfo fromId(String id) =>
      JMdictKanjiInfo.values.firstWhere(
        (e) => e.id == id,
        orElse: () => throw Exception('Unknown id: $id'),
      );

  /// Serializes this value as a map holding its [id] and [description].
  Map<String, Object?> toJson() => {
        'id': id,
        'description': description,
      };

  /// Returns the value whose [id] equals `json['id']`.
  ///
  /// Only the `id` key is consulted. Throws an [Exception] if no value
  /// matches.
  static JMdictKanjiInfo fromJson(Map<String, Object?> json) =>
      JMdictKanjiInfo.values.firstWhere(
        (e) => e.id == json['id'],
        orElse: () => throw Exception('Unknown id: ${json['id']}'),
      );
}

@ -0,0 +1,98 @@
/// Miscellaneous sense tags from JMdict
///
/// See https://www.edrdg.org/jmwsgi/edhelp.py#kw_misc
enum JMdictMisc {
  abbreviation(id: 'abbr', description: 'abbreviation'),
  aphorism(id: 'aphorism', description: 'aphorism (pithy saying)'),
  archaic(id: 'arch', description: 'archaic'),
  character(id: 'char', description: 'character'),
  childrensLanguage(id: 'chn', description: 'children\'s language'),
  colloquial(id: 'col', description: 'colloquial'),
  company(id: 'company', description: 'company name'),
  creature(id: 'creat', description: 'creature'),
  datedTerm(id: 'dated', description: 'dated term'),
  deity(id: 'dei', description: 'deity'),
  derogatory(id: 'derog', description: 'derogatory'),
  document(id: 'doc', description: 'document'),
  euphemistic(id: 'euph', description: 'euphemistic'),
  event(id: 'ev', description: 'event'),
  familiarLanguage(id: 'fam', description: 'familiar language'),
  female(id: 'fem', description: 'female term, language, or name'),
  fiction(id: 'fict', description: 'fiction'),
  formal(id: 'form', description: 'formal or literary term'),
  givenName(
    id: 'given',
    description: 'given name or forename, gender not specified',
  ),
  group(id: 'group', description: 'group'),
  historical(id: 'hist', description: 'historical term'),
  honorific(
    id: 'hon',
    description: 'honorific or respectful (sonkeigo) language',
  ),
  humble(id: 'hum', description: 'humble (kenjougo) language'),
  idiomatic(id: 'id', description: 'idiomatic expression'),
  jocular(id: 'joc', description: 'jocular, humorous term'),
  legend(id: 'leg', description: 'legend'),
  mangaSlang(id: 'm-sl', description: 'manga slang'),
  male(id: 'male', description: 'male term, language, or name'),
  mythology(id: 'myth', description: 'mythology'),
  internetSlang(id: 'net-sl', description: 'Internet slang'),
  object(id: 'obj', description: 'object'),
  obsolete(id: 'obs', description: 'obsolete term'),
  onomatopoeic(id: 'on-mim', description: 'onomatopoeic or mimetic word'),
  organizationName(id: 'organization', description: 'organization name'),
  other(id: 'oth', description: 'other'),
  personName(id: 'person', description: 'full name of a particular person'),
  placeName(id: 'place', description: 'place name'),
  poetical(id: 'poet', description: 'poetical term'),
  polite(id: 'pol', description: 'polite (teineigo) language'),
  product(id: 'product', description: 'product name'),
  proverb(id: 'proverb', description: 'proverb'),
  quotation(id: 'quote', description: 'quotation'),
  rare(id: 'rare', description: 'rare term'),
  religion(id: 'relig', description: 'religion'),
  sensitive(id: 'sens', description: 'sensitive'),
  service(id: 'serv', description: 'service'),
  ship(id: 'ship', description: 'ship name'),
  slang(id: 'sl', description: 'slang'),
  railwayStation(id: 'station', description: 'railway station'),
  surname(id: 'surname', description: 'family or surname'),
  onlyKana(id: 'uk', description: 'word usually written using kana alone'),
  unclassifiedName(id: 'unclass', description: 'unclassified name'),
  vulgar(id: 'vulg', description: 'vulgar expression or word'),
  workOfArt(
    id: 'work',
    description: 'work of art, literature, music, etc. name',
  ),
  xRated(
    id: 'X',
    description: 'rude or X-rated term (not displayed in educational software)',
  ),
  yojijukugo(id: 'yoji', description: 'yojijukugo');

  /// The short tag string identifying this misc value.
  final String id;

  /// Human-readable description of the tag.
  final String description;

  const JMdictMisc({required this.id, required this.description});

  /// Returns the value whose [id] equals the given [id].
  ///
  /// Throws an [Exception] if no value matches.
  static JMdictMisc fromId(String id) {
    for (final misc in values) {
      if (misc.id == id) return misc;
    }
    throw Exception('Unknown id: $id');
  }

  /// Serializes this value as a map holding its [id] and [description].
  Map<String, Object?> toJson() {
    return <String, Object?>{'id': id, 'description': description};
  }

  /// Returns the value whose [id] equals `json['id']`.
  ///
  /// Only the `id` key is consulted. Throws an [Exception] if no value
  /// matches.
  static JMdictMisc fromJson(Map<String, Object?> json) {
    final wanted = json['id'];
    for (final misc in values) {
      if (misc.id == wanted) return misc;
    }
    throw Exception('Unknown id: $wanted');
  }
}

@ -0,0 +1,122 @@
/// Part-of-speech tags from JMdict
///
/// See https://www.edrdg.org/jmwsgi/edhelp.py#kw_pos
enum JMdictPOS {
  // NOTE: quotes inside the descriptions are written as `\'`. A doubled quote
  // (`''`, as used for escaping in SQL) would instead end the string literal
  // and start an adjacent one, silently dropping the quote characters.
  adjF(id: 'adj-f', description: 'noun or verb acting prenominally'),
  adjI(id: 'adj-i', description: 'adjective (keiyoushi)'),
  adjIx(id: 'adj-ix', description: 'adjective (keiyoushi) - yoi/ii class'),
  adjKari(id: 'adj-kari', description: '\'kari\' adjective (archaic)'),
  adjKu(id: 'adj-ku', description: '\'ku\' adjective (archaic)'),
  adjNa(id: 'adj-na', description: 'adjectival nouns or quasi-adjectives (keiyodoshi)'),
  adjNari(id: 'adj-nari', description: 'archaic/formal form of na-adjective'),
  adjNo(id: 'adj-no', description: 'nouns which may take the genitive case particle \'no\''),
  adjPn(id: 'adj-pn', description: 'pre-noun adjectival (rentaishi)'),
  adjShiku(id: 'adj-shiku', description: '\'shiku\' adjective (archaic)'),
  adjT(id: 'adj-t', description: '\'taru\' adjective'),
  adv(id: 'adv', description: 'adverb (fukushi)'),
  advTo(id: 'adv-to', description: 'adverb taking the \'to\' particle'),
  aux(id: 'aux', description: 'auxiliary'),
  auxAdj(id: 'aux-adj', description: 'auxiliary adjective'),
  auxV(id: 'aux-v', description: 'auxiliary verb'),
  conj(id: 'conj', description: 'conjunction'),
  cop(id: 'cop', description: 'copula'),
  ctr(id: 'ctr', description: 'counter'),
  exp(id: 'exp', description: 'expressions (phrases, clauses, etc.)'),
  int(id: 'int', description: 'interjection (kandoushi)'),
  n(id: 'n', description: 'noun (common) (futsuumeishi)'),
  nAdv(id: 'n-adv', description: 'adverbial noun (fukushitekimeishi)'),
  nPr(id: 'n-pr', description: 'proper noun'),
  nPref(id: 'n-pref', description: 'noun, used as a prefix'),
  nSuf(id: 'n-suf', description: 'noun, used as a suffix'),
  nT(id: 'n-t', description: 'noun (temporal) (jisoumeishi)'),
  num(id: 'num', description: 'numeric'),
  pn(id: 'pn', description: 'pronoun'),
  pref(id: 'pref', description: 'prefix'),
  prt(id: 'prt', description: 'particle'),
  suf(id: 'suf', description: 'suffix'),
  unc(id: 'unc', description: 'unclassified'),
  vUnspec(id: 'v-unspec', description: 'verb unspecified'),
  v1(id: 'v1', description: 'Ichidan verb'),
  v1S(id: 'v1-s', description: 'Ichidan verb - kureru special class'),
  v2aS(id: 'v2a-s', description: 'Nidan verb with \'u\' ending (archaic)'),
  v2bK(id: 'v2b-k', description: 'Nidan verb (upper class) with \'bu\' ending (archaic)'),
  v2bS(id: 'v2b-s', description: 'Nidan verb (lower class) with \'bu\' ending (archaic)'),
  v2dK(id: 'v2d-k', description: 'Nidan verb (upper class) with \'dzu\' ending (archaic)'),
  v2dS(id: 'v2d-s', description: 'Nidan verb (lower class) with \'dzu\' ending (archaic)'),
  v2gK(id: 'v2g-k', description: 'Nidan verb (upper class) with \'gu\' ending (archaic)'),
  v2gS(id: 'v2g-s', description: 'Nidan verb (lower class) with \'gu\' ending (archaic)'),
  v2hK(id: 'v2h-k', description: 'Nidan verb (upper class) with \'hu/fu\' ending (archaic)'),
  v2hS(id: 'v2h-s', description: 'Nidan verb (lower class) with \'hu/fu\' ending (archaic)'),
  v2kK(id: 'v2k-k', description: 'Nidan verb (upper class) with \'ku\' ending (archaic)'),
  v2kS(id: 'v2k-s', description: 'Nidan verb (lower class) with \'ku\' ending (archaic)'),
  v2mK(id: 'v2m-k', description: 'Nidan verb (upper class) with \'mu\' ending (archaic)'),
  v2mS(id: 'v2m-s', description: 'Nidan verb (lower class) with \'mu\' ending (archaic)'),
  v2nS(id: 'v2n-s', description: 'Nidan verb (lower class) with \'nu\' ending (archaic)'),
  v2rK(id: 'v2r-k', description: 'Nidan verb (upper class) with \'ru\' ending (archaic)'),
  v2rS(id: 'v2r-s', description: 'Nidan verb (lower class) with \'ru\' ending (archaic)'),
  v2sS(id: 'v2s-s', description: 'Nidan verb (lower class) with \'su\' ending (archaic)'),
  v2tK(id: 'v2t-k', description: 'Nidan verb (upper class) with \'tsu\' ending (archaic)'),
  v2tS(id: 'v2t-s', description: 'Nidan verb (lower class) with \'tsu\' ending (archaic)'),
  v2wS(id: 'v2w-s', description: 'Nidan verb (lower class) with \'u\' ending and \'we\' conjugation (archaic)'),
  v2yK(id: 'v2y-k', description: 'Nidan verb (upper class) with \'yu\' ending (archaic)'),
  v2yS(id: 'v2y-s', description: 'Nidan verb (lower class) with \'yu\' ending (archaic)'),
  v2zS(id: 'v2z-s', description: 'Nidan verb (lower class) with \'zu\' ending (archaic)'),
  v4b(id: 'v4b', description: 'Yodan verb with \'bu\' ending (archaic)'),
  v4g(id: 'v4g', description: 'Yodan verb with \'gu\' ending (archaic)'),
  v4h(id: 'v4h', description: 'Yodan verb with \'hu/fu\' ending (archaic)'),
  v4k(id: 'v4k', description: 'Yodan verb with \'ku\' ending (archaic)'),
  v4m(id: 'v4m', description: 'Yodan verb with \'mu\' ending (archaic)'),
  v4n(id: 'v4n', description: 'Yodan verb with \'nu\' ending (archaic)'),
  v4r(id: 'v4r', description: 'Yodan verb with \'ru\' ending (archaic)'),
  v4s(id: 'v4s', description: 'Yodan verb with \'su\' ending (archaic)'),
  v4t(id: 'v4t', description: 'Yodan verb with \'tsu\' ending (archaic)'),
  v5aru(id: 'v5aru', description: 'Godan verb - -aru special class'),
  v5b(id: 'v5b', description: 'Godan verb with \'bu\' ending'),
  v5g(id: 'v5g', description: 'Godan verb with \'gu\' ending'),
  v5k(id: 'v5k', description: 'Godan verb with \'ku\' ending'),
  v5kS(id: 'v5k-s', description: 'Godan verb - Iku/Yuku special class'),
  v5m(id: 'v5m', description: 'Godan verb with \'mu\' ending'),
  v5n(id: 'v5n', description: 'Godan verb with \'nu\' ending'),
  v5r(id: 'v5r', description: 'Godan verb with \'ru\' ending'),
  v5rI(id: 'v5r-i', description: 'Godan verb with \'ru\' ending (irregular verb)'),
  v5s(id: 'v5s', description: 'Godan verb with \'su\' ending'),
  v5t(id: 'v5t', description: 'Godan verb with \'tsu\' ending'),
  v5u(id: 'v5u', description: 'Godan verb with \'u\' ending'),
  v5uS(id: 'v5u-s', description: 'Godan verb with \'u\' ending (special class)'),
  v5uru(id: 'v5uru', description: 'Godan verb - Uru old class verb (old form of Eru)'),
  vi(id: 'vi', description: 'intransitive verb'),
  vk(id: 'vk', description: 'Kuru verb - special class'),
  vn(id: 'vn', description: 'irregular nu verb'),
  vr(id: 'vr', description: 'irregular ru verb, plain form ends with -ri'),
  vs(id: 'vs', description: 'noun or participle which takes the aux. verb suru'),
  vsC(id: 'vs-c', description: 'suru verb - precursor to the modern suru'),
  vsI(id: 'vs-i', description: 'suru verb - included'),
  vsS(id: 'vs-s', description: 'suru verb - special class'),
  vt(id: 'vt', description: 'transitive verb'),
  vz(id: 'vz', description: 'Ichidan verb - zuru verb (alternative form of -jiru verbs)');

  /// The short tag string identifying this part of speech.
  final String id;

  /// Human-readable description of the tag.
  final String description;

  const JMdictPOS({
    required this.id,
    required this.description,
  });

  /// Returns the value whose [id] equals the given [id].
  ///
  /// Throws an [Exception] if no value matches.
  static JMdictPOS fromId(String id) =>
      JMdictPOS.values.firstWhere(
        (e) => e.id == id,
        orElse: () => throw Exception('Unknown id: $id'),
      );

  /// Serializes this value as a map holding its [id] and [description].
  Map<String, Object?> toJson() => {
        'id': id,
        'description': description,
      };

  /// Returns the value whose [id] equals `json['id']`.
  ///
  /// Only the `id` key is consulted. Throws an [Exception] if no value
  /// matches.
  static JMdictPOS fromJson(Map<String, Object?> json) =>
      JMdictPOS.values.firstWhere(
        (e) => e.id == json['id'],
        orElse: () => throw Exception('Unknown id: ${json['id']}'),
      );
}

@ -0,0 +1,39 @@
/// Reading info tags from JMdict
///
/// See https://www.edrdg.org/jmwsgi/edhelp.py#kw_rinf
enum JMdictReadingInfo {
  gikun(
    id: 'gikun',
    description:
        'gikun (meaning as reading) or jukujikun (special kanji reading)',
  ),
  ik(id: 'ik', description: 'word containing irregular kana usage'),
  ok(id: 'ok', description: 'out-dated or obsolete kana usage'),
  rk(id: 'rk', description: 'rarely used kana form'),
  sk(id: 'sk', description: 'search-only kana form');

  /// The short tag string identifying this reading info value.
  final String id;

  /// Human-readable description of the tag.
  final String description;

  const JMdictReadingInfo({required this.id, required this.description});

  /// Returns the value whose [id] equals the given [id].
  ///
  /// Throws an [Exception] if no value matches.
  static JMdictReadingInfo fromId(String id) {
    for (final info in values) {
      if (info.id == id) return info;
    }
    throw Exception('Unknown id: $id');
  }

  /// Serializes this value as a map holding its [id] and [description].
  Map<String, Object?> toJson() {
    return <String, Object?>{'id': id, 'description': description};
  }

  /// Returns the value whose [id] equals `json['id']`.
  ///
  /// Only the `id` key is consulted. Throws an [Exception] if no value
  /// matches.
  static JMdictReadingInfo fromJson(Map<String, Object?> json) {
    final wanted = json['id'];
    for (final info in values) {
      if (info.id == wanted) return info;
    }
    throw Exception('Unknown id: $wanted');
  }
}

@ -1,5 +0,0 @@
class WordSearchResult {
// TODO: implement me
Map<String, dynamic> toJson() => {};
}

@ -0,0 +1,25 @@
import 'package:jadb/models/word_search/word_search_ruby.dart';
import 'package:jadb/models/word_search/word_search_sense.dart';
import 'package:jadb/models/word_search/word_search_sources.dart';
/// A single entry produced by a word search.
class WordSearchResult {
  // TODO: implement me
  // NOTE(review): this TODO was carried over from the earlier empty stub; it
  // is unclear from this file what is still missing - confirm or remove.

  /// Id of the entry this result describes.
  final int entryId;

  /// The written forms of the entry, as base/furigana pairs.
  final List<WordSearchRuby> japanese;

  /// The senses belonging to the entry.
  final List<WordSearchSense> senses;

  /// Flags for the sources of this result.
  final WordSearchSources sources;

  const WordSearchResult({
    required this.entryId,
    required this.japanese,
    required this.senses,
    required this.sources,
  });

  /// Serializes every field of this result into a JSON-compatible map.
  Map<String, dynamic> toJson() => {
        'entryId': entryId,
        'japanese': japanese.map((e) => e.toJson()).toList(),
        'senses': senses.map((e) => e.toJson()).toList(),
        // `sqlValue` is the map view that WordSearchSources exposes; its keys
        // match what WordSearchSources.fromJson reads back.
        'sources': sources.sqlValue,
      };
}

@ -0,0 +1,25 @@
/// A base text together with its optional furigana.
class WordSearchRuby {
  /// The base text; either a kanji or a reading.
  String base;

  /// The furigana belonging to [base], when there is one.
  String? furigana;

  WordSearchRuby({required this.base, this.furigana});

  /// Builds an instance from the map layout produced by [toJson].
  ///
  /// `base` must be a [String]; `furigana` may be absent or `null`.
  factory WordSearchRuby.fromJson(Map<String, dynamic> json) => WordSearchRuby(
        base: json['base'] as String,
        furigana: json['furigana'] as String?,
      );

  /// Serializes this pair; `furigana` is emitted even when it is `null`.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{'base': base, 'furigana': furigana};
  }
}

@ -0,0 +1,100 @@
import 'package:jadb/models/jmdict/jmdict_dialect.dart';
import 'package:jadb/models/jmdict/jmdict_field.dart';
import 'package:jadb/models/jmdict/jmdict_misc.dart';
import 'package:jadb/models/jmdict/jmdict_pos.dart';
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
/// One sense of a word search result, together with its tags and references.
class WordSearchSense {
  /// The meaning(s) of the word.
  final List<String> englishDefinitions;

  /// Type of word (noun, verb, etc.).
  final List<JMdictPOS> partsOfSpeech;

  /// Relevant words (might include synonyms).
  final List<WordSearchXrefEntry> seeAlso;

  /// Words with the opposite meaning.
  final List<WordSearchXrefEntry> antonyms;

  /// Restrictions on which readings of the parent entry this sense applies
  /// to.
  final List<String> restrictedToReading;

  /// Restrictions on which kanji of the parent entry this sense applies to.
  final List<String> restrictedToKanji;

  /// Tags for the domains or fields of expertise this sense is relevant to.
  final List<JMdictField> fields;

  /// Tags for the dialects this sense is used in.
  final List<JMdictDialect> dialects;

  /// Tags for miscellaneous information.
  final List<JMdictMisc> misc;

  /// Extra information about the sense.
  final List<String> info;

  // TODO: there is a lot more info to collect in the languageSource data
  /// Information about the origin of the word, if loaned from another
  /// language.
  final List<String> languageSource;

  // TODO: add example sentences

  const WordSearchSense({
    required this.englishDefinitions,
    required this.partsOfSpeech,
    required this.seeAlso,
    required this.antonyms,
    required this.restrictedToReading,
    required this.restrictedToKanji,
    required this.fields,
    required this.dialects,
    required this.misc,
    required this.info,
    required this.languageSource,
  });

  /// Whether every list held by this sense is empty.
  bool get isEmpty {
    final hasContent = englishDefinitions.isNotEmpty ||
        partsOfSpeech.isNotEmpty ||
        seeAlso.isNotEmpty ||
        antonyms.isNotEmpty ||
        restrictedToReading.isNotEmpty ||
        restrictedToKanji.isNotEmpty ||
        fields.isNotEmpty ||
        dialects.isNotEmpty ||
        misc.isNotEmpty ||
        info.isNotEmpty ||
        languageSource.isNotEmpty;
    return !hasContent;
  }

  /// Serializes this sense into a JSON-compatible map.
  ///
  /// Tag and cross-reference lists are converted element by element through
  /// their own `toJson`; the string lists are passed through as they are.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{
      'englishDefinitions': englishDefinitions,
      'partsOfSpeech': [for (final pos in partsOfSpeech) pos.toJson()],
      'seeAlso': [for (final xref in seeAlso) xref.toJson()],
      'antonyms': [for (final xref in antonyms) xref.toJson()],
      'restrictedToReading': restrictedToReading,
      'restrictedToKanji': restrictedToKanji,
      'fields': [for (final field in fields) field.toJson()],
      'dialects': [for (final dialect in dialects) dialect.toJson()],
      'misc': [for (final tag in misc) tag.toJson()],
      'info': info,
      'languageSource': languageSource,
    };
  }

  /// Returns a single-line, human-readable dump of all fields.
  String prettyPrint() {
    final out = StringBuffer()
      ..write('WordSearchSense{')
      ..write('englishDefinitions: $englishDefinitions, ')
      ..write('partsOfSpeech: $partsOfSpeech, ')
      ..write('seeAlso: $seeAlso, ')
      ..write('antonyms: $antonyms, ')
      ..write('restrictedToReading: $restrictedToReading, ')
      ..write('restrictedToKanji: $restrictedToKanji, ')
      ..write('fields: $fields, ')
      ..write('dialects: $dialects, ')
      ..write('misc: $misc, ')
      ..write('info: $info, ')
      ..write('languageSource: $languageSource')
      ..write('}');
    return out.toString();
  }
}

@ -0,0 +1,21 @@
/// A pair of on/off flags, one per word-search source.
///
/// NOTE(review): presumably these select which dictionaries a search
/// consults; confirm against the callers.
class WordSearchSources {
  /// Flag for the `jmdict` source; `true` unless specified otherwise.
  final bool jmdict;

  /// Flag for the `jmnedict` source; `false` unless specified otherwise.
  final bool jmnedict;

  const WordSearchSources({
    this.jmdict = true,
    this.jmnedict = false,
  });

  /// Builds an instance from [json].
  ///
  /// A missing or `null` key falls back to the constructor default for that
  /// flag. A non-bool value makes the cast throw.
  factory WordSearchSources.fromJson(Map<String, dynamic> json) {
    final jmdictFlag = json['jmdict'] as bool?;
    final jmnedictFlag = json['jmnedict'] as bool?;
    return WordSearchSources(
      jmdict: jmdictFlag ?? true,
      jmnedict: jmnedictFlag ?? false,
    );
  }

  /// Both flags as a map keyed by source name.
  Map<String, Object?> get sqlValue {
    return {
      'jmdict': jmdict,
      'jmnedict': jmnedict,
    };
  }
}

@ -0,0 +1,20 @@
/// A cross-reference from a sense to another entry.
class WordSearchXrefEntry {
  /// The id of the entry being referenced.
  final int entryId;

  /// NOTE(review): presumably whether the cross-reference could have matched
  /// more than one entry; confirm against the code that fills the database.
  final bool ambiguous;

  const WordSearchXrefEntry({
    required this.entryId,
    required this.ambiguous,
  });

  /// Builds an instance from [json].
  ///
  /// Both `entryId` (int) and `ambiguous` (bool) must be present with the
  /// right type, otherwise the cast throws.
  factory WordSearchXrefEntry.fromJson(Map<String, dynamic> json) {
    final entryId = json['entryId'] as int;
    final ambiguous = json['ambiguous'] as bool;
    return WordSearchXrefEntry(entryId: entryId, ambiguous: ambiguous);
  }

  /// Converts this cross-reference into a map keyed by field name.
  Map<String, dynamic> toJson() {
    return {
      'entryId': entryId,
      'ambiguous': ambiguous,
    };
  }
}

@ -1,5 +1,5 @@
import 'package:jadb/models/jmdict/word_search_result.dart';
import 'package:jadb/models/kanjidic/kanji_search_result.dart';
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/models/kanji_search/kanji_search_result.dart';
import 'package:jadb/models/radkfile/radicals_search_result.dart';
import 'package:jadb/search/jmdict.dart';

@ -1,4 +1,12 @@
import 'package:jadb/models/jmdict/word_search_result.dart';
import 'package:collection/collection.dart';
import 'package:jadb/models/jmdict/jmdict_dialect.dart';
import 'package:jadb/models/jmdict/jmdict_field.dart';
import 'package:jadb/models/jmdict/jmdict_misc.dart';
import 'package:jadb/models/jmdict/jmdict_pos.dart';
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/models/word_search/word_search_ruby.dart';
import 'package:jadb/models/word_search/word_search_sense.dart';
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
import 'package:sqflite_common/sqlite_api.dart';
// TODO: Support globs
@ -9,6 +17,10 @@ import 'package:sqflite_common/sqlite_api.dart';
// TODO: Support mixing kana and romaji
/// Wraps [value] in single quotes, doubling every single quote inside it.
///
/// The result is meant to be spliced into a hand-built SQL string as a
/// string literal.
String _escapeStringValue(String value) {
  final doubled = value.replaceAll("'", "''");
  return "'$doubled'";
}
Future<List<WordSearchResult>?> searchWordWithDbConnection(
DatabaseExecutor connection,
String word, {
@ -18,176 +30,176 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
return null;
}
late final List<int> matches;
late final List<int> entryIds;
if (isKana) {
matches = (await connection.query(
entryIds = (await connection.query(
'JMdict_EntryByKana',
where: 'kana LIKE ?',
whereArgs: ['%$word%'],
whereArgs: ['$word%'],
))
.map((row) => row['entryId'] as int)
.toList();
} else {
matches = (await connection.query(
entryIds = (await connection.query(
'JMdict_EntryByEnglish',
where: 'english LIKE ?',
whereArgs: ['%$word%'],
whereArgs: ['$word%'],
))
.map((row) => row['entryId'] as int)
.toList();
}
if (matches.isEmpty) {
if (entryIds.isEmpty) {
return [];
}
late final List<int> senseIds;
final Future<List<int>> senseIds_query = connection
.query(
'JMdict_Sense',
where: 'entryId IN (${matches.join(',')})',
)
.then((rows) => rows.map((row) => row['id'] as int).toList());
late final List<Map<String, Object?>> senses;
final Future<List<Map<String, Object?>>> senses_query = connection.query(
'JMdict_Sense',
where: 'entryId IN (${entryIds.join(',')})',
);
late final List<Map<String, Object?>> readingElements;
final Future<List<Map<String, Object?>>> readingElements_query =
connection.query(
'JMdict_ReadingElement',
where: 'entryId IN (${matches.join(',')})',
where: 'entryId IN (${entryIds.join(',')})',
);
late final List<Map<String, Object?>> kanjiElements;
final Future<List<Map<String, Object?>>> kanjiElements_query =
connection.query(
'JMdict_KanjiElement',
where: 'entryId IN (${matches.join(',')})',
where: 'entryId IN (${entryIds.join(',')})',
);
await Future.wait([
senseIds_query.then((value) => senseIds = value),
senses_query.then((value) => senses = value),
readingElements_query.then((value) => readingElements = value),
kanjiElements_query.then((value) => kanjiElements = value),
]);
print(senseIds);
print(readingElements);
print(kanjiElements);
// Sense queries
final senseIds = senses.map((element) => element['id'] as int).toList();
late final List<Map<String, Object?>> senseAntonyms;
final Future<List<Map<String, Object?>>> senseAntonyms_query =
connection.query(
'JMdict_SenseAntonym',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseDialects;
final Future<List<Map<String, Object?>>> senseDialects_query =
connection.query(
'JMdict_SenseDialect',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseFields;
final Future<List<Map<String, Object?>>> senseFields_query = connection.query(
'JMdict_SenseField',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseGlossaries;
final Future<List<Map<String, Object?>>> senseGlossaries_query =
connection.query(
'JMdict_SenseGlossary',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseInfos;
final Future<List<Map<String, Object?>>> senseInfos_query = connection.query(
'JMdict_SenseInfo',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseLanguageSources;
final Future<List<Map<String, Object?>>> senseLanguageSources_query =
connection.query(
'JMdict_SenseLanguageSource',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseMiscs;
final Future<List<Map<String, Object?>>> senseMiscs_query = connection.query(
'JMdict_SenseMisc',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> sensePOSs;
final Future<List<Map<String, Object?>>> sensePOSs_query = connection.query(
'JMdict_SensePOS',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseRestrictedToKanjis;
final Future<List<Map<String, Object?>>> senseRestrictedToKanjis_query =
connection.query(
'JMdict_SenseRestrictedToKanji',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseRestrictedToReadings;
final Future<List<Map<String, Object?>>> senseRestrictedToReadings_query =
connection.query(
'JMdict_SenseRestrictedToReading',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseSeeAlsos;
final Future<List<Map<String, Object?>>> senseSeeAlsos_query =
connection.query(
'JMdict_SenseSeeAlso',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> exampleSentences;
final Future<List<Map<String, Object?>>> exampleSentences_query =
connection.query(
'JMdict_ExampleSentence',
where: 'entryId IN (${senseIds.join(',')})',
where: 'senseId IN (${senseIds.join(',')})',
);
// Reading queries
final readingIds = readingElements
.map((element) =>
(element['entryId'] as int, element['reading'] as String))
.map((element) => (
element['entryId'] as int,
_escapeStringValue(element['reading'] as String)
))
.toList();
late final List<Map<String, Object?>> readingElementInfos;
final Future<List<Map<String, Object?>>> readingElementInfos_query =
connection.query(
'JMdict_ReadingElementInfo',
where: 'entryId IN (${readingIds.join(',')})',
where: '(entryId, reading) IN (${readingIds.join(',')})',
);
late final List<Map<String, Object?>> readingElementRestrictions;
final Future<List<Map<String, Object?>>> readingElementRestrictions_query =
connection.query(
'JMdict_ReadingElementRestriction',
where: 'entryId IN (${readingIds.join(',')})',
where: '(entryId, reading) IN (${readingIds.join(',')})',
);
// Kanji queries
final kanjiIds = kanjiElements
.map((element) =>
(element['entryId'] as int, element['reading'] as String))
.map((element) => (
element['entryId'] as int,
_escapeStringValue(element['reading'] as String)
))
.toList();
late final List<Map<String, Object?>> kanjiElementInfos;
final Future<List<Map<String, Object?>>> kanjiElementInfos_query =
connection.query(
'JMdict_KanjiElementInfo',
where: 'entryId IN (${kanjiIds.join(',')})',
where: '(entryId, reading) IN (${kanjiIds.join(',')})',
);
await Future.wait([
@ -211,5 +223,251 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
kanjiElementInfos_query.then((value) => kanjiElementInfos = value),
]);
throw UnimplementedError();
return _regroupWordSearchResults(
entryIds: entryIds,
readingElements: readingElements,
kanjiElements: kanjiElements,
senses: senses,
senseAntonyms: senseAntonyms,
senseDialects: senseDialects,
senseFields: senseFields,
senseGlossaries: senseGlossaries,
senseInfos: senseInfos,
senseLanguageSources: senseLanguageSources,
senseMiscs: senseMiscs,
sensePOSs: sensePOSs,
senseRestrictedToKanjis: senseRestrictedToKanjis,
senseRestrictedToReadings: senseRestrictedToReadings,
senseSeeAlsos: senseSeeAlsos,
exampleSentences: exampleSentences,
readingElementInfos: readingElementInfos,
readingElementRestrictions: readingElementRestrictions,
kanjiElementInfos: kanjiElementInfos,
);
}
/// Regroups the flat row lists fetched for a word search, entry by entry.
///
/// For every id in [entryIds], the reading, kanji and sense rows carrying
/// that `entryId` are selected and handed to [_regroup_words] and
/// [_regroup_senses]; the regrouped words and senses are then printed.
///
/// NOTE(review): nothing is ever added to the returned list, so this
/// currently always returns an empty list. Building the [WordSearchResult]
/// values looks unfinished.
List<WordSearchResult> _regroupWordSearchResults({
  required List<int> entryIds,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
  required List<Map<String, Object?>> kanjiElementInfos,
}) {
  final results = <WordSearchResult>[];

  // Selects the rows whose `entryId` column equals [id], keeping their order.
  List<Map<String, Object?>> rowsOfEntry(
    List<Map<String, Object?>> rows,
    int id,
  ) =>
      [
        for (final row in rows)
          if (row['entryId'] == id) row,
      ];

  for (final entryId in entryIds) {
    final words = _regroup_words(
      entryId: entryId,
      readingElements: rowsOfEntry(readingElements, entryId),
      kanjiElements: rowsOfEntry(kanjiElements, entryId),
      readingElementInfos: readingElementInfos,
      readingElementRestrictions: readingElementRestrictions,
      kanjiElementInfos: kanjiElementInfos,
    );

    final groupedSenses = _regroup_senses(
      senses: rowsOfEntry(senses, entryId),
      senseAntonyms: senseAntonyms,
      senseDialects: senseDialects,
      senseFields: senseFields,
      senseGlossaries: senseGlossaries,
      senseInfos: senseInfos,
      senseLanguageSources: senseLanguageSources,
      senseMiscs: senseMiscs,
      sensePOSs: sensePOSs,
      senseRestrictedToKanjis: senseRestrictedToKanjis,
      senseRestrictedToReadings: senseRestrictedToReadings,
      senseSeeAlsos: senseSeeAlsos,
      exampleSentences: exampleSentences,
    );

    // Debug dump of what was regrouped for this entry.
    print("Entry ID: $entryId");
    print("Words:");
    for (final word in words) {
      print("${word.base}[${word.furigana ?? ""}]");
    }
    print("Senses:");
    for (final groupedSense in groupedSenses) {
      print(groupedSense.prettyPrint());
    }
  }

  return results;
}
/// Pairs the kanji and reading elements of one entry into [WordSearchRuby]s.
///
/// Only rows whose `entryId` column equals [entryId] are considered. The
/// result is built as follows:
///
/// * If the entry has no kanji elements at all, every reading is returned as
///   a bare base without furigana. Previously such entries produced no words
///   unless the reading happened to be flagged `doesNotMatchKanji`.
/// * Otherwise, every reading that is not flagged `doesNotMatchKanji` is
///   paired with every kanji element. If [readingElementRestrictions] holds
///   rows for that reading, only the kanji listed in their `restriction`
///   column are paired.
/// * Readings flagged `doesNotMatchKanji` are appended last, as a bare base
///   without furigana.
///
/// [kanjiElementInfos] and [readingElementInfos] are accepted but not used.
List<WordSearchRuby> _regroup_words({
  required int entryId,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> kanjiElementInfos,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
}) {
  bool belongsToEntry(Map<String, Object?> row) => row['entryId'] == entryId;

  final entryKanji = kanjiElements.where(belongsToEntry).toList();
  final entryReadings = readingElements.where(belongsToEntry).toList();
  final entryRestrictions =
      readingElementRestrictions.where(belongsToEntry).toList();

  // Without any kanji element there is nothing to attach furigana to, so the
  // readings themselves are the words.
  if (entryKanji.isEmpty) {
    return [
      for (final readingElement in entryReadings)
        WordSearchRuby(base: readingElement['reading'] as String),
    ];
  }

  final result = <WordSearchRuby>[];

  for (final readingElement in entryReadings) {
    // Readings that do not match the kanji are emitted on their own below.
    if (readingElement['doesNotMatchKanji'] == 1) {
      continue;
    }

    final reading = readingElement['reading'] as String;

    // Kanji this reading is restricted to; empty means "no restriction".
    final allowedKanji = <Object?>{
      for (final restriction in entryRestrictions)
        if (restriction['reading'] == reading) restriction['restriction'],
    };

    for (final kanjiElement in entryKanji) {
      final kanji = kanjiElement['reading'] as String;
      if (allowedKanji.isNotEmpty && !allowedKanji.contains(kanji)) {
        continue;
      }
      result.add(WordSearchRuby(base: kanji, furigana: reading));
    }
  }

  for (final readingElement in entryReadings) {
    if (readingElement['doesNotMatchKanji'] == 1) {
      result.add(WordSearchRuby(base: readingElement['reading'] as String));
    }
  }

  return result;
}
/// Builds one [WordSearchSense] per row of [senses].
///
/// Every auxiliary row list is indexed by its `senseId` column, and each
/// sense picks up the rows whose `senseId` equals its own `id`. A sense with
/// no rows in a given list gets an empty list for that field.
///
/// [exampleSentences] is accepted but not used.
List<WordSearchSense> _regroup_senses({
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
}) {
  // Indexes [rows] by their `senseId` column.
  Map<int, List<Map<String, Object?>>> bySenseId(
    List<Map<String, Object?>> rows,
  ) =>
      rows.groupListsBy((row) => row['senseId'] as int);

  // Converts cross-reference rows into [WordSearchXrefEntry] values.
  List<WordSearchXrefEntry> toXrefs(List<Map<String, Object?>> rows) => [
        for (final row in rows)
          WordSearchXrefEntry(
            entryId: row['xrefEntryId'] as int,
            ambiguous: row['ambiguous'] == 1,
          ),
      ];

  final antonymsBySense = bySenseId(senseAntonyms);
  final dialectsBySense = bySenseId(senseDialects);
  final fieldsBySense = bySenseId(senseFields);
  final glossariesBySense = bySenseId(senseGlossaries);
  final infosBySense = bySenseId(senseInfos);
  final languageSourcesBySense = bySenseId(senseLanguageSources);
  final miscsBySense = bySenseId(senseMiscs);
  final posBySense = bySenseId(sensePOSs);
  final kanjiRestrictionsBySense = bySenseId(senseRestrictedToKanjis);
  final readingRestrictionsBySense = bySenseId(senseRestrictedToReadings);
  final seeAlsosBySense = bySenseId(senseSeeAlsos);

  return senses.map((sense) {
    final senseId = sense['id'] as int;

    final antonymRows = antonymsBySense[senseId] ?? [];
    final dialectRows = dialectsBySense[senseId] ?? [];
    final fieldRows = fieldsBySense[senseId] ?? [];
    final glossaryRows = glossariesBySense[senseId] ?? [];
    final infoRows = infosBySense[senseId] ?? [];
    final languageSourceRows = languageSourcesBySense[senseId] ?? [];
    final miscRows = miscsBySense[senseId] ?? [];
    final posRows = posBySense[senseId] ?? [];
    final kanjiRestrictionRows = kanjiRestrictionsBySense[senseId] ?? [];
    final readingRestrictionRows = readingRestrictionsBySense[senseId] ?? [];
    final seeAlsoRows = seeAlsosBySense[senseId] ?? [];

    return WordSearchSense(
      englishDefinitions: [
        for (final row in glossaryRows) row['phrase'] as String,
      ],
      partsOfSpeech: [
        for (final row in posRows) JMdictPOS.fromId(row['pos'] as String),
      ],
      seeAlso: toXrefs(seeAlsoRows),
      antonyms: toXrefs(antonymRows),
      restrictedToReading: [
        for (final row in readingRestrictionRows) row['reading'] as String,
      ],
      restrictedToKanji: [
        for (final row in kanjiRestrictionRows) row['kanji'] as String,
      ],
      fields: [
        for (final row in fieldRows) JMdictField.fromId(row['field'] as String),
      ],
      dialects: [
        for (final row in dialectRows)
          JMdictDialect.fromId(row['dialect'] as String),
      ],
      misc: [
        for (final row in miscRows) JMdictMisc.fromId(row['misc'] as String),
      ],
      info: [
        for (final row in infoRows) row['info'] as String,
      ],
      languageSource: [
        for (final row in languageSourceRows) row['language'] as String,
      ],
    );
  }).toList();
}

@ -1,4 +1,4 @@
import 'package:jadb/models/kanjidic/kanji_search_result.dart';
import 'package:jadb/models/kanji_search/kanji_search_result.dart';
import 'package:sqflite_common/sqflite.dart';
Future<KanjiSearchResult?> searchKanjiWithDbConnection(

@ -274,18 +274,22 @@ INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES
('vt', 'transitive verb'),
('vz', 'Ichidan verb - zuru verb (alternative form of -jiru verbs)');
-- NOTE: several of these are not used directly in the XML export, but are implied
-- by whether the reading is marked as kun/on, by nanori tags, etc.
INSERT INTO "JMdict_InfoReading"("id", "description") VALUES
('gikun', 'gikun (meaning as reading) or jukujikun (special kanji reading)'),
('go', 'on-yomi, go'),
-- ('go', 'on-yomi, go'),
('ik', 'word containing irregular kana usage'),
('jouyou', 'approved reading for jouyou kanji'),
('kan', 'on-yomi, kan'),
('kanyou', 'on-yomi, kan''you'),
('kun', 'kun-yomi'),
('name', 'reading used only in names (nanori)'),
-- ('jouyou', 'approved reading for jouyou kanji'),
-- ('kan', 'on-yomi, kan'),
-- ('kanyou', 'on-yomi, kan''you'),
-- ('kun', 'kun-yomi'),
-- ('name', 'reading used only in names (nanori)'),
('ok', 'out-dated or obsolete kana usage'),
('on', 'on-yomi'),
('rad', 'reading used as name of radical'),
-- ('on', 'on-yomi'),
-- ('rad', 'reading used as name of radical'),
('rk', 'rarely used kana form'),
('sk', 'search-only kana form'),
('tou', 'on-yomi, tou');
('sk', 'search-only kana form')
-- ('tou', 'on-yomi, tou')
;