From 7544013ffd739e2ff92d96fe5ecaff141927958f Mon Sep 17 00:00:00 2001 From: h7x4 Date: Tue, 22 Apr 2025 19:10:53 +0200 Subject: [PATCH] lib: improve jmdict ingestion, update constants --- lib/_data_ingestion/jmdict/objects.dart | 34 +-- lib/_data_ingestion/jmdict/parser.dart | 285 ++++++++++++++------ migrations/0001_JMDict.sql | 30 ++- migrations/0002_JMDict_insert_constants.sql | 72 +++-- 4 files changed, 288 insertions(+), 133 deletions(-) diff --git a/lib/_data_ingestion/jmdict/objects.dart b/lib/_data_ingestion/jmdict/objects.dart index 842c546..51dac22 100644 --- a/lib/_data_ingestion/jmdict/objects.dart +++ b/lib/_data_ingestion/jmdict/objects.dart @@ -148,32 +148,12 @@ class XRefParts { this.readingRef, this.senseNum, }) : assert(kanjiRef != null || readingRef != null); - - factory XRefParts.fromString(String s) { - final parts = s.split('・'); - if (parts.length == 1) { - if (parts[0].contains(kanaRegex)) { - return XRefParts(readingRef: parts[0]); - } - return XRefParts(kanjiRef: parts[0]); - } else if (parts.length == 2) { - if (int.tryParse(parts[1]) != null) { - if (parts[0].contains(kanaRegex)) { - return XRefParts(readingRef: parts[0], senseNum: int.parse(parts[1])); - } - return XRefParts(kanjiRef: parts[0], senseNum: int.parse(parts[1])); - } - return XRefParts(kanjiRef: parts[0], readingRef: parts[1]); - } else if (parts.length == 3) { - return XRefParts( - kanjiRef: parts[0], - readingRef: parts[1], - senseNum: int.parse(parts[2]), - ); - } - - return XRefParts(); - } + + Map toJson() => { + 'kanjiRef': kanjiRef, + 'readingRef': readingRef, + 'senseNum': senseNum, + }; } class XRef { @@ -188,6 +168,7 @@ class XRef { class Sense extends SQLWritable { final int id; + final int senseNum; final List antonyms; final List dialects; final List fields; @@ -202,6 +183,7 @@ class Sense extends SQLWritable { const Sense({ required this.id, + required this.senseNum, this.antonyms = const [], this.dialects = const [], this.fields = const [], diff --git a/lib/_data_ingestion/jmdict/parser.dart b/lib/_data_ingestion/jmdict/parser.dart index 73e42d2..6d47884 100644 --- a/lib/_data_ingestion/jmdict/parser.dart +++ b/lib/_data_ingestion/jmdict/parser.dart @@ -1,13 +1,17 @@ import 'dart:collection'; import 'dart:io'; +import 'package:collection/collection.dart'; import 'package:jadb/romaji_transliteration.dart'; import 'package:sqflite_common/sqlite_api.dart'; import 'package:xml/xml.dart'; import 'objects.dart'; -List getPriNums(XmlElement e, String prefix) { +/// parse priority values from r_ele and k_ele xml elements +/// +/// source: http://www.edrdg.org/jmwsgi/edhelp.py?sid=#kw_freq +List getPriorityValues(XmlElement e, String prefix) { int? news, ichi, spec, gai, nf; for (final pri in e.findElements('${prefix}_pri')) { final txt = pri.innerText; @@ -24,6 +28,110 @@ List getPriNums(XmlElement e, String prefix) { return [news, ichi, spec, gai, nf]; } +// source: www.edrdg.org/jmwsgi/edhelp.py?sid=#syn_xref +XRefParts parseXrefParts(String s) { + final parts = s.split('・'); + late final XRefParts result; + switch (parts.length) { + case 1: + result = parts[0].contains(kanaRegex) + ? XRefParts(readingRef: parts[0]) + : XRefParts(kanjiRef: parts[0]); + break; + + case 2: + if (int.tryParse(parts[1]) != null) { + if (parts[0].contains(kanaRegex)) { + result = XRefParts( + readingRef: parts[0], + senseNum: int.parse(parts[1]), + ); + } else { + result = XRefParts( + kanjiRef: parts[0], + senseNum: int.parse(parts[1]), + ); + } + } else { + result = XRefParts( + kanjiRef: parts[0], + readingRef: parts[1], + ); + } + break; + + case 3: + result = XRefParts( + kanjiRef: parts[0], + readingRef: parts[1], + senseNum: int.parse(parts[2]), + ); + break; + + default: + result = XRefParts(); + break; + } + + return result; +} + +Entry resolveXref( + SplayTreeMap> entriesByKanji, + SplayTreeMap> entriesByReading, + XRefParts xref, +) { + List candidateEntries = switch ((xref.kanjiRef, xref.readingRef)) { + (null, null) => + throw Exception('Xref $xref has no kanji or reading reference'), + (String k, null) => entriesByKanji[k]!.toList(), + (null, String r) => entriesByReading[r]!.toList(), + (String k, String r) => + entriesByKanji[k]!.intersection(entriesByReading[r]!).toList(), + }; + + // Filter out entries that don't have the number of senses specified in the xref + if (xref.senseNum != null) { + candidateEntries + .retainWhere((entry) => entry.senses.length >= xref.senseNum!); + } + + // If the xref has a reading ref but no kanji ref, and there are multiple + // entries to choose from, prefer entries with empty kanji readings + // if possible. + if (xref.kanjiRef == null && + xref.readingRef != null && + candidateEntries.length > 1) { + final candidatesWithEmptyKanji = + candidateEntries.where((entry) => entry.kanji.length == 0).toList(); + + if (candidatesWithEmptyKanji.isNotEmpty) { + candidateEntries = candidatesWithEmptyKanji; + } + } + + // Having more senses is a cheap way to choose the most likely correct + // entry in case there are multiple candidates left. + candidateEntries.sortBy((entry) => entry.senses.length); + + if (candidateEntries.length == 0) { + throw Exception( + 'SKIPPING: Xref $xref has ${candidateEntries.length} entries, ' + 'kanjiRef: ${xref.kanjiRef}, readingRef: ${xref.readingRef}, ' + 'senseNum: ${xref.senseNum}', + ); + } else if (candidateEntries.length > 1) { + print( + 'WARNING: Xref $xref has ${candidateEntries.length} entries, ' + 'kanjiRef: ${xref.kanjiRef}, readingRef: ${xref.readingRef}, ' + 'senseNum: ${xref.senseNum}', + ); + return candidateEntries.first; + } else { + return candidateEntries.first; + } +} + List transformXML(XmlElement root) { final List entries = []; @@ -36,10 +144,13 @@ List transformXML(XmlElement root) { final List senses = []; for (final k_ele in entry.findAllElements('k_ele')) { - final ke_pri = getPriNums(k_ele, 'ke'); + final ke_pri = getPriorityValues(k_ele, 'ke'); kanjiEls.add( KanjiElement( - info: k_ele.findElements('ke_inf').map((e) => e.innerText).toList(), + info: k_ele + .findElements('ke_inf') + .map((e) => e.innerText.replaceAll(RegExp('[&;]'), '')) + .toList(), reading: k_ele.findElements('keb').first.innerText, news: ke_pri[0], ichi: ke_pri[1], @@ -51,7 +162,7 @@ List transformXML(XmlElement root) { } for (final r_ele in entry.findAllElements('r_ele')) { - final re_pri = getPriNums(r_ele, 're'); + final re_pri = getPriorityValues(r_ele, 're'); readingEls.add( ReadingElement( info: r_ele @@ -70,11 +181,14 @@ List transformXML(XmlElement root) { ); } + int senseNum = 0; for (final sense in entry.findAllElements('sense')) { senseId++; + senseNum++; senses.add( Sense( id: senseId, + senseNum: senseNum, restrictedToKanji: sense.findElements('stagk').map((e) => e.innerText).toList(), restrictedToReading: @@ -115,11 +229,11 @@ List transformXML(XmlElement root) { .toList(), antonyms: sense .findElements('ant') - .map((e) => XRefParts.fromString(e.innerText)) + .map((e) => parseXrefParts(e.innerText)) .toList(), seeAlso: sense .findElements('xref') - .map((e) => XRefParts.fromString(e.innerText)) + .map((e) => parseXrefParts(e.innerText)) .toList(), ), ); @@ -154,7 +268,11 @@ Future insertIntoDB(List entries, Database db) async { for (final i in k.info) { b.insert( TableNames.kanjiInfo, - {'entryId': e.id, 'reading': k.reading, 'info': i}, + { + 'entryId': e.id, + 'reading': k.reading, + 'info': i, + }, ); } } @@ -166,20 +284,31 @@ Future insertIntoDB(List entries, Database db) async { b.insert( TableNames.entryByKana, - {'entryId': e.id, 'kana': transliterateKanaToLatin(r.reading)}, + { + 'entryId': e.id, + 'kana': transliterateKanaToLatin(r.reading), + }, // Some entries have the same reading twice with difference in katakana and hiragana conflictAlgorithm: ConflictAlgorithm.ignore, ); for (final i in r.info) { b.insert( TableNames.readingInfo, - {'entryId': e.id, 'reading': r.reading, 'info': i}, + { + 'entryId': e.id, + 'reading': r.reading, + 'info': i, + }, ); } for (final res in r.restrictions) { b.insert( TableNames.readingRestriction, - {'entryId': e.id, 'reading': r.reading, 'restriction': res}, + { + 'entryId': e.id, + 'reading': r.reading, + 'restriction': res, + }, ); } } @@ -189,7 +318,10 @@ Future insertIntoDB(List entries, Database db) async { if (g.language == "eng") b.insert( TableNames.entryByEnglish, - {'entryId': e.id, 'english': g.phrase}, + { + 'entryId': e.id, + 'english': g.phrase, + }, // Some entries have the same reading twice with difference in katakana and hiragana conflictAlgorithm: ConflictAlgorithm.ignore, ); @@ -199,35 +331,15 @@ Future insertIntoDB(List entries, Database db) async { await b.commit(); - print(' [JMdict] Building trees'); - SplayTreeMap> entriesByKanji = SplayTreeMap(); - for (final entry in entries) { - for (final kanji in entry.kanji) { - if (entriesByKanji.containsKey(kanji.reading)) { - entriesByKanji.update(kanji.reading, (list) => list..add(entry)); - } else { - entriesByKanji.putIfAbsent(kanji.reading, () => {entry}); - } - } - } - SplayTreeMap> entriesByReading = SplayTreeMap(); - for (final entry in entries) { - for (final reading in entry.readings) { - if (entriesByReading.containsKey(reading.reading)) { - entriesByReading.update(reading.reading, (list) => list..add(entry)); - } else { - entriesByReading.putIfAbsent(reading.reading, () => {entry}); - } - } - } - print(' [JMdict] Batch 2'); b = db.batch(); for (final e in entries) { for (final s in e.senses) { b.insert( - TableNames.sense, s.sqlValue..addAll({'id': s.id, 'entryId': e.id})); + TableNames.sense, + s.sqlValue + ..addAll({'id': s.id, 'entryId': e.id, 'senseNum': s.senseNum})); for (final d in s.dialects) { b.insert(TableNames.senseDialect, {'senseId': s.id, 'dialect': d}); @@ -278,52 +390,71 @@ Future insertIntoDB(List entries, Database db) async { conflictAlgorithm: ConflictAlgorithm.ignore, ); } + } + } - for (final xref in s.seeAlso) { - final Set entries; - if (xref.kanjiRef != null && xref.readingRef != null) { - entries = entriesByKanji[xref.kanjiRef]! - .difference(entriesByReading[xref.readingRef]!); - } else if (xref.kanjiRef != null) { - entries = entriesByKanji[xref.kanjiRef]!; - } else { - entries = entriesByReading[xref.readingRef]!; - } - for (final ex in entries) - if (!(xref.senseNum != null && xref.senseNum! > ex.senses.length)) { - b.insert( - TableNames.senseSeeAlso, - { - 'senseId': s.id, - 'xrefEntryId': ex.id, - 'seeAlsoKanji': xref.kanjiRef, - 'seeAlsoReading': xref.readingRef, - 'seeAlsoSense': xref.senseNum, - }, - ); - } + await b.commit(); + + print(' [JMdict] Building trees'); + SplayTreeMap> entriesByKanji = SplayTreeMap(); + for (final entry in entries) { + for (final kanji in entry.kanji) { + if (entriesByKanji.containsKey(kanji.reading)) { + entriesByKanji.update(kanji.reading, (list) => list..add(entry)); + } else { + entriesByKanji.putIfAbsent(kanji.reading, () => {entry}); } + } + } + SplayTreeMap> entriesByReading = SplayTreeMap(); + for (final entry in entries) { + for (final reading in entry.readings) { + if (entriesByReading.containsKey(reading.reading)) { + entriesByReading.update(reading.reading, (list) => list..add(entry)); + } else { + entriesByReading.putIfAbsent(reading.reading, () => {entry}); + } + } + } + + print(' [JMdict] Batch 3'); + b = db.batch(); + + for (final e in entries) { + for (final s in e.senses) { + for (final xref in s.seeAlso) { + final entry = resolveXref( + entriesByKanji, + entriesByReading, + xref, + ); + + b.insert( + TableNames.senseSeeAlso, + { + 'senseId': s.id, + 'xrefEntryId': entry.id, + 'seeAlsoKanji': xref.kanjiRef, + 'seeAlsoReading': xref.readingRef, + 'seeAlsoSense': xref.senseNum, + }, + ); + } + for (final ant in s.antonyms) { - final Set entries; - if (ant.kanjiRef != null && ant.readingRef != null) { - entries = entriesByKanji[ant.kanjiRef]! - .difference(entriesByReading[ant.readingRef]!); - } else if (ant.kanjiRef != null) { - entries = entriesByKanji[ant.kanjiRef]!; - } else { - entries = entriesByReading[ant.readingRef]!; - } - for (final ex in entries) { - if (!(ant.senseNum != null && ant.senseNum! > ex.senses.length)) { - b.insert(TableNames.senseAntonyms, { - 'senseId': s.id, - 'xrefEntryId': ex.id, - 'antonymKanji': ant.kanjiRef, - 'antonymReading': ant.readingRef, - 'antonymSense': ant.senseNum, - }); - } - } + final entry = resolveXref( + entriesByKanji, + entriesByReading, + ant, + ); + + b.insert(TableNames.senseAntonyms, { + 'senseId': s.id, + 'xrefEntryId': entry.id, + 'antonymKanji': ant.kanjiRef, + 'antonymReading': ant.readingRef, + 'antonymSense': ant.senseNum, + }); } } } diff --git a/migrations/0001_JMDict.sql b/migrations/0001_JMDict.sql index 6f7cfae..1071057 100644 --- a/migrations/0001_JMDict.sql +++ b/migrations/0001_JMDict.sql @@ -45,7 +45,7 @@ CREATE TABLE "JMdict_KanjiElement" ( "ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2), "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2), "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2), - "nf" INTEGER, + "nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48), PRIMARY KEY ("entryId", "reading") ) WITHOUT ROWID; @@ -92,20 +92,20 @@ CREATE TABLE "JMdict_ReadingElementInfo" ( -- Sense --- Optimal solution here would be to have an id INTEGER AUTOINCREMENT, --- and the entryId as a composite key, since the entryId is used below. --- However, autoincrementing composite keys are not available in sqlite - CREATE TABLE "JMdict_Sense" ( "id" INTEGER PRIMARY KEY AUTOINCREMENT, - "entryId" INTEGER REFERENCES "JMdict_Entry"("id") + "entryId" INTEGER REFERENCES "JMdict_Entry"("id"), + "senseNum" INTEGER, + UNIQUE("entryId", "senseNum") ); +CREATE INDEX "JMdict_Sense_byEntryId_bySenseNum" ON "JMdict_Sense"("entryId", "senseNum"); + CREATE TABLE "JMdict_SenseRestrictedToKanji" ( "entryId" INTEGER, "senseId" INTEGER REFERENCES "JMdict_Sense"("id"), "kanji" TEXT, - FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"), + FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"), PRIMARY KEY ("entryId", "senseId", "kanji") ); @@ -132,12 +132,13 @@ CREATE TABLE "JMdict_SenseSeeAlso" ( "xrefEntryId" INTEGER, "seeAlsoReading" TEXT, "seeAlsoKanji" TEXT, - "seeAlsoSense" TEXT REFERENCES "JMdict_Sense"("id"), + "seeAlsoSense" INTEGER, CHECK ("seeAlsoReading" = NULL <> "seeAlsoKanji" = NULL), -- CHECK("seeAlsoSense" = NULL OR "seeAlsoSense") - -- Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId. - FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"), + -- TODO: Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId. + FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"), FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), + FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "senseNum"), PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense") ); @@ -146,10 +147,11 @@ CREATE TABLE "JMdict_SenseAntonym" ( "xrefEntryId" INTEGER, "antonymReading" TEXT, "antonymKanji" TEXT, - "antonymSense" TEXT REFERENCES "JMdict_Sense"("id"), + "antonymSense" INTEGER, CHECK ("antonymReading" = NULL <> "antonymKanji" = NULL), - FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"), + FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"), FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), + FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "senseNum"), PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense") ); @@ -189,7 +191,7 @@ CREATE TABLE "JMdict_SenseLanguageSource" ( CREATE TABLE "JMdict_SenseDialect" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"), - "dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("dialect"), + "dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("id"), PRIMARY KEY ("senseId", "dialect") ) WITHOUT ROWID; @@ -246,4 +248,4 @@ CREATE TABLE "JMdict_EntryByEnglish" ( PRIMARY KEY ("english", "entryId") ) WITHOUT ROWID; -CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english"); \ No newline at end of file +CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english"); diff --git a/migrations/0002_JMDict_insert_constants.sql b/migrations/0002_JMDict_insert_constants.sql index 36816be..db0f3ac 100644 --- a/migrations/0002_JMDict_insert_constants.sql +++ b/migrations/0002_JMDict_insert_constants.sql @@ -1,3 +1,5 @@ +-- Source: http://www.edrdg.org/jmwsgi/edhelp.py + INSERT INTO "JMdict_InfoDialect"("id", "description") VALUES ('bra', 'Brazilian'), ('hob', 'Hokkaido-ben'), @@ -8,10 +10,11 @@ INSERT INTO "JMdict_InfoDialect"("id", "description") VALUES ('nab', 'Nagano-ben'), ('osb', 'Osaka-ben'), ('rkb', 'Ryuukyuu-ben'), + ('std', 'Tokyo-ben (std)'), ('thb', 'Touhoku-ben'), ('tsb', 'Tosa-ben'), ('tsug', 'Tsugaru-ben'); - + INSERT INTO "JMdict_InfoField"("id", "description") VALUES ('agric', 'agriculture'), ('anat', 'anatomy'), @@ -25,13 +28,18 @@ INSERT INTO "JMdict_InfoField"("id", "description") VALUES ('biochem', 'biochemistry'), ('biol', 'biology'), ('bot', 'botany'), + ('boxing', 'boxing'), ('Buddh', 'Buddhism'), ('bus', 'business'), + ('cards', 'card games'), ('chem', 'chemistry'), + ('chmyth', 'Chinese mythology'), ('Christn', 'Christianity'), + ('civeng', 'civil engineering'), ('cloth', 'clothing'), ('comp', 'computing'), ('cryst', 'crystallography'), + ('dent', 'dentistry'), ('ecol', 'ecology'), ('econ', 'economics'), ('elec', 'electricity, elec. eng.'), @@ -39,6 +47,8 @@ INSERT INTO "JMdict_InfoField"("id", "description") VALUES ('embryo', 'embryology'), ('engr', 'engineering'), ('ent', 'entomology'), + ('figskt', 'figure skating'), + ('film', 'film'), ('finc', 'finance'), ('fish', 'fishing'), ('food', 'food, cooking'), @@ -53,62 +63,82 @@ INSERT INTO "JMdict_InfoField"("id", "description") VALUES ('grmyth', 'Greek mythology'), ('hanaf', 'hanafuda'), ('horse', 'horse racing'), + ('internet', 'Internet'), + ('jpmyth', 'Japanese mythology'), + ('kabuki', 'kabuki'), ('law', 'law'), ('ling', 'linguistics'), ('logic', 'logic'), ('MA', 'martial arts'), ('mahj', 'mahjong'), + ('manga', 'manga'), ('math', 'mathematics'), ('mech', 'mechanical engineering'), ('med', 'medicine'), ('met', 'meteorology'), ('mil', 'military'), + ('min', 'mineralogy'), + ('mining', 'mining'), + ('motor', 'motorsport'), ('music', 'music'), + ('noh', 'noh (theatre)'), ('ornith', 'ornithology'), ('paleo', 'paleontology'), ('pathol', 'pathology'), - ('pharm', 'pharmacy'), + ('pharm', 'pharmacology'), ('phil', 'philosophy'), ('photo', 'photography'), ('physics', 'physics'), ('physiol', 'physiology'), - ('print', 'printing'), + ('politics', 'politics'), + ('print', 'printing (press)'), + ('prowres', 'professional wrestling'), ('psy', 'psychiatry'), + ('psyanal', 'psychoanalysis'), ('psych', 'psychology'), ('rail', 'railway'), + ('rommyth', 'Roman mythology'), ('Shinto', 'Shinto'), - ('shogi', 'shogi'), + ('shogi', 'shogi (game)'), + ('ski', 'skiing'), ('sports', 'sports'), ('stat', 'statistics'), - ('sumo', 'sumo'), + ('stockm', 'stock market'), + ('sumo', 'sumo (wrestling)'), + ('surg', 'surgery'), ('telec', 'telecommunications'), ('tradem', 'trademark'), + ('tv', 'television'), + ('vet', 'veterinary terms'), ('vidg', 'video games'), ('zool', 'zoology'); INSERT INTO "JMdict_InfoKanji"("id", "description") VALUES ('ateji', 'ateji (phonetic) reading'), - ('ik', 'word containing irregular kana usage'), ('iK', 'word containing irregular kanji usage'), + ('ik', 'word containing irregular kana usage'), ('io', 'irregular okurigana usage'), ('oK', 'word containing out-dated kanji or kanji usage'), - ('rK', 'rarely-used kanji form'); + ('rK', 'rarely-used kanji form'), + ('sK', 'search-only kanji form'); INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES ('abbr', 'abbreviation'), - ('arch', 'archaism'), + ('aphorism', 'aphorism (pithy saying)'), + ('arch', 'archaic'), ('char', 'character'), ('chn', 'children''s language'), - ('col', 'colloquialism'), + ('col', 'colloquial'), ('company', 'company name'), ('creat', 'creature'), ('dated', 'dated term'), ('dei', 'deity'), ('derog', 'derogatory'), ('doc', 'document'), + ('euph', 'euphemistic'), ('ev', 'event'), ('fam', 'familiar language'), - ('fem', 'female term or language'), + ('fem', 'female term, language, or name'), ('fict', 'fiction'), ('form', 'formal or literary term'), ('given', 'given name or forename, gender not specified'), @@ -120,12 +150,11 @@ INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES ('joc', 'jocular, humorous term'), ('leg', 'legend'), ('m-sl', 'manga slang'), - ('male', 'male term or language'), + ('male', 'male term, language, or name'), ('myth', 'mythology'), ('net-sl', 'Internet slang'), ('obj', 'object'), ('obs', 'obsolete term'), - ('obsc', 'obscure term'), ('on-mim', 'onomatopoeic or mimetic word'), ('organization', 'organization name'), ('oth', 'other'), @@ -136,10 +165,11 @@ INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES ('product', 'product name'), ('proverb', 'proverb'), ('quote', 'quotation'), - ('rare', 'rare'), + ('rare', 'rare term'), ('relig', 'religion'), ('sens', 'sensitive'), ('serv', 'service'), + ('ship', 'ship name'), ('sl', 'slang'), ('station', 'railway station'), ('surname', 'family or surname'), @@ -149,7 +179,7 @@ INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES ('work', 'work of art, literature, music, etc. name'), ('X', 'rude or X-rated term (not displayed in educational software)'), ('yoji', 'yojijukugo'); - + INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES ('adj-f', 'noun or verb acting prenominally'), ('adj-i', 'adjective (keiyoushi)'), @@ -238,7 +268,7 @@ INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES ('vn', 'irregular nu verb'), ('vr', 'irregular ru verb, plain form ends with -ri'), ('vs', 'noun or participle which takes the aux. verb suru'), - ('vs-c', 'su verb - precursor to the modern suru'), + ('vs-c', 'suru verb - precursor to the modern suru'), ('vs-i', 'suru verb - included'), ('vs-s', 'suru verb - special class'), ('vt', 'transitive verb'), @@ -246,6 +276,16 @@ INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES INSERT INTO "JMdict_InfoReading"("id", "description") VALUES ('gikun', 'gikun (meaning as reading) or jukujikun (special kanji reading)'), + ('go', 'on-yomi, go'), ('ik', 'word containing irregular kana usage'), + ('jouyou', 'approved reading for jouyou kanji'), + ('kan', 'on-yomi, kan'), + ('kanyou', 'on-yomi, kan''you'), + ('kun', 'kun-yomi'), + ('name', 'reading used only in names (nanori)'), ('ok', 'out-dated or obsolete kana usage'), - ('uK', 'word usually written using kanji alone'); + ('on', 'on-yomi'), + ('rad', 'reading used as name of radical'), + ('rk', 'rarely used kana form'), + ('sk', 'search-only kana form'), + ('tou', 'on-yomi, tou');