lib: improve jmdict ingestion, update constants

This commit is contained in:
2025-04-22 19:10:53 +02:00
parent 84e80fe876
commit 7544013ffd
4 changed files with 288 additions and 133 deletions

View File

@@ -148,32 +148,12 @@ class XRefParts {
this.readingRef,
this.senseNum,
}) : assert(kanjiRef != null || readingRef != null);
factory XRefParts.fromString(String s) {
final parts = s.split('');
if (parts.length == 1) {
if (parts[0].contains(kanaRegex)) {
return XRefParts(readingRef: parts[0]);
}
return XRefParts(kanjiRef: parts[0]);
} else if (parts.length == 2) {
if (int.tryParse(parts[1]) != null) {
if (parts[0].contains(kanaRegex)) {
return XRefParts(readingRef: parts[0], senseNum: int.parse(parts[1]));
}
return XRefParts(kanjiRef: parts[0], senseNum: int.parse(parts[1]));
}
return XRefParts(kanjiRef: parts[0], readingRef: parts[1]);
} else if (parts.length == 3) {
return XRefParts(
kanjiRef: parts[0],
readingRef: parts[1],
senseNum: int.parse(parts[2]),
);
}
return XRefParts();
}
/// Serialises this reference as a JSON-compatible map.
///
/// All three components are always present as keys, whatever their value.
Map<String, Object?> toJson() {
  return <String, Object?>{
    'kanjiRef': kanjiRef,
    'readingRef': readingRef,
    'senseNum': senseNum,
  };
}
}
class XRef {
@@ -188,6 +168,7 @@ class XRef {
class Sense extends SQLWritable {
final int id;
final int senseNum;
final List<XRefParts> antonyms;
final List<String> dialects;
final List<String> fields;
@@ -202,6 +183,7 @@ class Sense extends SQLWritable {
const Sense({
required this.id,
required this.senseNum,
this.antonyms = const [],
this.dialects = const [],
this.fields = const [],

View File

@@ -1,13 +1,17 @@
import 'dart:collection';
import 'dart:io';
import 'package:collection/collection.dart';
import 'package:jadb/romaji_transliteration.dart';
import 'package:sqflite_common/sqlite_api.dart';
import 'package:xml/xml.dart';
import 'objects.dart';
List<int?> getPriNums(XmlElement e, String prefix) {
/// parse priority values from r_ele and k_ele xml elements
///
/// source: http://www.edrdg.org/jmwsgi/edhelp.py?sid=#kw_freq
List<int?> getPriorityValues(XmlElement e, String prefix) {
int? news, ichi, spec, gai, nf;
for (final pri in e.findElements('${prefix}_pri')) {
final txt = pri.innerText;
@@ -24,6 +28,110 @@ List<int?> getPriNums(XmlElement e, String prefix) {
return [news, ichi, spec, gai, nf];
}
// source: www.edrdg.org/jmwsgi/edhelp.py?sid=#syn_xref
XRefParts parseXrefParts(String s) {
final parts = s.split('');
late final XRefParts result;
switch (parts.length) {
case 1:
result = parts[0].contains(kanaRegex)
? XRefParts(readingRef: parts[0])
: XRefParts(kanjiRef: parts[0]);
break;
case 2:
if (int.tryParse(parts[1]) != null) {
if (parts[0].contains(kanaRegex)) {
result = XRefParts(
readingRef: parts[0],
senseNum: int.parse(parts[1]),
);
} else {
result = XRefParts(
kanjiRef: parts[0],
senseNum: int.parse(parts[1]),
);
}
} else {
result = XRefParts(
kanjiRef: parts[0],
readingRef: parts[1],
);
}
break;
case 3:
result = XRefParts(
kanjiRef: parts[0],
readingRef: parts[1],
senseNum: int.parse(parts[2]),
);
break;
default:
result = XRefParts();
break;
}
return result;
}
/// Resolves [xref] to the single entry it most likely refers to.
///
/// Candidates are looked up in [entriesByKanji] and/or [entriesByReading]
/// (the intersection when both references are given) and then narrowed down:
///
/// 1. Entries with fewer senses than `xref.senseNum` are dropped.
/// 2. For reading-only references, entries without any kanji forms are
///    preferred when at least one exists.
/// 3. Of the remaining entries, the one with the most senses is returned.
///
/// Prints a warning when more than one candidate remains after filtering.
///
/// Throws an [Exception] if [xref] has neither a kanji nor a reading
/// reference, or if no entry matches it.
Entry resolveXref(
  SplayTreeMap<String, Set<Entry>> entriesByKanji,
  SplayTreeMap<String, Set<Entry>> entriesByReading,
  XRefParts xref,
) {
  // A reference that is missing from the lookup maps yields no candidates, so
  // it is reported by the descriptive exception below rather than by a bare
  // null-check failure.
  List<Entry> candidates = switch ((xref.kanjiRef, xref.readingRef)) {
    (null, null) =>
      throw Exception('Xref $xref has no kanji or reading reference'),
    (String k, null) => [...?entriesByKanji[k]],
    (null, String r) => [...?entriesByReading[r]],
    (String k, String r) => [
        ...?entriesByKanji[k]
            ?.intersection(entriesByReading[r] ?? const <Entry>{}),
      ],
  };

  // Drop entries that don't have the number of senses specified in the xref.
  final senseNum = xref.senseNum;
  if (senseNum != null) {
    candidates.retainWhere((entry) => entry.senses.length >= senseNum);
  }

  // If the xref has a reading ref but no kanji ref, and there are multiple
  // entries to choose from, prefer entries without kanji forms if possible.
  if (xref.kanjiRef == null &&
      xref.readingRef != null &&
      candidates.length > 1) {
    final withoutKanji =
        candidates.where((entry) => entry.kanji.isEmpty).toList();
    if (withoutKanji.isNotEmpty) {
      candidates = withoutKanji;
    }
  }

  // Having more senses is a cheap way to choose the most likely correct
  // entry in case there are multiple candidates left, so sort in descending
  // order of sense count and take the first one.
  candidates.sort((a, b) => b.senses.length.compareTo(a.senses.length));

  if (candidates.isEmpty) {
    throw Exception(
      'SKIPPING: Xref $xref has ${candidates.length} entries, '
      'kanjiRef: ${xref.kanjiRef}, readingRef: ${xref.readingRef}, '
      'senseNum: ${xref.senseNum}',
    );
  }

  if (candidates.length > 1) {
    print(
      'WARNING: Xref $xref has ${candidates.length} entries, '
      'kanjiRef: ${xref.kanjiRef}, readingRef: ${xref.readingRef}, '
      'senseNum: ${xref.senseNum}',
    );
  }

  return candidates.first;
}
List<Entry> transformXML(XmlElement root) {
final List<Entry> entries = [];
@@ -36,10 +144,13 @@ List<Entry> transformXML(XmlElement root) {
final List<Sense> senses = [];
for (final k_ele in entry.findAllElements('k_ele')) {
final ke_pri = getPriNums(k_ele, 'ke');
final ke_pri = getPriorityValues(k_ele, 'ke');
kanjiEls.add(
KanjiElement(
info: k_ele.findElements('ke_inf').map((e) => e.innerText).toList(),
info: k_ele
.findElements('ke_inf')
.map((e) => e.innerText.replaceAll(RegExp('[&;]'), ''))
.toList(),
reading: k_ele.findElements('keb').first.innerText,
news: ke_pri[0],
ichi: ke_pri[1],
@@ -51,7 +162,7 @@ List<Entry> transformXML(XmlElement root) {
}
for (final r_ele in entry.findAllElements('r_ele')) {
final re_pri = getPriNums(r_ele, 're');
final re_pri = getPriorityValues(r_ele, 're');
readingEls.add(
ReadingElement(
info: r_ele
@@ -70,11 +181,14 @@ List<Entry> transformXML(XmlElement root) {
);
}
int senseNum = 0;
for (final sense in entry.findAllElements('sense')) {
senseId++;
senseNum++;
senses.add(
Sense(
id: senseId,
senseNum: senseNum,
restrictedToKanji:
sense.findElements('stagk').map((e) => e.innerText).toList(),
restrictedToReading:
@@ -115,11 +229,11 @@ List<Entry> transformXML(XmlElement root) {
.toList(),
antonyms: sense
.findElements('ant')
.map((e) => XRefParts.fromString(e.innerText))
.map((e) => parseXrefParts(e.innerText))
.toList(),
seeAlso: sense
.findElements('xref')
.map((e) => XRefParts.fromString(e.innerText))
.map((e) => parseXrefParts(e.innerText))
.toList(),
),
);
@@ -154,7 +268,11 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
for (final i in k.info) {
b.insert(
TableNames.kanjiInfo,
{'entryId': e.id, 'reading': k.reading, 'info': i},
{
'entryId': e.id,
'reading': k.reading,
'info': i,
},
);
}
}
@@ -166,20 +284,31 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
b.insert(
TableNames.entryByKana,
{'entryId': e.id, 'kana': transliterateKanaToLatin(r.reading)},
{
'entryId': e.id,
'kana': transliterateKanaToLatin(r.reading),
},
// Some entries have the same reading twice with difference in katakana and hiragana
conflictAlgorithm: ConflictAlgorithm.ignore,
);
for (final i in r.info) {
b.insert(
TableNames.readingInfo,
{'entryId': e.id, 'reading': r.reading, 'info': i},
{
'entryId': e.id,
'reading': r.reading,
'info': i,
},
);
}
for (final res in r.restrictions) {
b.insert(
TableNames.readingRestriction,
{'entryId': e.id, 'reading': r.reading, 'restriction': res},
{
'entryId': e.id,
'reading': r.reading,
'restriction': res,
},
);
}
}
@@ -189,7 +318,10 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
if (g.language == "eng")
b.insert(
TableNames.entryByEnglish,
{'entryId': e.id, 'english': g.phrase},
{
'entryId': e.id,
'english': g.phrase,
},
// An entry may contain the same English gloss more than once; ignore the duplicates
conflictAlgorithm: ConflictAlgorithm.ignore,
);
@@ -199,35 +331,15 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
await b.commit();
print(' [JMdict] Building trees');
SplayTreeMap<String, Set<Entry>> entriesByKanji = SplayTreeMap();
for (final entry in entries) {
for (final kanji in entry.kanji) {
if (entriesByKanji.containsKey(kanji.reading)) {
entriesByKanji.update(kanji.reading, (list) => list..add(entry));
} else {
entriesByKanji.putIfAbsent(kanji.reading, () => {entry});
}
}
}
SplayTreeMap<String, Set<Entry>> entriesByReading = SplayTreeMap();
for (final entry in entries) {
for (final reading in entry.readings) {
if (entriesByReading.containsKey(reading.reading)) {
entriesByReading.update(reading.reading, (list) => list..add(entry));
} else {
entriesByReading.putIfAbsent(reading.reading, () => {entry});
}
}
}
print(' [JMdict] Batch 2');
b = db.batch();
for (final e in entries) {
for (final s in e.senses) {
b.insert(
TableNames.sense, s.sqlValue..addAll({'id': s.id, 'entryId': e.id}));
TableNames.sense,
s.sqlValue
..addAll({'id': s.id, 'entryId': e.id, 'senseNum': s.senseNum}));
for (final d in s.dialects) {
b.insert(TableNames.senseDialect, {'senseId': s.id, 'dialect': d});
@@ -278,52 +390,71 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
conflictAlgorithm: ConflictAlgorithm.ignore,
);
}
}
}
for (final xref in s.seeAlso) {
final Set<Entry> entries;
if (xref.kanjiRef != null && xref.readingRef != null) {
entries = entriesByKanji[xref.kanjiRef]!
.difference(entriesByReading[xref.readingRef]!);
} else if (xref.kanjiRef != null) {
entries = entriesByKanji[xref.kanjiRef]!;
} else {
entries = entriesByReading[xref.readingRef]!;
}
for (final ex in entries)
if (!(xref.senseNum != null && xref.senseNum! > ex.senses.length)) {
b.insert(
TableNames.senseSeeAlso,
{
'senseId': s.id,
'xrefEntryId': ex.id,
'seeAlsoKanji': xref.kanjiRef,
'seeAlsoReading': xref.readingRef,
'seeAlsoSense': xref.senseNum,
},
);
}
await b.commit();
print(' [JMdict] Building trees');
SplayTreeMap<String, Set<Entry>> entriesByKanji = SplayTreeMap();
for (final entry in entries) {
for (final kanji in entry.kanji) {
if (entriesByKanji.containsKey(kanji.reading)) {
entriesByKanji.update(kanji.reading, (list) => list..add(entry));
} else {
entriesByKanji.putIfAbsent(kanji.reading, () => {entry});
}
}
}
SplayTreeMap<String, Set<Entry>> entriesByReading = SplayTreeMap();
for (final entry in entries) {
for (final reading in entry.readings) {
if (entriesByReading.containsKey(reading.reading)) {
entriesByReading.update(reading.reading, (list) => list..add(entry));
} else {
entriesByReading.putIfAbsent(reading.reading, () => {entry});
}
}
}
print(' [JMdict] Batch 3');
b = db.batch();
for (final e in entries) {
for (final s in e.senses) {
for (final xref in s.seeAlso) {
final entry = resolveXref(
entriesByKanji,
entriesByReading,
xref,
);
b.insert(
TableNames.senseSeeAlso,
{
'senseId': s.id,
'xrefEntryId': entry.id,
'seeAlsoKanji': xref.kanjiRef,
'seeAlsoReading': xref.readingRef,
'seeAlsoSense': xref.senseNum,
},
);
}
for (final ant in s.antonyms) {
final Set<Entry> entries;
if (ant.kanjiRef != null && ant.readingRef != null) {
entries = entriesByKanji[ant.kanjiRef]!
.difference(entriesByReading[ant.readingRef]!);
} else if (ant.kanjiRef != null) {
entries = entriesByKanji[ant.kanjiRef]!;
} else {
entries = entriesByReading[ant.readingRef]!;
}
for (final ex in entries) {
if (!(ant.senseNum != null && ant.senseNum! > ex.senses.length)) {
b.insert(TableNames.senseAntonyms, {
'senseId': s.id,
'xrefEntryId': ex.id,
'antonymKanji': ant.kanjiRef,
'antonymReading': ant.readingRef,
'antonymSense': ant.senseNum,
});
}
}
final entry = resolveXref(
entriesByKanji,
entriesByReading,
ant,
);
b.insert(TableNames.senseAntonyms, {
'senseId': s.id,
'xrefEntryId': entry.id,
'antonymKanji': ant.kanjiRef,
'antonymReading': ant.readingRef,
'antonymSense': ant.senseNum,
});
}
}
}

View File

@@ -45,7 +45,7 @@ CREATE TABLE "JMdict_KanjiElement" (
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER,
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
PRIMARY KEY ("entryId", "reading")
) WITHOUT ROWID;
@@ -92,20 +92,20 @@ CREATE TABLE "JMdict_ReadingElementInfo" (
-- Sense
-- Optimal solution here would be to have an id INTEGER AUTOINCREMENT,
-- and the entryId as a composite key, since the entryId is used below.
-- However, autoincrementing composite keys are not available in sqlite
CREATE TABLE "JMdict_Sense" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
"entryId" INTEGER REFERENCES "JMdict_Entry"("id")
"entryId" INTEGER REFERENCES "JMdict_Entry"("id"),
"senseNum" INTEGER,
UNIQUE("entryId", "senseNum")
);
CREATE INDEX "JMdict_Sense_byEntryId_bySenseNum" ON "JMdict_Sense"("entryId", "senseNum");
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
"entryId" INTEGER,
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"kanji" TEXT,
FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"),
FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "senseId", "kanji")
);
@@ -132,12 +132,13 @@ CREATE TABLE "JMdict_SenseSeeAlso" (
"xrefEntryId" INTEGER,
"seeAlsoReading" TEXT,
"seeAlsoKanji" TEXT,
"seeAlsoSense" TEXT REFERENCES "JMdict_Sense"("id"),
"seeAlsoSense" INTEGER,
CHECK ("seeAlsoReading" = NULL <> "seeAlsoKanji" = NULL),
-- CHECK("seeAlsoSense" = NULL OR "seeAlsoSense")
-- Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"),
-- TODO: Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "senseNum"),
PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
);
@@ -146,10 +147,11 @@ CREATE TABLE "JMdict_SenseAntonym" (
"xrefEntryId" INTEGER,
"antonymReading" TEXT,
"antonymKanji" TEXT,
"antonymSense" TEXT REFERENCES "JMdict_Sense"("id"),
"antonymSense" INTEGER,
CHECK ("antonymReading" = NULL <> "antonymKanji" = NULL),
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"),
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "senseNum"),
PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
);
@@ -189,7 +191,7 @@ CREATE TABLE "JMdict_SenseLanguageSource" (
CREATE TABLE "JMdict_SenseDialect" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("dialect"),
"dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("id"),
PRIMARY KEY ("senseId", "dialect")
) WITHOUT ROWID;
@@ -246,4 +248,4 @@ CREATE TABLE "JMdict_EntryByEnglish" (
PRIMARY KEY ("english", "entryId")
) WITHOUT ROWID;
CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english");
CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english");

View File

@@ -1,3 +1,5 @@
-- Source: http://www.edrdg.org/jmwsgi/edhelp.py
INSERT INTO "JMdict_InfoDialect"("id", "description") VALUES
('bra', 'Brazilian'),
('hob', 'Hokkaido-ben'),
@@ -8,10 +10,11 @@ INSERT INTO "JMdict_InfoDialect"("id", "description") VALUES
('nab', 'Nagano-ben'),
('osb', 'Osaka-ben'),
('rkb', 'Ryuukyuu-ben'),
('std', 'Tokyo-ben (std)'),
('thb', 'Touhoku-ben'),
('tsb', 'Tosa-ben'),
('tsug', 'Tsugaru-ben');
INSERT INTO "JMdict_InfoField"("id", "description") VALUES
('agric', 'agriculture'),
('anat', 'anatomy'),
@@ -25,13 +28,18 @@ INSERT INTO "JMdict_InfoField"("id", "description") VALUES
('biochem', 'biochemistry'),
('biol', 'biology'),
('bot', 'botany'),
('boxing', 'boxing'),
('Buddh', 'Buddhism'),
('bus', 'business'),
('cards', 'card games'),
('chem', 'chemistry'),
('chmyth', 'Chinese mythology'),
('Christn', 'Christianity'),
('civeng', 'civil engineering'),
('cloth', 'clothing'),
('comp', 'computing'),
('cryst', 'crystallography'),
('dent', 'dentistry'),
('ecol', 'ecology'),
('econ', 'economics'),
('elec', 'electricity, elec. eng.'),
@@ -39,6 +47,8 @@ INSERT INTO "JMdict_InfoField"("id", "description") VALUES
('embryo', 'embryology'),
('engr', 'engineering'),
('ent', 'entomology'),
('figskt', 'figure skating'),
('film', 'film'),
('finc', 'finance'),
('fish', 'fishing'),
('food', 'food, cooking'),
@@ -53,62 +63,82 @@ INSERT INTO "JMdict_InfoField"("id", "description") VALUES
('grmyth', 'Greek mythology'),
('hanaf', 'hanafuda'),
('horse', 'horse racing'),
('internet', 'Internet'),
('jpmyth', 'Japanese mythology'),
('kabuki', 'kabuki'),
('law', 'law'),
('ling', 'linguistics'),
('logic', 'logic'),
('MA', 'martial arts'),
('mahj', 'mahjong'),
('manga', 'manga'),
('math', 'mathematics'),
('mech', 'mechanical engineering'),
('med', 'medicine'),
('met', 'meteorology'),
('mil', 'military'),
('min', 'mineralogy'),
('mining', 'mining'),
('motor', 'motorsport'),
('music', 'music'),
('noh', 'noh (theatre)'),
('ornith', 'ornithology'),
('paleo', 'paleontology'),
('pathol', 'pathology'),
('pharm', 'pharmacy'),
('pharm', 'pharmacology'),
('phil', 'philosophy'),
('photo', 'photography'),
('physics', 'physics'),
('physiol', 'physiology'),
('print', 'printing'),
('politics', 'politics'),
('print', 'printing (press)'),
('prowres', 'professional wrestling'),
('psy', 'psychiatry'),
('psyanal', 'psychoanalysis'),
('psych', 'psychology'),
('rail', 'railway'),
('rommyth', 'Roman mythology'),
('Shinto', 'Shinto'),
('shogi', 'shogi'),
('shogi', 'shogi (game)'),
('ski', 'skiing'),
('sports', 'sports'),
('stat', 'statistics'),
('sumo', 'sumo'),
('stockm', 'stock market'),
('sumo', 'sumo (wrestling)'),
('surg', 'surgery'),
('telec', 'telecommunications'),
('tradem', 'trademark'),
('tv', 'television'),
('vet', 'veterinary terms'),
('vidg', 'video games'),
('zool', 'zoology');
INSERT INTO "JMdict_InfoKanji"("id", "description") VALUES
('ateji', 'ateji (phonetic) reading'),
('ik', 'word containing irregular kana usage'),
('iK', 'word containing irregular kanji usage'),
('ik', 'word containing irregular kana usage'),
('io', 'irregular okurigana usage'),
('oK', 'word containing out-dated kanji or kanji usage'),
('rK', 'rarely-used kanji form');
('rK', 'rarely-used kanji form'),
('sK', 'search-only kanji form');
INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES
('abbr', 'abbreviation'),
('arch', 'archaism'),
('aphorism', 'aphorism (pithy saying)'),
('arch', 'archaic'),
('char', 'character'),
('chn', 'children''s language'),
('col', 'colloquialism'),
('col', 'colloquial'),
('company', 'company name'),
('creat', 'creature'),
('dated', 'dated term'),
('dei', 'deity'),
('derog', 'derogatory'),
('doc', 'document'),
('euph', 'euphemistic'),
('ev', 'event'),
('fam', 'familiar language'),
('fem', 'female term or language'),
('fem', 'female term, language, or name'),
('fict', 'fiction'),
('form', 'formal or literary term'),
('given', 'given name or forename, gender not specified'),
@@ -120,12 +150,11 @@ INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES
('joc', 'jocular, humorous term'),
('leg', 'legend'),
('m-sl', 'manga slang'),
('male', 'male term or language'),
('male', 'male term, language, or name'),
('myth', 'mythology'),
('net-sl', 'Internet slang'),
('obj', 'object'),
('obs', 'obsolete term'),
('obsc', 'obscure term'),
('on-mim', 'onomatopoeic or mimetic word'),
('organization', 'organization name'),
('oth', 'other'),
@@ -136,10 +165,11 @@ INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES
('product', 'product name'),
('proverb', 'proverb'),
('quote', 'quotation'),
('rare', 'rare'),
('rare', 'rare term'),
('relig', 'religion'),
('sens', 'sensitive'),
('serv', 'service'),
('ship', 'ship name'),
('sl', 'slang'),
('station', 'railway station'),
('surname', 'family or surname'),
@@ -149,7 +179,7 @@ INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES
('work', 'work of art, literature, music, etc. name'),
('X', 'rude or X-rated term (not displayed in educational software)'),
('yoji', 'yojijukugo');
INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES
('adj-f', 'noun or verb acting prenominally'),
('adj-i', 'adjective (keiyoushi)'),
@@ -238,7 +268,7 @@ INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES
('vn', 'irregular nu verb'),
('vr', 'irregular ru verb, plain form ends with -ri'),
('vs', 'noun or participle which takes the aux. verb suru'),
('vs-c', 'su verb - precursor to the modern suru'),
('vs-c', 'suru verb - precursor to the modern suru'),
('vs-i', 'suru verb - included'),
('vs-s', 'suru verb - special class'),
('vt', 'transitive verb'),
@@ -246,6 +276,16 @@ INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES
INSERT INTO "JMdict_InfoReading"("id", "description") VALUES
('gikun', 'gikun (meaning as reading) or jukujikun (special kanji reading)'),
('go', 'on-yomi, go'),
('ik', 'word containing irregular kana usage'),
('jouyou', 'approved reading for jouyou kanji'),
('kan', 'on-yomi, kan'),
('kanyou', 'on-yomi, kan''you'),
('kun', 'kun-yomi'),
('name', 'reading used only in names (nanori)'),
('ok', 'out-dated or obsolete kana usage'),
('uK', 'word usually written using kanji alone');
('on', 'on-yomi'),
('rad', 'reading used as name of radical'),
('rk', 'rarely used kana form'),
('sk', 'search-only kana form'),
('tou', 'on-yomi, tou');