lib: improve jmdict ingestion, update constants
This commit is contained in:
@@ -148,32 +148,12 @@ class XRefParts {
|
||||
this.readingRef,
|
||||
this.senseNum,
|
||||
}) : assert(kanjiRef != null || readingRef != null);
|
||||
|
||||
/// Parses a cross-reference string whose parts are separated by `・`.
///
/// Recognised layouts, by number of parts:
///  * `ref` – stored as `readingRef` if it contains a match of [kanaRegex],
///    otherwise as `kanjiRef`.
///  * `ref・N` – as above plus `senseNum`, when the second part parses as an
///    integer.
///  * `kanji・reading` – when the second part is not an integer.
///  * `kanji・reading・N`
///
/// Throws a [FormatException] when [s] splits into more than three parts, or
/// when the third part of a three-part reference is not an integer. An
/// "empty" [XRefParts] is never returned because the constructor asserts that
/// at least one of `kanjiRef` / `readingRef` is set.
factory XRefParts.fromString(String s) {
  final segments = s.split('・');
  // `split` always yields at least one element, so this access is safe.
  final head = segments[0];

  switch (segments.length) {
    case 1:
      return head.contains(kanaRegex)
          ? XRefParts(readingRef: head)
          : XRefParts(kanjiRef: head);

    case 2:
      // Parse once and reuse the value instead of tryParse + parse.
      final senseNum = int.tryParse(segments[1]);
      if (senseNum == null) {
        return XRefParts(kanjiRef: head, readingRef: segments[1]);
      }
      return head.contains(kanaRegex)
          ? XRefParts(readingRef: head, senseNum: senseNum)
          : XRefParts(kanjiRef: head, senseNum: senseNum);

    case 3:
      return XRefParts(
        kanjiRef: head,
        readingRef: segments[1],
        senseNum: int.parse(segments[2]),
      );

    default:
      throw FormatException(
        'Cross-reference must consist of 1 to 3 parts separated by "・"',
        s,
      );
  }
}
|
||||
|
||||
/// Returns this cross-reference as a map with the keys `kanjiRef`,
/// `readingRef` and `senseNum`, each holding the field of the same name.
Map<String, Object?> toJson() {
  return <String, Object?>{
    'kanjiRef': kanjiRef,
    'readingRef': readingRef,
    'senseNum': senseNum,
  };
}
|
||||
}
|
||||
|
||||
class XRef {
|
||||
@@ -188,6 +168,7 @@ class XRef {
|
||||
|
||||
class Sense extends SQLWritable {
|
||||
final int id;
|
||||
final int senseNum;
|
||||
final List<XRefParts> antonyms;
|
||||
final List<String> dialects;
|
||||
final List<String> fields;
|
||||
@@ -202,6 +183,7 @@ class Sense extends SQLWritable {
|
||||
|
||||
const Sense({
|
||||
required this.id,
|
||||
required this.senseNum,
|
||||
this.antonyms = const [],
|
||||
this.dialects = const [],
|
||||
this.fields = const [],
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
import 'dart:collection';
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:collection/collection.dart';
|
||||
import 'package:jadb/romaji_transliteration.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
import 'package:xml/xml.dart';
|
||||
|
||||
import 'objects.dart';
|
||||
|
||||
List<int?> getPriNums(XmlElement e, String prefix) {
|
||||
/// parse priority values from r_ele and k_ele xml elements
|
||||
///
|
||||
/// source: http://www.edrdg.org/jmwsgi/edhelp.py?sid=#kw_freq
|
||||
List<int?> getPriorityValues(XmlElement e, String prefix) {
|
||||
int? news, ichi, spec, gai, nf;
|
||||
for (final pri in e.findElements('${prefix}_pri')) {
|
||||
final txt = pri.innerText;
|
||||
@@ -24,6 +28,110 @@ List<int?> getPriNums(XmlElement e, String prefix) {
|
||||
return [news, ichi, spec, gai, nf];
|
||||
}
|
||||
|
||||
/// Parses the text of a cross-reference into its kanji, reading and sense
/// number components.
///
/// The text is split on `・` and interpreted by the number of parts:
///  * one part – a reading reference if it contains a match of [kanaRegex],
///    otherwise a kanji reference;
///  * two parts – `ref・senseNum` when the second part parses as an integer
///    (the first part is classified as above), otherwise `kanji・reading`;
///  * three parts – `kanji・reading・senseNum`.
///
/// Throws a [FormatException] if [s] has more than three parts, or if the
/// third part of a three-part reference is not an integer. Returning an
/// empty [XRefParts] is not an option: its constructor asserts that at least
/// one of the two references is present.
///
/// source: www.edrdg.org/jmwsgi/edhelp.py?sid=#syn_xref
XRefParts parseXrefParts(String s) {
  switch (s.split('・')) {
    case [final ref]:
      return ref.contains(kanaRegex)
          ? XRefParts(readingRef: ref)
          : XRefParts(kanjiRef: ref);

    case [final ref, final second]:
      // Parse once; a null result means the second part is a reading.
      final senseNum = int.tryParse(second);
      if (senseNum == null) {
        return XRefParts(kanjiRef: ref, readingRef: second);
      }
      return ref.contains(kanaRegex)
          ? XRefParts(readingRef: ref, senseNum: senseNum)
          : XRefParts(kanjiRef: ref, senseNum: senseNum);

    case [final kanji, final reading, final sense]:
      return XRefParts(
        kanjiRef: kanji,
        readingRef: reading,
        senseNum: int.parse(sense),
      );

    default:
      throw FormatException(
        'Cross-reference must consist of 1 to 3 parts separated by "・"',
        s,
      );
  }
}
|
||||
|
||||
/// Resolves [xref] to the single [Entry] it most likely refers to.
///
/// Candidates are looked up in [entriesByKanji] and/or [entriesByReading],
/// depending on which references [xref] carries. When both are present, only
/// entries found under both keys are considered. The candidates are then
/// narrowed down:
///  1. if [xref] names a sense number, entries with fewer senses than that
///     number are dropped;
///  2. for reading-only references with several candidates, entries without
///     any kanji forms are preferred when at least one exists;
///  3. of the remaining candidates, the one with the most senses is chosen.
///
/// Prints a warning when more than one candidate remains before step 3.
///
/// Throws an [Exception] if [xref] has neither a kanji nor a reading
/// reference, or if no candidate entry is left.
Entry resolveXref(
  SplayTreeMap<String, Set<Entry>> entriesByKanji,
  SplayTreeMap<String, Set<Entry>> entriesByReading,
  XRefParts xref,
) {
  final kanjiRef = xref.kanjiRef;
  final readingRef = xref.readingRef;
  final senseNum = xref.senseNum;

  // Unknown keys yield an empty candidate list, so the descriptive
  // "SKIPPING" exception below is raised instead of a bare null-check error.
  List<Entry> candidateEntries = switch ((kanjiRef, readingRef)) {
    (null, null) =>
      throw Exception('Xref $xref has no kanji or reading reference'),
    (final String k, null) => [...?entriesByKanji[k]],
    (null, final String r) => [...?entriesByReading[r]],
    (final String k, final String r) => [
        ...?entriesByKanji[k]
            ?.intersection(entriesByReading[r] ?? const <Entry>{}),
      ],
  };

  // Drop entries that have fewer senses than the sense number in the xref.
  if (senseNum != null) {
    candidateEntries.retainWhere((entry) => entry.senses.length >= senseNum);
  }

  // If the xref has a reading ref but no kanji ref, and there are multiple
  // entries to choose from, prefer entries without kanji forms if possible.
  if (kanjiRef == null && readingRef != null && candidateEntries.length > 1) {
    final candidatesWithEmptyKanji =
        candidateEntries.where((entry) => entry.kanji.isEmpty).toList();

    if (candidatesWithEmptyKanji.isNotEmpty) {
      candidateEntries = candidatesWithEmptyKanji;
    }
  }

  // Having more senses is a cheap way to choose the most likely correct
  // entry in case there are multiple candidates left. Sort in descending
  // order so that `first` is the entry with the most senses.
  candidateEntries
      .sort((a, b) => b.senses.length.compareTo(a.senses.length));

  if (candidateEntries.isEmpty) {
    throw Exception(
      'SKIPPING: Xref $xref has ${candidateEntries.length} entries, '
      'kanjiRef: ${xref.kanjiRef}, readingRef: ${xref.readingRef}, '
      'senseNum: ${xref.senseNum}',
    );
  }

  if (candidateEntries.length > 1) {
    print(
      'WARNING: Xref $xref has ${candidateEntries.length} entries, '
      'kanjiRef: ${xref.kanjiRef}, readingRef: ${xref.readingRef}, '
      'senseNum: ${xref.senseNum}',
    );
  }

  return candidateEntries.first;
}
|
||||
|
||||
List<Entry> transformXML(XmlElement root) {
|
||||
final List<Entry> entries = [];
|
||||
|
||||
@@ -36,10 +144,13 @@ List<Entry> transformXML(XmlElement root) {
|
||||
final List<Sense> senses = [];
|
||||
|
||||
for (final k_ele in entry.findAllElements('k_ele')) {
|
||||
final ke_pri = getPriNums(k_ele, 'ke');
|
||||
final ke_pri = getPriorityValues(k_ele, 'ke');
|
||||
kanjiEls.add(
|
||||
KanjiElement(
|
||||
info: k_ele.findElements('ke_inf').map((e) => e.innerText).toList(),
|
||||
info: k_ele
|
||||
.findElements('ke_inf')
|
||||
.map((e) => e.innerText.replaceAll(RegExp('[&;]'), ''))
|
||||
.toList(),
|
||||
reading: k_ele.findElements('keb').first.innerText,
|
||||
news: ke_pri[0],
|
||||
ichi: ke_pri[1],
|
||||
@@ -51,7 +162,7 @@ List<Entry> transformXML(XmlElement root) {
|
||||
}
|
||||
|
||||
for (final r_ele in entry.findAllElements('r_ele')) {
|
||||
final re_pri = getPriNums(r_ele, 're');
|
||||
final re_pri = getPriorityValues(r_ele, 're');
|
||||
readingEls.add(
|
||||
ReadingElement(
|
||||
info: r_ele
|
||||
@@ -70,11 +181,14 @@ List<Entry> transformXML(XmlElement root) {
|
||||
);
|
||||
}
|
||||
|
||||
int senseNum = 0;
|
||||
for (final sense in entry.findAllElements('sense')) {
|
||||
senseId++;
|
||||
senseNum++;
|
||||
senses.add(
|
||||
Sense(
|
||||
id: senseId,
|
||||
senseNum: senseNum,
|
||||
restrictedToKanji:
|
||||
sense.findElements('stagk').map((e) => e.innerText).toList(),
|
||||
restrictedToReading:
|
||||
@@ -115,11 +229,11 @@ List<Entry> transformXML(XmlElement root) {
|
||||
.toList(),
|
||||
antonyms: sense
|
||||
.findElements('ant')
|
||||
.map((e) => XRefParts.fromString(e.innerText))
|
||||
.map((e) => parseXrefParts(e.innerText))
|
||||
.toList(),
|
||||
seeAlso: sense
|
||||
.findElements('xref')
|
||||
.map((e) => XRefParts.fromString(e.innerText))
|
||||
.map((e) => parseXrefParts(e.innerText))
|
||||
.toList(),
|
||||
),
|
||||
);
|
||||
@@ -154,7 +268,11 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
|
||||
for (final i in k.info) {
|
||||
b.insert(
|
||||
TableNames.kanjiInfo,
|
||||
{'entryId': e.id, 'reading': k.reading, 'info': i},
|
||||
{
|
||||
'entryId': e.id,
|
||||
'reading': k.reading,
|
||||
'info': i,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -166,20 +284,31 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
|
||||
|
||||
b.insert(
|
||||
TableNames.entryByKana,
|
||||
{'entryId': e.id, 'kana': transliterateKanaToLatin(r.reading)},
|
||||
{
|
||||
'entryId': e.id,
|
||||
'kana': transliterateKanaToLatin(r.reading),
|
||||
},
|
||||
// Some entries have the same reading twice with difference in katakana and hiragana
|
||||
conflictAlgorithm: ConflictAlgorithm.ignore,
|
||||
);
|
||||
for (final i in r.info) {
|
||||
b.insert(
|
||||
TableNames.readingInfo,
|
||||
{'entryId': e.id, 'reading': r.reading, 'info': i},
|
||||
{
|
||||
'entryId': e.id,
|
||||
'reading': r.reading,
|
||||
'info': i,
|
||||
},
|
||||
);
|
||||
}
|
||||
for (final res in r.restrictions) {
|
||||
b.insert(
|
||||
TableNames.readingRestriction,
|
||||
{'entryId': e.id, 'reading': r.reading, 'restriction': res},
|
||||
{
|
||||
'entryId': e.id,
|
||||
'reading': r.reading,
|
||||
'restriction': res,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -189,7 +318,10 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
|
||||
if (g.language == "eng")
|
||||
b.insert(
|
||||
TableNames.entryByEnglish,
|
||||
{'entryId': e.id, 'english': g.phrase},
|
||||
{
|
||||
'entryId': e.id,
|
||||
'english': g.phrase,
|
||||
},
|
||||
// The same English phrase can presumably occur more than once for an entry; ignore the duplicate rows.
|
||||
conflictAlgorithm: ConflictAlgorithm.ignore,
|
||||
);
|
||||
@@ -199,35 +331,15 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
|
||||
|
||||
await b.commit();
|
||||
|
||||
print(' [JMdict] Building trees');
|
||||
SplayTreeMap<String, Set<Entry>> entriesByKanji = SplayTreeMap();
|
||||
for (final entry in entries) {
|
||||
for (final kanji in entry.kanji) {
|
||||
if (entriesByKanji.containsKey(kanji.reading)) {
|
||||
entriesByKanji.update(kanji.reading, (list) => list..add(entry));
|
||||
} else {
|
||||
entriesByKanji.putIfAbsent(kanji.reading, () => {entry});
|
||||
}
|
||||
}
|
||||
}
|
||||
SplayTreeMap<String, Set<Entry>> entriesByReading = SplayTreeMap();
|
||||
for (final entry in entries) {
|
||||
for (final reading in entry.readings) {
|
||||
if (entriesByReading.containsKey(reading.reading)) {
|
||||
entriesByReading.update(reading.reading, (list) => list..add(entry));
|
||||
} else {
|
||||
entriesByReading.putIfAbsent(reading.reading, () => {entry});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
print(' [JMdict] Batch 2');
|
||||
b = db.batch();
|
||||
|
||||
for (final e in entries) {
|
||||
for (final s in e.senses) {
|
||||
b.insert(
|
||||
TableNames.sense, s.sqlValue..addAll({'id': s.id, 'entryId': e.id}));
|
||||
TableNames.sense,
|
||||
s.sqlValue
|
||||
..addAll({'id': s.id, 'entryId': e.id, 'senseNum': s.senseNum}));
|
||||
|
||||
for (final d in s.dialects) {
|
||||
b.insert(TableNames.senseDialect, {'senseId': s.id, 'dialect': d});
|
||||
@@ -278,52 +390,71 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
|
||||
conflictAlgorithm: ConflictAlgorithm.ignore,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (final xref in s.seeAlso) {
|
||||
final Set<Entry> entries;
|
||||
if (xref.kanjiRef != null && xref.readingRef != null) {
|
||||
entries = entriesByKanji[xref.kanjiRef]!
|
||||
.difference(entriesByReading[xref.readingRef]!);
|
||||
} else if (xref.kanjiRef != null) {
|
||||
entries = entriesByKanji[xref.kanjiRef]!;
|
||||
} else {
|
||||
entries = entriesByReading[xref.readingRef]!;
|
||||
}
|
||||
for (final ex in entries)
|
||||
if (!(xref.senseNum != null && xref.senseNum! > ex.senses.length)) {
|
||||
b.insert(
|
||||
TableNames.senseSeeAlso,
|
||||
{
|
||||
'senseId': s.id,
|
||||
'xrefEntryId': ex.id,
|
||||
'seeAlsoKanji': xref.kanjiRef,
|
||||
'seeAlsoReading': xref.readingRef,
|
||||
'seeAlsoSense': xref.senseNum,
|
||||
},
|
||||
);
|
||||
}
|
||||
await b.commit();
|
||||
|
||||
print(' [JMdict] Building trees');
|
||||
SplayTreeMap<String, Set<Entry>> entriesByKanji = SplayTreeMap();
|
||||
for (final entry in entries) {
|
||||
for (final kanji in entry.kanji) {
|
||||
if (entriesByKanji.containsKey(kanji.reading)) {
|
||||
entriesByKanji.update(kanji.reading, (list) => list..add(entry));
|
||||
} else {
|
||||
entriesByKanji.putIfAbsent(kanji.reading, () => {entry});
|
||||
}
|
||||
}
|
||||
}
|
||||
SplayTreeMap<String, Set<Entry>> entriesByReading = SplayTreeMap();
|
||||
for (final entry in entries) {
|
||||
for (final reading in entry.readings) {
|
||||
if (entriesByReading.containsKey(reading.reading)) {
|
||||
entriesByReading.update(reading.reading, (list) => list..add(entry));
|
||||
} else {
|
||||
entriesByReading.putIfAbsent(reading.reading, () => {entry});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
print(' [JMdict] Batch 3');
|
||||
b = db.batch();
|
||||
|
||||
for (final e in entries) {
|
||||
for (final s in e.senses) {
|
||||
for (final xref in s.seeAlso) {
|
||||
final entry = resolveXref(
|
||||
entriesByKanji,
|
||||
entriesByReading,
|
||||
xref,
|
||||
);
|
||||
|
||||
b.insert(
|
||||
TableNames.senseSeeAlso,
|
||||
{
|
||||
'senseId': s.id,
|
||||
'xrefEntryId': entry.id,
|
||||
'seeAlsoKanji': xref.kanjiRef,
|
||||
'seeAlsoReading': xref.readingRef,
|
||||
'seeAlsoSense': xref.senseNum,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
for (final ant in s.antonyms) {
|
||||
final Set<Entry> entries;
|
||||
if (ant.kanjiRef != null && ant.readingRef != null) {
|
||||
entries = entriesByKanji[ant.kanjiRef]!
|
||||
.difference(entriesByReading[ant.readingRef]!);
|
||||
} else if (ant.kanjiRef != null) {
|
||||
entries = entriesByKanji[ant.kanjiRef]!;
|
||||
} else {
|
||||
entries = entriesByReading[ant.readingRef]!;
|
||||
}
|
||||
for (final ex in entries) {
|
||||
if (!(ant.senseNum != null && ant.senseNum! > ex.senses.length)) {
|
||||
b.insert(TableNames.senseAntonyms, {
|
||||
'senseId': s.id,
|
||||
'xrefEntryId': ex.id,
|
||||
'antonymKanji': ant.kanjiRef,
|
||||
'antonymReading': ant.readingRef,
|
||||
'antonymSense': ant.senseNum,
|
||||
});
|
||||
}
|
||||
}
|
||||
final entry = resolveXref(
|
||||
entriesByKanji,
|
||||
entriesByReading,
|
||||
ant,
|
||||
);
|
||||
|
||||
b.insert(TableNames.senseAntonyms, {
|
||||
'senseId': s.id,
|
||||
'xrefEntryId': entry.id,
|
||||
'antonymKanji': ant.kanjiRef,
|
||||
'antonymReading': ant.readingRef,
|
||||
'antonymSense': ant.senseNum,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ CREATE TABLE "JMdict_KanjiElement" (
|
||||
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
|
||||
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
|
||||
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
|
||||
"nf" INTEGER,
|
||||
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
|
||||
PRIMARY KEY ("entryId", "reading")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
@@ -92,20 +92,20 @@ CREATE TABLE "JMdict_ReadingElementInfo" (
|
||||
|
||||
-- Sense
|
||||
|
||||
-- Optimal solution here would be to have an id INTEGER AUTOINCREMENT,
|
||||
-- and the entryId as a composite key, since the entryId is used below.
|
||||
-- However, autoincrementing composite keys are not available in sqlite
|
||||
|
||||
CREATE TABLE "JMdict_Sense" (
|
||||
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
"entryId" INTEGER REFERENCES "JMdict_Entry"("id")
|
||||
"entryId" INTEGER REFERENCES "JMdict_Entry"("id"),
|
||||
"senseNum" INTEGER,
|
||||
UNIQUE("entryId", "senseNum")
|
||||
);
|
||||
|
||||
CREATE INDEX "JMdict_Sense_byEntryId_bySenseNum" ON "JMdict_Sense"("entryId", "senseNum");
|
||||
|
||||
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
|
||||
"entryId" INTEGER,
|
||||
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
|
||||
"kanji" TEXT,
|
||||
FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"),
|
||||
FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
||||
PRIMARY KEY ("entryId", "senseId", "kanji")
|
||||
);
|
||||
|
||||
@@ -132,12 +132,13 @@ CREATE TABLE "JMdict_SenseSeeAlso" (
|
||||
"xrefEntryId" INTEGER,
|
||||
"seeAlsoReading" TEXT,
|
||||
"seeAlsoKanji" TEXT,
|
||||
"seeAlsoSense" TEXT REFERENCES "JMdict_Sense"("id"),
|
||||
"seeAlsoSense" INTEGER,
|
||||
CHECK ("seeAlsoReading" = NULL <> "seeAlsoKanji" = NULL),
|
||||
-- CHECK("seeAlsoSense" = NULL OR "seeAlsoSense")
|
||||
-- Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
|
||||
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"),
|
||||
-- TODO: Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
|
||||
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "senseNum"),
|
||||
PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
|
||||
);
|
||||
|
||||
@@ -146,10 +147,11 @@ CREATE TABLE "JMdict_SenseAntonym" (
|
||||
"xrefEntryId" INTEGER,
|
||||
"antonymReading" TEXT,
|
||||
"antonymKanji" TEXT,
|
||||
"antonymSense" TEXT REFERENCES "JMdict_Sense"("id"),
|
||||
"antonymSense" INTEGER,
|
||||
CHECK ("antonymReading" = NULL <> "antonymKanji" = NULL),
|
||||
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"),
|
||||
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "senseNum"),
|
||||
PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
|
||||
);
|
||||
|
||||
@@ -189,7 +191,7 @@ CREATE TABLE "JMdict_SenseLanguageSource" (
|
||||
|
||||
CREATE TABLE "JMdict_SenseDialect" (
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
||||
"dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("dialect"),
|
||||
"dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("id"),
|
||||
PRIMARY KEY ("senseId", "dialect")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
@@ -246,4 +248,4 @@ CREATE TABLE "JMdict_EntryByEnglish" (
|
||||
PRIMARY KEY ("english", "entryId")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english");
|
||||
CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english");
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
-- Source: http://www.edrdg.org/jmwsgi/edhelp.py
|
||||
|
||||
INSERT INTO "JMdict_InfoDialect"("id", "description") VALUES
|
||||
('bra', 'Brazilian'),
|
||||
('hob', 'Hokkaido-ben'),
|
||||
@@ -8,10 +10,11 @@ INSERT INTO "JMdict_InfoDialect"("id", "description") VALUES
|
||||
('nab', 'Nagano-ben'),
|
||||
('osb', 'Osaka-ben'),
|
||||
('rkb', 'Ryuukyuu-ben'),
|
||||
('std', 'Tokyo-ben (std)'),
|
||||
('thb', 'Touhoku-ben'),
|
||||
('tsb', 'Tosa-ben'),
|
||||
('tsug', 'Tsugaru-ben');
|
||||
|
||||
|
||||
INSERT INTO "JMdict_InfoField"("id", "description") VALUES
|
||||
('agric', 'agriculture'),
|
||||
('anat', 'anatomy'),
|
||||
@@ -25,13 +28,18 @@ INSERT INTO "JMdict_InfoField"("id", "description") VALUES
|
||||
('biochem', 'biochemistry'),
|
||||
('biol', 'biology'),
|
||||
('bot', 'botany'),
|
||||
('boxing', 'boxing'),
|
||||
('Buddh', 'Buddhism'),
|
||||
('bus', 'business'),
|
||||
('cards', 'card games'),
|
||||
('chem', 'chemistry'),
|
||||
('chmyth', 'Chinese mythology'),
|
||||
('Christn', 'Christianity'),
|
||||
('civeng', 'civil engineering'),
|
||||
('cloth', 'clothing'),
|
||||
('comp', 'computing'),
|
||||
('cryst', 'crystallography'),
|
||||
('dent', 'dentistry'),
|
||||
('ecol', 'ecology'),
|
||||
('econ', 'economics'),
|
||||
('elec', 'electricity, elec. eng.'),
|
||||
@@ -39,6 +47,8 @@ INSERT INTO "JMdict_InfoField"("id", "description") VALUES
|
||||
('embryo', 'embryology'),
|
||||
('engr', 'engineering'),
|
||||
('ent', 'entomology'),
|
||||
('figskt', 'figure skating'),
|
||||
('film', 'film'),
|
||||
('finc', 'finance'),
|
||||
('fish', 'fishing'),
|
||||
('food', 'food, cooking'),
|
||||
@@ -53,62 +63,82 @@ INSERT INTO "JMdict_InfoField"("id", "description") VALUES
|
||||
('grmyth', 'Greek mythology'),
|
||||
('hanaf', 'hanafuda'),
|
||||
('horse', 'horse racing'),
|
||||
('internet', 'Internet'),
|
||||
('jpmyth', 'Japanese mythology'),
|
||||
('kabuki', 'kabuki'),
|
||||
('law', 'law'),
|
||||
('ling', 'linguistics'),
|
||||
('logic', 'logic'),
|
||||
('MA', 'martial arts'),
|
||||
('mahj', 'mahjong'),
|
||||
('manga', 'manga'),
|
||||
('math', 'mathematics'),
|
||||
('mech', 'mechanical engineering'),
|
||||
('med', 'medicine'),
|
||||
('met', 'meteorology'),
|
||||
('mil', 'military'),
|
||||
('min', 'mineralogy'),
|
||||
('mining', 'mining'),
|
||||
('motor', 'motorsport'),
|
||||
('music', 'music'),
|
||||
('noh', 'noh (theatre)'),
|
||||
('ornith', 'ornithology'),
|
||||
('paleo', 'paleontology'),
|
||||
('pathol', 'pathology'),
|
||||
('pharm', 'pharmacy'),
|
||||
('pharm', 'pharmacology'),
|
||||
('phil', 'philosophy'),
|
||||
('photo', 'photography'),
|
||||
('physics', 'physics'),
|
||||
('physiol', 'physiology'),
|
||||
('print', 'printing'),
|
||||
('politics', 'politics'),
|
||||
('print', 'printing (press)'),
|
||||
('prowres', 'professional wrestling'),
|
||||
('psy', 'psychiatry'),
|
||||
('psyanal', 'psychoanalysis'),
|
||||
('psych', 'psychology'),
|
||||
('rail', 'railway'),
|
||||
('rommyth', 'Roman mythology'),
|
||||
('Shinto', 'Shinto'),
|
||||
('shogi', 'shogi'),
|
||||
('shogi', 'shogi (game)'),
|
||||
('ski', 'skiing'),
|
||||
('sports', 'sports'),
|
||||
('stat', 'statistics'),
|
||||
('sumo', 'sumo'),
|
||||
('stockm', 'stock market'),
|
||||
('sumo', 'sumo (wrestling)'),
|
||||
('surg', 'surgery'),
|
||||
('telec', 'telecommunications'),
|
||||
('tradem', 'trademark'),
|
||||
('tv', 'television'),
|
||||
('vet', 'veterinary terms'),
|
||||
('vidg', 'video games'),
|
||||
('zool', 'zoology');
|
||||
|
||||
INSERT INTO "JMdict_InfoKanji"("id", "description") VALUES
|
||||
('ateji', 'ateji (phonetic) reading'),
|
||||
('ik', 'word containing irregular kana usage'),
|
||||
('iK', 'word containing irregular kanji usage'),
|
||||
('ik', 'word containing irregular kana usage'),
|
||||
('io', 'irregular okurigana usage'),
|
||||
('oK', 'word containing out-dated kanji or kanji usage'),
|
||||
('rK', 'rarely-used kanji form');
|
||||
('rK', 'rarely-used kanji form'),
|
||||
('sK', 'search-only kanji form');
|
||||
|
||||
INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES
|
||||
('abbr', 'abbreviation'),
|
||||
('arch', 'archaism'),
|
||||
('aphorism', 'aphorism (pithy saying)'),
|
||||
('arch', 'archaic'),
|
||||
('char', 'character'),
|
||||
('chn', 'children''s language'),
|
||||
('col', 'colloquialism'),
|
||||
('col', 'colloquial'),
|
||||
('company', 'company name'),
|
||||
('creat', 'creature'),
|
||||
('dated', 'dated term'),
|
||||
('dei', 'deity'),
|
||||
('derog', 'derogatory'),
|
||||
('doc', 'document'),
|
||||
('euph', 'euphemistic'),
|
||||
('ev', 'event'),
|
||||
('fam', 'familiar language'),
|
||||
('fem', 'female term or language'),
|
||||
('fem', 'female term, language, or name'),
|
||||
('fict', 'fiction'),
|
||||
('form', 'formal or literary term'),
|
||||
('given', 'given name or forename, gender not specified'),
|
||||
@@ -120,12 +150,11 @@ INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES
|
||||
('joc', 'jocular, humorous term'),
|
||||
('leg', 'legend'),
|
||||
('m-sl', 'manga slang'),
|
||||
('male', 'male term or language'),
|
||||
('male', 'male term, language, or name'),
|
||||
('myth', 'mythology'),
|
||||
('net-sl', 'Internet slang'),
|
||||
('obj', 'object'),
|
||||
('obs', 'obsolete term'),
|
||||
('obsc', 'obscure term'),
|
||||
('on-mim', 'onomatopoeic or mimetic word'),
|
||||
('organization', 'organization name'),
|
||||
('oth', 'other'),
|
||||
@@ -136,10 +165,11 @@ INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES
|
||||
('product', 'product name'),
|
||||
('proverb', 'proverb'),
|
||||
('quote', 'quotation'),
|
||||
('rare', 'rare'),
|
||||
('rare', 'rare term'),
|
||||
('relig', 'religion'),
|
||||
('sens', 'sensitive'),
|
||||
('serv', 'service'),
|
||||
('ship', 'ship name'),
|
||||
('sl', 'slang'),
|
||||
('station', 'railway station'),
|
||||
('surname', 'family or surname'),
|
||||
@@ -149,7 +179,7 @@ INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES
|
||||
('work', 'work of art, literature, music, etc. name'),
|
||||
('X', 'rude or X-rated term (not displayed in educational software)'),
|
||||
('yoji', 'yojijukugo');
|
||||
|
||||
|
||||
INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES
|
||||
('adj-f', 'noun or verb acting prenominally'),
|
||||
('adj-i', 'adjective (keiyoushi)'),
|
||||
@@ -238,7 +268,7 @@ INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES
|
||||
('vn', 'irregular nu verb'),
|
||||
('vr', 'irregular ru verb, plain form ends with -ri'),
|
||||
('vs', 'noun or participle which takes the aux. verb suru'),
|
||||
('vs-c', 'su verb - precursor to the modern suru'),
|
||||
('vs-c', 'suru verb - precursor to the modern suru'),
|
||||
('vs-i', 'suru verb - included'),
|
||||
('vs-s', 'suru verb - special class'),
|
||||
('vt', 'transitive verb'),
|
||||
@@ -246,6 +276,16 @@ INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES
|
||||
|
||||
INSERT INTO "JMdict_InfoReading"("id", "description") VALUES
|
||||
('gikun', 'gikun (meaning as reading) or jukujikun (special kanji reading)'),
|
||||
('go', 'on-yomi, go'),
|
||||
('ik', 'word containing irregular kana usage'),
|
||||
('jouyou', 'approved reading for jouyou kanji'),
|
||||
('kan', 'on-yomi, kan'),
|
||||
('kanyou', 'on-yomi, kan''you'),
|
||||
('kun', 'kun-yomi'),
|
||||
('name', 'reading used only in names (nanori)'),
|
||||
('ok', 'out-dated or obsolete kana usage'),
|
||||
('uK', 'word usually written using kanji alone');
|
||||
('on', 'on-yomi'),
|
||||
('rad', 'reading used as name of radical'),
|
||||
('rk', 'rarely used kana form'),
|
||||
('sk', 'search-only kana form'),
|
||||
('tou', 'on-yomi, tou');
|
||||
|
||||
Reference in New Issue
Block a user