Implement word search
This commit is contained in:
@@ -15,9 +15,9 @@ abstract class TableNames {
|
||||
static const String senseField = 'JMdict_SenseField';
|
||||
static const String senseGlossary = 'JMdict_SenseGlossary';
|
||||
static const String senseInfo = 'JMdict_SenseInfo';
|
||||
static const String senseLanguageSource = 'JMdict_SenseLanguageSource';
|
||||
static const String senseMisc = 'JMdict_SenseMisc';
|
||||
static const String sensePOS = 'JMdict_SensePOS';
|
||||
static const String senseLanguageSource = 'JMdict_SenseLanguageSource';
|
||||
static const String senseRestrictedToKanji = 'JMdict_SenseRestrictedToKanji';
|
||||
static const String senseRestrictedToReading =
|
||||
'JMdict_SenseRestrictedToReading';
|
||||
@@ -216,7 +216,22 @@ class Sense extends SQLWritable {
|
||||
});
|
||||
|
||||
@override
|
||||
Map<String, Object?> get sqlValue => {};
|
||||
Map<String, Object?> get sqlValue => {
|
||||
'id': id,
|
||||
'orderNum': orderNum,
|
||||
};
|
||||
|
||||
bool get isEmpty => antonyms.isEmpty &&
|
||||
dialects.isEmpty &&
|
||||
fields.isEmpty &&
|
||||
info.isEmpty &&
|
||||
languageSource.isEmpty &&
|
||||
glossary.isEmpty &&
|
||||
misc.isEmpty &&
|
||||
pos.isEmpty &&
|
||||
restrictedToKanji.isEmpty &&
|
||||
restrictedToReading.isEmpty &&
|
||||
seeAlso.isEmpty;
|
||||
}
|
||||
|
||||
class Entry extends SQLWritable {
|
||||
|
||||
@@ -196,58 +196,65 @@ List<Entry> parseXML(XmlElement root) {
|
||||
|
||||
for (final (orderNum, sense) in entry.findElements('sense').indexed) {
|
||||
senseId++;
|
||||
senses.add(
|
||||
Sense(
|
||||
id: senseId,
|
||||
orderNum: orderNum + 1,
|
||||
restrictedToKanji:
|
||||
sense.findElements('stagk').map((e) => e.innerText).toList(),
|
||||
restrictedToReading:
|
||||
sense.findElements('stagr').map((e) => e.innerText).toList(),
|
||||
pos: sense
|
||||
.findElements('pos')
|
||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||
.toList(),
|
||||
misc: sense
|
||||
.findElements('misc')
|
||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||
.toList(),
|
||||
dialects: sense
|
||||
.findElements('dial')
|
||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||
.toList(),
|
||||
info: sense.findElements('s_inf').map((e) => e.innerText).toList(),
|
||||
languageSource: sense
|
||||
.findElements('lsource')
|
||||
.map(
|
||||
(e) => LanguageSource(
|
||||
language: e.getAttribute('xml:lang') ?? 'eng',
|
||||
fullyDescribesSense: e.getAttribute('ls_type') == 'part',
|
||||
constructedFromSmallerWords:
|
||||
e.getAttribute('ls_wasei') == 'y',
|
||||
),
|
||||
)
|
||||
.toList(),
|
||||
glossary: sense
|
||||
.findElements('gloss')
|
||||
.map(
|
||||
(e) => Glossary(
|
||||
language: e.getAttribute('xml:lang') ?? 'eng',
|
||||
phrase: e.innerText,
|
||||
type: e.getAttribute('g_type'),
|
||||
),
|
||||
)
|
||||
.toList(),
|
||||
antonyms: sense
|
||||
.findElements('ant')
|
||||
.map((e) => parseXrefParts(e.innerText))
|
||||
.toList(),
|
||||
seeAlso: sense
|
||||
.findElements('xref')
|
||||
.map((e) => parseXrefParts(e.innerText))
|
||||
.toList(),
|
||||
),
|
||||
final result = Sense(
|
||||
id: senseId,
|
||||
orderNum: orderNum + 1,
|
||||
restrictedToKanji:
|
||||
sense.findElements('stagk').map((e) => e.innerText).toList(),
|
||||
restrictedToReading:
|
||||
sense.findElements('stagr').map((e) => e.innerText).toList(),
|
||||
pos: sense
|
||||
.findElements('pos')
|
||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||
.toList(),
|
||||
misc: sense
|
||||
.findElements('misc')
|
||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||
.toList(),
|
||||
dialects: sense
|
||||
.findElements('dial')
|
||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||
.toList(),
|
||||
info: sense.findElements('s_inf').map((e) => e.innerText).toList(),
|
||||
languageSource: sense
|
||||
.findElements('lsource')
|
||||
.map(
|
||||
(e) => LanguageSource(
|
||||
language: e.getAttribute('xml:lang') ?? 'eng',
|
||||
fullyDescribesSense: e.getAttribute('ls_type') == 'part',
|
||||
constructedFromSmallerWords: e.getAttribute('ls_wasei') == 'y',
|
||||
),
|
||||
)
|
||||
.toList(),
|
||||
glossary: sense
|
||||
.findElements('gloss')
|
||||
.map(
|
||||
(e) => Glossary(
|
||||
language: e.getAttribute('xml:lang') ?? 'eng',
|
||||
phrase: e.innerText,
|
||||
type: e.getAttribute('g_type'),
|
||||
),
|
||||
)
|
||||
.toList(),
|
||||
antonyms: sense
|
||||
.findElements('ant')
|
||||
.map((e) => parseXrefParts(e.innerText))
|
||||
.toList(),
|
||||
seeAlso: sense
|
||||
.findElements('xref')
|
||||
.map((e) => parseXrefParts(e.innerText))
|
||||
.toList(),
|
||||
);
|
||||
|
||||
if (result.isEmpty) {
|
||||
print(
|
||||
'WARNING: Sense $senseId for entry $entryId is empty, '
|
||||
'kanji: ${kanjiEls.map((e) => e.reading).join(', ')}, '
|
||||
'reading: ${readingEls.map((e) => e.reading).join(', ')}',
|
||||
);
|
||||
} else {
|
||||
senses.add(result);
|
||||
}
|
||||
}
|
||||
|
||||
entries.add(
|
||||
@@ -264,7 +271,7 @@ List<Entry> parseXML(XmlElement root) {
|
||||
}
|
||||
|
||||
Future<void> insertIntoDB(List<Entry> entries, Database db) async {
|
||||
print(' [JMdict] Batch 1');
|
||||
print(' [JMdict] Batch 1 - Kanji and readings');
|
||||
Batch b = db.batch();
|
||||
for (final e in entries) {
|
||||
b.insert(TableNames.entry, e.sqlValue);
|
||||
@@ -326,32 +333,26 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
|
||||
|
||||
for (final s in e.senses) {
|
||||
for (final g in s.glossary) {
|
||||
if (g.language == "eng")
|
||||
b.insert(
|
||||
TableNames.entryByEnglish,
|
||||
{
|
||||
'entryId': e.id,
|
||||
'english': g.phrase,
|
||||
},
|
||||
// Some entries have the same reading twice, differing only in katakana vs. hiragana
|
||||
conflictAlgorithm: ConflictAlgorithm.ignore,
|
||||
);
|
||||
b.insert(
|
||||
TableNames.entryByEnglish,
|
||||
{
|
||||
'entryId': e.id,
|
||||
'english': g.phrase,
|
||||
},
|
||||
// Some entries have the same reading twice, differing only in katakana vs. hiragana
|
||||
conflictAlgorithm: ConflictAlgorithm.ignore,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await b.commit();
|
||||
|
||||
print(' [JMdict] Batch 2');
|
||||
print(' [JMdict] Batch 2 - Senses');
|
||||
b = db.batch();
|
||||
|
||||
for (final e in entries) {
|
||||
for (final s in e.senses) {
|
||||
b.insert(
|
||||
TableNames.sense,
|
||||
s.sqlValue
|
||||
..addAll({'id': s.id, 'entryId': e.id, 'orderNum': s.orderNum}));
|
||||
|
||||
b.insert(TableNames.sense, s.sqlValue..addAll({'entryId': e.id}));
|
||||
for (final d in s.dialects) {
|
||||
b.insert(TableNames.senseDialect, {'senseId': s.id, 'dialect': d});
|
||||
}
|
||||
@@ -392,21 +393,17 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
|
||||
);
|
||||
}
|
||||
for (final g in s.glossary) {
|
||||
if (g.language == 'eng')
|
||||
b.insert(
|
||||
TableNames.senseGlossary,
|
||||
g.sqlValue..addAll({'senseId': s.id}),
|
||||
// There are some duplicate glossary entries, especially in
|
||||
// the other languages.
|
||||
conflictAlgorithm: ConflictAlgorithm.ignore,
|
||||
);
|
||||
b.insert(
|
||||
TableNames.senseGlossary,
|
||||
g.sqlValue..addAll({'senseId': s.id}),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await b.commit();
|
||||
|
||||
print(' [JMdict] Building trees');
|
||||
print(' [JMdict] Building xref trees');
|
||||
SplayTreeMap<String, Set<Entry>> entriesByKanji = SplayTreeMap();
|
||||
for (final entry in entries) {
|
||||
for (final kanji in entry.kanji) {
|
||||
@@ -428,7 +425,7 @@ Future<void> insertIntoDB(List<Entry> entries, Database db) async {
|
||||
}
|
||||
}
|
||||
|
||||
print(' [JMdict] Batch 3');
|
||||
print(' [JMdict] Batch 3 - Xrefs');
|
||||
b = db.batch();
|
||||
|
||||
for (final e in entries) {
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
/// Interface for objects that are meant to be written to a table in a SQL
/// database.
|
||||
abstract class SQLWritable {
|
||||
/// Const constructor, so that subclasses can declare `const` constructors.
const SQLWritable();
|
||||
|
||||
/// Returns a map of the object's properties and their values.
|
||||
///
|
||||
/// Note that the object might have properties which are meant to be
|
||||
/// inserted into a different table. Such properties should be excluded
|
||||
/// from this map.
///
/// NOTE(review): callers appear to pass this map straight to a batch
/// `insert` and to extend it with `..addAll(...)` (e.g. to add foreign
/// keys); presumably implementations should return a fresh, mutable map
/// whose keys are column names. Confirm against the call sites.
|
||||
Map<String, Object?> get sqlValue;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user