232 lines
6.9 KiB
Dart
232 lines
6.9 KiB
Dart
import 'dart:io';
|
|
|
|
import 'package:sqflite_common/sqlite_api.dart';
|
|
import 'package:xml/xml.dart';
|
|
import 'package:collection/collection.dart';
|
|
|
|
import 'objects.dart';
|
|
|
|
/// Converts a parsed KANJIDIC2 document into a list of [Character] objects.
///
/// Every direct `<character>` child of [root] produces one [Character],
/// together with the child objects (code points, radicals, variants,
/// dictionary references, query codes, readings, meanings, nanori) that are
/// later written to their own tables.
///
/// Throws a [StateError] if a character has no `<literal>` or no
/// `<stroke_count>` element, and a [FormatException] if a stroke count is not
/// an integer. Attributes read with `!` are required for the entry to be
/// usable; a missing one raises a null-check error.
List<Character> transformXML(XmlElement root) {
  final List<Character> result = [];

  for (final c in root.findElements('character')) {
    final kanji = c.findElements('literal').first.innerText;

    // `grade`, `freq`, `jlpt` and `rad_name` are nested inside `<misc>` in
    // the KANJIDIC2 DTD, not directly below `<character>`, so they have to be
    // looked up among the descendants (just like `stroke_count` below).
    int? optionalInt(String tag) =>
        int.tryParse(c.findAllElements(tag).firstOrNull?.innerText ?? '');

    result.add(
      Character(
        literal: kanji,
        // The first `stroke_count` is the accepted count; any further ones
        // are common miscounts and end up in `strokeMiscounts`.
        strokeCount:
            int.parse(c.findAllElements('stroke_count').first.innerText),
        grade: optionalInt('grade'),
        frequency: optionalInt('freq'),
        jlpt: optionalInt('jlpt'),
        radicalName:
            c.findAllElements('rad_name').map((e) => e.innerText).toList(),
        codepoints: c
            .findAllElements('cp_value')
            .map(
              (e) => CodePoint(
                kanji: kanji,
                type: e.getAttribute('cp_type')!,
                codepoint: e.innerText,
              ),
            )
            .toList(),
        radicals: c
            .findAllElements('rad_value')
            .map(
              (e) => Radical(
                kanji: kanji,
                type: e.getAttribute('rad_type')!,
                radical: e.innerText,
              ),
            )
            .toList(),
        strokeMiscounts: c
            .findAllElements('stroke_count')
            .skip(1)
            .map((e) => int.parse(e.innerText))
            .toList(),
        variants: c
            .findAllElements('variant')
            .map(
              (e) => Variant(
                kanji: kanji,
                type: e.getAttribute('var_type')!,
                variant: e.innerText,
              ),
            )
            .toList(),
        // References of type `moro` carry extra volume/page attributes and
        // are therefore collected separately from the other references.
        dictionaryReferences: c
            .findAllElements('dic_ref')
            .where((e) => e.getAttribute('dr_type') != 'moro')
            .map(
              (e) => DictionaryReference(
                kanji: kanji,
                type: e.getAttribute('dr_type')!,
                ref: e.innerText,
              ),
            )
            .toList(),
        dictionaryReferencesMoro: c
            .findAllElements('dic_ref')
            .where((e) => e.getAttribute('dr_type') == 'moro')
            .map(
              (e) => DictionaryReferenceMoro(
                kanji: kanji,
                ref: e.innerText,
                page: int.tryParse(e.getAttribute('m_page') ?? ''),
                volume: int.tryParse(e.getAttribute('m_vol') ?? ''),
              ),
            )
            .toList(),
        querycodes: c
            .findAllElements('q_code')
            .map(
              (e) => QueryCode(
                kanji: kanji,
                code: e.innerText,
                type: e.getAttribute('qc_type')!,
                skipMisclassification: e.getAttribute('skip_misclass'),
              ),
            )
            .toList(),
        // `ja_on` and `ja_kun` readings get their own lists below; every
        // other reading type is kept as a generic reading.
        readings: c
            .findAllElements('reading')
            .where(
              (e) => !['ja_on', 'ja_kun'].contains(e.getAttribute('r_type')),
            )
            .map(
              (e) => Reading(
                kanji: kanji,
                type: e.getAttribute('r_type')!,
                reading: e.innerText,
              ),
            )
            .toList(),
        kunyomi: c
            .findAllElements('reading')
            .where((e) => e.getAttribute('r_type') == 'ja_kun')
            .map(
              (e) => Kunyomi(
                kanji: kanji,
                yomi: e.innerText,
                isJouyou: e.getAttribute('r_status') == 'jy',
              ),
            )
            .toList(),
        onyomi: c
            .findAllElements('reading')
            .where((e) => e.getAttribute('r_type') == 'ja_on')
            .map(
              (e) => Onyomi(
                kanji: kanji,
                yomi: e.innerText,
                isJouyou: e.getAttribute('r_status') == 'jy',
                type: e.getAttribute('on_type'),
              ),
            )
            .toList(),
        meanings: c
            .findAllElements('meaning')
            .map(
              (e) => Meaning(
                kanji: kanji,
                // A meaning without `m_lang` is treated as English.
                language: e.getAttribute('m_lang') ?? 'eng',
                meaning: e.innerText,
              ),
            )
            .toList(),
        nanori: c.findAllElements('nanori').map((e) => e.innerText).toList(),
      ),
    );
  }

  return result;
}
|
|
|
|
/// Writes every entry of [characters], together with its dependent rows, to
/// [db] using a single batch.
///
/// For each character one row is queued for the character table, followed by
/// the rows of its radical names, code points, radicals, stroke miscounts,
/// variants, dictionary references, query codes, readings, kun/on readings,
/// meanings and nanori.
///
/// The returned future completes only after the batch has been committed, so
/// a failing commit is reported to the caller.
Future<void> insertIntoDB(List<Character> characters, Database db) async {
  final b = db.batch();

  for (final c in characters) {
    // Debug aid for locating duplicated dictionary references:
    // if (c.dictionaryReferences.any((e) =>
    //     c.dictionaryReferences
    //         .where((e2) => e.kanji == e2.kanji && e.type == e2.type)
    //         .length >
    //     1)) {
    //   print(c.dictionaryReferences.map((e) => e.sqlValue).toList());
    // }
    b.insert(TableNames.character, c.sqlValue);

    for (final n in c.radicalName) {
      b.insert(TableNames.radicalName, {'kanji': c.literal, 'name': n});
    }

    for (final cp in c.codepoints) {
      b.insert(TableNames.codepoint, cp.sqlValue);
    }

    for (final r in c.radicals) {
      b.insert(TableNames.radical, r.sqlValue);
    }

    for (final sm in c.strokeMiscounts) {
      b.insert(
        TableNames.strokeMiscount,
        {
          'kanji': c.literal,
          'strokeCount': sm,
        },
      );
    }

    for (final v in c.variants) {
      b.insert(TableNames.variant, v.sqlValue);
    }

    for (final dr in c.dictionaryReferences) {
      // There are duplicate entries here, so conflicting rows are skipped
      // instead of aborting the whole batch.
      b.insert(
        TableNames.dictionaryReference,
        dr.sqlValue,
        conflictAlgorithm: ConflictAlgorithm.ignore,
      );
    }

    for (final drm in c.dictionaryReferencesMoro) {
      b.insert(TableNames.dictionaryReferenceMoro, drm.sqlValue);
    }

    for (final q in c.querycodes) {
      b.insert(TableNames.queryCode, q.sqlValue);
    }

    for (final r in c.readings) {
      b.insert(TableNames.reading, r.sqlValue);
    }

    for (final k in c.kunyomi) {
      b.insert(TableNames.kunyomi, k.sqlValue);
    }

    for (final o in c.onyomi) {
      b.insert(TableNames.onyomi, o.sqlValue);
    }

    for (final m in c.meanings) {
      b.insert(TableNames.meaning, m.sqlValue);
    }

    for (final n in c.nanori) {
      b.insert(
        TableNames.nanori,
        {
          'kanji': c.literal,
          'nanori': n,
        },
      );
    }
  }

  // The commit must be awaited: otherwise this function returns before the
  // rows are written and a commit error is silently lost. The per-statement
  // results are never read, so they are not collected.
  await b.commit(noResult: true);
}
|
|
|
|
/// Imports the KANJIDIC2 data set into [db].
///
/// Reads `data/kanjidic2.xml`, parses it, converts the `<kanjidic2>` root
/// element with [transformXML] and stores the result with [insertIntoDB].
/// A progress line is printed before each stage.
Future<void> addDataFromKANJIDIC(Database db) async {
  print('[KANJIDIC2] Reading file...');
  final xmlSource = File('data/kanjidic2.xml').readAsStringSync();

  print('[KANJIDIC2] Parsing XML...');
  final document = XmlDocument.parse(xmlSource);
  // The whole data set lives below the top-level `kanjidic2` element.
  final kanjidicRoot = document.getElement('kanjidic2')!;

  print('[KANJIDIC2] Transforming data...');
  final characters = transformXML(kanjidicRoot);

  print('[KANJIDIC2] Writing to database...');
  await insertIntoDB(characters, db);
}
|