166 lines
5.6 KiB
Dart
166 lines
5.6 KiB
Dart
import 'package:jadb/_data_ingestion/kanjidic/objects.dart';
|
|
import 'package:xml/xml.dart';
|
|
|
|
/// Parses every `<character>` child of a KANJIDIC2 [root] element into a
/// [Character].
///
/// Only direct `<character>` children of [root] are visited, and the result
/// preserves document order.
///
/// Throws a [StateError] if a character has no `<literal>`, `<misc>` or
/// `<stroke_count>` element, and a [FormatException] if a stroke count or a
/// classical radical number is not a valid integer.
List<Character> parseKANJIDICData(XmlElement root) =>
    root.findElements('character').map(_parseCharacter).toList();

/// Converts a single KANJIDIC2 `<character>` element [c] into a [Character].
Character _parseCharacter(XmlElement c) {
  final kanji = c.findElements('literal').first.innerText;

  final codepoint = c.findElements('codepoint').firstOrNull;
  final radical = c.findElements('radical').firstOrNull;
  final misc = c.findElements('misc').first;
  final dicNumber = c.findElements('dic_number').firstOrNull;
  // `<query_code>` is optional in the KANJIDIC2 DTD (just like `<dic_number>`
  // and `<reading_meaning>`), so a character without it must not abort the
  // whole import; it simply ends up with no query codes.
  final queryCode = c.findElements('query_code').firstOrNull;
  final readingMeaning = c.findElements('reading_meaning').firstOrNull;

  // TODO: Group readings and meanings by their rmgroup parent node.

  // Returns the integer content of the first `<name>` child of `<misc>`, or
  // null when the element is absent or its text is not an integer.
  int? optionalMiscInt(String name) =>
      int.tryParse(misc.findElements(name).firstOrNull?.innerText ?? '');

  // The first `<stroke_count>` is the accepted count; any further ones are
  // recorded as miscounts.
  final strokeCounts = misc
      .findElements('stroke_count')
      .map((e) => int.parse(e.innerText))
      .toList();

  // Materialise the element lists that are filtered more than once below so
  // the XML tree is only walked once per character for each of them.
  final dicRefs =
      dicNumber?.findElements('dic_ref').toList() ?? <XmlElement>[];
  final readingElements =
      readingMeaning?.findAllElements('reading').toList() ?? <XmlElement>[];

  return Character(
    literal: kanji,
    strokeCount: strokeCounts.first,
    grade: optionalMiscInt('grade'),
    frequency: optionalMiscInt('freq'),
    jlpt: optionalMiscInt('jlpt'),
    radicalName:
        misc.findElements('rad_name').map((e) => e.innerText).toList(),
    codepoints: codepoint
            ?.findElements('cp_value')
            .map(
              (e) => CodePoint(
                kanji: kanji,
                type: e.getAttribute('cp_type')!,
                codepoint: e.innerText,
              ),
            )
            .toList() ??
        [],
    // Only the classical radical number is kept; other `rad_type` values are
    // ignored.
    radical: radical
        ?.findElements('rad_value')
        .where((e) => e.getAttribute('rad_type') == 'classical')
        .map(
          (e) => Radical(
            kanji: kanji,
            radicalId: int.parse(e.innerText),
          ),
        )
        .firstOrNull,
    strokeMiscounts: strokeCounts.skip(1).toList(),
    variants: misc
        .findElements('variant')
        .map(
          (e) => Variant(
            kanji: kanji,
            type: e.getAttribute('var_type')!,
            variant: e.innerText,
          ),
        )
        .toList(),
    // `moro` references carry extra volume/page attributes and are therefore
    // stored separately from all other dictionary references.
    dictionaryReferences: dicRefs
        .where((e) => e.getAttribute('dr_type') != 'moro')
        .map(
          (e) => DictionaryReference(
            kanji: kanji,
            type: e.getAttribute('dr_type')!,
            ref: e.innerText,
          ),
        )
        .toList(),
    dictionaryReferencesMoro: dicRefs
        .where((e) => e.getAttribute('dr_type') == 'moro')
        .map(
          (e) => DictionaryReferenceMoro(
            kanji: kanji,
            ref: e.innerText,
            page: int.tryParse(e.getAttribute('m_page') ?? ''),
            volume: int.tryParse(e.getAttribute('m_vol') ?? ''),
          ),
        )
        .toList(),
    querycodes: queryCode
            ?.findElements('q_code')
            .map(
              (e) => QueryCode(
                kanji: kanji,
                code: e.innerText,
                type: e.getAttribute('qc_type')!,
                skipMisclassification: e.getAttribute('skip_misclass'),
              ),
            )
            .toList() ??
        [],
    // Every reading that is neither on'yomi nor kun'yomi.
    readings: readingElements
        .where(
          (e) => !['ja_on', 'ja_kun'].contains(e.getAttribute('r_type')),
        )
        .map(
          (e) => Reading(
            kanji: kanji,
            type: e.getAttribute('r_type')!,
            reading: e.innerText,
          ),
        )
        .toList(),
    kunyomi: readingElements
        .where((e) => e.getAttribute('r_type') == 'ja_kun')
        .map(
          (e) => Kunyomi(
            kanji: kanji,
            yomi: e.innerText,
            isJouyou: e.getAttribute('r_status') == 'jy',
          ),
        )
        .toList(),
    onyomi: readingElements
        .where((e) => e.getAttribute('r_type') == 'ja_on')
        .map(
          (e) => Onyomi(
            kanji: kanji,
            yomi: e.innerText,
            isJouyou: e.getAttribute('r_status') == 'jy',
            type: e.getAttribute('on_type'),
          ),
        )
        .toList(),
    meanings: readingMeaning
            ?.findAllElements('meaning')
            .map(
              (e) => Meaning(
                kanji: kanji,
                // A meaning without `m_lang` is treated as English.
                language: e.getAttribute('m_lang') ?? 'eng',
                meaning: e.innerText,
              ),
            )
            .toList() ??
        [],
    nanori: readingMeaning
            ?.findElements('nanori')
            .map((e) => e.innerText)
            .toList() ??
        [],
  );
}
|