176 lines
5.7 KiB
Dart
176 lines
5.7 KiB
Dart
import 'package:jadb/_data_ingestion/kanjidic/objects.dart';
|
|
import 'package:jadb/util/romaji_transliteration.dart';
|
|
import 'package:xml/xml.dart';
|
|
|
|
/// Parses every `character` child element of [root] into a [Character].
///
/// The result holds one [Character] per direct `character` child of [root],
/// in document order. Within each character:
///
/// * `codepoint`, `radical`, `dic_number`, `query_code` and `reading_meaning`
///   are treated as optional; when one is missing, the fields derived from it
///   are empty lists (or `null` for the radical).
/// * `literal`, `misc` and the first `stroke_count` inside `misc` are
///   required. Any further `stroke_count` elements become `strokeMiscounts`.
/// * Only the first `rad_value` whose `rad_type` is `classical` is used.
/// * `dic_ref` elements whose `dr_type` is `moro` are collected separately
///   from all other dictionary references.
/// * `reading` elements are split by `r_type`: `ja_kun` becomes kunyomi,
///   `ja_on` becomes onyomi (passed through
///   [transliterateKatakanaToHiragana]), everything else becomes a generic
///   reading.
/// * A `meaning` without an `m_lang` attribute is recorded as `eng`.
///
/// Throws a [StateError] if a required element is missing and a
/// [FormatException] if a stroke count or classical radical value is not an
/// integer. A missing `cp_type`, `var_type`, `dr_type`, `qc_type` or `r_type`
/// attribute fails the null assertion on that attribute.
List<Character> parseKANJIDICData(XmlElement root) {
  // Reading types that get their own fields instead of landing in `readings`.
  const japaneseReadingTypes = ['ja_on', 'ja_kun'];

  final result = <Character>[];

  for (final c in root.findElements('character')) {
    final kanji = c.findElements('literal').first.innerText;

    final codepoint = c.findElements('codepoint').firstOrNull;
    final radical = c.findElements('radical').firstOrNull;
    final misc = c.findElements('misc').first;
    final dicNumber = c.findElements('dic_number').firstOrNull;
    // Read like the other optional sections: a character without a
    // `query_code` element yields an empty list instead of aborting the
    // whole parse with a StateError.
    final queryCode = c.findElements('query_code').firstOrNull;
    final readingMeaning = c.findElements('reading_meaning').firstOrNull;

    // Looked up once and shared by the filters below.
    final dicRefs =
        dicNumber?.findElements('dic_ref').toList() ?? <XmlElement>[];
    final readingElements =
        readingMeaning?.findAllElements('reading').toList() ?? <XmlElement>[];

    // TODO: Group readings and meanings by their rmgroup parent node.

    result.add(
      Character(
        literal: kanji,
        strokeCount: int.parse(
          misc.findElements('stroke_count').first.innerText,
        ),
        // These three are optional; an absent or non-numeric value parses
        // to null.
        grade: int.tryParse(
          misc.findElements('grade').firstOrNull?.innerText ?? '',
        ),
        frequency: int.tryParse(
          misc.findElements('freq').firstOrNull?.innerText ?? '',
        ),
        jlpt: int.tryParse(
          misc.findElements('jlpt').firstOrNull?.innerText ?? '',
        ),
        radicalName: misc
            .findElements('rad_name')
            .map((e) => e.innerText)
            .toList(),
        codepoints:
            codepoint
                ?.findElements('cp_value')
                .map(
                  (e) => CodePoint(
                    kanji: kanji,
                    type: e.getAttribute('cp_type')!,
                    codepoint: e.innerText,
                  ),
                )
                .toList() ??
            [],
        radical: radical
            ?.findElements('rad_value')
            .where((e) => e.getAttribute('rad_type') == 'classical')
            .map(
              (e) => Radical(kanji: kanji, radicalId: int.parse(e.innerText)),
            )
            .firstOrNull,
        // The first `stroke_count` is the accepted count (parsed above);
        // the remaining ones are kept as miscounts.
        strokeMiscounts: misc
            .findElements('stroke_count')
            .skip(1)
            .map((e) => int.parse(e.innerText))
            .toList(),
        variants: misc
            .findElements('variant')
            .map(
              (e) => Variant(
                kanji: kanji,
                type: e.getAttribute('var_type')!,
                variant: e.innerText,
              ),
            )
            .toList(),
        dictionaryReferences: dicRefs
            .where((e) => e.getAttribute('dr_type') != 'moro')
            .map(
              (e) => DictionaryReference(
                kanji: kanji,
                type: e.getAttribute('dr_type')!,
                ref: e.innerText,
              ),
            )
            .toList(),
        // `moro` references additionally carry optional page and volume
        // attributes.
        dictionaryReferencesMoro: dicRefs
            .where((e) => e.getAttribute('dr_type') == 'moro')
            .map(
              (e) => DictionaryReferenceMoro(
                kanji: kanji,
                ref: e.innerText,
                page: int.tryParse(e.getAttribute('m_page') ?? ''),
                volume: int.tryParse(e.getAttribute('m_vol') ?? ''),
              ),
            )
            .toList(),
        querycodes:
            queryCode
                ?.findElements('q_code')
                .map(
                  (e) => QueryCode(
                    kanji: kanji,
                    code: e.innerText,
                    type: e.getAttribute('qc_type')!,
                    skipMisclassification: e.getAttribute('skip_misclass'),
                  ),
                )
                .toList() ??
            [],
        readings: readingElements
            .where(
              (e) => !japaneseReadingTypes.contains(e.getAttribute('r_type')),
            )
            .map(
              (e) => Reading(
                kanji: kanji,
                type: e.getAttribute('r_type')!,
                reading: e.innerText,
              ),
            )
            .toList(),
        kunyomi: readingElements
            .where((e) => e.getAttribute('r_type') == 'ja_kun')
            .map(
              (e) => Kunyomi(
                kanji: kanji,
                yomi: e.innerText,
                isJouyou: e.getAttribute('r_status') == 'jy',
              ),
            )
            .toList(),
        onyomi: readingElements
            .where((e) => e.getAttribute('r_type') == 'ja_on')
            .map(
              (e) => Onyomi(
                kanji: kanji,
                yomi: transliterateKatakanaToHiragana(e.innerText),
                isJouyou: e.getAttribute('r_status') == 'jy',
                type: e.getAttribute('on_type'),
              ),
            )
            .toList(),
        meanings:
            readingMeaning
                ?.findAllElements('meaning')
                .map(
                  (e) => Meaning(
                    kanji: kanji,
                    language: e.getAttribute('m_lang') ?? 'eng',
                    meaning: e.innerText,
                  ),
                )
                .toList() ??
            [],
        nanori:
            readingMeaning
                ?.findElements('nanori')
                .map((e) => e.innerText)
                .toList() ??
            [],
      ),
    );
  }

  return result;
}
|