data_ingestion: fix kanji grade, JLPT, and newspaper rank ingestion
This commit is contained in:
@@ -10,40 +10,51 @@ List<Character> transformXML(XmlElement root) {
|
||||
final List<Character> result = [];
|
||||
for (final c in root.findElements('character')) {
|
||||
final kanji = c.findElements('literal').first.innerText;
|
||||
|
||||
final codepoint = c.findElements('codepoint').firstOrNull;
|
||||
final radical = c.findElements('radical').firstOrNull;
|
||||
final misc = c.findElements('misc').first;
|
||||
final dic_number = c.findElements('dic_number').firstOrNull;
|
||||
|
||||
result.add(
|
||||
Character(
|
||||
literal: kanji,
|
||||
strokeCount:
|
||||
int.parse(c.findAllElements('stroke_count').first.innerText),
|
||||
grade:
|
||||
int.tryParse(c.findElements('grade').firstOrNull?.innerText ?? ''),
|
||||
frequency:
|
||||
int.tryParse(c.findElements('freq').firstOrNull?.innerText ?? ''),
|
||||
int.parse(misc.findElements('stroke_count').first.innerText),
|
||||
grade: int.tryParse(
|
||||
misc.findElements('grade').firstOrNull?.innerText ?? ''),
|
||||
frequency: int.tryParse(
|
||||
misc.findElements('freq').firstOrNull?.innerText ?? ''),
|
||||
jlpt: int.tryParse(
|
||||
c.findElements('rad_name').firstOrNull?.innerText ?? '',
|
||||
misc.findElements('jlpt').firstOrNull?.innerText ?? '',
|
||||
),
|
||||
radicalName:
|
||||
c.findElements('rad_name').map((e) => e.innerText).toList(),
|
||||
codepoints: c
|
||||
.findAllElements('cp_value')
|
||||
.map(
|
||||
(e) => CodePoint(
|
||||
kanji: kanji,
|
||||
type: e.getAttribute('cp_type')!,
|
||||
codepoint: e.innerText,
|
||||
),
|
||||
)
|
||||
.toList(),
|
||||
radicals: c
|
||||
.findAllElements('rad_value')
|
||||
.map(
|
||||
(e) => Radical(
|
||||
kanji: kanji,
|
||||
type: e.getAttribute('rad_type')!,
|
||||
radical: e.innerText,
|
||||
),
|
||||
)
|
||||
.toList(),
|
||||
radicalName: radical
|
||||
?.findElements('rad_name')
|
||||
.map((e) => e.innerText)
|
||||
.toList() ??
|
||||
[],
|
||||
codepoints: codepoint
|
||||
?.findElements('cp_value')
|
||||
.map(
|
||||
(e) => CodePoint(
|
||||
kanji: kanji,
|
||||
type: e.getAttribute('cp_type')!,
|
||||
codepoint: e.innerText,
|
||||
),
|
||||
)
|
||||
.toList() ??
|
||||
[],
|
||||
radicals: radical
|
||||
?.findElements('rad_value')
|
||||
.map(
|
||||
(e) => Radical(
|
||||
kanji: kanji,
|
||||
type: e.getAttribute('rad_type')!,
|
||||
radical: e.innerText,
|
||||
),
|
||||
)
|
||||
.toList() ??
|
||||
[],
|
||||
strokeMiscounts: c
|
||||
.findAllElements('stroke_count')
|
||||
.skip(1)
|
||||
@@ -59,29 +70,31 @@ List<Character> transformXML(XmlElement root) {
|
||||
),
|
||||
)
|
||||
.toList(),
|
||||
dictionaryReferences: c
|
||||
.findAllElements('dic_ref')
|
||||
.where((e) => e.getAttribute('dr_type') != 'moro')
|
||||
.map(
|
||||
(e) => DictionaryReference(
|
||||
kanji: kanji,
|
||||
type: e.getAttribute('dr_type')!,
|
||||
ref: e.innerText,
|
||||
),
|
||||
)
|
||||
.toList(),
|
||||
dictionaryReferencesMoro: c
|
||||
.findAllElements('dic_ref')
|
||||
.where((e) => e.getAttribute('dr_type') == 'moro')
|
||||
.map(
|
||||
(e) => DictionaryReferenceMoro(
|
||||
kanji: kanji,
|
||||
ref: e.innerText,
|
||||
page: int.tryParse(e.getAttribute('m_page') ?? ''),
|
||||
volume: int.tryParse(e.getAttribute('m_vol') ?? ''),
|
||||
),
|
||||
)
|
||||
.toList(),
|
||||
dictionaryReferences: dic_number
|
||||
?.findElements('dic_ref')
|
||||
.where((e) => e.getAttribute('dr_type') != 'moro')
|
||||
.map(
|
||||
(e) => DictionaryReference(
|
||||
kanji: kanji,
|
||||
type: e.getAttribute('dr_type')!,
|
||||
ref: e.innerText,
|
||||
),
|
||||
)
|
||||
.toList() ??
|
||||
[],
|
||||
dictionaryReferencesMoro: dic_number
|
||||
?.findElements('dic_ref')
|
||||
.where((e) => e.getAttribute('dr_type') == 'moro')
|
||||
.map(
|
||||
(e) => DictionaryReferenceMoro(
|
||||
kanji: kanji,
|
||||
ref: e.innerText,
|
||||
page: int.tryParse(e.getAttribute('m_page') ?? ''),
|
||||
volume: int.tryParse(e.getAttribute('m_vol') ?? ''),
|
||||
),
|
||||
)
|
||||
.toList() ??
|
||||
[],
|
||||
querycodes: c
|
||||
.findAllElements('q_code')
|
||||
.map(
|
||||
|
||||
Reference in New Issue
Block a user