import 'package:jadb/_data_ingestion/kanjidic/objects.dart';
import 'package:jadb/util/romaji_transliteration.dart';
import 'package:xml/xml.dart';

/// Converts every `<character>` child of [root] into a [Character].
///
/// Only direct `<character>` children of [root] are visited, in document
/// order. For each one:
///
/// * `<literal>`, `<misc>` and at least one `<stroke_count>` inside `<misc>`
///   are required; a [StateError] is thrown if any of them is missing.
/// * `<codepoint>`, `<radical>`, `<dic_number>`, `<query_code>` and
///   `<reading_meaning>` are treated as optional; a missing element yields an
///   empty list (or a `null` radical).
/// * The first `<stroke_count>` becomes the stroke count, any further ones
///   are stored as stroke miscounts.
/// * Only the `rad_value` whose `rad_type` is `classical` is kept.
/// * `dic_ref` entries with `dr_type == 'moro'` are stored separately from the
///   other dictionary references, together with their `m_page` / `m_vol`
///   attributes when those parse as integers.
/// * `reading` elements are split by `r_type` into on'yomi (`ja_on`), kun'yomi
///   (`ja_kun`) and everything else. On'yomi text is passed through
///   [transliterateKatakanaToHiragana] before being stored.
/// * A `meaning` without an `m_lang` attribute is recorded as `'eng'`.
///
/// Throws a [FormatException] if a stroke count or a classical radical value
/// is not an integer. Attributes read with `!` (`cp_type`, `var_type`,
/// `dr_type`, `qc_type`, `r_type`) fail a null check if they are absent.
List<Character> parseKANJIDICData(XmlElement root) {
  final result = <Character>[];

  for (final c in root.findElements('character')) {
    final kanji = c.findElements('literal').first.innerText;
    final codepoint = c.findElements('codepoint').firstOrNull;
    final radical = c.findElements('radical').firstOrNull;
    final misc = c.findElements('misc').first;
    final dicNumber = c.findElements('dic_number').firstOrNull;
    // `query_code` may be absent for a character, so it must not be read with
    // `.first` (which would throw and abort the whole import).
    final queryCode = c.findElements('query_code').firstOrNull;
    final readingMeaning = c.findElements('reading_meaning').firstOrNull;

    // Parse all stroke counts once: the first is the accepted count, the
    // remainder are recorded as miscounts.
    final strokeCounts = misc
        .findElements('stroke_count')
        .map((e) => int.parse(e.innerText))
        .toList();

    // Collected once because each list is filtered several times below.
    final dicRefs =
        dicNumber?.findElements('dic_ref').toList() ?? const <XmlElement>[];
    // `findAllElements` is used because readings and meanings are nested
    // below `reading_meaning` rather than being direct children.
    final readingElements =
        readingMeaning?.findAllElements('reading').toList() ??
        const <XmlElement>[];

    // TODO: Group readings and meanings by their rmgroup parent node.
    result.add(
      Character(
        literal: kanji,
        strokeCount: strokeCounts.first,
        grade: int.tryParse(
          misc.findElements('grade').firstOrNull?.innerText ?? '',
        ),
        frequency: int.tryParse(
          misc.findElements('freq').firstOrNull?.innerText ?? '',
        ),
        jlpt: int.tryParse(
          misc.findElements('jlpt').firstOrNull?.innerText ?? '',
        ),
        radicalName: misc
            .findElements('rad_name')
            .map((e) => e.innerText)
            .toList(),
        codepoints:
            codepoint
                ?.findElements('cp_value')
                .map(
                  (e) => CodePoint(
                    kanji: kanji,
                    type: e.getAttribute('cp_type')!,
                    codepoint: e.innerText,
                  ),
                )
                .toList() ??
            [],
        radical: radical
            ?.findElements('rad_value')
            .where((e) => e.getAttribute('rad_type') == 'classical')
            .map(
              (e) => Radical(kanji: kanji, radicalId: int.parse(e.innerText)),
            )
            .firstOrNull,
        strokeMiscounts: strokeCounts.skip(1).toList(),
        variants: misc
            .findElements('variant')
            .map(
              (e) => Variant(
                kanji: kanji,
                type: e.getAttribute('var_type')!,
                variant: e.innerText,
              ),
            )
            .toList(),
        dictionaryReferences: dicRefs
            .where((e) => e.getAttribute('dr_type') != 'moro')
            .map(
              (e) => DictionaryReference(
                kanji: kanji,
                type: e.getAttribute('dr_type')!,
                ref: e.innerText,
              ),
            )
            .toList(),
        dictionaryReferencesMoro: dicRefs
            .where((e) => e.getAttribute('dr_type') == 'moro')
            .map(
              (e) => DictionaryReferenceMoro(
                kanji: kanji,
                ref: e.innerText,
                page: int.tryParse(e.getAttribute('m_page') ?? ''),
                volume: int.tryParse(e.getAttribute('m_vol') ?? ''),
              ),
            )
            .toList(),
        querycodes:
            queryCode
                ?.findElements('q_code')
                .map(
                  (e) => QueryCode(
                    kanji: kanji,
                    code: e.innerText,
                    type: e.getAttribute('qc_type')!,
                    skipMisclassification: e.getAttribute('skip_misclass'),
                  ),
                )
                .toList() ??
            [],
        readings: readingElements
            .where(
              (e) => !['ja_on', 'ja_kun'].contains(e.getAttribute('r_type')),
            )
            .map(
              (e) => Reading(
                kanji: kanji,
                type: e.getAttribute('r_type')!,
                reading: e.innerText,
              ),
            )
            .toList(),
        kunyomi: readingElements
            .where((e) => e.getAttribute('r_type') == 'ja_kun')
            .map(
              (e) => Kunyomi(
                kanji: kanji,
                yomi: e.innerText,
                isJouyou: e.getAttribute('r_status') == 'jy',
              ),
            )
            .toList(),
        onyomi: readingElements
            .where((e) => e.getAttribute('r_type') == 'ja_on')
            .map(
              (e) => Onyomi(
                kanji: kanji,
                yomi: transliterateKatakanaToHiragana(e.innerText),
                isJouyou: e.getAttribute('r_status') == 'jy',
                type: e.getAttribute('on_type'),
              ),
            )
            .toList(),
        meanings:
            readingMeaning
                ?.findAllElements('meaning')
                .map(
                  (e) => Meaning(
                    kanji: kanji,
                    language: e.getAttribute('m_lang') ?? 'eng',
                    meaning: e.innerText,
                  ),
                )
                .toList() ??
            [],
        nanori:
            readingMeaning
                ?.findElements('nanori')
                .map((e) => e.innerText)
                .toList() ??
            [],
      ),
    );
  }

  return result;
}