import 'dart:io';

import 'package:collection/collection.dart';
import 'package:sqflite_common/sqlite_api.dart';
import 'package:xml/xml.dart';

import 'objects.dart';

/// Parses the text of the first descendant of [character] named [tag] as an
/// integer.
///
/// Returns `null` when no such element exists or its text is not an integer.
/// The search covers all descendants (not only direct children) because the
/// optional numeric fields of KANJIDIC2 are nested inside `<misc>`.
int? _optionalInt(XmlElement character, String tag) =>
    int.tryParse(character.findAllElements(tag).firstOrNull?.innerText ?? '');

/// Converts a single `<character>` element into a [Character].
///
/// Throws a [StateError] if the element has no `<literal>` or no
/// `<stroke_count>`, a [FormatException] if a stroke count is not an integer,
/// and a null-check error if a type attribute that is asserted with `!` is
/// missing.
Character _parseCharacter(XmlElement c) {
  final kanji = c.findElements('literal').first.innerText;

  // The first <stroke_count> is the accepted count; any further ones are
  // stored as miscounts.
  final strokeCounts = c
      .findAllElements('stroke_count')
      .map((e) => int.parse(e.innerText))
      .toList();

  // Collected once because each list is split into several fields below.
  final dicRefs = c.findAllElements('dic_ref').toList();
  final readings = c.findAllElements('reading').toList();

  // Reading types that get their own dedicated fields (onyomi / kunyomi).
  const japaneseReadingTypes = {'ja_on', 'ja_kun'};

  return Character(
    literal: kanji,
    strokeCount: strokeCounts.first,
    grade: _optionalInt(c, 'grade'),
    frequency: _optionalInt(c, 'freq'),
    jlpt: _optionalInt(c, 'jlpt'),
    radicalName:
        c.findAllElements('rad_name').map((e) => e.innerText).toList(),
    codepoints: c
        .findAllElements('cp_value')
        .map(
          (e) => CodePoint(
            kanji: kanji,
            type: e.getAttribute('cp_type')!,
            codepoint: e.innerText,
          ),
        )
        .toList(),
    radicals: c
        .findAllElements('rad_value')
        .map(
          (e) => Radical(
            kanji: kanji,
            type: e.getAttribute('rad_type')!,
            radical: e.innerText,
          ),
        )
        .toList(),
    strokeMiscounts: strokeCounts.skip(1).toList(),
    variants: c
        .findAllElements('variant')
        .map(
          (e) => Variant(
            kanji: kanji,
            type: e.getAttribute('var_type')!,
            variant: e.innerText,
          ),
        )
        .toList(),
    // 'moro' references carry extra volume/page attributes, so they are
    // split off into their own list.
    dictionaryReferences: dicRefs
        .where((e) => e.getAttribute('dr_type') != 'moro')
        .map(
          (e) => DictionaryReference(
            kanji: kanji,
            type: e.getAttribute('dr_type')!,
            ref: e.innerText,
          ),
        )
        .toList(),
    dictionaryReferencesMoro: dicRefs
        .where((e) => e.getAttribute('dr_type') == 'moro')
        .map(
          (e) => DictionaryReferenceMoro(
            kanji: kanji,
            ref: e.innerText,
            page: int.tryParse(e.getAttribute('m_page') ?? ''),
            volume: int.tryParse(e.getAttribute('m_vol') ?? ''),
          ),
        )
        .toList(),
    querycodes: c
        .findAllElements('q_code')
        .map(
          (e) => QueryCode(
            kanji: kanji,
            code: e.innerText,
            type: e.getAttribute('qc_type')!,
            skipMisclassification: e.getAttribute('skip_misclass'),
          ),
        )
        .toList(),
    readings: readings
        .where((e) => !japaneseReadingTypes.contains(e.getAttribute('r_type')))
        .map(
          (e) => Reading(
            kanji: kanji,
            type: e.getAttribute('r_type')!,
            reading: e.innerText,
          ),
        )
        .toList(),
    kunyomi: readings
        .where((e) => e.getAttribute('r_type') == 'ja_kun')
        .map(
          (e) => Kunyomi(
            kanji: kanji,
            yomi: e.innerText,
            isJouyou: e.getAttribute('r_status') == 'jy',
          ),
        )
        .toList(),
    onyomi: readings
        .where((e) => e.getAttribute('r_type') == 'ja_on')
        .map(
          (e) => Onyomi(
            kanji: kanji,
            yomi: e.innerText,
            isJouyou: e.getAttribute('r_status') == 'jy',
            type: e.getAttribute('on_type'),
          ),
        )
        .toList(),
    meanings: c
        .findAllElements('meaning')
        .map(
          (e) => Meaning(
            kanji: kanji,
            // A meaning without an explicit language is treated as English.
            language: e.getAttribute('m_lang') ?? 'eng',
            meaning: e.innerText,
          ),
        )
        .toList(),
    nanori: c.findAllElements('nanori').map((e) => e.innerText).toList(),
  );
}

/// Converts every `<character>` child of [root] into a [Character].
///
/// [root] is expected to be the `<kanjidic2>` element; only its direct
/// `<character>` children are visited. The result preserves document order.
List<Character> transformXML(XmlElement root) =>
    root.findElements('character').map(_parseCharacter).toList();

/// Writes [characters] and all of their related rows to [db] in one batch.
///
/// [characters] must contain only [Character] objects. The returned future
/// completes once the batch has been committed, and completes with an error
/// if the commit fails.
Future<void> insertIntoDB(List characters, Database db) async {
  final b = db.batch();

  // The parameter stays an untyped list for caller compatibility; the cast
  // gives statically checked member access below.
  for (final c in characters.cast<Character>()) {
    // Debug helper for locating duplicate dictionary references:
    // if (c.dictionaryReferences.any((e) =>
    //     c.dictionaryReferences
    //         .where((e2) => e.kanji == e2.kanji && e.type == e2.type)
    //         .length >
    //     1)) {
    //   print(c.dictionaryReferences.map((e) => e.sqlValue).toList());
    // }

    b.insert(TableNames.character, c.sqlValue);

    for (final n in c.radicalName) {
      b.insert(TableNames.radicalName, {'kanji': c.literal, 'name': n});
    }
    for (final cp in c.codepoints) {
      b.insert(TableNames.codepoint, cp.sqlValue);
    }
    for (final r in c.radicals) {
      b.insert(TableNames.radical, r.sqlValue);
    }
    for (final sm in c.strokeMiscounts) {
      b.insert(
        TableNames.strokeMiscount,
        {
          'kanji': c.literal,
          'strokeCount': sm,
        },
      );
    }
    for (final v in c.variants) {
      b.insert(TableNames.variant, v.sqlValue);
    }
    for (final dr in c.dictionaryReferences) {
      // There are duplicate entries here
      b.insert(
        TableNames.dictionaryReference,
        dr.sqlValue,
        conflictAlgorithm: ConflictAlgorithm.ignore,
      );
    }
    for (final drm in c.dictionaryReferencesMoro) {
      b.insert(TableNames.dictionaryReferenceMoro, drm.sqlValue);
    }
    for (final q in c.querycodes) {
      b.insert(TableNames.queryCode, q.sqlValue);
    }
    for (final r in c.readings) {
      b.insert(TableNames.reading, r.sqlValue);
    }
    for (final k in c.kunyomi) {
      b.insert(TableNames.kunyomi, k.sqlValue);
    }
    for (final o in c.onyomi) {
      b.insert(TableNames.onyomi, o.sqlValue);
    }
    for (final m in c.meanings) {
      b.insert(TableNames.meaning, m.sqlValue);
    }
    for (final n in c.nanori) {
      b.insert(
        TableNames.nanori,
        {
          'kanji': c.literal,
          'nanori': n,
        },
      );
    }
  }

  // Await the commit so callers only continue after the data is written and
  // so that failures propagate; the per-statement results are not needed.
  await b.commit(noResult: true);
}

/// Reads `data/kanjidic2.xml`, parses it and stores its contents in [db].
///
/// Progress is reported on stdout. Throws if the file cannot be read, if the
/// XML is malformed, or if the document has no `<kanjidic2>` root element.
Future<void> addDataFromKANJIDIC(Database db) async {
  print('[KANJIDIC2] Reading file...');
  final rawXML = await File('data/kanjidic2.xml').readAsString();

  print('[KANJIDIC2] Parsing XML...');
  final root = XmlDocument.parse(rawXML).getElement('kanjidic2')!;

  print('[KANJIDIC2] Transforming data...');
  final entries = transformXML(root);

  print('[KANJIDIC2] Writing to database...');
  await insertIntoDB(entries, db);
}