import 'dart:convert';
import 'dart:io';

import 'package:csv/csv.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
import 'package:xml/xml_events.dart';

/// Parses the JLPT vocabulary CSV files in [files] into [JLPTRankedWord]s.
///
/// [files] maps a JLPT level label to the CSV file that lists the words of
/// that level. Every row must have exactly three string columns, in this
/// order: kanji (may be empty), readings, and a comma-separated list of
/// meanings.
///
/// The returned list follows the iteration order of [files] and, within each
/// file, the order of its rows.
///
/// Throws an [Exception] if one of the files does not exist or if a row does
/// not have exactly three columns.
// NOTE(review): the key type of [files] is assumed to be `String` (it is only
// interpolated into messages and forwarded to `JLPTRankedWord.jlptLevel`) —
// confirm against the declaration of `JLPTRankedWord`.
Future<List<JLPTRankedWord>> parseJLPTRankedWords(
  Map<String, File> files,
) async {
  final result = <JLPTRankedWord>[];

  final codec = CsvCodec(
    fieldDelimiter: ',',
    lineDelimiter: '\n',
    quoteMode: QuoteMode.strings,
    escapeCharacter: '\\',
  );

  for (final entry in files.entries) {
    final jlptLevel = entry.key;
    final file = entry.value;

    if (!file.existsSync()) {
      throw Exception('File $jlptLevel does not exist');
    }

    final words = await file
        .openRead()
        .transform(utf8.decoder)
        .transform(codec.decoder)
        .flatten()
        .map((row) {
          // Validate the shape first so a malformed row is reported with its
          // content rather than as an index or cast error further down.
          if (row.length != 3) {
            throw Exception('Invalid line in $jlptLevel: $row');
          }
          return row;
        })
        .map((row) => row.map((e) => e as String).toList())
        .map(
          (row) => JLPTRankedWord(
            readings: _splitReadings(row[1]),
            kanji: _cleanKanji(row[0]),
            jlptLevel: jlptLevel,
            meanings: row[2].split(',').expand(cleanMeaning).toList(),
          ),
        )
        .toList();

    result.addAll(words);
  }

  return result;
}

/// Normalizes the kanji column of a row, returning `null` when nothing is left.
///
/// Drops a leading `お・` prefix and any parenthesized annotation (ASCII or
/// full-width parentheses), then trims surrounding whitespace.
String? _cleanKanji(String field) {
  final cleaned = field
      .replaceFirst(RegExp('^お・'), '')
      // A bare `(.*)` would be a capture group matching the whole field and
      // erase it; match literal parentheses instead.
      .replaceAll(RegExp(r'[(（].*[)）]'), '')
      .trim();
  return cleaned.isEmpty ? null : cleaned;
}

/// Splits the readings column of a row into its individual readings.
///
/// Separators are `・`, `/`, `、`, `(`, `)`, `:`, `?`, `+` and whitespace.
/// Empty fragments (e.g. produced by consecutive separators) are dropped.
List<String> _splitReadings(String field) => field
    // Raw string so that `\s` really means "whitespace" instead of
    // degrading to the literal letter `s`.
    .split(RegExp(r'[・/、():?+\s]'))
    .map((reading) => reading.trim())
    .where((reading) => reading.isNotEmpty)
    .toList();

/// Returns the cleaned-up variants of a single [meaning].
///
/// The meaning is trimmed and a leading enumeration marker such as `1. ` is
/// removed. The result contains that text and, if different, the same text
/// with every parenthesized remark removed. Empty variants are omitted, so
/// the result is empty for a blank [meaning].
List<String> cleanMeaning(String meaning) {
  // `\.` must be escaped: an unescaped dot would also strip prefixes like
  // the `3D ` in `3D glasses`.
  final withoutNumber = meaning.trim().replaceFirst(
    RegExp(r'^\d+\.\s+'),
    '',
  );

  // Replace each remark (plus its surrounding whitespace) with one space so
  // the neighbouring words are not glued together, then trim the edges.
  final withoutParens = withoutNumber
      .replaceAll(RegExp(r'\s*\(.*?\)\s*'), ' ')
      .trim();

  // A set literal keeps insertion order and removes the duplicate when the
  // meaning had no parenthesized remark.
  return {
    withoutNumber,
    withoutParens,
  }.where((variant) => variant.isNotEmpty).toList();
}