import 'dart:convert';
import 'dart:io';

import 'package:csv/csv.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';

// NOTE(review): the generic type arguments in this file were reconstructed.
// `String` as the key type of `files` is an assumption — confirm against the
// type of `JLPTRankedWord.jlptLevel`.

/// Parses JLPT vocabulary CSV files into a flat list of [JLPTRankedWord]s.
///
/// [files] maps a JLPT level to the CSV file holding the words of that level.
/// Files are read as UTF-8, and every row must have exactly three columns:
/// kanji, readings and meanings.
///
/// * Kanji: a leading `お・` and any parenthesised annotation (ASCII or
///   full-width parentheses) are removed. If nothing is left, `kanji` is
///   `null`.
/// * Readings: split on `・`, `/`, `、` and whitespace; empty fragments are
///   dropped.
/// * Meanings: split on `,`, with each fragment expanded by [cleanMeaning].
///
/// Throws an [Exception] if a file does not exist or if a row does not have
/// exactly three columns.
Future<List<JLPTRankedWord>> parseJLPTRankedWords(
  Map<String, File> files,
) async {
  // Compiled once; they are reused for every row of every file.
  final honorificPrefix = RegExp('^お・');
  final parenthetical = RegExp(r'[（(][^）)]*[）)]');
  final readingSeparator = RegExp(r'[・/、\s]+');

  final result = <JLPTRankedWord>[];

  for (final entry in files.entries) {
    final jlptLevel = entry.key;
    final file = entry.value;

    if (!file.existsSync()) {
      throw Exception('File $jlptLevel does not exist');
    }

    final rows = await file
        .openRead()
        .transform(utf8.decoder)
        .transform(CsvToListConverter())
        .toList();

    for (final row in rows) {
      if (row.length != 3) {
        throw Exception('Invalid line in $jlptLevel: $row');
      }

      // Strip only the annotations; the previous pattern `(.*)` matched the
      // whole string and therefore erased every kanji.
      final strippedKanji = (row[0] as String)
          .replaceFirst(honorificPrefix, '')
          .replaceAll(parenthetical, '');
      final kanji = strippedKanji.isEmpty ? null : strippedKanji;

      final readings = (row[1] as String)
          .split(readingSeparator)
          .where((reading) => reading.isNotEmpty)
          .toList();

      final meanings = (row[2] as String)
          .split(',')
          .expand(cleanMeaning)
          .toList();

      result.add(
        JLPTRankedWord(
          readings: readings,
          kanji: kanji,
          jlptLevel: jlptLevel,
          meanings: meanings,
        ),
      );
    }
  }

  return result;
}

/// Returns the normalised variants of a single [meaning] fragment.
///
/// The fragment is trimmed and a leading enumeration marker such as `1. ` is
/// removed. The result contains that text and, when different, the same text
/// with every `(...)` group (and the whitespace around it) removed. Duplicates
/// and empty strings are omitted, so the list has zero, one or two entries.
List<String> cleanMeaning(String meaning) {
  // The dot is escaped so that only real numbering ("1. ", "12. ") is
  // stripped, not any digit followed by an arbitrary character.
  final initialTrim = meaning.trim().replaceFirst(RegExp(r'^\d+\.\s+'), '');
  final woParens = initialTrim.replaceAll(RegExp(r'\s*\(.*?\)\s*'), '');

  // A set literal keeps insertion order and collapses identical variants.
  return {initialTrim, woParens}.where((m) => m.isNotEmpty).toList();
}