From 05b56466e746e074ce8aa7d000cf62b3f5819637 Mon Sep 17 00:00:00 2001 From: h7x4 Date: Sat, 21 Feb 2026 00:46:24 +0900 Subject: [PATCH] tanos-jlpt: fix breaking changes for csv parser --- .../tanos-jlpt/csv_parser.dart | 72 ++++++++++--------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/lib/_data_ingestion/tanos-jlpt/csv_parser.dart b/lib/_data_ingestion/tanos-jlpt/csv_parser.dart index c0ab10a..dfdab0a 100644 --- a/lib/_data_ingestion/tanos-jlpt/csv_parser.dart +++ b/lib/_data_ingestion/tanos-jlpt/csv_parser.dart @@ -3,12 +3,20 @@ import 'dart:io'; import 'package:csv/csv.dart'; import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart'; +import 'package:xml/xml_events.dart'; Future> parseJLPTRankedWords( Map files, ) async { final List result = []; + final codec = CsvCodec( + fieldDelimiter: ',', + lineDelimiter: '\n', + quoteMode: QuoteMode.strings, + escapeCharacter: '\\', + ); + for (final entry in files.entries) { final jlptLevel = entry.key; final file = entry.value; @@ -17,42 +25,42 @@ Future> parseJLPTRankedWords( throw Exception('File $jlptLevel does not exist'); } - final rows = await file + final words = await file .openRead() .transform(utf8.decoder) - .transform(CsvToListConverter()) + .transform(codec.decoder) + .flatten() + .map((row) { + if (row.length != 3) { + throw Exception('Invalid line in $jlptLevel: $row'); + } + return row; + }) + .map((row) => row.map((e) => e as String).toList()) + .map((row) { + final kanji = row[0].isEmpty + ? null + : row[0] + .replaceFirst(RegExp('^お・'), '') + .replaceAll(RegExp(r'(.*)'), ''); + + final readings = row[1] + .split(RegExp('[・/、(:?s+)]')) + .map((e) => e.trim()) + .toList(); + + final meanings = row[2].split(',').expand(cleanMeaning).toList(); + + return JLPTRankedWord( + readings: readings, + kanji: kanji, + jlptLevel: jlptLevel, + meanings: meanings, + ); + }) .toList(); - for (final row in rows) { - if (row.length != 3) { - throw Exception('Invalid line in $jlptLevel: $row'); - } - - final kanji = (row[0] as String).isEmpty - ? null - : (row[0] as String) - .replaceFirst(RegExp('^お・'), '') - .replaceAll(RegExp(r'(.*)'), ''); - - final readings = (row[1] as String) - .split(RegExp('[・/、(:?s+)]')) - .map((e) => e.trim()) - .toList(); - - final meanings = (row[2] as String) - .split(',') - .expand(cleanMeaning) - .toList(); - - result.add( - JLPTRankedWord( - readings: readings, - kanji: kanji, - jlptLevel: jlptLevel, - meanings: meanings, - ), - ); - } + result.addAll(words); } return result;