tanos-jlpt: fix breaking changes for csv parser

This commit is contained in:
2026-02-21 00:46:24 +09:00
parent 33016ca751
commit 05b56466e7

View File

@@ -3,12 +3,20 @@ import 'dart:io';
import 'package:csv/csv.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
import 'package:xml/xml_events.dart';
Future<List<JLPTRankedWord>> parseJLPTRankedWords(
Map<String, File> files,
) async {
final List<JLPTRankedWord> result = [];
final codec = CsvCodec(
fieldDelimiter: ',',
lineDelimiter: '\n',
quoteMode: QuoteMode.strings,
escapeCharacter: '\\',
);
for (final entry in files.entries) {
final jlptLevel = entry.key;
final file = entry.value;
@@ -17,42 +25,42 @@ Future<List<JLPTRankedWord>> parseJLPTRankedWords(
throw Exception('File $jlptLevel does not exist');
}
final rows = await file
final words = await file
.openRead()
.transform(utf8.decoder)
.transform(CsvToListConverter())
.transform(codec.decoder)
.flatten()
.map((row) {
if (row.length != 3) {
throw Exception('Invalid line in $jlptLevel: $row');
}
return row;
})
.map((row) => row.map((e) => e as String).toList())
.map((row) {
final kanji = row[0].isEmpty
? null
: row[0]
.replaceFirst(RegExp('^お・'), '')
.replaceAll(RegExp(r'.*'), '');
final readings = row[1]
.split(RegExp('[・/、(:?s+)]'))
.map((e) => e.trim())
.toList();
final meanings = row[2].split(',').expand(cleanMeaning).toList();
return JLPTRankedWord(
readings: readings,
kanji: kanji,
jlptLevel: jlptLevel,
meanings: meanings,
);
})
.toList();
for (final row in rows) {
if (row.length != 3) {
throw Exception('Invalid line in $jlptLevel: $row');
}
final kanji = (row[0] as String).isEmpty
? null
: (row[0] as String)
.replaceFirst(RegExp('^お・'), '')
.replaceAll(RegExp(r'.*'), '');
final readings = (row[1] as String)
.split(RegExp('[・/、(:?s+)]'))
.map((e) => e.trim())
.toList();
final meanings = (row[2] as String)
.split(',')
.expand(cleanMeaning)
.toList();
result.add(
JLPTRankedWord(
readings: readings,
kanji: kanji,
jlptLevel: jlptLevel,
meanings: meanings,
),
);
}
result.addAll(words);
}
return result;