tanos-jlpt: fix breaking changes for csv parser
This commit is contained in:
@@ -3,12 +3,20 @@ import 'dart:io';
|
||||
|
||||
import 'package:csv/csv.dart';
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
|
||||
import 'package:xml/xml_events.dart';
|
||||
|
||||
Future<List<JLPTRankedWord>> parseJLPTRankedWords(
|
||||
Map<String, File> files,
|
||||
) async {
|
||||
final List<JLPTRankedWord> result = [];
|
||||
|
||||
final codec = CsvCodec(
|
||||
fieldDelimiter: ',',
|
||||
lineDelimiter: '\n',
|
||||
quoteMode: QuoteMode.strings,
|
||||
escapeCharacter: '\\',
|
||||
);
|
||||
|
||||
for (final entry in files.entries) {
|
||||
final jlptLevel = entry.key;
|
||||
final file = entry.value;
|
||||
@@ -17,42 +25,42 @@ Future<List<JLPTRankedWord>> parseJLPTRankedWords(
|
||||
throw Exception('File $jlptLevel does not exist');
|
||||
}
|
||||
|
||||
final rows = await file
|
||||
final words = await file
|
||||
.openRead()
|
||||
.transform(utf8.decoder)
|
||||
.transform(CsvToListConverter())
|
||||
.transform(codec.decoder)
|
||||
.flatten()
|
||||
.map((row) {
|
||||
if (row.length != 3) {
|
||||
throw Exception('Invalid line in $jlptLevel: $row');
|
||||
}
|
||||
return row;
|
||||
})
|
||||
.map((row) => row.map((e) => e as String).toList())
|
||||
.map((row) {
|
||||
final kanji = row[0].isEmpty
|
||||
? null
|
||||
: row[0]
|
||||
.replaceFirst(RegExp('^お・'), '')
|
||||
.replaceAll(RegExp(r'(.*)'), '');
|
||||
|
||||
final readings = row[1]
|
||||
.split(RegExp('[・/、(:?s+)]'))
|
||||
.map((e) => e.trim())
|
||||
.toList();
|
||||
|
||||
final meanings = row[2].split(',').expand(cleanMeaning).toList();
|
||||
|
||||
return JLPTRankedWord(
|
||||
readings: readings,
|
||||
kanji: kanji,
|
||||
jlptLevel: jlptLevel,
|
||||
meanings: meanings,
|
||||
);
|
||||
})
|
||||
.toList();
|
||||
|
||||
for (final row in rows) {
|
||||
if (row.length != 3) {
|
||||
throw Exception('Invalid line in $jlptLevel: $row');
|
||||
}
|
||||
|
||||
final kanji = (row[0] as String).isEmpty
|
||||
? null
|
||||
: (row[0] as String)
|
||||
.replaceFirst(RegExp('^お・'), '')
|
||||
.replaceAll(RegExp(r'(.*)'), '');
|
||||
|
||||
final readings = (row[1] as String)
|
||||
.split(RegExp('[・/、(:?s+)]'))
|
||||
.map((e) => e.trim())
|
||||
.toList();
|
||||
|
||||
final meanings = (row[2] as String)
|
||||
.split(',')
|
||||
.expand(cleanMeaning)
|
||||
.toList();
|
||||
|
||||
result.add(
|
||||
JLPTRankedWord(
|
||||
readings: readings,
|
||||
kanji: kanji,
|
||||
jlptLevel: jlptLevel,
|
||||
meanings: meanings,
|
||||
),
|
||||
);
|
||||
}
|
||||
result.addAll(words);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
Reference in New Issue
Block a user