62 lines
1.6 KiB
Dart
62 lines
1.6 KiB
Dart
import 'dart:convert';
|
||
import 'dart:io';
|
||
|
||
import 'package:csv/csv.dart';
|
||
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
|
||
|
||
/// Parses JLPT word-list CSV files into a flat list of [JLPTRankedWord]s.
///
/// [files] maps a JLPT level label to the CSV file holding the words of that
/// level. The label is stored on every word parsed from the file and is also
/// used in error messages. Files are processed in the map's iteration order.
///
/// Every CSV row must have exactly three columns:
///
/// 1. the kanji spelling (may be empty, in which case `kanji` is `null`),
/// 2. one or more readings, separated by `・`, `/`, `、` or whitespace,
/// 3. a comma-separated list of meanings, each normalized by [cleanMeaning].
///
/// Throws an [Exception] if a file does not exist or if a row does not have
/// exactly three columns.
Future<List<JLPTRankedWord>> parseJLPTRankedWords(
  Map<String, File> files,
) async {
  // Compiled once instead of once per row.

  // Leading "お・" marker on the kanji column.
  // NOTE(review): presumably marks an optional honorific prefix - confirm
  // against the source data.
  final leadingOPrefix = RegExp('^お・');

  // A parenthesized annotation, in ASCII or full-width parentheses. The
  // parentheses must be matched literally: an unescaped `(.*)` is a capture
  // group that matches (and therefore erases) the entire string.
  final parenthetical = RegExp(r'[（(].*?[）)]');

  // Characters that separate alternative readings. This must be a raw string
  // so that `\s` means "whitespace" rather than the letter `s`.
  // NOTE(review): the ASCII characters `( : ? + )` have always been part of
  // this character class and are kept as separators for compatibility; they
  // look like the remains of an intended `(?:\s+)` group - confirm whether
  // they are still wanted.
  final readingSeparator = RegExp(r'[・/、(:?+)\s]');

  final result = <JLPTRankedWord>[];

  for (final entry in files.entries) {
    final jlptLevel = entry.key;
    final file = entry.value;

    if (!await file.exists()) {
      throw Exception("File $jlptLevel does not exist");
    }

    final rows = await file
        .openRead()
        .transform(utf8.decoder)
        .transform(CsvToListConverter())
        .toList();

    for (final row in rows) {
      if (row.length != 3) {
        throw Exception("Invalid line in $jlptLevel: $row");
      }

      final strippedKanji = (row[0] as String)
          .replaceFirst(leadingOPrefix, '')
          .replaceAll(parenthetical, '');
      // An empty cell, or one that held nothing but an annotation, means the
      // word has no kanji spelling.
      final kanji = strippedKanji.isEmpty ? null : strippedKanji;

      final readings = (row[1] as String)
          .split(readingSeparator)
          .map((reading) => reading.trim())
          // Adjacent or leading/trailing separators produce empty pieces.
          .where((reading) => reading.isNotEmpty)
          .toList();

      final meanings =
          (row[2] as String).split(',').expand(cleanMeaning).toList();

      result.add(JLPTRankedWord(
        readings: readings,
        kanji: kanji,
        jlptLevel: jlptLevel,
        meanings: meanings,
      ));
    }
  }

  return result;
}
|
||
|
||
/// Normalizes one raw meaning and returns its distinct, non-empty variants.
///
/// The result contains, in this order and without duplicates:
///
/// 1. [meaning] trimmed, with a leading enumeration marker such as `1. ` or
///    `2) ` removed;
/// 2. the same text with every parenthesized remark removed.
///
/// Variants that end up empty are dropped, so the result is empty when
/// [meaning] is blank.
List<String> cleanMeaning(String meaning) {
  // The digits must be followed by a literal '.' or ')'. An unescaped `.`
  // would match any character and turn "20 years old" into "years old".
  final withoutNumbering =
      meaning.trim().replaceFirst(RegExp(r'^\d+[.)]\s+'), '');

  // Remove runs of parenthesized remarks together with the whitespace around
  // them. If the removed span touched whitespace, the words on either side
  // were separate and must stay separated by one space ("to eat (food) fast"
  // -> "to eat fast"); otherwise the remark was inside a word and the pieces
  // are joined ("colo(u)r" -> "color").
  final withoutParens = withoutNumbering.replaceAllMapped(
    RegExp(r'\s*(?:\(.*?\)\s*)+'),
    (match) {
      final removed = match[0] ?? '';
      final insideWord = removed.startsWith('(') && removed.endsWith(')');
      return insideWord ? '' : ' ';
    },
  ).trim();

  // A set literal keeps insertion order and collapses the two variants into
  // one when the meaning had no parentheses.
  return {withoutNumbering, withoutParens}
      .where((variant) => variant.isNotEmpty)
      .toList();
}
|