66 lines
1.6 KiB
Dart
66 lines
1.6 KiB
Dart
import 'dart:convert';
|
||
import 'dart:io';
|
||
|
||
import 'package:csv/csv.dart';
|
||
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
|
||
|
||
/// Parses JLPT vocabulary CSV files into a flat list of [JLPTRankedWord]s.
///
/// [files] maps a JLPT level label to the CSV file holding the words of that
/// level. Each row must have exactly three columns, in this order:
///
/// 1. kanji (may be empty),
/// 2. readings, separated by `・`, `/`, `、` or whitespace,
/// 3. meanings, separated by `,` and expanded through [cleanMeaning].
///
/// Words are returned in the iteration order of [files], then in row order.
///
/// Throws an [Exception] if a file does not exist or if a row does not have
/// exactly three columns.
Future<List<JLPTRankedWord>> parseJLPTRankedWords(
  Map<String, File> files,
) async {
  // Compiled once instead of once per row.
  final honorificPrefix = RegExp('^お・');
  // A parenthesised annotation, written with ASCII or full-width brackets.
  // The brackets must be matched literally: an unescaped `(.*)` is a capture
  // group that matches (and would erase) the entire kanji string.
  final kanjiAnnotation = RegExp(r'[((].*?[))]');
  // One or more reading separators: `・`, `/`, `、` or any whitespace.
  final readingSeparator = RegExp(r'[・/、\s]+');

  final List<JLPTRankedWord> result = [];

  for (final entry in files.entries) {
    final jlptLevel = entry.key;
    final file = entry.value;

    if (!await file.exists()) {
      throw Exception('File $jlptLevel does not exist');
    }

    final rows = await file
        .openRead()
        .transform(utf8.decoder)
        .transform(CsvToListConverter())
        .toList();

    for (final row in rows) {
      if (row.length != 3) {
        throw Exception('Invalid line in $jlptLevel: $row');
      }

      final cleanedKanji = (row[0] as String)
          .replaceFirst(honorificPrefix, '')
          .replaceAll(kanjiAnnotation, '')
          .trim();
      // A word without a kanji form is represented by `null`, including the
      // case where only an annotation was present in the column.
      final kanji = cleanedKanji.isEmpty ? null : cleanedKanji;

      // Leading/trailing separators yield empty fragments; drop them so no
      // empty reading is stored.
      final readings = (row[1] as String)
          .split(readingSeparator)
          .where((reading) => reading.isNotEmpty)
          .toList();

      final meanings =
          (row[2] as String).split(',').expand(cleanMeaning).toList();

      result.add(
        JLPTRankedWord(
          readings: readings,
          kanji: kanji,
          jlptLevel: jlptLevel,
          meanings: meanings,
        ),
      );
    }
  }

  return result;
}
|
||
|
||
/// Normalises a single meaning and returns its distinct, non-empty variants.
///
/// The input is trimmed and a leading list number such as `1. ` or `10. ` is
/// removed. The result contains that text and, when it differs, a second
/// variant with every parenthesised remark removed, in that order.
///
/// A bare leading number without a dot (as in `20 years old`) is part of the
/// meaning and is kept. Returns an empty list when nothing but whitespace,
/// numbering or an empty string was given.
List<String> cleanMeaning(String meaning) {
  // The dot is escaped on purpose: an unescaped `.` matches any character and
  // would strip the number from meanings like "20 years old".
  final numberingStripped =
      meaning.trim().replaceFirst(RegExp(r'^\d+\.\s+'), '');

  final withoutParens = numberingStripped
      .replaceAllMapped(RegExp(r'\s*(?:\(.*?\)\s*)+'), (match) {
        final removed = match[0] ?? '';
        // Keep one space when the removed span touched whitespace so the
        // neighbouring words stay separated ("go (to) school" -> "go school"),
        // but join directly for inline spellings ("colo(u)r" -> "color").
        return removed.trim() == removed ? '' : ' ';
      })
      .trim();

  // A set literal keeps insertion order and collapses the two variants into
  // one when there was nothing in parentheses.
  return {numberingStripped, withoutParens}
      .where((variant) => variant.isNotEmpty)
      .toList();
}
|