WIP: Add tanos jlpt tags to database

This commit is contained in:
2025-04-29 21:11:09 +02:00
parent adb2d687fb
commit fa4353bae0
14 changed files with 653 additions and 13 deletions

View File

@@ -0,0 +1,94 @@
import 'dart:io';
import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:args/command_runner.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/resolve.dart';
import 'package:jadb/cli/args.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// CLI command that resolves the Tanos JLPT word lists against the local
/// database and reports which words could not be resolved.
///
/// Requires the `libsqlite` and `jadb` options (registered through
/// [addLibsqliteArg] and [addJadbArg]). The `--overrides` / `-o` flag is
/// forwarded to [resolveExisting] to control whether existing overrides are
/// used while resolving.
class CreateTanosJlptMappings extends Command {
  @override
  final name = "create-tanos-jlpt-mappings";

  @override
  final description =
      "Resolve Tanos JLPT data against JMDict. This tool is useful to create overrides for ambiguous references";

  /// Registers the command's options and the `overrides` flag.
  CreateTanosJlptMappings() {
    addLibsqliteArg(argParser);
    addJadbArg(argParser);
    argParser.addFlag(
      'overrides',
      abbr: 'o',
      help: 'Whether to use existing overrides when resolving',
      defaultsTo: false,
    );
  }

  /// Opens the database, parses the per-level CSV files and runs the
  /// resolution report.
  ///
  /// Prints the usage text and exits with code 64 when either the
  /// `libsqlite` or the `jadb` option is missing.
  @override
  Future<void> run() async {
    final args = argResults!;
    final libsqlitePath = args.option('libsqlite');
    final jadbPath = args.option('jadb');
    if (libsqlitePath == null || jadbPath == null) {
      print(argParser.usage);
      // `exit` never returns, so both paths are promoted to non-null below.
      exit(64);
    }

    final db = await openLocalDb(
      jadbPath: jadbPath,
      libsqlitePath: libsqlitePath,
    );
    try {
      final useOverrides = args.flag('overrides');

      // One CSV file per JLPT level, relative to the working directory.
      final files = <String, File>{
        'N1': File('data/tanos-jlpt/n1.csv'),
        'N2': File('data/tanos-jlpt/n2.csv'),
        'N3': File('data/tanos-jlpt/n3.csv'),
        'N4': File('data/tanos-jlpt/n4.csv'),
        'N5': File('data/tanos-jlpt/n5.csv'),
      };

      final rankedWords = await parseJLPTRankedWords(files);
      await resolveExisting(rankedWords, db, useOverrides);
    } finally {
      // Release the database handle even when parsing or resolving throws.
      await db.close();
    }
  }
}
/// Tries to resolve every word in [rankedWords] against [db] and prints a
/// report of the words that could not be resolved.
///
/// Each word is passed to `findEntry` together with [useOverrides]. Anything
/// thrown while handling a word is printed and the word is recorded as
/// missing, so one failing word does not abort the run.
///
/// After the pass, prints the list of missing words, a per-level failure
/// count and percentage for N5 through N1, and the total number of missing
/// words.
Future<void> resolveExisting(
  List<JLPTRankedWord> rankedWords,
  Database db,
  bool useOverrides,
) async {
  final missingWords = <JLPTRankedWord>[];

  for (final (i, word) in rankedWords.indexed) {
    try {
      // Progress line: zero-padded 1-based index, total count, then the word.
      print(
          '[${(i + 1).toString().padLeft(4, '0')}/${rankedWords.length}] $word');
      await findEntry(word, db, useOverrides: useOverrides);
    } catch (e) {
      // Deliberate best effort: report the failure and keep going.
      print(e);
      missingWords.add(word);
    }
  }

  print('Missing entries:');
  for (final word in missingWords) {
    print('$word');
  }

  print('Statistics:');
  for (final jlptLevel in ['N5', 'N4', 'N3', 'N2', 'N1']) {
    final missingWordCount =
        missingWords.where((e) => e.jlptLevel == jlptLevel).length;
    final totalWordCount =
        rankedWords.where((e) => e.jlptLevel == jlptLevel).length;
    // A level with no words would compute 0 / 0 (NaN) and print "NaN%";
    // report a zero failure rate for it instead.
    final failureRate = totalWordCount == 0
        ? '0.00'
        : ((missingWordCount / totalWordCount) * 100).toStringAsFixed(2);
    print(
        '$jlptLevel failures: [$missingWordCount/$totalWordCount] ($failureRate%)');
  }

  print('Not able to determine the entry for ${missingWords.length} words');
}