Files
jadb/lib/cli/commands/create_tanos_jlpt_mappings.dart

101 lines
2.8 KiB
Dart

import 'dart:io';
import 'package:args/command_runner.dart';
import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/resolve.dart';
import 'package:jadb/cli/args.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// CLI command that resolves the Tanos JLPT word lists against the local
/// database and reports which words could not be resolved.
///
/// Requires the `libsqlite` and `jadb` options (registered through
/// [addLibsqliteArg] and [addJadbArg]). The optional `--overrides` / `-o` flag
/// is forwarded to the resolver.
class CreateTanosJlptMappings extends Command {
  @override
  final name = 'create-tanos-jlpt-mappings';

  @override
  final description =
      'Resolve Tanos JLPT data against JMDict. This tool is useful to create overrides for ambiguous references';

  /// Registers the command line options understood by this command.
  CreateTanosJlptMappings() {
    addLibsqliteArg(argParser);
    addJadbArg(argParser);
    argParser.addFlag(
      'overrides',
      abbr: 'o',
      help: 'Whether to use existing overrides when resolving',
      defaultsTo: false,
    );
  }

  /// Parses the JLPT CSV files and tries to resolve every word in them.
  ///
  /// Prints the usage text and terminates the process with exit code 64 when
  /// either the `libsqlite` or the `jadb` option is missing.
  @override
  Future<void> run() async {
    final args = argResults!;
    final libsqlitePath = args.option('libsqlite');
    final jadbPath = args.option('jadb');

    if (libsqlitePath == null || jadbPath == null) {
      print(argParser.usage);
      // `exit` returns `Never`, so both paths are promoted to non-null below.
      exit(64);
    }

    final db = await openLocalDb(
      jadbPath: jadbPath,
      libsqlitePath: libsqlitePath,
    );

    // Release the database handle even when parsing or resolving throws.
    try {
      final useOverrides = args.flag('overrides');

      // CSV paths are relative to the current working directory.
      final files = <String, File>{
        'N1': File('data/tanos-jlpt/n1.csv'),
        'N2': File('data/tanos-jlpt/n2.csv'),
        'N3': File('data/tanos-jlpt/n3.csv'),
        'N4': File('data/tanos-jlpt/n4.csv'),
        'N5': File('data/tanos-jlpt/n5.csv'),
      };

      final rankedWords = await parseJLPTRankedWords(files);
      await resolveExisting(rankedWords, db, useOverrides);
    } finally {
      await db.close();
    }
  }
}
/// Tries to resolve every word in [rankedWords] with [findEntry] and prints a
/// report of the words that could not be resolved.
///
/// Progress is printed for each word before it is looked up in [db].
/// [useOverrides] is forwarded to [findEntry]. Anything thrown while handling
/// a word is printed and the word is recorded as missing; processing then
/// continues with the next word.
///
/// After all words are processed, the missing words are listed, followed by
/// per-level failure statistics for N5 through N1 and the total number of
/// unresolved words. A level that has no words at all is reported with a
/// failure rate of `0.00` rather than `NaN`.
Future<void> resolveExisting(
  List<JLPTRankedWord> rankedWords,
  Database db,
  bool useOverrides,
) async {
  final missingWords = <JLPTRankedWord>[];

  for (final (i, word) in rankedWords.indexed) {
    try {
      print(
        '[${(i + 1).toString().padLeft(4, '0')}/${rankedWords.length}] $word',
      );
      await findEntry(word, db, useOverrides: useOverrides);
    } catch (e) {
      // Deliberate best-effort: report the failure and keep going.
      print(e);
      missingWords.add(word);
    }
  }

  print('Missing entries:');
  for (final word in missingWords) {
    print(word);
  }

  print('Statistics:');
  for (final jlptLevel in ['N5', 'N4', 'N3', 'N2', 'N1']) {
    final missingWordCount = missingWords
        .where((e) => e.jlptLevel == jlptLevel)
        .length;
    final totalWordCount = rankedWords
        .where((e) => e.jlptLevel == jlptLevel)
        .length;

    // Guard against 0 / 0, which would otherwise be rendered as "NaN".
    final failurePercent = totalWordCount == 0
        ? 0.0
        : (missingWordCount / totalWordCount) * 100;
    final failureRate = failurePercent.toStringAsFixed(2);

    print(
      '$jlptLevel failures: [$missingWordCount/$totalWordCount] ($failureRate%)',
    );
  }

  print('Not able to determine the entry for ${missingWords.length} words');
}