101 lines
2.8 KiB
Dart
101 lines
2.8 KiB
Dart
import 'dart:io';
|
|
|
|
import 'package:args/command_runner.dart';
|
|
import 'package:jadb/_data_ingestion/open_local_db.dart';
|
|
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
|
|
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
|
|
import 'package:jadb/_data_ingestion/tanos-jlpt/resolve.dart';
|
|
import 'package:jadb/cli/args.dart';
|
|
import 'package:sqflite_common/sqlite_api.dart';
|
|
|
|
/// CLI command that resolves the Tanos JLPT word lists against JMDict.
///
/// Parses the per-level CSV files under `data/tanos-jlpt/`, hands the parsed
/// words to [resolveExisting], and lets it print which words could not be
/// resolved. The output is meant to help write overrides for ambiguous
/// references.
class CreateTanosJlptMappings extends Command {
  @override
  final name = 'create-tanos-jlpt-mappings';

  @override
  final description =
      'Resolve Tanos JLPT data against JMDict. This tool is useful to create overrides for ambiguous references';

  /// Registers the arguments understood by this command.
  ///
  /// NOTE(review): [addLibsqliteArg] and [addJadbArg] presumably register the
  /// `libsqlite` and `jadb` options that [run] reads — confirm in
  /// `package:jadb/cli/args.dart`.
  CreateTanosJlptMappings() {
    addLibsqliteArg(argParser);
    addJadbArg(argParser);

    argParser.addFlag(
      'overrides',
      abbr: 'o',
      help: 'Whether to use existing overrides when resolving',
      defaultsTo: false,
    );
  }

  /// Runs the resolution pass and prints its report.
  ///
  /// Prints the usage text and terminates the process with exit code 64 when
  /// either the `libsqlite` or the `jadb` option is missing.
  @override
  Future<void> run() async {
    final results = argResults!;
    final libsqlitePath = results.option('libsqlite');
    final jadbPath = results.option('jadb');

    if (libsqlitePath == null || jadbPath == null) {
      print(argParser.usage);
      // `exit` never returns, so both paths are promoted to non-null below.
      exit(64);
    }

    final db = await openLocalDb(
      jadbPath: jadbPath,
      libsqlitePath: libsqlitePath,
    );

    // Close the database even if parsing or resolving throws, so the
    // connection is never leaked.
    try {
      final useOverrides = results.flag('overrides');

      final files = <String, File>{
        'N1': File('data/tanos-jlpt/n1.csv'),
        'N2': File('data/tanos-jlpt/n2.csv'),
        'N3': File('data/tanos-jlpt/n3.csv'),
        'N4': File('data/tanos-jlpt/n4.csv'),
        'N5': File('data/tanos-jlpt/n5.csv'),
      };

      final rankedWords = await parseJLPTRankedWords(files);

      await resolveExisting(rankedWords, db, useOverrides);
    } finally {
      await db.close();
    }
  }
}
|
|
|
|
/// Tries to resolve every word in [rankedWords] and prints a report.
///
/// Each word is printed with a progress prefix and passed to `findEntry`
/// together with [db] and [useOverrides]. Anything thrown for a word is
/// printed and the word is recorded as missing; processing then continues
/// with the next word (deliberate best effort).
///
/// After the pass, the missing words, a per-level failure summary (N5 through
/// N1) and the total number of unresolved words are printed.
Future<void> resolveExisting(
  List<JLPTRankedWord> rankedWords,
  Database db,
  bool useOverrides,
) async {
  final missingWords = <JLPTRankedWord>[];

  for (final (i, word) in rankedWords.indexed) {
    try {
      print(
        '[${(i + 1).toString().padLeft(4, '0')}/${rankedWords.length}] $word',
      );
      await findEntry(word, db, useOverrides: useOverrides);
    } catch (e) {
      // A failure on one word must not abort the whole run: report it and
      // remember the word for the summary below.
      print(e);
      missingWords.add(word);
    }
  }

  print('Missing entries:');
  for (final word in missingWords) {
    print(word);
  }

  print('Statistics:');
  for (final jlptLevel in ['N5', 'N4', 'N3', 'N2', 'N1']) {
    final missingWordCount = missingWords
        .where((e) => e.jlptLevel == jlptLevel)
        .length;
    final totalWordCount = rankedWords
        .where((e) => e.jlptLevel == jlptLevel)
        .length;

    // A level without any words would otherwise compute 0 / 0 and print
    // "NaN%"; report it as 0.00% instead.
    final failureRate = totalWordCount == 0
        ? '0.00'
        : ((missingWordCount / totalWordCount) * 100).toStringAsFixed(2);

    print(
      '$jlptLevel failures: [$missingWordCount/$totalWordCount] ($failureRate%)',
    );
  }

  print('Not able to determine the entry for ${missingWords.length} words');
}
|