WIP: Add tanos jlpt tags to database
This commit is contained in:
94
lib/cli/commands/create_tanos_jlpt_mappings.dart
Normal file
94
lib/cli/commands/create_tanos_jlpt_mappings.dart
Normal file
@@ -0,0 +1,94 @@
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:jadb/_data_ingestion/open_local_db.dart';
|
||||
|
||||
import 'package:args/command_runner.dart';
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/resolve.dart';
|
||||
import 'package:jadb/cli/args.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
/// CLI command that resolves the Tanos JLPT word lists against the local
/// database and reports which words could not be resolved.
///
/// Requires the `libsqlite` and `jadb` options (registered through
/// [addLibsqliteArg] and [addJadbArg]). The `--overrides` / `-o` flag is
/// forwarded to [resolveExisting].
class CreateTanosJlptMappings extends Command {
  @override
  final name = "create-tanos-jlpt-mappings";

  @override
  final description =
      "Resolve Tanos JLPT data against JMDict. This tool is useful to create overrides for ambiguous references";

  /// Registers the database-related options and the `overrides` flag.
  CreateTanosJlptMappings() {
    addLibsqliteArg(argParser);
    addJadbArg(argParser);

    argParser.addFlag(
      'overrides',
      abbr: 'o',
      help: 'Whether to use existing overrides when resolving',
      defaultsTo: false,
    );
  }

  /// Opens the database, parses the five JLPT CSV files and runs the
  /// resolution pass over every ranked word.
  ///
  /// Prints the usage text and exits with status 64 when either the
  /// `libsqlite` or the `jadb` option is missing.
  @override
  Future<void> run() async {
    final args = argResults!;
    final libsqlitePath = args.option('libsqlite');
    final jadbPath = args.option('jadb');

    if (libsqlitePath == null || jadbPath == null) {
      print(argParser.usage);
      // `exit` never returns, so both paths are promoted to non-null below.
      exit(64);
    }

    final db = await openLocalDb(
      jadbPath: jadbPath,
      libsqlitePath: libsqlitePath,
    );

    try {
      final useOverrides = args.flag('overrides');

      // CSV paths are relative to the current working directory.
      final files = <String, File>{
        'N1': File('data/tanos-jlpt/n1.csv'),
        'N2': File('data/tanos-jlpt/n2.csv'),
        'N3': File('data/tanos-jlpt/n3.csv'),
        'N4': File('data/tanos-jlpt/n4.csv'),
        'N5': File('data/tanos-jlpt/n5.csv'),
      };

      final rankedWords = await parseJLPTRankedWords(files);

      await resolveExisting(rankedWords, db, useOverrides);
    } finally {
      // Release the database handle even when parsing or resolving throws.
      await db.close();
    }
  }
}
|
||||
|
||||
/// Tries to resolve every word in [rankedWords] via `findEntry` and prints a
/// report of the words for which resolution failed.
///
/// Each word is printed with a progress prefix before it is looked up in
/// [db]; [useOverrides] is forwarded to `findEntry` unchanged. Any exception
/// or error thrown by `findEntry` is printed and the word is recorded as
/// missing, so a single failure never aborts the run.
///
/// After the pass, the missing words are listed, followed by per-level
/// failure statistics (N5 down to N1) and the total number of missing words.
Future<void> resolveExisting(
  List<JLPTRankedWord> rankedWords,
  Database db,
  bool useOverrides,
) async {
  final missingWords = <JLPTRankedWord>[];

  for (final (i, word) in rankedWords.indexed) {
    try {
      print(
          '[${(i + 1).toString().padLeft(4, '0')}/${rankedWords.length}] $word');
      await findEntry(word, db, useOverrides: useOverrides);
    } catch (e) {
      // Deliberately broad: every failure counts as "could not resolve".
      print(e);
      missingWords.add(word);
    }
  }

  print('Missing entries:');
  missingWords.forEach(print);

  print('Statistics:');
  for (final jlptLevel in ['N5', 'N4', 'N3', 'N2', 'N1']) {
    final missingWordCount =
        missingWords.where((e) => e.jlptLevel == jlptLevel).length;
    final totalWordCount =
        rankedWords.where((e) => e.jlptLevel == jlptLevel).length;

    // A level with no words would otherwise compute 0 / 0 and print "NaN%".
    final failureRate = totalWordCount == 0
        ? '0.00'
        : ((missingWordCount / totalWordCount) * 100).toStringAsFixed(2);

    print(
        '$jlptLevel failures: [$missingWordCount/$totalWordCount] ($failureRate%)');
  }

  print('Not able to determine the entry for ${missingWords.length} words');
}
|
||||
Reference in New Issue
Block a user