import 'dart:io';

import 'package:args/command_runner.dart';
import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/resolve.dart';
import 'package:jadb/cli/args.dart';
import 'package:sqflite_common/sqlite_api.dart';

/// CLI command that runs every word from the Tanos JLPT CSV files through
/// [findEntry] and reports the words for which the lookup failed.
///
/// The CSV files are read from the fixed relative paths
/// `data/tanos-jlpt/n1.csv` … `data/tanos-jlpt/n5.csv`.
class CreateTanosJlptMappings extends Command {
  @override
  final name = 'create-tanos-jlpt-mappings';

  @override
  final description =
      'Resolve Tanos JLPT data against JMDict. This tool is useful to create overrides for ambiguous references';

  /// Registers the `libsqlite` and `jadb` arguments plus the `--overrides`
  /// (`-o`) flag, which is off by default.
  CreateTanosJlptMappings() {
    addLibsqliteArg(argParser);
    addJadbArg(argParser);
    argParser.addFlag(
      'overrides',
      abbr: 'o',
      help: 'Whether to use existing overrides when resolving',
      defaultsTo: false,
    );
  }

  /// Opens the local database, parses the five JLPT word lists and hands them
  /// to [resolveExisting].
  ///
  /// Prints the usage text and exits with status 64 when either the
  /// `libsqlite` or the `jadb` option is missing.
  @override
  Future<void> run() async {
    final args = argResults!;

    // Copy the options into locals so they promote to non-nullable after the
    // check below instead of needing a `!` at every use.
    final libsqlitePath = args.option('libsqlite');
    final jadbPath = args.option('jadb');
    if (libsqlitePath == null || jadbPath == null) {
      print(argParser.usage);
      // 64 is EX_USAGE in the BSD sysexits convention (command line usage
      // error).
      exit(64);
    }

    final db = await openLocalDb(
      jadbPath: jadbPath,
      libsqlitePath: libsqlitePath,
    );

    try {
      final useOverrides = args.flag('overrides');

      final files = {
        'N1': File('data/tanos-jlpt/n1.csv'),
        'N2': File('data/tanos-jlpt/n2.csv'),
        'N3': File('data/tanos-jlpt/n3.csv'),
        'N4': File('data/tanos-jlpt/n4.csv'),
        'N5': File('data/tanos-jlpt/n5.csv'),
      };

      final rankedWords = await parseJLPTRankedWords(files);

      await resolveExisting(rankedWords, db, useOverrides);
    } finally {
      // Release the database handle even when parsing or resolving throws.
      await db.close();
    }
  }
}

/// Runs [findEntry] for every element of [rankedWords] and prints a report.
///
/// For each word a progress line is printed before the lookup. Any error
/// thrown by [findEntry] is printed and the word is recorded as missing; the
/// loop then continues with the next word. Afterwards the missing words, the
/// per-level failure statistics (N5 through N1) and the total number of
/// missing words are printed.
///
/// [useOverrides] is forwarded unchanged to [findEntry]. The elements of
/// [rankedWords] are expected to expose a `jlptLevel` member that compares
/// equal to one of `'N1'` … `'N5'`.
Future<void> resolveExisting(
  List rankedWords,
  Database db,
  bool useOverrides,
) async {
  final missingWords = <dynamic>[];

  for (final (i, word) in rankedWords.indexed) {
    try {
      print(
        '[${'${i + 1}'.padLeft(4, '0')}/${rankedWords.length}] $word',
      );
      await findEntry(word, db, useOverrides: useOverrides);
    } catch (e) {
      // Deliberately broad: every failure is reported and counted as a
      // missing entry so that one bad word does not abort the whole run.
      print(e);
      missingWords.add(word);
    }
  }

  print('Missing entries:');
  for (final word in missingWords) {
    print(word);
  }

  print('Statistics:');
  for (final jlptLevel in ['N5', 'N4', 'N3', 'N2', 'N1']) {
    final missingWordCount =
        missingWords.where((e) => e.jlptLevel == jlptLevel).length;
    final totalWordCount =
        rankedWords.where((e) => e.jlptLevel == jlptLevel).length;

    // Guard against 0 / 0, which would otherwise be rendered as "NaN%" for a
    // level that has no words at all.
    final failurePercent = totalWordCount == 0
        ? 0.0
        : (missingWordCount / totalWordCount) * 100;
    final failureRate = failurePercent.toStringAsFixed(2);

    print(
      '$jlptLevel failures: [$missingWordCount/$totalWordCount] ($failureRate%)',
    );
  }

  print('Not able to determine the entry for ${missingWords.length} words');
}