Files
jadb/lib/_data_ingestion/tanos-jlpt/resolve.dart
2026-03-02 12:01:11 +09:00

137 lines
3.7 KiB
Dart

import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/overrides.dart';
import 'package:jadb/table_names/jmdict.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// Fetches the `entryId` of every row in the JMdict reading-element table
/// whose `reading` column equals one of [word]'s readings.
///
/// The result holds one id per matching row, so the same id can occur more
/// than once when several rows of one entry match.
Future<List<int>> _findReadingCandidates(
  JLPTRankedWord word,
  Database db,
) async {
  // One `?` placeholder per reading so the values are bound, not interpolated.
  final placeholders = List.filled(word.readings.length, '?').join(',');
  final rows = await db.query(
    JMdictTableNames.readingElement,
    columns: ['entryId'],
    where: '"reading" IN ($placeholders)',
    whereArgs: [...word.readings],
  );
  return [for (final row in rows) row['entryId'] as int];
}
/// Fetches the `entryId` of every row in the JMdict kanji-element table whose
/// `reading` column equals [word]'s kanji.
///
/// The result holds one id per matching row.
Future<List<int>> _findKanjiCandidates(
  JLPTRankedWord word,
  Database db,
) async {
  final rows = await db.query(
    JMdictTableNames.kanjiElement,
    columns: ['entryId'],
    where: 'reading = ?',
    whereArgs: [word.kanji],
  );
  return [for (final row in rows) row['entryId'] as int];
}
/// Fetches `(entryId, phrase)` pairs for every glossary row whose `phrase`
/// equals one of [word]'s meanings.
///
/// The glossary table is joined with the sense table on `senseId` to obtain
/// the `entryId` belonging to each phrase.
Future<List<(int, String)>> _findSenseCandidates(
  JLPTRankedWord word,
  Database db,
) async {
  // One `?` placeholder per meaning so the values are bound, not interpolated.
  final placeholders = List.filled(word.meanings.length, '?').join(',');
  // Note: there is no space between `USING (senseId)` and `WHERE`; the closing
  // parenthesis still separates the two tokens.
  final sql =
      'SELECT entryId, phrase '
      'FROM "${JMdictTableNames.senseGlossary}" '
      'JOIN "${JMdictTableNames.sense}" USING (senseId)'
      'WHERE phrase IN ($placeholders)';
  final rows = await db.rawQuery(sql, [...word.meanings]);
  return [
    for (final row in rows) (row['entryId'] as int, row['phrase'] as String),
  ];
}
/// Resolves [word] to the id of a single JMdict entry.
///
/// Candidates are the entries that have one of the word's readings. When the
/// word has a kanji form, the candidates are narrowed to entries that also
/// have that kanji; if none remain, they are narrowed instead to entries with
/// a glossary phrase equal to one of the word's meanings.
///
/// When the result is not exactly one distinct entry and [useOverrides] is
/// true, [tanosJLPTOverrides] is consulted with `(kanji, reading)` keys for
/// each of the word's readings. In that case the return value is the single
/// override id, or `null` when a matching key exists but yields no id.
///
/// Throws an [Exception] when:
/// * the overrides yield more than one distinct id,
/// * overrides are consulted but contain no key for the word,
/// * overrides are not used and zero or several distinct entries remain.
Future<int?> findEntry(
  JLPTRankedWord word,
  Database db, {
  bool useOverrides = true,
}) async {
  // The reading query returns one id per matching row, so an entry matching
  // several of the word's readings shows up repeatedly. Collapse to distinct
  // ids so that a single entry is never mistaken for an ambiguous match.
  final readingCandidates = (await _findReadingCandidates(word, db)).toSet();

  List<int> entryIds;
  if (word.kanji != null) {
    final kanjiCandidates = (await _findKanjiCandidates(word, db)).toSet();
    entryIds = readingCandidates.where(kanjiCandidates.contains).toList();

    if (entryIds.isEmpty) {
      print('No entry found, trying to combine with senses');
      // The sense lookup is only needed for this fallback, so it is queried
      // on demand rather than for every word.
      final senseEntryIds = (await _findSenseCandidates(
        word,
        db,
      )).map((sense) => sense.$1).toSet();
      entryIds = readingCandidates.where(senseEntryIds.contains).toList();
    }
  } else {
    entryIds = readingCandidates.toList();
  }

  if (entryIds.length != 1 && useOverrides) {
    print(
      entryIds.isEmpty
          ? 'No entry found, trying to fetch from overrides'
          : 'Multiple entries found, trying to fetch from overrides',
    );

    final overrideEntries = word.readings
        .map((reading) => tanosJLPTOverrides[(word.kanji, reading)])
        .whereType<int>()
        .toSet();

    if (overrideEntries.length > 1) {
      throw Exception(
        'Multiple override entries found for $word: $overrideEntries',
      );
    } else if (overrideEntries.isEmpty &&
        !word.readings.any(
          (reading) => tanosJLPTOverrides.containsKey((word.kanji, reading)),
        )) {
      throw Exception('No override entry found for $word: $entryIds');
    }

    // An override key that is present without an id leaves the set empty, in
    // which case `null` is returned.
    print('Found override: ${overrideEntries.firstOrNull}');
    return overrideEntries.firstOrNull;
  }

  if (entryIds.length > 1) {
    throw Exception('Multiple entries found for $word: $entryIds');
  } else if (entryIds.isEmpty) {
    throw Exception('No entry found for $word');
  }

  return entryIds.first;
}
/// Resolves every word in [words] with [findEntry] and groups the resulting
/// entry ids by the word's JLPT level.
///
/// The returned map always contains the keys `N5`, `N4`, `N3`, `N2` and `N1`.
/// Words for which [findEntry] returns `null` are skipped. Anything thrown
/// while handling a word is printed with an `ERROR: ` prefix and that word is
/// skipped as well, so one failing word does not abort the run.
Future<Map<String, Set<int>>> resolveAllEntries(
  List<JLPTRankedWord> words,
  Database db,
) async {
  final entriesByLevel = <String, Set<int>>{
    for (final level in const ['N5', 'N4', 'N3', 'N2', 'N1']) level: <int>{},
  };

  for (final word in words) {
    try {
      final entryId = await findEntry(word, db, useOverrides: true);
      if (entryId == null) continue;
      entriesByLevel[word.jlptLevel]!.add(entryId);
    } catch (error) {
      print('ERROR: $error');
    }
  }

  return entriesByLevel;
}