137 lines
3.7 KiB
Dart
137 lines
3.7 KiB
Dart
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
|
|
import 'package:jadb/_data_ingestion/tanos-jlpt/overrides.dart';
|
|
import 'package:jadb/table_names/jmdict.dart';
|
|
import 'package:sqflite_common/sqlite_api.dart';
|
|
|
|
/// Collects the `entryId` of every reading-element row whose `reading`
/// column equals one of [word]'s readings.
///
/// One id is produced per matching row; no de-duplication is performed here.
Future<List<int>> _findReadingCandidates(
  JLPTRankedWord word,
  Database db,
) async {
  // One `?` placeholder per reading; the values themselves are bound through
  // `whereArgs` rather than spliced into the SQL text.
  final placeholders = word.readings.map((_) => '?').join(',');

  final matches = await db.query(
    JMdictTableNames.readingElement,
    columns: ['entryId'],
    where: '"reading" IN ($placeholders)',
    whereArgs: [...word.readings],
  );

  return [for (final match in matches) match['entryId'] as int];
}
|
|
|
|
/// Collects the `entryId` of every kanji-element row whose `reading` column
/// equals [word]'s kanji.
///
/// One id is produced per matching row.
Future<List<int>> _findKanjiCandidates(
  JLPTRankedWord word,
  Database db,
) async {
  final matches = await db.query(
    JMdictTableNames.kanjiElement,
    columns: ['entryId'],
    where: 'reading = ?',
    whereArgs: [word.kanji],
  );

  return [for (final match in matches) match['entryId'] as int];
}
|
|
|
|
/// Finds glossary phrases that are exactly equal to one of [word]'s meanings.
///
/// Joins the sense-glossary table to the sense table on `senseId` and returns
/// one `(entryId, phrase)` record per matching row.
Future<List<(int, String)>> _findSenseCandidates(
  JLPTRankedWord word,
  Database db,
) async {
  // One `?` placeholder per meaning; the values are bound as query arguments.
  final placeholders = List.filled(word.meanings.length, '?').join(',');

  // Every fragment except the last ends with a space so that the adjacent
  // literals concatenate into well-separated SQL (previously the JOIN clause
  // ran straight into `WHERE`).
  final rows = await db.rawQuery(
    'SELECT entryId, phrase '
    'FROM "${JMdictTableNames.senseGlossary}" '
    'JOIN "${JMdictTableNames.sense}" USING (senseId) '
    'WHERE phrase IN ($placeholders)',
    [...word.meanings],
  );

  return [
    for (final row in rows) (row['entryId'] as int, row['phrase'] as String),
  ];
}
|
|
|
|
/// Resolves [word] to a single JMdict entry id.
///
/// Candidates are the entries that have a reading equal to one of the word's
/// readings. When the word has kanji, the candidates are narrowed to those
/// that also match the kanji; if that leaves nothing, the reading candidates
/// are narrowed to those with a matching glossary phrase instead.
///
/// Candidate ids are de-duplicated, so an entry that matches through several
/// of the word's readings counts as one entry rather than as an ambiguity.
///
/// If this does not leave exactly one entry and [useOverrides] is true, the
/// result comes from [tanosJLPTOverrides], looked up by `(kanji, reading)`
/// for each reading. An override key that is present without an id makes
/// this function return `null`.
///
/// Throws an [Exception] when:
/// * the overrides yield more than one distinct id,
/// * overrides are consulted but contain no key for the word, or
/// * overrides are disabled and zero or several entries remain.
Future<int?> findEntry(
  JLPTRankedWord word,
  Database db, {
  bool useOverrides = true,
}) async {
  final readingCandidates = await _findReadingCandidates(word, db);

  List<int> entryIds;

  if (word.kanji != null) {
    final kanjiCandidates = (await _findKanjiCandidates(word, db)).toSet();

    // `toSet()` drops repeated ids while keeping first-seen order.
    entryIds = readingCandidates
        .where(kanjiCandidates.contains)
        .toSet()
        .toList();

    if (entryIds.isEmpty) {
      print('No entry found, trying to combine with senses');

      // The sense lookup is only needed for this fallback, so it is queried
      // here instead of up front for every word.
      final senseCandidates = await _findSenseCandidates(word, db);
      final senseEntryIds = senseCandidates.map((sense) => sense.$1).toSet();

      entryIds = readingCandidates
          .where(senseEntryIds.contains)
          .toSet()
          .toList();
    }
  } else {
    entryIds = readingCandidates.toSet().toList();
  }

  if (entryIds.length != 1 && useOverrides) {
    print('No unique entry found, trying to fetch from overrides');

    // Distinct non-null override ids across all of the word's readings.
    final overrideEntries = word.readings
        .map((reading) => tanosJLPTOverrides[(word.kanji, reading)])
        .whereType<int>()
        .toSet();

    if (overrideEntries.length > 1) {
      throw Exception(
        'Multiple override entries found for $word: $overrideEntries',
      );
    } else if (overrideEntries.isEmpty &&
        !word.readings.any(
          (reading) => tanosJLPTOverrides.containsKey((word.kanji, reading)),
        )) {
      throw Exception('No override entry found for $word: $entryIds');
    }

    // Either the single override id, or null when a key exists without an id.
    final override = overrideEntries.firstOrNull;
    print('Found override: $override');

    return override;
  }

  if (entryIds.length > 1) {
    throw Exception('Multiple entries found for $word: $entryIds');
  } else if (entryIds.isEmpty) {
    throw Exception('No entry found for $word');
  }

  return entryIds.first;
}
|
|
|
|
/// Resolves every word in [words] via [findEntry] and groups the resulting
/// entry ids by the word's `jlptLevel`.
///
/// The returned map always has the keys `N5`, `N4`, `N3`, `N2` and `N1`.
/// Words that resolve to `null` are skipped. Anything thrown while handling a
/// word is printed with an `ERROR: ` prefix and processing continues with the
/// next word.
Future<Map<String, Set<int>>> resolveAllEntries(
  List<JLPTRankedWord> words,
  Database db,
) async {
  final entriesByLevel = <String, Set<int>>{
    for (final level in const ['N5', 'N4', 'N3', 'N2', 'N1']) level: <int>{},
  };

  for (final word in words) {
    try {
      final entryId = await findEntry(word, db, useOverrides: true);
      if (entryId == null) continue;

      // A level outside the five keys above fails the null assertion; that
      // failure is reported by the catch below like any other error.
      entriesByLevel[word.jlptLevel]!.add(entryId);
    } catch (error) {
      print('ERROR: $error');
    }
  }

  return entriesByLevel;
}
|