Implement word search

This commit is contained in:
2025-04-25 22:47:06 +02:00
parent 1c2f90a617
commit b6410c717f
24 changed files with 1352 additions and 325 deletions

View File

@@ -1,215 +0,0 @@
import 'package:jadb/models/jmdict/word_search_result.dart';
import 'package:sqflite_common/sqlite_api.dart';
// TODO: Support globs
// TODO: Support tags
// TODO: Prefer original kana type when sorting results
// TODO: Support mixing kana and romaji
/// Draft word search: looks up JMdict entries containing [word] and loads all
/// rows related to them.
///
/// Matches against `JMdict_EntryByKana` when [isKana] is true, otherwise
/// against `JMdict_EntryByEnglish`, using a substring `LIKE` match.
///
/// Returns `null` when [word] is empty and an empty list when nothing matches.
/// Assembling the fetched rows into [WordSearchResult]s is not implemented, so
/// any search that does match ends in an [UnimplementedError].
Future<List<WordSearchResult>?> searchWordWithDbConnection(
  DatabaseExecutor connection,
  String word, {
  bool isKana = true,
}) async {
  if (word.isEmpty) {
    return null;
  }

  final matches = (await connection.query(
    isKana ? 'JMdict_EntryByKana' : 'JMdict_EntryByEnglish',
    where: isKana ? 'kana LIKE ?' : 'english LIKE ?',
    whereArgs: ['%$word%'],
  ))
      .map((row) => row['entryId'] as int)
      .toList();

  if (matches.isEmpty) {
    return [];
  }

  // Fetches every row of [table] whose integer [column] is one of [ids].
  Future<List<Map<String, Object?>>> selectByIds(
    String table,
    String column,
    List<int> ids,
  ) async {
    if (ids.isEmpty) return [];
    return connection.query(table, where: '$column IN (${ids.join(',')})');
  }

  // Fetches every row of [table] keyed by one of the (entryId, reading) pairs
  // found in [elements]. The pairs are rendered by hand, with the reading
  // quoted and escaped, and wrapped in VALUES so that the right-hand side of
  // the row-value IN is a subquery.
  Future<List<Map<String, Object?>>> selectByElements(
    String table,
    List<Map<String, Object?>> elements,
  ) async {
    if (elements.isEmpty) return [];
    final tuples = elements.map((element) {
      final entryId = element['entryId'] as int;
      final reading = (element['reading'] as String).replaceAll("'", "''");
      return "($entryId, '$reading')";
    }).join(',');
    return connection.query(
      table,
      where: '(entryId, reading) IN (VALUES $tuples)',
    );
  }

  final [senses, readingElements, kanjiElements] = await Future.wait([
    selectByIds('JMdict_Sense', 'entryId', matches),
    selectByIds('JMdict_ReadingElement', 'entryId', matches),
    selectByIds('JMdict_KanjiElement', 'entryId', matches),
  ]);

  final senseIds = senses.map((row) => row['id'] as int).toList();

  // Sense child tables are looked up by sense ID.
  // NOTE(review): the column name `senseId` is taken from the rewritten
  // search in lib/search/word_search.dart — confirm against the schema.
  const senseTables = [
    'JMdict_SenseAntonym',
    'JMdict_SenseDialect',
    'JMdict_SenseField',
    'JMdict_SenseGlossary',
    'JMdict_SenseInfo',
    'JMdict_SenseLanguageSource',
    'JMdict_SenseMisc',
    'JMdict_SensePOS',
    'JMdict_SenseRestrictedToKanji',
    'JMdict_SenseRestrictedToReading',
    'JMdict_SenseSeeAlso',
    'JMdict_ExampleSentence',
  ];

  await Future.wait([
    for (final table in senseTables) selectByIds(table, 'senseId', senseIds),
    selectByElements('JMdict_ReadingElementInfo', readingElements),
    selectByElements('JMdict_ReadingElementRestriction', readingElements),
    selectByElements('JMdict_KanjiElementInfo', kanjiElements),
  ]);

  // Regrouping the fetched rows into results has not been written yet.
  throw UnimplementedError();
}

View File

@@ -1,4 +1,4 @@
import 'package:jadb/models/kanjidic/kanji_search_result.dart';
import 'package:jadb/models/kanji_search/kanji_search_result.dart';
import 'package:sqflite_common/sqflite.dart';
Future<KanjiSearchResult?> searchKanjiWithDbConnection(

506
lib/search/word_search.dart Normal file
View File

@@ -0,0 +1,506 @@
import 'package:collection/collection.dart';
import 'package:jadb/models/jmdict/jmdict_dialect.dart';
import 'package:jadb/models/jmdict/jmdict_field.dart';
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
import 'package:jadb/models/jmdict/jmdict_misc.dart';
import 'package:jadb/models/jmdict/jmdict_pos.dart';
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/models/word_search/word_search_ruby.dart';
import 'package:jadb/models/word_search/word_search_sense.dart';
import 'package:jadb/models/word_search/word_search_sources.dart';
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
import 'package:sqflite_common/sqlite_api.dart';
// TODO: Support globs
// TODO: Support tags
// TODO: Prefer original kana type when sorting results
// TODO: Support mixing kana and romaji
/// Renders [value] as a single-quoted SQL string literal, doubling every
/// embedded single quote.
String _escapeStringValue(String value) {
  final doubled = value.replaceAll("'", "''");
  return "'$doubled'";
}
/// Searches JMdict for entries that start with [word] and returns them fully
/// assembled.
///
/// Matches against `JMdict_EntryByKana` when [isKana] is true, otherwise
/// against `JMdict_EntryByEnglish`, using a prefix `LIKE` match.
///
/// Returns `null` when [word] is empty, an empty list when nothing matches,
/// and otherwise one [WordSearchResult] per distinct matching entry, in the
/// order the entries were first returned by the lookup.
Future<List<WordSearchResult>?> searchWordWithDbConnection(
  DatabaseExecutor connection,
  String word, {
  bool isKana = true,
}) async {
  if (word.isEmpty) {
    return null;
  }

  final matchRows = await connection.query(
    isKana ? 'JMdict_EntryByKana' : 'JMdict_EntryByEnglish',
    where: isKana ? 'kana LIKE ?' : 'english LIKE ?',
    whereArgs: ['$word%'],
  );

  // An entry may be matched through several rows; a set literal drops the
  // repeats while keeping first-seen order, so no entry is emitted twice.
  final entryIds = <int>{
    for (final row in matchRows) row['entryId'] as int,
  }.toList();

  if (entryIds.isEmpty) {
    return [];
  }

  // Fetches every row of [table] whose integer [column] is one of [ids].
  Future<List<Map<String, Object?>>> selectByIds(
    String table,
    String column,
    List<int> ids,
  ) async {
    if (ids.isEmpty) return [];
    return connection.query(table, where: '$column IN (${ids.join(',')})');
  }

  // Fetches every row of [table] keyed by one of the (entryId, reading) pairs
  // found in [elements].
  //
  // The pairs are rendered by hand rather than through `Record.toString()`,
  // whose output format is not guaranteed. They are wrapped in VALUES because
  // SQLite documents that the right-hand side of a row-value IN must be a
  // subquery. An empty pair list is answered locally, since `VALUES` with no
  // rows is not valid SQL.
  Future<List<Map<String, Object?>>> selectByElements(
    String table,
    List<Map<String, Object?>> elements,
  ) async {
    if (elements.isEmpty) return [];
    final tuples = elements.map((element) {
      final entryId = element['entryId'] as int;
      final reading = _escapeStringValue(element['reading'] as String);
      return '($entryId, $reading)';
    }).join(',');
    return connection.query(
      table,
      where: '(entryId, reading) IN (VALUES $tuples)',
    );
  }

  final [senses, readingElements, kanjiElements] = await Future.wait([
    selectByIds('JMdict_Sense', 'entryId', entryIds),
    selectByIds('JMdict_ReadingElement', 'entryId', entryIds),
    selectByIds('JMdict_KanjiElement', 'entryId', entryIds),
  ]);

  final senseIds = senses.map((row) => row['id'] as int).toList();

  // All child-table lookups are independent of each other, so they are
  // issued together and awaited as one batch.
  final [
    senseAntonyms,
    senseDialects,
    senseFields,
    senseGlossaries,
    senseInfos,
    senseLanguageSources,
    senseMiscs,
    sensePOSs,
    senseRestrictedToKanjis,
    senseRestrictedToReadings,
    senseSeeAlsos,
    exampleSentences,
    readingElementInfos,
    readingElementRestrictions,
    kanjiElementInfos,
  ] = await Future.wait([
    selectByIds('JMdict_SenseAntonym', 'senseId', senseIds),
    selectByIds('JMdict_SenseDialect', 'senseId', senseIds),
    selectByIds('JMdict_SenseField', 'senseId', senseIds),
    selectByIds('JMdict_SenseGlossary', 'senseId', senseIds),
    selectByIds('JMdict_SenseInfo', 'senseId', senseIds),
    selectByIds('JMdict_SenseLanguageSource', 'senseId', senseIds),
    selectByIds('JMdict_SenseMisc', 'senseId', senseIds),
    selectByIds('JMdict_SensePOS', 'senseId', senseIds),
    selectByIds('JMdict_SenseRestrictedToKanji', 'senseId', senseIds),
    selectByIds('JMdict_SenseRestrictedToReading', 'senseId', senseIds),
    selectByIds('JMdict_SenseSeeAlso', 'senseId', senseIds),
    selectByIds('JMdict_ExampleSentence', 'senseId', senseIds),
    selectByElements('JMdict_ReadingElementInfo', readingElements),
    selectByElements('JMdict_ReadingElementRestriction', readingElements),
    selectByElements('JMdict_KanjiElementInfo', kanjiElements),
  ]);

  return _regroupWordSearchResults(
    entryIds: entryIds,
    readingElements: readingElements,
    kanjiElements: kanjiElements,
    senses: senses,
    senseAntonyms: senseAntonyms,
    senseDialects: senseDialects,
    senseFields: senseFields,
    senseGlossaries: senseGlossaries,
    senseInfos: senseInfos,
    senseLanguageSources: senseLanguageSources,
    senseMiscs: senseMiscs,
    sensePOSs: sensePOSs,
    senseRestrictedToKanjis: senseRestrictedToKanjis,
    senseRestrictedToReadings: senseRestrictedToReadings,
    senseSeeAlsos: senseSeeAlsos,
    exampleSentences: exampleSentences,
    readingElementInfos: readingElementInfos,
    readingElementRestrictions: readingElementRestrictions,
    kanjiElementInfos: kanjiElementInfos,
  );
}
/// Builds one [WordSearchResult] per ID in [entryIds], in that order, from the
/// flat row lists fetched for the whole search.
///
/// [readingElements], [kanjiElements] and [senses] are split per entry by
/// their `entryId` column. All other row lists are handed through unchanged to
/// [_regroup_words] and [_regroup_senses]. Every result is marked as coming
/// from JMdict only.
List<WordSearchResult> _regroupWordSearchResults({
  required List<int> entryIds,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
  required List<Map<String, Object?>> kanjiElementInfos,
}) {
  // Bucket the per-entry rows once up front instead of rescanning each full
  // list for every entry. Row order inside a bucket follows the input order.
  final readingsByEntry =
      readingElements.groupListsBy((element) => element['entryId']);
  final kanjiByEntry =
      kanjiElements.groupListsBy((element) => element['entryId']);
  final sensesByEntry = senses.groupListsBy((element) => element['entryId']);

  final List<WordSearchResult> results = [];

  for (final entryId in entryIds) {
    final GroupedWordResult entryReadingElementsGrouped = _regroup_words(
      entryId: entryId,
      readingElements: readingsByEntry[entryId] ?? const [],
      kanjiElements: kanjiByEntry[entryId] ?? const [],
      readingElementInfos: readingElementInfos,
      readingElementRestrictions: readingElementRestrictions,
      kanjiElementInfos: kanjiElementInfos,
    );

    final List<WordSearchSense> entrySensesGrouped = _regroup_senses(
      senses: sensesByEntry[entryId] ?? const [],
      senseAntonyms: senseAntonyms,
      senseDialects: senseDialects,
      senseFields: senseFields,
      senseGlossaries: senseGlossaries,
      senseInfos: senseInfos,
      senseLanguageSources: senseLanguageSources,
      senseMiscs: senseMiscs,
      sensePOSs: sensePOSs,
      senseRestrictedToKanjis: senseRestrictedToKanjis,
      senseRestrictedToReadings: senseRestrictedToReadings,
      senseSeeAlsos: senseSeeAlsos,
      exampleSentences: exampleSentences,
    );

    results.add(
      WordSearchResult(
        entryId: entryId,
        japanese: entryReadingElementsGrouped.rubys,
        kanjiInfo: entryReadingElementsGrouped.kanjiInfos,
        readingInfo: entryReadingElementsGrouped.readingInfos,
        senses: entrySensesGrouped,
        sources: const WordSearchSources(
          jmdict: true,
          jmnedict: false,
        ),
      ),
    );
  }

  return results;
}
/// The written forms of a single entry together with the info tags attached
/// to its readings and kanji spellings.
class GroupedWordResult {
  /// Bundles the already-grouped pieces; all three are required.
  const GroupedWordResult({
    required this.rubys,
    required this.readingInfos,
    required this.kanjiInfos,
  });

  /// The base/furigana combinations of the entry.
  final List<WordSearchRuby> rubys;

  /// Reading info tags, keyed by reading text.
  final Map<String, JMdictReadingInfo> readingInfos;

  /// Kanji info tags, keyed by the kanji element text.
  final Map<String, JMdictKanjiInfo> kanjiInfos;
}
/// Combines the kanji and reading rows of the entry [entryId] into rubys and
/// collects the info tags of that entry.
///
/// Every row list may contain rows of other entries; only rows whose
/// `entryId` column equals [entryId] are used.
///
/// Rubys are produced as follows:
/// * each reading not flagged `doesNotMatchKanji` is paired with each kanji
///   element, skipping pairs excluded by the reading's restriction rows (a
///   reading with restrictions is only paired with the kanji they list);
/// * readings flagged `doesNotMatchKanji` are then added on their own;
/// * when the entry has no kanji elements at all, every reading is added on
///   its own, so kana-only entries still have a written form.
GroupedWordResult _regroup_words({
  required int entryId,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> kanjiElementInfos,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
}) {
  bool belongsToEntry(Map<String, Object?> row) => row['entryId'] == entryId;

  final entryKanji = kanjiElements.where(belongsToEntry).toList();
  final entryReadings = readingElements.where(belongsToEntry).toList();
  final entryRestrictions =
      readingElementRestrictions.where(belongsToEntry).toList();

  final List<WordSearchRuby> rubys = [];

  for (final readingElement in entryReadings) {
    if (readingElement['doesNotMatchKanji'] == 1) {
      continue;
    }

    final reading = readingElement['reading'] as String;

    // The kanji this reading is limited to; empty means "no limitation".
    final allowedKanji = {
      for (final restriction in entryRestrictions)
        if (restriction['reading'] == reading) restriction['restriction'],
    };

    for (final kanjiElement in entryKanji) {
      final kanji = kanjiElement['reading'] as String;

      if (allowedKanji.isNotEmpty && !allowedKanji.contains(kanji)) {
        continue;
      }

      rubys.add(WordSearchRuby(base: kanji, furigana: reading));
    }
  }

  // Readings that stand on their own: those flagged as not matching the
  // kanji, or all of them when there is no kanji to pair with.
  for (final readingElement in entryReadings) {
    final standsAlone =
        entryKanji.isEmpty || readingElement['doesNotMatchKanji'] == 1;
    if (!standsAlone) {
      continue;
    }

    rubys.add(WordSearchRuby(base: readingElement['reading'] as String));
  }

  return GroupedWordResult(
    rubys: rubys,
    // Only this entry's info rows are used; the incoming lists cover every
    // entry of the search. A later row for the same key replaces an earlier
    // one.
    readingInfos: {
      for (final info in readingElementInfos.where(belongsToEntry))
        info['reading'] as String:
            JMdictReadingInfo.fromId(info['info'] as String),
    },
    kanjiInfos: {
      for (final info in kanjiElementInfos.where(belongsToEntry))
        info['reading'] as String:
            JMdictKanjiInfo.fromId(info['info'] as String),
    },
  );
}
/// Turns each row of [senses] into a [WordSearchSense], attaching the rows of
/// the child lists whose `senseId` equals the sense's `id`.
///
/// The result has one element per row of [senses], in the same order.
/// [exampleSentences] is accepted but not used here.
List<WordSearchSense> _regroup_senses({
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
}) {
  // Buckets child rows by the sense they belong to.
  Map<int, List<Map<String, Object?>>> bySenseId(
    List<Map<String, Object?>> rows,
  ) =>
      rows.groupListsBy((row) => row['senseId'] as int);

  // Converts a cross-reference row (see-also or antonym) to its model.
  WordSearchXrefEntry toXref(Map<String, Object?> row) => WordSearchXrefEntry(
        entryId: row['xrefEntryId'] as int,
        ambiguous: row['ambiguous'] == 1,
      );

  final antonymsBySense = bySenseId(senseAntonyms);
  final dialectsBySense = bySenseId(senseDialects);
  final fieldsBySense = bySenseId(senseFields);
  final glossariesBySense = bySenseId(senseGlossaries);
  final infosBySense = bySenseId(senseInfos);
  final languageSourcesBySense = bySenseId(senseLanguageSources);
  final miscsBySense = bySenseId(senseMiscs);
  final posBySense = bySenseId(sensePOSs);
  final restrictedKanjiBySense = bySenseId(senseRestrictedToKanjis);
  final restrictedReadingBySense = bySenseId(senseRestrictedToReadings);
  final seeAlsosBySense = bySenseId(senseSeeAlsos);

  // Assembles the model for one sense from its buckets; a missing bucket
  // counts as an empty list.
  WordSearchSense buildSense(int id) {
    final antonymRows = antonymsBySense[id] ?? [];
    final dialectRows = dialectsBySense[id] ?? [];
    final fieldRows = fieldsBySense[id] ?? [];
    final glossaryRows = glossariesBySense[id] ?? [];
    final infoRows = infosBySense[id] ?? [];
    final languageSourceRows = languageSourcesBySense[id] ?? [];
    final miscRows = miscsBySense[id] ?? [];
    final posRows = posBySense[id] ?? [];
    final restrictedKanjiRows = restrictedKanjiBySense[id] ?? [];
    final restrictedReadingRows = restrictedReadingBySense[id] ?? [];
    final seeAlsoRows = seeAlsosBySense[id] ?? [];

    return WordSearchSense(
      englishDefinitions: [
        for (final row in glossaryRows) row['phrase'] as String,
      ],
      partsOfSpeech: [
        for (final row in posRows) JMdictPOS.fromId(row['pos'] as String),
      ],
      seeAlso: seeAlsoRows.map(toXref).toList(),
      antonyms: antonymRows.map(toXref).toList(),
      restrictedToReading: [
        for (final row in restrictedReadingRows) row['reading'] as String,
      ],
      restrictedToKanji: [
        for (final row in restrictedKanjiRows) row['kanji'] as String,
      ],
      fields: [
        for (final row in fieldRows)
          JMdictField.fromId(row['field'] as String),
      ],
      dialects: [
        for (final row in dialectRows)
          JMdictDialect.fromId(row['dialect'] as String),
      ],
      misc: [
        for (final row in miscRows) JMdictMisc.fromId(row['misc'] as String),
      ],
      info: [
        for (final row in infoRows) row['info'] as String,
      ],
      languageSource: [
        for (final row in languageSourceRows) row['language'] as String,
      ],
    );
  }

  return [
    for (final sense in senses) buildSense(sense['id'] as int),
  ];
}