diff --git a/lib/models/word_search/word_search_result.dart b/lib/models/word_search/word_search_result.dart index 38e73b6..dcaeb50 100644 --- a/lib/models/word_search/word_search_result.dart +++ b/lib/models/word_search/word_search_result.dart @@ -7,6 +7,9 @@ import 'package:jadb/models/word_search/word_search_sources.dart'; /// A class representing a single dictionary entry from a word search. class WordSearchResult { + /// The score of the entry, used for sorting results. + final int score; + /// The ID of the entry in the database. final int entryId; @@ -32,6 +35,7 @@ class WordSearchResult { final WordSearchSources sources; const WordSearchResult({ + required this.score, required this.entryId, required this.isCommon, required this.japanese, @@ -43,6 +47,7 @@ class WordSearchResult { }); Map toJson() => { + '_score': score, 'entryId': entryId, 'isCommon': isCommon, 'japanese': japanese.map((e) => e.toJson()).toList(), @@ -57,6 +62,7 @@ class WordSearchResult { factory WordSearchResult.fromJson(Map json) => WordSearchResult( + score: json['_score'] as int, entryId: json['entryId'] as int, isCommon: json['isCommon'] as bool, japanese: (json['japanese'] as List) diff --git a/lib/search/word_search/entry_id_query.dart b/lib/search/word_search/entry_id_query.dart index 717fdc0..0e7fcdb 100644 --- a/lib/search/word_search/entry_id_query.dart +++ b/lib/search/word_search/entry_id_query.dart @@ -1,7 +1,16 @@ +import 'package:jadb/_data_ingestion/jmdict/table_names.dart'; +import 'package:jadb/_data_ingestion/tanos-jlpt/table_names.dart'; import 'package:jadb/search/word_search/word_search.dart'; import 'package:jadb/util/text_filtering.dart'; import 'package:sqflite_common/sqlite_api.dart'; +class ScoredEntryId { + final int entryId; + final int score; + + const ScoredEntryId(this.entryId, this.score); +} + SearchMode _determineSearchMode(String word) { final bool containsKanji = kanjiRegex.hasMatch(word); final bool containsAscii = RegExp(r'[A-Za-z]').hasMatch(word); @@ -19,7 +28,123 @@ SearchMode _determineSearchMode(String word) { } } -Future> fetchEntryIds( +(String, List) _kanjiReadingTemplate( + String tableName, + String word, + int pageSize, +) => + ( + ''' + WITH + fts_results AS ( + SELECT + "${tableName}FTS"."entryId", + CASE + WHEN "${tableName}FTS"."reading" = ? THEN 150 + ELSE 100 + END + - (substr(COALESCE("${TanosJLPTTableNames.jlptTag}"."jlptLevel", 'N0'), 2) * -5) + + CASE + WHEN "${tableName}"."news" = 1 + OR "${tableName}"."ichi" = 1 + OR "${tableName}"."spec" = 1 + OR "${tableName}"."gai" = 1 + THEN 50 + ELSE 0 + END + + CASE + WHEN "${tableName}"."news" = 1 THEN 10 + WHEN "${tableName}"."news" = 2 THEN 5 + ELSE 0 + END + + CASE + WHEN "${tableName}"."ichi" = 1 THEN 10 + WHEN "${tableName}"."ichi" = 2 THEN 5 + ELSE 0 + END + + CASE + WHEN "${tableName}"."spec" = 1 THEN 10 + WHEN "${tableName}"."spec" = 2 THEN 5 + ELSE 0 + END + + CASE + WHEN "${tableName}"."gai" = 1 THEN 10 + WHEN "${tableName}"."gai" = 2 THEN 5 + ELSE 0 + END + + CASE + WHEN "${tableName}"."orderNum" = 1 THEN 20 + ELSE 0 + END + AS "score" + FROM "${tableName}FTS" + LEFT JOIN "${TanosJLPTTableNames.jlptTag}" USING ("entryId") + JOIN "${tableName}" USING ("entryId", "reading") + WHERE "${tableName}FTS"."reading" MATCH ? || '*' + ORDER BY "score" DESC + LIMIT ? + ) + + SELECT * + FROM "fts_results" + UNION ALL + SELECT + "entryId", + 50 + - (substr(COALESCE("${TanosJLPTTableNames.jlptTag}"."jlptLevel", 'N0'), 2) * -5) + + CASE + WHEN "${tableName}"."news" = 1 + OR "${tableName}"."ichi" = 1 + OR "${tableName}"."spec" = 1 + OR "${tableName}"."gai" = 1 + THEN 50 + ELSE 0 + END + + CASE + WHEN "${tableName}"."news" = 1 THEN 10 + WHEN "${tableName}"."news" = 2 THEN 5 + ELSE 0 + END + + CASE + WHEN "${tableName}"."ichi" = 1 THEN 10 + WHEN "${tableName}"."ichi" = 2 THEN 5 + ELSE 0 + END + + CASE + WHEN "${tableName}"."spec" = 1 THEN 10 + WHEN "${tableName}"."spec" = 2 THEN 5 + ELSE 0 + END + + CASE + WHEN "${tableName}"."gai" = 1 THEN 10 + WHEN "${tableName}"."gai" = 2 THEN 5 + ELSE 0 + END + + CASE + WHEN "orderNum" = 1 THEN 20 + ELSE 0 + END + AS "score" + FROM "${tableName}" + LEFT JOIN "${TanosJLPTTableNames.jlptTag}" USING ("entryId") + WHERE "reading" LIKE '%' || ? || '%' + AND "entryId" NOT IN (SELECT "entryId" FROM "fts_results") + ORDER BY + "score" DESC, + "entryId" ASC + LIMIT ? + ''' + .trim(), + [ + word, + word, + pageSize, + word, + pageSize, + ] + ); + +Future> fetchEntryIds( DatabaseExecutor connection, String word, SearchMode searchMode, @@ -35,41 +160,60 @@ Future> fetchEntryIds( 'Word should not be empty when fetching entry IDs', ); - late final List entryIds; - if (searchMode == SearchMode.Kanji) { - entryIds = (await connection.query( - 'JMdict_EntryByKanji', - columns: ['entryId'], - where: 'kanji LIKE ?', - whereArgs: ['%$word%'], - limit: pageSize, - offset: offset, - )) - .map((row) => row['entryId'] as int) - .toList(); - } else if (searchMode == SearchMode.Kana) { - entryIds = (await connection.query( - 'JMdict_EntryByKana', - columns: ['entryId'], - where: 'kana LIKE ?', - whereArgs: ['%$word%'], - limit: pageSize, - offset: offset, - )) - .map((row) => row['entryId'] as int) - .toList(); - } else { - entryIds = (await connection.query( - 'JMdict_EntryByEnglish', - columns: ['entryId'], - where: 'english LIKE ?', - whereArgs: ['%$word%'], - limit: pageSize, - offset: offset, - )) - .map((row) => row['entryId'] as int) - .toList(); + late final List entryIds; + switch (searchMode) { + case SearchMode.Kanji: + final (query, args) = _kanjiReadingTemplate( + JMdictTableNames.kanjiElement, + word, + pageSize, + ); + entryIds = (await connection.rawQuery(query, args)) + .map((row) => ScoredEntryId( + row['entryId'] as int, + row['score'] as int, + )) + .toList(); + break; + + case SearchMode.Kana: + final (query, args) = _kanjiReadingTemplate( + JMdictTableNames.readingElement, + word, + pageSize, + ); + entryIds = (await connection.rawQuery(query, args)) + .map((row) => ScoredEntryId( + row['entryId'] as int, + row['score'] as int, + )) + .toList(); + break; + + case SearchMode.English: + entryIds = (await connection.query( + JMdictTableNames.senseGlossary, + columns: ['entryId'], + where: 'english LIKE ?', + whereArgs: ['%$word%'], + limit: pageSize, + offset: offset, + )) + .map((row) => ScoredEntryId( + row['entryId'] as int, + 0, + )) + .toList(); + break; + + case SearchMode.MixedKana: + case SearchMode.MixedKanji: + default: + throw UnimplementedError( + 'Search mode $searchMode is not implemented', + ); } + ; return entryIds; } diff --git a/lib/search/word_search/regrouping.dart b/lib/search/word_search/regrouping.dart index 4b4b31a..eed03eb 100644 --- a/lib/search/word_search/regrouping.dart +++ b/lib/search/word_search/regrouping.dart @@ -12,9 +12,10 @@ import 'package:jadb/models/word_search/word_search_sense.dart'; import 'package:jadb/models/word_search/word_search_sense_language_source.dart'; import 'package:jadb/models/word_search/word_search_sources.dart'; import 'package:jadb/models/word_search/word_search_xref_entry.dart'; +import 'package:jadb/search/word_search/entry_id_query.dart'; List regroupWordSearchResults({ - required List entryIds, + required List entryIds, required List> readingElements, required List> kanjiElements, required List> jlptTags, @@ -41,17 +42,17 @@ List regroupWordSearchResults({ final commonEntryIds = commonEntries.map((entry) => entry['entryId'] as int).toSet(); - for (final entryId in entryIds) { + for (final scoredEntryId in entryIds) { final List> entryReadingElements = readingElements - .where((element) => element['entryId'] == entryId) + .where((element) => element['entryId'] == scoredEntryId.entryId) .toList(); final List> entryKanjiElements = kanjiElements - .where((element) => element['entryId'] == entryId) + .where((element) => element['entryId'] == scoredEntryId.entryId) .toList(); final List> entryJlptTags = - jlptTags.where((element) => element['entryId'] == entryId).toList(); + jlptTags.where((element) => element['entryId'] == scoredEntryId.entryId).toList(); final jlptLevel = entryJlptTags .map((e) => JlptLevel.fromString(e['jlptLevel'] as String?)) @@ -59,13 +60,13 @@ List regroupWordSearchResults({ .firstOrNull ?? JlptLevel.none; - final isCommon = commonEntryIds.contains(entryId); + final isCommon = commonEntryIds.contains(scoredEntryId.entryId); final List> entrySenses = - senses.where((element) => element['entryId'] == entryId).toList(); + senses.where((element) => element['entryId'] == scoredEntryId.entryId).toList(); final GroupedWordResult entryReadingElementsGrouped = _regroup_words( - entryId: entryId, + entryId: scoredEntryId.entryId, readingElements: entryReadingElements, kanjiElements: entryKanjiElements, readingElementInfos: readingElementInfos, @@ -91,7 +92,8 @@ List regroupWordSearchResults({ results.add( WordSearchResult( - entryId: entryId, + score: scoredEntryId.score, + entryId: scoredEntryId.entryId, isCommon: isCommon, japanese: entryReadingElementsGrouped.rubys, kanjiInfo: entryReadingElementsGrouped.kanjiInfos, diff --git a/lib/search/word_search/word_search.dart b/lib/search/word_search/word_search.dart index 2bbb16c..ca5d3c9 100644 --- a/lib/search/word_search/word_search.dart +++ b/lib/search/word_search/word_search.dart @@ -33,7 +33,7 @@ Future?> searchWordWithDbConnection( } final offset = page * pageSize; - final List entryIds = await fetchEntryIds( + final List entryIds = await fetchEntryIds( connection, word, searchMode, @@ -47,7 +47,7 @@ Future?> searchWordWithDbConnection( final LinearWordQueryData linearWordQueryData = await fetchLinearWordQueryData( connection, - entryIds, + entryIds.map((e) => e.entryId).toList(), ); final result = regroupWordSearchResults(