lib/search/word_search: score and order results by several metrics

This commit is contained in:
2025-05-16 18:46:39 +02:00
parent 90d5717928
commit 33cf5028f4
4 changed files with 198 additions and 46 deletions

View File

@@ -7,6 +7,9 @@ import 'package:jadb/models/word_search/word_search_sources.dart';
/// A class representing a single dictionary entry from a word search.
class WordSearchResult {
/// The score of the entry, used for sorting results.
final int score;
/// The ID of the entry in the database.
final int entryId;
@@ -32,6 +35,7 @@ class WordSearchResult {
final WordSearchSources sources;
const WordSearchResult({
required this.score,
required this.entryId,
required this.isCommon,
required this.japanese,
@@ -43,6 +47,7 @@ class WordSearchResult {
});
Map<String, dynamic> toJson() => {
'_score': score,
'entryId': entryId,
'isCommon': isCommon,
'japanese': japanese.map((e) => e.toJson()).toList(),
@@ -57,6 +62,7 @@ class WordSearchResult {
factory WordSearchResult.fromJson(Map<String, dynamic> json) =>
WordSearchResult(
score: json['_score'] as int,
entryId: json['entryId'] as int,
isCommon: json['isCommon'] as bool,
japanese: (json['japanese'] as List<dynamic>)

View File

@@ -1,7 +1,16 @@
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/table_names.dart';
import 'package:jadb/search/word_search/word_search.dart';
import 'package:jadb/util/text_filtering.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// Pairs a dictionary entry ID with the score assigned to it by a search
/// query.
class ScoredEntryId {
  /// Creates a pairing of [entryId] and its [score].
  const ScoredEntryId(this.entryId, this.score);

  /// The ID of the entry.
  final int entryId;

  /// The score attached to the entry.
  final int score;
}
SearchMode _determineSearchMode(String word) {
final bool containsKanji = kanjiRegex.hasMatch(word);
final bool containsAscii = RegExp(r'[A-Za-z]').hasMatch(word);
@@ -19,7 +28,123 @@ SearchMode _determineSearchMode(String word) {
}
}
Future<List<int>> fetchEntryIds(
/// Builds the scored lookup query for [word] against the element table
/// [tableName] and its companion `"${tableName}FTS"` table.
///
/// Returns a record of the SQL text and its positional arguments, in the
/// order the `?` placeholders appear in the SQL.
///
/// The query has two parts, combined with `UNION ALL`:
///
/// * `fts_results`: rows whose FTS `reading` column `MATCH`es [word] with
///   `*` appended. The base score is 150 when the reading equals [word]
///   and 100 otherwise. This part is capped at [pageSize] rows.
/// * A fallback over [tableName] where `reading LIKE '%word%'`, with a
///   base score of 50, excluding entry IDs already in `fts_results`.
///
/// Both parts add the same bonuses to the base score:
///
/// * 5 times the numeric suffix of the JLPT level, with a missing tag
///   treated as `'N0'` (assumes levels are formatted like `'N3'` — TODO
///   confirm).
/// * 50 when any of `news`, `ichi`, `spec` or `gai` equals 1.
/// * Per column, 10 for the value 1 and 5 for the value 2.
/// * 20 when `orderNum` equals 1.
///
/// The trailing `ORDER BY "score" DESC, "entryId" ASC LIMIT ?` closes the
/// compound select and uses [pageSize] as its limit.
///
/// NOTE(review): there is no `DISTINCT` or `GROUP BY`, so an entry with
/// several matching rows may be returned more than once — confirm that
/// callers tolerate duplicates.
(String, List<Object?>) _kanjiReadingTemplate(
  String tableName,
  String word,
  int pageSize,
) {
  final String sql = '''
WITH
fts_results AS (
SELECT
"${tableName}FTS"."entryId",
CASE
WHEN "${tableName}FTS"."reading" = ? THEN 150
ELSE 100
END
- (substr(COALESCE("${TanosJLPTTableNames.jlptTag}"."jlptLevel", 'N0'), 2) * -5)
+ CASE
WHEN "${tableName}"."news" = 1
OR "${tableName}"."ichi" = 1
OR "${tableName}"."spec" = 1
OR "${tableName}"."gai" = 1
THEN 50
ELSE 0
END
+ CASE
WHEN "${tableName}"."news" = 1 THEN 10
WHEN "${tableName}"."news" = 2 THEN 5
ELSE 0
END
+ CASE
WHEN "${tableName}"."ichi" = 1 THEN 10
WHEN "${tableName}"."ichi" = 2 THEN 5
ELSE 0
END
+ CASE
WHEN "${tableName}"."spec" = 1 THEN 10
WHEN "${tableName}"."spec" = 2 THEN 5
ELSE 0
END
+ CASE
WHEN "${tableName}"."gai" = 1 THEN 10
WHEN "${tableName}"."gai" = 2 THEN 5
ELSE 0
END
+ CASE
WHEN "${tableName}"."orderNum" = 1 THEN 20
ELSE 0
END
AS "score"
FROM "${tableName}FTS"
LEFT JOIN "${TanosJLPTTableNames.jlptTag}" USING ("entryId")
JOIN "${tableName}" USING ("entryId", "reading")
WHERE "${tableName}FTS"."reading" MATCH ? || '*'
ORDER BY "score" DESC
LIMIT ?
)
SELECT *
FROM "fts_results"
UNION ALL
SELECT
"entryId",
50
- (substr(COALESCE("${TanosJLPTTableNames.jlptTag}"."jlptLevel", 'N0'), 2) * -5)
+ CASE
WHEN "${tableName}"."news" = 1
OR "${tableName}"."ichi" = 1
OR "${tableName}"."spec" = 1
OR "${tableName}"."gai" = 1
THEN 50
ELSE 0
END
+ CASE
WHEN "${tableName}"."news" = 1 THEN 10
WHEN "${tableName}"."news" = 2 THEN 5
ELSE 0
END
+ CASE
WHEN "${tableName}"."ichi" = 1 THEN 10
WHEN "${tableName}"."ichi" = 2 THEN 5
ELSE 0
END
+ CASE
WHEN "${tableName}"."spec" = 1 THEN 10
WHEN "${tableName}"."spec" = 2 THEN 5
ELSE 0
END
+ CASE
WHEN "${tableName}"."gai" = 1 THEN 10
WHEN "${tableName}"."gai" = 2 THEN 5
ELSE 0
END
+ CASE
WHEN "orderNum" = 1 THEN 20
ELSE 0
END
AS "score"
FROM "${tableName}"
LEFT JOIN "${TanosJLPTTableNames.jlptTag}" USING ("entryId")
WHERE "reading" LIKE '%' || ? || '%'
AND "entryId" NOT IN (SELECT "entryId" FROM "fts_results")
ORDER BY
"score" DESC,
"entryId" ASC
LIMIT ?
'''
      .trim();

  // One value per `?` placeholder, in the order they occur in the SQL above.
  final List<Object?> params = [
    word, // exact-reading comparison in the fts_results score
    word, // FTS MATCH operand (the SQL appends '*')
    pageSize, // LIMIT inside fts_results
    word, // LIKE operand of the fallback select
    pageSize, // trailing LIMIT
  ];

  return (sql, params);
}
Future<List<ScoredEntryId>> fetchEntryIds(
DatabaseExecutor connection,
String word,
SearchMode searchMode,
@@ -35,41 +160,60 @@ Future<List<int>> fetchEntryIds(
'Word should not be empty when fetching entry IDs',
);
late final List<int> entryIds;
if (searchMode == SearchMode.Kanji) {
entryIds = (await connection.query(
'JMdict_EntryByKanji',
columns: ['entryId'],
where: 'kanji LIKE ?',
whereArgs: ['%$word%'],
limit: pageSize,
offset: offset,
))
.map((row) => row['entryId'] as int)
.toList();
} else if (searchMode == SearchMode.Kana) {
entryIds = (await connection.query(
'JMdict_EntryByKana',
columns: ['entryId'],
where: 'kana LIKE ?',
whereArgs: ['%$word%'],
limit: pageSize,
offset: offset,
))
.map((row) => row['entryId'] as int)
.toList();
} else {
entryIds = (await connection.query(
'JMdict_EntryByEnglish',
columns: ['entryId'],
where: 'english LIKE ?',
whereArgs: ['%$word%'],
limit: pageSize,
offset: offset,
))
.map((row) => row['entryId'] as int)
.toList();
late final List<ScoredEntryId> entryIds;
switch (searchMode) {
case SearchMode.Kanji:
final (query, args) = _kanjiReadingTemplate(
JMdictTableNames.kanjiElement,
word,
pageSize,
);
entryIds = (await connection.rawQuery(query, args))
.map((row) => ScoredEntryId(
row['entryId'] as int,
row['score'] as int,
))
.toList();
break;
case SearchMode.Kana:
final (query, args) = _kanjiReadingTemplate(
JMdictTableNames.readingElement,
word,
pageSize,
);
entryIds = (await connection.rawQuery(query, args))
.map((row) => ScoredEntryId(
row['entryId'] as int,
row['score'] as int,
))
.toList();
break;
case SearchMode.English:
entryIds = (await connection.query(
JMdictTableNames.senseGlossary,
columns: ['entryId'],
where: 'english LIKE ?',
whereArgs: ['%$word%'],
limit: pageSize,
offset: offset,
))
.map((row) => ScoredEntryId(
row['entryId'] as int,
0,
))
.toList();
break;
case SearchMode.MixedKana:
case SearchMode.MixedKanji:
default:
throw UnimplementedError(
'Search mode $searchMode is not implemented',
);
}
;
return entryIds;
}

View File

@@ -12,9 +12,10 @@ import 'package:jadb/models/word_search/word_search_sense.dart';
import 'package:jadb/models/word_search/word_search_sense_language_source.dart';
import 'package:jadb/models/word_search/word_search_sources.dart';
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
import 'package:jadb/search/word_search/entry_id_query.dart';
List<WordSearchResult> regroupWordSearchResults({
required List<int> entryIds,
required List<ScoredEntryId> entryIds,
required List<Map<String, Object?>> readingElements,
required List<Map<String, Object?>> kanjiElements,
required List<Map<String, Object?>> jlptTags,
@@ -41,17 +42,17 @@ List<WordSearchResult> regroupWordSearchResults({
final commonEntryIds =
commonEntries.map((entry) => entry['entryId'] as int).toSet();
for (final entryId in entryIds) {
for (final scoredEntryId in entryIds) {
final List<Map<String, Object?>> entryReadingElements = readingElements
.where((element) => element['entryId'] == entryId)
.where((element) => element['entryId'] == scoredEntryId.entryId)
.toList();
final List<Map<String, Object?>> entryKanjiElements = kanjiElements
.where((element) => element['entryId'] == entryId)
.where((element) => element['entryId'] == scoredEntryId.entryId)
.toList();
final List<Map<String, Object?>> entryJlptTags =
jlptTags.where((element) => element['entryId'] == entryId).toList();
jlptTags.where((element) => element['entryId'] == scoredEntryId.entryId).toList();
final jlptLevel = entryJlptTags
.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?))
@@ -59,13 +60,13 @@ List<WordSearchResult> regroupWordSearchResults({
.firstOrNull ??
JlptLevel.none;
final isCommon = commonEntryIds.contains(entryId);
final isCommon = commonEntryIds.contains(scoredEntryId.entryId);
final List<Map<String, Object?>> entrySenses =
senses.where((element) => element['entryId'] == entryId).toList();
senses.where((element) => element['entryId'] == scoredEntryId.entryId).toList();
final GroupedWordResult entryReadingElementsGrouped = _regroup_words(
entryId: entryId,
entryId: scoredEntryId.entryId,
readingElements: entryReadingElements,
kanjiElements: entryKanjiElements,
readingElementInfos: readingElementInfos,
@@ -91,7 +92,8 @@ List<WordSearchResult> regroupWordSearchResults({
results.add(
WordSearchResult(
entryId: entryId,
score: scoredEntryId.score,
entryId: scoredEntryId.entryId,
isCommon: isCommon,
japanese: entryReadingElementsGrouped.rubys,
kanjiInfo: entryReadingElementsGrouped.kanjiInfos,

View File

@@ -33,7 +33,7 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
}
final offset = page * pageSize;
final List<int> entryIds = await fetchEntryIds(
final List<ScoredEntryId> entryIds = await fetchEntryIds(
connection,
word,
searchMode,
@@ -47,7 +47,7 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
final LinearWordQueryData linearWordQueryData = await fetchLinearWordQueryData(
connection,
entryIds,
entryIds.map((e) => e.entryId).toList(),
);
final result = regroupWordSearchResults(