lib/search/word_search: score and order results by several metrics
This commit is contained in:
@@ -7,6 +7,9 @@ import 'package:jadb/models/word_search/word_search_sources.dart';
|
||||
|
||||
/// A class representing a single dictionary entry from a word search.
|
||||
class WordSearchResult {
|
||||
/// The score of the entry, used for sorting results.
|
||||
final int score;
|
||||
|
||||
/// The ID of the entry in the database.
|
||||
final int entryId;
|
||||
|
||||
@@ -32,6 +35,7 @@ class WordSearchResult {
|
||||
final WordSearchSources sources;
|
||||
|
||||
const WordSearchResult({
|
||||
required this.score,
|
||||
required this.entryId,
|
||||
required this.isCommon,
|
||||
required this.japanese,
|
||||
@@ -43,6 +47,7 @@ class WordSearchResult {
|
||||
});
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
'_score': score,
|
||||
'entryId': entryId,
|
||||
'isCommon': isCommon,
|
||||
'japanese': japanese.map((e) => e.toJson()).toList(),
|
||||
@@ -57,6 +62,7 @@ class WordSearchResult {
|
||||
|
||||
factory WordSearchResult.fromJson(Map<String, dynamic> json) =>
|
||||
WordSearchResult(
|
||||
score: json['_score'] as int,
|
||||
entryId: json['entryId'] as int,
|
||||
isCommon: json['isCommon'] as bool,
|
||||
japanese: (json['japanese'] as List<dynamic>)
|
||||
|
||||
@@ -1,7 +1,16 @@
|
||||
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/table_names.dart';
|
||||
import 'package:jadb/search/word_search/word_search.dart';
|
||||
import 'package:jadb/util/text_filtering.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
/// A dictionary entry ID paired with the score computed for it by a search
/// query.
class ScoredEntryId {
  /// Creates a pairing of [entryId] and [score].
  const ScoredEntryId(this.entryId, this.score);

  /// The ID of the matched entry.
  final int entryId;

  /// The score assigned to the entry; carried over to the search result.
  final int score;
}
|
||||
|
||||
SearchMode _determineSearchMode(String word) {
|
||||
final bool containsKanji = kanjiRegex.hasMatch(word);
|
||||
final bool containsAscii = RegExp(r'[A-Za-z]').hasMatch(word);
|
||||
@@ -19,7 +28,123 @@ SearchMode _determineSearchMode(String word) {
|
||||
}
|
||||
}
|
||||
|
||||
Future<List<int>> fetchEntryIds(
|
||||
/// Builds the SQL query and positional arguments used to find and score
/// entries whose "reading" column in [tableName] matches [word].
///
/// The query has two parts, concatenated with `UNION ALL`:
///
/// 1. Prefix matches found through the `"${tableName}FTS"` table. These start
///    from a base score of 150 when the reading equals [word] exactly, and
///    100 otherwise.
/// 2. Substring (`LIKE '%word%'`) matches on [tableName] itself for entries
///    that were not already found by part 1. These start from a base score
///    of 50.
///
/// Both parts add the same bonuses on top of the base score, see
/// [_entryScoreSql]. Each entry ID is returned at most once per part, with
/// the highest score among its matching rows, so entries with several
/// matching forms neither show up as duplicates nor use up the row limit.
///
/// The result is ordered by score (descending) and entry ID (ascending), and
/// limited to [pageSize] rows.
///
/// Returns a record of the SQL text and its arguments, in placeholder order.
(String, List<Object?>) _kanjiReadingTemplate(
  String tableName,
  String word,
  int pageSize,
) {
  final table = '"$tableName"';
  final ftsTable = '"${tableName}FTS"';
  final jlptTable = '"${TanosJLPTTableNames.jlptTag}"';

  // NOTE: this expression contains the first `?` placeholder of the query.
  final ftsScore = _entryScoreSql(
    tableName,
    'CASE WHEN $ftsTable."reading" = ? THEN 150 ELSE 100 END',
  );
  final substringScore = _entryScoreSql(tableName, '50');

  final query = '''
WITH
  fts_results AS (
    SELECT
      $ftsTable."entryId" AS "entryId",
      MAX($ftsScore) AS "score"
    FROM $ftsTable
    LEFT JOIN $jlptTable USING ("entryId")
    JOIN $table USING ("entryId", "reading")
    WHERE $ftsTable."reading" MATCH ? || '*'
    GROUP BY $ftsTable."entryId"
    ORDER BY "score" DESC
    LIMIT ?
  )

SELECT *
FROM "fts_results"
UNION ALL
SELECT
  "entryId",
  MAX($substringScore) AS "score"
FROM $table
LEFT JOIN $jlptTable USING ("entryId")
WHERE "reading" LIKE '%' || ? || '%'
  AND "entryId" NOT IN (SELECT "entryId" FROM "fts_results")
GROUP BY "entryId"
ORDER BY
  "score" DESC,
  "entryId" ASC
LIMIT ?
'''
      .trim();

  // Argument order mirrors the `?` placeholders above: exact-match check,
  // FTS prefix match, FTS limit, LIKE substring match, overall limit.
  return (query, <Object?>[word, word, pageSize, word, pageSize]);
}

/// Builds the SQL expression that scores a single row of [tableName].
///
/// The score is [baseScoreSql] plus:
///
/// * 5 times the numeric part of the joined JLPT level (`'N0'` when there is
///   no JLPT tag, contributing nothing),
/// * 50 when any of the `news`, `ichi`, `spec` or `gai` columns equals 1,
/// * 10 or 5 for each of those columns that equals 1 or 2 respectively,
/// * 20 when `orderNum` equals 1.
///
/// The expression expects [tableName] and the JLPT tag table to be part of
/// the surrounding query's `FROM` clause.
String _entryScoreSql(String tableName, String baseScoreSql) {
  const priorityColumns = ['news', 'ichi', 'spec', 'gai'];

  final table = '"$tableName"';
  final jlptLevel = '"${TanosJLPTTableNames.jlptTag}"."jlptLevel"';

  final anyTopPriority =
      priorityColumns.map((column) => '$table."$column" = 1').join(' OR ');

  final perColumnBonuses = priorityColumns
      .map(
        (column) =>
            'CASE '
            'WHEN $table."$column" = 1 THEN 10 '
            'WHEN $table."$column" = 2 THEN 5 '
            'ELSE 0 END',
      )
      .join(' + ');

  return '($baseScoreSql)'
      // `substr(…, 2)` strips the leading "N" so the level digit can be
      // used as a number.
      " + (substr(COALESCE($jlptLevel, 'N0'), 2) * 5)"
      ' + CASE WHEN $anyTopPriority THEN 50 ELSE 0 END'
      ' + $perColumnBonuses'
      ' + CASE WHEN $table."orderNum" = 1 THEN 20 ELSE 0 END';
}
|
||||
|
||||
Future<List<ScoredEntryId>> fetchEntryIds(
|
||||
DatabaseExecutor connection,
|
||||
String word,
|
||||
SearchMode searchMode,
|
||||
@@ -35,41 +160,60 @@ Future<List<int>> fetchEntryIds(
|
||||
'Word should not be empty when fetching entry IDs',
|
||||
);
|
||||
|
||||
late final List<int> entryIds;
|
||||
if (searchMode == SearchMode.Kanji) {
|
||||
entryIds = (await connection.query(
|
||||
'JMdict_EntryByKanji',
|
||||
columns: ['entryId'],
|
||||
where: 'kanji LIKE ?',
|
||||
whereArgs: ['%$word%'],
|
||||
limit: pageSize,
|
||||
offset: offset,
|
||||
))
|
||||
.map((row) => row['entryId'] as int)
|
||||
.toList();
|
||||
} else if (searchMode == SearchMode.Kana) {
|
||||
entryIds = (await connection.query(
|
||||
'JMdict_EntryByKana',
|
||||
columns: ['entryId'],
|
||||
where: 'kana LIKE ?',
|
||||
whereArgs: ['%$word%'],
|
||||
limit: pageSize,
|
||||
offset: offset,
|
||||
))
|
||||
.map((row) => row['entryId'] as int)
|
||||
.toList();
|
||||
} else {
|
||||
entryIds = (await connection.query(
|
||||
'JMdict_EntryByEnglish',
|
||||
columns: ['entryId'],
|
||||
where: 'english LIKE ?',
|
||||
whereArgs: ['%$word%'],
|
||||
limit: pageSize,
|
||||
offset: offset,
|
||||
))
|
||||
.map((row) => row['entryId'] as int)
|
||||
.toList();
|
||||
late final List<ScoredEntryId> entryIds;
|
||||
switch (searchMode) {
|
||||
case SearchMode.Kanji:
|
||||
final (query, args) = _kanjiReadingTemplate(
|
||||
JMdictTableNames.kanjiElement,
|
||||
word,
|
||||
pageSize,
|
||||
);
|
||||
entryIds = (await connection.rawQuery(query, args))
|
||||
.map((row) => ScoredEntryId(
|
||||
row['entryId'] as int,
|
||||
row['score'] as int,
|
||||
))
|
||||
.toList();
|
||||
break;
|
||||
|
||||
case SearchMode.Kana:
|
||||
final (query, args) = _kanjiReadingTemplate(
|
||||
JMdictTableNames.readingElement,
|
||||
word,
|
||||
pageSize,
|
||||
);
|
||||
entryIds = (await connection.rawQuery(query, args))
|
||||
.map((row) => ScoredEntryId(
|
||||
row['entryId'] as int,
|
||||
row['score'] as int,
|
||||
))
|
||||
.toList();
|
||||
break;
|
||||
|
||||
case SearchMode.English:
|
||||
entryIds = (await connection.query(
|
||||
JMdictTableNames.senseGlossary,
|
||||
columns: ['entryId'],
|
||||
where: 'english LIKE ?',
|
||||
whereArgs: ['%$word%'],
|
||||
limit: pageSize,
|
||||
offset: offset,
|
||||
))
|
||||
.map((row) => ScoredEntryId(
|
||||
row['entryId'] as int,
|
||||
0,
|
||||
))
|
||||
.toList();
|
||||
break;
|
||||
|
||||
case SearchMode.MixedKana:
|
||||
case SearchMode.MixedKanji:
|
||||
default:
|
||||
throw UnimplementedError(
|
||||
'Search mode $searchMode is not implemented',
|
||||
);
|
||||
}
|
||||
;
|
||||
|
||||
return entryIds;
|
||||
}
|
||||
|
||||
@@ -12,9 +12,10 @@ import 'package:jadb/models/word_search/word_search_sense.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sense_language_source.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sources.dart';
|
||||
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
|
||||
import 'package:jadb/search/word_search/entry_id_query.dart';
|
||||
|
||||
List<WordSearchResult> regroupWordSearchResults({
|
||||
required List<int> entryIds,
|
||||
required List<ScoredEntryId> entryIds,
|
||||
required List<Map<String, Object?>> readingElements,
|
||||
required List<Map<String, Object?>> kanjiElements,
|
||||
required List<Map<String, Object?>> jlptTags,
|
||||
@@ -41,17 +42,17 @@ List<WordSearchResult> regroupWordSearchResults({
|
||||
final commonEntryIds =
|
||||
commonEntries.map((entry) => entry['entryId'] as int).toSet();
|
||||
|
||||
for (final entryId in entryIds) {
|
||||
for (final scoredEntryId in entryIds) {
|
||||
final List<Map<String, Object?>> entryReadingElements = readingElements
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final List<Map<String, Object?>> entryKanjiElements = kanjiElements
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final List<Map<String, Object?>> entryJlptTags =
|
||||
jlptTags.where((element) => element['entryId'] == entryId).toList();
|
||||
jlptTags.where((element) => element['entryId'] == scoredEntryId.entryId).toList();
|
||||
|
||||
final jlptLevel = entryJlptTags
|
||||
.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?))
|
||||
@@ -59,13 +60,13 @@ List<WordSearchResult> regroupWordSearchResults({
|
||||
.firstOrNull ??
|
||||
JlptLevel.none;
|
||||
|
||||
final isCommon = commonEntryIds.contains(entryId);
|
||||
final isCommon = commonEntryIds.contains(scoredEntryId.entryId);
|
||||
|
||||
final List<Map<String, Object?>> entrySenses =
|
||||
senses.where((element) => element['entryId'] == entryId).toList();
|
||||
senses.where((element) => element['entryId'] == scoredEntryId.entryId).toList();
|
||||
|
||||
final GroupedWordResult entryReadingElementsGrouped = _regroup_words(
|
||||
entryId: entryId,
|
||||
entryId: scoredEntryId.entryId,
|
||||
readingElements: entryReadingElements,
|
||||
kanjiElements: entryKanjiElements,
|
||||
readingElementInfos: readingElementInfos,
|
||||
@@ -91,7 +92,8 @@ List<WordSearchResult> regroupWordSearchResults({
|
||||
|
||||
results.add(
|
||||
WordSearchResult(
|
||||
entryId: entryId,
|
||||
score: scoredEntryId.score,
|
||||
entryId: scoredEntryId.entryId,
|
||||
isCommon: isCommon,
|
||||
japanese: entryReadingElementsGrouped.rubys,
|
||||
kanjiInfo: entryReadingElementsGrouped.kanjiInfos,
|
||||
|
||||
@@ -33,7 +33,7 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
|
||||
}
|
||||
|
||||
final offset = page * pageSize;
|
||||
final List<int> entryIds = await fetchEntryIds(
|
||||
final List<ScoredEntryId> entryIds = await fetchEntryIds(
|
||||
connection,
|
||||
word,
|
||||
searchMode,
|
||||
@@ -47,7 +47,7 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
|
||||
|
||||
final LinearWordQueryData linearWordQueryData = await fetchLinearWordQueryData(
|
||||
connection,
|
||||
entryIds,
|
||||
entryIds.map((e) => e.entryId).toList(),
|
||||
);
|
||||
|
||||
final result = regroupWordSearchResults(
|
||||
|
||||
Reference in New Issue
Block a user