Files
jadb/lib/search/word_search/entry_id_query.dart

312 lines
8.5 KiB
Dart

import 'package:jadb/search/word_search/word_search.dart';
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/util/text_filtering.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// A dictionary entry id paired with the relevance score a search assigned it.
///
/// The queries in this file order their results by descending [score], so a
/// larger value means a better match.
class ScoredEntryId {
  /// Creates a pairing of [entryId] and its [score].
  const ScoredEntryId(this.entryId, this.score);

  /// Value of the `entryId` column for the matched entry.
  final int entryId;

  /// Value of the `score` column computed by the search query.
  final int score;
}
/// Picks a concrete [SearchMode] for [word] based on the characters in it.
///
/// The checks are applied in this order:
///  * matches `kanjiRegex` and contains an ASCII letter -> `mixedKanji`
///  * matches `kanjiRegex` only -> `kanji`
///  * contains an ASCII letter only -> `english`
///  * matches `hiraganaRegex` or `katakanaRegex` -> `kana`
///  * anything else -> `mixedKana`
///
/// NOTE(review): the final fallback is also reached by input that matches
/// none of the patterns at all (presumably digits or punctuation only);
/// confirm that `mixedKana` is the intended result for such input.
SearchMode _determineSearchMode(String word) {
  final hasKanji = kanjiRegex.hasMatch(word);
  final hasAsciiLetter = RegExp(r'[A-Za-z]').hasMatch(word);

  if (hasKanji) {
    return hasAsciiLetter ? SearchMode.mixedKanji : SearchMode.kanji;
  }
  if (hasAsciiLetter) {
    return SearchMode.english;
  }

  // Only inspected once kanji and ASCII letters have been ruled out.
  final hasKana = word.contains(hiraganaRegex) || word.contains(katakanaRegex);
  return hasKana ? SearchMode.kana : SearchMode.mixedKana;
}
/// Strips the characters `.`, `-`, `*`, `+`, `(`, `)`, `^` and `"` from [word].
///
/// FTS reacts to these characters inside a MATCH expression, so they are
/// removed before the word is bound as a query argument. All other
/// characters are returned unchanged and in their original order.
String _filterFTSSensitiveCharacters(String word) {
  // One character class covering every character that is dropped.
  final sensitiveCharacters = RegExp(r'[.\-*+()^"]');
  return word.replaceAll(sensitiveCharacters, '');
}
/// Builds the SQL text and positional arguments for a kanji or reading search.
///
/// [tableName] must be [JMdictTableNames.kanjiElement] or
/// [JMdictTableNames.readingElement]; the matching FTS table is addressed as
/// `"${tableName}FTS"` and the `JMdict_EntryScore` rows are restricted to
/// type `'k'` (kanji) or `'r'` (reading) accordingly.
///
/// The query combines two result sets:
///  * `fts_results`: FTS prefix matches, scored `100 + entry score`, plus
///    `10000` when the reading equals the search term exactly.
///  * `non_fts_results`: `LIKE '%term%'` matches for entries not already in
///    `fts_results`, scored `50 + entry score`.
///
/// With [countOnly] the query selects `COUNT(DISTINCT "entryId") AS count`
/// and [pageSize] / [offset] must be null. Otherwise it selects `"entryId"`
/// and `MAX("score")` per entry, ordered by score descending and then entry
/// id ascending, with `LIMIT ?` / `OFFSET ?` appended when [pageSize] /
/// [offset] are given.
///
/// Returns a record of the SQL string and its arguments: the filtered search
/// term three times, followed by [pageSize] and [offset] when non-null.
(String, List<Object?>) _kanjiReadingTemplate(
  String tableName,
  String word, {
  int? pageSize,
  int? offset,
  bool countOnly = false,
}) {
  assert(
    tableName == JMdictTableNames.kanjiElement ||
        tableName == JMdictTableNames.readingElement,
  );
  assert(!countOnly || pageSize == null);
  assert(!countOnly || offset == null);
  assert(pageSize == null || pageSize > 0);
  assert(offset == null || offset >= 0);
  assert(
    offset == null || pageSize != null,
    'Offset should only be used with pageSize set',
  );

  final ftsTable = '${tableName}FTS';
  final elementType = tableName == JMdictTableNames.kanjiElement ? 'k' : 'r';

  // Fragments that differ between the listing and the counting variant.
  final selection = countOnly
      ? 'COUNT(DISTINCT "entryId") AS count'
      : '"entryId", MAX("score") AS "score"';
  final grouping = countOnly ? '' : 'GROUP BY "entryId"';
  final ordering = countOnly ? '' : 'ORDER BY "score" DESC, "entryId" ASC';
  final limitClause = pageSize == null ? '' : 'LIMIT ?';
  final offsetClause = offset == null ? '' : 'OFFSET ?';

  final sql =
      '''
WITH
fts_results AS (
SELECT DISTINCT
"$tableName"."entryId",
100
+ (("$ftsTable"."reading" = ?) * 10000)
+ "JMdict_EntryScore"."score"
AS "score"
FROM "$ftsTable"
JOIN "$tableName" USING ("elementId")
JOIN "JMdict_EntryScore" USING ("elementId")
WHERE "$ftsTable"."reading" MATCH ? || '*'
AND "JMdict_EntryScore"."type" = '$elementType'
),
non_fts_results AS (
SELECT DISTINCT
"$tableName"."entryId",
50
+ "JMdict_EntryScore"."score"
AS "score"
FROM "$tableName"
JOIN "JMdict_EntryScore" USING ("elementId")
WHERE "reading" LIKE '%' || ? || '%'
AND "$tableName"."entryId" NOT IN (SELECT "entryId" FROM "fts_results")
AND "JMdict_EntryScore"."type" = '$elementType'
)
SELECT $selection
FROM (
SELECT * FROM "fts_results"
UNION
SELECT * FROM "non_fts_results"
)
$grouping
$ordering
$limitClause
$offsetClause
'''
          .trim();

  // The same filtered term feeds the exact-match bonus, the FTS MATCH and
  // the LIKE fallback, in that placeholder order.
  final term = _filterFTSSensitiveCharacters(word);
  final arguments = <Object?>[
    term,
    term,
    term,
    if (pageSize != null) pageSize,
    if (offset != null) offset,
  ];

  return (sql, arguments);
}
/// Searches the kanji element table for [word] and returns scored entry ids.
///
/// The SQL and its arguments come from [_kanjiReadingTemplate] with
/// [JMdictTableNames.kanjiElement]; [pageSize] and [offset] are forwarded to
/// it unchanged. Each result row's `entryId` and `score` columns are wrapped
/// in a [ScoredEntryId], in the order the query returned them.
///
/// Written with `async`/`await` like [_queryEnglish], so any error raised
/// while building the query is reported through the returned future.
Future<List<ScoredEntryId>> _queryKanji(
  DatabaseExecutor connection,
  String word,
  int? pageSize,
  int? offset,
) async {
  final (query, args) = _kanjiReadingTemplate(
    JMdictTableNames.kanjiElement,
    word,
    pageSize: pageSize,
    offset: offset,
  );
  final rows = await connection.rawQuery(query, args);
  return [
    for (final row in rows)
      ScoredEntryId(row['entryId'] as int, row['score'] as int),
  ];
}
/// Counts the distinct entries matched by a kanji search for [word].
///
/// Runs the `countOnly` variant of [_kanjiReadingTemplate] against
/// [JMdictTableNames.kanjiElement] and reads the `count` column of the first
/// row, falling back to `0` when no row or no value is returned.
///
/// Written with `async`/`await` like [_queryEnglishCount], so any error
/// raised while building the query is reported through the returned future.
Future<int> _queryKanjiCount(DatabaseExecutor connection, String word) async {
  final (query, args) = _kanjiReadingTemplate(
    JMdictTableNames.kanjiElement,
    word,
    countOnly: true,
  );
  final rows = await connection.rawQuery(query, args);
  return rows.firstOrNull?['count'] as int? ?? 0;
}
/// Searches the reading element table for [word] and returns scored entry ids.
///
/// The SQL and its arguments come from [_kanjiReadingTemplate] with
/// [JMdictTableNames.readingElement]; [pageSize] and [offset] are forwarded
/// to it unchanged. Each result row's `entryId` and `score` columns are
/// wrapped in a [ScoredEntryId], in the order the query returned them.
///
/// Written with `async`/`await` like [_queryEnglish], so any error raised
/// while building the query is reported through the returned future.
Future<List<ScoredEntryId>> _queryKana(
  DatabaseExecutor connection,
  String word,
  int? pageSize,
  int? offset,
) async {
  final (query, args) = _kanjiReadingTemplate(
    JMdictTableNames.readingElement,
    word,
    pageSize: pageSize,
    offset: offset,
  );
  final rows = await connection.rawQuery(query, args);
  return [
    for (final row in rows)
      ScoredEntryId(row['entryId'] as int, row['score'] as int),
  ];
}
/// Counts the distinct entries matched by a kana (reading) search for [word].
///
/// Runs the `countOnly` variant of [_kanjiReadingTemplate] against
/// [JMdictTableNames.readingElement] and reads the `count` column of the
/// first row, falling back to `0` when no row or no value is returned.
///
/// Written with `async`/`await` like [_queryEnglishCount], so any error
/// raised while building the query is reported through the returned future.
Future<int> _queryKanaCount(DatabaseExecutor connection, String word) async {
  final (query, args) = _kanjiReadingTemplate(
    JMdictTableNames.readingElement,
    word,
    countOnly: true,
  );
  final rows = await connection.rawQuery(query, args);
  return rows.firstOrNull?['count'] as int? ?? 0;
}
/// Searches glossary phrases for [word] and returns scored entry ids.
///
/// A row matches when its phrase is `LIKE '%word%'` (with any `%` removed
/// from [word] first). The score is the maximum `JMdict_EntryScore` score of
/// the entry plus bonuses for an exact phrase match: `+20` always, `+50` more
/// when the sense has `orderNum = 1`, or `+30` more when it has
/// `orderNum = 2`. Results are ordered by score descending, then entry id
/// ascending.
///
/// [pageSize] and [offset] are optional. `LIMIT ?` / `OFFSET ?` and their
/// arguments are only added when the corresponding value is non-null, so an
/// unpaginated call no longer binds NULL to `LIMIT`. [offset] may only be
/// given together with [pageSize].
Future<List<ScoredEntryId>> _queryEnglish(
  DatabaseExecutor connection,
  String word,
  int? pageSize,
  int? offset,
) async {
  assert(pageSize == null || pageSize > 0);
  assert(offset == null || offset >= 0);
  assert(
    offset == null || pageSize != null,
    'Offset should only be used with pageSize set',
  );

  // NOTE(review): the exact-match bonus terms are bare (non-aggregated)
  // columns next to MAX(...) in a grouped query, so which glossary row they
  // are evaluated on depends on SQLite's bare-column rule. Confirm this
  // yields the intended ranking when an entry has several matching phrases.
  final result = await connection.rawQuery(
    '''
SELECT
"${JMdictTableNames.sense}"."entryId",
MAX("JMdict_EntryScore"."score")
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?) * 20)
as "score"
FROM "${JMdictTableNames.senseGlossary}"
JOIN "${JMdictTableNames.sense}" USING ("senseId")
JOIN "JMdict_EntryScore" USING ("entryId")
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?
GROUP BY "JMdict_EntryScore"."entryId"
ORDER BY
"score" DESC,
"${JMdictTableNames.sense}"."entryId" ASC
${pageSize != null ? 'LIMIT ?' : ''}
${offset != null ? 'OFFSET ?' : ''}
'''
        .trim(),
    [
      word,
      word,
      word,
      '%${word.replaceAll('%', '')}%',
      // Must stay in step with the optional LIMIT / OFFSET placeholders.
      if (pageSize != null) pageSize,
      if (offset != null) offset,
    ],
  );
  return result
      .map((row) => ScoredEntryId(row['entryId'] as int, row['score'] as int))
      .toList();
}
/// Counts the distinct entries whose glossary phrases contain [word].
///
/// Any `%` in [word] is removed before the `LIKE` pattern is built, exactly
/// as [_queryEnglish] does, so the count describes the same set of entries
/// that the paginated listing returns.
Future<int> _queryEnglishCount(DatabaseExecutor connection, String word) async {
  final result = await connection.rawQuery(
    '''
SELECT
COUNT(DISTINCT "${JMdictTableNames.sense}"."entryId") AS "count"
FROM "${JMdictTableNames.senseGlossary}"
JOIN "${JMdictTableNames.sense}" USING ("senseId")
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?
'''
        .trim(),
    ['%${word.replaceAll('%', '')}%'],
  );
  // An aggregate query without GROUP BY always yields exactly one row.
  return result.first['count'] as int;
}
/// Fetches the scored ids of all entries matching [word].
///
/// When [searchMode] is [SearchMode.auto] the mode is derived from [word]
/// via [_determineSearchMode]. Kanji, kana and english searches are
/// dispatched to their query functions with [pageSize] and [offset] passed
/// through unchanged.
///
/// Throws an [UnimplementedError] for every other mode, including
/// [SearchMode.mixedKana] and [SearchMode.mixedKanji].
Future<List<ScoredEntryId>> fetchEntryIds(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
  int? pageSize,
  int? offset,
) async {
  final mode = searchMode == SearchMode.auto
      ? _determineSearchMode(word)
      : searchMode;
  assert(word.isNotEmpty, 'Word should not be empty when fetching entry IDs');

  return switch (mode) {
    SearchMode.kanji => await _queryKanji(connection, word, pageSize, offset),
    SearchMode.kana => await _queryKana(connection, word, pageSize, offset),
    SearchMode.english => await _queryEnglish(
      connection,
      word,
      pageSize,
      offset,
    ),
    _ => throw UnimplementedError('Search mode $mode is not implemented'),
  };
}
/// Counts the entries matching [word].
///
/// When [searchMode] is [SearchMode.auto] the mode is derived from [word]
/// via [_determineSearchMode]. Kanji, kana and english searches are
/// dispatched to their count query functions.
///
/// Throws an [UnimplementedError] for every other mode, including
/// [SearchMode.mixedKana] and [SearchMode.mixedKanji].
Future<int?> fetchEntryIdCount(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
) async {
  final mode = searchMode == SearchMode.auto
      ? _determineSearchMode(word)
      : searchMode;
  assert(word.isNotEmpty, 'Word should not be empty when fetching entry IDs');

  return switch (mode) {
    SearchMode.kanji => await _queryKanjiCount(connection, word),
    SearchMode.kana => await _queryKanaCount(connection, word),
    SearchMode.english => await _queryEnglishCount(connection, word),
    _ => throw UnimplementedError('Search mode $mode is not implemented'),
  };
}