312 lines
8.5 KiB
Dart
312 lines
8.5 KiB
Dart
import 'package:jadb/search/word_search/word_search.dart';
|
|
import 'package:jadb/table_names/jmdict.dart';
|
|
import 'package:jadb/util/text_filtering.dart';
|
|
import 'package:sqflite_common/sqlite_api.dart';
|
|
|
|
/// Pairs a dictionary entry id with the relevance score a search query
/// computed for it.
class ScoredEntryId {
  const ScoredEntryId(this.entryId, this.score);

  /// Value read from the `entryId` column of a result row.
  final int entryId;

  /// Value read from the computed `score` column of a result row; the search
  /// queries in this file sort by it in descending order.
  final int score;
}
|
|
|
|
/// Picks a concrete [SearchMode] for [word] based on the scripts it contains.
///
/// Checks run in priority order: kanji together with ASCII letters, kanji
/// alone, ASCII letters alone, then hiragana/katakana. Input matching none of
/// these falls through to [SearchMode.mixedKana].
SearchMode _determineSearchMode(String word) {
  final hasKanji = kanjiRegex.hasMatch(word);
  final hasLatinLetter = RegExp(r'[A-Za-z]').hasMatch(word);

  if (hasKanji) {
    return hasLatinLetter ? SearchMode.mixedKanji : SearchMode.kanji;
  }
  if (hasLatinLetter) {
    // Kana mixed with ASCII letters also lands here, since kana is only
    // inspected after the ASCII check.
    return SearchMode.english;
  }

  final hasKana =
      word.contains(hiraganaRegex) || word.contains(katakanaRegex);
  return hasKana ? SearchMode.kana : SearchMode.mixedKana;
}
|
|
|
|
/// Strips the characters that the FTS query syntax would otherwise interpret.
///
/// Removes every occurrence of `.`, `-`, `*`, `+`, `(`, `)`, `^` and `"` from
/// [word]; all other characters are returned unchanged and in order.
String _filterFTSSensitiveCharacters(String word) {
  // One character class covering exactly the eight characters to drop. The
  // hyphen is escaped so it is not read as a range, and `^` is not in the
  // leading position so it is a literal rather than a negation.
  final ftsSensitive = RegExp(r'[.\-*+()^"]');
  return word.replaceAll(ftsSensitive, '');
}
|
|
|
|
/// Builds the SQL text and positional arguments for a search against either
/// the kanji-element or the reading-element table named by [tableName].
///
/// The statement unions two candidate sets:
///  * FTS prefix matches on the `reading` column of the `<tableName>FTS`
///    table, scored `100 + entry score`, plus `10000` when the reading equals
///    the search term;
///  * substring (`LIKE`) matches on `reading` in [tableName] that the FTS set
///    did not already contain, scored `50 + entry score`.
///
/// With [countOnly] the statement yields one `count` column holding the number
/// of distinct entry ids. Otherwise it yields `entryId` and the highest
/// `score` per entry, ordered by score descending and entry id ascending. The
/// `LIMIT ?` / `OFFSET ?` clauses and their arguments are emitted only when
/// [pageSize] / [offset] are non-null.
///
/// [word] is passed through [_filterFTSSensitiveCharacters] before it is
/// bound. Preconditions are enforced with `assert`, so they are only checked
/// when assertions are enabled.
(String, List<Object?>) _kanjiReadingTemplate(
  String tableName,
  String word, {
  int? pageSize,
  int? offset,
  bool countOnly = false,
}) {
  final isKanjiTable = tableName == JMdictTableNames.kanjiElement;

  assert(isKanjiTable || tableName == JMdictTableNames.readingElement);
  assert(!countOnly || pageSize == null);
  assert(!countOnly || offset == null);
  assert(pageSize == null || pageSize > 0);
  assert(offset == null || offset >= 0);
  assert(
    offset == null || pageSize != null,
    'Offset should only be used with pageSize set',
  );

  // Pieces of the statement that depend on the arguments.
  final ftsTable = '${tableName}FTS';
  final scoreType = isKanjiTable ? 'k' : 'r';
  final projection = countOnly
      ? 'COUNT(DISTINCT "entryId") AS count'
      : '"entryId", MAX("score") AS "score"';
  final groupClause = countOnly ? '' : 'GROUP BY "entryId"';
  final orderClause = countOnly ? '' : 'ORDER BY "score" DESC, "entryId" ASC';
  final limitClause = pageSize == null ? '' : 'LIMIT ?';
  final offsetClause = offset == null ? '' : 'OFFSET ?';

  final sql = '''
    WITH
      fts_results AS (
        SELECT DISTINCT
          "$tableName"."entryId",
          100
            + (("$ftsTable"."reading" = ?) * 10000)
            + "JMdict_EntryScore"."score"
            AS "score"
        FROM "$ftsTable"
        JOIN "$tableName" USING ("elementId")
        JOIN "JMdict_EntryScore" USING ("elementId")
        WHERE "$ftsTable"."reading" MATCH ? || '*'
          AND "JMdict_EntryScore"."type" = '$scoreType'
      ),
      non_fts_results AS (
        SELECT DISTINCT
          "$tableName"."entryId",
          50
            + "JMdict_EntryScore"."score"
            AS "score"
        FROM "$tableName"
        JOIN "JMdict_EntryScore" USING ("elementId")
        WHERE "reading" LIKE '%' || ? || '%'
          AND "$tableName"."entryId" NOT IN (SELECT "entryId" FROM "fts_results")
          AND "JMdict_EntryScore"."type" = '$scoreType'
      )

    SELECT $projection
    FROM (
      SELECT * FROM "fts_results"
      UNION
      SELECT * FROM "non_fts_results"
    )
    $groupClause
    $orderClause
    $limitClause
    $offsetClause
  '''
      .trim();

  // The same filtered term feeds the exact-match, MATCH and LIKE placeholders,
  // in that order; paging values follow only when present.
  final term = _filterFTSSensitiveCharacters(word);
  return (sql, <Object?>[term, term, term, ?pageSize, ?offset]);
}
|
|
|
|
/// Runs the kanji-element search for [word] on [connection] and decodes every
/// result row into a [ScoredEntryId].
///
/// [pageSize] and [offset] are forwarded unchanged to
/// [_kanjiReadingTemplate], which builds the statement and its arguments.
Future<List<ScoredEntryId>> _queryKanji(
  DatabaseExecutor connection,
  String word,
  int? pageSize,
  int? offset,
) {
  final (sql, params) = _kanjiReadingTemplate(
    JMdictTableNames.kanjiElement,
    word,
    pageSize: pageSize,
    offset: offset,
  );

  return connection.rawQuery(sql, params).then(
    (rows) => [
      for (final row in rows)
        ScoredEntryId(row['entryId'] as int, row['score'] as int),
    ],
  );
}
|
|
|
|
/// Counts the distinct entries that the kanji-element search finds for [word].
///
/// Uses the count-only form of [_kanjiReadingTemplate]. Resolves to `0` when
/// the query returns no row or the `count` column is null.
Future<int> _queryKanjiCount(DatabaseExecutor connection, String word) {
  final (sql, params) = _kanjiReadingTemplate(
    JMdictTableNames.kanjiElement,
    word,
    countOnly: true,
  );

  return connection.rawQuery(sql, params).then((rows) {
    if (rows.isEmpty) {
      return 0;
    }
    final count = rows.first['count'] as int?;
    return count ?? 0;
  });
}
|
|
|
|
/// Runs the reading-element (kana) search for [word] on [connection] and
/// decodes every result row into a [ScoredEntryId].
///
/// [pageSize] and [offset] are forwarded unchanged to
/// [_kanjiReadingTemplate], which builds the statement and its arguments.
Future<List<ScoredEntryId>> _queryKana(
  DatabaseExecutor connection,
  String word,
  int? pageSize,
  int? offset,
) {
  final (sql, params) = _kanjiReadingTemplate(
    JMdictTableNames.readingElement,
    word,
    pageSize: pageSize,
    offset: offset,
  );

  return connection.rawQuery(sql, params).then(
    (rows) => [
      for (final row in rows)
        ScoredEntryId(row['entryId'] as int, row['score'] as int),
    ],
  );
}
|
|
|
|
/// Counts the distinct entries that the reading-element (kana) search finds
/// for [word].
///
/// Uses the count-only form of [_kanjiReadingTemplate]. Resolves to `0` when
/// the query returns no row or the `count` column is null.
Future<int> _queryKanaCount(DatabaseExecutor connection, String word) {
  final (sql, params) = _kanjiReadingTemplate(
    JMdictTableNames.readingElement,
    word,
    countOnly: true,
  );

  return connection.rawQuery(sql, params).then((rows) {
    if (rows.isEmpty) {
      return 0;
    }
    final count = rows.first['count'] as int?;
    return count ?? 0;
  });
}
|
|
|
|
/// Searches glossary phrases for [word] and returns the matching entries with
/// their scores, best first.
///
/// An entry matches when any of its glossary phrases contains [word] (`LIKE`
/// with `%` removed from the word and wildcards added on both sides). The
/// score is the highest entry score plus bonuses when the phrase equals
/// [word]: `50` if the sense has `orderNum` 1, `30` if it has `orderNum` 2,
/// and `20` for any exact match. Rows are ordered by score descending, then
/// entry id ascending.
///
/// `LIMIT ?` and `OFFSET ?` are only added to the statement, and their values
/// only bound, when [pageSize] / [offset] are non-null. Binding null to either
/// clause makes SQLite reject the statement, so an unpaged call must omit
/// them. This matches how [_kanjiReadingTemplate] handles paging.
///
/// Preconditions are enforced with `assert`, so they are only checked when
/// assertions are enabled.
Future<List<ScoredEntryId>> _queryEnglish(
  DatabaseExecutor connection,
  String word,
  int? pageSize,
  int? offset,
) async {
  assert(pageSize == null || pageSize > 0);
  assert(offset == null || offset >= 0);
  assert(
    offset == null || pageSize != null,
    'Offset should only be used with pageSize set',
  );

  final sense = JMdictTableNames.sense;
  final glossary = JMdictTableNames.senseGlossary;

  // `%` is stripped so the user's input cannot inject its own wildcard.
  final likePattern = '%${word.replaceAll('%', '')}%';

  // NOTE(review): the exact-match bonus terms are bare (non-aggregated)
  // columns in a grouped query, so which glossary row supplies them is left to
  // SQLite's bare-column rules — confirm this is the intended scoring.
  final result = await connection.rawQuery(
    '''
      SELECT
        "$sense"."entryId",
        MAX("JMdict_EntryScore"."score")
          + (("$glossary"."phrase" = ? AND "$sense"."orderNum" = 1) * 50)
          + (("$glossary"."phrase" = ? AND "$sense"."orderNum" = 2) * 30)
          + (("$glossary"."phrase" = ?) * 20)
          as "score"
      FROM "$glossary"
      JOIN "$sense" USING ("senseId")
      JOIN "JMdict_EntryScore" USING ("entryId")
      WHERE "$glossary"."phrase" LIKE ?
      GROUP BY "JMdict_EntryScore"."entryId"
      ORDER BY
        "score" DESC,
        "$sense"."entryId" ASC
      ${pageSize != null ? 'LIMIT ?' : ''}
      ${offset != null ? 'OFFSET ?' : ''}
    '''
        .trim(),
    // Null-aware elements keep the argument list aligned with the
    // placeholders that were actually emitted above.
    [word, word, word, likePattern, ?pageSize, ?offset],
  );

  return [
    for (final row in result)
      ScoredEntryId(row['entryId'] as int, row['score'] as int),
  ];
}
|
|
|
|
/// Counts the distinct entries whose glossary phrases contain [word].
///
/// The `LIKE` pattern is built the same way as in [_queryEnglish]: `%` is
/// removed from [word] before wildcards are added on both sides. This keeps
/// the reported total in step with the rows the paged query returns.
///
/// Resolves to `0` when the query returns no row or the `count` column is
/// null.
Future<int> _queryEnglishCount(DatabaseExecutor connection, String word) async {
  final sense = JMdictTableNames.sense;
  final glossary = JMdictTableNames.senseGlossary;

  // `%` is stripped so the user's input cannot inject its own wildcard.
  final likePattern = '%${word.replaceAll('%', '')}%';

  final result = await connection.rawQuery(
    '''
      SELECT
        COUNT(DISTINCT "$sense"."entryId") AS "count"
      FROM "$glossary"
      JOIN "$sense" USING ("senseId")
      WHERE "$glossary"."phrase" LIKE ?
    '''
        .trim(),
    [likePattern],
  );

  return result.firstOrNull?['count'] as int? ?? 0;
}
|
|
|
|
/// Fetches the scored entry ids that match [word] under [searchMode].
///
/// [SearchMode.auto] is first resolved to a concrete mode with
/// [_determineSearchMode]. Kanji, kana and English searches are dispatched to
/// their respective query helpers, with [pageSize] and [offset] passed
/// through unchanged.
///
/// Throws an [UnimplementedError] for [SearchMode.mixedKana],
/// [SearchMode.mixedKanji] and any other mode without a query helper.
/// [word] must not be empty; this is checked with `assert` only.
Future<List<ScoredEntryId>> fetchEntryIds(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
  int? pageSize,
  int? offset,
) async {
  final mode = searchMode == SearchMode.auto
      ? _determineSearchMode(word)
      : searchMode;

  assert(word.isNotEmpty, 'Word should not be empty when fetching entry IDs');

  return switch (mode) {
    SearchMode.kanji => _queryKanji(connection, word, pageSize, offset),
    SearchMode.kana => _queryKana(connection, word, pageSize, offset),
    SearchMode.english => _queryEnglish(connection, word, pageSize, offset),
    _ => throw UnimplementedError('Search mode $mode is not implemented'),
  };
}
|
|
|
|
/// Counts the entries that match [word] under [searchMode].
///
/// [SearchMode.auto] is first resolved to a concrete mode with
/// [_determineSearchMode]. Kanji, kana and English searches are dispatched to
/// their respective count helpers.
///
/// Throws an [UnimplementedError] for [SearchMode.mixedKana],
/// [SearchMode.mixedKanji] and any other mode without a count helper.
/// [word] must not be empty; this is checked with `assert` only.
Future<int?> fetchEntryIdCount(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
) async {
  final mode = searchMode == SearchMode.auto
      ? _determineSearchMode(word)
      : searchMode;

  assert(word.isNotEmpty, 'Word should not be empty when fetching entry IDs');

  return switch (mode) {
    SearchMode.kanji => _queryKanjiCount(connection, word),
    SearchMode.kana => _queryKanaCount(connection, word),
    SearchMode.english => _queryEnglishCount(connection, word),
    _ => throw UnimplementedError('Search mode $mode is not implemented'),
  };
}
|