From b6661c734f52ffde057e511a27e2e8fe79108140 Mon Sep 17 00:00:00 2001 From: h7x4 Date: Fri, 16 May 2025 23:50:01 +0200 Subject: [PATCH] lib/search/word_search: add word count search --- lib/cli/commands/query_word.dart | 12 ++- lib/search.dart | 10 +- lib/search/word_search/entry_id_query.dart | 118 ++++++++++++++++----- lib/search/word_search/word_search.dart | 18 ++++ 4 files changed, 127 insertions(+), 31 deletions(-) diff --git a/lib/cli/commands/query_word.dart b/lib/cli/commands/query_word.dart index d019ab3..f9709d1 100644 --- a/lib/cli/commands/query_word.dart +++ b/lib/cli/commands/query_word.dart @@ -27,10 +27,16 @@ class QueryWord extends Command { libsqlitePath: argResults!.option('libsqlite')!, ); + final String searchWord = 'かな'; + final time = Stopwatch()..start(); - final result = await JaDBConnection(db).searchWord('かな'); + final count = await JaDBConnection(db).searchWordCount(searchWord); time.stop(); + final time2 = Stopwatch()..start(); + final result = await JaDBConnection(db).searchWord(searchWord); + time2.stop(); + if (result == null) { print("Invalid search"); } else if (result.isEmpty) { @@ -42,6 +48,8 @@ class QueryWord extends Command { } } - print("Query took ${time.elapsedMilliseconds}ms"); + print("Total count: ${count}"); + print("Count query took ${time.elapsedMilliseconds}ms"); + print("Query took ${time2.elapsedMilliseconds}ms"); } } diff --git a/lib/search.dart b/lib/search.dart index 73112e6..5d4490f 100644 --- a/lib/search.dart +++ b/lib/search.dart @@ -12,14 +12,18 @@ class JaDBConnection { const JaDBConnection(this._connection); - Future searchKanji(String kanji) async => + Future searchKanji(String kanji) => searchKanjiWithDbConnection(this._connection, kanji); Future searchKanjiByRadicals( - List radicals) async { + List radicals, + ) async { throw UnimplementedError(); } - Future?> searchWord(String word) async => + Future?> searchWord(String word) => searchWordWithDbConnection(this._connection, word); + + Future searchWordCount(String word) => + searchWordCountWithDbConnection(this._connection, word); } diff --git a/lib/search/word_search/entry_id_query.dart b/lib/search/word_search/entry_id_query.dart index 3a926bb..2340aaa 100644 --- a/lib/search/word_search/entry_id_query.dart +++ b/lib/search/word_search/entry_id_query.dart @@ -30,14 +30,15 @@ SearchMode _determineSearchMode(String word) { (String, List) _kanjiReadingTemplate( String tableName, - String word, - int pageSize, -) => + String word, { + int pageSize = 10, + bool countOnly = false, +}) => ( ''' WITH fts_results AS ( - SELECT + SELECT DISTINCT "${tableName}FTS"."entryId", 100 + "${tableName}"."baseScore" @@ -49,34 +50,39 @@ SearchMode _determineSearchMode(String word) { JOIN "${tableName}" USING ("entryId", "reading") WHERE "${tableName}FTS"."reading" MATCH ? || '*' ORDER BY "score" DESC - LIMIT ? + ${!countOnly ? 'LIMIT ?' : ''} + ), + non_fts_results AS ( + SELECT DISTINCT + "entryId", + 50 + + "${tableName}"."baseScore" + - (substr(COALESCE("${TanosJLPTTableNames.jlptTag}"."jlptLevel", 'N0'), 2) * -5) + AS "score" + FROM "${tableName}" + LEFT JOIN "${TanosJLPTTableNames.jlptTag}" USING ("entryId") + WHERE "reading" LIKE '%' || ? || '%' + AND "entryId" NOT IN (SELECT "entryId" FROM "fts_results") + ORDER BY + "score" DESC, + "entryId" ASC + ${!countOnly ? 'LIMIT ?' : ''} ) - SELECT * - FROM "fts_results" - UNION ALL - SELECT - "entryId", - 50 - + "${tableName}"."baseScore" - - (substr(COALESCE("${TanosJLPTTableNames.jlptTag}"."jlptLevel", 'N0'), 2) * -5) - AS "score" - FROM "${tableName}" - LEFT JOIN "${TanosJLPTTableNames.jlptTag}" USING ("entryId") - WHERE "reading" LIKE '%' || ? || '%' - AND "entryId" NOT IN (SELECT "entryId" FROM "fts_results") - ORDER BY - "score" DESC, - "entryId" ASC - LIMIT ? + ${countOnly ? 'SELECT COUNT("entryId") AS count' : 'SELECT "entryId", "score"'} + FROM ( + SELECT * FROM fts_results + UNION ALL + SELECT * FROM non_fts_results + ) ''' .trim(), [ word, word, - pageSize, + if (!countOnly) pageSize, word, - pageSize, + if (!countOnly) pageSize, ] ); @@ -102,7 +108,7 @@ Future> fetchEntryIds( final (query, args) = _kanjiReadingTemplate( JMdictTableNames.kanjiElement, word, - pageSize, + pageSize: pageSize, ); entryIds = (await connection.rawQuery(query, args)) .map((row) => ScoredEntryId( @@ -116,7 +122,7 @@ Future> fetchEntryIds( final (query, args) = _kanjiReadingTemplate( JMdictTableNames.readingElement, word, - pageSize, + pageSize: pageSize, ); entryIds = (await connection.rawQuery(query, args)) .map((row) => ScoredEntryId( @@ -153,3 +159,63 @@ Future> fetchEntryIds( return entryIds; } + +Future fetchEntryIdCount( + DatabaseExecutor connection, + String word, + SearchMode searchMode, +) async { + if (searchMode == SearchMode.Auto) { + searchMode = _determineSearchMode(word); + } + + assert( + word.isNotEmpty, + 'Word should not be empty when fetching entry IDs', + ); + + late final int? entryIdCount; + + switch (searchMode) { + case SearchMode.Kanji: + final (query, args) = _kanjiReadingTemplate( + JMdictTableNames.kanjiElement, + word, + pageSize: 1, + countOnly: true, + ); + entryIdCount = (await connection.rawQuery(query, args)) + .firstOrNull?['count'] as int?; + break; + + case SearchMode.Kana: + final (query, args) = _kanjiReadingTemplate( + JMdictTableNames.readingElement, + word, + pageSize: 1, + countOnly: true, + ); + entryIdCount = (await connection.rawQuery(query, args)) + .firstOrNull?['count'] as int?; + break; + + case SearchMode.English: + entryIdCount = (await connection.query( + JMdictTableNames.senseGlossary, + columns: ['COUNT(DISTINCT entryId)'], + where: 'english LIKE ?', + whereArgs: ['%$word%'], + )) + .firstOrNull?['COUNT(DISTINCT entryId)'] as int?; + break; + + case SearchMode.MixedKana: + case SearchMode.MixedKanji: + default: + throw UnimplementedError( + 'Search mode $searchMode is not implemented', + ); + } + + return entryIdCount; +} diff --git a/lib/search/word_search/word_search.dart b/lib/search/word_search/word_search.dart index 647388f..087b19f 100644 --- a/lib/search/word_search/word_search.dart +++ b/lib/search/word_search/word_search.dart @@ -77,3 +77,21 @@ Future?> searchWordWithDbConnection( return result; } + +Future searchWordCountWithDbConnection( + DatabaseExecutor connection, + String word, { + SearchMode searchMode = SearchMode.Auto, +}) async { + if (word.isEmpty) { + return null; + } + + final int? entryIdCount = await fetchEntryIdCount( + connection, + word, + searchMode, + ); + + return entryIdCount; +}