From 4faf543d6e063e2e28a81a179dcb30c9bd904d2f Mon Sep 17 00:00:00 2001 From: h7x4 Date: Mon, 13 Apr 2026 18:16:19 +0900 Subject: [PATCH] jmdict: don't store empty entry scores --- docs/database.md | 3 ++- lib/search/word_search/entry_id_query.dart | 22 +++++++++---------- .../0005_JMDict_search_index_tables.sql | 22 +++++++++++++++---- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/docs/database.md b/docs/database.md index 5f4047e..1cdab87 100644 --- a/docs/database.md +++ b/docs/database.md @@ -25,4 +25,5 @@ The `JMdict_EntryScore` table is used to store the score of each entry, which is The table is automatically generated from other tables via triggers, and should be considered as a materialized view. -There is a score row for every single entry in both `JMdict_KanjiElement` and `JMdict_ReadingElement`, split by the `type` field. +There is a score row for every single entry in both `JMdict_KanjiElement` and `JMdict_ReadingElement`, split by the `type` field. +This is no longer true: only rows whose score is greater than `0` are stored now. The `type` field is now also virtual, since the `elementId` values of kanji and reading elements never overlap. diff --git a/lib/search/word_search/entry_id_query.dart b/lib/search/word_search/entry_id_query.dart index b2dc269..9b3ab86 100644 --- a/lib/search/word_search/entry_id_query.dart +++ b/lib/search/word_search/entry_id_query.dart @@ -68,25 +68,25 @@ String _filterFTSSensitiveCharacters(String word) { "$tableName"."entryId", 100 + (("${tableName}FTS"."reading" = ?) * 10000) - + "JMdict_EntryScore"."score" + + (("$tableName"."orderNum" = 0) * 20) + + COALESCE("JMdict_EntryScore"."score", 0) AS "score" FROM "${tableName}FTS" JOIN "$tableName" USING ("elementId") - JOIN "JMdict_EntryScore" USING ("elementId") + LEFT JOIN "JMdict_EntryScore" USING ("elementId") WHERE "${tableName}FTS"."reading" MATCH ? || '*' - AND "JMdict_EntryScore"."elementId" ${tableName == JMdictTableNames.kanjiElement ? 
'<' : '>='} 1000000000 ), non_fts_results AS ( SELECT DISTINCT "$tableName"."entryId", 50 - + "JMdict_EntryScore"."score" + + (("$tableName"."orderNum" = 0) * 20) + + COALESCE("JMdict_EntryScore"."score", 0) AS "score" FROM "$tableName" - JOIN "JMdict_EntryScore" USING ("elementId") + LEFT JOIN "JMdict_EntryScore" USING ("elementId") WHERE "reading" LIKE '%' || ? || '%' AND "$tableName"."entryId" NOT IN (SELECT "entryId" FROM "fts_results") - AND "JMdict_EntryScore"."elementId" ${tableName == JMdictTableNames.kanjiElement ? '<' : '>='} 1000000000 ) SELECT ${countOnly ? 'COUNT(DISTINCT "entryId") AS count' : '"entryId", MAX("score") AS "score"'} @@ -198,16 +198,16 @@ Future> _queryEnglish( ''' SELECT "${JMdictTableNames.sense}"."entryId", - MAX("JMdict_EntryScore"."score") + COALESCE(MAX("JMdict_EntryScore"."score"), 0) + (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 0) * 50) + (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 30) - + (("${JMdictTableNames.senseGlossary}"."phrase" = ?1) * 20) + + (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" > 1) * 20) as "score" FROM "${JMdictTableNames.senseGlossary}" JOIN "${JMdictTableNames.sense}" USING ("senseId") - JOIN "JMdict_EntryScore" USING ("entryId") + LEFT JOIN "JMdict_EntryScore" USING ("entryId") WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?2 - GROUP BY "JMdict_EntryScore"."entryId" + GROUP BY "${JMdictTableNames.sense}"."entryId" ORDER BY "score" DESC, "${JMdictTableNames.sense}"."entryId" ASC @@ -215,7 +215,7 @@ Future> _queryEnglish( ${offset != null ? 
'OFFSET ?4' : ''} ''' .trim(), - [word, '%${word.replaceAll('%', '')}%', if (pageSize != null) pageSize, if (offset != null) offset], + [word, '%${word.replaceAll('%', '')}%', ?pageSize, ?offset], ); return result diff --git a/migrations/0005_JMDict_search_index_tables.sql b/migrations/0005_JMDict_search_index_tables.sql index 961badb..c88db1e 100644 --- a/migrations/0005_JMDict_search_index_tables.sql +++ b/migrations/0005_JMDict_search_index_tables.sql @@ -46,7 +46,7 @@ SELECT + (("spec" IS 2) * 5) + (("gai" IS 1) * 10) + (("gai" IS 2) * 5) - + (("orderNum" IS 0) * 20) + -- + (("orderNum" IS 0) * 20) - (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5) AS "score" FROM "JMdict_ReadingElement" @@ -77,7 +77,7 @@ SELECT + (("spec" IS 2) * 5) + (("gai" IS 1) * 10) + (("gai" IS 2) * 5) - + (("orderNum" IS 0) * 20) + -- + (("orderNum" IS 0) * 20) - (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5) AS "score" FROM "JMdict_KanjiElement" @@ -103,7 +103,8 @@ BEGIN ) SELECT "elementId", "score", "common" FROM "JMdict_EntryScoreView_Reading" - WHERE "elementId" = NEW."elementId"; + WHERE "elementId" = NEW."elementId" + AND "score" > 0; END; CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement" @@ -116,6 +117,10 @@ BEGIN "common" = "JMdict_EntryScoreView_Reading"."common" FROM "JMdict_EntryScoreView_Reading" WHERE "elementId" = NEW."elementId"; + + DELETE FROM "JMdict_EntryScore" + WHERE "elementId" = NEW."elementId" + AND "score" <= 0; END; CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_ReadingElement" @@ -137,7 +142,8 @@ BEGIN ) SELECT "elementId", "score", "common" FROM "JMdict_EntryScoreView_Kanji" - WHERE "elementId" = NEW."elementId"; + WHERE "elementId" = NEW."elementId" + AND "score" > 0; END; CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement" @@ -150,6 +156,10 @@ BEGIN "common" = "JMdict_EntryScoreView_Kanji"."common" FROM "JMdict_EntryScoreView_Kanji" WHERE "elementId" = NEW."elementId"; + + DELETE FROM 
"JMdict_EntryScore" + WHERE "elementId" = NEW."elementId" + AND "score" <= 0; END; CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_KanjiElement" @@ -199,4 +209,8 @@ BEGIN WHERE "JMdict_EntryScoreView"."entryId" = OLD."entryId" AND "JMdict_EntryScore"."entryId" = OLD."entryId" AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId"; + + DELETE FROM "JMdict_EntryScore" + WHERE "elementId" = OLD."elementId" + AND "score" <= 0; END;