jmdict: don't store empty entry scores
All checks were successful
Build and test / build (push) Successful in 8m17s

This commit is contained in:
2026-04-13 18:16:19 +09:00
parent d1a6f39cca
commit 4faf543d6e
3 changed files with 31 additions and 16 deletions

View File

@@ -25,4 +25,5 @@ The `JMdict_EntryScore` table is used to store the score of each entry, which is
The table is generated automatically from other tables via triggers, and should be treated as a materialized view.
There is a score row for every single entry in both `JMdict_KanjiElement` and `JMdict_ReadingElement`, split by the `type` field.
<s>There is a score row for every single entry in both `JMdict_KanjiElement` and `JMdict_ReadingElement`, split by the `type` field.</s>
This is no longer true: we now only store rows whose score is greater than `0`. The `type` field is now also virtual, since the `elementId` values of kanji and reading elements never overlap.

View File

@@ -68,25 +68,25 @@ String _filterFTSSensitiveCharacters(String word) {
"$tableName"."entryId",
100
+ (("${tableName}FTS"."reading" = ?) * 10000)
+ "JMdict_EntryScore"."score"
+ (("$tableName"."orderNum" = 0) * 20)
+ COALESCE("JMdict_EntryScore"."score", 0)
AS "score"
FROM "${tableName}FTS"
JOIN "$tableName" USING ("elementId")
JOIN "JMdict_EntryScore" USING ("elementId")
LEFT JOIN "JMdict_EntryScore" USING ("elementId")
WHERE "${tableName}FTS"."reading" MATCH ? || '*'
AND "JMdict_EntryScore"."elementId" ${tableName == JMdictTableNames.kanjiElement ? '<' : '>='} 1000000000
),
non_fts_results AS (
SELECT DISTINCT
"$tableName"."entryId",
50
+ "JMdict_EntryScore"."score"
+ (("$tableName"."orderNum" = 0) * 20)
+ COALESCE("JMdict_EntryScore"."score", 0)
AS "score"
FROM "$tableName"
JOIN "JMdict_EntryScore" USING ("elementId")
LEFT JOIN "JMdict_EntryScore" USING ("elementId")
WHERE "reading" LIKE '%' || ? || '%'
AND "$tableName"."entryId" NOT IN (SELECT "entryId" FROM "fts_results")
AND "JMdict_EntryScore"."elementId" ${tableName == JMdictTableNames.kanjiElement ? '<' : '>='} 1000000000
)
SELECT ${countOnly ? 'COUNT(DISTINCT "entryId") AS count' : '"entryId", MAX("score") AS "score"'}
@@ -198,16 +198,16 @@ Future<List<ScoredEntryId>> _queryEnglish(
'''
SELECT
"${JMdictTableNames.sense}"."entryId",
MAX("JMdict_EntryScore"."score")
COALESCE(MAX("JMdict_EntryScore"."score"), 0)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 0) * 50)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 30)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1) * 20)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" > 1) * 20)
as "score"
FROM "${JMdictTableNames.senseGlossary}"
JOIN "${JMdictTableNames.sense}" USING ("senseId")
JOIN "JMdict_EntryScore" USING ("entryId")
LEFT JOIN "JMdict_EntryScore" USING ("entryId")
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?2
GROUP BY "JMdict_EntryScore"."entryId"
GROUP BY "${JMdictTableNames.sense}"."entryId"
ORDER BY
"score" DESC,
"${JMdictTableNames.sense}"."entryId" ASC
@@ -215,7 +215,7 @@ Future<List<ScoredEntryId>> _queryEnglish(
${offset != null ? 'OFFSET ?4' : ''}
'''
.trim(),
[word, '%${word.replaceAll('%', '')}%', if (pageSize != null) pageSize, if (offset != null) offset],
[word, '%${word.replaceAll('%', '')}%', ?pageSize, ?offset],
);
return result

View File

@@ -46,7 +46,7 @@ SELECT
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 0) * 20)
-- + (("orderNum" IS 0) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_ReadingElement"
@@ -77,7 +77,7 @@ SELECT
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 0) * 20)
-- + (("orderNum" IS 0) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_KanjiElement"
@@ -103,7 +103,8 @@ BEGIN
)
SELECT "elementId", "score", "common"
FROM "JMdict_EntryScoreView_Reading"
WHERE "elementId" = NEW."elementId";
WHERE "elementId" = NEW."elementId"
AND "score" > 0;
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement"
@@ -116,6 +117,10 @@ BEGIN
"common" = "JMdict_EntryScoreView_Reading"."common"
FROM "JMdict_EntryScoreView_Reading"
WHERE "elementId" = NEW."elementId";
DELETE FROM "JMdict_EntryScore"
WHERE "elementId" = NEW."elementId"
AND "score" <= 0;
END;
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_ReadingElement"
@@ -137,7 +142,8 @@ BEGIN
)
SELECT "elementId", "score", "common"
FROM "JMdict_EntryScoreView_Kanji"
WHERE "elementId" = NEW."elementId";
WHERE "elementId" = NEW."elementId"
AND "score" > 0;
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement"
@@ -150,6 +156,10 @@ BEGIN
"common" = "JMdict_EntryScoreView_Kanji"."common"
FROM "JMdict_EntryScoreView_Kanji"
WHERE "elementId" = NEW."elementId";
DELETE FROM "JMdict_EntryScore"
WHERE "elementId" = NEW."elementId"
AND "score" <= 0;
END;
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_KanjiElement"
@@ -199,4 +209,8 @@ BEGIN
WHERE "JMdict_EntryScoreView"."entryId" = OLD."entryId"
AND "JMdict_EntryScore"."entryId" = OLD."entryId"
AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId";
DELETE FROM "JMdict_EntryScore"
WHERE "elementId" = OLD."elementId"
AND "score" <= 0;
END;