migrations: precalculate word search score with triggers

This commit is contained in:
2025-05-22 16:57:06 +02:00
parent bfcb2bfc97
commit c208ef75f2
4 changed files with 243 additions and 44 deletions

View File

@@ -1,5 +1,4 @@
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/table_names.dart';
import 'package:jadb/search/word_search/word_search.dart';
import 'package:jadb/util/text_filtering.dart';
import 'package:sqflite_common/sqlite_api.dart';
@@ -41,13 +40,12 @@ SearchMode _determineSearchMode(String word) {
SELECT DISTINCT
"${tableName}FTS"."entryId",
100
+ "${tableName}"."baseScore"
+ (("${tableName}FTS"."reading" = ?) * 50)
- (substr(COALESCE("${TanosJLPTTableNames.jlptTag}"."jlptLevel", 'N0'), 2) * -5)
+ "${tableName}Score"."score"
AS "score"
FROM "${tableName}FTS"
LEFT JOIN "${TanosJLPTTableNames.jlptTag}" USING ("entryId")
JOIN "${tableName}" USING ("entryId", "reading")
JOIN "${tableName}Score" USING ("entryId", "reading")
WHERE "${tableName}FTS"."reading" MATCH ? || '*'
ORDER BY "score" DESC
${!countOnly ? 'LIMIT ?' : ''}
@@ -56,11 +54,10 @@ SearchMode _determineSearchMode(String word) {
SELECT DISTINCT
"entryId",
50
+ "${tableName}"."baseScore"
- (substr(COALESCE("${TanosJLPTTableNames.jlptTag}"."jlptLevel", 'N0'), 2) * -5)
+ "${tableName}Score"."score"
AS "score"
FROM "${tableName}"
LEFT JOIN "${TanosJLPTTableNames.jlptTag}" USING ("entryId")
JOIN "${tableName}Score" USING ("entryId", "reading")
WHERE "reading" LIKE '%' || ? || '%'
AND "entryId" NOT IN (SELECT "entryId" FROM "fts_results")
ORDER BY

View File

@@ -47,27 +47,12 @@ CREATE TABLE "JMdict_KanjiElement" (
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
"common" BOOLEAN NOT NULL AS (
"news" IS 1 OR "ichi" IS 1 OR "spec" IS 1 OR "gai" IS 1
),
"baseScore" INTEGER NOT NULL AS (
("common" * 50)
+ (("news" IS 1) * 10)
+ (("news" IS 2) * 5)
+ (("ichi" IS 1) * 10)
+ (("ichi" IS 2) * 5)
+ (("spec" IS 1) * 10)
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 1) * 20)
) STORED,
PRIMARY KEY ("entryId", "reading"),
UNIQUE("entryId", "orderNum")
) WITHOUT ROWID;
CREATE INDEX "JMdict_KanjiElement_byEntryId_byOrderNum" ON "JMdict_KanjiElement"("entryId", "orderNum");
CREATE INDEX "JMdict_KanjiElement_byReading_byBaseScore" ON "JMdict_KanjiElement"("reading", "baseScore");
CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading");
CREATE TABLE "JMdict_KanjiElementInfo" (
"entryId" INTEGER NOT NULL,
@@ -90,27 +75,12 @@ CREATE TABLE "JMdict_ReadingElement" (
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
"common" BOOLEAN NOT NULL AS (
"news" IS 1 OR "ichi" IS 1 OR "spec" IS 1 OR "gai" IS 1
),
"baseScore" INTEGER NOT NULL AS (
("common" * 50)
+ (("news" IS 1) * 10)
+ (("news" IS 2) * 5)
+ (("ichi" IS 1) * 10)
+ (("ichi" IS 2) * 5)
+ (("spec" IS 1) * 10)
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 1) * 20)
) STORED,
PRIMARY KEY ("entryId", "reading"),
UNIQUE("entryId", "orderNum")
) WITHOUT ROWID;
CREATE INDEX "JMdict_ReadingElement_byEntryId_byOrderNum" ON "JMdict_ReadingElement"("entryId", "orderNum");
CREATE INDEX "JMdict_ReadingElement_byReading_byBaseScore" ON "JMdict_ReadingElement"("reading", "baseScore");
CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading");
CREATE TABLE "JMdict_ReadingElementRestriction" (
"entryId" INTEGER NOT NULL,

View File

@@ -0,0 +1,229 @@
CREATE TABLE JMdict_ReadingElementScore (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"score" INTEGER NOT NULL DEFAULT 0,
"common" BOOLEAN NOT NULL DEFAULT FALSE,
PRIMARY KEY (entryId, reading)
) WITHOUT ROWID;
CREATE INDEX "JMdict_ReadingElementScore_byEntryId_byReading_byScore" ON "JMdict_ReadingElementScore"("entryId", "reading", "score");
CREATE INDEX "JMdict_ReadingElementScore_byScore" ON "JMdict_ReadingElementScore"("score");
CREATE VIEW "JMdict_ReadingElementScoreView" AS
SELECT
"JMdict_ReadingElement"."entryId",
"JMdict_ReadingElement"."reading",
(
"news" IS 1
OR "ichi" IS 1
OR "spec" IS 1
OR "gai" IS 1
)
AS "common",
("common" * 50)
+ (("news" IS 1) * 10)
+ (("news" IS 2) * 5)
+ (("ichi" IS 1) * 10)
+ (("ichi" IS 2) * 5)
+ (("spec" IS 1) * 10)
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 1) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_ReadingElement"
LEFT JOIN "JMdict_JLPTTag" USING ("entryId");
-- JMdict_ReadingElement triggers
CREATE TRIGGER "JMdict_ReadingElementScore_Insert"
AFTER INSERT ON "JMdict_ReadingElement"
BEGIN
INSERT INTO "JMdict_ReadingElementScore" (
"entryId",
"reading",
"score",
"common"
)
SELECT "entryId", "reading", "score", "common"
FROM "JMdict_ReadingElementScoreView"
WHERE "entryId" = NEW."entryId" AND "reading" = NEW."reading";
END;
CREATE TRIGGER "JMdict_ReadingElementScore_Update_JMdict_ReadingElement"
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
ON "JMdict_ReadingElement"
BEGIN
UPDATE "JMdict_ReadingElementScore"
SET
"score" = "JMdict_ReadingElementScoreView"."score",
"common" = "JMdict_ReadingElementScoreView"."common"
FROM "JMdict_ReadingElementScoreView"
WHERE "entryId" = NEW."entryId" AND "reading" = NEW."reading";
END;
CREATE TRIGGER "JMdict_ReadingElementScore_Delete_JMdict_ReadingElement"
AFTER DELETE ON "JMdict_ReadingElement"
BEGIN
DELETE FROM "JMdict_ReadingElementScore"
WHERE "entryId" = OLD."entryId" AND "reading" = OLD."reading";
END;
-- JMdict_JLPTTag triggers
CREATE TRIGGER "JMdict_ReadingElementScore_Insert_JMdict_JLPTTag"
AFTER INSERT ON "JMdict_JLPTTag"
BEGIN
UPDATE "JMdict_ReadingElementScore"
SET
"score" = "JMdict_ReadingElementScoreView"."score",
"common" = "JMdict_ReadingElementScoreView"."common"
FROM "JMdict_ReadingElementScoreView"
WHERE "JMdict_ReadingElementScoreView"."entryId" = NEW."entryId"
AND "JMdict_ReadingElementScoreView"."entryId" = "JMdict_ReadingElementScore"."entryId"
AND "JMdict_ReadingElementScoreView"."reading" = "JMdict_ReadingElementScore"."reading";
END;
CREATE TRIGGER "JMdict_ReadingElementScore_Update_JMdict_JLPTTag"
AFTER UPDATE OF "jlptLevel"
ON "JMdict_JLPTTag"
BEGIN
UPDATE "JMdict_ReadingElementScore"
SET
"score" = "JMdict_ReadingElementScoreView"."score",
"common" = "JMdict_ReadingElementScoreView"."common"
FROM "JMdict_ReadingElementScoreView"
WHERE "JMdict_ReadingElementScoreView"."entryId" = NEW."entryId"
AND "JMdict_ReadingElementScoreView"."entryId" = "JMdict_ReadingElementScore"."entryId"
AND "JMdict_ReadingElementScoreView"."reading" = "JMdict_ReadingElementScore"."reading";
END;
CREATE TRIGGER "JMdict_ReadingElementScore_Delete_JMdict_JLPTTag"
AFTER DELETE ON "JMdict_JLPTTag"
BEGIN
UPDATE "JMdict_ReadingElementScore"
SET
"score" = "JMdict_ReadingElementScoreView"."score",
"common" = "JMdict_ReadingElementScoreView"."common"
FROM "JMdict_ReadingElementScoreView"
WHERE "JMdict_ReadingElementScoreView"."entryId" = NEW."entryId"
AND "JMdict_ReadingElementScoreView"."entryId" = "JMdict_ReadingElementScore"."entryId"
AND "JMdict_ReadingElementScoreView"."reading" = "JMdict_ReadingElementScore"."reading";
END;
---
CREATE TABLE JMdict_KanjiElementScore (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"score" INTEGER NOT NULL DEFAULT 0,
"common" BOOLEAN NOT NULL DEFAULT FALSE,
PRIMARY KEY (entryId, reading)
) WITHOUT ROWID;
CREATE INDEX "JMdict_KanjiElementScore_byEntryId_byReading_byScore" ON "JMdict_KanjiElementScore"("entryId", "reading", "score");
CREATE INDEX "JMdict_KanjiElementScore_byScore" ON "JMdict_KanjiElementScore"("score");
CREATE VIEW "JMdict_KanjiElementScoreView" AS
SELECT
"JMdict_KanjiElement"."entryId",
"JMdict_KanjiElement"."reading",
(
"news" IS 1
OR "ichi" IS 1
OR "spec" IS 1
OR "gai" IS 1
)
AS "common",
("common" * 50)
+ (("news" IS 1) * 10)
+ (("news" IS 2) * 5)
+ (("ichi" IS 1) * 10)
+ (("ichi" IS 2) * 5)
+ (("spec" IS 1) * 10)
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 1) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_KanjiElement"
LEFT JOIN "JMdict_JLPTTag" USING ("entryId");
-- JMdict_KanjiElement triggers
CREATE TRIGGER "JMdict_KanjiElementScore_Insert"
AFTER INSERT ON "JMdict_KanjiElement"
BEGIN
INSERT INTO "JMdict_KanjiElementScore" (
"entryId",
"reading",
"score",
"common"
)
SELECT "entryId", "reading", "score", "common"
FROM "JMdict_KanjiElementScoreView"
WHERE "entryId" = NEW."entryId" AND "reading" = NEW."reading";
END;
CREATE TRIGGER "JMdict_KanjiElementScore_Update_JMdict_KanjiElement"
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
ON "JMdict_KanjiElement"
BEGIN
UPDATE "JMdict_KanjiElementScore"
SET
"score" = "JMdict_KanjiElementScoreView"."score",
"common" = "JMdict_KanjiElementScoreView"."common"
FROM "JMdict_KanjiElementScoreView"
WHERE "entryId" = NEW."entryId" AND "reading" = NEW."reading";
END;
CREATE TRIGGER "JMdict_KanjiElementScore_Delete_JMdict_KanjiElement"
AFTER DELETE ON "JMdict_KanjiElement"
BEGIN
DELETE FROM "JMdict_KanjiElementScore"
WHERE "entryId" = OLD."entryId" AND "reading" = OLD."reading";
END;
-- JMdict_JLPTTag triggers
CREATE TRIGGER "JMdict_KanjiElementScore_Insert_JMdict_JLPTTag"
AFTER INSERT ON "JMdict_JLPTTag"
BEGIN
UPDATE "JMdict_KanjiElementScore"
SET
"score" = "JMdict_KanjiElementScoreView"."score",
"common" = "JMdict_KanjiElementScoreView"."common"
FROM "JMdict_KanjiElementScoreView"
WHERE "JMdict_KanjiElementScoreView"."entryId" = NEW."entryId"
AND "JMdict_KanjiElementScoreView"."entryId" = "JMdict_KanjiElementScore"."entryId"
AND "JMdict_KanjiElementScoreView"."reading" = "JMdict_KanjiElementScore"."reading";
END;
CREATE TRIGGER "JMdict_KanjiElementScore_Update_JMdict_JLPTTag"
AFTER UPDATE OF "jlptLevel"
ON "JMdict_JLPTTag"
BEGIN
UPDATE "JMdict_KanjiElementScore"
SET
"score" = "JMdict_KanjiElementScoreView"."score",
"common" = "JMdict_KanjiElementScoreView"."common"
FROM "JMdict_KanjiElementScoreView"
WHERE "JMdict_KanjiElementScoreView"."entryId" = NEW."entryId"
AND "JMdict_KanjiElementScoreView"."entryId" = "JMdict_KanjiElementScore"."entryId"
AND "JMdict_KanjiElementScoreView"."reading" = "JMdict_KanjiElementScore"."reading";
END;
CREATE TRIGGER "JMdict_KanjiElementScore_Delete_JMdict_JLPTTag"
AFTER DELETE ON "JMdict_JLPTTag"
BEGIN
UPDATE "JMdict_KanjiElementScore"
SET
"score" = "JMdict_KanjiElementScoreView"."score",
"common" = "JMdict_KanjiElementScoreView"."common"
FROM "JMdict_KanjiElementScoreView"
WHERE "JMdict_KanjiElementScoreView"."entryId" = NEW."entryId"
AND "JMdict_KanjiElementScoreView"."entryId" = "JMdict_KanjiElementScore"."entryId"
AND "JMdict_KanjiElementScoreView"."reading" = "JMdict_KanjiElementScore"."reading";
END;

View File

@@ -42,10 +42,10 @@ LEFT JOIN "JMdict_ReadingElement" USING("entryId");
CREATE VIEW "JMdict_EntryCommon"("entryId")
AS
SELECT DISTINCT "entryId"
FROM "JMdict_KanjiElement"
FULL OUTER JOIN "JMdict_ReadingElement" USING("entryId")
WHERE "JMdict_ReadingElement"."common" = 1
OR "JMdict_KanjiElement"."common" = 1;
FROM "JMdict_KanjiElementScore"
FULL OUTER JOIN "JMdict_ReadingElementScore" USING("entryId")
WHERE "JMdict_ReadingElementScore"."common" = 1
OR "JMdict_KanjiElementScore"."common" = 1;
-- TODO: Make it possible to match words that contain the
@@ -64,4 +64,7 @@ JOIN "JMdict_KanjiElementFTS"
JOIN "JMdict_KanjiElement"
ON "JMdict_KanjiElementFTS"."entryId" = "JMdict_KanjiElement"."entryId"
AND "JMdict_KanjiElementFTS"."reading" LIKE '%' || "JMdict_KanjiElement"."reading"
WHERE "JMdict_KanjiElement"."common";
JOIN "JMdict_KanjiElementScore"
ON "JMdict_KanjiElement"."entryId" = "JMdict_KanjiElementScore"."entryId"
AND "JMdict_KanjiElement"."reading" = "JMdict_KanjiElementScore"."reading"
WHERE "JMdict_KanjiElementScore"."common";