migrations/JMDict: add extra constraints, clean up pkeys, etc.

This commit is contained in:
2025-05-14 17:12:28 +02:00
parent 581f9daf25
commit 1661817819

View File

@@ -40,7 +40,7 @@ CREATE TABLE "JMdict_Entry" (
CREATE TABLE "JMdict_KanjiElement" (
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER,
"orderNum" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
@@ -65,7 +65,7 @@ CREATE TABLE "JMdict_KanjiElementInfo" (
CREATE TABLE "JMdict_ReadingElement" (
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER,
"orderNum" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
@@ -99,29 +99,29 @@ CREATE TABLE "JMdict_ReadingElementInfo" (
-- Sense
-- Sense rows of a dictionary entry. Each row points at its owning "JMdict_Entry" row and carries
-- an "orderNum". The pair ("entryId", "orderNum") is declared UNIQUE, which also makes it
-- usable as the parent key of the composite foreign keys declared in "JMdict_SenseSeeAlso"
-- and "JMdict_SenseAntonym".
-- NOTE(review): "id", "entryId" and "orderNum" are each listed twice below. This looks like
-- the removed and added sides of a diff rendered without +/- markers (first three lines =
-- before, next three = after). Confirm against the real migration file.
CREATE TABLE "JMdict_Sense" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
"entryId" INTEGER REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER,
-- Without AUTOINCREMENT, "id" is still an alias for the SQLite rowid, but ids of deleted
-- rows may be reused for later inserts.
"id" INTEGER PRIMARY KEY,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER NOT NULL,
UNIQUE("entryId", "orderNum")
);
-- Index for looking up the senses of an entry in "orderNum" order.
-- NOTE(review): UNIQUE("entryId", "orderNum") on "JMdict_Sense" already makes SQLite create an
-- automatic index over the same columns, so this explicit index appears to be redundant.
-- Confirm nothing refers to it by name (e.g. INDEXED BY) before dropping it.
CREATE INDEX "JMdict_Sense_byEntryId_byOrderNum" ON "JMdict_Sense"("entryId", "orderNum");
-- Associates a sense with a kanji element: ("entryId", "kanji") must match an
-- ("entryId", "reading") pair in "JMdict_KanjiElement", and "senseId" must exist in "JMdict_Sense".
-- Presumably this records that the sense only applies to the listed kanji forms (verify).
-- NOTE(review): the column lines and the closing line appear twice (before/after sides of a
-- diff rendered without +/- markers). Confirm against the real migration file.
-- NOTE(review): nothing here ties "entryId" to the entry that owns "senseId", so a row can pair
-- a sense with a kanji of a different entry.
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
"entryId" INTEGER,
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"kanji" TEXT,
"entryId" INTEGER NOT NULL,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"kanji" TEXT NOT NULL,
-- The composite foreign key requires ("entryId", "reading") to be a PRIMARY KEY or UNIQUE key
-- of "JMdict_KanjiElement". That definition is not fully visible here (TODO confirm).
FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "senseId", "kanji")
);
-- WITHOUT ROWID stores the rows directly in the primary-key b-tree, with no separate rowid.
-- SQLite enforces NOT NULL on every primary key column of such a table.
) WITHOUT ROWID;
-- Associates a sense with a reading element: ("entryId", "reading") must match an
-- ("entryId", "reading") pair in "JMdict_ReadingElement", and "senseId" must exist in
-- "JMdict_Sense". Presumably this records that the sense only applies to the listed readings
-- (verify).
-- NOTE(review): the column lines and the closing line appear twice (before/after sides of a
-- diff rendered without +/- markers). Confirm against the real migration file.
-- NOTE(review): nothing here ties "entryId" to the entry that owns "senseId", so a row can pair
-- a sense with a reading of a different entry.
CREATE TABLE "JMdict_SenseRestrictedToReading" (
"entryId" INTEGER,
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"reading" TEXT,
"entryId" INTEGER NOT NULL,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"reading" TEXT NOT NULL,
-- The composite foreign key requires ("entryId", "reading") to be a PRIMARY KEY or UNIQUE key
-- of "JMdict_ReadingElement". That definition is not fully visible here (TODO confirm).
FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "senseId", "reading")
);
-- WITHOUT ROWID stores the rows directly in the primary-key b-tree, with no separate rowid.
-- SQLite enforces NOT NULL on every primary key column of such a table.
) WITHOUT ROWID;
-- Before an xref can be added, the entry it points to must already have been inserted.
-- Xrefs should therefore be added in a second pass over the dictionary file.
@@ -134,37 +134,34 @@ CREATE TABLE "JMdict_SenseRestrictedToReading" (
-- These two things also concern "SenseAntonym"
-- "See also" cross reference from a sense ("senseId") to another entry ("xrefEntryId"),
-- optionally narrowed to a kanji form, a reading and/or a sense number of that entry.
-- NOTE(review): several lines below appear twice in old/new form (before/after sides of a diff
-- rendered without +/- markers). Confirm against the real migration file.
CREATE TABLE "JMdict_SenseSeeAlso" (
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"xrefEntryId" INTEGER,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"xrefEntryId" INTEGER NOT NULL,
"seeAlsoReading" TEXT,
"seeAlsoKanji" TEXT,
"seeAlsoSense" INTEGER,
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
"ambiguous" BOOLEAN,
-- NOTE(review): a comparison written as "= NULL" always evaluates to NULL, and a CHECK only
-- fails when its expression is false, so this constraint never rejected any row. An effective
-- version would have to use IS NULL.
CHECK ("seeAlsoReading" = NULL <> "seeAlsoKanji" = NULL),
-- CHECK("seeAlsoSense" = NULL OR "seeAlsoSense")
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
-- TODO: Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
-- The nullable columns can stay NULL because SQLite skips a composite foreign key check
-- when any of its child key columns is NULL.
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
-- This key targets the UNIQUE("entryId", "orderNum") pair of "JMdict_Sense".
FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
-- NOTE(review): SQLite treats NULLs as distinct in UNIQUE constraints, so two rows that
-- differ only by having NULL in the same nullable column are both accepted.
UNIQUE("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
);
-- Antonym cross reference from a sense ("senseId") to another entry ("xrefEntryId"),
-- optionally narrowed to a kanji form, a reading and/or a sense number of that entry.
-- NOTE(review): several lines below appear twice in old/new form (before/after sides of a diff
-- rendered without +/- markers). Confirm against the real migration file.
CREATE TABLE "JMdict_SenseAntonym" (
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"xrefEntryId" INTEGER,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"xrefEntryId" INTEGER NOT NULL,
"antonymReading" TEXT,
"antonymKanji" TEXT,
"antonymSense" INTEGER,
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
"ambiguous" BOOLEAN,
-- NOTE(review): a comparison written as "= NULL" always evaluates to NULL, and a CHECK only
-- fails when its expression is false, so this constraint never rejected any row. An effective
-- version would have to use IS NULL.
CHECK ("antonymReading" = NULL <> "antonymKanji" = NULL),
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
-- The nullable columns can stay NULL because SQLite skips a composite foreign key check
-- when any of its child key columns is NULL.
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
-- This key targets the UNIQUE("entryId", "orderNum") pair of "JMdict_Sense".
FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
-- NOTE(review): SQLite treats NULLs as distinct in UNIQUE constraints, so two rows that
-- differ only by having NULL in the same nullable column are both accepted.
UNIQUE("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
);
-- These cross references are going to be mostly accessed from a sense
@@ -198,7 +195,7 @@ CREATE TABLE "JMdict_SenseLanguageSource" (
"phrase" TEXT,
"fullyDescribesSense" BOOLEAN NOT NULL DEFAULT TRUE,
"constructedFromSmallerWords" BOOLEAN NOT NULL DEFAULT FALSE,
PRIMARY KEY ("senseId", "language", "phrase")
UNIQUE("senseId", "language", "phrase")
);
CREATE TABLE "JMdict_SenseDialect" (
@@ -232,8 +229,7 @@ CREATE TABLE "JMdict_SenseInfo" (
-- the Tanaka Corpus, so I will leave the type out for now.
CREATE TABLE "JMdict_ExampleSentence" (
"id" INTEGER PRIMARY KEY,
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"word" TEXT NOT NULL,
"source" TEXT NOT NULL,
"sourceLanguage" CHAR(3) NOT NULL DEFAULT "eng",