Files
jadb/migrations/0001_JMDict.sql

253 lines
9.1 KiB
SQL

CREATE TABLE "JMdict_InfoDialect" (
"id" VARCHAR(4) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE "JMdict_InfoField" (
"id" VARCHAR(7) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE "JMdict_InfoKanji" (
"id" VARCHAR(5) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE "JMdict_InfoMisc" (
"id" VARCHAR(12) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE "JMdict_InfoPOS" (
"id" VARCHAR(9) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE "JMdict_InfoReading" (
"id" VARCHAR(5) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
-- The XML specification says that an entry needs to have at least
-- one sense and one reading. I will just assume this is the case, and
-- not implement a check for it.
CREATE TABLE "JMdict_Entry" (
"id" INTEGER PRIMARY KEY
);
-- KanjiElement
CREATE TABLE "JMdict_KanjiElement" (
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
PRIMARY KEY ("entryId", "reading")
) WITHOUT ROWID;
CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading");
CREATE TABLE "JMdict_KanjiElementInfo" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"info" TEXT NOT NULL REFERENCES "JMdict_InfoKanji"("id"),
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "info")
) WITHOUT ROWID;
-- ReadingElement
CREATE TABLE "JMdict_ReadingElement" (
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
PRIMARY KEY ("entryId", "reading")
) WITHOUT ROWID;
CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading");
CREATE TABLE "JMdict_ReadingElementRestriction" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"restriction" TEXT NOT NULL,
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "restriction")
) WITHOUT ROWID;
CREATE TABLE "JMdict_ReadingElementInfo" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"info" TEXT NOT NULL REFERENCES "JMdict_InfoReading"("id"),
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "info")
) WITHOUT ROWID;
-- Sense
CREATE TABLE "JMdict_Sense" (
"id" INTEGER PRIMARY KEY,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER NOT NULL,
UNIQUE("entryId", "orderNum")
);
CREATE INDEX "JMdict_Sense_byEntryId_byOrderNum" ON "JMdict_Sense"("entryId", "orderNum");
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
"entryId" INTEGER NOT NULL,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"kanji" TEXT NOT NULL,
FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "senseId", "kanji")
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseRestrictedToReading" (
"entryId" INTEGER NOT NULL,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"reading" TEXT NOT NULL,
FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "senseId", "reading")
) WITHOUT ROWID;
-- In order to add xrefs, you will need to have added the entry to xref to.
-- These should be added in a second pass of the dictionary file.
-- In this version of JMdict, the xrefs can be ambiguous.
-- There has been rumours of a nonambiguous version possibly arriving in the future
-- (https://www.edrdg.org/jmdict_edict_list/2019/msg00360.html)
-- but for time being, this need to be modeled as a one to many relationship.
-- These two things also concern "SenseAntonym"
CREATE TABLE "JMdict_SenseSeeAlso" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"xrefEntryId" INTEGER NOT NULL,
"seeAlsoReading" TEXT,
"seeAlsoKanji" TEXT,
"seeAlsoSense" INTEGER,
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
-- TODO: Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
UNIQUE("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
);
CREATE TABLE "JMdict_SenseAntonym" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"xrefEntryId" INTEGER NOT NULL,
"antonymReading" TEXT,
"antonymKanji" TEXT,
"antonymSense" INTEGER,
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
UNIQUE("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
);
-- These cross references are going to be mostly accessed from a sense
-- This will speed up the join.
CREATE INDEX "JMdict_SenseSeeAlso_bySenseId" ON "JMdict_SenseSeeAlso"("senseId");
CREATE INDEX "JMdict_SenseAntonym_bySenseId" ON "JMdict_SenseAntonym"("senseId");
CREATE TABLE "JMdict_SensePOS" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"pos" TEXT NOT NULL REFERENCES "JMdict_InfoPOS"("id"),
PRIMARY KEY ("senseId", "pos")
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseField" (
"senseId" INTEGER NOT NULL,
"field" TEXT NOT NULL,
FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("id"),
FOREIGN KEY ("field") REFERENCES "JMdict_InfoField"("id"),
PRIMARY KEY ("senseId", "field")
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseMisc" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"misc" TEXT NOT NULL REFERENCES "JMdict_InfoMisc"("id"),
PRIMARY KEY ("senseId", "misc")
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseLanguageSource" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"language" CHAR(3) NOT NULL DEFAULT "eng",
"phrase" TEXT,
"fullyDescribesSense" BOOLEAN NOT NULL DEFAULT TRUE,
"constructedFromSmallerWords" BOOLEAN NOT NULL DEFAULT FALSE,
UNIQUE("senseId", "language", "phrase")
);
CREATE TABLE "JMdict_SenseDialect" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("id"),
PRIMARY KEY ("senseId", "dialect")
) WITHOUT ROWID;
-- In the documentation, it says that the glossary can contain
-- special prioritized entries, but I can't find a single one of those.
-- Neither can I find a glossary tag with g_gend data, so these parts
-- will be omitted.
CREATE TABLE "JMdict_SenseGlossary" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"phrase" TEXT NOT NULL,
"language" CHAR(3) NOT NULL DEFAULT "eng",
"type" TEXT,
PRIMARY KEY ("senseId", "language", "phrase")
) WITHOUT ROWID;
CREATE INDEX "JMdict_SenseGlossary_byPhrase" ON JMdict_SenseGlossary("phrase");
CREATE TABLE "JMdict_SenseInfo" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"info" TEXT NOT NULL,
PRIMARY KEY ("senseId", "info")
) WITHOUT ROWID;
-- There is not a single example sentence that doesn't come from
-- the Tanaka Corpus, so I will leave the type out for now.
CREATE TABLE "JMdict_ExampleSentence" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"word" TEXT NOT NULL,
"source" TEXT NOT NULL,
"sourceLanguage" CHAR(3) NOT NULL DEFAULT "eng",
"japanese" TEXT NOT NULL
-- "type" TEXT NOT NULL DEFAULT "tat",
);
CREATE VIEW "JMdict_EntryByKana"("kana", "entryId")
AS
SELECT
"JMdict_ReadingElement"."reading" AS "kana",
"JMdict_ReadingElement"."entryId" AS "entryId"
FROM "JMdict_ReadingElement";
CREATE VIEW "JMdict_EntryByEnglish"("english", "entryId")
AS
SELECT
"JMdict_SenseGlossary"."phrase" AS "english",
"JMdict_Sense"."senseId" AS "entryId"
FROM "JMdict_SenseGlossary" JOIN "JMdict_Sense" USING("senseId");