444 lines
14 KiB
SQL
444 lines
14 KiB
SQL
-- TODO: figure out ondelete functions...
|
|
|
|
------------
|
|
-- JMdict --
|
|
------------
|
|
|
|
CREATE TABLE "JMdict_InfoDialect" (
|
|
"id" VARCHAR(4) PRIMARY KEY NOT NULL,
|
|
"description" TEXT NOT NULL
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_InfoField" (
|
|
"id" VARCHAR(7) PRIMARY KEY NOT NULL,
|
|
"description" TEXT NOT NULL
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_InfoKanji" (
|
|
"id" VARCHAR(5) PRIMARY KEY NOT NULL,
|
|
"description" TEXT NOT NULL
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_InfoMisc" (
|
|
"id" VARCHAR(12) PRIMARY KEY NOT NULL,
|
|
"description" TEXT NOT NULL
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_InfoPOS" (
|
|
"id" VARCHAR(9) PRIMARY KEY NOT NULL,
|
|
"description" TEXT NOT NULL
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_InfoReading" (
|
|
"id" VARCHAR(5) PRIMARY KEY NOT NULL,
|
|
"description" TEXT NOT NULL
|
|
) WITHOUT ROWID;
|
|
|
|
-- The XML specification says that an entry needs to have at least
|
|
-- one sense and one reading. I will just assume this is the case, and
|
|
-- not implement a check for it.
|
|
|
|
CREATE TABLE "JMdict_Entry" (
|
|
"id" INTEGER PRIMARY KEY
|
|
);
|
|
|
|
-- KanjiElement
|
|
|
|
CREATE TABLE "JMdict_KanjiElement" (
|
|
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
|
|
"reading" TEXT NOT NULL,
|
|
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
|
|
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
|
|
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
|
|
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
|
|
"nf" INTEGER,
|
|
PRIMARY KEY ("entryId", "reading")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_KanjiElementInfo" (
|
|
"entryId" INTEGER NOT NULL,
|
|
"reading" TEXT NOT NULL,
|
|
"info" TEXT NOT NULL REFERENCES "JMdict_InfoKanji"("id"),
|
|
FOREIGN KEY ("entryId", "reading")
|
|
REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
|
PRIMARY KEY ("entryId", "reading", "info")
|
|
) WITHOUT ROWID;
|
|
|
|
-- ReadingElement
|
|
|
|
CREATE TABLE "JMdict_ReadingElement" (
|
|
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
|
|
"reading" TEXT NOT NULL,
|
|
"readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
|
|
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
|
|
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
|
|
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
|
|
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
|
|
"nf" INTEGER,
|
|
PRIMARY KEY ("entryId", "reading")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_ReadingElementRestriction" (
|
|
"entryId" INTEGER NOT NULL,
|
|
"reading" TEXT NOT NULL,
|
|
"restriction" TEXT NOT NULL,
|
|
FOREIGN KEY ("entryId", "reading")
|
|
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
|
PRIMARY KEY ("entryId", "reading", "restriction")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_ReadingElementInfo" (
|
|
"entryId" INTEGER NOT NULL,
|
|
"reading" TEXT NOT NULL,
|
|
"info" TEXT NOT NULL REFERENCES "JMdict_InfoReading"("id"),
|
|
FOREIGN KEY ("entryId", "reading")
|
|
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
|
PRIMARY KEY ("entryId", "reading", "info")
|
|
) WITHOUT ROWID;
|
|
|
|
-- Sense
|
|
|
|
-- Optimal solution here would be to have an id INTEGER AUTOINCREMENT,
|
|
-- and the entryId as a composite key, since the entryId is used below.
|
|
-- However, autoincrementing composite keys are not available in sqlite
|
|
|
|
CREATE TABLE "JMdict_Sense" (
|
|
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
"entryId" INTEGER REFERENCES "JMdict_Entry"("id")
|
|
);
|
|
|
|
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
|
|
"entryId" INTEGER,
|
|
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
|
|
"kanji" TEXT,
|
|
FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"),
|
|
PRIMARY KEY ("entryId", "senseId", "kanji")
|
|
);
|
|
|
|
CREATE TABLE "JMdict_SenseRestrictedToReading" (
|
|
"entryId" INTEGER,
|
|
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
|
|
"reading" TEXT,
|
|
FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
|
PRIMARY KEY ("entryId", "senseId", "reading")
|
|
);
|
|
|
|
-- In order to add xrefs, you will need to have added the entry to xref to.
|
|
-- These should be added in a second pass of the dictionary file.
|
|
|
|
-- In this version of JMdict, the xrefs can be ambiguous.
|
|
-- There has been rumours of a nonambiguous version possibly arriving in the future
|
|
-- (https://www.edrdg.org/jmdict_edict_list/2019/msg00360.html)
|
|
-- but for time being, this need to be modeled as a one to many relationship.
|
|
|
|
-- These two things also concern "SenseAntonym"
|
|
|
|
CREATE TABLE "JMdict_SenseSeeAlso" (
|
|
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
|
|
"xrefEntryId" INTEGER,
|
|
"seeAlsoReading" TEXT,
|
|
"seeAlsoKanji" TEXT,
|
|
"seeAlsoSense" TEXT REFERENCES "JMdict_Sense"("id"),
|
|
CHECK ("seeAlsoReading" = NULL <> "seeAlsoKanji" = NULL),
|
|
-- CHECK("seeAlsoSense" = NULL OR "seeAlsoSense")
|
|
-- Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
|
|
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"),
|
|
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
|
PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
|
|
);
|
|
|
|
CREATE TABLE "JMdict_SenseAntonym" (
|
|
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
|
|
"xrefEntryId" INTEGER,
|
|
"antonymReading" TEXT,
|
|
"antonymKanji" TEXT,
|
|
"antonymSense" TEXT REFERENCES "JMdict_Sense"("id"),
|
|
CHECK ("antonymReading" = NULL <> "antonymKanji" = NULL),
|
|
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"),
|
|
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
|
PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
|
|
);
|
|
|
|
-- These cross references are going to be mostly accessed from a sense
|
|
-- This will speed up the join.
|
|
CREATE INDEX "JMdict_SenseSeeAlso_bySenseId" ON "JMdict_SenseSeeAlso"("senseId");
|
|
CREATE INDEX "JMdict_SenseAntonym_bySenseId" ON "JMdict_SenseAntonym"("senseId");
|
|
|
|
CREATE TABLE "JMdict_SensePOS" (
|
|
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
|
"pos" TEXT NOT NULL REFERENCES "JMdict_InfoPOS"("id"),
|
|
PRIMARY KEY ("senseId", "pos")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_SenseField" (
|
|
"senseId" INTEGER NOT NULL,
|
|
"field" TEXT NOT NULL,
|
|
FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("id"),
|
|
FOREIGN KEY ("field") REFERENCES "JMdict_InfoField"("id"),
|
|
PRIMARY KEY ("senseId", "field")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_SenseMisc" (
|
|
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
|
"misc" TEXT NOT NULL REFERENCES "JMdict_InfoMisc"("id"),
|
|
PRIMARY KEY ("senseId", "misc")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_SenseLanguageSource" (
|
|
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
|
"language" CHAR(3) NOT NULL DEFAULT "eng",
|
|
"phrase" TEXT,
|
|
"fullyDescribesSense" BOOLEAN NOT NULL DEFAULT TRUE,
|
|
"constructedFromSmallerWords" BOOLEAN NOT NULL DEFAULT FALSE,
|
|
PRIMARY KEY ("senseId", "language", "phrase")
|
|
);
|
|
|
|
CREATE TABLE "JMdict_SenseDialect" (
|
|
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
|
"dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("dialect"),
|
|
PRIMARY KEY ("senseId", "dialect")
|
|
) WITHOUT ROWID;
|
|
|
|
-- In the documentation, it says that the glossary can contain
|
|
-- special prioritized entries, but I can't find a single one of those.
|
|
-- Neither can I find a glossary tag with g_gend data, so these parts
|
|
-- will be omitted.
|
|
|
|
CREATE TABLE "JMdict_SenseGlossary" (
|
|
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
|
"phrase" TEXT NOT NULL,
|
|
"language" CHAR(3) NOT NULL DEFAULT "eng",
|
|
"type" TEXT,
|
|
PRIMARY KEY ("senseId", "language", "phrase")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "JMdict_SenseInfo" (
|
|
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
|
"info" TEXT NOT NULL,
|
|
PRIMARY KEY ("senseId", "info")
|
|
) WITHOUT ROWID;
|
|
|
|
-- There is not a single example sentence that doesn't come from
|
|
-- the Tanaka Corpus, so I will leave the type out for now.
|
|
|
|
CREATE TABLE "JMdict_ExampleSentence" (
|
|
"id" INTEGER PRIMARY KEY,
|
|
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
|
|
"word" TEXT NOT NULL,
|
|
"source" TEXT NOT NULL,
|
|
"sourceLanguage" CHAR(3) NOT NULL DEFAULT "eng",
|
|
"japanese" TEXT NOT NULL
|
|
-- "type" TEXT NOT NULL DEFAULT "tat",
|
|
);
|
|
|
|
-- These tables are for optimizing searches.
|
|
|
|
-- In order to include results from both, the software should
|
|
-- first check if the searchword is convertible to kana, and then
|
|
-- potentially get results from both by doing a union between two
|
|
-- selects.
|
|
|
|
CREATE TABLE "JMdict_EntryByKana" (
|
|
"kana" TEXT NOT NULL,
|
|
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
|
|
PRIMARY KEY ("kana", "entryId")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE INDEX "JMdict_EntryByKana_byKana" ON "JMdict_EntryByKana"("kana");
|
|
|
|
CREATE TABLE "JMdict_EntryByEnglish" (
|
|
"english" TEXT NOT NULL,
|
|
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
|
|
PRIMARY KEY ("english", "entryId")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english");
|
|
|
|
--------------
|
|
-- RADKFILE --
|
|
--------------
|
|
|
|
CREATE TABLE "RADKFILE" (
|
|
"kanji" CHAR(1) NOT NULL,
|
|
"radical" CHAR(1) NOT NULL,
|
|
PRIMARY KEY ("kanji", "radical")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE INDEX "RADK" ON "RADKFILE"("radical");
|
|
CREATE INDEX "KRAD" ON "RADKFILE"("kanji");
|
|
|
|
CREATE VIEW "RADKFILE_Radicals" AS
|
|
SELECT DISTINCT "radical" FROM "RADKFILE";
|
|
|
|
--------------
|
|
-- KANJIDIC --
|
|
--------------
|
|
|
|
CREATE TABLE "KANJIDIC_Character" (
|
|
"literal" CHAR(1) NOT NULL PRIMARY KEY,
|
|
"grade" INTEGER CHECK ("grade" BETWEEN 1 AND 10),
|
|
"strokeCount" INTEGER NOT NULL,
|
|
"frequency" INTEGER,
|
|
"jlpt" INTEGER
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "KANJIDIC_RadicalName" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"name" TEXT NOT NULL,
|
|
PRIMARY KEY("kanji", "name")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "KANJIDIC_Codepoint" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"type" VARCHAR(6) NOT NULL CHECK ("type" IN ('jis208', 'jis212', 'jis213', 'ucs')),
|
|
"codepoint" VARCHAR(7) NOT NULL,
|
|
PRIMARY KEY ("kanji", "type")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE INDEX "KANJIDIC_Codepoint_byCharacter" ON "KANJIDIC_Codepoint"("kanji");
|
|
|
|
CREATE TABLE "KANJIDIC_Radical" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"type" VARCHAR(9) NOT NULL CHECK ("type" IN ('classical', 'nelson_c')),
|
|
"radical" INTEGER NOT NULL CHECK ("radical" BETWEEN 1 AND IIF("type" = 'classical', 214, 212)),
|
|
PRIMARY KEY("kanji", "type")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "KANJIDIC_StrokeMiscount" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"strokeCount" INTEGER NOT NULL,
|
|
PRIMARY KEY("kanji", "strokeCount")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "KANJIDIC_Variant" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"variant" TEXT NOT NULL,
|
|
"type" VARCHAR(8) NOT NULL CHECK (
|
|
"type" IN (
|
|
'jis208',
|
|
'jis212',
|
|
'jis213',
|
|
'deroo',
|
|
'njecd',
|
|
's_h',
|
|
'nelson_c',
|
|
'oneill',
|
|
'ucs'
|
|
)
|
|
),
|
|
PRIMARY KEY ("kanji", "type", "variant")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "_KANJIDIC_DictionaryReference_Part1" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"ref" VARCHAR(9) NOT NULL,
|
|
"type" VARCHAR(16) NOT NULL CHECK(
|
|
"type" IN (
|
|
'nelson_c',
|
|
'nelson_n',
|
|
'halpern_njecd',
|
|
'halpern_kkd',
|
|
'halpern_kkld',
|
|
'halpern_kkld_2ed',
|
|
'heisig',
|
|
'heisig6',
|
|
'gakken',
|
|
'oneill_names',
|
|
'oneill_kk',
|
|
'henshall',
|
|
'sh_kk',
|
|
'sh_kk2',
|
|
'sakade',
|
|
'jf_cards',
|
|
'henshall3',
|
|
'tutt_cards',
|
|
'crowley',
|
|
'kanji_in_context',
|
|
'busy_people',
|
|
'kodansha_compact',
|
|
'maniette'
|
|
)
|
|
),
|
|
PRIMARY KEY("kanji", "type")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "_KANJIDIC_DictionaryReference_Moro" (
|
|
"kanji" CHAR(1) NOT NULL PRIMARY KEY REFERENCES "KANJIDIC_Character"("literal"),
|
|
"ref" VARCHAR(7) NOT NULL,
|
|
"volume" INTEGER,
|
|
"page" INTEGER
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE INDEX "KANJIDIC_DictionaryReference_byPart1" ON "_KANJIDIC_DictionaryReference_Part1"("kanji", "ref", "type");
|
|
CREATE INDEX "KANJIDIC_DictionaryReference_byMoro" ON "_KANJIDIC_DictionaryReference_Moro"("kanji", "ref", "volume", "page");
|
|
|
|
CREATE VIEW "KANJIDIC_DictionaryReference" AS
|
|
SELECT "kanji", "ref", "type", NULL AS "volume", NULL AS "page" FROM "_KANJIDIC_DictionaryReference_Part1"
|
|
UNION
|
|
SELECT "kanji", "ref", 'moro' AS "type", "volume", "page" FROM "_KANJIDIC_DictionaryReference_Moro";
|
|
|
|
CREATE TABLE "KANJIDIC_QueryCode" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"code" VARCHAR(7) NOT NULL,
|
|
"type" VARCHAR(11) NOT NULL CHECK ("type" IN ('skip', 'sh_desc', 'four_corner', 'deroo', 'misclass')),
|
|
"SKIPMisclassification" VARCHAR(15),
|
|
PRIMARY KEY ("kanji", "type", "code")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE TABLE "KANJIDIC_Reading" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"type" VARCHAR(8) NOT NULL CHECK ("type" IN ('korean_h', 'korean_r', 'pinyin')),
|
|
"reading" TEXT NOT NULL,
|
|
PRIMARY KEY ("kanji", "type", "reading")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE INDEX "KANJIDIC_Reading_byReading" ON "KANJIDIC_Reading"("reading");
|
|
|
|
CREATE TABLE "KANJIDIC_Kunyomi" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"yomi" TEXT NOT NULL,
|
|
"isJouyou" BOOLEAN,
|
|
PRIMARY KEY ("kanji", "yomi")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE INDEX "KANJIDIC_Kunyomi_byYomi" ON "KANJIDIC_Kunyomi"("yomi");
|
|
|
|
CREATE TABLE "KANJIDIC_Onyomi" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"yomi" TEXT NOT NULL,
|
|
"type" VARCHAR(7) CHECK ("type" IN ('kan', 'go', 'tou', 'kan''you')),
|
|
"isJouyou" BOOLEAN,
|
|
PRIMARY KEY ("kanji", "yomi")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE INDEX "KANJIDIC_Onyomi_byYomi" ON "KANJIDIC_Onyomi"("yomi");
|
|
|
|
CREATE TABLE "KANJIDIC_Meaning" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"language" CHAR(3) NOT NULL DEFAULT "eng",
|
|
"meaning" TEXT NOT NULL,
|
|
PRIMARY KEY ("kanji", "language", "meaning")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE INDEX "KANJIDIC_Meaning_byMeaning" ON "KANJIDIC_Meaning"("meaning");
|
|
|
|
CREATE TABLE "KANJIDIC_Nanori" (
|
|
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
|
"nanori" TEXT NOT NULL,
|
|
PRIMARY KEY ("kanji", "nanori")
|
|
) WITHOUT ROWID;
|
|
|
|
CREATE INDEX "KANJIDIC_Nanori_byNanori" ON "KANJIDIC_Nanori"("nanori");
|
|
|
|
-------------------------
|
|
-- Interdict relations --
|
|
-------------------------
|
|
|
|
-- Radk - kanjidic
|
|
-- kanjireading -> filter kanji regex - kanjidic
|
|
-- index kanji search by romaji
|
|
-- index kanji search by hiragana
|
|
-- index word search by romaji
|
|
-- index word search by hiragana
|
|
|