-- TODO: figure out ondelete functions... ------------ -- JMdict -- ------------ CREATE TABLE "JMdict_InfoDialect" ( "id" VARCHAR(4) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; CREATE TABLE "JMdict_InfoField" ( "id" VARCHAR(7) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; CREATE TABLE "JMdict_InfoKanji" ( "id" VARCHAR(5) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; CREATE TABLE "JMdict_InfoMisc" ( "id" VARCHAR(12) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; CREATE TABLE "JMdict_InfoPOS" ( "id" VARCHAR(9) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; CREATE TABLE "JMdict_InfoReading" ( "id" VARCHAR(5) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; -- The XML specification says that an entry needs to have at least -- one sense and one reading. I will just assume this is the case, and -- not implement a check for it. CREATE TABLE "JMdict_Entry" ( "id" INTEGER PRIMARY KEY ); -- KanjiElement CREATE TABLE "JMdict_KanjiElement" ( "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"), "reading" TEXT NOT NULL, "news" INTEGER CHECK ("news" BETWEEN 1 AND 2), "ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2), "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2), "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2), "nf" INTEGER, PRIMARY KEY ("entryId", "reading") ) WITHOUT ROWID; CREATE TABLE "JMdict_KanjiElementInfo" ( "entryId" INTEGER NOT NULL, "reading" TEXT NOT NULL, "info" TEXT NOT NULL REFERENCES "JMdict_InfoKanji"("id"), FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_KanjiElement"("entryId", "reading"), PRIMARY KEY ("entryId", "reading", "info") ) WITHOUT ROWID; -- ReadingElement CREATE TABLE "JMdict_ReadingElement" ( "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"), "reading" TEXT NOT NULL, "readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE, "news" INTEGER CHECK ("news" BETWEEN 1 AND 2), "ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2), "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2), "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2), "nf" INTEGER, PRIMARY KEY ("entryId", "reading") ) WITHOUT ROWID; CREATE TABLE "JMdict_ReadingElementRestriction" ( "entryId" INTEGER NOT NULL, "reading" TEXT NOT NULL, "restriction" TEXT NOT NULL, FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), PRIMARY KEY ("entryId", "reading", "restriction") ) WITHOUT ROWID; CREATE TABLE "JMdict_ReadingElementInfo" ( "entryId" INTEGER NOT NULL, "reading" TEXT NOT NULL, "info" TEXT NOT NULL REFERENCES "JMdict_InfoReading"("id"), FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), PRIMARY KEY ("entryId", "reading", "info") ) WITHOUT ROWID; -- Sense -- Optimal solution here would be to have an id INTEGER AUTOINCREMENT, -- and the entryId as a composite key, since the entryId is used below. -- However, autoincrementing composite keys are not available in sqlite CREATE TABLE "JMdict_Sense" ( "id" INTEGER PRIMARY KEY AUTOINCREMENT, "entryId" INTEGER REFERENCES "JMdict_Entry"("id") ); CREATE TABLE "JMdict_SenseRestrictedToKanji" ( "entryId" INTEGER, "senseId" INTEGER REFERENCES "JMdict_Sense"("id"), "kanji" TEXT, FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"), PRIMARY KEY ("entryId", "senseId", "kanji") ); CREATE TABLE "JMdict_SenseRestrictedToReading" ( "entryId" INTEGER, "senseId" INTEGER REFERENCES "JMdict_Sense"("id"), "reading" TEXT, FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), PRIMARY KEY ("entryId", "senseId", "reading") ); -- In order to add xrefs, you will need to have added the entry to xref to. -- These should be added in a second pass of the dictionary file. -- In this version of JMdict, the xrefs can be ambiguous. -- There has been rumours of a nonambiguous version possibly arriving in the future -- (https://www.edrdg.org/jmdict_edict_list/2019/msg00360.html) -- but for time being, this need to be modeled as a one to many relationship. -- These two things also concern "SenseAntonym" CREATE TABLE "JMdict_SenseSeeAlso" ( "senseId" INTEGER REFERENCES "JMdict_Sense"("id"), "xrefEntryId" INTEGER, "seeAlsoReading" TEXT, "seeAlsoKanji" TEXT, "seeAlsoSense" TEXT REFERENCES "JMdict_Sense"("id"), CHECK ("seeAlsoReading" = NULL <> "seeAlsoKanji" = NULL), -- CHECK("seeAlsoSense" = NULL OR "seeAlsoSense") -- Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId. FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"), FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense") ); CREATE TABLE "JMdict_SenseAntonym" ( "senseId" INTEGER REFERENCES "JMdict_Sense"("id"), "xrefEntryId" INTEGER, "antonymReading" TEXT, "antonymKanji" TEXT, "antonymSense" TEXT REFERENCES "JMdict_Sense"("id"), CHECK ("antonymReading" = NULL <> "antonymKanji" = NULL), FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "kanji"), FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense") ); -- These cross references are going to be mostly accessed from a sense -- This will speed up the join. CREATE INDEX "JMdict_SenseSeeAlso_bySenseId" ON "JMdict_SenseSeeAlso"("senseId"); CREATE INDEX "JMdict_SenseAntonym_bySenseId" ON "JMdict_SenseAntonym"("senseId"); CREATE TABLE "JMdict_SensePOS" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"), "pos" TEXT NOT NULL REFERENCES "JMdict_InfoPOS"("id"), PRIMARY KEY ("senseId", "pos") ) WITHOUT ROWID; CREATE TABLE "JMdict_SenseField" ( "senseId" INTEGER NOT NULL, "field" TEXT NOT NULL, FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("id"), FOREIGN KEY ("field") REFERENCES "JMdict_InfoField"("id"), PRIMARY KEY ("senseId", "field") ) WITHOUT ROWID; CREATE TABLE "JMdict_SenseMisc" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"), "misc" TEXT NOT NULL REFERENCES "JMdict_InfoMisc"("id"), PRIMARY KEY ("senseId", "misc") ) WITHOUT ROWID; CREATE TABLE "JMdict_SenseLanguageSource" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"), "language" CHAR(3) NOT NULL DEFAULT "eng", "phrase" TEXT, "fullyDescribesSense" BOOLEAN NOT NULL DEFAULT TRUE, "constructedFromSmallerWords" BOOLEAN NOT NULL DEFAULT FALSE, PRIMARY KEY ("senseId", "language", "phrase") ); CREATE TABLE "JMdict_SenseDialect" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"), "dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("dialect"), PRIMARY KEY ("senseId", "dialect") ) WITHOUT ROWID; -- In the documentation, it says that the glossary can contain -- special prioritized entries, but I can't find a single one of those. -- Neither can I find a glossary tag with g_gend data, so these parts -- will be omitted. CREATE TABLE "JMdict_SenseGlossary" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"), "phrase" TEXT NOT NULL, "language" CHAR(3) NOT NULL DEFAULT "eng", "type" TEXT, PRIMARY KEY ("senseId", "language", "phrase") ) WITHOUT ROWID; CREATE TABLE "JMdict_SenseInfo" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"), "info" TEXT NOT NULL, PRIMARY KEY ("senseId", "info") ) WITHOUT ROWID; -- There is not a single example sentence that doesn't come from -- the Tanaka Corpus, so I will leave the type out for now. CREATE TABLE "JMdict_ExampleSentence" ( "id" INTEGER PRIMARY KEY, "senseId" INTEGER REFERENCES "JMdict_Sense"("id"), "word" TEXT NOT NULL, "source" TEXT NOT NULL, "sourceLanguage" CHAR(3) NOT NULL DEFAULT "eng", "japanese" TEXT NOT NULL -- "type" TEXT NOT NULL DEFAULT "tat", ); -- These tables are for optimizing searches. -- In order to include results from both, the software should -- first check if the searchword is convertible to kana, and then -- potentially get results from both by doing a union between two -- selects. CREATE TABLE "JMdict_EntryByKana" ( "kana" TEXT NOT NULL, "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"), PRIMARY KEY ("kana", "entryId") ) WITHOUT ROWID; CREATE INDEX "JMdict_EntryByKana_byKana" ON "JMdict_EntryByKana"("kana"); CREATE TABLE "JMdict_EntryByEnglish" ( "english" TEXT NOT NULL, "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"), PRIMARY KEY ("english", "entryId") ) WITHOUT ROWID; CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english"); -------------- -- RADKFILE -- -------------- CREATE TABLE "RADKFILE" ( "kanji" CHAR(1) NOT NULL, "radical" CHAR(1) NOT NULL, PRIMARY KEY ("kanji", "radical") ) WITHOUT ROWID; CREATE INDEX "RADK" ON "RADKFILE"("radical"); CREATE INDEX "KRAD" ON "RADKFILE"("kanji"); CREATE VIEW "RADKFILE_Radicals" AS SELECT DISTINCT "radical" FROM "RADKFILE"; -------------- -- KANJIDIC -- -------------- CREATE TABLE "KANJIDIC_Character" ( "literal" CHAR(1) NOT NULL PRIMARY KEY, "grade" INTEGER CHECK ("grade" BETWEEN 1 AND 10), "strokeCount" INTEGER NOT NULL, "frequency" INTEGER, "jlpt" INTEGER ) WITHOUT ROWID; CREATE TABLE "KANJIDIC_RadicalName" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "name" TEXT NOT NULL, PRIMARY KEY("kanji", "name") ) WITHOUT ROWID; CREATE TABLE "KANJIDIC_Codepoint" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "type" VARCHAR(6) NOT NULL CHECK ("type" IN ('jis208', 'jis212', 'jis213', 'ucs')), "codepoint" VARCHAR(7) NOT NULL, PRIMARY KEY ("kanji", "type") ) WITHOUT ROWID; CREATE INDEX "KANJIDIC_Codepoint_byCharacter" ON "KANJIDIC_Codepoint"("kanji"); CREATE TABLE "KANJIDIC_Radical" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "type" VARCHAR(9) NOT NULL CHECK ("type" IN ('classical', 'nelson_c')), "radical" INTEGER NOT NULL CHECK ("radical" BETWEEN 1 AND IIF("type" = 'classical', 214, 212)), PRIMARY KEY("kanji", "type") ) WITHOUT ROWID; CREATE TABLE "KANJIDIC_StrokeMiscount" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "strokeCount" INTEGER NOT NULL, PRIMARY KEY("kanji", "strokeCount") ) WITHOUT ROWID; CREATE TABLE "KANJIDIC_Variant" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "variant" TEXT NOT NULL, "type" VARCHAR(8) NOT NULL CHECK ( "type" IN ( 'jis208', 'jis212', 'jis213', 'deroo', 'njecd', 's_h', 'nelson_c', 'oneill', 'ucs' ) ), PRIMARY KEY ("kanji", "type", "variant") ) WITHOUT ROWID; CREATE TABLE "_KANJIDIC_DictionaryReference_Part1" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "ref" VARCHAR(9) NOT NULL, "type" VARCHAR(16) NOT NULL CHECK( "type" IN ( 'nelson_c', 'nelson_n', 'halpern_njecd', 'halpern_kkd', 'halpern_kkld', 'halpern_kkld_2ed', 'heisig', 'heisig6', 'gakken', 'oneill_names', 'oneill_kk', 'henshall', 'sh_kk', 'sh_kk2', 'sakade', 'jf_cards', 'henshall3', 'tutt_cards', 'crowley', 'kanji_in_context', 'busy_people', 'kodansha_compact', 'maniette' ) ), PRIMARY KEY("kanji", "type") ) WITHOUT ROWID; CREATE TABLE "_KANJIDIC_DictionaryReference_Moro" ( "kanji" CHAR(1) NOT NULL PRIMARY KEY REFERENCES "KANJIDIC_Character"("literal"), "ref" VARCHAR(7) NOT NULL, "volume" INTEGER, "page" INTEGER ) WITHOUT ROWID; CREATE INDEX "KANJIDIC_DictionaryReference_byPart1" ON "_KANJIDIC_DictionaryReference_Part1"("kanji", "ref", "type"); CREATE INDEX "KANJIDIC_DictionaryReference_byMoro" ON "_KANJIDIC_DictionaryReference_Moro"("kanji", "ref", "volume", "page"); CREATE VIEW "KANJIDIC_DictionaryReference" AS SELECT "kanji", "ref", "type", NULL AS "volume", NULL AS "page" FROM "_KANJIDIC_DictionaryReference_Part1" UNION SELECT "kanji", "ref", 'moro' AS "type", "volume", "page" FROM "_KANJIDIC_DictionaryReference_Moro"; CREATE TABLE "KANJIDIC_QueryCode" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "code" VARCHAR(7) NOT NULL, "type" VARCHAR(11) NOT NULL CHECK ("type" IN ('skip', 'sh_desc', 'four_corner', 'deroo', 'misclass')), "SKIPMisclassification" VARCHAR(15), PRIMARY KEY ("kanji", "type", "code") ) WITHOUT ROWID; CREATE TABLE "KANJIDIC_Reading" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "type" VARCHAR(8) NOT NULL CHECK ("type" IN ('korean_h', 'korean_r', 'pinyin')), "reading" TEXT NOT NULL, PRIMARY KEY ("kanji", "type", "reading") ) WITHOUT ROWID; CREATE INDEX "KANJIDIC_Reading_byReading" ON "KANJIDIC_Reading"("reading"); CREATE TABLE "KANJIDIC_Kunyomi" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "yomi" TEXT NOT NULL, "isJouyou" BOOLEAN, PRIMARY KEY ("kanji", "yomi") ) WITHOUT ROWID; CREATE INDEX "KANJIDIC_Kunyomi_byYomi" ON "KANJIDIC_Kunyomi"("yomi"); CREATE TABLE "KANJIDIC_Onyomi" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "yomi" TEXT NOT NULL, "type" VARCHAR(7) CHECK ("type" IN ('kan', 'go', 'tou', 'kan''you')), "isJouyou" BOOLEAN, PRIMARY KEY ("kanji", "yomi") ) WITHOUT ROWID; CREATE INDEX "KANJIDIC_Onyomi_byYomi" ON "KANJIDIC_Onyomi"("yomi"); CREATE TABLE "KANJIDIC_Meaning" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "language" CHAR(3) NOT NULL DEFAULT "eng", "meaning" TEXT NOT NULL, PRIMARY KEY ("kanji", "language", "meaning") ) WITHOUT ROWID; CREATE INDEX "KANJIDIC_Meaning_byMeaning" ON "KANJIDIC_Meaning"("meaning"); CREATE TABLE "KANJIDIC_Nanori" ( "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"), "nanori" TEXT NOT NULL, PRIMARY KEY ("kanji", "nanori") ) WITHOUT ROWID; CREATE INDEX "KANJIDIC_Nanori_byNanori" ON "KANJIDIC_Nanori"("nanori"); ------------------------- -- Interdict relations -- ------------------------- -- Radk - kanjidic -- kanjireading -> filter kanji regex - kanjidic -- index kanji search by romaji -- index kanji search by hiragana -- index word search by romaji -- index word search by hiragana