CREATE TABLE "JMdict_InfoDialect" ( "id" VARCHAR(4) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; CREATE TABLE "JMdict_InfoField" ( "id" VARCHAR(7) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; CREATE TABLE "JMdict_InfoKanji" ( "id" VARCHAR(5) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; CREATE TABLE "JMdict_InfoMisc" ( "id" VARCHAR(12) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; CREATE TABLE "JMdict_InfoPOS" ( "id" VARCHAR(9) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; CREATE TABLE "JMdict_InfoReading" ( "id" VARCHAR(5) PRIMARY KEY NOT NULL, "description" TEXT NOT NULL ) WITHOUT ROWID; -- The XML specification says that an entry needs to have at least -- one sense and one reading. I will just assume this is the case, and -- not implement a check for it. CREATE TABLE "JMdict_Entry" ( "entryId" INTEGER PRIMARY KEY ); -- KanjiElement CREATE TABLE "JMdict_KanjiElement" ( "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"), "orderNum" INTEGER NOT NULL, "reading" TEXT NOT NULL, "news" INTEGER CHECK ("news" BETWEEN 1 AND 2), "ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2), "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2), "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2), "nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48), "baseScore" INTEGER NOT NULL AS ( (("news" IS 1 OR "ichi" IS 1 OR "spec" IS 1 OR "gai" IS 1) * 50) + (("news" IS 1) * 10) + (("news" IS 2) * 5) + (("ichi" IS 1) * 10) + (("ichi" IS 2) * 5) + (("spec" IS 1) * 10) + (("spec" IS 2) * 5) + (("gai" IS 1) * 10) + (("gai" IS 2) * 5) + (("orderNum" IS 1) * 20) ) STORED, PRIMARY KEY ("entryId", "reading"), UNIQUE("entryId", "orderNum") ) WITHOUT ROWID; CREATE INDEX "JMdict_KanjiElement_byEntryId_byOrderNum" ON "JMdict_KanjiElement"("entryId", "orderNum"); CREATE INDEX "JMdict_KanjiElement_byReading_byBaseScore" ON "JMdict_KanjiElement"("reading", "baseScore"); CREATE TABLE "JMdict_KanjiElementInfo" ( "entryId" INTEGER NOT NULL, "reading" TEXT NOT NULL, "info" TEXT NOT NULL REFERENCES "JMdict_InfoKanji"("id"), FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_KanjiElement"("entryId", "reading"), PRIMARY KEY ("entryId", "reading", "info") ) WITHOUT ROWID; -- ReadingElement CREATE TABLE "JMdict_ReadingElement" ( "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"), "orderNum" INTEGER NOT NULL, "reading" TEXT NOT NULL, "readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE, "news" INTEGER CHECK ("news" BETWEEN 1 AND 2), "ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2), "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2), "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2), "nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48), "baseScore" INTEGER NOT NULL AS ( (("news" IS 1 OR "ichi" IS 1 OR "spec" IS 1 OR "gai" IS 1) * 50) + (("news" IS 1) * 10) + (("news" IS 2) * 5) + (("ichi" IS 1) * 10) + (("ichi" IS 2) * 5) + (("spec" IS 1) * 10) + (("spec" IS 2) * 5) + (("gai" IS 1) * 10) + (("gai" IS 2) * 5) + (("orderNum" IS 1) * 20) ) STORED, PRIMARY KEY ("entryId", "reading"), UNIQUE("entryId", "orderNum") ) WITHOUT ROWID; CREATE INDEX "JMdict_ReadingElement_byEntryId_byOrderNum" ON "JMdict_ReadingElement"("entryId", "orderNum"); CREATE INDEX "JMdict_ReadingElement_byReading_byBaseScore" ON "JMdict_ReadingElement"("reading", "baseScore"); CREATE TABLE "JMdict_ReadingElementRestriction" ( "entryId" INTEGER NOT NULL, "reading" TEXT NOT NULL, "restriction" TEXT NOT NULL, FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), PRIMARY KEY ("entryId", "reading", "restriction") ) WITHOUT ROWID; CREATE TABLE "JMdict_ReadingElementInfo" ( "entryId" INTEGER NOT NULL, "reading" TEXT NOT NULL, "info" TEXT NOT NULL REFERENCES "JMdict_InfoReading"("id"), FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), PRIMARY KEY ("entryId", "reading", "info") ) WITHOUT ROWID; -- Sense CREATE TABLE "JMdict_Sense" ( "senseId" INTEGER PRIMARY KEY, "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"), "orderNum" INTEGER NOT NULL, UNIQUE("entryId", "orderNum") ); CREATE INDEX "JMdict_Sense_byEntryId_byOrderNum" ON "JMdict_Sense"("entryId", "orderNum"); CREATE TABLE "JMdict_SenseRestrictedToKanji" ( "entryId" INTEGER NOT NULL, "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"), "kanji" TEXT NOT NULL, FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"), PRIMARY KEY ("entryId", "senseId", "kanji") ) WITHOUT ROWID; CREATE TABLE "JMdict_SenseRestrictedToReading" ( "entryId" INTEGER NOT NULL, "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"), "reading" TEXT NOT NULL, FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), PRIMARY KEY ("entryId", "senseId", "reading") ) WITHOUT ROWID; -- In order to add xrefs, you will need to have added the entry to xref to. -- These should be added in a second pass of the dictionary file. -- In this version of JMdict, the xrefs can be ambiguous. -- There has been rumours of a nonambiguous version possibly arriving in the future -- (https://www.edrdg.org/jmdict_edict_list/2019/msg00360.html) -- but for time being, this need to be modeled as a one to many relationship. -- These two things also concern "SenseAntonym" CREATE TABLE "JMdict_SenseSeeAlso" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"), "xrefEntryId" INTEGER NOT NULL, "seeAlsoReading" TEXT, "seeAlsoKanji" TEXT, "seeAlsoSense" INTEGER, -- For some entries, the cross reference is ambiguous. This means that while the ingestion -- has determined some xrefEntryId, it is not guaranteed to be the correct one. "ambiguous" BOOLEAN NOT NULL DEFAULT FALSE, FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"), FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"), UNIQUE("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense") ); CREATE TABLE "JMdict_SenseAntonym" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"), "xrefEntryId" INTEGER NOT NULL, "antonymReading" TEXT, "antonymKanji" TEXT, "antonymSense" INTEGER, -- For some entries, the cross reference is ambiguous. This means that while the ingestion -- has determined some xrefEntryId, it is not guaranteed to be the correct one. "ambiguous" BOOLEAN NOT NULL DEFAULT FALSE, FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"), FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"), FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"), UNIQUE("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense") ); -- These cross references are going to be mostly accessed from a sense -- This will speed up the join. CREATE INDEX "JMdict_SenseSeeAlso_bySenseId" ON "JMdict_SenseSeeAlso"("senseId"); CREATE INDEX "JMdict_SenseAntonym_bySenseId" ON "JMdict_SenseAntonym"("senseId"); CREATE TABLE "JMdict_SensePOS" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"), "pos" TEXT NOT NULL REFERENCES "JMdict_InfoPOS"("id"), PRIMARY KEY ("senseId", "pos") ) WITHOUT ROWID; CREATE TABLE "JMdict_SenseField" ( "senseId" INTEGER NOT NULL, "field" TEXT NOT NULL, FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("senseId"), FOREIGN KEY ("field") REFERENCES "JMdict_InfoField"("id"), PRIMARY KEY ("senseId", "field") ) WITHOUT ROWID; CREATE TABLE "JMdict_SenseMisc" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"), "misc" TEXT NOT NULL REFERENCES "JMdict_InfoMisc"("id"), PRIMARY KEY ("senseId", "misc") ) WITHOUT ROWID; CREATE TABLE "JMdict_SenseLanguageSource" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"), "language" CHAR(3) NOT NULL DEFAULT "eng", "phrase" TEXT, "fullyDescribesSense" BOOLEAN NOT NULL DEFAULT TRUE, "constructedFromSmallerWords" BOOLEAN NOT NULL DEFAULT FALSE, UNIQUE("senseId", "language", "phrase") ); CREATE TABLE "JMdict_SenseDialect" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"), "dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("id"), PRIMARY KEY ("senseId", "dialect") ) WITHOUT ROWID; -- In the documentation, it says that the glossary can contain -- special prioritized entries, but I can't find a single one of those. -- Neither can I find a glossary tag with g_gend data, so these parts -- will be omitted. CREATE TABLE "JMdict_SenseGlossary" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"), "phrase" TEXT NOT NULL, "language" CHAR(3) NOT NULL DEFAULT "eng", "type" TEXT, PRIMARY KEY ("senseId", "language", "phrase") ) WITHOUT ROWID; CREATE INDEX "JMdict_SenseGlossary_byPhrase" ON JMdict_SenseGlossary("phrase"); CREATE TABLE "JMdict_SenseInfo" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"), "info" TEXT NOT NULL, PRIMARY KEY ("senseId", "info") ) WITHOUT ROWID; -- There is not a single example sentence that doesn't come from -- the Tanaka Corpus, so I will leave the type out for now. CREATE TABLE "JMdict_ExampleSentence" ( "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"), "word" TEXT NOT NULL, "source" TEXT NOT NULL, "sourceLanguage" CHAR(3) NOT NULL DEFAULT "eng", "japanese" TEXT NOT NULL -- "type" TEXT NOT NULL DEFAULT "tat", );