From 5cf0b95d8b98f44d2229c1ebccd9729beff9be3c Mon Sep 17 00:00:00 2001 From: h7x4 Date: Mon, 20 Jun 2022 20:06:07 +0200 Subject: [PATCH] init commit --- .github/icon.png | Bin 0 -> 17539 bytes .gitignore | 13 + .sqlfluff | 8 + README.md | 5 + bin/common.dart | 0 bin/ja_db.dart | 16 + bin/jmdict/objects.dart | 235 ++++++++++ bin/jmdict/parser.dart | 346 ++++++++++++++ bin/kanjidic/objects.dart | 284 +++++++++++ bin/kanjidic/parser.dart | 231 +++++++++ bin/objects.dart | 5 + bin/radkfile/objects.dart | 13 + bin/radkfile/parser.dart | 32 ++ bin/romaji_transliteration.dart | 622 +++++++++++++++++++++++++ flake.lock | 122 +++++ flake.nix | 208 +++++++++ migrations/0001_initial.sql | 443 ++++++++++++++++++ migrations/0002_insert_info_values.sql | 251 ++++++++++ pub2nix.lock | 99 ++++ pubspec.lock | 89 ++++ pubspec.yaml | 21 + 21 files changed, 3043 insertions(+) create mode 100644 .github/icon.png create mode 100644 .gitignore create mode 100644 .sqlfluff create mode 100644 README.md create mode 100644 bin/common.dart create mode 100644 bin/ja_db.dart create mode 100644 bin/jmdict/objects.dart create mode 100644 bin/jmdict/parser.dart create mode 100644 bin/kanjidic/objects.dart create mode 100644 bin/kanjidic/parser.dart create mode 100644 bin/objects.dart create mode 100644 bin/radkfile/objects.dart create mode 100644 bin/radkfile/parser.dart create mode 100644 bin/romaji_transliteration.dart create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 migrations/0001_initial.sql create mode 100644 migrations/0002_insert_info_values.sql create mode 100644 pub2nix.lock create mode 100644 pubspec.lock create mode 100644 pubspec.yaml diff --git a/.github/icon.png b/.github/icon.png new file mode 100644 index 0000000000000000000000000000000000000000..15285d2b978e06275cc5af592c9f7cc13056ca05 GIT binary patch literal 17539 zcmeAS@N?(olHy`uVBq!ia0y~yV4MKL9Bd2>3=R9u&M+`2FnGE+hE&{od$+PiCiO4- zhx6rSlTG$!Zr;66M0D$;gMm4&*_q#Ns_3$4oDvN2bF}$USfOxYp+tQLdmP7#KSzWG 
zCvtUkEX>_xJbT*(CebHCj*gmB($sA;7G_Sm`}5yFk5wGDlSyS`T)yqC}lwn`o zkxHWm|lsLCsn-m(?2o6pY?kd+CDrt-~R0D_w`{es}6_gh3>Wd_vy6!)T^u6 zZ+N*J5%iGY<*6~tdC171R5Z!D?$7({dNCH$dR<&(8e7h^J$vT4GbWF>fm2cMnUSP~ zh1RyDqhi}61PfV&gm%Wfdo!V(o#Bjg=+#TT$9&Sd1qIy|6hD-Bd!Jo$?_NFA3jK*n z_H&q@`4`WT+a?Fnv*6_IZFA$C9x^f*uVzrs_OXrb@33Iv?2r;J4gJqH(R})&2bxSK zex-As-<$+;+r*u#mQTy5U^viYvmqze`EXs1r_v*iB{vrHKk#J8*Zk;P8K*RHSw#Bt z;G9sUqy`bkYcu+pepJrP@!QUB68_5E_1c-4!Pd)`J=^`}stAk!67$5{^OGA2z674S zcKGwNh6$ zz4Z;gbJwi$zI*Ig`R1#g9gIN?7pKSn469UY8DUVm$Ar!&k5$-J$r6dbaoS<%UbXF^~^{+SZib8D}>{a@a} zQ}=l;NM%grRc*$el~1e$CTgy_by7f4iS2~J&SPiVPOMzhc~F4y!o|5>vS)57D6yT$ z)A(S^Vt(~BKSRq~;Vr?grJ4PP`RVULQhq6uPlhqHvrdlr<#A@l=c}9gI1)RK*jzpL z?%6Z*&C>-0nV*=>%gbEZcp|j1x5h2&G3{EQpZ_qKL0 zI;}Tax&8ZQhI=9xUOe{aJpS^=zvC^r>m|4*PInIT%|F#$QetkZdLy|*LV+dWMfJn! zY+HWSs7USou(jS&M9IBH&&B1Q+RvKfpM8CUZx(k*D2Vzu0&SvZ8&z-V= zqYrHhXW)43@ucBZP2EROYJFqt!ZYENY^{lz*^39??YjI+i}$Tsl^bp^R6ggaBJae= zoiXp;m2!*qBW1aR_6BT(T?!0vUdwY7X z9LaXtipwE?#;X7qspE|MrGI@86Et+t zQm~Pd{vNnMcg5FgrTMqd7jEl)QvT>=iMWd&gMq-NS2m~ST70~I>)J74VI?+?pPxVd ze9t5yf8oVq<;2_D`=6CR-u73WVY$;)?$Fj%+rFh3T`#;2Dgw5?J9dm!z|wHRRep|w zd-HS@-dXGyeGr_uIy}_&w&}CeclLHjD5Pe-J=B_f^z%CYtiMb(`_C25yL0*J5u=&X zCN1)Nf)=c`p88AvU6KpWgiXRX_x$8y_|3ZIm$;$9hyATl5lWMPiZy5-4mDi|W(xmPeaoGCmf{9swY+=U$>(W}aL zth}Nlx2WTk_fggk?IJ-JKCXVb44QY#2r@4* zIjlVOmi(&e>=7;UM`fQU#-INotNz{SLn4E=Q`ld_h-*US|Ndm&dD|hOpz3=2mQLtj z&4yVGT_2p5Z2Pt;X-$Ux@ta}6!Noic>dx1A@-Hpwr$VXyY@>>H*Sxj`H#@U;?CDZ*!dXBGqSTy%k7oeJYMXb;kDP>WgUls&A$mJ zQr>BNt6IMF^8cWTVk`3aA}33j&)(a(TV9Y^<;s;A87^O^bG*BB^>pCX;^tfbT5|up zy7sx>cy?CEa_#LIX78F@cqUwU#i++D@I-k@{G3}?x2m16^04r0XG&+f5Rl68;pw}O zS0?Y;K*|cGrSGpjAkM6FsCi?Aj)*X~n#^-G(VZ40y-zC5cbrV=+N~tWtYQMrpU>}d zC`bhee)_P-|HLBKpBt`t9@@5^Nnq8?BG)BRu6ge>Gb-52-IdrpG<mZan5cPL1vYJJgrg@mvj6ZIL>GJo_(6Q zMmjV5^RHjO{TV+voaov%|I63AThq?XiF!B9g=d08O7F_>tMl0-SVSM3`K9U|$C)kr zw)%eUUWUHFtIwT8^OV}JUAgk18{{>HzB|pEx$ko*h$R@-2^7nyzbO?hpHsT3%A`S8 zQTx_L+nHy~cVr70I&@k8@=&wqPpB*4Tz0Q(XSUbR=tOKrI(@+TKeI{sk}$BMWc 
zxo0%**mg=NxISDKwAn@FM8DR`h_DsRl6hUw0Q%Qq@C$^(_yI`%8` zCm&n=DxV>QwfxBB*Gb#vY99U7ZGEnk%5LIX#RPjJky3>Nu5nHUgMy5_A+e=3%TDzSMuuJvAeZ}oxg%yNHYr2UugKDE&JNZ6ViLcMPN zY!_rV2|km&d#t8wx3(bj5`}sBS+lZM>IqGJuups8uaLY)N4!t1boP1qqQX!2wMFP# z{>YA+{nj(*9FCpV!MG{?_@r%StF7km=RB~FQ7qX(=k=1z^1EBN-m!93n!7LD<)6Aw zs?fwN#=7aI%jWIY6=Y7iQe`w#pUFb$K~jxo+U?b+wUBU;R zB=oQPbaKIdg9x9?*4EdvRX#B?JgoGIJR)>{cc;FjkfB3a{h^!J(wfwB{jSWFq>*6s5G4|re}_NQBls(F5RhjA|K84naxn%wk9HcM{n=@ zXX|{9F#LS~!5~e|=KXU;4X^0EkJqgEa!#osYU|eJj$c;_h_3dsegAALw|L*vcRoiL z7#Vo(D(vM}a+H@4J1+A)`;AsJs8~4{eXWq=vTtcZ(pT;EZ;oD5OlsiqJ8x%DVBYYO zQT*vonS!pC*Ut(ceB1W+IID%W%F&xl4^HeV`Ng`8C$N>hW7{^j1;3&hjG6Vk{B&PC zUzS3R^d~y=psr4an!Yg^9nP=E<*p}sI6dkr_zOj2n zE=$ae0-5;bpYC6{W1~6!bZ(ONWPh`xvdfpezuTI|GRgEAi{Q^Rj;>GVXVv^${O0yy z;qp1941O#Vw380SKQ6Bj2<&9>xHjW3!yl~|f)iJLi(aMf7q_oQdH1mnrxeSE^WvGM z`@5G0%}-q`Z0PVn%2;|;IWxzf-Pe^q?cC+_jIpdcu=T|Eg|FV5=&y>^zj|$65T|e? zC=DdfHDuQ);CE4LTcB4bT|P&CgLh9{%9|Vg%s<-oAM)A6seWpdeEYU`P39sl{s}qG zhk3XoxDv8BUOm*`$tip;rTm6*@a@~?-Kldq7F=NeDXw!q`{wDw3g_7?E-|O%CO?)= zjGW7G=tJeDc{U#o7X2~F6-avdWq#m|--`N`^R5SZ2EPzCbSOJC*S-H68F#E@=zb;qp!sWTZ2zXPPAwaUxH;zf%b1yE8SV*9diy^| zXY$6Iyj=QHy>&bXHdk-pU@fRRQz&yyEwGjSgj=$jyVAs?9RC&vIJc|+y?J4#(a!`g z_v7A7b5^YSyZC2q;9gJzAk|kWw4x!USJIwg3$ula%I&>IV*T^aKKmYirch2~Vz-;& zmmhvQlYh!(Pgrq?Ipype%dHIWm_F>A&|`DW%dUQoPSln2&%VdsDcrYyRlUT)^>>7x zhqHA`DBQXg6&oB}EX^Q1>FwLiN)wZlPfy!>VwwKDJtvMctdTy}e`{mJn-ahI{vi(; z7p+|@Xg>QdZ^JZ!-1q9QFTMPI^UYDyvxQY5PZr-OHL#KDb?bk60%X4nTDYc&f9T34`$8%^I7Wjrl@5lC3}v{Qqq#W*wo%`cu{WR{FSdX6K{Hm z{`9@GZU2#HeAm3UZjGOD<@Rlk2M5Ccem&-)Qgr0XIlsEV2`eVKD`bo7$-G{5Z{@1x zZE4)0yUqF~i`mw%X3ux4uAa|6hb7_mh4|d&SM1Bm%Ipr_Qe5?b0o-kK*`D)~em?Mxod-cT^ z6DN!5-#d`TH+@EK{OW0?DTlV@C+7-GU1z(G{ZjIo+r_K8U7zqO-dY#upORSXe%v_c z(FZ{X!;0X!9a^O~MT+UfI*&rDY^kc0V%?@~XZtGgL`xNm-o5ig#z`sNxJmw(b9Z71YtmF`;=$Lrv< z!}fVvujTZ!QdP4b^=%ek-MFHONhSMC_`)j5)xBB=FU)8v$R`+ytw23O5moi>H7C2WUb5W z8btr4wA(RGd=hfJa-OfNONbjI69XudS6{u_&RoDXN3OqOUizcH!{q@T9j6Q(7#7r> zoD|NWqx0bDnyaSC$CiAH+jb^ZSWs|kVFQE1ox-?t-7>5XPTIy?IhA2@>xlnFA;D0O 
z28L$yYs&MsGaSfxADNPpkZ|yV&I%Wo5KWL>yLVlSXja@**RsE=JU%|&8&s9#h$$`N z;t^n2@%q)T)#eqJucs#dJEgl(H@ffc_kF*sZmPJrh{`B11mxddz2Fv?!v59sgRjaT z@3D*wTQ8STEFfs;z`*d)XIX5+`S)+`yFQ8T^m#V<(zRzBH9I;a6#lC|vj|^(g4f!k z|8&x}TEW>39UY9DK2P+By}OfXjnuEjt4>%J-u@@GdOk?pds9Z>{->2|8Lq8-^^)`U zKgHGaKpjV&;N82fxnyO3_fIj}pR`q2(9pq6OzG|0*^y6OT%IaMd9~I)of`ftJ1eVB z=vULKc^nfv|E+iX?fT@{q}QcNHTT;)7=vP*^`+`;-aI`cmCW!%PC;pt={pPgbaS;u z3K46VKq*8(P>{I=7G#O%)ATc|z*5O2hUIYA5)2#`tyZIdw8Jnd!gK z>ZohCg87!oDYu~AGy!Np~9;f3zc&kS8aY-P2H9!n-(J;0!-B*lBK^(ct8 z3{7n?aOmi8^K-d3AvfcJ&*}A#q>4A4n-vO@zI9haMd+bMibuIonm(e)Yfxfyx$L{60@PZkV(pe&%UbnV)S zUF(>W7tK09rXjc?1?UWM*d{F@44)DN!M^>iJVPwoBQ)E0fQPxUW0p zrkJNyD&%tRPVZ44-F3&whWroQ)&Y#C)bt zFS8tLfnnL#R=pQ;?$($e=el=%Sz_5&#Y=Dcr1|a4 zopNqgsOZ-{(F|`y54`#=e6@O4@XFb{XU`Ji5m@w)|LIZFXOrXX{Cw3`r!q(#4NL%q^^}&`g>EXH;bj`R zN>)+xT(@EJGVkcw;fIblxia{1)O=x_Gq+&QYwo=`jKKCrSs)XeCVni^&GS3e|lDT9rCg5k2~y4jlQ z2|8C+Zm!#*;x2dciB0I=v`Cx^*p#PERYZY<#;va^7{0 zt2qtn=Y7|`z8ACZa*;?kgKDv4c2NHNuS!B+Ow4N;>>^&Muai&+34FC{*S}fvGMzgZ z#UB4yll#A8?M8J@rQ_2MdFajdn#=!S@xk!vb0o{>JUtpA&fIe9*1c_icep8?WiVsg z;asmY+b8biHHoczH`&@Q{C8o>Y@cNde$72_*!kH%340~w3XDV zXl5JTJzH+i@ZV_aR-0g>ye2jJ_KhEIya#=Y1%HO>Oy0R@#_N=ZfYcTbt6F`w`6uN$ za@g6|omQKyo6{R;snj!HG;Vjn!y7A}|CMvt(4V`y?CRC4b3@-;HF8*ybi{aOnczl`{575epdQ1xl60>vFO-$@xuEA4mLF!|y$hB*SobJ&~LaIy^#mnd9w=C6^P%zQT)7n~ZHZ!NT z@ZO$?mHFO(C#goB{@lm3L34qUi|CG0=i<-OH#WCD-H=rO_oA=bey)j+neM&2oph|m z%7^zL!@|IMlQOwo@_JpA?nWldon01my+nQT@qHyf9tL|A7K2h?R^+vh9PeBYN9axe z{dbmSan{Y9W#@%0w@18w<+s$Y`NUGj_xa!ss?4_a#zqbqTdY4=Z;uaPC}0RVFFf(~ z?_a;FHXXCMGS{p2y04meUPj)$5F^D&e#;qalnyA#1jWYw75=s^QNS{{*88-}{_x9} zZ`{yW)wr#bp+CH{Cc&+`I{wD2Mn7Lyw&EpHc9B%kFo#q?Y-1Tb_Hm&M@EA;r>E_2hw+Yd;3)`y;OaZvAd*? 
z>3Ro$@Z}w!pS@jsU_JwjWz)SW;+9Hzx6ik4W%SZ>Ff%dVuV`;z5Z-uL!N`1n>FY+N zBwpDo3szM#CN%swG1tC+79Wq#y=FUecBNvL9VVM9i|en=2wjyGt@Lf#ty{mFY^R|@48Gl|39tP`S6*Qv9Z}V9+zdc&GwON^jWrd0{2-dw}GyV^LZsE_iwfFbE^7!{{&z>b0 z@4VB@(DiT0%nqNE-G9P*Ge3NueJ#~M$d2FPAd^dP=dJ@v4DBrC4}$%5Uq5lNx-0ta z6@O^Mj^gKjA6Bip$Nj*7d&cWs?|vslD(_2^+qmX2Lrr`2^nCX10wOuksN z&-%q%P;C{nZR1Clinw%#->2rtOZ7)=L|pI$hEf{PIvtJn6dr%^~c-( zHV3Wz<1qhFSJgcqhV2X%=l;yZr5#>~nU8I;ks) zuA%ii_bp6}|F&nYRkHv5cg4My+>Ca$>#j_8oS^N$BJIKX+xr(WGaoP7 z|7qdm?OP*PRV~=R_Y&9L%kCQ^-qbU!;mP1|+z3P)yzn^^U!0aNUzFg_8o$kMicj2wu?dIN-S4ytEGtumLXn1SxZcwRv@_B#r zt6L3vxqJ4zfA#uyU~WkN^H_sfD?i>1(b7DSAFyM!{Nh6YH&3VgbKZ`Awvd0q!jC`a z+Uf2}Tx(c*XDe4tO~1>ZS}WHj#%ug#k51FA7F3cy^kBP0SNwM+@m(iVy7m^d|9y7W z_U4;T<)2g67#d&w{Es2Qr$IG--fV-16_I;yJ4W4wUn|No>f!vTD`)jmqyX zG}@kga`CW4HgC@ZH3j&Jk1N0aPFV7RG@%SIP|65+k+k2&*(6+r!u6o=VU#q`={rb0IZ&CZhU%LWN>|*6U_H=t* zF8hah4Bgizi)Z$M+GTpBva?>^34PLUSGr8>T@WjmK*F~E?(^ZUi_2DC^}Mrff9t`{ z?@b>bz6UT1IpJ8oM1>`K(%zvYfk{xVL>61K@dcyHU^3d_*a3?`R9 zyK4#=?myDKQm6TE?%tK_w2n%5Ie&Nd{c&w*|Ylf!lKpBFR*yze9*nGyKj!AyOK<+$gzcXeGb#7Wi9o2|0afiVn6HFs=&WO zyPv(5xqkOb$%P|pqw`q{DjGbuiB2mm*_*pw>$n%+d5$B6MXR!YU+LJxzo~uK?AiBu z5?1vep1pKwjD6wpo$RaZd7en9e!4lYt*v~+^nJ_RpBC*Gs@LEB&ToEKjY1sL*%=$dycqc^4orzF)QH^7J0igakLEjqHI{t3N4S-#C5WyN03# z49gw9eq*?1r*MFi^Y}}R*N>f4SvVRBUteEu-&1|%MWxa{)((c3ML#kwfvS^NopZPp zg;Z!_J}h zA^Eqs*L-I|t;gH9)!tde zvm~(m2v6wi@ksQKT=FTx%XAX(&Cyudp0bId7bV*ORq~5h*Jr30>A$lOKfTEH zuj;*>X5yK??BDl1Q)~tm2C7CgC%ZmLVOU(-Y05uw^5dCj7IE3`7CSpuW_Yc4|bOPy7x&!;skTR-{Qbsh6P9a z?6_q%zn%TLd9#nc{6i)71-~Zl$h{wRFkIc~#OG;0FE@Xg09st4#B)C?`IW*av-xE& z*wuC8_AH$9dcEFUzwQ(3xh|AnXUgcbJa_Y}_q07hZlOyAcCPYxU6swh;jkq0x3W3) z$C3=25B9e{ExRvsOJ7Mn>S=wr`qSr6p)>FOuUH$SSKhd)&q^co ziTTx^e;5mo?_4hPT3xLBFW-U9u2QWvm3Nz1AD7*b7uP*?T=u!w=}pg=e*D%G+w9xG ze=O%W|J1q~j?{)Dvgvp8qzb-BpZV_SR`uaiKHFuZeg8kkr*A(!ZD&%Xc;B9?5@r{> zDI4BSR@0B)w=w2*`iqwv-;1oaT+`Tiy39Ch&;8Zw=5an)Zk;h@?X*-`(eo*{DoZ$C za~wGz$dPd4jj~nOPf-Q^<8gJBsY^qW&upt-x9WTUf_wi{D(^8)EvntSe*OD&hN`Yh 
zOKsmbtuf5Md-ePk{m4&VcM8u1O?(`0T6Kr%LC3BVtFz3`H<*vC|E+L<5i}mW@n!qQ z#b;-Qvi@IIus!wbt46Pt-i%j-ejX}l3U#rQJfG6nr5pV%Y3Y^opys)@(si-tcS2S% z#@No8?|b9h+pyBFjSTNTxI8;Q)~~&B|JTTC{0nw%^;m6v%JkWb#I3h4EzN%1R~)m`?_}IG zMxpXK`zmaOB_qy!un~N=k0B=2W&d-Yl(F}*&9QT48TrS5E?;#o>WE$4%c|sM zo1dLFaJcgM&#bD-J+F7MZ~9q&&Ufjzc8@#D=WbiXoC4~yckEGpXA!?<2|K4-LDR>z zO6&4sfQaOK8Gh3%(>4Uux0vpmnFBD>dvW~oYru76uivl-Syim=bg|gYm}Y) z>*wbm>sBrE{>SVQ5|R`-U958I=Gg0hZL;_IFu927o=X0Zx#p0*bV9?MA0cPf|Cay0 zp(s$J@T+gkm2!4fdlWm_S~|xop~lb z;&-ZmLgiKWKdTwz0=GeO3vnk9>0Vn$y?iTpZP5oa`NI;%pTTOx~w6 z`@7>m-P;!om>cAUPv^dV{rdEVw!fQSZaDnymDw}3?1TrmitZlk$dYc@r!+snbjQjo zm2VPNWfYp!;G!b74Pos6y7!Y z7W+m4;UxzHWaib!Ms|)5ni)yVO6e>yoc!V7fX* z|Ma&#yLYY@wXo?ij0*kQ(NUAco_;2M+UqjV5A!Q9D>+u{>%CdCX0^y%unYl+#} zrPpSxk6NY8@Wy`4{XpBbbN1(N%zSf0e$}gDMwymlRntmegGT_2y>%QIetdoVqCE7| z=6lZ8hi&A=_Ns2IEEX@{eDSs4QogdyH`y6t*b-DWA5q)o+NX8BYUagz_tdAACOWLP zU$AS}udHw9g17}HF0YCD)Y5zQ?C-EF^%W~udNN5oKD=o9huQZR%ayupT6tyZqF)Ry zd{S4>t^KvJXx-u+x%asgCVcO9c_O^dP(H0;%9XgKb*vmkKWkLuk6D5I@$b0C7m3f` za?GmrL)NndY-R7Z+H~i6)n$(-H6oQ=lmE-~efhsjTKfJW2FWJ9dG4G$LyY{1Q?6=V&+mq)80Tv(QesZY_*G^xi2aO5r&D!UwLV8J>(FNu z+Ni^JA`dhoHQ97otZN-l!k+%zm~V?GROpm3H|%HGAs>4y_lzdbgbBg_^Ojr(O>l&G z_O3tXve4>&|AhIsZvJFZPI|s;^2uP^^VK{D1YK`r`jtFCH#gwgbH{G6r=efp94tQA zaEjqi%3Q1`{u40_lezD?tSUE9Dif0N_3eC@^2twd=f(bA z_d5%tLQ9i&Ue8sUywl%h9sdRnQ?U=fzw+{me)e*B+3QflBM`cF%liJGpBBwz|M2N5 zUrO)K2~R%vcb_n1T=Oq^YQXV{9#4cA^x0-6SDyamywql9XqEDWntzh_r^O~J3!MM_ zhhswNN1rs+$B*pe=9ueGGffk0h-UENT_I7dmB_ao)OcgGuz!$rX3Z4!&0cyA+j^w8 zi~WmzzGjA?M|tt**{)CKvdSN2Eca@-zR)@R{K_kZf~Tu^4sbU1-_w14#%smr9gqL; z9o&DUYRzn~*pm#LGePSs0!nKmcX%A_aaa@cPGbJ63E%A*_LTg17`&@@r)Nlhvxf~& zs@8W-v3Omn-g(RqrZ#B*nUtdydTn9u)RdL3aSu3B`rWzeSWX%|!-Ba`bE9*AXdvSgpikRIk$edXxm>kJ&f>S9=w_McOmtjjau z!W@szct)X@k=ym+-dRs$kSd?Ecx}u&K8Mu|=Aw_1V^{sU?Zw-lqgDFkz=P*(e*E8W zHml7yP4iLR(y0?QWxB_c5{A8u;!C-dw(ndSvgyvN9V*N&f2A84F22~}W%&YB$_SOu zxqnpV`IQPAIdQ?oO81#R^atvk>plMXr5AS}@7c4HH=TUqzp%=@q1Gwx&i-e*umAqG 
zJh!du<>R??{W%LVIl6*YuPXm)VkQ^m_w_VGD6mQmT?7MoxS(ftM6}eZ}*2s-CtKc=e_cR z_icNQwba<%-?whz!vjStZ%?XGQdau3XStoNrj`+J%fgi&^B6vSeYb2`%hdDwGP^$B z<9YCP{^uSSt!JOE{+c$e^XI*L?-?vsvvl2Fejr_F=9#ZkZ%t>tP&%*t*oBsjeS)bJxO^2W9a>5oZmwv71mFD@#amRVJynK6hGB59G?S*=~CZGJdb8WP|Zs&)9S~s27e>sou+;{1BL2dZ;&0H}O(;ltx zINtW`nYn>OlZ2V;V(D5N*Mx>Qhd_PDsQlaI$Bbw89}QZ$_v)r+Tpu(uvp=7z+iTq*D zKbIOFx17COlCk-_)vn-`^4-sM^(KCZSiH)!?fAK?OE2qpypmMZx@3Ljb6ASBR9XQ` z&>h7GZVbw@|1=xi87$^?*s4u`J!?5L_;KH71Lh?bX1vM{|70b&ERwUbc7c|= zHtrU93tja8&RS#c%k!TZ_Q@2_Szjo0`eAkEn;YF~U%#0#%#fU9{K$eqeYTHYPrA`* z359>RcN{(tP%F}~SjmjJ+0EpY$I`wtPmAo`e#CR!$(zd{-)Y%*_?k4hz{m@}+-@@e zzj4Ecn%&7lE@Hcn&5@k(}p32_v&Tu6*u_U z4kyb-7nCH;o$H_4Ybk!;;bUi4j8Q`5awCHb{~ar@7&5QA zuDoQj{JWGXf4@f9)KxCs`}2MKj>mr%-JbDm!r3`8q7RPc80`$VnY?ovk;!@OC)P3~sRpYn-S5u5dC6ew>TKTX4*R5Sf6dQt@^j{x zpCbHV^1Bw!mj?T`%B5akcz~}bH}~4FlB)lEPUJAAG0%9r>z(4X)T>MlqP=o#3l!$- zzcYI#68d*vhmYpWUx9ZM80CAd&&9sc{kMAiTkFloVmKdkE9%@!oOFIYXtH;m-rRlj z8J(tziZ$#ww#T*Mrkp`^!_9S%&org>PLF+axq8>Vug84Oul^_BpsX0=B)aUoTd4h+ zK=+C6O2=w$md?4!(8rPZ@!Ln6_1d$2=EWjm(xDwK*eQW4p(Mmggq&qb-tceY3aAS?|VW* z=ZeJdD$%Wu{2;XL@AP*mpF{%x%DNo-xnZJ5^VR$NZ<#*3G2^k#35JCMmHSp^W=(yi z6F9f`;KifW3_jtyx0xf&j+H>94cPp$nE@#<%_hywTndVe>}O|Iu3fio-p;?a+_EyUdt{ZadCcqJ zdZpFG^yyaSDRU{~o$+61f7sLCUN0P%Uhu0ld0*VS`HXtvk1Q<9-`xpYyDEzD!GQ_l z<-0m`&Pjbfe5Zv|+;{iub!&M9q^2#8j>=N|{!MPjUt8`K``DG#BdULQ1Op8wJm=X5Ub^*YI-jDtm?k zAJ4pwSM!UMmiwPhsk)KteR)b>-}5`ijx9Ewzuo)E-9?gO8*FC#)ESHQ*Sm`8zS94~ zWg#AxDZTsvFH?=s0Ud_Wr`sB*ov@2cK6jGeuP&nN;l$vp-AOagT)ZsP{xI;B^2Cx~ z=aV9jGu#N%dY&M#^GfO6-`7n|ME-n|W^OrTEB1X)Jo65|11*!Ztv~4V&zOE9W#7^B zXQu^r+$eKv2xC}k5`5sG(x-%St*OnjZE)DzBdVjtG z4W##`G1hQp^V?*fnep)eBSV^&f$#wzaSMq971#V)=UQ~!nILcf&$!OM-2b8Q_P6VI zo_wMrx^o3fw>{f|)Uf#bl6Ox&Ir`D(*|Ldu)+aacT<-q-?C2VgCkZ^Pt)>!E`Qd$L zT6fhLReRdW8pZ4$rXNn8c?~B_81v4kO@6*k_~(~`_je5);;PkdUQ65g z@=LUWk+9N#J|2eOf3Bb3xQZiU;^sAzL3=flLIds;>b>(@`pfmJ_tIUwN<}LezVABr zCran_BhY$lwiAA!PUyC6yM8g$epS^tZYeR%v~M@_+dn$awp-U|KQ1kef5@}zOBUzt 
zeRq7-*h8Mwdsi5{GzQy*iXV6`p}=rWH$q_gzJ1~S8fjf8{>kY(UOZu=Kdsd3&NkyU zT(>4hs1>_jn^DJbU*m4E%HO<0g?cyco}U`q@aMYfittv}U%!`6J$yw@n&lI-(jVL)pHLQgbh8xG7!VdF)HYs{JApUriAIK5bXZria)6_g%H)l^XU}@(sdt@kVy2?@z00PirpLP<^YnZX z?-o+8!|aqq#>! zE_Y>^R9yn)?i)5C9-w}%nTh%NlTVAjGgP>HbZre(nz-2gcY?=3W|!Ny*39!sYujg~ z(cnDcyn5XC!+TglJ-0DNtX;L5B|>uTswmSr%_Sy6KWp+59%yv*e69YncvbkZ#~=Sc z-7WiMx~8n`c~B*{^Y6tMu?)}HY#)7hiF$2qJzwfA6PK96YbmL3n{{5NUEjv9b?BVm z!|z)<)`VOQRob*E@AEVFl&`NEFTN?rGy2KPT2pQ4-B7>s)yhtvG|Ab$f6PGH_`K9* z8^0&Vl=|2G|D6*5EI{(x6tY^ZVCnN?2v zv_Eb-vHcihI>U|g?Q>t4U-WqL2Gm>JD7iA?Tk)J+2EQ}Qf~Ft(yv||ce6`TMwht}B z-@Z)Z5>o&T?r%EzB&%c3{JV?SJYDdsBjTDCe?%tZ>Q_0xHSPEB*gpSp&TF%0#Y}es z56EV0W_qqB^E^B!h|BBFw*Aw5m&&!hI?JFYnz^-pa?HEMtq}o^?;AT(*9L^Lc~n-) z9q?ie!NVnFlqv!^==DAgCup2c^y=AR!! z{`c|+3;!y8dU&HtG}=jAfngf+jtQ}gQuZ=ky~?!ai%#}b(cX1Ey0N>=bXqqze71cc z$Pl_Xh9!SWu*Oj<>-kalFMYaKlmFhALG0HK{#cpfIqlqA_cXDcXn3`Ej{Lq9jRti? zlT85$p*mvKCTG~MSskz3WIc0Fn2VixSN#9wQP)m(mi&q{vu(Y2<=eNjTr~=_y=q-U z_IEh>*Du<&L~ruWT@$)>UVN;Z_4rFl?@#H5zc*!_ma`ww3XFZqsJ(jCs(1QFrH^0S zn7n_|`?t1|4oN%qDs^Z(e!DYsEh%hcyYm_n&y#x-UHP z2V?Uqt_}l-CR2t758p24KEEkU?b6HoDW@Z&uFdFX{8J-gzfbVMO~d0qb$7E&owOtO zzh?FsM}|BX#&^FvU(MX;lNNi>|3%G>Z%-M{rHE_1iafo4o4wO(Ms4$c+27A4-@dJV zx@)iD@t>*vhVz3S?6tPYmM;cVBAzX}jwA z$`e|9{#g~yXPAD}Xy$t^e!s;l?kw{EcKy89{nB`Mwa;tkrCj>xQpfy2SD7p0s`|FB zntSU%iP|_odIW`mJF?5CdS7imAuO>=Wa{@lp`FK$9e1}%PP1HfIz{e;+GP9o6RX{K z_h+dTdF|Q!S+Hzgp!|k=aZj$KHt=lTzq|OkT+^#p@+)4gJp5;Cjf#+E^8S*l=#wlb z0@U|}H}Y?|=`JI2^R4m4C?)N!>)$tCw_Sht>d`8xM^~@JBz^rZ&!4cUD`s!ynHk=v zeqNgU{_gs%TdiiFou_$p?&3a1qdSv7A51=&8JYa9?UkuO=lf?CfA;Pw`Bi6s`iXZ# z#et&4hBtol4ebUd!nu4AJImER`-h$Wa8*BY(pjm`nXGXhH_F(vQ}_=Zo{`~iR^8gM zXWv9S6 zoon?qF>*^sdVKXhN8x#7*#4!rK;+c({1wx#$_q*cC`>>9lx@*8 z(+7+@Z6>T{D~hw5fByQlXW6braq;t~OG})1e!TZH+qRsK%lzlhYkys3aLhS+_Pcj- z7d9X3nJLk^$8!Gp^Di0gRx7>Ns!HuBke6r$^+emzy~lv5<`4`&b^-F|2k;dD`%5Rc7Di!+HL~o=S9k#E$A1i(`t+;0?urcI`Ip=M=l@hPNJ(oMN$n|ez zvK&i61nZV~xdX|!a@{FqYS)@A4WU0*tzv&YU8l13?hH4jXBOReQ$9&{ 
z?Af_U+ezj0y#NX4%U|}apYi&Ot^-R%Alvc>dl|y==XaVts$9LStaNQ+_)?}f^0#tKmXA3-#T?Yx(j}pG)%SnB9S}yxvf&)qOeOvrn{#>5xW2dL|o+mYGMV`RK-`mf-M9)3F7CazO#;K)d zK6O_ii+c~{KsIPQs= zqC0-_Z)P`eC`)xUY&*yx$FdhIQ&U?Og)rPb0y02V2 z_H&|spF>&siFn3|kKKOW)@ul52-DnP6ct@VfTBD zXnw8bOMB%{J9jphIif-Pz>W`NSbISKm2e`72|Nrvl z!i%J(>}zfrmA!GE(=}Ubs`@AC)kRECPVTDs6Sl1?vPY%X?&1v7)u-kfB=$1&=g#jn zix1wss_vwaU!ubot;pngj2q%_%9ps<32c6Azu~AQ)6LrS$h}nz4z{ajWtgi+9}r@+ zi+ncwrGd?<_M=(~TxVDQoV1D0SCA~x6_cOfjTYfnIeKMod2)?e2I4b zAuR>2>1z!6nP!A09j)U$pl>3zx6Rn2fW^s$G4O$-=3IuK-CchoLh^g8&z;V(W#bTJ zt&UXK;;+5(6&s@!!yV{%xHgxFF&|%M^BJh@n7 z_djFN>eus`G@?5t6kHNTI&FGm-j(kBG|Rqv{m*#0RdOyi43;atIWOUNun+m&_HQ@; zs_4W>eaF(xtM;Yv{@u?0C1RDnf>f=deCM8k{V6B4b2yYp1&bd2tQVACd O1B0ilpUXO@geCxg7<%pi literal 0 HcmV?d00001 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d90690d --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# Temporary storage for data ingestion files +/data + +# Files and directories created by pub. +.dart_tool/ +.packages + +# Conventional directory for build output. 
+/build/ +main.db + +# Nix +/result diff --git a/.sqlfluff b/.sqlfluff new file mode 100644 index 0000000..261dfcc --- /dev/null +++ b/.sqlfluff @@ -0,0 +1,8 @@ +[sqlfluff] +dialect = sqlite +exclude_rules = L003 + +[sqlfluff:rules] +tab_space_size = 2 +max_line_length = 80 +indent_unit = space \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1cbd099 --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# ja_db + +[![built with nix](https://builtwithnix.org/badge.svg)](https://builtwithnix.org) + + diff --git a/bin/common.dart b/bin/common.dart new file mode 100644 index 0000000..e69de29 diff --git a/bin/ja_db.dart b/bin/ja_db.dart new file mode 100644 index 0000000..c4d121f --- /dev/null +++ b/bin/ja_db.dart @@ -0,0 +1,16 @@ +import 'dart:io'; + +import 'package:sqflite_common_ffi/sqflite_ffi.dart'; + +import 'jmdict/parser.dart'; +import 'kanjidic/parser.dart'; +import 'radkfile/parser.dart'; + +Future main(List arguments) async { + final db = await databaseFactoryFfi + .openDatabase(Directory.current.uri.resolve('main.db').path); + await addDataFromJMdict(db); + await addDataFromRADKFILE(db); + await addDataFromKANJIDIC(db); +} + diff --git a/bin/jmdict/objects.dart b/bin/jmdict/objects.dart new file mode 100644 index 0000000..8c28200 --- /dev/null +++ b/bin/jmdict/objects.dart @@ -0,0 +1,235 @@ +import '../common.dart'; +import '../objects.dart'; + +class TableNames { + static const String entry = 'JMdict_Entry'; + static const String entryByKana = 'JMdict_EntryByKana'; + static const String entryByEnglish = 'JMdict_EntryByEnglish'; + static const String kanjiElement = 'JMdict_KanjiElement'; + static const String kanjiInfo = 'JMdict_KanjiElementInfo'; + static const String readingElement = 'JMdict_ReadingElement'; + static const String readingInfo = 'JMdict_ReadingElementInfo'; + static const String readingRestriction = 'JMdict_ReadingElementRestriction'; + static const String sense = 'JMdict_Sense'; + static const 
String senseAntonyms = 'JMdict_SenseAntonym'; + static const String senseDialect = 'JMdict_SenseDialect'; + static const String senseField = 'JMdict_SenseField'; + static const String senseGlossary = 'JMdict_SenseGlossary'; + static const String senseInfo = 'JMdict_SenseInfo'; + static const String senseLanguageSource = 'JMdict_SenseLanguageSource'; + static const String senseMisc = 'JMdict_SenseMisc'; + static const String sensePOS = 'JMdict_SensePOS'; + static const String senseRestrictedToKanji = 'JMdict_SenseRestrictedToKanji'; + static const String senseRestrictedToReading = 'JMdict_SenseRestrictedToReading'; + static const String senseSeeAlso = 'JMdict_SenseSeeAlso'; +} + +abstract class Element extends SQLWritable { + final String reading; + final int? news; + final int? ichi; + final int? spec; + final int? gai; + final int? nf; + const Element({ + required this.reading, + this.news, + this.ichi, + this.spec, + this.gai, + this.nf, + }); + + Map get sqlValue => { + 'reading': reading, + 'news': news, + 'ichi': ichi, + 'spec': spec, + 'gai': gai, + 'nf': nf, + }; +} + +class KanjiElement extends Element { + List info; + + KanjiElement({ + this.info = const [], + required String reading, + int? news, + int? ichi, + int? spec, + int? gai, + int? nf, + }) : super( + reading: reading, + news: news, + ichi: ichi, + spec: spec, + gai: gai, + nf: nf, + ); +} + +class ReadingElement extends Element { + List info; + List restrictions; + + ReadingElement({ + this.info = const [], + this.restrictions = const [], + required String reading, + int? news, + int? ichi, + int? spec, + int? gai, + int? nf, + }) : super( + reading: reading, + news: news, + ichi: ichi, + spec: spec, + gai: gai, + nf: nf, + ); +} + +class LanguageSource extends SQLWritable { + final String language; + final String? 
phrase; + final bool fullyDescribesSense; + final bool constructedFromSmallerWords; + + const LanguageSource({ + required this.language, + this.phrase, + this.fullyDescribesSense = true, + this.constructedFromSmallerWords = false, + }); + + @override + Map get sqlValue => { + 'language': language, + 'phrase': phrase, + 'fullyDescribesSense': fullyDescribesSense, + 'constructedFromSmallerWords': constructedFromSmallerWords, + }; +} + +class Glossary extends SQLWritable { + final String language; + final String phrase; + final String? type; + + const Glossary({ + required this.language, + required this.phrase, + this.type, + }); + + Map get sqlValue => { + 'language': language, + 'phrase': phrase, + 'type': type, + }; +} + +final kanaRegex = + RegExp(r'^[\p{Script=Katakana}\p{Script=Hiragana}ー]+$', unicode: true); + +class XRefParts { + final String? kanjiRef; + final String? readingRef; + final int? senseNum; + + const XRefParts({ + this.kanjiRef, + this.readingRef, + this.senseNum, + }) : assert(kanjiRef != null || readingRef != null); + + factory XRefParts.fromString(String s) { + final parts = s.split('・'); + if (parts.length == 1) { + if (parts[0].contains(kanaRegex)) { + return XRefParts(readingRef: parts[0]); + } + return XRefParts(kanjiRef: parts[0]); + } else if (parts.length == 2) { + if (int.tryParse(parts[1]) != null) { + if (parts[0].contains(kanaRegex)) { + return XRefParts(readingRef: parts[0], senseNum: int.parse(parts[1])); + } + return XRefParts(kanjiRef: parts[0], senseNum: int.parse(parts[1])); + } + return XRefParts(kanjiRef: parts[0], readingRef: parts[1]); + } else if (parts.length == 3) { + return XRefParts( + kanjiRef: parts[0], + readingRef: parts[1], + senseNum: int.parse(parts[2]), + ); + } + + return XRefParts(); + } +} + +class XRef { + final String entryId; + final String reading; + + const XRef({ + required this.entryId, + required this.reading, + }); +} + +class Sense extends SQLWritable { + final int id; + final List antonyms; + 
final List dialects; + final List fields; + final List info; + final List languageSource; + final List glossary; + final List misc; + final List pos; + final List restrictedToKanji; + final List restrictedToReading; + final List seeAlso; + + const Sense({ + required this.id, + this.antonyms = const [], + this.dialects = const [], + this.fields = const [], + this.info = const [], + this.languageSource = const [], + this.glossary = const [], + this.misc = const [], + this.pos = const [], + this.restrictedToKanji = const [], + this.restrictedToReading = const [], + this.seeAlso = const [], + }); + + @override + Map get sqlValue => {}; +} + +class Entry extends SQLWritable { + final int id; + final List kanji; + final List readings; + final List senses; + + const Entry({ + required this.id, + required this.kanji, + required this.readings, + required this.senses, + }); + + Map get sqlValue => {'id': id}; +} diff --git a/bin/jmdict/parser.dart b/bin/jmdict/parser.dart new file mode 100644 index 0000000..374b875 --- /dev/null +++ b/bin/jmdict/parser.dart @@ -0,0 +1,346 @@ +import 'dart:collection'; +import 'dart:io'; + +import 'package:sqflite_common/sqlite_api.dart'; +import 'package:xml/xml.dart'; + +import '../romaji_transliteration.dart'; +import 'objects.dart'; + +List getPriNums(XmlElement e, String prefix) { + int? 
news, ichi, spec, gai, nf; + for (final pri in e.findElements('${prefix}_pri')) { + final txt = pri.innerText; + if (txt.startsWith('news')) + news = int.parse(txt.substring(4)); + else if (txt.startsWith('ichi')) + ichi = int.parse(txt.substring(4)); + else if (txt.startsWith('spec')) + spec = int.parse(txt.substring(4)); + else if (txt.startsWith('gai')) + gai = int.parse(txt.substring(3)); + else if (txt.startsWith('nf')) nf = int.parse(txt.substring(2)); + } + return [news, ichi, spec, gai, nf]; +} + +List transformXML(XmlElement root) { + final List entries = []; + + int senseId = 0; + for (final entry in root.childElements) { + final entryId = int.parse(entry.findElements('ent_seq').first.innerText); + + final List kanjiEls = []; + final List readingEls = []; + final List senses = []; + + for (final k_ele in entry.findAllElements('k_ele')) { + final ke_pri = getPriNums(k_ele, 'ke'); + kanjiEls.add( + KanjiElement( + info: k_ele.findElements('ke_inf').map((e) => e.innerText).toList(), + reading: k_ele.findElements('keb').first.innerText, + news: ke_pri[0], + ichi: ke_pri[1], + spec: ke_pri[2], + gai: ke_pri[3], + nf: ke_pri[4], + ), + ); + } + + for (final r_ele in entry.findAllElements('r_ele')) { + final re_pri = getPriNums(r_ele, 're'); + readingEls.add( + ReadingElement( + info: r_ele + .findElements('re_inf') + .map((e) => e.innerText.substring(1, e.innerText.length - 1)) + .toList(), + restrictions: + r_ele.findElements('re_restr').map((e) => e.innerText).toList(), + reading: r_ele.findElements('reb').first.innerText, + news: re_pri[0], + ichi: re_pri[1], + spec: re_pri[2], + gai: re_pri[3], + nf: re_pri[4], + ), + ); + } + + for (final sense in entry.findAllElements('sense')) { + senseId++; + senses.add( + Sense( + id: senseId, + restrictedToKanji: + sense.findElements('stagk').map((e) => e.innerText).toList(), + restrictedToReading: + sense.findElements('stagr').map((e) => e.innerText).toList(), + pos: sense + .findElements('pos') + .map((e) => 
e.innerText.substring(1, e.innerText.length - 1)) + .toList(), + misc: sense + .findElements('misc') + .map((e) => e.innerText.substring(1, e.innerText.length - 1)) + .toList(), + dialects: sense + .findElements('dial') + .map((e) => e.innerText.substring(1, e.innerText.length - 1)) + .toList(), + info: sense.findElements('s_inf').map((e) => e.innerText).toList(), + languageSource: sense + .findElements('lsource') + .map( + (e) => LanguageSource( + language: e.getAttribute('xml:lang') ?? 'eng', + fullyDescribesSense: e.getAttribute('ls_type') != 'part', // ls_type is implicitly 'full' unless it is explicitly 'part' + constructedFromSmallerWords: + e.getAttribute('ls_wasei') == 'y', + ), + ) + .toList(), + glossary: sense + .findElements('gloss') + .map( + (e) => Glossary( + language: e.getAttribute('xml:lang') ?? 'eng', + phrase: e.innerText, + type: e.getAttribute('g_type'), + ), + ) + .toList(), + antonyms: sense + .findElements('ant') + .map((e) => XRefParts.fromString(e.innerText)) + .toList(), + seeAlso: sense + .findElements('xref') + .map((e) => XRefParts.fromString(e.innerText)) + .toList(), + ), + ); + } + + entries.add( + Entry( + id: entryId, + kanji: kanjiEls, + readings: readingEls, + senses: senses, + ), + ); + } + + return entries; +} + +Future insertIntoDB(List entries, Database db) async { + print(' [JMdict] Batch 1'); + Batch b = db.batch(); + for (final e in entries) { + b.insert(TableNames.entry, e.sqlValue); + for (final k in e.kanji) { + b.insert(TableNames.kanjiElement, k.sqlValue..addAll({'entryId': e.id})); + // b.insert( + // TableNames.entryByKana, + // {'entryId': e.id, 'kana': transliterateKatakanaToHiragana(k.reading)}, + // // Some entries have the same reading twice with difference in katakana and hiragana + // conflictAlgorithm: ConflictAlgorithm.ignore, + // ); + for (final i in k.info) { + b.insert( + TableNames.kanjiInfo, + {'entryId': e.id, 'reading': k.reading, 'info': i}, + ); + } + } + for (final r in e.readings) { + b.insert( + TableNames.readingElement, + r.sqlValue..addAll({'entryId': 
e.id}), + ); + + b.insert( + TableNames.entryByKana, + {'entryId': e.id, 'kana': transliterateKanaToLatin(r.reading)}, + // Some entries have the same reading twice with difference in katakana and hiragana + conflictAlgorithm: ConflictAlgorithm.ignore, + ); + for (final i in r.info) { + b.insert( + TableNames.readingInfo, + {'entryId': e.id, 'reading': r.reading, 'info': i}, + ); + } + for (final res in r.restrictions) { + b.insert( + TableNames.readingRestriction, + {'entryId': e.id, 'reading': r.reading, 'restriction': res}, + ); + } + } + + for (final s in e.senses) { + for (final g in s.glossary) { + if (g.language == "eng") + b.insert( + TableNames.entryByEnglish, + {'entryId': e.id, 'english': g.phrase}, + // Some entries have the same reading twice with difference in katakana and hiragana + conflictAlgorithm: ConflictAlgorithm.ignore, + ); + } + } + } + + await b.commit(); + + print(' [JMdict] Building trees'); + SplayTreeMap> entriesByKanji = SplayTreeMap(); + for (final entry in entries) { + for (final kanji in entry.kanji) { + if (entriesByKanji.containsKey(kanji.reading)) { + entriesByKanji.update(kanji.reading, (list) => list..add(entry)); + } else { + entriesByKanji.putIfAbsent(kanji.reading, () => {entry}); + } + } + } + SplayTreeMap> entriesByReading = SplayTreeMap(); + for (final entry in entries) { + for (final reading in entry.readings) { + if (entriesByReading.containsKey(reading.reading)) { + entriesByReading.update(reading.reading, (list) => list..add(entry)); + } else { + entriesByReading.putIfAbsent(reading.reading, () => {entry}); + } + } + } + + print(' [JMdict] Batch 2'); + b = db.batch(); + + for (final e in entries) { + for (final s in e.senses) { + b.insert( + TableNames.sense, s.sqlValue..addAll({'id': s.id, 'entryId': e.id})); + + for (final d in s.dialects) { + b.insert(TableNames.senseDialect, {'senseId': s.id, 'dialect': d}); + } + for (final f in s.fields) { + b.insert(TableNames.senseField, {'senseId': s.id, 'field': f}); + } + 
for (final i in s.info) { + b.insert(TableNames.senseInfo, {'senseId': s.id, 'info': i}); + } + for (final m in s.misc) { + b.insert(TableNames.senseMisc, {'senseId': s.id, 'misc': m}); + } + for (final p in s.pos) { + b.insert(TableNames.sensePOS, {'senseId': s.id, 'pos': p}); + } + for (final l in s.languageSource) { + b.insert( + TableNames.senseLanguageSource, + l.sqlValue..addAll({'senseId': s.id}), + ); + } + for (final rk in s.restrictedToKanji) { + b.insert( + TableNames.senseRestrictedToKanji, + {'entryId': e.id, 'senseId': s.id, 'kanji': rk}, + ); + } + for (final rr in s.restrictedToReading) { + b.insert( + TableNames.senseRestrictedToReading, + {'entryId': e.id, 'senseId': s.id, 'reading': rr}, + ); + } + for (final g in s.glossary) { + if (g.language == 'eng') + b.insert( + TableNames.senseGlossary, + g.sqlValue..addAll({'senseId': s.id}), + // There are some duplicate glossary, especially in + // the other languages. + conflictAlgorithm: ConflictAlgorithm.ignore, + ); + } + + for (final xref in s.seeAlso) { + final Set entries; + if (xref.kanjiRef != null && xref.readingRef != null) { // a kanji・reading reference must match both parts + entries = entriesByKanji[xref.kanjiRef]! + .intersection(entriesByReading[xref.readingRef]!); + } else if (xref.kanjiRef != null) { + entries = entriesByKanji[xref.kanjiRef]!; + } else { + entries = entriesByReading[xref.readingRef]!; + } + for (final ex in entries) + if (!(xref.senseNum != null && xref.senseNum! > ex.senses.length)) { + b.insert( + TableNames.senseSeeAlso, + { + 'senseId': s.id, + 'xrefEntryId': ex.id, + 'seeAlsoKanji': xref.kanjiRef, + 'seeAlsoReading': xref.readingRef, + 'seeAlsoSense': xref.senseNum, + }, + ); + } + } + for (final ant in s.antonyms) { + final Set entries; + if (ant.kanjiRef != null && ant.readingRef != null) { // a kanji・reading reference must match both parts + entries = entriesByKanji[ant.kanjiRef]! 
+ .intersection(entriesByReading[ant.readingRef]!); + } else if (ant.kanjiRef != null) { + entries = entriesByKanji[ant.kanjiRef]!; + } else { + entries = entriesByReading[ant.readingRef]!; + } + for (final ex in entries) { + if (!(ant.senseNum != null && ant.senseNum! > ex.senses.length)) { + b.insert(TableNames.senseAntonyms, { + 'senseId': s.id, + 'xrefEntryId': ex.id, + 'antonymKanji': ant.kanjiRef, + 'antonymReading': ant.readingRef, + 'antonymSense': ant.senseNum, + }); + } + } + } + } + } + + await b.commit(); +} + +Future addDataFromJMdict(Database db) async { + print('[JMdict] Reading file...'); + String rawXML = File('data/JMdict.xml').readAsStringSync(); + + print('[JMdict] Parsing XML...'); + XmlElement root = XmlDocument.parse(rawXML).getElement('JMdict')!; + + print('[JMdict] Transforming data...'); + final entries = transformXML(root); + + print('[JMdict] Writing to database...'); + await insertIntoDB(entries, db); +} diff --git a/bin/kanjidic/objects.dart b/bin/kanjidic/objects.dart new file mode 100644 index 0000000..c619625 --- /dev/null +++ b/bin/kanjidic/objects.dart @@ -0,0 +1,284 @@ +import '../objects.dart'; + +class TableNames { + static const String character = 'KANJIDIC_Character'; + static const String radicalName = 'KANJIDIC_RadicalName'; + static const String codepoint = 'KANJIDIC_Codepoint'; + static const String radical = 'KANJIDIC_Radical'; + static const String strokeMiscount = 'KANJIDIC_StrokeMiscount'; + static const String variant = 'KANJIDIC_Variant'; + static const String dictionaryReference = '_KANJIDIC_DictionaryReference_Part1'; + static const String dictionaryReferenceMoro = '_KANJIDIC_DictionaryReference_Moro'; + static const String queryCode = 'KANJIDIC_QueryCode'; + static const String reading = 'KANJIDIC_Reading'; + static const String kunyomi = 'KANJIDIC_Kunyomi'; + static const String onyomi = 'KANJIDIC_Onyomi'; + static const String meaning = 'KANJIDIC_Meaning'; + static const String nanori = 'KANJIDIC_Nanori'; +} + 
+class CodePoint extends SQLWritable { + final String kanji; + final String type; + final String codepoint; + + const CodePoint({ + required this.kanji, + required this.type, + required this.codepoint, + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 'type': type, + 'codepoint': codepoint, + }; +} + +class Radical extends SQLWritable { + final String kanji; + final String type; + final String radical; + + const Radical({ + required this.kanji, + required this.type, + required this.radical, + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 'type': type, + 'radical': radical, + }; +} + +class StrokeMiscount extends SQLWritable { + final String kanji; + final int strokeCount; + + const StrokeMiscount({ + required this.kanji, + required this.strokeCount, + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 'strokeCount': strokeCount, + }; +} + +class Variant extends SQLWritable { + final String kanji; + final String type; + final String variant; + + const Variant({ + required this.kanji, + required this.type, + required this.variant, + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 'type': type, + 'variant': variant, + }; +} + +class DictionaryReference extends SQLWritable { + final String kanji; + final String type; + final String ref; + + const DictionaryReference({ + required this.kanji, + required this.type, + required this.ref, + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 'type': type, + 'ref': ref, + }; +} + +class DictionaryReferenceMoro extends SQLWritable { + final String kanji; + final String ref; + final int? volume; + final int? 
page; + + const DictionaryReferenceMoro({ + required this.kanji, + required this.ref, + required this.volume, + required this.page, + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 'ref': ref, + 'volume': volume, + 'page': page, + }; +} + +class QueryCode extends SQLWritable { + final String kanji; + final String code; + final String type; + final String? skipMisclassification; + + const QueryCode({ + required this.kanji, + required this.code, + required this.type, + required this.skipMisclassification, + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 'code': code, + 'type': type, + 'skipMisclassification': skipMisclassification, + }; +} + +class Reading extends SQLWritable { + final String kanji; + final String type; + final String reading; + + const Reading({ + required this.kanji, + required this.type, + required this.reading, + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 'type': type, + 'reading': reading, + }; +} + +class Kunyomi extends SQLWritable { + final String kanji; + final String yomi; + final bool isJouyou; + + const Kunyomi({ + required this.kanji, + required this.yomi, + required this.isJouyou, + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 'yomi': yomi, + 'isJouyou': isJouyou, + }; +} + +class Onyomi extends SQLWritable { + final String kanji; + final String yomi; + final bool isJouyou; + final String? 
type; + + const Onyomi({ + required this.kanji, + required this.yomi, + required this.isJouyou, + required this.type, + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 'yomi': yomi, + 'isJouyou': isJouyou, + 'type': type, + }; +} + +class Meaning extends SQLWritable { + final String kanji; + final String language; + final String meaning; + + const Meaning({ + required this.kanji, + required this.language, + this.meaning = 'eng', + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 'language': language, + 'meaning': meaning, + }; +} + +class Character extends SQLWritable { + final String literal; + final int strokeCount; + final int? grade; + final int? frequency; + final int? jlpt; + + final List radicalName; + final List codepoints; + final List radicals; + final List strokeMiscounts; + final List variants; + final List dictionaryReferences; + final List dictionaryReferencesMoro; + final List querycodes; + final List readings; + final List onyomi; + final List kunyomi; + final List meanings; + final List nanori; + + const Character({ + required this.literal, + required this.strokeCount, + this.grade, + this.frequency, + this.jlpt, + this.radicalName = const [], + this.codepoints = const [], + this.radicals = const [], + this.strokeMiscounts = const [], + this.variants = const [], + this.dictionaryReferences = const [], + this.dictionaryReferencesMoro = const [], + this.querycodes = const [], + this.readings = const [], + this.onyomi = const [], + this.kunyomi = const [], + this.meanings = const [], + this.nanori = const [], + }); + + Map get sqlValue => { + 'literal': literal, + 'grade': grade, + 'strokeCount': strokeCount, + 'frequency': frequency, + 'jlpt': jlpt, + }; +} diff --git a/bin/kanjidic/parser.dart b/bin/kanjidic/parser.dart new file mode 100644 index 0000000..171d3f6 --- /dev/null +++ b/bin/kanjidic/parser.dart @@ -0,0 +1,231 @@ +import 'dart:io'; + +import 'package:sqflite_common/sqlite_api.dart'; +import 
'package:xml/xml.dart'; +import 'package:collection/collection.dart'; + +import 'objects.dart'; + +List transformXML(XmlElement root) { + final List result = []; + for (final c in root.findElements('character')) { + final kanji = c.findElements('literal').first.innerText; + result.add( + Character( + literal: kanji, + strokeCount: + int.parse(c.findAllElements('stroke_count').first.innerText), + grade: + int.tryParse(c.findAllElements('grade').firstOrNull?.innerText ?? ''), + frequency: + int.tryParse(c.findAllElements('freq').firstOrNull?.innerText ?? ''), + jlpt: int.tryParse( + c.findAllElements('jlpt').firstOrNull?.innerText ?? '', + ), + radicalName: + c.findAllElements('rad_name').map((e) => e.innerText).toList(), + codepoints: c + .findAllElements('cp_value') + .map( + (e) => CodePoint( + kanji: kanji, + type: e.getAttribute('cp_type')!, + codepoint: e.innerText, + ), + ) + .toList(), + radicals: c + .findAllElements('rad_value') + .map( + (e) => Radical( + kanji: kanji, + type: e.getAttribute('rad_type')!, + radical: e.innerText, + ), + ) + .toList(), + strokeMiscounts: c + .findAllElements('stroke_count') + .skip(1) + .map((e) => int.parse(e.innerText)) + .toList(), + variants: c + .findAllElements('variant') + .map( + (e) => Variant( + kanji: kanji, + type: e.getAttribute('var_type')!, + variant: e.innerText, + ), + ) + .toList(), + dictionaryReferences: c + .findAllElements('dic_ref') + .where((e) => e.getAttribute('dr_type') != 'moro') + .map( + (e) => DictionaryReference( + kanji: kanji, + type: e.getAttribute('dr_type')!, + ref: e.innerText, + ), + ) + .toList(), + dictionaryReferencesMoro: c + .findAllElements('dic_ref') + .where((e) => e.getAttribute('dr_type') == 'moro') + .map( + (e) => DictionaryReferenceMoro( + kanji: kanji, + ref: e.innerText, + page: int.tryParse(e.getAttribute('m_page') ?? ''), + volume: int.tryParse(e.getAttribute('m_vol') ??
''), + ), + ) + .toList(), + querycodes: c + .findAllElements('q_code') + .map( + (e) => QueryCode( + kanji: kanji, + code: e.innerText, + type: e.getAttribute('qc_type')!, + skipMisclassification: e.getAttribute('skip_misclass'), + ), + ) + .toList(), + readings: c + .findAllElements('reading') + .where( + (e) => !['ja_on', 'ja_kun'].contains(e.getAttribute('r_type')), + ) + .map( + (e) => Reading( + kanji: kanji, + type: e.getAttribute('r_type')!, + reading: e.innerText, + ), + ) + .toList(), + kunyomi: c + .findAllElements('reading') + .where((e) => e.getAttribute('r_type') == 'ja_kun') + .map( + (e) => Kunyomi( + kanji: kanji, + yomi: e.innerText, + isJouyou: e.getAttribute('r_status') == 'jy', + ), + ) + .toList(), + onyomi: c + .findAllElements('reading') + .where((e) => e.getAttribute('r_type') == 'ja_on') + .map( + (e) => Onyomi( + kanji: kanji, + yomi: e.innerText, + isJouyou: e.getAttribute('r_status') == 'jy', + type: e.getAttribute('on_type')), + ) + .toList(), + meanings: c + .findAllElements('meaning') + .map( + (e) => Meaning( + kanji: kanji, + language: e.getAttribute('m_lang') ?? 
'eng', + meaning: e.innerText, + ), + ) + .toList(), + nanori: c.findAllElements('nanori').map((e) => e.innerText).toList(), + ), + ); + } + return result; +} + +Future insertIntoDB(List characters, Database db) async { + final b = db.batch(); + for (final c in characters) { + // if (c.dictionaryReferences.any((e) => + // c.dictionaryReferences + // .where((e2) => e.kanji == e2.kanji && e.type == e2.type) + // .length > + // 1)) { + // print(c.dictionaryReferences.map((e) => e.sqlValue).toList()); + // } + b.insert(TableNames.character, c.sqlValue); + for (final n in c.radicalName) { + b.insert(TableNames.radicalName, {'kanji': c.literal, 'name': n}); + } + for (final cp in c.codepoints) { + b.insert(TableNames.codepoint, cp.sqlValue); + } + for (final r in c.radicals) { + b.insert(TableNames.radical, r.sqlValue); + } + for (final sm in c.strokeMiscounts) { + b.insert( + TableNames.strokeMiscount, + { + 'kanji': c.literal, + 'strokeCount': sm, + }, + ); + } + for (final v in c.variants) { + b.insert(TableNames.variant, v.sqlValue); + } + for (final dr in c.dictionaryReferences) { + // There are duplicate entries here + b.insert( + TableNames.dictionaryReference, + dr.sqlValue, + conflictAlgorithm: ConflictAlgorithm.ignore, + ); + } + for (final drm in c.dictionaryReferencesMoro) { + b.insert(TableNames.dictionaryReferenceMoro, drm.sqlValue); + } + for (final q in c.querycodes) { + b.insert(TableNames.queryCode, q.sqlValue); + } + for (final r in c.readings) { + b.insert(TableNames.reading, r.sqlValue); + } + for (final k in c.kunyomi) { + b.insert(TableNames.kunyomi, k.sqlValue); + } + for (final o in c.onyomi) { + b.insert(TableNames.onyomi, o.sqlValue); + } + for (final m in c.meanings) { + b.insert(TableNames.meaning, m.sqlValue); + } + for (final n in c.nanori) { + b.insert( + TableNames.nanori, + { + 'kanji': c.literal, + 'nanori': n, + }, + ); + } + } + await b.commit(); +} + +Future addDataFromKANJIDIC(Database db) async { + print('[KANJIDIC2] Reading file...'); +
String rawXML = File('data/kanjidic2.xml').readAsStringSync(); + + print('[KANJIDIC2] Parsing XML...'); + XmlElement root = XmlDocument.parse(rawXML).getElement('kanjidic2')!; + + print('[KANJIDIC2] Transforming data...'); + final entries = transformXML(root); + + print('[KANJIDIC2] Writing to database...'); + await insertIntoDB(entries, db); +} diff --git a/bin/objects.dart b/bin/objects.dart new file mode 100644 index 0000000..9631468 --- /dev/null +++ b/bin/objects.dart @@ -0,0 +1,5 @@ +abstract class SQLWritable { + const SQLWritable(); + + Map get sqlValue; +} \ No newline at end of file diff --git a/bin/radkfile/objects.dart b/bin/radkfile/objects.dart new file mode 100644 index 0000000..1a6fab6 --- /dev/null +++ b/bin/radkfile/objects.dart @@ -0,0 +1,13 @@ +class Radical { + final String radical; + final String kanji; + + // TODO: + final String something; + + const Radical({ + required this.radical, + required this.kanji, + required this.something, + }); +} diff --git a/bin/radkfile/parser.dart b/bin/radkfile/parser.dart new file mode 100644 index 0000000..b1d375e --- /dev/null +++ b/bin/radkfile/parser.dart @@ -0,0 +1,32 @@ +import 'dart:io'; + +import 'package:sqflite_common/sqlite_api.dart'; + +Future addDataFromRADKFILE(Database db) async { + final String content = File('data/radkfile_utf8').readAsStringSync(); + final Iterable blocks = + content.replaceAll(RegExp(r'^#.*$'), '').split(r'$').skip(2); + + print('[RADKFILE] Writing to database...'); + final b = db.batch(); + + for (final block in blocks) { + final String radical = block[1]; + final List kanjiList = block + .replaceFirst(RegExp(r'.*\n'), '') + .split('') + ..removeWhere((e) => e == '' || e == '\n'); + + for (final kanji in kanjiList.toSet()) { + b.insert( + 'RADKFILE', + { + 'radical': radical, + 'kanji': kanji, + }, + ); + } + } + + await b.commit(); +} diff --git a/bin/romaji_transliteration.dart b/bin/romaji_transliteration.dart new file mode 100644 index 0000000..814453a --- /dev/null +++
b/bin/romaji_transliteration.dart @@ -0,0 +1,622 @@ +// Source: https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb + +const hiragana_syllabic_n = 'ん'; +const hiragana_small_tsu = 'っ'; + +const Map hiragana_to_latin = { + 'あ': 'a', + 'い': 'i', + 'う': 'u', + 'え': 'e', + 'お': 'o', + 'か': 'ka', + 'き': 'ki', + 'く': 'ku', + 'け': 'ke', + 'こ': 'ko', + 'が': 'ga', + 'ぎ': 'gi', + 'ぐ': 'gu', + 'げ': 'ge', + 'ご': 'go', + 'さ': 'sa', + 'し': 'shi', + 'す': 'su', + 'せ': 'se', + 'そ': 'so', + 'ざ': 'za', + 'じ': 'ji', + 'ず': 'zu', + 'ぜ': 'ze', + 'ぞ': 'zo', + 'た': 'ta', + 'ち': 'chi', + 'つ': 'tsu', + 'て': 'te', + 'と': 'to', + 'だ': 'da', + 'ぢ': 'ji', + 'づ': 'zu', + 'で': 'de', + 'ど': 'do', + 'な': 'na', + 'に': 'ni', + 'ぬ': 'nu', + 'ね': 'ne', + 'の': 'no', + 'は': 'ha', + 'ひ': 'hi', + 'ふ': 'fu', + 'へ': 'he', + 'ほ': 'ho', + 'ば': 'ba', + 'び': 'bi', + 'ぶ': 'bu', + 'べ': 'be', + 'ぼ': 'bo', + 'ぱ': 'pa', + 'ぴ': 'pi', + 'ぷ': 'pu', + 'ぺ': 'pe', + 'ぽ': 'po', + 'ま': 'ma', + 'み': 'mi', + 'む': 'mu', + 'め': 'me', + 'も': 'mo', + 'や': 'ya', + 'ゆ': 'yu', + 'よ': 'yo', + 'ら': 'ra', + 'り': 'ri', + 'る': 'ru', + 'れ': 're', + 'ろ': 'ro', + 'わ': 'wa', + 'うぃ': 'whi', + 'うぇ': 'whe', + 'を': 'wo', + 'ゑ': 'we', + 'ゐ': 'wi', + 'ー': '-', + 'ん': 'n', + 'きゃ': 'kya', + 'きゅ': 'kyu', + 'きょ': 'kyo', + 'きぇ': 'kye', + 'きぃ': 'kyi', + 'ぎゃ': 'gya', + 'ぎゅ': 'gyu', + 'ぎょ': 'gyo', + 'ぎぇ': 'gye', + 'ぎぃ': 'gyi', + 'くぁ': 'kwa', + 'くぃ': 'kwi', + 'くぅ': 'kwu', + 'くぇ': 'kwe', + 'くぉ': 'kwo', + 'ぐぁ': 'qwa', + 'ぐぃ': 'gwi', + 'ぐぅ': 'gwu', + 'ぐぇ': 'gwe', + 'ぐぉ': 'gwo', + 'しゃ': 'sha', + 'しぃ': 'syi', + 'しゅ': 'shu', + 'しぇ': 'she', + 'しょ': 'sho', + 'じゃ': 'ja', + 'じゅ': 'ju', + 'じぇ': 'jye', + 'じょ': 'jo', + 'じぃ': 'jyi', + 'すぁ': 'swa', + 'すぃ': 'swi', + 'すぅ': 'swu', + 'すぇ': 'swe', + 'すぉ': 'swo', + 'ちゃ': 'cha', + 'ちゅ': 'chu', + 'ちぇ': 'tye', + 'ちょ': 'cho', + 'ちぃ': 'tyi', + 'ぢゃ': 'ja', + 'ぢぃ': 'dyi', + 'ぢゅ': 'ju', + 'ぢぇ': 'dye', + 'ぢょ': 'jo', + 'つぁ': 'tsa', + 'つぃ': 'tsi', + 'つぇ': 'tse', + 'つぉ': 'tso', + 'てゃ': 'tha', + 'てぃ': 'thi', 
+ 'てゅ': 'thu', + 'てぇ': 'the', + 'てょ': 'tho', + 'とぁ': 'twa', + 'とぃ': 'twi', + 'とぅ': 'twu', + 'とぇ': 'twe', + 'とぉ': 'two', + 'でゃ': 'dha', + 'でぃ': 'dhi', + 'でゅ': 'dhu', + 'でぇ': 'dhe', + 'でょ': 'dho', + 'どぁ': 'dwa', + 'どぃ': 'dwi', + 'どぅ': 'dwu', + 'どぇ': 'dwe', + 'どぉ': 'dwo', + 'にゃ': 'nya', + 'にゅ': 'nyu', + 'にょ': 'nyo', + 'にぇ': 'nye', + 'にぃ': 'nyi', + 'ひゃ': 'hya', + 'ひぃ': 'hyi', + 'ひゅ': 'hyu', + 'ひぇ': 'hye', + 'ひょ': 'hyo', + 'びゃ': 'bya', + 'びぃ': 'byi', + 'びゅ': 'byu', + 'びぇ': 'bye', + 'びょ': 'byo', + 'ぴゃ': 'pya', + 'ぴぃ': 'pyi', + 'ぴゅ': 'pyu', + 'ぴぇ': 'pye', + 'ぴょ': 'pyo', + 'ふぁ': 'fwa', + 'ふぃ': 'fyi', + 'ふぇ': 'fye', + 'ふぉ': 'fwo', + 'ふぅ': 'fwu', + 'ふゃ': 'fya', + 'ふゅ': 'fyu', + 'ふょ': 'fyo', + 'みゃ': 'mya', + 'みぃ': 'myi', + 'みゅ': 'myu', + 'みぇ': 'mye', + 'みょ': 'myo', + 'りゃ': 'rya', + 'りぃ': 'ryi', + 'りゅ': 'ryu', + 'りぇ': 'rye', + 'りょ': 'ryo', + 'ゔぁ': 'va', + 'ゔぃ': 'vyi', + 'ゔ': 'vu', + 'ゔぇ': 'vye', + 'ゔぉ': 'vo', + 'ゔゃ': 'vya', + 'ゔゅ': 'vyu', + 'ゔょ': 'vyo', + 'うぁ': 'wha', + 'いぇ': 'ye', + 'うぉ': 'who', + 'ぁ': 'xa', + 'ぃ': 'xi', + 'ぅ': 'xu', + 'ぇ': 'xe', + 'ぉ': 'xo', + 'ゕ': 'xka', + 'ゖ': 'xke', + 'ゎ': 'xwa' +}; + +const Map latin_to_hiragana = { + 'a': 'あ', + 'i': 'い', + 'u': 'う', + 'e': 'え', + 'o': 'お', + 'ka': 'か', + 'ki': 'き', + 'ku': 'く', + 'ke': 'け', + 'ko': 'こ', + 'ga': 'が', + 'gi': 'ぎ', + 'gu': 'ぐ', + 'ge': 'げ', + 'go': 'ご', + 'sa': 'さ', + 'si': 'し', + 'shi': 'し', + 'su': 'す', + 'se': 'せ', + 'so': 'そ', + 'za': 'ざ', + 'zi': 'じ', + 'ji': 'じ', + 'zu': 'ず', + 'ze': 'ぜ', + 'zo': 'ぞ', + 'ta': 'た', + 'ti': 'ち', + 'chi': 'ち', + 'tu': 'つ', + 'tsu': 'つ', + 'te': 'て', + 'to': 'と', + 'da': 'だ', + 'di': 'ぢ', + 'du': 'づ', + 'dzu': 'づ', + 'de': 'で', + 'do': 'ど', + 'na': 'な', + 'ni': 'に', + 'nu': 'ぬ', + 'ne': 'ね', + 'no': 'の', + 'ha': 'は', + 'hi': 'ひ', + 'hu': 'ふ', + 'fu': 'ふ', + 'he': 'へ', + 'ho': 'ほ', + 'ba': 'ば', + 'bi': 'び', + 'bu': 'ぶ', + 'be': 'べ', + 'bo': 'ぼ', + 'pa': 'ぱ', + 'pi': 'ぴ', + 'pu': 'ぷ', + 'pe': 'ぺ', + 'po': 'ぽ', + 'ma': 'ま', + 'mi': 'み', + 'mu': 'む', + 'me': 'め', + 'mo': 
'も', + 'ya': 'や', + 'yu': 'ゆ', + 'yo': 'よ', + 'ra': 'ら', + 'ri': 'り', + 'ru': 'る', + 're': 'れ', + 'ro': 'ろ', + 'la': 'ら', + 'li': 'り', + 'lu': 'る', + 'le': 'れ', + 'lo': 'ろ', + 'wa': 'わ', + 'wi': 'うぃ', + 'we': 'うぇ', + 'wo': 'を', + 'wye': 'ゑ', + 'wyi': 'ゐ', + '-': 'ー', + 'n': 'ん', + 'nn': 'ん', + "n'": 'ん', + 'kya': 'きゃ', + 'kyu': 'きゅ', + 'kyo': 'きょ', + 'kye': 'きぇ', + 'kyi': 'きぃ', + 'gya': 'ぎゃ', + 'gyu': 'ぎゅ', + 'gyo': 'ぎょ', + 'gye': 'ぎぇ', + 'gyi': 'ぎぃ', + 'kwa': 'くぁ', + 'kwi': 'くぃ', + 'kwu': 'くぅ', + 'kwe': 'くぇ', + 'kwo': 'くぉ', + 'gwa': 'ぐぁ', + 'gwi': 'ぐぃ', + 'gwu': 'ぐぅ', + 'gwe': 'ぐぇ', + 'gwo': 'ぐぉ', + 'qwa': 'ぐぁ', + 'qwi': 'ぐぃ', + 'qwu': 'ぐぅ', + 'qwe': 'ぐぇ', + 'qwo': 'ぐぉ', + 'sya': 'しゃ', + 'syi': 'しぃ', + 'syu': 'しゅ', + 'sye': 'しぇ', + 'syo': 'しょ', + 'sha': 'しゃ', + 'shu': 'しゅ', + 'she': 'しぇ', + 'sho': 'しょ', + 'ja': 'じゃ', + 'ju': 'じゅ', + 'je': 'じぇ', + 'jo': 'じょ', + 'jya': 'じゃ', + 'jyi': 'じぃ', + 'jyu': 'じゅ', + 'jye': 'じぇ', + 'jyo': 'じょ', + 'zya': 'じゃ', + 'zyu': 'じゅ', + 'zyo': 'じょ', + 'zye': 'じぇ', + 'zyi': 'じぃ', + 'swa': 'すぁ', + 'swi': 'すぃ', + 'swu': 'すぅ', + 'swe': 'すぇ', + 'swo': 'すぉ', + 'cha': 'ちゃ', + 'chu': 'ちゅ', + 'che': 'ちぇ', + 'cho': 'ちょ', + 'cya': 'ちゃ', + 'cyi': 'ちぃ', + 'cyu': 'ちゅ', + 'cye': 'ちぇ', + 'cyo': 'ちょ', + 'tya': 'ちゃ', + 'tyi': 'ちぃ', + 'tyu': 'ちゅ', + 'tye': 'ちぇ', + 'tyo': 'ちょ', + 'dya': 'ぢゃ', + 'dyi': 'ぢぃ', + 'dyu': 'ぢゅ', + 'dye': 'ぢぇ', + 'dyo': 'ぢょ', + 'tsa': 'つぁ', + 'tsi': 'つぃ', + 'tse': 'つぇ', + 'tso': 'つぉ', + 'tha': 'てゃ', + 'thi': 'てぃ', + 'thu': 'てゅ', + 'the': 'てぇ', + 'tho': 'てょ', + 'twa': 'とぁ', + 'twi': 'とぃ', + 'twu': 'とぅ', + 'twe': 'とぇ', + 'two': 'とぉ', + 'dha': 'でゃ', + 'dhi': 'でぃ', + 'dhu': 'でゅ', + 'dhe': 'でぇ', + 'dho': 'でょ', + 'dwa': 'どぁ', + 'dwi': 'どぃ', + 'dwu': 'どぅ', + 'dwe': 'どぇ', + 'dwo': 'どぉ', + 'nya': 'にゃ', + 'nyu': 'にゅ', + 'nyo': 'にょ', + 'nye': 'にぇ', + 'nyi': 'にぃ', + 'hya': 'ひゃ', + 'hyi': 'ひぃ', + 'hyu': 'ひゅ', + 'hye': 'ひぇ', + 'hyo': 'ひょ', + 'bya': 'びゃ', + 'byi': 'びぃ', + 'byu': 'びゅ', + 'bye': 'びぇ', + 'byo': 'びょ', + 'pya': 'ぴゃ', + 'pyi': 'ぴぃ', + 
'pyu': 'ぴゅ', + 'pye': 'ぴぇ', + 'pyo': 'ぴょ', + 'fa': 'ふぁ', + 'fi': 'ふぃ', + 'fe': 'ふぇ', + 'fo': 'ふぉ', + 'fwa': 'ふぁ', + 'fwi': 'ふぃ', + 'fwu': 'ふぅ', + 'fwe': 'ふぇ', + 'fwo': 'ふぉ', + 'fya': 'ふゃ', + 'fyi': 'ふぃ', + 'fyu': 'ふゅ', + 'fye': 'ふぇ', + 'fyo': 'ふょ', + 'mya': 'みゃ', + 'myi': 'みぃ', + 'myu': 'みゅ', + 'mye': 'みぇ', + 'myo': 'みょ', + 'rya': 'りゃ', + 'ryi': 'りぃ', + 'ryu': 'りゅ', + 'rye': 'りぇ', + 'ryo': 'りょ', + 'lya': 'りゃ', + 'lyu': 'りゅ', + 'lyo': 'りょ', + 'lye': 'りぇ', + 'lyi': 'りぃ', + 'va': 'ゔぁ', + 'vi': 'ゔぃ', + 'vu': 'ゔ', + 've': 'ゔぇ', + 'vo': 'ゔぉ', + 'vya': 'ゔゃ', + 'vyi': 'ゔぃ', + 'vyu': 'ゔゅ', + 'vye': 'ゔぇ', + 'vyo': 'ゔょ', + 'wha': 'うぁ', + 'whi': 'うぃ', + 'ye': 'いぇ', + 'whe': 'うぇ', + 'who': 'うぉ', + 'xa': 'ぁ', + 'xi': 'ぃ', + 'xu': 'ぅ', + 'xe': 'ぇ', + 'xo': 'ぉ', + 'xya': 'ゃ', + 'xyu': 'ゅ', + 'xyo': 'ょ', + 'xtu': 'っ', + 'xtsu': 'っ', + 'xka': 'ゕ', + 'xke': 'ゖ', + 'xwa': 'ゎ', + '@@': ' ', + '#[': '「', + '#]': '」', + '#,': '、', + '#.': '。', + '#/': '・', +}; + +bool _smallTsu(String for_conversion) => for_conversion == hiragana_small_tsu; +bool _nFollowedByYuYeYo(String for_conversion, String kana) => + for_conversion == hiragana_syllabic_n && + kana.length > 1 && + 'やゆよ'.contains(kana.substring(1, 2)); + +String transliterateHiraganaToLatin(String hiragana) { + String kana = hiragana; + String romaji = ''; + bool geminate = false; + + while (kana.isNotEmpty) { + final lengths = [if (kana.length > 1) 2, 1]; + for (final length in lengths) { + final String for_conversion = kana.substring(0, length); + String? 
mora; + + if (_smallTsu(for_conversion)) { + geminate = true; + kana = kana.replaceRange(0, length, ''); + break; + } else if (_nFollowedByYuYeYo(for_conversion, kana)) { + mora = "n'"; + } + mora ??= hiragana_to_latin[for_conversion]; + + if (mora != null) { + if (geminate) { + geminate = false; + romaji += mora.substring(0, 1); + } + romaji += mora; + kana = kana.replaceRange(0, length, ''); + break; + } else if (length == 1) { + romaji += for_conversion; + kana = kana.replaceRange(0, length, ''); + } + } + } + return romaji; +} + +bool _doubleNFollowedByAIUEO(String for_conversion) => + RegExp(r'^nn[aiueo]$').hasMatch(for_conversion); +bool _hasTableMatch(String for_conversion) => + latin_to_hiragana[for_conversion] != null; +bool _hasDoubleConsonant(String for_conversion, int length) => + for_conversion == 'tch' || + (length == 2 && + RegExp(r'^([kgsztdnbpmyrlwchf])\1$').hasMatch(for_conversion)); + +String transliterateLatinToHiragana(String latin) { + String romaji = + latin.toLowerCase().replaceAll('mb', 'nb').replaceAll('mp', 'np'); + String kana = ''; + + while (romaji.isNotEmpty) { + final lengths = [ + if (romaji.length > 2) 3, + if (romaji.length > 1) 2, + 1, + ]; + + for (final length in lengths) { + String? 
mora; + int for_removal = length; + final String for_conversion = romaji.substring(0, length); + + if (_doubleNFollowedByAIUEO(for_conversion)) { + mora = hiragana_syllabic_n; + for_removal = 1; + } else if (_hasTableMatch(for_conversion)) { + mora = latin_to_hiragana[for_conversion]; + } else if (_hasDoubleConsonant(for_conversion, length)) { + mora = hiragana_small_tsu; + for_removal = 1; + } + + if (mora != null) { + kana += mora; + romaji = romaji.replaceRange(0, for_removal, ''); + break; + } else if (length == 1) { + kana += for_conversion; + romaji = romaji.replaceRange(0, 1, ''); + } + } + } + + return kana; +} + +String _transposeCodepointsInRange( + String text, + int distance, + int rangeStart, + int rangeEnd, +) => + String.fromCharCodes( + text.codeUnits + .map((c) => c + ((rangeStart <= c && c <= rangeEnd) ? distance : 0)), + ); + +String transliterateKanaToLatin(String kana) => + transliterateHiraganaToLatin(transliterateKatakanaToHiragana(kana)); + +String transliterateLatinToKatakana(String latin) => + transliterateHiraganaToKatakana(transliterateLatinToHiragana(latin)); + +String transliterateKatakanaToHiragana(String katakana) => + _transposeCodepointsInRange(katakana, -96, 12449, 12534); + +String transliterateHiraganaToKatakana(String hiragana) => + _transposeCodepointsInRange(hiragana, 96, 12353, 12438); + +String transliterateFullwidthRomajiToHalfwidth(String halfwidth) => + _transposeCodepointsInRange( + _transposeCodepointsInRange( + halfwidth, + -65248, + 65281, + 65374, + ), + -12256, + 12288, + 12288, + ); + +String transliterateHalfwidthRomajiToFullwidth(String halfwidth) => + _transposeCodepointsInRange( + _transposeCodepointsInRange( + halfwidth, + 65248, + 33, + 126, + ), + 12256, + 32, + 32, + ); diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..e969bd1 --- /dev/null +++ b/flake.lock @@ -0,0 +1,122 @@ +{ + "nodes": { + "JMdictSrc": { + "flake": false, + "locked": { + "narHash": 
"sha256-TAkT98/lC1zBAJ/ublGi/gK965pwxoHJrnWRaKKBq7I=", + "type": "file", + "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz" + }, + "original": { + "type": "file", + "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz" + } + }, + "JMdictWithExamplesSrc": { + "flake": false, + "locked": { + "narHash": "sha256-GfClwLR4uoxPKxRbI5qgELurAdpegCbZO5lEORb3EvA=", + "type": "file", + "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz" + }, + "original": { + "type": "file", + "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz" + } + }, + "RADKFILESrc": { + "flake": false, + "locked": { + "narHash": "sha256-rO2z5GPt3g6osZOlpyWysmIbRV2Gw4AR4XvngVTHNpk=", + "type": "file", + "url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz" + }, + "original": { + "type": "file", + "url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz" + } + }, + "flake-utils": { + "locked": { + "lastModified": 1649676176, + "narHash": "sha256-OWKJratjt2RW151VUlJPRALb7OU2S5s+f0vLj4o1bHM=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "a4b154ebbdc88c8498a5c7b01589addc9e9cb678", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nix-dart": { + "inputs": { + "flake-utils": [ + "flake-utils" + ], + "nixpkgs": [ + "nixpkgs" + ], + "pub2nix": "pub2nix" + }, + "locked": { + "lastModified": 1652213615, + "narHash": "sha256-+eehm2JlhoKgY+Ea4DTxDMei/x4Fgz7S+ZPqWpZysuI=", + "owner": "tadfisher", + "repo": "nix-dart", + "rev": "6f686ddf984306d944e9b5adf9f35f3a0a0a70b7", + "type": "github" + }, + "original": { + "owner": "tadfisher", + "repo": "nix-dart", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1655456688, + "narHash": "sha256-j2trI5gv2fnHdfUQFBy957avCPxxzCqE8R+TOYHPSRE=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "d17a56d90ecbd1b8fc908d49598fb854ef188461", + "type": "github" + }, + "original": { + "id": "nixpkgs", + "ref": "nixos-22.05", + "type": 
"indirect" + } + }, + "pub2nix": { + "flake": false, + "locked": { + "lastModified": 1594192744, + "narHash": "sha256-pDvcXSG1Mh2BpwkqAcNDJzcupV3pIAAtZJLfkiHMAz4=", + "owner": "paulyoung", + "repo": "pub2nix", + "rev": "0c7ecca590fcd1616db8c6468f799ffef36c85e9", + "type": "github" + }, + "original": { + "owner": "paulyoung", + "repo": "pub2nix", + "type": "github" + } + }, + "root": { + "inputs": { + "JMdictSrc": "JMdictSrc", + "JMdictWithExamplesSrc": "JMdictWithExamplesSrc", + "RADKFILESrc": "RADKFILESrc", + "flake-utils": "flake-utils", + "nix-dart": "nix-dart", + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..d7d6c61 --- /dev/null +++ b/flake.nix @@ -0,0 +1,208 @@ +{ + description = "A SQLite database containing open source japanese language translation data"; + + inputs = { + nixpkgs.url = "nixpkgs/nixos-22.05"; + + flake-utils = { + url = "github:numtide/flake-utils"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + + nix-dart = { + url = "github:tadfisher/nix-dart"; + inputs = { + nixpkgs.follows = "nixpkgs"; + flake-utils.follows = "flake-utils"; + }; + }; + + JMdictSrc = { + url = "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"; + flake = false; + }; + + JMdictWithExamplesSrc = { + url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"; + flake = false; + }; + + RADKFILESrc = { + url = "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"; + flake = false; + }; + + }; + + outputs = { + self, + nixpkgs, + flake-utils, + nix-dart, + JMdictSrc, + JMdictWithExamplesSrc, + RADKFILESrc + }: let + system = "x86_64-linux"; + pkgs = import nixpkgs { + inherit system; + overlays = [ + # (final: prev: { dart = nix-dart.packages.${system}.dart; }) + nix-dart.overlay + ]; + }; + inherit (pkgs) lib; + + in { + devShell.${system} = pkgs.mkShell { + buildInputs = with pkgs; [ + nix-dart.packages.${system}.pub2nix-lock + dart + gnumake + sqlite + sqlite-web + sqlint + 
sqlfluff + ]; + }; + + defaultPackage.${system} = self.packages.${system}.database; + + packages.${system} = let + inherit (pkgs.stdenv) mkDerivation; + dbName = "main.db"; + + edrdgMetadata = { + license = { + shortName = "EDRDG"; + fullName = "Electronic Dictionary Research and Development Group General Dictionary Licence"; + url = "http://www.csse.monash.edu.au/~jwb/edrdg/licence.html"; + }; + maintainers = [ "h7x4 " ]; + platforms = lib.platforms.all; + }; + in { + JMdict = mkDerivation { + name = "JMdict"; + + srcs = [ + JMdictSrc + JMdictWithExamplesSrc + ]; + dontUnpack = true; + + nativeBuildInputs = with pkgs; [ xmlformat ]; + buildPhase = '' + gzip -dkc ${JMdictSrc} > jmdict.xml + gzip -dkc ${JMdictWithExamplesSrc} > jmdict_with_examples.xml + xmlformat -i jmdict.xml + xmlformat -i jmdict_with_examples.xml + ''; + + installPhase = '' + mkdir $out + cp jmdict.xml $out + cp jmdict_with_examples.xml $out + ''; + + meta = edrdgMetadata // { + description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words"; + homepage = "https://www.edrdg.org/jmdict/j_jmdict.html"; + }; + }; + + RADKFILE = mkDerivation { + name = "RADKFILE"; + + src = RADKFILESrc; + dontUnpack = true; + + buildPhase = '' + gzip -dkc $src > radkfile + ''; + + installPhase = '' + iconv -f EUC-JP -t UTF-8 -o $out radkfile + ''; + + meta = edrdgMetadata // { + description = "A file providing searchable decompositions of kanji characters"; + homepage = "https://www.edrdg.org/krad/kradinf.html"; + }; + }; + + database_generator = (nix-dart.builders.${system}.buildDartPackage { + pname = "database_generator"; + version = "1.0"; + + buildInputs = [ nix-dart.packages.${system}.dart-dev ]; + + src = builtins.filterSource (path: type: baseNameOf path != ".dart_tool") ./.; + specFile = ./pubspec.yaml; + lockFile = ./pub2nix.lock; + }).overrideAttrs(old: { + buildInputs = [nix-dart.packages.${system}.dart-dev]; + buildPhase = builtins.replaceStrings ["pub"] ["dart pub"] 
old.buildPhase; + }); + + database = mkDerivation { + name = "database"; + src = builtins.filterSource (path: type: baseNameOf path != dbName) ./.; + nativeBuildInputs = with pkgs; [ + sqlite + ]; + + buildPhase = '' + mkdir -p data + ln -s ${self.packages.${system}.JMdict}/* data + ln -s ${self.packages.${system}.RADKFILE} data + sqlite3 ${dbName} < migrations/0001_initial.sql + sqlite3 ${dbName} < migrations/0002_insert_info_values.sql + ''; + + installPhase = '' + mkdir -p $out + cp migrations/0001_initial.sql $out/schema.sql + cp ${dbName} $out/${dbName} + ''; + }; + + docs = mkDerivation { + name = "docs"; + src = self.packages.${system}.database; + nativeBuildInputs = with pkgs; [ + schemaspy + sqlite-jdbc + ]; + + buildPhase = let + properties = pkgs.writeText "sqlite.properties" '' + description=SQLite + driver=org.sqlite.JDBC + driverPath=${pkgs.sqlite-jdbc}/share/java/sqlite-jdbc-3.25.2.jar + connectionSpec=jdbc:sqlite: + ''; + + args = pkgs.writeText "schemaspy.properties" '' + schemaspy.cat="%" + schemaspy.t=sqlite + schemaspy.sso=true + schemaspy.db=${dbName} + schemaspy.o=docs + schemaspy.s=schema.sql + ''; + + in '' + cp ${args} ./schemaspy.properties + ls + schemaspy -t ${properties} + ''; + + installPhase = '' + cp -r docs $out + ''; + }; + }; + }; + +} diff --git a/migrations/0001_initial.sql b/migrations/0001_initial.sql new file mode 100644 index 0000000..6c987dc --- /dev/null +++ b/migrations/0001_initial.sql @@ -0,0 +1,443 @@ +-- TODO: figure out ondelete functions... 
+
+------------
+-- JMdict --
+------------
+
+-- Lookup tables for the JMdict tag codes. Each one maps a short code ("id")
+-- to a human-readable "description". The rows are inserted by
+-- migrations/0002_insert_info_values.sql.
+
+CREATE TABLE "JMdict_InfoDialect" (
+  "id" VARCHAR(4) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_InfoField" (
+  "id" VARCHAR(7) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_InfoKanji" (
+  "id" VARCHAR(5) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_InfoMisc" (
+  "id" VARCHAR(12) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_InfoPOS" (
+  "id" VARCHAR(9) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_InfoReading" (
+  "id" VARCHAR(5) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+-- The XML specification says that an entry needs to have at least
+-- one sense and one reading. I will just assume this is the case, and
+-- not implement a check for it.
+
+CREATE TABLE "JMdict_Entry" (
+  "id" INTEGER PRIMARY KEY
+);
+
+-- KanjiElement
+
+-- One row per kanji writing of an entry. Note that the written form is
+-- stored in the column named "reading" (not "kanji"); every foreign key
+-- that points at this table must therefore target ("entryId", "reading").
+-- "news", "ichi", "spec" and "gai" are restricted to 1 or 2 (or NULL);
+-- presumably they mirror the JMdict priority markers -- TODO confirm
+-- against the parser.
+
+CREATE TABLE "JMdict_KanjiElement" (
+  "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
+  "reading" TEXT NOT NULL,
+  "news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
+  "ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
+  "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
+  "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
+  "nf" INTEGER,
+  PRIMARY KEY ("entryId", "reading")
+) WITHOUT ROWID;
+
+-- Tags (codes from "JMdict_InfoKanji") attached to a kanji element.
+
+CREATE TABLE "JMdict_KanjiElementInfo" (
+  "entryId" INTEGER NOT NULL,
+  "reading" TEXT NOT NULL,
+  "info" TEXT NOT NULL REFERENCES "JMdict_InfoKanji"("id"),
+  FOREIGN KEY ("entryId", "reading")
+    REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
+  PRIMARY KEY ("entryId", "reading", "info")
+) WITHOUT ROWID;
+
+-- ReadingElement
+
+-- One row per reading of an entry; same column layout as
+-- "JMdict_KanjiElement" plus the "readingDoesNotMatchKanji" flag.
+
+CREATE TABLE "JMdict_ReadingElement" (
+  "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
+  "reading" TEXT NOT NULL,
+  "readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
+  "news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
+  "ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
+  "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
+  "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
+  "nf" INTEGER,
+  PRIMARY KEY ("entryId", "reading")
+) WITHOUT ROWID;
+
+-- "restriction" is free text with no foreign key; presumably it holds the
+-- kanji form the reading is limited to -- TODO confirm against the parser.
+
+CREATE TABLE "JMdict_ReadingElementRestriction" (
+  "entryId" INTEGER NOT NULL,
+  "reading" TEXT NOT NULL,
+  "restriction" TEXT NOT NULL,
+  FOREIGN KEY ("entryId", "reading")
+    REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
+  PRIMARY KEY ("entryId", "reading", "restriction")
+) WITHOUT ROWID;
+
+-- Tags (codes from "JMdict_InfoReading") attached to a reading element.
+
+CREATE TABLE "JMdict_ReadingElementInfo" (
+  "entryId" INTEGER NOT NULL,
+  "reading" TEXT NOT NULL,
+  "info" TEXT NOT NULL REFERENCES "JMdict_InfoReading"("id"),
+  FOREIGN KEY ("entryId", "reading")
+    REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
+  PRIMARY KEY ("entryId", "reading", "info")
+) WITHOUT ROWID;
+
+-- Sense
+
+-- Optimal solution here would be to have an id INTEGER AUTOINCREMENT,
+-- and the entryId as a composite key, since the entryId is used below.
+-- However, autoincrementing composite keys are not available in sqlite
+
+CREATE TABLE "JMdict_Sense" (
+  "id" INTEGER PRIMARY KEY AUTOINCREMENT,
+  "entryId" INTEGER REFERENCES "JMdict_Entry"("id")
+);
+
+-- Limits a sense to one kanji form of its entry. The child column is
+-- called "kanji", but the parent key column in "JMdict_KanjiElement" is
+-- "reading"; referencing a non-existent parent column makes every write
+-- fail with "foreign key mismatch" as soon as foreign keys are enabled.
+
+CREATE TABLE "JMdict_SenseRestrictedToKanji" (
+  "entryId" INTEGER,
+  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
+  "kanji" TEXT,
+  FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
+  PRIMARY KEY ("entryId", "senseId", "kanji")
+);
+
+-- Limits a sense to one reading of its entry.
+
+CREATE TABLE "JMdict_SenseRestrictedToReading" (
+  "entryId" INTEGER,
+  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
+  "reading" TEXT,
+  FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
+  PRIMARY KEY ("entryId", "senseId", "reading")
+);
+
+-- In order to add an xref, the entry it points to must already have been
+-- inserted. Xrefs should therefore be added in a second pass over the
+-- dictionary file.
+
+-- In this version of JMdict, the xrefs can be ambiguous.
+-- There have been rumours of an unambiguous version possibly arriving in the future
+-- (https://www.edrdg.org/jmdict_edict_list/2019/msg00360.html)
+-- but for the time being, this needs to be modelled as a one-to-many relationship.
+
+-- These two things also concern "SenseAntonym"
+
+-- Cross reference ("see also") from a sense to another entry, identified
+-- by the target entry id plus a reading and/or a kanji form.
+--
+-- The CHECK uses IS NOT NULL: a comparison written as `x = NULL` always
+-- evaluates to NULL, and a CHECK whose result is NULL is treated as
+-- satisfied, so such a constraint is never enforced.
+-- NOTE(review): this requires at least one of reading/kanji. The previous
+-- expression suggests "exactly one" may have been intended; tighten it to
+-- ("seeAlsoReading" IS NULL) <> ("seeAlsoKanji" IS NULL) if the importer
+-- never stores both.
+-- NOTE(review): "seeAlsoSense" is declared TEXT although it references the
+-- INTEGER key "JMdict_Sense"."id" -- confirm against the importer before
+-- changing the type.
+-- NOTE(review): the primary key contains nullable columns; SQLite treats
+-- NULLs as distinct there, so duplicates that differ only in a NULL column
+-- are not rejected.
+
+CREATE TABLE "JMdict_SenseSeeAlso" (
+  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
+  "xrefEntryId" INTEGER,
+  "seeAlsoReading" TEXT,
+  "seeAlsoKanji" TEXT,
+  "seeAlsoSense" TEXT REFERENCES "JMdict_Sense"("id"),
+  CHECK ("seeAlsoReading" IS NOT NULL OR "seeAlsoKanji" IS NOT NULL),
+  -- TODO: check that if seeAlsoSense is present, it refers to a sense
+  -- connected to xrefEntryId.
+  -- The kanji form lives in "JMdict_KanjiElement"."reading"; that table
+  -- has no column named "kanji".
+  FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
+  FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
+  PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
+);
+
+-- Antonym cross reference; same layout and same constraints as
+-- "JMdict_SenseSeeAlso", including the review notes on that table.
+
+CREATE TABLE "JMdict_SenseAntonym" (
+  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
+  "xrefEntryId" INTEGER,
+  "antonymReading" TEXT,
+  "antonymKanji" TEXT,
+  "antonymSense" TEXT REFERENCES "JMdict_Sense"("id"),
+  CHECK ("antonymReading" IS NOT NULL OR "antonymKanji" IS NOT NULL),
+  FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
+  FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
+  PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
+);
+
+-- These cross references are going to be mostly accessed from a sense.
+-- The following indexes will speed up that join.
+CREATE INDEX "JMdict_SenseSeeAlso_bySenseId" ON "JMdict_SenseSeeAlso"("senseId");
+CREATE INDEX "JMdict_SenseAntonym_bySenseId" ON "JMdict_SenseAntonym"("senseId");
+
+-- Tag tables: each row attaches one code from the matching
+-- "JMdict_Info*" lookup table to a sense.
+
+CREATE TABLE "JMdict_SensePOS" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "pos" TEXT NOT NULL REFERENCES "JMdict_InfoPOS"("id"),
+  PRIMARY KEY ("senseId", "pos")
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_SenseField" (
+  "senseId" INTEGER NOT NULL,
+  "field" TEXT NOT NULL,
+  FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("id"),
+  FOREIGN KEY ("field") REFERENCES "JMdict_InfoField"("id"),
+  PRIMARY KEY ("senseId", "field")
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_SenseMisc" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "misc" TEXT NOT NULL REFERENCES "JMdict_InfoMisc"("id"),
+  PRIMARY KEY ("senseId", "misc")
+) WITHOUT ROWID;
+
+-- Source-language information for a sense.
+-- The default is written as the string literal 'eng': the double-quoted
+-- form "eng" is an identifier that SQLite only accepts as a string for
+-- backwards compatibility, and it is rejected by builds that disable
+-- double-quoted string literals.
+-- NOTE(review): "phrase" is nullable but part of the primary key, so rows
+-- with a NULL phrase are never treated as duplicates of each other.
+
+CREATE TABLE "JMdict_SenseLanguageSource" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "language" CHAR(3) NOT NULL DEFAULT 'eng',
+  "phrase" TEXT,
+  "fullyDescribesSense" BOOLEAN NOT NULL DEFAULT TRUE,
+  "constructedFromSmallerWords" BOOLEAN NOT NULL DEFAULT FALSE,
+  PRIMARY KEY ("senseId", "language", "phrase")
+);
+
+-- The key column of "JMdict_InfoDialect" is "id"; that table has no
+-- "dialect" column, and referencing one makes every write to this table
+-- fail with "foreign key mismatch" once foreign keys are enabled.
+
+CREATE TABLE "JMdict_SenseDialect" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("id"),
+  PRIMARY KEY ("senseId", "dialect")
+) WITHOUT ROWID;
+
+-- In the documentation, it says that the glossary can contain
+-- special prioritized entries, but I can't find a single one of those.
+-- Neither can I find a glossary tag with g_gend data, so these parts
+-- will be omitted.
+
+-- Glossary phrases (translations) of a sense, one row per phrase and
+-- language.
+-- The language default is written as the string literal 'eng': the
+-- double-quoted form "eng" is an identifier that SQLite only accepts as a
+-- string for backwards compatibility, and it is rejected by builds that
+-- disable double-quoted string literals.
+
+CREATE TABLE "JMdict_SenseGlossary" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "phrase" TEXT NOT NULL,
+  "language" CHAR(3) NOT NULL DEFAULT 'eng',
+  "type" TEXT,
+  PRIMARY KEY ("senseId", "language", "phrase")
+) WITHOUT ROWID;
+
+-- Free-text notes attached to a sense.
+
+CREATE TABLE "JMdict_SenseInfo" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "info" TEXT NOT NULL,
+  PRIMARY KEY ("senseId", "info")
+) WITHOUT ROWID;
+
+-- There is not a single example sentence that doesn't come from
+-- the Tanaka Corpus, so I will leave the type out for now.
+
+CREATE TABLE "JMdict_ExampleSentence" (
+  "id" INTEGER PRIMARY KEY,
+  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
+  "word" TEXT NOT NULL,
+  "source" TEXT NOT NULL,
+  "sourceLanguage" CHAR(3) NOT NULL DEFAULT 'eng',
+  "japanese" TEXT NOT NULL
+  -- "type" TEXT NOT NULL DEFAULT 'tat',
+);
+
+-- These tables are for optimizing searches.
+
+-- In order to include results from both, the software should
+-- first check if the search word is convertible to kana, and then
+-- potentially get results from both by doing a union between two
+-- selects.
+
+-- Search table: kana string -> entry.
+-- NOTE(review): the primary key already starts with "kana", so the
+-- separate byKana index looks redundant; it is kept because dropping it
+-- would change the set of schema objects.
+
+CREATE TABLE "JMdict_EntryByKana" (
+  "kana" TEXT NOT NULL,
+  "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
+  PRIMARY KEY ("kana", "entryId")
+) WITHOUT ROWID;
+
+CREATE INDEX "JMdict_EntryByKana_byKana" ON "JMdict_EntryByKana"("kana");
+
+-- Search table: English string -> entry (same layout as above).
+
+CREATE TABLE "JMdict_EntryByEnglish" (
+  "english" TEXT NOT NULL,
+  "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
+  PRIMARY KEY ("english", "entryId")
+) WITHOUT ROWID;
+
+CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english");
+
+--------------
+-- RADKFILE --
+--------------
+
+-- One row per (kanji, radical) pair. "RADK" serves lookups by radical and
+-- "KRAD" lookups by kanji.
+
+CREATE TABLE "RADKFILE" (
+  "kanji" CHAR(1) NOT NULL,
+  "radical" CHAR(1) NOT NULL,
+  PRIMARY KEY ("kanji", "radical")
+) WITHOUT ROWID;
+
+CREATE INDEX "RADK" ON "RADKFILE"("radical");
+CREATE INDEX "KRAD" ON "RADKFILE"("kanji");
+
+-- The distinct set of radicals that occur in "RADKFILE".
+
+CREATE VIEW "RADKFILE_Radicals" AS
+SELECT DISTINCT "radical" FROM "RADKFILE";
+
+--------------
+-- KANJIDIC --
+--------------
+
+-- One row per kanji; every other KANJIDIC table references "literal".
+
+CREATE TABLE "KANJIDIC_Character" (
+  "literal" CHAR(1) NOT NULL PRIMARY KEY,
+  "grade" INTEGER CHECK ("grade" BETWEEN 1 AND 10),
+  "strokeCount" INTEGER NOT NULL,
+  "frequency" INTEGER,
+  "jlpt" INTEGER
+) WITHOUT ROWID;
+
+CREATE TABLE "KANJIDIC_RadicalName" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "name" TEXT NOT NULL,
+  PRIMARY KEY("kanji", "name")
+) WITHOUT ROWID;
+
+-- At most one codepoint per kanji and encoding ("type").
+
+CREATE TABLE "KANJIDIC_Codepoint" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "type" VARCHAR(6) NOT NULL CHECK ("type" IN ('jis208', 'jis212', 'jis213', 'ucs')),
+  "codepoint" VARCHAR(7) NOT NULL,
+  PRIMARY KEY ("kanji", "type")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Codepoint_byCharacter" ON "KANJIDIC_Codepoint"("kanji");
+
+-- The upper bound of "radical" depends on the numbering scheme:
+-- 214 for 'classical', 212 for 'nelson_c'.
+
+CREATE TABLE "KANJIDIC_Radical" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "type" VARCHAR(9) NOT NULL CHECK ("type" IN ('classical', 'nelson_c')),
+  "radical" INTEGER NOT NULL CHECK ("radical" BETWEEN 1 AND IIF("type" = 'classical', 214, 212)),
+  PRIMARY KEY("kanji", "type")
+) WITHOUT ROWID;
+
+CREATE TABLE "KANJIDIC_StrokeMiscount" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "strokeCount" INTEGER NOT NULL,
+  PRIMARY KEY("kanji", "strokeCount")
+) WITHOUT ROWID;
+
+CREATE TABLE "KANJIDIC_Variant" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "variant" TEXT NOT NULL,
+  "type" VARCHAR(8) NOT NULL CHECK (
+    "type" IN (
+      'jis208',
+      'jis212',
+      'jis213',
+      'deroo',
+      'njecd',
+      's_h',
+      'nelson_c',
+      'oneill',
+      'ucs'
+    )
+  ),
+  PRIMARY KEY ("kanji", "type", "variant")
+) WITHOUT ROWID;
+
+-- Dictionary references are split over two tables because 'moro'
+-- references carry an extra volume and page. The view
+-- "KANJIDIC_DictionaryReference" below recombines them.
+
+CREATE TABLE "_KANJIDIC_DictionaryReference_Part1" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "ref" VARCHAR(9) NOT NULL,
+  "type" VARCHAR(16) NOT NULL CHECK(
+    "type" IN (
+      'nelson_c',
+      'nelson_n',
+      'halpern_njecd',
+      'halpern_kkd',
+      'halpern_kkld',
+      'halpern_kkld_2ed',
+      'heisig',
+      'heisig6',
+      'gakken',
+      'oneill_names',
+      'oneill_kk',
+      'henshall',
+      'sh_kk',
+      'sh_kk2',
+      'sakade',
+      'jf_cards',
+      'henshall3',
+      'tutt_cards',
+      'crowley',
+      'kanji_in_context',
+      'busy_people',
+      'kodansha_compact',
+      'maniette'
+    )
+  ),
+  PRIMARY KEY("kanji", "type")
+) WITHOUT ROWID;
+
+CREATE TABLE "_KANJIDIC_DictionaryReference_Moro" (
+  "kanji" CHAR(1) NOT NULL PRIMARY KEY REFERENCES "KANJIDIC_Character"("literal"),
+  "ref" VARCHAR(7) NOT NULL,
+  "volume" INTEGER,
+  "page" INTEGER
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_DictionaryReference_byPart1" ON "_KANJIDIC_DictionaryReference_Part1"("kanji", "ref", "type");
+CREATE INDEX "KANJIDIC_DictionaryReference_byMoro" ON "_KANJIDIC_DictionaryReference_Moro"("kanji", "ref", "volume", "page");
+
+-- Unified read-only view over both reference tables; rows from the Moro
+-- table are reported with the constant type 'moro'.
+
+CREATE VIEW "KANJIDIC_DictionaryReference" AS
+SELECT "kanji", "ref", "type", NULL AS "volume", NULL AS "page" FROM "_KANJIDIC_DictionaryReference_Part1"
+UNION
+SELECT "kanji", "ref", 'moro' AS "type", "volume", "page" FROM "_KANJIDIC_DictionaryReference_Moro";
+
+CREATE TABLE "KANJIDIC_QueryCode" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "code" VARCHAR(7) NOT NULL,
+  "type" VARCHAR(11) NOT NULL CHECK ("type" IN ('skip', 'sh_desc', 'four_corner', 'deroo', 'misclass')),
+  "SKIPMisclassification" VARCHAR(15),
+  PRIMARY KEY ("kanji", "type", "code")
+) WITHOUT ROWID;
+
+-- Non-Japanese readings (Korean and pinyin); the Japanese readings are
+-- stored in the Kunyomi, Onyomi and Nanori tables below.
+
+CREATE TABLE "KANJIDIC_Reading" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "type" VARCHAR(8) NOT NULL CHECK ("type" IN ('korean_h', 'korean_r', 'pinyin')),
+  "reading" TEXT NOT NULL,
+  PRIMARY KEY ("kanji", "type", "reading")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Reading_byReading" ON "KANJIDIC_Reading"("reading");
+
+CREATE TABLE "KANJIDIC_Kunyomi" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "yomi" TEXT NOT NULL,
+  "isJouyou" BOOLEAN,
+  PRIMARY KEY ("kanji", "yomi")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Kunyomi_byYomi" ON "KANJIDIC_Kunyomi"("yomi");
+
+CREATE TABLE "KANJIDIC_Onyomi" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "yomi" TEXT NOT NULL,
+  "type" VARCHAR(7) CHECK ("type" IN ('kan', 'go', 'tou', 'kan''you')),
+  "isJouyou" BOOLEAN,
+  PRIMARY KEY ("kanji", "yomi")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Onyomi_byYomi" ON "KANJIDIC_Onyomi"("yomi");
+
+-- The language default is written as the string literal 'eng': the
+-- double-quoted form "eng" is an identifier that SQLite only accepts as a
+-- string for backwards compatibility, and it is rejected by builds that
+-- disable double-quoted string literals.
+
+CREATE TABLE "KANJIDIC_Meaning" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "language" CHAR(3) NOT NULL DEFAULT 'eng',
+  "meaning" TEXT NOT NULL,
+  PRIMARY KEY ("kanji", "language", "meaning")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Meaning_byMeaning" ON "KANJIDIC_Meaning"("meaning");
+
+CREATE TABLE "KANJIDIC_Nanori" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "nanori" TEXT NOT NULL,
+  PRIMARY KEY ("kanji", "nanori")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Nanori_byNanori" ON "KANJIDIC_Nanori"("nanori");
+
+-------------------------
+-- Interdict relations --
+-------------------------
+
+-- Radk - kanjidic
+--
kanjireading -> filter kanji regex - kanjidic +-- index kanji search by romaji +-- index kanji search by hiragana +-- index word search by romaji +-- index word search by hiragana + diff --git a/migrations/0002_insert_info_values.sql b/migrations/0002_insert_info_values.sql new file mode 100644 index 0000000..36816be --- /dev/null +++ b/migrations/0002_insert_info_values.sql @@ -0,0 +1,251 @@ +INSERT INTO "JMdict_InfoDialect"("id", "description") VALUES + ('bra', 'Brazilian'), + ('hob', 'Hokkaido-ben'), + ('ksb', 'Kansai-ben'), + ('ktb', 'Kantou-ben'), + ('kyb', 'Kyoto-ben'), + ('kyu', 'Kyuushuu-ben'), + ('nab', 'Nagano-ben'), + ('osb', 'Osaka-ben'), + ('rkb', 'Ryuukyuu-ben'), + ('thb', 'Touhoku-ben'), + ('tsb', 'Tosa-ben'), + ('tsug', 'Tsugaru-ben'); + +INSERT INTO "JMdict_InfoField"("id", "description") VALUES + ('agric', 'agriculture'), + ('anat', 'anatomy'), + ('archeol', 'archeology'), + ('archit', 'architecture'), + ('art', 'art, aesthetics'), + ('astron', 'astronomy'), + ('audvid', 'audiovisual'), + ('aviat', 'aviation'), + ('baseb', 'baseball'), + ('biochem', 'biochemistry'), + ('biol', 'biology'), + ('bot', 'botany'), + ('Buddh', 'Buddhism'), + ('bus', 'business'), + ('chem', 'chemistry'), + ('Christn', 'Christianity'), + ('cloth', 'clothing'), + ('comp', 'computing'), + ('cryst', 'crystallography'), + ('ecol', 'ecology'), + ('econ', 'economics'), + ('elec', 'electricity, elec. 
eng.'), + ('electr', 'electronics'), + ('embryo', 'embryology'), + ('engr', 'engineering'), + ('ent', 'entomology'), + ('finc', 'finance'), + ('fish', 'fishing'), + ('food', 'food, cooking'), + ('gardn', 'gardening, horticulture'), + ('genet', 'genetics'), + ('geogr', 'geography'), + ('geol', 'geology'), + ('geom', 'geometry'), + ('go', 'go (game)'), + ('golf', 'golf'), + ('gramm', 'grammar'), + ('grmyth', 'Greek mythology'), + ('hanaf', 'hanafuda'), + ('horse', 'horse racing'), + ('law', 'law'), + ('ling', 'linguistics'), + ('logic', 'logic'), + ('MA', 'martial arts'), + ('mahj', 'mahjong'), + ('math', 'mathematics'), + ('mech', 'mechanical engineering'), + ('med', 'medicine'), + ('met', 'meteorology'), + ('mil', 'military'), + ('music', 'music'), + ('ornith', 'ornithology'), + ('paleo', 'paleontology'), + ('pathol', 'pathology'), + ('pharm', 'pharmacy'), + ('phil', 'philosophy'), + ('photo', 'photography'), + ('physics', 'physics'), + ('physiol', 'physiology'), + ('print', 'printing'), + ('psy', 'psychiatry'), + ('psych', 'psychology'), + ('rail', 'railway'), + ('Shinto', 'Shinto'), + ('shogi', 'shogi'), + ('sports', 'sports'), + ('stat', 'statistics'), + ('sumo', 'sumo'), + ('telec', 'telecommunications'), + ('tradem', 'trademark'), + ('vidg', 'video games'), + ('zool', 'zoology'); + +INSERT INTO "JMdict_InfoKanji"("id", "description") VALUES + ('ateji', 'ateji (phonetic) reading'), + ('ik', 'word containing irregular kana usage'), + ('iK', 'word containing irregular kanji usage'), + ('io', 'irregular okurigana usage'), + ('oK', 'word containing out-dated kanji or kanji usage'), + ('rK', 'rarely-used kanji form'); + +INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES + ('abbr', 'abbreviation'), + ('arch', 'archaism'), + ('char', 'character'), + ('chn', 'children''s language'), + ('col', 'colloquialism'), + ('company', 'company name'), + ('creat', 'creature'), + ('dated', 'dated term'), + ('dei', 'deity'), + ('derog', 'derogatory'), + ('doc', 'document'), 
+ ('ev', 'event'), + ('fam', 'familiar language'), + ('fem', 'female term or language'), + ('fict', 'fiction'), + ('form', 'formal or literary term'), + ('given', 'given name or forename, gender not specified'), + ('group', 'group'), + ('hist', 'historical term'), + ('hon', 'honorific or respectful (sonkeigo) language'), + ('hum', 'humble (kenjougo) language'), + ('id', 'idiomatic expression'), + ('joc', 'jocular, humorous term'), + ('leg', 'legend'), + ('m-sl', 'manga slang'), + ('male', 'male term or language'), + ('myth', 'mythology'), + ('net-sl', 'Internet slang'), + ('obj', 'object'), + ('obs', 'obsolete term'), + ('obsc', 'obscure term'), + ('on-mim', 'onomatopoeic or mimetic word'), + ('organization', 'organization name'), + ('oth', 'other'), + ('person', 'full name of a particular person'), + ('place', 'place name'), + ('poet', 'poetical term'), + ('pol', 'polite (teineigo) language'), + ('product', 'product name'), + ('proverb', 'proverb'), + ('quote', 'quotation'), + ('rare', 'rare'), + ('relig', 'religion'), + ('sens', 'sensitive'), + ('serv', 'service'), + ('sl', 'slang'), + ('station', 'railway station'), + ('surname', 'family or surname'), + ('uk', 'word usually written using kana alone'), + ('unclass', 'unclassified name'), + ('vulg', 'vulgar expression or word'), + ('work', 'work of art, literature, music, etc. 
name'), + ('X', 'rude or X-rated term (not displayed in educational software)'), + ('yoji', 'yojijukugo'); + +INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES + ('adj-f', 'noun or verb acting prenominally'), + ('adj-i', 'adjective (keiyoushi)'), + ('adj-ix', 'adjective (keiyoushi) - yoi/ii class'), + ('adj-kari', '''kari'' adjective (archaic)'), + ('adj-ku', '''ku'' adjective (archaic)'), + ('adj-na', 'adjectival nouns or quasi-adjectives (keiyodoshi)'), + ('adj-nari', 'archaic/formal form of na-adjective'), + ('adj-no', 'nouns which may take the genitive case particle ''no'''), + ('adj-pn', 'pre-noun adjectival (rentaishi)'), + ('adj-shiku', '''shiku'' adjective (archaic)'), + ('adj-t', '''taru'' adjective'), + ('adv', 'adverb (fukushi)'), + ('adv-to', 'adverb taking the ''to'' particle'), + ('aux', 'auxiliary'), + ('aux-adj', 'auxiliary adjective'), + ('aux-v', 'auxiliary verb'), + ('conj', 'conjunction'), + ('cop', 'copula'), + ('ctr', 'counter'), + ('exp', 'expressions (phrases, clauses, etc.)'), + ('int', 'interjection (kandoushi)'), + ('n', 'noun (common) (futsuumeishi)'), + ('n-adv', 'adverbial noun (fukushitekimeishi)'), + ('n-pr', 'proper noun'), + ('n-pref', 'noun, used as a prefix'), + ('n-suf', 'noun, used as a suffix'), + ('n-t', 'noun (temporal) (jisoumeishi)'), + ('num', 'numeric'), + ('pn', 'pronoun'), + ('pref', 'prefix'), + ('prt', 'particle'), + ('suf', 'suffix'), + ('unc', 'unclassified'), + ('v-unspec', 'verb unspecified'), + ('v1', 'Ichidan verb'), + ('v1-s', 'Ichidan verb - kureru special class'), + ('v2a-s', 'Nidan verb with ''u'' ending (archaic)'), + ('v2b-k', 'Nidan verb (upper class) with ''bu'' ending (archaic)'), + ('v2b-s', 'Nidan verb (lower class) with ''bu'' ending (archaic)'), + ('v2d-k', 'Nidan verb (upper class) with ''dzu'' ending (archaic)'), + ('v2d-s', 'Nidan verb (lower class) with ''dzu'' ending (archaic)'), + ('v2g-k', 'Nidan verb (upper class) with ''gu'' ending (archaic)'), + ('v2g-s', 'Nidan verb (lower class) 
with ''gu'' ending (archaic)'), + ('v2h-k', 'Nidan verb (upper class) with ''hu/fu'' ending (archaic)'), + ('v2h-s', 'Nidan verb (lower class) with ''hu/fu'' ending (archaic)'), + ('v2k-k', 'Nidan verb (upper class) with ''ku'' ending (archaic)'), + ('v2k-s', 'Nidan verb (lower class) with ''ku'' ending (archaic)'), + ('v2m-k', 'Nidan verb (upper class) with ''mu'' ending (archaic)'), + ('v2m-s', 'Nidan verb (lower class) with ''mu'' ending (archaic)'), + ('v2n-s', 'Nidan verb (lower class) with ''nu'' ending (archaic)'), + ('v2r-k', 'Nidan verb (upper class) with ''ru'' ending (archaic)'), + ('v2r-s', 'Nidan verb (lower class) with ''ru'' ending (archaic)'), + ('v2s-s', 'Nidan verb (lower class) with ''su'' ending (archaic)'), + ('v2t-k', 'Nidan verb (upper class) with ''tsu'' ending (archaic)'), + ('v2t-s', 'Nidan verb (lower class) with ''tsu'' ending (archaic)'), + ('v2w-s', 'Nidan verb (lower class) with ''u'' ending and ''we'' conjugation (archaic)'), + ('v2y-k', 'Nidan verb (upper class) with ''yu'' ending (archaic)'), + ('v2y-s', 'Nidan verb (lower class) with ''yu'' ending (archaic)'), + ('v2z-s', 'Nidan verb (lower class) with ''zu'' ending (archaic)'), + ('v4b', 'Yodan verb with ''bu'' ending (archaic)'), + ('v4g', 'Yodan verb with ''gu'' ending (archaic)'), + ('v4h', 'Yodan verb with ''hu/fu'' ending (archaic)'), + ('v4k', 'Yodan verb with ''ku'' ending (archaic)'), + ('v4m', 'Yodan verb with ''mu'' ending (archaic)'), + ('v4n', 'Yodan verb with ''nu'' ending (archaic)'), + ('v4r', 'Yodan verb with ''ru'' ending (archaic)'), + ('v4s', 'Yodan verb with ''su'' ending (archaic)'), + ('v4t', 'Yodan verb with ''tsu'' ending (archaic)'), + ('v5aru', 'Godan verb - -aru special class'), + ('v5b', 'Godan verb with ''bu'' ending'), + ('v5g', 'Godan verb with ''gu'' ending'), + ('v5k', 'Godan verb with ''ku'' ending'), + ('v5k-s', 'Godan verb - Iku/Yuku special class'), + ('v5m', 'Godan verb with ''mu'' ending'), + ('v5n', 'Godan verb with ''nu'' ending'), + 
('v5r', 'Godan verb with ''ru'' ending'), + ('v5r-i', 'Godan verb with ''ru'' ending (irregular verb)'), + ('v5s', 'Godan verb with ''su'' ending'), + ('v5t', 'Godan verb with ''tsu'' ending'), + ('v5u', 'Godan verb with ''u'' ending'), + ('v5u-s', 'Godan verb with ''u'' ending (special class)'), + ('v5uru', 'Godan verb - Uru old class verb (old form of Eru)'), + ('vi', 'intransitive verb'), + ('vk', 'Kuru verb - special class'), + ('vn', 'irregular nu verb'), + ('vr', 'irregular ru verb, plain form ends with -ri'), + ('vs', 'noun or participle which takes the aux. verb suru'), + ('vs-c', 'su verb - precursor to the modern suru'), + ('vs-i', 'suru verb - included'), + ('vs-s', 'suru verb - special class'), + ('vt', 'transitive verb'), + ('vz', 'Ichidan verb - zuru verb (alternative form of -jiru verbs)'); + +INSERT INTO "JMdict_InfoReading"("id", "description") VALUES + ('gikun', 'gikun (meaning as reading) or jukujikun (special kanji reading)'), + ('ik', 'word containing irregular kana usage'), + ('ok', 'out-dated or obsolete kana usage'), + ('uK', 'word usually written using kanji alone'); diff --git a/pub2nix.lock b/pub2nix.lock new file mode 100644 index 0000000..8639414 --- /dev/null +++ b/pub2nix.lock @@ -0,0 +1,99 @@ +packages: + collection: + dependency: direct main + description: + name: collection + url: https://pub.dartlang.org + source: hosted + version: 1.16.0 + sha256: 0nx7mbxwxw5z4mdjr9z8hg6g8kgy3cv5pv5ax0j1i9kl36brk5rg + ffi: + dependency: transitive + description: + name: ffi + url: https://pub.dartlang.org + source: hosted + version: 1.1.2 + sha256: 0w0yd43y1fsfzihd4j6diymg90bgvyi2zqyb3vf0k6g8hk8x1yr6 + js: + dependency: transitive + description: + name: js + url: https://pub.dartlang.org + source: hosted + version: 0.6.4 + sha256: 01knzh9890ygxpy59rsh77h2ilh69wyl83idvrcvwzk8fknjldkb + lints: + dependency: direct dev + description: + name: lints + url: https://pub.dartlang.org + source: hosted + version: 1.0.1 + sha256: 
1xyn9xpzwfw1f0mp03pyvspcphkinhzawkgp5lwmi7p15mv1vgz2 + meta: + dependency: transitive + description: + name: meta + url: https://pub.dartlang.org + source: hosted + version: 1.7.0 + sha256: 1z8sx23l9jn2ickq3z63pqpb6k9y6gbnnvj9200c6v7m3cvd7jbv + path: + dependency: transitive + description: + name: path + url: https://pub.dartlang.org + source: hosted + version: 1.8.1 + sha256: 0wg5da3zykfbala8mvcl7r7blgi5qjb838qhw30brgj3ani2hdph + petitparser: + dependency: transitive + description: + name: petitparser + url: https://pub.dartlang.org + source: hosted + version: 5.0.0 + sha256: 01rcmvk1znjykph6znkd3skvfn61lj54km4xw6vwa5iwwg84p5ph + sqflite_common: + dependency: transitive + description: + name: sqflite_common + url: https://pub.dartlang.org + source: hosted + version: 2.2.1+1 + sha256: 1i3fmvgj0f1ynf03rd1x9r0bmxly333jyi392ghh1ahm0lnj1kzq + sqflite_common_ffi: + dependency: direct main + description: + name: sqflite_common_ffi + url: https://pub.dartlang.org + source: hosted + version: 2.1.1 + sha256: 0v5xq9xpg63zidf8as17zh0pbhfzf9k00a2wn183jz9i5rmh6207 + sqlite3: + dependency: transitive + description: + name: sqlite3 + url: https://pub.dartlang.org + source: hosted + version: 1.7.0 + sha256: 1x56g99nw3jqvx1ysggpmmvb9gap63wdxk0pjawzv47xxm058rhm + synchronized: + dependency: transitive + description: + name: synchronized + url: https://pub.dartlang.org + source: hosted + version: 3.0.0+2 + sha256: 1j6108cq1hbcqpwhk9sah8q3gcidd7222bzhha2nk9syxhzqy82i + xml: + dependency: direct main + description: + name: xml + url: https://pub.dartlang.org + source: hosted + version: 6.0.1 + sha256: 158srisyld2zwhchcz264rf8qnby54xan4db14hf7lda7bx8ikbh +sdks: + dart: '>=2.16.1 <3.0.0' diff --git a/pubspec.lock b/pubspec.lock new file mode 100644 index 0000000..e2c787b --- /dev/null +++ b/pubspec.lock @@ -0,0 +1,89 @@ +# Generated by pub +# See https://dart.dev/tools/pub/glossary#lockfile +packages: + collection: + dependency: "direct main" + description: + name: collection + url: 
"https://pub.dartlang.org" + source: hosted + version: "1.16.0" + ffi: + dependency: transitive + description: + name: ffi + url: "https://pub.dartlang.org" + source: hosted + version: "1.1.2" + js: + dependency: transitive + description: + name: js + url: "https://pub.dartlang.org" + source: hosted + version: "0.6.4" + lints: + dependency: "direct dev" + description: + name: lints + url: "https://pub.dartlang.org" + source: hosted + version: "1.0.1" + meta: + dependency: transitive + description: + name: meta + url: "https://pub.dartlang.org" + source: hosted + version: "1.7.0" + path: + dependency: transitive + description: + name: path + url: "https://pub.dartlang.org" + source: hosted + version: "1.8.1" + petitparser: + dependency: transitive + description: + name: petitparser + url: "https://pub.dartlang.org" + source: hosted + version: "5.0.0" + sqflite_common: + dependency: transitive + description: + name: sqflite_common + url: "https://pub.dartlang.org" + source: hosted + version: "2.2.1+1" + sqflite_common_ffi: + dependency: "direct main" + description: + name: sqflite_common_ffi + url: "https://pub.dartlang.org" + source: hosted + version: "2.1.1" + sqlite3: + dependency: transitive + description: + name: sqlite3 + url: "https://pub.dartlang.org" + source: hosted + version: "1.7.0" + synchronized: + dependency: transitive + description: + name: synchronized + url: "https://pub.dartlang.org" + source: hosted + version: "3.0.0+2" + xml: + dependency: "direct main" + description: + name: xml + url: "https://pub.dartlang.org" + source: hosted + version: "6.0.1" +sdks: + dart: ">=2.16.1 <3.0.0" diff --git a/pubspec.yaml b/pubspec.yaml new file mode 100644 index 0000000..731d81d --- /dev/null +++ b/pubspec.yaml @@ -0,0 +1,21 @@ +name: ja_db +description: A SQLite database containing open source japanese language translation data +version: 1.0.0 +homepage: https://git.nani.wtf/h7x4/ja_db + +environment: + sdk: '>=2.16.1 <3.0.0' + +dependencies: + collection: 
^1.16.0 + sqflite_common_ffi: ^2.1.1 + xml: ^6.0.1 + +dev_dependencies: + lints: ^1.0.0 + +executables: + ja_db: ja_db + +platforms: + linux: