From 5cf0b95d8b98f44d2229c1ebccd9729beff9be3c Mon Sep 17 00:00:00 2001 From: h7x4 Date: Mon, 20 Jun 2022 20:06:07 +0200 Subject: [PATCH] init commit --- .github/icon.png | Bin 0 -> 17539 bytes .gitignore | 13 + .sqlfluff | 8 + README.md | 5 + bin/common.dart | 0 bin/ja_db.dart | 16 + bin/jmdict/objects.dart | 235 ++++++++++ bin/jmdict/parser.dart | 346 ++++++++++++++ bin/kanjidic/objects.dart | 284 +++++++++++ bin/kanjidic/parser.dart | 231 +++++++++ bin/objects.dart | 5 + bin/radkfile/objects.dart | 13 + bin/radkfile/parser.dart | 32 ++ bin/romaji_transliteration.dart | 622 +++++++++++++++++++++++++ flake.lock | 122 +++++ flake.nix | 208 +++++++++ migrations/0001_initial.sql | 443 ++++++++++++++++++ migrations/0002_insert_info_values.sql | 251 ++++++++++ pub2nix.lock | 99 ++++ pubspec.lock | 89 ++++ pubspec.yaml | 21 + 21 files changed, 3043 insertions(+) create mode 100644 .github/icon.png create mode 100644 .gitignore create mode 100644 .sqlfluff create mode 100644 README.md create mode 100644 bin/common.dart create mode 100644 bin/ja_db.dart create mode 100644 bin/jmdict/objects.dart create mode 100644 bin/jmdict/parser.dart create mode 100644 bin/kanjidic/objects.dart create mode 100644 bin/kanjidic/parser.dart create mode 100644 bin/objects.dart create mode 100644 bin/radkfile/objects.dart create mode 100644 bin/radkfile/parser.dart create mode 100644 bin/romaji_transliteration.dart create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 migrations/0001_initial.sql create mode 100644 migrations/0002_insert_info_values.sql create mode 100644 pub2nix.lock create mode 100644 pubspec.lock create mode 100644 pubspec.yaml diff --git a/.github/icon.png b/.github/icon.png new file mode 100644 index 0000000000000000000000000000000000000000..15285d2b978e06275cc5af592c9f7cc13056ca05 GIT binary patch literal 17539 zcmb5Wby!qy6fU}lp{2WHP*763Lm32VgrU2U2I(3Eq@+YjNlazNbf_9l=lMY}d0k0_^3Eh7X7o;Qv=&(rgtky9560_muoSJ#h(1H^sQxx^X&6eqbOPT;o2bm9OBRV8+4u@FN 
zFy(mDs_jWv(&T{4v;=a5=!`tS)Pk6eEE}5W_&}gZPJ=BA)SU`2m)Qrcchs z%$wW3rL>?*(jEN)^lO&?g zwm9jnWfQ!OBIJS!guLLe8QDo(ob8e6>{!O!&JJ=C97TONe>GAgw*sZM(BJP@kBN7tV>UBs+ z29T-eG{*=e^k&GX%H44~qa!K?^0->4tm(8KSfu*)dqs<2;Lr5UK%{Fm9brm_qP~Xv z*Z3DN#6|B-+kgXrZTUOsX9n)4i{QHq9s3Pol2kC|NPakck@Gxw;7CJ5cg7pMidqZl z4#^}NJ}?ZGX{bMnWxh7)ZZrV!S5#J!o57(6x2ttq795|i%poFTV!xM9<>It_ygN}S zR|9zpznnF8;uIrr)$aP-p?Pg104^9kCQ(lAywjfLS8|8P7x&ca+AeFGS9qg0Q`m`8 zT7FEOMmr?X_bkGDfwKFu=n`sw1GX8+)fV;g=Fgj8nN%a(At^!y_smUgIKu#>i}7Ng z23!x^Y%&atr#kQXN{@qJ`Ez5QjR~uMzy7?Sg=+#2$?7&`zUE3COwCUAQ&PfkRnCtN z&-bwCSik))C$>UUo>R9bpkcSaABRj*@I8hU-(xY4 zmu**~i+Wkts@wn}J14q6G~WRZk)6{{ca%-vQ5EU)Ypmd|QO~^j@wHx|EJl$?*7j%b zE2YyeSa+?#A*!RCX+I|fE^$6x&?}{=ugr%O-AbEQ92Yv-ik&N&>f!nEYdlm-0 z`Eu$HJB7qx>-{O|W!jjEGIyKI+yZhaQ7JLcyNoAOjvE)gEF^ThD0&}#H05t9iz%O{ z^2_OvzN}=Zu815?j0}8{Qqm;|V+PNQM^h{8G&EN)NDTw*1)`t_r<3$c9 zA#^+r*#4PtwD8*H6N!W(Qd&`ehf=RWcJAB5h&Cp0SBdm+q|LITGVlx|S^JnbtSrXP zn7K}c_N@S($NJZThLaS2H&;&cJK#XvQ<|IQMO;5XzC*_CKmOQwQfFP^C_gZxudC}# zgyd43By#@|9v&pdAe`040zTT-*7Xt#@P^)bh&d{5Y;xzHyx`?M; z#6+ee{5S)D_XlQ32jd+vdlTvUPf00Jd3t?>Lb7r$Q*8GoI2qqIA^cFXRgRAqIkT7s z&Os>^3C(CM2dr<}cKG{iyLy`eKb~b+jDaC$MaJL zU*^90Z0j-Vy&MG&fV#N$H<(hiD$mFi%zw4H!_G5lJ65-A-embGr|mBIXl*xcA1e4L zX$}6^bB&QTs|rRZ=c9{@OD)L0!eCru{@>$;2K&(*{um5xhH41L&qtOph1AC5&A@&ReyRDi z>+pkl*;qO@rJu#m>fv_>leo(a^-BfJc0BG1ow**5MOK<(&3_x8_P_6^1H+B!;vlu@ zFW+I z7+CSrToNHqJct!--%Txr4b=C){#m8k3p}{`&5cnMtC?W z#9zde9u}QQdCu;K07w5(rCQNqIl2kTa&5|}RnqqLgK)2Y#WY32lO)`4%ys1J^b7sL zacw-{5@pMCfBx=y89znJawpG+3q7}Ay=q^}6xILy_SfXwl0fW`8pQ;6NoVEsn5D-+ z>Kq2%G>Vl=<#!RFJ0UbS3cOn%o%|LW8s?>xV7Z+J@Eb05JBpO&kkAa#;=DFKa;WV@ zpiq_omIQ3sI{1P8T=`IgO)v9C30aGnUKB+Z?lO(5Yl6qrJ!|A^LqBex=D@`aDE}); z;dPU>MM-N^KGZV0!tm*o&s_5sI8RURg1c5x&plivd2NyEpnd#fuzE>f_SVQ_gn}A;8Z7b)#O2 zv9?$NdhpS{Vcj#6_G>{wVZ>~|lSr;%ZgWlg##~Qd_{tT)N;pjOdlUYk0f$|%r`zwG z=L`S|scEWl2V2YKE7hI*1aqAlh-!SJ+SgkWv|i$z;6>VEX;S2{3|!OKIb2>|E`4!) 
zY4b+EvedHyzgHE82F(cxd+Sx|y}G}#D-{)gM`1|ghK7kW5k6W4aZYasy90$ax&*dtatngON_=WXtYTeC81GU8eaut6-8ac@ zjU>_>lY8hPj{F0mT{~y;h>P4>_gp?{+!?@;;JMp3gS9-^`r-nokw_;BFQ&2a$@>cv zKp4ue3QoD=I8#aV!s@YgqW2wSIdq2ybxp`b>z5Nf0|UdAzsGwsd6@?pL$205saj%v%q1W97NYF2--xbblb_q;#~qHn3%)7bp6j6m zw=JNFoy=q1ZL#9#Po@)#tumR}16*v7EyiU~M>4(id5doB$0A@1p83Coy?HaJj6~w#m zK?>)5^`1YJ(JzO!M(wq^?e=sM^ekH7Cy9*ty~A+&ot=;hrvgIKWEaIAqC19!me27w ze+sk^P>$OAO$+PJ&56g`<>J5phIdZOJLWtyuR&YXl0ZObngfJ$dlRs z>~$sLN@o4%Z@$j-CA&aC-qf?Fy1&8x`gZ5ahZ#>CNs(`#4mSlkIK(b6LRfJ zOhK9}f%r|z<%nZ5v!puxSZ)~w^Bi%3VknduSfqHf^UsAhwRVP>P=FyJln7lPRD+M> z5jgD0G|36h^xzX4PGwk1AO20Dwqi8b^~|>{p>s=Blc zLm39pmaPrmwhxE=@m#P_SZyYA-AeNuIKP)ILq{-YPPF_k_v(mYsLr_f#f$ddcQ9_| z?=PdH1;4YT&)k!XW zqs-<^%g?lhS}a4Ayt2Q8XcJ4K_r|R*$8&!Y6qJvbgKatY&B8Te6R+6USMKvHJ8BvD z54ORZT>jwqOkrEBxr^HLGYt`(br&vrvP2BhzTYexd~l#{PwyO6`Vwl}AvoUTI@d^q z&LMNnF8}^JC6z{C5p7SBk?Cqtp6+AY-RN%PN|sh7+llwXW_ZpU6|b~P%syLXu4QW# zqC6n%@@t=5LC{|}H-_5>`v>TLO7VC}5&orts6ej{*J|q{3i7Cj3az?jJ+fnJ9860` zXJa#!gE+D3>m*`)I3>~o-(3&$1dy!|gTWZS2eE;jmTRau1uY};>(D+7#@z=%kZ`GM(&}P+ z7UV#jkv1Yk6P{?9N=}~Uhy)a_-`tyFO2*pPPWiEt6@2#xSuFf!l#>I-{S$!Tc6poS z4vChbx|Zk+LbzdXa?&;b#g8b#^>86rKx&4-5*@R&uN;#4#x`0GlCr8d5Rj(5oq`;pf&?POCpq$(0JP(tk*%$z<>zm_rEs|3gWI;+ zS|<%7*y}c zr)FXUwB6^*-_H`Vb(QDqVps=WOB(4{vsikQfp`J{4%CaykYjt>OG>LI;J~K-m|U-Y z5CcZJ|L(0z7?ckZORFReSvQ7|XCpx*Sa+17Vz$tNRl#skh#HjioV#Ht&%ZscR_CbG zwH8nbOqOl_A(*>iFVDLLj=Z=1hCg>c)v^`pvTmRRn|wk8+u6xBT7$#a*o{@ALe|m@ zE}Wk~52d(xiOR!IiM^?Qa;dc1|7Obv7Q7SB0ftbS?X_VB8{mwE4OS<*E5Yg@ z&RM`_Sb=rBLdeL;p^>*;hJ#>={r_;J;E>vmTDxqJ;pi^1sfKfqw8d02uKnv5BO9!L z6y?T~bCiGn%G0sw?ZUR}dsEN$m+$fZAD;4mXbBge{QuYX?GZtRnEy|2{@){z$N#H; z|J#u?&ad3>pOO3J&{yDufdSh)UQD8Or4}+Vb7k0J#2XvDiHHmagZ*ze-5Y5tshK}N zD*1y}P>Y(}`UtBm;dCAz_}U@F(a;co8x5@Qs_L;r-i)m+T0!Kks4r18)Y=0ropN{p zH1fW!KSBI|UDPwCim9c!9l1i!Z}CU#g7DkFIXF{PKBi#M;9xQkk4E*~{+2}8As>P- z_CyoDnC$AftUgrzi^02Y^c~{%^z6d@yA(g~+p72p1vFxbAFF5XYx~)(T+)M2Ym*{7 z5Sm;bEG+xuiWSTVQ2A&0FM3U7vbs9D_;PE2^waiK~Lz6ucW-!cM8J 
zzHruA)&(K;7Z;(KuV5R29ZwgkF7s5YkQJL5z;*@6CcBrjSXO(`txLUas9Fw#n^-#G zYT+<^316*SFnGg6f80J`X2Xx)6O#ZA|4)1}OcXUe3kcEckrog-H@4Th%X3v*a$s2D)M;UE`T$pJqR?*Jg)VXot@4=yAM@YU<|72D}%SsWaa@8Spm|? z*|}GA9gCjMp9;0HhKu{dIibvEgi5)p`w2UUW+>pJ^AX<~fM5nRCHOB9IPh#nHukMm zZK~n!|1O`>rT8i93GZH=j~goX(M78CVXHKLuzH0uw|L*TQ;zbpPyI3%2*Q0UBpLM= zF#P|*aO4A!0+|gF5wp?_U12k=uU=gnnsho7ZF;E-$}aqP1|7QKLlWR1-W%}4#Yc#UXQ?MZS#UfAeUGCwQ^ZZu8!84eo9ZWn5rwcjUqd*!j!!}v(d=@> zE1X!(jRT&$JDNo#_F&7?V7TCr(DF#>2i*?L<&fQCrqslA~Tv4Nq3eS$|c!zeKTx8yg{>d(k^E|(Y?+gOY z`!IRFU)+VY{=IJV7zA(vqg6v!1^sp-gXO}ck)9CRdkzpT5Gn29xN`W8cMmZfi98W+8c}hLlO8p% zIC!*0!%OFId>TfL+91_9(EbZkx%=j|kFOk}&L>k<)Ck4Q{$Mm-(56_;B-@~S;H46v zi68tIl9TI^vvsG#G*O|{#lOWg7sW!W4N@o_>qog;j0`7s&Q!gNAS!Nu#@C*$8p2QX z6S6EN-yH#YY-eXIbuFNWC<0_yEd`4=_?#bbS@KSl)rU6URc86JDucMT|F%Z7;1W<`#yg}Ut>Ly5%COFaa? zUx9>XBIL1%IvBsT8R~N&eL!&82y{(NPCjoOWQnz_pXq=0V0)a8PgGHSWDCmwH86%r zG~oO&A+#zI@^~#^qIRdtC~pcM85gULl~V}uk+t$T&D ze96!61H`5HAO9HvHuzHkd>K6eBzHcW#rs4@qS<|@PUy-cb0jYg25nhEz;lbkV zmdo2%*cr{04zCo13D@;A(Y$(jOYBmgtD^h-mEei0AIJaUqW~f@lnMxlO_5t5r}Wtv zaXj}dXh_O~y*XiAs!}3B}Xul-Tib5{eSNu!~oM68+z?_qH2 zW*?B6WSp*lwN_&tcKuz0vx_ig1#5A4&bmKXN}c#K@Ifc_NGG*@a;v%rFUuGg#HDpSIlhzINs(M1~Z!}9F!7wz8 z>6w|?HRVZ_6`7=Kh}sam%V2zbdip*pyfGF?GKdYf1Obf2^t*7##< z%ou8r)vIOxqC$`qq*4(^Hi`I`%IV-R-T^F#cbo5X`7j;@oYX8ZCI?+QMt0-P4o9%jUQ>&u%lCnFGVWD&Mt%b>mu>T1!SX*kyffwL zArDOu4$x*Y1u&Zh)owuaj%sCk?0st6onMYkdSou$De!K zj%_azv8G~3b;p~J)^{q=`2e=$%f+{}k|13(7To|*8VuYMH9?ToNS)?w&J8Uu&fqHHNJ1L%>y?n`ny7S)o z;$pO-tSZz*L2Ch?Joe8%M6qYM%*@Prdd2tot88_aaP{Jjz=Hdw+O^PUQX`9q<5 zyc0aluxG@Uc`?Jf%ZE=pI_B#7=N%ybbT=fla2paX7yT^uz$Ao?cguVP5O?!2D@OfG z{FQ!!)BaKRq}@XbIZ}nsSn!0{Rx}KV#`as*)#BS)g-b0^pKOvMkj>s3n)@i!;+^G( zcQUtHcP}lCI6CcFYAaTN;OL+Xcf2OQ$06C@e2Mf@zXx|p|K7RvR;G=$2iX4R4holF zb68TUAYJ73sR9PngE)h?H5H5NVLgcVCoi)m~sn4_PSiU zowPIi(E=O5*)|J^(lZ_%uRx*)rogmC#Iv$1h5YWgz(sYS36St}95gJAU0bB>Pa1oB zM{U(oK4Y(ppJe=`pLqih2xXXHS3-ofbbhe1IBCz7veC-l^-hqy1Ca~C13-^F-zdOW zX7ebSN2}C+d2Ft$016%O>N@<8+SFi#3Vh$y@q=(-Tp6Sx!hi}Q$79_iSW~H>&Iy|N 
z-A7pA&9q*VAnSysgFUXEdjePLz0)r%3Q#x`R|#xKj_<~Q*C47%E_aa?W)m`>A~t|9 zh;nVawYpz6k|H)|k^HXhUCPI*C+J+gqz$u&@5&Aq^td0O-L;#`SqnY2w%0PW2656!e*IAPf#pf?tJ5DSZ^K&|>aO=L;f-v34&v(ZyMu!f0DTg1*Be z`g(mtc4j-4<27d%j)+Y zx_}8axh`x482bA6gX-3c{Rhm<0%=}R(RTd2UHmqpTNOr#`D(&I;K&>M@DvV`8dlX% z$R~LUNooacUG?B$jUs=qPpLYCRW~8yR?#~+m_D)qZ)Be-WhLsJMVIZ<#)&OyDhNn# z?;Sd5WyH!-at^P#A?Os7-dy-uJZ-RD4}G^d(_*e9_xk;AbZL*ls@(sh1z6NZ>26%Q zjE>5UHgp!*S%-3_E=&Vnwt-61<9u8+^2oUKZ&&m_GE%=O)tXUAkn2si~F8 zQ8acuR^q?ocpP+ke0+`U@QVI<(W*1pigTxbt;x+5Z$A%+pG>87O1zEB_Wv;?mosi#ZyI1(#>l! zoert(B|`!)szWe!uA!kZ!VJ6&z6yxv@`AUQZ+-A8-}r{D;wZuK3N-~Du z>*NI!9&RttQ%yX>DCANOI4Kc}}0vL0xERXc#K2{7k4Es>p}#56JfCgkPI2t+mQAQ}IYpUa60mPq+Buk*miIjTnehDjYxvbK^~n7Z>v5?%dZ z9_P$JvcnLENRG@pV9`kt&$nxQg&bbE7b`s~Zu`AJCTNRfiCahCBy3#3v(mXWpO5cg)eJ~(Z4)RGnNcS^3CKjZ7 z|F477yI>S+C9tqVusUTStLqMCr)grJkm_Ty?|4l=lYIb^#pP%l#b$L-i~)ol6pXXIrhcH;vpT~AXNL>Tc=gfqc2hJ4paS3_EAK=x&Z4>7~cDflxEi@ zlD_IQba%h8Y7CdToZoLCo4jNOLkJg_O6 zjd?v3#8fsXUt{`YN8jf-9N685uMY@k*~tj$9pb}#8TQ*x>;Cw;U!NTaLnbaho!v{* z&leo<1hzMS3DUBU)p@C8hml0Gv##G>wu*A`)^%Lymf!(tvgDaT`KnTt%|n0 z{^aNiF}(&YD)H%GFQ)J_3BuZ!*PrbEFl_@CSa3vs{LIM`cdaI6Zf;VH-Sq_`?6?MR zSkqQ$2~3+-|F|?YKW?MzxA}x~nErIG(F`ryVG6Zf6*wU}MW4xmnb4YZzo3uq`%Zsr zKfWpVQCL+|%y?e)+I7auv13K1e^kuTec-$VwP@Tc7y2jArnr86=%K>I(bc=afcC9c zyt;G0QH`RLXqEYr+{OZ^?JZ=FX}iZ@+48Zu$hrn_a{Qn=Jtv%m_9eXX%jd%;)2icyw1U%xD!e`S`em5I;zt057J(T$T-; z+R2-J`sjuaOqoNGtdv8pTU%R0HLtGg|J3xHY>KUOI$JK!dM)(FJZD69!t%96LA)2R zZFP@{En??>s9C=?(uK9oB;6vTIwxeWtvW+=TX&x;mG3%b&;~1q2tx_-k^%!ex7v~? 
zN}+~V%!<`t8H7`1D7@-quo1Eg|z- z5F`^~VuG#l9EKgzw|s9UfRIi~^bBSW`;0uuJQg>`&M4`k(FGHi|Bo|Ul^Of}Xta_% zGw5F5wHaGgWC@puIS-#wsX+b5HA#H;ZsjV)Uzw#OSERk19Yh>jX;=tutC z&_@XIvrlJnq-1IhGa0#>t4Z@Cgf?c}$BXaYy^98daoYlfR5()VWWV)+(h68aM%_yf zGoxtl7Tm$**fjUkN8EA>R1)(gYlnxJyh$r(XOKItHb(IP+wv7lIYQOWPf0~&(?nofUp{`0&sS`^V<(`Sih(!-6O+F({Zmhz?;^u{`(e4&HC2Q-~pz% zmUruWIa`$yxu*iNz}y*Ex{sAYx%~Uq){EySqtEV;rIZ94A4VpOj9ltJ=PE5Ldx}N3 z(o>MJpS@Jb;sdWMn93tOipB#HIdYO8on;736lifd^@$ z)<&JOBZX-ptg9XtC)Fw+b3Gpx$lb3J%s)Uz!&kpy@d_2B3_7iuI>e5SDg&mhgI|?6 z<2DxbtHuZ3njw2jG_SdkkbLd}7}VDB!dE_zJK*8>nVAtox;!6Z;>(u)X~JOAsP-Jg z{=G;S12Cy9i9Ci>-dL7*{WC}lByISV*KPdiKIt-jet)5Rc`r%w>DFP|;2!;*o)yki z)+XL9^0R{nRw+$(36d2#Tkb!8^)XVNStFGsCP5;{I5wn-N0}O=k{^xHo>9yG@-g5 z{y}OfDLH?3<{X>DdIMbjy0T+IL}+rQX(!DW8>6|}lN*oXY1REZBfkPywz-0P zHps}!*E3Cwe*(tJOLbI|22Fv~JBx6X-$R%$1(SwGEU2x}Byahi)VdVwX^il(Zkck0 zrM76nt3VBvD9!%mF)=aqk*dO<1chKCGDPL4>ZJ3-f-JoKLkx*+!g=!QQBwS%Af)2P zCQW-FMP-!&5Wvl{2^hLiERxAW1ac$?-_S3mn^|#?jU8R#C;NcO>U%4{7w@J zNE1MvxRlPD+lr**Hh6~_B<31&zvEXA!$b&AYA$hLexNsvTKe(pZmMNJp zH&=wKGnp(>A>gG3_??U0%jN!od-=9<^ zf8SaeFlbroW7?STmtmnLFN7^Y_mgyZM-v8C{;1wbBpw+_t^2a7^&wCk8S>bCzH6Ot z>l*aijd6cga&xr^Je=_3o`K1wV-YbHW6k3sAmEyQcCl0e%-_n{&t*lrhVk!8xPxrv z-pkD*#SGvD-EpI%V^!T(dLKG(IDbY^1ywPj4ko)}bMtiDbuP)kc&W-eXOH9k-mCV0 zFm18aS@q#3+N*5tO)!iDcGzAbC;Na~h&b{?nMxk8f4o~<9GNyIz|?xMNVI&MfAkv8 zy?%(fNKcPFUtHV+B+9Yl=1R~Gpj7BMZ8ihvn@^r!|F_7r6)0c)AQidM8&^li7*LXD zw&`Q?h9en*SSlts)uU&tPWipPohUh~^()a^-K{%PN^3Pj`Si01=8 zxb4ncDot4VV)wN}p73s66M*^9AD12>22=HfrgZ7QN>v6xhE4pT!b>_ar9#FK86_|h z=)T2{#vrKc7tTuRdsfzgsaq%9e*kI|zXr4|w0LuYZ0T>emG6~w0Rpj?STr7H zU(GB2<}{Rjiye~2uXO(E6aIPwu3kj!qQheVu8ftfMZ_zgb(lzqcO)YDf3qnM&` zftXEtcJ3e26G!^Cj12Z3*w5zKm!AL}%r_5^%78>(j4Wry*1PuL^C!Fde~m%%ckuM& z#t``|>)mDEjmqlVKe@SEGl6Y1L${UFrL|f5wi0PA2c1YEO!xpFyAtjORQ)5d$*m-w6Mb z0JyT%`CmH-4bidP%r*C*wL_?og)lK_#1oNCm7>JqH81%mXBPPLZn*$!tYl)(B;$Vu z!%cTQS|tCUFtR4N&4vO_)7GCu=ba;;{#Z6W5P8M*5O82)*!e}4_hdfhb(RcZN{ks- zE1f36?Xh7t@pxekZV%8A(4xl_^(u^Uc7Kblzm8m)?*$HG<4lDtjY@y6zYxu>0VW+1 
zu(V=S9z+d%joD!C-ru*QI;@Lfx`P>ECu$P#WAyI$pXv8mVh}V`I6F!_%vR4k_|+az z`w}-=@Yu-4EqbsLu)e3u1zU=Do%!*wq1>4`C`R78@mhSWPE!#--xTtP7n5 zO9*R%^r+c3-%WYbj&yy$DZz3)JMW^^o3ZNKWN#IJZf8E^aT+xZvbn!q2|2^^&;&VC zwKDwDE+NBgdpxLh@wi`oto()*$-(~YF?I3j6NLP*j&h1JtUq|hCuar#D+CTs4rHo% zz!zg4vweK9N(YS$J}3}$k0VOp#?IU!xjDVd%=CuT{dbkrCZMAb`1%}fe#{-C&Rbd0 zCvF3?c%#lakf&z~%355dwrf%^3cq83lRH-A&TNPE6@vBFCXK=Rpd1Fj?jaG`tc`3! z!sV)-g9)l$kdA8ab2(@ETtxiWLPD}8P`u#A?*~P`dc)uYRK?}ItJ&$>Mtsf!cS%2} zqBcBe4`LS*``9Nlz$|}*2CD!*O$r#ju|sM<9c0iIfYra3GKuc)K5CED(z1DdUjY#ush8w%Wlbi7QdtVTsZooGi!QDfW4066^nEbEHM^{CP7~SRa1OKbAb8E}N z$9Gd(dPLV@IGxPDtThcRjV3d-K#6+>5gB1GnB3ps+gb>~eC8_9bKuA)ycAGlnPHPEc`<~W+%K^X zXPv%^GYGd>9B1#hCq;)*nmc%0_}Fxs@8(1JX@5yb`t8o^SE7s|%jjfUzt$MuQHG#wCsk6)#EWt>Rs^a4B z>;qd2-jiL>!lqL;9Mlan1?*l|IQdvPUQH;dDDg(g10L#6-C{QLy;%F_xf-v4SbkyZ~aYF+6=x z&d6r;3#n$PVcgG@TcRRs`1?3h^iQ2lj+7S{W7b9kz_dR1h9z0cgwMj|l&A>R)gdD^ zvRjtswA}*QOoEPl!y~;fK$@8`XxrI0Hj=IrGwu5XsSgy1=%PQv4lO}3c1mLd^>*_Z z@S&!h=gTs5x8my>$-&WuFb*9o@kp6mlKI{XYc?3B;6h`{^JcsF*;8D6oii7xhug5H zoWyNzz9O-XC3e07wei;SJE=Z`%XzqfHX(o#dKdm;pCAU`xd3*J;7B>WP25z(eFbYC zLa|eZR6m_@!I53|3Fn))M0$cfB$&{dRK|HY^8gJ2?71IOqSa|7=+OR?no+qDG4si+ z@;Abv@_t%BDVuhxTfVH`rEV$3j+e(p+v!^@(Jy>i?K@CkFHV`Swm}k&gG#49kU6v@ z!if^4V{kV}6ysjt1X#7EPxcI{(|FRq1hYH8#yWfa)D1Z(0&yGWOx>uWJpb#lw;-rs z9>fVs#9RLSHDME?g#9BS0xqwn#%fXchAH(GskgNn)(Gu+PySuaq6?*CwW3yHm{%!q z+S-3*%)9mLR&NbfLfwMU#>UnQAmsSo{Q*h3bkW2%=*|_dqHIG2&x((a#ZRKvzt6$+ z`Md@vp5AJhn!hkSTvrA`i{7AP119+l$uc@S4U+EL#|_@F2t4{eC?k;WqdMOxTtPUS zV#w*OG?^I+EZtx5zI*M~?a4pAFqUS9yqdaKY7m8UaaoeqGr>Z&>_qS3;G;2;^`F#q z_PyCHbqwlk6cAmyFaI1gz&V90xQ0}v6mILy8ZXieNVG9ofIVtceae&M*S-7PHLidh z>&i-t$eWZH*DycFyI#bqwk*2j=Q_ilpfJhn3$}`%t}Ye|&*nZa+=o-leR1i|)&^&p z%#%+jt64l;_7{)>S6BV9Iy-m7 z7-Rci5h}XDjnlv zxKCg+{HNS??zlP(Px+j&cjHY!xO}hl>)Ytf6~YRtA>*PvaW)`DuH^{4z(Bq>hpJd5L^qM`%wD@3pV(#F%Uf#M(n|Hb8^k{$6zare-<6M+>S zr9d4dg0O<=;vXJJTMt#=gu8GzkfAD-c*o-$MFM}^?(S(BJ(XLW3+ATBzN~qj0{+S> zaSb}KltSf8OO?-1_}pwop1S?3PO1Ay#pE$A^V6PoY{b(>h+!qF9NUl{v}d9@5idk2 
z&V$`7m+!~CJ`Va@h%)G3Irz8M#=M#VqOh@BC)aZQ{kIvgjw}1?6mGmFEuGJ>gGETg zw#C43QqQ|(Kh;Re-94)L^Yl|pg&qb0tE+Q6T35COS=sN~9&SSC#5lo0f@;XHvdyws zP@kNg0MatxHXdUbm(g79hb2}0yR@}f{Si2~JD+Mt#rq%D^99v4>||wDv}F>Ekojt~ zs*V{#*nSxGxRd9gZm_8z;s8vKMd$t&{|@r?w`j4JzRd8%JI58!94>yA(S1~VX*VZ8tm;Yxewx)=P~X13xySH?=NBuC%H)togn3#E|(AFyPBKwS6sHl*1fUj zbidR;M(wZaNoKk}!xv?j;% z{yqqOxAGUn_Ke8u8Ro|&cxY}Nvo#1$ISREa(8oCNz1F7d&EAeIy@GXf7YABH-*rdw zR>WM-sGUjhzGi2rzHCCZ^ae|z!t%0f@K02yfzcTxVv{fighZmi^3R>ZJ7abFoIn1A zy&W<#p3IDgTm{p~cao!L1P9Lf+OX5!G`s$L;5_^YaDzj3FJm{eYSrz{K5P9Bo<3Ov zMr~>DZyF7CHOfEUg7AnZF<)-nqWrbi4)AWfB*FY=%N^44N#9@St^O@bJdz0-+&9WI zFdL=~PGdct4~UT%mBJwz<4nz`P2mx|2nv&ru@mw;_fmSlj92gYB5FM#?1!=mAcLQh z#!{oo>ulFvU*GoQ)wwKcqim47{YJ_&AIRtv%Gx6&)hsNXFVH%f8dF{Ju~6nbs?4+f zaR3j6F7t5^I%2pQ?Kh{ zwk(63sq)c-<;rbI&+mGvw$~q#`{wDe)-0N@PS_!d>bu&!H&|Y7ZnBncmi1gU1mC5Q zwCQ>gXz~SnP@AjWAe^*jMwy9jW=HtDF^s37dM|uRwt4~67s&7nGhu3de9B5{SrBL1 z5ip#o_VxTn?%qOmLxWWINZy0K+`>eN(0uCAXPeJXMmD>zHbu!|_tqt@IzS&2DnGQU zhV(~!Ss}MIS&`8XMJQcK3|ss-kF@lM_Ay{&I>K<|gk!EU?ThDZOWNI&>8K?Z6cy9{ zVUSCM=!zWFS-I!CKi@1pXy4nh7CAh11xtl5&C*M?a; z>t!yNKQ=YF?yTfxZjqnC2wL}ngd^NWmxE%h{JiL`z=!>cCfU2YEZ;zqIg2i~T{3@c z><>h)9Ja?DXcyzbN*8sDM5CW@h?j5v>kvDWP0^yZ>BqN7JOpb@K53B;T#$qymHocsES}O@DFh{34aR!*h06T3-CVjXUCE|sJWy# ziILq@TVX#G^vwud3kCkqy|%^YiCgPwur&Su~n zE#SM`RgDS+mb`CHLiRq}%qmIwrvOx|81|%-1^I>fNiLQTe92% z=)32~iv0>GFZT7Rv@$5d+Gd?4HWff4(o9JM5*1Y6xkH|r>3X|cSmbw?T=Is#=6io3 zf=rY8OXr%qAqz`PwGn5RCxREsmyd2#1KPE$R`rJgp+TLWv?8y@bJgYeVS7Y&#-(m# zDVI%SaFg7j%Kr!fUjlAU@w8S?Y=Kv4*#_4A>Z|d!U;=yfZ?`c7^AKBw1a0xYRW&?K zU!;}06OZ)`7x!}3@EDL|{9*2f_7xt~cB6+8Lk-BWpnWUD2bWi0!gwy5Tra)JEWUHh zfe1yDNfYQ=cM&`4;6gvlmuUW23W9bo*@j3@|uJiU1s84jJa_@JZG2CLXCFd zcAHZ=^Q?a6uGnReENajEWd9(4fg}*|k2_uTEccEjqP=QYFf{M2su3u1{E)X%iK-dd zjh~q8KTi=zRPgm1w1A|nJUO4^MY#+7W#=>K+1mE* zr#C!C_%X7fW=WT==-(ww#>4eFLH^K+QYWez2VF#6-+;POy$iKHJwy{a5B&SP{q47P z5njctkngr)PFyy38rLCXxfj;zmsbBP{l-9}*LOqYzGsS-f6aYpl>j8U<|l|*=+>h` 
zzffpeDg5O&vdM$gSj@1#fXk89@5tBmNn*`}IDj%9KtZX!jZD`H+3W`m<=E}l6VAQFTg6-*Ra_?0@fVkQ7p`0@tlhjSVllSZJt<95Wx!L z5$EUK`VFR~lxMzzH3o;VSav@2YKXlgU@x&`+|0!cy!gCT*?5hDt%K}5P=+O69OIei z(34(rzZ`jA=9g}XT;_=VrP9Y%xHvl(S_@Rj32c1i3F`*?k^w3Qu`&_XeW3(s0TG6d zSHdbDU@ZegXZg{CT;M!D?#fWl{k8Pykc%wX3Qmv_+b1oavQ1ow6fn<$!Nl9BWmMV4 zgOwEbdGVZ5y5gUI>+>f-Gn9fgILh$V@4<5a91F^~^0DrlfE8tME2>zT1k+ugmcw`Q zLDlCLEGQPZ3?Ny0qWF}J>pYB{vEtF8XSNjh;Ud6OB~5tMW6S@sCIEjJdhP%K literal 0 HcmV?d00001 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d90690d --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# Temporary storage for data ingestion files +/data + +# Files and directories created by pub. +.dart_tool/ +.packages + +# Conventional directory for build output. +/build/ +main.db + +# Nix +/result diff --git a/.sqlfluff b/.sqlfluff new file mode 100644 index 0000000..261dfcc --- /dev/null +++ b/.sqlfluff @@ -0,0 +1,8 @@ +[sqlfluff] +dialect = sqlite +exclude_rules = L003 + +[sqlfluff:rules] +tab_space_size = 2 +max_line_length = 80 +indent_unit = space \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1cbd099 --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# ja_db + +[![built with nix](https://builtwithnix.org/badge.svg)](https://builtwithnix.org) + + diff --git a/bin/common.dart b/bin/common.dart new file mode 100644 index 0000000..e69de29 diff --git a/bin/ja_db.dart b/bin/ja_db.dart new file mode 100644 index 0000000..c4d121f --- /dev/null +++ b/bin/ja_db.dart @@ -0,0 +1,16 @@ +import 'dart:io'; + +import 'package:sqflite_common_ffi/sqflite_ffi.dart'; + +import 'jmdict/parser.dart'; +import 'kanjidic/parser.dart'; +import 'radkfile/parser.dart'; + +Future main(List arguments) async { + final db = await databaseFactoryFfi + .openDatabase(Directory.current.uri.resolve('main.db').path); + await addDataFromJMdict(db); + await addDataFromRADKFILE(db); + await addDataFromKANJIDIC(db); +} + diff --git a/bin/jmdict/objects.dart 
b/bin/jmdict/objects.dart
new file mode 100644
index 0000000..8c28200
--- /dev/null
+++ b/bin/jmdict/objects.dart
@@ -0,0 +1,267 @@
+import '../common.dart';
+import '../objects.dart';
+
+/// Names of the SQLite tables that receive JMdict data.
+class TableNames {
+  static const String entry = 'JMdict_Entry';
+  static const String entryByKana = 'JMdict_EntryByKana';
+  static const String entryByEnglish = 'JMdict_EntryByEnglish';
+  static const String kanjiElement = 'JMdict_KanjiElement';
+  static const String kanjiInfo = 'JMdict_KanjiElementInfo';
+  static const String readingElement = 'JMdict_ReadingElement';
+  static const String readingInfo = 'JMdict_ReadingElementInfo';
+  static const String readingRestriction = 'JMdict_ReadingElementRestriction';
+  static const String sense = 'JMdict_Sense';
+  static const String senseAntonyms = 'JMdict_SenseAntonym';
+  static const String senseDialect = 'JMdict_SenseDialect';
+  static const String senseField = 'JMdict_SenseField';
+  static const String senseGlossary = 'JMdict_SenseGlossary';
+  static const String senseInfo = 'JMdict_SenseInfo';
+  static const String senseLanguageSource = 'JMdict_SenseLanguageSource';
+  static const String senseMisc = 'JMdict_SenseMisc';
+  static const String sensePOS = 'JMdict_SensePOS';
+  static const String senseRestrictedToKanji = 'JMdict_SenseRestrictedToKanji';
+  static const String senseRestrictedToReading = 'JMdict_SenseRestrictedToReading';
+  static const String senseSeeAlso = 'JMdict_SenseSeeAlso';
+}
+
+/// Columns shared by kanji and reading elements: the written form and the
+/// numeric suffix of each priority tag (`news`, `ichi`, `spec`, `gai`, `nf`).
+abstract class Element extends SQLWritable {
+  final String reading;
+  final int? news;
+  final int? ichi;
+  final int? spec;
+  final int? gai;
+  final int? nf;
+
+  const Element({
+    required this.reading,
+    this.news,
+    this.ichi,
+    this.spec,
+    this.gai,
+    this.nf,
+  });
+
+  @override
+  Map<String, Object?> get sqlValue => {
+        'reading': reading,
+        'news': news,
+        'ichi': ichi,
+        'spec': spec,
+        'gai': gai,
+        'nf': nf,
+      };
+}
+
+/// A kanji spelling of an entry (parsed from `k_ele`) and its info tags.
+class KanjiElement extends Element {
+  List<String> info;
+
+  KanjiElement({
+    this.info = const [],
+    required String reading,
+    int? news,
+    int? ichi,
+    int? spec,
+    int? gai,
+    int? nf,
+  }) : super(
+          reading: reading,
+          news: news,
+          ichi: ichi,
+          spec: spec,
+          gai: gai,
+          nf: nf,
+        );
+}
+
+/// A reading of an entry (parsed from `r_ele`) with its info tags and
+/// restrictions.
+class ReadingElement extends Element {
+  List<String> info;
+  List<String> restrictions;
+
+  ReadingElement({
+    this.info = const [],
+    this.restrictions = const [],
+    required String reading,
+    int? news,
+    int? ichi,
+    int? spec,
+    int? gai,
+    int? nf,
+  }) : super(
+          reading: reading,
+          news: news,
+          ichi: ichi,
+          spec: spec,
+          gai: gai,
+          nf: nf,
+        );
+}
+
+/// Source-language information attached to a sense (parsed from `lsource`).
+class LanguageSource extends SQLWritable {
+  final String language;
+  final String? phrase;
+  final bool fullyDescribesSense;
+  final bool constructedFromSmallerWords;
+
+  const LanguageSource({
+    required this.language,
+    this.phrase,
+    this.fullyDescribesSense = true,
+    this.constructedFromSmallerWords = false,
+  });
+
+  @override
+  Map<String, Object?> get sqlValue => {
+        'language': language,
+        'phrase': phrase,
+        // sqflite does not accept bool values; store flags as INTEGER 0/1.
+        'fullyDescribesSense': fullyDescribesSense ? 1 : 0,
+        'constructedFromSmallerWords': constructedFromSmallerWords ? 1 : 0,
+      };
+}
+
+/// A translation of a sense (parsed from `gloss`).
+class Glossary extends SQLWritable {
+  final String language;
+  final String phrase;
+  final String? type;
+
+  const Glossary({
+    required this.language,
+    required this.phrase,
+    this.type,
+  });
+
+  @override
+  Map<String, Object?> get sqlValue => {
+        'language': language,
+        'phrase': phrase,
+        'type': type,
+      };
+}
+
+/// Matches strings made up solely of katakana, hiragana and `ー`.
+final kanaRegex =
+    RegExp(r'^[\p{Script=Katakana}\p{Script=Hiragana}ー]+$', unicode: true);
+
+/// The parts of a textual cross-reference: `・`-separated kanji, reading and
+/// sense number, of which at least one of kanji/reading is present.
+class XRefParts {
+  final String? kanjiRef;
+  final String? readingRef;
+  final int? senseNum;
+
+  const XRefParts({
+    this.kanjiRef,
+    this.readingRef,
+    this.senseNum,
+  }) : assert(kanjiRef != null || readingRef != null);
+
+  /// Parses a cross-reference of the form `word`, `word・sense`,
+  /// `kanji・reading` or `kanji・reading・sense`.
+  ///
+  /// A lone `word` is treated as a reading when it is all kana and as a kanji
+  /// spelling otherwise. Throws a [FormatException] when [s] has more than
+  /// three parts or its third part is not an integer.
+  factory XRefParts.fromString(String s) {
+    final parts = s.split('・');
+    if (parts.length == 1) {
+      return XRefParts._fromWord(parts[0]);
+    }
+    if (parts.length == 2) {
+      final senseNum = int.tryParse(parts[1]);
+      return senseNum == null
+          ? XRefParts(kanjiRef: parts[0], readingRef: parts[1])
+          : XRefParts._fromWord(parts[0], senseNum: senseNum);
+    }
+    if (parts.length == 3) {
+      return XRefParts(
+        kanjiRef: parts[0],
+        readingRef: parts[1],
+        senseNum: int.parse(parts[2]),
+      );
+    }
+
+    // No kanji or reading can be extracted from more than three parts;
+    // fail loudly instead of building a reference with both unset.
+    throw FormatException('Malformed cross-reference', s);
+  }
+
+  /// Builds a reference from a single [word], which counts as a reading
+  /// when it matches [kanaRegex] and as a kanji spelling otherwise.
+  factory XRefParts._fromWord(String word, {int? senseNum}) =>
+      word.contains(kanaRegex)
+          ? XRefParts(readingRef: word, senseNum: senseNum)
+          : XRefParts(kanjiRef: word, senseNum: senseNum);
+}
+
+/// A cross-reference identified by entry id and reading.
+class XRef {
+  final String entryId;
+  final String reading;
+
+  const XRef({
+    required this.entryId,
+    required this.reading,
+  });
+}
+
+/// One sense of an entry together with its tags, glosses and references.
+class Sense extends SQLWritable {
+  final int id;
+  final List<XRefParts> antonyms;
+  final List<String> dialects;
+  final List<String> fields;
+  final List<String> info;
+  final List<LanguageSource> languageSource;
+  final List<Glossary> glossary;
+  final List<String> misc;
+  final List<String> pos;
+  final List<String> restrictedToKanji;
+  final List<String> restrictedToReading;
+  final List<XRefParts> seeAlso;
+
+  const Sense({
+    required this.id,
+    this.antonyms = const [],
+    this.dialects = const [],
+    this.fields = const [],
+    this.info = const [],
+    this.languageSource = const [],
+    this.glossary = const [],
+    this.misc = const [],
+    this.pos = const [],
+    this.restrictedToKanji = const [],
+    this.restrictedToReading = const [],
+    this.seeAlso = const [],
+  });
+
+  /// Contributes no columns itself; the JMdict parser adds `id` and `entryId`
+  /// when inserting a sense.
+  @override
+  Map<String, Object?> get sqlValue => {};
+}
+
+/// A JMdict entry: its id plus kanji elements, reading elements and senses.
+class Entry extends SQLWritable {
+  final int id;
+  final List<KanjiElement> kanji;
+  final List<ReadingElement> readings;
+  final List<Sense> senses;
+
+  const Entry({
+    required this.id,
+    required this.kanji,
+    required this.readings,
+    required this.senses,
+  });
+
+  @override
+  Map<String, Object?> get sqlValue => {'id': id};
+}
diff --git a/bin/jmdict/parser.dart b/bin/jmdict/parser.dart new file mode 100644 index 0000000..374b875 --- /dev/null +++ b/bin/jmdict/parser.dart @@ -0,0 +1,346 @@ +import 'dart:collection'; +import 'dart:io'; + +import 'package:sqflite_common/sqlite_api.dart'; +import 'package:xml/xml.dart'; + +import '../romaji_transliteration.dart'; +import 'objects.dart'; + +List getPriNums(XmlElement e, String prefix) { + int? news, ichi, spec, gai, nf; + for (final pri in e.findElements('${prefix}_pri')) { + final txt = pri.innerText; + if (txt.startsWith('news')) + news = int.parse(txt.substring(4)); + else if (txt.startsWith('ichi')) + ichi = int.parse(txt.substring(4)); + else if (txt.startsWith('spec')) + spec = int.parse(txt.substring(4)); + else if (txt.startsWith('gai')) + gai = int.parse(txt.substring(3)); + else if (txt.startsWith('nf')) nf = int.parse(txt.substring(2)); + } + return [news, ichi, spec, gai, nf]; +} + +List transformXML(XmlElement root) { + final List entries = []; + + int senseId = 0; + for (final entry in root.childElements) { + final entryId = int.parse(entry.findElements('ent_seq').first.innerText); + + final List kanjiEls = []; + final List readingEls = []; + final List senses = []; + + for (final k_ele in entry.findAllElements('k_ele')) { + final ke_pri = getPriNums(k_ele, 'ke'); + kanjiEls.add( + KanjiElement( + info: k_ele.findElements('ke_inf').map((e) => e.innerText).toList(), + reading: k_ele.findElements('keb').first.innerText, + news: ke_pri[0], + ichi: ke_pri[1], + spec: ke_pri[2], + gai: ke_pri[3], + nf: ke_pri[4], + ), + ); + } + + for (final r_ele in entry.findAllElements('r_ele')) { + final re_pri = getPriNums(r_ele, 're'); + readingEls.add( + ReadingElement( + info: r_ele + .findElements('re_inf') + .map((e) => e.innerText.substring(1, e.innerText.length - 1)) + .toList(), + restrictions: + r_ele.findElements('re_restr').map((e) => e.innerText).toList(), + reading: r_ele.findElements('reb').first.innerText, + news: re_pri[0], + 
ichi: re_pri[1], + spec: re_pri[2], + gai: re_pri[3], + nf: re_pri[4], + ), + ); + } + + for (final sense in entry.findAllElements('sense')) { + senseId++; + senses.add( + Sense( + id: senseId, + restrictedToKanji: + sense.findElements('stagk').map((e) => e.innerText).toList(), + restrictedToReading: + sense.findElements('stagr').map((e) => e.innerText).toList(), + pos: sense + .findElements('pos') + .map((e) => e.innerText.substring(1, e.innerText.length - 1)) + .toList(), + misc: sense + .findElements('misc') + .map((e) => e.innerText.substring(1, e.innerText.length - 1)) + .toList(), + dialects: sense + .findElements('dial') + .map((e) => e.innerText.substring(1, e.innerText.length - 1)) + .toList(), + info: sense.findElements('s_inf').map((e) => e.innerText).toList(), + languageSource: sense + .findElements('lsource') + .map( + (e) => LanguageSource( + language: e.getAttribute('xml:lang') ?? 'eng', + fullyDescribesSense: e.getAttribute('ls_type') == 'part', + constructedFromSmallerWords: + e.getAttribute('ls_wasei') == 'y', + ), + ) + .toList(), + glossary: sense + .findElements('gloss') + .map( + (e) => Glossary( + language: e.getAttribute('xml:lang') ?? 
'eng', + phrase: e.innerText, + type: e.getAttribute('g_type'), + ), + ) + .toList(), + antonyms: sense + .findElements('ant') + .map((e) => XRefParts.fromString(e.innerText)) + .toList(), + seeAlso: sense + .findElements('xref') + .map((e) => XRefParts.fromString(e.innerText)) + .toList(), + ), + ); + } + + entries.add( + Entry( + id: entryId, + kanji: kanjiEls, + readings: readingEls, + senses: senses, + ), + ); + } + + return entries; +} + +Future insertIntoDB(List entries, Database db) async { + print(' [JMdict] Batch 1'); + Batch b = db.batch(); + for (final e in entries) { + b.insert(TableNames.entry, e.sqlValue); + for (final k in e.kanji) { + b.insert(TableNames.kanjiElement, k.sqlValue..addAll({'entryId': e.id})); + // b.insert( + // TableNames.entryByKana, + // {'entryId': e.id, 'kana': transliterateKatakanaToHiragana(k.reading)}, + // // Some entries have the same reading twice with difference in katakana and hiragana + // conflictAlgorithm: ConflictAlgorithm.ignore, + // ); + for (final i in k.info) { + b.insert( + TableNames.kanjiInfo, + {'entryId': e.id, 'reading': k.reading, 'info': i}, + ); + } + } + for (final r in e.readings) { + b.insert( + TableNames.readingElement, + r.sqlValue..addAll({'entryId': e.id}), + ); + + b.insert( + TableNames.entryByKana, + {'entryId': e.id, 'kana': transliterateKanaToLatin(r.reading)}, + // Some entries have the same reading twice with difference in katakana and hiragana + conflictAlgorithm: ConflictAlgorithm.ignore, + ); + for (final i in r.info) { + b.insert( + TableNames.readingInfo, + {'entryId': e.id, 'reading': r.reading, 'info': i}, + ); + } + for (final res in r.restrictions) { + b.insert( + TableNames.readingRestriction, + {'entryId': e.id, 'reading': r.reading, 'restriction': res}, + ); + } + } + + for (final s in e.senses) { + for (final g in s.glossary) { + if (g.language == "eng") + b.insert( + TableNames.entryByEnglish, + {'entryId': e.id, 'english': g.phrase}, + // Some entries have the same reading 
twice with difference in katakana and hiragana + conflictAlgorithm: ConflictAlgorithm.ignore, + ); + } + } + } + + await b.commit(); + + print(' [JMdict] Building trees'); + SplayTreeMap> entriesByKanji = SplayTreeMap(); + for (final entry in entries) { + for (final kanji in entry.kanji) { + if (entriesByKanji.containsKey(kanji.reading)) { + entriesByKanji.update(kanji.reading, (list) => list..add(entry)); + } else { + entriesByKanji.putIfAbsent(kanji.reading, () => {entry}); + } + } + } + SplayTreeMap> entriesByReading = SplayTreeMap(); + for (final entry in entries) { + for (final reading in entry.readings) { + if (entriesByReading.containsKey(reading.reading)) { + entriesByReading.update(reading.reading, (list) => list..add(entry)); + } else { + entriesByReading.putIfAbsent(reading.reading, () => {entry}); + } + } + } + + print(' [JMdict] Batch 2'); + b = db.batch(); + + for (final e in entries) { + for (final s in e.senses) { + b.insert( + TableNames.sense, s.sqlValue..addAll({'id': s.id, 'entryId': e.id})); + + for (final d in s.dialects) { + b.insert(TableNames.senseDialect, {'senseId': s.id, 'dialect': d}); + } + for (final f in s.fields) { + b.insert(TableNames.senseField, {'senseId': s.id, 'field': f}); + } + for (final i in s.info) { + b.insert(TableNames.senseInfo, {'senseId': s.id, 'info': i}); + } + for (final m in s.misc) { + b.insert(TableNames.senseMisc, {'senseId': s.id, 'misc': m}); + } + for (final p in s.pos) { + b.insert(TableNames.sensePOS, {'senseId': s.id, 'pos': p}); + } + for (final l in s.languageSource) { + b.insert( + TableNames.senseLanguageSource, + l.sqlValue..addAll({'senseId': s.id}), + ); + } + for (final rk in s.restrictedToKanji) { + b.insert( + TableNames.senseRestrictedToKanji, + {'entryId': e.id, 'senseId': s.id, 'kanji': rk}, + ); + } + for (final rr in s.restrictedToReading) { + b.insert( + TableNames.senseRestrictedToReading, + {'entryId': e.id, 'senseId': s.id, 'reading': rr}, + ); + } + for (final ls in s.languageSource) 
{ + b.insert( + TableNames.senseLanguageSource, + ls.sqlValue..addAll({'senseId': s.id}), + ); + } + for (final g in s.glossary) { + if (g.language == 'eng') + b.insert( + TableNames.senseGlossary, + g.sqlValue..addAll({'senseId': s.id}), + // There are some duplicate glossary, especially in + // the other languages. + conflictAlgorithm: ConflictAlgorithm.ignore, + ); + } + + for (final xref in s.seeAlso) { + final Set entries; + if (xref.kanjiRef != null && xref.readingRef != null) { + entries = entriesByKanji[xref.kanjiRef]! + .difference(entriesByReading[xref.readingRef]!); + } else if (xref.kanjiRef != null) { + entries = entriesByKanji[xref.kanjiRef]!; + } else { + entries = entriesByReading[xref.readingRef]!; + } + for (final ex in entries) + if (!(xref.senseNum != null && xref.senseNum! > ex.senses.length)) { + b.insert( + TableNames.senseSeeAlso, + { + 'senseId': s.id, + 'xrefEntryId': ex.id, + 'seeAlsoKanji': xref.kanjiRef, + 'seeAlsoReading': xref.readingRef, + 'seeAlsoSense': xref.senseNum, + }, + ); + } + } + for (final ant in s.antonyms) { + final Set entries; + if (ant.kanjiRef != null && ant.readingRef != null) { + entries = entriesByKanji[ant.kanjiRef]! + .difference(entriesByReading[ant.readingRef]!); + } else if (ant.kanjiRef != null) { + entries = entriesByKanji[ant.kanjiRef]!; + } else { + entries = entriesByReading[ant.readingRef]!; + } + for (final ex in entries) { + if (!(ant.senseNum != null && ant.senseNum! 
> ex.senses.length)) { + b.insert(TableNames.senseAntonyms, { + 'senseId': s.id, + 'xrefEntryId': ex.id, + 'antonymKanji': ant.kanjiRef, + 'antonymReading': ant.readingRef, + 'antonymSense': ant.senseNum, + }); + } + } + } + } + } + + await b.commit(); +} + +Future addDataFromJMdict(Database db) async { + print('[JMdict] Reading file...'); + String rawXML = File('data/JMdict.xml').readAsStringSync(); + + print('[JMdict] Parsing XML...'); + XmlElement root = XmlDocument.parse(rawXML).getElement('JMdict')!; + + print('[JMdict] Transforming data...'); + final entries = transformXML(root); + + print('[JMdict] Writing to database...'); + await insertIntoDB(entries, db); +} diff --git a/bin/kanjidic/objects.dart b/bin/kanjidic/objects.dart new file mode 100644 index 0000000..c619625 --- /dev/null +++ b/bin/kanjidic/objects.dart @@ -0,0 +1,284 @@ +import '../objects.dart'; + +class TableNames { + static const String character = 'KANJIDIC_Character'; + static const String radicalName = 'KANJIDIC_RadicalName'; + static const String codepoint = 'KANJIDIC_Codepoint'; + static const String radical = 'KANJIDIC_Radical'; + static const String strokeMiscount = 'KANJIDIC_StrokeMiscount'; + static const String variant = 'KANJIDIC_Variant'; + static const String dictionaryReference = '_KANJIDIC_DictionaryReference_Part1'; + static const String dictionaryReferenceMoro = '_KANJIDIC_DictionaryReference_Moro'; + static const String queryCode = 'KANJIDIC_QueryCode'; + static const String reading = 'KANJIDIC_Reading'; + static const String kunyomi = 'KANJIDIC_Kunyomi'; + static const String onyomi = 'KANJIDIC_Onyomi'; + static const String meaning = 'KANJIDIC_Meaning'; + static const String nanori = 'KANJIDIC_Nanori'; +} + +class CodePoint extends SQLWritable { + final String kanji; + final String type; + final String codepoint; + + const CodePoint({ + required this.kanji, + required this.type, + required this.codepoint, + }); + + @override + Map get sqlValue => { + 'kanji': kanji, + 
'type': type,
+        'codepoint': codepoint,
+      };
+}
+
+/// One `rad_value` of a character; [type] is its `rad_type` attribute.
+class Radical extends SQLWritable {
+  final String kanji;
+  final String type;
+  final String radical;
+
+  const Radical({
+    required this.kanji,
+    required this.type,
+    required this.radical,
+  });
+
+  @override
+  Map get sqlValue => {
+        'kanji': kanji,
+        'type': type,
+        'radical': radical,
+      };
+}
+
+/// An additional stroke count listed for [kanji] besides the first one.
+class StrokeMiscount extends SQLWritable {
+  final String kanji;
+  final int strokeCount;
+
+  const StrokeMiscount({
+    required this.kanji,
+    required this.strokeCount,
+  });
+
+  @override
+  Map get sqlValue => {
+        'kanji': kanji,
+        'strokeCount': strokeCount,
+      };
+}
+
+/// One `variant` element of a character; [type] is its `var_type`.
+class Variant extends SQLWritable {
+  final String kanji;
+  final String type;
+  final String variant;
+
+  const Variant({
+    required this.kanji,
+    required this.type,
+    required this.variant,
+  });
+
+  @override
+  Map get sqlValue => {
+        'kanji': kanji,
+        'type': type,
+        'variant': variant,
+      };
+}
+
+/// One `dic_ref` element whose `dr_type` is not `moro`.
+class DictionaryReference extends SQLWritable {
+  final String kanji;
+  final String type;
+  final String ref;
+
+  const DictionaryReference({
+    required this.kanji,
+    required this.type,
+    required this.ref,
+  });
+
+  @override
+  Map get sqlValue => {
+        'kanji': kanji,
+        'type': type,
+        'ref': ref,
+      };
+}
+
+/// One `dic_ref` element with `dr_type="moro"`, which may additionally
+/// carry a [volume] (`m_vol`) and a [page] (`m_page`).
+class DictionaryReferenceMoro extends SQLWritable {
+  final String kanji;
+  final String ref;
+  final int? volume;
+  final int? page;
+
+  const DictionaryReferenceMoro({
+    required this.kanji,
+    required this.ref,
+    required this.volume,
+    required this.page,
+  });
+
+  @override
+  Map get sqlValue => {
+        'kanji': kanji,
+        'ref': ref,
+        'volume': volume,
+        'page': page,
+      };
+}
+
+/// One `q_code` element; [skipMisclassification] is the optional
+/// `skip_misclass` attribute.
+class QueryCode extends SQLWritable {
+  final String kanji;
+  final String code;
+  final String type;
+  final String? skipMisclassification;
+
+  const QueryCode({
+    required this.kanji,
+    required this.code,
+    required this.type,
+    required this.skipMisclassification,
+  });
+
+  @override
+  Map get sqlValue => {
+        'kanji': kanji,
+        'code': code,
+        'type': type,
+        'skipMisclassification': skipMisclassification,
+      };
+}
+
+/// A `reading` element whose `r_type` is neither `ja_on` nor `ja_kun`.
+class Reading extends SQLWritable {
+  final String kanji;
+  final String type;
+  final String reading;
+
+  const Reading({
+    required this.kanji,
+    required this.type,
+    required this.reading,
+  });
+
+  @override
+  Map get sqlValue => {
+        'kanji': kanji,
+        'type': type,
+        'reading': reading,
+      };
+}
+
+/// A `reading` element with `r_type="ja_kun"`.
+class Kunyomi extends SQLWritable {
+  final String kanji;
+  final String yomi;
+  final bool isJouyou;
+
+  const Kunyomi({
+    required this.kanji,
+    required this.yomi,
+    required this.isJouyou,
+  });
+
+  @override
+  Map get sqlValue => {
+        'kanji': kanji,
+        'yomi': yomi,
+        'isJouyou': isJouyou,
+      };
+}
+
+/// A `reading` element with `r_type="ja_on"`; [type] is the optional
+/// `on_type` attribute.
+class Onyomi extends SQLWritable {
+  final String kanji;
+  final String yomi;
+  final bool isJouyou;
+  final String? type;
+
+  const Onyomi({
+    required this.kanji,
+    required this.yomi,
+    required this.isJouyou,
+    required this.type,
+  });
+
+  @override
+  Map get sqlValue => {
+        'kanji': kanji,
+        'yomi': yomi,
+        'isJouyou': isJouyou,
+        'type': type,
+      };
+}
+
+/// A `meaning` element of a character, written in [language].
+class Meaning extends SQLWritable {
+  final String kanji;
+  final String language;
+  final String meaning;
+
+  // The 'eng' default belongs to the language code, not to the meaning
+  // text: the parser also falls back to 'eng' when `m_lang` is absent.
+  // The meaning itself has no sensible default and is therefore required.
+  const Meaning({
+    required this.kanji,
+    this.language = 'eng',
+    required this.meaning,
+  });
+
+  @override
+  Map get sqlValue => {
+        'kanji': kanji,
+        'language': language,
+        'meaning': meaning,
+      };
+}
+
+/// One KANJIDIC `character` element together with all of its child data.
+///
+/// Only the scalar fields are part of [sqlValue]; the lists are written to
+/// their own tables by the importer.
+class Character extends SQLWritable {
+  final String literal;
+  final int strokeCount;
+  final int? grade;
+  final int? frequency;
+  final int? 
jlpt;
+
+  final List radicalName;
+  final List codepoints;
+  final List radicals;
+  final List strokeMiscounts;
+  final List variants;
+  final List dictionaryReferences;
+  final List dictionaryReferencesMoro;
+  final List querycodes;
+  final List readings;
+  final List onyomi;
+  final List kunyomi;
+  final List meanings;
+  final List nanori;
+
+  const Character({
+    required this.literal,
+    required this.strokeCount,
+    this.grade,
+    this.frequency,
+    this.jlpt,
+    this.radicalName = const [],
+    this.codepoints = const [],
+    this.radicals = const [],
+    this.strokeMiscounts = const [],
+    this.variants = const [],
+    this.dictionaryReferences = const [],
+    this.dictionaryReferencesMoro = const [],
+    this.querycodes = const [],
+    this.readings = const [],
+    this.onyomi = const [],
+    this.kunyomi = const [],
+    this.meanings = const [],
+    this.nanori = const [],
+  });
+
+  @override
+  Map get sqlValue => {
+        'literal': literal,
+        'grade': grade,
+        'strokeCount': strokeCount,
+        'frequency': frequency,
+        'jlpt': jlpt,
+      };
+}
diff --git a/bin/kanjidic/parser.dart b/bin/kanjidic/parser.dart
new file mode 100644
index 0000000..171d3f6
--- /dev/null
+++ b/bin/kanjidic/parser.dart
@@ -0,0 +1,231 @@
+import 'dart:io';
+
+import 'package:sqflite_common/sqlite_api.dart';
+import 'package:xml/xml.dart';
+import 'package:collection/collection.dart';
+
+import 'objects.dart';
+
+/// Converts the `<kanjidic2>` [root] element into a list of [Character]s,
+/// one per `<character>` child.
+///
+/// Throws if a character has no `literal` or `stroke_count`, or if a
+/// required attribute (`cp_type`, `rad_type`, `var_type`, `dr_type`,
+/// `qc_type`, `r_type`) is missing.
+List transformXML(XmlElement root) {
+  final List result = [];
+  for (final c in root.findElements('character')) {
+    final kanji = c.findElements('literal').first.innerText;
+
+    // `grade`, `freq`, `jlpt` and `rad_name` are children of `<misc>`, not
+    // of `<character>` itself, so they have to be looked up among all
+    // descendants (`findElements` only sees direct children).
+    int? optionalInt(String tag) =>
+        int.tryParse(c.findAllElements(tag).firstOrNull?.innerText ?? '');
+
+    result.add(
+      Character(
+        literal: kanji,
+        // The first stroke_count is the accepted one; further ones are
+        // collected as miscounts below.
+        strokeCount:
+            int.parse(c.findAllElements('stroke_count').first.innerText),
+        grade: optionalInt('grade'),
+        frequency: optionalInt('freq'),
+        jlpt: optionalInt('jlpt'),
+        radicalName:
+            c.findAllElements('rad_name').map((e) => e.innerText).toList(),
+        codepoints: c
+            .findAllElements('cp_value')
+            .map(
+              (e) => CodePoint(
+                kanji: kanji,
+                type: e.getAttribute('cp_type')!,
+                codepoint: e.innerText,
+              ),
+            )
+            .toList(),
+        radicals: c
+            .findAllElements('rad_value')
+            .map(
+              (e) => Radical(
+                kanji: kanji,
+                type: e.getAttribute('rad_type')!,
+                radical: e.innerText,
+              ),
+            )
+            .toList(),
+        strokeMiscounts: c
+            .findAllElements('stroke_count')
+            .skip(1)
+            .map((e) => int.parse(e.innerText))
+            .toList(),
+        variants: c
+            .findAllElements('variant')
+            .map(
+              (e) => Variant(
+                kanji: kanji,
+                type: e.getAttribute('var_type')!,
+                variant: e.innerText,
+              ),
+            )
+            .toList(),
+        dictionaryReferences: c
+            .findAllElements('dic_ref')
+            .where((e) => e.getAttribute('dr_type') != 'moro')
+            .map(
+              (e) => DictionaryReference(
+                kanji: kanji,
+                type: e.getAttribute('dr_type')!,
+                ref: e.innerText,
+              ),
+            )
+            .toList(),
+        dictionaryReferencesMoro: c
+            .findAllElements('dic_ref')
+            .where((e) => e.getAttribute('dr_type') == 'moro')
+            .map(
+              (e) => DictionaryReferenceMoro(
+                kanji: kanji,
+                ref: e.innerText,
+                page: int.tryParse(e.getAttribute('m_page') ?? ''),
+                volume: int.tryParse(e.getAttribute('m_vol') ?? ''),
+              ),
+            )
+            .toList(),
+        querycodes: c
+            .findAllElements('q_code')
+            .map(
+              (e) => QueryCode(
+                kanji: kanji,
+                code: e.innerText,
+                type: e.getAttribute('qc_type')!,
+                skipMisclassification: e.getAttribute('skip_misclass'),
+              ),
+            )
+            .toList(),
+        // On- and kun-readings get their own lists; everything else ends
+        // up in the generic readings.
+        readings: c
+            .findAllElements('reading')
+            .where(
+              (e) => !['ja_on', 'ja_kun'].contains(e.getAttribute('r_type')),
+            )
+            .map(
+              (e) => Reading(
+                kanji: kanji,
+                type: e.getAttribute('r_type')!,
+                reading: e.innerText,
+              ),
+            )
+            .toList(),
+        kunyomi: c
+            .findAllElements('reading')
+            .where((e) => e.getAttribute('r_type') == 'ja_kun')
+            .map(
+              (e) => Kunyomi(
+                kanji: kanji,
+                yomi: e.innerText,
+                isJouyou: e.getAttribute('r_status') == 'jy',
+              ),
+            )
+            .toList(),
+        onyomi: c
+            .findAllElements('reading')
+            .where((e) => e.getAttribute('r_type') == 'ja_on')
+            .map(
+              (e) => Onyomi(
+                  kanji: kanji,
+                  yomi: e.innerText,
+                  isJouyou: e.getAttribute('r_status') == 'jy',
+                  type: e.getAttribute('on_type')),
+            )
+            .toList(),
+        meanings: c
+            .findAllElements('meaning')
+            .map(
+              (e) => Meaning(
+                kanji: kanji,
+                language: e.getAttribute('m_lang') ?? 'eng',
+                meaning: e.innerText,
+              ),
+            )
+            .toList(),
+        nanori: c.findAllElements('nanori').map((e) => e.innerText).toList(),
+      ),
+    );
+  }
+  return result;
+}
+
+/// Queues one insert per character and per child record in a single batch
+/// and completes once that batch has been committed to [db].
+Future insertIntoDB(List characters, Database db) async {
+  final b = db.batch();
+  for (final c in characters) {
+    // if (c.dictionaryReferences.any((e) =>
+    //     c.dictionaryReferences
+    //         .where((e2) => e.kanji == e2.kanji && e.type == e2.type)
+    //         .length >
+    //     1)) {
+    //   print(c.dictionaryReferences.map((e) => e.sqlValue).toList());
+    // }
+    b.insert(TableNames.character, c.sqlValue);
+    for (final n in c.radicalName) {
+      b.insert(TableNames.radicalName, {'kanji': c.literal, 'name': n});
+    }
+    for (final cp in c.codepoints) {
+      b.insert(TableNames.codepoint, cp.sqlValue);
+    }
+    for (final r in c.radicals) {
+      b.insert(TableNames.radical, r.sqlValue);
+    }
+    for (final sm in c.strokeMiscounts) {
+      b.insert(
+        TableNames.strokeMiscount,
+        {
+          'kanji': c.literal,
+          'strokeCount': sm,
+        },
+      );
+    }
+    for (final v in c.variants) {
+      b.insert(TableNames.variant, v.sqlValue);
+    }
+    for (final dr in c.dictionaryReferences) {
+      // There are duplicate entries here
+      b.insert(
+        TableNames.dictionaryReference,
+        dr.sqlValue,
+        conflictAlgorithm: ConflictAlgorithm.ignore,
+      );
+    }
+    for (final drm in c.dictionaryReferencesMoro) {
+      b.insert(TableNames.dictionaryReferenceMoro, drm.sqlValue);
+    }
+    for (final q in c.querycodes) {
+      b.insert(TableNames.queryCode, q.sqlValue);
+    }
+    for (final r in c.readings) {
+      b.insert(TableNames.reading, r.sqlValue);
+    }
+    for (final k in c.kunyomi) {
+      b.insert(TableNames.kunyomi, k.sqlValue);
+    }
+    for (final o in c.onyomi) {
+      b.insert(TableNames.onyomi, o.sqlValue);
+    }
+    for (final m in c.meanings) {
+      b.insert(TableNames.meaning, m.sqlValue);
+    }
+    for (final n in c.nanori) {
+      b.insert(
+        TableNames.nanori,
+        {
+          'kanji': c.literal,
+          'nanori': n,
+        },
+      );
+    }
+  }
+  // Await the commit so callers only continue once the data is written and
+  // so that insert errors surface here instead of being dropped.
+  await b.commit();
+}
+
+/// Reads `data/kanjidic2.xml`, converts it with [transformXML] and writes
+/// the result to [db] through [insertIntoDB].
+Future addDataFromKANJIDIC(Database db) async {
+  print('[KANJIDIC2] Reading file...');
+  
String rawXML = File('data/kanjidic2.xml').readAsStringSync();
+
+  print('[KANJIDIC2] Parsing XML...');
+  XmlElement root = XmlDocument.parse(rawXML).getElement('kanjidic2')!;
+
+  print('[KANJIDIC2] Transforming data...');
+  final entries = transformXML(root);
+
+  print('[KANJIDIC2] Writing to database...');
+  await insertIntoDB(entries, db);
+}
diff --git a/bin/objects.dart b/bin/objects.dart
new file mode 100644
index 0000000..9631468
--- /dev/null
+++ b/bin/objects.dart
@@ -0,0 +1,5 @@
+/// Base class for objects that can be turned into a column -> value map
+/// suitable for a batch insert.
+abstract class SQLWritable {
+  const SQLWritable();
+
+  /// The column -> value map written for this object.
+  Map get sqlValue;
+}
\ No newline at end of file
diff --git a/bin/radkfile/objects.dart b/bin/radkfile/objects.dart
new file mode 100644
index 0000000..1a6fab6
--- /dev/null
+++ b/bin/radkfile/objects.dart
@@ -0,0 +1,13 @@
+/// A (radical, kanji) pair from RADKFILE.
+class Radical {
+  final String radical;
+  final String kanji;
+
+  // TODO:
+  final String something;
+
+  const Radical({
+    required this.radical,
+    required this.kanji,
+    required this.something,
+  });
+}
diff --git a/bin/radkfile/parser.dart b/bin/radkfile/parser.dart
new file mode 100644
index 0000000..b1d375e
--- /dev/null
+++ b/bin/radkfile/parser.dart
@@ -0,0 +1,32 @@
+import 'dart:io';
+
+import 'package:sqflite_common/sqlite_api.dart';
+
+/// Reads `data/radkfile_utf8` and inserts one `RADKFILE` row per
+/// (radical, kanji) pair into [db].
+///
+/// The file is split on `$`; in every block the character at index 1 is
+/// taken as the radical and every character after the block's first line
+/// as a kanji containing it. Completes once the batch has been committed.
+Future addDataFromRADKFILE(Database db) async {
+  final String content = File('data/radkfile_utf8').readAsStringSync();
+  // NOTE(review): without `multiLine: true` this pattern can only match
+  // when the whole file is a single `#` line, so comment lines are most
+  // likely not removed here. `skip(2)` may be compensating for that -
+  // confirm against the real file before changing either of them.
+  final Iterable blocks =
+      content.replaceAll(RegExp(r'^#.*$'), '').split(r'$').skip(2);
+
+  print('[RADKFILE] Writing to database...');
+  final b = db.batch();
+
+  for (final block in blocks) {
+    final String radical = block[1];
+    // Drop the block's header line, then treat every remaining character
+    // except newlines as a kanji.
+    final List kanjiList = block
+        .replaceFirst(RegExp(r'.*\n'), '')
+        .split('')
+      ..removeWhere((e) => e == '' || e == '\n');
+
+    // toSet(): insert each kanji only once per radical.
+    for (final kanji in kanjiList.toSet()) {
+      b.insert(
+        'RADKFILE',
+        {
+          'radical': radical,
+          'kanji': kanji,
+        },
+      );
+    }
+  }
+
+  // Await the commit so the returned future completes only after the rows
+  // are written and so that errors are reported to the caller.
+  await b.commit();
+}
diff --git a/bin/romaji_transliteration.dart b/bin/romaji_transliteration.dart
new file mode 100644
index 0000000..814453a
--- /dev/null
+++ 
b/bin/romaji_transliteration.dart @@ -0,0 +1,622 @@ +// Source: https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb + +const hiragana_syllabic_n = 'ん'; +const hiragana_small_tsu = 'っ'; + +const Map hiragana_to_latin = { + 'あ': 'a', + 'い': 'i', + 'う': 'u', + 'え': 'e', + 'お': 'o', + 'か': 'ka', + 'き': 'ki', + 'く': 'ku', + 'け': 'ke', + 'こ': 'ko', + 'が': 'ga', + 'ぎ': 'gi', + 'ぐ': 'gu', + 'げ': 'ge', + 'ご': 'go', + 'さ': 'sa', + 'し': 'shi', + 'す': 'su', + 'せ': 'se', + 'そ': 'so', + 'ざ': 'za', + 'じ': 'ji', + 'ず': 'zu', + 'ぜ': 'ze', + 'ぞ': 'zo', + 'た': 'ta', + 'ち': 'chi', + 'つ': 'tsu', + 'て': 'te', + 'と': 'to', + 'だ': 'da', + 'ぢ': 'ji', + 'づ': 'zu', + 'で': 'de', + 'ど': 'do', + 'な': 'na', + 'に': 'ni', + 'ぬ': 'nu', + 'ね': 'ne', + 'の': 'no', + 'は': 'ha', + 'ひ': 'hi', + 'ふ': 'fu', + 'へ': 'he', + 'ほ': 'ho', + 'ば': 'ba', + 'び': 'bi', + 'ぶ': 'bu', + 'べ': 'be', + 'ぼ': 'bo', + 'ぱ': 'pa', + 'ぴ': 'pi', + 'ぷ': 'pu', + 'ぺ': 'pe', + 'ぽ': 'po', + 'ま': 'ma', + 'み': 'mi', + 'む': 'mu', + 'め': 'me', + 'も': 'mo', + 'や': 'ya', + 'ゆ': 'yu', + 'よ': 'yo', + 'ら': 'ra', + 'り': 'ri', + 'る': 'ru', + 'れ': 're', + 'ろ': 'ro', + 'わ': 'wa', + 'うぃ': 'whi', + 'うぇ': 'whe', + 'を': 'wo', + 'ゑ': 'we', + 'ゐ': 'wi', + 'ー': '-', + 'ん': 'n', + 'きゃ': 'kya', + 'きゅ': 'kyu', + 'きょ': 'kyo', + 'きぇ': 'kye', + 'きぃ': 'kyi', + 'ぎゃ': 'gya', + 'ぎゅ': 'gyu', + 'ぎょ': 'gyo', + 'ぎぇ': 'gye', + 'ぎぃ': 'gyi', + 'くぁ': 'kwa', + 'くぃ': 'kwi', + 'くぅ': 'kwu', + 'くぇ': 'kwe', + 'くぉ': 'kwo', + 'ぐぁ': 'qwa', + 'ぐぃ': 'gwi', + 'ぐぅ': 'gwu', + 'ぐぇ': 'gwe', + 'ぐぉ': 'gwo', + 'しゃ': 'sha', + 'しぃ': 'syi', + 'しゅ': 'shu', + 'しぇ': 'she', + 'しょ': 'sho', + 'じゃ': 'ja', + 'じゅ': 'ju', + 'じぇ': 'jye', + 'じょ': 'jo', + 'じぃ': 'jyi', + 'すぁ': 'swa', + 'すぃ': 'swi', + 'すぅ': 'swu', + 'すぇ': 'swe', + 'すぉ': 'swo', + 'ちゃ': 'cha', + 'ちゅ': 'chu', + 'ちぇ': 'tye', + 'ちょ': 'cho', + 'ちぃ': 'tyi', + 'ぢゃ': 'ja', + 'ぢぃ': 'dyi', + 'ぢゅ': 'ju', + 'ぢぇ': 'dye', + 'ぢょ': 'jo', + 'つぁ': 'tsa', + 'つぃ': 'tsi', + 'つぇ': 'tse', + 'つぉ': 'tso', + 'てゃ': 'tha', + 'てぃ': 'thi', 
+ 'てゅ': 'thu', + 'てぇ': 'the', + 'てょ': 'tho', + 'とぁ': 'twa', + 'とぃ': 'twi', + 'とぅ': 'twu', + 'とぇ': 'twe', + 'とぉ': 'two', + 'でゃ': 'dha', + 'でぃ': 'dhi', + 'でゅ': 'dhu', + 'でぇ': 'dhe', + 'でょ': 'dho', + 'どぁ': 'dwa', + 'どぃ': 'dwi', + 'どぅ': 'dwu', + 'どぇ': 'dwe', + 'どぉ': 'dwo', + 'にゃ': 'nya', + 'にゅ': 'nyu', + 'にょ': 'nyo', + 'にぇ': 'nye', + 'にぃ': 'nyi', + 'ひゃ': 'hya', + 'ひぃ': 'hyi', + 'ひゅ': 'hyu', + 'ひぇ': 'hye', + 'ひょ': 'hyo', + 'びゃ': 'bya', + 'びぃ': 'byi', + 'びゅ': 'byu', + 'びぇ': 'bye', + 'びょ': 'byo', + 'ぴゃ': 'pya', + 'ぴぃ': 'pyi', + 'ぴゅ': 'pyu', + 'ぴぇ': 'pye', + 'ぴょ': 'pyo', + 'ふぁ': 'fwa', + 'ふぃ': 'fyi', + 'ふぇ': 'fye', + 'ふぉ': 'fwo', + 'ふぅ': 'fwu', + 'ふゃ': 'fya', + 'ふゅ': 'fyu', + 'ふょ': 'fyo', + 'みゃ': 'mya', + 'みぃ': 'myi', + 'みゅ': 'myu', + 'みぇ': 'mye', + 'みょ': 'myo', + 'りゃ': 'rya', + 'りぃ': 'ryi', + 'りゅ': 'ryu', + 'りぇ': 'rye', + 'りょ': 'ryo', + 'ゔぁ': 'va', + 'ゔぃ': 'vyi', + 'ゔ': 'vu', + 'ゔぇ': 'vye', + 'ゔぉ': 'vo', + 'ゔゃ': 'vya', + 'ゔゅ': 'vyu', + 'ゔょ': 'vyo', + 'うぁ': 'wha', + 'いぇ': 'ye', + 'うぉ': 'who', + 'ぁ': 'xa', + 'ぃ': 'xi', + 'ぅ': 'xu', + 'ぇ': 'xe', + 'ぉ': 'xo', + 'ゕ': 'xka', + 'ゖ': 'xke', + 'ゎ': 'xwa' +}; + +const Map latin_to_hiragana = { + 'a': 'あ', + 'i': 'い', + 'u': 'う', + 'e': 'え', + 'o': 'お', + 'ka': 'か', + 'ki': 'き', + 'ku': 'く', + 'ke': 'け', + 'ko': 'こ', + 'ga': 'が', + 'gi': 'ぎ', + 'gu': 'ぐ', + 'ge': 'げ', + 'go': 'ご', + 'sa': 'さ', + 'si': 'し', + 'shi': 'し', + 'su': 'す', + 'se': 'せ', + 'so': 'そ', + 'za': 'ざ', + 'zi': 'じ', + 'ji': 'じ', + 'zu': 'ず', + 'ze': 'ぜ', + 'zo': 'ぞ', + 'ta': 'た', + 'ti': 'ち', + 'chi': 'ち', + 'tu': 'つ', + 'tsu': 'つ', + 'te': 'て', + 'to': 'と', + 'da': 'だ', + 'di': 'ぢ', + 'du': 'づ', + 'dzu': 'づ', + 'de': 'で', + 'do': 'ど', + 'na': 'な', + 'ni': 'に', + 'nu': 'ぬ', + 'ne': 'ね', + 'no': 'の', + 'ha': 'は', + 'hi': 'ひ', + 'hu': 'ふ', + 'fu': 'ふ', + 'he': 'へ', + 'ho': 'ほ', + 'ba': 'ば', + 'bi': 'び', + 'bu': 'ぶ', + 'be': 'べ', + 'bo': 'ぼ', + 'pa': 'ぱ', + 'pi': 'ぴ', + 'pu': 'ぷ', + 'pe': 'ぺ', + 'po': 'ぽ', + 'ma': 'ま', + 'mi': 'み', + 'mu': 'む', + 'me': 'め', + 'mo': 
'も', + 'ya': 'や', + 'yu': 'ゆ', + 'yo': 'よ', + 'ra': 'ら', + 'ri': 'り', + 'ru': 'る', + 're': 'れ', + 'ro': 'ろ', + 'la': 'ら', + 'li': 'り', + 'lu': 'る', + 'le': 'れ', + 'lo': 'ろ', + 'wa': 'わ', + 'wi': 'うぃ', + 'we': 'うぇ', + 'wo': 'を', + 'wye': 'ゑ', + 'wyi': 'ゐ', + '-': 'ー', + 'n': 'ん', + 'nn': 'ん', + "n'": 'ん', + 'kya': 'きゃ', + 'kyu': 'きゅ', + 'kyo': 'きょ', + 'kye': 'きぇ', + 'kyi': 'きぃ', + 'gya': 'ぎゃ', + 'gyu': 'ぎゅ', + 'gyo': 'ぎょ', + 'gye': 'ぎぇ', + 'gyi': 'ぎぃ', + 'kwa': 'くぁ', + 'kwi': 'くぃ', + 'kwu': 'くぅ', + 'kwe': 'くぇ', + 'kwo': 'くぉ', + 'gwa': 'ぐぁ', + 'gwi': 'ぐぃ', + 'gwu': 'ぐぅ', + 'gwe': 'ぐぇ', + 'gwo': 'ぐぉ', + 'qwa': 'ぐぁ', + 'qwi': 'ぐぃ', + 'qwu': 'ぐぅ', + 'qwe': 'ぐぇ', + 'qwo': 'ぐぉ', + 'sya': 'しゃ', + 'syi': 'しぃ', + 'syu': 'しゅ', + 'sye': 'しぇ', + 'syo': 'しょ', + 'sha': 'しゃ', + 'shu': 'しゅ', + 'she': 'しぇ', + 'sho': 'しょ', + 'ja': 'じゃ', + 'ju': 'じゅ', + 'je': 'じぇ', + 'jo': 'じょ', + 'jya': 'じゃ', + 'jyi': 'じぃ', + 'jyu': 'じゅ', + 'jye': 'じぇ', + 'jyo': 'じょ', + 'zya': 'じゃ', + 'zyu': 'じゅ', + 'zyo': 'じょ', + 'zye': 'じぇ', + 'zyi': 'じぃ', + 'swa': 'すぁ', + 'swi': 'すぃ', + 'swu': 'すぅ', + 'swe': 'すぇ', + 'swo': 'すぉ', + 'cha': 'ちゃ', + 'chu': 'ちゅ', + 'che': 'ちぇ', + 'cho': 'ちょ', + 'cya': 'ちゃ', + 'cyi': 'ちぃ', + 'cyu': 'ちゅ', + 'cye': 'ちぇ', + 'cyo': 'ちょ', + 'tya': 'ちゃ', + 'tyi': 'ちぃ', + 'tyu': 'ちゅ', + 'tye': 'ちぇ', + 'tyo': 'ちょ', + 'dya': 'ぢゃ', + 'dyi': 'ぢぃ', + 'dyu': 'ぢゅ', + 'dye': 'ぢぇ', + 'dyo': 'ぢょ', + 'tsa': 'つぁ', + 'tsi': 'つぃ', + 'tse': 'つぇ', + 'tso': 'つぉ', + 'tha': 'てゃ', + 'thi': 'てぃ', + 'thu': 'てゅ', + 'the': 'てぇ', + 'tho': 'てょ', + 'twa': 'とぁ', + 'twi': 'とぃ', + 'twu': 'とぅ', + 'twe': 'とぇ', + 'two': 'とぉ', + 'dha': 'でゃ', + 'dhi': 'でぃ', + 'dhu': 'でゅ', + 'dhe': 'でぇ', + 'dho': 'でょ', + 'dwa': 'どぁ', + 'dwi': 'どぃ', + 'dwu': 'どぅ', + 'dwe': 'どぇ', + 'dwo': 'どぉ', + 'nya': 'にゃ', + 'nyu': 'にゅ', + 'nyo': 'にょ', + 'nye': 'にぇ', + 'nyi': 'にぃ', + 'hya': 'ひゃ', + 'hyi': 'ひぃ', + 'hyu': 'ひゅ', + 'hye': 'ひぇ', + 'hyo': 'ひょ', + 'bya': 'びゃ', + 'byi': 'びぃ', + 'byu': 'びゅ', + 'bye': 'びぇ', + 'byo': 'びょ', + 'pya': 'ぴゃ', + 'pyi': 'ぴぃ', + 
'pyu': 'ぴゅ', + 'pye': 'ぴぇ', + 'pyo': 'ぴょ', + 'fa': 'ふぁ', + 'fi': 'ふぃ', + 'fe': 'ふぇ', + 'fo': 'ふぉ', + 'fwa': 'ふぁ', + 'fwi': 'ふぃ', + 'fwu': 'ふぅ', + 'fwe': 'ふぇ', + 'fwo': 'ふぉ', + 'fya': 'ふゃ', + 'fyi': 'ふぃ', + 'fyu': 'ふゅ', + 'fye': 'ふぇ', + 'fyo': 'ふょ', + 'mya': 'みゃ', + 'myi': 'みぃ', + 'myu': 'みゅ', + 'mye': 'みぇ', + 'myo': 'みょ', + 'rya': 'りゃ', + 'ryi': 'りぃ', + 'ryu': 'りゅ', + 'rye': 'りぇ', + 'ryo': 'りょ', + 'lya': 'りゃ', + 'lyu': 'りゅ', + 'lyo': 'りょ', + 'lye': 'りぇ', + 'lyi': 'りぃ', + 'va': 'ゔぁ', + 'vi': 'ゔぃ', + 'vu': 'ゔ', + 've': 'ゔぇ', + 'vo': 'ゔぉ', + 'vya': 'ゔゃ', + 'vyi': 'ゔぃ', + 'vyu': 'ゔゅ', + 'vye': 'ゔぇ', + 'vyo': 'ゔょ', + 'wha': 'うぁ', + 'whi': 'うぃ', + 'ye': 'いぇ', + 'whe': 'うぇ', + 'who': 'うぉ', + 'xa': 'ぁ', + 'xi': 'ぃ', + 'xu': 'ぅ', + 'xe': 'ぇ', + 'xo': 'ぉ', + 'xya': 'ゃ', + 'xyu': 'ゅ', + 'xyo': 'ょ', + 'xtu': 'っ', + 'xtsu': 'っ', + 'xka': 'ゕ', + 'xke': 'ゖ', + 'xwa': 'ゎ', + '@@': ' ', + '#[': '「', + '#]': '」', + '#,': '、', + '#.': '。', + '#/': '・', +}; + +bool _smallTsu(String for_conversion) => for_conversion == hiragana_small_tsu; +bool _nFollowedByYuYeYo(String for_conversion, String kana) => + for_conversion == hiragana_syllabic_n && + kana.length > 1 && + 'やゆよ'.contains(kana.substring(1, 2)); + +String transliterateHiraganaToLatin(String hiragana) { + String kana = hiragana; + String romaji = ''; + bool geminate = false; + + while (kana.isNotEmpty) { + final lengths = [if (kana.length > 1) 2, 1]; + for (final length in lengths) { + final String for_conversion = kana.substring(0, length); + String? 
mora;
+
+      if (_smallTsu(for_conversion)) {
+        // A small tsu is not emitted itself; it doubles the first letter of
+        // the next mora.
+        geminate = true;
+        kana = kana.replaceRange(0, length, '');
+        break;
+      } else if (_nFollowedByYuYeYo(for_conversion, kana)) {
+        mora = "n'";
+      }
+      mora ??= hiragana_to_latin[for_conversion];
+
+      if (mora != null) {
+        if (geminate) {
+          geminate = false;
+          romaji += mora.substring(0, 1);
+        }
+        romaji += mora;
+        kana = kana.replaceRange(0, length, '');
+        break;
+      } else if (length == 1) {
+        // No table entry even for a single character: copy it unchanged.
+        romaji += for_conversion;
+        kana = kana.replaceRange(0, length, '');
+      }
+    }
+  }
+  return romaji;
+}
+
+/// Whether [for_conversion] is exactly `nn` followed by one vowel.
+bool _doubleNFollowedByAIUEO(String for_conversion) {
+  return RegExp(r'^nn[aiueo]$').hasMatch(for_conversion);
+}
+
+/// Whether the romaji table has an entry for [for_conversion].
+bool _hasTableMatch(String for_conversion) {
+  return latin_to_hiragana.containsKey(for_conversion);
+}
+
+/// Whether [for_conversion] is `tch`, or a two-letter chunk made of the
+/// same consonant twice.
+bool _hasDoubleConsonant(String for_conversion, int length) {
+  if (for_conversion == 'tch') {
+    return true;
+  }
+  if (length != 2) {
+    return false;
+  }
+  return RegExp(r'^([kgsztdnbpmyrlwchf])\1$').hasMatch(for_conversion);
+}
+
+/// Converts romaji to hiragana.
+///
+/// The input is lower-cased and `mb`/`mp` are rewritten to `nb`/`np`.
+/// It is then consumed from the left, trying chunks of three, two and one
+/// characters; a character without any match is copied unchanged.
+String transliterateLatinToHiragana(String latin) {
+  var remaining =
+      latin.toLowerCase().replaceAll('mb', 'nb').replaceAll('mp', 'np');
+  final out = StringBuffer();
+
+  while (remaining.isNotEmpty) {
+    // Longest chunk first, never longer than what is left.
+    final longest = remaining.length < 3 ? remaining.length : 3;
+
+    for (var width = longest; width >= 1; width--) {
+      final chunk = remaining.substring(0, width);
+      String? mora;
+      var consumed = width;
+
+      if (_doubleNFollowedByAIUEO(chunk)) {
+        // "nna" -> syllabic n, then "na" on the next round.
+        mora = hiragana_syllabic_n;
+        consumed = 1;
+      } else if (_hasTableMatch(chunk)) {
+        mora = latin_to_hiragana[chunk];
+      } else if (_hasDoubleConsonant(chunk, width)) {
+        // Doubled consonant -> small tsu; the second consonant stays.
+        mora = hiragana_small_tsu;
+        consumed = 1;
+      }
+
+      if (mora != null) {
+        out.write(mora);
+        remaining = remaining.substring(consumed);
+        break;
+      }
+      if (width == 1) {
+        out.write(chunk);
+        remaining = remaining.substring(1);
+      }
+    }
+  }
+
+  return out.toString();
+}
+
+/// Adds [distance] to every UTF-16 code unit of [text] that lies within
+/// [rangeStart]..[rangeEnd] (inclusive); all other units are kept.
+String _transposeCodepointsInRange(
+  String text,
+  int distance,
+  int rangeStart,
+  int rangeEnd,
+) {
+  final shifted = [
+    for (final unit in text.codeUnits)
+      (unit >= rangeStart && unit <= rangeEnd) ? unit + distance : unit,
+  ];
+  return String.fromCharCodes(shifted);
+}
+
+/// Katakana is first mapped to hiragana, then the result to romaji.
+String transliterateKanaToLatin(String kana) {
+  final hiragana = transliterateKatakanaToHiragana(kana);
+  return transliterateHiraganaToLatin(hiragana);
+}
+
+/// Romaji is first mapped to hiragana, then the result to katakana.
+String transliterateLatinToKatakana(String latin) {
+  final hiragana = transliterateLatinToHiragana(latin);
+  return transliterateHiraganaToKatakana(hiragana);
+}
+
+/// Shifts code units 12449..12534 down by 96.
+String transliterateKatakanaToHiragana(String katakana) {
+  return _transposeCodepointsInRange(katakana, -96, 12449, 12534);
+}
+
+/// Shifts code units 12353..12438 up by 96.
+String transliterateHiraganaToKatakana(String hiragana) {
+  return _transposeCodepointsInRange(hiragana, 96, 12353, 12438);
+}
+
+/// Shifts code units 65281..65374 down by 65248, then maps code unit
+/// 12288 to 32 (space).
+String transliterateFullwidthRomajiToHalfwidth(String halfwidth) {
+  final narrowed = _transposeCodepointsInRange(
+    halfwidth,
+    -65248,
+    65281,
+    65374,
+  );
+  return _transposeCodepointsInRange(narrowed, -12256, 12288, 12288);
+}
+
+/// Shifts code units 33..126 up by 65248, then maps code unit 32 (space)
+/// to 12288.
+String transliterateHalfwidthRomajiToFullwidth(String halfwidth) {
+  final widened = _transposeCodepointsInRange(
+    halfwidth,
+    65248,
+    33,
+    126,
+  );
+  return _transposeCodepointsInRange(widened, 12256, 32, 32);
+}
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..e969bd1
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,122 @@
+{
+  "nodes": {
+    "JMdictSrc": {
+      "flake": false,
+      "locked": {
+        "narHash": 
"sha256-TAkT98/lC1zBAJ/ublGi/gK965pwxoHJrnWRaKKBq7I=", + "type": "file", + "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz" + }, + "original": { + "type": "file", + "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz" + } + }, + "JMdictWithExamplesSrc": { + "flake": false, + "locked": { + "narHash": "sha256-GfClwLR4uoxPKxRbI5qgELurAdpegCbZO5lEORb3EvA=", + "type": "file", + "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz" + }, + "original": { + "type": "file", + "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz" + } + }, + "RADKFILESrc": { + "flake": false, + "locked": { + "narHash": "sha256-rO2z5GPt3g6osZOlpyWysmIbRV2Gw4AR4XvngVTHNpk=", + "type": "file", + "url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz" + }, + "original": { + "type": "file", + "url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz" + } + }, + "flake-utils": { + "locked": { + "lastModified": 1649676176, + "narHash": "sha256-OWKJratjt2RW151VUlJPRALb7OU2S5s+f0vLj4o1bHM=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "a4b154ebbdc88c8498a5c7b01589addc9e9cb678", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nix-dart": { + "inputs": { + "flake-utils": [ + "flake-utils" + ], + "nixpkgs": [ + "nixpkgs" + ], + "pub2nix": "pub2nix" + }, + "locked": { + "lastModified": 1652213615, + "narHash": "sha256-+eehm2JlhoKgY+Ea4DTxDMei/x4Fgz7S+ZPqWpZysuI=", + "owner": "tadfisher", + "repo": "nix-dart", + "rev": "6f686ddf984306d944e9b5adf9f35f3a0a0a70b7", + "type": "github" + }, + "original": { + "owner": "tadfisher", + "repo": "nix-dart", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1655456688, + "narHash": "sha256-j2trI5gv2fnHdfUQFBy957avCPxxzCqE8R+TOYHPSRE=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "d17a56d90ecbd1b8fc908d49598fb854ef188461", + "type": "github" + }, + "original": { + "id": "nixpkgs", + "ref": "nixos-22.05", + "type": 
"indirect" + } + }, + "pub2nix": { + "flake": false, + "locked": { + "lastModified": 1594192744, + "narHash": "sha256-pDvcXSG1Mh2BpwkqAcNDJzcupV3pIAAtZJLfkiHMAz4=", + "owner": "paulyoung", + "repo": "pub2nix", + "rev": "0c7ecca590fcd1616db8c6468f799ffef36c85e9", + "type": "github" + }, + "original": { + "owner": "paulyoung", + "repo": "pub2nix", + "type": "github" + } + }, + "root": { + "inputs": { + "JMdictSrc": "JMdictSrc", + "JMdictWithExamplesSrc": "JMdictWithExamplesSrc", + "RADKFILESrc": "RADKFILESrc", + "flake-utils": "flake-utils", + "nix-dart": "nix-dart", + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..d7d6c61 --- /dev/null +++ b/flake.nix @@ -0,0 +1,208 @@ +{ + description = "A SQLite database containing open source japanese language translation data"; + + inputs = { + nixpkgs.url = "nixpkgs/nixos-22.05"; + + flake-utils = { + url = "github:numtide/flake-utils"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + + nix-dart = { + url = "github:tadfisher/nix-dart"; + inputs = { + nixpkgs.follows = "nixpkgs"; + flake-utils.follows = "flake-utils"; + }; + }; + + JMdictSrc = { + url = "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"; + flake = false; + }; + + JMdictWithExamplesSrc = { + url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"; + flake = false; + }; + + RADKFILESrc = { + url = "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"; + flake = false; + }; + + }; + + outputs = { + self, + nixpkgs, + flake-utils, + nix-dart, + JMdictSrc, + JMdictWithExamplesSrc, + RADKFILESrc + }: let + system = "x86_64-linux"; + pkgs = import nixpkgs { + inherit system; + overlays = [ + # (final: prev: { dart = nix-dart.packages.${system}.dart; }) + nix-dart.overlay + ]; + }; + inherit (pkgs) lib; + + in { + devShell.${system} = pkgs.mkShell { + buildInputs = with pkgs; [ + nix-dart.packages.${system}.pub2nix-lock + dart + gnumake + sqlite + sqlite-web + sqlint + 
sqlfluff
+      ];
+    };
+
+    defaultPackage.${system} = self.packages.${system}.database;
+
+    packages.${system} = let
+      inherit (pkgs.stdenv) mkDerivation;
+      dbName = "main.db";
+
+      # Licence/maintainer metadata shared by the EDRDG data packages.
+      edrdgMetadata = {
+        license = {
+          shortName = "EDRDG";
+          fullName = "Electronic Dictionary Research and Development Group General Dictionary Licence";
+          url = "http://www.csse.monash.edu.au/~jwb/edrdg/licence.html";
+        };
+        maintainers = [ "h7x4 " ];
+        platforms = lib.platforms.all;
+      };
+    in {
+      # Decompressed and re-indented JMdict XML, with and without examples.
+      JMdict = mkDerivation {
+        name = "JMdict";
+
+        srcs = [
+          JMdictSrc
+          JMdictWithExamplesSrc
+        ];
+        dontUnpack = true;
+
+        nativeBuildInputs = with pkgs; [ xmlformat ];
+        buildPhase = ''
+          gzip -dkc ${JMdictSrc} > jmdict.xml
+          gzip -dkc ${JMdictWithExamplesSrc} > jmdict_with_examples.xml
+          xmlformat -i jmdict.xml
+          xmlformat -i jmdict_with_examples.xml
+        '';
+
+        installPhase = ''
+          mkdir $out
+          cp jmdict.xml $out
+          cp jmdict_with_examples.xml $out
+        '';
+
+        meta = edrdgMetadata // {
+          description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words";
+          homepage = "https://www.edrdg.org/jmdict/j_jmdict.html";
+        };
+      };
+
+      # RADKFILE, decompressed and converted from EUC-JP to UTF-8.
+      # The output is a single file, not a directory.
+      RADKFILE = mkDerivation {
+        name = "RADKFILE";
+
+        src = RADKFILESrc;
+        dontUnpack = true;
+
+        buildPhase = ''
+          gzip -dkc $src > radkfile
+        '';
+
+        installPhase = ''
+          iconv -f EUC-JP -t UTF-8 -o $out radkfile
+        '';
+
+        meta = edrdgMetadata // {
+          description = "A file providing searchable decompositions of kanji characters";
+          homepage = "https://www.edrdg.org/krad/kradinf.html";
+        };
+      };
+
+      # The Dart importer. The build phase of nix-dart's builder is patched
+      # to call `dart pub` instead of `pub`.
+      database_generator = (nix-dart.builders.${system}.buildDartPackage {
+        pname = "database_generator";
+        version = "1.0";
+
+        buildInputs = [ nix-dart.packages.${system}.dart-dev ];
+
+        src = builtins.filterSource (path: type: baseNameOf path != ".dart_tool") ./.;
+        specFile = ./pubspec.yaml;
+        lockFile = ./pub2nix.lock;
+      }).overrideAttrs(old: {
+        buildInputs = [nix-dart.packages.${system}.dart-dev];
+        buildPhase = builtins.replaceStrings ["pub"] ["dart pub"] old.buildPhase;
+      });
+
+      # An SQLite database with the schema and the static info values.
+      # NOTE(review): the Dart importer opens `data/JMdict.xml`, while the
+      # JMdict package provides `jmdict.xml`; the importer is not run here
+      # yet - confirm the expected file names when wiring it in.
+      database = mkDerivation {
+        name = "database";
+        src = builtins.filterSource (path: type: baseNameOf path != dbName) ./.;
+        nativeBuildInputs = with pkgs; [
+          sqlite
+        ];
+
+        # RADKFILE is linked under the name the importer reads
+        # (`data/radkfile_utf8`); linking it into the directory would name
+        # the link after the store path instead.
+        buildPhase = ''
+          mkdir -p data
+          ln -s ${self.packages.${system}.JMdict}/* data
+          ln -s ${self.packages.${system}.RADKFILE} data/radkfile_utf8
+          sqlite3 ${dbName} < migrations/0001_initial.sql
+          sqlite3 ${dbName} < migrations/0002_insert_info_values.sql
+        '';
+
+        installPhase = ''
+          mkdir -p $out
+          cp migrations/0001_initial.sql $out/schema.sql
+          cp ${dbName} $out/${dbName}
+        '';
+      };
+
+      # Schema documentation generated by SchemaSpy from the database
+      # package.
+      docs = mkDerivation {
+        name = "docs";
+        src = self.packages.${system}.database;
+        nativeBuildInputs = with pkgs; [
+          schemaspy
+          sqlite-jdbc
+        ];
+
+        buildPhase = let
+          # The jar name is derived from the package version so that it
+          # keeps working when nixpkgs updates sqlite-jdbc.
+          properties = pkgs.writeText "sqlite.properties" ''
+            description=SQLite
+            driver=org.sqlite.JDBC
+            driverPath=${pkgs.sqlite-jdbc}/share/java/sqlite-jdbc-${pkgs.sqlite-jdbc.version}.jar
+            connectionSpec=jdbc:sqlite:
+          '';
+
+          args = pkgs.writeText "schemaspy.properties" ''
+            schemaspy.cat="%"
+            schemaspy.t=sqlite
+            schemaspy.sso=true
+            schemaspy.db=${dbName}
+            schemaspy.o=docs
+            schemaspy.s=schema.sql
+          '';
+
+        in ''
+          cp ${args} ./schemaspy.properties
+          ls
+          schemaspy -t ${properties}
+        '';
+
+        installPhase = ''
+          cp -r docs $out
+        '';
+      };
+    };
+  };
+
+}
diff --git a/migrations/0001_initial.sql b/migrations/0001_initial.sql
new file mode 100644
index 0000000..6c987dc
--- /dev/null
+++ b/migrations/0001_initial.sql
@@ -0,0 +1,443 @@
+-- TODO: figure out ondelete functions...
+
+------------
+-- JMdict --
+------------
+
+-- Lookup tables: short code ("id") -> human readable description.
+
+CREATE TABLE "JMdict_InfoDialect" (
+  "id" VARCHAR(4) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_InfoField" (
+  "id" VARCHAR(7) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_InfoKanji" (
+  "id" VARCHAR(5) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_InfoMisc" (
+  "id" VARCHAR(12) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_InfoPOS" (
+  "id" VARCHAR(9) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_InfoReading" (
+  "id" VARCHAR(5) PRIMARY KEY NOT NULL,
+  "description" TEXT NOT NULL
+) WITHOUT ROWID;
+
+-- The XML specification says that an entry needs to have at least
+-- one sense and one reading. I will just assume this is the case, and
+-- not implement a check for it.
+
+CREATE TABLE "JMdict_Entry" (
+  "id" INTEGER PRIMARY KEY
+);
+
+-- KanjiElement
+
+-- Note that the written form of a kanji element is stored in the column
+-- named "reading"; foreign keys to this table must reference that column.
+CREATE TABLE "JMdict_KanjiElement" (
+  "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
+  "reading" TEXT NOT NULL,
+  "news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
+  "ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
+  "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
+  "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
+  "nf" INTEGER,
+  PRIMARY KEY ("entryId", "reading")
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_KanjiElementInfo" (
+  "entryId" INTEGER NOT NULL,
+  "reading" TEXT NOT NULL,
+  "info" TEXT NOT NULL REFERENCES "JMdict_InfoKanji"("id"),
+  FOREIGN KEY ("entryId", "reading")
+    REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
+  PRIMARY KEY ("entryId", "reading", "info")
+) WITHOUT ROWID;
+
+-- ReadingElement
+
+CREATE TABLE "JMdict_ReadingElement" (
+  "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
+  "reading" TEXT NOT NULL,
+  "readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
+  "news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
+  "ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
+  "spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
+  "gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
+  "nf" INTEGER,
+  PRIMARY KEY ("entryId", "reading")
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_ReadingElementRestriction" (
+  "entryId" INTEGER NOT NULL,
+  "reading" TEXT NOT NULL,
+  "restriction" TEXT NOT NULL,
+  FOREIGN KEY ("entryId", "reading")
+    REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
+  PRIMARY KEY ("entryId", "reading", "restriction")
+) WITHOUT ROWID;
+
+CREATE TABLE "JMdict_ReadingElementInfo" (
+  "entryId" INTEGER NOT NULL,
+  "reading" TEXT NOT NULL,
+  "info" TEXT NOT NULL REFERENCES "JMdict_InfoReading"("id"),
+  FOREIGN KEY ("entryId", "reading")
+    REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
+  PRIMARY KEY ("entryId", "reading", "info")
+) WITHOUT ROWID;
+
+-- Sense
+
+-- Optimal solution here would be to have an id INTEGER AUTOINCREMENT,
+-- and the entryId as a composite key, since the entryId is used below.
+-- However, autoincrementing composite keys are not available in sqlite
+
+CREATE TABLE "JMdict_Sense" (
+  "id" INTEGER PRIMARY KEY AUTOINCREMENT,
+  "entryId" INTEGER REFERENCES "JMdict_Entry"("id")
+);
+
+CREATE TABLE "JMdict_SenseRestrictedToKanji" (
+  "entryId" INTEGER,
+  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
+  "kanji" TEXT,
+  -- The parent column is "reading": "JMdict_KanjiElement" has no "kanji"
+  -- column, and a foreign key to a missing column makes every insert fail
+  -- with "foreign key mismatch" once foreign keys are enforced.
+  FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
+  PRIMARY KEY ("entryId", "senseId", "kanji")
+);
+
+CREATE TABLE "JMdict_SenseRestrictedToReading" (
+  "entryId" INTEGER,
+  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
+  "reading" TEXT,
+  FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
+  PRIMARY KEY ("entryId", "senseId", "reading")
+);
+
+-- In order to add xrefs, you will need to have added the entry to xref to.
+-- These should be added in a second pass of the dictionary file.
+
+-- In this version of JMdict, the xrefs can be ambiguous.
+-- There have been rumours of an unambiguous version possibly arriving in the future
+-- (https://www.edrdg.org/jmdict_edict_list/2019/msg00360.html)
+-- but for the time being, this needs to be modeled as a one-to-many relationship.
+
+-- These two things also concern "SenseAntonym"
+
+-- "See also" cross-references from a sense to a kanji and/or reading element
+-- of another entry ("xrefEntryId"), optionally narrowed to one target sense.
+-- Most key columns are nullable, and SQLite treats NULLs in the primary key
+-- of a rowid table as distinct, so rows containing NULLs are not deduplicated.
+
+CREATE TABLE "JMdict_SenseSeeAlso" (
+  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
+  "xrefEntryId" INTEGER,
+  "seeAlsoReading" TEXT,
+  "seeAlsoKanji" TEXT,
+  -- NOTE(review): declared TEXT although it references the INTEGER sense id.
+  "seeAlsoSense" TEXT REFERENCES "JMdict_Sense"("id"),
+  -- An xref must name at least one target form. The previous expression
+  -- ("seeAlsoReading" = NULL <> "seeAlsoKanji" = NULL) compared against NULL
+  -- with "=", which always yields NULL, so the constraint never rejected a row.
+  -- NOTE(review): if exactly one of the two must be set, use
+  -- ("seeAlsoReading" IS NULL) <> ("seeAlsoKanji" IS NULL) instead; confirm
+  -- against the importer before tightening.
+  CHECK ("seeAlsoReading" IS NOT NULL OR "seeAlsoKanji" IS NOT NULL),
+  -- TODO: check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
+  -- The parent key of "JMdict_KanjiElement" is ("entryId", "reading"); it has no "kanji" column.
+  -- A composite foreign key is only checked when none of its child columns is NULL.
+  FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
+  FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
+  PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
+);
+
+-- Antonym cross-references; same shape and constraints as "JMdict_SenseSeeAlso".
+
+CREATE TABLE "JMdict_SenseAntonym" (
+  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
+  "xrefEntryId" INTEGER,
+  "antonymReading" TEXT,
+  "antonymKanji" TEXT,
+  -- NOTE(review): declared TEXT although it references the INTEGER sense id.
+  "antonymSense" TEXT REFERENCES "JMdict_Sense"("id"),
+  -- At least one target form must be given; the previous "= NULL" comparison
+  -- always evaluated to NULL and therefore never rejected a row.
+  CHECK ("antonymReading" IS NOT NULL OR "antonymKanji" IS NOT NULL),
+  -- The parent key of "JMdict_KanjiElement" is ("entryId", "reading"); it has no "kanji" column.
+  FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
+  FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
+  PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
+);
+
+-- These cross references are going to be mostly accessed from a sense.
+-- This will speed up the join.
+CREATE INDEX "JMdict_SenseSeeAlso_bySenseId" ON "JMdict_SenseSeeAlso"("senseId");
+CREATE INDEX "JMdict_SenseAntonym_bySenseId" ON "JMdict_SenseAntonym"("senseId");
+
+-- Part-of-speech tags (from "JMdict_InfoPOS") attached to a sense.
+
+CREATE TABLE "JMdict_SensePOS" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "pos" TEXT NOT NULL REFERENCES "JMdict_InfoPOS"("id"),
+  PRIMARY KEY ("senseId", "pos")
+) WITHOUT ROWID;
+
+-- Field-of-application tags (from "JMdict_InfoField") attached to a sense.
+
+CREATE TABLE "JMdict_SenseField" (
+  "senseId" INTEGER NOT NULL,
+  "field" TEXT NOT NULL,
+  FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("id"),
+  FOREIGN KEY ("field") REFERENCES "JMdict_InfoField"("id"),
+  PRIMARY KEY ("senseId", "field")
+) WITHOUT ROWID;
+
+-- Miscellaneous tags (from "JMdict_InfoMisc") attached to a sense.
+
+CREATE TABLE "JMdict_SenseMisc" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "misc" TEXT NOT NULL REFERENCES "JMdict_InfoMisc"("id"),
+  PRIMARY KEY ("senseId", "misc")
+) WITHOUT ROWID;
+
+-- Source-language information for a sense. "phrase" is nullable, which is why
+-- this table keeps its rowid: WITHOUT ROWID tables require NOT NULL key columns.
+-- The default language is written as the string literal 'eng'; double quotes
+-- denote identifiers in SQL and are only accepted as strings by a SQLite quirk.
+
+CREATE TABLE "JMdict_SenseLanguageSource" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "language" CHAR(3) NOT NULL DEFAULT 'eng',
+  "phrase" TEXT,
+  "fullyDescribesSense" BOOLEAN NOT NULL DEFAULT TRUE,
+  "constructedFromSmallerWords" BOOLEAN NOT NULL DEFAULT FALSE,
+  PRIMARY KEY ("senseId", "language", "phrase")
+);
+
+-- Dialect tags attached to a sense. The key column of "JMdict_InfoDialect" is
+-- "id" (it has no "dialect" column), so that is the column referenced here.
+
+CREATE TABLE "JMdict_SenseDialect" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("id"),
+  PRIMARY KEY ("senseId", "dialect")
+) WITHOUT ROWID;
+
+-- In the documentation, it says that the glossary can contain
+-- special prioritized entries, but I can't find a single one of those.
+-- Neither can I find a glossary tag with g_gend data, so these parts
+-- will be omitted.
+
+-- Glosses of a sense, one row per ("senseId", "language", "phrase").
+-- The default language is the string literal 'eng' (single quotes: double
+-- quotes denote identifiers and only work as strings through a SQLite quirk).
+
+CREATE TABLE "JMdict_SenseGlossary" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "phrase" TEXT NOT NULL,
+  "language" CHAR(3) NOT NULL DEFAULT 'eng',
+  "type" TEXT,
+  PRIMARY KEY ("senseId", "language", "phrase")
+) WITHOUT ROWID;
+
+-- Free-text notes attached to a sense.
+
+CREATE TABLE "JMdict_SenseInfo" (
+  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
+  "info" TEXT NOT NULL,
+  PRIMARY KEY ("senseId", "info")
+) WITHOUT ROWID;
+
+-- There is not a single example sentence that doesn't come from
+-- the Tanaka Corpus, so I will leave the type out for now.
+
+CREATE TABLE "JMdict_ExampleSentence" (
+  "id" INTEGER PRIMARY KEY,
+  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
+  "word" TEXT NOT NULL,
+  "source" TEXT NOT NULL,
+  "sourceLanguage" CHAR(3) NOT NULL DEFAULT 'eng',
+  "japanese" TEXT NOT NULL
+  -- "type" TEXT NOT NULL DEFAULT 'tat',
+);
+
+-- These tables are for optimizing searches.
+
+-- In order to include results from both, the software should
+-- first check if the searchword is convertible to kana, and then
+-- potentially get results from both by doing a union between two
+-- selects.
+
+-- Search helper: maps a kana string to the entries it should find.
+
+CREATE TABLE "JMdict_EntryByKana" (
+  "kana" TEXT NOT NULL,
+  "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
+  PRIMARY KEY ("kana", "entryId")
+) WITHOUT ROWID;
+
+CREATE INDEX "JMdict_EntryByKana_byKana" ON "JMdict_EntryByKana"("kana");
+
+-- Search helper: maps an English string to the entries it should find.
+
+CREATE TABLE "JMdict_EntryByEnglish" (
+  "english" TEXT NOT NULL,
+  "entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
+  PRIMARY KEY ("english", "entryId")
+) WITHOUT ROWID;
+
+CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english");
+
+--------------
+-- RADKFILE --
+--------------
+
+-- One row per (kanji, radical) pair. The two indexes below allow lookups in
+-- either direction: kanji by radical ("RADK") and radicals by kanji ("KRAD").
+
+CREATE TABLE "RADKFILE" (
+  "kanji" CHAR(1) NOT NULL,
+  "radical" CHAR(1) NOT NULL,
+  PRIMARY KEY ("kanji", "radical")
+) WITHOUT ROWID;
+
+CREATE INDEX "RADK" ON "RADKFILE"("radical");
+CREATE INDEX "KRAD" ON "RADKFILE"("kanji");
+
+-- The distinct set of radicals that occur in "RADKFILE".
+
+CREATE VIEW "RADKFILE_Radicals" AS
+SELECT DISTINCT "radical" FROM "RADKFILE";
+
+--------------
+-- KANJIDIC --
+--------------
+
+-- One row per kanji; every other KANJIDIC table references "literal".
+
+CREATE TABLE "KANJIDIC_Character" (
+  "literal" CHAR(1) NOT NULL PRIMARY KEY,
+  "grade" INTEGER CHECK ("grade" BETWEEN 1 AND 10),
+  "strokeCount" INTEGER NOT NULL,
+  "frequency" INTEGER,
+  "jlpt" INTEGER
+) WITHOUT ROWID;
+
+CREATE TABLE "KANJIDIC_RadicalName" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "name" TEXT NOT NULL,
+  PRIMARY KEY("kanji", "name")
+) WITHOUT ROWID;
+
+-- At most one codepoint per kanji and encoding ("type").
+
+CREATE TABLE "KANJIDIC_Codepoint" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "type" VARCHAR(6) NOT NULL CHECK ("type" IN ('jis208', 'jis212', 'jis213', 'ucs')),
+  "codepoint" VARCHAR(7) NOT NULL,
+  PRIMARY KEY ("kanji", "type")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Codepoint_byCharacter" ON "KANJIDIC_Codepoint"("kanji");
+
+-- Radical number per kanji and numbering scheme. The upper bound of the
+-- allowed range depends on the scheme: 214 for 'classical', 212 otherwise.
+
+CREATE TABLE "KANJIDIC_Radical" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "type" VARCHAR(9) NOT NULL CHECK ("type" IN ('classical', 'nelson_c')),
+  "radical" INTEGER NOT NULL CHECK ("radical" BETWEEN 1 AND IIF("type" = 'classical', 214, 212)),
+  PRIMARY KEY("kanji", "type")
+) WITHOUT ROWID;
+
+-- Additional stroke counts recorded for a kanji besides
+-- "KANJIDIC_Character"."strokeCount".
+
+CREATE TABLE "KANJIDIC_StrokeMiscount" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "strokeCount" INTEGER NOT NULL,
+  PRIMARY KEY("kanji", "strokeCount")
+) WITHOUT ROWID;
+
+CREATE TABLE "KANJIDIC_Variant" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "variant" TEXT NOT NULL,
+  "type" VARCHAR(8) NOT NULL CHECK (
+    "type" IN (
+      'jis208',
+      'jis212',
+      'jis213',
+      'deroo',
+      'njecd',
+      's_h',
+      'nelson_c',
+      'oneill',
+      'ucs'
+    )
+  ),
+  PRIMARY KEY ("kanji", "type", "variant")
+) WITHOUT ROWID;
+
+-- Dictionary references are split over two tables: 'moro' references carry
+-- the extra "volume" and "page" columns and live in their own table. Both are
+-- combined again by the "KANJIDIC_DictionaryReference" view below.
+
+CREATE TABLE "_KANJIDIC_DictionaryReference_Part1" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "ref" VARCHAR(9) NOT NULL,
+  "type" VARCHAR(16) NOT NULL CHECK(
+    "type" IN (
+      'nelson_c',
+      'nelson_n',
+      'halpern_njecd',
+      'halpern_kkd',
+      'halpern_kkld',
+      'halpern_kkld_2ed',
+      'heisig',
+      'heisig6',
+      'gakken',
+      'oneill_names',
+      'oneill_kk',
+      'henshall',
+      'sh_kk',
+      'sh_kk2',
+      'sakade',
+      'jf_cards',
+      'henshall3',
+      'tutt_cards',
+      'crowley',
+      'kanji_in_context',
+      'busy_people',
+      'kodansha_compact',
+      'maniette'
+    )
+  ),
+  PRIMARY KEY("kanji", "type")
+) WITHOUT ROWID;
+
+CREATE TABLE "_KANJIDIC_DictionaryReference_Moro" (
+  "kanji" CHAR(1) NOT NULL PRIMARY KEY REFERENCES "KANJIDIC_Character"("literal"),
+  "ref" VARCHAR(7) NOT NULL,
+  "volume" INTEGER,
+  "page" INTEGER
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_DictionaryReference_byPart1" ON "_KANJIDIC_DictionaryReference_Part1"("kanji", "ref", "type");
+CREATE INDEX "KANJIDIC_DictionaryReference_byMoro" ON "_KANJIDIC_DictionaryReference_Moro"("kanji", "ref", "volume", "page");
+
+-- The two branches can never produce the same row: 'moro' is not an allowed
+-- "type" in Part1, and each table's primary key already makes its own rows
+-- unique. UNION ALL therefore returns the same rows as UNION without the
+-- de-duplication pass.
+
+CREATE VIEW "KANJIDIC_DictionaryReference" AS
+SELECT "kanji", "ref", "type", NULL AS "volume", NULL AS "page" FROM "_KANJIDIC_DictionaryReference_Part1"
+UNION ALL
+SELECT "kanji", "ref", 'moro' AS "type", "volume", "page" FROM "_KANJIDIC_DictionaryReference_Moro";
+
+CREATE TABLE "KANJIDIC_QueryCode" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "code" VARCHAR(7) NOT NULL,
+  "type" VARCHAR(11) NOT NULL CHECK ("type" IN ('skip', 'sh_desc', 'four_corner', 'deroo', 'misclass')),
+  "SKIPMisclassification" VARCHAR(15),
+  PRIMARY KEY ("kanji", "type", "code")
+) WITHOUT ROWID;
+
+-- Readings whose "type" is one of the three values allowed by the CHECK;
+-- kun'yomi and on'yomi have their own tables below.
+
+CREATE TABLE "KANJIDIC_Reading" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "type" VARCHAR(8) NOT NULL CHECK ("type" IN ('korean_h', 'korean_r', 'pinyin')),
+  "reading" TEXT NOT NULL,
+  PRIMARY KEY ("kanji", "type", "reading")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Reading_byReading" ON "KANJIDIC_Reading"("reading");
+
+CREATE TABLE "KANJIDIC_Kunyomi" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "yomi" TEXT NOT NULL,
+  "isJouyou" BOOLEAN,
+  PRIMARY KEY ("kanji", "yomi")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Kunyomi_byYomi" ON "KANJIDIC_Kunyomi"("yomi");
+
+CREATE TABLE "KANJIDIC_Onyomi" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "yomi" TEXT NOT NULL,
+  "type" VARCHAR(7) CHECK ("type" IN ('kan', 'go', 'tou', 'kan''you')),
+  "isJouyou" BOOLEAN,
+  PRIMARY KEY ("kanji", "yomi")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Onyomi_byYomi" ON "KANJIDIC_Onyomi"("yomi");
+
+-- The default language is the string literal 'eng' (single quotes: double
+-- quotes denote identifiers and only work as strings through a SQLite quirk).
+
+CREATE TABLE "KANJIDIC_Meaning" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "language" CHAR(3) NOT NULL DEFAULT 'eng',
+  "meaning" TEXT NOT NULL,
+  PRIMARY KEY ("kanji", "language", "meaning")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Meaning_byMeaning" ON "KANJIDIC_Meaning"("meaning");
+
+CREATE TABLE "KANJIDIC_Nanori" (
+  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
+  "nanori" TEXT NOT NULL,
+  PRIMARY KEY ("kanji", "nanori")
+) WITHOUT ROWID;
+
+CREATE INDEX "KANJIDIC_Nanori_byNanori" ON "KANJIDIC_Nanori"("nanori");
+
+-------------------------
+-- Interdict relations --
+-------------------------
+
+-- Radk - kanjidic
+-- 
kanjireading -> filter kanji regex - kanjidic +-- index kanji search by romaji +-- index kanji search by hiragana +-- index word search by romaji +-- index word search by hiragana + diff --git a/migrations/0002_insert_info_values.sql b/migrations/0002_insert_info_values.sql new file mode 100644 index 0000000..36816be --- /dev/null +++ b/migrations/0002_insert_info_values.sql @@ -0,0 +1,251 @@ +INSERT INTO "JMdict_InfoDialect"("id", "description") VALUES + ('bra', 'Brazilian'), + ('hob', 'Hokkaido-ben'), + ('ksb', 'Kansai-ben'), + ('ktb', 'Kantou-ben'), + ('kyb', 'Kyoto-ben'), + ('kyu', 'Kyuushuu-ben'), + ('nab', 'Nagano-ben'), + ('osb', 'Osaka-ben'), + ('rkb', 'Ryuukyuu-ben'), + ('thb', 'Touhoku-ben'), + ('tsb', 'Tosa-ben'), + ('tsug', 'Tsugaru-ben'); + +INSERT INTO "JMdict_InfoField"("id", "description") VALUES + ('agric', 'agriculture'), + ('anat', 'anatomy'), + ('archeol', 'archeology'), + ('archit', 'architecture'), + ('art', 'art, aesthetics'), + ('astron', 'astronomy'), + ('audvid', 'audiovisual'), + ('aviat', 'aviation'), + ('baseb', 'baseball'), + ('biochem', 'biochemistry'), + ('biol', 'biology'), + ('bot', 'botany'), + ('Buddh', 'Buddhism'), + ('bus', 'business'), + ('chem', 'chemistry'), + ('Christn', 'Christianity'), + ('cloth', 'clothing'), + ('comp', 'computing'), + ('cryst', 'crystallography'), + ('ecol', 'ecology'), + ('econ', 'economics'), + ('elec', 'electricity, elec. 
eng.'), + ('electr', 'electronics'), + ('embryo', 'embryology'), + ('engr', 'engineering'), + ('ent', 'entomology'), + ('finc', 'finance'), + ('fish', 'fishing'), + ('food', 'food, cooking'), + ('gardn', 'gardening, horticulture'), + ('genet', 'genetics'), + ('geogr', 'geography'), + ('geol', 'geology'), + ('geom', 'geometry'), + ('go', 'go (game)'), + ('golf', 'golf'), + ('gramm', 'grammar'), + ('grmyth', 'Greek mythology'), + ('hanaf', 'hanafuda'), + ('horse', 'horse racing'), + ('law', 'law'), + ('ling', 'linguistics'), + ('logic', 'logic'), + ('MA', 'martial arts'), + ('mahj', 'mahjong'), + ('math', 'mathematics'), + ('mech', 'mechanical engineering'), + ('med', 'medicine'), + ('met', 'meteorology'), + ('mil', 'military'), + ('music', 'music'), + ('ornith', 'ornithology'), + ('paleo', 'paleontology'), + ('pathol', 'pathology'), + ('pharm', 'pharmacy'), + ('phil', 'philosophy'), + ('photo', 'photography'), + ('physics', 'physics'), + ('physiol', 'physiology'), + ('print', 'printing'), + ('psy', 'psychiatry'), + ('psych', 'psychology'), + ('rail', 'railway'), + ('Shinto', 'Shinto'), + ('shogi', 'shogi'), + ('sports', 'sports'), + ('stat', 'statistics'), + ('sumo', 'sumo'), + ('telec', 'telecommunications'), + ('tradem', 'trademark'), + ('vidg', 'video games'), + ('zool', 'zoology'); + +INSERT INTO "JMdict_InfoKanji"("id", "description") VALUES + ('ateji', 'ateji (phonetic) reading'), + ('ik', 'word containing irregular kana usage'), + ('iK', 'word containing irregular kanji usage'), + ('io', 'irregular okurigana usage'), + ('oK', 'word containing out-dated kanji or kanji usage'), + ('rK', 'rarely-used kanji form'); + +INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES + ('abbr', 'abbreviation'), + ('arch', 'archaism'), + ('char', 'character'), + ('chn', 'children''s language'), + ('col', 'colloquialism'), + ('company', 'company name'), + ('creat', 'creature'), + ('dated', 'dated term'), + ('dei', 'deity'), + ('derog', 'derogatory'), + ('doc', 'document'), 
+ ('ev', 'event'), + ('fam', 'familiar language'), + ('fem', 'female term or language'), + ('fict', 'fiction'), + ('form', 'formal or literary term'), + ('given', 'given name or forename, gender not specified'), + ('group', 'group'), + ('hist', 'historical term'), + ('hon', 'honorific or respectful (sonkeigo) language'), + ('hum', 'humble (kenjougo) language'), + ('id', 'idiomatic expression'), + ('joc', 'jocular, humorous term'), + ('leg', 'legend'), + ('m-sl', 'manga slang'), + ('male', 'male term or language'), + ('myth', 'mythology'), + ('net-sl', 'Internet slang'), + ('obj', 'object'), + ('obs', 'obsolete term'), + ('obsc', 'obscure term'), + ('on-mim', 'onomatopoeic or mimetic word'), + ('organization', 'organization name'), + ('oth', 'other'), + ('person', 'full name of a particular person'), + ('place', 'place name'), + ('poet', 'poetical term'), + ('pol', 'polite (teineigo) language'), + ('product', 'product name'), + ('proverb', 'proverb'), + ('quote', 'quotation'), + ('rare', 'rare'), + ('relig', 'religion'), + ('sens', 'sensitive'), + ('serv', 'service'), + ('sl', 'slang'), + ('station', 'railway station'), + ('surname', 'family or surname'), + ('uk', 'word usually written using kana alone'), + ('unclass', 'unclassified name'), + ('vulg', 'vulgar expression or word'), + ('work', 'work of art, literature, music, etc. 
name'), + ('X', 'rude or X-rated term (not displayed in educational software)'), + ('yoji', 'yojijukugo'); + +INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES + ('adj-f', 'noun or verb acting prenominally'), + ('adj-i', 'adjective (keiyoushi)'), + ('adj-ix', 'adjective (keiyoushi) - yoi/ii class'), + ('adj-kari', '''kari'' adjective (archaic)'), + ('adj-ku', '''ku'' adjective (archaic)'), + ('adj-na', 'adjectival nouns or quasi-adjectives (keiyodoshi)'), + ('adj-nari', 'archaic/formal form of na-adjective'), + ('adj-no', 'nouns which may take the genitive case particle ''no'''), + ('adj-pn', 'pre-noun adjectival (rentaishi)'), + ('adj-shiku', '''shiku'' adjective (archaic)'), + ('adj-t', '''taru'' adjective'), + ('adv', 'adverb (fukushi)'), + ('adv-to', 'adverb taking the ''to'' particle'), + ('aux', 'auxiliary'), + ('aux-adj', 'auxiliary adjective'), + ('aux-v', 'auxiliary verb'), + ('conj', 'conjunction'), + ('cop', 'copula'), + ('ctr', 'counter'), + ('exp', 'expressions (phrases, clauses, etc.)'), + ('int', 'interjection (kandoushi)'), + ('n', 'noun (common) (futsuumeishi)'), + ('n-adv', 'adverbial noun (fukushitekimeishi)'), + ('n-pr', 'proper noun'), + ('n-pref', 'noun, used as a prefix'), + ('n-suf', 'noun, used as a suffix'), + ('n-t', 'noun (temporal) (jisoumeishi)'), + ('num', 'numeric'), + ('pn', 'pronoun'), + ('pref', 'prefix'), + ('prt', 'particle'), + ('suf', 'suffix'), + ('unc', 'unclassified'), + ('v-unspec', 'verb unspecified'), + ('v1', 'Ichidan verb'), + ('v1-s', 'Ichidan verb - kureru special class'), + ('v2a-s', 'Nidan verb with ''u'' ending (archaic)'), + ('v2b-k', 'Nidan verb (upper class) with ''bu'' ending (archaic)'), + ('v2b-s', 'Nidan verb (lower class) with ''bu'' ending (archaic)'), + ('v2d-k', 'Nidan verb (upper class) with ''dzu'' ending (archaic)'), + ('v2d-s', 'Nidan verb (lower class) with ''dzu'' ending (archaic)'), + ('v2g-k', 'Nidan verb (upper class) with ''gu'' ending (archaic)'), + ('v2g-s', 'Nidan verb (lower class) 
with ''gu'' ending (archaic)'), + ('v2h-k', 'Nidan verb (upper class) with ''hu/fu'' ending (archaic)'), + ('v2h-s', 'Nidan verb (lower class) with ''hu/fu'' ending (archaic)'), + ('v2k-k', 'Nidan verb (upper class) with ''ku'' ending (archaic)'), + ('v2k-s', 'Nidan verb (lower class) with ''ku'' ending (archaic)'), + ('v2m-k', 'Nidan verb (upper class) with ''mu'' ending (archaic)'), + ('v2m-s', 'Nidan verb (lower class) with ''mu'' ending (archaic)'), + ('v2n-s', 'Nidan verb (lower class) with ''nu'' ending (archaic)'), + ('v2r-k', 'Nidan verb (upper class) with ''ru'' ending (archaic)'), + ('v2r-s', 'Nidan verb (lower class) with ''ru'' ending (archaic)'), + ('v2s-s', 'Nidan verb (lower class) with ''su'' ending (archaic)'), + ('v2t-k', 'Nidan verb (upper class) with ''tsu'' ending (archaic)'), + ('v2t-s', 'Nidan verb (lower class) with ''tsu'' ending (archaic)'), + ('v2w-s', 'Nidan verb (lower class) with ''u'' ending and ''we'' conjugation (archaic)'), + ('v2y-k', 'Nidan verb (upper class) with ''yu'' ending (archaic)'), + ('v2y-s', 'Nidan verb (lower class) with ''yu'' ending (archaic)'), + ('v2z-s', 'Nidan verb (lower class) with ''zu'' ending (archaic)'), + ('v4b', 'Yodan verb with ''bu'' ending (archaic)'), + ('v4g', 'Yodan verb with ''gu'' ending (archaic)'), + ('v4h', 'Yodan verb with ''hu/fu'' ending (archaic)'), + ('v4k', 'Yodan verb with ''ku'' ending (archaic)'), + ('v4m', 'Yodan verb with ''mu'' ending (archaic)'), + ('v4n', 'Yodan verb with ''nu'' ending (archaic)'), + ('v4r', 'Yodan verb with ''ru'' ending (archaic)'), + ('v4s', 'Yodan verb with ''su'' ending (archaic)'), + ('v4t', 'Yodan verb with ''tsu'' ending (archaic)'), + ('v5aru', 'Godan verb - -aru special class'), + ('v5b', 'Godan verb with ''bu'' ending'), + ('v5g', 'Godan verb with ''gu'' ending'), + ('v5k', 'Godan verb with ''ku'' ending'), + ('v5k-s', 'Godan verb - Iku/Yuku special class'), + ('v5m', 'Godan verb with ''mu'' ending'), + ('v5n', 'Godan verb with ''nu'' ending'), + 
('v5r', 'Godan verb with ''ru'' ending'), + ('v5r-i', 'Godan verb with ''ru'' ending (irregular verb)'), + ('v5s', 'Godan verb with ''su'' ending'), + ('v5t', 'Godan verb with ''tsu'' ending'), + ('v5u', 'Godan verb with ''u'' ending'), + ('v5u-s', 'Godan verb with ''u'' ending (special class)'), + ('v5uru', 'Godan verb - Uru old class verb (old form of Eru)'), + ('vi', 'intransitive verb'), + ('vk', 'Kuru verb - special class'), + ('vn', 'irregular nu verb'), + ('vr', 'irregular ru verb, plain form ends with -ri'), + ('vs', 'noun or participle which takes the aux. verb suru'), + ('vs-c', 'su verb - precursor to the modern suru'), + ('vs-i', 'suru verb - included'), + ('vs-s', 'suru verb - special class'), + ('vt', 'transitive verb'), + ('vz', 'Ichidan verb - zuru verb (alternative form of -jiru verbs)'); + +INSERT INTO "JMdict_InfoReading"("id", "description") VALUES + ('gikun', 'gikun (meaning as reading) or jukujikun (special kanji reading)'), + ('ik', 'word containing irregular kana usage'), + ('ok', 'out-dated or obsolete kana usage'), + ('uK', 'word usually written using kanji alone'); diff --git a/pub2nix.lock b/pub2nix.lock new file mode 100644 index 0000000..8639414 --- /dev/null +++ b/pub2nix.lock @@ -0,0 +1,99 @@ +packages: + collection: + dependency: direct main + description: + name: collection + url: https://pub.dartlang.org + source: hosted + version: 1.16.0 + sha256: 0nx7mbxwxw5z4mdjr9z8hg6g8kgy3cv5pv5ax0j1i9kl36brk5rg + ffi: + dependency: transitive + description: + name: ffi + url: https://pub.dartlang.org + source: hosted + version: 1.1.2 + sha256: 0w0yd43y1fsfzihd4j6diymg90bgvyi2zqyb3vf0k6g8hk8x1yr6 + js: + dependency: transitive + description: + name: js + url: https://pub.dartlang.org + source: hosted + version: 0.6.4 + sha256: 01knzh9890ygxpy59rsh77h2ilh69wyl83idvrcvwzk8fknjldkb + lints: + dependency: direct dev + description: + name: lints + url: https://pub.dartlang.org + source: hosted + version: 1.0.1 + sha256: 
1xyn9xpzwfw1f0mp03pyvspcphkinhzawkgp5lwmi7p15mv1vgz2 + meta: + dependency: transitive + description: + name: meta + url: https://pub.dartlang.org + source: hosted + version: 1.7.0 + sha256: 1z8sx23l9jn2ickq3z63pqpb6k9y6gbnnvj9200c6v7m3cvd7jbv + path: + dependency: transitive + description: + name: path + url: https://pub.dartlang.org + source: hosted + version: 1.8.1 + sha256: 0wg5da3zykfbala8mvcl7r7blgi5qjb838qhw30brgj3ani2hdph + petitparser: + dependency: transitive + description: + name: petitparser + url: https://pub.dartlang.org + source: hosted + version: 5.0.0 + sha256: 01rcmvk1znjykph6znkd3skvfn61lj54km4xw6vwa5iwwg84p5ph + sqflite_common: + dependency: transitive + description: + name: sqflite_common + url: https://pub.dartlang.org + source: hosted + version: 2.2.1+1 + sha256: 1i3fmvgj0f1ynf03rd1x9r0bmxly333jyi392ghh1ahm0lnj1kzq + sqflite_common_ffi: + dependency: direct main + description: + name: sqflite_common_ffi + url: https://pub.dartlang.org + source: hosted + version: 2.1.1 + sha256: 0v5xq9xpg63zidf8as17zh0pbhfzf9k00a2wn183jz9i5rmh6207 + sqlite3: + dependency: transitive + description: + name: sqlite3 + url: https://pub.dartlang.org + source: hosted + version: 1.7.0 + sha256: 1x56g99nw3jqvx1ysggpmmvb9gap63wdxk0pjawzv47xxm058rhm + synchronized: + dependency: transitive + description: + name: synchronized + url: https://pub.dartlang.org + source: hosted + version: 3.0.0+2 + sha256: 1j6108cq1hbcqpwhk9sah8q3gcidd7222bzhha2nk9syxhzqy82i + xml: + dependency: direct main + description: + name: xml + url: https://pub.dartlang.org + source: hosted + version: 6.0.1 + sha256: 158srisyld2zwhchcz264rf8qnby54xan4db14hf7lda7bx8ikbh +sdks: + dart: '>=2.16.1 <3.0.0' diff --git a/pubspec.lock b/pubspec.lock new file mode 100644 index 0000000..e2c787b --- /dev/null +++ b/pubspec.lock @@ -0,0 +1,89 @@ +# Generated by pub +# See https://dart.dev/tools/pub/glossary#lockfile +packages: + collection: + dependency: "direct main" + description: + name: collection + url: 
"https://pub.dartlang.org" + source: hosted + version: "1.16.0" + ffi: + dependency: transitive + description: + name: ffi + url: "https://pub.dartlang.org" + source: hosted + version: "1.1.2" + js: + dependency: transitive + description: + name: js + url: "https://pub.dartlang.org" + source: hosted + version: "0.6.4" + lints: + dependency: "direct dev" + description: + name: lints + url: "https://pub.dartlang.org" + source: hosted + version: "1.0.1" + meta: + dependency: transitive + description: + name: meta + url: "https://pub.dartlang.org" + source: hosted + version: "1.7.0" + path: + dependency: transitive + description: + name: path + url: "https://pub.dartlang.org" + source: hosted + version: "1.8.1" + petitparser: + dependency: transitive + description: + name: petitparser + url: "https://pub.dartlang.org" + source: hosted + version: "5.0.0" + sqflite_common: + dependency: transitive + description: + name: sqflite_common + url: "https://pub.dartlang.org" + source: hosted + version: "2.2.1+1" + sqflite_common_ffi: + dependency: "direct main" + description: + name: sqflite_common_ffi + url: "https://pub.dartlang.org" + source: hosted + version: "2.1.1" + sqlite3: + dependency: transitive + description: + name: sqlite3 + url: "https://pub.dartlang.org" + source: hosted + version: "1.7.0" + synchronized: + dependency: transitive + description: + name: synchronized + url: "https://pub.dartlang.org" + source: hosted + version: "3.0.0+2" + xml: + dependency: "direct main" + description: + name: xml + url: "https://pub.dartlang.org" + source: hosted + version: "6.0.1" +sdks: + dart: ">=2.16.1 <3.0.0" diff --git a/pubspec.yaml b/pubspec.yaml new file mode 100644 index 0000000..731d81d --- /dev/null +++ b/pubspec.yaml @@ -0,0 +1,21 @@ +name: ja_db +description: A SQLite database containing open source japanese language translation data +version: 1.0.0 +homepage: https://git.nani.wtf/h7x4/ja_db + +environment: + sdk: '>=2.16.1 <3.0.0' + +dependencies: + collection: 
^1.16.0 + sqflite_common_ffi: ^2.1.1 + xml: ^6.0.1 + +dev_dependencies: + lints: ^1.0.0 + +executables: + ja_db: ja_db + +platforms: + linux: