Compare commits
3 Commits
main
...
sqlite-icu
| Author | SHA1 | Date | |
|---|---|---|---|
|
7bacfc39a8
|
|||
|
c74a5f5cb6
|
|||
|
4fbed59143
|
@@ -24,7 +24,11 @@ jobs:
|
|||||||
build-users-group =
|
build-users-group =
|
||||||
|
|
||||||
- name: Update database inputs
|
- name: Update database inputs
|
||||||
run: nix flake update datasources
|
run: |
|
||||||
|
nix flake update jmdict-src
|
||||||
|
nix flake update jmdict-with-examples-src
|
||||||
|
nix flake update radkfile-src
|
||||||
|
nix flake update kanjidic2-src
|
||||||
|
|
||||||
- name: Build database
|
- name: Build database
|
||||||
run: nix build .#database -L
|
run: nix build .#database -L
|
||||||
@@ -40,7 +44,7 @@ jobs:
|
|||||||
compression: 0
|
compression: 0
|
||||||
|
|
||||||
- name: Print database statistics
|
- name: Print database statistics
|
||||||
run: nix develop .#sqlite-debugging --command sqlite3_analyzer result/jadb.sqlite
|
run: nix develop .# --command sqlite3_analyzer result/jadb.sqlite
|
||||||
|
|
||||||
# TODO: Defer failure of tests until after the coverage report is generated and uploaded.
|
# TODO: Defer failure of tests until after the coverage report is generated and uploaded.
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
|
|||||||
3476
data/tanos-jlpt/n1.csv
Normal file
3476
data/tanos-jlpt/n1.csv
Normal file
File diff suppressed because it is too large
Load Diff
1835
data/tanos-jlpt/n2.csv
Normal file
1835
data/tanos-jlpt/n2.csv
Normal file
File diff suppressed because it is too large
Load Diff
1803
data/tanos-jlpt/n3.csv
Normal file
1803
data/tanos-jlpt/n3.csv
Normal file
File diff suppressed because it is too large
Load Diff
582
data/tanos-jlpt/n4.csv
Normal file
582
data/tanos-jlpt/n4.csv
Normal file
@@ -0,0 +1,582 @@
|
|||||||
|
,あ,Ah
|
||||||
|
,ああ,like that
|
||||||
|
間,あいだ,a space
|
||||||
|
合う,あう,to match
|
||||||
|
,あかちゃん,baby
|
||||||
|
上る,あがる,to rise
|
||||||
|
赤ん坊,あかんぼう,baby
|
||||||
|
空く,あく,"to open, to become empty"
|
||||||
|
,あげる,to give
|
||||||
|
浅い,あさい,"shallow, superficial"
|
||||||
|
味,あじ,flavour
|
||||||
|
明日,あす・あした,tomorrow
|
||||||
|
遊び,あそび,play
|
||||||
|
集る,あつまる,to gather
|
||||||
|
集める,あつめる,to collect something
|
||||||
|
謝る,あやまる,to apologize
|
||||||
|
安心,あんしん,relief
|
||||||
|
安全,あんぜん,safety
|
||||||
|
,あんな,such
|
||||||
|
以下,いか,less than
|
||||||
|
以外,いがい,with the exception of
|
||||||
|
医学,いがく,medical science
|
||||||
|
生きる,いきる,to live
|
||||||
|
意見,いけん,opinion
|
||||||
|
石,いし,stone
|
||||||
|
,いじめる,to tease
|
||||||
|
以上,いじょう,"more than, this is all"
|
||||||
|
急ぐ,いそぐ,to hurry
|
||||||
|
致す,いたす,(humble) to do
|
||||||
|
一度,いちど,once
|
||||||
|
一生懸命,いっしょうけんめい,with utmost effort
|
||||||
|
,いっぱい,full
|
||||||
|
糸,いと,thread
|
||||||
|
以内,いない,within
|
||||||
|
田舎,いなか,countryside
|
||||||
|
祈る,いのる,to pray
|
||||||
|
,いらっしゃる,"(respectful) to be, to come or to go"
|
||||||
|
植える,うえる,"to plant, to grow"
|
||||||
|
受付,うけつけ,receipt
|
||||||
|
受ける,うける,to take a lesson or test
|
||||||
|
動く,うごく,to move
|
||||||
|
,うち,within
|
||||||
|
打つ,うつ,to hit
|
||||||
|
美しい,うつくしい,beautiful
|
||||||
|
写す,うつす,to copy or photograph
|
||||||
|
移る,うつる,to move house or transfer
|
||||||
|
腕,うで,arm
|
||||||
|
裏,うら,reverse side
|
||||||
|
売り場,うりば,place where things are sold
|
||||||
|
,うん,(informal) yes
|
||||||
|
運転手,うんてんしゅ,driver
|
||||||
|
枝,えだ,"branch, twig"
|
||||||
|
選ぶ,えらぶ,to choose
|
||||||
|
遠慮,えんりょ・する,"to be reserved, to be restrained"
|
||||||
|
,おいでになる,(respectful) to be
|
||||||
|
お祝い,おいわい,congratulation
|
||||||
|
,おかげ,"owing to, thanks to"
|
||||||
|
,おかしい,strange or funny
|
||||||
|
億,おく,one hundred million
|
||||||
|
屋上,おくじょう,rooftop
|
||||||
|
贈り物,おくりもの,gift
|
||||||
|
送る,おくる,to send
|
||||||
|
遅れる,おくれる,to be late
|
||||||
|
起す,おこす,to wake
|
||||||
|
行う,おこなう,to do
|
||||||
|
怒る,おこる,"to get angry, to be angry"
|
||||||
|
押し入れ,おしいれ,closet
|
||||||
|
お嬢さん,おじょうさん,young lady
|
||||||
|
お宅,おたく,(polite) your house
|
||||||
|
落る,おちる,to fall or drop
|
||||||
|
,おっしゃる,(respectful) to say
|
||||||
|
夫,おっと,husband
|
||||||
|
,おつり,"change from purchase, balance"
|
||||||
|
音,おと,"sound, note"
|
||||||
|
落す,おとす,to drop
|
||||||
|
踊り,おどり,a dance
|
||||||
|
踊る,おどる,to dance
|
||||||
|
驚く,おどろく,to be surprised
|
||||||
|
お祭り,おまつり,festival
|
||||||
|
お見舞い,おみまい,"calling on someone who is ill, enquiry"
|
||||||
|
お土産,おみやげ,souvenir
|
||||||
|
思い出す,おもいだす,to remember
|
||||||
|
思う,おもう,"to think, to feel"
|
||||||
|
,おもちゃ,toy
|
||||||
|
表,おもて,the front
|
||||||
|
親,おや,parents
|
||||||
|
泳ぎ方,およぎかた,way of swimming
|
||||||
|
下りる,おりる,"to get off, to descend"
|
||||||
|
折る,おる,to break or to fold
|
||||||
|
お礼,おれい,expression of gratitude
|
||||||
|
折れる,おれる,to break or be folded
|
||||||
|
終わり,おわり,the end
|
||||||
|
海岸,かいがん,coast
|
||||||
|
会議,かいぎ,meeting
|
||||||
|
会議室,かいぎしつ,meeting room
|
||||||
|
会場,かいじょう,assembly hall or meeting place
|
||||||
|
会話,かいわ,conversation
|
||||||
|
帰り,かえり,return
|
||||||
|
変える,かえる,to change
|
||||||
|
科学,かがく,science
|
||||||
|
鏡,かがみ,mirror
|
||||||
|
掛ける,かける,to hang something
|
||||||
|
飾る,かざる,to decorate
|
||||||
|
火事,かじ,fire
|
||||||
|
,ガス,gas
|
||||||
|
堅/硬/固い,かたい,hard
|
||||||
|
形,かたち,shape
|
||||||
|
片付ける,かたづける,to tidy up
|
||||||
|
課長,かちょう,section manager
|
||||||
|
勝つ,かつ,to win
|
||||||
|
家内,かない,housewife
|
||||||
|
悲しい,かなしい,sad
|
||||||
|
必ず,かならず,"certainly,necessarily"
|
||||||
|
お・金持ち,かねもち/おかねもち,rich man
|
||||||
|
彼女,かのじょ,"she,girlfriend"
|
||||||
|
壁,かべ,wall
|
||||||
|
髪,かみ,hair
|
||||||
|
噛む,かむ,"to bite,to chew"
|
||||||
|
通う,かよう,to commute
|
||||||
|
彼,かれ,"he,boyfriend"
|
||||||
|
乾く,かわく,to get dry
|
||||||
|
代わり,かわり,"substitute,alternate"
|
||||||
|
変わる,かわる,to change
|
||||||
|
考える,かんがえる,to consider
|
||||||
|
関係,かんけい,relationship
|
||||||
|
看護師,かんごし, nurse
|
||||||
|
簡単,かんたん,simple
|
||||||
|
気,き,"spirit,mood"
|
||||||
|
機会,きかい,opportunity
|
||||||
|
危険,きけん,danger
|
||||||
|
聞こえる,きこえる,to be heard
|
||||||
|
汽車,きしゃ,steam train
|
||||||
|
技術,ぎじゅつ,"art,technology,skill"
|
||||||
|
季節,きせつ,season
|
||||||
|
規則,きそく,regulations
|
||||||
|
,きっと,surely
|
||||||
|
絹,きぬ,silk
|
||||||
|
厳しい,きびしい,strict
|
||||||
|
気分,きぶん,mood
|
||||||
|
決る,きまる,to be decided
|
||||||
|
君,きみ,(informal) You
|
||||||
|
決める,きめる,to decide
|
||||||
|
気持ち,きもち,"feeling,mood"
|
||||||
|
着物,きもの,kimono
|
||||||
|
客,きゃく,"guest,customer"
|
||||||
|
急,きゅう,"urgent, steep"
|
||||||
|
急行,きゅうこう,"speedy, express"
|
||||||
|
教育,きょういく,education
|
||||||
|
教会,きょうかい,church
|
||||||
|
競争,きょうそう,competition
|
||||||
|
興味,きょうみ,an interest
|
||||||
|
近所,きんじょ,neighbourhood
|
||||||
|
具合,ぐあい,"condition,health"
|
||||||
|
空気,くうき,"air,atmosphere"
|
||||||
|
空港,くうこう,airport
|
||||||
|
草,くさ,grass
|
||||||
|
首,くび,neck
|
||||||
|
雲,くも,cloud
|
||||||
|
比べる,くらべる,to compare
|
||||||
|
,くれる,to give
|
||||||
|
暮れる,くれる,"to get dark,to come to an end"
|
||||||
|
君,くん,suffix for familiar young male
|
||||||
|
毛,け,hair or fur
|
||||||
|
経済,けいざい,"finance,economy"
|
||||||
|
警察,けいさつ,police
|
||||||
|
景色,けしき,"scene,landscape"
|
||||||
|
消しゴム,けしゴム,eraser
|
||||||
|
下宿,げしゅく,lodging
|
||||||
|
決して,けっして,never
|
||||||
|
,けれど/けれども,however
|
||||||
|
原因,げんいん,"cause,source"
|
||||||
|
,けんか・する,to quarrel
|
||||||
|
研究,けんきゅう,research
|
||||||
|
研究室,けんきゅうしつ,"study room,laboratory"
|
||||||
|
見物,けんぶつ,sightseeing
|
||||||
|
子,こ,child
|
||||||
|
,こう,this way
|
||||||
|
郊外,こうがい,outskirts
|
||||||
|
講義,こうぎ,lecture
|
||||||
|
工業,こうぎょう,the manufacturing industry
|
||||||
|
高校,こうこう,high school
|
||||||
|
高校生,こうこうせい,high school student
|
||||||
|
工場,こうじょう/こうば,"factory,plant,mill,workshop"
|
||||||
|
校長,こうちょう,headmaster
|
||||||
|
交通,こうつう,"traffic,transportation"
|
||||||
|
講堂,こうどう,auditorium
|
||||||
|
高等学校,こうとうがっこう,high school
|
||||||
|
公務員,こうむいん,"civil servant, government worker"
|
||||||
|
国際,こくさい,international
|
||||||
|
心,こころ,"heart, mind, core"
|
||||||
|
御主人,ごしゅじん,(honorable) your husband
|
||||||
|
故障,こしょう・する,to break-down
|
||||||
|
ご存じ,ごぞんじ,(respect form ) to know
|
||||||
|
答,こたえ,response
|
||||||
|
,ごちそう,a feast
|
||||||
|
小鳥,ことり,small bird
|
||||||
|
,このあいだ,"the other day,recently"
|
||||||
|
,このごろ,"these days,nowadays"
|
||||||
|
細かい,こまかい,"small, fine"
|
||||||
|
込む,こむ,to include
|
||||||
|
米,こめ,uncooked rice
|
||||||
|
,ごらんになる,(respectful) to see
|
||||||
|
,これから,after this
|
||||||
|
怖い,こわい,frightening
|
||||||
|
壊す,こわす,to break
|
||||||
|
壊れる,こわれる,to be broken
|
||||||
|
今度,こんど,"now,next time"
|
||||||
|
今夜,こんや,tonight
|
||||||
|
最近,さいきん,"latest,nowadays"
|
||||||
|
最後,さいご,"last,end"
|
||||||
|
最初,さいしょ,"beginning,first"
|
||||||
|
坂,さか,"slope,hill"
|
||||||
|
探す,さがす,to look for
|
||||||
|
下る,さがる,"to get down,to descend"
|
||||||
|
盛ん,さかん,"popularity,prosperous"
|
||||||
|
下げる,さげる,"to hang,to lower,to move back"
|
||||||
|
差し上げる,さしあげる,(polite) to give
|
||||||
|
,さっき,some time ago
|
||||||
|
寂しい,さびしい,lonely
|
||||||
|
さ来月,さらいげつ,the month after next
|
||||||
|
さ来週,さらいしゅう,the week after next
|
||||||
|
騒ぐ,さわぐ,"to make noise,to be excited"
|
||||||
|
触る,さわる,to touch
|
||||||
|
産業,さんぎょう,industry
|
||||||
|
残念,ざんねん,disappointment
|
||||||
|
市,し,city
|
||||||
|
字,じ,character
|
||||||
|
試合,しあい,"match,game"
|
||||||
|
仕方,しかた,method
|
||||||
|
試験,しけん,examination
|
||||||
|
事故,じこ,accident
|
||||||
|
地震,じしん,earthquake
|
||||||
|
時代,じだい,era
|
||||||
|
下着,したぎ,underwear
|
||||||
|
,しっかり,"firmly,steadily"
|
||||||
|
失敗,しっぱい,"failure,mistake"
|
||||||
|
辞典,じてん,dictionary
|
||||||
|
品物,しなもの,goods
|
||||||
|
,しばらく,little while
|
||||||
|
島,しま,island
|
||||||
|
市民,しみん,citizen
|
||||||
|
事務所,じむしょ,office
|
||||||
|
社会,しゃかい,"society,public"
|
||||||
|
社長,しゃちょう,company president
|
||||||
|
自由,じゆう,freedom
|
||||||
|
習慣,しゅうかん,"custom,manners"
|
||||||
|
住所,じゅうしょ,"an address,a residence"
|
||||||
|
柔道,じゅうどう,judo
|
||||||
|
十分,じゅうぶん,enough
|
||||||
|
趣味,しゅみ,hobby
|
||||||
|
紹介,しょうかい,introduction
|
||||||
|
小学校,しょうがっこう,elementary school
|
||||||
|
小説,しょうせつ,novel
|
||||||
|
将来,しょうらい,"future,prospects"
|
||||||
|
食料品,しょくりょうひん,groceries
|
||||||
|
女性,じょせい,woman
|
||||||
|
知らせる,しらせる,to notify
|
||||||
|
調べる,しらべる,to investigate
|
||||||
|
人口,じんこう,population
|
||||||
|
神社,じんじゃ,Shinto shrine
|
||||||
|
親切,しんせつ,kindness
|
||||||
|
新聞社,しんぶんしゃ,newspaper company
|
||||||
|
水泳,すいえい,swimming
|
||||||
|
水道,すいどう,water supply
|
||||||
|
数学,すうがく,"mathematics,arithmetic"
|
||||||
|
過ぎる,すぎる,to exceed
|
||||||
|
凄い,すごい,terrific
|
||||||
|
進む,すすむ,to make progress
|
||||||
|
,すっかり,completely
|
||||||
|
,すっと,"straight,all of a sudden"
|
||||||
|
捨てる,すてる,to throw away
|
||||||
|
砂,すな,sand
|
||||||
|
滑る,すべる,"to slide,to slip"
|
||||||
|
隅,すみ,"corner,nook"
|
||||||
|
済む,すむ,to finish
|
||||||
|
,すり,pickpocket
|
||||||
|
,すると,then
|
||||||
|
生活,せいかつ・する,to live
|
||||||
|
生産,せいさん・する,to produce
|
||||||
|
政治,せいじ,"politics,government"
|
||||||
|
西洋,せいよう,western countries
|
||||||
|
世界,せかい,the world
|
||||||
|
席,せき,seat
|
||||||
|
説明,せつめい,explanation
|
||||||
|
背中,せなか,back of the body
|
||||||
|
線,せん,line
|
||||||
|
戦争,せんそう,war
|
||||||
|
先輩,せんぱい,senior
|
||||||
|
,そう,really
|
||||||
|
育てる,そだてる,"to rear,to bring up"
|
||||||
|
卒業,そつぎょう,graduation
|
||||||
|
祖父,そふ,grandfather
|
||||||
|
祖母,そぼ,grandmother
|
||||||
|
,それで,because of that
|
||||||
|
,それに,moreover
|
||||||
|
,それほど,to that extent
|
||||||
|
,そろそろ,"gradually,soon"
|
||||||
|
,そんな,that sort of
|
||||||
|
,そんなに,"so much,like that"
|
||||||
|
退院,たいいん・する,to leave hospital
|
||||||
|
大学生,だいがくせい,university student
|
||||||
|
大事,だいじ,"important,valuable,serious matter"
|
||||||
|
大体,だいたい,generally
|
||||||
|
,たいてい,usually
|
||||||
|
大分,だいぶ,greatly
|
||||||
|
台風,たいふう,typhoon
|
||||||
|
倒れる,たおれる,to break down
|
||||||
|
,だから,"so,therefore"
|
||||||
|
確か,たしか,definite
|
||||||
|
足す,たす,to add a number
|
||||||
|
訪ねる,たずねる,to visit
|
||||||
|
尋ねる,たずねる,to ask
|
||||||
|
正しい,ただしい,correct
|
||||||
|
畳,たたみ,Japanese straw mat
|
||||||
|
立てる,たてる,to stand something up
|
||||||
|
建てる,たてる,to build
|
||||||
|
例えば,たとえば,for example
|
||||||
|
棚,たな,shelves
|
||||||
|
楽しみ,たのしみ,joy
|
||||||
|
楽む,たのしむ,to enjoy oneself
|
||||||
|
,たまに,occasionally
|
||||||
|
為,ため,in order to
|
||||||
|
足りる,たりる,to be enough
|
||||||
|
男性,だんせい,male
|
||||||
|
暖房,だんぼう,heating
|
||||||
|
血,ち,blood
|
||||||
|
,チェック・する,to check
|
||||||
|
力,ちから,"strength,power"
|
||||||
|
,ちっとも,not at all (used with a negative verb)
|
||||||
|
,ちゃん,suffix for familiar person
|
||||||
|
注意,ちゅうい,caution
|
||||||
|
中学校,ちゅうがっこう,"junior high school,middle school"
|
||||||
|
注射,ちゅうしゃ,injection
|
||||||
|
駐車場,ちゅうしゃじょう,parking lot
|
||||||
|
地理,ちり,geography
|
||||||
|
捕まえる,つかまえる,to seize
|
||||||
|
付く,つく,to be attached
|
||||||
|
漬ける,つける,"to soak,to pickle"
|
||||||
|
都合,つごう,"circumstances,convenience"
|
||||||
|
伝える,つたえる,to report
|
||||||
|
続く,つづく,to be continued
|
||||||
|
続ける,つづける,to continue
|
||||||
|
包む,つつむ,to wrap
|
||||||
|
妻,つま,my wife
|
||||||
|
,つもり,intention
|
||||||
|
釣る,つる,to fish
|
||||||
|
丁寧,ていねい,polite
|
||||||
|
適当,てきとう,suitability
|
||||||
|
手伝う,てつだう,to assist
|
||||||
|
手袋,てぶくろ,glove
|
||||||
|
寺,てら,temple
|
||||||
|
点,てん,"point,dot"
|
||||||
|
店員,てんいん,shop assistant
|
||||||
|
天気予報,てんきよほう,weather forecast
|
||||||
|
電灯,でんとう,electric light
|
||||||
|
電報,でんぽう,telegram
|
||||||
|
展覧会,てんらんかい,exhibition
|
||||||
|
都,と,metropolitan
|
||||||
|
道具,どうぐ,"tool,means"
|
||||||
|
,とうとう,"finally, after all"
|
||||||
|
動物園,どうぶつえん,zoo
|
||||||
|
遠く,とおく,distant
|
||||||
|
通る,とおる,to go through
|
||||||
|
特に,とくに,"particularly,especially"
|
||||||
|
特別,とくべつ,special
|
||||||
|
,とこや,barber
|
||||||
|
途中,とちゅう,on the way
|
||||||
|
特急,とっきゅう,limited express train (faster than an express train)
|
||||||
|
届ける,とどける,"to send, to deliver, to report"
|
||||||
|
泊まる,とまる,to lodge at
|
||||||
|
止める,とめる,to stop something
|
||||||
|
取り替える,とりかえる,to exchange
|
||||||
|
泥棒,どろぼう,thief
|
||||||
|
,どんどん,more and more
|
||||||
|
直す,なおす,"to fix,to repair"
|
||||||
|
直る,なおる,"to be fixed,to be repaired"
|
||||||
|
治る,なおる,"to be cured,to heal"
|
||||||
|
泣く,なく,to weep
|
||||||
|
無くなる,なくなる,"to disappear,to get lost"
|
||||||
|
亡くなる,なくなる,to die
|
||||||
|
投げる,なげる,to throw or cast away
|
||||||
|
,なさる,(respectful) to do
|
||||||
|
鳴る,なる,to sound
|
||||||
|
,なるべく,as much as possible
|
||||||
|
,なるほど,now I understand
|
||||||
|
慣れる,なれる,to grow accustomed to
|
||||||
|
苦い,にがい,bitter
|
||||||
|
二階建て,にかいだて,two storied
|
||||||
|
逃げる,にげる,to escape
|
||||||
|
日記,にっき,journal
|
||||||
|
入院,にゅういん・する,"to hospitalise, hospitalisation"
|
||||||
|
入学,にゅうがく・する,to enter school or university
|
||||||
|
似る,にる,to be similar
|
||||||
|
人形,にんぎょう,"doll, figure"
|
||||||
|
盗む,ぬすむ,to steal
|
||||||
|
塗る,ぬる,"to paint, to colour, to plaster"
|
||||||
|
,ぬれる,to get wet
|
||||||
|
,ねだん,price
|
||||||
|
熱,ねつ,fever
|
||||||
|
寝坊,ねぼう,sleeping in late
|
||||||
|
眠い,ねむい,sleepy
|
||||||
|
眠る,ねむる,to sleep
|
||||||
|
残る,のこる,to remain
|
||||||
|
乗り換える,のりかえる,to change between buses or trains
|
||||||
|
乗り物,のりもの,vehicle
|
||||||
|
葉,は,leaf
|
||||||
|
場合,ばあい,situation
|
||||||
|
倍,ばい,double
|
||||||
|
拝見,はいけん・する,(humble) to look at
|
||||||
|
歯医者,はいしゃ,dentist
|
||||||
|
運ぶ,はこぶ,to transport
|
||||||
|
始める,はじめる,to begin
|
||||||
|
場所,ばしょ,location
|
||||||
|
,はず,it should be so
|
||||||
|
恥ずかしい,はずかしい,embarrassed
|
||||||
|
発音,はつおん,pronunciation
|
||||||
|
,はっきり,clearly
|
||||||
|
花見,はなみ,cherry-blossom viewing
|
||||||
|
林,はやし,"woods,forester"
|
||||||
|
払う,はらう,to pay
|
||||||
|
番組,ばんぐみ,television or radio program
|
||||||
|
反対,はんたい,opposition
|
||||||
|
日,ひ,"day, sun"
|
||||||
|
火,ひ,fire
|
||||||
|
冷える,ひえる,to grow cold
|
||||||
|
光,ひかり,light
|
||||||
|
光る,ひかる,"to shine,to glitter"
|
||||||
|
引き出し,ひきだし,"drawer,drawing out"
|
||||||
|
,ひきだす,to withdraw
|
||||||
|
,ひげ,beard
|
||||||
|
飛行場,ひこうじょう,airport
|
||||||
|
久しぶり,ひさしぶり,after a long time
|
||||||
|
美術館,びじゅつかん,art gallery
|
||||||
|
非常に,ひじょうに,extremely
|
||||||
|
引っ越す,ひっこす,to move house
|
||||||
|
必要,ひつよう,necessary
|
||||||
|
,ひどい,awful
|
||||||
|
開く,ひらく,to open an event
|
||||||
|
昼間,ひるま,"daytime,during the day"
|
||||||
|
昼休み,ひるやすみ,noon break
|
||||||
|
拾う,ひろう,"to pick up,to gather"
|
||||||
|
増える,ふえる,to increase
|
||||||
|
深い,ふかい,deep
|
||||||
|
複雑,ふくざつ,"complexity,complication"
|
||||||
|
復習,ふくしゅう,revision
|
||||||
|
部長,ぶちょう,head of a section
|
||||||
|
普通,ふつう,"usually, or a train that stops at every station"
|
||||||
|
,ぶどう,grapes
|
||||||
|
太る,ふとる,to become fat
|
||||||
|
布団,ふとん,"Japanese bedding, futon"
|
||||||
|
舟,ふね,ship
|
||||||
|
不便,ふべん,inconvenience
|
||||||
|
踏む,ふむ,to step on
|
||||||
|
降り出す,ふりだす,to start to rain
|
||||||
|
文化,ぶんか,culture
|
||||||
|
文学,ぶんがく,literature
|
||||||
|
文法,ぶんぽう,grammar
|
||||||
|
別,べつ,different
|
||||||
|
変,へん,strange
|
||||||
|
返事,へんじ,reply
|
||||||
|
貿易,ぼうえき,trade
|
||||||
|
法律,ほうりつ,law
|
||||||
|
僕,ぼく,I (used by males)
|
||||||
|
星,ほし,star
|
||||||
|
,ほとんど,mostly
|
||||||
|
,ほめる,to praise
|
||||||
|
翻訳,ほんやく,translation
|
||||||
|
参る,まいる,"(humble) to go,to come"
|
||||||
|
負ける,まける,to lose
|
||||||
|
,または,"or,otherwise"
|
||||||
|
間違える,まちがえる,to make a mistake
|
||||||
|
間に合う,まにあう,to be in time for
|
||||||
|
周り,まわり,surroundings
|
||||||
|
回る,まわる,to go around
|
||||||
|
漫画,まんが,comic
|
||||||
|
真中,まんなか,middle
|
||||||
|
見える,みえる,to be in sight
|
||||||
|
湖,みずうみ,lake
|
||||||
|
味噌,みそ,"miso, soybean paste"
|
||||||
|
見つかる,みつかる,to be discovered
|
||||||
|
見つける,みつける,to discover
|
||||||
|
皆,みな,everybody
|
||||||
|
港,みなと,harbour
|
||||||
|
向かう,むかう,to face
|
||||||
|
迎える,むかえる,to go out to meet
|
||||||
|
昔,むかし,"old times, old days, long ago, formerly"
|
||||||
|
虫,むし,insect
|
||||||
|
息子,むすこ,(humble) son
|
||||||
|
娘,むすめ,(humble) daughter
|
||||||
|
無理,むり,impossible
|
||||||
|
召し上がる,めしあがる,(polite) to eat
|
||||||
|
珍しい,めずらしい,rare
|
||||||
|
申し上げる,もうしあげる,"(humble) to say,to tell"
|
||||||
|
申す,もうす,"(humble) to be called,to say"
|
||||||
|
,もうすぐ,soon
|
||||||
|
,もし,if
|
||||||
|
戻る,もどる,to turn back
|
||||||
|
木綿,もめん,cotton
|
||||||
|
森,もり,forest
|
||||||
|
焼く,やく,"to bake,to grill"
|
||||||
|
約束,やくそく,promise
|
||||||
|
役に立つ,やくにたつ,to be helpful
|
||||||
|
焼ける,やける,"to burn,to be roasted"
|
||||||
|
優しい,やさしい,kind
|
||||||
|
痩せる,やせる,to become thin
|
||||||
|
,やっと,at last
|
||||||
|
止む,やむ,to stop
|
||||||
|
止める,やめる,to stop
|
||||||
|
柔らかい,やわらかい,soft
|
||||||
|
湯,ゆ,hot water
|
||||||
|
指,ゆび,finger
|
||||||
|
指輪,ゆびわ,a ring
|
||||||
|
夢,ゆめ,dream
|
||||||
|
揺れる,ゆれる,"to shake,to sway"
|
||||||
|
用,よう,use
|
||||||
|
用意,ようい,preparation
|
||||||
|
用事,ようじ,things to do
|
||||||
|
汚れる,よごれる,to get dirty
|
||||||
|
予習,よしゅう,preparation for a lesson
|
||||||
|
予定,よてい,arrangement
|
||||||
|
予約,よやく,reservation
|
||||||
|
寄る,よる,to visit
|
||||||
|
喜ぶ,よろこぶ,to be delighted
|
||||||
|
理由,りゆう,reason
|
||||||
|
利用,りよう,utilization
|
||||||
|
両方,りょうほう,both sides
|
||||||
|
旅館,りょかん,Japanese hotel
|
||||||
|
留守,るす,absence
|
||||||
|
冷房,れいぼう,air conditioning
|
||||||
|
歴史,れきし,history
|
||||||
|
連絡,れんらく,contact
|
||||||
|
沸かす,わかす,"to boil,to heat"
|
||||||
|
別れる,わかれる,to separate
|
||||||
|
沸く,わく,"to boil, to grow hot,to get excited"
|
||||||
|
訳,わけ,"meaning,reason"
|
||||||
|
忘れ物,わすれもの,lost article
|
||||||
|
笑う,わらう,"to laugh,to smile"
|
||||||
|
割合,わりあい,"rate,ratio,percentage"
|
||||||
|
割れる,われる,to break
|
||||||
|
,アクセサリー,accessory
|
||||||
|
,アジア,Asia
|
||||||
|
,アナウンサー,announcer
|
||||||
|
,アフリカ,Africa
|
||||||
|
,アメリカ,America
|
||||||
|
,アルコール,alcohol
|
||||||
|
,アルバイト,part-time job
|
||||||
|
,エスカレーター,escalator
|
||||||
|
,オートバイ,motorcycle
|
||||||
|
,カーテン,curtain
|
||||||
|
,ガス,gas
|
||||||
|
,ガソリン,petrol
|
||||||
|
,ガソリンスタンド,petrol station
|
||||||
|
,ガラス,a glass pane
|
||||||
|
,ケーキ,cake
|
||||||
|
消しゴム,けしゴム,"eraser, rubber"
|
||||||
|
,コンサート,concert
|
||||||
|
,コンピューター,computer
|
||||||
|
,サラダ,salad
|
||||||
|
,サンダル,sandal
|
||||||
|
,サンドイッチ,sandwich
|
||||||
|
,ジャム,jam
|
||||||
|
,スーツ,suit
|
||||||
|
,スーツケース,suitcase
|
||||||
|
,スクリーン,screen
|
||||||
|
,ステーキ,steak
|
||||||
|
,ステレオ,stereo
|
||||||
|
,ソフト,soft
|
||||||
|
,タイプ,"type,style"
|
||||||
|
,チェック・する,to check
|
||||||
|
,テキスト,"text,text book"
|
||||||
|
,テニス,tennis
|
||||||
|
,パート,part time
|
||||||
|
,パソコン,personal computer
|
||||||
|
,ハンドバッグ,handbag
|
||||||
|
,ピアノ,piano
|
||||||
|
,ビル,building or bill
|
||||||
|
,ファックス,fax
|
||||||
|
,プレゼント,present
|
||||||
|
,ベル,bell
|
||||||
|
,レジ,register
|
||||||
|
,レポート/リポート,report
|
||||||
|
,ワープロ,word processor
|
||||||
|
669
data/tanos-jlpt/n5.csv
Normal file
669
data/tanos-jlpt/n5.csv
Normal file
@@ -0,0 +1,669 @@
|
|||||||
|
会う,あう,to meet
|
||||||
|
青,あお,blue
|
||||||
|
青い,あおい,blue
|
||||||
|
赤,あか,red
|
||||||
|
赤い,あかい,red
|
||||||
|
明い,あかるい,bright
|
||||||
|
秋,あき,autumn
|
||||||
|
開く,あく,"to open,to become open"
|
||||||
|
開ける,あける,to open
|
||||||
|
上げる,あげる,to give
|
||||||
|
朝,あさ,morning
|
||||||
|
朝御飯,あさごはん,breakfast
|
||||||
|
,あさって,day after tomorrow
|
||||||
|
足,あし,"foot,leg"
|
||||||
|
明日,あした,tomorrow
|
||||||
|
,あそこ,over there
|
||||||
|
遊ぶ,あそぶ,"to play,to make a visit"
|
||||||
|
暖かい,あたたかい,warm
|
||||||
|
頭,あたま,head
|
||||||
|
新しい,あたらしい,new
|
||||||
|
,あちら,there
|
||||||
|
暑い,あつい,hot
|
||||||
|
熱い,あつい,hot to the touch
|
||||||
|
厚い,あつい,"kind, deep, thick"
|
||||||
|
,あっち,over there
|
||||||
|
後,あと,afterwards
|
||||||
|
,あなた,you
|
||||||
|
兄,あに,(humble) older brother
|
||||||
|
姉,あね,(humble) older sister
|
||||||
|
,あの,that over there
|
||||||
|
,あの,um...
|
||||||
|
,アパート,apartment
|
||||||
|
,あびる,"to bathe,to shower"
|
||||||
|
危ない,あぶない,dangerous
|
||||||
|
甘い,あまい,sweet
|
||||||
|
,あまり,not very
|
||||||
|
雨,あめ,rain
|
||||||
|
飴,あめ,candy
|
||||||
|
洗う,あらう,to wash
|
||||||
|
,ある,"to be,to have (used for inanimate objects)"
|
||||||
|
歩く,あるく,to walk
|
||||||
|
,あれ,that
|
||||||
|
,いい/よい,good
|
||||||
|
,いいえ,no
|
||||||
|
言う,いう,to say
|
||||||
|
家,いえ,house
|
||||||
|
,いかが,how
|
||||||
|
行く,いく,to go
|
||||||
|
,いくつ,"how many?,how old?"
|
||||||
|
,いくら,how much?
|
||||||
|
池,いけ,pond
|
||||||
|
医者,いしゃ,medical doctor
|
||||||
|
,いす,chair
|
||||||
|
忙しい,いそがしい,"busy,irritated"
|
||||||
|
痛い,いたい,painful
|
||||||
|
一,いち,one
|
||||||
|
一日,いちにち,"(1) one day, (2) first of month"
|
||||||
|
,いちばん,"best,first"
|
||||||
|
,いつ,when
|
||||||
|
五日,いつか,"five days, fifth day"
|
||||||
|
一緒,いっしょ,together
|
||||||
|
五つ,いつつ,five
|
||||||
|
,いつも,always
|
||||||
|
犬,いぬ,dog
|
||||||
|
今,いま,now
|
||||||
|
意味,いみ,meaning
|
||||||
|
妹,いもうと,(humble) younger sister
|
||||||
|
嫌,いや,unpleasant
|
||||||
|
入口,いりぐち,entrance
|
||||||
|
居る,いる,"to be, to have (used for people and animals)"
|
||||||
|
要る,いる,to need
|
||||||
|
入れる,いれる,to put in
|
||||||
|
色,いろ,colour
|
||||||
|
,いろいろ,various
|
||||||
|
上,うえ,on top of
|
||||||
|
後ろ,うしろ,behind
|
||||||
|
薄い,うすい,"thin,weak"
|
||||||
|
歌,うた,song
|
||||||
|
歌う,うたう,to sing
|
||||||
|
生まれる,うまれる,to be born
|
||||||
|
海,うみ,sea
|
||||||
|
売る,うる,to sell
|
||||||
|
煩い,うるさい,"noisy,annoying"
|
||||||
|
上着,うわぎ,jacket
|
||||||
|
絵,え,picture
|
||||||
|
映画,えいが,movie
|
||||||
|
映画館,えいがかん,cinema
|
||||||
|
英語,えいご,English language
|
||||||
|
,ええ,yes
|
||||||
|
駅,えき,station
|
||||||
|
,エレベーター,elevator
|
||||||
|
鉛筆,えんぴつ,pencil
|
||||||
|
,おいしい,delicious
|
||||||
|
多い,おおい,many
|
||||||
|
大きい,おおきい,big
|
||||||
|
大きな,おおきな,big
|
||||||
|
大勢,おおぜい,great number of people
|
||||||
|
お母さん,おかあさん,(honorable) mother
|
||||||
|
お菓子,おかし,"sweets, candy"
|
||||||
|
お金,おかね,money
|
||||||
|
起きる,おきる,to get up
|
||||||
|
置く,おく,to put
|
||||||
|
奥さん,おくさん,(honorable) wife
|
||||||
|
お酒,おさけ,"alcohol, rice wine"
|
||||||
|
お皿,おさら,"plate, dish"
|
||||||
|
伯父/叔父,おじいさん,"grandfather,male senior citizen"
|
||||||
|
教える,おしえる,"to teach,to tell"
|
||||||
|
伯父/叔父,おじさん,"uncle,middle aged gentleman"
|
||||||
|
押す,おす,"to push, to stamp something"
|
||||||
|
遅い,おそい,"late,slow"
|
||||||
|
お茶,おちゃ,green tea
|
||||||
|
お手洗い,おてあらい,bathroom
|
||||||
|
お父さん,おとうさん,(honorable) father
|
||||||
|
弟,おとうと,younger brother
|
||||||
|
男,おとこ,man
|
||||||
|
男の子,おとこのこ,boy
|
||||||
|
一昨日,おととい,day before yesterday
|
||||||
|
一昨年,おととし,year before last
|
||||||
|
大人,おとな,adult
|
||||||
|
,おなか,stomach
|
||||||
|
同じ,おなじ,same
|
||||||
|
お兄さん,おにいさん,(honorable) older brother
|
||||||
|
お姉さん,おねえさん,(honorable) older sister
|
||||||
|
,おばあさん,"grandmother,female senior-citizen"
|
||||||
|
伯母さん/叔母さん,おばさん,aunt
|
||||||
|
お風呂,おふろ,bath
|
||||||
|
お弁当,おべんとう,boxed lunch
|
||||||
|
覚える,おぼえる,to remember
|
||||||
|
,おまわりさん,friendly term for policeman
|
||||||
|
重い,おもい,heavy
|
||||||
|
,おもしろい,interesting
|
||||||
|
泳ぐ,およぐ,to swim
|
||||||
|
降りる,おりる,"to get off, to descend"
|
||||||
|
終る,おわる,to finish
|
||||||
|
音楽,おんがく,music
|
||||||
|
女,おんな,woman
|
||||||
|
女の子,おんなのこ,girl
|
||||||
|
外国,がいこく,foreign country
|
||||||
|
外国人,がいこくじん,foreigner
|
||||||
|
会社,かいしゃ,company
|
||||||
|
階段,かいだん,stairs
|
||||||
|
買い物,かいもの,shopping
|
||||||
|
買う,かう,to buy
|
||||||
|
返す,かえす,to return something
|
||||||
|
帰る,かえる,to go back
|
||||||
|
,かかる,to take time or money
|
||||||
|
,かぎ,key
|
||||||
|
書く,かく,to write
|
||||||
|
学生,がくせい,student
|
||||||
|
,かける,to call by phone
|
||||||
|
傘,かさ,umbrella
|
||||||
|
貸す,かす,to lend
|
||||||
|
風,かぜ,wind
|
||||||
|
風邪,かぜ,a cold
|
||||||
|
家族,かぞく,family
|
||||||
|
方,かた,"person, way of doing"
|
||||||
|
学校,がっこう,school
|
||||||
|
,カップ,cup
|
||||||
|
家庭,かてい,household
|
||||||
|
角,かど,a corner
|
||||||
|
,かばん,"bag,basket"
|
||||||
|
花瓶,かびん,a vase
|
||||||
|
紙,かみ,paper
|
||||||
|
,カメラ,camera
|
||||||
|
火曜日,かようび,Tuesday
|
||||||
|
辛い,からい,spicy
|
||||||
|
体,からだ,body
|
||||||
|
借りる,かりる,to borrow
|
||||||
|
軽い,かるい,light
|
||||||
|
,カレー,curry
|
||||||
|
,カレンダー,calendar
|
||||||
|
川/河,かわ,river
|
||||||
|
,かわいい,cute
|
||||||
|
漢字,かんじ,Chinese character
|
||||||
|
木,き,"tree,wood"
|
||||||
|
黄色,きいろ,yellow
|
||||||
|
黄色い,きいろい,yellow
|
||||||
|
消える,きえる,to disappear
|
||||||
|
聞く,きく,"to hear,to listen to,to ask"
|
||||||
|
北,きた,north
|
||||||
|
,ギター,guitar
|
||||||
|
汚い,きたない,dirty
|
||||||
|
喫茶店,きっさてん,coffee lounge
|
||||||
|
切手,きって,postage stamp
|
||||||
|
切符,きっぷ,ticket
|
||||||
|
昨日,きのう,yesterday
|
||||||
|
九,きゅう / く,nine
|
||||||
|
牛肉,ぎゅうにく,beef
|
||||||
|
牛乳,ぎゅうにゅう,milk
|
||||||
|
今日,きょう,today
|
||||||
|
教室,きょうしつ,classroom
|
||||||
|
兄弟,きょうだい,(humble) siblings
|
||||||
|
去年,きょねん,last year
|
||||||
|
嫌い,きらい,hate
|
||||||
|
切る,きる,to cut
|
||||||
|
着る,きる,to put on from the shoulders down
|
||||||
|
,きれい,"pretty,clean"
|
||||||
|
,キロ/キログラム,kilogram
|
||||||
|
,キロ/キロメートル,kilometre
|
||||||
|
銀行,ぎんこう,bank
|
||||||
|
金曜日,きんようび,Friday
|
||||||
|
薬,くすり,medicine
|
||||||
|
,ください,please
|
||||||
|
果物,くだもの,fruit
|
||||||
|
口,くち,"mouth,opening"
|
||||||
|
靴,くつ,shoes
|
||||||
|
靴下,くつした,socks
|
||||||
|
国,くに,country
|
||||||
|
曇り,くもり,cloudy weather
|
||||||
|
曇る,くもる,"to become cloudy,to become dim"
|
||||||
|
暗い,くらい,gloomy
|
||||||
|
,クラス,class
|
||||||
|
,グラム,gram
|
||||||
|
来る,くる,to come
|
||||||
|
車,くるま,"car,vehicle"
|
||||||
|
黒,くろ,black
|
||||||
|
黒い,くろい,black
|
||||||
|
警官,けいかん,policeman
|
||||||
|
今朝,けさ,this morning
|
||||||
|
消す,けす,"to erase,to turn off power"
|
||||||
|
結構,けっこう,"splendid,enough"
|
||||||
|
結婚,けっこん,marriage
|
||||||
|
月曜日,げつようび,Monday
|
||||||
|
玄関,げんかん,entry hall
|
||||||
|
元気,げんき,"health, vitality"
|
||||||
|
五,ご,five
|
||||||
|
公園,こうえん,park
|
||||||
|
交差点,こうさてん,intersection
|
||||||
|
紅茶,こうちゃ,black tea
|
||||||
|
交番,こうばん,police box
|
||||||
|
声,こえ,voice
|
||||||
|
,コート,"coat,tennis court"
|
||||||
|
,コーヒー,coffee
|
||||||
|
,ここ,here
|
||||||
|
午後,ごご,afternoon
|
||||||
|
九日,ここのか,"nine days, ninth day"
|
||||||
|
九つ,ここのつ,nine
|
||||||
|
午前,ごぜん,morning
|
||||||
|
答える,こたえる,to answer
|
||||||
|
,こちら,this person or way
|
||||||
|
,こっち,this person or way
|
||||||
|
,コップ,a glass
|
||||||
|
今年,ことし,this year
|
||||||
|
言葉,ことば,"word,language"
|
||||||
|
子供,こども,child
|
||||||
|
,この,this
|
||||||
|
御飯,ごはん,"cooked rice,meal"
|
||||||
|
,コピーする,to copy
|
||||||
|
困る,こまる,to be worried
|
||||||
|
,これ,this
|
||||||
|
今月,こんげつ,this month
|
||||||
|
今週,こんしゅう,this week
|
||||||
|
,こんな,such
|
||||||
|
今晩,こんばん,this evening
|
||||||
|
,さあ,well…
|
||||||
|
財布,さいふ,wallet
|
||||||
|
魚,さかな,fish
|
||||||
|
先,さき,"the future,previous"
|
||||||
|
咲く,さく,to bloom
|
||||||
|
作文,さくぶん,"composition,writing"
|
||||||
|
差す,さす,"to stretch out hands,to raise an umbrella"
|
||||||
|
雑誌,ざっし,magazine
|
||||||
|
砂糖,さとう,sugar
|
||||||
|
寒い,さむい,cold
|
||||||
|
さ来年,さらいねん,year after next
|
||||||
|
三,さん,three
|
||||||
|
散歩,さんぽする,to stroll
|
||||||
|
四,し / よん,four
|
||||||
|
塩,しお,salt
|
||||||
|
,しかし,however
|
||||||
|
時間,じかん,time
|
||||||
|
仕事,しごと,job
|
||||||
|
辞書,じしょ,dictionary
|
||||||
|
静か,しずか,quiet
|
||||||
|
下,した,below
|
||||||
|
七,しち / なな,seven
|
||||||
|
質問,しつもん,question
|
||||||
|
自転車,じてんしゃ,bicycle
|
||||||
|
自動車,じどうしゃ,automobile
|
||||||
|
死ぬ,しぬ,to die
|
||||||
|
字引,じびき,dictionary
|
||||||
|
自分,じぶん,oneself
|
||||||
|
閉まる,しまる,"to close,to be closed"
|
||||||
|
閉める,しめる,to close something
|
||||||
|
締める,しめる,to tie
|
||||||
|
,じゃ/じゃあ,well then…
|
||||||
|
写真,しゃしん,photograph
|
||||||
|
,シャツ,shirt
|
||||||
|
,シャワー,shower
|
||||||
|
十,じゅう とお,ten
|
||||||
|
授業,じゅぎょう,"lesson,class work"
|
||||||
|
宿題,しゅくだい,homework
|
||||||
|
上手,じょうず,skillful
|
||||||
|
丈夫,じょうぶ,"strong,durable"
|
||||||
|
,しょうゆ,soy sauce
|
||||||
|
食堂,しょくどう,dining hall
|
||||||
|
知る,しる,to know
|
||||||
|
白,しろ,white
|
||||||
|
白い,しろい,white
|
||||||
|
新聞,しんぶん,newspaper
|
||||||
|
水曜日,すいようび,Wednesday
|
||||||
|
吸う,すう,"to smoke,to suck"
|
||||||
|
,スカート,skirt
|
||||||
|
好き,すき,likeable
|
||||||
|
少ない,すくない,a few
|
||||||
|
,すぐに,instantly
|
||||||
|
少し,すこし,few
|
||||||
|
涼しい,すずしい,refreshing
|
||||||
|
,ストーブ,heater
|
||||||
|
,スプーン,spoon
|
||||||
|
,スポーツ,sport
|
||||||
|
,ズボン,trousers
|
||||||
|
住む,すむ,to live in
|
||||||
|
,スリッパ,slippers
|
||||||
|
,する,to do
|
||||||
|
座る,すわる,to sit
|
||||||
|
背,せ,"height,stature"
|
||||||
|
生徒,せいと,pupil
|
||||||
|
,セーター,"sweater,jumper"
|
||||||
|
,せっけん,soap
|
||||||
|
背広,せびろ,business suit
|
||||||
|
狭い,せまい,narrow
|
||||||
|
,ゼロ,zero
|
||||||
|
千,せん,thousand
|
||||||
|
先月,せんげつ,last month
|
||||||
|
先週,せんしゅう,last week
|
||||||
|
先生,せんせい,"teacher,doctor"
|
||||||
|
洗濯,せんたく,washing
|
||||||
|
全部,ぜんぶ,all
|
||||||
|
掃除,そうじする,"to clean, to sweep"
|
||||||
|
,そうして/そして,and
|
||||||
|
,そこ,that place
|
||||||
|
,そちら,over there
|
||||||
|
,そっち,over there
|
||||||
|
外,そと,outside
|
||||||
|
,その,that
|
||||||
|
,そば,"near,beside"
|
||||||
|
空,そら,sky
|
||||||
|
,それ,that
|
||||||
|
,それから,after that
|
||||||
|
,それでは,in that situation
|
||||||
|
大学,だいがく,university
|
||||||
|
大使館,たいしかん,embassy
|
||||||
|
大丈夫,だいじょうぶ,all right
|
||||||
|
大好き,だいすき,to be very likeable
|
||||||
|
大切,たいせつ,important
|
||||||
|
台所,だいどころ,kitchen
|
||||||
|
,たいへん,very
|
||||||
|
,たいへん,difficult situation
|
||||||
|
高い,たかい,"tall, expensive"
|
||||||
|
,たくさん,many
|
||||||
|
,タクシー,taxi
|
||||||
|
出す,だす,to put out
|
||||||
|
立つ,たつ,to stand
|
||||||
|
,たて,"length,height"
|
||||||
|
建物,たてもの,building
|
||||||
|
楽しい,たのしい,enjoyable
|
||||||
|
頼む,たのむ,to ask
|
||||||
|
,たばこ,"tobacco,cigarettes"
|
||||||
|
,たぶん,probably
|
||||||
|
食べ物,たべもの,food
|
||||||
|
食べる,たべる,to eat
|
||||||
|
卵,たまご,egg
|
||||||
|
誰,だれ,who
|
||||||
|
誰,だれか,somebody
|
||||||
|
誕生日,たんじょうび,birthday
|
||||||
|
,だんだん,gradually
|
||||||
|
小さい,ちいさい,little
|
||||||
|
小さな,ちいさな,little
|
||||||
|
近い,ちかい,near
|
||||||
|
違う,ちがう,to differ
|
||||||
|
近く,ちかく,near
|
||||||
|
地下鉄,ちかてつ,underground train
|
||||||
|
地図,ちず,map
|
||||||
|
茶色,ちゃいろ,brown
|
||||||
|
,ちゃわん,rice bowl
|
||||||
|
,ちょうど,exactly
|
||||||
|
,ちょっと,somewhat
|
||||||
|
一日,ついたち,first of month
|
||||||
|
使う,つかう,to use
|
||||||
|
疲れる,つかれる,to get tired
|
||||||
|
次,つぎ,next
|
||||||
|
着く,つく,to arrive at
|
||||||
|
机,つくえ,desk
|
||||||
|
作る,つくる,to make
|
||||||
|
,つける,to turn on
|
||||||
|
勤める,つとめる,to work for someone
|
||||||
|
,つまらない,boring
|
||||||
|
冷たい,つめたい,cold to the touch
|
||||||
|
強い,つよい,powerful
|
||||||
|
手,て,hand
|
||||||
|
,テープ,tape
|
||||||
|
,テーブル,table
|
||||||
|
,テープレコーダー,tape recorder
|
||||||
|
出かける,でかける,to go out
|
||||||
|
手紙,てがみ,letter
|
||||||
|
,できる,to be able to
|
||||||
|
出口,でぐち,exit
|
||||||
|
,テスト,test
|
||||||
|
,では,with that...
|
||||||
|
,デパート,department store
|
||||||
|
,でも,but
|
||||||
|
出る,でる,"to appear,to leave"
|
||||||
|
,テレビ,television
|
||||||
|
天気,てんき,weather
|
||||||
|
電気,でんき,"electricity,electric light"
|
||||||
|
電車,でんしゃ,electric train
|
||||||
|
電話,でんわ,telephone
|
||||||
|
戸,と,Japanese style door
|
||||||
|
,ドア,Western style door
|
||||||
|
,トイレ,toilet
|
||||||
|
,どう,"how,in what way"
|
||||||
|
,どうして,for what reason
|
||||||
|
,どうぞ,please
|
||||||
|
動物,どうぶつ,animal
|
||||||
|
,どうも,thanks
|
||||||
|
遠い,とおい,far
|
||||||
|
十日,とおか,"ten days,the tenth day"
|
||||||
|
時々,ときどき,sometimes
|
||||||
|
時計,とけい,"watch,clock"
|
||||||
|
,どこ,where
|
||||||
|
所,ところ,place
|
||||||
|
年,とし,year
|
||||||
|
図書館,としょかん,library
|
||||||
|
,どちら,which of two
|
||||||
|
,どっち,which
|
||||||
|
,とても,very
|
||||||
|
,どなた,who
|
||||||
|
隣,となり,next door to
|
||||||
|
,どの,which
|
||||||
|
飛ぶ,とぶ,"to fly,to hop"
|
||||||
|
止まる,とまる,to come to a halt
|
||||||
|
友達,ともだち,friend
|
||||||
|
土曜日,どようび,Saturday
|
||||||
|
鳥,とり,bird
|
||||||
|
とり肉,とりにく,chicken meat
|
||||||
|
取る,とる,to take something
|
||||||
|
撮る,とる,to take a photo or record a film
|
||||||
|
,どれ,which (of three or more)
|
||||||
|
,ナイフ,knife
|
||||||
|
中,なか,middle
|
||||||
|
長い,ながい,long
|
||||||
|
鳴く,なく,"animal noise. to chirp, roar or croak etc."
|
||||||
|
無くす,なくす,to lose something
|
||||||
|
,なぜ,why
|
||||||
|
夏,なつ,summer
|
||||||
|
夏休み,なつやすみ,summer holiday
|
||||||
|
,など,et cetera
|
||||||
|
七つ,ななつ,seven
|
||||||
|
七日,なのか,"seven days,the seventh day"
|
||||||
|
名前,なまえ,name
|
||||||
|
習う,ならう,to learn
|
||||||
|
並ぶ,ならぶ,"to line up,to stand in a line"
|
||||||
|
並べる,ならべる,"to line up,to set up"
|
||||||
|
,なる,to become
|
||||||
|
何,なん/なに,what
|
||||||
|
二,に,two
|
||||||
|
賑やか,にぎやか,"bustling,busy"
|
||||||
|
肉,にく,meat
|
||||||
|
西,にし,west
|
||||||
|
日曜日,にちようび,Sunday
|
||||||
|
荷物,にもつ,luggage
|
||||||
|
,ニュース,news
|
||||||
|
庭,にわ,garden
|
||||||
|
脱ぐ,ぬぐ,to take off clothes
|
||||||
|
温い,ぬるい,luke warm
|
||||||
|
,ネクタイ,"tie,necktie"
|
||||||
|
猫,ねこ,cat
|
||||||
|
寝る,ねる,"to go to bed,to sleep"
|
||||||
|
,ノート,"notebook,exercise book"
|
||||||
|
登る,のぼる,to climb
|
||||||
|
飲み物,のみもの,a drink
|
||||||
|
飲む,のむ,to drink
|
||||||
|
乗る,のる,"to get on,to ride"
|
||||||
|
歯,は,tooth
|
||||||
|
,パーティー,party
|
||||||
|
,はい,yes
|
||||||
|
灰皿,はいざら,ashtray
|
||||||
|
入る,はいる,"to enter,to contain"
|
||||||
|
葉書,はがき,postcard
|
||||||
|
,はく,"to wear,to put on trousers"
|
||||||
|
箱,はこ,box
|
||||||
|
橋,はし,bridge
|
||||||
|
,はし,chopsticks
|
||||||
|
始まる,はじまる,to begin
|
||||||
|
初め/始め,はじめ,beginning
|
||||||
|
初めて,はじめて,for the first time
|
||||||
|
走る,はしる,to run
|
||||||
|
,バス,bus
|
||||||
|
,バター,butter
|
||||||
|
二十歳,はたち,"20 years old,20th year"
|
||||||
|
働く,はたらく,to work
|
||||||
|
八,はち,eight
|
||||||
|
二十日,はつか,"twenty days,twentieth"
|
||||||
|
花,はな,flower
|
||||||
|
鼻,はな,nose
|
||||||
|
話,はなし,"talk,story"
|
||||||
|
話す,はなす,to speak
|
||||||
|
早い,はやい,early
|
||||||
|
速い,はやい,quick
|
||||||
|
春,はる,spring
|
||||||
|
貼る,はる,to stick
|
||||||
|
晴れ,はれ,clear weather
|
||||||
|
晴れる,はれる,to be sunny
|
||||||
|
半,はん,half
|
||||||
|
晩,ばん,evening
|
||||||
|
,パン,bread
|
||||||
|
,ハンカチ,handkerchief
|
||||||
|
番号,ばんごう,number
|
||||||
|
晩御飯,ばんごはん,evening meal
|
||||||
|
半分,はんぶん,half
|
||||||
|
東,ひがし,east
|
||||||
|
引く,ひく,to pull
|
||||||
|
弾く,ひく,"to play an instrument with strings, including piano"
|
||||||
|
低い,ひくい,"short,low"
|
||||||
|
飛行機,ひこうき,aeroplane
|
||||||
|
左,ひだり,left hand side
|
||||||
|
人,ひと,person
|
||||||
|
一つ,ひとつ,one
|
||||||
|
一月,ひとつき,one month
|
||||||
|
一人,ひとり,one person
|
||||||
|
暇,ひま,free time
|
||||||
|
百,ひゃく,hundred
|
||||||
|
病院,びょういん,hospital
|
||||||
|
病気,びょうき,illness
|
||||||
|
昼,ひる,"noon, daytime"
|
||||||
|
昼御飯,ひるごはん,midday meal
|
||||||
|
広い,ひろい,"spacious,wide"
|
||||||
|
,フィルム,roll of film
|
||||||
|
封筒,ふうとう,envelope
|
||||||
|
,プール,swimming pool
|
||||||
|
,フォーク,fork
|
||||||
|
吹く,ふく,to blow
|
||||||
|
服,ふく,clothes
|
||||||
|
二つ,ふたつ,two
|
||||||
|
豚肉,ぶたにく,pork
|
||||||
|
二人,ふたり,two people
|
||||||
|
二日,ふつか,"two days, second day of the month"
|
||||||
|
太い,ふとい,fat
|
||||||
|
冬,ふゆ,winter
|
||||||
|
降る,ふる,"to fall, e.g. rain or snow"
|
||||||
|
古い,ふるい,old (not used for people)
|
||||||
|
,ふろ,bath
|
||||||
|
文章,ぶんしょう,"sentence,text"
|
||||||
|
,ページ,page
|
||||||
|
下手,へた,unskillful
|
||||||
|
,ベッド,bed
|
||||||
|
,ペット,pet
|
||||||
|
部屋,へや,room
|
||||||
|
辺,へん,area
|
||||||
|
,ペン,pen
|
||||||
|
勉強,べんきょうする,to study
|
||||||
|
便利,べんり,"useful, convenient"
|
||||||
|
帽子,ぼうし,hat
|
||||||
|
,ボールペン,ball-point pen
|
||||||
|
,ほか,"other, the rest"
|
||||||
|
,ポケット,pocket
|
||||||
|
欲しい,ほしい,want
|
||||||
|
,ポスト,post
|
||||||
|
細い,ほそい,thin
|
||||||
|
,ボタン,button
|
||||||
|
,ホテル,hotel
|
||||||
|
本,ほん,book
|
||||||
|
本棚,ほんだな,bookshelves
|
||||||
|
,ほんとう,truth
|
||||||
|
毎朝,まいあさ,every morning
|
||||||
|
毎月,まいげつ/まいつき,every month
|
||||||
|
毎週,まいしゅう,every week
|
||||||
|
毎日,まいにち,every day
|
||||||
|
毎年,まいねん/まいとし,every year
|
||||||
|
毎晩,まいばん,every night
|
||||||
|
前,まえ,before
|
||||||
|
曲る,まがる,"to turn,to bend"
|
||||||
|
,まずい,unpleasant
|
||||||
|
,また,"again,and"
|
||||||
|
,まだ,"yet,still"
|
||||||
|
町,まち,"town,city"
|
||||||
|
待つ,まつ,to wait
|
||||||
|
,まっすぐ,"straight ahead,direct"
|
||||||
|
,マッチ,match
|
||||||
|
窓,まど,window
|
||||||
|
丸い/円い,まるい,"round,circular"
|
||||||
|
万,まん,ten thousand
|
||||||
|
万年筆,まんねんひつ,fountain pen
|
||||||
|
磨く,みがく,"to brush teeth, to polish"
|
||||||
|
右,みぎ,right side
|
||||||
|
短い,みじかい,short
|
||||||
|
水,みず,water
|
||||||
|
店,みせ,shop
|
||||||
|
見せる,みせる,to show
|
||||||
|
道,みち,street
|
||||||
|
三日,みっか,"three days, third day of the month"
|
||||||
|
三つ,みっつ,three
|
||||||
|
緑,みどり,green
|
||||||
|
皆さん,みなさん,everyone
|
||||||
|
南,みなみ,south
|
||||||
|
耳,みみ,ear
|
||||||
|
見る 観る,みる,"to see, to watch"
|
||||||
|
,みんな,everyone
|
||||||
|
六日,むいか,"six days, sixth day of the month"
|
||||||
|
向こう,むこう,over there
|
||||||
|
難しい,むずかしい,difficult
|
||||||
|
六つ,むっつ,six
|
||||||
|
村,むら,village
|
||||||
|
目,め,eye
|
||||||
|
,メートル,metre
|
||||||
|
眼鏡,めがね,glasses
|
||||||
|
,もう,already
|
||||||
|
もう一度,もういちど,again
|
||||||
|
木曜日,もくようび,Thursday
|
||||||
|
持つ,もつ,to hold
|
||||||
|
,もっと,more
|
||||||
|
物,もの,thing
|
||||||
|
門,もん,gate
|
||||||
|
問題,もんだい,problem
|
||||||
|
八百屋,やおや,greengrocer
|
||||||
|
野菜,やさい,vegetable
|
||||||
|
易しい,やさしい,"easy, simple"
|
||||||
|
安い,やすい,cheap
|
||||||
|
休み,やすみ,"rest,holiday"
|
||||||
|
休む,やすむ,to rest
|
||||||
|
八つ,やっつ,eight
|
||||||
|
山,やま,mountain
|
||||||
|
,やる,to do
|
||||||
|
夕方,ゆうがた,evening
|
||||||
|
夕飯,ゆうはん,dinner
|
||||||
|
郵便局,ゆうびんきょく,post office
|
||||||
|
昨夜,ゆうべ,last night
|
||||||
|
有名,ゆうめい,famous
|
||||||
|
雪,ゆき,snow
|
||||||
|
行く,ゆく,to go
|
||||||
|
,ゆっくりと,slowly
|
||||||
|
八日,ようか,"eight days, eighth day of the month"
|
||||||
|
洋服,ようふく,western-style clothes
|
||||||
|
,よく,"often, well"
|
||||||
|
横,よこ,"beside,side,width"
|
||||||
|
四日,よっか,"four days, fourth day of the month"
|
||||||
|
四つ,よっつ,four
|
||||||
|
呼ぶ,よぶ,"to call out,to invite"
|
||||||
|
読む,よむ,to read
|
||||||
|
夜,よる,"evening,night"
|
||||||
|
弱い,よわい,weak
|
||||||
|
来月,らいげつ,next month
|
||||||
|
来週,らいしゅう,next week
|
||||||
|
来年,らいねん,next year
|
||||||
|
,ラジオ,radio
|
||||||
|
,ラジカセ / ラジオカセット,radio cassette player
|
||||||
|
,りっぱ,splendid
|
||||||
|
留学生,りゅうがくせい,overseas student
|
||||||
|
両親,りょうしん,both parents
|
||||||
|
料理,りょうり,cuisine
|
||||||
|
旅行,りょこう,travel
|
||||||
|
零,れい,zero
|
||||||
|
冷蔵庫,れいぞうこ,refrigerator
|
||||||
|
,レコード,record
|
||||||
|
,レストラン,restaurant
|
||||||
|
練習,れんしゅうする,to practice
|
||||||
|
廊下,ろうか,corridor
|
||||||
|
六,ろく,six
|
||||||
|
,ワイシャツ,business shirt
|
||||||
|
若い,わかい,young
|
||||||
|
分かる,わかる,to be understood
|
||||||
|
忘れる,わすれる,to forget
|
||||||
|
私,わたくし,"(humble) I,myself"
|
||||||
|
私,わたし,"I,myself"
|
||||||
|
渡す,わたす,to hand over
|
||||||
|
渡る,わたる,to go across
|
||||||
|
悪い,わるい,bad
|
||||||
|
,より、ほう,Used for comparison.
|
||||||
|
@@ -1,28 +0,0 @@
|
|||||||
# Database
|
|
||||||
|
|
||||||
Here are some choices that have been made when designing the schema
|
|
||||||
|
|
||||||
### `JMdict_{Reading,Kanji}Element.elementId` and `JMdict_Sense.senseId`
|
|
||||||
|
|
||||||
The `elementId`/`senseId` field acts as a unique identifier for each individual element in these tables.
|
|
||||||
It is a packed version of the `(entryId, orderNum)` pair, where the first number is given 7 digits and the second is given 2 digits (max count found so far is `40`).
|
|
||||||
Since `entryId` already is a field in the table, it would technically have been fine to store the `orderNum` as a separate field,
|
|
||||||
but it is easier to be able to refer to the entries without a composite foreign key in other tables.
|
|
||||||
|
|
||||||
(NOTE: `entryId` is now inferred from `elementId` within sqlite using a generated column, so saying it is "stored in a separate field" might be a stretch)
|
|
||||||
|
|
||||||
In addition, the reading element ids are offset by `1000000000` to make them distinct from the kanji element ids. This reduces the amount of space needed for indices in some locations, because you can simply filter out each part with `>` or `<`.
|
|
||||||
|
|
||||||
We used to generate the `elementId` separately from `orderNum` as a sequential id, but it led to all values
|
|
||||||
shifting whenever the data was updated, leading to very big diffs. Making it be a unique composite of data coming
|
|
||||||
from the source data itself means that the values will be stable across updates.
|
|
||||||
|
|
||||||
Due to the way the data is structured, we can use the `elementId` as the ordering number as well.
|
|
||||||
|
|
||||||
### `JMdict_EntryScore`
|
|
||||||
|
|
||||||
The `JMdict_EntryScore` table is used to store the score of each entry, which is used for sorting search results. The score is calculated based on a number of variables.
|
|
||||||
|
|
||||||
The table is automatically generated from other tables via triggers, and should be considered as a materialized view.
|
|
||||||
|
|
||||||
There is a score row for every single entry in both `JMdict_KanjiElement` and `JMdict_ReadingElement`, split by the `type` field.
|
|
||||||
@@ -3,7 +3,6 @@
|
|||||||
This is the documentation for `jadb`. Since I'm currently the only one working on it, the documentation is more or less just notes to myself, to ensure I remember how and why I implemented certain features in a certain way a few months down the road. This is not a comprehensive and formal documentation for downstream use, neither for developers nor end-users.
|
This is the documentation for `jadb`. Since I'm currently the only one working on it, the documentation is more or less just notes to myself, to ensure I remember how and why I implemented certain features in a certain way a few months down the road. This is not a comprehensive and formal documentation for downstream use, neither for developers nor end-users.
|
||||||
|
|
||||||
- [Word Search](./word-search.md)
|
- [Word Search](./word-search.md)
|
||||||
- [Database](./database.md)
|
|
||||||
- [Lemmatizer](./lemmatizer.md)
|
- [Lemmatizer](./lemmatizer.md)
|
||||||
|
|
||||||
## Project structure
|
## Project structure
|
||||||
|
|||||||
71
flake.lock
generated
71
flake.lock
generated
@@ -1,32 +1,48 @@
|
|||||||
{
|
{
|
||||||
"nodes": {
|
"nodes": {
|
||||||
"datasources": {
|
"jmdict-src": {
|
||||||
"inputs": {
|
"flake": false,
|
||||||
"nixpkgs": [
|
|
||||||
"nixpkgs"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1775550160,
|
"narHash": "sha256-eOc3a/AYNRFF3w6lWhyf0Sh92xeXS7+9Qvn0tvvH6Ys=",
|
||||||
"narHash": "sha256-bgvKrMGUPaDY4EZv+82z1ccYoxwaergdVw/3PZhc2Fc=",
|
"type": "file",
|
||||||
"ref": "refs/heads/main",
|
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"
|
||||||
"rev": "f46229af3678124c5ea7c8dff3292747d0274f69",
|
|
||||||
"revCount": 8,
|
|
||||||
"type": "git",
|
|
||||||
"url": "https://git.pvv.ntnu.no/Mugiten/datasources.git"
|
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"type": "git",
|
"type": "file",
|
||||||
"url": "https://git.pvv.ntnu.no/Mugiten/datasources.git"
|
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"jmdict-with-examples-src": {
|
||||||
|
"flake": false,
|
||||||
|
"locked": {
|
||||||
|
"narHash": "sha256-nx+WMkscWvA/XImKM7NESYVmICwSgXWOO1KPXasHY94=",
|
||||||
|
"type": "file",
|
||||||
|
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"type": "file",
|
||||||
|
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"kanjidic2-src": {
|
||||||
|
"flake": false,
|
||||||
|
"locked": {
|
||||||
|
"narHash": "sha256-2T/cAS/kZmVMURStgHVhz524+J9+v5onKs8eEYf2fY0=",
|
||||||
|
"type": "file",
|
||||||
|
"url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"type": "file",
|
||||||
|
"url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1775423009,
|
"lastModified": 1774386573,
|
||||||
"narHash": "sha256-vPKLpjhIVWdDrfiUM8atW6YkIggCEKdSAlJPzzhkQlw=",
|
"narHash": "sha256-4hAV26quOxdC6iyG7kYaZcM3VOskcPUrdCQd/nx8obc=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "68d8aa3d661f0e6bd5862291b5bb263b2a6595c9",
|
"rev": "46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
@@ -35,10 +51,25 @@
|
|||||||
"type": "indirect"
|
"type": "indirect"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"radkfile-src": {
|
||||||
|
"flake": false,
|
||||||
|
"locked": {
|
||||||
|
"narHash": "sha256-DHpMUE2Umje8PbzXUCS6pHZeXQ5+WTxbjSkGU3erDHQ=",
|
||||||
|
"type": "file",
|
||||||
|
"url": "http://ftp.edrdg.org/pub/Nihongo/radkfile.gz"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"type": "file",
|
||||||
|
"url": "http://ftp.edrdg.org/pub/Nihongo/radkfile.gz"
|
||||||
|
}
|
||||||
|
},
|
||||||
"root": {
|
"root": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"datasources": "datasources",
|
"jmdict-src": "jmdict-src",
|
||||||
"nixpkgs": "nixpkgs"
|
"jmdict-with-examples-src": "jmdict-with-examples-src",
|
||||||
|
"kanjidic2-src": "kanjidic2-src",
|
||||||
|
"nixpkgs": "nixpkgs",
|
||||||
|
"radkfile-src": "radkfile-src"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
79
flake.nix
79
flake.nix
@@ -4,16 +4,35 @@
|
|||||||
inputs = {
|
inputs = {
|
||||||
nixpkgs.url = "nixpkgs/nixos-unstable";
|
nixpkgs.url = "nixpkgs/nixos-unstable";
|
||||||
|
|
||||||
datasources = {
|
jmdict-src = {
|
||||||
url = "git+https://git.pvv.ntnu.no/Mugiten/datasources.git";
|
# url = "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz";
|
||||||
inputs.nixpkgs.follows = "nixpkgs";
|
url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz";
|
||||||
|
flake = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
jmdict-with-examples-src = {
|
||||||
|
url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz";
|
||||||
|
flake = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
radkfile-src = {
|
||||||
|
url = "http://ftp.edrdg.org/pub/Nihongo/radkfile.gz";
|
||||||
|
flake = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
kanjidic2-src = {
|
||||||
|
url = "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz";
|
||||||
|
flake = false;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
outputs = {
|
outputs = {
|
||||||
self,
|
self,
|
||||||
nixpkgs,
|
nixpkgs,
|
||||||
datasources,
|
jmdict-src,
|
||||||
|
jmdict-with-examples-src,
|
||||||
|
radkfile-src,
|
||||||
|
kanjidic2-src
|
||||||
}: let
|
}: let
|
||||||
inherit (nixpkgs) lib;
|
inherit (nixpkgs) lib;
|
||||||
systems = [
|
systems = [
|
||||||
@@ -24,7 +43,12 @@
|
|||||||
"armv7l-linux"
|
"armv7l-linux"
|
||||||
];
|
];
|
||||||
|
|
||||||
forAllSystems = f: lib.genAttrs systems (system: f system nixpkgs.legacyPackages.${system});
|
forAllSystems = f: lib.genAttrs systems (system: let
|
||||||
|
pkgs = import nixpkgs {
|
||||||
|
inherit system;
|
||||||
|
overlays = [ self.overlays.sqlite-icu-ext ];
|
||||||
|
};
|
||||||
|
in f system pkgs);
|
||||||
in {
|
in {
|
||||||
apps = forAllSystems (system: pkgs: {
|
apps = forAllSystems (system: pkgs: {
|
||||||
default = {
|
default = {
|
||||||
@@ -63,7 +87,7 @@
|
|||||||
gnumake
|
gnumake
|
||||||
lcov
|
lcov
|
||||||
sqldiff
|
sqldiff
|
||||||
sqlite-interactive
|
sqlite-interactive-icu-ext
|
||||||
];
|
];
|
||||||
env = {
|
env = {
|
||||||
LIBSQLITE_PATH = "${pkgs.sqlite.out}/lib/libsqlite3.so";
|
LIBSQLITE_PATH = "${pkgs.sqlite.out}/lib/libsqlite3.so";
|
||||||
@@ -74,15 +98,31 @@
|
|||||||
|
|
||||||
sqlite-debugging = pkgs.mkShell {
|
sqlite-debugging = pkgs.mkShell {
|
||||||
packages = with pkgs; [
|
packages = with pkgs; [
|
||||||
sqlite-interactive
|
sqlite-interactive-icu-ext
|
||||||
sqlite-analyzer
|
sqlite-analyzer
|
||||||
sqlite-web
|
sqlite-web
|
||||||
# sqlint
|
sqlint
|
||||||
sqlfluff
|
sqlfluff
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
|
overlays.sqlite-icu-ext = final: prev: let
|
||||||
|
overrideArgs = prev': {
|
||||||
|
configureFlags = prev'.configureFlags ++ [
|
||||||
|
"--with-icu-config=${lib.getExe' prev.icu.dev "icu-config"}"
|
||||||
|
"--enable-icu-collations"
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs = prev'.buildInputs ++ [
|
||||||
|
prev.icu
|
||||||
|
];
|
||||||
|
};
|
||||||
|
in {
|
||||||
|
sqlite-icu-ext = prev.sqlite.overrideAttrs overrideArgs;
|
||||||
|
sqlite-interactive-icu-ext = prev.sqlite-interactive.overrideAttrs overrideArgs;
|
||||||
|
};
|
||||||
|
|
||||||
packages = let
|
packages = let
|
||||||
edrdgMetadata = {
|
edrdgMetadata = {
|
||||||
license = [{
|
license = [{
|
||||||
@@ -116,23 +156,36 @@
|
|||||||
ln -s ${src} $out
|
ln -s ${src} $out
|
||||||
'';
|
'';
|
||||||
|
|
||||||
inherit (datasources.packages.${system}) jmdict radkfile kanjidic2;
|
inherit (pkgs) sqlite-icu-ext sqlite-interactive-icu-ext;
|
||||||
|
|
||||||
|
jmdict = pkgs.callPackage ./nix/jmdict.nix {
|
||||||
|
inherit jmdict-src jmdict-with-examples-src edrdgMetadata;
|
||||||
|
};
|
||||||
|
|
||||||
|
radkfile = pkgs.callPackage ./nix/radkfile.nix {
|
||||||
|
inherit radkfile-src edrdgMetadata;
|
||||||
|
};
|
||||||
|
|
||||||
|
kanjidic2 = pkgs.callPackage ./nix/kanjidic2.nix {
|
||||||
|
inherit kanjidic2-src edrdgMetadata;
|
||||||
|
};
|
||||||
|
|
||||||
database-tool = pkgs.callPackage ./nix/database_tool.nix {
|
database-tool = pkgs.callPackage ./nix/database_tool.nix {
|
||||||
inherit src;
|
inherit src;
|
||||||
|
sqlite = pkgs.sqlite-icu-ext;
|
||||||
};
|
};
|
||||||
|
|
||||||
database = pkgs.callPackage ./nix/database.nix {
|
database = pkgs.callPackage ./nix/database.nix {
|
||||||
inherit (datasources.packages.${system}) jmdict radkfile kanjidic2 tanos-jlpt;
|
inherit (self.packages.${system}) database-tool jmdict radkfile kanjidic2;
|
||||||
inherit (self.packages.${system}) database-tool;
|
|
||||||
inherit src;
|
inherit src;
|
||||||
|
sqlite = pkgs.sqlite-icu-ext;
|
||||||
};
|
};
|
||||||
|
|
||||||
database-wal = pkgs.callPackage ./nix/database.nix {
|
database-wal = pkgs.callPackage ./nix/database.nix {
|
||||||
inherit (datasources.packages.${system}) jmdict radkfile kanjidic2 tanos-jlpt;
|
inherit (self.packages.${system}) database-tool jmdict radkfile kanjidic2;
|
||||||
inherit (self.packages.${system}) database-tool;
|
|
||||||
inherit src;
|
inherit src;
|
||||||
wal = true;
|
wal = true;
|
||||||
|
sqlite = pkgs.sqlite-icu-ext;
|
||||||
};
|
};
|
||||||
|
|
||||||
docs = pkgs.callPackage ./nix/docs.nix {
|
docs = pkgs.callPackage ./nix/docs.nix {
|
||||||
|
|||||||
@@ -1,15 +1,13 @@
|
|||||||
import 'package:jadb/_data_ingestion/sql_writable.dart';
|
import 'package:jadb/_data_ingestion/sql_writable.dart';
|
||||||
|
|
||||||
abstract class Element extends SQLWritable {
|
abstract class Element extends SQLWritable {
|
||||||
final int elementId;
|
|
||||||
final String reading;
|
final String reading;
|
||||||
final int? news;
|
final int? news;
|
||||||
final int? ichi;
|
final int? ichi;
|
||||||
final int? spec;
|
final int? spec;
|
||||||
final int? gai;
|
final int? gai;
|
||||||
final int? nf;
|
final int? nf;
|
||||||
Element({
|
const Element({
|
||||||
required this.elementId,
|
|
||||||
required this.reading,
|
required this.reading,
|
||||||
this.news,
|
this.news,
|
||||||
this.ichi,
|
this.ichi,
|
||||||
@@ -20,7 +18,6 @@ abstract class Element extends SQLWritable {
|
|||||||
|
|
||||||
@override
|
@override
|
||||||
Map<String, Object?> get sqlValue => {
|
Map<String, Object?> get sqlValue => {
|
||||||
'elementId': elementId,
|
|
||||||
'reading': reading,
|
'reading': reading,
|
||||||
'news': news,
|
'news': news,
|
||||||
'ichi': ichi,
|
'ichi': ichi,
|
||||||
@@ -31,11 +28,12 @@ abstract class Element extends SQLWritable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class KanjiElement extends Element {
|
class KanjiElement extends Element {
|
||||||
|
int orderNum;
|
||||||
List<String> info;
|
List<String> info;
|
||||||
|
|
||||||
KanjiElement({
|
KanjiElement({
|
||||||
this.info = const [],
|
this.info = const [],
|
||||||
required super.elementId,
|
required this.orderNum,
|
||||||
required super.reading,
|
required super.reading,
|
||||||
super.news,
|
super.news,
|
||||||
super.ichi,
|
super.ichi,
|
||||||
@@ -47,19 +45,21 @@ class KanjiElement extends Element {
|
|||||||
@override
|
@override
|
||||||
Map<String, Object?> get sqlValue => {
|
Map<String, Object?> get sqlValue => {
|
||||||
...super.sqlValue,
|
...super.sqlValue,
|
||||||
|
'orderNum': orderNum,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
class ReadingElement extends Element {
|
class ReadingElement extends Element {
|
||||||
|
int orderNum;
|
||||||
bool readingDoesNotMatchKanji;
|
bool readingDoesNotMatchKanji;
|
||||||
List<String> info;
|
List<String> info;
|
||||||
List<String> restrictions;
|
List<String> restrictions;
|
||||||
|
|
||||||
ReadingElement({
|
ReadingElement({
|
||||||
|
required this.orderNum,
|
||||||
required this.readingDoesNotMatchKanji,
|
required this.readingDoesNotMatchKanji,
|
||||||
this.info = const [],
|
this.info = const [],
|
||||||
this.restrictions = const [],
|
this.restrictions = const [],
|
||||||
required super.elementId,
|
|
||||||
required super.reading,
|
required super.reading,
|
||||||
super.news,
|
super.news,
|
||||||
super.ichi,
|
super.ichi,
|
||||||
@@ -71,6 +71,7 @@ class ReadingElement extends Element {
|
|||||||
@override
|
@override
|
||||||
Map<String, Object?> get sqlValue => {
|
Map<String, Object?> get sqlValue => {
|
||||||
...super.sqlValue,
|
...super.sqlValue,
|
||||||
|
'orderNum': orderNum,
|
||||||
'readingDoesNotMatchKanji': readingDoesNotMatchKanji,
|
'readingDoesNotMatchKanji': readingDoesNotMatchKanji,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -141,6 +142,7 @@ class XRef {
|
|||||||
|
|
||||||
class Sense extends SQLWritable {
|
class Sense extends SQLWritable {
|
||||||
final int senseId;
|
final int senseId;
|
||||||
|
final int orderNum;
|
||||||
final List<XRefParts> antonyms;
|
final List<XRefParts> antonyms;
|
||||||
final List<String> dialects;
|
final List<String> dialects;
|
||||||
final List<String> fields;
|
final List<String> fields;
|
||||||
@@ -155,6 +157,7 @@ class Sense extends SQLWritable {
|
|||||||
|
|
||||||
const Sense({
|
const Sense({
|
||||||
required this.senseId,
|
required this.senseId,
|
||||||
|
required this.orderNum,
|
||||||
this.antonyms = const [],
|
this.antonyms = const [],
|
||||||
this.dialects = const [],
|
this.dialects = const [],
|
||||||
this.fields = const [],
|
this.fields = const [],
|
||||||
@@ -171,6 +174,7 @@ class Sense extends SQLWritable {
|
|||||||
@override
|
@override
|
||||||
Map<String, Object?> get sqlValue => {
|
Map<String, Object?> get sqlValue => {
|
||||||
'senseId': senseId,
|
'senseId': senseId,
|
||||||
|
'orderNum': orderNum,
|
||||||
};
|
};
|
||||||
|
|
||||||
bool get isEmpty =>
|
bool get isEmpty =>
|
||||||
|
|||||||
@@ -8,10 +8,11 @@ import 'package:sqflite_common/sqlite_api.dart';
|
|||||||
/// A wrapper for the result of resolving an xref, which includes the resolved entry and a flag
|
/// A wrapper for the result of resolving an xref, which includes the resolved entry and a flag
|
||||||
/// indicating whether the xref was ambiguous (i.e. could refer to multiple entries).
|
/// indicating whether the xref was ambiguous (i.e. could refer to multiple entries).
|
||||||
class ResolvedXref {
|
class ResolvedXref {
|
||||||
final Entry entry;
|
Entry entry;
|
||||||
final bool ambiguous;
|
bool ambiguous;
|
||||||
|
int? senseOrderNum;
|
||||||
|
|
||||||
const ResolvedXref(this.entry, this.ambiguous);
|
/// Creates a resolved xref. All three arguments are initializing formals, so
/// [senseOrderNum] is actually stored on the instance (a plain parameter here
/// would be silently discarded and leave the field null).
ResolvedXref(this.entry, this.ambiguous, this.senseOrderNum);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Resolves an xref (pair of kanji, optionally reading, and optionally sense number) to a specific
|
/// Resolves an xref (pair of kanji, optionally reading, and optionally sense number) to a specific
|
||||||
@@ -74,9 +75,10 @@ ResolvedXref resolveXref(
|
|||||||
'kanjiRef: ${xref.kanjiRef}, readingRef: ${xref.readingRef}, '
|
'kanjiRef: ${xref.kanjiRef}, readingRef: ${xref.readingRef}, '
|
||||||
'senseOrderNum: ${xref.senseOrderNum}',
|
'senseOrderNum: ${xref.senseOrderNum}',
|
||||||
);
|
);
|
||||||
return ResolvedXref(candidateEntries.first, true);
|
|
||||||
|
return ResolvedXref(candidateEntries.first, true, xref.senseOrderNum);
|
||||||
} else {
|
} else {
|
||||||
return ResolvedXref(candidateEntries.first, false);
|
return ResolvedXref(candidateEntries.first, false, xref.senseOrderNum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -84,32 +86,41 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
|||||||
print(' [JMdict] Batch 1 - Kanji and readings');
|
print(' [JMdict] Batch 1 - Kanji and readings');
|
||||||
Batch b = db.batch();
|
Batch b = db.batch();
|
||||||
|
|
||||||
|
int elementId = 0;
|
||||||
for (final e in entries) {
|
for (final e in entries) {
|
||||||
b.insert(JMdictTableNames.entry, e.sqlValue);
|
b.insert(JMdictTableNames.entry, e.sqlValue);
|
||||||
|
|
||||||
for (final k in e.kanji) {
|
for (final k in e.kanji) {
|
||||||
b.insert(JMdictTableNames.kanjiElement, k.sqlValue);
|
elementId++;
|
||||||
|
b.insert(
|
||||||
|
JMdictTableNames.kanjiElement,
|
||||||
|
k.sqlValue..addAll({'entryId': e.entryId, 'elementId': elementId}),
|
||||||
|
);
|
||||||
|
|
||||||
for (final i in k.info) {
|
for (final i in k.info) {
|
||||||
b.insert(JMdictTableNames.kanjiInfo, {
|
b.insert(JMdictTableNames.kanjiInfo, {
|
||||||
'elementId': k.elementId,
|
'elementId': elementId,
|
||||||
'info': i,
|
'info': i,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (final r in e.readings) {
|
for (final r in e.readings) {
|
||||||
b.insert(JMdictTableNames.readingElement, r.sqlValue);
|
elementId++;
|
||||||
|
b.insert(
|
||||||
|
JMdictTableNames.readingElement,
|
||||||
|
r.sqlValue..addAll({'entryId': e.entryId, 'elementId': elementId}),
|
||||||
|
);
|
||||||
|
|
||||||
for (final i in r.info) {
|
for (final i in r.info) {
|
||||||
b.insert(JMdictTableNames.readingInfo, {
|
b.insert(JMdictTableNames.readingInfo, {
|
||||||
'elementId': r.elementId,
|
'elementId': elementId,
|
||||||
'info': i,
|
'info': i,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
for (final res in r.restrictions) {
|
for (final res in r.restrictions) {
|
||||||
b.insert(JMdictTableNames.readingRestriction, {
|
b.insert(JMdictTableNames.readingRestriction, {
|
||||||
'elementId': r.elementId,
|
'elementId': elementId,
|
||||||
'restriction': res,
|
'restriction': res,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -123,7 +134,10 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
|||||||
|
|
||||||
for (final e in entries) {
|
for (final e in entries) {
|
||||||
for (final s in e.senses) {
|
for (final s in e.senses) {
|
||||||
b.insert(JMdictTableNames.sense, s.sqlValue);
|
b.insert(
|
||||||
|
JMdictTableNames.sense,
|
||||||
|
s.sqlValue..addAll({'entryId': e.entryId}),
|
||||||
|
);
|
||||||
for (final d in s.dialects) {
|
for (final d in s.dialects) {
|
||||||
b.insert(JMdictTableNames.senseDialect, {
|
b.insert(JMdictTableNames.senseDialect, {
|
||||||
'senseId': s.senseId,
|
'senseId': s.senseId,
|
||||||
@@ -147,14 +161,16 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
|||||||
}
|
}
|
||||||
for (final rk in s.restrictedToKanji) {
|
for (final rk in s.restrictedToKanji) {
|
||||||
b.insert(JMdictTableNames.senseRestrictedToKanji, {
|
b.insert(JMdictTableNames.senseRestrictedToKanji, {
|
||||||
|
'entryId': e.entryId,
|
||||||
'senseId': s.senseId,
|
'senseId': s.senseId,
|
||||||
'kanji': rk,
|
'kanjiOrderNum': e.kanji.indexWhere((k) => k.reading == rk) + 1,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
for (final rr in s.restrictedToReading) {
|
for (final rr in s.restrictedToReading) {
|
||||||
b.insert(JMdictTableNames.senseRestrictedToReading, {
|
b.insert(JMdictTableNames.senseRestrictedToReading, {
|
||||||
|
'entryId': e.entryId,
|
||||||
'senseId': s.senseId,
|
'senseId': s.senseId,
|
||||||
'reading': rr,
|
'readingOrderNum': e.readings.indexWhere((r) => r.reading == rr) + 1,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
for (final ls in s.languageSource) {
|
for (final ls in s.languageSource) {
|
||||||
@@ -215,11 +231,7 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
|||||||
b.insert(JMdictTableNames.senseSeeAlso, {
|
b.insert(JMdictTableNames.senseSeeAlso, {
|
||||||
'senseId': s.senseId,
|
'senseId': s.senseId,
|
||||||
'xrefEntryId': resolvedEntry.entry.entryId,
|
'xrefEntryId': resolvedEntry.entry.entryId,
|
||||||
'seeAlsoKanji': xref.kanjiRef,
|
'xrefSenseOrderNum': resolvedEntry.senseOrderNum,
|
||||||
'seeAlsoReading': xref.readingRef,
|
|
||||||
'seeAlsoSense': xref.senseOrderNum != null
|
|
||||||
? xref.senseOrderNum! - 1
|
|
||||||
: null,
|
|
||||||
'ambiguous': resolvedEntry.ambiguous,
|
'ambiguous': resolvedEntry.ambiguous,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -244,11 +256,6 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
|||||||
b.insert(JMdictTableNames.senseAntonyms, {
|
b.insert(JMdictTableNames.senseAntonyms, {
|
||||||
'senseId': s.senseId,
|
'senseId': s.senseId,
|
||||||
'xrefEntryId': resolvedEntry.entry.entryId,
|
'xrefEntryId': resolvedEntry.entry.entryId,
|
||||||
'antonymKanji': ant.kanjiRef,
|
|
||||||
'antonymReading': ant.readingRef,
|
|
||||||
'antonymSense': ant.senseOrderNum != null
|
|
||||||
? ant.senseOrderNum! - 1
|
|
||||||
: null,
|
|
||||||
'ambiguous': resolvedEntry.ambiguous,
|
'ambiguous': resolvedEntry.ambiguous,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -71,6 +71,8 @@ XRefParts parseXrefParts(String s) {
|
|||||||
List<Entry> parseJMDictData(XmlElement root) {
|
List<Entry> parseJMDictData(XmlElement root) {
|
||||||
final List<Entry> entries = [];
|
final List<Entry> entries = [];
|
||||||
|
|
||||||
|
int senseId = 0;
|
||||||
|
|
||||||
for (final entry in root.childElements) {
|
for (final entry in root.childElements) {
|
||||||
final entryId = int.parse(entry.findElements('ent_seq').first.innerText);
|
final entryId = int.parse(entry.findElements('ent_seq').first.innerText);
|
||||||
|
|
||||||
@@ -78,18 +80,11 @@ List<Entry> parseJMDictData(XmlElement root) {
|
|||||||
final List<ReadingElement> readingEls = [];
|
final List<ReadingElement> readingEls = [];
|
||||||
final List<Sense> senses = [];
|
final List<Sense> senses = [];
|
||||||
|
|
||||||
for (final (orderNum, kEle) in entry.findElements('k_ele').indexed) {
|
for (final (kanjiNum, kEle) in entry.findElements('k_ele').indexed) {
|
||||||
assert(
|
|
||||||
orderNum < 100,
|
|
||||||
'Entry $entryId has more than 100 kanji elements, which will break the elementId generation logic.',
|
|
||||||
);
|
|
||||||
final elementId = entryId * 100 + orderNum;
|
|
||||||
|
|
||||||
final kePri = getPriorityValues(kEle, 'ke');
|
final kePri = getPriorityValues(kEle, 'ke');
|
||||||
|
|
||||||
kanjiEls.add(
|
kanjiEls.add(
|
||||||
KanjiElement(
|
KanjiElement(
|
||||||
elementId: elementId,
|
orderNum: kanjiNum + 1,
|
||||||
info: kEle
|
info: kEle
|
||||||
.findElements('ke_inf')
|
.findElements('ke_inf')
|
||||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||||
@@ -105,20 +100,13 @@ List<Entry> parseJMDictData(XmlElement root) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (final (orderNum, rEle) in entry.findElements('r_ele').indexed) {
|
for (final (orderNum, rEle) in entry.findElements('r_ele').indexed) {
|
||||||
assert(
|
|
||||||
orderNum < 100,
|
|
||||||
'Entry $entryId has more than 100 readings, which will break the elementId generation logic.',
|
|
||||||
);
|
|
||||||
final elementId = 1_000_000_000 + entryId * 100 + orderNum;
|
|
||||||
|
|
||||||
final rePri = getPriorityValues(rEle, 're');
|
final rePri = getPriorityValues(rEle, 're');
|
||||||
final readingDoesNotMatchKanji = rEle
|
final readingDoesNotMatchKanji = rEle
|
||||||
.findElements('re_nokanji')
|
.findElements('re_nokanji')
|
||||||
.isNotEmpty;
|
.isNotEmpty;
|
||||||
|
|
||||||
readingEls.add(
|
readingEls.add(
|
||||||
ReadingElement(
|
ReadingElement(
|
||||||
elementId: elementId,
|
orderNum: orderNum + 1,
|
||||||
readingDoesNotMatchKanji: readingDoesNotMatchKanji,
|
readingDoesNotMatchKanji: readingDoesNotMatchKanji,
|
||||||
info: rEle
|
info: rEle
|
||||||
.findElements('re_inf')
|
.findElements('re_inf')
|
||||||
@@ -139,14 +127,10 @@ List<Entry> parseJMDictData(XmlElement root) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (final (orderNum, sense) in entry.findElements('sense').indexed) {
|
for (final (orderNum, sense) in entry.findElements('sense').indexed) {
|
||||||
assert(
|
senseId++;
|
||||||
orderNum < 100,
|
|
||||||
'Entry $entryId has more than 100 senses, which will break the senseId generation logic.',
|
|
||||||
);
|
|
||||||
final senseId = entryId * 100 + orderNum;
|
|
||||||
|
|
||||||
final result = Sense(
|
final result = Sense(
|
||||||
senseId: senseId,
|
senseId: senseId,
|
||||||
|
orderNum: orderNum + 1,
|
||||||
restrictedToKanji: sense
|
restrictedToKanji: sense
|
||||||
.findElements('stagk')
|
.findElements('stagk')
|
||||||
.map((e) => e.innerText)
|
.map((e) => e.innerText)
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ Future<Database> openLocalDb({
|
|||||||
await db.execute('PRAGMA journal_mode = WAL');
|
await db.execute('PRAGMA journal_mode = WAL');
|
||||||
}
|
}
|
||||||
await db.execute('PRAGMA foreign_keys = ON');
|
await db.execute('PRAGMA foreign_keys = ON');
|
||||||
|
await db.execute("SELECT icu_load_collation('ja_JP', 'japanese')");
|
||||||
},
|
},
|
||||||
readOnly: !readWrite,
|
readOnly: !readWrite,
|
||||||
),
|
),
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import 'dart:io';
|
import 'dart:io';
|
||||||
|
|
||||||
Iterable<String> parseRADKFILEBlocks(File radkfile) {
|
Iterable<String> parseRADKFILEBlocks(File radkfile) {
|
||||||
final String content = radkfile.readAsStringSync();
|
final String content = File('data/tmp/radkfile_utf8').readAsStringSync();
|
||||||
|
|
||||||
final Iterable<String> blocks = content
|
final Iterable<String> blocks = content
|
||||||
.replaceAll(RegExp(r'^#.*$'), '')
|
.replaceAll(RegExp(r'^#.*$'), '')
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ Future<void> seedData(Database db) async {
|
|||||||
|
|
||||||
Future<void> parseAndSeedDataFromJMdict(Database db) async {
|
Future<void> parseAndSeedDataFromJMdict(Database db) async {
|
||||||
print('[JMdict] Reading file content...');
|
print('[JMdict] Reading file content...');
|
||||||
final String rawXML = File('data/JMdict.xml').readAsStringSync();
|
final String rawXML = File('data/tmp/JMdict.xml').readAsStringSync();
|
||||||
|
|
||||||
print('[JMdict] Parsing XML tags...');
|
print('[JMdict] Parsing XML tags...');
|
||||||
final XmlElement root = XmlDocument.parse(rawXML).getElement('JMdict')!;
|
final XmlElement root = XmlDocument.parse(rawXML).getElement('JMdict')!;
|
||||||
@@ -38,7 +38,7 @@ Future<void> parseAndSeedDataFromJMdict(Database db) async {
|
|||||||
|
|
||||||
Future<void> parseAndSeedDataFromKANJIDIC(Database db) async {
|
Future<void> parseAndSeedDataFromKANJIDIC(Database db) async {
|
||||||
print('[KANJIDIC2] Reading file...');
|
print('[KANJIDIC2] Reading file...');
|
||||||
final String rawXML = File('data/kanjidic2.xml').readAsStringSync();
|
final String rawXML = File('data/tmp/kanjidic2.xml').readAsStringSync();
|
||||||
|
|
||||||
print('[KANJIDIC2] Parsing XML...');
|
print('[KANJIDIC2] Parsing XML...');
|
||||||
final XmlElement root = XmlDocument.parse(rawXML).getElement('kanjidic2')!;
|
final XmlElement root = XmlDocument.parse(rawXML).getElement('kanjidic2')!;
|
||||||
@@ -52,7 +52,7 @@ Future<void> parseAndSeedDataFromKANJIDIC(Database db) async {
|
|||||||
|
|
||||||
Future<void> parseAndSeedDataFromRADKFILE(Database db) async {
|
Future<void> parseAndSeedDataFromRADKFILE(Database db) async {
|
||||||
print('[RADKFILE] Reading file...');
|
print('[RADKFILE] Reading file...');
|
||||||
final File raw = File('data/RADKFILE');
|
final File raw = File('data/tmp/RADKFILE');
|
||||||
|
|
||||||
print('[RADKFILE] Parsing content...');
|
print('[RADKFILE] Parsing content...');
|
||||||
final blocks = parseRADKFILEBlocks(raw);
|
final blocks = parseRADKFILEBlocks(raw);
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import 'dart:io';
|
|||||||
|
|
||||||
import 'package:csv/csv.dart';
|
import 'package:csv/csv.dart';
|
||||||
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
|
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
|
||||||
|
import 'package:xml/xml_events.dart';
|
||||||
|
|
||||||
Future<List<JLPTRankedWord>> parseJLPTRankedWords(
|
Future<List<JLPTRankedWord>> parseJLPTRankedWords(
|
||||||
Map<String, File> files,
|
Map<String, File> files,
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ Future<List<Map<String, Object?>>> _readingelementsQuery(
|
|||||||
JMdictTableNames.readingElement,
|
JMdictTableNames.readingElement,
|
||||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
||||||
whereArgs: entryIds,
|
whereArgs: entryIds,
|
||||||
orderBy: 'elementId',
|
orderBy: 'orderNum',
|
||||||
);
|
);
|
||||||
|
|
||||||
Future<List<Map<String, Object?>>> _kanjielementsQuery(
|
Future<List<Map<String, Object?>>> _kanjielementsQuery(
|
||||||
@@ -79,7 +79,7 @@ Future<List<Map<String, Object?>>> _kanjielementsQuery(
|
|||||||
JMdictTableNames.kanjiElement,
|
JMdictTableNames.kanjiElement,
|
||||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
||||||
whereArgs: entryIds,
|
whereArgs: entryIds,
|
||||||
orderBy: 'elementId',
|
orderBy: 'orderNum',
|
||||||
);
|
);
|
||||||
|
|
||||||
Future<List<Map<String, Object?>>> _jlpttagsQuery(
|
Future<List<Map<String, Object?>>> _jlpttagsQuery(
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ String _filterFTSSensitiveCharacters(String word) {
|
|||||||
JOIN "$tableName" USING ("elementId")
|
JOIN "$tableName" USING ("elementId")
|
||||||
JOIN "JMdict_EntryScore" USING ("elementId")
|
JOIN "JMdict_EntryScore" USING ("elementId")
|
||||||
WHERE "${tableName}FTS"."reading" MATCH ? || '*'
|
WHERE "${tableName}FTS"."reading" MATCH ? || '*'
|
||||||
AND "JMdict_EntryScore"."elementId" ${tableName == JMdictTableNames.kanjiElement ? '<' : '>='} 1000000000
|
AND "JMdict_EntryScore"."type" = '${tableName == JMdictTableNames.kanjiElement ? 'k' : 'r'}'
|
||||||
),
|
),
|
||||||
non_fts_results AS (
|
non_fts_results AS (
|
||||||
SELECT DISTINCT
|
SELECT DISTINCT
|
||||||
@@ -86,7 +86,7 @@ String _filterFTSSensitiveCharacters(String word) {
|
|||||||
JOIN "JMdict_EntryScore" USING ("elementId")
|
JOIN "JMdict_EntryScore" USING ("elementId")
|
||||||
WHERE "reading" LIKE '%' || ? || '%'
|
WHERE "reading" LIKE '%' || ? || '%'
|
||||||
AND "$tableName"."entryId" NOT IN (SELECT "entryId" FROM "fts_results")
|
AND "$tableName"."entryId" NOT IN (SELECT "entryId" FROM "fts_results")
|
||||||
AND "JMdict_EntryScore"."elementId" ${tableName == JMdictTableNames.kanjiElement ? '<' : '>='} 1000000000
|
AND "JMdict_EntryScore"."type" = '${tableName == JMdictTableNames.kanjiElement ? 'k' : 'r'}'
|
||||||
)
|
)
|
||||||
|
|
||||||
SELECT ${countOnly ? 'COUNT(DISTINCT "entryId") AS count' : '"entryId", MAX("score") AS "score"'}
|
SELECT ${countOnly ? 'COUNT(DISTINCT "entryId") AS count' : '"entryId", MAX("score") AS "score"'}
|
||||||
@@ -199,8 +199,8 @@ Future<List<ScoredEntryId>> _queryEnglish(
|
|||||||
SELECT
|
SELECT
|
||||||
"${JMdictTableNames.sense}"."entryId",
|
"${JMdictTableNames.sense}"."entryId",
|
||||||
MAX("JMdict_EntryScore"."score")
|
MAX("JMdict_EntryScore"."score")
|
||||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 0) * 50)
|
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
|
||||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 30)
|
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
|
||||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1) * 20)
|
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1) * 20)
|
||||||
as "score"
|
as "score"
|
||||||
FROM "${JMdictTableNames.senseGlossary}"
|
FROM "${JMdictTableNames.senseGlossary}"
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
SELECT icu_load_collation('ja_JP', 'japanese');
|
||||||
|
|
||||||
CREATE TABLE "JMdict_Version" (
|
CREATE TABLE "JMdict_Version" (
|
||||||
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
|
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
|
||||||
"date" DATE NOT NULL,
|
"date" DATE NOT NULL,
|
||||||
@@ -53,18 +55,19 @@ CREATE TABLE "JMdict_Entry" (
|
|||||||
|
|
||||||
CREATE TABLE "JMdict_KanjiElement" (
|
CREATE TABLE "JMdict_KanjiElement" (
|
||||||
"elementId" INTEGER PRIMARY KEY,
|
"elementId" INTEGER PRIMARY KEY,
|
||||||
"entryId" INTEGER NOT NULL GENERATED ALWAYS AS ("elementId" / 100) STORED,
|
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||||
"orderNum" INTEGER NOT NULL GENERATED ALWAYS AS ("elementId" % 100) VIRTUAL,
|
"orderNum" INTEGER NOT NULL,
|
||||||
"reading" TEXT NOT NULL,
|
"reading" TEXT NOT NULL COLLATE japanese,
|
||||||
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
|
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
|
||||||
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
|
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
|
||||||
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
|
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
|
||||||
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
|
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
|
||||||
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
|
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
|
||||||
FOREIGN KEY ("entryId") REFERENCES "JMdict_Entry"("entryId"),
|
-- UNIQUE("entryId", "reading"),
|
||||||
UNIQUE("entryId", "reading")
|
UNIQUE("entryId", "orderNum")
|
||||||
) WITHOUT ROWID;
|
) WITHOUT ROWID;
|
||||||
|
|
||||||
|
CREATE INDEX "JMdict_KanjiElement_byEntryId_byOrderNum" ON "JMdict_KanjiElement"("entryId", "orderNum");
|
||||||
CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading");
|
CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading");
|
||||||
|
|
||||||
CREATE TABLE "JMdict_KanjiElementInfo" (
|
CREATE TABLE "JMdict_KanjiElementInfo" (
|
||||||
@@ -77,19 +80,20 @@ CREATE TABLE "JMdict_KanjiElementInfo" (
|
|||||||
|
|
||||||
CREATE TABLE "JMdict_ReadingElement" (
|
CREATE TABLE "JMdict_ReadingElement" (
|
||||||
"elementId" INTEGER PRIMARY KEY,
|
"elementId" INTEGER PRIMARY KEY,
|
||||||
"entryId" INTEGER NOT NULL GENERATED ALWAYS AS (("elementId" / 100) % 10000000) STORED,
|
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||||
"orderNum" INTEGER NOT NULL GENERATED ALWAYS AS ("elementId" % 100) VIRTUAL,
|
"orderNum" INTEGER NOT NULL,
|
||||||
"reading" TEXT NOT NULL,
|
"reading" TEXT NOT NULL COLLATE japanese,
|
||||||
"readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
|
"readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||||
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
|
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
|
||||||
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
|
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
|
||||||
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
|
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
|
||||||
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
|
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
|
||||||
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
|
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
|
||||||
FOREIGN KEY ("entryId") REFERENCES "JMdict_Entry"("entryId"),
|
-- UNIQUE("entryId", "reading"),
|
||||||
UNIQUE("entryId", "reading")
|
UNIQUE("entryId", "orderNum")
|
||||||
) WITHOUT ROWID;
|
) WITHOUT ROWID;
|
||||||
|
|
||||||
|
CREATE INDEX "JMdict_ReadingElement_byEntryId_byOrderNum" ON "JMdict_ReadingElement"("entryId", "orderNum");
|
||||||
CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading");
|
CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading");
|
||||||
|
|
||||||
CREATE TABLE "JMdict_ReadingElementRestriction" (
|
CREATE TABLE "JMdict_ReadingElementRestriction" (
|
||||||
@@ -108,26 +112,27 @@ CREATE TABLE "JMdict_ReadingElementInfo" (
|
|||||||
|
|
||||||
CREATE TABLE "JMdict_Sense" (
|
CREATE TABLE "JMdict_Sense" (
|
||||||
"senseId" INTEGER PRIMARY KEY,
|
"senseId" INTEGER PRIMARY KEY,
|
||||||
"entryId" INTEGER NOT NULL GENERATED ALWAYS AS ("senseId" / 100) STORED,
|
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||||
"orderNum" INTEGER NOT NULL GENERATED ALWAYS AS ("senseId" % 100) VIRTUAL,
|
"orderNum" INTEGER NOT NULL,
|
||||||
FOREIGN KEY ("entryId") REFERENCES "JMdict_Entry"("entryId"),
|
|
||||||
UNIQUE("entryId", "orderNum")
|
UNIQUE("entryId", "orderNum")
|
||||||
);
|
);
|
||||||
|
|
||||||
|
CREATE INDEX "JMdict_Sense_byEntryId_byOrderNum" ON "JMdict_Sense"("entryId", "orderNum");
|
||||||
|
|
||||||
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
|
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
|
||||||
|
"entryId" INTEGER NOT NULL,
|
||||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||||
"kanji" TEXT NOT NULL,
|
"kanjiOrderNum" INTEGER NOT NULL CHECK ("kanjiOrderNum" > 0),
|
||||||
"entryId" INTEGER NOT NULL GENERATED ALWAYS AS ("senseId" / 100) VIRTUAL,
|
FOREIGN KEY ("entryId", "kanjiOrderNum") REFERENCES "JMdict_KanjiElement"("entryId", "orderNum"),
|
||||||
FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
PRIMARY KEY ("entryId", "senseId", "kanjiOrderNum")
|
||||||
PRIMARY KEY ("senseId", "kanji")
|
|
||||||
) WITHOUT ROWID;
|
) WITHOUT ROWID;
|
||||||
|
|
||||||
CREATE TABLE "JMdict_SenseRestrictedToReading" (
|
CREATE TABLE "JMdict_SenseRestrictedToReading" (
|
||||||
|
"entryId" INTEGER NOT NULL,
|
||||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||||
"reading" TEXT NOT NULL,
|
"readingOrderNum" INTEGER NOT NULL CHECK ("readingOrderNum" > 0),
|
||||||
"entryId" INTEGER NOT NULL GENERATED ALWAYS AS ("senseId" / 100) VIRTUAL,
|
FOREIGN KEY ("entryId", "readingOrderNum") REFERENCES "JMdict_ReadingElement"("entryId", "orderNum"),
|
||||||
FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
PRIMARY KEY ("entryId", "senseId", "readingOrderNum")
|
||||||
PRIMARY KEY ("senseId", "reading")
|
|
||||||
) WITHOUT ROWID;
|
) WITHOUT ROWID;
|
||||||
|
|
||||||
-- In order to add xrefs, you will need to have added the entry to xref to.
|
-- In order to add xrefs, you will need to have added the entry to xref to.
|
||||||
@@ -142,50 +147,23 @@ CREATE TABLE "JMdict_SenseRestrictedToReading" (
|
|||||||
|
|
||||||
CREATE TABLE "JMdict_SenseSeeAlso" (
|
CREATE TABLE "JMdict_SenseSeeAlso" (
|
||||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||||
"xrefEntryId" INTEGER NOT NULL,
|
"xrefEntryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||||
"seeAlsoReading" TEXT,
|
-- Sometimes the cross reference is to a specific sense
|
||||||
"seeAlsoKanji" TEXT,
|
"xrefSenseOrderNum" INTEGER,
|
||||||
"seeAlsoSense" INTEGER,
|
|
||||||
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
|
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
|
||||||
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
|
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
|
||||||
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
|
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||||
|
FOREIGN KEY ("xrefEntryId", "xrefSenseOrderNum") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
|
||||||
"seeAlsoSenseKey" INTEGER GENERATED ALWAYS AS (
|
UNIQUE("senseId", "xrefEntryId", "xrefSenseOrderNum")
|
||||||
CASE
|
|
||||||
WHEN "seeAlsoSense" IS NOT NULL THEN ("xrefEntryId" * 100) + "seeAlsoSense"
|
|
||||||
ELSE NULL
|
|
||||||
END
|
|
||||||
) VIRTUAL,
|
|
||||||
|
|
||||||
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
|
||||||
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
|
||||||
FOREIGN KEY ("seeAlsoSenseKey") REFERENCES "JMdict_Sense"("senseId"),
|
|
||||||
|
|
||||||
UNIQUE("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
|
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE "JMdict_SenseAntonym" (
|
CREATE TABLE "JMdict_SenseAntonym" (
|
||||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||||
"xrefEntryId" INTEGER NOT NULL,
|
"xrefEntryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||||
"antonymReading" TEXT,
|
|
||||||
"antonymKanji" TEXT,
|
|
||||||
"antonymSense" INTEGER,
|
|
||||||
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
|
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
|
||||||
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
|
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
|
||||||
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
|
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||||
|
UNIQUE("senseId", "xrefEntryId")
|
||||||
"antonymSenseKey" INTEGER GENERATED ALWAYS AS (
|
|
||||||
CASE
|
|
||||||
WHEN "antonymSense" IS NOT NULL THEN ("xrefEntryId" * 100) + "antonymSense"
|
|
||||||
ELSE NULL
|
|
||||||
END
|
|
||||||
) VIRTUAL,
|
|
||||||
|
|
||||||
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
|
||||||
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
|
||||||
FOREIGN KEY ("antonymSenseKey") REFERENCES "JMdict_Sense"("senseId"),
|
|
||||||
|
|
||||||
UNIQUE("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
|
|
||||||
);
|
);
|
||||||
|
|
||||||
-- These cross references are going to be mostly accessed from a sense
|
-- These cross references are going to be mostly accessed from a sense
|
||||||
|
|||||||
@@ -1,28 +1,26 @@
|
|||||||
CREATE TABLE "JMdict_EntryScore" (
|
CREATE TABLE "JMdict_EntryScore" (
|
||||||
"elementId" INTEGER PRIMARY KEY,
|
"type" CHAR(1) NOT NULL CHECK ("type" IN ('r', 'k')),
|
||||||
|
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||||
|
"elementId" INTEGER NOT NULL,
|
||||||
"score" INTEGER NOT NULL DEFAULT 0,
|
"score" INTEGER NOT NULL DEFAULT 0,
|
||||||
"common" BOOLEAN NOT NULL DEFAULT FALSE,
|
"common" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||||
|
PRIMARY KEY ("type", "elementId")
|
||||||
"entryId" INTEGER NOT NULL GENERATED ALWAYS AS (("elementId" / 100) % 10000000) STORED,
|
|
||||||
"type" CHAR(1) NOT NULL GENERATED ALWAYS AS (CASE
|
|
||||||
WHEN "elementId" / 1000000000 = 0 THEN 'k'
|
|
||||||
ELSE 'r'
|
|
||||||
END) VIRTUAL,
|
|
||||||
|
|
||||||
FOREIGN KEY ("entryId") REFERENCES "JMdict_Entry"("entryId")
|
|
||||||
) WITHOUT ROWID;
|
) WITHOUT ROWID;
|
||||||
|
|
||||||
CREATE INDEX "JMdict_EntryScore_byElementId_byScore" ON "JMdict_EntryScore"("elementId", "score");
|
CREATE INDEX "JMdict_EntryScore_byElementId_byScore" ON "JMdict_EntryScore"("elementId", "score");
|
||||||
CREATE INDEX "JMdict_EntryScore_byScore" ON "JMdict_EntryScore"("score");
|
CREATE INDEX "JMdict_EntryScore_byScore" ON "JMdict_EntryScore"("score");
|
||||||
|
|
||||||
CREATE INDEX "JMdict_EntryScore_byElementId_byCommon" ON "JMdict_EntryScore"("elementId", "common");
|
|
||||||
CREATE INDEX "JMdict_EntryScore_byCommon" ON "JMdict_EntryScore"("common");
|
CREATE INDEX "JMdict_EntryScore_byCommon" ON "JMdict_EntryScore"("common");
|
||||||
|
|
||||||
|
CREATE INDEX "JMdict_EntryScore_byType_byElementId_byScore" ON "JMdict_EntryScore"("type", "elementId", "score");
|
||||||
|
CREATE INDEX "JMdict_EntryScore_byType_byScore" ON "JMdict_EntryScore"("type", "score");
|
||||||
|
CREATE INDEX "JMdict_EntryScore_byType_byCommon" ON "JMdict_EntryScore"("type", "common");
|
||||||
|
|
||||||
-- NOTE: these views are deduplicated in order not to perform an unnecessary
|
-- NOTE: these views are deduplicated in order not to perform an unnecessary
|
||||||
-- UNION on every trigger
|
-- UNION on every trigger
|
||||||
|
|
||||||
CREATE VIEW "JMdict_EntryScoreView_Reading" AS
|
CREATE VIEW "JMdict_EntryScoreView_Reading" AS
|
||||||
SELECT
|
SELECT
|
||||||
|
'r' AS "type",
|
||||||
"JMdict_ReadingElement"."entryId",
|
"JMdict_ReadingElement"."entryId",
|
||||||
"JMdict_ReadingElement"."elementId",
|
"JMdict_ReadingElement"."elementId",
|
||||||
(
|
(
|
||||||
@@ -46,7 +44,7 @@ SELECT
|
|||||||
+ (("spec" IS 2) * 5)
|
+ (("spec" IS 2) * 5)
|
||||||
+ (("gai" IS 1) * 10)
|
+ (("gai" IS 1) * 10)
|
||||||
+ (("gai" IS 2) * 5)
|
+ (("gai" IS 2) * 5)
|
||||||
+ (("orderNum" IS 0) * 20)
|
+ (("orderNum" IS 1) * 20)
|
||||||
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
|
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
|
||||||
AS "score"
|
AS "score"
|
||||||
FROM "JMdict_ReadingElement"
|
FROM "JMdict_ReadingElement"
|
||||||
@@ -54,6 +52,7 @@ LEFT JOIN "JMdict_JLPTTag" USING ("entryId");
|
|||||||
|
|
||||||
CREATE VIEW "JMdict_EntryScoreView_Kanji" AS
|
CREATE VIEW "JMdict_EntryScoreView_Kanji" AS
|
||||||
SELECT
|
SELECT
|
||||||
|
'k' AS "type",
|
||||||
"JMdict_KanjiElement"."entryId",
|
"JMdict_KanjiElement"."entryId",
|
||||||
"JMdict_KanjiElement"."elementId",
|
"JMdict_KanjiElement"."elementId",
|
||||||
(
|
(
|
||||||
@@ -77,7 +76,7 @@ SELECT
|
|||||||
+ (("spec" IS 2) * 5)
|
+ (("spec" IS 2) * 5)
|
||||||
+ (("gai" IS 1) * 10)
|
+ (("gai" IS 1) * 10)
|
||||||
+ (("gai" IS 2) * 5)
|
+ (("gai" IS 2) * 5)
|
||||||
+ (("orderNum" IS 0) * 20)
|
+ (("orderNum" IS 1) * 20)
|
||||||
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
|
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
|
||||||
AS "score"
|
AS "score"
|
||||||
FROM "JMdict_KanjiElement"
|
FROM "JMdict_KanjiElement"
|
||||||
@@ -97,17 +96,19 @@ CREATE TRIGGER "JMdict_EntryScore_Insert_JMdict_ReadingElement"
|
|||||||
AFTER INSERT ON "JMdict_ReadingElement"
|
AFTER INSERT ON "JMdict_ReadingElement"
|
||||||
BEGIN
|
BEGIN
|
||||||
INSERT INTO "JMdict_EntryScore" (
|
INSERT INTO "JMdict_EntryScore" (
|
||||||
|
"type",
|
||||||
|
"entryId",
|
||||||
"elementId",
|
"elementId",
|
||||||
"score",
|
"score",
|
||||||
"common"
|
"common"
|
||||||
)
|
)
|
||||||
SELECT "elementId", "score", "common"
|
SELECT "type", "entryId", "elementId", "score", "common"
|
||||||
FROM "JMdict_EntryScoreView_Reading"
|
FROM "JMdict_EntryScoreView_Reading"
|
||||||
WHERE "elementId" = NEW."elementId";
|
WHERE "elementId" = NEW."elementId";
|
||||||
END;
|
END;
|
||||||
|
|
||||||
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement"
|
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement"
|
||||||
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "elementId"
|
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
|
||||||
ON "JMdict_ReadingElement"
|
ON "JMdict_ReadingElement"
|
||||||
BEGIN
|
BEGIN
|
||||||
UPDATE "JMdict_EntryScore"
|
UPDATE "JMdict_EntryScore"
|
||||||
@@ -122,7 +123,8 @@ CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_ReadingElement"
|
|||||||
AFTER DELETE ON "JMdict_ReadingElement"
|
AFTER DELETE ON "JMdict_ReadingElement"
|
||||||
BEGIN
|
BEGIN
|
||||||
DELETE FROM "JMdict_EntryScore"
|
DELETE FROM "JMdict_EntryScore"
|
||||||
WHERE "elementId" = OLD."elementId";
|
WHERE "type" = 'r'
|
||||||
|
AND "elementId" = OLD."elementId";
|
||||||
END;
|
END;
|
||||||
|
|
||||||
--- JMdict_KanjiElement triggers
|
--- JMdict_KanjiElement triggers
|
||||||
@@ -131,17 +133,19 @@ CREATE TRIGGER "JMdict_EntryScore_Insert_JMdict_KanjiElement"
|
|||||||
AFTER INSERT ON "JMdict_KanjiElement"
|
AFTER INSERT ON "JMdict_KanjiElement"
|
||||||
BEGIN
|
BEGIN
|
||||||
INSERT INTO "JMdict_EntryScore" (
|
INSERT INTO "JMdict_EntryScore" (
|
||||||
|
"type",
|
||||||
|
"entryId",
|
||||||
"elementId",
|
"elementId",
|
||||||
"score",
|
"score",
|
||||||
"common"
|
"common"
|
||||||
)
|
)
|
||||||
SELECT "elementId", "score", "common"
|
SELECT "type", "entryId", "elementId", "score", "common"
|
||||||
FROM "JMdict_EntryScoreView_Kanji"
|
FROM "JMdict_EntryScoreView_Kanji"
|
||||||
WHERE "elementId" = NEW."elementId";
|
WHERE "elementId" = NEW."elementId";
|
||||||
END;
|
END;
|
||||||
|
|
||||||
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement"
|
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement"
|
||||||
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "elementId"
|
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
|
||||||
ON "JMdict_KanjiElement"
|
ON "JMdict_KanjiElement"
|
||||||
BEGIN
|
BEGIN
|
||||||
UPDATE "JMdict_EntryScore"
|
UPDATE "JMdict_EntryScore"
|
||||||
@@ -156,7 +160,8 @@ CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_KanjiElement"
|
|||||||
AFTER DELETE ON "JMdict_KanjiElement"
|
AFTER DELETE ON "JMdict_KanjiElement"
|
||||||
BEGIN
|
BEGIN
|
||||||
DELETE FROM "JMdict_EntryScore"
|
DELETE FROM "JMdict_EntryScore"
|
||||||
WHERE "elementId" = OLD."elementId";
|
WHERE "type" = 'k'
|
||||||
|
AND "elementId" = OLD."elementId";
|
||||||
END;
|
END;
|
||||||
|
|
||||||
--- JMdict_JLPTTag triggers
|
--- JMdict_JLPTTag triggers
|
||||||
|
|||||||
@@ -32,9 +32,9 @@ SELECT
|
|||||||
THEN "JMdict_ReadingElement"."reading"
|
THEN "JMdict_ReadingElement"."reading"
|
||||||
ELSE NULL
|
ELSE NULL
|
||||||
END AS "furigana",
|
END AS "furigana",
|
||||||
COALESCE("JMdict_KanjiElement"."orderNum", 0)
|
COALESCE("JMdict_KanjiElement"."orderNum", 1)
|
||||||
+ "JMdict_ReadingElement"."orderNum"
|
+ "JMdict_ReadingElement"."orderNum"
|
||||||
= 0
|
= 2
|
||||||
AS "isFirst",
|
AS "isFirst",
|
||||||
"JMdict_KanjiElement"."orderNum" AS "kanjiOrderNum",
|
"JMdict_KanjiElement"."orderNum" AS "kanjiOrderNum",
|
||||||
"JMdict_ReadingElement"."orderNum" AS "readingOrderNum"
|
"JMdict_ReadingElement"."orderNum" AS "readingOrderNum"
|
||||||
@@ -65,7 +65,9 @@ JOIN "JMdict_KanjiElement"
|
|||||||
ON "JMdict_KanjiElementFTS"."entryId" = "JMdict_KanjiElement"."entryId"
|
ON "JMdict_KanjiElementFTS"."entryId" = "JMdict_KanjiElement"."entryId"
|
||||||
AND "JMdict_KanjiElementFTS"."reading" LIKE '%' || "JMdict_KanjiElement"."reading"
|
AND "JMdict_KanjiElementFTS"."reading" LIKE '%' || "JMdict_KanjiElement"."reading"
|
||||||
JOIN "JMdict_EntryScore"
|
JOIN "JMdict_EntryScore"
|
||||||
ON "JMdict_EntryScore"."elementId" = "JMdict_KanjiElement"."elementId"
|
ON "JMdict_EntryScore"."type" = 'k'
|
||||||
|
AND "JMdict_KanjiElement"."entryId" = "JMdict_EntryScore"."entryId"
|
||||||
|
AND "JMdict_KanjiElement"."reading" = "JMdict_EntryScore"."reading"
|
||||||
WHERE "JMdict_EntryScore"."common" = 1;
|
WHERE "JMdict_EntryScore"."common" = 1;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,6 @@
|
|||||||
jmdict,
|
jmdict,
|
||||||
radkfile,
|
radkfile,
|
||||||
kanjidic2,
|
kanjidic2,
|
||||||
tanos-jlpt,
|
|
||||||
sqlite,
|
sqlite,
|
||||||
wal ? false,
|
wal ? false,
|
||||||
}:
|
}:
|
||||||
@@ -22,11 +21,10 @@ stdenvNoCC.mkDerivation {
|
|||||||
buildPhase = ''
|
buildPhase = ''
|
||||||
runHook preBuild
|
runHook preBuild
|
||||||
|
|
||||||
mkdir -p data
|
mkdir -p data/tmp
|
||||||
ln -s '${jmdict}'/* data/
|
ln -s "${jmdict}"/* data/tmp
|
||||||
ln -s '${radkfile}'/* data/
|
ln -s "${radkfile}"/* data/tmp
|
||||||
ln -s '${kanjidic2}'/* data/
|
ln -s "${kanjidic2}"/* data/tmp
|
||||||
ln -s '${tanos-jlpt}' data/tanos-jlpt
|
|
||||||
|
|
||||||
for migration in migrations/*.sql; do
|
for migration in migrations/*.sql; do
|
||||||
sqlite3 jadb.sqlite < "$migration"
|
sqlite3 jadb.sqlite < "$migration"
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
{
|
{
|
||||||
src,
|
src,
|
||||||
buildDartApplication,
|
buildDartApplication,
|
||||||
|
sqlite,
|
||||||
}:
|
}:
|
||||||
buildDartApplication {
|
buildDartApplication {
|
||||||
pname = "jadb-database-tool";
|
pname = "jadb-database-tool";
|
||||||
@@ -9,6 +10,9 @@ buildDartApplication {
|
|||||||
|
|
||||||
dartEntryPoints."bin/jadb" = "bin/jadb.dart";
|
dartEntryPoints."bin/jadb" = "bin/jadb.dart";
|
||||||
|
|
||||||
|
# NOTE: here we are overriding the implicitly added runtimeDependency from the package fixup in pub2nix.
|
||||||
|
runtimeDependencies = [ sqlite ];
|
||||||
|
|
||||||
# NOTE: the default dart hooks are using `dart compile`, which is not able to call the
|
# NOTE: the default dart hooks are using `dart compile`, which is not able to call the
|
||||||
# new dart build hooks required to use package:sqlite3 >= 3.0.0. So we override
|
# new dart build hooks required to use package:sqlite3 >= 3.0.0. So we override
|
||||||
# these phases to use `dart build` instead.
|
# these phases to use `dart build` instead.
|
||||||
|
|||||||
46
nix/jmdict.nix
Normal file
46
nix/jmdict.nix
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
{
|
||||||
|
stdenvNoCC,
|
||||||
|
jmdict-src,
|
||||||
|
jmdict-with-examples-src,
|
||||||
|
xmlformat,
|
||||||
|
gzip,
|
||||||
|
edrdgMetadata,
|
||||||
|
}:
|
||||||
|
stdenvNoCC.mkDerivation {
|
||||||
|
name = "jmdict";
|
||||||
|
|
||||||
|
dontUnpack = true;
|
||||||
|
srcs = [
|
||||||
|
jmdict-src
|
||||||
|
jmdict-with-examples-src
|
||||||
|
];
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
gzip
|
||||||
|
xmlformat
|
||||||
|
];
|
||||||
|
|
||||||
|
buildPhase = ''
|
||||||
|
runHook preBuild
|
||||||
|
|
||||||
|
gzip -dkc "${jmdict-src}" > JMdict.xml
|
||||||
|
gzip -dkc "${jmdict-with-examples-src}" > JMdict_with_examples.xml
|
||||||
|
xmlformat -i JMdict.xml
|
||||||
|
xmlformat -i JMdict_with_examples.xml
|
||||||
|
|
||||||
|
runHook postBuild
|
||||||
|
'';
|
||||||
|
|
||||||
|
installPhase = ''
|
||||||
|
runHook preInstall
|
||||||
|
|
||||||
|
install -Dt "$out" JMdict.xml JMdict_with_examples.xml
|
||||||
|
|
||||||
|
runHook postInstall
|
||||||
|
'';
|
||||||
|
|
||||||
|
meta = edrdgMetadata // {
|
||||||
|
description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words";
|
||||||
|
homepage = "https://www.edrdg.org/jmdict/j_jmdict.html";
|
||||||
|
};
|
||||||
|
}
|
||||||
40
nix/kanjidic2.nix
Normal file
40
nix/kanjidic2.nix
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
{
|
||||||
|
stdenvNoCC,
|
||||||
|
kanjidic2-src,
|
||||||
|
xmlformat,
|
||||||
|
gzip,
|
||||||
|
edrdgMetadata,
|
||||||
|
}:
|
||||||
|
stdenvNoCC.mkDerivation {
|
||||||
|
name = "kanjidic2";
|
||||||
|
|
||||||
|
src = kanjidic2-src;
|
||||||
|
dontUnpack = true;
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
gzip
|
||||||
|
xmlformat
|
||||||
|
];
|
||||||
|
|
||||||
|
buildPhase = ''
|
||||||
|
runHook preBuild
|
||||||
|
|
||||||
|
gzip -dkc "${kanjidic2-src}" > kanjidic2.xml
|
||||||
|
xmlformat -i kanjidic2.xml
|
||||||
|
|
||||||
|
runHook postBuild
|
||||||
|
'';
|
||||||
|
|
||||||
|
installPhase = ''
|
||||||
|
runHook preInstall
|
||||||
|
|
||||||
|
install -Dt "$out" kanjidic2.xml
|
||||||
|
|
||||||
|
runHook postInstall
|
||||||
|
'';
|
||||||
|
|
||||||
|
meta = edrdgMetadata // {
|
||||||
|
description = "A consolidated XML-format kanji database";
|
||||||
|
homepage = "https://www.edrdg.org/kanjidic/kanjd2index_legacy.html";
|
||||||
|
};
|
||||||
|
}
|
||||||
40
nix/radkfile.nix
Normal file
40
nix/radkfile.nix
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
{
|
||||||
|
stdenv,
|
||||||
|
radkfile-src,
|
||||||
|
gzip,
|
||||||
|
iconv,
|
||||||
|
edrdgMetadata,
|
||||||
|
}:
|
||||||
|
stdenv.mkDerivation {
|
||||||
|
name = "radkfile";
|
||||||
|
|
||||||
|
src = radkfile-src;
|
||||||
|
dontUnpack = true;
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
gzip
|
||||||
|
iconv
|
||||||
|
];
|
||||||
|
|
||||||
|
buildPhase = ''
|
||||||
|
runHook preBuild
|
||||||
|
|
||||||
|
gzip -dkc "$src" > radkfile
|
||||||
|
iconv -f EUC-JP -t UTF-8 -o radkfile_utf8 radkfile
|
||||||
|
|
||||||
|
runHook postBuild
|
||||||
|
'';
|
||||||
|
|
||||||
|
installPhase = ''
|
||||||
|
runHook preInstall
|
||||||
|
|
||||||
|
install -Dt "$out" radkfile_utf8
|
||||||
|
|
||||||
|
runHook postInstall
|
||||||
|
'';
|
||||||
|
|
||||||
|
meta = edrdgMetadata // {
|
||||||
|
description = "A file providing searchable decompositions of kanji characters";
|
||||||
|
homepage = "https://www.edrdg.org/krad/kradinf.html";
|
||||||
|
};
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user