Add jmdict data + update script
This commit is contained in:
3862797
jmdict/jmdict.xml
Normal file
3862797
jmdict/jmdict.xml
Normal file
File diff suppressed because it is too large
Load Diff
1
jmdict/version.txt
Normal file
1
jmdict/version.txt
Normal file
@@ -0,0 +1 @@
|
||||
2026-04-07
|
||||
28
scripts/update-jmdict.sh
Executable file
28
scripts/update-jmdict.sh
Executable file
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env nix-shell
#!nix-shell -i bash -p coreutils curl gitMinimal gzip gnugrep gnused xmlformat
#
# Refresh the JMdict data stored in this repository.
#
# Steps:
#   1. Download the gzip archive at $URL into a scratch directory.
#   2. Decompress it and reformat the XML with xmlformat.
#   3. Pull the "JMdict created: YYYY-MM-DD" date out of the first 2000 lines
#      of the formatted file.
#   4. Install the results as <repo>/jmdict/jmdict.xml and
#      <repo>/jmdict/version.txt.
#
# The repository root is found with `git rev-parse --show-toplevel`, so the
# script has to be started from somewhere inside the work tree.
#
# Nothing under <repo>/jmdict is modified until the download, the formatting
# and the version extraction have all succeeded.

set -euo pipefail
set -x

URL='http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz'
TMP="$(mktemp -d)"

PROJECT_ROOT="$(git rev-parse --show-toplevel)"
DATA_DIR="$PROJECT_ROOT/jmdict"

# Remove the scratch directory. Registered on EXIT below, so it runs on every
# exit path and never needs to be called by hand.
cleanup() {
  # ${TMP:?} aborts instead of expanding to nothing if TMP is ever empty.
  rm -rf -- "${TMP:?}"
}
trap cleanup EXIT

# --fail makes curl exit non-zero on an HTTP error status instead of saving
# the server's error page as if it were the archive.
curl --fail -L -o "$TMP/jmdict.gz" "$URL"
gzip -dc "$TMP/jmdict.gz" > "$TMP/jmdict.xml"
xmlformat "$TMP/jmdict.xml" > "$TMP/jmdict_formatted.xml"

# Work out the version from the scratch copy first. Under pipefail the
# pipeline fails when grep matches nothing; catch that here so the failure is
# reported clearly and the repository copy is left as it was.
if ! MATCHES="$(
  head -n 2000 "$TMP/jmdict_formatted.xml" \
    | grep -Eo 'JMdict created: [0-9]{4}-[0-9]{2}-[0-9]{2}' \
    | sed 's/JMdict created: //g'
)"; then
  MATCHES=''
fi

# Keep only the first match so version.txt always holds exactly one line.
VERSION="${MATCHES%%$'\n'*}"

if [[ -z "$VERSION" ]]; then
  printf 'error: no "JMdict created: YYYY-MM-DD" marker in the first 2000 lines of the download\n' >&2
  exit 1
fi

printf '%s\n' "$VERSION" > "$TMP/version.txt"

# Everything is validated; install both files.
mkdir -p -- "$DATA_DIR"
mv -- "$TMP/jmdict_formatted.xml" "$DATA_DIR/jmdict.xml"
mv -- "$TMP/version.txt" "$DATA_DIR/version.txt"
|
||||
Reference in New Issue
Block a user