Files
datasources/scripts/update-jmdict.sh

29 lines
703 B
Bash
Executable File

#!/usr/bin/env nix-shell
#!nix-shell -i bash -p coreutils curl gitMinimal gzip gnugrep gnused xmlformat
#
# Refreshes the checked-in JMdict data: downloads the gzipped dictionary from
# URL, reformats it with xmlformat into <repo>/jmdict/jmdict.xml and records
# the dictionary's "JMdict created" date in <repo>/jmdict/version.txt.
# Must be run from inside the git checkout; the repository root is located
# with `git rev-parse --show-toplevel`.

# Abort on errors, on unset variables and on any failed pipeline stage.
set -euo pipefail
# Trace every command to stderr.
set -x

URL='http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz'

# Resolve and validate the destination BEFORE allocating the scratch
# directory: the EXIT trap that removes it is only installed further down, so
# a failure here (e.g. not inside a git repository) must not leave a temp
# directory behind.
PROJECT_ROOT="$(git rev-parse --show-toplevel)"
DATA_DIR="$PROJECT_ROOT/jmdict"
if [[ ! -d "$DATA_DIR" ]]; then
  # Fail before downloading anything rather than at the final install step.
  printf 'error: data directory not found: %s\n' "$DATA_DIR" >&2
  exit 1
fi

# Scratch directory for the download and intermediate files.
TMP="$(mktemp -d)"
# Deletes the scratch directory named by the global TMP, including everything
# inside it. Succeeds even if the directory is already gone (rm -f).
cleanup() {
  rm -rf "$TMP"
}
# Remove the scratch directory whenever the script exits, including when
# `set -e` aborts it part-way through. No explicit cleanup call is needed at
# the end of the script.
trap cleanup EXIT

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as if it were the archive.
curl --fail -L -o "$TMP/jmdict.gz" "$URL"
gzip -dkc "$TMP/jmdict.gz" > "$TMP/jmdict.xml"
xmlformat "$TMP/jmdict.xml" > "$TMP/jmdict_formatted.xml"

# Extract the creation date from the first 2000 lines of the formatted file
# BEFORE touching anything under DATA_DIR, so a download without a
# recognisable date cannot leave a new jmdict.xml next to an empty
# version.txt.
if ! matches="$(head -n 2000 "$TMP/jmdict_formatted.xml" \
  | grep -Eo 'JMdict created: [0-9]{4}-[0-9]{2}-[0-9]{2}' \
  | sed 's/JMdict created: //g')"; then
  printf 'error: no "JMdict created: YYYY-MM-DD" marker found in the download\n' >&2
  exit 1
fi
# Keep only the first match so version.txt always holds exactly one date.
# Done with parameter expansion rather than `head -n 1` so that no pipeline
# stage exits early (which could fail the pipeline under pipefail).
version="${matches%%$'\n'*}"

# Install the new data, then record its version.
mv "$TMP/jmdict_formatted.xml" "$DATA_DIR/jmdict.xml"
printf '%s\n' "$version" > "$DATA_DIR/version.txt"