Files
datasources/scripts/update-kanjidic2.sh

44 lines
1.1 KiB
Bash
Executable File

#!/usr/bin/env nix-shell
#!nix-shell -i bash -p coreutils curl gitMinimal gzip gnugrep gnused xmlformat
#
# Refreshes kanjidic2/kanjidic2.xml in this repository from the upstream
# archive at $URL and records the version of the data in kanjidic2/version.txt.
#
# Usage: run from anywhere inside the git checkout; takes no arguments.

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail
# Trace each command to stderr as it runs.
set -x

URL='https://www.edrdg.org/kanjidic/kanjidic2.xml.gz'

# Resolve the output location before allocating anything: if this is not a
# git checkout the script stops here, with no temporary directory left behind.
PROJECT_ROOT="$(git rev-parse --show-toplevel)"
DATA_DIR="$PROJECT_ROOT/kanjidic2"

# Scratch directory for the download and intermediate files. Created last so
# that no command that can fail runs between its creation and the
# registration of the EXIT trap that removes it.
TMP="$(mktemp -d)"
#######################################
# Delete the scratch directory and everything in it.
# Globals:
#   TMP (read) - path of the directory to remove
# Returns:
#   rm's exit status. The shell aborts with an error instead of running rm
#   if TMP is unset or empty.
#######################################
cleanup() {
  # `:?` stops the shell before rm runs when TMP is empty; `--` keeps a path
  # beginning with '-' from being parsed as an option.
  rm -rf -- "${TMP:?}"
}
# Remove the scratch directory whenever the script exits, on success or
# failure, so no explicit cleanup call is needed at the end.
trap cleanup EXIT

#######################################
# Print the content of a single-line XML element found near the top of a file.
# Arguments:
#   $1 - element name, e.g. file_version
#   $2 - extended regex the element's content must match
#   $3 - path of the XML file to search (only the first 1000 lines are read)
# Outputs:
#   The element's content, without the surrounding tags, on stdout.
# Returns:
#   Non-zero when no matching element is found (grep's failure is propagated
#   by the script-wide `pipefail` setting).
#######################################
extract_header_field() {
  local tag=$1 value_re=$2 xml_file=$3
  head -n 1000 -- "$xml_file" \
    | grep -Eo -- "<${tag}>${value_re}</${tag}>" \
    | sed "s|</\\?${tag}>||g"
}

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as if it were the archive.
curl --fail --location --output "$TMP/kanjidic2.xml.gz" "$URL"
gzip -dc "$TMP/kanjidic2.xml.gz" > "$TMP/kanjidic2.xml"
xmlformat "$TMP/kanjidic2.xml" > "$TMP/kanjidic2_formatted.xml"

# Read the version fields from the staged copy BEFORE installing anything: if
# a field is missing the script aborts here and the repository keeps its
# previous, mutually consistent kanjidic2.xml / version.txt pair.
FILE_VERSION="$(extract_header_field file_version '[0-9]+' "$TMP/kanjidic2_formatted.xml")"
DB_VERSION="$(extract_header_field database_version '[0-9-]+' "$TMP/kanjidic2_formatted.xml")"
CREATION_DATE="$(extract_header_field date_of_creation '[0-9-]+' "$TMP/kanjidic2_formatted.xml")"

mv "$TMP/kanjidic2_formatted.xml" "$DATA_DIR/kanjidic2.xml"
printf '%s\n' "$CREATION_DATE.$DB_VERSION.$FILE_VERSION" > "$DATA_DIR/version.txt"