Add kanjidic data + update script
This commit is contained in:
538387
kanjidic2/kanjidic2.xml
Normal file
538387
kanjidic2/kanjidic2.xml
Normal file
File diff suppressed because it is too large
Load Diff
1
kanjidic2/version.txt
Normal file
1
kanjidic2/version.txt
Normal file
@@ -0,0 +1 @@
|
||||
2026-04-07.2026-097.4
|
||||
43
scripts/update-kanjidic2.sh
Executable file
43
scripts/update-kanjidic2.sh
Executable file
@@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env nix-shell
|
||||
#!nix-shell -i bash -p coreutils curl gitMinimal gzip gnugrep gnused xmlformat
|
||||
|
||||
# Strict mode: stop on the first failing command (-e), on any use of an unset
# variable (-u) and on a failure anywhere in a pipeline (pipefail). -x echoes
# each command before it runs.
set -euxo pipefail

# Upstream location of the gzip-compressed kanjidic2 XML file.
URL='https://www.edrdg.org/kanjidic/kanjidic2.xml.gz'

# Scratch directory that holds the download and all intermediate files.
TMP=$(mktemp -d)

# Output goes to <repository root>/kanjidic2; the root is resolved through git
# from the current working directory.
PROJECT_ROOT=$(git rev-parse --show-toplevel)
DATA_DIR="${PROJECT_ROOT}/kanjidic2"
|
||||
|
||||
#######################################
# Delete the scratch directory and everything inside it.
# Globals:
#   TMP (read) - path of the directory to remove
# Notes:
#   `rm -f` does not complain about a path that no longer exists, so calling
#   this more than once is harmless.
#######################################
cleanup() {
  rm -rf "$TMP"
}
|
||||
# Run cleanup on every exit path, including aborts caused by a failing command.
trap cleanup EXIT

#######################################
# Print the value of the first single-line <TAG>VALUE</TAG> element that
# appears within the first 1000 lines of a file.
# Arguments:
#   $1 - path of the XML file to scan
#   $2 - element name, e.g. file_version
#   $3 - ERE fragment the element's text must match, e.g. [0-9]+
# Outputs:
#   The matched value on stdout; an error message on stderr if none is found.
# Returns:
#   0 if a value was printed, 1 otherwise.
#######################################
header_field() {
  local file=$1 tag=$2 value_re=$3
  local matches
  local newline=$'\n'

  # grep exits non-zero when nothing matches; absorb that here so the problem
  # is reported with an explicit message below instead of a silent abort.
  matches="$(
    head -n 1000 "$file" \
      | grep -Eo "<${tag}>${value_re}</${tag}>" \
      | sed "s|</\?${tag}>||g"
  )" || matches=''

  if [[ -z "$matches" ]]; then
    printf 'error: no <%s> element found near the top of %s\n' "$tag" "$file" >&2
    return 1
  fi

  # Keep only the first match so the result is always a single line.
  printf '%s\n' "${matches%%"$newline"*}"
}

# Download, decompress and re-format the dictionary inside the scratch
# directory. --fail makes curl exit non-zero on an HTTP error instead of
# saving the server's error page as if it were the archive.
curl --fail -L -o "$TMP/kanjidic2.xml.gz" "$URL"
gzip -dkc "$TMP/kanjidic2.xml.gz" > "$TMP/kanjidic2.xml"
xmlformat "$TMP/kanjidic2.xml" > "$TMP/kanjidic2_formatted.xml"

# Read the version fields from the formatted copy *before* touching $DATA_DIR,
# so a failed extraction cannot leave a new kanjidic2.xml next to a stale
# version.txt.
FILE_VERSION="$(header_field "$TMP/kanjidic2_formatted.xml" file_version '[0-9]+')"
DB_VERSION="$(header_field "$TMP/kanjidic2_formatted.xml" database_version '[0-9-]+')"
CREATION_DATE="$(header_field "$TMP/kanjidic2_formatted.xml" date_of_creation '[0-9-]+')"

# Everything needed is in hand: install the data file and record its version
# as <creation date>.<database version>.<file version>.
mv "$TMP/kanjidic2_formatted.xml" "$DATA_DIR/kanjidic2.xml"
printf '%s\n' "$CREATION_DATE.$DB_VERSION.$FILE_VERSION" > "$DATA_DIR/version.txt"

# No explicit cleanup call here: the EXIT trap set above removes the scratch
# directory when the script ends.
|
||||
Reference in New Issue
Block a user