205 lines
5.0 KiB
Bash
Executable File
205 lines
5.0 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Webdoc analysis: locates executable script files below the users' web-docs
# directories and summarises what they depend on.
#
# All results and intermediate files are written to $OUTPUT_DIR. An expensive
# step is skipped when its output file already exists; delete that file to
# force the step to run again.
#
# Uses GNU find (-executable) for the discovery step.

OUTPUT_DIR="out"
mkdir -p "$OUTPUT_DIR"

# TODO: all paths in cgi-bin should be considered a cgi-script, independent of file extension

echo "Starting webdoc analysis..."

if [[ ! -f "$OUTPUT_DIR/cgi-paths.txt" ]]; then
  echo "Searching for CGI scripts..."

  # Directory trees that are pruned (never descended into).
  EXCLUDE_LOCATIONS=(
    "*/.bundle/*"
    "*/.bzr/*"
    "*/.cache/*"
    "*/.cargo/*"
    "*/.git/*"
    "*/.hg/*"
    "*/.npm/*"
    "*/.nvm/*"
    "*/.rvm/*"
    "*/.svn/*"
    "*/.tox/*"
    "*/__pycache__/*"
    "*/node_modules/*"
    "*/vendor/*"
  )

  # File names that are treated as scripts.
  FILE_NAME_PATTERNS=(
    "*.cgi"
    "*.php"
    "*.php[0-9]*"
    "*.pm"
    "*.pl"
    "*.sh"
    "*.bash"
    "*.phtml"
    "*.shtml"
    "*.lisp"
    "*.cl"
  )

  # File names that are rejected even if they match a pattern above.
  FILE_NAME_ANTI_PATTERNS=(
    "*.so"
    "#*"
    "*~"
  )

  # Start points: every existing web-docs directory. An unmatched glob stays
  # literal, which the -d test filters out.
  FIND_ARGS=()
  for d in /home/pvv/?/*/web-docs; do
    if [[ -d "$d" ]]; then
      FIND_ARGS+=("$d")
    fi
  done

  # With no start point GNU find falls back to the current directory, which
  # would silently fill the cache with unrelated paths. Refuse to continue.
  if [[ ${#FIND_ARGS[@]} -eq 0 ]]; then
    echo "error: no web-docs directories found under /home/pvv" >&2
    exit 1
  fi

  # ( -path A -o -path B ... ) -prune -o
  if [[ ${#EXCLUDE_LOCATIONS[@]} -gt 0 ]]; then
    FIND_ARGS+=( \( )
    for ((i = 0; i < ${#EXCLUDE_LOCATIONS[@]}; i++)); do
      if [[ $i -gt 0 ]]; then
        FIND_ARGS+=(-o)
      fi
      FIND_ARGS+=(-path "${EXCLUDE_LOCATIONS[i]}")
    done
    FIND_ARGS+=( \) -prune -o )
  fi

  FIND_ARGS+=(
    "-type" "f"
    "-executable"
  )

  # ( -name P1 -o -name P2 ... )
  FIND_ARGS+=( \( )
  for ((i = 0; i < ${#FILE_NAME_PATTERNS[@]}; i++)); do
    if [[ $i -gt 0 ]]; then
      FIND_ARGS+=(-o)
    fi
    FIND_ARGS+=(-name "${FILE_NAME_PATTERNS[i]}")
  done
  FIND_ARGS+=( \) )

  for anti_pattern in "${FILE_NAME_ANTI_PATTERNS[@]}"; do
    FIND_ARGS+=(-not -name "$anti_pattern")
  done

  FIND_ARGS+=("-print")

  # Show the assembled command for debugging.
  echo "find \\"
  for arg in "${FIND_ARGS[@]}"; do
    echo " '$arg' \\"
  done
  echo ""

  # find's stderr is discarded on purpose (unreadable directories are
  # expected). The list is written to a .partial file and renamed afterwards,
  # so an interrupted scan is not mistaken for a complete cache on the next run.
  find "${FIND_ARGS[@]}" 2>/dev/null | tee "$OUTPUT_DIR/cgi-paths.txt.partial"
  mv -- "$OUTPUT_DIR/cgi-paths.txt.partial" "$OUTPUT_DIR/cgi-paths.txt"
else
  echo "'$OUTPUT_DIR/cgi-paths.txt' already exists, reusing..."
fi

##########################
# ANALYZE PERL LIBRARIES #
##########################

# Collects the argument of every `use ...;` statement that starts a line in
# the discovered *.pm / *.pl files, then strips import lists, standard modules
# and pragmas, and writes a usage count per remaining library.
#
# Reads PERL_STANDARD_MODULES.txt and PERL_PRAGMAS.txt (one name per line)
# from the current directory.

if [[ ! -f "$OUTPUT_DIR/perl-libs.txt" ]]; then
  echo "Analyzing Perl libraries..."

  : > "$OUTPUT_DIR/perl-libs.txt"
  mapfile -t ALL_CGI_PATHS < "$OUTPUT_DIR/cgi-paths.txt"

  PERL_PATHS=()
  for p in "${ALL_CGI_PATHS[@]}"; do
    case "$p" in
      *.pm|*.pl)
        PERL_PATHS+=("$p")
        ;;
    esac
  done

  # rg exits non-zero when a file has no match; that is not an error here.
  for p in "${PERL_PATHS[@]}"; do
    rg '^use ([^;]+);' --no-messages -N -o -r '$1' "$p" 2>/dev/null >> "$OUTPUT_DIR/perl-libs.txt" || true
  done
else
  echo "'$OUTPUT_DIR/perl-libs.txt' already exists, reusing..."
fi

sort "$OUTPUT_DIR/perl-libs.txt" > "$OUTPUT_DIR/perl-libs-tmp1.txt"

# Remove import specifiers
sed -E \
  -e 's|\s*(qw)?\s*\(.*\)||g' \
  -e 's|\s*(qw)?\s*\/.*\/||g' \
  "$OUTPUT_DIR/perl-libs-tmp1.txt" | sort > "$OUTPUT_DIR/perl-libs-tmp2.txt"

# Remove standard Perl modules (comm needs both inputs sorted the same way)
comm -2 -3 "$OUTPUT_DIR/perl-libs-tmp2.txt" <(sort PERL_STANDARD_MODULES.txt) > "$OUTPUT_DIR/perl-libs-tmp3.txt"

# Remove pragmas
readarray -t PERL_PRAGMAS < PERL_PRAGMAS.txt

# Skip blank entries: an empty alternative would turn the pattern into a bare
# '^', which matches (and deletes) every line.
pragma_names=()
for pragma in "${PERL_PRAGMAS[@]}"; do
  pragma="${pragma//[[:space:]]/}"
  if [[ -n "$pragma" ]]; then
    pragma_names+=("$pragma")
  fi
done

if [[ ${#pragma_names[@]} -gt 0 ]]; then
  remove_pragmas_regex=$(printf '|%s' "${pragma_names[@]}")
  remove_pragmas_regex="${remove_pragmas_regex:1}" # remove leading '|'
  # Require the pragma name to end where it is listed (followed by a
  # non-identifier character or end of line), so that e.g. 'lib' removes
  # "lib '/path'" but keeps the unrelated module 'lib::abs'.
  remove_pragmas_regex="^(${remove_pragmas_regex})([^[:alnum:]_:]|\$)"
  sed -E "/${remove_pragmas_regex}/d" "$OUTPUT_DIR/perl-libs-tmp3.txt" > "$OUTPUT_DIR/perl-libs-filtered.txt"
else
  # Nothing to filter out.
  cp -- "$OUTPUT_DIR/perl-libs-tmp3.txt" "$OUTPUT_DIR/perl-libs-filtered.txt"
fi

# Input is already sorted, so uniq -c yields one count per library.
uniq -c "$OUTPUT_DIR/perl-libs-filtered.txt" | sort -gr > "$OUTPUT_DIR/perl-libs-overview.txt"

#########################
# ANALYZE PHP LIBRARIES #
#########################

# Collects the argument of every `use ...;` statement that starts a line in
# the discovered PHP files and writes a usage count per imported name.

if [[ ! -f "$OUTPUT_DIR/php-libs.txt" ]]; then
  echo "Analyzing PHP libraries..."

  : > "$OUTPUT_DIR/php-libs.txt"
  mapfile -t ALL_CGI_PATHS < "$OUTPUT_DIR/cgi-paths.txt"

  PHP_PATHS=()
  for p in "${ALL_CGI_PATHS[@]}"; do
    case "$p" in
      # '*.php[0-9]*' needs a digit after '.php', so plain '*.php' must be
      # listed separately or ordinary PHP files are never analysed.
      *.php|*.php[0-9]*|*.phtml)
        PHP_PATHS+=("$p")
        ;;
    esac
  done

  # rg exits non-zero when a file has no match; that is not an error here.
  for p in "${PHP_PATHS[@]}"; do
    rg '^use ([^;]+);' --no-messages -N -o -r '$1' "$p" 2>/dev/null >> "$OUTPUT_DIR/php-libs.txt" || true
  done
else
  echo "'$OUTPUT_DIR/php-libs.txt' already exists, reusing..."
fi

sort "$OUTPUT_DIR/php-libs.txt" | uniq -c | sort -gr > "$OUTPUT_DIR/php-libs-overview.txt"

########################
# ANALYZE CGI PROGRAMS #
########################

# Scans every discovered script for absolute references to programs below the
# directories listed in BIN_LOCATIONS.txt (one regex fragment per line, read
# from the current directory) and writes a usage count per program.

if [[ ! -f "$OUTPUT_DIR/cgi-progs.txt" ]]; then
  echo "Analyzing CGI programs..."

  : > "$OUTPUT_DIR/cgi-progs.txt"
  mapfile -t ALL_CGI_PATHS < "$OUTPUT_DIR/cgi-paths.txt"

  readarray -t BIN_LOCATIONS < BIN_LOCATIONS.txt

  # Skip blank entries: an empty alternative would make the location part of
  # the pattern match anywhere, so every '/word' would be reported.
  bin_dirs=()
  for location in "${BIN_LOCATIONS[@]}"; do
    if [[ -n "$location" ]]; then
      bin_dirs+=("$location")
    fi
  done

  if [[ ${#bin_dirs[@]} -eq 0 ]]; then
    echo "warning: BIN_LOCATIONS.txt lists no directories, no programs extracted" >&2
  else
    bin_locations_regex=$(printf '|%s' "${bin_dirs[@]}")
    bin_locations_regex="${bin_locations_regex:1}" # remove leading '|'
    bin_locations_regex="(?:${bin_locations_regex})"

    # "<bin dir>/prog" yields "prog"; "<bin dir>/env prog" yields "prog".
    # 'env' must be followed by whitespace to count as the env wrapper,
    # otherwise a name such as 'envsubst' would be reported as 'subst'.
    prog_regex="${bin_locations_regex}/(?:env\\s+)?(\\w+(?:/\\w+)*)"

    # rg exits non-zero when a file has no match; that is not an error here.
    for p in "${ALL_CGI_PATHS[@]}"; do
      rg --no-messages -N -o -r '$1' -e "$prog_regex" -- "$p" 2>/dev/null >> "$OUTPUT_DIR/cgi-progs.txt" || true
    done
  fi

  # TODO: extract non-absolute binary references from calls that invoke subprocesses in the various languages
else
  echo "'$OUTPUT_DIR/cgi-progs.txt' already exists, reusing..."
fi

sort "$OUTPUT_DIR/cgi-progs.txt" | uniq -c | sort -gr > "$OUTPUT_DIR/cgi-progs-overview.txt"

##########################
# ANALYZE LISP LIBRARIES #
##########################

# TODO: find Lisp libraries