From 56c5f5909e41c827e77c181b5648fd758a804287 Mon Sep 17 00:00:00 2001 From: Nicolas Williams Date: Thu, 7 Nov 2019 23:56:35 -0600 Subject: [PATCH] roken: Add rkvis program for test scripts This will help programs that need to URL-escape strings. Also, this changes `do_hvis()` to not fallback on `do_svis()` for chars in `extra` -- that `do_hvis()` was doing that seems like an oversight. Christos Zoulas, of NetBSD, agrees. `do_hvis()` still falls back on `do_svis()` for characters not in the RFC 1808 / 3986 to-be-escaped set *and* characters not in the `extra` set -- that much seems to have been the intent. --- lib/roken/Makefile.am | 5 +- lib/roken/vis.c | 281 ++++++++++++++++++++++++++++++++++-------- lib/roken/vis.hin | 37 ++++-- 3 files changed, 262 insertions(+), 61 deletions(-) diff --git a/lib/roken/Makefile.am b/lib/roken/Makefile.am index 322a3c6d5..26554dc19 100644 --- a/lib/roken/Makefile.am +++ b/lib/roken/Makefile.am @@ -20,7 +20,7 @@ if HAVE_DBHEADER AM_CPPFLAGS += -I$(DBHEADER) endif -noinst_PROGRAMS = snprintf-test resolve-test rkpty test-detach test-auxval +noinst_PROGRAMS = snprintf-test resolve-test rkpty test-detach test-auxval rkvis CHECK_LOCAL = snprintf-test resolve-test rkpty make-roken @@ -57,6 +57,9 @@ test_auxval_SOURCES = test-auxval.c test_getuserinfo_SOURCES = test-getuserinfo.c test_getuserinfo_LDADD = libtest.la $(LDADD) +rkvis_SOURCES = vis.c +rkvis_CPPFLAGS = -DTEST + test_detach_SOURCES = test-detach.c rkpty_LDADD = $(LIB_openpty) $(LDADD) diff --git a/lib/roken/vis.c b/lib/roken/vis.c index 19ff29d95..c2c21a9ce 100644 --- a/lib/roken/vis.c +++ b/lib/roken/vis.c @@ -55,48 +55,109 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#if 1 +#define _DEFAULT_SOURCE #include #include "roken.h" +#ifdef TEST +#include "getarg.h" +#endif #ifndef _DIAGASSERT #define _DIAGASSERT(X) #endif -#else /* heimdal */ -#include -#if defined(LIBC_SCCS) && !defined(lint) -__RCSID("$NetBSD: vis.c,v 1.37 2008/07/25 22:29:23 dsl Exp $"); -#endif /* LIBC_SCCS and not lint */ - -#include "namespace.h" -#endif /* heimdal */ #include - #include #include +#ifdef TEST +#include +#endif #include #include #include -#include #include - -#if 0 -#ifdef __weak_alias -__weak_alias(strsvis,_strsvis) -__weak_alias(strsvisx,_strsvisx) -__weak_alias(strvis,_strvis) -__weak_alias(strvisx,_strvisx) -__weak_alias(svis,_svis) -__weak_alias(vis,_vis) -#endif -#endif +#include #if !HAVE_VIS || !HAVE_SVIS #include #include #include #include +#endif +#if !HAVE_VIS || !HAVE_SVIS || TEST +/* + * We use makeextralist() in main(), so we need it even if we have all the VIS + * routines in the host's C libraries. + */ + +/* 5 is for VIS_SP, VIS_TAB, VIS_NL, VIS_DQ, and VIS_NOSLASH */ +#define MAXEXTRAS (sizeof(char_glob) - 1 + sizeof(char_shell) - 1 + 5) + +#ifndef VIS_SHELL +#define VIS_SHELL 0x2000 +#endif +#ifndef VIS_GLOB +#define VIS_GLOB 0x0100 +#endif + +#ifndef VIS_SP +#define VIS_SP 0x0004 /* also encode space */ +#endif +#ifndef VIS_TAB +#define VIS_TAB 0x0008 /* also encode tab */ +#endif +#ifndef VIS_NL +#define VIS_NL 0x0010 /* also encode newline */ +#endif +#ifndef VIS_WHITE +#define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL) +#endif +#ifndef VIS_SAFE +#define VIS_SAFE 0x0020 /* only encode "unsafe" characters */ +#endif +#ifndef VIS_DQ +#define VIS_DQ 0x8000 /* also encode double quotes */ +#endif + + +/* + * Expand the list of extra characters to also visually encode. 
+ */ +static char * +makeextralist(int flags, const char *src) +{ + static const char char_glob[] = "*?[#"; + static const char char_shell[] = "'`\";&<>()|{}]\\$!^~"; + char *dst, *d; + size_t len; + + len = strlen(src); + if ((dst = d = calloc(1, len + MAXEXTRAS + 1)) == NULL) + return NULL; + + memcpy(dst, src, len); + d += len; + + if (flags & VIS_GLOB) { + memcpy(d, char_glob, sizeof(char_glob) - 1); + d += sizeof(char_glob) - 1; + } + if (flags & VIS_SHELL) { + memcpy(d, char_shell, sizeof(char_shell) - 1); + d += sizeof(char_shell) - 1; + } + + if (flags & VIS_SP) *d++ = ' '; + if (flags & VIS_TAB) *d++ = '\t'; + if (flags & VIS_NL) *d++ = '\n'; + if (flags & VIS_DQ) *d++ = '"'; + if ((flags & VIS_NOSLASH) == 0) *d++ = '\\'; + + return dst; +} +#endif + +#if !HAVE_VIS || !HAVE_SVIS static char *do_svis(char *, int, int, int, const char *); #undef BELL @@ -119,40 +180,19 @@ ROKEN_LIB_FUNCTION int ROKEN_LIB_CALL ROKEN_LIB_FUNCTION int ROKEN_LIB_CALL rk_strsvisx (char *, const char *, size_t, int, const char *); - #define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') #define iswhite(c) (c == ' ' || c == '\t' || c == '\n') #define issafe(c) (c == '\b' || c == BELL || c == '\r') #define xtoa(c) "0123456789abcdef"[c] -#define MAXEXTRAS 5 - -#define MAKEEXTRALIST(flag, extra, orig_str) \ -do { \ - const char *orig = orig_str; \ - const char *o = orig; \ - char *e; \ - while (*o++) \ - continue; \ - extra = malloc((size_t)((o - orig) + MAXEXTRAS)); \ - if (!extra) break; \ - for (o = orig, e = extra; (*e++ = *o++) != '\0';) \ - continue; \ - e--; \ - if (flag & VIS_SP) *e++ = ' '; \ - if (flag & VIS_TAB) *e++ = '\t'; \ - if (flag & VIS_NL) *e++ = '\n'; \ - if ((flag & VIS_NOSLASH) == 0) *e++ = '\\'; \ - *e = '\0'; \ -} while (/*CONSTCOND*/0) - /* * This is do_hvis, for HTTP style (RFC 1808) */ static char * do_hvis(char *dst, int c, int flag, int nextc, const char *extra) { - if (!isascii(c) || !isalnum(c) || strchr("$-_.+!*'(),", c) != NULL) { + if 
(!isascii(c) || !isalnum(c) || strchr("$-_.+!*'(),", c) != NULL || + strchr(extra, c)) { *dst++ = '%'; *dst++ = xtoa(((unsigned int)c >> 4) & 0xf); *dst++ = xtoa((unsigned int)c & 0xf); @@ -256,7 +296,7 @@ rk_svis(char *dst, int c, int flag, int nextc, const char *extra) _DIAGASSERT(dst != NULL); _DIAGASSERT(extra != NULL); - MAKEEXTRALIST(flag, nextra, extra); + nextra = makeextralist(flag, extra); if (!nextra) { *dst = '\0'; /* can't create nextra, return "" */ return dst; @@ -298,7 +338,7 @@ rk_strsvis(char *dst, const char *csrc, int flag, const char *extra) _DIAGASSERT(dst != NULL); _DIAGASSERT(src != NULL); _DIAGASSERT(extra != NULL); - MAKEEXTRALIST(flag, nextra, extra); + nextra = makeextralist(flag, extra); if (!nextra) { *dst = '\0'; /* can't create nextra, return "" */ return 0; @@ -327,7 +367,7 @@ rk_strsvisx(char *dst, const char *csrc, size_t len, int flag, const char *extra _DIAGASSERT(dst != NULL); _DIAGASSERT(src != NULL); _DIAGASSERT(extra != NULL); - MAKEEXTRALIST(flag, nextra, extra); + nextra = makeextralist(flag, extra); if (! nextra) { *dst = '\0'; /* can't create nextra, return "" */ return 0; @@ -362,7 +402,7 @@ rk_vis(char *dst, int c, int flag, int nextc) _DIAGASSERT(dst != NULL); - MAKEEXTRALIST(flag, extra, ""); + extra = makeextralist(flag, ""); if (! 
extra) {
 		*dst = '\0'; /* can't create extra, return "" */
 		return dst;
@@ -393,7 +433,7 @@ rk_strvis(char *dst, const char *src, int flag)
 	char *extra = NULL;
 	int rv;
 
-	MAKEEXTRALIST(flag, extra, "");
+	extra = makeextralist(flag, "");
 	if (!extra) {
 		*dst = '\0'; /* can't create extra, return "" */
 		return 0;
@@ -410,7 +450,7 @@ rk_strvisx(char *dst, const char *src, size_t len, int flag)
 	char *extra = NULL;
 	int rv;
 
-	MAKEEXTRALIST(flag, extra, "");
+	extra = makeextralist(flag, "");
 	if (!extra) {
 		*dst = '\0'; /* can't create extra, return "" */
 		return 0;
@@ -420,3 +460,164 @@ rk_strvisx(char *dst, const char *src, size_t len, int flag)
 	return rv;
 }
 #endif
+
+#ifdef TEST
+static const char *extra_arg = "";
+static int cstyle_flag;
+static int glob_flag;
+static int help_flag;
+static int http_flag;
+static int httponly_flag;
+static int line_flag;
+static int octal_flag;
+static int safe_flag;
+static int shell_flag;
+static int stdin_flag;
+static int tab_flag;
+static int whitespace_flag;
+
+/*
+ * The short options are compatible with a subset of the FreeBSD contrib
+ * vis(1). Heimdal additions have long option names only.
+ */
+static struct getargs args[] = {
+    { "c", 'C', arg_flag, &cstyle_flag, "C style", "C style" },
+    { "extra", 'e', arg_string, &extra_arg, "also encode extra", "also encode extra"},
+    { "glob", 'g', arg_flag, &glob_flag, "escape glob specials", "escape glob specials" },
+    { "help", 0, arg_flag, &help_flag, "help", "help"},
+    { "line", 0, arg_flag, &line_flag, "read and escape stdin without escaping newlines", NULL },
+    { "octal", 'o', arg_flag, &octal_flag, "octal escape", "octal escape" },
+    { "safe", 's', arg_flag, &safe_flag, "only encode \"unsafe\" characters", "only encode \"unsafe\" characters" },
+    { "shell", 'S', arg_flag, &shell_flag, "encode shell meta-characters", "encode shell meta-characters" },
+    { "stdin", 0, arg_flag, &stdin_flag, "read and escape stdin", NULL },
+    { "tab", 't', arg_flag, &tab_flag, "encode tabs", "encode tabs" },
+    { "url", 'h', arg_flag, &http_flag, "url escape", "url escape" },
+    { "url-only", 0, arg_flag, &httponly_flag, "url escape", "url escape" },
+    { "whitespace", 'w', arg_flag, &whitespace_flag, "encode whitespace", "encode whitespace" },
+    { 0, 0, 0, 0, 0, 0}
+};
+static size_t num_args = sizeof(args)/sizeof(args[0]);
+
+/*
+ * rkvis: visually encode each operand, and optionally stdin, and print the
+ * result on stdout.
+ *
+ * Every operand is encoded with strsvis() and printed on a line of its own.
+ * With --line, stdin is then read line by line and each line is encoded with
+ * its trailing newline left unescaped; otherwise, with --stdin, stdin is
+ * read and encoded in fixed-size chunks.
+ *
+ * Returns 0 on success or --help, and 1 after printing usage for bad
+ * arguments; other failures exit through err()/errx().
+ */
+int
+main(int argc, char **argv)
+{
+    size_t sz = 0;      /* bytes currently allocated for `s' */
+    char *nextra = NULL;
+    char *s = NULL;     /* output buffer shared by the operand and line loops */
+    int goptind = 0;
+    int flags = 0;
+
+    setprogname("vis");
+    if (getarg(args, num_args, argc, argv, &goptind) || help_flag) {
+        arg_printusage(args, num_args, NULL, "strings...");
+        return help_flag ? 0 : 1;
+    }
+
+    argc -= goptind;
+    argv += goptind;
+
+    if (argc == 0 && !stdin_flag && !line_flag) {
+        arg_printusage(args, num_args, NULL, "strings...");
+        return 1;
+    }
+
+    /* Name the options the way they are spelled in args[] */
+    if (http_flag && cstyle_flag)
+        errx(1, "--url and --c are mutually exclusive");
+
+    flags |= cstyle_flag ? VIS_CSTYLE : 0;
+    flags |= glob_flag ? VIS_GLOB : 0;
+    flags |= http_flag ? VIS_HTTPSTYLE : 0;
+    flags |= httponly_flag ? VIS_HTTPSTYLE | VIS_NOESCAPE : 0;
+    flags |= octal_flag ? VIS_OCTAL : 0;
+    flags |= safe_flag ? VIS_SAFE : 0;
+    flags |= shell_flag ? VIS_SHELL : 0;
+    flags |= tab_flag ? VIS_TAB : 0;
+    flags |= whitespace_flag ? VIS_WHITE : 0;
+
+    if ((nextra = makeextralist(flags, extra_arg)) == NULL)
+        err(1, "Out of memory");
+
+    for (; argc > 0; argc--, argv++) {
+        /* The buffer is sized at four bytes per input byte plus the NUL */
+        size_t need = 4 * strlen(argv[0]) + 1;
+
+        if (need > sz) {
+            char *tmp;
+
+            if ((tmp = realloc(s, need)) == NULL)
+                err(2, "Out of memory");
+            s = tmp;
+            sz = need;
+        }
+        strsvis(s, argv[0], flags, nextra);
+        printf("%s\n", s);
+    }
+    if (line_flag) {
+        ssize_t nbytes;
+        size_t linesz = 0;
+        char *line = NULL;
+
+        /*
+         * NOTE(review): strsvis() takes no length, so a line with an embedded
+         * NUL byte is presumably only encoded up to that byte -- confirm.
+         */
+        while (!feof(stdin) &&
+               (nbytes = getline(&line, &linesz, stdin)) > 0) {
+            size_t need = 4 * (size_t)nbytes + 2;
+            int nl = 0;
+
+            if (need > sz) {
+                char *tmp;
+
+                if ((tmp = realloc(s, need)) == NULL)
+                    err(2, "Out of memory");
+                s = tmp;
+                sz = need;
+            }
+            /* Strip the newline so that it is echoed rather than escaped */
+            if (line[nbytes - 1] == '\n') {
+                line[nbytes - 1] = '\0';
+                nl = 1;
+            }
+            strsvis(s, line, flags, nextra);
+            printf("%s%s", s, nl ? "\n" : "");
+        }
+        free(line);
+        fflush(stdout);
+        if (ferror(stdin))
+            errx(2, "I/O error");
+    } else if (stdin_flag) {
+        size_t nbytes;
+        char buf[2048 + 1];
+        char vbuf[4 * (sizeof(buf) - 1) + 1];
+
+        /* NOTE(review): same embedded-NUL caveat as the --line loop above */
+        while (!feof(stdin) &&
+               (nbytes = fread(buf, 1, sizeof(buf) - 1, stdin))) {
+            buf[nbytes] = '\0';
+            strsvis(vbuf, buf, flags, nextra);
+            printf("%s", vbuf);
+        }
+        fflush(stdout);
+        if (ferror(stdin))
+            errx(2, "I/O error");
+    }
+
+    free(s);
+    free(nextra);
+    return 0;
+}
+#endif
diff --git a/lib/roken/vis.hin b/lib/roken/vis.hin
index 25d662a98..f1027f524 100644
--- a/lib/roken/vis.hin
+++ b/lib/roken/vis.hin
@@ -51,24 +51,43 @@
 /*
  * to select alternate encoding format
  */
-#define VIS_OCTAL 0x01 /* use octal \ddd format */
-#define VIS_CSTYLE 0x02 /* use \[nrft0..] where appropiate */
+#define VIS_OCTAL 0x0001 /* use octal \ddd format */
+#define VIS_CSTYLE 0x0002 /* use \[nrft0..] where appropriate */
 
 /*
  * to alter set of characters encoded (default is to encode all
  * non-graphic except space, tab, and newline).
*/ -#define VIS_SP 0x04 /* also encode space */ -#define VIS_TAB 0x08 /* also encode tab */ -#define VIS_NL 0x10 /* also encode newline */ -#define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL) -#define VIS_SAFE 0x20 /* only encode "unsafe" characters */ +#define VIS_SP 0x0004 /* also encode space */ +#define VIS_TAB 0x0008 /* also encode tab */ +#define VIS_NL 0x0010 /* also encode newline */ +#define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL) +#define VIS_SAFE 0x0020 /* only encode "unsafe" characters */ +#define VIS_DQ 0x8000 /* also encode double quotes */ /* * other */ -#define VIS_NOSLASH 0x40 /* inhibit printing '\' */ -#define VIS_HTTPSTYLE 0x80 /* http-style escape % HEX HEX */ +#define VIS_NOSLASH 0x0040 /* inhibit printing '\' */ +#define VIS_HTTPSTYLE 0x0080 /* http-style escape % HEX HEX */ +#define VIS_HTTP1808 0x0080 /* http-style escape % hex hex */ +#define VIS_GLOB 0x0100 /* encode glob(3) magic characters */ +#if 0 +/* + * Not yet. FreeBSD calls 0x0400 VIS_HTTP1866, but if that means RFC1866, + * that's HTML 2.0, not HTTP. Argh. + */ +#define VIS_MIMESTYLE 0x0200 /* mime-style escape = HEX HEX */ +#define VIS_HTML1866 0x0400 /* HTML-style &#num; or &string; */ +#endif +#define VIS_NOESCAPE 0x0800 /* don't decode `\'; URL-encode only when given + * with VIS_HTTPSTYLE */ +#if 0 +#define _VIS_END 0x1000 /* for unvis */ +#endif +#define VIS_SHELL 0x2000 /* encode shell special characters [not glob] */ +#define VIS_META (VIS_WHITE | VIS_GLOB | VIS_SHELL) +#define VIS_NOLOCALE 0x4000 /* encode using the C locale */ /* * unvis return codes