roken: Add rkvis program for test scripts

This will help programs that need to URL-escape strings.

Also, this changes `do_hvis()` to not fall back on `do_svis()` for chars in
`extra` -- that `do_hvis()` was doing that seems like an oversight.  Christos
Zoulas, of NetBSD, agrees.  `do_hvis()` still falls back on `do_svis()` for
characters that are neither in the RFC 1808 / 3986 to-be-escaped set nor in
the `extra` set -- that much seems to have been the intent.
This commit is contained in:
Nicolas Williams
2019-11-07 23:56:35 -06:00
parent 4981cfc420
commit 56c5f5909e
3 changed files with 262 additions and 61 deletions

View File

@@ -20,7 +20,7 @@ if HAVE_DBHEADER
AM_CPPFLAGS += -I$(DBHEADER)
endif
noinst_PROGRAMS = snprintf-test resolve-test rkpty test-detach test-auxval
noinst_PROGRAMS = snprintf-test resolve-test rkpty test-detach test-auxval rkvis
CHECK_LOCAL = snprintf-test resolve-test rkpty make-roken
@@ -57,6 +57,9 @@ test_auxval_SOURCES = test-auxval.c
test_getuserinfo_SOURCES = test-getuserinfo.c
test_getuserinfo_LDADD = libtest.la $(LDADD)
rkvis_SOURCES = vis.c
rkvis_CPPFLAGS = -DTEST
test_detach_SOURCES = test-detach.c
rkpty_LDADD = $(LIB_openpty) $(LDADD)

View File

@@ -55,48 +55,109 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#if 1
#define _DEFAULT_SOURCE
#include <config.h>
#include "roken.h"
#ifdef TEST
#include "getarg.h"
#endif
#ifndef _DIAGASSERT
#define _DIAGASSERT(X)
#endif
#else /* heimdal */
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: vis.c,v 1.37 2008/07/25 22:29:23 dsl Exp $");
#endif /* LIBC_SCCS and not lint */
#include "namespace.h"
#endif /* heimdal */
#include <sys/types.h>
#include <assert.h>
#include <ctype.h>
#ifdef TEST
#include <err.h>
#endif
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <vis.h>
#include <stdlib.h>
#if 0
#ifdef __weak_alias
__weak_alias(strsvis,_strsvis)
__weak_alias(strsvisx,_strsvisx)
__weak_alias(strvis,_strvis)
__weak_alias(strvisx,_strvisx)
__weak_alias(svis,_svis)
__weak_alias(vis,_vis)
#endif
#endif
#include <vis.h>
#if !HAVE_VIS || !HAVE_SVIS
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#endif
#if !HAVE_VIS || !HAVE_SVIS || TEST
/*
* We use makextralist() in main(), so we need it even if we have all the VIS
* routines in the host's C libraries.
*/
/* 5 is for VIS_SP, VIS_TAB, VIS_NL, VIS_DQ, and VIS_NOSLASH */
#define MAXEXTRAS (sizeof(char_glob) - 1 + sizeof(char_shell) - 1 + 5)
#ifndef VIS_SHELL
#define VIS_SHELL 0x2000
#endif
#ifndef VIS_GLOB
#define VIS_GLOB 0x0100
#endif
#ifndef VIS_SP
#define VIS_SP 0x0004 /* also encode space */
#endif
#ifndef VIS_TAB
#define VIS_TAB 0x0008 /* also encode tab */
#endif
#ifndef VIS_NL
#define VIS_NL 0x0010 /* also encode newline */
#endif
#ifndef VIS_WHITE
#define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL)
#endif
#ifndef VIS_SAFE
#define VIS_SAFE 0x0020 /* only encode "unsafe" characters */
#endif
#ifndef VIS_DQ
#define VIS_DQ 0x8000 /* also encode double quotes */
#endif
/*
 * Build the complete list of "extra" characters: the caller-supplied `src'
 * list followed by the characters that the VIS_GLOB, VIS_SHELL, VIS_SP,
 * VIS_TAB, VIS_NL and VIS_DQ bits of `flags' select, plus a backslash unless
 * VIS_NOSLASH is set.
 *
 * Returns a newly allocated, NUL-terminated string (release it with free()),
 * or NULL if the allocation fails.
 */
static char *
makeextralist(int flags, const char *src)
{
    /* MAXEXTRAS is computed from the sizes of these two arrays. */
    static const char char_glob[] = "*?[#";
    static const char char_shell[] = "'`\";&<>()|{}]\\$!^~";
    const size_t srclen = strlen(src);
    size_t pos = srclen;
    char *list;

    /* calloc() zero-fills, which also provides the terminating NUL. */
    list = calloc(1, srclen + MAXEXTRAS + 1);
    if (list == NULL)
        return NULL;

    memcpy(list, src, srclen);

    if (flags & VIS_GLOB) {
        memcpy(list + pos, char_glob, sizeof(char_glob) - 1);
        pos += sizeof(char_glob) - 1;
    }
    if (flags & VIS_SHELL) {
        memcpy(list + pos, char_shell, sizeof(char_shell) - 1);
        pos += sizeof(char_shell) - 1;
    }

    if (flags & VIS_SP)
        list[pos++] = ' ';
    if (flags & VIS_TAB)
        list[pos++] = '\t';
    if (flags & VIS_NL)
        list[pos++] = '\n';
    if (flags & VIS_DQ)
        list[pos++] = '"';
    if (!(flags & VIS_NOSLASH))
        list[pos++] = '\\';

    return list;
}
#endif
#if !HAVE_VIS || !HAVE_SVIS
static char *do_svis(char *, int, int, int, const char *);
#undef BELL
@@ -119,40 +180,19 @@ ROKEN_LIB_FUNCTION int ROKEN_LIB_CALL
ROKEN_LIB_FUNCTION int ROKEN_LIB_CALL
rk_strsvisx (char *, const char *, size_t, int, const char *);
#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7')
#define iswhite(c) (c == ' ' || c == '\t' || c == '\n')
#define issafe(c) (c == '\b' || c == BELL || c == '\r')
#define xtoa(c) "0123456789abcdef"[c]
#define MAXEXTRAS 5
/*
 * MAKEEXTRALIST(flag, extra, orig_str): point `extra' at malloc()ed storage
 * holding a copy of orig_str followed by the characters selected by `flag'
 * (space for VIS_SP, tab for VIS_TAB, newline for VIS_NL, and a backslash
 * unless VIS_NOSLASH is set), NUL-terminated.
 *
 * If malloc() fails, `extra' is left NULL, so users of the macro must check
 * it afterwards.  `flag' is expanded several times; pass an expression
 * without side effects.
 */
#define MAKEEXTRALIST(flag, extra, orig_str) \
do { \
const char *orig = orig_str; \
const char *o = orig; \
char *e; \
while (*o++) \
continue; \
extra = malloc((size_t)((o - orig) + MAXEXTRAS)); \
if (!extra) break; \
for (o = orig, e = extra; (*e++ = *o++) != '\0';) \
continue; \
e--; \
if (flag & VIS_SP) *e++ = ' '; \
if (flag & VIS_TAB) *e++ = '\t'; \
if (flag & VIS_NL) *e++ = '\n'; \
if ((flag & VIS_NOSLASH) == 0) *e++ = '\\'; \
*e = '\0'; \
} while (/*CONSTCOND*/0)
/*
* This is do_hvis, for HTTP style (RFC 1808)
*/
static char *
do_hvis(char *dst, int c, int flag, int nextc, const char *extra)
{
if (!isascii(c) || !isalnum(c) || strchr("$-_.+!*'(),", c) != NULL) {
if (!isascii(c) || !isalnum(c) || strchr("$-_.+!*'(),", c) != NULL ||
strchr(extra, c)) {
*dst++ = '%';
*dst++ = xtoa(((unsigned int)c >> 4) & 0xf);
*dst++ = xtoa((unsigned int)c & 0xf);
@@ -256,7 +296,7 @@ rk_svis(char *dst, int c, int flag, int nextc, const char *extra)
_DIAGASSERT(dst != NULL);
_DIAGASSERT(extra != NULL);
MAKEEXTRALIST(flag, nextra, extra);
nextra = makeextralist(flag, extra);
if (!nextra) {
*dst = '\0'; /* can't create nextra, return "" */
return dst;
@@ -298,7 +338,7 @@ rk_strsvis(char *dst, const char *csrc, int flag, const char *extra)
_DIAGASSERT(dst != NULL);
_DIAGASSERT(src != NULL);
_DIAGASSERT(extra != NULL);
MAKEEXTRALIST(flag, nextra, extra);
nextra = makeextralist(flag, extra);
if (!nextra) {
*dst = '\0'; /* can't create nextra, return "" */
return 0;
@@ -327,7 +367,7 @@ rk_strsvisx(char *dst, const char *csrc, size_t len, int flag, const char *extra
_DIAGASSERT(dst != NULL);
_DIAGASSERT(src != NULL);
_DIAGASSERT(extra != NULL);
MAKEEXTRALIST(flag, nextra, extra);
nextra = makeextralist(flag, extra);
if (! nextra) {
*dst = '\0'; /* can't create nextra, return "" */
return 0;
@@ -362,7 +402,7 @@ rk_vis(char *dst, int c, int flag, int nextc)
_DIAGASSERT(dst != NULL);
MAKEEXTRALIST(flag, extra, "");
extra = makeextralist(flag, "");
if (! extra) {
*dst = '\0'; /* can't create extra, return "" */
return dst;
@@ -393,7 +433,7 @@ rk_strvis(char *dst, const char *src, int flag)
char *extra = NULL;
int rv;
MAKEEXTRALIST(flag, extra, "");
extra = makeextralist(flag, "");
if (!extra) {
*dst = '\0'; /* can't create extra, return "" */
return 0;
@@ -410,7 +450,7 @@ rk_strvisx(char *dst, const char *src, size_t len, int flag)
char *extra = NULL;
int rv;
MAKEEXTRALIST(flag, extra, "");
extra = makeextralist(flag, "");
if (!extra) {
*dst = '\0'; /* can't create extra, return "" */
return 0;
@@ -420,3 +460,142 @@ rk_strvisx(char *dst, const char *src, size_t len, int flag)
return rv;
}
#endif
#ifdef TEST
/* Option values filled in by getarg() from the table below. */
static const char *extra_arg = "";
static int cstyle_flag;
static int glob_flag;
static int help_flag;
static int http_flag;
static int httponly_flag;
static int line_flag;
static int octal_flag;
static int safe_flag;
static int shell_flag;
static int stdin_flag;
static int tab_flag;
static int whitespace_flag;

/*
 * The short options are compatible with a subset of the FreeBSD contrib
 * vis(1).  Heimdal additions have long option names only.
 *
 * --url and --url-only get distinct help texts so the usage message can tell
 * them apart: --url-only additionally sets VIS_NOESCAPE, which restricts the
 * output to URL-encoding.
 */
static struct getargs args[] = {
    { "c", 'C', arg_flag, &cstyle_flag, "C style", "C style" },
    { "extra", 'e', arg_string, &extra_arg, "also encode extra", "also encode extra"},
    { "glob", 'g', arg_flag, &glob_flag, "escape glob specials", "escape glob specials" },
    { "help", 0, arg_flag, &help_flag, "help", "help"},
    { "line", 0, arg_flag, &line_flag, "read and escape stdin without escaping newlines", NULL },
    { "octal", 'o', arg_flag, &octal_flag, "octal escape", "octal escape" },
    { "safe", 's', arg_flag, &safe_flag, "only encode \"unsafe\" characters", "only encode \"unsafe\" characters" },
    { "shell", 'S', arg_flag, &shell_flag, "encode shell meta-characters", "encode shell meta-characters" },
    { "stdin", 0, arg_flag, &stdin_flag, "read and escape stdin", NULL },
    { "tab", 't', arg_flag, &tab_flag, "encode tabs", "encode tabs" },
    { "url", 'h', arg_flag, &http_flag, "url escape", "url escape" },
    { "url-only", 0, arg_flag, &httponly_flag, "url escape only", "url escape only" },
    { "whitespace", 'w', arg_flag, &whitespace_flag, "encode whitespace", "encode whitespace" },
    { 0, 0, 0, 0, 0, 0}
};

/* Number of entries in args[], counting the all-zero last entry. */
static size_t num_args = sizeof(args)/sizeof(args[0]);
int
main(int argc, char **argv)
{
size_t sz = 0;
char *nextra = NULL;
char *s = NULL;
int goptind = 0;
int flags = 0;
setprogname("vis");
if (getarg(args, num_args, argc, argv, &goptind) || help_flag) {
arg_printusage(args, num_args, NULL, "strings...");
return help_flag ? 0 : 1;
}
argc -= goptind;
argv += goptind;
if (argc == 0 && !stdin_flag && !line_flag) {
arg_printusage(args, num_args, NULL, "strings...");
return 1;
}
if (http_flag && cstyle_flag)
errx(1, "--http and --cstyle are mutually exclusive");
flags |= cstyle_flag ? VIS_CSTYLE : 0;
flags |= http_flag ? VIS_HTTPSTYLE : 0;
flags |= httponly_flag ? VIS_HTTPSTYLE | VIS_NOESCAPE : 0;
flags |= octal_flag ? VIS_OCTAL : 0;
flags |= safe_flag ? VIS_SAFE : 0;
flags |= tab_flag ? VIS_TAB : 0;
flags |= whitespace_flag ? VIS_WHITE : 0;
if ((nextra = makeextralist(flags, extra_arg)) == NULL)
err(1, "Out of memory");
while (argc) {
size_t len = strlen(argv[0]);
if (len > sz) {
char *tmp;
if ((tmp = realloc(s, 4 * len + 1)) == NULL)
err(2, "Out of memory");
s = tmp;
sz = 4 * len + 1;
}
strsvis(s, argv[0], flags, nextra);
printf("%s\n", s);
argc--;
}
if (line_flag) {
ssize_t nbytes;
size_t linesz = 0;
char *line = NULL;
while (!feof(stdin) &&
(nbytes = getline(&line, &linesz, stdin)) > 0) {
int nl = 0;
if (4 * (size_t)nbytes + 2 > sz) {
char *tmp;
if ((tmp = realloc(s, 4 * nbytes + 2)) == NULL)
err(2, "Out of memory");
s = tmp;
sz = 4 * nbytes + 2;
}
if (line[nbytes - 1] == '\n') {
line[nbytes - 1] = '\0';
nl = 1;
} else {
nl = 0;
}
strsvis(s, line, flags, nextra);
printf("%s%s", s, nl ? "\n" : "");
}
fflush(stdout);
if (ferror(stdin))
errx(2, "I/O error");
} else if (stdin_flag) {
size_t nbytes;
char buf[2048 + 1];
char vbuf[4 * (sizeof(buf) - 1) + 1];
while (!feof(stdin) &&
(nbytes = fread(buf, 1, sizeof(buf) - 1, stdin))) {
buf[nbytes] = '\0';
strsvis(vbuf, buf, flags, nextra);
printf("%s", vbuf);
}
fflush(stdout);
if (ferror(stdin))
errx(2, "I/O error");
}
free(nextra);
return 0;
}
#endif

View File

@@ -51,24 +51,43 @@
/*
* to select alternate encoding format
*/
#define VIS_OCTAL 0x01 /* use octal \ddd format */
#define VIS_CSTYLE 0x02 /* use \[nrft0..] where appropriate */
#define VIS_OCTAL 0x0001 /* use octal \ddd format */
#define VIS_CSTYLE 0x0002 /* use \[nrft0..] where appropriate */
/*
* to alter set of characters encoded (default is to encode all
* non-graphic except space, tab, and newline).
*/
#define VIS_SP 0x04 /* also encode space */
#define VIS_TAB 0x08 /* also encode tab */
#define VIS_NL 0x10 /* also encode newline */
#define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL)
#define VIS_SAFE 0x20 /* only encode "unsafe" characters */
#define VIS_SP 0x0004 /* also encode space */
#define VIS_TAB 0x0008 /* also encode tab */
#define VIS_NL 0x0010 /* also encode newline */
#define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL)
#define VIS_SAFE 0x0020 /* only encode "unsafe" characters */
#define VIS_DQ 0x8000 /* also encode double quotes */
/*
* other
*/
#define VIS_NOSLASH 0x40 /* inhibit printing '\' */
#define VIS_HTTPSTYLE 0x80 /* http-style escape % HEX HEX */
#define VIS_NOSLASH 0x0040 /* inhibit printing '\' */
#define VIS_HTTPSTYLE 0x0080 /* http-style escape % HEX HEX */
#define VIS_HTTP1808 0x0080 /* http-style escape % hex hex */
#define VIS_GLOB 0x0100 /* encode glob(3) magic characters */
#if 0
/*
* Not yet. FreeBSD calls 0x0400 VIS_HTTP1866, but if that means RFC1866,
* that's HTML 2.0, not HTTP. Argh.
*/
#define VIS_MIMESTYLE 0x0200 /* mime-style escape = HEX HEX */
#define VIS_HTML1866 0x0400 /* HTML-style &#num; or &string; */
#endif
#define VIS_NOESCAPE 0x0800 /* don't decode `\'; URL-encode only when given
* with VIS_HTTPSTYLE */
#if 0
#define _VIS_END 0x1000 /* for unvis */
#endif
#define VIS_SHELL 0x2000 /* encode shell special characters [not glob] */
#define VIS_META (VIS_WHITE | VIS_GLOB | VIS_SHELL)
#define VIS_NOLOCALE 0x4000 /* encode using the C locale */
/*
* unvis return codes