From f5b9ec280e1b5b0186e0985b927f5ddc4a965558 Mon Sep 17 00:00:00 2001 From: Viktor Dukhovni Date: Tue, 15 Nov 2016 00:57:53 -0500 Subject: [PATCH] Import (unmodified) updated upstream vis/unvis Will not compile, full integration in next commit. --- lib/libedit/src/unvis.c | 106 ++++-- lib/libedit/src/vis.c | 818 ++++++++++++++++++++++++++++------------ lib/libedit/src/vis.h | 59 ++- 3 files changed, 694 insertions(+), 289 deletions(-) diff --git a/lib/libedit/src/unvis.c b/lib/libedit/src/unvis.c index 7a66000a2..30fad8154 100644 --- a/lib/libedit/src/unvis.c +++ b/lib/libedit/src/unvis.c @@ -1,4 +1,4 @@ -/* $NetBSD: unvis.c,v 1.32 2010/11/27 21:22:11 christos Exp $ */ +/* $NetBSD: unvis.c,v 1.44 2014/09/26 15:43:36 roy Exp $ */ /*- * Copyright (c) 1989, 1993 @@ -29,30 +29,27 @@ * SUCH DAMAGE. */ -#include "config.h" +#include #if defined(LIBC_SCCS) && !defined(lint) #if 0 static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93"; #else -__RCSID("$NetBSD: unvis.c,v 1.32 2010/11/27 21:22:11 christos Exp $"); +__RCSID("$NetBSD: unvis.c,v 1.44 2014/09/26 15:43:36 roy Exp $"); #endif #endif /* LIBC_SCCS and not lint */ +#include "namespace.h" #include -#ifdef HAVE_STDINT_H -#include -#endif - #include #include +#include #include +#include #include -#ifdef _LIBC #ifdef __weak_alias -__weak_alias(strunvis,_strunvis) -#endif +__weak_alias(strnunvisx,_strnunvisx) #endif #if !HAVE_VIS @@ -66,24 +63,25 @@ __weak_alias(strunvis,_strunvis) #define S_CTRL 4 /* control char started (^) */ #define S_OCTAL2 5 /* octal digit 2 */ #define S_OCTAL3 6 /* octal digit 3 */ -#define S_HEX1 7 /* http hex digit */ -#define S_HEX2 8 /* http hex digit 2 */ -#define S_MIME1 9 /* mime hex digit 1 */ -#define S_MIME2 10 /* mime hex digit 2 */ -#define S_EATCRNL 11 /* mime eating CRNL */ -#define S_AMP 12 /* seen & */ -#define S_NUMBER 13 /* collecting number */ -#define S_STRING 14 /* collecting string */ +#define S_HEX 7 /* mandatory hex digit */ +#define S_HEX1 8 /* http hex digit */ +#define S_HEX2 9 /* http hex digit 2 */ +#define S_MIME1 10 /* mime hex digit 1 */ +#define S_MIME2 11 /* mime hex digit 2 */ +#define S_EATCRNL 12 /* mime eating CRNL */ +#define S_AMP 13 /* seen & */ +#define S_NUMBER 14 /* collecting number */ +#define S_STRING 15 /* collecting string */ #define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') -#define xtod(c) (isdigit(c) ? (c - '0') : ((tolower(c) - 'a') + 10)) -#define XTOD(c) (isdigit(c) ? (c - '0') : ((c - 'A') + 10)) +#define xtod(c) (isdigit(c) ? (c - '0') : ((tolower(c) - 'a') + 10)) +#define XTOD(c) (isdigit(c) ? (c - '0') : ((c - 'A') + 10)) /* * RFC 1866 */ static const struct nv { - const char *name; + char name[7]; uint8_t value; } nv[] = { { "AElig", 198 }, /* capital AE diphthong (ligature) */ @@ -300,6 +298,9 @@ unvis(char *cp, int c, int *astate, int flag) *cp = '\033'; *astate = SS(0, S_GROUND); return UNVIS_VALID; + case 'x': + *astate = SS(0, S_HEX); + return UNVIS_NOCHAR; case '\n': /* * hidden newline @@ -312,6 +313,12 @@ unvis(char *cp, int c, int *astate, int flag) */ *astate = SS(0, S_GROUND); return UNVIS_NOCHAR; + default: + if (isgraph(c)) { + *cp = c; + *astate = SS(0, S_GROUND); + return UNVIS_VALID; + } } goto bad; @@ -320,7 +327,7 @@ unvis(char *cp, int c, int *astate, int flag) *astate = SS(0, S_META1); else if (c == '^') *astate = SS(0, S_CTRL); - else + else goto bad; return UNVIS_NOCHAR; @@ -363,6 +370,10 @@ unvis(char *cp, int c, int *astate, int flag) */ return UNVIS_VALIDPUSH; + case S_HEX: + if (!isxdigit(uc)) + goto bad; + /*FALLTHROUGH*/ case S_HEX1: if (isxdigit(uc)) { *cp = xtod(uc); @@ -441,7 +452,7 @@ unvis(char *cp, int c, int *astate, int flag) break; } - if (*cp == __arraycount(nv)) + if (ia == __arraycount(nv)) goto bad; if (uc != 0) { @@ -473,47 +484,76 @@ unvis(char *cp, int c, int *astate, int flag) } /* - * strunvis - decode src into dst + * strnunvisx - decode src into dst * * Number of chars decoded into dst is returned, -1 on error. * Dst is null terminated. */ int -strunvisx(char *dst, const char *src, int flag) +strnunvisx(char *dst, size_t dlen, const char *src, int flag) { char c; - char *start = dst; + char t = '\0', *start = dst; int state = 0; _DIAGASSERT(src != NULL); _DIAGASSERT(dst != NULL); +#define CHECKSPACE() \ + do { \ + if (dlen-- == 0) { \ + errno = ENOSPC; \ + return -1; \ + } \ + } while (/*CONSTCOND*/0) while ((c = *src++) != '\0') { again: - switch (unvis(dst, c, &state, flag)) { + switch (unvis(&t, c, &state, flag)) { case UNVIS_VALID: - dst++; + CHECKSPACE(); + *dst++ = t; break; case UNVIS_VALIDPUSH: - dst++; + CHECKSPACE(); + *dst++ = t; goto again; case 0: case UNVIS_NOCHAR: break; + case UNVIS_SYNBAD: + errno = EINVAL; + return -1; default: - return (-1); + _DIAGASSERT(/*CONSTCOND*/0); + errno = EINVAL; + return -1; } } - if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID) - dst++; + if (unvis(&t, c, &state, UNVIS_END) == UNVIS_VALID) { + CHECKSPACE(); + *dst++ = t; + } + CHECKSPACE(); *dst = '\0'; return (int)(dst - start); } +int +strunvisx(char *dst, const char *src, int flag) +{ + return strnunvisx(dst, (size_t)~0, src, flag); +} + int strunvis(char *dst, const char *src) { - return strunvisx(dst, src, 0); + return strnunvisx(dst, (size_t)~0, src, 0); +} + +int +strnunvis(char *dst, size_t dlen, const char *src) +{ + return strnunvisx(dst, dlen, src, 0); } #endif diff --git a/lib/libedit/src/vis.c b/lib/libedit/src/vis.c index 463ea67ad..b4a5f3291 100644 --- a/lib/libedit/src/vis.c +++ b/lib/libedit/src/vis.c @@ -1,4 +1,4 @@ -/* $NetBSD: vis.c,v 1.41 2009/11/23 10:08:47 plunky Exp $ */ +/* $NetBSD: vis.c,v 1.71 2016/01/14 20:41:23 christos Exp $ */ /*- * Copyright (c) 1989, 1993 @@ -55,27 +55,28 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "config.h" - +#include #if defined(LIBC_SCCS) && !defined(lint) -__RCSID("$NetBSD: vis.c,v 1.41 2009/11/23 10:08:47 plunky Exp $"); +__RCSID("$NetBSD: vis.c,v 1.71 2016/01/14 20:41:23 christos Exp $"); #endif /* LIBC_SCCS and not lint */ +#ifdef __FBSDID +__FBSDID("$FreeBSD$"); +#define _DIAGASSERT(x) assert(x) +#endif +#include "namespace.h" #include +#include #include #include +#include #include +#include +#include -#ifdef _LIBC #ifdef __weak_alias -__weak_alias(strsvis,_strsvis) -__weak_alias(strsvisx,_strsvisx) -__weak_alias(strvis,_strvis) __weak_alias(strvisx,_strvisx) -__weak_alias(svis,_svis) -__weak_alias(vis,_vis) -#endif #endif #if !HAVE_VIS || !HAVE_SVIS @@ -84,54 +85,93 @@ __weak_alias(vis,_vis) #include #include -static char *do_svis(char *, int, int, int, const char *); +/* + * The reason for going through the trouble to deal with character encodings + * in vis(3), is that we use this to safe encode output of commands. This + * safe encoding varies depending on the character set. For example if we + * display ps output in French, we don't want to display French characters + * as M-foo. + */ + +static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *); #undef BELL -#define BELL '\a' +#define BELL L'\a' + +#if defined(LC_C_LOCALE) +#define iscgraph(c) isgraph_l(c, LC_C_LOCALE) +#else +/* Keep it simple for now, no locale stuff */ +#define iscgraph(c) isgraph(c) +#ifdef notyet +#include +static int +iscgraph(int c) { + int rv; + char *ol; -#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') -#define iswhite(c) (c == ' ' || c == '\t' || c == '\n') -#define issafe(c) (c == '\b' || c == BELL || c == '\r') -#define xtoa(c) "0123456789abcdef"[c] -#define XTOA(c) "0123456789ABCDEF"[c] + ol = setlocale(LC_CTYPE, "C"); + rv = isgraph(c); + if (ol) + setlocale(LC_CTYPE, ol); + return rv; +} +#endif +#endif -#define MAXEXTRAS 5 +#define ISGRAPH(flags, c) \ + (((flags) & VIS_NOLOCALE) ? iscgraph(c) : iswgraph(c)) -#define MAKEEXTRALIST(flag, extra, orig_str) \ -do { \ - const char *orig = orig_str; \ - const char *o = orig; \ - char *e; \ - while (*o++) \ - continue; \ - extra = malloc((size_t)((o - orig) + MAXEXTRAS)); \ - if (!extra) break; \ - for (o = orig, e = extra; (*e++ = *o++) != '\0';) \ - continue; \ - e--; \ - if (flag & VIS_SP) *e++ = ' '; \ - if (flag & VIS_TAB) *e++ = '\t'; \ - if (flag & VIS_NL) *e++ = '\n'; \ - if ((flag & VIS_NOSLASH) == 0) *e++ = '\\'; \ - *e = '\0'; \ -} while (/*CONSTCOND*/0) +#define iswoctal(c) (((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7') +#define iswwhite(c) (c == L' ' || c == L'\t' || c == L'\n') +#define iswsafe(c) (c == L'\b' || c == BELL || c == L'\r') +#define xtoa(c) L"0123456789abcdef"[c] +#define XTOA(c) L"0123456789ABCDEF"[c] + +#define MAXEXTRAS 30 + +static const wchar_t char_shell[] = L"'`\";&<>()|{}]\\$!^~"; +static const wchar_t char_glob[] = L"*?[#"; + +#if !HAVE_NBTOOL_CONFIG_H +#ifndef __NetBSD__ +/* + * On NetBSD MB_LEN_MAX is currently 32 which does not fit on any integer + * integral type and it is probably wrong, since currently the maximum + * number of bytes and character needs is 6. Until this is fixed, the + * loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and + * the assertion is commented out. + */ +#ifdef __FreeBSD__ +/* + * On FreeBSD including for CTASSERT only works in kernel + * mode. + */ +#ifndef CTASSERT +#define CTASSERT(x) _CTASSERT(x, __LINE__) +#define _CTASSERT(x, y) __CTASSERT(x, y) +#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] +#endif +#endif /* __FreeBSD__ */ +CTASSERT(MB_LEN_MAX <= sizeof(uint64_t)); +#endif /* !__NetBSD__ */ +#endif /* * This is do_hvis, for HTTP style (RFC 1808) */ -static char * -do_hvis(char *dst, int c, int flag, int nextc, const char *extra) +static wchar_t * +do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) { - - if ((isascii(c) && isalnum(c)) + if (iswalnum(c) /* safe */ - || c == '$' || c == '-' || c == '_' || c == '.' || c == '+' + || c == L'$' || c == L'-' || c == L'_' || c == L'.' || c == L'+' /* extra */ - || c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' - || c == ',') { - dst = do_svis(dst, c, flag, nextc, extra); - } else { - *dst++ = '%'; + || c == L'!' || c == L'*' || c == L'\'' || c == L'(' || c == L')' + || c == L',') + dst = do_svis(dst, c, flags, nextc, extra); + else { + *dst++ = L'%'; *dst++ = xtoa(((unsigned int)c >> 4) & 0xf); *dst++ = xtoa((unsigned int)c & 0xf); } @@ -143,22 +183,112 @@ do_hvis(char *dst, int c, int flag, int nextc, const char *extra) * This is do_mvis, for Quoted-Printable MIME (RFC 2045) * NB: No handling of long lines or CRLF. */ -static char * -do_mvis(char *dst, int c, int flag, int nextc, const char *extra) +static wchar_t * +do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) { - if ((c != '\n') && + if ((c != L'\n') && /* Space at the end of the line */ - ((isspace(c) && (nextc == '\r' || nextc == '\n')) || + ((iswspace(c) && (nextc == L'\r' || nextc == L'\n')) || /* Out of range */ - (!isspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) || + (!iswspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) || /* Specific char to be escaped */ - strchr("#$@[\\]^`{|}~", c) != NULL)) { - *dst++ = '='; + wcschr(L"#$@[\\]^`{|}~", c) != NULL)) { + *dst++ = L'='; *dst++ = XTOA(((unsigned int)c >> 4) & 0xf); *dst++ = XTOA((unsigned int)c & 0xf); - } else { - dst = do_svis(dst, c, flag, nextc, extra); + } else + dst = do_svis(dst, c, flags, nextc, extra); + return dst; +} + +/* + * Output single byte of multibyte character. + */ +static wchar_t * +do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra) +{ + if (flags & VIS_CSTYLE) { + switch (c) { + case L'\n': + *dst++ = L'\\'; *dst++ = L'n'; + return dst; + case L'\r': + *dst++ = L'\\'; *dst++ = L'r'; + return dst; + case L'\b': + *dst++ = L'\\'; *dst++ = L'b'; + return dst; + case BELL: + *dst++ = L'\\'; *dst++ = L'a'; + return dst; + case L'\v': + *dst++ = L'\\'; *dst++ = L'v'; + return dst; + case L'\t': + *dst++ = L'\\'; *dst++ = L't'; + return dst; + case L'\f': + *dst++ = L'\\'; *dst++ = L'f'; + return dst; + case L' ': + *dst++ = L'\\'; *dst++ = L's'; + return dst; + case L'\0': + *dst++ = L'\\'; *dst++ = L'0'; + if (iswoctal(nextc)) { + *dst++ = L'0'; + *dst++ = L'0'; + } + return dst; + /* We cannot encode these characters in VIS_CSTYLE + * because they special meaning */ + case L'n': + case L'r': + case L'b': + case L'a': + case L'v': + case L't': + case L'f': + case L's': + case L'0': + case L'M': + case L'^': + case L'$': /* vis(1) -l */ + break; + default: + if (ISGRAPH(flags, c) && !iswoctal(c)) { + *dst++ = L'\\'; + *dst++ = c; + return dst; + } + } } + if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) { + *dst++ = L'\\'; + *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0'; + *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0'; + *dst++ = (c & 07) + L'0'; + } else { + if ((flags & VIS_NOSLASH) == 0) + *dst++ = L'\\'; + + if (c & 0200) { + c &= 0177; + *dst++ = L'M'; + } + + if (iswcntrl(c)) { + *dst++ = L'^'; + if (c == 0177) + *dst++ = L'?'; + else + *dst++ = c + L'@'; + } else { + *dst++ = L'-'; + *dst++ = c; + } + } + return dst; } @@ -166,191 +296,375 @@ do_mvis(char *dst, int c, int flag, int nextc, const char *extra) * This is do_vis, the central code of vis. * dst: Pointer to the destination buffer * c: Character to encode - * flag: Flag word + * flags: Flags word * nextc: The character following 'c' * extra: Pointer to the list of extra characters to be * backslash-protected. */ -static char * -do_svis(char *dst, int c, int flag, int nextc, const char *extra) +static wchar_t * +do_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) { - int isextra; - isextra = strchr(extra, c) != NULL; - if (!isextra && isascii(c) && (isgraph(c) || iswhite(c) || - ((flag & VIS_SAFE) && issafe(c)))) { + int iswextra, i, shft; + uint64_t bmsk, wmsk; + + iswextra = wcschr(extra, c) != NULL; + if (!iswextra && (ISGRAPH(flags, c) || iswwhite(c) || + ((flags & VIS_SAFE) && iswsafe(c)))) { *dst++ = c; return dst; } - if (flag & VIS_CSTYLE) { - switch (c) { - case '\n': - *dst++ = '\\'; *dst++ = 'n'; - return dst; - case '\r': - *dst++ = '\\'; *dst++ = 'r'; - return dst; - case '\b': - *dst++ = '\\'; *dst++ = 'b'; - return dst; - case BELL: - *dst++ = '\\'; *dst++ = 'a'; - return dst; - case '\v': - *dst++ = '\\'; *dst++ = 'v'; - return dst; - case '\t': - *dst++ = '\\'; *dst++ = 't'; - return dst; - case '\f': - *dst++ = '\\'; *dst++ = 'f'; - return dst; - case ' ': - *dst++ = '\\'; *dst++ = 's'; - return dst; - case '\0': - *dst++ = '\\'; *dst++ = '0'; - if (isoctal(nextc)) { - *dst++ = '0'; - *dst++ = '0'; - } - return dst; - default: - if (isgraph(c)) { - *dst++ = '\\'; *dst++ = c; - return dst; - } - } - } - if (isextra || ((c & 0177) == ' ') || (flag & VIS_OCTAL)) { - *dst++ = '\\'; - *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + '0'; - *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + '0'; - *dst++ = (c & 07) + '0'; - } else { - if ((flag & VIS_NOSLASH) == 0) *dst++ = '\\'; - if (c & 0200) { - c &= 0177; *dst++ = 'M'; - } - if (iscntrl(c)) { - *dst++ = '^'; - if (c == 0177) - *dst++ = '?'; - else - *dst++ = c + '@'; - } else { - *dst++ = '-'; *dst++ = c; - } + + /* See comment in istrsenvisx() output loop, below. */ + wmsk = 0; + for (i = sizeof(wmsk) - 1; i >= 0; i--) { + shft = i * NBBY; + bmsk = (uint64_t)0xffLL << shft; + wmsk |= bmsk; + if ((c & wmsk) || i == 0) + dst = do_mbyte(dst, (wint_t)( + (uint64_t)(c & bmsk) >> shft), + flags, nextc, iswextra); } + return dst; } -typedef char *(*visfun_t)(char *, int, int, int, const char *); +typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *); /* * Return the appropriate encoding function depending on the flags given. */ static visfun_t -getvisfun(int flag) +getvisfun(int flags) { - if (flag & VIS_HTTPSTYLE) + if (flags & VIS_HTTPSTYLE) return do_hvis; - if (flag & VIS_MIMESTYLE) + if (flags & VIS_MIMESTYLE) return do_mvis; return do_svis; } /* - * svis - visually encode characters, also encoding the characters - * pointed to by `extra' + * Expand list of extra characters to not visually encode. */ -char * -svis(char *dst, int c, int flag, int nextc, const char *extra) +static wchar_t * +makeextralist(int flags, const char *src) { - char *nextra = NULL; - visfun_t f; + wchar_t *dst, *d; + size_t len; + const wchar_t *s; + + len = strlen(src); + if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL) + return NULL; + + if ((flags & VIS_NOLOCALE) || mbstowcs(dst, src, len) == (size_t)-1) { + size_t i; + for (i = 0; i < len; i++) + dst[i] = (wchar_t)(u_char)src[i]; + d = dst + len; + } else + d = dst + wcslen(dst); + + if (flags & VIS_GLOB) + for (s = char_glob; *s; *d++ = *s++) + continue; + + if (flags & VIS_SHELL) + for (s = char_shell; *s; *d++ = *s++) + continue; + + if (flags & VIS_SP) *d++ = L' '; + if (flags & VIS_TAB) *d++ = L'\t'; + if (flags & VIS_NL) *d++ = L'\n'; + if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\'; + *d = L'\0'; - _DIAGASSERT(dst != NULL); - _DIAGASSERT(extra != NULL); - MAKEEXTRALIST(flag, nextra, extra); - if (!nextra) { - *dst = '\0'; /* can't create nextra, return "" */ - return dst; - } - f = getvisfun(flag); - dst = (*f)(dst, c, flag, nextc, nextra); - free(nextra); - *dst = '\0'; return dst; } - /* - * strsvis, strsvisx - visually encode characters from src into dst - * - * Extra is a pointer to a \0-terminated list of characters to - * be encoded, too. These functions are useful e. g. to - * encode strings in such a way so that they are not interpreted - * by a shell. - * - * Dst must be 4 times the size of src to account for possible - * expansion. The length of dst, not including the trailing NULL, - * is returned. - * - * Strsvisx encodes exactly len bytes from src into dst. - * This is useful for encoding a block of data. + * istrsenvisx() + * The main internal function. + * All user-visible functions call this one. */ -int -strsvis(char *dst, const char *csrc, int flag, const char *extra) +static int +istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength, + int flags, const char *mbextra, int *cerr_ptr) { - int c; - char *start; - char *nextra = NULL; - const unsigned char *src = (const unsigned char *)csrc; + wchar_t *dst, *src, *pdst, *psrc, *start, *extra; + size_t len, olen; + uint64_t bmsk, wmsk; + wint_t c; visfun_t f; + int clen = 0, cerr, error = -1, i, shft; + char *mbdst, *mdst; + ssize_t mbslength, maxolen; - _DIAGASSERT(dst != NULL); - _DIAGASSERT(src != NULL); - _DIAGASSERT(extra != NULL); - MAKEEXTRALIST(flag, nextra, extra); - if (!nextra) { - *dst = '\0'; /* can't create nextra, return "" */ - return 0; - } - f = getvisfun(flag); - for (start = dst; (c = *src++) != '\0'; /* empty */) - dst = (*f)(dst, c, flag, *src, nextra); - free(nextra); - *dst = '\0'; - return (int)(dst - start); -} + _DIAGASSERT(mbdstp != NULL); + _DIAGASSERT(mbsrc != NULL || mblength == 0); + _DIAGASSERT(mbextra != NULL); + /* + * Input (mbsrc) is a char string considered to be multibyte + * characters. The input loop will read this string pulling + * one character, possibly multiple bytes, from mbsrc and + * converting each to wchar_t in src. + * + * The vis conversion will be done using the wide char + * wchar_t string. + * + * This will then be converted back to a multibyte string to + * return to the caller. + */ -int -strsvisx(char *dst, const char *csrc, size_t len, int flag, const char *extra) -{ - unsigned char c; - char *start; - char *nextra = NULL; - const unsigned char *src = (const unsigned char *)csrc; - visfun_t f; - - _DIAGASSERT(dst != NULL); - _DIAGASSERT(src != NULL); - _DIAGASSERT(extra != NULL); - MAKEEXTRALIST(flag, nextra, extra); - if (! nextra) { - *dst = '\0'; /* can't create nextra, return "" */ - return 0; + /* Allocate space for the wide char strings */ + psrc = pdst = extra = NULL; + mdst = NULL; + if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL) + return -1; + if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL) + goto out; + if (*mbdstp == NULL) { + if ((mdst = calloc((4 * mblength) + 1, sizeof(*mdst))) == NULL) + goto out; + *mbdstp = mdst; } - f = getvisfun(flag); + mbdst = *mbdstp; + dst = pdst; + src = psrc; + + if (flags & VIS_NOLOCALE) { + /* Do one byte at a time conversion */ + cerr = 1; + } else { + /* Use caller's multibyte conversion error flag. */ + cerr = cerr_ptr ? *cerr_ptr : 0; + } + + /* + * Input loop. + * Handle up to mblength characters (not bytes). We do not + * stop at NULs because we may be processing a block of data + * that includes NULs. + */ + mbslength = (ssize_t)mblength; + /* + * When inputing a single character, must also read in the + * next character for nextc, the look-ahead character. + */ + if (mbslength == 1) + mbslength++; + while (mbslength > 0) { + /* Convert one multibyte character to wchar_t. */ + if (!cerr) + clen = mbtowc(src, mbsrc, MB_LEN_MAX); + if (cerr || clen < 0) { + /* Conversion error, process as a byte instead. */ + *src = (wint_t)(u_char)*mbsrc; + clen = 1; + cerr = 1; + } + if (clen == 0) + /* + * NUL in input gives 0 return value. process + * as single NUL byte and keep going. + */ + clen = 1; + /* Advance buffer character pointer. */ + src++; + /* Advance input pointer by number of bytes read. */ + mbsrc += clen; + /* Decrement input byte count. */ + mbslength -= clen; + } + len = src - psrc; + src = psrc; + /* + * In the single character input case, we will have actually + * processed two characters, c and nextc. Reset len back to + * just a single character. + */ + if (mblength < len) + len = mblength; + + /* Convert extra argument to list of characters for this mode. */ + extra = makeextralist(flags, mbextra); + if (!extra) { + if (dlen && *dlen == 0) { + errno = ENOSPC; + goto out; + } + *mbdst = '\0'; /* can't create extra, return "" */ + error = 0; + goto out; + } + + /* Look up which processing function to call. */ + f = getvisfun(flags); + + /* + * Main processing loop. + * Call do_Xvis processing function one character at a time + * with next character available for look-ahead. + */ for (start = dst; len > 0; len--) { c = *src++; - dst = (*f)(dst, c, flag, len > 1 ? *src : '\0', nextra); + dst = (*f)(dst, c, flags, len >= 1 ? *src : L'\0', extra); + if (dst == NULL) { + errno = ENOSPC; + goto out; + } } - free(nextra); - *dst = '\0'; - return (int)(dst - start); + + /* Terminate the string in the buffer. */ + *dst = L'\0'; + + /* + * Output loop. + * Convert wchar_t string back to multibyte output string. + * If we have hit a multi-byte conversion error on input, + * output byte-by-byte here. Else use wctomb(). + */ + len = wcslen(start); + maxolen = dlen ? *dlen : (wcslen(start) * MB_LEN_MAX + 1); + olen = 0; + for (dst = start; len > 0; len--) { + if (!cerr) + clen = wctomb(mbdst, *dst); + if (cerr || clen < 0) { + /* + * Conversion error, process as a byte(s) instead. + * Examine each byte and higher-order bytes for + * data. E.g., + * 0x000000000000a264 -> a2 64 + * 0x000000001f00a264 -> 1f 00 a2 64 + */ + clen = 0; + wmsk = 0; + for (i = sizeof(wmsk) - 1; i >= 0; i--) { + shft = i * NBBY; + bmsk = (uint64_t)0xffLL << shft; + wmsk |= bmsk; + if ((*dst & wmsk) || i == 0) + mbdst[clen++] = (char)( + (uint64_t)(*dst & bmsk) >> + shft); + } + cerr = 1; + } + /* If this character would exceed our output limit, stop. */ + if (olen + clen > (size_t)maxolen) + break; + /* Advance output pointer by number of bytes written. */ + mbdst += clen; + /* Advance buffer character pointer. */ + dst++; + /* Incrment output character count. */ + olen += clen; + } + + /* Terminate the output string. */ + *mbdst = '\0'; + + if (flags & VIS_NOLOCALE) { + /* Pass conversion error flag out. */ + if (cerr_ptr) + *cerr_ptr = cerr; + } + + free(extra); + free(pdst); + free(psrc); + + return (int)olen; +out: + free(extra); + free(pdst); + free(psrc); + free(mdst); + return error; +} + +static int +istrsenvisxl(char **mbdstp, size_t *dlen, const char *mbsrc, + int flags, const char *mbextra, int *cerr_ptr) +{ + return istrsenvisx(mbdstp, dlen, mbsrc, + mbsrc != NULL ? strlen(mbsrc) : 0, flags, mbextra, cerr_ptr); +} + +#endif + +#if !HAVE_SVIS +/* + * The "svis" variants all take an "extra" arg that is a pointer + * to a NUL-terminated list of characters to be encoded, too. + * These functions are useful e. g. to encode strings in such a + * way so that they are not interpreted by a shell. + */ + +char * +svis(char *mbdst, int c, int flags, int nextc, const char *mbextra) +{ + char cc[2]; + int ret; + + cc[0] = c; + cc[1] = nextc; + + ret = istrsenvisx(&mbdst, NULL, cc, 1, flags, mbextra, NULL); + if (ret < 0) + return NULL; + return mbdst + ret; +} + +char * +snvis(char *mbdst, size_t dlen, int c, int flags, int nextc, const char *mbextra) +{ + char cc[2]; + int ret; + + cc[0] = c; + cc[1] = nextc; + + ret = istrsenvisx(&mbdst, &dlen, cc, 1, flags, mbextra, NULL); + if (ret < 0) + return NULL; + return mbdst + ret; +} + +int +strsvis(char *mbdst, const char *mbsrc, int flags, const char *mbextra) +{ + return istrsenvisxl(&mbdst, NULL, mbsrc, flags, mbextra, NULL); +} + +int +strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags, const char *mbextra) +{ + return istrsenvisxl(&mbdst, &dlen, mbsrc, flags, mbextra, NULL); +} + +int +strsvisx(char *mbdst, const char *mbsrc, size_t len, int flags, const char *mbextra) +{ + return istrsenvisx(&mbdst, NULL, mbsrc, len, flags, mbextra, NULL); +} + +int +strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, + const char *mbextra) +{ + return istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, mbextra, NULL); +} + +int +strsenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, + const char *mbextra, int *cerr_ptr) +{ + return istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, mbextra, cerr_ptr); } #endif @@ -359,67 +673,89 @@ strsvisx(char *dst, const char *csrc, size_t len, int flag, const char *extra) * vis - visually encode characters */ char * -vis(char *dst, int c, int flag, int nextc) +vis(char *mbdst, int c, int flags, int nextc) { - char *extra = NULL; - unsigned char uc = (unsigned char)c; - visfun_t f; + char cc[2]; + int ret; - _DIAGASSERT(dst != NULL); + cc[0] = c; + cc[1] = nextc; - MAKEEXTRALIST(flag, extra, ""); - if (! extra) { - *dst = '\0'; /* can't create extra, return "" */ - return dst; - } - f = getvisfun(flag); - dst = (*f)(dst, uc, flag, nextc, extra); - free(extra); - *dst = '\0'; - return dst; + ret = istrsenvisx(&mbdst, NULL, cc, 1, flags, "", NULL); + if (ret < 0) + return NULL; + return mbdst + ret; } +char * +nvis(char *mbdst, size_t dlen, int c, int flags, int nextc) +{ + char cc[2]; + int ret; + + cc[0] = c; + cc[1] = nextc; + + ret = istrsenvisx(&mbdst, &dlen, cc, 1, flags, "", NULL); + if (ret < 0) + return NULL; + return mbdst + ret; +} /* - * strvis, strvisx - visually encode characters from src into dst + * strvis - visually encode characters from src into dst + * + * Dst must be 4 times the size of src to account for possible + * expansion. The length of dst, not including the trailing NULL, + * is returned. + */ + +int +strvis(char *mbdst, const char *mbsrc, int flags) +{ + return istrsenvisxl(&mbdst, NULL, mbsrc, flags, "", NULL); +} + +int +strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags) +{ + return istrsenvisxl(&mbdst, &dlen, mbsrc, flags, "", NULL); +} + +int +stravis(char **mbdstp, const char *mbsrc, int flags) +{ + *mbdstp = NULL; + return istrsenvisxl(mbdstp, NULL, mbsrc, flags, "", NULL); +} + +/* + * strvisx - visually encode characters from src into dst * * Dst must be 4 times the size of src to account for possible * expansion. The length of dst, not including the trailing NULL, * is returned. * - * Strvisx encodes exactly len bytes from src into dst. + * Strvisx encodes exactly len characters from src into dst. * This is useful for encoding a block of data. */ -int -strvis(char *dst, const char *src, int flag) -{ - char *extra = NULL; - int rv; - MAKEEXTRALIST(flag, extra, ""); - if (!extra) { - *dst = '\0'; /* can't create extra, return "" */ - return 0; - } - rv = strsvis(dst, src, flag, extra); - free(extra); - return rv; +int +strvisx(char *mbdst, const char *mbsrc, size_t len, int flags) +{ + return istrsenvisx(&mbdst, NULL, mbsrc, len, flags, "", NULL); } +int +strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags) +{ + return istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, "", NULL); +} int -strvisx(char *dst, const char *src, size_t len, int flag) +strenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, + int *cerr_ptr) { - char *extra = NULL; - int rv; - - MAKEEXTRALIST(flag, extra, ""); - if (!extra) { - *dst = '\0'; /* can't create extra, return "" */ - return 0; - } - rv = strsvisx(dst, src, len, flag, extra); - free(extra); - return rv; + return istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, "", cerr_ptr); } #endif diff --git a/lib/libedit/src/vis.h b/lib/libedit/src/vis.h index 23fc0d8e6..501ad182e 100644 --- a/lib/libedit/src/vis.h +++ b/lib/libedit/src/vis.h @@ -1,4 +1,4 @@ -/* $NetBSD: vis.h,v 1.18 2010/11/27 19:44:54 christos Exp $ */ +/* $NetBSD: vis.h,v 1.24 2016/01/14 20:42:14 christos Exp $ */ /*- * Copyright (c) 1990, 1993 @@ -34,31 +34,38 @@ #ifndef _VIS_H_ #define _VIS_H_ +#include + /* * to select alternate encoding format */ -#define VIS_OCTAL 0x001 /* use octal \ddd format */ -#define VIS_CSTYLE 0x002 /* use \[nrft0..] where appropiate */ +#define VIS_OCTAL 0x0001 /* use octal \ddd format */ +#define VIS_CSTYLE 0x0002 /* use \[nrft0..] where appropiate */ /* * to alter set of characters encoded (default is to encode all * non-graphic except space, tab, and newline). */ -#define VIS_SP 0x004 /* also encode space */ -#define VIS_TAB 0x008 /* also encode tab */ -#define VIS_NL 0x010 /* also encode newline */ +#define VIS_SP 0x0004 /* also encode space */ +#define VIS_TAB 0x0008 /* also encode tab */ +#define VIS_NL 0x0010 /* also encode newline */ #define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL) -#define VIS_SAFE 0x020 /* only encode "unsafe" characters */ +#define VIS_SAFE 0x0020 /* only encode "unsafe" characters */ /* * other */ -#define VIS_NOSLASH 0x040 /* inhibit printing '\' */ -#define VIS_HTTP1808 0x080 /* http-style escape % hex hex */ -#define VIS_HTTPSTYLE 0x080 /* http-style escape % hex hex */ -#define VIS_MIMESTYLE 0x100 /* mime-style escape = HEX HEX */ -#define VIS_HTTP1866 0x200 /* http-style &#num; or &string; */ -#define VIS_NOESCAPE 0x400 /* don't decode `\' */ +#define VIS_NOSLASH 0x0040 /* inhibit printing '\' */ +#define VIS_HTTP1808 0x0080 /* http-style escape % hex hex */ +#define VIS_HTTPSTYLE 0x0080 /* http-style escape % hex hex */ +#define VIS_MIMESTYLE 0x0100 /* mime-style escape = HEX HEX */ +#define VIS_HTTP1866 0x0200 /* http-style &#num; or &string; */ +#define VIS_NOESCAPE 0x0400 /* don't decode `\' */ +#define _VIS_END 0x0800 /* for unvis */ +#define VIS_GLOB 0x1000 /* encode glob(3) magic characters */ +#define VIS_SHELL 0x2000 /* encode shell special characters [not glob] */ +#define VIS_META (VIS_WHITE | VIS_GLOB | VIS_SHELL) +#define VIS_NOLOCALE 0x4000 /* encode using the C locale */ /* * unvis return codes @@ -72,19 +79,41 @@ /* * unvis flags */ -#define UNVIS_END 1 /* no more characters */ +#define UNVIS_END _VIS_END /* no more characters */ + +#include __BEGIN_DECLS char *vis(char *, int, int, int); +char *nvis(char *, size_t, int, int, int); + char *svis(char *, int, int, int, const char *); +char *snvis(char *, size_t, int, int, int, const char *); + int strvis(char *, const char *, int); +int stravis(char **, const char *, int); +int strnvis(char *, size_t, const char *, int); + int strsvis(char *, const char *, int, const char *); +int strsnvis(char *, size_t, const char *, int, const char *); + int strvisx(char *, const char *, size_t, int); +int strnvisx(char *, size_t, const char *, size_t, int); +int strenvisx(char *, size_t, const char *, size_t, int, int *); + int strsvisx(char *, const char *, size_t, int, const char *); +int strsnvisx(char *, size_t, const char *, size_t, int, const char *); +int strsenvisx(char *, size_t, const char *, size_t , int, const char *, + int *); + int strunvis(char *, const char *); +int strnunvis(char *, size_t, const char *); + int strunvisx(char *, const char *, int); +int strnunvisx(char *, size_t, const char *, int); + #ifndef __LIBC12_SOURCE__ -int unvis(char *, int, int *, int); +int unvis(char *, int, int *, int) __RENAME(__unvis50); #endif __END_DECLS