From c6a46f0c96dde73ef4f3a247a1e904d4cf15aeb2 Mon Sep 17 00:00:00 2001 From: Nicolas Williams Date: Sun, 11 Sep 2022 00:28:00 -0500 Subject: [PATCH] base: Implement JSON string escaping We encode JSON in the KDC's audit logs, and soon in bx509d's /get-tgts. Therefore, we should be reasonable in terms of what we encode. --- lib/base/json.c | 731 ++++++++++++++++++++++++++++++++---- lib/base/string.c | 3 +- lib/base/test_base.c | 282 +++++++++++++- lib/base/version-script.map | 1 + 4 files changed, 937 insertions(+), 80 deletions(-) diff --git a/lib/base/json.c b/lib/base/json.c index 2ef371b97..cf69e4b00 100644 --- a/lib/base/json.c +++ b/lib/base/json.c @@ -166,12 +166,175 @@ base2json(heim_object_t obj, struct twojson *j) j->first = first; break; - case HEIM_TID_STRING: + case HEIM_TID_STRING: { + const unsigned char *s = (const unsigned char *)heim_string_get_utf8(obj); + const unsigned char *p; + unsigned int c, cp, ctop, cbot; + char e[sizeof("\\u0123\\u3210")]; + int good; + size_t i; + indent(j); j->out(j->ctx, "\""); - j->out(j->ctx, heim_string_get_utf8(obj)); + for (p = s; (c = *p); p++) { + switch (c) { + /* ASCII control characters w/ C-like escapes */ + case '\b': j->out(j->ctx, "\\b"); continue; + case '\f': j->out(j->ctx, "\\f"); continue; + case '\n': j->out(j->ctx, "\\n"); continue; + case '\r': j->out(j->ctx, "\\r"); continue; + case '\t': j->out(j->ctx, "\\t"); continue; + /* Other must-escape non-control ASCII characters */ + case '"': j->out(j->ctx, "\\\""); continue; + case '\\': j->out(j->ctx, "\\\\"); continue; + default: break; + } + + /* + * JSON string encoding is... complex. + * + * Invalid UTF-8 w/ HEIM_JSON_F_STRICT_STRINGS set -> return 1 + * + * Invalid UTF-8 w/o HEIM_JSON_F_STRICT_STRINGS set -> pass + * through, a sort of Heimdal WTF-8, but not _the_ WTF-8. + */ + if (c < 0x20) { + /* ASCII control character w/o C-like escape */ + e[0] = '\\'; + e[1] = 'u'; + e[2] = '0'; + e[3] = '0'; + e[4] = "0123456789ABCDEF"[c>>4]; + e[5] = "0123456789ABCDEF"[c & 0x0f]; + e[6] = '\0'; + j->out(j->ctx, e); + continue; + } + if (c < 0x80) { + /* ASCII */ + e[0] = c; + e[1] = '\0'; + j->out(j->ctx, e); + continue; + } + if ((c & 0xc0) == 0x80) { + /* UTF-8 bare non-leading byte */ + if (!(j->flags & HEIM_JSON_F_STRICT_STRINGS)) { + e[0] = c; + e[1] = '\0'; + j->out(j->ctx, e); + continue; + } + return 1; + } + if ((c & 0xe0) == 0xc0) { + /* UTF-8 leading byte of two-byte sequence */ + good = 1; + for (i = 1; i < 2 && good && p[i]; i++) { + if ((p[i] & 0xc0) != 0x80) + good = 0; + } + if (i != 2) + good = 0; + if (!good && !(j->flags & HEIM_JSON_F_STRICT_STRINGS)) { + e[0] = c; + e[1] = '\0'; + j->out(j->ctx, e); + continue; + } else if (!good) { + return 1; + } + e[0] = c; + e[1] = p[1]; + e[2] = '\0'; + j->out(j->ctx, e); + p += 1; + continue; + } + if ((c & 0xf0) == 0xe0) { + /* UTF-8 leading byte of three-byte sequence */ + good = 1; + for (i = 1; i < 3 && good && p[i]; i++) { + if ((p[i] & 0xc0) != 0x80) + good = 0; + } + if (i != 3) + good = 0; + if (!good && !(j->flags & HEIM_JSON_F_STRICT_STRINGS)) { + e[0] = c; + e[1] = '\0'; + j->out(j->ctx, e); + continue; + } else if (!good) { + return 1; + } + e[0] = c; + e[1] = p[1]; + e[2] = p[2]; + e[3] = '\0'; + j->out(j->ctx, e); + p += 2; + continue; + } + + if (c > 0xf7) { + /* Invalid UTF-8 leading byte */ + if (!(j->flags & HEIM_JSON_F_STRICT_STRINGS)) { + e[0] = c; + e[1] = '\0'; + j->out(j->ctx, e); + continue; + } + return 1; + } + + /* + * A codepoint > U+FFFF, needs encoding a la UTF-16 surrogate + * pair because JSON takes after JS which uses UTF-16. Ugly. + */ + cp = c & 0x7; + good = 1; + for (i = 1; i < 4 && good && p[i]; i++) { + if ((p[i] & 0xc0) == 0x80) + cp = (cp << 6) | (p[i] & 0x3f); + else + good = 0; + } + if (i != 4) + good = 0; + if (!good && !(j->flags & HEIM_JSON_F_STRICT_STRINGS)) { + e[0] = c; + e[1] = '\0'; + j->out(j->ctx, e); + continue; + } else if (!good) { + return 1; + } + p += 3; + + cp -= 0x10000; + ctop = 0xD800 + (cp >> 10); + cbot = 0xDC00 + (cp & 0x3ff); + + e[0 ] = '\\'; + e[1 ] = 'u'; + e[2 ] = "0123456789ABCDEF"[(ctop ) >> 12]; + e[3 ] = "0123456789ABCDEF"[(ctop & 0x0f00) >> 8]; + e[4 ] = "0123456789ABCDEF"[(ctop & 0x00f0) >> 4]; + e[5 ] = "0123456789ABCDEF"[(ctop & 0x000f) ]; + e[6 ] = '\\'; + e[7 ] = 'u'; + e[8 ] = "0123456789ABCDEF"[(cbot ) >> 12]; + e[9 ] = "0123456789ABCDEF"[(cbot & 0x0f00) >> 8]; + e[10] = "0123456789ABCDEF"[(cbot & 0x00f0) >> 4]; + e[11] = "0123456789ABCDEF"[(cbot & 0x000f) ]; + e[12] = '\0'; + j->out(j->ctx, e); + continue; + } j->out(j->ctx, "\""); break; + } case HEIM_TID_DATA: { heim_dict_t d; @@ -255,9 +418,6 @@ heim_base2json(heim_object_t obj, void *ctx, heim_json_flags_t flags, { struct twojson j; - if (flags & HEIM_JSON_F_STRICT_STRINGS) - return ENOTSUP; /* Sorry, not yet! */ - heim_base_once_f(&heim_json_once, NULL, json_init_once); j.indent = 0; @@ -342,93 +502,428 @@ parse_number(struct parse_ctx *ctx) return heim_number_create(number * neg); } +/* + * Read 4 hex digits from ctx->p. + * + * If we don't have enough, rewind ctx->p and return -1 . + */ +static int +unescape_unicode(struct parse_ctx *ctx) +{ + int c = 0; + int i; + + for (i = 0; i < 4 && ctx->p < ctx->pend; i++, ctx->p++) { + if (*ctx->p >= '0' && *ctx->p <= '9') { + c = (c << 4) + (*ctx->p - '0'); + } else if (*ctx->p >= 'A' && *ctx->p <= 'F') { + c = (c << 4) + (10 + *ctx->p - 'A'); + } else if (*ctx->p >= 'a' && *ctx->p <= 'f') { + c = (c << 4) + (10 + *ctx->p - 'a'); + } else { + ctx->p -= i; + return -1; + } + } + return c; +} + +static int +encode_utf8(struct parse_ctx *ctx, char **pp, char *pend, int c) +{ + char *p = *pp; + + if (c < 0x80) { + /* ASCII */ + if (p >= pend) return 0; + *(p++) = c; + *pp = p; + return 1; + } + if (c < 0x800) { + /* 2 code unit UTF-8 sequence */ + if (p >= pend) return 0; + *(p++) = 0xc0 | ((c >> 6) ); + if (p == pend) return 0; + *(p++) = 0x80 | ((c ) & 0x3f); + *pp = p; + return 1; + } + if (c < 0x10000) { + /* 3 code unit UTF-8 sequence */ + if (p >= pend) return 0; + *(p++) = 0xe0 | ((c >> 12) ); + if (p == pend) return 0; + *(p++) = 0x80 | ((c >> 6) & 0x3f); + if (p == pend) return 0; + *(p++) = 0x80 | ((c) & 0x3f); + *pp = p; + return 1; + } + if (c < 0x110000) { + /* 4 code unit UTF-8 sequence */ + if (p >= pend) return 0; + *(p++) = 0xf0 | ((c >> 18) ); + if (p == pend) return 0; + *(p++) = 0x80 | ((c >> 12) & 0x3f); + if (p == pend) return 0; + *(p++) = 0x80 | ((c >> 6) & 0x3f); + if (p == pend) return 0; + *(p++) = 0x80 | ((c) & 0x3f); + *pp = p; + return 1; + } + return 0; +} + +static heim_string_t +parse_string_error(struct parse_ctx *ctx, + char *freeme, + const char *msg) +{ + free(freeme); + ctx->error = heim_error_create(EINVAL, "%s at %lu", msg, ctx->lineno); + return NULL; +} + static heim_string_t parse_string(struct parse_ctx *ctx) { const uint8_t *start; - int quote = 0; + heim_object_t o; + size_t alloc_len = 0; + size_t need = 0; + char *p0, *p, *pend; + int strict = ctx->flags & HEIM_JSON_F_STRICT_STRINGS; + int binary = 0; - if (ctx->flags & HEIM_JSON_F_STRICT_STRINGS) { - ctx->error = heim_error_create(EINVAL, "Strict JSON string encoding " - "not yet supported"); - return NULL; + if (*ctx->p != '"') + return parse_string_error(ctx, NULL, + "Expected a JSON string but found " + "something else"); + start = ++(ctx->p); + + /* Estimate how many bytes we need to allocate */ + p0 = p = pend = NULL; + for (need = 1; ctx->p < ctx->pend; ctx->p++) { + need++; + if (*ctx->p == '\\') + ctx->p++; + else if (*ctx->p == '"') + break; } + if (ctx->p == ctx->pend) + return parse_string_error(ctx, NULL, "Unterminated JSON string"); - if (*ctx->p != '"') { - ctx->error = heim_error_create(EINVAL, "Expected a JSON string but " - "found something else at line %lu", - ctx->lineno); - return NULL; - } - start = ++ctx->p; - + ctx->p = start; while (ctx->p < ctx->pend) { - if (*ctx->p == '\n') { - ctx->lineno++; - } else if (*ctx->p == '\\') { - if (ctx->p + 1 == ctx->pend) - goto out; - ctx->p++; - quote = 1; - } else if (*ctx->p == '"') { - heim_object_t o; + const unsigned char *p_save; + int32_t ctop, cbot; - if (quote) { - char *p0, *p; - p = p0 = malloc(ctx->p - start); - if (p == NULL) - goto out; - while (start < ctx->p) { - if (*start == '\\') { - start++; - /* XXX validate quoted char */ - } - *p++ = *start++; - } - o = heim_string_create_with_bytes(p0, p - p0); - free(p0); - } else { - o = heim_string_create_with_bytes(start, ctx->p - start); - if (o == NULL) { - ctx->error = heim_error_create_enomem(); - return NULL; - } + if (*ctx->p == '"') { + ctx->p++; + break; + } - /* If we can decode as base64, then let's */ - if (ctx->flags & HEIM_JSON_F_TRY_DECODE_DATA) { - void *buf; - size_t len; - const char *s; + /* Allocate or resize our output buffer if need be */ + if (need || p == pend) { + char *tmp = realloc(p0, alloc_len + need + 5 /* slop? */); - s = heim_string_get_utf8(o); - len = strlen(s); + if (tmp == NULL) { + ctx->error = heim_error_create_enomem(); + free(p0); + return NULL; + } + alloc_len += need + 5; + p = tmp + (p - p0); + p0 = tmp; + pend = p0 + alloc_len; - if (len >= 4 && strspn(s, base64_chars) >= len - 2) { - buf = malloc(len); - if (buf == NULL) { - heim_release(o); - ctx->error = heim_error_create_enomem(); - return NULL; - } - len = rk_base64_decode(s, buf); - if (len == -1) { - free(buf); - return o; - } - heim_release(o); - o = heim_data_ref_create(buf, len, free); - } - } - } - ctx->p += 1; + need = 0; + } - return o; - } - ctx->p += 1; + if (*ctx->p != '\\') { + unsigned char c = *ctx->p; + + /* + * Not backslashed -> consume now. + * + * NOTE: All cases in this block must continue or return w/ error. + */ + + /* Check for unescaped ASCII control characters */ + if (c == '\n') { + if (strict) + return parse_string_error(ctx, p0, + "Unescaped newline in JSON string"); + /* Count the newline but don't add it to the decoding */ + ctx->lineno++; + } else if (strict && *ctx->p <= 0x1f) { + return parse_string_error(ctx, p0, "Unescaped ASCII control character"); + } else if (c == 0) { + binary = 1; + } + if (!strict || c < 0x80) { + /* ASCII, or not strict -> no need to validate */ + *(p++) = c; + ctx->p++; + continue; + } + + /* + * Being strict for parsing means we want to detect malformed UTF-8 + * sequences. + * + * If not strict then we just go on below and add to `p' whatever + * bytes we find in `ctx->p' as we find them. + * + * For each two-byte sequence we need one more byte in `p[]'. For + * each three-byte sequence we need two more bytes in `p[]'. + * + * Setting `need' and looping will cause `p0' to be grown. + * + * NOTE: All cases in this block must continue or return w/ error. + */ + if ((c & 0xe0) == 0xc0) { + /* Two-byte UTF-8 encoding */ + if (pend - p < 2) { + need = 2; + continue; /* realloc p0 */ + } + + *(p++) = c; + ctx->p++; + if (ctx->p == ctx->pend) + return parse_string_error(ctx, p0, "Truncated UTF-8"); + c = *(ctx->p++); + if ((c & 0xc0) != 0x80) + return parse_string_error(ctx, p0, "Truncated UTF-8"); + *(p++) = c; + continue; + } + if ((c & 0xf0) == 0xe0) { + /* Three-byte UTF-8 encoding */ + if (pend - p < 3) { + need = 3; + continue; /* realloc p0 */ + } + + *(p++) = c; + ctx->p++; + if (ctx->p == ctx->pend) + return parse_string_error(ctx, p0, "Truncated UTF-8"); + c = *(ctx->p++); + if ((c & 0xc0) != 0x80) + return parse_string_error(ctx, p0, "Truncated UTF-8"); + *(p++) = c; + c = *(ctx->p++); + if ((c & 0xc0) != 0x80) + return parse_string_error(ctx, p0, "Truncated UTF-8"); + *(p++) = c; + continue; + } + if ((c & 0xf8) == 0xf0) + return parse_string_error(ctx, p0, "UTF-8 sequence not " + "encoded as escaped UTF-16"); + if ((c & 0xc0) == 0x80) + return parse_string_error(ctx, p0, + "Invalid UTF-8 " + "(bare continuation code unit)"); + + return parse_string_error(ctx, p0, "Not UTF-8"); + } + + /* Backslash-quoted character */ + ctx->p++; + if (ctx->p == ctx->pend) { + ctx->error = + heim_error_create(EINVAL, + "Unterminated JSON string at line %lu", + ctx->lineno); + free(p0); + return NULL; + } + switch (*ctx->p) { + /* Simple escapes */ + case 'b': *(p++) = '\b'; ctx->p++; continue; + case 'f': *(p++) = '\f'; ctx->p++; continue; + case 'n': *(p++) = '\n'; ctx->p++; continue; + case 'r': *(p++) = '\r'; ctx->p++; continue; + case 't': *(p++) = '\t'; ctx->p++; continue; + case '"': *(p++) = '"'; ctx->p++; continue; + case '\\': *(p++) = '\\'; ctx->p++; continue; + /* Escaped Unicode handled below */ + case 'u': + /* + * Worst case for !strict we need 11 bytes for a truncated non-BMP + * codepoint escape. Call it 12. + */ + if (strict) + need = 4; + else + need = 12; + if (pend - p < need) { + /* Go back to the backslash, realloc, try again */ + ctx->p--; + continue; + } + + need = 0; + ctx->p++; + break; + default: + if (!strict) { + *(p++) = *ctx->p; + ctx->p++; + continue; + } + ctx->error = + heim_error_create(EINVAL, + "Invalid backslash escape at line %lu", + ctx->lineno); + free(p0); + return NULL; + } + + /* Unicode code point */ + if (pend - p < 12) { + need = 12; + ctx->p -= 2; /* for "\\u" */ + continue; /* This will cause p0 to be realloc'ed */ + } + p_save = ctx->p; + ctop = cbot = -3; + ctop = unescape_unicode(ctx); + if (ctop == -1 && strict) + return parse_string_error(ctx, p0, "Invalid escaped Unicode"); + if (ctop == -1) { + /* + * Not strict; tolerate bad input. + * + * Output "\\u" and then loop to treat what we expected to be four + * digits as if they were not part of an escaped Unicode codepoint. + */ + ctx->p = p_save; + if (p < pend) + *(p++) = '\\'; + if (p < pend) + *(p++) = 'u'; + continue; + } + if (ctop == 0) { + *(p++) = '\0'; + binary = 1; + continue; + } + if (ctop < 0xd800) { + if (!encode_utf8(ctx, &p, pend, ctop)) + return parse_string_error(ctx, p0, + "Internal JSON string parse error"); + continue; + } + + /* + * We parsed the top escaped codepoint of a surrogate pair encoding + * of a non-BMP Unicode codepoint. What follows must be another + * escaped codepoint. + */ + if (ctx->p < ctx->pend && ctx->p[0] == '\\') + ctx->p++; + else + ctop = -1; + if (ctop > -1 && ctx->p < ctx->pend && ctx->p[0] == 'u') + ctx->p++; + else + ctop = -1; + if (ctop > -1) { + /* Parse the hex digits of the bottom half of the surrogate pair */ + cbot = unescape_unicode(ctx); + if (cbot == -1 || cbot < 0xdc00) + ctop = -1; + } + if (ctop == -1) { + if (strict) + return parse_string_error(ctx, p0, + "Invalid surrogate pair"); + + /* + * Output "\\u", rewind, output the digits of `ctop'. + * + * When we get to what should have been the bottom half of the + * pair we'll necessarily fail to parse it as a normal escaped + * Unicode codepoint, and once again, rewind and output its digits. + */ + if (p < pend) + *(p++) = '\\'; + if (p < pend) + *(p++) = 'u'; + ctx->p = p_save; + continue; + } + + /* Finally decode the surrogate pair then encode as UTF-8 */ + ctop -= 0xd800; + cbot -= 0xdc00; + if (!encode_utf8(ctx, &p, pend, 0x10000 + ((ctop << 10) | (cbot & 0x3ff)))) + return parse_string_error(ctx, p0, + "Internal JSON string parse error"); } - out: - ctx->error = heim_error_create(EINVAL, "ran out of string"); - return NULL; + + if (p0 == NULL) + return heim_string_create(""); + + /* NUL-terminate for rk_base64_decode() and plain paranoia */ + if (p0 != NULL && p == pend) { + char *tmp = realloc(p0, 1 + pend - p); + + if (tmp == NULL) { + ctx->error = heim_error_create_enomem(); + free(p0); + return NULL; + } + p = tmp + (p - p0); + pend = tmp + 1 + (pend - p0); + p0 = tmp; + } + *(p++) = '\0'; + + /* If there's embedded NULs, it's not a C string */ + if (binary) { + o = heim_data_ref_create(p0, (p - 1) - p0, free); + return o; + } + + /* If we can decode as base64, then let's */ + if (ctx->flags & HEIM_JSON_F_TRY_DECODE_DATA) { + void *buf; + size_t len = p - p0; + + if (len > 0) + len--; + + if (len >= 4 && strspn(p0, base64_chars) >= len - 2) { + buf = malloc(len); + if (buf == NULL) { + ctx->error = heim_error_create_enomem(); + free(p0); + return NULL; + } + len = rk_base64_decode(p0, buf); + if (len > -1) { + /* Yes base64, return the decoded data */ + o = heim_data_ref_create(buf, len, free); + free(p0); + return o; + } + /* Not base64, so return what we had */ + free(buf); + } + } + + /* Sadly this will copy `p0' */ + o = heim_string_create_with_bytes(p0, p - p0); + free(p0); + return o; } static int @@ -809,3 +1304,83 @@ heim_json_copy_serialize(heim_object_t obj, heim_json_flags_t flags, heim_error_ } return str; } + +struct heim_eq_f_ctx { + heim_dict_t other; + int ret; +}; + +static void +heim_eq_dict_iter_f(heim_object_t key, heim_object_t val, void *d) +{ + struct heim_eq_f_ctx *ctx = d; + heim_object_t other_val; + + if (!ctx->ret) + return; + + /* + * This doesn't work if the key is an array or a dict, which, anyways, + * isn't allowed in JSON, though we allow it. + */ + other_val = heim_dict_get_value(ctx->other, key); + ctx->ret = heim_json_eq(val, other_val); +} + +int +heim_json_eq(heim_object_t a, heim_object_t b) +{ + heim_tid_t atid, btid; + + if (a == b) + return 1; + if (a == NULL || b == NULL) + return 0; + atid = heim_get_tid(a); + btid = heim_get_tid(b); + if (atid != btid) + return 0; + switch (atid) { + case HEIM_TID_ARRAY: { + size_t len = heim_array_get_length(b); + size_t i; + + if (heim_array_get_length(a) != len) + return 0; + for (i = 0; i < len; i++) { + if (!heim_json_eq(heim_array_get_value(a, i), + heim_array_get_value(b, i))) + return 0; + } + return 1; + } + case HEIM_TID_DICT: { + struct heim_eq_f_ctx ctx; + + ctx.other = b; + ctx.ret = 1; + heim_dict_iterate_f(a, &ctx, heim_eq_dict_iter_f); + + if (ctx.ret) { + ctx.other = a; + heim_dict_iterate_f(b, &ctx, heim_eq_dict_iter_f); + } + return ctx.ret; + } + case HEIM_TID_STRING: + return strcmp(heim_string_get_utf8(a), heim_string_get_utf8(b)) == 0; + case HEIM_TID_DATA: { + return heim_data_get_length(a) == heim_data_get_length(b) && + memcmp(heim_data_get_ptr(a), heim_data_get_ptr(b), + heim_data_get_length(a)) == 0; + } + case HEIM_TID_NUMBER: + return heim_number_get_long(a) == heim_number_get_long(b); + case HEIM_TID_NULL: + case HEIM_TID_BOOL: + return heim_bool_val(a) == heim_bool_val(b); + default: + break; + } + return 0; +} diff --git a/lib/base/string.c b/lib/base/string.c index f94244716..7682fb39d 100644 --- a/lib/base/string.c +++ b/lib/base/string.c @@ -153,7 +153,8 @@ heim_string_create_with_bytes(const void *data, size_t len) s = _heim_alloc_object(&_heim_string_object, len + 1); if (s) { - memcpy(s, data, len); + if (len) + memcpy(s, data, len); ((char *)s)[len] = '\0'; } return s; diff --git a/lib/base/test_base.c b/lib/base/test_base.c index be6c860e2..c2a27c9b2 100644 --- a/lib/base/test_base.c +++ b/lib/base/test_base.c @@ -244,26 +244,266 @@ test_json(void) }; char *s; size_t i, k; - heim_object_t o, o2; + heim_object_t o, o2, o3; heim_string_t k1 = heim_string_create("k1"); o = heim_json_create("\"string\"", 10, 0, NULL); heim_assert(o != NULL, "string"); heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid"); heim_assert(strcmp("string", heim_string_get_utf8(o)) == 0, "wrong string"); + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); heim_release(o); + /* + * Test string escaping: + * + * - C-like must-escapes + * - ASCII control character must-escapes + * - surrogate pairs + * + * We test round-tripping. First we parse, then we serialize, then parse, + * then compare the second parse to the first for equality. + * + * We do compare serialized forms in spite of their not being canonical. + * That means that some changes to serialization can cause failures here. + */ + o = heim_json_create("\"" + "\\b\\f\\n\\r\\t" /* ASCII C-like escapes */ + "\x1e" /* ASCII control character w/o C-like escape */ + "\\u00e1" /* á */ + "\\u07ff" + "\\u0801" + "\\u8001" + "\\uD834\\udd1e" /* U+1D11E, as shown in RFC 7159 */ + "\"", 10, 0, NULL); + heim_assert(o != NULL, "string"); + heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid"); + heim_assert(strcmp( + "\b\f\n\r\t" + "\x1e" + "\xc3\xa1" + "\xdf\xbf" + "\xe0\xA0\x81" + "\xe8\x80\x81" + "\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string"); + o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL); + heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001Eá߿ࠁ老\\uD834\\uDD1E\"", + heim_string_get_utf8(o2)) == 0, + "JSON encoding changed; please check that it is till valid"); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); + heim_release(o); + + o = heim_json_create("\"" + "\\b\\f\\n\\r\\t" /* ASCII C-like escapes */ + "\x1e" /* ASCII control character w/o C-like escape */ + "\xc3\xa1" + "\xdf\xbf" + "\xe0\xa0\x81" + "\xE8\x80\x81" + "\\uD834\\udd1e" /* U+1D11E, as shown in RFC 7159 */ + "\"", 10, 0, NULL); + heim_assert(o != NULL, "string"); + heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid"); + heim_assert(strcmp( + "\b\f\n\r\t" + "\x1e" + "\xc3\xa1" + "\xdf\xbf" + "\xe0\xA0\x81" + "\xe8\x80\x81" + "\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string"); + o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL); + heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001Eá߿ࠁ老\\uD834\\uDD1E\"", + heim_string_get_utf8(o2)) == 0, + "JSON encoding changed; please check that it is till valid"); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); + heim_release(o); + + /* Test rejection of unescaped ASCII control characters */ + o = heim_json_create("\"\b\\f\"", 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(o == NULL, "strict parse accepted bad input"); + o = heim_json_create("\"\b\x1e\"", 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(o == NULL, "strict parse accepted bad input"); + + o = heim_json_create("\"\b\\f\"", 10, 0, NULL); + heim_assert(o != NULL, "string"); + heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid"); + heim_assert(strcmp("\b\f", heim_string_get_utf8(o)) == 0, "wrong string"); + o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL); + heim_assert(strcmp("\"\\b\\f\"", heim_string_get_utf8(o2)) == 0, + "JSON encoding changed; please check that it is till valid"); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); + heim_release(o); + + /* Test bogus backslash escape */ + o = heim_json_create("\"" + "\\ " + "\"", 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(o == NULL, "malformed string accepted"); + o = heim_json_create("\"" + "\\ " + "\"", 10, 0, NULL); + heim_assert(o != NULL, "malformed string rejected (not strict)"); + heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid"); + heim_assert(strcmp(" ", heim_string_get_utf8(o)) == 0, "wrong string"); + o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL); + heim_assert(strcmp("\" \"", heim_string_get_utf8(o2)) == 0, + "JSON encoding changed; please check that it is till valid"); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); + heim_release(o); + + /* Test truncated surrogate encoding (bottom code unit) */ + o = heim_json_create("\"" + "\xE8\x80\x81" + "\\uD834\\udd" + "\"", 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(o == NULL, "malformed string accepted"); + o = heim_json_create("\"" + "\xE8\x80\x81" + "\\uD834\\udd" + "\"", 10, 0, NULL); + heim_assert(o != NULL, "malformed string rejected (not strict)"); + heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid"); + heim_assert(strcmp( + "\xe8\x80\x81" + "\\uD834\\udd", heim_string_get_utf8(o)) == 0, "wrong string"); + o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL); + heim_assert(strcmp("\"老\\\\uD834\\\\udd\"", + heim_string_get_utf8(o2)) == 0, + "JSON encoding changed; please check that it is till valid"); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); + heim_release(o); + + /* Test truncated surrogate encodings (top code unit) */ + o = heim_json_create("\"" + "\xE8\x80\x81" + "\\uD83" + "\"", 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(o == NULL, "malformed string accepted"); + o = heim_json_create("\"" + "\xE8\x80\x81" + "\\uD83" + "\"", 10, 0, NULL); + heim_assert(o != NULL, "malformed string rejected (not strict)"); + heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid"); + heim_assert(strcmp( + "\xe8\x80\x81" + "\\uD83", heim_string_get_utf8(o)) == 0, "wrong string"); + o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL); + heim_assert(strcmp("\"老\\\\uD83\"", + heim_string_get_utf8(o2)) == 0, + "JSON encoding changed; please check that it is till valid"); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); + heim_release(o); + + /* + * Test handling of truncated UTF-8 multi-byte sequences. + */ + o = heim_json_create("\"" + "\xE8\x80" + "\"", 10, 0, NULL); + heim_assert(o != NULL, "malformed string rejected (not strict)"); + heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid"); + heim_assert(strcmp("\xe8\x80", + heim_string_get_utf8(o)) == 0, "wrong string"); + o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL); + heim_assert(o2 == NULL, "malformed string serialized"); + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(o3 == NULL, "malformed string accepted (not strict)"); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(strcmp("\xe8\x80", + heim_string_get_utf8(o3)) == 0, "wrong string"); + heim_release(o3); + heim_release(o2); + heim_release(o); + + /* Test handling of unescaped / embedded newline */ + o = heim_json_create("\"\n\"", 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(o == NULL, "malformed string accepted (strict)"); + o = heim_json_create("\"\n\"", 10, 0, NULL); + heim_assert(o != NULL, "malformed string rejected (not strict)"); + heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid"); + heim_assert(strcmp("\n", heim_string_get_utf8(o)) == 0, "wrong string"); + o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL); + heim_assert(o2 != NULL, "string not serialized"); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(o3 != NULL, "string not accepted"); + heim_assert(strcmp("\n", heim_string_get_utf8(o3)) == 0, "wrong string"); + heim_release(o3); + heim_release(o2); + heim_release(o); + + /* Test handling of embedded NULs (must decode as data, not string) */ + o = heim_json_create("\"\\u0000\"", 10, HEIM_JSON_F_STRICT, NULL); + heim_assert(o != NULL, "string with NULs rejected"); + heim_assert(heim_get_tid(o) == heim_data_get_type_id(), "data-tid"); + heim_assert(heim_data_get_length(o) == 1, "wrong data length"); + heim_assert(((const char *)heim_data_get_ptr(o))[0] == '\0', + "wrong data NUL"); + o2 = heim_json_copy_serialize(o, 0, NULL); + heim_assert(o2 != NULL, "data not serialized"); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, + HEIM_JSON_F_TRY_DECODE_DATA, NULL); + heim_assert(o3 != NULL, "data not accepted"); + heim_assert(heim_data_get_length(o3) == 1, "wrong data length"); + heim_assert(((const char *)heim_data_get_ptr(o3))[0] == '\0', + "wrong data NUL"); + heim_release(o3); + heim_release(o2); + heim_release(o); + + /* + * Note that the trailing ']' is not part of the JSON text (which is just a + * string). + */ o = heim_json_create(" \"foo\\\"bar\" ]", 10, 0, NULL); heim_assert(o != NULL, "string"); heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid"); heim_assert(strcmp("foo\"bar", heim_string_get_utf8(o)) == 0, "wrong string"); + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); heim_release(o); o = heim_json_create(" { \"key\" : \"value\" }", 10, 0, NULL); heim_assert(o != NULL, "dict"); heim_assert(heim_get_tid(o) == heim_dict_get_type_id(), "dict-tid"); + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); heim_release(o); + /* + * heim_json_eq() can't handle dicts with dicts as keys, so we don't check + * for round-tripping here + */ o = heim_json_create("{ { \"k1\" : \"s1\", \"k2\" : \"s2\" } : \"s3\", " "{ \"k3\" : \"s4\" } : -1 }", 10, 0, NULL); heim_assert(o != NULL, "dict"); @@ -281,6 +521,11 @@ test_json(void) o2 = heim_dict_copy_value(o, k1); heim_assert(heim_get_tid(o2) == heim_string_get_type_id(), "string-tid"); heim_release(o2); + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); heim_release(o); o = heim_json_create(" { \"k1\" : { \"k2\" : \"s2\" } }", 10, 0, NULL); @@ -289,6 +534,11 @@ test_json(void) o2 = heim_dict_copy_value(o, k1); heim_assert(heim_get_tid(o2) == heim_dict_get_type_id(), "dict-tid"); heim_release(o2); + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); heim_release(o); o = heim_json_create("{ \"k1\" : 1 }", 10, 0, NULL); @@ -297,26 +547,51 @@ test_json(void) o2 = heim_dict_copy_value(o, k1); heim_assert(heim_get_tid(o2) == heim_number_get_type_id(), "number-tid"); heim_release(o2); + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); heim_release(o); o = heim_json_create("-10", 10, 0, NULL); heim_assert(o != NULL, "number"); heim_assert(heim_get_tid(o) == heim_number_get_type_id(), "number-tid"); + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); heim_release(o); o = heim_json_create("99", 10, 0, NULL); heim_assert(o != NULL, "number"); heim_assert(heim_get_tid(o) == heim_number_get_type_id(), "number-tid"); + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); heim_release(o); o = heim_json_create(" [ 1 ]", 10, 0, NULL); heim_assert(o != NULL, "array"); heim_assert(heim_get_tid(o) == heim_array_get_type_id(), "array-tid"); + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); heim_release(o); o = heim_json_create(" [ -1 ]", 10, 0, NULL); heim_assert(o != NULL, "array"); heim_assert(heim_get_tid(o) == heim_array_get_type_id(), "array-tid"); + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); heim_release(o); for (i = 0; i < (sizeof (j) / sizeof (j[0])); i++) { @@ -325,6 +600,11 @@ test_json(void) fprintf(stderr, "Failed to parse this JSON: %s\n", j[i]); return 1; } + o2 = heim_json_copy_serialize(o, 0, NULL); + o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL); + heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip"); + heim_release(o3); + heim_release(o2); heim_release(o); /* Simple fuzz test */ for (k = strlen(j[i]) - 1; k > 0; k--) { diff --git a/lib/base/version-script.map b/lib/base/version-script.map index 928e86199..70188985c 100644 --- a/lib/base/version-script.map +++ b/lib/base/version-script.map @@ -146,6 +146,7 @@ HEIMDAL_BASE_1.0 { heim_json_copy_serialize; heim_json_create; heim_json_create_with_bytes; + heim_json_eq; heim_load_plugins; heim_log; heim_log_msg;