diff --git a/lib/base/heimbase.h b/lib/base/heimbase.h
index 76af38f94..7684498f4 100644
--- a/lib/base/heimbase.h
+++ b/lib/base/heimbase.h
@@ -464,10 +464,13 @@ typedef enum heim_json_flags {
     HEIM_JSON_F_CNULL2JSNULL = 32,
     HEIM_JSON_F_TRY_DECODE_DATA = 64,
     HEIM_JSON_F_ONE_LINE = 128,
     /* The default is to indent with one tab */
     HEIM_JSON_F_INDENT2 = 256,
     HEIM_JSON_F_INDENT4 = 512,
     HEIM_JSON_F_INDENT8 = 1024,
+    /* The default is to escape non-ASCII unless the locale is UTF-8 */
+    HEIM_JSON_F_ESCAPE_NON_ASCII = 2048,
+    HEIM_JSON_F_NO_ESCAPE_NON_ASCII = 4096,
 } heim_json_flags_t;
 
 heim_object_t heim_json_create(const char *, size_t, heim_json_flags_t,
diff --git a/lib/base/json.c b/lib/base/json.c
index 605d34a6d..794aee425 100644
--- a/lib/base/json.c
+++ b/lib/base/json.c
@@ -37,6 +37,10 @@
 #include
 #include
 
+#ifndef WIN32
+#include <langinfo.h>
+#endif
+
 static heim_base_once_t heim_json_once = HEIM_BASE_ONCE_INIT;
 static heim_string_t heim_tid_data_uuid_key = NULL;
 static const char base64_chars[] =
@@ -136,6 +140,74 @@ dict2json(heim_object_t key, heim_object_t value, void *ctx)
     }
 }
 
+#ifndef WIN32
+/*
+ * heim_base_once_f() callback: set *(int *)ptr to 1 if the codeset reported
+ * by nl_langinfo(CODESET) for the current locale is UTF-8, else to 0.
+ */
+static void
+init_is_utf8(void *ptr)
+{
+    const char *codeset = nl_langinfo(CODESET);
+
+    /* Accept "UTF8" too: not every platform spells the codeset "UTF-8" */
+    *(int *)ptr = strcasecmp("utf-8", codeset) == 0 ||
+                  strcasecmp("utf8", codeset) == 0;
+}
+#endif
+
+/*
+ * Returns non-zero if the locale's codeset is UTF-8, and zero otherwise.
+ *
+ * The result is obtained through heim_base_once_f() and kept in a static;
+ * presumably that runs only once, so a later locale change would not be
+ * seen.  On WIN32 this is not implemented yet and always returns zero.
+ */
+int
+heim_locale_is_utf8(void)
+{
+#ifdef WIN32
+    return 0; /* XXX Implement */
+#else
+    static int locale_is_utf8 = -1;
+    static heim_base_once_t once = HEIM_BASE_ONCE_INIT;
+
+    heim_base_once_f(&once, &locale_is_utf8, init_is_utf8);
+    return locale_is_utf8;
+#endif
+}
+
+/*
+ * Emit the 2- or 3-byte UTF-8 sequence at `p' as a JSON \uXXXX escape (four
+ * upper-case hex digits) via j->out.  The bytes are decoded as-is, without
+ * validation here; an `nbytes' other than 2 or 3 aborts.
+ */
+static void
+out_escaped_bmp(struct twojson *j, const unsigned char *p, int nbytes)
+{
+    unsigned char e[sizeof("\\u0000")];
+    unsigned codepoint;
+
+    if (nbytes == 2)
+        codepoint = ((p[0] & 0x1f) << 6) | (p[1] & 0x3f);
+    else if (nbytes == 3)
+        codepoint = ((p[0] & 0x0f) << 12) | ((p[1] & 0x3f) << 6) | (p[2] & 0x3f);
+    else
+        abort();
+    e[0] = '\\';
+    e[1] = 'u';
+    e[2] = codepoint >> 12;
+    e[2] += (e[2] < 10) ? '0' : ('A' - 10);
+    e[3] = (codepoint >> 8) & 0x0f;
+    e[3] += (e[3] < 10) ? '0' : ('A' - 10);
+    e[4] = (codepoint >> 4) & 0x0f;
+    e[4] += (e[4] < 10) ? '0' : ('A' - 10);
+    e[5] = codepoint & 0x0f;
+    e[5] += (e[5] < 10) ? '0' : ('A' - 10);
+    e[6] = '\0';
+    j->out(j->ctx, (char *)e);
+}
+
 static int
 base2json(heim_object_t obj, struct twojson *j, int skip_indent)
 {
@@ -265,6 +337,11 @@ base2json(heim_object_t obj, struct twojson *j, int skip_indent)
                     } else if (!good) {
                         return 1;
                     }
+                    if (j->flags & HEIM_JSON_F_ESCAPE_NON_ASCII) {
+                        out_escaped_bmp(j, p, 2);
+                        p += 1;
+                        continue;
+                    }
                     e[0] = c;
                     e[1] = p[1];
                     e[2] = '\0';
@@ -289,6 +366,11 @@ base2json(heim_object_t obj, struct twojson *j, int skip_indent)
                     } else if (!good) {
                         return 1;
                     }
+                    if (j->flags & HEIM_JSON_F_ESCAPE_NON_ASCII) {
+                        out_escaped_bmp(j, p, 3);
+                        p += 2;
+                        continue;
+                    }
                     e[0] = c;
                     e[1] = p[1];
                     e[2] = p[2];
@@ -451,6 +533,11 @@ heim_base2json(heim_object_t obj, void *ctx, heim_json_flags_t flags,
     j.ret = 0;
     j.first = 1;
 
+    /* Escape non-ASCII unless told not to or the locale is UTF-8 */
+    if (!(flags & HEIM_JSON_F_NO_ESCAPE_NON_ASCII) &&
+        !heim_locale_is_utf8())
+        j.flags |= HEIM_JSON_F_ESCAPE_NON_ASCII;
+
     return base2json(obj, &j, 0);
 }
 
diff --git a/lib/base/string.c b/lib/base/string.c
index 7682fb39d..5e79e00b1 100644
--- a/lib/base/string.c
+++ b/lib/base/string.c
@@ -239,7 +239,7 @@ heim_string_t
 __heim_string_constant(const char *_str)
 {
     static HEIMDAL_MUTEX mutex = HEIMDAL_MUTEX_INITIALIZER;
-    static heim_base_once_t once;
+    static heim_base_once_t once = HEIM_BASE_ONCE_INIT;
     static heim_dict_t dict = NULL;
     heim_string_t s, s2;
 
diff --git a/lib/base/test_base.c b/lib/base/test_base.c
index c2a27c9b2..e26de0905 100644
--- a/lib/base/test_base.c
+++ b/lib/base/test_base.c
@@ -53,6 +53,7 @@
 #include
 #ifndef WIN32
 #include
+#include <locale.h>
 #endif
 #ifdef HAVE_IO_H
 #include
@@ -290,7 +291,9 @@ test_json(void)
                 "\xe0\xA0\x81"
                 "\xe8\x80\x81"
                 "\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string");
-    o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
+    o2 = heim_json_copy_serialize(o,
+                                  HEIM_JSON_F_STRICT |
+                                  HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
     heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001Eá߿ࠁ老\\uD834\\uDD1E\"",
                        heim_string_get_utf8(o2)) == 0,
                 "JSON encoding changed; please check that it is till valid");
@@ -319,7 +322,9 @@ test_json(void)
                 "\xe0\xA0\x81"
                 "\xe8\x80\x81"
                 "\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string");
-    o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
+    o2 = heim_json_copy_serialize(o,
+                                  HEIM_JSON_F_STRICT |
+                                  HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
     heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001Eá߿ࠁ老\\uD834\\uDD1E\"",
                        heim_string_get_utf8(o2)) == 0,
                 "JSON encoding changed; please check that it is till valid");
@@ -329,6 +334,51 @@ test_json(void)
     heim_release(o2);
     heim_release(o);
 
+    /*
+     * Test HEIM_JSON_F_ESCAPE_NON_ASCII.
+     *
+     * Also test that non-ASCII is escaped by default when the flag is not
+     * given: main() calls setlocale(LC_ALL, "C"), so the locale is not UTF-8
+     * and the serializer should escape.
+     */
+    o = heim_json_create("\""
+                         "\\b\\f\\n\\r\\t" /* ASCII C-like escapes */
+                         "\x1e" /* ASCII control character w/o C-like escape */
+                         "\xc3\xa1"
+                         "\xdf\xbf"
+                         "\xe0\xa0\x81"
+                         "\xE8\x80\x81"
+                         "\\uD834\\udd1e" /* U+1D11E, as shown in RFC 7159 */
+                         "\"", 10, 0, NULL);
+    heim_assert(o != NULL, "string");
+    heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
+    heim_assert(strcmp(
+                "\b\f\n\r\t"
+                "\x1e"
+                "\xc3\xa1"
+                "\xdf\xbf"
+                "\xe0\xA0\x81"
+                "\xe8\x80\x81"
+                "\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string");
+    o2 = heim_json_copy_serialize(o,
+                                  HEIM_JSON_F_STRICT |
+                                  HEIM_JSON_F_ESCAPE_NON_ASCII, NULL);
+    heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001E\\u00E1\\u07FF\\u0801\\u8001"
+                       "\\uD834\\uDD1E\"",
+                       heim_string_get_utf8(o2)) == 0,
+                "JSON encoding changed; please check that it is till valid");
+    heim_release(o2);
+    o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
+    heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001E\\u00E1\\u07FF\\u0801\\u8001"
+                       "\\uD834\\uDD1E\"",
+                       heim_string_get_utf8(o2)) == 0,
+                "JSON encoding changed; please check that it is till valid");
+    o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
+    heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip");
+    heim_release(o3);
+    heim_release(o2);
+    heim_release(o);
+
     /* Test rejection of unescaped ASCII control characters */
     o = heim_json_create("\"\b\\f\"", 10, HEIM_JSON_F_STRICT, NULL);
     heim_assert(o == NULL, "strict parse accepted bad input");
@@ -339,7 +389,9 @@ test_json(void)
     heim_assert(o != NULL, "string");
     heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
     heim_assert(strcmp("\b\f", heim_string_get_utf8(o)) == 0, "wrong string");
-    o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
+    o2 = heim_json_copy_serialize(o,
+                                  HEIM_JSON_F_STRICT |
+                                  HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
     heim_assert(strcmp("\"\\b\\f\"", heim_string_get_utf8(o2)) == 0,
                 "JSON encoding changed; please check that it is till valid");
     o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
@@ -359,7 +411,9 @@ test_json(void)
     heim_assert(o != NULL, "malformed string rejected (not strict)");
     heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
     heim_assert(strcmp(" ", heim_string_get_utf8(o)) == 0, "wrong string");
-    o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
+    o2 = heim_json_copy_serialize(o,
+                                  HEIM_JSON_F_STRICT |
+                                  HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
     heim_assert(strcmp("\" \"", heim_string_get_utf8(o2)) == 0,
                 "JSON encoding changed; please check that it is till valid");
     o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
@@ -383,7 +437,9 @@ test_json(void)
     heim_assert(strcmp(
                 "\xe8\x80\x81"
                 "\\uD834\\udd", heim_string_get_utf8(o)) == 0, "wrong string");
-    o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
+    o2 = heim_json_copy_serialize(o,
+                                  HEIM_JSON_F_STRICT |
+                                  HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
     heim_assert(strcmp("\"老\\\\uD834\\\\udd\"",
                        heim_string_get_utf8(o2)) == 0,
                 "JSON encoding changed; please check that it is till valid");
@@ -408,7 +464,9 @@ test_json(void)
     heim_assert(strcmp(
                 "\xe8\x80\x81"
                 "\\uD83", heim_string_get_utf8(o)) == 0, "wrong string");
-    o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
+    o2 = heim_json_copy_serialize(o,
+                                  HEIM_JSON_F_STRICT |
+                                  HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
     heim_assert(strcmp("\"老\\\\uD83\"",
                        heim_string_get_utf8(o2)) == 0,
                 "JSON encoding changed; please check that it is till valid");
@@ -428,9 +486,11 @@ test_json(void)
     heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
     heim_assert(strcmp("\xe8\x80", heim_string_get_utf8(o)) == 0,
                 "wrong string");
-    o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
+    o2 = heim_json_copy_serialize(o,
+                                  HEIM_JSON_F_STRICT |
+                                  HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
     heim_assert(o2 == NULL, "malformed string serialized");
-    o2 = heim_json_copy_serialize(o, 0, NULL);
+    o2 = heim_json_copy_serialize(o, HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
     o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
     heim_assert(o3 == NULL, "malformed string accepted (not strict)");
     o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL);
@@ -1225,6 +1285,11 @@ main(int argc, char **argv)
 {
     int res = 0;
 
+#ifndef WIN32
+    setlocale(LC_ALL, "C");
+    heim_assert(!heim_locale_is_utf8(), "setlocale(LC_ALL, \"C\") failed?");
+#endif
+
     res |= test_memory();
     res |= test_mutex();
     res |= test_rwlock();
diff --git a/lib/base/version-script.map b/lib/base/version-script.map
index 70188985c..9493ee692 100644
--- a/lib/base/version-script.map
+++ b/lib/base/version-script.map
@@ -148,6 +148,7 @@ HEIMDAL_BASE_1.0 {
 		heim_json_create_with_bytes;
 		heim_json_eq;
 		heim_load_plugins;
+		heim_locale_is_utf8;
 		heim_log;
 		heim_log_msg;
 		_heim_make_permanent;