base: Add JSON string non-ASCII escaping options
- Add HEIM_JSON_F_ESCAPE_NON_ASCII to indicate that non-ASCII must be escaped as \uXXXX.
- Add HEIM_JSON_F_NO_ESCAPE_NON_ASCII to force non-escaping of BMP codepoints.
- If the locale's codeset is not UTF-8 and HEIM_JSON_F_NO_ESCAPE_NON_ASCII is not set, then set HEIM_JSON_F_ESCAPE_NON_ASCII.
This commit is contained in:
@@ -464,10 +464,12 @@ typedef enum heim_json_flags {
|
||||
HEIM_JSON_F_CNULL2JSNULL = 32,
|
||||
HEIM_JSON_F_TRY_DECODE_DATA = 64,
|
||||
HEIM_JSON_F_ONE_LINE = 128,
|
||||
HEIM_JSON_F_ESCAPE_NON_ASCII = 256,
|
||||
HEIM_JSON_F_NO_ESCAPE_NON_ASCII = 512,
|
||||
/* The default is to indent with one tab */
|
||||
HEIM_JSON_F_INDENT2 = 256,
|
||||
HEIM_JSON_F_INDENT4 = 512,
|
||||
HEIM_JSON_F_INDENT8 = 1024,
|
||||
HEIM_JSON_F_INDENT2 = 1024,
|
||||
HEIM_JSON_F_INDENT4 = 2048,
|
||||
HEIM_JSON_F_INDENT8 = 4096,
|
||||
} heim_json_flags_t;
|
||||
|
||||
heim_object_t heim_json_create(const char *, size_t, heim_json_flags_t,
|
||||
|
@@ -37,6 +37,10 @@
|
||||
#include <ctype.h>
|
||||
#include <base64.h>
|
||||
|
||||
#ifndef WIN32
|
||||
#include <langinfo.h>
|
||||
#endif
|
||||
|
||||
static heim_base_once_t heim_json_once = HEIM_BASE_ONCE_INIT;
|
||||
static heim_string_t heim_tid_data_uuid_key = NULL;
|
||||
static const char base64_chars[] =
|
||||
@@ -136,6 +140,54 @@ dict2json(heim_object_t key, heim_object_t value, void *ctx)
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef WIN32
|
||||
/*
 * One-time initializer: store 1 in the int that `ptr' points to if the
 * current locale's codeset is UTF-8, and 0 otherwise.
 *
 * `ptr' must point to an int; it is passed as void * so this function can
 * be used as an initialization callback.
 *
 * The codeset name is compared case-insensitively.  Both "UTF-8" and the
 * hyphen-less "UTF8" spelling are accepted, since the exact string returned
 * by nl_langinfo(CODESET) is not standardized across platforms.  A NULL
 * result (not expected from a conforming nl_langinfo()) is treated as
 * "not UTF-8" rather than being handed to strcasecmp().
 */
static void
init_is_utf8(void *ptr)
{
    const char *codeset = nl_langinfo(CODESET);

    *(int *)ptr = codeset != NULL &&
        (strcasecmp(codeset, "utf-8") == 0 ||
         strcasecmp(codeset, "utf8") == 0);
}
|
||||
#endif
|
||||
|
||||
int
|
||||
heim_locale_is_utf8(void)
|
||||
{
|
||||
#ifdef WIN32
|
||||
return 0; /* XXX Implement */
|
||||
#else
|
||||
static int locale_is_utf8 = -1;
|
||||
static heim_base_once_t once = HEIM_BASE_ONCE_INIT;
|
||||
|
||||
heim_base_once_f(&once, &locale_is_utf8, init_is_utf8);
|
||||
return locale_is_utf8;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
out_escaped_bmp(struct twojson *j, const unsigned char *p, int nbytes)
|
||||
{
|
||||
unsigned char e[sizeof("\\u0000")];
|
||||
unsigned codepoint;
|
||||
|
||||
if (nbytes == 2)
|
||||
codepoint = ((p[0] & 0x1f) << 6) | (p[1] & 0x3f);
|
||||
else if (nbytes == 3)
|
||||
codepoint = ((p[0] & 0x0f) << 12) | ((p[1] & 0x3f) << 6) | (p[2] & 0x3f);
|
||||
else
|
||||
abort();
|
||||
e[0] = '\\';
|
||||
e[1] = 'u';
|
||||
e[2] = codepoint >> 12;
|
||||
e[2] += (e[2] < 10) ? '0' : ('A' - 10);
|
||||
e[3] = (codepoint >> 8) & 0x0f;
|
||||
e[3] += (e[3] < 10) ? '0' : ('A' - 10);
|
||||
e[4] = (codepoint >> 4) & 0x0f;
|
||||
e[4] += (e[4] < 10) ? '0' : ('A' - 10);
|
||||
e[5] = codepoint & 0x0f;
|
||||
e[5] += (e[5] < 10) ? '0' : ('A' - 10);
|
||||
e[6] = '\0';
|
||||
j->out(j->ctx, (char *)e);
|
||||
}
|
||||
|
||||
static int
|
||||
base2json(heim_object_t obj, struct twojson *j, int skip_indent)
|
||||
{
|
||||
@@ -265,6 +317,11 @@ base2json(heim_object_t obj, struct twojson *j, int skip_indent)
|
||||
} else if (!good) {
|
||||
return 1;
|
||||
}
|
||||
if (j->flags & HEIM_JSON_F_ESCAPE_NON_ASCII) {
|
||||
out_escaped_bmp(j, p, 2);
|
||||
p += 1;
|
||||
continue;
|
||||
}
|
||||
e[0] = c;
|
||||
e[1] = p[1];
|
||||
e[2] = '\0';
|
||||
@@ -289,6 +346,11 @@ base2json(heim_object_t obj, struct twojson *j, int skip_indent)
|
||||
} else if (!good) {
|
||||
return 1;
|
||||
}
|
||||
if (j->flags & HEIM_JSON_F_ESCAPE_NON_ASCII) {
|
||||
out_escaped_bmp(j, p, 3);
|
||||
p += 2;
|
||||
continue;
|
||||
}
|
||||
e[0] = c;
|
||||
e[1] = p[1];
|
||||
e[2] = p[2];
|
||||
@@ -451,6 +513,10 @@ heim_base2json(heim_object_t obj, void *ctx, heim_json_flags_t flags,
|
||||
j.ret = 0;
|
||||
j.first = 1;
|
||||
|
||||
if (!(flags & HEIM_JSON_F_NO_ESCAPE_NON_ASCII) &&
|
||||
!heim_locale_is_utf8())
|
||||
j.flags |= HEIM_JSON_F_ESCAPE_NON_ASCII;
|
||||
|
||||
return base2json(obj, &j, 0);
|
||||
}
|
||||
|
||||
|
@@ -239,7 +239,7 @@ heim_string_t
|
||||
__heim_string_constant(const char *_str)
|
||||
{
|
||||
static HEIMDAL_MUTEX mutex = HEIMDAL_MUTEX_INITIALIZER;
|
||||
static heim_base_once_t once;
|
||||
static heim_base_once_t once = HEIM_BASE_ONCE_INIT;
|
||||
static heim_dict_t dict = NULL;
|
||||
heim_string_t s, s2;
|
||||
|
||||
|
@@ -53,6 +53,7 @@
|
||||
#include <sys/stat.h>
|
||||
#ifndef WIN32
|
||||
#include <sys/file.h>
|
||||
#include <locale.h>
|
||||
#endif
|
||||
#ifdef HAVE_IO_H
|
||||
#include <io.h>
|
||||
@@ -290,7 +291,9 @@ test_json(void)
|
||||
"\xe0\xA0\x81"
|
||||
"\xe8\x80\x81"
|
||||
"\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
||||
o2 = heim_json_copy_serialize(o,
|
||||
HEIM_JSON_F_STRICT |
|
||||
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||
heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001Eá߿ࠁ老\\uD834\\uDD1E\"",
|
||||
heim_string_get_utf8(o2)) == 0,
|
||||
"JSON encoding changed; please check that it is till valid");
|
||||
@@ -319,7 +322,9 @@ test_json(void)
|
||||
"\xe0\xA0\x81"
|
||||
"\xe8\x80\x81"
|
||||
"\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
||||
o2 = heim_json_copy_serialize(o,
|
||||
HEIM_JSON_F_STRICT |
|
||||
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||
heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001Eá߿ࠁ老\\uD834\\uDD1E\"",
|
||||
heim_string_get_utf8(o2)) == 0,
|
||||
"JSON encoding changed; please check that it is till valid");
|
||||
@@ -329,6 +334,51 @@ test_json(void)
|
||||
heim_release(o2);
|
||||
heim_release(o);
|
||||
|
||||
/*
|
||||
* Test HEIM_JSON_F_ESCAPE_NON_ASCII.
|
||||
*
|
||||
* Also test that we get escaped non-ASCII because we're in a non-UTF-8
|
||||
* locale, since we setlocale(LC_ALL, "C"), so we should escape non-ASCII
|
||||
* by default.
|
||||
*/
|
||||
o = heim_json_create("\""
|
||||
"\\b\\f\\n\\r\\t" /* ASCII C-like escapes */
|
||||
"\x1e" /* ASCII control character w/o C-like escape */
|
||||
"\xc3\xa1"
|
||||
"\xdf\xbf"
|
||||
"\xe0\xa0\x81"
|
||||
"\xE8\x80\x81"
|
||||
"\\uD834\\udd1e" /* U+1D11E, as shown in RFC 7159 */
|
||||
"\"", 10, 0, NULL);
|
||||
heim_assert(o != NULL, "string");
|
||||
heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
|
||||
heim_assert(strcmp(
|
||||
"\b\f\n\r\t"
|
||||
"\x1e"
|
||||
"\xc3\xa1"
|
||||
"\xdf\xbf"
|
||||
"\xe0\xA0\x81"
|
||||
"\xe8\x80\x81"
|
||||
"\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||
o2 = heim_json_copy_serialize(o,
|
||||
HEIM_JSON_F_STRICT |
|
||||
HEIM_JSON_F_ESCAPE_NON_ASCII, NULL);
|
||||
heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001E\\u00E1\\u07FF\\u0801\\u8001"
|
||||
"\\uD834\\uDD1E\"",
|
||||
heim_string_get_utf8(o2)) == 0,
|
||||
"JSON encoding changed; please check that it is till valid");
|
||||
heim_release(o2);
|
||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
||||
heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001E\\u00E1\\u07FF\\u0801\\u8001"
|
||||
"\\uD834\\uDD1E\"",
|
||||
heim_string_get_utf8(o2)) == 0,
|
||||
"JSON encoding changed; please check that it is till valid");
|
||||
o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
|
||||
heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip");
|
||||
heim_release(o3);
|
||||
heim_release(o2);
|
||||
heim_release(o);
|
||||
|
||||
/* Test rejection of unescaped ASCII control characters */
|
||||
o = heim_json_create("\"\b\\f\"", 10, HEIM_JSON_F_STRICT, NULL);
|
||||
heim_assert(o == NULL, "strict parse accepted bad input");
|
||||
@@ -339,7 +389,9 @@ test_json(void)
|
||||
heim_assert(o != NULL, "string");
|
||||
heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
|
||||
heim_assert(strcmp("\b\f", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
||||
o2 = heim_json_copy_serialize(o,
|
||||
HEIM_JSON_F_STRICT |
|
||||
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||
heim_assert(strcmp("\"\\b\\f\"", heim_string_get_utf8(o2)) == 0,
|
||||
"JSON encoding changed; please check that it is till valid");
|
||||
o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
|
||||
@@ -359,7 +411,9 @@ test_json(void)
|
||||
heim_assert(o != NULL, "malformed string rejected (not strict)");
|
||||
heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
|
||||
heim_assert(strcmp(" ", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
||||
o2 = heim_json_copy_serialize(o,
|
||||
HEIM_JSON_F_STRICT |
|
||||
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||
heim_assert(strcmp("\" \"", heim_string_get_utf8(o2)) == 0,
|
||||
"JSON encoding changed; please check that it is till valid");
|
||||
o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
|
||||
@@ -383,7 +437,9 @@ test_json(void)
|
||||
heim_assert(strcmp(
|
||||
"\xe8\x80\x81"
|
||||
"\\uD834\\udd", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
||||
o2 = heim_json_copy_serialize(o,
|
||||
HEIM_JSON_F_STRICT |
|
||||
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||
heim_assert(strcmp("\"老\\\\uD834\\\\udd\"",
|
||||
heim_string_get_utf8(o2)) == 0,
|
||||
"JSON encoding changed; please check that it is till valid");
|
||||
@@ -408,7 +464,9 @@ test_json(void)
|
||||
heim_assert(strcmp(
|
||||
"\xe8\x80\x81"
|
||||
"\\uD83", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
||||
o2 = heim_json_copy_serialize(o,
|
||||
HEIM_JSON_F_STRICT |
|
||||
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||
heim_assert(strcmp("\"老\\\\uD83\"",
|
||||
heim_string_get_utf8(o2)) == 0,
|
||||
"JSON encoding changed; please check that it is till valid");
|
||||
@@ -428,9 +486,11 @@ test_json(void)
|
||||
heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
|
||||
heim_assert(strcmp("\xe8\x80",
|
||||
heim_string_get_utf8(o)) == 0, "wrong string");
|
||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
||||
o2 = heim_json_copy_serialize(o,
|
||||
HEIM_JSON_F_STRICT |
|
||||
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||
heim_assert(o2 == NULL, "malformed string serialized");
|
||||
o2 = heim_json_copy_serialize(o, 0, NULL);
|
||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||
o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
|
||||
heim_assert(o3 == NULL, "malformed string accepted (not strict)");
|
||||
o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL);
|
||||
@@ -1225,6 +1285,11 @@ main(int argc, char **argv)
|
||||
{
|
||||
int res = 0;
|
||||
|
||||
#ifndef WIN32
|
||||
setlocale(LC_ALL, "C");
|
||||
heim_assert(!heim_locale_is_utf8(), "setlocale(LC_ALL, \"C\") failed?");
|
||||
#endif
|
||||
|
||||
res |= test_memory();
|
||||
res |= test_mutex();
|
||||
res |= test_rwlock();
|
||||
|
@@ -148,6 +148,7 @@ HEIMDAL_BASE_1.0 {
|
||||
heim_json_create_with_bytes;
|
||||
heim_json_eq;
|
||||
heim_load_plugins;
|
||||
heim_locale_is_utf8;
|
||||
heim_log;
|
||||
heim_log_msg;
|
||||
_heim_make_permanent;
|
||||
|
Reference in New Issue
Block a user