base: Add JSON string non-ASCII escaping options
- Add HEIM_JSON_F_ESCAPE_NON_ASCII to indicate that non-ASCII characters must be escaped as \uXXXX.
- Add HEIM_JSON_F_NO_ESCAPE_NON_ASCII to force BMP codepoints to be output unescaped.
- If the locale's codeset is not UTF-8 and HEIM_JSON_F_NO_ESCAPE_NON_ASCII is not set, then HEIM_JSON_F_ESCAPE_NON_ASCII is set automatically.
This commit is contained in:
@@ -464,10 +464,12 @@ typedef enum heim_json_flags {
|
|||||||
HEIM_JSON_F_CNULL2JSNULL = 32,
|
HEIM_JSON_F_CNULL2JSNULL = 32,
|
||||||
HEIM_JSON_F_TRY_DECODE_DATA = 64,
|
HEIM_JSON_F_TRY_DECODE_DATA = 64,
|
||||||
HEIM_JSON_F_ONE_LINE = 128,
|
HEIM_JSON_F_ONE_LINE = 128,
|
||||||
|
HEIM_JSON_F_ESCAPE_NON_ASCII = 256,
|
||||||
|
HEIM_JSON_F_NO_ESCAPE_NON_ASCII = 512,
|
||||||
/* The default is to indent with one tab */
|
/* The default is to indent with one tab */
|
||||||
HEIM_JSON_F_INDENT2 = 256,
|
HEIM_JSON_F_INDENT2 = 1024,
|
||||||
HEIM_JSON_F_INDENT4 = 512,
|
HEIM_JSON_F_INDENT4 = 2048,
|
||||||
HEIM_JSON_F_INDENT8 = 1024,
|
HEIM_JSON_F_INDENT8 = 4096,
|
||||||
} heim_json_flags_t;
|
} heim_json_flags_t;
|
||||||
|
|
||||||
heim_object_t heim_json_create(const char *, size_t, heim_json_flags_t,
|
heim_object_t heim_json_create(const char *, size_t, heim_json_flags_t,
|
||||||
|
@@ -37,6 +37,10 @@
|
|||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <base64.h>
|
#include <base64.h>
|
||||||
|
|
||||||
|
#ifndef WIN32
|
||||||
|
#include <langinfo.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
static heim_base_once_t heim_json_once = HEIM_BASE_ONCE_INIT;
|
static heim_base_once_t heim_json_once = HEIM_BASE_ONCE_INIT;
|
||||||
static heim_string_t heim_tid_data_uuid_key = NULL;
|
static heim_string_t heim_tid_data_uuid_key = NULL;
|
||||||
static const char base64_chars[] =
|
static const char base64_chars[] =
|
||||||
@@ -136,6 +140,54 @@ dict2json(heim_object_t key, heim_object_t value, void *ctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef WIN32
|
||||||
|
/*
 * Return non-zero if `codeset' names the UTF-8 encoding, 0 otherwise.
 *
 * The comparison ignores case and accepts both the "UTF-8" spelling and
 * the hyphen-less "utf8" spelling that some platforms report from
 * nl_langinfo(CODESET).  A NULL `codeset' yields 0.
 */
static int
codeset_is_utf8(const char *codeset)
{
    if (codeset == NULL)
        return 0;
    return strcasecmp(codeset, "utf-8") == 0 ||
           strcasecmp(codeset, "utf8") == 0;
}

/*
 * One-time initializer for heim_locale_is_utf8().
 *
 * `ptr' must point to an int; it is set to 1 if the current locale's
 * codeset, as reported by nl_langinfo(CODESET), is UTF-8 and to 0
 * otherwise.
 */
static void
init_is_utf8(void *ptr)
{
    *(int *)ptr = codeset_is_utf8(nl_langinfo(CODESET));
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int
|
||||||
|
heim_locale_is_utf8(void)
|
||||||
|
{
|
||||||
|
#ifdef WIN32
|
||||||
|
return 0; /* XXX Implement */
|
||||||
|
#else
|
||||||
|
static int locale_is_utf8 = -1;
|
||||||
|
static heim_base_once_t once = HEIM_BASE_ONCE_INIT;
|
||||||
|
|
||||||
|
heim_base_once_f(&once, &locale_is_utf8, init_is_utf8);
|
||||||
|
return locale_is_utf8;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
out_escaped_bmp(struct twojson *j, const unsigned char *p, int nbytes)
|
||||||
|
{
|
||||||
|
unsigned char e[sizeof("\\u0000")];
|
||||||
|
unsigned codepoint;
|
||||||
|
|
||||||
|
if (nbytes == 2)
|
||||||
|
codepoint = ((p[0] & 0x1f) << 6) | (p[1] & 0x3f);
|
||||||
|
else if (nbytes == 3)
|
||||||
|
codepoint = ((p[0] & 0x0f) << 12) | ((p[1] & 0x3f) << 6) | (p[2] & 0x3f);
|
||||||
|
else
|
||||||
|
abort();
|
||||||
|
e[0] = '\\';
|
||||||
|
e[1] = 'u';
|
||||||
|
e[2] = codepoint >> 12;
|
||||||
|
e[2] += (e[2] < 10) ? '0' : ('A' - 10);
|
||||||
|
e[3] = (codepoint >> 8) & 0x0f;
|
||||||
|
e[3] += (e[3] < 10) ? '0' : ('A' - 10);
|
||||||
|
e[4] = (codepoint >> 4) & 0x0f;
|
||||||
|
e[4] += (e[4] < 10) ? '0' : ('A' - 10);
|
||||||
|
e[5] = codepoint & 0x0f;
|
||||||
|
e[5] += (e[5] < 10) ? '0' : ('A' - 10);
|
||||||
|
e[6] = '\0';
|
||||||
|
j->out(j->ctx, (char *)e);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
base2json(heim_object_t obj, struct twojson *j, int skip_indent)
|
base2json(heim_object_t obj, struct twojson *j, int skip_indent)
|
||||||
{
|
{
|
||||||
@@ -265,6 +317,11 @@ base2json(heim_object_t obj, struct twojson *j, int skip_indent)
|
|||||||
} else if (!good) {
|
} else if (!good) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
if (j->flags & HEIM_JSON_F_ESCAPE_NON_ASCII) {
|
||||||
|
out_escaped_bmp(j, p, 2);
|
||||||
|
p += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
e[0] = c;
|
e[0] = c;
|
||||||
e[1] = p[1];
|
e[1] = p[1];
|
||||||
e[2] = '\0';
|
e[2] = '\0';
|
||||||
@@ -289,6 +346,11 @@ base2json(heim_object_t obj, struct twojson *j, int skip_indent)
|
|||||||
} else if (!good) {
|
} else if (!good) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
if (j->flags & HEIM_JSON_F_ESCAPE_NON_ASCII) {
|
||||||
|
out_escaped_bmp(j, p, 3);
|
||||||
|
p += 2;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
e[0] = c;
|
e[0] = c;
|
||||||
e[1] = p[1];
|
e[1] = p[1];
|
||||||
e[2] = p[2];
|
e[2] = p[2];
|
||||||
@@ -451,6 +513,10 @@ heim_base2json(heim_object_t obj, void *ctx, heim_json_flags_t flags,
|
|||||||
j.ret = 0;
|
j.ret = 0;
|
||||||
j.first = 1;
|
j.first = 1;
|
||||||
|
|
||||||
|
if (!(flags & HEIM_JSON_F_NO_ESCAPE_NON_ASCII) &&
|
||||||
|
!heim_locale_is_utf8())
|
||||||
|
j.flags |= HEIM_JSON_F_ESCAPE_NON_ASCII;
|
||||||
|
|
||||||
return base2json(obj, &j, 0);
|
return base2json(obj, &j, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -239,7 +239,7 @@ heim_string_t
|
|||||||
__heim_string_constant(const char *_str)
|
__heim_string_constant(const char *_str)
|
||||||
{
|
{
|
||||||
static HEIMDAL_MUTEX mutex = HEIMDAL_MUTEX_INITIALIZER;
|
static HEIMDAL_MUTEX mutex = HEIMDAL_MUTEX_INITIALIZER;
|
||||||
static heim_base_once_t once;
|
static heim_base_once_t once = HEIM_BASE_ONCE_INIT;
|
||||||
static heim_dict_t dict = NULL;
|
static heim_dict_t dict = NULL;
|
||||||
heim_string_t s, s2;
|
heim_string_t s, s2;
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@
|
|||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
#include <sys/file.h>
|
#include <sys/file.h>
|
||||||
|
#include <locale.h>
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAVE_IO_H
|
#ifdef HAVE_IO_H
|
||||||
#include <io.h>
|
#include <io.h>
|
||||||
@@ -290,7 +291,9 @@ test_json(void)
|
|||||||
"\xe0\xA0\x81"
|
"\xe0\xA0\x81"
|
||||||
"\xe8\x80\x81"
|
"\xe8\x80\x81"
|
||||||
"\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string");
|
"\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
o2 = heim_json_copy_serialize(o,
|
||||||
|
HEIM_JSON_F_STRICT |
|
||||||
|
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||||
heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001Eá߿ࠁ老\\uD834\\uDD1E\"",
|
heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001Eá߿ࠁ老\\uD834\\uDD1E\"",
|
||||||
heim_string_get_utf8(o2)) == 0,
|
heim_string_get_utf8(o2)) == 0,
|
||||||
"JSON encoding changed; please check that it is till valid");
|
"JSON encoding changed; please check that it is till valid");
|
||||||
@@ -319,7 +322,9 @@ test_json(void)
|
|||||||
"\xe0\xA0\x81"
|
"\xe0\xA0\x81"
|
||||||
"\xe8\x80\x81"
|
"\xe8\x80\x81"
|
||||||
"\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string");
|
"\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
o2 = heim_json_copy_serialize(o,
|
||||||
|
HEIM_JSON_F_STRICT |
|
||||||
|
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||||
heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001Eá߿ࠁ老\\uD834\\uDD1E\"",
|
heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001Eá߿ࠁ老\\uD834\\uDD1E\"",
|
||||||
heim_string_get_utf8(o2)) == 0,
|
heim_string_get_utf8(o2)) == 0,
|
||||||
"JSON encoding changed; please check that it is till valid");
|
"JSON encoding changed; please check that it is till valid");
|
||||||
@@ -329,6 +334,51 @@ test_json(void)
|
|||||||
heim_release(o2);
|
heim_release(o2);
|
||||||
heim_release(o);
|
heim_release(o);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test HEIM_JSON_F_ESCAPE_NON_ASCII.
|
||||||
|
*
|
||||||
|
* Also test that we get escaped non-ASCII because we're in a not-UTF-8
|
||||||
|
* locale, since we setlocale(LC_ALL, "C"), so we should escape non-ASCII
|
||||||
|
* by default.
|
||||||
|
*/
|
||||||
|
o = heim_json_create("\""
|
||||||
|
"\\b\\f\\n\\r\\t" /* ASCII C-like escapes */
|
||||||
|
"\x1e" /* ASCII control character w/o C-like escape */
|
||||||
|
"\xc3\xa1"
|
||||||
|
"\xdf\xbf"
|
||||||
|
"\xe0\xa0\x81"
|
||||||
|
"\xE8\x80\x81"
|
||||||
|
"\\uD834\\udd1e" /* U+1D11E, as shown in RFC 7159 */
|
||||||
|
"\"", 10, 0, NULL);
|
||||||
|
heim_assert(o != NULL, "string");
|
||||||
|
heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
|
||||||
|
heim_assert(strcmp(
|
||||||
|
"\b\f\n\r\t"
|
||||||
|
"\x1e"
|
||||||
|
"\xc3\xa1"
|
||||||
|
"\xdf\xbf"
|
||||||
|
"\xe0\xA0\x81"
|
||||||
|
"\xe8\x80\x81"
|
||||||
|
"\xf0\x9d\x84\x9e", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||||
|
o2 = heim_json_copy_serialize(o,
|
||||||
|
HEIM_JSON_F_STRICT |
|
||||||
|
HEIM_JSON_F_ESCAPE_NON_ASCII, NULL);
|
||||||
|
heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001E\\u00E1\\u07FF\\u0801\\u8001"
|
||||||
|
"\\uD834\\uDD1E\"",
|
||||||
|
heim_string_get_utf8(o2)) == 0,
|
||||||
|
"JSON encoding changed; please check that it is till valid");
|
||||||
|
heim_release(o2);
|
||||||
|
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
||||||
|
heim_assert(strcmp("\"\\b\\f\\n\\r\\t\\u001E\\u00E1\\u07FF\\u0801\\u8001"
|
||||||
|
"\\uD834\\uDD1E\"",
|
||||||
|
heim_string_get_utf8(o2)) == 0,
|
||||||
|
"JSON encoding changed; please check that it is till valid");
|
||||||
|
o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
|
||||||
|
heim_assert(heim_json_eq(o, o3), "JSON text did not round-trip");
|
||||||
|
heim_release(o3);
|
||||||
|
heim_release(o2);
|
||||||
|
heim_release(o);
|
||||||
|
|
||||||
/* Test rejection of unescaped ASCII control characters */
|
/* Test rejection of unescaped ASCII control characters */
|
||||||
o = heim_json_create("\"\b\\f\"", 10, HEIM_JSON_F_STRICT, NULL);
|
o = heim_json_create("\"\b\\f\"", 10, HEIM_JSON_F_STRICT, NULL);
|
||||||
heim_assert(o == NULL, "strict parse accepted bad input");
|
heim_assert(o == NULL, "strict parse accepted bad input");
|
||||||
@@ -339,7 +389,9 @@ test_json(void)
|
|||||||
heim_assert(o != NULL, "string");
|
heim_assert(o != NULL, "string");
|
||||||
heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
|
heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
|
||||||
heim_assert(strcmp("\b\f", heim_string_get_utf8(o)) == 0, "wrong string");
|
heim_assert(strcmp("\b\f", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
o2 = heim_json_copy_serialize(o,
|
||||||
|
HEIM_JSON_F_STRICT |
|
||||||
|
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||||
heim_assert(strcmp("\"\\b\\f\"", heim_string_get_utf8(o2)) == 0,
|
heim_assert(strcmp("\"\\b\\f\"", heim_string_get_utf8(o2)) == 0,
|
||||||
"JSON encoding changed; please check that it is till valid");
|
"JSON encoding changed; please check that it is till valid");
|
||||||
o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
|
o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
|
||||||
@@ -359,7 +411,9 @@ test_json(void)
|
|||||||
heim_assert(o != NULL, "malformed string rejected (not strict)");
|
heim_assert(o != NULL, "malformed string rejected (not strict)");
|
||||||
heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
|
heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
|
||||||
heim_assert(strcmp(" ", heim_string_get_utf8(o)) == 0, "wrong string");
|
heim_assert(strcmp(" ", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
o2 = heim_json_copy_serialize(o,
|
||||||
|
HEIM_JSON_F_STRICT |
|
||||||
|
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||||
heim_assert(strcmp("\" \"", heim_string_get_utf8(o2)) == 0,
|
heim_assert(strcmp("\" \"", heim_string_get_utf8(o2)) == 0,
|
||||||
"JSON encoding changed; please check that it is till valid");
|
"JSON encoding changed; please check that it is till valid");
|
||||||
o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
|
o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
|
||||||
@@ -383,7 +437,9 @@ test_json(void)
|
|||||||
heim_assert(strcmp(
|
heim_assert(strcmp(
|
||||||
"\xe8\x80\x81"
|
"\xe8\x80\x81"
|
||||||
"\\uD834\\udd", heim_string_get_utf8(o)) == 0, "wrong string");
|
"\\uD834\\udd", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
o2 = heim_json_copy_serialize(o,
|
||||||
|
HEIM_JSON_F_STRICT |
|
||||||
|
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||||
heim_assert(strcmp("\"老\\\\uD834\\\\udd\"",
|
heim_assert(strcmp("\"老\\\\uD834\\\\udd\"",
|
||||||
heim_string_get_utf8(o2)) == 0,
|
heim_string_get_utf8(o2)) == 0,
|
||||||
"JSON encoding changed; please check that it is till valid");
|
"JSON encoding changed; please check that it is till valid");
|
||||||
@@ -408,7 +464,9 @@ test_json(void)
|
|||||||
heim_assert(strcmp(
|
heim_assert(strcmp(
|
||||||
"\xe8\x80\x81"
|
"\xe8\x80\x81"
|
||||||
"\\uD83", heim_string_get_utf8(o)) == 0, "wrong string");
|
"\\uD83", heim_string_get_utf8(o)) == 0, "wrong string");
|
||||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
o2 = heim_json_copy_serialize(o,
|
||||||
|
HEIM_JSON_F_STRICT |
|
||||||
|
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||||
heim_assert(strcmp("\"老\\\\uD83\"",
|
heim_assert(strcmp("\"老\\\\uD83\"",
|
||||||
heim_string_get_utf8(o2)) == 0,
|
heim_string_get_utf8(o2)) == 0,
|
||||||
"JSON encoding changed; please check that it is till valid");
|
"JSON encoding changed; please check that it is till valid");
|
||||||
@@ -428,9 +486,11 @@ test_json(void)
|
|||||||
heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
|
heim_assert(heim_get_tid(o) == heim_string_get_type_id(), "string-tid");
|
||||||
heim_assert(strcmp("\xe8\x80",
|
heim_assert(strcmp("\xe8\x80",
|
||||||
heim_string_get_utf8(o)) == 0, "wrong string");
|
heim_string_get_utf8(o)) == 0, "wrong string");
|
||||||
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_STRICT, NULL);
|
o2 = heim_json_copy_serialize(o,
|
||||||
|
HEIM_JSON_F_STRICT |
|
||||||
|
HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||||
heim_assert(o2 == NULL, "malformed string serialized");
|
heim_assert(o2 == NULL, "malformed string serialized");
|
||||||
o2 = heim_json_copy_serialize(o, 0, NULL);
|
o2 = heim_json_copy_serialize(o, HEIM_JSON_F_NO_ESCAPE_NON_ASCII, NULL);
|
||||||
o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
|
o3 = heim_json_create(heim_string_get_utf8(o2), 10, HEIM_JSON_F_STRICT, NULL);
|
||||||
heim_assert(o3 == NULL, "malformed string accepted (not strict)");
|
heim_assert(o3 == NULL, "malformed string accepted (not strict)");
|
||||||
o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL);
|
o3 = heim_json_create(heim_string_get_utf8(o2), 10, 0, NULL);
|
||||||
@@ -1225,6 +1285,11 @@ main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
int res = 0;
|
int res = 0;
|
||||||
|
|
||||||
|
#ifndef WIN32
|
||||||
|
setlocale(LC_ALL, "C");
|
||||||
|
heim_assert(!heim_locale_is_utf8(), "setlocale(LC_ALL, \"C\") failed?");
|
||||||
|
#endif
|
||||||
|
|
||||||
res |= test_memory();
|
res |= test_memory();
|
||||||
res |= test_mutex();
|
res |= test_mutex();
|
||||||
res |= test_rwlock();
|
res |= test_rwlock();
|
||||||
|
@@ -148,6 +148,7 @@ HEIMDAL_BASE_1.0 {
|
|||||||
heim_json_create_with_bytes;
|
heim_json_create_with_bytes;
|
||||||
heim_json_eq;
|
heim_json_eq;
|
||||||
heim_load_plugins;
|
heim_load_plugins;
|
||||||
|
heim_locale_is_utf8;
|
||||||
heim_log;
|
heim_log;
|
||||||
heim_log_msg;
|
heim_log_msg;
|
||||||
_heim_make_permanent;
|
_heim_make_permanent;
|
||||||
|
Reference in New Issue
Block a user