Files
Joseph Sutton 551f0c32fa base: Fix code spelling
Signed-off-by: Joseph Sutton <josephsutton@catalyst.net.nz>
2023-11-28 21:34:35 -05:00

1472 lines
38 KiB
C

/*
* Copyright (c) 2010 Kungliga Tekniska Högskolan
* (Royal Institute of Technology, Stockholm, Sweden).
* All rights reserved.
*
* Portions Copyright (c) 2010 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the Institute nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "baselocl.h"
#include <ctype.h>
#include <limits.h>
#include <base64.h>
#ifndef WIN32
#include <langinfo.h>
#endif
/* Once-guard used by the public entry points to run json_init_once(). */
static heim_base_once_t heim_json_once = HEIM_BASE_ONCE_INIT;

/*
 * Dict key under which binary data is stored (base64-encoded) when it is
 * serialized as a single-entry dict; assigned by json_init_once().
 */
static heim_string_t heim_tid_data_uuid_key = NULL;

/*
 * One-time initializer: creates the constant string used as the magic
 * "binary data" dict key.
 *
 * @param arg unused
 */
static void
json_init_once(void *arg)
{
    (void)arg;

    heim_tid_data_uuid_key =
        __heim_string_constant("heimdal-type-data-76d7fca2-d0da-4b20-a126-1a10f8a0eae6");
}
/* Serializer state threaded through base2json() and its iteration callbacks. */
struct twojson {
void *ctx; /* opaque argument handed back to out() */
void (*out)(void *, const char *); /* output sink; callers in this file also pass NULL to ask it to drop a trailing '\n' */
size_t indent; /* current nesting depth, consumed by indent() */
heim_json_flags_t flags; /* HEIM_JSON_F_* options */
int ret; /* first non-zero result recorded by array2json()/dict2json() */
int first; /* non-zero until the first element of the current container has been emitted */
};
/* Growable string buffer filled by strbuf_add(). */
struct heim_strbuf {
char *str; /* heap buffer holding the text accumulated so far */
size_t len; /* bytes of str in use, not counting the terminating NUL */
size_t alloced; /* allocated size of str in bytes */
int enomem; /* set once realloc() fails; later strbuf_add() calls become no-ops */
heim_json_flags_t flags; /* HEIM_JSON_F_* options; strbuf_add() checks HEIM_JSON_F_ONE_LINE */
};
/* Forward declaration: array2json() and dict2json() call base2json() before its definition. */
static int
base2json(heim_object_t, struct twojson *, int);
/*
 * Emit the indentation for the current nesting depth.
 *
 * Nothing is emitted when HEIM_JSON_F_ONE_LINE is set.  Otherwise one
 * indentation unit is written per nesting level: two, four or eight spaces
 * for HEIM_JSON_F_INDENT2 / INDENT4 / INDENT8 respectively (checked in that
 * order), or a tab when none of those flags is set.
 *
 * @param j serializer state; j->indent is the nesting depth
 */
static void
indent(struct twojson *j)
{
    const char *unit;
    size_t level;

    if (j->flags & HEIM_JSON_F_ONE_LINE)
        return;

    if (j->flags & HEIM_JSON_F_INDENT2)
        unit = "  ";
    else if (j->flags & HEIM_JSON_F_INDENT4)
        unit = "    ";
    else if (j->flags & HEIM_JSON_F_INDENT8)
        unit = "        ";
    else
        unit = "\t";

    for (level = 0; level < j->indent; level++)
        j->out(j->ctx, unit);
}
/*
 * Array iteration callback: serialize one array element.
 *
 * Does nothing once an error has been recorded in the state.  Every element
 * but the first is preceded by ",\n" (after asking the sink to drop the
 * previous trailing newline).
 *
 * @param value element to serialize
 * @param ctx   the struct twojson serializer state
 * @param stop  unused
 */
static void
array2json(heim_object_t value, void *ctx, int *stop)
{
    struct twojson *state = ctx;

    if (state->ret != 0)
        return;

    if (!state->first) {
        state->out(state->ctx, NULL); /* eat previous '\n' if possible */
        state->out(state->ctx, ",\n");
    }
    state->first = 0;

    state->ret = base2json(value, state, 0);
}
/*
 * Dict iteration callback: serialize one key/value pair.
 *
 * Does nothing once an error has been recorded in the state.  Array, dict
 * and data values go on their own line, one nesting level deeper; every
 * other value follows the key on the same line after ": ".
 *
 * @param key   pair key
 * @param value pair value
 * @param ctx   the struct twojson serializer state
 */
static void
dict2json(heim_object_t key, heim_object_t value, void *ctx)
{
    struct twojson *state = ctx;
    heim_tid_t value_tid;

    if (state->ret != 0)
        return;

    if (!state->first) {
        state->out(state->ctx, NULL); /* eat previous '\n' if possible */
        state->out(state->ctx, ",\n");
    }
    state->first = 0;

    state->ret = base2json(key, state, 0);
    if (state->ret != 0)
        return;

    value_tid = heim_get_tid(value);
    if (value_tid == HEIM_TID_ARRAY ||
        value_tid == HEIM_TID_DICT ||
        value_tid == HEIM_TID_DATA) {
        state->out(state->ctx, ":\n");
        state->indent++;
        state->ret = base2json(value, state, 0);
        if (state->ret != 0)
            return;
        state->indent--;
    } else {
        state->out(state->ctx, ": ");
        state->ret = base2json(value, state, 1);
    }
}
#ifndef WIN32
/*
 * Once-initializer: store in *ptr (an int) whether the locale codeset
 * reported by nl_langinfo(CODESET) is "utf-8", compared case-insensitively.
 */
static void
init_is_utf8(void *ptr)
{
    int *is_utf8 = ptr;
    const char *codeset = nl_langinfo(CODESET);

    *is_utf8 = (strcasecmp("utf-8", codeset) == 0);
}
#endif
/*
 * Report whether the current locale's codeset is UTF-8.
 *
 * The answer is computed once and cached.  On WIN32 this always returns 0.
 *
 * @return non-zero if the locale codeset is UTF-8, 0 otherwise
 */
int
heim_locale_is_utf8(void)
{
#ifndef WIN32
    static heim_base_once_t once = HEIM_BASE_ONCE_INIT;
    static int locale_is_utf8 = -1;

    heim_base_once_f(&once, &locale_is_utf8, init_is_utf8);
    return locale_is_utf8;
#else
    return 0; /* XXX Implement */
#endif
}
/*
 * Emit a two- or three-byte UTF-8 sequence as a JSON "\uXXXX" escape
 * (upper-case hex digits).
 *
 * @param j      serializer state providing the output sink
 * @param p      first byte of the UTF-8 sequence
 * @param nbytes length of the sequence; must be 2 or 3, anything else aborts
 */
static void
out_escaped_bmp(struct twojson *j, const unsigned char *p, int nbytes)
{
    static const char hexdigits[] = "0123456789ABCDEF";
    char esc[sizeof("\\u0000")];
    unsigned codepoint;

    switch (nbytes) {
    case 2:
        codepoint = ((p[0] & 0x1f) << 6) | (p[1] & 0x3f);
        break;
    case 3:
        codepoint = ((p[0] & 0x0f) << 12) | ((p[1] & 0x3f) << 6) | (p[2] & 0x3f);
        break;
    default:
        abort();
    }

    /* A 2- or 3-byte sequence decodes to at most 0xFFFF: four hex digits */
    esc[0] = '\\';
    esc[1] = 'u';
    esc[2] = hexdigits[(codepoint >> 12) & 0x0f];
    esc[3] = hexdigits[(codepoint >> 8) & 0x0f];
    esc[4] = hexdigits[(codepoint >> 4) & 0x0f];
    esc[5] = hexdigits[codepoint & 0x0f];
    esc[6] = '\0';
    j->out(j->ctx, esc);
}
/*
 * Serialize one object as JSON text through j->out().
 *
 * Arrays and dicts are walked with array2json()/dict2json(), which record
 * the first failure in j->ret; that failure is returned to the caller after
 * the closing bracket has been written.
 *
 * Strings are escaped per JSON rules.  Invalid UTF-8 makes the function
 * return 1 when HEIM_JSON_F_STRICT_STRINGS is set and is passed through
 * byte-by-byte otherwise.  Four-byte UTF-8 sequences are always written as
 * an escaped UTF-16 surrogate pair; sequences that do not decode to
 * U+10000..U+10FFFF are treated as invalid UTF-8.
 *
 * Binary data is written either as a bare base64 string
 * (HEIM_JSON_F_NO_DATA_DICT) or as a one-entry dict keyed by the magic
 * heim_tid_data_uuid_key.
 *
 * @param obj         object to serialize; NULL is handled according to
 *                    HEIM_JSON_F_CNULL2JSNULL / HEIM_JSON_F_NO_C_NULL
 * @param j           serializer state
 * @param skip_indent non-zero to suppress indentation before scalar values
 *
 * @return 0 on success, EINVAL/ENOMEM or 1 on failure
 */
static int
base2json(heim_object_t obj, struct twojson *j, int skip_indent)
{
    heim_tid_t type;
    int first = 0;

    if (obj == NULL) {
        if (j->flags & HEIM_JSON_F_CNULL2JSNULL) {
            obj = heim_null_create();
        } else if (j->flags & HEIM_JSON_F_NO_C_NULL) {
            return EINVAL;
        } else {
            indent(j);
            j->out(j->ctx, "<NULL>\n"); /* This is NOT valid JSON! */
            return 0;
        }
    }

    type = heim_get_tid(obj);
    switch (type) {
    case HEIM_TID_ARRAY:
        indent(j);
        j->out(j->ctx, "[\n");
        j->indent++;
        first = j->first;
        j->first = 1;
        heim_array_iterate_f(obj, j, array2json);
        j->indent--;
        if (!j->first)
            j->out(j->ctx, "\n");
        indent(j);
        j->out(j->ctx, "]\n");
        j->first = first;
        /* Do not lose a failure recorded while iterating */
        if (j->ret)
            return j->ret;
        break;

    case HEIM_TID_DICT:
        indent(j);
        j->out(j->ctx, "{\n");
        j->indent++;
        first = j->first;
        j->first = 1;
        heim_dict_iterate_f(obj, j, dict2json);
        j->indent--;
        if (!j->first)
            j->out(j->ctx, "\n");
        indent(j);
        j->out(j->ctx, "}\n");
        j->first = first;
        /* Do not lose a failure recorded while iterating */
        if (j->ret)
            return j->ret;
        break;

    case HEIM_TID_STRING: {
        const unsigned char *s = (const unsigned char *)heim_string_get_utf8(obj);
        const unsigned char *p;
        unsigned int c, cp, ctop, cbot;
        char e[sizeof("\\u0123\\u3210")];
        int good;
        size_t i;

        if (!skip_indent)
            indent(j);
        j->out(j->ctx, "\"");
        for (p = s; (c = *p); p++) {
            switch (c) {
            /* ASCII control characters w/ C-like escapes */
            case '\b': j->out(j->ctx, "\\b"); continue;
            case '\f': j->out(j->ctx, "\\f"); continue;
            case '\n': j->out(j->ctx, "\\n"); continue;
            case '\r': j->out(j->ctx, "\\r"); continue;
            case '\t': j->out(j->ctx, "\\t"); continue;
            /* Other must-escape non-control ASCII characters */
            case '"':  j->out(j->ctx, "\\\""); continue;
            case '\\': j->out(j->ctx, "\\\\"); continue;
            default: break;
            }

            /*
             * JSON string encoding is... complex.
             *
             * Invalid UTF-8 w/  HEIM_JSON_F_STRICT_STRINGS set -> return 1
             *
             * Invalid UTF-8 w/o HEIM_JSON_F_STRICT_STRINGS set -> pass
             * through, a sort of Heimdal WTF-8, but not _the_ WTF-8.
             */
            if (c < 0x20) {
                /* ASCII control character w/o C-like escape */
                e[0] = '\\';
                e[1] = 'u';
                e[2] = '0';
                e[3] = '0';
                e[4] = "0123456789ABCDEF"[c>>4];
                e[5] = "0123456789ABCDEF"[c & 0x0f];
                e[6] = '\0';
                j->out(j->ctx, e);
                continue;
            }
            if (c < 0x80) {
                /* ASCII */
                e[0] = c;
                e[1] = '\0';
                j->out(j->ctx, e);
                continue;
            }
            if ((c & 0xc0) == 0x80) {
                /* UTF-8 bare non-leading byte */
                if (!(j->flags & HEIM_JSON_F_STRICT_STRINGS)) {
                    e[0] = c;
                    e[1] = '\0';
                    j->out(j->ctx, e);
                    continue;
                }
                return 1;
            }
            if ((c & 0xe0) == 0xc0) {
                /* UTF-8 leading byte of two-byte sequence */
                good = 1;
                for (i = 1; i < 2 && good && p[i]; i++) {
                    if ((p[i] & 0xc0) != 0x80)
                        good = 0;
                }
                if (i != 2)
                    good = 0;
                if (!good && !(j->flags & HEIM_JSON_F_STRICT_STRINGS)) {
                    e[0] = c;
                    e[1] = '\0';
                    j->out(j->ctx, e);
                    continue;
                } else if (!good) {
                    return 1;
                }
                if (j->flags & HEIM_JSON_F_ESCAPE_NON_ASCII) {
                    out_escaped_bmp(j, p, 2);
                    p += 1;
                    continue;
                }
                e[0] = c;
                e[1] = p[1];
                e[2] = '\0';
                j->out(j->ctx, e);
                p += 1;
                continue;
            }
            if ((c & 0xf0) == 0xe0) {
                /* UTF-8 leading byte of three-byte sequence */
                good = 1;
                for (i = 1; i < 3 && good && p[i]; i++) {
                    if ((p[i] & 0xc0) != 0x80)
                        good = 0;
                }
                if (i != 3)
                    good = 0;
                if (!good && !(j->flags & HEIM_JSON_F_STRICT_STRINGS)) {
                    e[0] = c;
                    e[1] = '\0';
                    j->out(j->ctx, e);
                    continue;
                } else if (!good) {
                    return 1;
                }
                if (j->flags & HEIM_JSON_F_ESCAPE_NON_ASCII) {
                    out_escaped_bmp(j, p, 3);
                    p += 2;
                    continue;
                }
                e[0] = c;
                e[1] = p[1];
                e[2] = p[2];
                e[3] = '\0';
                j->out(j->ctx, e);
                p += 2;
                continue;
            }

            if (c > 0xf7) {
                /* Invalid UTF-8 leading byte */
                if (!(j->flags & HEIM_JSON_F_STRICT_STRINGS)) {
                    e[0] = c;
                    e[1] = '\0';
                    j->out(j->ctx, e);
                    continue;
                }
                return 1;
            }

            /*
             * A codepoint > U+FFFF, needs encoding a la UTF-16 surrogate
             * pair because JSON takes after JS which uses UTF-16.  Ugly.
             */
            cp = c & 0x7;
            good = 1;
            for (i = 1; i < 4 && good && p[i]; i++) {
                if ((p[i] & 0xc0) == 0x80)
                    cp = (cp << 6) | (p[i] & 0x3f);
                else
                    good = 0;
            }
            if (i != 4)
                good = 0;
            /*
             * Reject over-long encodings (< U+10000) and values beyond
             * U+10FFFF: neither has a surrogate-pair form, and subtracting
             * 0x10000 from a smaller value would wrap around and index the
             * hex-digit table out of bounds below.
             */
            if (good && (cp < 0x10000 || cp > 0x10ffff))
                good = 0;
            if (!good && !(j->flags & HEIM_JSON_F_STRICT_STRINGS)) {
                e[0] = c;
                e[1] = '\0';
                j->out(j->ctx, e);
                continue;
            } else if (!good) {
                return 1;
            }
            p += 3;

            cp -= 0x10000;
            ctop = 0xD800 + (cp >>   10);
            cbot = 0xDC00 + (cp & 0x3ff);

            e[0 ] = '\\';
            e[1 ] = 'u';
            e[2 ] = "0123456789ABCDEF"[(ctop         ) >> 12];
            e[3 ] = "0123456789ABCDEF"[(ctop & 0x0f00) >>  8];
            e[4 ] = "0123456789ABCDEF"[(ctop & 0x00f0) >>  4];
            e[5 ] = "0123456789ABCDEF"[(ctop & 0x000f)      ];
            e[6 ] = '\\';
            e[7 ] = 'u';
            e[8 ] = "0123456789ABCDEF"[(cbot         ) >> 12];
            e[9 ] = "0123456789ABCDEF"[(cbot & 0x0f00) >>  8];
            e[10] = "0123456789ABCDEF"[(cbot & 0x00f0) >>  4];
            e[11] = "0123456789ABCDEF"[(cbot & 0x000f)      ];
            e[12] = '\0';
            j->out(j->ctx, e);
            continue;
        }
        j->out(j->ctx, "\"");
        break;
    }

    case HEIM_TID_DATA: {
        heim_dict_t d;
        heim_string_t v;
        const heim_octet_string *data;
        char *b64 = NULL;
        int ret;

        if (j->flags & HEIM_JSON_F_NO_DATA)
            return EINVAL; /* JSON doesn't do binary */

        data = heim_data_get_data(obj);
        ret = rk_base64_encode(data->data, data->length, &b64);
        if (ret < 0 || b64 == NULL)
            return ENOMEM;

        if (j->flags & HEIM_JSON_F_NO_DATA_DICT) {
            indent(j);
            j->out(j->ctx, "\"");
            j->out(j->ctx, b64); /* base64-encode; hope there's no aliasing */
            j->out(j->ctx, "\"");
            free(b64);
        } else {
            /*
             * JSON has no way to represent binary data, therefore the
             * following is a Heimdal-specific convention.
             *
             * We encode binary data as a dict with a single very magic
             * key with a base64-encoded value.  The magic key includes
             * a uuid, so we're not likely to alias accidentally.
             */
            d = heim_dict_create(2);
            if (d == NULL) {
                free(b64);
                return ENOMEM;
            }
            /* On success `v' owns b64 and frees it with free() */
            v = heim_string_ref_create(b64, free);
            if (v == NULL) {
                free(b64);
                heim_release(d);
                return ENOMEM;
            }
            ret = heim_dict_set_value(d, heim_tid_data_uuid_key, v);
            heim_release(v);
            if (ret) {
                heim_release(d);
                return ENOMEM;
            }
            ret = base2json(d, j, 0);
            heim_release(d);
            if (ret)
                return ret;
        }
        break;
    }

    case HEIM_TID_NUMBER: {
        char num[32];

        if (!skip_indent)
            indent(j);
        snprintf(num, sizeof (num), "%d", heim_number_get_int(obj));
        j->out(j->ctx, num);
        break;
    }

    case HEIM_TID_NULL:
        if (!skip_indent)
            indent(j);
        j->out(j->ctx, "null");
        break;

    case HEIM_TID_BOOL:
        if (!skip_indent)
            indent(j);
        j->out(j->ctx, heim_bool_val(obj) ? "true" : "false");
        break;

    default:
        /* Not a type this serializer knows how to write */
        return 1;
    }
    return 0;
}
/*
 * Common serializer entry point: set up the serializer state and write
 * `obj' through `out'.
 *
 * Unless HEIM_JSON_F_NO_ESCAPE_NON_ASCII is given, non-ASCII characters are
 * escaped when the locale is not UTF-8.
 *
 * @param obj   object to serialize
 * @param ctx   opaque argument passed to `out'
 * @param flags HEIM_JSON_F_* options
 * @param out   output sink
 *
 * @return the result of base2json()
 */
static int
heim_base2json(heim_object_t obj, void *ctx, heim_json_flags_t flags,
               void (*out)(void *, const char *))
{
    struct twojson state;

    heim_base_once_f(&heim_json_once, NULL, json_init_once);

    state.ctx = ctx;
    state.out = out;
    state.indent = 0;
    state.flags = flags;
    state.ret = 0;
    state.first = 1;

    if ((flags & HEIM_JSON_F_NO_ESCAPE_NON_ASCII) == 0 &&
        !heim_locale_is_utf8()) {
        state.flags |= HEIM_JSON_F_ESCAPE_NON_ASCII;
    }

    return base2json(obj, &state, 0);
}
/*
 * JSON parser
 */
/* Parser state shared by the parse_*() functions. */
struct parse_ctx {
unsigned long lineno; /* current line number, reported in error messages */
const uint8_t *p; /* read cursor */
const uint8_t *pstart; /* start of the input; used to compute offsets for error messages */
const uint8_t *pend; /* one past the last input byte */
heim_error_t error; /* error recorded by the parser, or NULL */
size_t depth; /* remaining nesting budget; parse_value() decrements it around each dict/array */
heim_json_flags_t flags; /* HEIM_JSON_F_* options */
};
/* Forward declaration: parse_pair() and parse_item() call parse_value() before its definition. */
static heim_object_t
parse_value(struct parse_ctx *ctx);
/*
 * Skip spaces, tabs, carriage returns and newlines, counting newlines in
 * ctx->lineno.
 *
 * Critically, this succeeds only if something other than whitespace is left
 * to parse, so callers may dereference ctx->p after a successful return.
 *
 * @return 0 if ctx->p now points at a non-whitespace byte, -1 if the input
 *         was exhausted
 */
static int
white_spaces(struct parse_ctx *ctx)
{
    for (; ctx->p < ctx->pend; ctx->p++) {
        switch (*ctx->p) {
        case ' ':
        case '\t':
        case '\r':
            break;
        case '\n':
            ctx->lineno++;
            break;
        default:
            return 0;
        }
    }
    return -1;
}
/*
 * Tell whether a byte is an ASCII decimal digit.
 *
 * @return 1 for '0'..'9', 0 otherwise
 */
static int
is_number(uint8_t n)
{
    /* Bytes below '0' wrap to a large unsigned value and fail the test */
    return (unsigned int)(n - '0') < 10u;
}
static heim_number_t
parse_number(struct parse_ctx *ctx)
{
int number = 0, neg = 1;
if (ctx->p >= ctx->pend)
return NULL;
if (*ctx->p == '-') {
if (ctx->p + 1 >= ctx->pend)
return NULL;
neg = -1;
ctx->p += 1;
}
while (ctx->p < ctx->pend) {
if (is_number(*ctx->p)) {
number = (number * 10) + (*ctx->p - '0');
} else {
break;
}
ctx->p += 1;
}
return heim_number_create(number * neg);
}
/*
 * Read exactly 4 hex digits from ctx->p and return their value.
 *
 * If a non-hex byte is found, or the input ends before four digits have
 * been read, rewind ctx->p to where it was on entry and return -1.
 *
 * @return the 16-bit value (0..0xFFFF), or -1 on failure
 */
static int
unescape_unicode(struct parse_ctx *ctx)
{
    int value = 0;
    int ndigits;

    for (ndigits = 0; ndigits < 4 && ctx->p < ctx->pend; ndigits++, ctx->p++) {
        int digit;

        if (*ctx->p >= '0' && *ctx->p <= '9') {
            digit = *ctx->p - '0';
        } else if (*ctx->p >= 'A' && *ctx->p <= 'F') {
            digit = 10 + *ctx->p - 'A';
        } else if (*ctx->p >= 'a' && *ctx->p <= 'f') {
            digit = 10 + *ctx->p - 'a';
        } else {
            ctx->p -= ndigits;
            return -1;
        }
        value = (value << 4) + digit;
    }

    if (ndigits < 4) {
        /* Ran out of input: a partial value must not be mistaken for success */
        ctx->p -= ndigits;
        return -1;
    }
    return value;
}
/*
 * Append the UTF-8 encoding of code point `c' at *pp.
 *
 * Bytes are written one at a time while there is room before `pend'; *pp is
 * only advanced once the whole sequence has been written.
 *
 * @param ctx  unused
 * @param pp   in/out write cursor
 * @param pend one past the last writable byte
 * @param c    code point to encode; must be below 0x110000
 *
 * @return 1 on success, 0 if the sequence does not fit or `c' is too large
 */
static int
encode_utf8(struct parse_ctx *ctx, char **pp, char *pend, int c)
{
    unsigned char seq[4];
    size_t nbytes;
    size_t k;
    char *cursor = *pp;

    if (c < 0x80) {
        /* ASCII */
        seq[0] = (unsigned char)c;
        nbytes = 1;
    } else if (c < 0x800) {
        /* 2 code unit UTF-8 sequence */
        seq[0] = (unsigned char)(0xc0 | (c >> 6));
        seq[1] = (unsigned char)(0x80 | (c & 0x3f));
        nbytes = 2;
    } else if (c < 0x10000) {
        /* 3 code unit UTF-8 sequence */
        seq[0] = (unsigned char)(0xe0 | (c >> 12));
        seq[1] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
        seq[2] = (unsigned char)(0x80 | (c & 0x3f));
        nbytes = 3;
    } else if (c < 0x110000) {
        /* 4 code unit UTF-8 sequence */
        seq[0] = (unsigned char)(0xf0 | (c >> 18));
        seq[1] = (unsigned char)(0x80 | ((c >> 12) & 0x3f));
        seq[2] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
        seq[3] = (unsigned char)(0x80 | (c & 0x3f));
        nbytes = 4;
    } else {
        return 0;
    }

    for (k = 0; k < nbytes; k++) {
        if (cursor >= pend)
            return 0;
        *(cursor++) = (char)seq[k];
    }
    *pp = cursor;
    return 1;
}
static heim_string_t
parse_string_error(struct parse_ctx *ctx,
char *freeme,
const char *msg)
{
free(freeme);
ctx->error = heim_error_create(EINVAL, "%s at %lu", msg, ctx->lineno);
return NULL;
}
/*
 * Parse a JSON string starting at the opening '"' at ctx->p.
 *
 * Backslash escapes are decoded, and "\uXXXX" escapes (including UTF-16
 * surrogate pairs) are re-encoded as UTF-8.  With
 * HEIM_JSON_F_STRICT_STRINGS, unescaped control characters, malformed
 * UTF-8, unknown escapes and bad surrogate pairs are errors; without it
 * such input is copied through as-is.
 *
 * If the decoded string contains a NUL (raw or as "\u0000") the result is a
 * data object rather than a string object.
 *
 * @return a new string (or data) object, or NULL with ctx->error set
 */
static heim_string_t
parse_string(struct parse_ctx *ctx)
{
    const uint8_t *start;
    heim_object_t o;
    size_t alloc_len = 0;
    size_t need = 0;
    char *p0, *p, *pend;
    int strict = ctx->flags & HEIM_JSON_F_STRICT_STRINGS;
    int binary = 0;

    if (*ctx->p != '"')
        return parse_string_error(ctx, NULL,
                                  "Expected a JSON string but found "
                                  "something else");
    start = ++(ctx->p);

    /* Estimate how many bytes we need to allocate */
    p0 = p = pend = NULL;
    for (need = 1; ctx->p < ctx->pend; ctx->p++) {
        need++;
        if (*ctx->p == '\\')
            ctx->p++;
        else if (*ctx->p == '"')
            break;
    }
    /*
     * Use >=, not ==: a backslash as the very last input byte makes the
     * scan above step over `pend'.
     */
    if (ctx->p >= ctx->pend)
        return parse_string_error(ctx, NULL, "Unterminated JSON string");

    ctx->p = start;
    while (ctx->p < ctx->pend) {
        const unsigned char *p_save;
        int32_t ctop, cbot;

        if (*ctx->p == '"') {
            ctx->p++;
            break;
        }

        /* Allocate or resize our output buffer if need be */
        if (need || p == pend) {
            char *tmp;

            /*
             * Work out how far p is into p0 to re-establish p after
             * the realloc()
             */
            size_t p0_to_p_len = (p - p0);

            tmp = realloc(p0, alloc_len + need + 5 /* slop? */);

            if (tmp == NULL) {
                ctx->error = heim_error_create_enomem();
                free(p0);
                return NULL;
            }
            alloc_len += need + 5;

            /*
             * We have two pointers, p and p0, we want to keep them
             * pointing into the same memory after the realloc()
             */
            p = tmp + p0_to_p_len;
            p0 = tmp;
            pend = p0 + alloc_len;

            need = 0;
        }

        if (*ctx->p != '\\') {
            unsigned char c = *ctx->p;

            /*
             * Not backslashed -> consume now.
             *
             * NOTE: All cases in this block must continue or return w/ error.
             */

            /* Check for unescaped ASCII control characters */
            if (c == '\n') {
                if (strict)
                    return parse_string_error(ctx, p0,
                                              "Unescaped newline in JSON string");
                /* Not strict: count the line; the byte is copied below */
                ctx->lineno++;
            } else if (strict && *ctx->p <= 0x1f) {
                return parse_string_error(ctx, p0, "Unescaped ASCII control character");
            } else if (c == 0) {
                binary = 1;
            }
            if (!strict || c < 0x80) {
                /* ASCII, or not strict -> no need to validate */
                *(p++) = c;
                ctx->p++;
                continue;
            }

            /*
             * Being strict for parsing means we want to detect malformed UTF-8
             * sequences.
             *
             * If not strict then we just go on below and add to `p' whatever
             * bytes we find in `ctx->p' as we find them.
             *
             * For each two-byte sequence we need one more byte in `p[]'.  For
             * each three-byte sequence we need two more bytes in `p[]'.
             *
             * Setting `need' and looping will cause `p0' to be grown.
             *
             * NOTE: All cases in this block must continue or return w/ error.
             */
            if ((c & 0xe0) == 0xc0) {
                /* Two-byte UTF-8 encoding */
                if (pend - p < 2) {
                    need = 2;
                    continue; /* realloc p0 */
                }

                *(p++) = c;
                ctx->p++;
                if (ctx->p == ctx->pend)
                    return parse_string_error(ctx, p0, "Truncated UTF-8");
                c = *(ctx->p++);
                if ((c & 0xc0) != 0x80)
                    return parse_string_error(ctx, p0, "Truncated UTF-8");
                *(p++) = c;
                continue;
            }
            if ((c & 0xf0) == 0xe0) {
                /* Three-byte UTF-8 encoding */
                if (pend - p < 3) {
                    need = 3;
                    continue; /* realloc p0 */
                }

                *(p++) = c;
                ctx->p++;
                if (ctx->p == ctx->pend)
                    return parse_string_error(ctx, p0, "Truncated UTF-8");
                c = *(ctx->p++);
                if ((c & 0xc0) != 0x80)
                    return parse_string_error(ctx, p0, "Truncated UTF-8");
                *(p++) = c;
                /* Same bounds check for the third byte as for the second */
                if (ctx->p == ctx->pend)
                    return parse_string_error(ctx, p0, "Truncated UTF-8");
                c = *(ctx->p++);
                if ((c & 0xc0) != 0x80)
                    return parse_string_error(ctx, p0, "Truncated UTF-8");
                *(p++) = c;
                continue;
            }
            if ((c & 0xf8) == 0xf0)
                return parse_string_error(ctx, p0, "UTF-8 sequence not "
                                          "encoded as escaped UTF-16");
            if ((c & 0xc0) == 0x80)
                return parse_string_error(ctx, p0,
                                          "Invalid UTF-8 "
                                          "(bare continuation code unit)");

            return parse_string_error(ctx, p0, "Not UTF-8");
        }

        /* Backslash-quoted character */
        ctx->p++;
        if (ctx->p == ctx->pend) {
            ctx->error =
                heim_error_create(EINVAL,
                                  "Unterminated JSON string at line %lu",
                                  ctx->lineno);
            free(p0);
            return NULL;
        }
        switch (*ctx->p) {
        /* Simple escapes */
        case  'b': *(p++) = '\b'; ctx->p++; continue;
        case  'f': *(p++) = '\f'; ctx->p++; continue;
        case  'n': *(p++) = '\n'; ctx->p++; continue;
        case  'r': *(p++) = '\r'; ctx->p++; continue;
        case  't': *(p++) = '\t'; ctx->p++; continue;
        case  '"': *(p++) = '"';  ctx->p++; continue;
        case '\\': *(p++) = '\\'; ctx->p++; continue;
        /* Escaped Unicode handled below */
        case  'u':
            /*
             * Worst case for !strict we need 11 bytes for a truncated non-BMP
             * codepoint escape.  Call it 12.
             */
            if (strict)
                need = 4;
            else
                need = 12;
            if ((size_t)(pend - p) < need) {
                /* Go back to the backslash, realloc, try again */
                ctx->p--;
                continue;
            }
            need = 0;
            ctx->p++;
            break;
        default:
            if (!strict) {
                *(p++) = *ctx->p;
                ctx->p++;
                continue;
            }
            ctx->error =
                heim_error_create(EINVAL,
                                  "Invalid backslash escape at line %lu",
                                  ctx->lineno);
            free(p0);
            return NULL;
        }

        /* Unicode code point */
        if (pend - p < 12) {
            need = 12;
            ctx->p -= 2; /* for "\\u" */
            continue; /* This will cause p0 to be realloc'ed */
        }
        p_save = ctx->p;
        cbot = -3;
        ctop = unescape_unicode(ctx);
        if (ctop == -1 && strict)
            return parse_string_error(ctx, p0, "Invalid escaped Unicode");
        if (ctop == -1) {
            /*
             * Not strict; tolerate bad input.
             *
             * Output "\\u" and then loop to treat what we expected to be four
             * digits as if they were not part of an escaped Unicode codepoint.
             */
            ctx->p = p_save;
            if (p < pend)
                *(p++) = '\\';
            if (p < pend)
                *(p++) = 'u';
            continue;
        }
        if (ctop == 0) {
            *(p++) = '\0';
            binary = 1;
            continue;
        }
        if (ctop < 0xd800 || ctop > 0xdfff) {
            /*
             * Any BMP code point outside the surrogate range, including
             * U+E000..U+FFFF, is encoded directly.
             */
            if (!encode_utf8(ctx, &p, pend, ctop))
                return parse_string_error(ctx, p0,
                                          "Internal JSON string parse error");
            continue;
        }

        /*
         * We parsed the top escaped codepoint of a surrogate pair encoding
         * of a non-BMP Unicode codepoint.  What follows must be another
         * escaped codepoint.
         *
         * A low surrogate (0xDC00..0xDFFF) cannot lead a pair.
         */
        if (ctop > 0xdbff)
            ctop = -1;
        if (ctop > -1 && ctx->p < ctx->pend && ctx->p[0] == '\\')
            ctx->p++;
        else
            ctop = -1;
        if (ctop > -1 && ctx->p < ctx->pend && ctx->p[0] == 'u')
            ctx->p++;
        else
            ctop = -1;
        if (ctop > -1) {
            /* Parse the hex digits of the bottom half of the surrogate pair */
            cbot = unescape_unicode(ctx);
            if (cbot == -1 || cbot < 0xdc00 || cbot > 0xdfff)
                ctop = -1;
        }
        if (ctop == -1) {
            if (strict)
                return parse_string_error(ctx, p0,
                                          "Invalid surrogate pair");

            /*
             * Output "\\u", rewind, output the digits of `ctop'.
             *
             * When we get to what should have been the bottom half of the
             * pair we'll necessarily fail to parse it as a normal escaped
             * Unicode codepoint, and once again, rewind and output its digits.
             */
            if (p < pend)
                *(p++) = '\\';
            if (p < pend)
                *(p++) = 'u';
            ctx->p = p_save;
            continue;
        }

        /* Finally decode the surrogate pair then encode as UTF-8 */
        ctop -= 0xd800;
        cbot -= 0xdc00;
        if (!encode_utf8(ctx, &p, pend, 0x10000 + ((ctop << 10) | (cbot & 0x3ff))))
            return parse_string_error(ctx, p0,
                                      "Internal JSON string parse error");
    }

    /* The loop never allocated: the string was empty */
    if (p0 == NULL)
        return heim_string_create("");

    /* NUL-terminate for rk_base64_decode() and plain paranoia */
    if (p0 != NULL && p == pend) {
        /*
         * Work out how far p is into p0 to re-establish p after
         * the realloc()
         */
        size_t p0_to_pend_len = (pend - p0);
        char *tmp = realloc(p0, 1 + p0_to_pend_len);

        if (tmp == NULL) {
            ctx->error = heim_error_create_enomem();
            free(p0);
            return NULL;
        }
        /*
         * We have three pointers, p, pend (which are the same)
         * and p0, we want to keep them pointing into the same
         * memory after the realloc()
         */
        p = tmp + p0_to_pend_len;

        pend = p + 1;
        p0 = tmp;
    }
    *(p++) = '\0';

    /* If there's embedded NULs, it's not a C string */
    if (binary) {
        o = heim_data_ref_create(p0, (p - 1) - p0, free);
        return o;
    }

    /* Sadly this will copy `p0' */
    o = heim_string_create_with_bytes(p0, p - p0);
    free(p0);
    return o;
}
/*
 * Parse one `key : value' pair of a dict and store it in `dict'.
 *
 * With HEIM_JSON_F_STRICT_DICT the key must be a string; otherwise any
 * value parse_value() accepts may be a key.
 *
 * @param dict dict being filled
 * @param ctx  parser state, positioned after the '{' or after a ','
 *
 * @return 1 if a pair was parsed (a following ',' is consumed, a following
 *         '}' is not), 0 if a '}' was found in place of a pair (and
 *         consumed), -1 on error
 */
static int
parse_pair(heim_dict_t dict, struct parse_ctx *ctx)
{
    heim_string_t key;
    heim_object_t value;
    int ret;

    if (white_spaces(ctx))
        return -1;

    if (*ctx->p == '}') {
        ctx->p++;
        return 0;
    }

    if (ctx->flags & HEIM_JSON_F_STRICT_DICT)
        /* JSON allows only string keys */
        key = parse_string(ctx);
    else
        /* heim_dict_t allows any heim_object_t as key */
        key = parse_value(ctx);
    if (key == NULL)
        /* Even heim_dict_t does not allow C NULLs as keys though! */
        return -1;

    if (white_spaces(ctx)) {
        heim_release(key);
        return -1;
    }

    if (*ctx->p != ':') {
        heim_release(key);
        return -1;
    }

    ctx->p += 1; /* safe because we call white_spaces() next */

    if (white_spaces(ctx)) {
        heim_release(key);
        return -1;
    }

    value = parse_value(ctx);
    if (value == NULL &&
        (ctx->error != NULL || (ctx->flags & HEIM_JSON_F_NO_C_NULL))) {
        if (ctx->error == NULL)
            ctx->error = heim_error_create(EINVAL, "Invalid JSON encoding");
        heim_release(key);
        return -1;
    }

    /* The dict holds its own references; drop ours either way */
    ret = heim_dict_set_value(dict, key, value);
    heim_release(key);
    heim_release(value);
    if (ret) {
        /* Don't silently drop the pair if it could not be stored */
        if (ctx->error == NULL)
            ctx->error = heim_error_create_enomem();
        return -1;
    }

    if (white_spaces(ctx))
        return -1;

    if (*ctx->p == '}') {
        /*
         * Return 1 but don't consume the '}' so we can count the one
         * pair in a one-pair dict
         */
        return 1;
    } else if (*ctx->p == ',') {
        ctx->p++;
        return 1;
    }
    return -1;
}
/*
 * Parse a JSON object starting at the '{' at ctx->p.
 *
 * Unless HEIM_JSON_F_NO_DATA_DICT is set, a dict with exactly one pair
 * whose key is the magic heim_tid_data_uuid_key and whose value is a valid
 * base64 string is converted to a data object holding the decoded bytes.
 *
 * @return a new dict (or data object cast to heim_dict_t), or NULL on error
 */
static heim_dict_t
parse_dict(struct parse_ctx *ctx)
{
    heim_dict_t dict;
    size_t count = 0;
    int ret;

    heim_assert(*ctx->p == '{', "string doesn't start with {");

    dict = heim_dict_create(11);
    if (dict == NULL) {
        ctx->error = heim_error_create_enomem();
        return NULL;
    }

    ctx->p += 1; /* safe because parse_pair() calls white_spaces() first */

    while ((ret = parse_pair(dict, ctx)) > 0)
        count++;
    if (ret < 0) {
        heim_release(dict);
        return NULL;
    }
    if (count == 1 && !(ctx->flags & HEIM_JSON_F_NO_DATA_DICT)) {
        heim_object_t v = heim_dict_copy_value(dict, heim_tid_data_uuid_key);

        /*
         * Binary data encoded as a dict with a single magic key with
         * base64-encoded value?  Decode as heim_data_t.
         */
        if (v != NULL && heim_get_tid(v) == HEIM_TID_STRING) {
            const char *b64 = heim_string_get_utf8(v);
            void *buf;
            int len;

            /*
             * The decoded data is never longer than the encoded text.
             * The +1 keeps an empty string from becoming malloc(0), which
             * may return NULL and be misreported as ENOMEM.
             */
            buf = malloc(strlen(b64) + 1);
            if (buf == NULL) {
                heim_release(dict);
                heim_release(v);
                ctx->error = heim_error_create_enomem();
                return NULL;
            }
            len = rk_base64_decode(b64, buf);
            heim_release(v);
            if (len < 0) {
                free(buf);
                return dict; /* assume aliasing accident */
            }
            heim_release(dict);
            return (heim_dict_t)heim_data_ref_create(buf, len, free);
        }
        /* Not a string value: drop the reference we were handed */
        if (v != NULL)
            heim_release(v);
    }
    return dict;
}
/*
 * Parse one element of an array and append it to `array'.
 *
 * @param array array being filled
 * @param ctx   parser state, positioned after the '[' or after a ','
 *
 * @return 1 if an element was parsed and a ',' follows (consumed), 0 if the
 *         closing ']' was reached (consumed), -1 on error
 */
static int
parse_item(heim_array_t array, struct parse_ctx *ctx)
{
    heim_object_t value;

    if (white_spaces(ctx) != 0)
        return -1;

    if (*ctx->p == ']') {
        ctx->p++; /* safe because parse_value() calls white_spaces() first */
        return 0;
    }

    value = parse_value(ctx);
    if (value == NULL) {
        /* A C NULL element is only acceptable without an error or the flag */
        if (ctx->error || (ctx->flags & HEIM_JSON_F_NO_C_NULL))
            return -1;
    }

    heim_array_append_value(array, value);
    heim_release(value);

    if (white_spaces(ctx) != 0)
        return -1;

    switch (*ctx->p) {
    case ']':
        ctx->p++;
        return 0;
    case ',':
        ctx->p++;
        return 1;
    default:
        return -1;
    }
}
static heim_array_t
parse_array(struct parse_ctx *ctx)
{
heim_array_t array = heim_array_create();
int ret;
heim_assert(*ctx->p == '[', "array doesn't start with [");
ctx->p += 1;
while ((ret = parse_item(array, ctx)) > 0)
;
if (ret < 0) {
heim_release(array);
return NULL;
}
return array;
}
/*
 * Parse the next value of any kind: string, dict, array, number, or one of
 * the literals null / true / false.  "true" and "false" are matched
 * case-insensitively, and "<NULL>" is accepted as null unless
 * HEIM_JSON_F_NO_C_NULL is set.
 *
 * ctx->depth is decremented around each nested dict or array; when it is 1
 * on entry to a container the parse fails with "JSON object too deep".
 *
 * @return the parsed object, or NULL (ctx->error is set for unknown input
 *         and excessive nesting)
 */
static heim_object_t
parse_value(struct parse_ctx *ctx)
{
    heim_object_t result;
    size_t remaining;
    uint8_t lead;

    if (white_spaces(ctx) != 0)
        return NULL;

    lead = *ctx->p;

    if (lead == '"')
        return parse_string(ctx);

    if (lead == '{' || lead == '[') {
        if (ctx->depth-- == 1) {
            ctx->error = heim_error_create(EINVAL, "JSON object too deep");
            return NULL;
        }
        if (lead == '{')
            result = parse_dict(ctx);
        else
            result = parse_array(ctx);
        ctx->depth++;
        return result;
    }

    if (lead == '-' || is_number(lead))
        return parse_number(ctx);

    remaining = ctx->pend - ctx->p;

    if ((ctx->flags & HEIM_JSON_F_NO_C_NULL) == 0 &&
        remaining >= 6 && memcmp(ctx->p, "<NULL>", 6) == 0) {
        ctx->p += 6;
        return heim_null_create();
    }
    if (remaining >= 4 && memcmp(ctx->p, "null", 4) == 0) {
        ctx->p += 4;
        return heim_null_create();
    }
    if (remaining >= 4 && strncasecmp((char *)ctx->p, "true", 4) == 0) {
        ctx->p += 4;
        return heim_bool_create(1);
    }
    if (remaining >= 5 && strncasecmp((char *)ctx->p, "false", 5) == 0) {
        ctx->p += 5;
        return heim_bool_create(0);
    }

    ctx->error = heim_error_create(EINVAL, "unknown char %c at %lu line %lu",
                                   (char)*ctx->p,
                                   (unsigned long)(ctx->p - ctx->pstart),
                                   ctx->lineno);
    return NULL;
}
/*
 * Parse a NUL-terminated JSON text.
 *
 * Convenience wrapper around heim_json_create_with_bytes() that measures
 * `string' with strlen().
 *
 * @param string    NUL-terminated JSON text
 * @param max_depth nesting budget handed to the parser
 * @param flags     HEIM_JSON_F_* options
 * @param error     optional error output
 *
 * @return whatever heim_json_create_with_bytes() returns
 */
heim_object_t
heim_json_create(const char *string, size_t max_depth, heim_json_flags_t flags,
                 heim_error_t *error)
{
    size_t length = strlen(string);

    return heim_json_create_with_bytes(string, length, max_depth, flags, error);
}
/*
 * Parse `length' bytes of JSON text.
 *
 * @param data      input bytes
 * @param length    number of input bytes
 * @param max_depth nesting budget; copied to the parser's depth counter
 * @param flags     HEIM_JSON_F_* options
 * @param error     if not NULL, set to NULL on entry and to the parser's
 *                  error (which may itself be NULL) when parsing fails
 *
 * @return the parsed object, or NULL on failure
 */
heim_object_t
heim_json_create_with_bytes(const void *data, size_t length, size_t max_depth,
                            heim_json_flags_t flags, heim_error_t *error)
{
    struct parse_ctx ctx;
    heim_object_t o;

    /* As heim_json_copy_serialize() does: never leave *error unset */
    if (error)
        *error = NULL;

    heim_base_once_f(&heim_json_once, NULL, json_init_once);

    ctx.lineno = 1;
    ctx.p = data;
    ctx.pstart = data;
    ctx.pend = ((uint8_t *)data) + length;
    ctx.error = NULL;
    ctx.flags = flags;
    ctx.depth = max_depth;

    o = parse_value(&ctx);

    if (o == NULL && error) {
        /* Hand the error over to the caller */
        *error = ctx.error;
    } else if (ctx.error) {
        heim_release(ctx.error);
    }

    return o;
}
/*
 * Output sink for heim_show(): write `str' to the stdio stream passed as
 * `ctx'.  A NULL `str' (the "drop trailing newline" request) is ignored.
 */
static void
show_printf(void *ctx, const char *str)
{
    FILE *stream = ctx;

    if (str != NULL)
        fputs(str, stream);
}
/**
 * Dump a heimbase object to stderr (useful from the debugger!)
 *
 * Binary data is written as a bare base64 string rather than as a magic
 * dict.  The serializer's result is not reported.
 *
 * @param obj object to dump using JSON or JSON-like format
 *
 * @addtogroup heimbase
 */
void
heim_show(heim_object_t obj)
{
    const heim_json_flags_t show_flags = HEIM_JSON_F_NO_DATA_DICT;

    (void) heim_base2json(obj, stderr, show_flags, show_printf);
}
/*
 * Output sink that appends `str' to a struct heim_strbuf, growing the
 * buffer as needed and keeping it NUL-terminated.
 *
 * A NULL `str' drops a trailing '\n' from the buffer.  With
 * HEIM_JSON_F_ONE_LINE set in the buffer's flags, a trailing '\n' is also
 * dropped after every append.  Once an allocation has failed the enomem
 * flag is set and all later calls are no-ops.
 *
 * @param ctx the struct heim_strbuf to append to
 * @param str text to append, or NULL
 */
static void
strbuf_add(void *ctx, const char *str)
{
    struct heim_strbuf *strbuf = ctx;
    size_t len;

    if (strbuf->enomem)
        return;

    if (str == NULL) {
        /*
         * Eat the last '\n'; this is used when formatting dict pairs
         * and array items so that the ',' separating them is never
         * preceded by a '\n'.
         */
        if (strbuf->len > 0 && strbuf->str[strbuf->len - 1] == '\n')
            strbuf->len--;
        return;
    }

    len = strlen(str);
    if ((len + 1) > (strbuf->alloced - strbuf->len)) {
        /* Grow by a quarter plus what this append needs */
        size_t new_len = strbuf->alloced + (strbuf->alloced >> 2) + len + 1;
        char *s;

        s = realloc(strbuf->str, new_len);
        if (s == NULL) {
            strbuf->enomem = 1;
            return;
        }
        strbuf->str = s;
        strbuf->alloced = new_len;
    }
    /* +1 so we copy the NUL */
    (void) memcpy(strbuf->str + strbuf->len, str, len + 1);
    strbuf->len += len;

    /* Check len first: appending "" to an empty buffer must not read str[-1] */
    if (strbuf->len > 0 &&
        strbuf->str[strbuf->len - 1] == '\n' &&
        (strbuf->flags & HEIM_JSON_F_ONE_LINE))
        strbuf->len--;
}
/* Initial allocation, in bytes, for the serialization buffer */
#define STRBUF_INIT_SZ 64

/*
 * Serialize an object to a newly created string object.
 *
 * With HEIM_JSON_F_ONE_LINE the text is written without newlines or
 * indentation and a single '\n' is appended at the end.
 *
 * @param obj   object to serialize
 * @param flags HEIM_JSON_F_* options
 * @param error if not NULL, set to NULL on entry and to a new error object
 *              on failure
 *
 * @return the serialized text, or NULL on failure
 */
heim_string_t
heim_json_copy_serialize(heim_object_t obj, heim_json_flags_t flags, heim_error_t *error)
{
    heim_string_t str;
    struct heim_strbuf strbuf;
    int ret;

    if (error)
        *error = NULL;

    memset(&strbuf, 0, sizeof (strbuf));
    strbuf.str = malloc(STRBUF_INIT_SZ);
    if (strbuf.str == NULL) {
        if (error)
            *error = heim_error_create_enomem();
        return NULL;
    }
    strbuf.len = 0;
    strbuf.alloced = STRBUF_INIT_SZ;
    strbuf.str[0] = '\0';
    strbuf.flags = flags;

    ret = heim_base2json(obj, &strbuf, flags, strbuf_add);
    if (ret || strbuf.enomem) {
        if (error) {
            if (strbuf.enomem || ret == ENOMEM)
                *error = heim_error_create_enomem();
            else
                *error = heim_error_create(1, "Impossible to JSON-encode "
                                           "object");
        }
        free(strbuf.str);
        return NULL;
    }
    if (flags & HEIM_JSON_F_ONE_LINE) {
        /* Clear the flag so strbuf_add() keeps this final newline */
        strbuf.flags &= ~HEIM_JSON_F_ONE_LINE;
        strbuf_add(&strbuf, "\n");
        if (strbuf.enomem) {
            /* The final append can fail too; don't return truncated text */
            if (error)
                *error = heim_error_create_enomem();
            free(strbuf.str);
            return NULL;
        }
    }
    /* On success `str' owns the buffer and frees it with free() */
    str = heim_string_ref_create(strbuf.str, free);
    if (str == NULL) {
        if (error)
            *error = heim_error_create_enomem();
        free(strbuf.str);
    }
    return str;
}
/* State for comparing two dicts pair-by-pair via heim_dict_iterate_f(). */
struct heim_eq_f_ctx {
heim_dict_t other; /* dict in which each visited key is looked up */
int ret; /* non-zero while every pair compared so far matched */
};
/*
 * Dict iteration callback for heim_json_eq(): compare `val' with the value
 * stored under the same key in the other dict, recording the outcome in the
 * context.  Once a mismatch has been recorded nothing more is compared.
 *
 * This doesn't work if the key is an array or a dict, which, anyways,
 * isn't allowed in JSON, though we allow it.
 *
 * @param key key of the visited pair
 * @param val value of the visited pair
 * @param d   the struct heim_eq_f_ctx comparison state
 */
static void
heim_eq_dict_iter_f(heim_object_t key, heim_object_t val, void *d)
{
    struct heim_eq_f_ctx *cmp = d;

    if (cmp->ret) {
        heim_object_t counterpart = heim_dict_get_value(cmp->other, key);

        cmp->ret = heim_json_eq(val, counterpart);
    }
}
/*
 * Deep equality test for two objects.
 *
 * Objects of different types are never equal.  Arrays are compared
 * element-wise, dicts by looking up each key of one in the other (in both
 * directions), strings with strcmp(), data by length and bytes, numbers by
 * value, and null/bool objects through heim_bool_val().  Any other type
 * compares unequal unless both arguments are the same pointer.
 *
 * @return 1 if equal, 0 otherwise
 */
int
heim_json_eq(heim_object_t a, heim_object_t b)
{
    heim_tid_t atid, btid;

    if (a == b)
        return 1;
    if (a == NULL || b == NULL)
        return 0;

    atid = heim_get_tid(a);
    btid = heim_get_tid(b);
    if (atid != btid)
        return 0;

    if (atid == HEIM_TID_ARRAY) {
        size_t count = heim_array_get_length(b);
        size_t idx;

        if (heim_array_get_length(a) != count)
            return 0;
        for (idx = 0; idx < count; idx++) {
            if (!heim_json_eq(heim_array_get_value(a, idx),
                              heim_array_get_value(b, idx)))
                return 0;
        }
        return 1;
    }

    if (atid == HEIM_TID_DICT) {
        struct heim_eq_f_ctx cmp;

        /* Every pair of a must match b ... */
        cmp.other = b;
        cmp.ret = 1;
        heim_dict_iterate_f(a, &cmp, heim_eq_dict_iter_f);

        /* ... and every pair of b must match a */
        if (cmp.ret) {
            cmp.other = a;
            heim_dict_iterate_f(b, &cmp, heim_eq_dict_iter_f);
        }
        return cmp.ret;
    }

    if (atid == HEIM_TID_STRING)
        return strcmp(heim_string_get_utf8(a), heim_string_get_utf8(b)) == 0;

    if (atid == HEIM_TID_DATA) {
        if (heim_data_get_length(a) != heim_data_get_length(b))
            return 0;
        return memcmp(heim_data_get_ptr(a), heim_data_get_ptr(b),
                      heim_data_get_length(a)) == 0;
    }

    if (atid == HEIM_TID_NUMBER)
        return heim_number_get_long(a) == heim_number_get_long(b);

    if (atid == HEIM_TID_NULL || atid == HEIM_TID_BOOL)
        return heim_bool_val(a) == heim_bool_val(b);

    return 0;
}