Pluggable libheimbase interface for DBs and misc libheimbase enhancements

[Code reviewed by Love Hörnquist Åstrand <lha@kth.se>]

    Added heim_db_*() entry points for dealing with databases, and
    make krb5_aname_to_localname() use it.

    The following enhancements to libheimbase are included:

     - Add heim_data_t and heim_string_t "reference" variants to
       avoid memory copies of potentially large data/strings.

       See heim_data_ref_create() and heim_string_ref_create().

     - Added enhancements to heim_array_t to allow their use for
       queues and stacks, and to improve performance.  See
       heim_array_insert_value().

     - Added XPath-like accessors for heim_object_t.  See
       heim_path_get(), heim_path_copy(), heim_path_create(), and
       heim_path_delete().  These are used extensively in the DB
       framework's generic composition of ACID support and in the
       test_base program

     - Made libheimbase more consistent with Core Foundation naming
       conventions.  See heim_{dict, array}_{get, copy}_value() and
       heim_path_{get, copy}().

     - Added functionality to and fixed bugs in base/json.c:
        - heim_serialize();
        - depth limit for JSON parsing (for DoS protection);
        - pretty-printing;
        - JSON compliance (see below);
        - flag options for parsing and serializing; these are needed
          because of impedance mismatches between heim_object_t and
          JSON (e.g., heim_dict_t allows non-string keys, but JSON
          does not; heimbase supports binary data, while JSON does
          not).

     - Added heim_error_enomem().

     - Enhanced the test_base program to test new functionality and
       to use heim_path*() to better test JSON encoding.  This
       includes some fuzz testing of JSON parsing, and running the
       test under valgrind.

     - Started to add doxygen documentation for libheimbase (but doc
       build for libheimbase is still incomplete).

    Note that there's still some incomplete JSON support:

     - JSON string quoting is not fully implemented;

     - libheimbase lacks support for real numbers, while JSON has
       it -- otherwise libheimbase is a superset of JSON,
       specifically in that any heim_object_t can be a key for an
       associative array.

    The following DB backends are supported natively:

     - "sorted-text", a binary search of sorted (in C locale), flat
       text files;

     - "json", a backend that stores DB contents serialized as JSON
       (this is intended for configuration-like contents).

    The DB framework supports:

     - multiple key/value tables per-DB
     - ACID transactions

    The DB framework also natively implements ACID transactions for
    any DB backends that a) do not provide transactions natively, b)
    do provide lock/unlock/sync methods (even on Windows).  This
    includes autocommit of DB updates outside transactions.

    Future DB enhancements may include:

     - add backends for various DB types (BDB, CDB, MDB, ...);

     - make libhdb use heim_db_t;

     - add a command-line tool for interfacing to databases via
       libheimbase (e.g., to get/set/delete values, create/copy/
       backup DBs, inspect history, check integrity);

     - framework-level transaction logging (with redo and undo
       logging), for generic incremental replication;

     - framework-level DB integrity checking.

       We could store a MAC of the XOR of a hash function applied to
       {key, value} for every entry in the DB, then use this to check
       DB integrity incrementally during incremental replication, as
       well as for the whole DB.
This commit is contained in:
Nicolas Williams
2011-12-29 01:29:26 -06:00
parent df73c96b74
commit f4ba41ebdd
30 changed files with 4211 additions and 259 deletions

View File

@@ -35,11 +35,34 @@
#include "baselocl.h"
#include <ctype.h>
#include <base64.h>
static heim_base_once_t heim_json_once = HEIM_BASE_ONCE_INIT;
static heim_string_t heim_tid_data_uuid_key = NULL;
static const char base64_chars[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static void
json_init_once(void *arg)
{
heim_tid_data_uuid_key = __heim_string_constant("heimdal-type-data-76d7fca2-d0da-4b20-a126-1a10f8a0eae6");
}
struct twojson {
void *ctx;
size_t indent;
void (*out)(void *, const char *);
size_t indent;
heim_json_flags_t flags;
int ret;
int first;
};
struct strbuf {
char *str;
size_t len;
size_t alloced;
int enomem;
heim_json_flags_t flags;
};
static int
@@ -49,6 +72,8 @@ static void
indent(struct twojson *j)
{
size_t indent = j->indent;
if (j->flags & HEIM_JSON_F_ONE_LINE)
return;
while (indent--)
j->out(j->ctx, "\t");
}
@@ -57,31 +82,56 @@ static void
array2json(heim_object_t value, void *ctx)
{
struct twojson *j = ctx;
indent(j);
base2json(value, j);
j->out(j->ctx, ",\n");
j->indent--;
if (j->ret)
return;
if (j->first) {
j->first = 0;
} else {
j->out(j->ctx, NULL); /* eat previous '\n' if possible */
j->out(j->ctx, ",\n");
}
j->ret = base2json(value, j);
}
static void
dict2json(heim_object_t key, heim_object_t value, void *ctx)
{
struct twojson *j = ctx;
indent(j);
base2json(key, j);
j->out(j->ctx, " = ");
base2json(value, j);
j->out(j->ctx, ",\n");
if (j->ret)
return;
if (j->first) {
j->first = 0;
} else {
j->out(j->ctx, NULL); /* eat previous '\n' if possible */
j->out(j->ctx, ",\n");
}
j->ret = base2json(key, j);
if (j->ret)
return;
j->out(j->ctx, " : \n");
j->indent++;
j->ret = base2json(value, j);
if (j->ret)
return;
j->indent--;
}
static int
base2json(heim_object_t obj, struct twojson *j)
{
heim_tid_t type;
int first = 0;
if (obj == NULL) {
indent(j);
j->out(j->ctx, "<NULL>\n");
if (j->flags & HEIM_JSON_F_CNULL2JSNULL) {
obj = heim_null_create();
} else if (j->flags & HEIM_JSON_F_NO_C_NULL) {
return EINVAL;
} else {
indent(j);
j->out(j->ctx, "<NULL>\n"); /* This is NOT valid JSON! */
return 0;
}
}
type = heim_get_tid(obj);
@@ -90,20 +140,30 @@ base2json(heim_object_t obj, struct twojson *j)
indent(j);
j->out(j->ctx, "[\n");
j->indent++;
first = j->first;
j->first = 1;
heim_array_iterate_f(obj, j, array2json);
j->indent--;
if (!j->first)
j->out(j->ctx, "\n");
indent(j);
j->out(j->ctx, "]\n");
j->first = first;
break;
case HEIM_TID_DICT:
indent(j);
j->out(j->ctx, "{\n");
j->indent++;
first = j->first;
j->first = 1;
heim_dict_iterate_f(obj, j, dict2json);
j->indent--;
if (!j->first)
j->out(j->ctx, "\n");
indent(j);
j->out(j->ctx, "}\n");
j->first = first;
break;
case HEIM_TID_STRING:
@@ -113,10 +173,65 @@ base2json(heim_object_t obj, struct twojson *j)
j->out(j->ctx, "\"");
break;
case HEIM_TID_DATA: {
heim_dict_t d;
heim_string_t v;
const heim_octet_string *data;
char *b64 = NULL;
int ret;
if (j->flags & HEIM_JSON_F_NO_DATA)
return EINVAL; /* JSON doesn't do binary */
data = heim_data_get_data(obj);
ret = base64_encode(data->data, data->length, &b64);
if (ret < 0 || b64 == NULL)
return ENOMEM;
if (j->flags & HEIM_JSON_F_NO_DATA_DICT) {
indent(j);
j->out(j->ctx, "\"");
j->out(j->ctx, b64); /* base64-encode; hope there's no aliasing */
j->out(j->ctx, "\"");
free(b64);
} else {
/*
* JSON has no way to represent binary data, therefore the
* following is a Heimdal-specific convention.
*
* We encode binary data as a dict with a single very magic
* key with a base64-encoded value. The magic key includes
* a uuid, so we're not likely to alias accidentally.
*/
d = heim_dict_create(2);
if (d == NULL) {
free(b64);
return ENOMEM;
}
v = heim_string_ref_create(b64, free);
if (v == NULL) {
free(b64);
heim_release(d);
return ENOMEM;
}
ret = heim_dict_set_value(d, heim_tid_data_uuid_key, v);
heim_release(v);
if (ret) {
heim_release(d);
return ENOMEM;
}
ret = base2json(d, j);
heim_release(d);
if (ret)
return ret;
}
break;
}
case HEIM_TID_NUMBER: {
char num[16];
char num[32];
indent(j);
snprintf(num, sizeof(num), "%d", heim_number_get_int(obj));
snprintf(num, sizeof (num), "%d", heim_number_get_int(obj));
j->out(j->ctx, num);
break;
}
@@ -135,14 +250,22 @@ base2json(heim_object_t obj, struct twojson *j)
}
static int
heim_base2json(heim_object_t obj, void *ctx,
heim_base2json(heim_object_t obj, void *ctx, heim_json_flags_t flags,
void (*out)(void *, const char *))
{
struct twojson j;
if (flags & HEIM_JSON_F_STRICT_STRINGS)
return ENOTSUP; /* Sorry, not yet! */
heim_base_once_f(&heim_json_once, NULL, json_init_once);
j.indent = 0;
j.ctx = ctx;
j.out = out;
j.flags = flags;
j.ret = 0;
j.first = 1;
return base2json(obj, &j);
}
@@ -158,12 +281,18 @@ struct parse_ctx {
const uint8_t *pstart;
const uint8_t *pend;
heim_error_t error;
size_t depth;
heim_json_flags_t flags;
};
static heim_object_t
parse_value(struct parse_ctx *ctx);
/*
* This function eats whitespace, but, critically, it also succeeds
* only if there's anything left to parse.
*/
static int
white_spaces(struct parse_ctx *ctx)
{
@@ -195,6 +324,8 @@ parse_number(struct parse_ctx *ctx)
return NULL;
if (*ctx->p == '-') {
if (ctx->p + 1 >= ctx->pend)
return NULL;
neg = -1;
ctx->p += 1;
}
@@ -217,7 +348,18 @@ parse_string(struct parse_ctx *ctx)
const uint8_t *start;
int quote = 0;
heim_assert(*ctx->p == '"', "string doesnt' start with \"");
if (ctx->flags & HEIM_JSON_F_STRICT_STRINGS) {
ctx->error = heim_error_create(EINVAL, "Strict JSON string encoding "
"not yet supported");
return NULL;
}
if (*ctx->p != '"') {
ctx->error = heim_error_create(EINVAL, "Expected a JSON string but "
"found something else at line %lu",
ctx->lineno);
return NULL;
}
start = ++ctx->p;
while (ctx->p < ctx->pend) {
@@ -226,7 +368,7 @@ parse_string(struct parse_ctx *ctx)
} else if (*ctx->p == '\\') {
if (ctx->p + 1 == ctx->pend)
goto out;
ctx->p += 1;
ctx->p++;
quote = 1;
} else if (*ctx->p == '"') {
heim_object_t o;
@@ -239,7 +381,7 @@ parse_string(struct parse_ctx *ctx)
while (start < ctx->p) {
if (*start == '\\') {
start++;
/* XXX validate qouted char */
/* XXX validate quoted char */
}
*p++ = *start++;
}
@@ -247,11 +389,41 @@ parse_string(struct parse_ctx *ctx)
free(p0);
} else {
o = heim_string_create_with_bytes(start, ctx->p - start);
if (o == NULL) {
ctx->error = heim_error_enomem();
return NULL;
}
/* If we can decode as base64, then let's */
if (ctx->flags & HEIM_JSON_F_TRY_DECODE_DATA) {
void *buf;
size_t len;
const char *s;
s = heim_string_get_utf8(o);
len = strlen(s);
if (len >= 4 && strspn(s, base64_chars) >= len - 2) {
buf = malloc(len);
if (buf == NULL) {
heim_release(o);
ctx->error = heim_error_enomem();
return NULL;
}
len = base64_decode(s, buf);
if (len == -1) {
free(buf);
return o;
}
heim_release(o);
o = heim_data_ref_create(buf, len, free);
}
}
}
ctx->p += 1;
return o;
}
}
ctx->p += 1;
}
out:
@@ -268,22 +440,32 @@ parse_pair(heim_dict_t dict, struct parse_ctx *ctx)
if (white_spaces(ctx))
return -1;
if (*ctx->p == '}')
if (*ctx->p == '}') {
ctx->p++;
return 0;
}
key = parse_string(ctx);
if (ctx->flags & HEIM_JSON_F_STRICT_DICT)
/* JSON allows only string keys */
key = parse_string(ctx);
else
/* heim_dict_t allows any heim_object_t as key */
key = parse_value(ctx);
if (key == NULL)
/* Even heim_dict_t does not allow C NULLs as keys though! */
return -1;
if (white_spaces(ctx))
if (white_spaces(ctx)) {
heim_release(key);
return -1;
}
if (*ctx->p != ':') {
heim_release(key);
return -1;
}
ctx->p += 1;
ctx->p += 1; /* safe because we call white_spaces() next */
if (white_spaces(ctx)) {
heim_release(key);
@@ -291,7 +473,10 @@ parse_pair(heim_dict_t dict, struct parse_ctx *ctx)
}
value = parse_value(ctx);
if (value == NULL) {
if (value == NULL &&
(ctx->error != NULL || (ctx->flags & HEIM_JSON_F_NO_C_NULL))) {
if (ctx->error == NULL)
ctx->error = heim_error_create(EINVAL, "Invalid JSON encoding");
heim_release(key);
return -1;
}
@@ -303,8 +488,11 @@ parse_pair(heim_dict_t dict, struct parse_ctx *ctx)
return -1;
if (*ctx->p == '}') {
ctx->p++;
return 0;
/*
* Return 1 but don't consume the '}' so we can count the one
* pair in a one-pair dict
*/
return 1;
} else if (*ctx->p == ',') {
ctx->p++;
return 1;
@@ -315,18 +503,54 @@ parse_pair(heim_dict_t dict, struct parse_ctx *ctx)
static heim_dict_t
parse_dict(struct parse_ctx *ctx)
{
heim_dict_t dict = heim_dict_create(11);
heim_dict_t dict;
size_t count = 0;
int ret;
heim_assert(*ctx->p == '{', "string doesn't start with {");
ctx->p += 1;
dict = heim_dict_create(11);
if (dict == NULL) {
ctx->error = heim_error_enomem();
return NULL;
}
ctx->p += 1; /* safe because parse_pair() calls white_spaces() first */
while ((ret = parse_pair(dict, ctx)) > 0)
;
count++;
if (ret < 0) {
heim_release(dict);
return NULL;
}
if (count == 1 && !(ctx->flags & HEIM_JSON_F_NO_DATA_DICT)) {
heim_object_t v = heim_dict_copy_value(dict, heim_tid_data_uuid_key);
/*
* Binary data encoded as a dict with a single magic key with
* base64-encoded value? Decode as heim_data_t.
*/
if (v != NULL && heim_get_tid(v) == HEIM_TID_STRING) {
void *buf;
size_t len;
buf = malloc(strlen(heim_string_get_utf8(v)));
if (buf == NULL) {
heim_release(dict);
heim_release(v);
ctx->error = heim_error_enomem();
return NULL;
}
len = base64_decode(heim_string_get_utf8(v), buf);
heim_release(v);
if (len == -1) {
free(buf);
return dict; /* assume aliasing accident */
}
heim_release(dict);
return (heim_dict_t)heim_data_ref_create(buf, len, free);
}
}
return dict;
}
@@ -338,11 +562,14 @@ parse_item(heim_array_t array, struct parse_ctx *ctx)
if (white_spaces(ctx))
return -1;
if (*ctx->p == ']')
if (*ctx->p == ']') {
ctx->p++; /* safe because parse_value() calls white_spaces() first */
return 0;
}
value = parse_value(ctx);
if (value == NULL)
if (value == NULL &&
(ctx->error || (ctx->flags & HEIM_JSON_F_NO_C_NULL)))
return -1;
heim_array_append_value(array, value);
@@ -383,6 +610,7 @@ static heim_object_t
parse_value(struct parse_ctx *ctx)
{
size_t len;
heim_object_t o;
if (white_spaces(ctx))
return NULL;
@@ -390,16 +618,32 @@ parse_value(struct parse_ctx *ctx)
if (*ctx->p == '"') {
return parse_string(ctx);
} else if (*ctx->p == '{') {
return parse_dict(ctx);
if (ctx->depth-- == 1) {
ctx->error = heim_error_create(EINVAL, "JSON object too deep");
return NULL;
}
o = parse_dict(ctx);
ctx->depth++;
return o;
} else if (*ctx->p == '[') {
return parse_array(ctx);
if (ctx->depth-- == 1) {
ctx->error = heim_error_create(EINVAL, "JSON object too deep");
return NULL;
}
o = parse_array(ctx);
ctx->depth++;
return o;
} else if (is_number(*ctx->p) || *ctx->p == '-') {
return parse_number(ctx);
}
len = ctx->pend - ctx->p;
if (len >= 4 && memcmp(ctx->p, "null", 4) == 0) {
if ((ctx->flags & HEIM_JSON_F_NO_C_NULL) == 0 &&
len >= 6 && memcmp(ctx->p, "<NULL>", 6) == 0) {
ctx->p += 6;
return heim_null_create();
} else if (len >= 4 && memcmp(ctx->p, "null", 4) == 0) {
ctx->p += 4;
return heim_null_create();
} else if (len >= 4 && strncasecmp((char *)ctx->p, "true", 4) == 0) {
@@ -419,22 +663,29 @@ parse_value(struct parse_ctx *ctx)
heim_object_t
heim_json_create(const char *string, heim_error_t *error)
heim_json_create(const char *string, size_t max_depth, heim_json_flags_t flags,
heim_error_t *error)
{
return heim_json_create_with_bytes(string, strlen(string), error);
return heim_json_create_with_bytes(string, strlen(string), max_depth, flags,
error);
}
heim_object_t
heim_json_create_with_bytes(const void *data, size_t length, heim_error_t *error)
heim_json_create_with_bytes(const void *data, size_t length, size_t max_depth,
heim_json_flags_t flags, heim_error_t *error)
{
struct parse_ctx ctx;
heim_object_t o;
heim_base_once_f(&heim_json_once, NULL, json_init_once);
ctx.lineno = 1;
ctx.p = data;
ctx.pstart = data;
ctx.pend = ((uint8_t *)data) + length;
ctx.error = NULL;
ctx.flags = flags;
ctx.depth = max_depth;
o = parse_value(&ctx);
@@ -451,11 +702,110 @@ heim_json_create_with_bytes(const void *data, size_t length, heim_error_t *error
static void
show_printf(void *ctx, const char *str)
{
if (str == NULL)
return;
fprintf(ctx, "%s", str);
}
/**
* Dump a heimbase object to stderr (useful from the debugger!)
*
* @param obj object to dump using JSON or JSON-like format
*
* @addtogroup heimbase
*/
void
heim_show(heim_object_t obj)
{
heim_base2json(obj, stderr, show_printf);
heim_base2json(obj, stderr, HEIM_JSON_F_NO_DATA_DICT, show_printf);
}
static void
strbuf_add(void *ctx, const char *str)
{
struct strbuf *strbuf = ctx;
size_t len;
if (strbuf->enomem)
return;
if (str == NULL) {
/*
* Eat the last '\n'; this is used when formatting dict pairs
* and array items so that the ',' separating them is never
* preceded by a '\n'.
*/
if (strbuf->len > 0 && strbuf->str[strbuf->len - 1] == '\n')
strbuf->len--;
return;
}
len = strlen(str);
if ((len + 1) > (strbuf->alloced - strbuf->len)) {
size_t new_len = strbuf->alloced + (strbuf->alloced >> 2) + len + 1;
char *s;
s = realloc(strbuf->str, new_len);
if (s == NULL) {
strbuf->enomem = 1;
return;
}
strbuf->str = s;
strbuf->alloced = new_len;
}
/* +1 so we copy the NUL */
(void) memcpy(strbuf->str + strbuf->len, str, len + 1);
strbuf->len += len;
if (strbuf->str[strbuf->len - 1] == '\n' &&
strbuf->flags & HEIM_JSON_F_ONE_LINE)
strbuf->len--;
}
#define STRBUF_INIT_SZ 64
heim_string_t
heim_serialize(heim_object_t obj, heim_json_flags_t flags, heim_error_t *error)
{
heim_string_t str;
struct strbuf strbuf;
int ret;
if (error)
*error = NULL;
memset(&strbuf, 0, sizeof (strbuf));
strbuf.str = malloc(STRBUF_INIT_SZ);
if (strbuf.str == NULL) {
if (error)
*error = heim_error_enomem();
return NULL;
}
strbuf.len = 0;
strbuf.alloced = STRBUF_INIT_SZ;
strbuf.str[0] = '\0';
strbuf.flags = flags;
ret = heim_base2json(obj, &strbuf, flags, strbuf_add);
if (ret || strbuf.enomem) {
if (error) {
if (strbuf.enomem || ret == ENOMEM)
*error = heim_error_enomem();
else
*error = heim_error_create(1, "Impossible to JSON-encode "
"object");
}
free(strbuf.str);
return NULL;
}
if (flags & HEIM_JSON_F_ONE_LINE) {
strbuf.flags &= ~HEIM_JSON_F_ONE_LINE;
strbuf_add(&strbuf, "\n");
}
str = heim_string_ref_create(strbuf.str, free);
if (str == NULL) {
if (error)
*error = heim_error_enomem();
free(strbuf.str);
}
return str;
}