summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/cursor/cur_json.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/cursor/cur_json.c')
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_json.c931
1 files changed, 931 insertions, 0 deletions
diff --git a/src/third_party/wiredtiger/src/cursor/cur_json.c b/src/third_party/wiredtiger/src/cursor/cur_json.c
new file mode 100644
index 00000000000..f4459819259
--- /dev/null
+++ b/src/third_party/wiredtiger/src/cursor/cur_json.c
@@ -0,0 +1,931 @@
+/*-
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+static size_t __json_unpack_put(WT_SESSION_IMPL *, void *, u_char *, size_t,
+ WT_CONFIG_ITEM *);
+static inline int __json_struct_size(WT_SESSION_IMPL *, const void *, size_t,
+ const char *, WT_CONFIG_ITEM *, int, size_t *);
+static inline int __json_struct_unpackv(WT_SESSION_IMPL *, const void *, size_t,
+ const char *, WT_CONFIG_ITEM *, u_char *, size_t, int, va_list);
+static int json_string_arg(WT_SESSION_IMPL *, const char **, WT_ITEM *);
+static int json_int_arg(WT_SESSION_IMPL *, const char **, int64_t *);
+static int json_uint_arg(WT_SESSION_IMPL *, const char **, uint64_t *);
+static int __json_pack_struct(WT_SESSION_IMPL *, void *, size_t, const char *,
+ const char *);
+static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *,
+ int, const char *, size_t *);
+
+#define WT_PACK_JSON_GET(session, pv, jstr) do { \
+ switch (pv.type) { \
+ case 'x': \
+ break; \
+ case 's': \
+ case 'S': \
+ WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \
+ pv.type = pv.type == 's' ? 'j' : 'J'; \
+ break; \
+ case 'b': \
+ case 'h': \
+ case 'i': \
+ case 'l': \
+ case 'q': \
+ WT_RET(json_int_arg(session, &jstr, &pv.u.i)); \
+ break; \
+ case 'B': \
+ case 'H': \
+ case 'I': \
+ case 'L': \
+ case 'Q': \
+ case 'r': \
+ case 'R': \
+ case 't': \
+ WT_RET(json_uint_arg(session, &jstr, &pv.u.u)); \
+ break; \
+ /* User format strings have already been validated. */ \
+ WT_ILLEGAL_VALUE(session); \
+ } \
+} while (0)
+
+/*
+ * __json_unpack_put --
+ * Calculate the size of a packed byte string as formatted for JSON.
+ */
+static size_t
+__json_unpack_put(WT_SESSION_IMPL *session, void *voidpv,
+ u_char *buf, size_t bufsz, WT_CONFIG_ITEM *name)
+{
+ WT_PACK_VALUE *pv;
+ const char *p, *end;
+ size_t s, n;
+
+ pv = (WT_PACK_VALUE *)voidpv;
+ s = (size_t)snprintf((char *)buf, bufsz, "\"%.*s\" : ",
+ (int)name->len, name->str);
+ if (s <= bufsz) {
+ bufsz -= s;
+ buf += s;
+ }
+ else
+ bufsz = 0;
+
+ switch (pv->type) {
+ case 'x':
+ return (0);
+ case 's':
+ case 'S':
+ /* Account for '"' quote in front and back. */
+ s += 2;
+ p = (const char *)pv->u.s;
+ if (bufsz > 0) {
+ *buf++ = '"';
+ bufsz--;
+ }
+ if (pv->type == 's' || pv->havesize) {
+ end = p + pv->size;
+ for (; p < end; p++) {
+ n = __wt_json_unpack_char(*p, buf, bufsz, 0);
+ if (n > bufsz)
+ bufsz = 0;
+ else {
+ bufsz -= n;
+ buf += n;
+ }
+ s += n;
+ }
+ } else
+ for (; *p; p++) {
+ n = __wt_json_unpack_char(*p, buf, bufsz, 0);
+ if (n > bufsz)
+ bufsz = 0;
+ else {
+ bufsz -= n;
+ buf += n;
+ }
+ s += n;
+ }
+ if (bufsz > 0)
+ *buf++ = '"';
+ return (s);
+ case 'U':
+ case 'u':
+ s += 2;
+ p = (const char *)pv->u.item.data;
+ end = p + pv->u.item.size;
+ if (bufsz > 0) {
+ *buf++ = '"';
+ bufsz--;
+ }
+ for (; p < end; p++) {
+ n = __wt_json_unpack_char(*p, buf, bufsz, 1);
+ if (n > bufsz)
+ bufsz = 0;
+ else {
+ bufsz -= n;
+ buf += n;
+ }
+ s += n;
+ }
+ if (bufsz > 0)
+ *buf++ = '"';
+ return (s);
+ case 'b':
+ case 'h':
+ case 'i':
+ case 'l':
+ case 'q':
+ return (s +
+ (size_t)snprintf((char *)buf, bufsz, "%" PRId64, pv->u.i));
+ case 'B':
+ case 't':
+ case 'H':
+ case 'I':
+ case 'L':
+ case 'Q':
+ case 'r':
+ case 'R':
+ return (s +
+ (size_t)snprintf((char *)buf, bufsz, "%" PRId64, pv->u.u));
+ }
+ __wt_err(session, EINVAL, "unknown pack-value type: %c", (int)pv->type);
+ return ((size_t)-1);
+}
+
+/*
+ * __json_struct_size --
+ * Calculate the size of a packed byte string as formatted for JSON.
+ */
+static inline int
+__json_struct_size(WT_SESSION_IMPL *session, const void *buffer,
+ size_t size, const char *fmt, WT_CONFIG_ITEM *names, int iskey,
+ size_t *presult)
+{
+ WT_CONFIG_ITEM name;
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ WT_PACK_NAME packname;
+ const uint8_t *p, *end;
+ size_t result;
+ int needcr;
+
+ p = buffer;
+ end = p + size;
+ result = 0;
+ needcr = 0;
+
+ WT_RET(__pack_name_init(session, names, iskey, &packname));
+ WT_RET(__pack_init(session, &pack, fmt));
+ while ((ret = __pack_next(&pack, &pv)) == 0) {
+ if (needcr)
+ result += 2;
+ needcr = 1;
+ WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
+ WT_RET(__pack_name_next(&packname, &name));
+ result += __json_unpack_put(session, &pv, NULL, 0, &name);
+ }
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+
+ /* Be paranoid - __pack_write should never overflow. */
+ WT_ASSERT(session, p <= end);
+
+ *presult = result;
+ return (ret);
+}
+
+/*
+ * __json_struct_unpackv --
+ * Unpack a byte string to JSON (va_list version).
+ */
+static inline int
+__json_struct_unpackv(WT_SESSION_IMPL *session,
+ const void *buffer, size_t size, const char *fmt, WT_CONFIG_ITEM *names,
+ u_char *jbuf, size_t jbufsize, int iskey, va_list ap)
+{
+ WT_CONFIG_ITEM name;
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ WT_PACK_NAME packname;
+ int needcr;
+ size_t jsize;
+ const uint8_t *p, *end;
+
+ p = buffer;
+ end = p + size;
+ needcr = 0;
+
+ /* Unpacking a cursor marked as json implies a single arg. */
+ *va_arg(ap, const char **) = (char *)jbuf;
+
+ WT_RET(__pack_name_init(session, names, iskey, &packname));
+ WT_RET(__pack_init(session, &pack, fmt));
+ while ((ret = __pack_next(&pack, &pv)) == 0) {
+ if (needcr) {
+ WT_ASSERT(session, jbufsize >= 3);
+ strncat((char *)jbuf, ",\n", jbufsize);
+ jbuf += 2;
+ jbufsize -= 2;
+ }
+ needcr = 1;
+ WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
+ WT_RET(__pack_name_next(&packname, &name));
+ jsize = __json_unpack_put(session,
+ (u_char *)&pv, jbuf, jbufsize, &name);
+ WT_ASSERT(session, jsize <= jbufsize);
+ jbuf += jsize;
+ jbufsize -= jsize;
+ }
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+
+ /* Be paranoid - __unpack_read should never overflow. */
+ WT_ASSERT(session, p <= end);
+
+ WT_ASSERT(session, jbufsize == 1);
+
+ return (ret);
+}
+
+/*
+ * __wt_json_alloc_unpack --
+ * Allocate space for, and unpack an entry into JSON format.
+ */
+int
+__wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer,
+ size_t size, const char *fmt, WT_CURSOR_JSON *json,
+ int iskey, va_list ap)
+{
+ WT_CONFIG_ITEM *names;
+ WT_DECL_RET;
+ size_t needed;
+ char **json_bufp;
+
+ if (iskey) {
+ names = &json->key_names;
+ json_bufp = &json->key_buf;
+ } else {
+ names = &json->value_names;
+ json_bufp = &json->value_buf;
+ }
+ needed = 0;
+ WT_RET(__json_struct_size(session, buffer, size, fmt, names,
+ iskey, &needed));
+ WT_RET(__wt_realloc(session, NULL, needed + 1, json_bufp));
+ WT_RET(__json_struct_unpackv(session, buffer, size, fmt,
+ names, (u_char *)*json_bufp, needed + 1, iskey, ap));
+
+ return (ret);
+}
+
+/*
+ * __wt_json_close --
+ * Release any json related resources.
+ */
+void
+__wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
+{
+ WT_CURSOR_JSON *json;
+
+ if ((json = (WT_CURSOR_JSON *)cursor->json_private) != NULL) {
+ __wt_free(session, json->key_buf);
+ __wt_free(session, json->value_buf);
+ __wt_free(session, json);
+ }
+ return;
+}
+
+/*
+ * __wt_json_unpack_char --
+ * Unpack a single character into JSON escaped format.
+ * Can be called with null buf for sizing.
+ */
+size_t
+__wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, int force_unicode)
+{
+ char abbrev;
+ u_char h;
+
+ if (!force_unicode) {
+ if (isprint(ch) && ch != '\\' && ch != '"') {
+ if (bufsz >= 1)
+ *buf = (u_char)ch;
+ return (1);
+ } else {
+ abbrev = '\0';
+ switch (ch) {
+ case '\\':
+ case '"':
+ abbrev = ch;
+ break;
+ case '\f':
+ abbrev = 'f';
+ break;
+ case '\n':
+ abbrev = 'n';
+ break;
+ case '\r':
+ abbrev = 'r';
+ break;
+ case '\t':
+ abbrev = 't';
+ break;
+ }
+ if (abbrev != '\0') {
+ if (bufsz >= 2) {
+ *buf++ = '\\';
+ *buf = (u_char)abbrev;
+ }
+ return (2);
+ }
+ }
+ }
+ if (bufsz >= 6) {
+ *buf++ = '\\';
+ *buf++ = 'u';
+ *buf++ = '0';
+ *buf++ = '0';
+ h = (((u_char)ch) >> 4) & 0xF;
+ if (h >= 10)
+ *buf++ = 'A' + (h - 10);
+ else
+ *buf++ = '0' + h;
+ h = ((u_char)ch) & 0xF;
+ if (h >= 10)
+ *buf++ = 'A' + (h - 10);
+ else
+ *buf++ = '0' + h;
+ }
+ return (6);
+}
+
+/*
+ * __wt_json_column_init --
+ * set json_key_names, json_value_names to comma separated lists
+ * of column names.
+ */
+int
+__wt_json_column_init(WT_CURSOR *cursor, const char *keyformat,
+ const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf)
+{
+ WT_CURSOR_JSON *json;
+ const char *p, *end, *beginkey;
+ uint32_t keycnt, nkeys;
+
+ json = (WT_CURSOR_JSON *)cursor->json_private;
+ beginkey = colconf->str;
+ end = beginkey + colconf->len;
+
+ if (idxconf != NULL) {
+ json->key_names.str = idxconf->str;
+ json->key_names.len = idxconf->len;
+ } else if (colconf->len > 0 && *beginkey == '(') {
+ beginkey++;
+ if (end[-1] == ')')
+ end--;
+ }
+
+ for (nkeys = 0; *keyformat; keyformat++)
+ if (!isdigit(*keyformat))
+ nkeys++;
+
+ p = beginkey;
+ keycnt = 0;
+ while (p < end && keycnt < nkeys) {
+ if (*p == ',')
+ keycnt++;
+ p++;
+ }
+ json->value_names.str = p;
+ json->value_names.len = WT_PTRDIFF(end, p);
+ if (idxconf == NULL) {
+ if (p > beginkey)
+ p--;
+ json->key_names.str = beginkey;
+ json->key_names.len = WT_PTRDIFF(p, beginkey);
+ }
+ return (0);
+}
+
+#define MATCH_KEYWORD(session, in, result, keyword, matchval) do { \
+ size_t _kwlen = strlen(keyword); \
+ if (strncmp(in, keyword, _kwlen) == 0 && !isalnum(in[_kwlen])) { \
+ in += _kwlen; \
+ result = matchval; \
+ } else { \
+ const char *_bad = in; \
+ while (isalnum(*in)) \
+ in++; \
+ __wt_errx(session, "unknown keyword \"%.*s\" in JSON", \
+ (int)(in - _bad), _bad); \
+ } \
+} while (0)
+
+/*
+ * __wt_json_token --
+ * Return the type, start position and length of the next JSON
+ * token in the input. String tokens include the quotes. JSON
+ * can be entirely parsed using calls to this tokenizer, each
+ * call using a src pointer that is the previously returned
+ * tokstart + toklen.
+ *
+ * The token type returned is one of:
+ * 0 : EOF
+ * 's' : string
+ * 'i' : intnum
+ * 'f' : floatnum
+ * ':' : colon
+ * ',' : comma
+ * '{' : lbrace
+ * '}' : rbrace
+ * '[' : lbracket
+ * ']' : rbracket
+ * 'N' : null
+ * 'T' : true
+ * 'F' : false
+ */
+int
+__wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
+ const char **tokstart, size_t *toklen)
+{
+ WT_SESSION_IMPL *session;
+ char ch;
+ const char *bad;
+ int backslash, isalph, isfloat, result;
+
+ result = -1;
+ session = (WT_SESSION_IMPL *)wt_session;
+ while (isspace(*src))
+ src++;
+ *tokstart = src;
+
+ if (*src == '\0') {
+ *toktype = 0;
+ *toklen = 0;
+ return (0);
+ }
+
+ /* JSON is specified in RFC 4627. */
+ switch (*src) {
+ case '"':
+ backslash = 0;
+ src++;
+ while ((ch = *src) != '\0') {
+ if (!backslash) {
+ if (ch == '"') {
+ src++;
+ result = 's';
+ break;
+ }
+ if (ch == '\\')
+ backslash = 1;
+ } else {
+ /* We validate Unicode on this pass. */
+ if (ch == 'u') {
+ u_char ignored;
+ const u_char *uc;
+
+ uc = (const u_char *)src;
+ if (__wt_hex2byte(&uc[1], &ignored) ||
+ __wt_hex2byte(&uc[3], &ignored)) {
+ __wt_errx(session,
+ "invalid Unicode within JSON string");
+ return (-1);
+ }
+ src += 5;
+ }
+ backslash = 0;
+ }
+ src++;
+ }
+ if (result != 's')
+ __wt_errx(session, "unterminated string in JSON");
+ break;
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ isfloat = 0;
+ if (*src == '-')
+ src++;
+ while ((ch = *src) != '\0' && isdigit(ch))
+ src++;
+ if (*src == '.') {
+ isfloat = 1;
+ src++;
+ while ((ch = *src) != '\0' &&
+ isdigit(ch))
+ src++;
+ }
+ if (*src == 'e' || *src == 'E') {
+ isfloat = 1;
+ src++;
+ if (*src == '+' || *src == '-')
+ src++;
+ while ((ch = *src) != '\0' &&
+ isdigit(ch))
+ src++;
+ }
+ result = isfloat ? 'f' : 'i';
+ break;
+ case ':':
+ case ',':
+ case '{':
+ case '}':
+ case '[':
+ case ']':
+ result = *src++;
+ break;
+ case 'n':
+ MATCH_KEYWORD(session, src, result, "null", 'N');
+ break;
+ case 't':
+ MATCH_KEYWORD(session, src, result, "true", 'T');
+ break;
+ case 'f':
+ MATCH_KEYWORD(session, src, result, "false", 'F');
+ break;
+ default:
+ /* An illegal token, move past it anyway */
+ bad = src;
+ isalph = isalnum(*src);
+ src++;
+ if (isalph)
+ while (*src != '\0' && isalnum(*src))
+ src++;
+ __wt_errx(session, "unknown token \"%.*s\" in JSON",
+ (int)(src - bad), bad);
+ break;
+ }
+ *toklen = (size_t)(src - *tokstart);
+ *toktype = result;
+ return (result < 0 ? EINVAL : 0);
+}
+
+/*
+ * __wt_json_tokname
+ * Return a descriptive name from the token type returned by
+ * __wt_json_token
+ */
+const char *
+__wt_json_tokname(int toktype)
+{
+ switch (toktype) {
+ case 0: return ("<EOF>");
+ case 's': return ("<string>");
+ case 'i': return ("<integer>");
+ case 'f': return ("<float>");
+ case ':': return ("':'");
+ case ',': return ("','");
+ case '{': return ("'{'");
+ case '}': return ("'}'");
+ case '[': return ("'['");
+ case ']': return ("']'");
+ case 'N': return ("'null'");
+ case 'T': return ("'true'");
+ case 'F': return ("'false'");
+ default: return ("<UNKNOWN>");
+ }
+}
+
+/*
+ * json_string_arg --
+ * Returns a first cut of the needed string in item.
+ * The result has not been stripped of escapes.
+ */
+static int
+json_string_arg(WT_SESSION_IMPL *session, const char **jstr, WT_ITEM *item)
+{
+ const char *tokstart;
+ int tok;
+ WT_DECL_RET;
+
+ WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart,
+ &item->size));
+ if (tok == 's') {
+ *jstr = tokstart + item->size;
+ /* The tokenizer includes the '"' chars */
+ item->data = tokstart + 1;
+ item->size -= 2;
+ ret = 0;
+ } else {
+ __wt_errx(session, "expected JSON <string>, got %s",
+ __wt_json_tokname(tok));
+ ret = EINVAL;
+ }
+ return (ret);
+}
+
+/*
+ * json_int_arg --
+ * Returns a signed integral value from the current position
+ * in the JSON string.
+ */
+static int
+json_int_arg(WT_SESSION_IMPL *session, const char **jstr, int64_t *ip)
+{
+ char *end;
+ const char *tokstart;
+ int tok;
+ size_t toksize;
+
+ WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart,
+ &toksize));
+ if (tok == 'i') {
+ /* JSON only allows decimal */
+ *ip = strtoll(tokstart, &end, 10);
+ if (end != tokstart + toksize)
+ WT_RET_MSG(session, EINVAL,
+ "JSON <int> extraneous input");
+ *jstr = tokstart + toksize;
+ } else {
+ __wt_errx(session, "expected JSON <int>, got %s",
+ __wt_json_tokname(tok));
+ return (EINVAL);
+ }
+ return (0);
+}
+
+/*
+ * json_uint_arg --
+ * Returns an unsigned integral value from the current position
+ * in the JSON string.
+ */
+static int
+json_uint_arg(WT_SESSION_IMPL *session, const char **jstr, uint64_t *up)
+{
+ char *end;
+ const char *tokstart;
+ int tok;
+ size_t toksize;
+
+ WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart,
+ &toksize));
+ if (tok == 'i' && *tokstart != '-') {
+ /* JSON only allows decimal */
+ *up = strtoull(tokstart, &end, 10);
+ if (end != tokstart + toksize)
+ WT_RET_MSG(session, EINVAL,
+ "JSON <int> extraneous input");
+ *jstr = tokstart + toksize;
+ } else {
+ __wt_errx(session, "expected unsigned JSON <int>, got %s",
+ __wt_json_tokname(tok));
+ return (EINVAL);
+ }
+ return (0);
+}
+
+#define JSON_EXPECT_TOKEN_GET(session, jstr, tokval, start, sz) do { \
+ int __tok; \
+ WT_RET(__wt_json_token((WT_SESSION *)session, jstr, &__tok, &start, &sz));\
+ if (__tok != tokval) { \
+ __wt_errx(session, "expected JSON %s, got %s", \
+ __wt_json_tokname(tokval), __wt_json_tokname(__tok)); \
+ return (EINVAL); \
+ } \
+ jstr = start + sz; \
+} while (0)
+
+#define JSON_EXPECT_TOKEN(session, jstr, tokval) do { \
+ const char *__start; \
+ size_t __sz; \
+ JSON_EXPECT_TOKEN_GET(session, jstr, tokval, __start, __sz); \
+} while (0)
+
+/*
+ * __json_pack_struct --
+ * Pack a byte string from a JSON string.
+ */
+static int
+__json_pack_struct(WT_SESSION_IMPL *session, void *buffer, size_t size,
+ const char *fmt, const char *jstr)
+{
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ const char *tokstart;
+ int multi;
+ size_t toksize;
+ uint8_t *p, *end;
+
+ p = buffer;
+ end = p + size;
+ multi = 0;
+
+ if (fmt[0] != '\0' && fmt[1] == '\0') {
+ JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+ /* the key name was verified in __json_pack_size */
+ JSON_EXPECT_TOKEN(session, jstr, ':');
+ pv.type = fmt[0];
+ WT_PACK_JSON_GET(session, pv, jstr);
+ return (__pack_write(session, &pv, &p, size));
+ }
+
+ WT_RET(__pack_init(session, &pack, fmt));
+ while ((ret = __pack_next(&pack, &pv)) == 0) {
+ if (multi)
+ JSON_EXPECT_TOKEN(session, jstr, ',');
+ JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+ /* the key name was verified in __json_pack_size */
+ JSON_EXPECT_TOKEN(session, jstr, ':');
+ WT_PACK_JSON_GET(session, pv, jstr);
+ WT_RET(__pack_write(session, &pv, &p, (size_t)(end - p)));
+ multi = 1;
+ }
+
+ /* Be paranoid - __pack_write should never overflow. */
+ WT_ASSERT(session, p <= end);
+
+ if (ret != WT_NOTFOUND)
+ return (ret);
+
+ return (0);
+}
+
+/*
+ * __json_pack_size --
+ * Calculate the size of a packed byte string from a JSON string.
+ * We verify that the names and value types provided in JSON match
+ * the column names and type from the schema format, returning error
+ * if not.
+ */
+static int
+__json_pack_size(
+ WT_SESSION_IMPL *session, const char *fmt, WT_CONFIG_ITEM *names,
+ int iskey, const char *jstr, size_t *sizep)
+{
+ WT_CONFIG_ITEM name;
+ WT_DECL_PACK_VALUE(pv);
+ WT_PACK pack;
+ WT_PACK_NAME packname;
+ const char *tokstart;
+ int multi;
+ size_t toksize, total;
+
+ WT_RET(__pack_name_init(session, names, iskey, &packname));
+ multi = 0;
+ WT_RET(__pack_init(session, &pack, fmt));
+ for (total = 0; __pack_next(&pack, &pv) == 0;) {
+ if (multi)
+ JSON_EXPECT_TOKEN(session, jstr, ',');
+ JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+ WT_RET(__pack_name_next(&packname, &name));
+ if (toksize - 2 != name.len ||
+ strncmp(tokstart + 1, name.str, toksize - 2) != 0) {
+ __wt_errx(session, "JSON expected %s name: \"%.*s\"",
+ iskey ? "key" : "value", (int)name.len, name.str);
+ return (EINVAL);
+ }
+ JSON_EXPECT_TOKEN(session, jstr, ':');
+ WT_PACK_JSON_GET(session, pv, jstr);
+ total += __pack_size(session, &pv);
+ multi = 1;
+ }
+ /* check end of string */
+ JSON_EXPECT_TOKEN(session, jstr, 0);
+
+ *sizep = total;
+ return (0);
+}
+
+/*
+ * __wt_json_to_item --
+ * Convert a JSON input string for either key/value to a raw WT_ITEM.
+ * Checks that the input matches the expected format.
+ */
+int
+__wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr,
+ const char *format, WT_CURSOR_JSON *json, int iskey, WT_ITEM *item)
+{
+ size_t sz;
+ sz = 0; /* Initialize because GCC 4.1 is paranoid */
+
+ WT_RET(__json_pack_size(session, format,
+ iskey ? &json->key_names : &json->value_names, iskey, jstr, &sz));
+ WT_RET(__wt_buf_initsize(session, item, sz));
+ WT_RET(__json_pack_struct(session, item->mem, sz, format, jstr));
+ return (0);
+}
+
+/*
+ * __wt_json_strlen --
+ * Return the number of bytes represented by a string in JSON format,
+ * or -1 if the format is incorrect.
+ */
+ssize_t
+__wt_json_strlen(const char *src, size_t srclen)
+{
+ const char *srcend;
+ size_t dstlen;
+ u_char hi, lo;
+
+ dstlen = 0;
+ srcend = src + srclen;
+ while (src < srcend) {
+ /* JSON can include any UTF-8 expressed in 4 hex chars. */
+ if (*src == '\\') {
+ if (*++src == 'u') {
+ if (__wt_hex2byte((const u_char *)++src, &hi))
+ return (-1);
+ src += 2;
+ if (__wt_hex2byte((const u_char *)src, &lo))
+ return (-1);
+ src += 2;
+ /* RFC 3629 */
+ if (hi >= 0x8) {
+ /* 3 bytes total */
+ dstlen += 2;
+ }
+ else if (hi != 0 || lo >= 0x80) {
+ /* 2 bytes total */
+ dstlen++;
+ }
+ /* else 1 byte total */
+ }
+ }
+ dstlen++;
+ src++;
+ }
+ if (src != srcend)
+ return (-1); /* invalid input, e.g. final char is '\\' */
+ return ((ssize_t)dstlen);
+}
+
+/*
+ * __wt_json_strncpy --
+ * Copy bytes of string in JSON format to a destination,
+ * up to dstlen bytes. If dstlen is greater than the needed size,
+ * the result if zero padded.
+ */
+int
+__wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen)
+{
+ char *dst;
+ const char *dstend, *srcend;
+ u_char hi, lo;
+
+ dst = *pdst;
+ dstend = dst + dstlen;
+ srcend = src + srclen;
+ while (src < srcend && dst < dstend) {
+ /* JSON can include any UTF-8 expressed in 4 hex chars. */
+ if (*src == '\\') {
+ if (*++src == 'u') {
+ if (__wt_hex2byte((const u_char *)++src, &hi))
+ return (EINVAL);
+ src += 2;
+ if (__wt_hex2byte((const u_char *)src, &lo))
+ return (EINVAL);
+ src += 2;
+ /* RFC 3629 */
+ if (hi >= 0x8) {
+ /* 3 bytes total */
+ /* byte 0: 1110HHHH */
+ /* byte 1: 10HHHHLL */
+ /* byte 2: 10LLLLLL */
+ *dst++ = (char)(0xe0 |
+ ((hi >> 4) & 0x0f));
+ *dst++ = (char)(0x80 |
+ ((hi << 2) & 0x3c) |
+ ((lo >> 6) & 0x03));
+ *dst++ = (char)(0x80 | (lo & 0x3f));
+ } else if (hi != 0 || lo >= 0x80) {
+ /* 2 bytes total */
+ /* byte 0: 110HHHLL */
+ /* byte 1: 10LLLLLL */
+ *dst++ = (char)(0xc0 |
+ (hi << 2) |
+ ((lo >> 6) & 0x03));
+ *dst++ = (char)(0x80 | (lo & 0x3f));
+ } else
+ /* else 1 byte total */
+ /* byte 0: 0LLLLLLL */
+ *dst++ = (char)lo;
+ }
+ else
+ *dst++ = *src;
+ } else
+ *dst++ = *src;
+ src++;
+ }
+ if (src != srcend)
+ return (ENOMEM);
+ *pdst = dst;
+ while (dst < dstend)
+ *dst++ = '\0';
+ return (0);
+}