summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Gorrod <alexg@wiredtiger.com>2014-10-15 11:55:19 +1100
committerAlex Gorrod <alexg@wiredtiger.com>2014-10-15 11:55:19 +1100
commit9ef6222eb484e2328e90f639e49bf64584a92a38 (patch)
treebc1338268962976bbe00c1af9ffcd7a52e808cbe
parentba4f6023c5c580b5f3be1d5538f57c03a8c49fe8 (diff)
parent2bca93d54b3b3c3ad01f1fe932a783e83495701e (diff)
downloadmongo-9ef6222eb484e2328e90f639e49bf64584a92a38.tar.gz
Merge pull request #1154 from wiredtiger/json-load
Add JSON loading to cursors and wt load utility. refs #740.
-rw-r--r--build_posix/Make.base1
-rw-r--r--dist/api_data.py7
-rw-r--r--dist/s_string.ok26
-rw-r--r--lang/python/wiredtiger.i27
-rw-r--r--src/cursor/cur_dump.c25
-rw-r--r--src/cursor/cur_json.c561
-rw-r--r--src/cursor/cur_std.c8
-rw-r--r--src/cursor/cur_table.c2
-rw-r--r--src/docs/command-line.dox4
-rw-r--r--src/include/extern.h6
-rw-r--r--src/include/packing.i36
-rw-r--r--src/include/wiredtiger.in7
-rw-r--r--src/support/hex.c10
-rw-r--r--src/utilities/util_dump.c8
-rw-r--r--src/utilities/util_load.c221
-rw-r--r--src/utilities/util_load.h30
-rw-r--r--src/utilities/util_load_json.c567
-rw-r--r--test/suite/test_jsondump01.py28
-rw-r--r--test/suite/test_jsondump02.py143
19 files changed, 1577 insertions, 140 deletions
diff --git a/build_posix/Make.base b/build_posix/Make.base
index 3340bd8ad80..51a8e77cebe 100644
--- a/build_posix/Make.base
+++ b/build_posix/Make.base
@@ -25,6 +25,7 @@ wt_SOURCES =\
src/utilities/util_dump.c \
src/utilities/util_list.c \
src/utilities/util_load.c \
+ src/utilities/util_load_json.c \
src/utilities/util_loadtext.c \
src/utilities/util_main.c \
src/utilities/util_misc.c \
diff --git a/dist/api_data.py b/dist/api_data.py
index e92db02c6e6..8e42ba72b88 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -532,11 +532,10 @@ methods = {
Config('dump', '', r'''
configure the cursor for dump format inputs and outputs: "hex"
selects a simple hexadecimal format, "json" selects a JSON format
- with each record formats as fields named by column names if
+ with each record formatted as fields named by column names if
available, and "print" selects a format where only non-printing
- characters are hexadecimal encoded, and "json" produces a JSON
- encoding of the data. The "hex" and "print" dump format are
- compatible with the @ref util_dump and @ref util_load commands''',
+ characters are hexadecimal encoded. These formats are compatible
+ with the @ref util_dump and @ref util_load commands''',
choices=['hex', 'json', 'print']),
Config('next_random', 'false', r'''
configure the cursor to return a pseudo-random record from
diff --git a/dist/s_string.ok b/dist/s_string.ok
index a6cddfa8a72..69545dbda1f 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -68,6 +68,7 @@ CreateFileMapping
CreateThread
CustomersPhone
DATAITEMs
+DECL
DESC
DHANDLE
DLFCN
@@ -137,6 +138,9 @@ GetModuleHandleEx
GetProcAddress
Givargis
Google
+HHHH
+HHHHLL
+HHHLL
HYPERLEVELDB
HyperLevelDB
IEC
@@ -174,6 +178,8 @@ LIBPTHREAD
LIBRT
LIBSNAPPY
LIBZ
+LLLLLL
+LLLLLLL
LNO
LOGREC
LOGSCAN
@@ -233,6 +239,8 @@ PADDR
PAGE's
PARAM
POSIX
+PRIu
+PRIu64
PSIZE
PTHREAD
PTR
@@ -523,6 +531,7 @@ ds
dsk
dsrc
dst
+dstlen
dsync
dt
dtype
@@ -580,6 +589,7 @@ fillms
firstfit
fixup
flcs
+floatnum
fmt
fmterr
fnv
@@ -656,6 +666,7 @@ insertK
insertV
instantiation
intl
+intnum
intpack
ints
inttypes
@@ -664,6 +675,7 @@ io
ip
ispo
iteratively
+jnr
jrx
json
kb
@@ -673,10 +685,13 @@ keygen
keyname
keyv
kv
+kvraw
kvs
kvsbdb
lang
latencies
+lbrace
+lbracket
lbz
ld
ldl
@@ -817,6 +832,7 @@ os
ovfl
ownp
packv
+parens
parserp
patchp
pathname
@@ -838,6 +854,7 @@ primary's
printf
printlog
priv
+progname
ps
pse
psp
@@ -855,6 +872,8 @@ qsort
quartile
qup
rS
+rbrace
+rbracket
rdlock
rduppo
readlock
@@ -950,6 +969,7 @@ strerror
strftime
strget
stringin
+strlen
strncmp
strncpy
strndup
@@ -994,6 +1014,11 @@ tlist
tload
tmp
toffpage
+tokenizer
+toklen
+tokname
+tokstart
+toktype
toverflow
tparent
tprintlog
@@ -1053,6 +1078,7 @@ unpackv
unreferenced
unregister
unsized
+unterminated
untyped
upd
update's
diff --git a/lang/python/wiredtiger.i b/lang/python/wiredtiger.i
index 0c228c56e5f..be55845a7b2 100644
--- a/lang/python/wiredtiger.i
+++ b/lang/python/wiredtiger.i
@@ -388,7 +388,9 @@ COMPARE_OK(__wt_cursor::search_near)
%exception __wt_async_op::_set_key;
%exception __wt_async_op::_set_value;
%exception __wt_cursor::_set_key;
+%exception __wt_cursor::_set_key_str;
%exception __wt_cursor::_set_value;
+%exception __wt_cursor::_set_value_str;
%exception wiredtiger_strerror;
%exception wiredtiger_version;
@@ -577,6 +579,11 @@ typedef int int_void;
$self->set_key($self, &k);
}
+ /* Set the key from a C string; used by the Python layer for JSON cursors. */
+ void _set_key_str(char *str) {
+ $self->set_key($self, str);
+ }
+
int_void _set_recno(uint64_t recno) {
WT_ITEM k;
uint8_t recno_buf[20];
@@ -601,6 +608,11 @@ typedef int int_void;
$self->set_value($self, &v);
}
+ /* Set the value from a C string; used by the Python layer for JSON cursors. */
+ void _set_value_str(char *str) {
+ $self->set_value($self, str);
+ }
+
/* Don't return values, just throw exceptions on failure. */
int_void _get_key(char **datap, int *sizep) {
WT_ITEM k;
@@ -739,6 +751,8 @@ typedef int int_void;
args = args[0]
if self.is_column:
self._set_recno(long(args[0]))
+ elif self.is_json:
+ self._set_key_str(args[0])
else:
# Keep the Python string pinned
self._key = pack(self.key_format, *args)
@@ -748,11 +762,14 @@ typedef int int_void;
'''set_value(self) -> None
@copydoc WT_CURSOR::set_value'''
- if len(args) == 1 and type(args[0]) == tuple:
- args = args[0]
- # Keep the Python string pinned
- self._value = pack(self.value_format, *args)
- self._set_value(self._value)
+ if self.is_json:
+ self._set_value_str(args[0])
+ else:
+ if len(args) == 1 and type(args[0]) == tuple:
+ args = args[0]
+ # Keep the Python string pinned
+ self._value = pack(self.value_format, *args)
+ self._set_value(self._value)
def __iter__(self):
'''Cursor objects support iteration, equivalent to calling
diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c
index 31d40d32060..003b7e1f961 100644
--- a/src/cursor/cur_dump.c
+++ b/src/cursor/cur_dump.c
@@ -160,10 +160,6 @@ __curdump_set_key(WT_CURSOR *cursor, ...)
child = cdump->child;
CURSOR_API_CALL(cursor, session, set_key, NULL);
- if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
- WT_ERR_MSG(session, EINVAL,
- "Setting keys for JSON cursors not permitted");
-
va_start(ap, cursor);
if (F_ISSET(cursor, WT_CURSTD_RAW))
p = va_arg(ap, WT_ITEM *)->data;
@@ -176,8 +172,13 @@ __curdump_set_key(WT_CURSOR *cursor, ...)
child->set_key(child, recno);
} else {
- WT_ERR(__dump_to_raw(session, p, &cursor->key,
- F_ISSET(cursor, WT_CURSTD_DUMP_HEX) ? 1 : 0));
+ if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
+ WT_ERR(__wt_json_to_item(session, p, cursor->key_format,
+ (WT_CURSOR_JSON *)cursor->json_private, 1,
+ &cursor->key));
+ else
+ WT_ERR(__dump_to_raw(session, p, &cursor->key,
+ F_ISSET(cursor, WT_CURSTD_DUMP_HEX) ? 1 : 0));
child->set_key(child, &cursor->key);
}
@@ -255,10 +256,6 @@ __curdump_set_value(WT_CURSOR *cursor, ...)
child = cdump->child;
CURSOR_API_CALL(cursor, session, set_value, NULL);
- if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
- WT_ERR_MSG(session, EINVAL,
- "Setting values for JSON cursors not permitted");
-
va_start(ap, cursor);
if (F_ISSET(cursor, WT_CURSTD_RAW))
p = va_arg(ap, WT_ITEM *)->data;
@@ -266,8 +263,12 @@ __curdump_set_value(WT_CURSOR *cursor, ...)
p = va_arg(ap, const char *);
va_end(ap);
- WT_ERR(__dump_to_raw(session,
- p, &cursor->value, F_ISSET(cursor, WT_CURSTD_DUMP_HEX) ? 1 : 0));
+ if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
+ WT_ERR(__wt_json_to_item(session, p, cursor->value_format,
+ (WT_CURSOR_JSON *)cursor->json_private, 0, &cursor->value));
+ else
+ WT_ERR(__dump_to_raw(session, p, &cursor->value,
+ F_ISSET(cursor, WT_CURSTD_DUMP_HEX) ? 1 : 0));
child->set_value(child, &cursor->value);
diff --git a/src/cursor/cur_json.c b/src/cursor/cur_json.c
index 618596e39b8..4a4ae7544b1 100644
--- a/src/cursor/cur_json.c
+++ b/src/cursor/cur_json.c
@@ -7,6 +7,51 @@
#include "wt_internal.h"
+static size_t __json_unpack_put(WT_SESSION_IMPL *, void *, u_char *, size_t,
+ WT_CONFIG_ITEM *);
+static inline int __json_struct_size(WT_SESSION_IMPL *, const void *, size_t,
+ const char *, WT_CONFIG_ITEM *, int, size_t *);
+static inline int __json_struct_unpackv(WT_SESSION_IMPL *, const void *, size_t,
+ const char *, WT_CONFIG_ITEM *, u_char *, size_t, int, va_list);
+static int json_string_arg(WT_SESSION_IMPL *, const char **, WT_ITEM *);
+static int json_int_arg(WT_SESSION_IMPL *, const char **, int64_t *);
+static int json_uint_arg(WT_SESSION_IMPL *, const char **, uint64_t *);
+static int __json_pack_struct(WT_SESSION_IMPL *, void *, size_t, const char *,
+ const char *);
+static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *,
+ int, const char *, size_t *);
+
+/*
+ * WT_PACK_JSON_GET --
+ * Read the next JSON value from jstr into pv, choosing the parser from
+ * pv.type:
+ *   'x'                  nothing is read from the JSON input
+ *   's', 'S'             json_string_arg fills pv.u.item, and the type is
+ *                        retagged as 'j' / 'J' so the packing code can tell
+ *                        a still-escaped JSON string from a plain one
+ *   b, h, i, l, q        json_int_arg fills pv.u.i
+ *   B, H, I, L, Q, r, R, t  json_uint_arg fills pv.u.u
+ * jstr is advanced past the value by the helper that parsed it.
+ *
+ * NOTE(review): the WT_RET calls are presumed to return from the enclosing
+ * function on failure, so this macro is only usable in functions returning
+ * an int error code -- confirm against the WT_RET definition.
+ */
+#define WT_PACK_JSON_GET(session, pv, jstr) do { \
+ switch (pv.type) { \
+ case 'x': \
+ break; \
+ case 's': \
+ case 'S': \
+ WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \
+ pv.type = pv.type == 's' ? 'j' : 'J'; \
+ break; \
+ case 'b': \
+ case 'h': \
+ case 'i': \
+ case 'l': \
+ case 'q': \
+ WT_RET(json_int_arg(session, &jstr, &pv.u.i)); \
+ break; \
+ case 'B': \
+ case 'H': \
+ case 'I': \
+ case 'L': \
+ case 'Q': \
+ case 'r': \
+ case 'R': \
+ case 't': \
+ WT_RET(json_uint_arg(session, &jstr, &pv.u.u)); \
+ break; \
+ /* User format strings have already been validated. */ \
+ WT_ILLEGAL_VALUE(session); \
+ } \
+} while (0)
+
/*
* __json_unpack_put --
* Calculate the size of a packed byte string as formatted for JSON.
@@ -367,3 +412,519 @@ __wt_json_column_init(WT_CURSOR *cursor, const char *keyformat,
}
return (0);
}
+
+/*
+ * MATCH_KEYWORD --
+ *	Match a bare JSON keyword ("null", "true", "false") at the input
+ *	position "in".  On a match that is not followed by another
+ *	alphanumeric character, "in" is advanced past the keyword and
+ *	"result" is set to "matchval".  Otherwise the whole alphanumeric run
+ *	is skipped, an error is reported through __wt_errx and "result" is
+ *	left untouched, so the caller must have preset it to its error value.
+ *
+ *	Characters are cast to u_char before being handed to isalnum: passing
+ *	a negative plain char (for example a UTF-8 byte) is undefined.
+ */
+#define	MATCH_KEYWORD(session, in, result, keyword, matchval) do {	\
+	size_t _kwlen = strlen(keyword);				\
+	if (strncmp(in, keyword, _kwlen) == 0 &&			\
+	    !isalnum((u_char)in[_kwlen])) {				\
+		in += _kwlen;						\
+		result = matchval;					\
+	} else {							\
+		const char *_bad = in;					\
+		while (isalnum((u_char)*in))				\
+			in++;						\
+		__wt_errx(session, "unknown keyword \"%.*s\" in JSON",	\
+		    (int)(in - _bad), _bad);				\
+	}								\
+} while (0)
+
+/*
+ * __wt_json_token --
+ *	Return the type, start position and length of the next JSON
+ *	token in the input.  String tokens include the quotes.  JSON
+ *	can be entirely parsed using calls to this tokenizer, each
+ *	call using a src pointer that is the previously returned
+ *	tokstart + toklen.
+ *
+ *	The token type returned is one of:
+ *		0   : EOF
+ *		's' : string
+ *		'i' : intnum
+ *		'f' : floatnum
+ *		':' : colon
+ *		',' : comma
+ *		'{' : lbrace
+ *		'}' : rbrace
+ *		'[' : lbracket
+ *		']' : rbracket
+ *		'N' : null
+ *		'T' : true
+ *		'F' : false
+ *
+ *	Returns 0 on success (EOF included).  On malformed input the problem
+ *	is reported through __wt_errx, *toktype is set to -1, *toklen covers
+ *	the text consumed so far and EINVAL is returned.
+ */
+int
+__wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
+    const char **tokstart, size_t *toklen)
+{
+	WT_SESSION_IMPL *session;
+	char ch;
+	const char *bad;
+	int backslash, isalph, isfloat, result;
+
+	result = -1;
+	session = (WT_SESSION_IMPL *)wt_session;
+
+	/*
+	 * The ctype functions are only defined for unsigned char values and
+	 * EOF, so every character is cast before being classified.
+	 */
+	while (isspace((u_char)*src))
+		src++;
+	*tokstart = src;
+
+	if (*src == '\0') {
+		*toktype = 0;
+		*toklen = 0;
+		return (0);
+	}
+
+	/* JSON is specified in RFC 4627. */
+	switch (*src) {
+	case '"':
+		backslash = 0;
+		src++;
+		while ((ch = *src) != '\0') {
+			if (!backslash) {
+				if (ch == '"') {
+					src++;
+					result = 's';
+					break;
+				}
+				if (ch == '\\')
+					backslash = 1;
+			} else {
+				/* We validate Unicode on this pass. */
+				if (ch == 'u') {
+					u_char ignored;
+					const u_char *uc;
+
+					uc = (const u_char *)src;
+					if (__wt_hex2byte(&uc[1], &ignored) ||
+					    __wt_hex2byte(&uc[3], &ignored)) {
+						__wt_errx(session,
+						    "invalid Unicode within "
+						    "JSON string");
+						*toktype = result;
+						*toklen =
+						    (size_t)(src - *tokstart);
+						return (EINVAL);
+					}
+					/*
+					 * Step onto the last hex digit only:
+					 * the increment at the bottom of the
+					 * loop moves past it.  Stepping one
+					 * further would swallow the character
+					 * following the escape (typically the
+					 * closing quote).
+					 */
+					src += 4;
+				}
+				backslash = 0;
+			}
+			src++;
+		}
+		if (result != 's')
+			__wt_errx(session, "unterminated string in JSON");
+		break;
+	case '-':
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+		isfloat = 0;
+		if (*src == '-')
+			src++;
+		while ((ch = *src) != '\0' && isdigit((u_char)ch))
+			src++;
+		if (*src == '.') {
+			isfloat = 1;
+			src++;
+			while ((ch = *src) != '\0' && isdigit((u_char)ch))
+				src++;
+		}
+		if (*src == 'e' || *src == 'E') {
+			isfloat = 1;
+			src++;
+			if (*src == '+' || *src == '-')
+				src++;
+			while ((ch = *src) != '\0' && isdigit((u_char)ch))
+				src++;
+		}
+		result = isfloat ? 'f' : 'i';
+		break;
+	case ':':
+	case ',':
+	case '{':
+	case '}':
+	case '[':
+	case ']':
+		result = *src++;
+		break;
+	case 'n':
+		MATCH_KEYWORD(session, src, result, "null", 'N');
+		break;
+	case 't':
+		MATCH_KEYWORD(session, src, result, "true", 'T');
+		break;
+	case 'f':
+		MATCH_KEYWORD(session, src, result, "false", 'F');
+		break;
+	default:
+		/* An illegal token, move past it anyway */
+		bad = src;
+		isalph = isalnum((u_char)*src);
+		src++;
+		if (isalph)
+			while (*src != '\0' && isalnum((u_char)*src))
+				src++;
+		__wt_errx(session, "unknown token \"%.*s\" in JSON",
+		    (int)(src - bad), bad);
+		break;
+	}
+	*toklen = (size_t)(src - *tokstart);
+	*toktype = result;
+	return (result < 0 ? EINVAL : 0);
+}
+
+/*
+ * __wt_json_tokname --
+ *	Map a token type, as returned by __wt_json_token, to a printable
+ *	name.  Unrecognized types map to "<UNKNOWN>".
+ */
+const char *
+__wt_json_tokname(int toktype)
+{
+	static const struct {
+		int type;
+		const char *name;
+	} toknames[] = {
+		{ 0, "<EOF>" },
+		{ 's', "<string>" },
+		{ 'i', "<integer>" },
+		{ 'f', "<float>" },
+		{ ':', "':'" },
+		{ ',', "','" },
+		{ '{', "'{'" },
+		{ '}', "'}'" },
+		{ '[', "'['" },
+		{ ']', "']'" },
+		{ 'N', "'null'" },
+		{ 'T', "'true'" },
+		{ 'F', "'false'" }
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(toknames) / sizeof(toknames[0]); ++i)
+		if (toknames[i].type == toktype)
+			return (toknames[i].name);
+	return ("<UNKNOWN>");
+}
+
+/*
+ * json_string_arg --
+ *	Read a string token at *jstr and describe its contents, quotes
+ *	excluded, in item.  The bytes are referenced in place and are still
+ *	JSON-escaped.  On success *jstr is moved past the token; if the next
+ *	token is not a string an error is reported and EINVAL returned.
+ */
+static int
+json_string_arg(WT_SESSION_IMPL *session, const char **jstr, WT_ITEM *item)
+{
+	const char *start;
+	int toktype;
+
+	WT_RET(__wt_json_token(
+	    (WT_SESSION *)session, *jstr, &toktype, &start, &item->size));
+	if (toktype != 's') {
+		__wt_errx(session, "expected JSON <string>, got %s",
+		    __wt_json_tokname(toktype));
+		return (EINVAL);
+	}
+
+	/* Consume the whole token, quotes included. */
+	*jstr = start + item->size;
+
+	/* The tokenizer includes the '"' chars, expose only what is between. */
+	item->data = start + 1;
+	item->size -= 2;
+	return (0);
+}
+
+/*
+ * json_int_arg --
+ *	Parse the integer token at *jstr as a signed decimal value, store it
+ *	in *ip and move *jstr past the token.  Any other token type, or
+ *	token text strtoll does not consume entirely, yields EINVAL.
+ */
+static int
+json_int_arg(WT_SESSION_IMPL *session, const char **jstr, int64_t *ip)
+{
+	char *parsed_end;
+	const char *start;
+	int toktype;
+	size_t len;
+
+	WT_RET(__wt_json_token(
+	    (WT_SESSION *)session, *jstr, &toktype, &start, &len));
+	if (toktype != 'i') {
+		__wt_errx(session, "expected JSON <int>, got %s",
+		    __wt_json_tokname(toktype));
+		return (EINVAL);
+	}
+
+	/* JSON only allows decimal */
+	*ip = strtoll(start, &parsed_end, 10);
+	if (parsed_end != start + len)
+		WT_RET_MSG(session, EINVAL,
+		    "JSON <int> extraneous input");
+
+	*jstr = start + len;
+	return (0);
+}
+
+/*
+ * json_uint_arg --
+ *	Parse the integer token at *jstr as an unsigned decimal value, store
+ *	it in *up and move *jstr past the token.  A token that is not an
+ *	integer, starts with '-', or is not consumed entirely by strtoull
+ *	yields EINVAL.
+ */
+static int
+json_uint_arg(WT_SESSION_IMPL *session, const char **jstr, uint64_t *up)
+{
+	char *parsed_end;
+	const char *start;
+	int toktype;
+	size_t len;
+
+	WT_RET(__wt_json_token(
+	    (WT_SESSION *)session, *jstr, &toktype, &start, &len));
+	if (toktype != 'i' || *start == '-') {
+		__wt_errx(session, "expected unsigned JSON <int>, got %s",
+		    __wt_json_tokname(toktype));
+		return (EINVAL);
+	}
+
+	/* JSON only allows decimal */
+	*up = strtoull(start, &parsed_end, 10);
+	if (parsed_end != start + len)
+		WT_RET_MSG(session, EINVAL,
+		    "JSON <int> extraneous input");
+
+	*jstr = start + len;
+	return (0);
+}
+
+/*
+ * JSON_EXPECT_TOKEN_GET --
+ * Read the next token from jstr and require its type to be tokval. On
+ * success, start and sz receive the token's position and length and jstr
+ * is advanced past it. On a mismatch the error is reported and the macro
+ * executes "return (EINVAL)" in the calling function, so it may only be
+ * used inside functions returning an int error code.
+ */
+#define JSON_EXPECT_TOKEN_GET(session, jstr, tokval, start, sz) do { \
+ int __tok; \
+ WT_RET(__wt_json_token((WT_SESSION *)session, jstr, &__tok, &start, &sz));\
+ if (__tok != tokval) { \
+ __wt_errx(session, "expected JSON %s, got %s", \
+ __wt_json_tokname(tokval), __wt_json_tokname(__tok)); \
+ return (EINVAL); \
+ } \
+ jstr = start + sz; \
+} while (0)
+
+/*
+ * JSON_EXPECT_TOKEN --
+ * As JSON_EXPECT_TOKEN_GET, for callers that only need the token to be
+ * present and consumed: its position and length are discarded.
+ */
+#define JSON_EXPECT_TOKEN(session, jstr, tokval) do { \
+ const char *__start; \
+ size_t __sz; \
+ JSON_EXPECT_TOKEN_GET(session, jstr, tokval, __start, __sz); \
+} while (0)
+
+/*
+ * __json_pack_struct --
+ * Pack a byte string from a JSON string.
+ *
+ * The JSON text in jstr is read as a sequence of "name" : value pairs
+ * separated by commas, one pair per field of fmt, and each value is
+ * written in packed form into buffer, which holds size bytes. The names
+ * are consumed here but not compared against the column names.
+ * Returns 0 on success or an error code.
+ */
+static int
+__json_pack_struct(WT_SESSION_IMPL *session, void *buffer, size_t size,
+ const char *fmt, const char *jstr)
+{
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ const char *tokstart;
+ int multi;
+ size_t toksize;
+ uint8_t *p, *end;
+
+ p = buffer;
+ end = p + size;
+ multi = 0;
+
+ /* A format of exactly one character is handled without __pack_init. */
+ if (fmt[0] != '\0' && fmt[1] == '\0') {
+ JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+ /* the key name was verified in __json_pack_size */
+ JSON_EXPECT_TOKEN(session, jstr, ':');
+ pv.type = fmt[0];
+ WT_PACK_JSON_GET(session, pv, jstr);
+ return (__pack_write(session, &pv, &p, size));
+ }
+
+ WT_RET(__pack_init(session, &pack, fmt));
+ while ((ret = __pack_next(&pack, &pv)) == 0) {
+ /* Every pair after the first must be preceded by a comma. */
+ if (multi)
+ JSON_EXPECT_TOKEN(session, jstr, ',');
+ JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+ /* the key name was verified in __json_pack_size */
+ JSON_EXPECT_TOKEN(session, jstr, ':');
+ WT_PACK_JSON_GET(session, pv, jstr);
+ /* Only the space still unused in the buffer may be written. */
+ WT_RET(__pack_write(session, &pv, &p, (size_t)(end - p)));
+ multi = 1;
+ }
+
+ /* Be paranoid - __pack_write should never overflow. */
+ WT_ASSERT(session, p <= end);
+
+ /* WT_NOTFOUND from __pack_next is the normal end of the format. */
+ if (ret != WT_NOTFOUND)
+ return (ret);
+
+ return (0);
+}
+
+/*
+ * __json_pack_size --
+ * Calculate the size of a packed byte string from a JSON string.
+ * We verify that the names and value types provided in JSON match
+ * the column names and type from the schema format, returning error
+ * if not.
+ *
+ * fmt is the key or value format, names supplies the expected column
+ * names, iskey selects key or value for the name iterator and for error
+ * text, and jstr is the JSON input. On success the packed size is stored
+ * in *sizep and 0 is returned; any mismatch returns an error.
+ */
+static int
+__json_pack_size(
+ WT_SESSION_IMPL *session, const char *fmt, WT_CONFIG_ITEM *names,
+ int iskey, const char *jstr, size_t *sizep)
+{
+ WT_CONFIG_ITEM name;
+ WT_DECL_PACK_VALUE(pv);
+ WT_PACK pack;
+ WT_PACK_NAME packname;
+ const char *tokstart;
+ int multi;
+ size_t toksize, total;
+
+ WT_RET(__pack_name_init(session, names, iskey, &packname));
+ multi = 0;
+ WT_RET(__pack_init(session, &pack, fmt));
+ for (total = 0; __pack_next(&pack, &pv) == 0;) {
+ /* Every pair after the first must be preceded by a comma. */
+ if (multi)
+ JSON_EXPECT_TOKEN(session, jstr, ',');
+ JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+ WT_RET(__pack_name_next(&packname, &name));
+ /*
+ * Compare the token text between its quotes with the expected
+ * column name. The raw token bytes are compared, JSON escapes in
+ * the name are not decoded first.
+ */
+ if (toksize - 2 != name.len ||
+ strncmp(tokstart + 1, name.str, toksize - 2) != 0) {
+ __wt_errx(session, "JSON expected %s name: \"%.*s\"",
+ iskey ? "key" : "value", (int)name.len, name.str);
+ return (EINVAL);
+ }
+ JSON_EXPECT_TOKEN(session, jstr, ':');
+ /* Parsing the value also checks that its type fits the format. */
+ WT_PACK_JSON_GET(session, pv, jstr);
+ total += __pack_size(session, &pv);
+ multi = 1;
+ }
+ /* check end of string */
+ JSON_EXPECT_TOKEN(session, jstr, 0);
+
+ *sizep = total;
+ return (0);
+}
+
+/*
+ * __wt_json_to_item --
+ *	Turn the JSON text for a key (iskey non-zero) or a value into its
+ *	packed form in item.  The input is first checked against the format
+ *	and column names while its packed size is computed, then item is
+ *	sized accordingly and the fields are packed into it.
+ */
+int
+__wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr,
+    const char *format, WT_CURSOR_JSON *json, int iskey, WT_ITEM *item)
+{
+	WT_CONFIG_ITEM *names;
+	size_t packed_size;
+
+	names = iskey ? &json->key_names : &json->value_names;
+
+	/* First pass: validate the input and learn how much room is needed. */
+	WT_RET(__json_pack_size(
+	    session, format, names, iskey, jstr, &packed_size));
+
+	/* Second pass: size the buffer and pack into it. */
+	WT_RET(__wt_buf_initsize(session, item, packed_size));
+	WT_RET(__json_pack_struct(
+	    session, item->mem, packed_size, format, jstr));
+	return (0);
+}
+
+/*
+ * __wt_json_strlen --
+ *	Return the number of bytes represented by a string in JSON format,
+ *	or -1 if the format is incorrect.
+ *
+ *	src and srclen delimit the still-escaped string contents.  A
+ *	"\uHHHH" escape counts as the 1, 2 or 3 bytes of its UTF-8 encoding;
+ *	a backslash followed by any other character counts as one byte; all
+ *	other characters count as themselves.
+ */
+ssize_t
+__wt_json_strlen(const char *src, size_t srclen)
+{
+	const char *srcend;
+	size_t dstlen;
+	u_char hi, lo;
+
+	dstlen = 0;
+	srcend = src + srclen;
+	while (src < srcend) {
+		/* JSON can express a 16-bit Unicode value as \u + 4 hex chars. */
+		if (*src == '\\') {
+			if (*++src == 'u') {
+				if (__wt_hex2byte((const u_char *)++src, &hi))
+					return (-1);
+				src += 2;
+				if (__wt_hex2byte((const u_char *)src, &lo))
+					return (-1);
+				/*
+				 * Stop on the last hex digit: the increment
+				 * shared with ordinary characters, below,
+				 * moves past it.  Advancing by two here would
+				 * skip the character following the escape.
+				 */
+				src++;
+				/* RFC 3629 */
+				if (hi >= 0x8) {
+					/* 3 bytes total */
+					dstlen += 2;
+				} else if (hi != 0 || lo >= 0x80) {
+					/* 2 bytes total */
+					dstlen++;
+				}
+				/* else 1 byte total */
+			}
+		}
+		dstlen++;
+		src++;
+	}
+	if (src != srcend)
+		return (-1);   /* invalid input, e.g. final char is '\\' */
+	return ((ssize_t)dstlen);
+}
+
+/*
+ * __wt_json_strncpy --
+ *	Copy bytes of string in JSON format to a destination,
+ *	up to dstlen bytes.  If dstlen is greater than the needed size,
+ *	the result is zero padded.
+ *
+ *	"\uHHHH" escapes are written as UTF-8; a backslash followed by any
+ *	other character copies that character.  On success *pdst is moved to
+ *	the byte following the copied data (the padding is written after
+ *	it) and 0 is returned.  EINVAL is returned for a malformed \u
+ *	escape and ENOMEM if the destination is too small.
+ */
+int
+__wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen)
+{
+	char *dst;
+	const char *dstend, *srcend;
+	u_char hi, lo;
+
+	dst = *pdst;
+	dstend = dst + dstlen;
+	srcend = src + srclen;
+	while (src < srcend && dst < dstend) {
+		/* JSON can express a 16-bit Unicode value as \u + 4 hex chars. */
+		if (*src == '\\') {
+			if (*++src == 'u') {
+				if (__wt_hex2byte((const u_char *)++src, &hi))
+					return (EINVAL);
+				src += 2;
+				if (__wt_hex2byte((const u_char *)src, &lo))
+					return (EINVAL);
+				/*
+				 * Stop on the last hex digit: the increment at
+				 * the bottom of the loop moves past it.
+				 * Advancing by two here would skip the
+				 * character following the escape.
+				 */
+				src++;
+				/* RFC 3629 */
+				if (hi >= 0x8) {
+					/* 3 bytes total */
+					/* byte 0: 1110HHHH */
+					/* byte 1: 10HHHHLL */
+					/* byte 2: 10LLLLLL */
+					if (dstend - dst < 3)
+						return (ENOMEM);
+					/* The top nibble of hi, shifted down. */
+					*dst++ = (char)(0xe0 |
+					    ((hi >> 4) & 0x0f));
+					*dst++ = (char)(0x80 |
+					    ((hi << 2) & 0x3c) |
+					    ((lo >> 6) & 0x03));
+					*dst++ = (char)(0x80 | (lo & 0x3f));
+				} else if (hi != 0 || lo >= 0x80) {
+					/* 2 bytes total */
+					/* byte 0: 110HHHLL */
+					/* byte 1: 10LLLLLL */
+					if (dstend - dst < 2)
+						return (ENOMEM);
+					*dst++ = (char)(0xc0 |
+					    (hi << 2) |
+					    ((lo >> 6) & 0x03));
+					*dst++ = (char)(0x80 | (lo & 0x3f));
+				} else
+					/* else 1 byte total */
+					/* byte 0: 0LLLLLLL */
+					*dst++ = (char)lo;
+			} else
+				*dst++ = *src;
+		} else
+			*dst++ = *src;
+		src++;
+	}
+	/* Input left over means the destination filled up first. */
+	if (src != srcend)
+		return (ENOMEM);
+	*pdst = dst;
+	while (dst < dstend)
+		*dst++ = '\0';
+	return (0);
+}
diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c
index 52cdf232279..cfaf83824fd 100644
--- a/src/cursor/cur_std.c
+++ b/src/cursor/cur_std.c
@@ -276,9 +276,10 @@ __wt_cursor_set_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap)
cursor->key.data = &cursor->recno;
sz = sizeof(cursor->recno);
} else {
- /* Fast path some common cases. */
+ /* Fast path some common cases and special case WT_ITEMs. */
fmt = cursor->key_format;
- if (LF_ISSET(WT_CURSOR_RAW_OK) || WT_STREQ(fmt, "u")) {
+ if (LF_ISSET(WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON) ||
+ WT_STREQ(fmt, "u")) {
item = va_arg(ap, WT_ITEM *);
sz = item->size;
cursor->key.data = item->data;
@@ -399,7 +400,8 @@ __wt_cursor_set_valuev(WT_CURSOR *cursor, va_list ap)
/* Fast path some common cases. */
fmt = cursor->value_format;
- if (F_ISSET(cursor, WT_CURSOR_RAW_OK) || WT_STREQ(fmt, "u")) {
+ if (F_ISSET(cursor, WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON) ||
+ WT_STREQ(fmt, "u")) {
item = va_arg(ap, WT_ITEM *);
sz = item->size;
cursor->value.data = item->data;
diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c
index 21a1b6e07e4..aa336805d06 100644
--- a/src/cursor/cur_table.c
+++ b/src/cursor/cur_table.c
@@ -146,7 +146,7 @@ __wt_curtable_set_value(WT_CURSOR *cursor, ...)
CURSOR_API_CALL(cursor, session, set_value, NULL);
va_start(ap, cursor);
- if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) {
+ if (F_ISSET(cursor, WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON)) {
item = va_arg(ap, WT_ITEM *);
cursor->value.data = item->data;
cursor->value.size = item->size;
diff --git a/src/docs/command-line.dox b/src/docs/command-line.dox
index ee51cc21c2a..a4de4d85e71 100644
--- a/src/docs/command-line.dox
+++ b/src/docs/command-line.dox
@@ -175,6 +175,10 @@ column store.
By default, the \c load command reads from the standard input; the \c
-f option reads the input from the specified file.
+@par <code>-j</code>
+Load input in the JSON (<a href="http://www.json.org">JavaScript Object
+Notation</a>) format that was created by the <code>dump -j</code> command.
+
@par <code>-n</code>
By default, input data will overwrite existing data where the key/value
pair already exists in the data source; the \c -n option causes the \c
diff --git a/src/include/extern.h b/src/include/extern.h
index d9f0dd48abb..807092f6060 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -270,6 +270,11 @@ extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer,
extern void __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor);
extern size_t __wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, int force_unicode);
extern int __wt_json_column_init(WT_CURSOR *cursor, const char *keyformat, const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf);
+extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, const char **tokstart, size_t *toklen);
+extern const char *__wt_json_tokname(int toktype);
+extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, WT_CURSOR_JSON *json, int iskey, WT_ITEM *item);
+extern ssize_t __wt_json_strlen(const char *src, size_t srclen);
+extern int __wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen);
extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp);
extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
extern void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst);
@@ -569,6 +574,7 @@ extern void __wt_hazard_close(WT_SESSION_IMPL *session);
extern int __wt_raw_to_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to);
extern void __wt_raw_to_hex_mem( const uint8_t *from, size_t size, uint8_t *dest, size_t dest_size);
extern int __wt_raw_to_esc_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to);
+extern int __wt_hex2byte(const u_char *from, u_char *to);
extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to);
extern int __wt_nhex_to_raw( WT_SESSION_IMPL *session, const char *from, size_t size, WT_ITEM *to);
extern int __wt_esc_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to);
diff --git a/src/include/packing.i b/src/include/packing.i
index 7178052ed91..6e0e7be13eb 100644
--- a/src/include/packing.i
+++ b/src/include/packing.i
@@ -248,6 +248,20 @@ __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv)
switch (pv->type) {
case 'x':
return (pv->size);
+ case 'j':
+ case 'J':
+ if (pv->type == 'j' || pv->havesize)
+ s = pv->size;
+ else {
+ ssize_t len;
+
+ /* The string was previously validated. */
+ len = __wt_json_strlen(pv->u.item.data,
+ pv->u.item.size);
+ WT_ASSERT(session, len >= 0);
+ s = (size_t)len + 1;
+ }
+ return (s);
case 's':
case 'S':
if (pv->type == 's' || pv->havesize)
@@ -329,6 +343,28 @@ __pack_write(
*pp += pad;
}
break;
+ case 'j':
+ case 'J':
+ s = pv->u.item.size;
+ if ((pv->type == 'j' || pv->havesize) && pv->size < s) {
+ s = pv->size;
+ pad = 0;
+ } else if (pv->havesize)
+ pad = pv->size - s;
+ else
+ pad = 1;
+ if (s > 0) {
+ oldp = *pp;
+ WT_RET(__wt_json_strncpy((char **)pp, maxlen,
+ pv->u.item.data, s));
+ maxlen -= (size_t)(*pp - oldp);
+ }
+ if (pad > 0) {
+ WT_SIZE_CHECK(pad, maxlen);
+ memset(*pp, 0, pad);
+ *pp += pad;
+ }
+ break;
case 'U':
case 'u':
s = pv->u.item.size;
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index f985fc062c4..c83c5f49144 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -851,10 +851,9 @@ struct __wt_session {
* modification., a string; default empty.}
* @config{dump, configure the cursor for dump format inputs and
* outputs: "hex" selects a simple hexadecimal format\, "json" selects a
- * JSON format with each record formats as fields named by column names
- * if available\, and "print" selects a format where only non-printing
- * characters are hexadecimal encoded\, and "json" produces a JSON
- * encoding of the data. The "hex" and "print" dump format are
+ * JSON format with each record formatted as fields named by column
+ * names if available\, and "print" selects a format where only
+ * non-printing characters are hexadecimal encoded. These formats are
* compatible with the @ref util_dump and @ref util_load commands., a
* string\, chosen from the following options: \c "hex"\, \c "json"\, \c
* "print"; default empty.}
diff --git a/src/support/hex.c b/src/support/hex.c
index 552fbfa1375..96cf5ecc4d4 100644
--- a/src/support/hex.c
+++ b/src/support/hex.c
@@ -106,11 +106,11 @@ __wt_raw_to_esc_hex(
}
/*
- * hex2byte --
+ * __wt_hex2byte --
* Convert a pair of hex characters into a byte.
*/
-static inline int
-hex2byte(const u_char *from, u_char *to)
+int
+__wt_hex2byte(const u_char *from, u_char *to)
{
uint8_t byte;
@@ -196,7 +196,7 @@ __wt_nhex_to_raw(
WT_RET(__wt_buf_init(session, to, size / 2));
for (p = (u_char *)from, t = to->mem; size > 0; p += 2, size -= 2, ++t)
- if (hex2byte(p, t))
+ if (__wt_hex2byte(p, t))
return (__hex_fmterr(session));
to->size = WT_PTRDIFF(t, to->mem);
@@ -220,7 +220,7 @@ __wt_esc_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to)
continue;
++p;
if (p[0] != '\\') {
- if (p[0] == '\0' || p[1] == '\0' || hex2byte(p, t))
+ if (p[0] == '\0' || p[1] == '\0' || __wt_hex2byte(p, t))
return (__hex_fmterr(session));
++p;
}
diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c
index 85b63b6ab9c..bd0590948b4 100644
--- a/src/utilities/util_dump.c
+++ b/src/utilities/util_dump.c
@@ -251,7 +251,7 @@ dump_json_table_begin(WT_CURSOR *cursor, const char *uri, const char *config)
dump_json_table_cg(cursor, uri, name, "index:", "indices");
}
- if (printf("\n },\n [") < 0)
+ if (printf("\n },\n {\n \"data\" : [") < 0)
goto eio;
if (0) {
@@ -422,7 +422,7 @@ dump_json_table_config(WT_SESSION *session, const char *uri)
static int
dump_json_table_end(void)
{
- if (printf(" ]\n ]") < 0)
+ if (printf(" ]\n }\n ]") < 0)
return (util_err(EIO, NULL));
return (0);
}
@@ -595,9 +595,9 @@ dump_record(WT_CURSOR *cursor, const char *name, int reverse, int json)
once = 0;
if (json) {
- prefix = "\n {\n";
+ prefix = "\n{\n";
infix = ",\n";
- suffix = "\n }";
+ suffix = "\n}";
} else {
prefix = "";
infix = "\n";
diff --git a/src/utilities/util_load.c b/src/utilities/util_load.c
index 4bdf356cfd6..1a7e71571a6 100644
--- a/src/utilities/util_load.c
+++ b/src/utilities/util_load.c
@@ -6,26 +6,30 @@
*/
#include "util.h"
+#include "util_load.h"
static int format(void);
static int insert(WT_CURSOR *, const char *);
static int load_dump(WT_SESSION *);
-static int config_read(char ***, int *);
-static int config_rename(char **, const char *);
-static int config_update(WT_SESSION *, char **);
static int usage(void);
static int append; /* -a append (ignore record number keys) */
static char *cmdname; /* -r rename */
static char **cmdconfig; /* configuration pairs */
+static int json; /* -j input is JSON format */
static int no_overwrite; /* -n don't overwrite existing data */
int
util_load(WT_SESSION *session, int argc, char *argv[])
{
int ch;
+ const char *filename;
+ uint32_t flags;
- while ((ch = __wt_getopt(progname, argc, argv, "af:nr:")) != EOF)
+ flags = 0;
+
+ filename = "<stdin>";
+ while ((ch = __wt_getopt(progname, argc, argv, "af:jnr:")) != EOF)
switch (ch) {
case 'a': /* append (ignore record number keys) */
append = 1;
@@ -34,6 +38,11 @@ util_load(WT_SESSION *session, int argc, char *argv[])
if (freopen(__wt_optarg, "r", stdin) == NULL)
return (
util_err(errno, "%s: reopen", __wt_optarg));
+ else
+ filename = __wt_optarg;
+ break;
+ case 'j': /* input is JSON */
+ json = 1;
break;
case 'n': /* don't overwrite existing data */
no_overwrite = 1;
@@ -61,7 +70,14 @@ util_load(WT_SESSION *session, int argc, char *argv[])
cmdconfig = argv;
}
- return (load_dump(session));
+ if (json) {
+ if (append)
+ flags |= LOAD_JSON_APPEND;
+ if (no_overwrite)
+ flags |= LOAD_JSON_NO_OVERWRITE;
+ return (util_load_json(session, filename, flags));
+ } else
+ return (load_dump(session));
}
/*
@@ -74,7 +90,7 @@ load_dump(WT_SESSION *session)
WT_CURSOR *cursor;
WT_DECL_RET;
int hex, tret;
- char **entry, **list, *p, **tlist, *uri, config[64];
+ char **list, **tlist, *uri, config[64];
cursor = NULL;
list = NULL; /* -Wuninitialized */
@@ -85,48 +101,18 @@ load_dump(WT_SESSION *session)
if ((ret = config_read(&list, &hex)) != 0)
return (ret);
- /*
- * Search for a table name -- if we find one, then it's table dump,
- * otherwise, it's a single file dump.
- */
- for (entry = list; *entry != NULL; ++entry)
- if (WT_PREFIX_MATCH(*entry, "table:"))
- break;
- if (*entry == NULL) {
- /*
- * Single file dumps can only have two lines, the file name and
- * the configuration information.
- */
- if ((list[0] == NULL || list[1] == NULL || list[2] != NULL) ||
- (WT_PREFIX_MATCH(list[0], "file:") &&
- WT_PREFIX_MATCH(list[0], "lsm:"))) {
- ret = format();
- goto err;
- }
-
- entry = list;
- }
-
- /*
- * Make sure the table key/value pair comes first, then we can just
- * run through the array in order. (We already checked that we had
- * a multiple of 2 entries, so this is safe.)
- */
- if (entry != list) {
- p = list[0]; list[0] = entry[0]; entry[0] = p;
- p = list[1]; list[1] = entry[1]; entry[1] = p;
- }
+ /* Reorder and check the list. */
+ if ((ret = config_reorder(list)) != 0)
+ return (ret);
/* Update the config based on any command-line configuration. */
if ((ret = config_update(session, list)) != 0)
goto err;
uri = list[0];
- for (entry = list; *entry != NULL; entry += 2)
- if ((ret = session->create(session, entry[0], entry[1])) != 0) {
- ret = util_err(ret, "%s: session.create", entry[0]);
- goto err;
- }
+ /* Create the items in the list. */
+ if ((ret = config_exec(session, list)) != 0)
+ goto err;
/* Open the insert cursor. */
(void)snprintf(config, sizeof(config),
@@ -173,10 +159,51 @@ err: /*
}
/*
+ * config_exec --
+ *	Call session.create for each (uri, config) pair in the NULL-terminated
+ *	list, stopping at the first failure.
+ */
+int
+config_exec(WT_SESSION *session, char **list)
+{
+	WT_DECL_RET;
+	char **pair;
+
+	pair = list;
+	while (*pair != NULL) {
+		ret = session->create(session, pair[0], pair[1]);
+		if (ret != 0)
+			return (util_err(ret, "%s: session.create", pair[0]));
+		pair += 2;
+	}
+	return (0);
+}
+
+/*
+ * config_list_add --
+ *	Append a string to the list, growing the array as needed and keeping
+ *	it NULL-terminated.  On success the list holds val (config_list_free
+ *	frees it); if growing the array fails, the list is left unchanged and
+ *	val still belongs to the caller.
+ */
+int
+config_list_add(CONFIG_LIST *clp, char *val)
+{
+	char **newlist;
+	int newmax;
+
+	/* Always keep room for the new entry plus the trailing NULL. */
+	if (clp->entry + 1 >= clp->max_entry) {
+		newmax = clp->max_entry + 100;
+		/*
+		 * realloc leaves the original block untouched on failure, so
+		 * only replace the list pointer (and the recorded size) once
+		 * the call has succeeded.
+		 */
+		if ((newlist = realloc(clp->list,
+		    (size_t)newmax * sizeof(char *))) == NULL)
+			return (util_err(errno, NULL));
+		clp->list = newlist;
+		clp->max_entry = newmax;
+	}
+
+	clp->list[clp->entry++] = val;
+	clp->list[clp->entry] = NULL;
+	return (0);
+}
+
+/*
+ * config_list_free --
+ *	Free every string held by the list and the array itself, then reset
+ *	the structure so it can be reused for another list.
+ */
+void
+config_list_free(CONFIG_LIST *clp)
+{
+	char **entry;
+
+	if (clp->list != NULL)
+		for (entry = &clp->list[0]; *entry != NULL; entry++)
+			free(*entry);
+	free(clp->list);
+	clp->list = NULL;
+	/*
+	 * Reset the counters along with the pointer: config_list_add only
+	 * allocates when entry catches up with max_entry, so stale counts
+	 * would make the next add store through the NULL list.
+	 */
+	clp->entry = 0;
+	clp->max_entry = 0;
+}
+
+/*
* config_read --
* Read the config lines and do some basic validation.
*/
-static int
+int
config_read(char ***listp, int *hexp)
{
ULINE l;
@@ -260,16 +287,62 @@ err: if (list != NULL) {
}
/*
+ * config_reorder --
+ * For table dumps, reorder the list so tables are first.
+ * For other dumps, make any needed checks.
+ *
+ * The list is a NULL-terminated array of alternating (uri, config)
+ * strings and is modified in place. Returns 0 on success, or the
+ * result of format() when a non-table dump is rejected.
+ */
+int
+config_reorder(char **list)
+{
+ char **entry, *p;
+
+ /*
+ * Search for a table name -- if we find one, then it's table dump,
+ * otherwise, it's a single file dump.
+ */
+ for (entry = list; *entry != NULL; ++entry)
+ if (WT_PREFIX_MATCH(*entry, "table:"))
+ break;
+ if (*entry == NULL) {
+ /*
+ * Single file dumps can only have two lines, the file name and
+ * the configuration information.
+ *
+ * NOTE(review): the two WT_PREFIX_MATCH tests below are joined
+ * with &&. Assuming WT_PREFIX_MATCH is a plain prefix compare,
+ * no string starts with both "file:" and "lsm:", so that half
+ * of the condition can never reject anything -- confirm whether
+ * the intent was to reject URIs that are neither.
+ */
+ if ((list[0] == NULL || list[1] == NULL || list[2] != NULL) ||
+ (WT_PREFIX_MATCH(list[0], "file:") &&
+ WT_PREFIX_MATCH(list[0], "lsm:")))
+ return (format());
+
+ entry = list;
+ }
+
+ /*
+ * Make sure the table key/value pair comes first, then we can just
+ * run through the array in order. (We already checked that we had
+ * a multiple of 2 entries, so this is safe.)
+ */
+ if (entry != list) {
+ p = list[0]; list[0] = entry[0]; entry[0] = p;
+ p = list[1]; list[1] = entry[1]; entry[1] = p;
+ }
+ return (0);
+}
+
+/*
* config_update --
* Reconcile and update the command line configuration against the
- * config we found.
+ * config we found.
*/
-static int
+int
config_update(WT_SESSION *session, char **list)
{
int found;
const char *cfg[] = { NULL, NULL, NULL };
- char **configp, **listp, *p, *t;
+ char **configp, **listp;
+ const char **rm;
+ static const char *rmnames[] = {
+ "filename", "id", "checkpoint", "checkpoint_lsn",
+ "version", "source", NULL };
/*
* If the object has been renamed, replace all of the column group,
@@ -296,16 +369,14 @@ config_update(WT_SESSION *session, char **list)
}
/*
- * Remove all "filename=" configurations from the values, new filenames
- * are chosen as part of table load.
+ * Remove all "filename=", "source=" and other configurations
+ * that foil loading from the values. New filenames are chosen
+ * as part of table load.
*/
for (listp = list; *listp != NULL; listp += 2)
- if ((p = strstr(listp[1], "filename=")) != NULL) {
- if ((t = strchr(p, ',')) == NULL)
- *p = '\0';
- else
- memmove(p, t + 1, strlen(t + 1) + 1);
- }
+ for (rm = rmnames; *rm != NULL; rm++)
+ if (strstr(listp[1], *rm) != NULL)
+ config_remove(listp[1], *rm);
/*
* It's possible to update everything except the key/value formats.
@@ -375,7 +446,7 @@ config_update(WT_SESSION *session, char **list)
* config_rename --
* Update the URI name.
*/
-static int
+int
config_rename(char **urip, const char *name)
{
size_t len;
@@ -403,6 +474,46 @@ config_rename(char **urip, const char *name)
}
/*
+ * config_remove --
+ * Remove a single config key and its value.
+ *
+ * The config string is edited in place. Only the first occurrence of
+ * "ckey=" is removed, and it is located with strstr, so the text is
+ * matched anywhere in the string -- including at the tail of a longer
+ * key name or inside a nested or quoted value.
+ */
+void
+config_remove(char *config, const char *ckey)
+{
+ int parens, quoted;
+ char *begin, match[100], *next, *p;
+
+ /* Keys too long for the match buffer are silently truncated. */
+ snprintf(match, sizeof(match), "%s=", ckey);
+ if ((begin = strstr(config, match)) != NULL) {
+ parens = 0;
+ quoted = 0;
+ next = NULL;
+ /*
+ * Scan the value for the comma that ends it, ignoring commas
+ * inside parentheses or double quotes. Backslash escapes are
+ * not interpreted: every '"' toggles the quoted state.
+ */
+ for (p = begin + strlen(match); !next && *p; p++)
+ switch (*p) {
+ case '(':
+ if (!quoted)
+ parens++;
+ break;
+ case ')':
+ if (!quoted)
+ parens--;
+ break;
+ case '"':
+ quoted = !quoted;
+ break;
+ case ',':
+ if (!quoted && parens == 0)
+ next = p + 1;
+ break;
+ }
+ /*
+ * Close the gap over the removed pair. When the pair was last,
+ * the string is cut at the key, which leaves any comma that
+ * preceded it in place.
+ */
+ if (next)
+ memmove(begin, next, strlen(next) + 1);
+ else
+ *begin = '\0';
+ }
+}
+
+/*
* format --
* The input doesn't match the dump format.
*/
diff --git a/src/utilities/util_load.h b/src/utilities/util_load.h
new file mode 100644
index 00000000000..13174b95c72
--- /dev/null
+++ b/src/utilities/util_load.h
@@ -0,0 +1,30 @@
+/*-
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+/*
+ * A growable, NULL-terminated list of configuration strings; entries are
+ * added with config_list_add and released with config_list_free.
+ */
+typedef struct {
+ char **list; /* array of alternating (uri, config) values */
+ int entry; /* next entry available in list */
+ int max_entry; /* how many allocated in list */
+} CONFIG_LIST;
+
+int config_exec(WT_SESSION *, char **);
+int config_list_add(CONFIG_LIST *, char *);
+void config_list_free(CONFIG_LIST *);
+int config_read(char ***, int *);
+int config_rename(char **, const char *);
+void config_remove(char *, const char *);
+int config_reorder(char **);
+int config_update(WT_SESSION *, char **);
+
+/* Flags for util_load_json */
+#define LOAD_JSON_APPEND 0x0001 /* append (ignore record number keys) */
+#define LOAD_JSON_NO_OVERWRITE 0x0002 /* don't overwrite existing data */
+
+int util_load_json(WT_SESSION *, const char *, uint32_t);
diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c
new file mode 100644
index 00000000000..9fba6b73948
--- /dev/null
+++ b/src/utilities/util_load_json.c
@@ -0,0 +1,567 @@
+/*-
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "util.h"
+#include "util_load.h"
+
+/*
+ * Encapsulates the input state for parsing JSON.
+ *
+ * At any time, we may be peeking at an unconsumed token; this is
+ * indicated by 'peeking' as true. toktype, tokstart, toklen will be
+ * set in this case.
+ *
+ * Generally we are collecting and processing tokens one by one.
+ * In JSON, tokens never span lines so this makes processing easy.
+ * The exception is that a JSON dump cursor takes the complete
+ * set of keys or values during cursor->set_key/set_value calls,
+ * which may contain many tokens and span lines. E.g.
+ * cursor->set_value("\"name\" : \"John\", \"phone\" : 2348765");
+ * The raw key/value string is collected in the kvraw field.
+ */
+typedef struct {
+ WT_SESSION *session; /* associated session */
+ ULINE line; /* current line */
+ const char *p; /* points to cur position in line.mem */
+ int ateof; /* end of input flag, filled in by util_read_line */
+ int peeking; /* peeking at next token */
+ int toktype; /* next token type: 0 at EOF, -1 on error */
+ const char *tokstart; /* next token start (points into line.mem) */
+ size_t toklen; /* next token length */
+ char *kvraw; /* multiple line raw content collected so far */
+ size_t kvrawstart; /* pos on cur line that JSON key/value starts */
+ const char *filename; /* filename for error reporting */
+ int linenum; /* line number for error reporting */
+} JSON_INPUT_STATE;
+
+static int json_column_group_index(WT_SESSION *, JSON_INPUT_STATE *,
+ CONFIG_LIST *, int);
+static int json_data(WT_SESSION *, JSON_INPUT_STATE *, CONFIG_LIST *, uint32_t);
+static int json_expect(WT_SESSION *, JSON_INPUT_STATE *, int);
+static int json_peek(WT_SESSION *, JSON_INPUT_STATE *);
+static int json_skip(WT_SESSION *, JSON_INPUT_STATE *, const char **);
+static int json_kvraw_append(JSON_INPUT_STATE *, const char *, size_t);
+static int json_strdup(JSON_INPUT_STATE *, char **);
+static int json_top_level(WT_SESSION *, JSON_INPUT_STATE *, uint32_t);
+
+#define JSON_STRING_MATCH(ins, match) \
+ ((ins)->toklen - 2 == strlen(match) && \
+ strncmp((ins)->tokstart + 1, (match), (ins)->toklen - 2) == 0)
+
+#define JSON_INPUT_POS(ins) \
+ ((size_t)((ins)->p - (const char *)(ins)->line.mem))
+
+#define JSON_EXPECT(session, ins, tok) do { \
+ if (json_expect(session, ins, tok)) \
+ goto err; \
+} while (0)
+
+/*
+ * json_column_group_index --
+ *	Parse a column group or index entry from JSON input.
+ *
+ *	Each array element is an object holding exactly one "uri" and one
+ *	"config" string, in either order.  Every pair is appended to clp as
+ *	(uri, config).  A non-zero idx requires URIs starting with "index:",
+ *	a zero idx requires "colgroup:".  Returns 0 on success and non-zero
+ *	on error; strings not yet handed to the list are freed on error.
+ */
+static int
+json_column_group_index(WT_SESSION *session, JSON_INPUT_STATE *ins,
+    CONFIG_LIST *clp, int idx)
+{
+	WT_DECL_RET;
+	char *config, *p, *uri;
+	int isconfig;
+
+	uri = NULL;
+	config = NULL;
+
+	while (json_peek(session, ins) == '{') {
+		/* First member: either "uri" or "config". */
+		JSON_EXPECT(session, ins, '{');
+		JSON_EXPECT(session, ins, 's');
+		isconfig = JSON_STRING_MATCH(ins, "config");
+		if (!isconfig && !JSON_STRING_MATCH(ins, "uri"))
+			goto err;
+		JSON_EXPECT(session, ins, ':');
+		JSON_EXPECT(session, ins, 's');
+
+		if ((ret = json_strdup(ins, &p)) != 0) {
+			ret = util_err(ret, NULL);
+			goto err;
+		}
+		if (isconfig)
+			config = p;
+		else
+			uri = p;
+
+		/* Second member: must be whichever one was not seen yet. */
+		isconfig = !isconfig;
+		JSON_EXPECT(session, ins, ',');
+		JSON_EXPECT(session, ins, 's');
+		if (!JSON_STRING_MATCH(ins, isconfig ? "config" : "uri"))
+			goto err;
+		JSON_EXPECT(session, ins, ':');
+		JSON_EXPECT(session, ins, 's');
+
+		if ((ret = json_strdup(ins, &p)) != 0) {
+			ret = util_err(ret, NULL);
+			goto err;
+		}
+		if (isconfig)
+			config = p;
+		else
+			uri = p;
+		JSON_EXPECT(session, ins, '}');
+		if ((idx && strncmp(uri, "index:", 6) != 0) ||
+		    (!idx && strncmp(uri, "colgroup:", 9) != 0)) {
+			ret = util_err(EINVAL,
+			    "%s: misplaced colgroup or index", uri);
+			goto err;
+		}
+
+		/*
+		 * Hand the strings to the list one at a time, dropping our
+		 * reference as soon as the list holds it, so the error path
+		 * frees exactly what the list does not.
+		 */
+		if ((ret = config_list_add(clp, uri)) != 0)
+			goto err;
+		uri = NULL;
+		if ((ret = config_list_add(clp, config)) != 0)
+			goto err;
+		config = NULL;
+
+		if (json_peek(session, ins) != ',')
+			break;
+		JSON_EXPECT(session, ins, ',');
+		if (json_peek(session, ins) != '{')
+			goto err;
+	}
+	if (0) {
+err:		if (ret == 0)
+			ret = EINVAL;
+		/* Both are NULL unless parsing stopped mid-entry. */
+		free(uri);
+		free(config);
+	}
+	return (ret);
+}
+
+/*
+ * json_kvraw_append --
+ *	Extend the kvraw buffer with a space followed by the first len bytes
+ *	of str.  kvraw collects the raw key/value text of the current JSON
+ *	record and must already be allocated.  A zero len is a no-op.
+ */
+static int
+json_kvraw_append(JSON_INPUT_STATE *ins, const char *str, size_t len)
+{
+	char *joined;
+	size_t joinedsize;
+
+	if (len == 0)
+		return (0);
+
+	/* Old contents, the separating space, the new text and the NUL. */
+	joinedsize = strlen(ins->kvraw) + len + 2;
+	joined = malloc(joinedsize);
+	if (joined == NULL)
+		return (util_err(errno, NULL));
+	snprintf(joined, joinedsize, "%s %.*s", ins->kvraw, (int)len, str);
+	free(ins->kvraw);
+	ins->kvraw = joined;
+	return (0);
+}
+
+/*
+ * json_strdup --
+ *	Allocate a copy of the JSON string token at the current input
+ *	position, with the surrounding quotes stripped and the contents
+ *	converted by __wt_json_strncpy.  On success *resultp is the new
+ *	string, which the caller must free; on failure *resultp is NULL and a
+ *	non-zero value is returned.
+ */
+static int
+json_strdup(JSON_INPUT_STATE *ins, char **resultp)
+{
+	WT_DECL_RET;
+	char *buf, *cursor;
+	const char *body;
+	ssize_t buflen;
+	size_t bodylen;
+
+	buf = NULL;
+	body = ins->tokstart + 1;		/* skip the opening quote */
+	bodylen = ins->toklen - 2;		/* drop both quotes */
+
+	buflen = __wt_json_strlen(body, bodylen);
+	if (buflen < 0)
+		ret = util_err(EINVAL, "Invalid config string");
+	else {
+		buflen += 1;
+		buf = (char *)malloc((size_t)buflen);
+		if (buf == NULL)
+			ret = util_err(errno, NULL);
+		else {
+			cursor = buf;
+			ret = __wt_json_strncpy(
+			    &cursor, (size_t)buflen, body, bodylen);
+			if (ret == 0) {
+				*resultp = buf;
+				return (0);
+			}
+			ret = util_err(ret, NULL);
+		}
+	}
+
+	/* Failure: never report success, and hand nothing back. */
+	if (ret == 0)
+		ret = EINVAL;
+	free(buf);
+	*resultp = NULL;
+	return (ret);
+}
+
+/*
+ * json_data --
+ *	Parse the data portion of the JSON input, and insert all
+ *	values.
+ *
+ *	The objects queued in clp are first reordered, reconciled with the
+ *	command line and created.  Each JSON record is then split into its
+ *	raw key text and raw value text and inserted through a "dump=json"
+ *	cursor opened on the first URI in the list.  Returns 0 on success and
+ *	non-zero on error.
+ */
+static int
+json_data(WT_SESSION *session, JSON_INPUT_STATE *ins, CONFIG_LIST *clp,
+    uint32_t flags)
+{
+	WT_CURSOR *cursor;
+	WT_DECL_RET;
+	char config[64], *endp, *uri;
+	const char *keyformat;
+	int isrec, nfield, nkeys, toktype, tret;
+	size_t curpos, keystrlen;
+	ssize_t gotnolen;
+	uint64_t gotno, recno;
+
+	cursor = NULL;
+	uri = NULL;
+
+	/* Reorder and check the list. */
+	if ((ret = config_reorder(clp->list)) != 0)
+		goto err;
+
+	/* Update config based on command-line configuration. */
+	if ((ret = config_update(session, clp->list)) != 0)
+		goto err;
+
+	/* Create the items collected. */
+	if ((ret = config_exec(session, clp->list)) != 0)
+		goto err;
+
+	uri = clp->list[0];
+	(void)snprintf(config, sizeof(config),
+	    "dump=json%s%s",
+	    LF_ISSET(LOAD_JSON_APPEND) ? ",append" : "",
+	    LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? ",overwrite=false" : "");
+	if ((ret = session->open_cursor(
+	    session, uri, NULL, config, &cursor)) != 0) {
+		ret = util_err(ret, "%s: session.open", uri);
+		goto err;
+	}
+
+	/*
+	 * Count the key columns: every non-digit character in the key format
+	 * is one column.  The cast keeps isdigit defined for any char value.
+	 */
+	keyformat = cursor->key_format;
+	isrec = (strcmp(keyformat, "r") == 0);
+	for (nkeys = 0; *keyformat; keyformat++)
+		if (!isdigit((unsigned char)*keyformat))
+			nkeys++;
+
+	recno = 0;
+	while (json_peek(session, ins) == '{') {
+		nfield = 0;
+		JSON_EXPECT(session, ins, '{');
+
+		/* Start an empty raw buffer for this record. */
+		if (ins->kvraw == NULL &&
+		    (ins->kvraw = (char *)malloc(1)) == NULL) {
+			ret = util_err(errno, NULL);
+			goto err;
+		}
+		ins->kvraw[0] = '\0';
+		ins->kvrawstart = JSON_INPUT_POS(ins);
+		keystrlen = 0;
+		while (json_peek(session, ins) == 's') {
+			JSON_EXPECT(session, ins, 's');
+			JSON_EXPECT(session, ins, ':');
+			toktype = json_peek(session, ins);
+			JSON_EXPECT(session, ins, toktype);
+			if (isrec && nfield == 0) {
+				/* Verify the dump has recnos in order. */
+				recno++;
+				gotno = __wt_strtouq(ins->tokstart, &endp, 0);
+				gotnolen = (endp - ins->tokstart);
+				if (recno != gotno ||
+				    ins->toklen != (size_t)gotnolen) {
+					ret = util_err(0,
+					    "%s: recno out of order", uri);
+					goto err;
+				}
+			}
+			if (++nfield == nkeys) {
+				/*
+				 * The last key column was just consumed: flush
+				 * the raw key text and remember where it ends
+				 * inside kvraw.
+				 */
+				curpos = JSON_INPUT_POS(ins);
+				if ((ret = json_kvraw_append(ins,
+				    (char *)ins->line.mem + ins->kvrawstart,
+				    curpos - ins->kvrawstart)) != 0)
+					goto err;
+				ins->kvrawstart = curpos;
+				keystrlen = strlen(ins->kvraw);
+			}
+			if (json_peek(session, ins) != ',')
+				break;
+			JSON_EXPECT(session, ins, ',');
+			if (json_peek(session, ins) != 's')
+				goto err;
+		}
+
+		/*
+		 * Flush what is left of the current line.  Start at
+		 * kvrawstart rather than the beginning of the line: when the
+		 * key and the value share a line, the part before kvrawstart
+		 * has already been collected and must not be added twice.
+		 * When the line changed since the key, kvrawstart is zero.
+		 */
+		if (json_kvraw_append(ins,
+		    (char *)ins->line.mem + ins->kvrawstart,
+		    JSON_INPUT_POS(ins) - ins->kvrawstart))
+			goto err;
+
+		/* Split kvraw into the key text and the value text. */
+		ins->kvraw[keystrlen] = '\0';
+		if (!LF_ISSET(LOAD_JSON_APPEND))
+			cursor->set_key(cursor, ins->kvraw);
+		/* skip over inserted space and comma */
+		cursor->set_value(cursor, &ins->kvraw[keystrlen+2]);
+		if ((ret = cursor->insert(cursor)) != 0) {
+			ret = util_err(ret, "%s: cursor.insert", uri);
+			goto err;
+		}
+
+		JSON_EXPECT(session, ins, '}');
+		if (json_peek(session, ins) != ',')
+			break;
+		JSON_EXPECT(session, ins, ',');
+		if (json_peek(session, ins) != '{')
+			goto err;
+	}
+	if (0) {
+err:		if (ret == 0)
+			ret = EINVAL;
+	}
+	/*
+	 * Technically, we don't have to close the cursor because the session
+	 * handle will do it for us, but I'd like to see the flush to disk and
+	 * the close succeed, it's better to fail early when loading files.
+	 */
+	if (cursor != NULL && (tret = cursor->close(cursor)) != 0) {
+		tret = util_err(tret, "%s: cursor.close", uri);
+		if (ret == 0)
+			ret = tret;
+	}
+	if (ret == 0)
+		ret = util_flush(session, uri);
+	return (ret);
+}
+
+/*
+ * json_top_level --
+ *	Parse the top level JSON input.
+ *
+ *	The input is one object whose member names are URIs.  Each member
+ *	holds "config", "colgroups" and "indices" items, which queue objects
+ *	to create, followed by "data", which creates them and loads the
+ *	records.  Returns 0 on success and non-zero on error.
+ */
+static int
+json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags)
+{
+	CONFIG_LIST cl;
+	WT_DECL_RET;
+	char *config, *newuri, *tableuri;
+	int toktype;
+	static const char *json_markers[] = {
+	    "\"config\"", "\"colgroups\"", "\"indices\"", "\"data\"", NULL };
+
+	memset(&cl, 0, sizeof(cl));
+	tableuri = NULL;
+	JSON_EXPECT(session, ins, '{');
+	while (json_peek(session, ins) == 's') {
+		JSON_EXPECT(session, ins, 's');
+
+		/* Copy the URI out of the token, without its quotes. */
+		if ((newuri = realloc(tableuri, ins->toklen)) == NULL) {
+			ret = util_err(errno, NULL);
+			goto err;
+		}
+		tableuri = newuri;
+		snprintf(tableuri, ins->toklen, "%.*s",
+		    (int)(ins->toklen - 2), ins->tokstart + 1);
+		JSON_EXPECT(session, ins, ':');
+
+		/*
+		 * Allow any ordering of 'config', 'colgroups',
+		 * 'indices' before 'data', which must appear last.
+		 * The non-'data' items build up a list of entries
+		 * that are created in our session before the data is
+		 * inserted.
+		 */
+		for (;;) {
+			if (json_skip(session, ins, json_markers) != 0)
+				goto err;
+			JSON_EXPECT(session, ins, 's');
+			if (JSON_STRING_MATCH(ins, "config")) {
+				JSON_EXPECT(session, ins, ':');
+				JSON_EXPECT(session, ins, 's');
+				/*
+				 * The URI is handed to the list by the first
+				 * "config" item; a second one for the same
+				 * object has no URI left to pair with.
+				 */
+				if (tableuri == NULL) {
+					ret = util_err(EINVAL,
+					    "%s: duplicate config",
+					    ins->filename);
+					goto err;
+				}
+				if ((ret = json_strdup(ins, &config)) != 0) {
+					ret = util_err(ret, NULL);
+					goto err;
+				}
+				if ((ret =
+				    config_list_add(&cl, tableuri)) != 0) {
+					free(config);
+					goto err;
+				}
+				tableuri = NULL;	/* held by the list */
+				if ((ret = config_list_add(&cl, config)) != 0) {
+					free(config);
+					goto err;
+				}
+			} else if (JSON_STRING_MATCH(ins, "colgroups")) {
+				JSON_EXPECT(session, ins, ':');
+				JSON_EXPECT(session, ins, '[');
+				if ((ret = json_column_group_index(
+				    session, ins, &cl, 0)) != 0)
+					goto err;
+				JSON_EXPECT(session, ins, ']');
+			} else if (JSON_STRING_MATCH(ins, "indices")) {
+				JSON_EXPECT(session, ins, ':');
+				JSON_EXPECT(session, ins, '[');
+				if ((ret = json_column_group_index(
+				    session, ins, &cl, 1)) != 0)
+					goto err;
+				JSON_EXPECT(session, ins, ']');
+			} else if (JSON_STRING_MATCH(ins, "data")) {
+				JSON_EXPECT(session, ins, ':');
+				JSON_EXPECT(session, ins, '[');
+				/* json_data needs at least one queued object. */
+				if (cl.list == NULL) {
+					ret = util_err(EINVAL,
+					    "%s: data without config",
+					    ins->filename);
+					goto err;
+				}
+				if ((ret = json_data(session, ins, &cl,
+				    flags)) != 0)
+					goto err;
+				/*
+				 * Start the next object from a clean slate:
+				 * an empty list with zeroed counters, and no
+				 * raw buffer (json_skip refuses to run while
+				 * kvraw is set).
+				 */
+				config_list_free(&cl);
+				memset(&cl, 0, sizeof(cl));
+				free(ins->kvraw);
+				ins->kvraw = NULL;
+				break;
+			}
+			else
+				goto err;
+		}
+
+		/* Consume the closers that end this object's data. */
+		while ((toktype = json_peek(session, ins)) == '}' ||
+		    toktype == ']')
+			JSON_EXPECT(session, ins, toktype);
+		if (toktype == 0)	/* Check EOF. */
+			break;
+		if (toktype == ',') {
+			JSON_EXPECT(session, ins, ',');
+			if (json_peek(session, ins) != 's')
+				goto err;
+			continue;
+		}
+	}
+	JSON_EXPECT(session, ins, 0);
+
+	if (0) {
+err:		if (ret == 0)
+			ret = EINVAL;
+	}
+	config_list_free(&cl);
+	free(tableuri);
+	return (ret);
+}
+
+/*
+ * json_peek --
+ *	Set the input state to the next available token in the input
+ *	and return its token type, a code defined by __wt_json_token().
+ *
+ *	The token is not consumed: repeated calls return the same token until
+ *	json_expect advances past it.  Returns 0 at end of input and -1 on
+ *	error.  While a record's raw text is being collected (kvraw is set),
+ *	the rest of each finished line is saved before the next is read.
+ */
+static int
+json_peek(WT_SESSION *session, JSON_INPUT_STATE *ins)
+{
+	/* A token is already pending: report it again. */
+	if (ins->peeking)
+		return (ins->toktype);
+
+	while (!ins->ateof) {
+		/*
+		 * Cast before calling isspace: a plain char holding a byte
+		 * above 0x7f may be negative, which isspace does not accept.
+		 */
+		while (isspace((unsigned char)*ins->p))
+			ins->p++;
+		if (*ins->p)
+			break;
+
+		/* Line exhausted: keep its raw tail before replacing it. */
+		if (ins->kvraw != NULL) {
+			if (json_kvraw_append(ins,
+			    (char *)ins->line.mem + ins->kvrawstart,
+			    strlen(ins->line.mem) - ins->kvrawstart))
+				return (-1);
+			ins->kvrawstart = 0;
+		}
+		if (util_read_line(&ins->line, 1, &ins->ateof)) {
+			ins->toktype = -1;
+			return (-1);
+		}
+		ins->linenum++;
+		ins->p = (const char *)ins->line.mem;
+	}
+
+	if (ins->ateof)
+		ins->toktype = 0;
+	else if (__wt_json_token(session, ins->p,
+	    &ins->toktype, &ins->tokstart, &ins->toklen) != 0)
+		ins->toktype = -1;
+	ins->peeking = 1;
+	return (ins->toktype);
+}
+
+/*
+ * json_expect --
+ *	Ensure that the type of the next token in the input matches
+ *	the wanted value, and advance past it.  The values of the
+ *	input state will be set so specific string or integer values
+ *	can be pulled out after this call.
+ *
+ *	Returns 0 on a match and 1 otherwise.  A mismatch is reported on
+ *	stderr with the file name, line number and column; the token is
+ *	consumed either way.
+ */
+static int
+json_expect(WT_SESSION *session, JSON_INPUT_STATE *ins, int wanttok)
+{
+	if (json_peek(session, ins) < 0)
+		return (1);
+	ins->p += ins->toklen;
+	ins->peeking = 0;
+	if (ins->toktype != wanttok) {
+		/*
+		 * JSON_INPUT_POS yields a size_t; convert it explicitly so
+		 * the argument matches the conversion specifier.
+		 */
+		fprintf(stderr,
+		    "%s: %d: %lu: expected %s, got %s\n",
+		    ins->filename,
+		    ins->linenum,
+		    (unsigned long)(JSON_INPUT_POS(ins) + 1),
+		    __wt_json_tokname(wanttok),
+		    __wt_json_tokname(ins->toktype));
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * json_skip --
+ *	Read forward, line by line, until the current line contains one of
+ *	the strings in the NULL-terminated matches array, then position the
+ *	tokenizer at the start of that string.  The strings are tried in
+ *	array order and the first one found on the line wins.  Returns 0 when
+ *	a string was found and 1 otherwise, including when raw key/value
+ *	collection is active.
+ */
+static int
+json_skip(WT_SESSION *session, JSON_INPUT_STATE *ins, const char **matches)
+{
+	char *found;
+	const char **candidate;
+
+	if (ins->kvraw != NULL)
+		return (1);
+
+	found = NULL;
+	while (!ins->ateof) {
+		for (candidate = matches;
+		    found == NULL && *candidate != NULL; candidate++)
+			found = strstr(ins->p, *candidate);
+		if (found != NULL)
+			break;
+
+		/* Nothing on this line, move on to the next one. */
+		if (util_read_line(&ins->line, 1, &ins->ateof)) {
+			ins->toktype = -1;
+			return (1);
+		}
+		ins->linenum++;
+		ins->p = (const char *)ins->line.mem;
+	}
+	if (found == NULL)
+		return (1);
+
+	/* Set to this token. */
+	ins->p = found;
+	ins->peeking = 0;
+	ins->toktype = 0;
+	(void)json_peek(session, ins);
+	return (0);
+}
+
+/*
+ * util_load_json --
+ *	Load from the JSON format produced by 'wt dump -j'.  The input is
+ *	read through util_read_line; filename is used only to label error
+ *	messages.  Returns 0 on success and non-zero on error.
+ */
+int
+util_load_json(WT_SESSION *session, const char *filename, uint32_t flags)
+{
+	JSON_INPUT_STATE instate;
+	WT_DECL_RET;
+
+	memset(&instate, 0, sizeof(instate));
+	instate.session = session;
+	if (util_read_line(&instate.line, 0, &instate.ateof))
+		return (1);
+	instate.p = (const char *)instate.line.mem;
+	instate.linenum = 1;
+	instate.filename = filename;
+
+	ret = json_top_level(session, &instate, flags);
+
+	/* Release the input buffers whether or not the load succeeded. */
+	free(instate.line.mem);
+	free(instate.kvraw);
+	return (ret);
+}
diff --git a/test/suite/test_jsondump01.py b/test/suite/test_jsondump01.py
index d7c83b1e7ff..730fbf0a05a 100644
--- a/test/suite/test_jsondump01.py
+++ b/test/suite/test_jsondump01.py
@@ -63,12 +63,11 @@ class FakeCursor:
return tup
# test_jsondump.py
-# Utilities: wt jsondump
-# Test the jsondump utility (I'm not testing the 'json' cursors,
-# that's what the utility uses underneath).
+# Utilities: wt dump
+# Test the dump utility with the -j option.
class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
-
name = 'test_jsondump01'
+ name2 = 'test_jsondump01b'
nentries = 2500
keyfmt = [
@@ -109,7 +108,7 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
# spot check
configs = tables[uri][0]
- data = tables[uri][1]
+ data = tables[uri][1]["data"]
d = data[24]
if 'column5' in d:
self.assertEqual(d['column5'], '25: abcde')
@@ -123,5 +122,24 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
cursor.close()
self.populate_check(self, fake, self.nentries)
+    # Dump using the wt utility, re-load the dump under a new name with
+    # 'wt load -j', and do a content comparison on the re-loaded object.
+    def test_jsonload_util(self):
+        # Create the object.
+        uri = self.type + self.name
+        uri2 = self.type + self.name2
+        self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries)
+
+        # Dump the object.
+        self.runWt(['dump', '-j', uri], outfilename='jsondump.out')
+
+        # Load it back as name2; record-number keys are appended (-a)
+        # rather than taken from the dump.
+        loadcmd = ['load', '-jf', 'jsondump.out', '-r', self.name2]
+        if self.keyfmt == 'r':
+            loadcmd.append('-a')
+        self.runWt(loadcmd)
+
+        # Check the contents of the data we loaded.
+        cursor = self.session.open_cursor(uri2, None)
+        try:
+            self.populate_check(self, cursor, self.nentries)
+        finally:
+            cursor.close()
+
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_jsondump02.py b/test/suite/test_jsondump02.py
index 3ed073cf022..2eff8b755a7 100644
--- a/test/suite/test_jsondump02.py
+++ b/test/suite/test_jsondump02.py
@@ -29,9 +29,7 @@ import os
import wiredtiger, wttest
# test_jsondump.py
-# Utilities: wt jsondump
-# Test the jsondump utility (I'm not testing the 'json' cursors,
-# that's what the utility uses underneath).
+# Test dump output from json cursors.
class test_jsondump02(wttest.WiredTigerTestCase):
table_uri1 = 'table:jsondump02a.wt'
@@ -79,6 +77,19 @@ class test_jsondump02(wttest.WiredTigerTestCase):
pos += 1
self.assertEqual(pos, len(expect))
cursor.close()
+
+    # Insert each (key, value) pair of JSON text from 'inserts' into the
+    # URI through a JSON dump cursor.  The cursor is closed even when an
+    # insert raises.
+    def load_json(self, uri, inserts):
+        cursor = self.session.open_cursor(uri, None, 'dump=json')
+        try:
+            for insert in inserts:
+                #tty_pr('Insert: ' + str(insert))
+                cursor.set_key(insert[0])
+                cursor.set_value(insert[1])
+                cursor.insert()
+        finally:
+            cursor.close()
# Create JSON cursors and test them directly.
def test_json_cursor(self):
@@ -114,13 +125,93 @@ class test_jsondump02(wttest.WiredTigerTestCase):
self.set_kv(self.table_uri3, 2, '\x77\x88\x99\x00\xff\xfe')
self.populate_squarecube(self.table_uri4)
- self.check_json(self.table_uri1, (
- ('"key0" : "KEY000"', '"value0" : "string value"'),
- ('"key0" : "KEY001"', '"value0" : ' +
- '"\'\\\"({[]})\\\"\', etc. allowed"')))
- self.check_json(self.table_uri2, (
- ('"key0" : "KEY000"', '"value0" : 123,\n"value1" : "str0"'),
- ('"key0" : "KEY001"', '"value0" : 234,\n"value1" : "str1"')))
+ table1_json = (
+ ('"key0" : "KEY000"', '"value0" : "string value"'),
+ ('"key0" : "KEY001"', '"value0" : ' +
+ '"\'\\\"({[]})\\\"\', etc. allowed"'))
+ self.check_json(self.table_uri1, table1_json)
+
+ self.session.truncate(self.table_uri1, None, None, None)
+ self.load_json(self.table_uri1, table1_json)
+ self.check_json(self.table_uri1, table1_json)
+
+ table2_json = (
+ ('"key0" : "KEY000"', '"value0" : 123,\n"value1" : "str0"'),
+ ('"key0" : "KEY001"', '"value0" : 234,\n"value1" : "str1"'))
+ self.check_json(self.table_uri2, table2_json)
+ self.session.truncate(self.table_uri2, None, None, None)
+ self.load_json(self.table_uri2, table2_json)
+ self.check_json(self.table_uri2, table2_json)
+ self.session.truncate(self.table_uri2, None, None, None)
+
+ # bad tokens
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.load_json(self.table_uri2,
+ (('<>abc?', '9'),)),
+ '/unknown token/')
+
+ # bad tokens
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.load_json(self.table_uri2,
+ (('"abc\u"', ''),)),
+ '/invalid Unicode/')
+
+ # bad tokens
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.load_json(self.table_uri2,
+ (('"abc', ''),)),
+ '/unterminated string/')
+
+ # bad syntax
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.load_json(self.table_uri2,
+ (('"stuff" "jibberish"', '"value0" "more jibberish"'),)),
+ '/expected key name.*\"key0\"/')
+
+ # bad types
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.load_json(self.table_uri2,
+ (('"key0" : "KEY002"', '"value0" : "xyz",\n"value1" : "str0"'),)),
+ '/expected unsigned JSON <int>, got <string>/')
+
+ # bad types
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.load_json(self.table_uri2,
+ (('"key0" : "KEY002"', '"value0" : 123,\n"value1" : 456'),)),
+ '/expected JSON <string>, got <integer>/')
+
+ # extra stuff
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.load_json(self.table_uri2,
+ (('"key0" : "KEY002"',
+ '"value0" : 123,\n"value1" : "str0",'),)),
+ '/expected JSON <EOF>, got \',\'/')
+
+ # fields out of order currently not supported
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.load_json(self.table_uri2,
+ (('"key0" : "KEY002"', '"value1" : "str0",\n"value0" : 123'),)),
+ '/expected value name.*\"value0\"/')
+
+ # various invalid unicode
+ invalid_unicode = (
+ '\\u', '\\ux', '\\u0', '\\u0F', '\\u0FA', '\\u0FAx', '\\u0FA\\x')
+ for uni in invalid_unicode:
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.load_json(self.table_uri2,
+ (('"key0" : "KEY002"', '"value0" : 123,\n"value1" : "'
+ + uni + '"'),)),
+ '/invalid Unicode/')
+
+ # this one should work
+ self.load_json(self.table_uri2,
+ (('"key0" : "KEY002"', '"value0" : 345,\n"value1" : "str2"'),))
+
+ # extraneous/missing space is okay
+ self.load_json(self.table_uri2,
+ ((' "key0"\n:\t"KEY003" ',
+ '"value0":456,"value1"\n\n\r\n:\t\n"str3"'),))
+
self.check_json(self.table_uri3, (
('"key0" : 1', '"value0" : "\\u0001\\u0002\\u0003"'),
('"key0" : 2',
@@ -163,37 +254,5 @@ class test_jsondump02(wttest.WiredTigerTestCase):
('"i2" : 16,\n"i4" : 64',
'"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64')))
- def test_json_illegal(self):
- """
- Create JSON cursors and use them illegally
- """
- extra_params = ',allocation_size=512,' +\
- 'internal_page_max=16384,leaf_page_max=131072'
- self.session.create(self.table_uri1,
- 'key_format=S,value_format=S' + extra_params)
-
- self.set_kv(self.table_uri1, 'A', 'aaaa')
- self.check_json(self.table_uri1, (
- ('"key0" : "A"', '"value0" : "aaaa"'),))
-
- self.set_kv(self.table_uri1, 'B', 'bbbb')
- self.check_json(self.table_uri1, (
- ('"key0" : "A"', '"value0" : "aaaa"'),
- ('"key0" : "B"', '"value0" : "bbbb"')))
-
- cursor = self.session.open_cursor(self.table_uri1, None, 'dump=json')
- cursor.next()
-
- with self.expectedStderrPattern('Setting keys for JSON cursors not permitted'):
- cursor.set_key('stuff')
- with self.expectedStderrPattern('Setting values for JSON cursors not permitted'):
- cursor.set_value('other stuff')
- cursor.close()
-
- self.check_json(self.table_uri1, (
- ('"key0" : "A"', '"value0" : "aaaa"'),
- ('"key0" : "B"', '"value0" : "bbbb"')))
-
-
if __name__ == '__main__':
wttest.run()