Merge pull request #1154 from wiredtiger/json-load

Add JSON loading to cursors and wt load utility. refs #740.
author: Alex Gorrod <alexg@wiredtiger.com> 2014-10-15 11:55:19 +1100
committer: Alex Gorrod <alexg@wiredtiger.com> 2014-10-15 11:55:19 +1100
commit: 9ef6222eb484e2328e90f639e49bf64584a92a38 (patch)
tree: bc1338268962976bbe00c1af9ffcd7a52e808cbe
parent: ba4f6023c5c580b5f3be1d5538f57c03a8c49fe8 (diff)
parent: 2bca93d54b3b3c3ad01f1fe932a783e83495701e (diff)
download: mongo-9ef6222eb484e2328e90f639e49bf64584a92a38.tar.gz
19 files changed, 1577 insertions, 140 deletions
diff --git a/build_posix/Make.base b/build_posix/Make.base
index 3340bd8ad80..51a8e77cebe 100644
--- a/build_posix/Make.base
+++ b/build_posix/Make.base
@@ -25,6 +25,7 @@ wt_SOURCES =\
 	src/utilities/util_dump.c \
 	src/utilities/util_list.c \
 	src/utilities/util_load.c \
+	src/utilities/util_load_json.c \
 	src/utilities/util_loadtext.c \
 	src/utilities/util_main.c \
 	src/utilities/util_misc.c \
diff --git a/dist/api_data.py b/dist/api_data.py
index e92db02c6e6..8e42ba72b88 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -532,11 +532,10 @@ methods = {
 	Config('dump', '', r'''
 	    configure the cursor for dump format inputs and outputs: "hex"
 	    selects a simple hexadecimal format, "json" selects a JSON format
-	    with each record formats as fields named by column names if
+	    with each record formatted as fields named by column names if
 	    available, and "print" selects a format where only non-printing
-	    characters are hexadecimal encoded, and "json" produces a JSON
-	    encoding of the data.  The "hex" and "print" dump format are
-	    compatible with the @ref util_dump and @ref util_load commands''',
+	    characters are hexadecimal encoded.  These formats are compatible
+	    with the @ref util_dump and @ref util_load commands''',
 	    choices=['hex', 'json', 'print']),
 	Config('next_random', 'false', r'''
 	    configure the cursor to return a pseudo-random record from
diff --git a/dist/s_string.ok b/dist/s_string.ok
index a6cddfa8a72..69545dbda1f 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -68,6 +68,7 @@ CreateFileMapping
 CreateThread
 CustomersPhone
 DATAITEMs
+DECL
 DESC
 DHANDLE
 DLFCN
@@ -137,6 +138,9 @@ GetModuleHandleEx
 GetProcAddress
 Givargis
 Google
+HHHH
+HHHHLL
+HHHLL
 HYPERLEVELDB
 HyperLevelDB
 IEC
@@ -174,6 +178,8 @@ LIBPTHREAD
 LIBRT
 LIBSNAPPY
 LIBZ
+LLLLLL
+LLLLLLL
 LNO
 LOGREC
 LOGSCAN
@@ -233,6 +239,8 @@ PADDR
 PAGE's
 PARAM
 POSIX
+PRIu
+PRIu64
 PSIZE
 PTHREAD
 PTR
@@ -523,6 +531,7 @@ ds
 dsk
 dsrc
 dst
+dstlen
 dsync
 dt
 dtype
@@ -580,6 +589,7 @@ fillms
 firstfit
 fixup
 flcs
+floatnum
 fmt
 fmterr
 fnv
@@ -656,6 +666,7 @@ insertK
 insertV
 instantiation
 intl
+intnum
 intpack
 ints
 inttypes
@@ -664,6 +675,7 @@ io
 ip
 ispo
 iteratively
+jnr
 jrx
 json
 kb
@@ -673,10 +685,13 @@ keygen
 keyname
 keyv
 kv
+kvraw
 kvs
 kvsbdb
 lang
 latencies
+lbrace
+lbracket
 lbz
 ld
 ldl
@@ -817,6 +832,7 @@ os
 ovfl
 ownp
 packv
+parens
 parserp
 patchp
 pathname
@@ -838,6 +854,7 @@ primary's
 printf
 printlog
 priv
+progname
 ps
 pse
 psp
@@ -855,6 +872,8 @@ qsort
 quartile
 qup
 rS
+rbrace
+rbracket
 rdlock
 rduppo
 readlock
@@ -950,6 +969,7 @@ strerror
 strftime
 strget
 stringin
+strlen
 strncmp
 strncpy
 strndup
@@ -994,6 +1014,11 @@ tlist
 tload
 tmp
 toffpage
+tokenizer
+toklen
+tokname
+tokstart
+toktype
 toverflow
 tparent
 tprintlog
@@ -1053,6 +1078,7 @@ unpackv
 unreferenced
 unregister
 unsized
+unterminated
 untyped
 upd
 update's
diff --git a/lang/python/wiredtiger.i b/lang/python/wiredtiger.i
index 0c228c56e5f..be55845a7b2 100644
--- a/lang/python/wiredtiger.i
+++ b/lang/python/wiredtiger.i
@@ -388,7 +388,9 @@ COMPARE_OK(__wt_cursor::search_near)
 %exception __wt_async_op::_set_key;
 %exception __wt_async_op::_set_value;
 %exception __wt_cursor::_set_key;
+%exception __wt_cursor::_set_key_str;
 %exception __wt_cursor::_set_value;
+%exception __wt_cursor::_set_value_str;
 %exception wiredtiger_strerror;
 %exception wiredtiger_version;
 
@@ -577,6 +579,11 @@ typedef int int_void;
 		$self->set_key($self, &k);
 	}
 
+	/* Set the key from a string; Python's set_key uses this for JSON. */
+	void _set_key_str(char *str) {
+		$self->set_key($self, str);
+	}
+
 	int_void _set_recno(uint64_t recno) {
 		WT_ITEM k;
 		uint8_t recno_buf[20];
@@ -601,6 +608,11 @@ typedef int int_void;
 		$self->set_value($self, &v);
 	}
 
+	/* Set the value from a string; Python's set_value uses this for JSON. */
+	void _set_value_str(char *str) {
+		$self->set_value($self, str);
+	}
+
 	/* Don't return values, just throw exceptions on failure. */
 	int_void _get_key(char **datap, int *sizep) {
 		WT_ITEM k;
@@ -739,6 +751,8 @@ typedef int int_void;
 			args = args[0]
 		if self.is_column:
 			self._set_recno(long(args[0]))
+		elif self.is_json:
+			self._set_key_str(args[0])
 		else:
 			# Keep the Python string pinned
 			self._key = pack(self.key_format, *args)
@@ -748,11 +762,14 @@ typedef int int_void;
 		'''set_value(self) -> None
 		
 		@copydoc WT_CURSOR::set_value'''
-		if len(args) == 1 and type(args[0]) == tuple:
-			args = args[0]
-		# Keep the Python string pinned
-		self._value = pack(self.value_format, *args)
-		self._set_value(self._value)
+		if self.is_json:
+			self._set_value_str(args[0])
+		else:
+			if len(args) == 1 and type(args[0]) == tuple:
+				args = args[0]
+			# Keep the Python string pinned
+			self._value = pack(self.value_format, *args)
+			self._set_value(self._value)
 
 	def __iter__(self):
 		'''Cursor objects support iteration, equivalent to calling
diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c
index 31d40d32060..003b7e1f961 100644
--- a/src/cursor/cur_dump.c
+++ b/src/cursor/cur_dump.c
@@ -160,10 +160,6 @@ __curdump_set_key(WT_CURSOR *cursor, ...)
 	child = cdump->child;
 	CURSOR_API_CALL(cursor, session, set_key, NULL);
 
-	if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
-		WT_ERR_MSG(session, EINVAL,
-		    "Setting keys for JSON cursors not permitted");
-
 	va_start(ap, cursor);
 	if (F_ISSET(cursor, WT_CURSTD_RAW))
 		p = va_arg(ap, WT_ITEM *)->data;
@@ -176,8 +172,13 @@ __curdump_set_key(WT_CURSOR *cursor, ...)
 
 		child->set_key(child, recno);
 	} else {
-		WT_ERR(__dump_to_raw(session, p, &cursor->key,
-		    F_ISSET(cursor, WT_CURSTD_DUMP_HEX) ? 1 : 0));
+		if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
+			WT_ERR(__wt_json_to_item(session, p, cursor->key_format,
+			    (WT_CURSOR_JSON *)cursor->json_private, 1,
+			    &cursor->key));
+		else
+			WT_ERR(__dump_to_raw(session, p, &cursor->key,
+			    F_ISSET(cursor, WT_CURSTD_DUMP_HEX) ? 1 : 0));
 
 		child->set_key(child, &cursor->key);
 	}
@@ -255,10 +256,6 @@ __curdump_set_value(WT_CURSOR *cursor, ...)
 	child = cdump->child;
 	CURSOR_API_CALL(cursor, session, set_value, NULL);
 
-	if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
-		WT_ERR_MSG(session, EINVAL,
-		    "Setting values for JSON cursors not permitted");
-
 	va_start(ap, cursor);
 	if (F_ISSET(cursor, WT_CURSTD_RAW))
 		p = va_arg(ap, WT_ITEM *)->data;
@@ -266,8 +263,12 @@ __curdump_set_value(WT_CURSOR *cursor, ...)
 		p = va_arg(ap, const char *);
 	va_end(ap);
 
-	WT_ERR(__dump_to_raw(session,
-	    p, &cursor->value, F_ISSET(cursor, WT_CURSTD_DUMP_HEX) ? 1 : 0));
+	if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
+		WT_ERR(__wt_json_to_item(session, p, cursor->value_format,
+		    (WT_CURSOR_JSON *)cursor->json_private, 0, &cursor->value));
+	else
+		WT_ERR(__dump_to_raw(session, p, &cursor->value,
+		    F_ISSET(cursor, WT_CURSTD_DUMP_HEX) ? 1 : 0));
 
 	child->set_value(child, &cursor->value);
 
diff --git a/src/cursor/cur_json.c b/src/cursor/cur_json.c
index 618596e39b8..4a4ae7544b1 100644
--- a/src/cursor/cur_json.c
+++ b/src/cursor/cur_json.c
@@ -7,6 +7,51 @@
 
 #include "wt_internal.h"
 
+static size_t __json_unpack_put(WT_SESSION_IMPL *, void *, u_char *, size_t,
+    WT_CONFIG_ITEM *);
+static inline int __json_struct_size(WT_SESSION_IMPL *, const void *, size_t,
+    const char *, WT_CONFIG_ITEM *, int, size_t *);
+static inline int __json_struct_unpackv(WT_SESSION_IMPL *, const void *, size_t,
+    const char *, WT_CONFIG_ITEM *, u_char *, size_t, int, va_list);
+static int json_string_arg(WT_SESSION_IMPL *, const char **, WT_ITEM *);
+static int json_int_arg(WT_SESSION_IMPL *, const char **, int64_t *);
+static int json_uint_arg(WT_SESSION_IMPL *, const char **, uint64_t *);
+static int __json_pack_struct(WT_SESSION_IMPL *, void *, size_t, const char *,
+    const char *);
+static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *,
+    int, const char *, size_t *);
+
+#define	WT_PACK_JSON_GET(session, pv, jstr) do {			\
+	switch (pv.type) {						\
+	case 'x':		/* consumes no JSON input */		\
+		break;							\
+	case 's':							\
+	case 'S':		/* retagged 'j'/'J': JSON-escaped source */ \
+		WT_RET(json_string_arg(session, &jstr, &pv.u.item));	\
+		pv.type = pv.type == 's' ? 'j' : 'J';			\
+		break;							\
+	case 'b':							\
+	case 'h':							\
+	case 'i':							\
+	case 'l':							\
+	case 'q':		/* signed integer types */		\
+		WT_RET(json_int_arg(session, &jstr, &pv.u.i));		\
+		break;							\
+	case 'B':							\
+	case 'H':							\
+	case 'I':							\
+	case 'L':							\
+	case 'Q':							\
+	case 'r':							\
+	case 'R':							\
+	case 't':		/* unsigned integer types */		\
+		WT_RET(json_uint_arg(session, &jstr, &pv.u.u));		\
+		break;							\
+	/* User format strings have already been validated. */		\
+	WT_ILLEGAL_VALUE(session);					\
+	}								\
+} while (0)
+
 /*
  * __json_unpack_put --
  *	Calculate the size of a packed byte string as formatted for JSON.
@@ -367,3 +412,519 @@ __wt_json_column_init(WT_CURSOR *cursor, const char *keyformat,
 	}
 	return (0);
 }
+
+#define	MATCH_KEYWORD(session, in, result, keyword, matchval) 	do {	\
+	size_t _n = strlen(keyword);	/* ctype args must be u_char */	\
+	if (strncmp(in, keyword, _n) == 0 && !isalnum((u_char)in[_n])) { \
+		in += _n;						\
+		result = matchval;					\
+	} else {	/* report the bad word; result is left unchanged */ \
+		const char *_bad = in;					\
+		while (isalnum((u_char)*in))				\
+			in++;						\
+		__wt_errx(session, "unknown keyword \"%.*s\" in JSON",	\
+		    (int)(in - _bad), _bad);				\
+	}								\
+} while (0)
+
+/*
+ * __wt_json_token --
+ *	Return the type, start position and length of the next JSON
+ *	token in the input.  String tokens include the quotes.  JSON
+ *	can be entirely parsed using calls to this tokenizer, each
+ *	call using a src pointer that is the previously returned
+ *	tokstart + toklen.
+ *	Returns 0 on success, EINVAL (after reporting it) on a bad token.
+ *	The token type returned is one of:
+ *		0	:  EOF
+ *		's'	:  string
+ *		'i'	:  intnum
+ *		'f'	:  floatnum
+ *		':'	:  colon
+ *		','	:  comma
+ *		'{'	:  lbrace
+ *		'}'	:  rbrace
+ *		'['	:  lbracket
+ *		']'	:  rbracket
+ *		'N'	:  null
+ *		'T'	:  true
+ *		'F'	:  false
+ */
+int
+__wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
+    const char **tokstart, size_t *toklen)
+{
+	WT_SESSION_IMPL *session;
+	char ch;
+	const char *bad;
+	int backslash, isalph, isfloat, result;
+
+	result = -1;
+	session = (WT_SESSION_IMPL *)wt_session;
+	while (isspace((u_char)*src))
+		src++;
+	*tokstart = src;
+
+	if (*src == '\0') {
+		*toktype = 0;
+		*toklen = 0;
+		return (0);
+	}
+
+	/* JSON is specified in RFC 4627. */
+	switch (*src) {
+	case '"':
+		backslash = 0;
+		src++;
+		while ((ch = *src) != '\0') {
+			if (!backslash) {
+				if (ch == '"') {
+					src++;
+					result = 's';
+					break;
+				}
+				if (ch == '\\')
+					backslash = 1;
+			} else {
+				/* Validate \uHHHH; 'u' is skipped below. */
+				if (ch == 'u') {
+					u_char ignored;
+					const u_char *uc;
+
+					uc = (const u_char *)src;
+					if (__wt_hex2byte(&uc[1], &ignored) ||
+					    __wt_hex2byte(&uc[3], &ignored)) {
+						__wt_errx(session,
+				    "invalid Unicode within JSON string");
+						return (EINVAL);
+					}
+					src += 4;
+				}
+				backslash = 0;
+			}
+			src++;
+		}
+		if (result != 's')
+			__wt_errx(session, "unterminated string in JSON");
+		break;
+	case '-':
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+		isfloat = 0;
+		if (*src == '-')
+			src++;
+		while ((ch = *src) != '\0' && isdigit((u_char)ch))
+			src++;
+		if (*src == '.') {
+			isfloat = 1;
+			src++;
+			while ((ch = *src) != '\0' &&
+			    isdigit((u_char)ch))
+				src++;
+		}
+		if (*src == 'e' || *src == 'E') {
+			isfloat = 1;
+			src++;
+			if (*src == '+' || *src == '-')
+				src++;
+			while ((ch = *src) != '\0' &&
+			    isdigit((u_char)ch))
+				src++;
+		}
+		result = isfloat ? 'f' : 'i';
+		break;
+	case ':':
+	case ',':
+	case '{':
+	case '}':
+	case '[':
+	case ']':
+		result = *src++;
+		break;
+	case 'n':
+		MATCH_KEYWORD(session, src, result, "null", 'N');
+		break;
+	case 't':
+		MATCH_KEYWORD(session, src, result, "true", 'T');
+		break;
+	case 'f':
+		MATCH_KEYWORD(session, src, result, "false", 'F');
+		break;
+	default:
+		/* An illegal token, move past it anyway */
+		bad = src;
+		isalph = isalnum((u_char)*src);
+		src++;
+		if (isalph)
+			while (*src != '\0' && isalnum((u_char)*src))
+				src++;
+		__wt_errx(session, "unknown token \"%.*s\" in JSON",
+		    (int)(src - bad), bad);
+		break;
+	}
+	*toklen = (size_t)(src - *tokstart);
+	*toktype = result;
+	return (result < 0 ? EINVAL : 0);
+}
+
+/*
+ * __wt_json_tokname --
+ *	Return a printable name, for error messages, of a token type
+ *	returned by __wt_json_token; unknown types yield "<UNKNOWN>".
+ */
+const char *
+__wt_json_tokname(int toktype)
+{
+	switch (toktype) {
+	case 0:		return ("<EOF>");
+	case 's':	return ("<string>");
+	case 'i':	return ("<integer>");
+	case 'f':	return ("<float>");
+	case ':':	return ("':'");
+	case ',':	return ("','");
+	case '{':	return ("'{'");
+	case '}':	return ("'}'");
+	case '[':	return ("'['");
+	case ']':	return ("']'");
+	case 'N':	return ("'null'");
+	case 'T':	return ("'true'");
+	case 'F':	return ("'false'");
+	default:	return ("<UNKNOWN>");
+	}
+}
+
+/*
+ * json_string_arg --
+ *	Parse a string token at *jstr: item references the text between
+ *	the quotes, escapes still unprocessed, and *jstr is advanced.
+ */
+static int
+json_string_arg(WT_SESSION_IMPL *session, const char **jstr, WT_ITEM *item)
+{
+	const char *tokstart;
+	int tok;
+	WT_DECL_RET;
+
+	WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart,
+		&item->size));
+	if (tok == 's') {
+		*jstr = tokstart + item->size;
+		/* The token includes both '"' chars: drop them. */
+		item->data = tokstart + 1;
+		item->size -= 2;
+		ret = 0;
+	} else {
+		__wt_errx(session, "expected JSON <string>, got %s",
+		    __wt_json_tokname(tok));
+		ret = EINVAL;
+	}
+	return (ret);
+}
+
+/*
+ * json_int_arg --
+ *	Parse a signed decimal integer token at *jstr into *ip, advancing
+ *	*jstr past it; EINVAL if the next token is not an integer.
+ */
+static int
+json_int_arg(WT_SESSION_IMPL *session, const char **jstr, int64_t *ip)
+{
+	char *end;
+	const char *tokstart;
+	int tok;
+	size_t toksize;
+
+	WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart,
+		&toksize));
+	if (tok == 'i') {
+		/* Decimal only; NOTE(review): strtoll overflow unchecked. */
+		*ip = strtoll(tokstart, &end, 10);
+		if (end != tokstart + toksize)
+			WT_RET_MSG(session, EINVAL,
+			    "JSON <int> extraneous input");
+		*jstr = tokstart + toksize;
+	} else {
+		__wt_errx(session, "expected JSON <int>, got %s",
+		    __wt_json_tokname(tok));
+		return (EINVAL);
+	}
+	return (0);
+}
+
+/*
+ * json_uint_arg --
+ *	Parse an unsigned decimal integer token at *jstr into *up, advancing
+ *	*jstr past it; EINVAL for a non-integer or a leading '-'.
+ */
+static int
+json_uint_arg(WT_SESSION_IMPL *session, const char **jstr, uint64_t *up)
+{
+	char *end;
+	const char *tokstart;
+	int tok;
+	size_t toksize;
+
+	WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart,
+		&toksize));
+	if (tok == 'i' && *tokstart != '-') {
+		/* Decimal only; NOTE(review): strtoull overflow unchecked. */
+		*up = strtoull(tokstart, &end, 10);
+		if (end != tokstart + toksize)
+			WT_RET_MSG(session, EINVAL,
+			    "JSON <int> extraneous input");
+		*jstr = tokstart + toksize;
+	} else {
+		__wt_errx(session, "expected unsigned JSON <int>, got %s",
+		    __wt_json_tokname(tok));
+		return (EINVAL);
+	}
+	return (0);
+}
+
+#define	JSON_EXPECT_TOKEN_GET(session, jstr, tokval, start, sz) do {	\
+    int __tok;								\
+    WT_RET(__wt_json_token((WT_SESSION *)session, jstr, &__tok, &start, &sz));\
+    if (__tok != tokval) {						\
+	    __wt_errx(session, "expected JSON %s, got %s",		\
+		__wt_json_tokname(tokval), __wt_json_tokname(__tok));	\
+	    return (EINVAL);	/* returns from the enclosing function */ \
+    }									\
+    jstr = start + sz;		/* advance past the matched token */	\
+} while (0)
+
+#define	JSON_EXPECT_TOKEN(session, jstr, tokval) do {			\
+    const char *__start;	/* token position and length discarded */ \
+    size_t __sz;							\
+    JSON_EXPECT_TOKEN_GET(session, jstr, tokval, __start, __sz);	\
+} while (0)
+
+/*
+ * __json_pack_struct --
+ *	Pack JSON "name" : value pairs from jstr into buffer, as per fmt.
+ */
+static int
+__json_pack_struct(WT_SESSION_IMPL *session, void *buffer, size_t size,
+    const char *fmt, const char *jstr)
+{
+	WT_DECL_PACK_VALUE(pv);
+	WT_DECL_RET;
+	WT_PACK pack;
+	const char *tokstart;
+	int multi;
+	size_t toksize;
+	uint8_t *p, *end;
+
+	p = buffer;
+	end = p + size;
+	multi = 0;
+
+	if (fmt[0] != '\0' && fmt[1] == '\0') {
+		JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+		/* The key name was verified in __json_pack_size. */
+		JSON_EXPECT_TOKEN(session, jstr, ':');
+		pv.type = fmt[0];
+		WT_PACK_JSON_GET(session, pv, jstr);
+		return (__pack_write(session, &pv, &p, size));
+	}
+
+	WT_RET(__pack_init(session, &pack, fmt));
+	while ((ret = __pack_next(&pack, &pv)) == 0) {
+		if (multi)
+			JSON_EXPECT_TOKEN(session, jstr, ',');
+		JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+		/* The key name was verified in __json_pack_size. */
+		JSON_EXPECT_TOKEN(session, jstr, ':');
+		WT_PACK_JSON_GET(session, pv, jstr);
+		WT_RET(__pack_write(session, &pv, &p, (size_t)(end - p)));
+		multi = 1;
+	}
+
+	/* Be paranoid - __pack_write should never overflow. */
+	WT_ASSERT(session, p <= end);
+
+	if (ret != WT_NOTFOUND)
+		return (ret);
+
+	return (0);
+}
+
+/*
+ * __json_pack_size --
+ *	Calculate the size of a packed byte string from a JSON string.
+ *	We verify that the names and value types provided in JSON match
+ *	the column names and types from the schema format, returning
+ *	EINVAL if not, or if anything follows the last field.
+ */
+static int
+__json_pack_size(
+    WT_SESSION_IMPL *session, const char *fmt, WT_CONFIG_ITEM *names,
+	int iskey, const char *jstr, size_t *sizep)
+{
+	WT_CONFIG_ITEM name;
+	WT_DECL_PACK_VALUE(pv);
+	WT_PACK pack;
+	WT_PACK_NAME packname;
+	const char *tokstart;
+	int multi;
+	size_t toksize, total;
+
+	WT_RET(__pack_name_init(session, names, iskey, &packname));
+	multi = 0;
+	WT_RET(__pack_init(session, &pack, fmt));
+	for (total = 0; __pack_next(&pack, &pv) == 0;) {
+		if (multi)
+			JSON_EXPECT_TOKEN(session, jstr, ',');
+		JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+		WT_RET(__pack_name_next(&packname, &name));
+		if (toksize - 2 != name.len ||
+		    strncmp(tokstart + 1, name.str, toksize - 2) != 0) {
+			__wt_errx(session, "JSON expected %s name: \"%.*s\"",
+			    iskey ? "key" : "value", (int)name.len, name.str);
+			return (EINVAL);
+		}
+		JSON_EXPECT_TOKEN(session, jstr, ':');
+		WT_PACK_JSON_GET(session, pv, jstr);
+		total += __pack_size(session, &pv);
+		multi = 1;
+	}
+	/* Only whitespace may follow the last field: expect EOF. */
+	JSON_EXPECT_TOKEN(session, jstr, 0);
+
+	*sizep = total;
+	return (0);
+}
+
+/*
+ * __wt_json_to_item --
+ *	Convert a JSON key or value string to a raw WT_ITEM in two passes:
+ *	validate and size the input, then pack it into the item's buffer.
+ */
+int
+__wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr,
+    const char *format, WT_CURSOR_JSON *json, int iskey, WT_ITEM *item)
+{
+	size_t sz;
+
+	WT_RET(__json_pack_size(session, format,
+	    iskey ? &json->key_names : &json->value_names, iskey, jstr, &sz));
+	WT_RET(__wt_buf_initsize(session, item, sz));
+	WT_RET(__json_pack_struct(session, item->mem, sz, format, jstr));
+	return (0);
+}
+
+/*
+ * __wt_json_strlen --
+ *	Return the number of bytes a JSON-escaped string occupies once
+ *	its escapes are processed, or -1 if the format is incorrect.
+ */
+ssize_t
+__wt_json_strlen(const char *src, size_t srclen)
+{
+	const char *srcend;
+	size_t dstlen;
+	u_char hi, lo;
+
+	dstlen = 0;
+	srcend = src + srclen;
+	while (src < srcend) {
+		/* \uHHHH is a 16-bit code point, stored as 1-3 UTF-8 bytes. */
+		if (*src == '\\') {
+			if (*++src == 'u') {
+				if (__wt_hex2byte((const u_char *)++src, &hi))
+					return (-1);
+				src += 2;
+				if (__wt_hex2byte((const u_char *)src, &lo))
+					return (-1);
+				src += 2;
+				/* RFC 3629 */
+				if (hi >= 0x8) {
+					/* 3 bytes total */
+					dstlen += 2;
+				}
+				else if (hi != 0 || lo >= 0x80) {
+					/* 2 bytes total */
+					dstlen++;
+				}
+				/* else 1 byte total */
+			}
+		}
+		dstlen++;
+		src++;
+	}
+	if (src != srcend)
+		return (-1);   /* invalid input, e.g. final char is '\\' */
+	return ((ssize_t)dstlen);
+}
+
+/*
+ * __wt_json_strncpy --
+ *	Copy a JSON-escaped string to a destination of up to dstlen bytes,
+ *	expanding \uHHHH escapes to UTF-8.  If dstlen is greater than the
+ *	needed size, the result is zero padded; ENOMEM if it is too small.
+ */
+int
+__wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen)
+{
+	char *dst;
+	const char *dstend, *srcend;
+	u_char hi, lo;
+
+	dst = *pdst;
+	dstend = dst + dstlen;
+	srcend = src + srclen;
+	while (src < srcend && dst < dstend) {
+		/* \uHHHH is a 16-bit code point; emit it as UTF-8. */
+		if (*src == '\\') {
+			if (*++src == 'u') {
+				if (__wt_hex2byte((const u_char *)++src, &hi))
+					return (EINVAL);
+				src += 2;
+				if (__wt_hex2byte((const u_char *)src, &lo))
+					return (EINVAL);
+				src += 2;
+				/* RFC 3629; NOTE(review): room unchecked */
+				if (hi >= 0x8) {
+					/* 3 bytes total */
+					/* byte 0: 1110HHHH */
+					/* byte 1: 10HHHHLL */
+					/* byte 2: 10LLLLLL */
+					*dst++ = (char)(0xe0 |
+					    ((hi >> 4) & 0x0f));
+					*dst++ = (char)(0x80 |
+					    ((hi << 2) & 0x3c) |
+					    ((lo >> 6) & 0x03));
+					*dst++ = (char)(0x80 | (lo & 0x3f));
+				} else if (hi != 0 || lo >= 0x80) {
+					/* 2 bytes total */
+					/* byte 0: 110HHHLL */
+					/* byte 1: 10LLLLLL */
+					*dst++ = (char)(0xc0 |
+					    (hi << 2) |
+					    ((lo >> 6) & 0x03));
+					*dst++ = (char)(0x80 | (lo & 0x3f));
+				} else
+					/* else 1 byte total */
+					/* byte 0: 0LLLLLLL */
+					*dst++ = (char)lo;
+			}
+			else
+				*dst++ = *src;
+		} else
+			*dst++ = *src;
+		src++;
+	}
+	if (src != srcend)
+		return (ENOMEM);
+	*pdst = dst;
+	while (dst < dstend)
+		*dst++ = '\0';
+	return (0);
+}
diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c
index 52cdf232279..cfaf83824fd 100644
--- a/src/cursor/cur_std.c
+++ b/src/cursor/cur_std.c
@@ -276,9 +276,10 @@ __wt_cursor_set_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap)
 		cursor->key.data = &cursor->recno;
 		sz = sizeof(cursor->recno);
 	} else {
-		/* Fast path some common cases. */
+		/* Fast path some common cases and special case WT_ITEMs. */
 		fmt = cursor->key_format;
-		if (LF_ISSET(WT_CURSOR_RAW_OK) || WT_STREQ(fmt, "u")) {
+		if (LF_ISSET(WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON) ||
+		    WT_STREQ(fmt, "u")) {
 			item = va_arg(ap, WT_ITEM *);
 			sz = item->size;
 			cursor->key.data = item->data;
@@ -399,7 +400,8 @@ __wt_cursor_set_valuev(WT_CURSOR *cursor, va_list ap)
 
 	/* Fast path some common cases. */
 	fmt = cursor->value_format;
-	if (F_ISSET(cursor, WT_CURSOR_RAW_OK) || WT_STREQ(fmt, "u")) {
+	if (F_ISSET(cursor, WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON) ||
+	    WT_STREQ(fmt, "u")) {
 		item = va_arg(ap, WT_ITEM *);
 		sz = item->size;
 		cursor->value.data = item->data;
diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c
index 21a1b6e07e4..aa336805d06 100644
--- a/src/cursor/cur_table.c
+++ b/src/cursor/cur_table.c
@@ -146,7 +146,7 @@ __wt_curtable_set_value(WT_CURSOR *cursor, ...)
 	CURSOR_API_CALL(cursor, session, set_value, NULL);
 
 	va_start(ap, cursor);
-	if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) {
+	if (F_ISSET(cursor, WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON)) {
 		item = va_arg(ap, WT_ITEM *);
 		cursor->value.data = item->data;
 		cursor->value.size = item->size;
diff --git a/src/docs/command-line.dox b/src/docs/command-line.dox
index ee51cc21c2a..a4de4d85e71 100644
--- a/src/docs/command-line.dox
+++ b/src/docs/command-line.dox
@@ -175,6 +175,10 @@ column store.
 By default, the \c load command reads from the standard input; the \c
 -f option reads the input from the specified file.
 
+@par <code>-j</code>
+Load input in the JSON (<a href="http://www.json.org">JavaScript Object
+Notation</a>) format that was created by the <code>dump -j</code> command.
+
 @par <code>-n</code>
 By default, input data will overwrite existing data where the key/value
 pair already exists in the data source; the \c -n option causes the \c
diff --git a/src/include/extern.h b/src/include/extern.h
index d9f0dd48abb..807092f6060 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -270,6 +270,11 @@ extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer,
 extern void __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor);
 extern size_t __wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, int force_unicode);
 extern int __wt_json_column_init(WT_CURSOR *cursor, const char *keyformat, const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf);
+extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, const char **tokstart, size_t *toklen);
+extern const char *__wt_json_tokname(int toktype);
+extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, WT_CURSOR_JSON *json, int iskey, WT_ITEM *item);
+extern ssize_t __wt_json_strlen(const char *src, size_t srclen);
+extern int __wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen);
 extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp);
 extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
 extern void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst);
@@ -569,6 +574,7 @@ extern void __wt_hazard_close(WT_SESSION_IMPL *session);
 extern int __wt_raw_to_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to);
 extern void __wt_raw_to_hex_mem( const uint8_t *from, size_t size, uint8_t *dest, size_t dest_size);
 extern int __wt_raw_to_esc_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to);
+extern int __wt_hex2byte(const u_char *from, u_char *to);
 extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to);
 extern int __wt_nhex_to_raw( WT_SESSION_IMPL *session, const char *from, size_t size, WT_ITEM *to);
 extern int __wt_esc_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to);
diff --git a/src/include/packing.i b/src/include/packing.i
index 7178052ed91..6e0e7be13eb 100644
--- a/src/include/packing.i
+++ b/src/include/packing.i
@@ -248,6 +248,20 @@ __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv)
 	switch (pv->type) {
 	case 'x':
 		return (pv->size);
+	case 'j':
+	case 'J':
+		if (pv->type == 'j' || pv->havesize)
+			s = pv->size;
+		else {
+			ssize_t len;
+
+			/* The string was previously validated. */
+			len = __wt_json_strlen(pv->u.item.data,
+			    pv->u.item.size);
+			WT_ASSERT(session, len >= 0);
+			s = (size_t)len + 1;
+		}
+		return (s);
 	case 's':
 	case 'S':
 		if (pv->type == 's' || pv->havesize)
@@ -329,6 +343,28 @@ __pack_write(
 			*pp += pad;
 		}
 		break;
+	case 'j':
+	case 'J':
+		s = pv->u.item.size;
+		if ((pv->type == 'j' || pv->havesize) && pv->size < s) {
+			s = pv->size;
+			pad = 0;
+		} else if (pv->havesize)
+			pad = pv->size - s;
+		else
+			pad = 1;
+		if (s > 0) {
+			oldp = *pp;
+			WT_RET(__wt_json_strncpy((char **)pp, maxlen,
+			    pv->u.item.data, s));
+			maxlen -= (size_t)(*pp - oldp);
+		}
+		if (pad > 0) {
+			WT_SIZE_CHECK(pad, maxlen);
+			memset(*pp, 0, pad);
+			*pp += pad;
+		}
+		break;
 	case 'U':
 	case 'u':
 		s = pv->u.item.size;
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index f985fc062c4..c83c5f49144 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -851,10 +851,9 @@ struct __wt_session {
 	 * modification., a string; default empty.}
 	 * @config{dump, configure the cursor for dump format inputs and
 	 * outputs: "hex" selects a simple hexadecimal format\, "json" selects a
-	 * JSON format with each record formats as fields named by column names
-	 * if available\, and "print" selects a format where only non-printing
-	 * characters are hexadecimal encoded\, and "json" produces a JSON
-	 * encoding of the data.  The "hex" and "print" dump format are
+	 * JSON format with each record formatted as fields named by column
+	 * names if available\, and "print" selects a format where only
+	 * non-printing characters are hexadecimal encoded.  These formats are
 	 * compatible with the @ref util_dump and @ref util_load commands., a
 	 * string\, chosen from the following options: \c "hex"\, \c "json"\, \c
 	 * "print"; default empty.}
diff --git a/src/support/hex.c b/src/support/hex.c
index 552fbfa1375..96cf5ecc4d4 100644
--- a/src/support/hex.c
+++ b/src/support/hex.c
@@ -106,11 +106,11 @@ __wt_raw_to_esc_hex(
 }
 
 /*
- * hex2byte --
+ * __wt_hex2byte --
  *	Convert a pair of hex characters into a byte.
  */
-static inline int
-hex2byte(const u_char *from, u_char *to)
+int
+__wt_hex2byte(const u_char *from, u_char *to)
 {
 	uint8_t byte;
 
@@ -196,7 +196,7 @@ __wt_nhex_to_raw(
 	WT_RET(__wt_buf_init(session, to, size / 2));
 
 	for (p = (u_char *)from, t = to->mem; size > 0; p += 2, size -= 2, ++t)
-		if (hex2byte(p, t))
+		if (__wt_hex2byte(p, t))
 			return (__hex_fmterr(session));
 
 	to->size = WT_PTRDIFF(t, to->mem);
@@ -220,7 +220,7 @@ __wt_esc_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to)
 			continue;
 		++p;
 		if (p[0] != '\\') {
-			if (p[0] == '\0' || p[1] == '\0' || hex2byte(p, t))
+			if (p[0] == '\0' || p[1] == '\0' || __wt_hex2byte(p, t))
 				return (__hex_fmterr(session));
 			++p;
 		}
diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c
index 85b63b6ab9c..bd0590948b4 100644
--- a/src/utilities/util_dump.c
+++ b/src/utilities/util_dump.c
@@ -251,7 +251,7 @@ dump_json_table_begin(WT_CURSOR *cursor, const char *uri, const char *config)
 		    dump_json_table_cg(cursor, uri, name, "index:", "indices");
 	}
 
-	if (printf("\n        },\n        [") < 0)
+	if (printf("\n        },\n        {\n            \"data\" : [") < 0)
 		goto eio;
 
 	if (0) {
@@ -422,7 +422,7 @@ dump_json_table_config(WT_SESSION *session, const char *uri)
 static int
 dump_json_table_end(void)
 {
-	if (printf("        ]\n    ]") < 0)
+	if (printf("            ]\n        }\n    ]") < 0)
 		return (util_err(EIO, NULL));
 	return (0);
 }
@@ -595,9 +595,9 @@ dump_record(WT_CURSOR *cursor, const char *name, int reverse, int json)
 
 	once = 0;
 	if (json) {
-		prefix = "\n            {\n";
+		prefix = "\n{\n";
 		infix = ",\n";
-		suffix = "\n            }";
+		suffix = "\n}";
 	} else {
 		prefix = "";
 		infix = "\n";
diff --git a/src/utilities/util_load.c b/src/utilities/util_load.c
index 4bdf356cfd6..1a7e71571a6 100644
--- a/src/utilities/util_load.c
+++ b/src/utilities/util_load.c
@@ -6,26 +6,30 @@
  */
 
 #include "util.h"
+#include "util_load.h"
 
 static int format(void);
 static int insert(WT_CURSOR *, const char *);
 static int load_dump(WT_SESSION *);
-static int config_read(char ***, int *);
-static int config_rename(char **, const char *);
-static int config_update(WT_SESSION *, char **);
 static int usage(void);
 
 static int	append;		/* -a append (ignore record number keys) */
 static char    *cmdname;	/* -r rename */
 static char   **cmdconfig;	/* configuration pairs */
+static int	json;		/* -j input is JSON format */
 static int	no_overwrite;	/* -n don't overwrite existing data */
 
 int
 util_load(WT_SESSION *session, int argc, char *argv[])
 {
 	int ch;
+	const char *filename;
+	uint32_t flags;
 
-	while ((ch = __wt_getopt(progname, argc, argv, "af:nr:")) != EOF)
+	flags = 0;
+
+	filename = "<stdin>";
+	while ((ch = __wt_getopt(progname, argc, argv, "af:jnr:")) != EOF)
 		switch (ch) {
 		case 'a':	/* append (ignore record number keys) */
 			append = 1;
@@ -34,6 +38,11 @@ util_load(WT_SESSION *session, int argc, char *argv[])
 			if (freopen(__wt_optarg, "r", stdin) == NULL)
 				return (
 				    util_err(errno, "%s: reopen", __wt_optarg));
+			else
+				filename = __wt_optarg;
+			break;
+		case 'j':	/* input is JSON */
+			json = 1;
 			break;
 		case 'n':	/* don't overwrite existing data */
 			no_overwrite = 1;
@@ -61,7 +70,14 @@ util_load(WT_SESSION *session, int argc, char *argv[])
 		cmdconfig = argv;
 	}
 
-	return (load_dump(session));
+	if (json) {
+		if (append)
+			flags |= LOAD_JSON_APPEND;
+		if (no_overwrite)
+			flags |= LOAD_JSON_NO_OVERWRITE;
+		return (util_load_json(session, filename, flags));
+	} else
+		return (load_dump(session));
 }
 
 /*
@@ -74,7 +90,7 @@ load_dump(WT_SESSION *session)
 	WT_CURSOR *cursor;
 	WT_DECL_RET;
 	int hex, tret;
-	char **entry, **list, *p, **tlist, *uri, config[64];
+	char **list, **tlist, *uri, config[64];
 
 	cursor = NULL;
 	list = NULL;		/* -Wuninitialized */
@@ -85,48 +101,18 @@ load_dump(WT_SESSION *session)
 	if ((ret = config_read(&list, &hex)) != 0)
 		return (ret);
 
-	/*
-	 * Search for a table name -- if we find one, then it's table dump,
-	 * otherwise, it's a single file dump.
-	 */
-	for (entry = list; *entry != NULL; ++entry)
-		if (WT_PREFIX_MATCH(*entry, "table:"))
-			break;
-	if (*entry == NULL) {
-		/*
-		 * Single file dumps can only have two lines, the file name and
-		 * the configuration information.
-		 */
-		if ((list[0] == NULL || list[1] == NULL || list[2] != NULL) ||
-		    (WT_PREFIX_MATCH(list[0], "file:") &&
-		    WT_PREFIX_MATCH(list[0], "lsm:"))) {
-			ret = format();
-			goto err;
-		}
-
-		entry = list;
-	}
-
-	/*
-	 * Make sure the table key/value pair comes first, then we can just
-	 * run through the array in order.  (We already checked that we had
-	 * a multiple of 2 entries, so this is safe.)
-	 */
-	if (entry != list) {
-		p = list[0]; list[0] = entry[0]; entry[0] = p;
-		p = list[1]; list[1] = entry[1]; entry[1] = p;
-	}
+	/* Reorder and check the list. */
+	if ((ret = config_reorder(list)) != 0)
+		return (ret);
 
 	/* Update the config based on any command-line configuration. */
 	if ((ret = config_update(session, list)) != 0)
 		goto err;
 
 	uri = list[0];
-	for (entry = list; *entry != NULL; entry += 2)
-		if ((ret = session->create(session, entry[0], entry[1])) != 0) {
-			ret = util_err(ret, "%s: session.create", entry[0]);
-			goto err;
-		}
+	/* Create the items in the list. */
+	if ((ret = config_exec(session, list)) != 0)
+		goto err;
 
 	/* Open the insert cursor. */
 	(void)snprintf(config, sizeof(config),
@@ -173,10 +159,51 @@ err:	/*
 }
 
 /*
+ * config_exec --
+ *	Create the tables/indices/colgroups implied by the list.
+ */
+int
+config_exec(WT_SESSION *session, char **list)
+{
+	WT_DECL_RET;
+
+	for (; *list != NULL; list += 2)
+		if ((ret = session->create(session, list[0], list[1])) != 0)
+			return (util_err(ret, "%s: session.create", list[0]));
+	return (0);
+}
+
+int
+config_list_add(CONFIG_LIST *clp, char *val)
+{
+	if (clp->entry + 1 >= clp->max_entry)
+		if ((clp->list = realloc(clp->list, (size_t)
+		    (clp->max_entry += 100) * sizeof(char *))) == NULL)
+			/* realloc doesn't free on failure: old list leaks. */
+			return (util_err(errno, NULL));
+
+	clp->list[clp->entry++] = val;
+	clp->list[clp->entry] = NULL;
+	return (0);
+}
+
+void
+config_list_free(CONFIG_LIST *clp)
+{
+	char **entry;
+
+	if (clp->list != NULL)
+		for (entry = &clp->list[0]; *entry != NULL; entry++)
+			free(*entry);
+	free(clp->list);
+	clp->list = NULL;
+}
+
+/*
  * config_read --
  *	Read the config lines and do some basic validation.
  */
-static int
+int
 config_read(char ***listp, int *hexp)
 {
 	ULINE l;
@@ -260,16 +287,62 @@ err:	if (list != NULL) {
 }
 
 /*
+ * config_reorder --
+ *	For table dumps, reorder the list so tables are first.
+ *	For other dumps, make any needed checks.
+ */
+int
+config_reorder(char **list)
+{
+	char **entry, *p;
+
+	/*
+	 * Search for a table name -- if we find one, then it's table dump,
+	 * otherwise, it's a single file dump.
+	 */
+	for (entry = list; *entry != NULL; ++entry)
+		if (WT_PREFIX_MATCH(*entry, "table:"))
+			break;
+	if (*entry == NULL) {
+		/*
+		 * Single file dumps can only have two lines, the file name and
+		 * the configuration information.
+		 */
+		if ((list[0] == NULL || list[1] == NULL || list[2] != NULL) ||
+		    (WT_PREFIX_MATCH(list[0], "file:") &&
+		    WT_PREFIX_MATCH(list[0], "lsm:")))
+			return (format());
+
+		entry = list;
+	}
+
+	/*
+	 * Make sure the table key/value pair comes first, then we can just
+	 * run through the array in order.  (We already checked that we had
+	 * a multiple of 2 entries, so this is safe.)
+	 */
+	if (entry != list) {
+		p = list[0]; list[0] = entry[0]; entry[0] = p;
+		p = list[1]; list[1] = entry[1]; entry[1] = p;
+	}
+	return (0);
+}
+
+/*
  * config_update --
  *	Reconcile and update the command line configuration against the
- * config we found.
+ *	config we found.
  */
-static int
+int
 config_update(WT_SESSION *session, char **list)
 {
 	int found;
 	const char *cfg[] = { NULL, NULL, NULL };
-	char **configp, **listp, *p, *t;
+	char **configp, **listp;
+	const char **rm;
+	static const char *rmnames[] = {
+		"filename", "id", "checkpoint",	"checkpoint_lsn",
+		"version", "source", NULL };
 
 	/*
 	 * If the object has been renamed, replace all of the column group,
@@ -296,16 +369,14 @@ config_update(WT_SESSION *session, char **list)
 	}
 
 	/*
-	 * Remove all "filename=" configurations from the values, new filenames
-	 * are chosen as part of table load.
+	 * Remove all "filename=", "source=" and other configurations
+	 * that foil loading from the values. New filenames are chosen
+	 * as part of table load.
 	 */
 	for (listp = list; *listp != NULL; listp += 2)
-		if ((p = strstr(listp[1], "filename=")) != NULL) {
-			if ((t = strchr(p, ',')) == NULL)
-				*p = '\0';
-			else
-				memmove(p, t + 1, strlen(t + 1) + 1);
-		}
+		for (rm = rmnames; *rm != NULL; rm++)
+			if (strstr(listp[1], *rm) != NULL)
+				config_remove(listp[1], *rm);
 
 	/*
 	 * It's possible to update everything except the key/value formats.
@@ -375,7 +446,7 @@ config_update(WT_SESSION *session, char **list)
  * config_rename --
  *	Update the URI name.
  */
-static int
+int
 config_rename(char **urip, const char *name)
 {
 	size_t len;
@@ -403,6 +474,46 @@ config_rename(char **urip, const char *name)
 }
 
 /*
+ * config_remove --
+ *	Remove a single config key and its value.
+ */
+void
+config_remove(char *config, const char *ckey)
+{
+	int parens, quoted;
+	char *begin, match[100], *next, *p;
+
+	snprintf(match, sizeof(match), "%s=", ckey);
+	if ((begin = strstr(config, match)) != NULL) {
+		parens = 0;
+		quoted = 0;
+		next = NULL;
+		for (p = begin + strlen(match); !next && *p; p++)
+			switch (*p) {
+			case '(':
+				if (!quoted)
+					parens++;
+				break;
+			case ')':
+				if (!quoted)
+					parens--;
+				break;
+			case '"':
+				quoted = !quoted;
+				break;
+			case ',':
+				if (!quoted && parens == 0)
+					next = p + 1;
+				break;
+			}
+		if (next)
+			memmove(begin, next, strlen(next) + 1);
+		else
+			*begin = '\0';
+	}
+}
+
+/*
  * format --
  *	The input doesn't match the dump format.
  */
diff --git a/src/utilities/util_load.h b/src/utilities/util_load.h
new file mode 100644
index 00000000000..13174b95c72
--- /dev/null
+++ b/src/utilities/util_load.h
@@ -0,0 +1,30 @@
+/*-
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ *	All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+/*
+ * A list of configuration strings.
+ */
+typedef struct {
+	char **list;		/* array of alternating (uri, config) values */
+	int entry;		/* next entry available in list */
+	int max_entry;		/* how many allocated in list */
+} CONFIG_LIST;
+
+int	 config_exec(WT_SESSION *, char **);
+int	 config_list_add(CONFIG_LIST *, char *);
+void	 config_list_free(CONFIG_LIST *);
+int	 config_read(char ***, int *);
+int	 config_rename(char **, const char *);
+void	 config_remove(char *, const char *);
+int	 config_reorder(char **);
+int	 config_update(WT_SESSION *, char **);
+
+/* Flags for util_load_json */
+#define	LOAD_JSON_APPEND	0x0001	/* append (ignore record number keys) */
+#define	LOAD_JSON_NO_OVERWRITE	0x0002	/* don't overwrite existing data */
+
+int	 util_load_json(WT_SESSION *, const char *, uint32_t);
diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c
new file mode 100644
index 00000000000..9fba6b73948
--- /dev/null
+++ b/src/utilities/util_load_json.c
@@ -0,0 +1,567 @@
+/*-
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ *	All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "util.h"
+#include "util_load.h"
+
+/*
+ * Encapsulates the input state for parsing JSON.
+ *
+ * At any time, we may be peeking at an unconsumed token; this is
+ * indicated by 'peeking' as true.  toktype, tokstart, toklen will be
+ * set in this case.
+ *
+ * Generally we are collecting and processing tokens one by one.
+ * In JSON, tokens never span lines so this makes processing easy.
+ * The exception is that a JSON dump cursor takes the complete
+ * set of keys or values during cursor->set_key/set_value calls,
+ * which may contain many tokens and span lines.  E.g.
+ *   cursor->set_value("\"name\" : \"John\", \"phone\" : 2348765");
+ * The raw key/value string is collected in the kvraw field.
+ */
+typedef struct {
+	WT_SESSION *session;    /* associated session */
+	ULINE line;		/* current line */
+	const char *p;		/* points to cur position in line.mem */
+	int ateof;		/* current token is EOF */
+	int peeking;		/* peeking at next token */
+	int toktype;		/* next token, defined by __wt_json_token() */
+	const char *tokstart;	/* next token start (points into line.mem) */
+	size_t toklen;		/* next token length */
+	char *kvraw;		/* multiple line raw content collected so far */
+	size_t kvrawstart;	/* pos on cur line that JSON key/value starts */
+	const char *filename;   /* filename for error reporting */
+	int linenum;		/* line number for error reporting */
+} JSON_INPUT_STATE;
+
+static int json_column_group_index(WT_SESSION *, JSON_INPUT_STATE *,
+    CONFIG_LIST *, int);
+static int json_data(WT_SESSION *, JSON_INPUT_STATE *, CONFIG_LIST *, uint32_t);
+static int json_expect(WT_SESSION *, JSON_INPUT_STATE *, int);
+static int json_peek(WT_SESSION *, JSON_INPUT_STATE *);
+static int json_skip(WT_SESSION *, JSON_INPUT_STATE *, const char **);
+static int json_kvraw_append(JSON_INPUT_STATE *, const char *, size_t);
+static int json_strdup(JSON_INPUT_STATE *, char **);
+static int json_top_level(WT_SESSION *, JSON_INPUT_STATE *, uint32_t);
+
+#define	JSON_STRING_MATCH(ins, match)					\
+	((ins)->toklen - 2 == strlen(match) &&				\
+	    strncmp((ins)->tokstart + 1, (match), (ins)->toklen - 2) == 0)
+
+#define	JSON_INPUT_POS(ins)						\
+	((size_t)((ins)->p - (const char *)(ins)->line.mem))
+
+#define	JSON_EXPECT(session, ins, tok) do {				\
+	if (json_expect(session, ins, tok))				\
+		goto err;						\
+} while (0)
+
+/*
+ * json_column_group_index --
+ *	Parse a column group or index entry from JSON input.
+ */
+static int
+json_column_group_index(WT_SESSION *session, JSON_INPUT_STATE *ins,
+    CONFIG_LIST *clp, int idx)
+{
+	WT_DECL_RET;
+	char *config, *p, *uri;
+	int isconfig;
+
+	uri = NULL;
+	config = NULL;
+
+	while (json_peek(session, ins) == '{') {
+		JSON_EXPECT(session, ins, '{');
+		JSON_EXPECT(session, ins, 's');
+		isconfig = JSON_STRING_MATCH(ins, "config");
+		if (!isconfig && !JSON_STRING_MATCH(ins, "uri"))
+			goto err;
+		JSON_EXPECT(session, ins, ':');
+		JSON_EXPECT(session, ins, 's');
+
+		if ((ret = json_strdup(ins, &p)) != 0) {
+			ret = util_err(ret, NULL);
+			goto err;
+		}
+		if (isconfig)
+			config = p;
+		else
+			uri = p;
+
+		isconfig = !isconfig;
+		JSON_EXPECT(session, ins, ',');
+		JSON_EXPECT(session, ins, 's');
+		if (!JSON_STRING_MATCH(ins, isconfig ? "config" : "uri"))
+			goto err;
+		JSON_EXPECT(session, ins, ':');
+		JSON_EXPECT(session, ins, 's');
+
+		if ((ret = json_strdup(ins, &p)) != 0) {
+			ret = util_err(ret, NULL);
+			goto err;
+		}
+		if (isconfig)
+			config = p;
+		else
+			uri = p;
+		JSON_EXPECT(session, ins, '}');
+		if ((idx && strncmp(uri, "index:", 6) != 0) ||
+		    (!idx && strncmp(uri, "colgroup:", 9) != 0)) {
+			ret = util_err(EINVAL,
+			    "%s: misplaced colgroup or index", uri);
+			goto err;
+		}
+		if ((ret = config_list_add(clp, uri)) != 0 ||
+		    (ret = config_list_add(clp, config)) != 0)
+			goto err;
+
+		if (json_peek(session, ins) != ',')
+			break;
+		JSON_EXPECT(session, ins, ',');
+		if (json_peek(session, ins) != '{')
+			goto err;
+	}
+	if (0) {
+err:		if (ret == 0)
+			ret = EINVAL;
+	}
+	return (ret);
+}
+
+/*
+ * json_kvraw_append --
+ *	Append to the kvraw buffer, which is used to collect all the
+ *	raw key/value pairs from JSON input.
+ */
+static int json_kvraw_append(JSON_INPUT_STATE *ins, const char *str, size_t len)
+{
+	char *tmp;
+	size_t needsize;
+
+	if (len > 0) {
+		needsize = strlen(ins->kvraw) + len + 2;
+		if ((tmp = malloc(needsize)) == NULL)
+			return (util_err(errno, NULL));
+		snprintf(tmp, needsize, "%s %.*s", ins->kvraw, (int)len, str);
+		free(ins->kvraw);
+		ins->kvraw = tmp;
+	}
+	return (0);
+}
+
+/*
+ * json_strdup --
+ *	Return a string, with no escapes or other JSON-isms, from the
+ *	JSON string at the current input position.
+ */
+static int
+json_strdup(JSON_INPUT_STATE *ins, char **resultp)
+{
+	WT_DECL_RET;
+	char *result, *resultcpy;
+	const char *src;
+	ssize_t resultlen;
+	size_t srclen;
+
+	result = NULL;
+	src = ins->tokstart + 1;  /*strip "" from token */
+	srclen = ins->toklen - 2;
+	if ((resultlen = __wt_json_strlen(src, srclen)) < 0) {
+		ret = util_err(EINVAL, "Invalid config string");
+		goto err;
+	}
+	resultlen += 1;
+	if ((result = (char *)malloc((size_t)resultlen)) == NULL) {
+		ret = util_err(errno, NULL);
+		goto err;
+	}
+	*resultp = result;
+	resultcpy = result;
+	if ((ret = __wt_json_strncpy(&resultcpy, (size_t)resultlen, src,
+	    srclen))
+	    != 0) {
+		ret = util_err(ret, NULL);
+		goto err;
+	}
+
+	if (0) {
+err:		if (ret == 0)
+			ret = EINVAL;
+		if (result != NULL)
+			free(result);
+		*resultp = NULL;
+	}
+	return (ret);
+}
+
+/*
+ * json_data --
+ *	Create the objects in the config list, then parse the "data" array
+ *	and insert each record through a JSON dump cursor.
+ */
+static int
+json_data(WT_SESSION *session, JSON_INPUT_STATE *ins, CONFIG_LIST *clp,
+    uint32_t flags)
+{
+	WT_CURSOR *cursor;
+	WT_DECL_RET;
+	char config[64], *endp, *uri;
+	const char *keyformat;
+	int isrec, nfield, nkeys, toktype, tret;
+	size_t keystrlen;
+	ssize_t gotnolen;
+	uint64_t gotno, recno;
+
+	cursor = NULL;
+	uri = NULL;
+
+	/* Reorder and check the list. */
+	if ((ret = config_reorder(clp->list)) != 0)
+		goto err;
+
+	/* Update config based on command-line configuration. */
+	if ((ret = config_update(session, clp->list)) != 0)
+		goto err;
+
+	/* Create the items collected. */
+	if ((ret = config_exec(session, clp->list)) != 0)
+		goto err;
+
+	uri = clp->list[0];
+	(void)snprintf(config, sizeof(config), "dump=json%s%s",
+	    LF_ISSET(LOAD_JSON_APPEND) ? ",append" : "",
+	    LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? ",overwrite=false" : "");
+	if ((ret = session->open_cursor(
+	    session, uri, NULL, config, &cursor)) != 0) {
+		ret = util_err(ret, "%s: session.open", uri);
+		goto err;
+	}
+	keyformat = cursor->key_format;
+	isrec = (strcmp(keyformat, "r") == 0);
+	for (nkeys = 0; *keyformat; keyformat++)
+		if (!isdigit((unsigned char)*keyformat))
+			nkeys++;
+
+	recno = 0;
+	while (json_peek(session, ins) == '{') {
+		nfield = 0;
+		JSON_EXPECT(session, ins, '{');
+		if (ins->kvraw == NULL && (ins->kvraw = malloc(1)) == NULL) {
+			ret = util_err(errno, NULL);
+			goto err;
+		}
+		ins->kvraw[0] = '\0';
+		(ins)->kvrawstart = JSON_INPUT_POS(ins);
+		keystrlen = 0;
+		while (json_peek(session, ins) == 's') {
+			JSON_EXPECT(session, ins, 's');
+			JSON_EXPECT(session, ins, ':');
+			toktype = json_peek(session, ins);
+			JSON_EXPECT(session, ins, toktype);
+			if (isrec && nfield == 0) {
+				/* Verify the dump has recnos in order. */
+				recno++;
+				gotno = __wt_strtouq(ins->tokstart, &endp, 0);
+				gotnolen = (endp - ins->tokstart);
+				if (recno != gotno ||
+				    ins->toklen != (size_t)gotnolen) {
+					ret = util_err(0,
+					    "%s: recno out of order", uri);
+					goto err;
+				}
+			}
+			if (++nfield == nkeys) {
+				size_t curpos = JSON_INPUT_POS(ins);
+				if ((ret = json_kvraw_append(ins,
+				    (char *)(ins)->line.mem + (ins)->kvrawstart,
+				    curpos - (ins)->kvrawstart)) != 0)
+					goto err;
+				ins->kvrawstart = curpos;
+				keystrlen = strlen(ins->kvraw);
+			}
+			if (json_peek(session, ins) != ',')
+				break;
+			JSON_EXPECT(session, ins, ',');
+			if (json_peek(session, ins) != 's')
+				goto err;
+		}
+		if (json_kvraw_append(ins, ins->line.mem, JSON_INPUT_POS(ins)))
+			goto err;
+		ins->kvraw[keystrlen] = '\0';
+		if (!LF_ISSET(LOAD_JSON_APPEND))
+			cursor->set_key(cursor, ins->kvraw);
+		/* skip over inserted space and comma */
+		cursor->set_value(cursor, &ins->kvraw[keystrlen+2]);
+		if ((ret = cursor->insert(cursor)) != 0) {
+			ret = util_err(ret, "%s: cursor.insert", uri);
+			goto err;
+		}
+
+		JSON_EXPECT(session, ins, '}');
+		if (json_peek(session, ins) != ',')
+			break;
+		JSON_EXPECT(session, ins, ',');
+		if (json_peek(session, ins) != '{')
+			goto err;
+	}
+	if (0) {
+err:		if (ret == 0)
+			ret = EINVAL;
+	}
+	/*
+	 * Technically, we don't have to close the cursor because the session
+	 * handle will do it for us, but I'd like to see the flush to disk and
+	 * the close succeed, it's better to fail early when loading files.
+	 */
+	if (cursor != NULL && (tret = cursor->close(cursor)) != 0) {
+		tret = util_err(tret, "%s: cursor.close", uri);
+		if (ret == 0)
+			ret = tret;
+	}
+	if (ret == 0)
+		ret = util_flush(session, uri);
+	return (ret);
+}
+
+/*
+ * json_top_level --
+ *	Parse the top level JSON input.
+ */
+static int
+json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags)
+{
+	CONFIG_LIST cl;
+	WT_DECL_RET;
+	char *config, *tableuri;
+	int toktype;
+	static const char *json_markers[] = {
+	    "\"config\"", "\"colgroups\"", "\"indices\"", "\"data\"", NULL };
+
+	memset(&cl, 0, sizeof(cl));
+	tableuri = NULL;
+	JSON_EXPECT(session, ins, '{');
+	while (json_peek(session, ins) == 's') {
+		JSON_EXPECT(session, ins, 's');
+		tableuri = realloc(tableuri, ins->toklen);
+		snprintf(tableuri, ins->toklen, "%.*s",
+		    (int)(ins->toklen - 2), ins->tokstart + 1);
+		JSON_EXPECT(session, ins, ':');
+
+		/*
+		 * Allow any ordering of 'config', 'colgroups',
+		 * 'indices' before 'data', which must appear last.
+		 * The non-'data' items build up a list of entries
+		 * that are created in our session before the data is
+		 * inserted.
+		 */
+		for (;;) {
+			if (json_skip(session, ins, json_markers) != 0)
+				goto err;
+			JSON_EXPECT(session, ins, 's');
+			if (JSON_STRING_MATCH(ins, "config")) {
+				JSON_EXPECT(session, ins, ':');
+				JSON_EXPECT(session, ins, 's');
+				if ((ret = json_strdup(ins, &config)) != 0) {
+					ret = util_err(ret, NULL);
+					goto err;
+				}
+				config_list_add(&cl, tableuri);
+				config_list_add(&cl, config);
+				tableuri = NULL;
+			} else if (JSON_STRING_MATCH(ins, "colgroups")) {
+				JSON_EXPECT(session, ins, ':');
+				JSON_EXPECT(session, ins, '[');
+				if ((ret = json_column_group_index(
+				    session, ins, &cl, 0)) != 0)
+					goto err;
+				JSON_EXPECT(session, ins, ']');
+			} else if (JSON_STRING_MATCH(ins, "indices")) {
+				JSON_EXPECT(session, ins, ':');
+				JSON_EXPECT(session, ins, '[');
+				if ((ret = json_column_group_index(
+				    session, ins, &cl, 1)) != 0)
+					goto err;
+				JSON_EXPECT(session, ins, ']');
+			} else if (JSON_STRING_MATCH(ins, "data")) {
+				JSON_EXPECT(session, ins, ':');
+				JSON_EXPECT(session, ins, '[');
+				if ((ret = json_data(session, ins, &cl,
+				    flags)) != 0)
+					goto err;
+				config_list_free(&cl);
+				break;
+			}
+			else
+				goto err;
+		}
+
+		while ((toktype = json_peek(session, ins)) == '}' ||
+		    toktype == ']')
+			JSON_EXPECT(session, ins, toktype);
+		if (toktype == 0) /* Check EOF. */
+			break;
+		if (toktype == ',') {
+			JSON_EXPECT(session, ins, ',');
+			if (json_peek(session, ins) != 's')
+				goto err;
+			continue;
+		}
+	}
+	JSON_EXPECT(session, ins, 0);
+
+	if (0) {
+err:		if (ret == 0)
+			ret = EINVAL;
+	}
+	config_list_free(&cl);
+	if (tableuri != NULL)
+		free(tableuri);
+	return (ret);
+}
+
+/*
+ * json_peek --
+ *	Make the next token current without consuming it, reading lines as
+ *	needed; returns its __wt_json_token() type, 0 at EOF, -1 on error.
+ */
+static int
+json_peek(WT_SESSION *session, JSON_INPUT_STATE *ins)
+{
+	WT_DECL_RET;
+
+	if (!ins->peeking) {
+		while (!ins->ateof) {
+			while (isspace((unsigned char)*ins->p))
+				ins->p++;	/* cast keeps ctype arg >= 0 */
+			if (*ins->p)
+				break;
+			if (ins->kvraw != NULL) {
+				if (json_kvraw_append(ins,
+				    (char *)ins->line.mem + ins->kvrawstart,
+				    strlen(ins->line.mem) - ins->kvrawstart)) {
+					ret = -1;
+					goto err;
+				}
+				ins->kvrawstart = 0;
+			}
+			if (util_read_line(&ins->line, 1,
+			    &ins->ateof)) {
+				ins->toktype = -1;
+				ret = -1;
+				goto err;
+			}
+			ins->linenum++;
+			ins->p = (const char *)ins->line.mem;
+		}
+		if (ins->ateof)
+			ins->toktype = 0;
+		else if (__wt_json_token(session, ins->p,
+		    &ins->toktype, &ins->tokstart,
+		    &ins->toklen) != 0)
+			ins->toktype = -1;
+		ins->peeking = 1;
+	}
+	if (0) {
+	err:	if (ret == 0)
+			ret = -1;
+	}
+	return (ret == 0 ? ins->toktype : -1);
+}
+
+/*
+ * json_expect --
+ *	Advance past the next token and check that its type is the
+ *	wanted one; the token fields of the input state stay set so
+ *	string or integer values can be pulled out afterwards.  Returns
+ *	0 on a match, 1 on a token error or a (reported) mismatch.
+ */
+static int
+json_expect(WT_SESSION *session, JSON_INPUT_STATE *ins, int wanttok)
+{
+	if (json_peek(session, ins) < 0)
+		return (1);
+	ins->p += ins->toklen;
+	ins->peeking = 0;
+	if (ins->toktype != wanttok) {
+		fprintf(stderr,
+		    "%s: %d: %ld: expected %s, got %s\n",
+		    ins->filename,
+		    ins->linenum,
+		    (long)(JSON_INPUT_POS(ins) + 1),	/* size_t vs. %ld */
+		    __wt_json_tokname(wanttok),
+		    __wt_json_tokname(ins->toktype));
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * json_skip --
+ *	Skip over JSON input until one of the specified strings appears.
+ *	The tokenizer will be set to point to the beginning of
+ *	that string.
+ */
+static int
+json_skip(WT_SESSION *session, JSON_INPUT_STATE *ins, const char **matches)
+{
+	char *hit;
+	const char **match;
+
+	if (ins->kvraw != NULL)
+		return (1);
+
+	hit = NULL;
+	while (!ins->ateof) {
+		for (match = matches; *match != NULL; match++)
+			if ((hit = strstr(ins->p, *match)) != NULL)
+				goto out;
+		if (util_read_line(&ins->line, 1, &ins->ateof)) {
+			ins->toktype = -1;
+			return (1);
+		}
+		ins->linenum++;
+		ins->p = (const char *)ins->line.mem;
+	}
+out:
+	if (hit == NULL)
+		return (1);
+
+	/* Set to this token. */
+	ins->p = hit;
+	ins->peeking = 0;
+	ins->toktype = 0;
+	(void)json_peek(session, ins);
+	return (0);
+}
+
+/*
+ * util_load_json --
+ *	Load from the JSON format produced by 'wt dump -j'.
+ */
+int
+util_load_json(WT_SESSION *session, const char *filename, uint32_t flags)
+{
+	JSON_INPUT_STATE instate;
+	WT_DECL_RET;
+
+	memset(&instate, 0, sizeof(instate));
+	instate.session = session;
+	if (util_read_line(&instate.line, 0, &instate.ateof))
+		return (1);
+	instate.p = (const char *)instate.line.mem;
+	instate.linenum = 1;
+	instate.filename = filename;
+
+	if ((ret = json_top_level(session, &instate, flags)) != 0)
+		goto err;
+
+err:	if (instate.line.mem != NULL)
+		free(instate.line.mem);
+	free(instate.kvraw);
+	return (ret);
+}
diff --git a/test/suite/test_jsondump01.py b/test/suite/test_jsondump01.py
index d7c83b1e7ff..730fbf0a05a 100644
--- a/test/suite/test_jsondump01.py
+++ b/test/suite/test_jsondump01.py
@@ -63,12 +63,11 @@ class FakeCursor:
             return tup
 
 # test_jsondump.py
-#    Utilities: wt jsondump
-# Test the jsondump utility (I'm not testing the 'json' cursors,
-# that's what the utility uses underneath).
+#    Utilities: wt dump
+# Test the dump utility with the -j option.
 class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
-
     name = 'test_jsondump01'
+    name2 = 'test_jsondump01b'
     nentries = 2500
 
     keyfmt = [
@@ -109,7 +108,7 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
 
         # spot check
         configs = tables[uri][0]
-        data = tables[uri][1]
+        data = tables[uri][1]["data"]
         d = data[24]
         if 'column5' in d:
             self.assertEqual(d['column5'], '25: abcde')
@@ -123,5 +122,24 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
         cursor.close()
         self.populate_check(self, fake, self.nentries)
 
+    # Dump using util, re-load using the wt load utility, and compare the content.
+    def test_jsonload_util(self):
+        # Create the object.
+        uri = self.type + self.name
+        uri2 = self.type + self.name2
+        self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries)
+
+        # Dump the object.
+        self.runWt(['dump', '-j', uri], outfilename='jsondump.out')
+
+        loadcmd = ['load', '-jf', 'jsondump.out', '-r', self.name2]
+        if self.keyfmt == 'r':
+            loadcmd.append('-a')
+        self.runWt(loadcmd)
+
+        # check the contents of the data we read.
+        cursor = self.session.open_cursor(uri2, None)
+        self.populate_check(self, cursor, self.nentries)
+
 if __name__ == '__main__':
     wttest.run()
diff --git a/test/suite/test_jsondump02.py b/test/suite/test_jsondump02.py
index 3ed073cf022..2eff8b755a7 100644
--- a/test/suite/test_jsondump02.py
+++ b/test/suite/test_jsondump02.py
@@ -29,9 +29,7 @@ import os
 import wiredtiger, wttest
 
 # test_jsondump.py
-#    Utilities: wt jsondump
-# Test the jsondump utility (I'm not testing the 'json' cursors,
-# that's what the utility uses underneath).
+# Test dump output from json cursors.
 class test_jsondump02(wttest.WiredTigerTestCase):
 
     table_uri1 = 'table:jsondump02a.wt'
@@ -79,6 +77,19 @@ class test_jsondump02(wttest.WiredTigerTestCase):
             pos += 1
         self.assertEqual(pos, len(expect))
         cursor.close()
+
+    # Insert the given (key, value) pairs using a JSON cursor on the URI.
+    def load_json(self, uri, inserts):
+        cursor = self.session.open_cursor(uri, None, 'dump=json')
+        pos = 0
+        try:
+            for insert in inserts:
+                #tty_pr('Insert: ' + str(insert))
+                cursor.set_key(insert[0])
+                cursor.set_value(insert[1])
+                cursor.insert()
+        finally:
+            cursor.close()
         
     # Create JSON cursors and test them directly.
     def test_json_cursor(self):
@@ -114,13 +125,93 @@ class test_jsondump02(wttest.WiredTigerTestCase):
         self.set_kv(self.table_uri3, 2, '\x77\x88\x99\x00\xff\xfe')
         self.populate_squarecube(self.table_uri4)
 
-        self.check_json(self.table_uri1, (
-                ('"key0" : "KEY000"', '"value0" : "string value"'),
-                ('"key0" : "KEY001"', '"value0" : ' +
-                 '"\'\\\"({[]})\\\"\', etc. allowed"')))
-        self.check_json(self.table_uri2, (
-                ('"key0" : "KEY000"', '"value0" : 123,\n"value1" : "str0"'),
-                ('"key0" : "KEY001"', '"value0" : 234,\n"value1" : "str1"')))
+        table1_json =  (
+            ('"key0" : "KEY000"', '"value0" : "string value"'),
+            ('"key0" : "KEY001"', '"value0" : ' +
+             '"\'\\\"({[]})\\\"\', etc. allowed"'))
+        self.check_json(self.table_uri1, table1_json)
+
+        self.session.truncate(self.table_uri1, None, None, None)
+        self.load_json(self.table_uri1, table1_json)
+        self.check_json(self.table_uri1, table1_json)
+
+        table2_json =  (
+            ('"key0" : "KEY000"', '"value0" : 123,\n"value1" : "str0"'),
+            ('"key0" : "KEY001"', '"value0" : 234,\n"value1" : "str1"'))
+        self.check_json(self.table_uri2, table2_json)
+        self.session.truncate(self.table_uri2, None, None, None)
+        self.load_json(self.table_uri2, table2_json)
+        self.check_json(self.table_uri2, table2_json)
+        self.session.truncate(self.table_uri2, None, None, None)
+
+        # bad tokens
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.load_json(self.table_uri2, 
+              (('<>abc?', '9'),)),
+            '/unknown token/')
+
+        # bad tokens
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.load_json(self.table_uri2, 
+              (('"abc\u"', ''),)),
+            '/invalid Unicode/')
+
+        # bad tokens
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.load_json(self.table_uri2, 
+              (('"abc', ''),)),
+            '/unterminated string/')
+
+        # bad syntax
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.load_json(self.table_uri2, 
+              (('"stuff" "jibberish"', '"value0" "more jibberish"'),)),
+            '/expected key name.*\"key0\"/')
+
+        # bad types
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.load_json(self.table_uri2, 
+              (('"key0" : "KEY002"', '"value0" : "xyz",\n"value1" : "str0"'),)),
+            '/expected unsigned JSON <int>, got <string>/')
+
+        # bad types
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.load_json(self.table_uri2, 
+              (('"key0" : "KEY002"', '"value0" : 123,\n"value1" : 456'),)),
+            '/expected JSON <string>, got <integer>/')
+
+        # extra stuff
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.load_json(self.table_uri2, 
+              (('"key0" : "KEY002"',
+                '"value0" : 123,\n"value1" : "str0",'),)),
+            '/expected JSON <EOF>, got \',\'/')
+
+        # fields out of order currently not supported
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.load_json(self.table_uri2, 
+              (('"key0" : "KEY002"', '"value1" : "str0",\n"value0" : 123'),)),
+            '/expected value name.*\"value0\"/')
+
+        # various invalid unicode
+        invalid_unicode = (
+            '\\u', '\\ux', '\\u0', '\\u0F', '\\u0FA', '\\u0FAx',  '\\u0FA\\x')
+        for uni in invalid_unicode:
+            self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+                lambda: self.load_json(self.table_uri2, 
+                  (('"key0" : "KEY002"', '"value0" : 123,\n"value1" : "'
+                    + uni + '"'),)),
+                '/invalid Unicode/')
+
+        # this one should work
+        self.load_json(self.table_uri2, 
+              (('"key0" : "KEY002"', '"value0" : 345,\n"value1" : "str2"'),))
+
+        # extraneous/missing space is okay
+        self.load_json(self.table_uri2, 
+              (('  "key0"\n:\t"KEY003"    ',
+                '"value0":456,"value1"\n\n\r\n:\t\n"str3"'),))
+
         self.check_json(self.table_uri3, (
                 ('"key0" : 1', '"value0" : "\\u0001\\u0002\\u0003"'),
                 ('"key0" : 2',
@@ -163,37 +254,5 @@ class test_jsondump02(wttest.WiredTigerTestCase):
                 ('"i2" : 16,\n"i4" : 64',
                  '"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64')))
 
-    def test_json_illegal(self):
-        """
-        Create JSON cursors and use them illegally
-        """
-        extra_params = ',allocation_size=512,' +\
-            'internal_page_max=16384,leaf_page_max=131072'
-        self.session.create(self.table_uri1,
-            'key_format=S,value_format=S' + extra_params)
-
-        self.set_kv(self.table_uri1, 'A', 'aaaa')
-        self.check_json(self.table_uri1, (
-                ('"key0" : "A"', '"value0" : "aaaa"'),))
-
-        self.set_kv(self.table_uri1, 'B', 'bbbb')
-        self.check_json(self.table_uri1, (
-                ('"key0" : "A"', '"value0" : "aaaa"'),
-                ('"key0" : "B"', '"value0" : "bbbb"')))
-
-        cursor = self.session.open_cursor(self.table_uri1, None, 'dump=json')
-        cursor.next()
-
-        with self.expectedStderrPattern('Setting keys for JSON cursors not permitted'):
-            cursor.set_key('stuff')
-        with self.expectedStderrPattern('Setting values for JSON cursors not permitted'):
-            cursor.set_value('other stuff')
-        cursor.close()
-
-        self.check_json(self.table_uri1, (
-                ('"key0" : "A"', '"value0" : "aaaa"'),
-                ('"key0" : "B"', '"value0" : "bbbb"')))
-        
-
 if __name__ == '__main__':
     wttest.run()
author	Alex Gorrod <alexg@wiredtiger.com>	2014-10-15 11:55:19 +1100
committer	Alex Gorrod <alexg@wiredtiger.com>	2014-10-15 11:55:19 +1100
commit	9ef6222eb484e2328e90f639e49bf64584a92a38 (patch)
tree	bc1338268962976bbe00c1af9ffcd7a52e808cbe
parent	ba4f6023c5c580b5f3be1d5538f57c03a8c49fe8 (diff)
parent	2bca93d54b3b3c3ad01f1fe932a783e83495701e (diff)
download	mongo-9ef6222eb484e2328e90f639e49bf64584a92a38.tar.gz