summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alex Gorrod <alexander.gorrod@mongodb.com> 2016-06-01 12:38:19 +1000
committer Alex Gorrod <alexander.gorrod@mongodb.com> 2016-06-01 12:38:19 +1000
commit ff108d7c705b82e482fb17e33488dc14304bf259 (patch)
tree 6cdc57dd2b037f9f61848ba1eae74c214b406e6a
parent 6f9a7a41f21476235b0bb99f0f16a2d7f738c2e0 (diff)
parent 7033ed47f3986edb08ec8c4933e6ef211e73b3c4 (diff)
download mongo-ff108d7c705b82e482fb17e33488dc14304bf259.tar.gz
Merge branch 'develop' into mongodb-3.4
-rw-r--r-- src/btree/bt_handle.c 10
-rw-r--r-- src/conn/conn_api.c 1
-rw-r--r-- src/conn/conn_dhandle.c 24
-rw-r--r-- src/cursor/cur_dump.c 21
-rw-r--r-- src/cursor/cur_json.c 98
-rw-r--r-- src/include/connection.h 1
-rw-r--r-- src/include/dhandle.h 5
-rw-r--r-- src/include/extern.h 3
-rw-r--r-- src/include/meta.h 3
-rw-r--r-- src/include/packing.i 12
-rw-r--r-- src/log/log.c 11
-rw-r--r-- src/meta/meta_table.c 12
-rw-r--r-- src/utilities/util_dump.c 20
-rw-r--r-- src/utilities/util_dump.h 11
-rw-r--r-- src/utilities/util_load_json.c 29
-rw-r--r-- test/suite/test_jsondump01.py 35
-rw-r--r-- test/suite/test_jsondump02.py 172
17 files changed, 343 insertions, 125 deletions
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index ba545859d07..118195779e9 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -689,22 +689,20 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
"size (%" PRIu32 "B)", btree->allocsize);
/*
- * When a page is forced to split, we want at least 50 entries on its
- * parent.
- *
* Don't let pages grow larger than a quarter of the cache, with too-
* small caches, we can end up in a situation where nothing can be
* evicted. Take care getting the cache size: with a shared cache,
* it may not have been set.
*/
WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval));
- btree->maxmempage =
- WT_MAX((uint64_t)cval.val, 50 * (uint64_t)btree->maxleafpage);
+ btree->maxmempage = (uint64_t)cval.val;
if (!F_ISSET(conn, WT_CONN_CACHE_POOL)) {
if ((cache_size = conn->cache_size) > 0)
btree->maxmempage =
- WT_MIN(btree->maxmempage, cache_size / 4);
+ WT_MIN(btree->maxmempage, cache_size / 10);
}
+ /* Enforce a lower bound of a single disk leaf page */
+ btree->maxmempage = WT_MAX(btree->maxmempage, btree->maxleafpage);
/*
* Try in-memory splits once we hit 80% of the maximum in-memory page
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 279e3d4a8b5..dde3fb6930e 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -2330,7 +2330,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
*/
WT_ERR(__wt_turtle_init(session));
- __wt_metadata_init(session);
WT_ERR(__wt_metadata_cursor(session, NULL));
/* Start the worker threads and run recovery. */
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 5019ab59fe3..4c2cf9a8dc2 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -39,6 +39,9 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session,
WT_BTREE *btree;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
+ uint64_t bucket;
+
+ *dhandlep = NULL;
WT_RET(__wt_calloc_one(session, &dhandle));
@@ -57,6 +60,16 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session,
__wt_stat_dsrc_init(dhandle);
+ if (strcmp(uri, WT_METAFILE_URI) == 0)
+ F_SET(dhandle, WT_DHANDLE_IS_METADATA);
+
+ /*
+ * Prepend the handle to the connection list, assuming we're likely to
+ * need new files again soon, until they are cached by all sessions.
+ */
+ bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_CONN_DHANDLE_INSERT(S2C(session), dhandle, bucket);
+
*dhandlep = dhandle;
return (0);
@@ -106,14 +119,6 @@ __wt_conn_dhandle_find(
WT_RET(__conn_dhandle_alloc(session, uri, checkpoint, &dhandle));
- /*
- * Prepend the handle to the connection list, assuming we're likely to
- * need new files again soon, until they are cached by all sessions.
- * Find the right hash bucket to insert into as well.
- */
- bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
- WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket);
-
session->dhandle = dhandle;
return (0);
}
@@ -158,7 +163,8 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force)
/*
* We may not be holding the schema lock, and threads may be walking
* the list of open handles (for example, checkpoint). Acquire the
- * handle's close lock.
+ * handle's close lock. We don't have the sweep server acquire the
+ * handle's rwlock so we have to prevent races through the close code.
*/
__wt_spin_lock(session, &dhandle->close_lock);
diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c
index a7b1c98871a..32353e0a28d 100644
--- a/src/cursor/cur_dump.c
+++ b/src/cursor/cur_dump.c
@@ -155,7 +155,9 @@ __curdump_set_key(WT_CURSOR *cursor, ...)
WT_SESSION_IMPL *session;
uint64_t recno;
va_list ap;
+ const uint8_t *up;
const char *p;
+ bool json;
cdump = (WT_CURSOR_DUMP *)cursor;
child = cdump->child;
@@ -168,16 +170,23 @@ __curdump_set_key(WT_CURSOR *cursor, ...)
p = va_arg(ap, const char *);
va_end(ap);
+ json = F_ISSET(cursor, WT_CURSTD_DUMP_JSON);
+ if (json)
+ WT_ERR(__wt_json_to_item(session, p, cursor->key_format,
+ (WT_CURSOR_JSON *)cursor->json_private, true,
+ &cursor->key));
+
if (WT_CURSOR_RECNO(cursor) && !F_ISSET(cursor, WT_CURSTD_RAW)) {
- WT_ERR(str2recno(session, p, &recno));
+ if (json) {
+ up = (const uint8_t *)cursor->key.data;
+ WT_ERR(__wt_vunpack_uint(&up, cursor->key.size,
+ &recno));
+ } else
+ WT_ERR(str2recno(session, p, &recno));
child->set_key(child, recno);
} else {
- if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
- WT_ERR(__wt_json_to_item(session, p, cursor->key_format,
- (WT_CURSOR_JSON *)cursor->json_private, true,
- &cursor->key));
- else
+ if (!json)
WT_ERR(__dump_to_raw(session, p, &cursor->key,
F_ISSET(cursor, WT_CURSTD_DUMP_HEX)));
diff --git a/src/cursor/cur_json.c b/src/cursor/cur_json.c
index fcb66d3e8b3..133b7b9ac9b 100644
--- a/src/cursor/cur_json.c
+++ b/src/cursor/cur_json.c
@@ -48,6 +48,10 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *,
case 't': \
WT_RET(json_uint_arg(session, &jstr, &pv.u.u)); \
break; \
+ case 'u': \
+ WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \
+ pv.type = 'K'; \
+ break; \
/* User format strings have already been validated. */ \
WT_ILLEGAL_VALUE(session); \
} \
@@ -493,7 +497,7 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
"invalid Unicode within JSON string");
return (-1);
}
- src += 5;
+ src += 4;
}
backslash = false;
}
@@ -840,20 +844,17 @@ __wt_json_strlen(const char *src, size_t srclen)
if (__wt_hex2byte((const u_char *)src, &lo))
return (-1);
src += 2;
- /* RFC 3629 */
- if (hi >= 0x8) {
- /* 3 bytes total */
- dstlen += 2;
- }
- else if (hi != 0 || lo >= 0x80) {
- /* 2 bytes total */
- dstlen++;
- }
- /* else 1 byte total */
+ if (hi != 0)
+ /*
+ * For our dump representation,
+ * every Unicode character on input
+ * represents a single byte.
+ */
+ return (-1);
}
- }
+ } else
+ src++;
dstlen++;
- src++;
}
if (src != srcend)
return (-1); /* invalid input, e.g. final char is '\\' */
@@ -867,55 +868,58 @@ __wt_json_strlen(const char *src, size_t srclen)
* the result if zero padded.
*/
int
-__wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen)
+__wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen,
+ const char *src, size_t srclen)
{
- char *dst;
+ WT_SESSION_IMPL *session;
+ char ch, *dst;
const char *dstend, *srcend;
u_char hi, lo;
+ session = (WT_SESSION_IMPL *)wt_session;
+
dst = *pdst;
dstend = dst + dstlen;
srcend = src + srclen;
while (src < srcend && dst < dstend) {
/* JSON can include any UTF-8 expressed in 4 hex chars. */
- if (*src == '\\') {
- if (*++src == 'u') {
- if (__wt_hex2byte((const u_char *)++src, &hi))
+ if ((ch = *src++) == '\\')
+ switch (ch = *src++) {
+ case 'u':
+ if (__wt_hex2byte((const u_char *)src, &hi))
return (EINVAL);
src += 2;
if (__wt_hex2byte((const u_char *)src, &lo))
return (EINVAL);
src += 2;
- /* RFC 3629 */
- if (hi >= 0x8) {
- /* 3 bytes total */
- /* byte 0: 1110HHHH */
- /* byte 1: 10HHHHLL */
- /* byte 2: 10LLLLLL */
- *dst++ = (char)(0xe0 |
- ((hi >> 4) & 0x0f));
- *dst++ = (char)(0x80 |
- ((hi << 2) & 0x3c) |
- ((lo >> 6) & 0x03));
- *dst++ = (char)(0x80 | (lo & 0x3f));
- } else if (hi != 0 || lo >= 0x80) {
- /* 2 bytes total */
- /* byte 0: 110HHHLL */
- /* byte 1: 10LLLLLL */
- *dst++ = (char)(0xc0 |
- (hi << 2) |
- ((lo >> 6) & 0x03));
- *dst++ = (char)(0x80 | (lo & 0x3f));
- } else
- /* else 1 byte total */
- /* byte 0: 0LLLLLLL */
- *dst++ = (char)lo;
+ if (hi != 0) {
+ __wt_errx(NULL, "Unicode \"%6.6s\""
+ " byte out of range in JSON",
+ src - 6);
+ return (EINVAL);
+ }
+ *dst++ = (char)lo;
+ break;
+ case 'f':
+ *dst++ = '\f';
+ break;
+ case 'n':
+ *dst++ = '\n';
+ break;
+ case 'r':
+ *dst++ = '\r';
+ break;
+ case 't':
+ *dst++ = '\t';
+ break;
+ case '"':
+ case '\\':
+ *dst++ = ch;
+ break;
+ WT_ILLEGAL_VALUE(session);
}
- else
- *dst++ = *src;
- } else
- *dst++ = *src;
- src++;
+ else
+ *dst++ = ch;
}
if (src != srcend)
return (ENOMEM);
diff --git a/src/include/connection.h b/src/include/connection.h
index e6cff08f0ae..0e0c357279a 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -352,7 +352,6 @@ struct __wt_connection_impl {
uint32_t txn_logsync; /* Log sync configuration */
WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */
- uint64_t meta_uri_hash; /* Metadata file name hash */
WT_SESSION_IMPL *sweep_session; /* Handle sweep session */
wt_thread_t sweep_tid; /* Handle sweep thread */
diff --git a/src/include/dhandle.h b/src/include/dhandle.h
index 8b313428d06..9a11594c893 100644
--- a/src/include/dhandle.h
+++ b/src/include/dhandle.h
@@ -82,7 +82,8 @@ struct __wt_data_handle {
#define WT_DHANDLE_DISCARD 0x02 /* Discard on release */
#define WT_DHANDLE_DISCARD_FORCE 0x04 /* Force discard on release */
#define WT_DHANDLE_EXCLUSIVE 0x08 /* Need exclusive access */
-#define WT_DHANDLE_LOCK_ONLY 0x10 /* Handle only used as a lock */
-#define WT_DHANDLE_OPEN 0x20 /* Handle is open */
+#define WT_DHANDLE_IS_METADATA 0x10 /* Metadata handle */
+#define WT_DHANDLE_LOCK_ONLY 0x20 /* Handle only used as a lock */
+#define WT_DHANDLE_OPEN 0x40 /* Handle is open */
uint32_t flags;
};
diff --git a/src/include/extern.h b/src/include/extern.h
index e8c20930aaf..4ca5c8461a0 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -297,7 +297,7 @@ extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype
extern const char *__wt_json_tokname(int toktype);
extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item);
extern ssize_t __wt_json_strlen(const char *src, size_t srclen);
-extern int __wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen);
+extern int __wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, const char *src, size_t srclen);
extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp);
extern int __wt_schema_create_final( WT_SESSION_IMPL *session, char *cfg_arg[], char **value_ret);
extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
@@ -461,7 +461,6 @@ extern int __wt_ext_metadata_search(WT_EXTENSION_API *wt_api, WT_SESSION *wt_ses
extern int __wt_ext_metadata_update(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value);
extern int __wt_metadata_get_ckptlist( WT_SESSION *session, const char *name, WT_CKPT **ckptbasep);
extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase);
-extern void __wt_metadata_init(WT_SESSION_IMPL *session);
extern int __wt_metadata_cursor_open( WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp);
extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp);
extern int __wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp);
diff --git a/src/include/meta.h b/src/include/meta.h
index ba4149979ef..63c79dbc72e 100644
--- a/src/include/meta.h
+++ b/src/include/meta.h
@@ -34,8 +34,7 @@
* when diagnostic is enabled.
*/
#define WT_IS_METADATA(session, dh) \
- ((dh)->name_hash == S2C(session)->meta_uri_hash && \
- strcmp((dh)->name, WT_METAFILE_URI) == 0)
+ F_ISSET((dh), WT_DHANDLE_IS_METADATA)
#define WT_METAFILE_ID 0 /* Metadata file ID */
#define WT_METADATA_VERSION "WiredTiger version" /* Version keys */
diff --git a/src/include/packing.i b/src/include/packing.i
index 35b2ddc43db..9d5971ed99f 100644
--- a/src/include/packing.i
+++ b/src/include/packing.i
@@ -260,6 +260,8 @@ __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv)
return (pv->size);
case 'j':
case 'J':
+ case 'K':
+ /* These formats are only used internally. */
if (pv->type == 'j' || pv->havesize)
s = pv->size;
else {
@@ -269,7 +271,7 @@ __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv)
len = __wt_json_strlen(pv->u.item.data,
pv->u.item.size);
WT_ASSERT(session, len >= 0);
- s = (size_t)len + 1;
+ s = (size_t)len + (pv->type == 'K' ? 0 : 1);
}
return (s);
case 's':
@@ -357,18 +359,22 @@ __pack_write(
break;
case 'j':
case 'J':
+ case 'K':
+ /* These formats are only used internally. */
s = pv->u.item.size;
if ((pv->type == 'j' || pv->havesize) && pv->size < s) {
s = pv->size;
pad = 0;
} else if (pv->havesize)
pad = pv->size - s;
+ else if (pv->type == 'K')
+ pad = 0;
else
pad = 1;
if (s > 0) {
oldp = *pp;
- WT_RET(__wt_json_strncpy((char **)pp, maxlen,
- pv->u.item.data, s));
+ WT_RET(__wt_json_strncpy((WT_SESSION *)session,
+ (char **)pp, maxlen, pv->u.item.data, s));
maxlen -= (size_t)(*pp - oldp);
}
if (pad > 0) {
diff --git a/src/log/log.c b/src/log/log.c
index 1c4298b73e5..01bfb97718f 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -2137,10 +2137,19 @@ __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags)
* We need to flush out the current slot first to get the real
* end of log LSN in log->alloc_lsn.
*/
- WT_RET(__wt_log_flush_lsn(session, &lsn, 0));
+ WT_RET(__wt_log_flush_lsn(session, &lsn, false));
last_lsn = log->alloc_lsn;
/*
+ * If the last write caused a switch to a new log file, we should only
+ * wait for the last write to be flushed. Otherwise, if the workload
+ * is single-threaded we could wait here forever because the write LSN
+ * doesn't switch into the new file until it contains a record.
+ */
+ if (last_lsn.l.offset == WT_LOG_FIRST_RECORD)
+ last_lsn = log->log_close_lsn;
+
+ /*
* Wait until all current outstanding writes have been written
* to the file system.
*/
diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c
index dd65f1a7ef9..38a2edd7219 100644
--- a/src/meta/meta_table.c
+++ b/src/meta/meta_table.c
@@ -9,18 +9,6 @@
#include "wt_internal.h"
/*
- * __wt_metadata_init --
- * Metadata initialization.
- */
-void
-__wt_metadata_init(WT_SESSION_IMPL *session)
-{
- /* We cache the metadata file's URI hash for fast detection. */
- S2C(session)->meta_uri_hash =
- __wt_hash_city64(WT_METAFILE_URI, strlen(WT_METAFILE_URI));
-}
-
-/*
* __metadata_turtle --
* Return if a key's value should be taken from the turtle file.
*/
diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c
index 0f09009cd4c..3314b5ba485 100644
--- a/src/utilities/util_dump.c
+++ b/src/utilities/util_dump.c
@@ -7,6 +7,7 @@
*/
#include "util.h"
+#include "util_dump.h"
static int dump_config(WT_SESSION *, const char *, bool, bool);
static int dump_json_begin(WT_SESSION *);
@@ -73,7 +74,9 @@ util_dump(WT_SESSION *session, int argc, char *argv[])
if (argc < 1 || (argc != 1 && !json))
return (usage());
- if (json && (ret = dump_json_begin(session)) != 0)
+ if (json &&
+ ((ret = dump_json_begin(session)) != 0 ||
+ (ret = dump_prefix(session, hex, json)) != 0))
goto err;
for (i = 0; i < argc; i++) {
@@ -155,7 +158,7 @@ dump_config(WT_SESSION *session, const char *uri, bool hex, bool json)
*/
cursor->set_key(cursor, uri);
if ((ret = cursor->search(cursor)) == 0) {
- if (dump_prefix(session, hex, json) != 0 ||
+ if ((!json && dump_prefix(session, hex, json) != 0) ||
dump_table_config(session, cursor, uri, json) != 0 ||
dump_suffix(session, json) != 0)
ret = 1;
@@ -456,17 +459,20 @@ dump_prefix(WT_SESSION *session, bool hex, bool json)
{
int vmajor, vminor, vpatch;
- if (json)
- return (0);
-
(void)wiredtiger_version(&vmajor, &vminor, &vpatch);
- if (printf(
+ if (!json && (printf(
"WiredTiger Dump (WiredTiger Version %d.%d.%d)\n",
vmajor, vminor, vpatch) < 0 ||
printf("Format=%s\n", hex ? "hex" : "print") < 0 ||
- printf("Header\n") < 0)
+ printf("Header\n") < 0))
return (util_err(session, EIO, NULL));
+ else if (json && printf(
+ " \"%s\" : \"%d (%d.%d.%d)\",\n",
+ DUMP_JSON_VERSION_MARKER, DUMP_JSON_CURRENT_VERSION,
+ vmajor, vminor, vpatch) < 0)
+ return (util_err(session, EIO, NULL));
+
return (0);
}
diff --git a/src/utilities/util_dump.h b/src/utilities/util_dump.h
new file mode 100644
index 00000000000..e3fd8e6a501
--- /dev/null
+++ b/src/utilities/util_dump.h
@@ -0,0 +1,11 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#define DUMP_JSON_VERSION_MARKER "WiredTiger Dump Version"
+#define DUMP_JSON_CURRENT_VERSION 1
+#define DUMP_JSON_SUPPORTED_VERSION 1
diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c
index 3a1f847a95f..f1f6675e99c 100644
--- a/src/utilities/util_load_json.c
+++ b/src/utilities/util_load_json.c
@@ -7,6 +7,7 @@
*/
#include "util.h"
+#include "util_dump.h"
#include "util_load.h"
/*
@@ -186,9 +187,8 @@ json_strdup(WT_SESSION *session, JSON_INPUT_STATE *ins, char **resultp)
}
*resultp = result;
resultcpy = result;
- if ((ret = __wt_json_strncpy(&resultcpy, (size_t)resultlen, src,
- srclen))
- != 0) {
+ if ((ret = __wt_json_strncpy(
+ session, &resultcpy, (size_t)resultlen, src, srclen)) != 0) {
ret = util_err(session, ret, NULL);
goto err;
}
@@ -344,13 +344,16 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags)
{
CONFIG_LIST cl;
WT_DECL_RET;
- int toktype;
static const char *json_markers[] = {
"\"config\"", "\"colgroups\"", "\"indices\"", "\"data\"", NULL };
char *config, *tableuri;
+ int curversion, toktype;
+ bool hasversion;
memset(&cl, 0, sizeof(cl));
tableuri = NULL;
+ hasversion = false;
+
JSON_EXPECT(session, ins, '{');
while (json_peek(session, ins) == 's') {
JSON_EXPECT(session, ins, 's');
@@ -358,6 +361,24 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags)
snprintf(tableuri, ins->toklen, "%.*s",
(int)(ins->toklen - 2), ins->tokstart + 1);
JSON_EXPECT(session, ins, ':');
+ if (!hasversion) {
+ if (strcmp(tableuri, DUMP_JSON_VERSION_MARKER) != 0) {
+ ret = util_err(session, ENOTSUP,
+ "missing \"%s\"", DUMP_JSON_VERSION_MARKER);
+ goto err;
+ }
+ hasversion = true;
+ JSON_EXPECT(session, ins, 's');
+ if ((curversion = atoi(ins->tokstart + 1)) <= 0 ||
+ curversion > DUMP_JSON_SUPPORTED_VERSION) {
+ ret = util_err(session, ENOTSUP,
+ "unsupported JSON dump version \"%.*s\"",
+ (int)(ins->toklen - 1), ins->tokstart + 1);
+ goto err;
+ }
+ JSON_EXPECT(session, ins, ',');
+ continue;
+ }
/*
* Allow any ordering of 'config', 'colgroups',
diff --git a/test/suite/test_jsondump01.py b/test/suite/test_jsondump01.py
index ddf871d9a24..10262edc777 100644
--- a/test/suite/test_jsondump01.py
+++ b/test/suite/test_jsondump01.py
@@ -77,16 +77,22 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
('string', dict(keyfmt='S'))
]
types = [
- ('file', dict(type='file:',
- name='file',
+ ('file', dict(uri='file:', config='', lsm=False,
populate=simple_populate,
populate_check=simple_populate_check_cursor)),
- ('table-simple', dict(type='table:',
- name='table-simple',
+ ('lsm', dict(uri='lsm:', config='', lsm=True,
populate=simple_populate,
populate_check=simple_populate_check_cursor)),
- ('table-complex', dict(type='table:',
- name='table-complex',
+ ('table-simple', dict(uri='table:', config='', lsm=False,
+ populate=simple_populate,
+ populate_check=simple_populate_check_cursor)),
+ ('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True,
+ populate=simple_populate,
+ populate_check=simple_populate_check_cursor)),
+ ('table-complex', dict(uri='table:', config='', lsm=False,
+ populate=complex_populate,
+ populate_check=complex_populate_check_cursor)),
+ ('table-complex-lsm', dict(uri='table:', config='type=lsm', lsm=True,
populate=complex_populate,
populate_check=complex_populate_check_cursor))
]
@@ -95,9 +101,14 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
# Dump using util, re-load using python's JSON, and do a content comparison.
def test_jsondump_util(self):
+ # LSM and column-store isn't a valid combination.
+ if self.lsm and self.keyfmt == 'r':
+ return
+
# Create the object.
- uri = self.type + self.name
- self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries)
+ uri = self.uri + self.name
+ self.populate(self, uri, self.config + ',key_format=' + self.keyfmt,
+ self.nentries)
# Dump the object.
self.runWt(['dump', '-j', uri], outfilename='jsondump.out')
@@ -125,9 +136,13 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
# Dump using util, re-load using python's JSON, and do a content comparison.
def test_jsonload_util(self):
+ # LSM and column-store isn't a valid combination.
+ if self.lsm and self.keyfmt == 'r':
+ return
+
# Create the object.
- uri = self.type + self.name
- uri2 = self.type + self.name2
+ uri = self.uri + self.name
+ uri2 = self.uri + self.name2
self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries)
# Dump the object.
diff --git a/test/suite/test_jsondump02.py b/test/suite/test_jsondump02.py
index c6cd464e453..251237f3faf 100644
--- a/test/suite/test_jsondump02.py
+++ b/test/suite/test_jsondump02.py
@@ -28,16 +28,19 @@
import os
import wiredtiger, wttest
+from suite_subprocess import suite_subprocess
# test_jsondump.py
# Test dump output from json cursors.
-class test_jsondump02(wttest.WiredTigerTestCase):
+class test_jsondump02(wttest.WiredTigerTestCase, suite_subprocess):
table_uri1 = 'table:jsondump02a.wt'
table_uri2 = 'table:jsondump02b.wt'
table_uri3 = 'table:jsondump02c.wt'
basename_uri4 = 'jsondump02d.wt'
table_uri4 = 'table:' + basename_uri4
+ table_uri5 = 'table:jsondump02e.wt'
+ table_uri6 = 'table:jsondump02f.wt'
def set_kv(self, uri, key, val):
cursor = self.session.open_cursor(uri, None, None)
@@ -80,15 +83,14 @@ class test_jsondump02(wttest.WiredTigerTestCase):
pos = 0
try:
for insert in inserts:
- #tty_pr('Insert: ' + str(insert))
cursor[insert[0]] = insert[1]
finally:
cursor.close()
- # Create JSON cursors and test them directly.
def test_json_cursor(self):
"""
- Create a table, add a key, get it back
+ Create JSON cursors and test them directly, also test
+ dump/load commands.
"""
extra_params = ',allocation_size=512,' +\
'internal_page_max=16384,leaf_page_max=131072'
@@ -112,7 +114,12 @@ class test_jsondump02(wttest.WiredTigerTestCase):
self.session.create(uri4index3, "columns=(i2,i4)")
self.set_kv(self.table_uri1, 'KEY000', 'string value')
- self.set_kv(self.table_uri1, 'KEY001', '\'\"({[]})\"\', etc. allowed')
+ self.set_kv(self.table_uri1, 'KEY001', '\'\"({[]})\"\'\\, etc. allowed')
+ # \u03c0 is pi in Unicode, converted by Python to UTF-8: 0xcf 0x80.
+ # Here's how UTF-8 might be used.
+ self.set_kv(self.table_uri1, 'KEY002', u'\u03c0'.encode('utf-8'))
+ # 0xf5-0xff are illegal in Unicode, but may occur legally in C strings.
+ self.set_kv(self.table_uri1, 'KEY003', '\xff\xfe')
self.set_kv2(self.table_uri2, 'KEY000', 123, 'str0')
self.set_kv2(self.table_uri2, 'KEY001', 234, 'str1')
self.set_kv(self.table_uri3, 1, '\x01\x02\x03')
@@ -122,7 +129,9 @@ class test_jsondump02(wttest.WiredTigerTestCase):
table1_json = (
('"key0" : "KEY000"', '"value0" : "string value"'),
('"key0" : "KEY001"', '"value0" : ' +
- '"\'\\\"({[]})\\\"\', etc. allowed"'))
+ '"\'\\\"({[]})\\\"\'\\\\, etc. allowed"'),
+ ('"key0" : "KEY002"', '"value0" : "\\u00cf\\u0080"'),
+ ('"key0" : "KEY003"', '"value0" : "\\u00ff\\u00fe"'))
self.check_json(self.table_uri1, table1_json)
self.session.truncate(self.table_uri1, None, None, None)
@@ -206,11 +215,12 @@ class test_jsondump02(wttest.WiredTigerTestCase):
((' "key0"\n:\t"KEY003" ',
'"value0":456,"value1"\n\n\r\n:\t\n"str3"'),))
- self.check_json(self.table_uri3, (
- ('"key0" : 1', '"value0" : "\\u0001\\u0002\\u0003"'),
- ('"key0" : 2',
- '"value0" : "\\u0077\\u0088\\u0099\\u0000\\u00ff\\u00fe"')))
- self.check_json(self.table_uri4, (
+ table3_json = (
+ ('"key0" : 1', '"value0" : "\\u0001\\u0002\\u0003"'),
+ ('"key0" : 2',
+ '"value0" : "\\u0077\\u0088\\u0099\\u0000\\u00ff\\u00fe"'))
+ self.check_json(self.table_uri3, table3_json)
+ table4_json = (
('"ikey" : 1,\n"Skey" : "key1"',
'"S1" : "val1",\n"i2" : 1,\n"S3" : "val1",\n"i4" : 1'),
('"ikey" : 2,\n"Skey" : "key2"',
@@ -218,7 +228,8 @@ class test_jsondump02(wttest.WiredTigerTestCase):
('"ikey" : 3,\n"Skey" : "key3"',
'"S1" : "val9",\n"i2" : 9,\n"S3" : "val27",\n"i4" : 27'),
('"ikey" : 4,\n"Skey" : "key4"',
- '"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64')))
+ '"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64'))
+ self.check_json(self.table_uri4, table4_json)
# The dump config currently is not supported for the index type.
self.check_json(uri4index1, (
('"Skey" : "key1"',
@@ -248,5 +259,142 @@ class test_jsondump02(wttest.WiredTigerTestCase):
('"i2" : 16,\n"i4" : 64',
'"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64')))
+ # Dump all the tables into a single file, and also each
+ # table into its own file.
+ self.runWt(['dump', '-j',
+ self.table_uri1,
+ self.table_uri2,
+ self.table_uri3,
+ self.table_uri4],
+ outfilename='jsondump-all.out')
+ self.runWt(['dump', '-j', self.table_uri1], outfilename='jsondump1.out')
+ self.runWt(['dump', '-j', self.table_uri2], outfilename='jsondump2.out')
+ self.runWt(['dump', '-j', self.table_uri3], outfilename='jsondump3.out')
+ self.runWt(['dump', '-j', self.table_uri4], outfilename='jsondump4.out')
+ self.session.drop(self.table_uri1)
+ self.session.drop(self.table_uri2)
+ self.session.drop(self.table_uri3)
+ self.session.drop(self.table_uri4)
+ self.runWt(['load', '-jf', 'jsondump1.out'])
+ self.session.drop(self.table_uri1)
+ self.runWt(['load', '-jf', 'jsondump2.out'])
+ self.session.drop(self.table_uri2)
+ self.runWt(['load', '-jf', 'jsondump3.out'])
+ self.session.drop(self.table_uri3)
+ self.runWt(['load', '-jf', 'jsondump4.out'])
+ self.session.drop(self.table_uri4)
+
+ # Note: only the first table is loaded.
+ self.runWt(['load', '-jf', 'jsondump-all.out'])
+ self.check_json(self.table_uri1, table1_json)
+ #self.check_json(self.table_uri2, table2_json)
+ #self.check_json(self.table_uri3, table3_json)
+ #self.check_json(self.table_uri4, table4_json)
+
+ # Generate two byte keys that cover some range of byte values.
+ # For simplicity, the keys are monotonically increasing.
+ # A null byte is disallowed in a string key, so we don't use it.
+ def generate_key(self, i, k):
+ k[0] = ((i & 0xffc0) >> 6) + 1
+ k[1] = (i & 0x3f) + 1
+
+ # Generate three byte values:
+ # i==0 : v:[0x00, 0x01, 0x02]
+ # i==1 : v:[0x01, 0x02, 0x03]
+ # etc.
+ # A null byte is disallowed in a string value, it is replaced by 'X'
+ def generate_value(self, i, v, isstring):
+ for j in range(0, 3):
+ val = (i + j) % 256
+ if isstring and val == 0:
+ val = 88 # 'X'
+ v[j] = val
+
+ def test_json_all_bytes(self):
+ """
+ Test the generated JSON for all byte values in byte array and
+ string formats.
+ """
+ self.session.create(self.table_uri5, 'key_format=u,value_format=u')
+ self.session.create(self.table_uri6, 'key_format=S,value_format=S')
+
+ c5 = self.session.open_cursor(self.table_uri5, None, None)
+ c6 = self.session.open_cursor(self.table_uri6, None, None)
+ k = bytearray(b'\x00\x00')
+ v = bytearray(b'\x00\x00\x00')
+ for i in range(0, 512):
+ self.generate_key(i, k)
+ self.generate_value(i, v, False)
+ c5[str(k)] = str(v)
+ self.generate_value(i, v, True) # no embedded nuls
+ c6[str(k)] = str(v)
+ c5.close()
+ c6.close()
+
+ # Build table5_json, we want it to look like this:
+ # ('"key0" : "\u0001\u0001"', '"value0" : "\u0000\u0001\u0002"'),
+ # ('"key0" : "\u0001\u0002"', '"value0" : "\u0001\u0002\u0003"'))
+ # ('"key0" : "\u0001\u0003"', '"value0" : "\u0003\u0003\u0004"'))
+ # ...
+ # table6_json is similar, except that printable values like '\u0041'
+ # would appear as 'A'. The string type cannot have embedded nulls,
+ # so '\u0000' in table6_json appears instead as an 'X'.
+ #
+ # Start by creating two tables of individual Unicode values.
+ # bin_unicode[] contains only the \u escape sequences.
+ # mix_unicode[] contains printable characters or \t \n etc. escapes
+ bin_unicode = []
+ mix_unicode = []
+ for i in range(0, 256):
+ u = "\\u00" + hex(256 + i)[3:] # e.g. "\u00ab")
+ bin_unicode.append(u)
+ mix_unicode.append(u)
+ for i in range(0x20, 0x7f):
+ mix_unicode[i] = chr(i)
+ mix_unicode[ord('"')] = '\\"'
+ mix_unicode[ord('\\')] = '\\\\'
+ mix_unicode[ord('\f')] = '\\f'
+ mix_unicode[ord('\n')] = '\\n'
+ mix_unicode[ord('\r')] = '\\r'
+ mix_unicode[ord('\t')] = '\\t'
+
+ table5_json = []
+ table6_json = []
+ for i in range(0, 512):
+ self.generate_key(i, k)
+ self.generate_value(i, v, False)
+ j = i if (i > 0 and i < 254) or (i > 256 and i < 510) else 88
+ table5_json.append(('"key0" : "' + bin_unicode[k[0]] +
+ bin_unicode[k[1]] + '"',
+ '"value0" : "' + bin_unicode[v[0]] +
+ bin_unicode[v[1]] +
+ bin_unicode[v[2]] + '"'))
+ self.generate_value(i, v, True)
+ table6_json.append(('"key0" : "' + mix_unicode[k[0]] +
+ mix_unicode[k[1]] + '"',
+ '"value0" : "' + mix_unicode[v[0]] +
+ mix_unicode[v[1]] +
+ mix_unicode[v[2]] + '"'))
+
+ self.check_json(self.table_uri5, table5_json)
+ self.check_json(self.table_uri6, table6_json)
+
+ self.session.truncate(self.table_uri5, None, None, None)
+ self.session.truncate(self.table_uri6, None, None, None)
+ self.load_json(self.table_uri5, table5_json)
+ self.load_json(self.table_uri6, table6_json)
+ self.check_json(self.table_uri5, table5_json)
+ self.check_json(self.table_uri6, table6_json)
+
+ self.runWt(['dump', '-j', self.table_uri5], outfilename='jsondump5.out')
+ self.runWt(['dump', '-j', self.table_uri6], outfilename='jsondump6.out')
+ self.session.drop(self.table_uri5)
+ self.session.drop(self.table_uri6)
+ self.runWt(['load', '-jf', 'jsondump5.out'])
+ self.runWt(['load', '-jf', 'jsondump6.out'])
+ self.session.drop(self.table_uri5)
+ self.session.drop(self.table_uri6)
+
+
if __name__ == '__main__':
wttest.run()