diff options
-rw-r--r-- | SConstruct | 1 | ||||
-rw-r--r-- | build_posix/Make.base | 1 | ||||
-rw-r--r-- | build_win/filelist.win | 1 | ||||
-rw-r--r-- | dist/api_data.py | 2 | ||||
-rw-r--r-- | dist/filelist | 1 | ||||
-rw-r--r-- | dist/flags.py | 1 | ||||
-rw-r--r-- | dist/s_string.ok | 2 | ||||
-rw-r--r-- | examples/c/ex_all.c | 4 | ||||
-rw-r--r-- | lang/java/java_doc.i | 1 | ||||
-rw-r--r-- | src/btree/bt_handle.c | 25 | ||||
-rw-r--r-- | src/btree/bt_rebalance.c | 473 | ||||
-rw-r--r-- | src/config/config_def.c | 34 | ||||
-rw-r--r-- | src/conn/conn_api.c | 1 | ||||
-rw-r--r-- | src/include/btree.h | 12 | ||||
-rw-r--r-- | src/include/config.h | 39 | ||||
-rw-r--r-- | src/include/extern.h | 2 | ||||
-rw-r--r-- | src/include/flags.h | 21 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 26 | ||||
-rw-r--r-- | src/session/session_api.c | 27 | ||||
-rw-r--r-- | src/txn/txn_ckpt.c | 5 | ||||
-rw-r--r-- | src/utilities/util.h | 1 | ||||
-rw-r--r-- | src/utilities/util_main.c | 5 | ||||
-rw-r--r-- | src/utilities/util_rebalance.c | 63 | ||||
-rw-r--r-- | test/format/Makefile.am | 2 | ||||
-rw-r--r-- | test/format/config.c | 2 | ||||
-rw-r--r-- | test/format/config.h | 6 | ||||
-rw-r--r-- | test/format/format.h | 5 | ||||
-rw-r--r-- | test/format/rebalance.c | 84 | ||||
-rw-r--r-- | test/format/t.c | 5 | ||||
-rw-r--r-- | test/format/wts.c | 26 |
30 files changed, 807 insertions, 71 deletions
diff --git a/SConstruct b/SConstruct index 6a2b0497d15..914a7a137e1 100644 --- a/SConstruct +++ b/SConstruct @@ -290,6 +290,7 @@ wtbin = env.Program("wt", [ "src/utilities/util_misc.c", "src/utilities/util_printlog.c", "src/utilities/util_read.c", + "src/utilities/util_rebalance.c", "src/utilities/util_rename.c", "src/utilities/util_salvage.c", "src/utilities/util_stat.c", diff --git a/build_posix/Make.base b/build_posix/Make.base index 3037d70528a..4efbe3f76c3 100644 --- a/build_posix/Make.base +++ b/build_posix/Make.base @@ -31,6 +31,7 @@ wt_SOURCES =\ src/utilities/util_misc.c \ src/utilities/util_printlog.c \ src/utilities/util_read.c \ + src/utilities/util_rebalance.c \ src/utilities/util_rename.c \ src/utilities/util_salvage.c \ src/utilities/util_stat.c \ diff --git a/build_win/filelist.win b/build_win/filelist.win index af6ddf98da9..9a7c26217ec 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -31,6 +31,7 @@ src/btree/bt_misc.c src/btree/bt_ovfl.c src/btree/bt_page.c src/btree/bt_read.c +src/btree/bt_rebalance.c src/btree/bt_ret.c src/btree/bt_slvg.c src/btree/bt_split.c diff --git a/dist/api_data.py b/dist/api_data.py index ff6d3f3ccb5..40dabedd6f7 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -564,6 +564,7 @@ connection_runtime_config = [ 'mutex', 'overflow', 'read', + 'rebalance', 'reconcile', 'recovery', 'salvage', @@ -892,6 +893,7 @@ methods = { type='list'), ]), +'WT_SESSION.rebalance' : Method([]), 'WT_SESSION.rename' : Method([]), 'WT_SESSION.reset' : Method([]), 'WT_SESSION.salvage' : Method([ diff --git a/dist/filelist b/dist/filelist index 52af87c2a68..830e58fcc25 100644 --- a/dist/filelist +++ b/dist/filelist @@ -31,6 +31,7 @@ src/btree/bt_misc.c src/btree/bt_ovfl.c src/btree/bt_page.c src/btree/bt_read.c +src/btree/bt_rebalance.c src/btree/bt_ret.c src/btree/bt_slvg.c src/btree/bt_split.c diff --git a/dist/flags.py b/dist/flags.py index 7d237dd39a4..84261555ce1 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -70,6 
+70,7 @@ flags = { 'VERB_MUTEX', 'VERB_OVERFLOW', 'VERB_READ', + 'VERB_REBALANCE', 'VERB_RECONCILE', 'VERB_RECOVERY', 'VERB_SALVAGE', diff --git a/dist/s_string.ok b/dist/s_string.ok index 81393664de2..352fcb1a4ad 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -254,6 +254,7 @@ RNG RPC RUNDIR Radu +Rebalance RedHat Redistributions Resize @@ -857,6 +858,7 @@ readlock readonly readunlock realloc +rebalance rebalancing recno recnos diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index 213e058d4cc..22a2afdb345 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -673,6 +673,10 @@ session_ops(WT_SESSION *session) ret = session->compact(session, "table:mytable", NULL); /*! [Compact a table] */ + /*! [Rebalance a table] */ + ret = session->rebalance(session, "table:old", NULL); + /*! [Rebalance a table] */ + /*! [Rename a table] */ ret = session->rename(session, "table:old", "table:new", NULL); /*! [Rename a table] */ diff --git a/lang/java/java_doc.i b/lang/java/java_doc.i index 17317ab875b..450cb1d5ab2 100644 --- a/lang/java/java_doc.i +++ b/lang/java/java_doc.i @@ -36,6 +36,7 @@ COPYDOC(__wt_session, WT_SESSION, drop) COPYDOC(__wt_session, WT_SESSION, join) COPYDOC(__wt_session, WT_SESSION, log_flush) COPYDOC(__wt_session, WT_SESSION, log_printf) +COPYDOC(__wt_session, WT_SESSION, rebalance) COPYDOC(__wt_session, WT_SESSION, rename) COPYDOC(__wt_session, WT_SESSION, reset) COPYDOC(__wt_session, WT_SESSION, salvage) diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 9935d92677c..1b5dbcb21e7 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -105,14 +105,23 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_ERR(__wt_btree_tree_open( session, root_addr, root_addr_size)); - /* Warm the cache, if possible. */ - WT_WITH_PAGE_INDEX(session, - ret = __btree_preload(session)); - WT_ERR(ret); - - /* Get the last record number in a column-store file. 
*/ - if (btree->type != BTREE_ROW) - WT_ERR(__btree_get_last_recno(session)); + /* + * Rebalance uses the cache, but only wants the root + * page, nothing else. + */ + if (!F_ISSET(btree, WT_BTREE_REBALANCE)) { + /* Warm the cache, if possible. */ + WT_WITH_PAGE_INDEX(session, + ret = __btree_preload(session)); + WT_ERR(ret); + + /* + * Get the last record number in a column-store + * file. + */ + if (btree->type != BTREE_ROW) + WT_ERR(__btree_get_last_recno(session)); + } } } diff --git a/src/btree/bt_rebalance.c b/src/btree/bt_rebalance.c new file mode 100644 index 00000000000..9e7d6ac2fad --- /dev/null +++ b/src/btree/bt_rebalance.c @@ -0,0 +1,473 @@ +/*- + * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * Shared rebalance information. + */ +typedef struct { + WT_REF **leaf; /* List of leaf pages */ + uint64_t leaf_next; /* Next entry */ + size_t leaf_allocated; /* Allocated bytes */ + + WT_ADDR *fl; /* List of objects to free */ + uint64_t fl_next; /* Next entry */ + size_t fl_allocated; /* Allocated bytes */ + + WT_PAGE *root; /* Created root page */ + + uint8_t type; /* Internal page type */ + +#define WT_REBALANCE_PROGRESS_INTERVAL 100 + uint64_t progress; /* Progress counter */ + + WT_ITEM *tmp1; /* Temporary buffers */ + WT_ITEM *tmp2; +} WT_RSTUFF; + +/* + * __rebalance_discard -- + * Free the allocated information. + */ +static void +__rebalance_discard(WT_SESSION_IMPL *session, WT_RSTUFF *rs) +{ + while (rs->leaf_next > 0) { + --rs->leaf_next; + __wt_free_ref( + session, rs->leaf[rs->leaf_next], rs->type, false); + } + __wt_free(session, rs->leaf); + + while (rs->fl_next > 0) { + --rs->fl_next; + __wt_free(session, rs->fl[rs->fl_next].addr); + } + __wt_free(session, rs->fl); +} + +/* + * __rebalance_leaf_append -- + * Add a new entry to the list of leaf pages. 
+ */ +static int +__rebalance_leaf_append(WT_SESSION_IMPL *session, + const uint8_t *key, size_t key_len, + uint64_t recno, + const uint8_t *addr, size_t addr_len, u_int addr_type, WT_RSTUFF *rs) +{ + WT_ADDR *copy_addr; + WT_REF *copy; + + WT_RET(__wt_verbose(session, WT_VERB_REBALANCE, + "rebalance leaf-list append %s, %s", + __wt_buf_set_printable(session, key, key_len, rs->tmp2), + __wt_addr_string(session, addr, addr_len, rs->tmp1))); + + /* Allocate and initialize a new leaf page reference. */ + WT_RET(__wt_realloc_def( + session, &rs->leaf_allocated, rs->leaf_next + 1, &rs->leaf)); + WT_RET(__wt_calloc_one(session, &copy)); + rs->leaf[rs->leaf_next++] = copy; + + copy->page = NULL; + copy->home = NULL; + copy->pindex_hint = 0; + copy->state = WT_REF_DISK; + + WT_RET(__wt_calloc_one(session, &copy_addr)); + copy->addr = copy_addr; + WT_RET(__wt_strndup(session, addr, addr_len, &copy_addr->addr)); + copy_addr->size = (uint8_t)addr_len; + copy_addr->type = (uint8_t)addr_type; + + if (recno == WT_RECNO_OOB) + WT_RET(__wt_row_ikey(session, 0, key, key_len, copy)); + else + copy->key.recno = recno; + + copy->page_del = NULL; + return (0); +} + +/* + * __rebalance_fl_append -- + * Add a new entry to the free list. + */ +static int +__rebalance_fl_append(WT_SESSION_IMPL *session, + const uint8_t *addr, size_t addr_len, WT_RSTUFF *rs) +{ + WT_ADDR *copy; + + WT_RET(__wt_realloc_def( + session, &rs->fl_allocated, rs->fl_next + 1, &rs->fl)); + copy = &rs->fl[rs->fl_next++]; + + WT_RET(__wt_strndup(session, addr, addr_len, &copy->addr)); + copy->size = (uint8_t)addr_len; + copy->type = 0; + + return (0); +} + +/* + * __rebalance_internal -- + * Build an in-memory page that references all of the leaf pages we've + * found. 
+ */ +static int +__rebalance_internal(WT_SESSION_IMPL *session, WT_RSTUFF *rs) +{ + WT_BTREE *btree; + WT_DECL_RET; + WT_PAGE *page; + WT_PAGE_INDEX *pindex; + WT_REF **refp; + uint64_t i; + + btree = S2BT(session); + + /* + * There's a limit to the number of pages we can rebalance: the number + * of elements on a page is a 4B quantity and it's technically possible + * there could be more pages than that in a tree. + */ + if (rs->leaf_next > UINT32_MAX) + WT_RET_MSG(session, ENOTSUP, + "too many leaf pages to rebalance, %" PRIu64 " pages " + "exceeds the maximum of %" PRIu32, + rs->leaf_next, UINT32_MAX); + + /* Allocate a row-store root (internal) page and fill it in. */ + WT_RET(__wt_page_alloc(session, rs->type, + rs->type == WT_PAGE_COL_INT ? 1 : 0, rs->leaf_next, false, &page)); + page->pg_intl_parent_ref = &btree->root; + WT_ERR(__wt_page_modify_init(session, page)); + __wt_page_modify_set(session, page); + + pindex = WT_INTL_INDEX_GET_SAFE(page); + for (refp = pindex->index, i = 0; i < rs->leaf_next; ++i) { + rs->leaf[i]->home = page; + *refp++ = rs->leaf[i]; + rs->leaf[i] = NULL; + } + + rs->root = page; + return (0); + +err: __wt_page_out(session, &page); + return (ret); +} + +/* + * __rebalance_free_original -- + * Free the tracked internal pages and overflow keys. + */ +static int +__rebalance_free_original(WT_SESSION_IMPL *session, WT_RSTUFF *rs) +{ + WT_ADDR *addr; + uint64_t i; + + for (i = 0; i < rs->fl_next; ++i) { + addr = &rs->fl[i]; + + WT_RET(__wt_verbose(session, WT_VERB_REBALANCE, + "rebalance discarding %s", + __wt_addr_string( + session, addr->addr, addr->size, rs->tmp1))); + + WT_RET(__wt_btree_block_free(session, addr->addr, addr->size)); + } + return (0); +} + +/* + * __rebalance_col_walk -- + * Walk a column-store page and its descendants. 
+ */ +static int +__rebalance_col_walk( + WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_RSTUFF *rs) +{ + WT_BTREE *btree; + WT_CELL *cell; + WT_CELL_UNPACK unpack; + WT_DECL_ITEM(buf); + WT_DECL_RET; + uint32_t i; + + btree = S2BT(session); + + WT_ERR(__wt_scr_alloc(session, 0, &buf)); + + /* Report progress periodically. */ + if (++rs->progress % WT_REBALANCE_PROGRESS_INTERVAL == 0) + WT_ERR(__wt_progress(session, NULL, rs->progress)); + + /* + * Walk the page, instantiating keys: the page contains sorted key and + * location cookie pairs. Keys are on-page/overflow items and location + * cookies are WT_CELL_ADDR_XXX items. + */ + WT_CELL_FOREACH(btree, dsk, cell, &unpack, i) { + __wt_cell_unpack(cell, &unpack); + switch (unpack.type) { + case WT_CELL_ADDR_INT: + /* An internal page: read it and recursively walk it. */ + WT_ERR(__wt_bt_read( + session, buf, unpack.data, unpack.size)); + WT_ERR(__rebalance_col_walk(session, buf->data, rs)); + WT_ERR(__wt_verbose(session, WT_VERB_REBALANCE, + "free-list append internal page: %s", + __wt_addr_string( + session, unpack.data, unpack.size, rs->tmp1))); + WT_ERR(__rebalance_fl_append( + session, unpack.data, unpack.size, rs)); + break; + case WT_CELL_ADDR_LEAF: + case WT_CELL_ADDR_LEAF_NO: + WT_ERR(__rebalance_leaf_append(session, + NULL, 0, unpack.v, unpack.data, unpack.size, + unpack.type == WT_CELL_ADDR_LEAF ? + WT_ADDR_LEAF : WT_ADDR_LEAF_NO, rs)); + break; + WT_ILLEGAL_VALUE_ERR(session); + } + } + +err: __wt_scr_free(session, &buf); + return (ret); +} + +/* + * __rebalance_row_leaf_key -- + * Acquire a copy of the key for a leaf page. + */ +static int +__rebalance_row_leaf_key(WT_SESSION_IMPL *session, + const uint8_t *addr, size_t addr_len, WT_ITEM *key, WT_RSTUFF *rs) +{ + WT_PAGE *page; + WT_DECL_RET; + + /* + * We need the first key from a leaf page. 
Leaf pages are relatively + * complex (Huffman encoding, prefix compression, and so on), do the + * work to instantiate the page and copy the first key to the buffer. + */ + WT_RET(__wt_bt_read(session, rs->tmp1, addr, addr_len)); + WT_RET(__wt_page_inmem(session, NULL, rs->tmp1->data, 0, 0, &page)); + ret = __wt_row_leaf_key_copy(session, page, &page->pg_row_d[0], key); + __wt_page_out(session, &page); + return (ret); +} + +/* + * __rebalance_row_walk -- + * Walk a row-store page and its descendants. + */ +static int +__rebalance_row_walk( + WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_RSTUFF *rs) +{ + WT_BTREE *btree; + WT_CELL *cell; + WT_CELL_UNPACK key, unpack; + WT_DECL_ITEM(buf); + WT_DECL_ITEM(leafkey); + WT_DECL_RET; + size_t len; + uint32_t i; + bool first_cell; + const void *p; + + btree = S2BT(session); + WT_CLEAR(key); /* [-Werror=maybe-uninitialized] */ + + WT_ERR(__wt_scr_alloc(session, 0, &buf)); + WT_ERR(__wt_scr_alloc(session, 0, &leafkey)); + + /* Report progress periodically. */ + if (++rs->progress % WT_REBALANCE_PROGRESS_INTERVAL == 0) + WT_ERR(__wt_progress(session, NULL, rs->progress)); + + /* + * Walk the page, instantiating keys: the page contains sorted key and + * location cookie pairs. Keys are on-page/overflow items and location + * cookies are WT_CELL_ADDR_XXX items. + */ + first_cell = true; + WT_CELL_FOREACH(btree, dsk, cell, &unpack, i) { + __wt_cell_unpack(cell, &unpack); + switch (unpack.type) { + case WT_CELL_KEY: + key = unpack; + break; + case WT_CELL_KEY_OVFL: + /* + * Any overflow key that references an internal page is + * no longer of any use. + * + * We could potentially use the same overflow key being + * freed here for the internal page we're creating, but + * that's more work to get reconciliation to understand + * and overflow keys are (well, should be), uncommon. 
+ */ + WT_ERR(__wt_verbose(session, WT_VERB_REBALANCE, + "free-list append overflow key: %s", + __wt_addr_string( + session, unpack.data, unpack.size, rs->tmp1))); + + WT_ERR(__rebalance_fl_append( + session, unpack.data, unpack.size, rs)); + + key = unpack; + break; + case WT_CELL_ADDR_DEL: + /* + * A deleted leaf page: we're rebalancing this tree, + * which means no transaction can be active in it, + * which means no deleted leaf page is interesting, + * ignore it. + */ + first_cell = false; + break; + case WT_CELL_ADDR_INT: + /* An internal page: read it and recursively walk it. */ + WT_ERR(__wt_bt_read( + session, buf, unpack.data, unpack.size)); + WT_ERR(__rebalance_row_walk(session, buf->data, rs)); + WT_ERR(__wt_verbose(session, WT_VERB_REBALANCE, + "free-list append internal page: %s", + __wt_addr_string( + session, unpack.data, unpack.size, rs->tmp1))); + WT_ERR(__rebalance_fl_append( + session, unpack.data, unpack.size, rs)); + break; + case WT_CELL_ADDR_LEAF: + case WT_CELL_ADDR_LEAF_NO: + /* + * A leaf page. + * If the internal page key is an overflow, instantiate + * it and use it. + * Else, we can't trust the 0th key on an internal page + * (we generally try not to instantiate them during + * reconciliation because it saves space), so we have to + * get it from the underlying leaf page. + * Else, we can use the internal page's key as is, it's + * sufficient for the page. + */ + if (key.type == WT_CELL_KEY_OVFL) { + WT_ERR(__wt_dsk_cell_data_ref( + session, WT_PAGE_ROW_INT, &key, leafkey)); + p = leafkey->data; + len = leafkey->size; + } else if (first_cell) { + WT_ERR(__rebalance_row_leaf_key(session, + unpack.data, unpack.size, leafkey, rs)); + p = leafkey->data; + len = leafkey->size; + } else { + p = key.data; + len = key.size; + } + WT_ERR(__rebalance_leaf_append(session, + p, len, WT_RECNO_OOB, unpack.data, unpack.size, + unpack.type == WT_CELL_ADDR_LEAF ? 
+ WT_ADDR_LEAF : WT_ADDR_LEAF_NO, rs)); + + first_cell = false; + break; + WT_ILLEGAL_VALUE_ERR(session); + } + } + +err: __wt_scr_free(session, &buf); + __wt_scr_free(session, &leafkey); + return (ret); +} + +/* + * __wt_bt_rebalance -- + * Rebalance the last checkpoint in the file. + */ +int +__wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_BTREE *btree; + WT_DECL_RET; + WT_RSTUFF *rs, _rstuff; + bool evict_reset; + + WT_UNUSED(cfg); + + btree = S2BT(session); + + WT_CLEAR(_rstuff); + rs = &_rstuff; + + WT_ERR(__wt_scr_alloc(session, 0, &rs->tmp1)); + WT_ERR(__wt_scr_alloc(session, 0, &rs->tmp2)); + + /* Set the internal page tree type. */ + rs->type = btree->root.page->type; + + /* + * Get exclusive access to the file. (Not required, the only page in the + * cache is the root page, and that cannot be evicted; however, this way + * eviction ignores the tree entirely.) + */ + WT_ERR(__wt_evict_file_exclusive_on(session, &evict_reset)); + + /* Recursively walk the tree. */ + switch (rs->type) { + case WT_PAGE_ROW_INT: + WT_ERR( + __rebalance_row_walk(session, btree->root.page->dsk, rs)); + break; + case WT_PAGE_COL_INT: + WT_ERR( + __rebalance_col_walk(session, btree->root.page->dsk, rs)); + break; + WT_ILLEGAL_VALUE_ERR(session); + } + + /* Build a new root page. */ + WT_ERR(__rebalance_internal(session, rs)); + + /* + * Schedule the free of the original blocks (they shouldn't actually be + * freed until the next checkpoint completes). + */ + WT_ERR(__rebalance_free_original(session, rs)); + + /* + * Swap the old root page for our newly built root page, writing the new + * root page as part of a checkpoint will finish the rebalance. + */ + __wt_page_out(session, &btree->root.page); + btree->root.page = rs->root; + rs->root = NULL; + +err: /* Discard any leftover root page we created. 
*/ + if (rs->root != NULL) { + __wt_page_modify_clear(session, rs->root); + __wt_page_out(session, &rs->root); + } + + /* Discard any leftover leaf and internal page information. */ + __rebalance_discard(session, rs); + + __wt_scr_free(session, &rs->tmp1); + __wt_scr_free(session, &rs->tmp2); + + return (ret); +} diff --git a/src/config/config_def.c b/src/config/config_def.c index 9d12e953498..71131e567fd 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -151,9 +151,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"reconcile\",\"recovery\",\"salvage\",\"shared_cache\"," - "\"split\",\"temporary\",\"transaction\",\"verify\",\"version\"," - "\"write\"]", + "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -563,9 +563,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"reconcile\",\"recovery\",\"salvage\",\"shared_cache\"," - "\"split\",\"temporary\",\"transaction\",\"verify\",\"version\"," - "\"write\"]", + "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", @@ -643,9 +643,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," 
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"reconcile\",\"recovery\",\"salvage\",\"shared_cache\"," - "\"split\",\"temporary\",\"transaction\",\"verify\",\"version\"," - "\"write\"]", + "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -718,9 +718,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"reconcile\",\"recovery\",\"salvage\",\"shared_cache\"," - "\"split\",\"temporary\",\"transaction\",\"verify\",\"version\"," - "\"write\"]", + "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -793,9 +793,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"reconcile\",\"recovery\",\"salvage\",\"shared_cache\"," - "\"split\",\"temporary\",\"transaction\",\"verify\",\"version\"," - "\"write\"]", + "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", @@ -926,6 +926,10 @@ static const WT_CONFIG_ENTRY config_entries[] = { "skip_sort_check=0,statistics=,target=", confchk_WT_SESSION_open_cursor, 12 }, + { "WT_SESSION.rebalance", + "", + NULL, 0 + }, { "WT_SESSION.reconfigure", 
"isolation=read-committed", confchk_WT_SESSION_reconfigure, 1 diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index bd14e1bf4fd..aa8cf8a3ec9 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1605,6 +1605,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) { "mutex", WT_VERB_MUTEX }, { "overflow", WT_VERB_OVERFLOW }, { "read", WT_VERB_READ }, + { "rebalance", WT_VERB_REBALANCE }, { "reconcile", WT_VERB_RECONCILE }, { "recovery", WT_VERB_RECOVERY }, { "salvage", WT_VERB_SALVAGE }, diff --git a/src/include/btree.h b/src/include/btree.h index a1d8e395cfc..04cb561cb32 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -153,16 +153,18 @@ struct __wt_btree { #define WT_BTREE_NO_CHECKPOINT 0x00800 /* Disable checkpoints */ #define WT_BTREE_NO_EVICTION 0x01000 /* Disable eviction */ #define WT_BTREE_NO_LOGGING 0x02000 /* Disable logging */ -#define WT_BTREE_SALVAGE 0x04000 /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x08000 /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x10000 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x20000 /* Handle is for verify */ +#define WT_BTREE_REBALANCE 0x04000 /* Handle is for rebalance */ +#define WT_BTREE_SALVAGE 0x08000 /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 0x10000 /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x20000 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x40000 /* Handle is for verify */ uint32_t flags; }; /* Flags that make a btree handle special (not for normal use). 
*/ #define WT_BTREE_SPECIAL_FLAGS \ - (WT_BTREE_BULK | WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY) + (WT_BTREE_BULK | WT_BTREE_REBALANCE | \ + WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY) /* * WT_SALVAGE_COOKIE -- diff --git a/src/include/config.h b/src/include/config.h index e836abaccba..a960ab2e8d6 100644 --- a/src/include/config.h +++ b/src/include/config.h @@ -72,25 +72,26 @@ struct __wt_config_parser_impl { #define WT_CONFIG_ENTRY_WT_SESSION_log_flush 20 #define WT_CONFIG_ENTRY_WT_SESSION_log_printf 21 #define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 22 -#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 23 -#define WT_CONFIG_ENTRY_WT_SESSION_rename 24 -#define WT_CONFIG_ENTRY_WT_SESSION_reset 25 -#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 26 -#define WT_CONFIG_ENTRY_WT_SESSION_salvage 27 -#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 28 -#define WT_CONFIG_ENTRY_WT_SESSION_strerror 29 -#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 30 -#define WT_CONFIG_ENTRY_WT_SESSION_truncate 31 -#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 32 -#define WT_CONFIG_ENTRY_WT_SESSION_verify 33 -#define WT_CONFIG_ENTRY_colgroup_meta 34 -#define WT_CONFIG_ENTRY_file_meta 35 -#define WT_CONFIG_ENTRY_index_meta 36 -#define WT_CONFIG_ENTRY_table_meta 37 -#define WT_CONFIG_ENTRY_wiredtiger_open 38 -#define WT_CONFIG_ENTRY_wiredtiger_open_all 39 -#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 40 -#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 41 +#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 23 +#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 24 +#define WT_CONFIG_ENTRY_WT_SESSION_rename 25 +#define WT_CONFIG_ENTRY_WT_SESSION_reset 26 +#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 27 +#define WT_CONFIG_ENTRY_WT_SESSION_salvage 28 +#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 29 +#define WT_CONFIG_ENTRY_WT_SESSION_strerror 30 +#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 31 +#define WT_CONFIG_ENTRY_WT_SESSION_truncate 32 +#define 
WT_CONFIG_ENTRY_WT_SESSION_upgrade 33 +#define WT_CONFIG_ENTRY_WT_SESSION_verify 34 +#define WT_CONFIG_ENTRY_colgroup_meta 35 +#define WT_CONFIG_ENTRY_file_meta 36 +#define WT_CONFIG_ENTRY_index_meta 37 +#define WT_CONFIG_ENTRY_table_meta 38 +#define WT_CONFIG_ENTRY_wiredtiger_open 39 +#define WT_CONFIG_ENTRY_wiredtiger_open_all 40 +#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 41 +#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 42 /* * configuration section: END * DO NOT EDIT: automatically built by dist/flags.py. diff --git a/src/include/extern.h b/src/include/extern.h index 948aba20997..70eedb64373 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -139,6 +139,7 @@ extern const char *__wt_page_type_string(u_int type); extern const char *__wt_cell_type_string(uint8_t type); extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf); extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf); +extern const char *__wt_buf_set_printable( WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf); extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store); extern int __wt_ovfl_cache(WT_SESSION_IMPL *session, WT_PAGE *page, void *cookie, WT_CELL_UNPACK *vpack); extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell); @@ -151,6 +152,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags , const char *file, int line #endif ); +extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd); extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]); extern void __wt_split_stash_discard(WT_SESSION_IMPL *session); diff --git a/src/include/flags.h b/src/include/flags.h index bafff92fbc0..e9d29b8ea0e 100644 --- a/src/include/flags.h +++ 
b/src/include/flags.h @@ -85,16 +85,17 @@ #define WT_VERB_MUTEX 0x00000800 #define WT_VERB_OVERFLOW 0x00001000 #define WT_VERB_READ 0x00002000 -#define WT_VERB_RECONCILE 0x00004000 -#define WT_VERB_RECOVERY 0x00008000 -#define WT_VERB_SALVAGE 0x00010000 -#define WT_VERB_SHARED_CACHE 0x00020000 -#define WT_VERB_SPLIT 0x00040000 -#define WT_VERB_TEMPORARY 0x00080000 -#define WT_VERB_TRANSACTION 0x00100000 -#define WT_VERB_VERIFY 0x00200000 -#define WT_VERB_VERSION 0x00400000 -#define WT_VERB_WRITE 0x00800000 +#define WT_VERB_REBALANCE 0x00004000 +#define WT_VERB_RECONCILE 0x00008000 +#define WT_VERB_RECOVERY 0x00010000 +#define WT_VERB_SALVAGE 0x00020000 +#define WT_VERB_SHARED_CACHE 0x00040000 +#define WT_VERB_SPLIT 0x00080000 +#define WT_VERB_TEMPORARY 0x00100000 +#define WT_VERB_TRANSACTION 0x00200000 +#define WT_VERB_VERIFY 0x00400000 +#define WT_VERB_VERSION 0x00800000 +#define WT_VERB_WRITE 0x01000000 #define WT_VISIBILITY_ERR 0x00000010 /* * flags section: END diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index bdd8bb65910..fcd78d572a4 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1329,6 +1329,19 @@ struct __wt_session { int __F(log_printf)(WT_SESSION *session, const char *fmt, ...); /*! + * Rebalance a table + * + * @snippet ex_all.c Rebalance a table + * + * @param session the session handle + * @param uri the current URI of the object, such as \c "table:mytable" + * @configempty{WT_SESSION.rebalance, see dist/api_data.py} + * @ebusy_errors + */ + int __F(rebalance)( + WT_SESSION *session, const char *uri, const char *config); + + /*! * Rename an object. 
* * @snippet ex_all.c Rename a table @@ -1920,9 +1933,10 @@ struct __wt_connection { * "block"\, \c "checkpoint"\, \c "compact"\, \c "evict"\, \c * "evictserver"\, \c "fileops"\, \c "log"\, \c "lsm"\, \c * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c - * "read"\, \c "reconcile"\, \c "recovery"\, \c "salvage"\, \c - * "shared_cache"\, \c "split"\, \c "temporary"\, \c "transaction"\, \c - * "verify"\, \c "version"\, \c "write"; default empty.} + * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c + * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c + * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default + * empty.} * @configend * @errors */ @@ -2405,9 +2419,9 @@ struct __wt_connection { * values chosen from the following options: \c "api"\, \c "block"\, \c * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\, * \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c - * "overflow"\, \c "read"\, \c "reconcile"\, \c "recovery"\, \c "salvage"\, \c - * "shared_cache"\, \c "split"\, \c "temporary"\, \c "transaction"\, \c - * "verify"\, \c "version"\, \c "write"; default empty.} + * "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c + * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c + * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.} * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to * files. Ignored on non-Windows systems. Options are given as a list\, such * as <code>"write_through=[data]"</code>. Configuring \c write_through requires diff --git a/src/session/session_api.c b/src/session/session_api.c index f0d0f26db54..da24f40cfb0 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -554,6 +554,32 @@ err: API_END_RET(session, ret); } /* + * __session_rebalance -- + * WT_SESSION->rebalance method. 
+ */ +static int +__session_rebalance(WT_SESSION *wt_session, const char *uri, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + + SESSION_API_CALL(session, rebalance, config, cfg); + + if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) + WT_ERR(ENOTSUP); + + /* Block out checkpoints to avoid spurious EBUSY errors. */ + WT_WITH_CHECKPOINT_LOCK(session, + WT_WITH_SCHEMA_LOCK(session, ret = + __wt_schema_worker(session, uri, __wt_bt_rebalance, + NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_REBALANCE))); + +err: API_END_RET_NOTFOUND_MAP(session, ret); +} + +/* * __session_rename -- * WT_SESSION->rename method. */ @@ -1287,6 +1313,7 @@ __open_session(WT_CONNECTION_IMPL *conn, __session_join, __session_log_flush, __session_log_printf, + __session_rebalance, __session_rename, __session_reset, __session_salvage, diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index bc1537ca878..fd1cea5564d 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -1037,12 +1037,13 @@ nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); "for a bulk-loaded file"); fake_ckpt = true; goto fake; + case WT_BTREE_REBALANCE: case WT_BTREE_SALVAGE: case WT_BTREE_UPGRADE: case WT_BTREE_VERIFY: WT_ERR_MSG(session, EINVAL, - "checkpoints are blocked during salvage, upgrade " - "or verify operations"); + "checkpoints are blocked during rebalance, " + "salvage, upgrade or verify operations"); } /* diff --git a/src/utilities/util.h b/src/utilities/util.h index 08d0537956f..b9399c33e78 100644 --- a/src/utilities/util.h +++ b/src/utilities/util.h @@ -42,6 +42,7 @@ char *util_name(WT_SESSION *, const char *, const char *); int util_printlog(WT_SESSION *, int, char *[]); int util_read(WT_SESSION *, int, char *[]); int util_read_line(WT_SESSION *, ULINE *, bool, bool *); +int util_rebalance(WT_SESSION *, int, char *[]); int util_rename(WT_SESSION *, int, char *[]); int util_salvage(WT_SESSION *, int, char *[]); int util_stat(WT_SESSION *, int, char *[]); 
# NOTE: util_main.c -- the first hunk routes the "rebalance" command to
# util_rebalance inside the 'r' case of the command switch. The second hunk
# adds the command to the usage text and replaces the "\t" after "loadtext"
# with a space.
# NOTE(review): indentation, and any runs of spaces inside string literals,
# were collapsed by extraction; confirm against the repository.
diff --git a/src/utilities/util_main.c b/src/utilities/util_main.c
index 3b7187bd0de..8e0bb8df448 100644
--- a/src/utilities/util_main.c
+++ b/src/utilities/util_main.c
@@ -159,6 +159,8 @@ main(int argc, char *argv[])
 	case 'r':
 		if (strcmp(command, "read") == 0)
 			func = util_read;
+		else if (strcmp(command, "rebalance") == 0)
+			func = util_rebalance;
 		else if (strcmp(command, "rename") == 0)
 			func = util_rename;
 		break;
@@ -259,9 +261,10 @@ usage(void)
 	    "\t" "dump\t dump an object\n"
 	    "\t" "list\t list database objects\n"
 	    "\t" "load\t load an object\n"
-	    "\t" "loadtext\t load an object from a text file\n"
+	    "\t" "loadtext load an object from a text file\n"
 	    "\t" "printlog display the database log\n"
 	    "\t" "read\t read values from an object\n"
+	    "\t" "rebalance rebalance an object\n"
 	    "\t" "rename\t rename an object\n"
 	    "\t" "salvage\t salvage a file\n"
 	    "\t" "stat\t display statistics for an object\n"
# NOTE: util_rebalance.c (new file) -- util_rebalance implements the
# "wt rebalance" command.
#   - The option string passed to __wt_getopt is empty, so any option
#     reaches the '?'/default case and returns usage().
#   - Exactly one remaining argument is required; it is resolved with
#     util_name(session, *argv, "table").
#   - The command returns 1 after usage(), when util_name returns NULL, or
#     when session->rebalance fails (after printing the session->strerror
#     text to stderr). Otherwise it returns ret, presumably 0 -- this depends
#     on how WT_DECL_RET initialises ret.
#   - name is freed on both the success path and the "err" path; the
#     "if (0) { err: ... }" block is entered only through the goto.
#   - usage() prints the command synopsis to stderr and returns 1.
diff --git a/src/utilities/util_rebalance.c b/src/utilities/util_rebalance.c
new file mode 100644
index 00000000000..671afc19cdc
--- /dev/null
+++ b/src/utilities/util_rebalance.c
@@ -0,0 +1,63 @@
+/*-
+ * Copyright (c) 2014-2015 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ *	All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "util.h"
+
+static int usage(void);
+
+int
+util_rebalance(WT_SESSION *session, int argc, char *argv[])
+{
+	WT_DECL_RET;
+	int ch;
+	char *name;
+
+	name = NULL;
+	while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
+		switch (ch) {
+		case '?':
+		default:
+			return (usage());
+		}
+	argc -= __wt_optind;
+	argv += __wt_optind;
+
+	/* The remaining argument is the table name. */
+	if (argc != 1)
+		return (usage());
+	if ((name = util_name(session, *argv, "table")) == NULL)
+		return (1);
+
+	if ((ret = session->rebalance(session, name, NULL)) != 0) {
+		fprintf(stderr, "%s: rebalance(%s): %s\n",
+		    progname, name, session->strerror(session, ret));
+		goto err;
+	}
+
+	/* Verbose configures a progress counter, move to the next line. */
+	if (verbose)
+		printf("\n");
+
+	if (0) {
+err:		ret = 1;
+	}
+
+	free(name);
+
+	return (ret);
+}
+
+static int
+usage(void)
+{
+	(void)fprintf(stderr,
+	    "usage: %s %s "
+	    "rebalance uri\n",
+	    progname, usage_prefix);
+	return (1);
+}
# NOTE: test/format build and configuration.
#   - Makefile.am adds rebalance.c to t_SOURCES.
#   - config.c: config_in_memory sets g.c_rebalance to 0 unless
#     config_is_perm("rebalance") is true.
#   - config.h adds a "rebalance" C_BOOL entry bound to g.c_rebalance and
#     rewords the description of the "salvage" entry.
diff --git a/test/format/Makefile.am b/test/format/Makefile.am
index 17cbd53222b..8a2e2b49e4b 100644
--- a/test/format/Makefile.am
+++ b/test/format/Makefile.am
@@ -9,7 +9,7 @@ noinst_PROGRAMS = t
 noinst_SCRIPTS = s_dumpcmp
 t_SOURCES =\
 	config.h format.h backup.c bulk.c compact.c config.c lrt.c ops.c \
-	salvage.c t.c util.c wts.c
+	rebalance.c salvage.c t.c util.c wts.c
 
 if HAVE_BERKELEY_DB
 t_SOURCES += bdb.c
diff --git a/test/format/config.c b/test/format/config.c
index 7aa4575efbd..8651aa9d086 100644
--- a/test/format/config.c
+++ b/test/format/config.c
@@ -331,6 +331,8 @@ config_in_memory(void)
 		g.c_compression = 0;
 	if (!config_is_perm("logging"))
 		g.c_logging = 0;
+	if (!config_is_perm("rebalance"))
+		g.c_rebalance = 0;
 	if (!config_is_perm("salvage"))
 		g.c_salvage = 0;
 	if (!config_is_perm("verify"))
diff --git a/test/format/config.h b/test/format/config.h
index a259217690c..6f97af66719 100644
--- a/test/format/config.h
+++ b/test/format/config.h
@@ -263,8 +263,12 @@ static CONFIG c[] = {
 	  "the number of runs",
 	  C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_runs, NULL },
 
+	{ "rebalance",
+	  "rebalance testing",				/* 100% */
+	  C_BOOL, 100, 1, 0, &g.c_rebalance, NULL },
+
 	{ "salvage",
-	  "verify integrity via salvage",		/* 100% */
+	  "salvage testing",				/* 100% */
 	  C_BOOL, 100, 1, 0, &g.c_salvage, NULL },
 
 	{ "split_pct",
diff --git a/test/format/format.h b/test/format/format.h
index
88300c28292..e15de473e59 100644
--- a/test/format/format.h
+++ b/test/format/format.h
@@ -122,6 +122,8 @@ typedef struct {
 
 	char *helium_mount;			/* Helium volume */
 
+	char *wiredtiger_open_config;		/* Database open config */
+
 #ifdef HAVE_BERKELEY_DB
 	void *bdb;				/* BDB comparison handle */
 	void *dbc;				/* BDB cursor handle */
@@ -216,6 +218,7 @@ typedef struct {
 	uint32_t c_reverse;
 	uint32_t c_rows;
 	uint32_t c_runs;
+	uint32_t c_rebalance;
 	uint32_t c_salvage;
 	uint32_t c_split_pct;
 	uint32_t c_statistics;
@@ -328,6 +331,8 @@ void	 wts_load(void);
 void	 wts_open(const char *, int, WT_CONNECTION **);
 void	 wts_ops(int);
 void	 wts_read_scan(void);
+void	 wts_rebalance(void);
+void	 wts_reopen(void);
 void	 wts_salvage(void);
 void	 wts_stats(void);
 void	 wts_verify(const char *);
# NOTE(review): test/format/rebalance.c below differs from the recorded blob
# (index 477368c36db) in two ways. The session.rebalance failure message had
# two %s conversions but one argument and now has one conversion; a header
# comment and one step comment were added, so the hunk length is 89, not 84.
# The blob id has not been recomputed. Indentation in this listing was lost
# in extraction and has been reconstructed.
diff --git a/test/format/rebalance.c b/test/format/rebalance.c
new file mode 100644
index 00000000000..477368c36db
--- /dev/null
+++ b/test/format/rebalance.c
@@ -0,0 +1,89 @@
+/*-
+ * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "format.h"
+
+/*
+ * wts_rebalance --
+ *	Rebalance the object, verify it, and check its dump is unchanged.
+ */
+void
+wts_rebalance(void)
+{
+	WT_CONNECTION *conn;
+	WT_SESSION *session;
+	int ret;
+	char cmd[1024];
+
+	if (g.c_rebalance == 0)
+		return;
+
+	track("rebalance", 0ULL, NULL);
+
+	/* Dump the current object. */
+	(void)snprintf(cmd, sizeof(cmd),
+	    "../../wt -h %s dump -f %s/rebalance.orig %s",
+	    g.home, g.home, g.uri);
+	if ((ret = system(cmd)) != 0)
+		die(ret, "command failed: %s", cmd);
+
+	/* Rebalance, then verify the object. */
+	wts_reopen();
+	conn = g.wts_conn;
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		die(ret, "connection.open_session");
+	if (g.logging != 0)
+		(void)g.wt_api->msg_printf(g.wt_api, session,
+		    "=============== rebalance start ===============");
+
+	if ((ret = session->rebalance(session, g.uri, NULL)) != 0)
+		die(ret, "session.rebalance: %s", g.uri);
+
+	if (g.logging != 0)
+		(void)g.wt_api->msg_printf(g.wt_api, session,
+		    "=============== rebalance stop ===============");
+	if ((ret = session->close(session, NULL)) != 0)
+		die(ret, "session.close");
+
+	wts_verify("post-rebalance verify");
+	wts_close();
+
+	/* Dump the rebalanced object. */
+	(void)snprintf(cmd, sizeof(cmd),
+	    "../../wt -h %s dump -f %s/rebalance.new %s",
+	    g.home, g.home, g.uri);
+	if ((ret = system(cmd)) != 0)
+		die(ret, "command failed: %s", cmd);
+
+	/* Compare the old/new versions of the object. */
+	(void)snprintf(cmd, sizeof(cmd),
+	    "cmp %s/rebalance.orig %s/rebalance.new > /dev/null",
+	    g.home, g.home);
+	if ((ret = system(cmd)) != 0)
+		die(ret, "command failed: %s", cmd);
+}
diff --git a/test/format/t.c b/test/format/t.c
index 603706e0ba1..9687c3c6dba 100644
--- a/test/format/t.c
+++ b/test/format/t.c
@@ -238,6 +238,11 @@ main(int argc, char *argv[])
 		wts_close();
 
 		/*
+		 * Rebalance testing.
+		 */
+		wts_rebalance();
+
+		/*
 		 * If single-threaded, we can dump and compare the WiredTiger
 		 * and Berkeley DB data sets.
 		 */
# NOTE(review): test/format/wts.c below differs from the recorded blob
# (index 98cb2daa7b9). wts_close (third hunk) clears g.wt_api, and t.c calls
# wts_close() before wts_rebalance(), which uses g.wt_api->msg_printf when
# g.logging is set. wts_reopen therefore now reloads g.wt_api from the
# reopened connection. That adds three lines: the second hunk length is 23,
# not 20, and the third hunk starts at new line 473, not 470. The blob id
# has not been recomputed.
diff --git a/test/format/wts.c b/test/format/wts.c
index c1ea469d7b5..98cb2daa7b9 100644
--- a/test/format/wts.c
+++ b/test/format/wts.c
@@ -132,12 +132,16 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
 {
 	WT_CONNECTION *conn;
 	int ret;
-	char config[4096], *end, *p;
+	char *config, *end, *p;
 
 	*connp = NULL;
 
-	p = config;
-	end = config + sizeof(config);
+#define	WIREDTIGER_OPEN_CONFIG_LEN	(4 * 1024)
+	if ((g.wiredtiger_open_config =
+	    calloc(WIREDTIGER_OPEN_CONFIG_LEN, sizeof(char))) == NULL)
+		die(ENOMEM, "calloc");
+	config = p = g.wiredtiger_open_config;
+	end = config + WIREDTIGER_OPEN_CONFIG_LEN;
 
 	p += snprintf(p, REMAIN(p, end),
 	    "create,checkpoint_sync=false,cache_size=%" PRIu32 "MB",
@@ -273,6 +277,23 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
 }
 
 /*
+ * wts_reopen --
+ *	Re-open a connection to a WiredTiger database.
+ */
+void
+wts_reopen(void)
+{
+	int ret;
+
+	if ((ret = wiredtiger_open(g.home,
+	    &event_handler, g.wiredtiger_open_config, &g.wts_conn)) != 0)
+		die(ret, "wiredtiger_open: %s", g.home);
+
+	/* wts_close cleared the extension API handle, reload it. */
+	g.wt_api = g.wts_conn->get_extension_api(g.wts_conn);
+}
+
+/*
  * wts_create --
  *	Create the underlying store.
  */
@@ -452,6 +473,8 @@ wts_close(void)
 
 	if ((ret = conn->close(conn, config)) != 0)
 		die(ret, "connection.close");
+	g.wts_conn = NULL;
+	g.wt_api = NULL;
 }
 
 void