summary | refs | log | tree | commit | diff
path: root/src/third_party/wiredtiger/src
diff options
context:
space:
mode:
author Dan Pasette <dan@10gen.com> 2015-02-04 06:48:51 -0500
committer Dan Pasette <dan@mongodb.com> 2015-02-04 06:55:52 -0500
commit 7d9ec251cf0e70bc0f9bb246aacfb6e62226ad37 (patch)
tree 5b436359112bc5610dcf9fd1e1f7331854b388d6 /src/third_party/wiredtiger/src
parent fc14926f9c8256edce8bbd15d439ca34667c6ebb (diff)
download mongo-7d9ec251cf0e70bc0f9bb246aacfb6e62226ad37.tar.gz
Import wiredtiger-wiredtiger-mongodb-2.8-rc7-105-g92325a3.tar.gz from wiredtiger branch mongodb-2.8
Diffstat (limited to 'src/third_party/wiredtiger/src')
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_cursor.c | 16
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c | 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_handle.c | 84
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_huffman.c | 27
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_page.c | 5
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_slvg.c | 12
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c | 172
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_stat.c | 7
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy.c | 7
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_srch.c | 1
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_srch.c | 3
-rw-r--r-- src/third_party/wiredtiger/src/config/config_def.c | 11
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_cache_pool.c | 16
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_sweep.c | 8
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_file.c | 42
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c | 7
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_page.c | 25
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h | 21
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.h | 23
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i | 203
-rw-r--r-- src/third_party/wiredtiger/src/include/cache.h | 5
-rw-r--r-- src/third_party/wiredtiger/src/include/cache.i | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/connection.h | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor.i | 5
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h | 20
-rw-r--r-- src/third_party/wiredtiger/src/include/gcc.h | 8
-rw-r--r-- src/third_party/wiredtiger/src/include/lint.h | 9
-rw-r--r-- src/third_party/wiredtiger/src/include/log.h | 4
-rw-r--r-- src/third_party/wiredtiger/src/include/lsm.h | 4
-rw-r--r-- src/third_party/wiredtiger/src/include/msvc.h | 15
-rw-r--r-- src/third_party/wiredtiger/src/include/mutex.h | 8
-rw-r--r-- src/third_party/wiredtiger/src/include/serial.i | 6
-rw-r--r-- src/third_party/wiredtiger/src/include/session.h | 4
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.h | 4
-rw-r--r-- src/third_party/wiredtiger/src/include/wt_internal.h | 4
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_track.c | 41
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c | 27
37 files changed, 492 insertions, 372 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 704b258a7dd..1960e4605ef 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -782,7 +782,9 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
WT_RET(__cursor_func_init(cbt, 1));
- WT_ERR(__wt_row_random(session, cbt));
+ WT_WITH_PAGE_INDEX(session,
+ ret = __wt_row_random(session, cbt));
+ WT_ERR(ret);
if (__cursor_valid(cbt, &upd))
WT_ERR(__wt_kv_return(session, cbt, upd));
else
@@ -948,7 +950,11 @@ __cursor_truncate(WT_SESSION_IMPL *session,
} else {
do {
WT_RET(__wt_btcur_remove(start));
- for (;;) {
+ /*
+ * Reset ret each time through so that we don't loop
+ * forever in the cursor equals case.
+ */
+ for (ret = 0;;) {
if (stop != NULL &&
__cursor_equals(start, stop))
break;
@@ -1009,7 +1015,11 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session,
} else {
do {
WT_RET(__wt_btcur_remove(start));
- for (;;) {
+ /*
+ * Reset ret each time through so that we don't loop
+ * forever in the cursor equals case.
+ */
+ for (ret = 0;;) {
if (stop != NULL &&
__cursor_equals(start, stop))
break;
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index af9f6a669f2..e84a63695f9 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -48,7 +48,7 @@ static void __debug_row_skip(WT_DBG *, WT_INSERT_HEAD *);
static int __debug_tree(WT_SESSION_IMPL *, WT_PAGE *, const char *, uint32_t);
static void __debug_update(WT_DBG *, WT_UPDATE *, int);
static void __dmsg(WT_DBG *, const char *, ...)
- WT_GCC_ATTRIBUTE((format (printf, 2, 3)));
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
static void __dmsg_wrapup(WT_DBG *);
/*
@@ -548,7 +548,9 @@ __debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
session = ds->session;
/* Dump the page metadata. */
- WT_RET(__debug_page_metadata(ds, page));
+ WT_WITH_PAGE_INDEX(session,
+ ret = __debug_page_metadata(ds, page));
+ WT_RET(ret);
/* Dump the page. */
switch (page->type) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index f0414c4e855..b47c9c897a6 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -12,7 +12,7 @@ static int __btree_conf(WT_SESSION_IMPL *, WT_CKPT *ckpt);
static int __btree_get_last_recno(WT_SESSION_IMPL *);
static int __btree_page_sizes(WT_SESSION_IMPL *);
static int __btree_preload(WT_SESSION_IMPL *);
-static int __btree_tree_open_empty(WT_SESSION_IMPL *, int, int);
+static int __btree_tree_open_empty(WT_SESSION_IMPL *, int);
/*
* __wt_btree_open --
@@ -100,8 +100,7 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
ckpt.raw.data, ckpt.raw.size,
root_addr, &root_addr_size, readonly));
if (creation || root_addr_size == 0)
- WT_ERR(__btree_tree_open_empty(
- session, creation, readonly));
+ WT_ERR(__btree_tree_open_empty(session, creation));
else {
WT_ERR(__wt_btree_tree_open(
session, root_addr, root_addr_size));
@@ -391,16 +390,17 @@ err: __wt_buf_free(session, &dsk);
* Create an empty in-memory tree.
*/
static int
-__btree_tree_open_empty(WT_SESSION_IMPL *session, int creation, int readonly)
+__btree_tree_open_empty(WT_SESSION_IMPL *session, int creation)
{
WT_BTREE *btree;
WT_DECL_RET;
- WT_PAGE *root, *leaf;
+ WT_PAGE *leaf, *root;
WT_PAGE_INDEX *pindex;
WT_REF *ref;
btree = S2BT(session);
root = leaf = NULL;
+ ref = NULL;
/*
* Newly created objects can be used for cursor inserts or for bulk
@@ -414,13 +414,10 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation, int readonly)
}
/*
- * A note about empty trees: the initial tree is a root page and a leaf
- * page. We need a pair of pages instead of just a single page because
- * we can reconcile the leaf page while the root stays pinned in memory.
- * If the pair is evicted without being modified, that's OK, nothing is
- * ever written.
- *
- * Create the root and leaf pages.
+ * A note about empty trees: the initial tree is a single root page.
+ * It has a single reference to a leaf page, marked deleted. The leaf
+ * page will be created by the first update. If the root is evicted
+ * without being modified, that's OK, nothing is ever written.
*
* !!!
* Be cautious about changing the order of updates in this code: to call
@@ -437,10 +434,9 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation, int readonly)
pindex = WT_INTL_INDEX_COPY(root);
ref = pindex->index[0];
ref->home = root;
- WT_ERR(__wt_btree_new_leaf_page(session, &leaf));
- ref->page = leaf;
+ ref->page = NULL;
ref->addr = NULL;
- ref->state = WT_REF_MEM;
+ ref->state = WT_REF_DELETED;
ref->key.recno = 1;
break;
case BTREE_ROW:
@@ -451,48 +447,20 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation, int readonly)
pindex = WT_INTL_INDEX_COPY(root);
ref = pindex->index[0];
ref->home = root;
- WT_ERR(__wt_btree_new_leaf_page(session, &leaf));
- ref->page = leaf;
+ ref->page = NULL;
ref->addr = NULL;
- ref->state = WT_REF_MEM;
+ ref->state = WT_REF_DELETED;
WT_ERR(__wt_row_ikey_incr(
session, root, 0, "", 1, &ref->key.ikey));
break;
WT_ILLEGAL_VALUE_ERR(session);
}
- /*
- * Mark the leaf page dirty: we didn't create an entirely valid root
- * page (specifically, the root page's disk address isn't set, and it's
- * the act of reconciling the leaf page that makes it work, we don't
- * try and use the original disk address of modified pages). We could
- * get around that by leaving the leaf page clean and building a better
- * root page, but then we get into trouble because a checkpoint marks
- * the root page dirty to force a write, and without reconciling the
- * leaf page we won't realize there's no records to write, we'll write
- * a root page, which isn't correct for an empty tree.
- *
- * Earlier versions of this code kept the leaf page clean, but with the
- * "empty" flag set in the leaf page's modification structure; in that
- * case, checkpoints works (forced reconciliation of a root with a
- * single "empty" page wouldn't write any blocks). That version had
- * memory leaks because the eviction code didn't correctly handle pages
- * that were "clean" (and so never reconciled), yet "modified" with an
- * "empty" flag. The goal of this code is to mimic a real tree that
- * simply has no records, for whatever reason, and trust reconciliation
- * to figure out it's empty and not write any blocks.
- *
- * We do not set the tree's modified flag because the checkpoint code
- * skips unmodified files in closing checkpoints (checkpoints that
- * don't require a write unless the file is actually dirty). There's
- * no need to reconcile this file unless the application does a real
- * checkpoint or it's actually modified.
- *
- * Only do this for a live tree, not for checkpoints. If we open an
- * empty checkpoint, the leaf page cannot be dirty or eviction may try
- * to write it, which will fail because checkpoints are read-only.
- */
- if (!readonly) {
+ /* Bulk loads require a leaf page for reconciliation: create it now. */
+ if (F_ISSET(btree, WT_BTREE_BULK)) {
+ WT_ERR(__wt_btree_new_leaf_page(session, &leaf));
+ ref->page = leaf;
+ ref->state = WT_REF_MEM;
WT_ERR(__wt_page_modify_init(session, leaf));
__wt_page_only_modify_set(session, leaf);
}
@@ -676,6 +644,22 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
leaf_split_size = __wt_split_page_size(btree, btree->maxleafpage);
/*
+ * In-memory split configuration.
+ */
+ if (__wt_config_gets(
+ session, cfg, "split_deepen_min_child", &cval) == WT_NOTFOUND ||
+ cval.val == 0)
+ btree->split_deepen_min_child = WT_SPLIT_DEEPEN_MIN_CHILD_DEF;
+ else
+ btree->split_deepen_min_child = (u_int)cval.val;
+ if (__wt_config_gets(
+ session, cfg, "split_deepen_per_child", &cval) == WT_NOTFOUND ||
+ cval.val == 0)
+ btree->split_deepen_per_child = WT_SPLIT_DEEPEN_PER_CHILD_DEF;
+ else
+ btree->split_deepen_per_child = (u_int)cval.val;
+
+ /*
* Get the maximum internal/leaf page key/value sizes.
*
* In historic versions of WiredTiger, the maximum internal/leaf page
diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c
index c1cf3431c3b..c31b3f2fdf1 100644
--- a/src/third_party/wiredtiger/src/btree/bt_huffman.c
+++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c
@@ -128,6 +128,30 @@ static const struct __wt_huffman_table __wt_huffman_nytenglish[] = {
static int __wt_huffman_read(WT_SESSION_IMPL *,
WT_CONFIG_ITEM *, struct __wt_huffman_table **, u_int *, u_int *);
+#define WT_HUFFMAN_CONFIG_VALID(str, len) \
+ (WT_STRING_CASE_MATCH("english", (str), (len)) || \
+ WT_PREFIX_MATCH((str), "utf8") || WT_PREFIX_MATCH((str), "utf16"))
+
+/*
+ * __btree_huffman_config --
+ * Verify the key or value strings passed in.
+ */
+static int
+__btree_huffman_config(WT_SESSION_IMPL *session,
+ WT_CONFIG_ITEM *key_conf, WT_CONFIG_ITEM *value_conf)
+{
+ if (key_conf->len != 0 &&
+ !WT_HUFFMAN_CONFIG_VALID(key_conf->str, key_conf->len))
+ WT_RET_MSG(
+ session, EINVAL, "illegal Huffman key configuration");
+ if (value_conf->len != 0 &&
+ !WT_HUFFMAN_CONFIG_VALID(value_conf->str, value_conf->len))
+ WT_RET_MSG(
+ session, EINVAL, "illegal Huffman value configuration");
+ return (0);
+
+}
+
/*
* __wt_btree_huffman_open --
* Configure Huffman encoding for the tree.
@@ -150,6 +174,7 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session)
__wt_config_gets_none(session, cfg, "huffman_value", &value_conf));
if (key_conf.len == 0 && value_conf.len == 0)
return (0);
+ WT_RET(__btree_huffman_config(session, &key_conf, &value_conf));
switch (btree->type) { /* Check file type compatibility. */
case BTREE_COL_FIX:
@@ -311,6 +336,8 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
tp->frequency = (uint32_t)frequency;
}
+ if (ret == EOF)
+ ret = 0;
*entriesp = lineno - 1;
*tablep = table;
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index 561e1c19218..1e539b7caee 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -131,8 +131,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
force_attempts < 10 &&
__evict_force_check(session, page)) {
++force_attempts;
- if ((ret = __wt_page_release_busy(
- session, ref, flags)) == EBUSY) {
+ ret = __wt_page_release_evict(session, ref);
+ if (ret == EBUSY) {
/* If forced eviction fails, stall. */
ret = 0;
wait_cnt += 1000;
@@ -285,6 +285,7 @@ err: if ((pindex = WT_INTL_INDEX_COPY(page)) != NULL) {
/* Increment the cache statistics. */
__wt_cache_page_inmem_incr(session, page, size);
+ (void)WT_ATOMIC_ADD8(cache->bytes_read, size);
(void)WT_ATOMIC_ADD8(cache->pages_inmem, 1);
*pagep = page;
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index fbc3890f23b..1cf616a2f6b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -294,12 +294,16 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
switch (ss->page_type) {
case WT_PAGE_COL_FIX:
case WT_PAGE_COL_VAR:
- WT_ERR(
- __slvg_col_build_internal(session, leaf_cnt, ss));
+ WT_WITH_PAGE_INDEX(session,
+ ret = __slvg_col_build_internal(
+ session, leaf_cnt, ss));
+ WT_ERR(ret);
break;
case WT_PAGE_ROW_LEAF:
- WT_ERR(
- __slvg_row_build_internal(session, leaf_cnt, ss));
+ WT_WITH_PAGE_INDEX(session,
+ ret = __slvg_row_build_internal(
+ session, leaf_cnt, ss));
+ WT_ERR(ret);
break;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index e4fe51ea28f..05af1a2f885 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -8,17 +8,10 @@
#include "wt_internal.h"
-/*
- * Track allocation increments, matching the cache calculations, which add an
- * estimate of allocation overhead to every object.
- */
-#define WT_MEMSIZE_ADD(total, len) do { \
- total += (len); \
-} while (0)
-#define WT_MEMSIZE_TRANSFER(from_decr, to_incr, len) do { \
+#define WT_MEM_TRANSFER(from_decr, to_incr, len) do { \
size_t __len = (len); \
- WT_MEMSIZE_ADD(from_decr, __len); \
- WT_MEMSIZE_ADD(to_incr, __len); \
+ from_decr += __len; \
+ to_incr += __len; \
} while (0)
/*
@@ -49,7 +42,8 @@ __split_oldest_gen(WT_SESSION_IMPL *session)
* Add a new entry into the session's split stash list.
*/
static int
-__split_stash_add(WT_SESSION_IMPL *session, void *p, size_t len)
+__split_stash_add(
+ WT_SESSION_IMPL *session, uint64_t split_gen, void *p, size_t len)
{
WT_SPLIT_STASH *stash;
@@ -60,7 +54,7 @@ __split_stash_add(WT_SESSION_IMPL *session, void *p, size_t len)
session->split_stash_cnt + 1, &session->split_stash));
stash = session->split_stash + session->split_stash_cnt++;
- stash->split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1);
+ stash->split_gen = split_gen;
stash->p = p;
stash->len = len;
@@ -150,14 +144,14 @@ __wt_split_stash_discard_all(
* it to be freed otherwise.
*/
static int
-__split_safe_free(WT_SESSION_IMPL *session, int exclusive, void *p, size_t s)
+__split_safe_free(WT_SESSION_IMPL *session,
+ uint64_t split_gen, int exclusive, void *p, size_t s)
{
/*
* We have swapped something in a page: if we don't have exclusive
* access, check whether there are other threads in the same tree.
*/
- if (!exclusive &&
- __split_oldest_gen(session) == S2C(session)->split_gen + 1)
+ if (!exclusive && __split_oldest_gen(session) > split_gen)
exclusive = 1;
if (exclusive) {
@@ -165,17 +159,10 @@ __split_safe_free(WT_SESSION_IMPL *session, int exclusive, void *p, size_t s)
return (0);
}
- return (__split_stash_add(session, p, s));
+ return (__split_stash_add(session, split_gen, p, s));
}
/*
- * Tuning; global variables to allow the binary to be patched, we don't yet have
- * any real understanding of what might be useful to surface to applications.
- */
-static u_int __split_deepen_min_child = 10000;
-static u_int __split_deepen_per_child = 100;
-
-/*
* __split_should_deepen --
* Return if we should deepen the tree.
*/
@@ -183,11 +170,13 @@ static int
__split_should_deepen(
WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *childrenp)
{
- WT_PAGE_INDEX *pindex;
+ WT_BTREE *btree;
WT_PAGE *page;
+ WT_PAGE_INDEX *pindex;
*childrenp = 0;
+ btree = S2BT(session);
page = ref->page;
pindex = WT_INTL_INDEX_COPY(page);
@@ -204,8 +193,8 @@ __split_should_deepen(
* we get a significant payback (in the case of a set of large keys,
* splitting won't help).
*/
- if (pindex->entries > __split_deepen_min_child) {
- *childrenp = pindex->entries / __split_deepen_per_child;
+ if (pindex->entries > btree->split_deepen_min_child) {
+ *childrenp = pindex->entries / btree->split_deepen_per_child;
return (1);
}
@@ -296,10 +285,9 @@ __split_ref_deepen_move(WT_SESSION_IMPL *session,
ref->key.ikey = ikey;
} else {
WT_RET(__split_ovfl_key_cleanup(session, parent, ref));
- WT_MEMSIZE_ADD(*parent_decrp,
- sizeof(WT_IKEY) + ikey->size);
+ *parent_decrp += sizeof(WT_IKEY) + ikey->size;
}
- WT_MEMSIZE_ADD(*child_incrp, sizeof(WT_IKEY) + ikey->size);
+ *child_incrp += sizeof(WT_IKEY) + ikey->size;
}
/*
@@ -323,7 +311,7 @@ __split_ref_deepen_move(WT_SESSION_IMPL *session,
}
/* And finally, the WT_REF itself. */
- WT_MEMSIZE_TRANSFER(*parent_decrp, *child_incrp, sizeof(WT_REF));
+ WT_MEM_TRANSFER(*parent_decrp, *child_incrp, sizeof(WT_REF));
return (0);
}
@@ -393,6 +381,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
WT_REF **alloc_refp;
WT_REF *child_ref, **child_refp, *parent_ref, **parent_refp, *ref;
size_t child_incr, parent_decr, parent_incr, size;
+ uint64_t split_gen;
uint32_t chunk, i, j, remain, slots;
int panic;
void *p;
@@ -432,7 +421,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
size = sizeof(WT_PAGE_INDEX) +
(children + SPLIT_CORRECT_2) * sizeof(WT_REF *);
WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
- WT_MEMSIZE_ADD(parent_incr, size);
+ parent_incr += size;
alloc_index->index = (WT_REF **)(alloc_index + 1);
alloc_index->entries = children + SPLIT_CORRECT_2;
alloc_index->index[0] = pindex->index[0];
@@ -441,7 +430,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
for (alloc_refp = alloc_index->index + SPLIT_CORRECT_1,
i = 0; i < children; ++alloc_refp, ++i) {
WT_ERR(__wt_calloc_one(session, alloc_refp));
- WT_MEMSIZE_ADD(parent_incr, sizeof(WT_REF));
+ parent_incr += sizeof(WT_REF);
}
/* Allocate child pages, and connect them into the new page index. */
@@ -466,7 +455,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
__wt_ref_key(parent, *parent_refp, &p, &size);
WT_ERR(
__wt_row_ikey(session, 0, p, size, &ref->key.ikey));
- WT_MEMSIZE_ADD(parent_incr, sizeof(WT_IKEY) + size);
+ parent_incr += sizeof(WT_IKEY) + size;
} else
ref->key.recno = (*parent_refp)->key.recno;
ref->state = WT_REF_MEM;
@@ -527,6 +516,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
* needs to be paid.
*/
WT_INTL_INDEX_SET(parent, alloc_index);
+ split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1);
panic = 1;
#ifdef HAVE_DIAGNOSTIC
@@ -596,31 +586,14 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
* be using the new index.
*/
size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
- WT_ERR(__split_safe_free(session, 0, pindex, size));
- WT_MEMSIZE_ADD(parent_decr, size);
+ WT_ERR(__split_safe_free(session, split_gen, 0, pindex, size));
+ parent_decr += size;
-#if 0
/*
- * Adjust the parent's memory footprint. This may look odd, but we
- * have already taken the allocation overhead into account, and an
- * increment followed by a decrement will cancel out the normal
- * adjustment.
+ * Adjust the parent's memory footprint.
*/
__wt_cache_page_inmem_incr(session, parent, parent_incr);
__wt_cache_page_inmem_decr(session, parent, parent_decr);
-#else
- /*
- * XXX
- * The code to track page sizes is fundamentally flawed in the face of
- * splits: for example, we don't add in an overhead allocation constant
- * when allocating WT_REF structures as pages are created, but the
- * calculations during split assume that correction. For now, ignore
- * our carefully calculated values and force the internal page size to
- * 5% of its current value.
- */
- size = parent->memory_footprint - (parent->memory_footprint / 20);
- __wt_cache_page_inmem_decr(session, parent, size);
-#endif
if (0) {
err: __wt_free_ref_index(session, parent, alloc_index, 1);
@@ -753,11 +726,10 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
incr = 0;
/* In some cases, the underlying WT_REF has not yet been allocated. */
- if (*refp == NULL) {
+ if (*refp == NULL)
WT_RET(__wt_calloc_one(session, refp));
- WT_MEMSIZE_ADD(incr, sizeof(WT_REF));
- }
ref = *refp;
+ incr += sizeof(WT_REF);
/*
* Any parent reference must be filled in by our caller; the primary
@@ -790,7 +762,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
ikey = multi->key.ikey;
WT_RET(__wt_row_ikey(session, 0,
WT_IKEY_DATA(ikey), ikey->size, &ref->key.ikey));
- WT_MEMSIZE_ADD(incr, sizeof(WT_IKEY) + ikey->size);
+ incr += sizeof(WT_IKEY) + ikey->size;
break;
default:
ref->key.recno = multi->key.recno;
@@ -815,7 +787,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
static int
__split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
uint32_t new_entries, size_t parent_decr, size_t parent_incr,
- int exclusive, int ref_discard)
+ int exclusive, int ref_discard, uint64_t *split_genp)
{
WT_DECL_RET;
WT_IKEY *ikey;
@@ -823,6 +795,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
WT_PAGE_INDEX *alloc_index, *pindex;
WT_REF **alloc_refp, *next_ref, *parent_ref;
size_t size;
+ uint64_t split_gen;
uint32_t children, i, j;
uint32_t deleted_entries, parent_entries, result_entries;
int complete, hazard, locked;
@@ -902,7 +875,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
size = sizeof(WT_PAGE_INDEX) + result_entries * sizeof(WT_REF *);
WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
- WT_MEMSIZE_ADD(parent_incr, size);
+ parent_incr += size;
alloc_index->index = (WT_REF **)(alloc_index + 1);
alloc_index->entries = result_entries;
for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i) {
@@ -929,6 +902,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
* to threads descending the tree.
*/
WT_INTL_INDEX_SET(parent, alloc_index);
+ split_gen = *split_genp = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1);
alloc_index = NULL;
#ifdef HAVE_DIAGNOSTIC
@@ -975,8 +949,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
if (ikey != NULL) {
size = sizeof(WT_IKEY) + ikey->size;
WT_TRET(__split_safe_free(
- session, 0, ikey, size));
- WT_MEMSIZE_ADD(parent_decr, size);
+ session, split_gen, 0, ikey, size));
+ parent_decr += size;
}
/*
* The page_del structure can be freed
@@ -993,8 +967,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
}
WT_TRET(__split_safe_free(
- session, 0, next_ref, sizeof(WT_REF)));
- WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
+ session, split_gen, 0, next_ref, sizeof(WT_REF)));
+ parent_decr += sizeof(WT_REF);
}
}
@@ -1003,8 +977,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
* Add it to the session discard list, to be freed when it's safe.
*/
size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
- WT_TRET(__split_safe_free(session, exclusive, pindex, size));
- WT_MEMSIZE_ADD(parent_decr, size);
+ WT_TRET(__split_safe_free(session, split_gen, exclusive, pindex, size));
+ parent_decr += size;
/*
* Row-store trees where the old version of the page is being discarded:
@@ -1020,10 +994,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
WT_TRET(__split_ovfl_key_cleanup(session, parent, ref));
/*
- * Adjust the parent's memory footprint. This may look odd, but we
- * have already taken the allocation overhead into account, and an
- * increment followed by a decrement will cancel out the normal
- * adjustment.
+ * Adjust the parent's memory footprint.
*/
__wt_cache_page_inmem_incr(session, parent, parent_incr);
__wt_cache_page_inmem_decr(session, parent, parent_decr);
@@ -1061,8 +1032,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
uint64_t __a, __b;
__a = parent->memory_footprint;
- WT_WITH_PAGE_INDEX(session,
- ret = __split_deepen(session, parent, children));
+ ret = __split_deepen(session, parent, children);
__b = parent->memory_footprint;
if (__b * 2 >= __a)
F_SET_ATOMIC(parent, WT_PAGE_REFUSE_DEEPEN);
@@ -1110,6 +1080,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
WT_PAGE *page, *right;
WT_REF *child, *split_ref[2] = { NULL, NULL };
size_t page_decr, parent_decr, parent_incr, right_incr;
+ uint64_t split_gen;
int i;
*splitp = 0;
@@ -1198,9 +1169,9 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
} else
WT_ERR(__wt_row_leaf_key(
session, page, &page->pg_row_d[0], key, 1));
-
WT_ERR(__wt_row_ikey(
session, 0, key->data, key->size, &child->key.ikey));
+ parent_incr += sizeof(WT_REF) + sizeof(WT_IKEY) + key->size;
__wt_scr_free(session, &key);
/*
@@ -1209,8 +1180,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, 0, 0, &right));
WT_ERR(__wt_calloc_one(session, &right->pg_row_ins));
WT_ERR(__wt_calloc_one(session, &right->pg_row_ins[0]));
- WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD));
- WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD *));
+ right_incr += sizeof(WT_INSERT_HEAD);
+ right_incr += sizeof(WT_INSERT_HEAD *);
WT_ERR(__wt_calloc_one(session, &split_ref[1]));
child = split_ref[1];
@@ -1219,19 +1190,18 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
WT_ERR(__wt_row_ikey(session, 0,
WT_INSERT_KEY(moved_ins), WT_INSERT_KEY_SIZE(moved_ins),
&child->key.ikey));
+ parent_incr +=
+ sizeof(WT_REF) + sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins);
/*
- * We're swapping WT_REFs in the parent, adjust the accounting, and
- * row store pages may have instantiated keys.
+ * After the split, we're going to discard the WT_REF, account for the
+ * change in memory footprint. Row store pages have keys that may be
+ * instantiated, check for that.
*/
- WT_MEMSIZE_ADD(parent_incr, sizeof(WT_REF));
- WT_MEMSIZE_ADD(
- parent_incr, sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins));
- WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
+ parent_decr += sizeof(WT_REF);
if (page->type == WT_PAGE_ROW_LEAF || page->type == WT_PAGE_ROW_INT)
if ((ikey = __wt_ref_key_instantiated(ref)) != NULL)
- WT_MEMSIZE_ADD(
- parent_decr, sizeof(WT_IKEY) + ikey->size);
+ parent_decr += sizeof(WT_IKEY) + ikey->size;
/* The new page is dirty by definition. */
WT_ERR(__wt_page_modify_init(session, right));
@@ -1253,10 +1223,10 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
*/
for (i = 0; i < WT_SKIP_MAXDEPTH && ins_head->tail[i] == moved_ins; ++i)
;
- WT_MEMSIZE_TRANSFER(page_decr, right_incr, sizeof(WT_INSERT) +
+ WT_MEM_TRANSFER(page_decr, right_incr, sizeof(WT_INSERT) +
(size_t)i * sizeof(WT_INSERT *) + WT_INSERT_KEY_SIZE(moved_ins));
- WT_MEMSIZE_TRANSFER(page_decr, right_incr,
- __wt_update_list_memsize(moved_ins->upd));
+ WT_MEM_TRANSFER(
+ page_decr, right_incr, __wt_update_list_memsize(moved_ins->upd));
/*
* Allocation operations completed, move the last insert list item from
@@ -1349,7 +1319,12 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
*/
page->modify->inmem_split_txn = __wt_txn_new_id(session);
- /* Update the page accounting. */
+ /*
+ * Update the page accounting.
+ *
+ * XXX
+ * If we fail to split the parent, the page's accounting will be wrong.
+ */
__wt_cache_page_inmem_decr(session, page, page_decr);
__wt_cache_page_inmem_incr(session, right, right_incr);
@@ -1358,8 +1333,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
* longer locked, so we cannot safely look at it.
*/
page = NULL;
- if ((ret = __split_parent(
- session, ref, split_ref, 2, parent_decr, parent_incr, 0, 0)) != 0) {
+ if ((ret = __split_parent(session, ref, split_ref, 2,
+ parent_decr, parent_incr, 0, 0, &split_gen)) != 0) {
/*
* Move the insert list element back to the original page list.
* For simplicity, the previous skip list pointers originally
@@ -1396,8 +1371,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
*/
if (ikey != NULL)
WT_TRET(__split_safe_free(
- session, 0, ikey, sizeof(WT_IKEY) + ikey->size));
- WT_TRET(__split_safe_free(session, 0, ref, sizeof(WT_REF)));
+ session, split_gen, 0, ikey, sizeof(WT_IKEY) + ikey->size));
+ WT_TRET(__split_safe_free(session, split_gen, 0, ref, sizeof(WT_REF)));
/*
* A note on error handling: if we completed the split, return success,
@@ -1480,6 +1455,7 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
WT_PAGE_MODIFY *mod;
WT_REF **ref_new;
size_t parent_decr, parent_incr;
+ uint64_t split_gen;
uint32_t i, new_entries;
page = ref->page;
@@ -1503,15 +1479,14 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
* change in memory footprint. Row store pages have keys that may be
* instantiated, check for that.
*/
- WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
+ parent_decr += sizeof(WT_REF);
if (page->type == WT_PAGE_ROW_LEAF || page->type == WT_PAGE_ROW_INT)
if ((ikey = __wt_ref_key_instantiated(ref)) != NULL)
- WT_MEMSIZE_ADD(
- parent_decr, sizeof(WT_IKEY) + ikey->size);
+ parent_decr += sizeof(WT_IKEY) + ikey->size;
/* Split into the parent. */
- WT_ERR(__split_parent(session,
- ref, ref_new, new_entries, parent_decr, parent_incr, exclusive, 1));
+ WT_ERR(__split_parent(session, ref, ref_new, new_entries,
+ parent_decr, parent_incr, exclusive, 1, &split_gen));
__wt_free(session, ref_new);
@@ -1534,9 +1509,10 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
* safe.
*/
if (ikey != NULL)
- WT_TRET(__split_safe_free(
- session, exclusive, ikey, sizeof(WT_IKEY) + ikey->size));
- WT_TRET(__split_safe_free(session, exclusive, ref, sizeof(WT_REF)));
+ WT_TRET(__split_safe_free(session, split_gen, exclusive,
+ ikey, sizeof(WT_IKEY) + ikey->size));
+ WT_TRET(__split_safe_free(session, split_gen, exclusive,
+ ref, sizeof(WT_REF)));
/*
* A note on error handling: if we completed the split, return success,
diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c
index d9ff2a6af1e..b7108b52395 100644
--- a/src/third_party/wiredtiger/src/btree/bt_stat.c
+++ b/src/third_party/wiredtiger/src/btree/bt_stat.c
@@ -45,8 +45,11 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
next_walk = NULL;
while ((ret =
- __wt_tree_walk(session, &next_walk, 0)) == 0 && next_walk != NULL)
- WT_RET(__stat_page(session, next_walk->page, stats));
+ __wt_tree_walk(session, &next_walk, 0)) == 0 && next_walk != NULL) {
+ WT_WITH_PAGE_INDEX(session,
+ ret = __stat_page(session, next_walk->page, stats));
+ WT_RET(ret);
+ }
return (ret == WT_NOTFOUND ? 0 : ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index fafb4b58fc4..2957eda3a49 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -366,11 +366,16 @@ recno_chk: if (recno != vs->record_total + 1)
goto celltype_err;
break;
case WT_PAGE_COL_VAR:
- case WT_PAGE_ROW_LEAF:
if (unpack->raw != WT_CELL_ADDR_LEAF &&
unpack->raw != WT_CELL_ADDR_LEAF_NO)
goto celltype_err;
break;
+ case WT_PAGE_ROW_LEAF:
+ if (unpack->raw != WT_CELL_ADDR_DEL &&
+ unpack->raw != WT_CELL_ADDR_LEAF &&
+ unpack->raw != WT_CELL_ADDR_LEAF_NO)
+ goto celltype_err;
+ break;
case WT_PAGE_COL_INT:
case WT_PAGE_ROW_INT:
if (unpack->raw != WT_CELL_ADDR_INT)
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index 4c418f91de0..db1b565b439 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -49,6 +49,7 @@ restart: page = current->page;
WT_ASSERT(session, current->key.recno == page->pg_intl_recno);
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(page);
base = pindex->entries;
descent = pindex->index[base - 1];
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 036e11bec6d..9967c5ecb0c 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -195,6 +195,7 @@ restart: page = current->page;
if (page->type != WT_PAGE_ROW_INT)
break;
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(page);
/*
@@ -487,6 +488,7 @@ restart:
if (page->type != WT_PAGE_ROW_INT)
break;
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(page);
descent = pindex->index[
__wt_random(session->rnd) % pindex->entries];
@@ -521,6 +523,7 @@ restart:
*/
cbt->ref = current;
cbt->compare = 0;
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(btree->root.page);
cbt->slot = pindex->entries < 2 ?
__wt_random(session->rnd) % page->pg_row_entries : 0;
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 646551cdd38..a7e9419a65c 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -152,6 +152,8 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = {
{ "os_cache_max", "int", "min=0", NULL },
{ "prefix_compression", "boolean", NULL, NULL },
{ "prefix_compression_min", "int", "min=0", NULL },
+ { "split_deepen_min_child", "int", NULL, NULL },
+ { "split_deepen_per_child", "int", NULL, NULL },
{ "split_pct", "int", "min=25,max=100", NULL },
{ "value_format", "format", NULL, NULL },
{ "version", "string", NULL, NULL },
@@ -246,6 +248,8 @@ static const WT_CONFIG_CHECK confchk_session_create[] = {
{ "prefix_compression", "boolean", NULL, NULL },
{ "prefix_compression_min", "int", "min=0", NULL },
{ "source", "string", NULL, NULL },
+ { "split_deepen_min_child", "int", NULL, NULL },
+ { "split_deepen_per_child", "int", NULL, NULL },
{ "split_pct", "int", "min=25,max=100", NULL },
{ "type", "string", NULL, NULL },
{ "value_format", "format", NULL, NULL },
@@ -585,7 +589,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
",key_format=u,key_gap=10,leaf_item_max=0,leaf_key_max=0,"
"leaf_page_max=32KB,leaf_value_max=0,memory_page_max=5MB,"
"os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0,"
- "prefix_compression_min=4,split_pct=75,value_format=u,"
+ "prefix_compression_min=4,split_deepen_min_child=0,"
+ "split_deepen_per_child=0,split_pct=75,value_format=u,"
"version=(major=0,minor=0)",
confchk_file_meta
},
@@ -626,8 +631,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"bloom_hash_count=8,bloom_oldest=0,chunk_max=5GB,chunk_size=10MB,"
"merge_max=15,merge_min=0),memory_page_max=5MB,"
"os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0,"
- "prefix_compression_min=4,source=,split_pct=75,type=file,"
- "value_format=u",
+ "prefix_compression_min=4,source=,split_deepen_min_child=0,"
+ "split_deepen_per_child=0,split_pct=75,type=file,value_format=u",
confchk_session_create
},
{ "session.drop",
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
index 861bafed900..f5b78e33b04 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
@@ -448,15 +448,15 @@ __cache_pool_assess(WT_SESSION_IMPL *session, uint64_t *phighest)
continue;
cache = entry->cache;
++entries;
- new = cache->bytes_evict;
+ new = cache->bytes_read;
/* Handle wrapping of eviction requests. */
- if (new >= cache->cp_saved_evict)
- cache->cp_current_evict = new - cache->cp_saved_evict;
+ if (new >= cache->cp_saved_read)
+ cache->cp_current_read = new - cache->cp_saved_read;
else
- cache->cp_current_evict = new;
- cache->cp_saved_evict = new;
- if (cache->cp_current_evict > highest)
- highest = cache->cp_current_evict;
+ cache->cp_current_read = new;
+ cache->cp_saved_read = new;
+ if (cache->cp_current_read > highest)
+ highest = cache->cp_current_read;
}
WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE,
"Highest eviction count: %" PRIu64 ", entries: %" PRIu64,
@@ -501,7 +501,7 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
reserved = cache->cp_reserved;
adjusted = 0;
- read_pressure = cache->cp_current_evict / highest;
+ read_pressure = cache->cp_current_read / highest;
WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE,
"\t%" PRIu64 ", %" PRIu64 ", %" PRIu32,
entry->cache_size, read_pressure, cache->cp_skip_count));
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c
index 01f08aa5f07..a5bd8e1343c 100644
--- a/src/third_party/wiredtiger/src/conn/conn_sweep.c
+++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c
@@ -32,14 +32,14 @@ __sweep(WT_SESSION_IMPL *session)
dhandle_next = SLIST_NEXT(dhandle, l);
if (WT_IS_METADATA(dhandle))
continue;
- if (dhandle->session_inuse == 0 && dhandle->timeofdeath == 0) {
+ if (dhandle->session_inuse != 0 ||
+ now <= dhandle->timeofdeath + WT_DHANDLE_SWEEP_WAIT)
+ continue;
+ if (dhandle->timeofdeath == 0) {
dhandle->timeofdeath = now;
WT_STAT_FAST_CONN_INCR(session, dh_conn_tod);
continue;
}
- if (dhandle->session_inuse != 0 ||
- now <= dhandle->timeofdeath + WT_DHANDLE_SWEEP_WAIT)
- continue;
/*
* We have a candidate for closing; if it's open, acquire an
diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c
index 553acc988f0..5aa85872a3b 100644
--- a/src/third_party/wiredtiger/src/evict/evict_file.c
+++ b/src/third_party/wiredtiger/src/evict/evict_file.c
@@ -90,33 +90,43 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
WT_ERR(__wt_evict(session, ref, 1));
break;
case WT_SYNC_DISCARD:
- case WT_SYNC_DISCARD_FORCE:
/*
- * Discard the page, whether clean or dirty.
- *
- * Clean the page, both to keep statistics correct, and
- * to let the page-discard function assert no dirty page
- * is ever discarded.
+ * Ordinary discard of the page, whether clean or dirty.
+ * If we see a dirty page in an ordinary discard (e.g.,
+ * from sweep), give up: an update must have happened
+ * since the file was selected for sweeping.
*/
- if (__wt_page_is_modified(page)) {
- page->modify->write_gen = 0;
- __wt_cache_dirty_decr(session, page);
- }
+ if (__wt_page_is_modified(page))
+ WT_ERR(EBUSY);
+
/*
* If the page contains an update that is too recent to
* evict, stop. This should never happen during
- * connection close, and in other paths our caller
+ * connection close, but in other paths our caller
* should be prepared to deal with this case.
*/
- if (syncop == WT_SYNC_DISCARD &&
- page->modify != NULL &&
+ if (page->modify != NULL &&
!__wt_txn_visible_all(session,
page->modify->rec_max_txn))
WT_ERR(EBUSY);
- if (syncop == WT_SYNC_DISCARD_FORCE)
- F_SET(session, WT_SESSION_DISCARD_FORCE);
- __wt_rec_page_clean_update(session, ref);
+ __wt_evict_page_clean_update(session, ref);
+ break;
+ case WT_SYNC_DISCARD_FORCE:
+ /*
+ * Forced discard of the page, whether clean or dirty.
+ * If we see a dirty page in a forced discard, clean
+ * the page, both to keep statistics correct, and to
+ * let the page-discard function assert no dirty page
+ * is ever discarded.
+ */
+ if (__wt_page_is_modified(page)) {
+ page->modify->write_gen = 0;
+ __wt_cache_dirty_decr(session, page);
+ }
+
+ F_SET(session, WT_SESSION_DISCARD_FORCE);
+ __wt_evict_page_clean_update(session, ref);
F_CLR(session, WT_SESSION_DISCARD_FORCE);
break;
WT_ILLEGAL_VALUE_ERR(session);
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 384ec9be5b3..6e7d3e9c6cd 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -205,11 +205,10 @@ __evict_server(void *arg)
"cache server: exiting with %" PRIu64 " pages in "
"memory and %" PRIu64 " pages evicted",
cache->pages_inmem, cache->pages_evict);
- if (cache->bytes_inmem != cache->bytes_evict)
+ if (cache->bytes_inmem != 0)
__wt_errx(session,
- "cache server: exiting with %" PRIu64 " bytes in "
- "memory and %" PRIu64 " bytes evicted",
- cache->bytes_inmem, cache->bytes_evict);
+ "cache server: exiting with %" PRIu64 " bytes in memory",
+ cache->bytes_inmem);
if (cache->bytes_dirty != 0 || cache->pages_dirty != 0)
__wt_errx(session,
"cache server: exiting with %" PRIu64
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 5bbf3b891f7..0cff584f2ab 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -98,7 +98,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
if (__wt_ref_is_root(ref))
__wt_ref_out(session, ref);
else
- __wt_rec_page_clean_update(session, ref);
+ __wt_evict_page_clean_update(session, ref);
WT_STAT_FAST_CONN_INCR(session, cache_eviction_clean);
WT_STAT_FAST_DATA_INCR(session, cache_eviction_clean);
@@ -139,11 +139,11 @@ done: session->excl_next = 0;
}
/*
- * __wt_rec_page_clean_update --
+ * __wt_evict_page_clean_update --
* Update a clean page's reference on eviction.
*/
void
-__wt_rec_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref)
+__wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref)
{
/*
* Discard the page and update the reference structure; if the page has
@@ -327,6 +327,7 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
uint32_t flags;
btree = S2BT(session);
+ flags = WT_EVICTING;
/*
* Get exclusive access to the page if our caller doesn't have the tree
@@ -472,7 +473,6 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
* they are not expected to split).
*/
if (__wt_page_is_modified(page)) {
- flags = WT_EVICTING;
if (exclusive)
LF_SET(WT_SKIP_UPDATE_ERR);
else if (top && !WT_PAGE_IS_INTERNAL(page) &&
@@ -482,17 +482,18 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
WT_ASSERT(session,
!__wt_page_is_modified(page) ||
LF_ISSET(WT_SKIP_UPDATE_RESTORE));
- } else {
- /*
- * If the page was ever modified, make sure all of the updates
- * on the page are old enough they can be discarded from cache.
- */
- if (!exclusive && mod != NULL &&
- !__wt_txn_visible_all(session, mod->rec_max_txn))
- return (EBUSY);
}
/*
+ * If the page was ever modified, make sure all of the updates
+ * on the page are old enough they can be discarded from cache.
+ */
+ if (!exclusive && mod != NULL &&
+ !__wt_txn_visible_all(session, mod->rec_max_txn) &&
+ !LF_ISSET(WT_SKIP_UPDATE_RESTORE))
+ return (EBUSY);
+
+ /*
* Repeat the test: fail if any page in the top-level page's subtree
* won't be merged into its parent.
*/
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index aea9ee98742..ef6f9b40414 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -192,7 +192,7 @@ struct __wt_page_modify {
uint64_t inmem_split_txn;
/* Dirty bytes added to the cache. */
- uint64_t bytes_dirty;
+ size_t bytes_dirty;
/*
* When pages are reconciled, the result is one or more replacement
@@ -532,7 +532,7 @@ struct __wt_page {
#define WT_READGEN_STEP 100
uint64_t read_gen;
- uint64_t memory_footprint; /* Memory attached to the page */
+ size_t memory_footprint; /* Memory attached to the page */
#define WT_PAGE_IS_INTERNAL(page) \
((page)->type == WT_PAGE_COL_INT || (page)->type == WT_PAGE_ROW_INT)
@@ -759,11 +759,11 @@ struct __wt_col {
* with RLE counts greater than 1 when reading the page. We can do a binary
* search in this array, then an offset calculation to find the cell.
*/
-struct __wt_col_rle {
+WT_PACKED_STRUCT_BEGIN(__wt_col_rle)
uint64_t recno; /* Record number of first repeat. */
uint64_t rle; /* Repeat count. */
uint32_t indx; /* Slot of entry in col_var.d */
-} WT_GCC_ATTRIBUTE((packed));
+WT_PACKED_STRUCT_END
/*
* WT_COL_PTR, WT_COL_PTR_SET --
@@ -827,7 +827,7 @@ struct __wt_ikey {
* is done for an entry, WT_UPDATE structures are formed into a forward-linked
* list.
*/
-struct __wt_update {
+WT_PACKED_STRUCT_BEGIN(__wt_update)
uint64_t txnid; /* update transaction */
WT_UPDATE *next; /* forward-linked list */
@@ -846,7 +846,7 @@ struct __wt_update {
/* The untyped value immediately follows the WT_UPDATE structure. */
#define WT_UPDATE_DATA(upd) \
((void *)((uint8_t *)(upd) + sizeof(WT_UPDATE)))
-} WT_GCC_ATTRIBUTE((packed));
+};
/*
* WT_INSERT --
@@ -1004,11 +1004,18 @@ struct __wt_insert_head {
* already have a split generation, leave it alone. If our caller is examining
* an index, we don't want the oldest split generation to move forward and
* potentially free it.
+ *
+ * Check that we haven't raced with a split_gen update after publishing: we
+ * rely on the published value not being missed when scanning for the oldest
+ * active split_gen.
*/
#define WT_ENTER_PAGE_INDEX(session) do { \
uint64_t __prev_split_gen = (session)->split_gen; \
if (__prev_split_gen == 0) \
- WT_PUBLISH((session)->split_gen, S2C(session)->split_gen)
+ do { \
+ WT_PUBLISH((session)->split_gen, \
+ S2C(session)->split_gen); \
+ } while ((session)->split_gen != S2C(session)->split_gen)
#define WT_LEAVE_PAGE_INDEX(session) \
if (__prev_split_gen == 0) \
diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h
index fa01dd5edc2..dd3acf6940d 100644
--- a/src/third_party/wiredtiger/src/include/btree.h
+++ b/src/third_party/wiredtiger/src/include/btree.h
@@ -98,14 +98,21 @@ struct __wt_btree {
CKSUM_UNCOMPRESSED=3 /* Uncompressed blocks only */
} checksum; /* Checksum configuration */
- u_int dictionary; /* Reconcile: dictionary slots */
- int internal_key_truncate; /* Reconcile: internal key truncate */
- int maximum_depth; /* Reconcile: maximum tree depth */
- int prefix_compression; /* Reconcile: prefix compression */
- u_int prefix_compression_min; /* Reconcile: prefix compression min */
- int split_pct; /* Reconcile: split page percent */
- WT_COMPRESSOR *compressor; /* Reconcile: page compressor */
- WT_RWLOCK *ovfl_lock; /* Reconcile: overflow lock */
+ /*
+ * Reconciliation...
+ */
+ u_int dictionary; /* Dictionary slots */
+ int internal_key_truncate; /* Internal key truncate */
+ int maximum_depth; /* Maximum tree depth */
+ int prefix_compression; /* Prefix compression */
+ u_int prefix_compression_min; /* Prefix compression min */
+#define WT_SPLIT_DEEPEN_MIN_CHILD_DEF 10000
+ u_int split_deepen_min_child; /* Minimum entries to deepen tree */
+#define WT_SPLIT_DEEPEN_PER_CHILD_DEF 100
+ u_int split_deepen_per_child; /* Entries per child when deepened */
+ int split_pct; /* Split page percent */
+ WT_COMPRESSOR *compressor; /* Page compressor */
+ WT_RWLOCK *ovfl_lock; /* Overflow lock */
uint64_t last_recno; /* Column-store last record number */
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 0dffdc798af..a0cbb23f126 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -75,6 +75,52 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
#endif
/*
+ * __wt_cache_page_byte_dirty_decr --
+ * Decrement the page's dirty byte count, guarding from underflow.
+ */
+static inline void
+__wt_cache_page_byte_dirty_decr(
+ WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
+{
+ WT_CACHE *cache;
+ size_t decr, orig;
+ int i;
+
+ cache = S2C(session)->cache;
+
+ /*
+ * We don't have exclusive access and there are ways of decrementing the
+ * page's dirty byte count by a too-large value. For example:
+ * T1: __wt_cache_page_inmem_incr(page, size)
+ * page is clean, don't increment dirty byte count
+ * T2: mark page dirty
+ * T1: __wt_cache_page_inmem_decr(page, size)
+ * page is dirty, decrement dirty byte count
+ * and, of course, the reverse where the page is dirty at the increment
+ * and clean at the decrement.
+ *
+ * The page's dirty-byte value always reflects bytes represented in the
+ * cache's dirty-byte count, decrement the page/cache as much as we can
+ * without underflow. If we can't decrement the dirty byte counts after
+ * a few tries, give up: the cache's value will be wrong, but consistent,
+ * and we'll fix it the next time this page is marked clean, or evicted.
+ */
+ for (i = 0; i < 5; ++i) {
+ /*
+ * Take care to read the dirty-byte count only once in case
+ * we're racing with updates.
+ */
+ orig = page->modify->bytes_dirty;
+ decr = WT_MIN(size, orig);
+ if (WT_ATOMIC_CAS8(
+ page->modify->bytes_dirty, orig, orig - decr)) {
+ WT_CACHE_DECR(session, cache->bytes_dirty, decr);
+ break;
+ }
+ }
+}
+
+/*
* __wt_cache_page_inmem_decr --
* Decrement a page's memory footprint in the cache.
*/
@@ -87,17 +133,16 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
WT_ASSERT(session, size < WT_EXABYTE);
- WT_CACHE_DECR(session, page->memory_footprint, size);
WT_CACHE_DECR(session, cache->bytes_inmem, size);
- if (__wt_page_is_modified(page)) {
- WT_CACHE_DECR(session, cache->bytes_dirty, size);
- WT_CACHE_DECR(session, page->modify->bytes_dirty, size);
- }
+ WT_CACHE_DECR(session, page->memory_footprint, size);
+ if (__wt_page_is_modified(page))
+ __wt_cache_page_byte_dirty_decr(session, page, size);
}
/*
* __wt_cache_dirty_incr --
- * Increment the cache dirty page/byte counts.
+ * Page switch from clean to dirty: increment the cache dirty page/byte
+ * counts.
*/
static inline void
__wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page)
@@ -119,42 +164,29 @@ __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page)
/*
* __wt_cache_dirty_decr --
- * Decrement the cache dirty page/byte counts.
+ * Page switch from dirty to clean: decrement the cache dirty page/byte
+ * counts.
*/
static inline void
__wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_CACHE *cache;
- size_t size;
+ WT_PAGE_MODIFY *modify;
cache = S2C(session)->cache;
if (cache->pages_dirty < 1) {
- (void)__wt_errx(session,
- "cache dirty decrement failed: cache dirty page count went "
- "negative");
+ __wt_errx(session,
+ "cache eviction dirty-page decrement failed: dirty page "
+ "count went negative");
cache->pages_dirty = 0;
} else
(void)WT_ATOMIC_SUB8(cache->pages_dirty, 1);
- /*
- * It is possible to decrement the footprint of the page without making
- * the page dirty (for example when freeing an obsolete update list),
- * so the footprint could change between read and decrement, and we
- * might attempt to decrement by a different amount than the bytes held
- * by the page.
- *
- * We catch that by maintaining a per-page dirty size, and fixing the
- * cache stats if that is non-zero when the page is discarded.
- *
- * Also take care that the global size doesn't go negative. This may
- * lead to small accounting errors (particularly on the last page of the
- * last file in a checkpoint), but that will come out in the wash when
- * the page is evicted.
- */
- size = WT_MIN(page->memory_footprint, cache->bytes_dirty);
- (void)WT_ATOMIC_SUB8(cache->bytes_dirty, size);
- (void)WT_ATOMIC_SUB8(page->modify->bytes_dirty, size);
+ modify = page->modify;
+ if (modify != NULL && modify->bytes_dirty != 0)
+ __wt_cache_page_byte_dirty_decr(
+ session, page, modify->bytes_dirty);
}
/*
@@ -165,23 +197,28 @@ static inline void
__wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_CACHE *cache;
- WT_PAGE_MODIFY *mod;
+ WT_PAGE_MODIFY *modify;
cache = S2C(session)->cache;
- mod = page->modify;
-
- /*
- * In rare cases, we may race tracking a page's dirty footprint.
- * If so, we will get here with a non-zero dirty_size in the page, and
- * we can fix the global stats.
- */
- if (mod != NULL && mod->bytes_dirty != 0)
- (void)WT_ATOMIC_SUB8(cache->bytes_dirty, mod->bytes_dirty);
+ modify = page->modify;
+
+ /* Update the bytes in-memory to reflect the eviction. */
+ WT_CACHE_DECR(session, cache->bytes_inmem, page->memory_footprint);
+
+ /* Update the cache's dirty-byte count. */
+ if (modify != NULL && modify->bytes_dirty != 0) {
+ if (cache->bytes_dirty < modify->bytes_dirty) {
+ __wt_errx(session,
+ "cache eviction dirty-bytes decrement failed: "
+ "dirty byte count went negative");
+ cache->bytes_dirty = 0;
+ } else
+ WT_CACHE_DECR(
+ session, cache->bytes_dirty, modify->bytes_dirty);
+ }
- WT_ASSERT(session, page->memory_footprint != 0);
+ /* Update pages and bytes evicted. */
(void)WT_ATOMIC_ADD8(cache->bytes_evict, page->memory_footprint);
- page->memory_footprint = 0;
-
(void)WT_ATOMIC_ADD8(cache->pages_evict, 1);
}
@@ -221,8 +258,7 @@ __wt_page_refp(WT_SESSION_IMPL *session,
WT_PAGE_INDEX *pindex;
uint32_t i;
- WT_ASSERT(session,
- WT_SESSION_TXN_STATE(session)->snap_min != WT_TXN_NONE);
+ WT_ASSERT(session, session->split_gen != 0);
/*
* Copy the parent page's index value: the page can split at any time,
@@ -894,11 +930,11 @@ __wt_ref_info(WT_SESSION_IMPL *session,
}
/*
- * __wt_page_release_busy --
- * Release a reference to a page, fail if busy during forced eviction.
+ * __wt_page_release_evict --
+ * Attempt to release and immediately evict a page.
*/
static inline int
-__wt_page_release_busy(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
+__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_BTREE *btree;
WT_DECL_RET;
@@ -906,37 +942,8 @@ __wt_page_release_busy(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
int locked, too_big;
btree = S2BT(session);
-
- /*
- * Discard our hazard pointer. Ignore pages we don't have and the root
- * page, which sticks in memory, regardless.
- */
- if (ref == NULL || __wt_ref_is_root(ref))
- return (0);
page = ref->page;
-
- too_big = (page->memory_footprint < btree->maxmempage) ? 0 : 1;
-
- /*
- * Attempt to evict pages with the special "oldest" read generation.
- *
- * This is set for pages that grow larger than the configured
- * memory_page_max setting, and when we are attempting to scan without
- * trashing the cache.
- *
- * Skip this if eviction is disabled for this operation or this tree,
- * or if there is no chance of eviction succeeding for dirty pages due
- * to a checkpoint or because we've already tried writing this page and
- * it contains an update that isn't stable. Also skip forced eviction
- * if we just did an in-memory split.
- */
- if (LF_ISSET(WT_READ_NO_EVICT) ||
- page->read_gen != WT_READGEN_OLDEST ||
- F_ISSET(btree, WT_BTREE_NO_EVICTION) ||
- (__wt_page_is_modified(page) && (btree->checkpointing ||
- !__wt_txn_visible_all(session, page->modify->first_dirty_txn) ||
- !__wt_txn_visible_all(session, page->modify->inmem_split_txn))))
- return (__wt_hazard_clear(session, page));
+ too_big = (page->memory_footprint > btree->maxmempage) ? 1 : 0;
/*
* Take some care with order of operations: if we release the hazard
@@ -945,8 +952,10 @@ __wt_page_release_busy(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
*/
locked = WT_ATOMIC_CAS4(ref->state, WT_REF_MEM, WT_REF_LOCKED);
WT_TRET(__wt_hazard_clear(session, page));
- if (!locked)
+ if (!locked) {
+ WT_TRET(EBUSY);
return (ret);
+ }
(void)WT_ATOMIC_ADD4(btree->evict_busy, 1);
if ((ret = __wt_evict_page(session, ref)) == 0) {
@@ -970,12 +979,46 @@ __wt_page_release_busy(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
/*
* __wt_page_release --
- * Release a reference to a page.
+ * Release a reference to a page, attempting forced eviction if warranted.
*/
static inline int
__wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
- WT_RET_BUSY_OK(__wt_page_release_busy(session, ref, flags));
+ WT_BTREE *btree;
+ WT_PAGE *page;
+
+ btree = S2BT(session);
+
+ /*
+ * Discard our hazard pointer. Ignore pages we don't have and the root
+ * page, which sticks in memory, regardless.
+ */
+ if (ref == NULL || __wt_ref_is_root(ref))
+ return (0);
+ page = ref->page;
+
+ /*
+ * Attempt to evict pages with the special "oldest" read generation.
+ *
+ * This is set for pages that grow larger than the configured
+ * memory_page_max setting, and when we are attempting to scan without
+ * trashing the cache.
+ *
+ * Skip this if eviction is disabled for this operation or this tree,
+ * or if there is no chance of eviction succeeding for dirty pages due
+ * to a checkpoint or because we've already tried writing this page and
+ * it contains an update that isn't stable. Also skip forced eviction
+ * if we just did an in-memory split.
+ */
+ if (LF_ISSET(WT_READ_NO_EVICT) ||
+ page->read_gen != WT_READGEN_OLDEST ||
+ F_ISSET(btree, WT_BTREE_NO_EVICTION) ||
+ (__wt_page_is_modified(page) && (btree->checkpointing ||
+ !__wt_txn_visible_all(session, page->modify->first_dirty_txn) ||
+ !__wt_txn_visible_all(session, page->modify->inmem_split_txn))))
+ return (__wt_hazard_clear(session, page));
+
+ WT_RET_BUSY_OK(__wt_page_release_evict(session, ref));
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index deccd676e26..9db0729fe3c 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -60,6 +60,7 @@ struct __wt_cache {
uint64_t pages_evict;
uint64_t bytes_dirty; /* Bytes/pages currently dirty */
uint64_t pages_dirty;
+ uint64_t bytes_read; /* Bytes read into memory */
uint64_t evict_max_page_size; /* Largest page seen at eviction */
@@ -102,8 +103,8 @@ struct __wt_cache {
/*
* Cache pool information.
*/
- uint64_t cp_saved_evict; /* Evict count from last pass */
- uint64_t cp_current_evict; /* Evict count from current pass */
+ uint64_t cp_saved_read; /* Read count from last pass */
+ uint64_t cp_current_read; /* Read count from current pass */
uint32_t cp_skip_count; /* Post change stabilization */
uint64_t cp_reserved; /* Base size for this cache */
WT_SESSION_IMPL *cp_session; /* May be used for cache management */
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index b1ace5e6a80..4bceb5c0d6c 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -62,7 +62,7 @@ __wt_cache_pages_inuse(WT_CACHE *cache)
static inline uint64_t
__wt_cache_bytes_inuse(WT_CACHE *cache)
{
- return (cache->bytes_inmem - cache->bytes_evict);
+ return (cache->bytes_inmem);
}
/*
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index ff34b014ecf..7b94a7ea94b 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -146,7 +146,7 @@ struct __wt_connection_impl {
WT_FH *lock_fh; /* Lock file handle */
- uint64_t split_gen; /* Generation number for splits */
+ volatile uint64_t split_gen; /* Generation number for splits */
/*
* The connection keeps a cache of data handles. The set of handles
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index 8fa9790e096..ad42f989bf4 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -164,8 +164,11 @@ __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session)
dhandle = session->dhandle;
+ /* If we close a handle with a time of death set, clear it. */
WT_ASSERT(session, dhandle->session_inuse > 0);
- (void)WT_ATOMIC_SUB4(dhandle->session_inuse, 1);
+ if (WT_ATOMIC_SUB4(dhandle->session_inuse, 1) == 0 &&
+ dhandle->timeofdeath != 0)
+ dhandle->timeofdeath = 0;
}
/*
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index ee9c27581c8..e47f4ba09c0 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -302,7 +302,7 @@ extern int __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_server);
extern int __wt_cache_wait(WT_SESSION_IMPL *session, int full);
extern void __wt_cache_dump(WT_SESSION_IMPL *session);
extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive);
-extern void __wt_rec_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref);
+extern void __wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn);
extern void __wt_log_written_reset(WT_SESSION_IMPL *session);
extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, int active_only);
@@ -423,7 +423,7 @@ extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, int created);
extern int __wt_turtle_init(WT_SESSION_IMPL *session);
extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep);
extern int __wt_turtle_update( WT_SESSION_IMPL *session, const char *key, const char *value);
-extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_ATTRIBUTE((noreturn));
+extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp);
extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
@@ -567,13 +567,13 @@ extern uint32_t __wt_cksum(const void *chunk, size_t len);
extern void __wt_cksum_init(void);
extern void __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler);
extern int __wt_eventv(WT_SESSION_IMPL *session, int msg_event, int error, const char *file_name, int line_number, const char *fmt, va_list ap);
-extern void __wt_err(WT_SESSION_IMPL *session, int error, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 3, 4)));
-extern void __wt_errx(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 2, 3)));
-extern int __wt_ext_err_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 3, 4)));
-extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 2, 3)));
-extern int __wt_ext_msg_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 3, 4)));
+extern void __wt_err(WT_SESSION_IMPL *session, int error, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
+extern void __wt_errx(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
+extern int __wt_ext_err_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
+extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
+extern int __wt_ext_msg_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v);
-extern void __wt_assert(WT_SESSION_IMPL *session, int error, const char *file_name, int line_number, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 5, 6)));
+extern void __wt_assert(WT_SESSION_IMPL *session, int error, const char *file_name, int line_number, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 5, 6)));
extern int __wt_panic(WT_SESSION_IMPL *session);
extern int __wt_illegal_value(WT_SESSION_IMPL *session, const char *name);
extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri);
@@ -616,8 +616,8 @@ extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2);
extern void __wt_random_init(uint32_t *rnd);
extern uint32_t __wt_random(uint32_t *rnd);
extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size);
-extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 3, 4)));
-extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 3, 4)));
+extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
+extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
extern int
__wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp
#ifdef HAVE_DIAGNOSTIC
diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h
index 7b606ca80b1..805838eb84b 100644
--- a/src/third_party/wiredtiger/src/include/gcc.h
+++ b/src/third_party/wiredtiger/src/include/gcc.h
@@ -7,7 +7,12 @@
*/
/* Add GCC-specific attributes to types and function declarations. */
-#define WT_GCC_ATTRIBUTE(x) __attribute__(x)
+#define WT_COMPILER_TYPE_ALIGN(x) __attribute__((aligned(x)))
+
+#define WT_PACKED_STRUCT_BEGIN(name) \
+ struct __attribute__ ((__packed__)) name {
+#define WT_PACKED_STRUCT_END \
+ };
/*
* Attribute are only permitted on function declarations, not definitions.
@@ -15,6 +20,7 @@
* dist/s_prototypes to create extern.h.
*/
#define WT_GCC_FUNC_ATTRIBUTE(x)
+#define WT_GCC_FUNC_DECL_ATTRIBUTE(x) __attribute__(x)
/*
* Atomic writes:
diff --git a/src/third_party/wiredtiger/src/include/lint.h b/src/third_party/wiredtiger/src/include/lint.h
index 9c560339e03..5668abc6dab 100644
--- a/src/third_party/wiredtiger/src/include/lint.h
+++ b/src/third_party/wiredtiger/src/include/lint.h
@@ -6,8 +6,15 @@
* See the file LICENSE for redistribution information.
*/
-#define WT_GCC_ATTRIBUTE(x)
+#define WT_COMPILER_TYPE_ALIGN(x)
+
+#define WT_PACKED_STRUCT_BEGIN(name) \
+ struct name {
+#define WT_PACKED_STRUCT_END \
+ };
+
#define WT_GCC_FUNC_ATTRIBUTE(x)
+#define WT_GCC_FUNC_DECL_ATTRIBUTE(x)
#define __WT_ATOMIC_ADD(v, val) \
((v) += (val))
diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h
index f88a5381227..82d90070609 100644
--- a/src/third_party/wiredtiger/src/include/log.h
+++ b/src/third_party/wiredtiger/src/include/log.h
@@ -70,7 +70,7 @@
#define WT_LOG_SLOT_FREE 1
#define WT_LOG_SLOT_PENDING 2
#define WT_LOG_SLOT_READY 3
-typedef struct {
+typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct {
int64_t slot_state; /* Slot state */
uint64_t slot_group_size; /* Group size */
int32_t slot_error; /* Error value */
@@ -90,7 +90,7 @@ typedef struct {
#define SLOT_SYNC 0x08 /* Needs sync on release */
#define SLOT_SYNC_DIR 0x10 /* Directory sync on release */
uint32_t flags; /* Flags */
-} WT_LOGSLOT WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+} WT_LOGSLOT;
typedef struct {
WT_LOGSLOT *slot;
diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h
index 8038e5a34ab..8d50f3ea73b 100644
--- a/src/third_party/wiredtiger/src/include/lsm.h
+++ b/src/third_party/wiredtiger/src/include/lsm.h
@@ -73,7 +73,7 @@ struct __wt_cursor_lsm {
* WT_LSM_CHUNK --
* A single chunk (file) in an LSM tree.
*/
-struct __wt_lsm_chunk {
+struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_lsm_chunk {
const char *uri; /* Data source for this chunk */
const char *bloom_uri; /* URI of Bloom filter, if any */
struct timespec create_ts; /* Creation time (for rate limiting) */
@@ -101,7 +101,7 @@ struct __wt_lsm_chunk {
#define WT_LSM_CHUNK_ONDISK 0x04
#define WT_LSM_CHUNK_STABLE 0x08
uint32_t flags;
-} WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+};
/*
* Different types of work units. Used by LSM worker threads to choose which
diff --git a/src/third_party/wiredtiger/src/include/msvc.h b/src/third_party/wiredtiger/src/include/msvc.h
index f176a40f2bf..3ec74b2d629 100644
--- a/src/third_party/wiredtiger/src/include/msvc.h
+++ b/src/third_party/wiredtiger/src/include/msvc.h
@@ -13,8 +13,21 @@
#define inline __inline
-#define WT_GCC_ATTRIBUTE(x)
+/*
+ * Add MSVC-specific attributes and pragmas to types and function declarations.
+ */
+#define WT_COMPILER_TYPE_ALIGN(x) __declspec(align(x))
+
+#define WT_PACKED_STRUCT_BEGIN(name) \
+ __pragma(pack(push,1)) \
+ struct name {
+
+#define WT_PACKED_STRUCT_END \
+ }; \
+ __pragma(pack(pop))
+
#define WT_GCC_FUNC_ATTRIBUTE(x)
+#define WT_GCC_FUNC_DECL_ATTRIBUTE(x)
#define __WT_ATOMIC_ADD(v, val, n, s, t) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), \
diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h
index c5b7587303d..07aa740c525 100644
--- a/src/third_party/wiredtiger/src/include/mutex.h
+++ b/src/third_party/wiredtiger/src/include/mutex.h
@@ -68,15 +68,15 @@ struct __wt_rwlock {
#if SPINLOCK_TYPE == SPINLOCK_GCC
-typedef volatile int
- WT_SPINLOCK WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+typedef volatile int WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT)
+ WT_SPINLOCK;
#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\
SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\
SPINLOCK_TYPE == SPINLOCK_MSVC ||\
SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_LOGGING
-typedef struct {
+typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct {
wt_mutex_t lock;
uint64_t counter; /* Statistics: counter */
@@ -85,7 +85,7 @@ typedef struct {
int8_t id; /* Statistics: current holder ID */
int8_t initialized; /* Lock initialized, for cleanup */
-} WT_SPINLOCK WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+} WT_SPINLOCK;
#else
diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i
index fb610383a75..b42b792f5a7 100644
--- a/src/third_party/wiredtiger/src/include/serial.i
+++ b/src/third_party/wiredtiger/src/include/serial.i
@@ -245,6 +245,9 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
*/
__wt_cache_page_inmem_incr(session, page, upd_size);
+ /* Mark the page dirty after updating the footprint. */
+ __wt_page_modify_set(session, page);
+
/*
* If there are subsequent WT_UPDATE structures, we're evicting pages
* and the page-scanning mutex isn't held, discard obsolete WT_UPDATE
@@ -264,8 +267,5 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
__wt_update_obsolete_free(session, page, obsolete);
}
- /* Mark the page dirty after updating the footprint. */
- __wt_page_modify_set(session, page);
-
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index c2ed3473dfb..909f1daf5a4 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -42,7 +42,7 @@ struct __wt_hazard {
* WT_SESSION_IMPL --
* Implementation of WT_SESSION.
*/
-struct __wt_session_impl {
+struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
WT_SESSION iface;
void *lang_private; /* Language specific private storage */
@@ -190,4 +190,4 @@ struct __wt_session_impl {
uint32_t hazard_size; /* Allocated slots in hazard array. */
uint32_t nhazard; /* Count of active hazard pointers */
WT_HAZARD *hazard; /* Hazard pointer array */
-} WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+};
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 8380e55effb..c1c4703316b 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -25,10 +25,10 @@
#define WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id])
-struct __wt_txn_state {
+struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_txn_state {
volatile uint64_t id;
volatile uint64_t snap_min;
-} WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+};
struct __wt_txn_global {
volatile uint64_t current; /* Current transaction ID. */
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 1b3a9b62626..576827bebcd 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -36,7 +36,9 @@ extern "C" {
#include <io.h>
#endif
#include <limits.h>
-#ifndef _WIN32
+#ifdef _WIN32
+#include <process.h>
+#else
#include <pthread.h>
#endif
#ifdef HAVE_PTHREAD_NP_H
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_track.c b/src/third_party/wiredtiger/src/reconcile/rec_track.c
index c5c72391248..2533ad9e201 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_track.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_track.c
@@ -335,12 +335,12 @@ __ovfl_reuse_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
* fixing up skiplist links.
*/
for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
- for (e = &head[i]; *e != NULL;) {
- if (F_ISSET(*e, WT_OVFL_REUSE_INUSE)) {
- e = &(*e)->next[i];
+ for (e = &head[i]; (reuse = *e) != NULL;) {
+ if (F_ISSET(reuse, WT_OVFL_REUSE_INUSE)) {
+ e = &reuse->next[i];
continue;
}
- *e = (*e)->next[i];
+ *e = reuse->next[i];
}
/*
@@ -359,19 +359,20 @@ __ovfl_reuse_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
if (F_ISSET(reuse, WT_OVFL_REUSE_INUSE)) {
F_CLR(reuse,
WT_OVFL_REUSE_INUSE | WT_OVFL_REUSE_JUST_ADDED);
- e = &(*e)->next[0];
+ e = &reuse->next[0];
continue;
}
- *e = (*e)->next[0];
+ *e = reuse->next[0];
WT_ASSERT(session, !F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED));
- decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
WT_RET(
__ovfl_reuse_verbose(session, page, reuse, "free"));
+
WT_RET(bm->free(
bm, session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size));
+ decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
__wt_free(session, reuse);
}
@@ -404,12 +405,12 @@ __ovfl_reuse_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page)
* fixing up skiplist links.
*/
for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
- for (e = &head[i]; *e != NULL;) {
- if (!F_ISSET(*e, WT_OVFL_REUSE_JUST_ADDED)) {
- e = &(*e)->next[i];
+ for (e = &head[i]; (reuse = *e) != NULL;) {
+ if (!F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED)) {
+ e = &reuse->next[i];
continue;
}
- *e = (*e)->next[i];
+ *e = reuse->next[i];
}
/*
@@ -420,17 +421,17 @@ __ovfl_reuse_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page)
for (e = &head[0]; (reuse = *e) != NULL;) {
if (!F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED)) {
F_CLR(reuse, WT_OVFL_REUSE_INUSE);
- e = &(*e)->next[0];
+ e = &reuse->next[0];
continue;
}
- *e = (*e)->next[0];
+ *e = reuse->next[0];
if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
WT_RET(
__ovfl_reuse_verbose(session, page, reuse, "free"));
+
WT_TRET(bm->free(
bm, session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size));
-
decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
__wt_free(session, reuse);
}
@@ -722,26 +723,26 @@ __ovfl_txnc_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
for (e = &head[i]; (txnc = *e) != NULL;) {
if (TXNID_LE(oldest_txn, txnc->current)) {
- e = &(*e)->next[i];
+ e = &txnc->next[i];
continue;
}
- *e = (*e)->next[i];
+ *e = txnc->next[i];
}
/* Second, discard any no longer needed transaction-cache records. */
decr = 0;
for (e = &head[0]; (txnc = *e) != NULL;) {
if (TXNID_LE(oldest_txn, txnc->current)) {
- e = &(*e)->next[0];
+ e = &txnc->next[0];
continue;
}
- *e = (*e)->next[0];
-
- decr += WT_OVFL_SIZE(txnc, WT_OVFL_TXNC);
+ *e = txnc->next[0];
if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
WT_RET(
__ovfl_txnc_verbose(session, page, txnc, "free"));
+
+ decr += WT_OVFL_SIZE(txnc, WT_OVFL_TXNC);
__wt_free(session, txnc);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 0300596f90b..be66309c77f 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -440,8 +440,11 @@ __wt_reconcile(WT_SESSION_IMPL *session,
* Root pages are special, splits have to be done, we can't put it off
* as the parent's problem any more.
*/
- if (__wt_ref_is_root(ref))
- return (__rec_root_write(session, page, flags));
+ if (__wt_ref_is_root(ref)) {
+ WT_WITH_PAGE_INDEX(session,
+ ret = __rec_root_write(session, page, flags));
+ return (ret);
+ }
/*
* Otherwise, mark the page's parent dirty.
@@ -504,6 +507,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
WT_ILLEGAL_VALUE(session);
}
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(next);
for (i = 0; i < mod->mod_multi_entries; ++i) {
WT_ERR(__wt_multi_to_ref(session,
@@ -2895,7 +2899,7 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
WT_RET_MSG(session, EINVAL,
"bulk-load is only possible for newly created trees");
- /* Set a reference to the empty leaf page. */
+ /* Get a reference to the empty leaf page. */
pindex = WT_INTL_INDEX_COPY(btree->root.page);
cbulk->ref = pindex->index[0];
cbulk->leaf = cbulk->ref->page;
@@ -4005,7 +4009,6 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_ERR(__rec_child_modify(session, r, ref, &hazard, &state));
addr = ref->addr;
child = ref->page;
- vtype = 0;
/* Deleted child we don't have to write. */
if (state == WT_CHILD_IGNORE) {
@@ -4023,10 +4026,6 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
continue;
}
- /* Deleted child requiring a proxy cell. */
- if (state == WT_CHILD_PROXY)
- vtype = WT_CELL_ADDR_DEL;
-
/*
* Modified child. Empty pages are merged into the parent and
* discarded.
@@ -4076,22 +4075,22 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
/*
* Build the value cell, the child page's address. Addr points
- * to an on-page cell or an off-page WT_ADDR structure. The
- * cell type has been set in the case of page deletion requiring
+ * to an on-page cell or an off-page WT_ADDR structure. There's
+ * a special cell type in the case of page deletion requiring
* a proxy cell, otherwise use the information from the addr or
* original cell.
*/
if (__wt_off_page(page, addr)) {
p = addr->addr;
size = addr->size;
- if (vtype == 0)
- vtype = __rec_vtype(addr);
+ vtype = state == WT_CHILD_PROXY ?
+ WT_CELL_ADDR_DEL : __rec_vtype(addr);
} else {
__wt_cell_unpack(ref->addr, vpack);
p = vpack->data;
size = vpack->size;
- if (vtype == 0)
- vtype = vpack->raw;
+ vtype = state == WT_CHILD_PROXY ?
+ WT_CELL_ADDR_DEL : (u_int)vpack->raw;
}
__rec_cell_build_addr(r, p, size, vtype, 0);
CHILD_RELEASE_ERR(session, hazard, ref);