summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Keith Bostic <keith@wiredtiger.com> 2015-01-23 13:51:26 -0500
committer Keith Bostic <keith@wiredtiger.com> 2015-01-23 13:51:26 -0500
commit 724d863fcfcab8da5b95214fc603a11cc303b32b (patch)
tree 8a676de416120a6198308d51a82f6c1b5be47b02
parent 58e7e4771e26d1d037aad300cb9139414450b816 (diff)
parent 8da46ac273653b629623694a916ef27e5920d173 (diff)
download mongo-724d863fcfcab8da5b95214fc603a11cc303b32b.tar.gz
Merge branch 'develop' into mongodb-2.8 [mongodb-2.8-rc6]
-rw-r--r-- src/btree/bt_split.c 51
-rw-r--r-- src/include/btree.i 33
2 files changed, 58 insertions, 26 deletions
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 519452b8253..94c73106d70 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -257,12 +257,13 @@ __split_ovfl_key_cleanup(WT_SESSION_IMPL *session, WT_PAGE *page, WT_REF *ref)
}
/*
- * __split_ref_instantiate --
- * Instantiate key/address pairs in memory in service of a split.
+ * __split_ref_deepen_move --
+ * Move a WT_REF from a parent to a child in service of a split to deepen
+ * the tree, including updating the accounting information.
*/
static int
-__split_ref_instantiate(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_REF *ref, size_t *parent_decrp, size_t *child_incrp)
+__split_ref_deepen_move(WT_SESSION_IMPL *session,
+ WT_PAGE *parent, WT_REF *ref, size_t *parent_decrp, size_t *child_incrp)
{
WT_ADDR *addr;
WT_CELL_UNPACK unpack;
@@ -279,8 +280,6 @@ __split_ref_instantiate(WT_SESSION_IMPL *session,
* of child pages, and so we can no longer reference the block image
* that remains with the page being split.
*
- * Track how much memory the parent is losing and the child gaining.
- *
* No locking is required to update the WT_REF structure because we're
* the only thread splitting the parent page, and there's no way for
* readers to race with our updates of single pointers. The changes
@@ -289,13 +288,13 @@ __split_ref_instantiate(WT_SESSION_IMPL *session,
*
* Row-store keys, first.
*/
- if (page->type == WT_PAGE_ROW_INT) {
+ if (parent->type == WT_PAGE_ROW_INT) {
if ((ikey = __wt_ref_key_instantiated(ref)) == NULL) {
- __wt_ref_key(page, ref, &key, &size);
+ __wt_ref_key(parent, ref, &key, &size);
WT_RET(__wt_row_ikey(session, 0, key, size, &ikey));
ref->key.ikey = ikey;
} else {
- WT_RET(__split_ovfl_key_cleanup(session, page, ref));
+ WT_RET(__split_ovfl_key_cleanup(session, parent, ref));
WT_MEMSIZE_ADD(*parent_decrp,
sizeof(WT_IKEY) + ikey->size);
}
@@ -307,12 +306,8 @@ __split_ref_instantiate(WT_SESSION_IMPL *session,
* address has been instantiated, there's no work to do. Otherwise,
* get the address from the on-page cell.
*/
- if ((addr = ref->addr) == NULL)
- return (0);
- if (__wt_off_page(page, addr))
- WT_MEMSIZE_TRANSFER(*parent_decrp, *child_incrp,
- sizeof(WT_ADDR) + addr->size);
- else {
+ addr = ref->addr;
+ if (addr != NULL && !__wt_off_page(parent, addr)) {
__wt_cell_unpack((WT_CELL *)ref->addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
if ((ret = __wt_strndup(
@@ -324,8 +319,11 @@ __split_ref_instantiate(WT_SESSION_IMPL *session,
addr->type =
unpack.raw == WT_CELL_ADDR_INT ? WT_ADDR_INT : WT_ADDR_LEAF;
ref->addr = addr;
- WT_MEMSIZE_ADD(*child_incrp, sizeof(WT_ADDR) + addr->size);
}
+
+ /* And finally, the WT_REF itself. */
+ WT_MEMSIZE_TRANSFER(*parent_decrp, *child_incrp, sizeof(WT_REF));
+
return (0);
}
@@ -502,12 +500,9 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
child_incr = 0;
child_pindex = WT_INTL_INDEX_COPY(child);
for (child_refp = child_pindex->index, j = 0; j < slots; ++j) {
- WT_ERR(__split_ref_instantiate(session,
+ WT_ERR(__split_ref_deepen_move(session,
parent, *parent_refp, &parent_decr, &child_incr));
*child_refp++ = *parent_refp++;
-
- WT_MEMSIZE_TRANSFER(
- parent_decr, child_incr, sizeof(WT_REF));
}
__wt_cache_page_inmem_incr(session, child, child_incr);
}
@@ -603,6 +598,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
WT_ERR(__split_safe_free(session, 0, pindex, size));
WT_MEMSIZE_ADD(parent_decr, size);
+#if 0
/*
* Adjust the parent's memory footprint. This may look odd, but we
* have already taken the allocation overhead into account, and an
@@ -611,6 +607,19 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
*/
__wt_cache_page_inmem_incr(session, parent, parent_incr);
__wt_cache_page_inmem_decr(session, parent, parent_decr);
+#else
+ /*
+ * XXX
+ * The code to track page sizes is fundamentally flawed in the face of
+ * splits: for example, we don't add in an overhead allocation constant
+ * when allocating WT_REF structures as pages are created, but the
+ * calculations during split assume that correction. For now, ignore
+ * our carefully calculated values and force the internal page size to
+ * 5% of its current value.
+ */
+ size = parent->memory_footprint - (parent->memory_footprint / 20);
+ __wt_cache_page_inmem_decr(session, parent, size);
+#endif
if (0) {
err: __wt_free_ref_index(session, parent, alloc_index, 1);
@@ -774,13 +783,11 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
* the confusion.
*/
WT_RET(__wt_calloc_one(session, &addr));
- WT_MEMSIZE_ADD(incr, sizeof(WT_ADDR));
ref->addr = addr;
addr->size = multi->addr.size;
addr->type = multi->addr.type;
WT_RET(__wt_strndup(session,
multi->addr.addr, addr->size, &addr->addr));
- WT_MEMSIZE_ADD(incr, addr->size);
} else
WT_RET(__split_multi_inmem(session, page, ref, multi));
diff --git a/src/include/btree.i b/src/include/btree.i
index d30ee46486a..7b3f6a10403 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -35,6 +35,31 @@ __wt_page_is_modified(WT_PAGE *page)
#define WT_ALLOC_OVERHEAD 32U
/*
+ * Decrement a tracked cache field. Use an atomic CAS loop so the unsigned
+ * value can never wrap below zero: the decrement is clamped to the current
+ * value. In a diagnostic build, report a decrement larger than the current
+ * value, because it means the accounting has gone wrong.
+ */
+#ifdef HAVE_DIAGNOSTIC
+#define WT_CACHE_DECR(session, f, sz) do { \
+ uint64_t __val = f; \
+ uint64_t __sz = WT_MIN(__val, sz); \
+ if (__sz < sz) \
+ __wt_errx(session, "%s underflow: decrementing %" \
+ WT_SIZET_FMT, #f, sz); \
+ while (!WT_ATOMIC_CAS8(f, __val, __val - __sz)) \
+ __val = f, __sz = WT_MIN(__val, __sz); \
+} while (0)
+#else
+#define WT_CACHE_DECR(session, f, sz) do { \
+ uint64_t __val = f; \
+ uint64_t __sz = WT_MIN(__val, sz); \
+ while (!WT_ATOMIC_CAS8(f, __val, __val - __sz)) \
+ __val = f, __sz = WT_MIN(__val, __sz); \
+} while (0)
+#endif
+
+/*
* __wt_cache_page_inmem_incr --
* Increment a page's memory footprint in the cache.
*/
@@ -66,11 +91,11 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
size += WT_ALLOC_OVERHEAD;
cache = S2C(session)->cache;
- (void)WT_ATOMIC_SUB8(cache->bytes_inmem, size);
- (void)WT_ATOMIC_SUB8(page->memory_footprint, size);
+ WT_CACHE_DECR(session, cache->bytes_inmem, size);
+ WT_CACHE_DECR(session, page->memory_footprint, size);
if (__wt_page_is_modified(page)) {
- (void)WT_ATOMIC_SUB8(cache->bytes_dirty, size);
- (void)WT_ATOMIC_SUB8(page->modify->bytes_dirty, size);
+ WT_CACHE_DECR(session, cache->bytes_dirty, size);
+ WT_CACHE_DECR(session, page->modify->bytes_dirty, size);
}
}