summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith Bostic <keith@wiredtiger.com>2015-01-31 12:59:34 -0500
committerKeith Bostic <keith@wiredtiger.com>2015-01-31 12:59:34 -0500
commitb0a828b262a2d0d3cf1361eed98aa25a1168a7a6 (patch)
treec3e12a7740682dba9243120409f14f172da6fdc1
parentb640366c28fc66744e482c20c16973cb052aef8e (diff)
downloadmongo-b0a828b262a2d0d3cf1361eed98aa25a1168a7a6.tar.gz
We no longer calculate allocation overhead per allocation chunk; revert
the workaround for the problem with page memory size calculations during splits, where we forced the new parent page memory size to 5% of its current value; reference #1564, #1565. This fixes a problem where 5% of a page's memory footprint isn't large enough to accommodate the cache decrements that will be done in the page's future, leading to page-size underflow. Minor cleanups: we no longer calculate allocation overhead per allocation chunk, the macro WT_MEMSIZE_ADD is no longer needed at all, and WT_MEMSIZE_TRANSFER is renamed to WT_MEM_TRANSFER.
-rw-r--r--src/btree/bt_split.c79
1 file changed, 27 insertions, 52 deletions
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 7ebcc18fd6e..5206fa055e4 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -8,17 +8,10 @@
#include "wt_internal.h"
-/*
- * Track allocation increments, matching the cache calculations, which add an
- * estimate of allocation overhead to every object.
- */
-#define WT_MEMSIZE_ADD(total, len) do { \
- total += (len); \
-} while (0)
-#define WT_MEMSIZE_TRANSFER(from_decr, to_incr, len) do { \
+#define WT_MEM_TRANSFER(from_decr, to_incr, len) do { \
size_t __len = (len); \
- WT_MEMSIZE_ADD(from_decr, __len); \
- WT_MEMSIZE_ADD(to_incr, __len); \
+ from_decr += __len; \
+ to_incr += __len; \
} while (0)
/*
@@ -296,10 +289,9 @@ __split_ref_deepen_move(WT_SESSION_IMPL *session,
ref->key.ikey = ikey;
} else {
WT_RET(__split_ovfl_key_cleanup(session, parent, ref));
- WT_MEMSIZE_ADD(*parent_decrp,
- sizeof(WT_IKEY) + ikey->size);
+ *parent_decrp += sizeof(WT_IKEY) + ikey->size;
}
- WT_MEMSIZE_ADD(*child_incrp, sizeof(WT_IKEY) + ikey->size);
+ *child_incrp += sizeof(WT_IKEY) + ikey->size;
}
/*
@@ -323,7 +315,7 @@ __split_ref_deepen_move(WT_SESSION_IMPL *session,
}
/* And finally, the WT_REF itself. */
- WT_MEMSIZE_TRANSFER(*parent_decrp, *child_incrp, sizeof(WT_REF));
+ WT_MEM_TRANSFER(*parent_decrp, *child_incrp, sizeof(WT_REF));
return (0);
}
@@ -432,7 +424,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
size = sizeof(WT_PAGE_INDEX) +
(children + SPLIT_CORRECT_2) * sizeof(WT_REF *);
WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
- WT_MEMSIZE_ADD(parent_incr, size);
+ parent_incr += size;
alloc_index->index = (WT_REF **)(alloc_index + 1);
alloc_index->entries = children + SPLIT_CORRECT_2;
alloc_index->index[0] = pindex->index[0];
@@ -441,7 +433,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
for (alloc_refp = alloc_index->index + SPLIT_CORRECT_1,
i = 0; i < children; ++alloc_refp, ++i) {
WT_ERR(__wt_calloc_one(session, alloc_refp));
- WT_MEMSIZE_ADD(parent_incr, sizeof(WT_REF));
+ parent_incr += sizeof(WT_REF);
}
/* Allocate child pages, and connect them into the new page index. */
@@ -466,7 +458,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
__wt_ref_key(parent, *parent_refp, &p, &size);
WT_ERR(
__wt_row_ikey(session, 0, p, size, &ref->key.ikey));
- WT_MEMSIZE_ADD(parent_incr, sizeof(WT_IKEY) + size);
+ parent_incr += sizeof(WT_IKEY) + size;
} else
ref->key.recno = (*parent_refp)->key.recno;
ref->state = WT_REF_MEM;
@@ -597,27 +589,13 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
*/
size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
WT_ERR(__split_safe_free(session, 0, pindex, size));
- WT_MEMSIZE_ADD(parent_decr, size);
+ parent_decr += size;
-#if 0
/*
* Adjust the parent's memory footprint.
*/
__wt_cache_page_inmem_incr(session, parent, parent_incr);
__wt_cache_page_inmem_decr(session, parent, parent_decr);
-#else
- /*
- * XXX
- * The code to track page sizes is fundamentally flawed in the face of
- * splits: for example, we don't add in an overhead allocation constant
- * when allocating WT_REF structures as pages are created, but the
- * calculations during split assume that correction. For now, ignore
- * our carefully calculated values and force the internal page size to
- * 5% of its current value.
- */
- size = parent->memory_footprint - (parent->memory_footprint / 20);
- __wt_cache_page_inmem_decr(session, parent, size);
-#endif
if (0) {
err: __wt_free_ref_index(session, parent, alloc_index, 1);
@@ -752,7 +730,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
/* In some cases, the underlying WT_REF has not yet been allocated. */
if (*refp == NULL) {
WT_RET(__wt_calloc_one(session, refp));
- WT_MEMSIZE_ADD(incr, sizeof(WT_REF));
+ incr += sizeof(WT_REF);
}
ref = *refp;
@@ -787,7 +765,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
ikey = multi->key.ikey;
WT_RET(__wt_row_ikey(session, 0,
WT_IKEY_DATA(ikey), ikey->size, &ref->key.ikey));
- WT_MEMSIZE_ADD(incr, sizeof(WT_IKEY) + ikey->size);
+ incr += sizeof(WT_IKEY) + ikey->size;
break;
default:
ref->key.recno = multi->key.recno;
@@ -899,7 +877,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
size = sizeof(WT_PAGE_INDEX) + result_entries * sizeof(WT_REF *);
WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
- WT_MEMSIZE_ADD(parent_incr, size);
+ parent_incr += size;
alloc_index->index = (WT_REF **)(alloc_index + 1);
alloc_index->entries = result_entries;
for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i) {
@@ -973,7 +951,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
size = sizeof(WT_IKEY) + ikey->size;
WT_TRET(__split_safe_free(
session, 0, ikey, size));
- WT_MEMSIZE_ADD(parent_decr, size);
+ parent_decr += size;
}
/*
* The page_del structure can be freed
@@ -991,7 +969,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
WT_TRET(__split_safe_free(
session, 0, next_ref, sizeof(WT_REF)));
- WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
+ parent_decr += sizeof(WT_REF);
}
}
@@ -1001,7 +979,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
WT_TRET(__split_safe_free(session, exclusive, pindex, size));
- WT_MEMSIZE_ADD(parent_decr, size);
+ parent_decr += size;
/*
* Row-store trees where the old version of the page is being discarded:
@@ -1203,8 +1181,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, 0, 0, &right));
WT_ERR(__wt_calloc_one(session, &right->pg_row_ins));
WT_ERR(__wt_calloc_one(session, &right->pg_row_ins[0]));
- WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD));
- WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD *));
+ right_incr += sizeof(WT_INSERT_HEAD);
+ right_incr += sizeof(WT_INSERT_HEAD *);
WT_ERR(__wt_calloc_one(session, &split_ref[1]));
child = split_ref[1];
@@ -1218,14 +1196,12 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
* We're swapping WT_REFs in the parent, adjust the accounting, and
* row store pages may have instantiated keys.
*/
- WT_MEMSIZE_ADD(parent_incr, sizeof(WT_REF));
- WT_MEMSIZE_ADD(
- parent_incr, sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins));
- WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
+ parent_incr += sizeof(WT_REF);
+ parent_incr += sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins);
+ parent_decr += sizeof(WT_REF);
if (page->type == WT_PAGE_ROW_LEAF || page->type == WT_PAGE_ROW_INT)
if ((ikey = __wt_ref_key_instantiated(ref)) != NULL)
- WT_MEMSIZE_ADD(
- parent_decr, sizeof(WT_IKEY) + ikey->size);
+ parent_decr += sizeof(WT_IKEY) + ikey->size;
/* The new page is dirty by definition. */
WT_ERR(__wt_page_modify_init(session, right));
@@ -1247,10 +1223,10 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
*/
for (i = 0; i < WT_SKIP_MAXDEPTH && ins_head->tail[i] == moved_ins; ++i)
;
- WT_MEMSIZE_TRANSFER(page_decr, right_incr, sizeof(WT_INSERT) +
+ WT_MEM_TRANSFER(page_decr, right_incr, sizeof(WT_INSERT) +
(size_t)i * sizeof(WT_INSERT *) + WT_INSERT_KEY_SIZE(moved_ins));
- WT_MEMSIZE_TRANSFER(page_decr, right_incr,
- __wt_update_list_memsize(moved_ins->upd));
+ WT_MEM_TRANSFER(
+ page_decr, right_incr, __wt_update_list_memsize(moved_ins->upd));
/*
* Allocation operations completed, move the last insert list item from
@@ -1497,11 +1473,10 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
* change in memory footprint. Row store pages have keys that may be
* instantiated, check for that.
*/
- WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
+ parent_decr += sizeof(WT_REF);
if (page->type == WT_PAGE_ROW_LEAF || page->type == WT_PAGE_ROW_INT)
if ((ikey = __wt_ref_key_instantiated(ref)) != NULL)
- WT_MEMSIZE_ADD(
- parent_decr, sizeof(WT_IKEY) + ikey->size);
+ parent_decr += sizeof(WT_IKEY) + ikey->size;
/* Split into the parent. */
WT_ERR(__split_parent(session,