diff options
author | Keith Bostic <keith@wiredtiger.com> | 2015-01-31 12:59:34 -0500 |
---|---|---|
committer | Keith Bostic <keith@wiredtiger.com> | 2015-01-31 12:59:34 -0500 |
commit | b0a828b262a2d0d3cf1361eed98aa25a1168a7a6 (patch) | |
tree | c3e12a7740682dba9243120409f14f172da6fdc1 | |
parent | b640366c28fc66744e482c20c16973cb052aef8e (diff) | |
download | mongo-b0a828b262a2d0d3cf1361eed98aa25a1168a7a6.tar.gz |
We no longer calculate allocation overhead per allocation chunk, revert
the workaround for the problem with page memory size calculations during
splits where we forced the new parent page memory size to 5% of its
current value; reference #1564, #1565. This fixes a problem where 5% of
a page's memory footprint isn't large enough to accommodate the cache
decrements that will be done in the page's future, leading to page
underflow.
Minor cleanups: we no longer calculate allocation overhead per
allocation chunk, the macro WT_MEMSIZE_ADD is no longer needed at all,
and WT_MEMSIZE_TRANSFER is renamed to WT_MEM_TRANSFER.
-rw-r--r-- | src/btree/bt_split.c | 79 |
1 files changed, 27 insertions, 52 deletions
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 7ebcc18fd6e..5206fa055e4 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -8,17 +8,10 @@ #include "wt_internal.h" -/* - * Track allocation increments, matching the cache calculations, which add an - * estimate of allocation overhead to every object. - */ -#define WT_MEMSIZE_ADD(total, len) do { \ - total += (len); \ -} while (0) -#define WT_MEMSIZE_TRANSFER(from_decr, to_incr, len) do { \ +#define WT_MEM_TRANSFER(from_decr, to_incr, len) do { \ size_t __len = (len); \ - WT_MEMSIZE_ADD(from_decr, __len); \ - WT_MEMSIZE_ADD(to_incr, __len); \ + from_decr += __len; \ + to_incr += __len; \ } while (0) /* @@ -296,10 +289,9 @@ __split_ref_deepen_move(WT_SESSION_IMPL *session, ref->key.ikey = ikey; } else { WT_RET(__split_ovfl_key_cleanup(session, parent, ref)); - WT_MEMSIZE_ADD(*parent_decrp, - sizeof(WT_IKEY) + ikey->size); + *parent_decrp += sizeof(WT_IKEY) + ikey->size; } - WT_MEMSIZE_ADD(*child_incrp, sizeof(WT_IKEY) + ikey->size); + *child_incrp += sizeof(WT_IKEY) + ikey->size; } /* @@ -323,7 +315,7 @@ __split_ref_deepen_move(WT_SESSION_IMPL *session, } /* And finally, the WT_REF itself. 
*/ - WT_MEMSIZE_TRANSFER(*parent_decrp, *child_incrp, sizeof(WT_REF)); + WT_MEM_TRANSFER(*parent_decrp, *child_incrp, sizeof(WT_REF)); return (0); } @@ -432,7 +424,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children) size = sizeof(WT_PAGE_INDEX) + (children + SPLIT_CORRECT_2) * sizeof(WT_REF *); WT_ERR(__wt_calloc(session, 1, size, &alloc_index)); - WT_MEMSIZE_ADD(parent_incr, size); + parent_incr += size; alloc_index->index = (WT_REF **)(alloc_index + 1); alloc_index->entries = children + SPLIT_CORRECT_2; alloc_index->index[0] = pindex->index[0]; @@ -441,7 +433,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children) for (alloc_refp = alloc_index->index + SPLIT_CORRECT_1, i = 0; i < children; ++alloc_refp, ++i) { WT_ERR(__wt_calloc_one(session, alloc_refp)); - WT_MEMSIZE_ADD(parent_incr, sizeof(WT_REF)); + parent_incr += sizeof(WT_REF); } /* Allocate child pages, and connect them into the new page index. */ @@ -466,7 +458,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children) __wt_ref_key(parent, *parent_refp, &p, &size); WT_ERR( __wt_row_ikey(session, 0, p, size, &ref->key.ikey)); - WT_MEMSIZE_ADD(parent_incr, sizeof(WT_IKEY) + size); + parent_incr += sizeof(WT_IKEY) + size; } else ref->key.recno = (*parent_refp)->key.recno; ref->state = WT_REF_MEM; @@ -597,27 +589,13 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children) */ size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *); WT_ERR(__split_safe_free(session, 0, pindex, size)); - WT_MEMSIZE_ADD(parent_decr, size); + parent_decr += size; -#if 0 /* * Adjust the parent's memory footprint. 
*/ __wt_cache_page_inmem_incr(session, parent, parent_incr); __wt_cache_page_inmem_decr(session, parent, parent_decr); -#else - /* - * XXX - * The code to track page sizes is fundamentally flawed in the face of - * splits: for example, we don't add in an overhead allocation constant - * when allocating WT_REF structures as pages are created, but the - * calculations during split assume that correction. For now, ignore - * our carefully calculated values and force the internal page size to - * 5% of its current value. - */ - size = parent->memory_footprint - (parent->memory_footprint / 20); - __wt_cache_page_inmem_decr(session, parent, size); -#endif if (0) { err: __wt_free_ref_index(session, parent, alloc_index, 1); @@ -752,7 +730,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, /* In some cases, the underlying WT_REF has not yet been allocated. */ if (*refp == NULL) { WT_RET(__wt_calloc_one(session, refp)); - WT_MEMSIZE_ADD(incr, sizeof(WT_REF)); + incr += sizeof(WT_REF); } ref = *refp; @@ -787,7 +765,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, ikey = multi->key.ikey; WT_RET(__wt_row_ikey(session, 0, WT_IKEY_DATA(ikey), ikey->size, &ref->key.ikey)); - WT_MEMSIZE_ADD(incr, sizeof(WT_IKEY) + ikey->size); + incr += sizeof(WT_IKEY) + ikey->size; break; default: ref->key.recno = multi->key.recno; @@ -899,7 +877,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, */ size = sizeof(WT_PAGE_INDEX) + result_entries * sizeof(WT_REF *); WT_ERR(__wt_calloc(session, 1, size, &alloc_index)); - WT_MEMSIZE_ADD(parent_incr, size); + parent_incr += size; alloc_index->index = (WT_REF **)(alloc_index + 1); alloc_index->entries = result_entries; for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i) { @@ -973,7 +951,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, size = sizeof(WT_IKEY) + ikey->size; WT_TRET(__split_safe_free( session, 0, ikey, size)); - WT_MEMSIZE_ADD(parent_decr, size); + parent_decr += size; } 
/* * The page_del structure can be freed @@ -991,7 +969,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_TRET(__split_safe_free( session, 0, next_ref, sizeof(WT_REF))); - WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF)); + parent_decr += sizeof(WT_REF); } } @@ -1001,7 +979,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, */ size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *); WT_TRET(__split_safe_free(session, exclusive, pindex, size)); - WT_MEMSIZE_ADD(parent_decr, size); + parent_decr += size; /* * Row-store trees where the old version of the page is being discarded: @@ -1203,8 +1181,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp) WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, 0, 0, &right)); WT_ERR(__wt_calloc_one(session, &right->pg_row_ins)); WT_ERR(__wt_calloc_one(session, &right->pg_row_ins[0])); - WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD)); - WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD *)); + right_incr += sizeof(WT_INSERT_HEAD); + right_incr += sizeof(WT_INSERT_HEAD *); WT_ERR(__wt_calloc_one(session, &split_ref[1])); child = split_ref[1]; @@ -1218,14 +1196,12 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp) * We're swapping WT_REFs in the parent, adjust the accounting, and * row store pages may have instantiated keys. */ - WT_MEMSIZE_ADD(parent_incr, sizeof(WT_REF)); - WT_MEMSIZE_ADD( - parent_incr, sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins)); - WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF)); + parent_incr += sizeof(WT_REF); + parent_incr += sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins); + parent_decr += sizeof(WT_REF); if (page->type == WT_PAGE_ROW_LEAF || page->type == WT_PAGE_ROW_INT) if ((ikey = __wt_ref_key_instantiated(ref)) != NULL) - WT_MEMSIZE_ADD( - parent_decr, sizeof(WT_IKEY) + ikey->size); + parent_decr += sizeof(WT_IKEY) + ikey->size; /* The new page is dirty by definition. 
*/ WT_ERR(__wt_page_modify_init(session, right)); @@ -1247,10 +1223,10 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp) */ for (i = 0; i < WT_SKIP_MAXDEPTH && ins_head->tail[i] == moved_ins; ++i) ; - WT_MEMSIZE_TRANSFER(page_decr, right_incr, sizeof(WT_INSERT) + + WT_MEM_TRANSFER(page_decr, right_incr, sizeof(WT_INSERT) + (size_t)i * sizeof(WT_INSERT *) + WT_INSERT_KEY_SIZE(moved_ins)); - WT_MEMSIZE_TRANSFER(page_decr, right_incr, - __wt_update_list_memsize(moved_ins->upd)); + WT_MEM_TRANSFER( + page_decr, right_incr, __wt_update_list_memsize(moved_ins->upd)); /* * Allocation operations completed, move the last insert list item from @@ -1497,11 +1473,10 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive) * change in memory footprint. Row store pages have keys that may be * instantiated, check for that. */ - WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF)); + parent_decr += sizeof(WT_REF); if (page->type == WT_PAGE_ROW_LEAF || page->type == WT_PAGE_ROW_INT) if ((ikey = __wt_ref_key_instantiated(ref)) != NULL) - WT_MEMSIZE_ADD( - parent_decr, sizeof(WT_IKEY) + ikey->size); + parent_decr += sizeof(WT_IKEY) + ikey->size; /* Split into the parent. */ WT_ERR(__split_parent(session, |