summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Keith Bostic <keith@wiredtiger.com> 2015-09-30 16:51:35 +0000
committer Keith Bostic <keith@wiredtiger.com> 2015-09-30 16:51:35 +0000
commit a2349b6a03a68fa914c8c9acabae592da0302955 (patch)
tree 7eb6b6d07b2f50a16f5ebdfda87323791428df15
parent ffcb88877201021dd6cb9062ec8acf2b53943798 (diff)
download mongo-a2349b6a03a68fa914c8c9acabae592da0302955.tar.gz
SERVER-20303: This change tunes for a test with a maximum page size of
10MB and multi-threaded append of 100K key/value pairs. That means 100 inserts are sufficient to trigger forced eviction. The previous test would refuse in-memory splits unless there were approximately 4K items on the insert list; that is, it assumed small insert objects. Change the code to allow in-memory splits for both large numbers of items and large insert objects.
-rw-r--r-- src/include/btree.i 27
1 files changed, 18 insertions, 9 deletions
diff --git a/src/include/btree.i b/src/include/btree.i
index c7466019e14..f43fb73dc5b 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -977,6 +977,7 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_BTREE *btree;
WT_INSERT_HEAD *ins_head;
WT_INSERT *ins;
+ uint64_t mem;
int i;
btree = S2BT(session);
@@ -1007,25 +1008,33 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
return (false);
/*
- * There is no point splitting if the list is small, no deep items is
- * our heuristic for that. A 1/4 probability of adding a new skiplist
- * level, with level-0 always created, means there will be a 5th level
- * entry for roughly every 1024 entries in the list. If there are at
- * least 4 5th level entries (4K items), the list is large enough.
+ * There is no point in splitting in-memory if the insert list doesn't
+ * represent most of the page footprint. Split if there are many items,
+ * or if there are enough items and the items are a significant part
+ * of the page's footprint. A 1/4 probability of adding a new skiplist
+ * level (with level-0 always created), implies a 2nd level entry for
+ * every 16 entries in the list. If there are at least 256 2nd level
+ * entries (4K items), or if the update list hits the maximum leaf page
+ * size, split. The reason we're walking the 2nd level list (rather than
+ * walking, for example, the 5th level list and looking for at least 4
+ * entries), is it combines the number of entries test and the size of
+ * the entries test in one loop.
*/
-#define WT_MIN_SPLIT_SKIPLIST_DEPTH WT_MIN(5, WT_SKIP_MAXDEPTH - 1)
+#define WT_MIN_SPLIT_SKIPLIST_DEPTH WT_MIN(2, WT_SKIP_MAXDEPTH - 1)
ins_head = page->pg_row_entries == 0 ?
WT_ROW_INSERT_SMALLEST(page) :
WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1);
if (ins_head == NULL)
return (false);
- for (i = 0, ins = ins_head->head[WT_MIN_SPLIT_SKIPLIST_DEPTH];
- ins != NULL; ins = ins->next[WT_MIN_SPLIT_SKIPLIST_DEPTH])
- if (++i == 4) {
+ for (i = 0, mem = 0, ins = ins_head->head[WT_MIN_SPLIT_SKIPLIST_DEPTH];
+ ins != NULL; ins = ins->next[WT_MIN_SPLIT_SKIPLIST_DEPTH]) {
+ mem += WT_UPDATE_MEMSIZE(ins->upd);
+ if (++i == 256 || mem > btree->maxleafpage) {
WT_STAT_FAST_CONN_INCR(session, cache_inmem_splittable);
WT_STAT_FAST_DATA_INCR(session, cache_inmem_splittable);
return (true);
}
+ }
return (false);
}