summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith Bostic <keith.bostic@mongodb.com>2015-10-01 06:45:28 -0400
committerMichael Cahill <michael.cahill@mongodb.com>2015-10-06 15:37:06 +1100
commit01dbcf110a971b8155ad76ff3b2c8b528c1d9b73 (patch)
tree49ca4c1093eb3c17af3fd7268505b38dbfa35c17
parent457da5c312e6608e154d9c9485caf3342972e729 (diff)
downloadmongo-01dbcf110a971b8155ad76ff3b2c8b528c1d9b73.tar.gz
SERVER-20303 Require a minimum item count for in-memory splits.
Merge pull request #2236 from wiredtiger/SERVER-20303-mjc (cherry picked from commit f12d478229822ee19b5767e05e73083adbe095f4)
-rw-r--r--src/include/btree.i32
1 files changed, 22 insertions, 10 deletions
diff --git a/src/include/btree.i b/src/include/btree.i
index dad695f8628..edddcdd6fe4 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -946,7 +946,8 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_BTREE *btree;
WT_INSERT_HEAD *ins_head;
WT_INSERT *ins;
- int i;
+ size_t size;
+ int count;
btree = S2BT(session);
@@ -976,22 +977,33 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
return (false);
/*
- * There is no point splitting if the list is small, no deep items is
- * our heuristic for that. A 1/4 probability of adding a new skiplist
- * level, with level-0 always created, means there will be a 5th level
- * entry for roughly every 1024 entries in the list. If there are at
- * least 4 5th level entries (4K items), the list is large enough.
+ * There is no point doing an in-memory split unless there is a lot of
+ * data in the last skiplist on the page. Split if there are enough
+ * items and the skiplist does not fit within a single disk page.
+ *
+ * Rather than scanning the whole list, walk a higher level, which
+ * gives a sample of the items -- at level 0 we have all the items, at
+ * level 1 we have 1/4 and at level 2 we have 1/16th. If we see more
+ * than 30 items and more data than would fit in a disk page, split.
*/
-#define WT_MIN_SPLIT_SKIPLIST_DEPTH WT_MIN(5, WT_SKIP_MAXDEPTH - 1)
+#define WT_MIN_SPLIT_DEPTH 2
+#define WT_MIN_SPLIT_COUNT 30
+#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */
+
ins_head = page->pg_row_entries == 0 ?
WT_ROW_INSERT_SMALLEST(page) :
WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1);
if (ins_head == NULL)
return (false);
- for (i = 0, ins = ins_head->head[WT_MIN_SPLIT_SKIPLIST_DEPTH];
- ins != NULL; ins = ins->next[WT_MIN_SPLIT_SKIPLIST_DEPTH])
- if (++i == 4)
+ for (count = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH];
+ ins != NULL; ins = ins->next[WT_MIN_SPLIT_DEPTH]) {
+ count += WT_MIN_SPLIT_MULTIPLIER;
+ size += WT_MIN_SPLIT_MULTIPLIER *
+ (WT_INSERT_KEY_SIZE(ins) + WT_UPDATE_MEMSIZE(ins->upd));
+ if (count > WT_MIN_SPLIT_COUNT &&
+ size > (size_t)btree->maxleafpage)
return (true);
+ }
return (false);
}