SERVER-20303: This change tunes for a test with a maximum page size of 10MB and multi-threaded append of 100KB key/value pairs. That means 100 inserts are sufficient to trigger forced eviction, but the previous test would refuse in-memory splits unless there were approximately 4K items on the insert list; that is, it assumed small insert objects. Change the code to allow in-memory splits for both large numbers of items and large insert objects.
author: Keith Bostic <keith@wiredtiger.com> 2015-09-30 16:51:35 +0000
committer: Keith Bostic <keith@wiredtiger.com> 2015-09-30 16:51:35 +0000
commit: a2349b6a03a68fa914c8c9acabae592da0302955 (patch)
tree: 7eb6b6d07b2f50a16f5ebdfda87323791428df15
parent: ffcb88877201021dd6cb9062ec8acf2b53943798 (diff)
download: mongo-a2349b6a03a68fa914c8c9acabae592da0302955.tar.gz
1 files changed, 18 insertions, 9 deletions
diff --git a/src/include/btree.i b/src/include/btree.i
index c7466019e14..f43fb73dc5b 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -977,6 +977,7 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
 	WT_BTREE *btree;
 	WT_INSERT_HEAD *ins_head;
 	WT_INSERT *ins;
+	uint64_t mem;
 	int i;
 
 	btree = S2BT(session);
@@ -1007,25 +1008,33 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
 		return (false);
 
 	/*
-	 * There is no point splitting if the list is small, no deep items is
-	 * our heuristic for that. A 1/4 probability of adding a new skiplist
-	 * level, with level-0 always created, means there will be a 5th level
-	 * entry for roughly every 1024 entries in the list. If there are at
-	 * least 4 5th level entries (4K items), the list is large enough.
+	 * There is no point in splitting in-memory if the insert list doesn't 
+	 * represent most of the page footprint. Split if there are many items,
+	 * or if there are enough items and the items are a significant part
+	 * of the page's footprint. A 1/4 probability of adding a new skiplist
+	 * level (with level-0 always created), implies a 2nd level entry for
+	 * every 16 entries in the list. If there are at least 256 2nd level
+	 * entries (4K items), or if the update list hits the maximum leaf page
+	 * size, split. The reason we're walking the 2nd level list (rather than
+	 * walking, for example, the 5th level list and looking for at least 4
+	 * entries), is it combines the number of entries test and the size of
+	 * the entries test in one loop.
 	 */
-#define	WT_MIN_SPLIT_SKIPLIST_DEPTH	WT_MIN(5, WT_SKIP_MAXDEPTH - 1)
+#define	WT_MIN_SPLIT_SKIPLIST_DEPTH	WT_MIN(2, WT_SKIP_MAXDEPTH - 1)
 	ins_head = page->pg_row_entries == 0 ?
 	    WT_ROW_INSERT_SMALLEST(page) :
 	    WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1);
 	if (ins_head == NULL)
 		return (false);
-	for (i = 0, ins = ins_head->head[WT_MIN_SPLIT_SKIPLIST_DEPTH];
-	    ins != NULL; ins = ins->next[WT_MIN_SPLIT_SKIPLIST_DEPTH])
-		if (++i == 4) {
+	for (i = 0, mem = 0, ins = ins_head->head[WT_MIN_SPLIT_SKIPLIST_DEPTH];
+	    ins != NULL; ins = ins->next[WT_MIN_SPLIT_SKIPLIST_DEPTH]) {
+		mem += WT_UPDATE_MEMSIZE(ins->upd);
+		if (++i == 256 || mem > btree->maxleafpage) {
 			WT_STAT_FAST_CONN_INCR(session, cache_inmem_splittable);
 			WT_STAT_FAST_DATA_INCR(session, cache_inmem_splittable);
 			return (true);
 		}
+	}
 	return (false);
 }
author	Keith Bostic <keith@wiredtiger.com>	2015-09-30 16:51:35 +0000
committer	Keith Bostic <keith@wiredtiger.com>	2015-09-30 16:51:35 +0000
commit	a2349b6a03a68fa914c8c9acabae592da0302955 (patch)
tree	7eb6b6d07b2f50a16f5ebdfda87323791428df15
parent	ffcb88877201021dd6cb9062ec8acf2b53943798 (diff)
download	mongo-a2349b6a03a68fa914c8c9acabae592da0302955.tar.gz