diff options
author | Keith Bostic <keith.bostic@mongodb.com> | 2015-10-01 06:45:28 -0400 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2015-10-06 15:37:06 +1100 |
commit | 01dbcf110a971b8155ad76ff3b2c8b528c1d9b73 (patch) | |
tree | 49ca4c1093eb3c17af3fd7268505b38dbfa35c17 | |
parent | 457da5c312e6608e154d9c9485caf3342972e729 (diff) | |
download | mongo-01dbcf110a971b8155ad76ff3b2c8b528c1d9b73.tar.gz |
SERVER-20303 Require a minimum item count for in-memory splits.
Merge pull request #2236 from wiredtiger/SERVER-20303-mjc
(cherry picked from commit f12d478229822ee19b5767e05e73083adbe095f4)
-rw-r--r-- | src/include/btree.i | 32 |
1 files changed, 22 insertions, 10 deletions
diff --git a/src/include/btree.i b/src/include/btree.i index dad695f8628..edddcdd6fe4 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -946,7 +946,8 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) WT_BTREE *btree; WT_INSERT_HEAD *ins_head; WT_INSERT *ins; - int i; + size_t size; + int count; btree = S2BT(session); @@ -976,22 +977,33 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) return (false); /* - * There is no point splitting if the list is small, no deep items is - * our heuristic for that. A 1/4 probability of adding a new skiplist - * level, with level-0 always created, means there will be a 5th level - * entry for roughly every 1024 entries in the list. If there are at - * least 4 5th level entries (4K items), the list is large enough. + * There is no point doing an in-memory split unless there is a lot of + * data in the last skiplist on the page. Split if there are enough + * items and the skiplist does not fit within a single disk page. + * + * Rather than scanning the whole list, walk a higher level, which + * gives a sample of the items -- at level 0 we have all the items, at + * level 1 we have 1/4 and at level 2 we have 1/16th. If we see more + * than 30 items and more data than would fit in a disk page, split. */ -#define WT_MIN_SPLIT_SKIPLIST_DEPTH WT_MIN(5, WT_SKIP_MAXDEPTH - 1) +#define WT_MIN_SPLIT_DEPTH 2 +#define WT_MIN_SPLIT_COUNT 30 +#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */ + ins_head = page->pg_row_entries == 0 ? WT_ROW_INSERT_SMALLEST(page) : WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1); if (ins_head == NULL) return (false); - for (i = 0, ins = ins_head->head[WT_MIN_SPLIT_SKIPLIST_DEPTH]; - ins != NULL; ins = ins->next[WT_MIN_SPLIT_SKIPLIST_DEPTH]) - if (++i == 4) + for (count = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH]; + ins != NULL; ins = ins->next[WT_MIN_SPLIT_DEPTH]) { + count += WT_MIN_SPLIT_MULTIPLIER; + size += WT_MIN_SPLIT_MULTIPLIER * + (WT_INSERT_KEY_SIZE(ins) + WT_UPDATE_MEMSIZE(ins->upd)); + if (count > WT_MIN_SPLIT_COUNT && + size > (size_t)btree->maxleafpage) return (true); + } return (false); } |