summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Keith Bostic <keith@wiredtiger.com> 2013-03-07 17:25:29 -0500
committer: Keith Bostic <keith@wiredtiger.com> 2013-03-07 17:25:29 -0500
commit: 53d506adda74f5c5d92c9b1ab0dd57ec7c606fe3 (patch)
tree: d30d9e17911279bf100a2c901bed6571f82a3446
parent: d44e31cc0cf419ae65848ee2522b1e4a35fa5219 (diff)
download: mongo-53d506adda74f5c5d92c9b1ab0dd57ec7c606fe3.tar.gz
Make the split percentage apply even when the resulting split page size
is smaller than the allocation size.
-rw-r--r-- src/btree/bt_handle.c 50
-rw-r--r-- src/btree/rec_write.c 7
-rw-r--r-- src/include/btree.h 7
-rw-r--r-- src/include/extern.h 1
4 files changed, 42 insertions, 23 deletions
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 77f7f253f8d..e46ec867f2b 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -280,9 +280,6 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
break;
}
- WT_RET(__wt_config_getones(session, config, "split_pct", &cval));
- btree->split_pct = (u_int)cval.val;
-
WT_RET(__wt_config_getones(session, config, "block_compressor", &cval));
if (cval.len > 0) {
TAILQ_FOREACH(ncomp, &conn->compqh, q)
@@ -546,7 +543,7 @@ __btree_page_sizes(WT_SESSION_IMPL *session, const char *config)
{
WT_BTREE *btree;
WT_CONFIG_ITEM cval;
- uint32_t intl_split_size, leaf_split_size, split_pct;
+ uint32_t intl_split_size, leaf_split_size;
btree = session->btree;
@@ -564,6 +561,9 @@ __btree_page_sizes(WT_SESSION_IMPL *session, const char *config)
session, config, "leaf_item_max", &cval));
btree->maxleafitem = (uint32_t)cval.val;
+ WT_RET(__wt_config_getones(session, config, "split_pct", &cval));
+ btree->split_pct = (u_int)cval.val;
+
/*
* When a page is forced to split, we want at least 50 entries on its
* parent.
@@ -601,12 +601,8 @@ __btree_page_sizes(WT_SESSION_IMPL *session, const char *config)
* Set the split percentage: reconciliation splits to a smaller-than-
* maximum page size so we don't split every time a new entry is added.
*/
- WT_RET(__wt_config_getones(session, config, "split_pct", &cval));
- split_pct = (uint32_t)cval.val;
- intl_split_size = WT_SPLIT_PAGE_SIZE(
- btree->maxintlpage, btree->allocsize, split_pct);
- leaf_split_size = WT_SPLIT_PAGE_SIZE(
- btree->maxleafpage, btree->allocsize, split_pct);
+ intl_split_size = __wt_split_page_size(btree, btree->maxintlpage);
+ leaf_split_size = __wt_split_page_size(btree, btree->maxleafpage);
/*
* Default values for internal and leaf page items: make sure at least
@@ -640,14 +636,44 @@ __btree_page_sizes(WT_SESSION_IMPL *session, const char *config)
*/
if (btree->maxintlitem > intl_split_size / 2)
return (pse2(session, "internal",
- btree->maxintlpage, btree->maxintlitem, split_pct));
+ btree->maxintlpage, btree->maxintlitem, btree->split_pct));
if (btree->maxleafitem > leaf_split_size / 2)
return (pse2(session, "leaf",
- btree->maxleafpage, btree->maxleafitem, split_pct));
+ btree->maxleafpage, btree->maxleafitem, btree->split_pct));
return (0);
}
+/*
+ * __wt_split_page_size --
+ * Split page size calculation: we don't want to repeatedly split every
+ * time a new entry is added, so we split to a smaller-than-maximum page size.
+ */
+uint32_t
+__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize)
+{
+ uintmax_t a;
+ uint32_t split_size;
+
+ /*
+ * Ideally, the split page size is some percentage of the maximum page
+ * size rounded to an allocation unit (round to an allocation unit so
+ * we don't waste space when we write).
+ */
+ a = maxpagesize; /* Don't overflow. */
+ split_size = WT_ALIGN32((a * btree->split_pct) / 100, btree->allocsize);
+
+ /*
+ * If the result of that calculation is the same as the allocation unit
+ * (that happens if the maximum size is the same size as an allocation
+ * unit), use a percentage of the maximum page size.
+ */
+ if (split_size == btree->allocsize)
+ split_size = (btree->allocsize * btree->split_pct) / 100;
+
+ return (split_size);
+}
+
static int
pse1(WT_SESSION_IMPL *session, const char *type, uint32_t max, uint32_t ovfl)
{
diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c
index 5dce1b4b0a9..441a9364e06 100644
--- a/src/btree/rec_write.c
+++ b/src/btree/rec_write.c
@@ -1092,10 +1092,9 @@ __rec_split_init(WT_SESSION_IMPL *session,
if (r->raw_compression)
r->split_size = 0;
else if (page->type == WT_PAGE_COL_FIX)
- r->split_size = r->page_size;
+ r->split_size = r->page_size_max;
else
- r->split_size = WT_SPLIT_PAGE_SIZE(
- r->page_size, btree->allocsize, btree->split_pct);
+ r->split_size = __wt_split_page_size(btree, r->page_size_max);
/*
* If the maximum page size is the same as the split page size, either
@@ -1835,7 +1834,7 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
* WT_PAGE_HEADER header onto the scratch buffer, most of the header
* information remains unchanged between the pages.
*/
- WT_RET(__wt_scr_alloc(session, r->split_size, &tmp));
+ WT_RET(__wt_scr_alloc(session, r->page_size_max, &tmp));
dsk = tmp->mem;
memcpy(dsk, r->dsk.mem, WT_PAGE_HEADER_SIZE);
diff --git a/src/include/btree.h b/src/include/btree.h
index 7ed1ceaf5f8..2070f20f294 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -40,13 +40,6 @@
#define WT_BTREE_MAX_ADDR_COOKIE 255 /* Maximum address cookie */
/*
- * Split page size calculation -- we don't want to repeatedly split every time
- * a new entry is added, so we split to a smaller-than-maximum page size.
- */
-#define WT_SPLIT_PAGE_SIZE(pagesize, allocsize, pct) \
- WT_ALIGN32(((uintmax_t)(pagesize) * (pct)) / 100, allocsize)
-
-/*
* XXX
* The server threads use their own WT_SESSION_IMPL handles because they may
* want to block (for example, the eviction server calls reconciliation, and
diff --git a/src/include/extern.h b/src/include/extern.h
index f09a6c7c165..111a5152318 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -272,6 +272,7 @@ extern int __wt_btree_get_memsize(WT_SESSION_IMPL *session,
uint32_t **memsizep);
extern int __wt_btree_release_memsize(WT_SESSION_IMPL *session,
WT_BTREE *btree);
+extern uint32_t __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize);
extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session,
const char *config);
extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session);