From c62a2810e54ed4ac7b98c75896b614d3ff3eb619 Mon Sep 17 00:00:00 2001 From: Ramon Fernandez Date: Mon, 29 Feb 2016 14:17:00 -0500 Subject: Import wiredtiger-wiredtiger-mongodb-3.0.9-3-g3dbc6c6.tar.gz from wiredtiger branch mongodb-3.0 ref: 62b3ca8..3dbc6c6 WT-2130 Improve on-disk page utilization with random workloads SERVER-22898 High fragmentation on WiredTiger databases under write workloads --- src/third_party/wiredtiger/src/include/misc.h | 3 +++ .../wiredtiger/src/reconcile/rec_write.c | 26 +++++++++++++++++----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h index e2b46d0dbdc..a099213e004 100644 --- a/src/third_party/wiredtiger/src/include/misc.h +++ b/src/third_party/wiredtiger/src/include/misc.h @@ -47,6 +47,9 @@ #define WT_ALIGN(n, v) \ ((((uintmax_t)(n)) + ((v) - 1)) & ~(((uintmax_t)(v)) - 1)) +#define WT_ALIGN_NEAREST(n, v) \ + ((((uintmax_t)(n)) + ((v) / 2)) & ~(((uintmax_t)(v)) - 1)) + /* Min, max. */ #define WT_MIN(a, b) ((a) < (b) ? (a) : (b)) #define WT_MAX(a, b) ((a) < (b) ? (b) : (a)) diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 67b43057c8a..a2a8a330c1d 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -1628,15 +1628,18 @@ __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) * we don't waste space when we write). */ a = maxpagesize; /* Don't overflow. */ - split_size = (uint32_t) - WT_ALIGN((a * (u_int)btree->split_pct) / 100, btree->allocsize); + split_size = (uint32_t)WT_ALIGN_NEAREST( + (a * (u_int)btree->split_pct) / 100, btree->allocsize); /* - * If the result of that calculation is the same as the allocation unit - * (that happens if the maximum size is the same size as an allocation - * unit, use a percentage of the maximum page size).
+ * Respect the configured split percentage if the calculated split + * size is either zero or a full page. The user has either configured + * an allocation size that matches the page size, or a split + * percentage that is close to zero or one hundred. Rounding is going + * to provide a worse outcome than having a split point that doesn't + * fall on an allocation size boundary in those cases. */ - if (split_size == btree->allocsize) + if (split_size == 0 || split_size == maxpagesize) split_size = (uint32_t)((a * (u_int)btree->split_pct) / 100); return (split_size); @@ -2957,6 +2960,17 @@ skip_check_complete: } } + bnd->entries = r->entries; + /* Output a verbose message if we create a page without many entries */ + if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6) + WT_ERR(__wt_verbose(session, WT_VERB_SPLIT, + "Reconciliation creating a page with %" PRIu32 + " entries, memory footprint %" PRIu64 + ", page count %" PRIu32 ", %s, split state: %d\n", + r->entries, r->page->memory_footprint, r->bnd_next, + F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint", + r->bnd_state)); + WT_ERR(__wt_bt_write(session, buf, addr, &addr_size, false, bnd->already_compressed)); WT_ERR(__wt_strndup(session, addr, addr_size, &bnd->addr.addr)); -- cgit v1.2.1