diff options
author | Sulabh Mahajan <sulabh.mahajan@mongodb.com> | 2016-07-06 14:23:25 +1000 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2016-07-06 14:23:25 +1000 |
commit | 67d614b0b6faec17b8396b8dfce4acd50958d7ef (patch) | |
tree | 08120259fa6e05da0d6eabb84f2ea81f4643a98c | |
parent | 1b6a9220c3ce948f902c6bc44660b76982c7e621 (diff) | |
download | mongo-67d614b0b6faec17b8396b8dfce4acd50958d7ef.tar.gz |
WT-2026 Allow forced eviction to split pages when BTREE_NO_EVICTION set (#2840)
Previously, getting exclusive access for a checkpoint blocked page splits, which allowed pages to grow larger than the configured memory_page_max.
-rw-r--r-- | src/btree/bt_read.c | 6 | ||||
-rw-r--r-- | src/btree/bt_sync.c | 12 | ||||
-rw-r--r-- | src/evict/evict_page.c | 4 | ||||
-rw-r--r-- | src/include/btree.h | 11 |
4 files changed, 26 insertions, 7 deletions
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 94f3440d06e..3d396d5ae5b 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -548,10 +548,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags * if the page qualifies for forced eviction and update * the page's generation number. If eviction isn't being * done on this file, we're done. + * In-memory split of large pages is allowed while + * no_eviction is set on btree, whereas reconciliation + * is not allowed. */ if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || - F_ISSET(btree, WT_BTREE_NO_EVICTION)) + (F_ISSET(btree, WT_BTREE_NO_EVICTION) && + !F_ISSET(btree, WT_BTREE_NO_RECONCILE))) goto skip_evict; /* diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index c1a8ec3f73b..1371ed5ad49 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -128,7 +128,17 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) */ WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE); - WT_ERR(__wt_evict_file_exclusive_on(session)); + /* + * Sync for checkpoint allows splits to happen while the queue + * is being drained, but not reconciliation. We need to do this, + * since draining the queue can take long enough for hot pages + * to grow significantly larger than the configured maximum + * size. + */ + F_SET(btree, WT_BTREE_NO_RECONCILE); + ret = __wt_evict_file_exclusive_on(session); + F_CLR(btree, WT_BTREE_NO_RECONCILE); + WT_ERR(ret); __wt_evict_file_exclusive_off(session); WT_PUBLISH(btree->checkpointing, WT_CKPT_RUNNING); diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index f5c900684a3..e661f27637e 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -467,6 +467,10 @@ __evict_review( */ if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) return (__wt_split_insert(session, ref)); + + /* We are done if reconciliation is disabled. 
*/ + if (F_ISSET(S2BT(session), WT_BTREE_NO_RECONCILE)) + return (EBUSY); } /* If the page is clean, we're done and we can evict. */ diff --git a/src/include/btree.h b/src/include/btree.h index 96097115afd..a8b3cae3e55 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -156,11 +156,12 @@ struct __wt_btree { #define WT_BTREE_NO_CHECKPOINT 0x00800 /* Disable checkpoints */ #define WT_BTREE_NO_EVICTION 0x01000 /* Disable eviction */ #define WT_BTREE_NO_LOGGING 0x02000 /* Disable logging */ -#define WT_BTREE_REBALANCE 0x04000 /* Handle is for rebalance */ -#define WT_BTREE_SALVAGE 0x08000 /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x10000 /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x20000 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x40000 /* Handle is for verify */ +#define WT_BTREE_NO_RECONCILE 0x04000 /* Allow splits, even with no evict */ +#define WT_BTREE_REBALANCE 0x08000 /* Handle is for rebalance */ +#define WT_BTREE_SALVAGE 0x10000 /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 0x20000 /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x40000 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x80000 /* Handle is for verify */ uint32_t flags; }; |