summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sulabh Mahajan <sulabh.mahajan@mongodb.com> 2016-07-06 14:23:25 +1000
committer Alex Gorrod <alexander.gorrod@mongodb.com> 2016-07-06 14:23:25 +1000
commit 67d614b0b6faec17b8396b8dfce4acd50958d7ef (patch)
tree 08120259fa6e05da0d6eabb84f2ea81f4643a98c
parent 1b6a9220c3ce948f902c6bc44660b76982c7e621 (diff)
download mongo-67d614b0b6faec17b8396b8dfce4acd50958d7ef.tar.gz
WT-2026 Allow forced eviction to split pages when BTREE_NO_EVICTION set (#2840)
Previously, getting exclusive access for a checkpoint blocked page splits, which allowed pages to grow larger than the configured memory_page_max.
-rw-r--r-- src/btree/bt_read.c 6
-rw-r--r-- src/btree/bt_sync.c 12
-rw-r--r-- src/evict/evict_page.c 4
-rw-r--r-- src/include/btree.h 11
4 files changed, 26 insertions, 7 deletions
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index 94f3440d06e..3d396d5ae5b 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -548,10 +548,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
* if the page qualifies for forced eviction and update
* the page's generation number. If eviction isn't being
* done on this file, we're done.
+ * In-memory split of large pages is allowed while
+ * no_eviction is set on btree, whereas reconciliation
+ * is not allowed.
*/
if (LF_ISSET(WT_READ_NO_EVICT) ||
F_ISSET(session, WT_SESSION_NO_EVICTION) ||
- F_ISSET(btree, WT_BTREE_NO_EVICTION))
+ (F_ISSET(btree, WT_BTREE_NO_EVICTION) &&
+ !F_ISSET(btree, WT_BTREE_NO_RECONCILE)))
goto skip_evict;
/*
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index c1a8ec3f73b..1371ed5ad49 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -128,7 +128,17 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
*/
WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE);
- WT_ERR(__wt_evict_file_exclusive_on(session));
+ /*
+ * Sync for checkpoint allows splits to happen while the queue
+ * is being drained, but not reconciliation. We need to do this,
+ * since draining the queue can take long enough for hot pages
+ * to grow significantly larger than the configured maximum
+ * size.
+ */
+ F_SET(btree, WT_BTREE_NO_RECONCILE);
+ ret = __wt_evict_file_exclusive_on(session);
+ F_CLR(btree, WT_BTREE_NO_RECONCILE);
+ WT_ERR(ret);
__wt_evict_file_exclusive_off(session);
WT_PUBLISH(btree->checkpointing, WT_CKPT_RUNNING);
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index f5c900684a3..e661f27637e 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -467,6 +467,10 @@ __evict_review(
*/
if (LF_ISSET(WT_EVICT_INMEM_SPLIT))
return (__wt_split_insert(session, ref));
+
+ /* We are done if reconciliation is disabled. */
+ if (F_ISSET(S2BT(session), WT_BTREE_NO_RECONCILE))
+ return (EBUSY);
}
/* If the page is clean, we're done and we can evict. */
diff --git a/src/include/btree.h b/src/include/btree.h
index 96097115afd..a8b3cae3e55 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -156,11 +156,12 @@ struct __wt_btree {
#define WT_BTREE_NO_CHECKPOINT 0x00800 /* Disable checkpoints */
#define WT_BTREE_NO_EVICTION 0x01000 /* Disable eviction */
#define WT_BTREE_NO_LOGGING 0x02000 /* Disable logging */
-#define WT_BTREE_REBALANCE 0x04000 /* Handle is for rebalance */
-#define WT_BTREE_SALVAGE 0x08000 /* Handle is for salvage */
-#define WT_BTREE_SKIP_CKPT 0x10000 /* Handle skipped checkpoint */
-#define WT_BTREE_UPGRADE 0x20000 /* Handle is for upgrade */
-#define WT_BTREE_VERIFY 0x40000 /* Handle is for verify */
+#define WT_BTREE_NO_RECONCILE 0x04000 /* Allow splits, even with no evict */
+#define WT_BTREE_REBALANCE 0x08000 /* Handle is for rebalance */
+#define WT_BTREE_SALVAGE 0x10000 /* Handle is for salvage */
+#define WT_BTREE_SKIP_CKPT 0x20000 /* Handle skipped checkpoint */
+#define WT_BTREE_UPGRADE 0x40000 /* Handle is for upgrade */
+#define WT_BTREE_VERIFY 0x80000 /* Handle is for verify */
uint32_t flags;
};