diff options
author | Keith Bostic <keith.bostic@mongodb.com> | 2017-04-13 00:59:39 -0400 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2017-04-13 14:59:39 +1000 |
commit | 787c625446989be6745e069d7b427f2370d2ddda (patch) | |
tree | 28a494fba54fda654b10eff0598ad44a3618f9f9 | |
parent | 45759b71aacc73b71cd8741fc5c46f34a5332f7e (diff) | |
download | mongo-787c625446989be6745e069d7b427f2370d2ddda.tar.gz |
WT-3261 add a checkpoint epoch to avoid draining the eviction queue (#3370)
-rw-r--r-- | src/btree/bt_read.c | 7 |
-rw-r--r-- | src/btree/bt_sync.c | 22 |
-rw-r--r-- | src/evict/evict_page.c | 12 |
-rw-r--r-- | src/include/btree.h | 27 |
-rw-r--r-- | src/include/btree.i | 20 |
-rw-r--r-- | src/include/extern.h | 2 |
-rw-r--r-- | src/include/session.h | 9 |
-rw-r--r-- | src/support/generation.c | 20 |
8 files changed, 54 insertions, 65 deletions
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index fb69afb166c..72a69e8591c 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -585,15 +585,10 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags * if the page qualifies for forced eviction and update * the page's generation number. If eviction isn't being * done on this file, we're done. - * In-memory split of large pages is allowed while - * no_eviction is set on btree, whereas reconciliation - * is not allowed. */ if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || - btree->lsm_primary || - (btree->evict_disabled > 0 && - !F_ISSET(btree, WT_BTREE_ALLOW_SPLITS))) + btree->evict_disabled > 0 || btree->lsm_primary) goto skip_evict; /* diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 81e9d1757bb..112f0725f94 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -180,21 +180,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * any problematic eviction or page splits to complete. */ WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE); - - /* - * Sync for checkpoint allows splits to happen while the queue - * is being drained, but not reconciliation. We need to do this, - * since draining the queue can take long enough for hot pages - * to grow significantly larger than the configured maximum - * size. - */ - F_SET(btree, WT_BTREE_ALLOW_SPLITS); - ret = __wt_evict_file_exclusive_on(session); - F_CLR(btree, WT_BTREE_ALLOW_SPLITS); - WT_ERR(ret); - __wt_evict_file_exclusive_off(session); - - WT_PUBLISH(btree->checkpointing, WT_CKPT_RUNNING); + (void)__wt_gen_next_drain(session, WT_GEN_EVICT); + btree->checkpointing = WT_CKPT_RUNNING; /* Write all dirty in-cache pages. */ flags |= WT_READ_NO_EVICT; @@ -268,9 +255,8 @@ err: /* On error, clear any left-over tree walk. */ saved_pinned_id == WT_TXN_NONE) __wt_txn_release_snapshot(session); - /* Clear the checkpoint flag and push the change. 
*/ - if (btree->checkpointing != WT_CKPT_OFF) - WT_PUBLISH(btree->checkpointing, WT_CKPT_OFF); + /* Clear the checkpoint flag. */ + btree->checkpointing = WT_CKPT_OFF; __wt_spin_unlock(session, &btree->flush_lock); diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 9498e2fb313..edcd108e7e4 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -113,6 +113,9 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) /* Checkpoints should never do eviction. */ WT_ASSERT(session, !WT_SESSION_IS_CHECKPOINT(session)); + /* Enter the eviction generation. */ + __wt_session_gen_enter(session, WT_GEN_EVICT); + page = ref->page; tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD); @@ -133,7 +136,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) * we want: there is nothing more to do. */ if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) - return (0); + goto done; /* Count evictions of internal pages during normal operation. */ if (!closing && WT_PAGE_IS_INTERNAL(page)) { @@ -182,6 +185,9 @@ err: if (!closing) WT_STAT_DATA_INCR(session, cache_eviction_fail); } +done: /* Leave the eviction generation. */ + __wt_session_gen_leave(session, WT_GEN_EVICT); + return (ret); } @@ -479,10 +485,6 @@ __evict_review( */ if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) return (__wt_split_insert(session, ref)); - - /* If splits are the only permitted operation, we're done. */ - if (F_ISSET(S2BT(session), WT_BTREE_ALLOW_SPLITS)) - return (EBUSY); } /* If the page is clean, we're done and we can evict. */ diff --git a/src/include/btree.h b/src/include/btree.h index 19db27d84a2..8ce77b5ecd3 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -151,7 +151,7 @@ struct __wt_btree { volatile uint32_t evict_busy; /* Count of threads in eviction */ int evict_start_type; /* Start position for eviction walk (see WT_EVICT_WALK_START). 
*/ - enum { + volatile enum { WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING } checkpointing; /* Checkpoint in progress */ @@ -163,19 +163,18 @@ struct __wt_btree { WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */ /* Flags values up to 0xff are reserved for WT_DHANDLE_* */ -#define WT_BTREE_ALLOW_SPLITS 0x000100 /* Allow splits, even with no evict */ -#define WT_BTREE_BULK 0x000200 /* Bulk-load handle */ -#define WT_BTREE_CLOSED 0x000400 /* Handle closed */ -#define WT_BTREE_IGNORE_CACHE 0x000800 /* Cache-resident object */ -#define WT_BTREE_IN_MEMORY 0x001000 /* Cache-resident object */ -#define WT_BTREE_LOOKASIDE 0x002000 /* Look-aside table */ -#define WT_BTREE_NO_CHECKPOINT 0x004000 /* Disable checkpoints */ -#define WT_BTREE_NO_LOGGING 0x008000 /* Disable logging */ -#define WT_BTREE_REBALANCE 0x020000 /* Handle is for rebalance */ -#define WT_BTREE_SALVAGE 0x040000 /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x080000 /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x100000 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x200000 /* Handle is for verify */ +#define WT_BTREE_BULK 0x000100 /* Bulk-load handle */ +#define WT_BTREE_CLOSED 0x000200 /* Handle closed */ +#define WT_BTREE_IGNORE_CACHE 0x000400 /* Cache-resident object */ +#define WT_BTREE_IN_MEMORY 0x000800 /* Cache-resident object */ +#define WT_BTREE_LOOKASIDE 0x001000 /* Look-aside table */ +#define WT_BTREE_NO_CHECKPOINT 0x002000 /* Disable checkpoints */ +#define WT_BTREE_NO_LOGGING 0x004000 /* Disable logging */ +#define WT_BTREE_REBALANCE 0x008000 /* Handle is for rebalance */ +#define WT_BTREE_SALVAGE 0x010000 /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 0x020000 /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x040000 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x080000 /* Handle is for verify */ uint32_t flags; }; diff --git a/src/include/btree.i b/src/include/btree.i index 474b40bf805..d4db65b2033 100644 --- 
a/src/include/btree.i +++ b/src/include/btree.i @@ -1286,6 +1286,16 @@ __wt_page_can_evict( return (true); /* + * We can't split or evict multiblock row-store pages where the parent's + * key for the page is an overflow item, because the split into the + * parent frees the backing blocks for any no-longer-used overflow keys, + * which will corrupt the checkpoint's block management. + */ + if (btree->checkpointing != WT_CKPT_OFF && + F_ISSET_ATOMIC(ref->home, WT_PAGE_OVERFLOW_KEYS)) + return (false); + + /* * Check for in-memory splits before other eviction tests. If the page * should split in-memory, return success immediately and skip more * detailed eviction tests. We don't need further tests since the page @@ -1312,16 +1322,6 @@ __wt_page_can_evict( } /* - * We can't evict clean, multiblock row-store pages where the parent's - * key for the page is an overflow item, because the split into the - * parent frees the backing blocks for any no-longer-used overflow keys, - * which will corrupt the checkpoint's block management. 
- */ - if (btree->checkpointing != WT_CKPT_OFF && - F_ISSET_ATOMIC(ref->home, WT_PAGE_OVERFLOW_KEYS)) - return (false); - - /* * If a split created new internal pages, those newly created internal * pages cannot be evicted until all threads are known to have exited * the original parent page's index, because evicting an internal page diff --git a/src/include/extern.h b/src/include/extern.h index ff835bacc56..4f38b7ac433 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -649,6 +649,8 @@ extern int __wt_unexpected_object_type( WT_SESSION_IMPL *session, const char *ur extern void __wt_gen_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern uint64_t __wt_gen(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern uint64_t __wt_gen_next(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern uint64_t __wt_gen_next_drain(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern uint64_t __wt_gen_oldest(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern uint64_t __wt_session_gen(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_session_gen_enter(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/session.h b/src/include/session.h index adef5e39068..de2c1463684 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -169,10 +169,11 @@ struct __wt_session_impl { /* Generations manager */ #define WT_GEN_CHECKPOINT 0 /* Checkpoint generation */ -#define WT_GEN_HAZARD 1 /* Hazard pointer */ -#define WT_GEN_SCHEMA 2 /* Schema version */ -#define WT_GEN_SPLIT 3 /* Page splits */ -#define WT_GENERATIONS 4 
/* Total generation manager entries */ +#define WT_GEN_EVICT 1 /* Eviction generation */ +#define WT_GEN_HAZARD 2 /* Hazard pointer */ +#define WT_GEN_SCHEMA 3 /* Schema version */ +#define WT_GEN_SPLIT 4 /* Page splits */ +#define WT_GENERATIONS 5 /* Total generation manager entries */ volatile uint64_t generations[WT_GENERATIONS]; /* diff --git a/src/support/generation.c b/src/support/generation.c index ed615d4c7cd..6e16d7e57fe 100644 --- a/src/support/generation.c +++ b/src/support/generation.c @@ -57,14 +57,12 @@ __wt_gen_next(WT_SESSION_IMPL *session, int which) return (__wt_atomic_addv64(&S2C(session)->generations[which], 1)); } -#if 0 /* * __wt_gen_next_drain -- * Switch the resource to its next generation, then wait for it to drain. */ uint64_t - TABBED IN to avoid dist/ functions: - __wt_gen_next_drain(WT_SESSION_IMPL *session, int which) +__wt_gen_next_drain(WT_SESSION_IMPL *session, int which) { uint64_t v; @@ -80,8 +78,7 @@ uint64_t * Wait for the resource to drain. */ void - TABBED IN to avoid dist/ functions: - __wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation) +__wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation) { WT_CONNECTION_IMPL *conn; WT_SESSION_IMPL *s; @@ -109,7 +106,14 @@ void /* Ensure we only read the value once. */ WT_ORDERED_READ(v, s->generations[which]); - if (v == 0 || generation <= v) + /* + * The generation argument is newer than the limit. Wait + * for threads in generations older than the argument + * generation, threads in argument generations are OK. + * + * The thread's generation may be 0 (that is, not set). + */ + if (v == 0 || v >= generation) break; /* @@ -124,7 +128,6 @@ void } } } -#endif /* * __wt_gen_oldest -- @@ -156,6 +159,7 @@ __wt_gen_oldest(WT_SESSION_IMPL *session, int which) /* Ensure we only read the value once. 
*/ WT_ORDERED_READ(v, s->generations[which]); + if (v != 0 && v < oldest) oldest = v; } @@ -328,7 +332,7 @@ __wt_stash_discard_all(WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session) /* * This function is called during WT_CONNECTION.close to discard any - * memory that remains. For that reason, we take two WT_SESSION_IMPL + * memory that remains. For that reason, we take two WT_SESSION_IMPL * arguments: session_safe is still linked to the WT_CONNECTION and * can be safely used for calls to other WiredTiger functions, while * session is the WT_SESSION_IMPL we're cleaning up. |