summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Keith Bostic <keith.bostic@mongodb.com> 2016-12-15 00:01:12 -0500
committer Michael Cahill <michael.cahill@mongodb.com> 2016-12-15 16:01:12 +1100
commit 18fb0f8da42f5e6045e296b81d86370bf60376f5 (patch)
tree ecc32e074229a3dbde2a8b32f79c1a86a60d02d8
parent a026912444c9714011acfec0ca40722e3b55e2a1 (diff)
download mongo-18fb0f8da42f5e6045e296b81d86370bf60376f5.tar.gz
SERVER-26545 Remove fixed-size limitation on WiredTiger hazard pointers (#3187)
* Default to an initial 250 hazard slots and grow from there.
* Make hazard_max undocumented, add an internal limit of 1000 eviction walks.
* If we grow the hazard pointer array, schedule the original to be freed when the database is closed.
* Update test_bug011 back to stress eviction with the hard-coded limit of 1000 active trees. Only run during "long" tests.
-rw-r--r-- dist/api_data.py 2
-rw-r--r-- src/btree/bt_discard.c 2
-rw-r--r-- src/config/config_api.c 31
-rw-r--r-- src/conn/conn_api.c 3
-rw-r--r-- src/docs/upgrading.dox 10
-rw-r--r-- src/evict/evict_lru.c 14
-rw-r--r-- src/evict/evict_page.c 2
-rw-r--r-- src/include/btree.i 47
-rw-r--r-- src/include/cache.h 2
-rw-r--r-- src/include/connection.h 6
-rw-r--r-- src/include/extern.h 2
-rw-r--r-- src/include/session.h 10
-rw-r--r-- src/include/wiredtiger.in 2
-rw-r--r-- src/session/session_api.c 24
-rw-r--r-- src/support/hazard.c 281
-rw-r--r-- test/suite/test_bug011.py 15
-rw-r--r-- test/suite/test_config03.py 10
-rw-r--r-- test/suite/test_config04.py 4
-rw-r--r-- test/suite/test_hazard.py 58
19 files changed, 330 insertions, 195 deletions
diff --git a/dist/api_data.py b/dist/api_data.py
index 2b7ef4a94e1..acbbf0f2a68 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -727,7 +727,7 @@ wiredtiger_open_common =\
Config('hazard_max', '1000', r'''
maximum number of simultaneous hazard pointers per session
handle''',
- min='15'),
+ min=15, undoc=True),
Config('mmap', 'true', r'''
Use memory mapping to access files when possible''',
type='boolean'),
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index 7858d2cb14e..c2733d6567b 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -44,7 +44,7 @@ __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref)
* Wait for up to a second for hazard pointers to be cleared.
*/
for (hp = NULL, i = 0; i < 100; i++) {
- if ((hp = __wt_page_hazard_check(session, ref)) == NULL)
+ if ((hp = __wt_hazard_check(session, ref)) == NULL)
break;
__wt_sleep(0, 10000);
}
diff --git a/src/config/config_api.c b/src/config/config_api.c
index 76a51903588..05c5c1287a7 100644
--- a/src/config/config_api.c
+++ b/src/config/config_api.c
@@ -158,24 +158,25 @@ wiredtiger_config_validate(WT_SESSION *wt_session,
}
/*
- * __conn_foc_add --
+ * __wt_conn_foc_add --
* Add a new entry into the connection's free-on-close list.
*/
-static int
-__conn_foc_add(WT_SESSION_IMPL *session, const void *p)
+void
+__wt_conn_foc_add(WT_SESSION_IMPL *session, const void *p)
{
WT_CONNECTION_IMPL *conn;
conn = S2C(session);
/*
- * Our caller is expected to be holding any locks we need.
+ * Callers of this function are expected to be holding the connection's
+ * api_lock.
+ *
+ * All callers of this function currently ignore errors.
*/
- WT_RET(__wt_realloc_def(
- session, &conn->foc_size, conn->foc_cnt + 1, &conn->foc));
-
- conn->foc[conn->foc_cnt++] = (void *)p;
- return (0);
+ if (__wt_realloc_def(
+ session, &conn->foc_size, conn->foc_cnt + 1, &conn->foc) == 0)
+ conn->foc[conn->foc_cnt++] = (void *)p;
}
/*
@@ -328,12 +329,12 @@ __wt_configure_method(WT_SESSION_IMPL *session,
* order to avoid freeing chunks of memory twice. Again, this isn't a
* commonly used API and it shouldn't ever happen, just leak it.
*/
- (void)__conn_foc_add(session, entry->base);
- (void)__conn_foc_add(session, entry);
- (void)__conn_foc_add(session, checks);
- (void)__conn_foc_add(session, newcheck->type);
- (void)__conn_foc_add(session, newcheck->checks);
- (void)__conn_foc_add(session, newcheck_name);
+ __wt_conn_foc_add(session, entry->base);
+ __wt_conn_foc_add(session, entry);
+ __wt_conn_foc_add(session, checks);
+ __wt_conn_foc_add(session, newcheck->type);
+ __wt_conn_foc_add(session, newcheck->checks);
+ __wt_conn_foc_add(session, newcheck_name);
/*
* Instead of using locks to protect configuration information, assume
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 1bc4a501ce2..474b8bbad8a 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -2309,9 +2309,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
}
WT_ERR(__wt_verbose_config(session, cfg));
- WT_ERR(__wt_config_gets(session, cfg, "hazard_max", &cval));
- conn->hazard_max = (uint32_t)cval.val;
-
WT_ERR(__wt_config_gets(session, cfg, "session_max", &cval));
conn->session_size = (uint32_t)cval.val + WT_EXTRA_INTERNAL_SESSIONS;
diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox
index 78d09a56ea9..0b0826f2646 100644
--- a/src/docs/upgrading.dox
+++ b/src/docs/upgrading.dox
@@ -1,5 +1,15 @@
/*! @page upgrading Upgrading WiredTiger applications
+@section version_291 Upgrading to Version 2.9.1
+
+<dl>
+<dt>Changes to hazard pointer configuration</dt>
+<dd>
+The \c hazard_max parameter to ::wiredtiger_open is now ignored. Memory is
+allocated for hazard pointers as required by each session.
+</dd>
+</dl>
+
@section version_290 Upgrading to Version 2.9.0
<dl>
<dt>Changes to cursor behavior after WT_CURSOR::insert</dt>
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 5b857566299..6fa728916de 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -1148,9 +1148,17 @@ retry: while (slot < max_entries) {
!__wt_cache_aggressive(session))
continue;
- /* Skip files if we have used all available hazard pointers. */
- if (btree->evict_ref == NULL && session->nhazard >=
- conn->hazard_max - WT_MIN(conn->hazard_max / 2, 10))
+ /*
+ * Skip files if we have too many active walks.
+ *
+ * This used to be limited by the configured maximum number of
+ * hazard pointers per session. Even though that ceiling has
+ * been removed, we need to test eviction with huge numbers of
+ * active trees before allowing larger numbers of hazard
+ * pointers in the walk session.
+ */
+ if (btree->evict_ref == NULL &&
+ session->nhazard > WT_EVICT_MAX_TREES)
continue;
/*
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 56a0fcfc790..5b17a78a4dd 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -37,7 +37,7 @@ __evict_exclusive(WT_SESSION_IMPL *session, WT_REF *ref)
* Check for a hazard pointer indicating another thread is using the
* page, meaning the page cannot be evicted.
*/
- if (__wt_page_hazard_check(session, ref) == NULL)
+ if (__wt_hazard_check(session, ref) == NULL)
return (0);
WT_STAT_DATA_INCR(session, cache_eviction_hazard);
diff --git a/src/include/btree.i b/src/include/btree.i
index e591209f39a..4f69c258621 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -1437,53 +1437,6 @@ __wt_page_swap_func(
}
/*
- * __wt_page_hazard_check --
- * Return if there's a hazard pointer to the page in the system.
- */
-static inline WT_HAZARD *
-__wt_page_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref)
-{
- WT_CONNECTION_IMPL *conn;
- WT_HAZARD *hp;
- WT_SESSION_IMPL *s;
- uint32_t i, j, hazard_size, max, session_cnt;
-
- conn = S2C(session);
-
- /*
- * No lock is required because the session array is fixed size, but it
- * may contain inactive entries. We must review any active session
- * that might contain a hazard pointer, so insert a barrier before
- * reading the active session count. That way, no matter what sessions
- * come or go, we'll check the slots for all of the sessions that could
- * have been active when we started our check.
- */
- WT_STAT_CONN_INCR(session, cache_hazard_checks);
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (s = conn->sessions, i = 0, j = 0, max = 0;
- i < session_cnt; ++s, ++i) {
- if (!s->active)
- continue;
- WT_ORDERED_READ(hazard_size, s->hazard_size);
- if (s->hazard_size > max) {
- max = s->hazard_size;
- WT_STAT_CONN_SET(session,
- cache_hazard_max, max);
- }
- for (hp = s->hazard; hp < s->hazard + hazard_size; ++hp) {
- ++j;
- if (hp->ref == ref) {
- WT_STAT_CONN_INCRV(session,
- cache_hazard_walks, j);
- return (hp);
- }
- }
- }
- WT_STAT_CONN_INCRV(session, cache_hazard_walks, j);
- return (NULL);
-}
-
-/*
* __wt_skip_choose_depth --
* Randomly choose a depth for a skiplist insert.
*/
diff --git a/src/include/cache.h b/src/include/cache.h
index 6ea13ff63ac..70f6169200d 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -16,6 +16,8 @@
#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
+#define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */
+
/*
* WT_EVICT_ENTRY --
* Encapsulation of an eviction candidate.
diff --git a/src/include/connection.h b/src/include/connection.h
index 7d3d07a6abc..60ce5f55234 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -255,12 +255,6 @@ struct __wt_connection_impl {
size_t session_scratch_max; /* Max scratch memory per session */
- /*
- * WiredTiger allocates space for a fixed number of hazard pointers
- * in each thread of control.
- */
- uint32_t hazard_max; /* Hazard array size */
-
WT_CACHE *cache; /* Page cache */
volatile uint64_t cache_size; /* Cache size (either statically
configured or the current size
diff --git a/src/include/extern.h b/src/include/extern.h
index 7dce310dabf..c7506e55976 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -218,6 +218,7 @@ extern int __wt_config_getones_none(WT_SESSION_IMPL *session, const char *config
extern int __wt_config_gets_def(WT_SESSION_IMPL *session, const char **cfg, const char *key, int def, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_config_subgetraw(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_config_subgets(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
+extern void __wt_conn_foc_add(WT_SESSION_IMPL *session, const void *p) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern void __wt_conn_foc_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_configure_method(WT_SESSION_IMPL *session, const char *method, const char *uri, const char *config, const char *type, const char *check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_config_check(WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry, const char *config, size_t config_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
@@ -654,6 +655,7 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern void __wt_hazard_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
+extern WT_HAZARD *__wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern u_int __wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern void __wt_fill_hex(const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_raw_to_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
diff --git a/src/include/session.h b/src/include/session.h
index 76cb463602c..7e855a3db25 100644
--- a/src/include/session.h
+++ b/src/include/session.h
@@ -200,9 +200,13 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
#define WT_SESSION_FIRST_USE(s) \
((s)->hazard == NULL)
- /* The number of hazard pointers grows dynamically. */
-#define WT_HAZARD_INCR 1
- uint32_t hazard_size; /* Allocated slots in hazard array. */
+ /*
+ * The hazard pointer array grows as necessary, initialize with 250
+ * slots.
+ */
+#define WT_SESSION_INITIAL_HAZARD_SLOTS 250
+ uint32_t hazard_size; /* Hazard pointer array slots */
+ uint32_t hazard_inuse; /* Hazard pointer array slots in-use */
uint32_t nhazard; /* Count of active hazard pointers */
WT_HAZARD *hazard; /* Hazard pointer array */
};
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 2365135e08d..4c72df0f073 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -2355,8 +2355,6 @@ struct __wt_connection {
* in seconds at which to check for files that are inactive and close them., an
* integer between 1 and 100000; default \c 10.}
* @config{ ),,}
- * @config{hazard_max, maximum number of simultaneous hazard pointers per
- * session handle., an integer greater than or equal to 15; default \c 1000.}
* @config{in_memory, keep data in-memory only. See @ref in_memory for more
* information., a boolean flag; default \c false.}
* @config{log = (, enable logging. Enabling logging uses three sessions from
diff --git a/src/session/session_api.c b/src/session/session_api.c
index ed6339aa66e..88a3c32ee11 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -129,9 +129,11 @@ __session_clear(WT_SESSION_IMPL *session)
* For these reasons, be careful when clearing the session structure.
*/
memset(session, 0, WT_SESSION_CLEAR_SIZE(session));
- session->hazard_size = 0;
- session->nhazard = 0;
+
WT_INIT_LSN(&session->bg_sync_lsn);
+
+ session->hazard_inuse = 0;
+ session->nhazard = 0;
}
/*
@@ -1819,17 +1821,13 @@ __open_session(WT_CONNECTION_IMPL *conn,
* session close because access to it isn't serialized. Allocate the
* first time we open this session.
*/
- if (WT_SESSION_FIRST_USE(session_ret))
- WT_ERR(__wt_calloc_def(
- session, conn->hazard_max, &session_ret->hazard));
-
- /*
- * Set an initial size for the hazard array. It will be grown as
- * required up to hazard_max. The hazard_size is reset on close, since
- * __wt_hazard_close ensures the array is cleared - so it is safe to
- * reset the starting size on each open.
- */
- session_ret->hazard_size = 0;
+ if (WT_SESSION_FIRST_USE(session_ret)) {
+ WT_ERR(__wt_calloc_def(session,
+ WT_SESSION_INITIAL_HAZARD_SLOTS, &session_ret->hazard));
+ session_ret->hazard_size = WT_SESSION_INITIAL_HAZARD_SLOTS;
+ session_ret->hazard_inuse = 0;
+ session_ret->nhazard = 0;
+ }
/* Cache the offset of this session's statistics bucket. */
session_ret->stat_bucket = WT_STATS_SLOT_ID(session);
diff --git a/src/support/hazard.c b/src/support/hazard.c
index b0fa7e129bb..7e88ad183fe 100644
--- a/src/support/hazard.c
+++ b/src/support/hazard.c
@@ -13,6 +13,48 @@ static void __hazard_dump(WT_SESSION_IMPL *);
#endif
/*
+ * hazard_grow --
+ * Grow a hazard pointer array.
+ */
+static int
+hazard_grow(WT_SESSION_IMPL *session)
+{
+ WT_HAZARD *nhazard;
+ size_t size;
+ void *ohazard;
+
+ /*
+ * Allocate a new, larger hazard pointer array and copy the contents of
+ * the original into place.
+ */
+ size = session->hazard_size;
+ WT_RET(__wt_calloc_def(session, size * 2, &nhazard));
+ memcpy(nhazard, session->hazard, size * sizeof(WT_HAZARD));
+
+ /*
+ * Swap the new hazard pointer array into place after initialization
+ * is complete (initialization must complete before eviction can see
+ * the new hazard pointer array), then schedule the original to be
+ * freed.
+ */
+ ohazard = session->hazard;
+ WT_PUBLISH(session->hazard, nhazard);
+
+ __wt_spin_lock(session, &S2C(session)->api_lock);
+ __wt_conn_foc_add(session, ohazard);
+ __wt_spin_unlock(session, &S2C(session)->api_lock);
+
+ /*
+ * Increase the size of the session's pointer array after swapping it
+ * into place (the session's reference must be updated before eviction
+ * can see the new size).
+ */
+ WT_PUBLISH(session->hazard_size, (uint32_t)(size * 2));
+
+ return (0);
+}
+
+/*
* __wt_hazard_set --
* Set a hazard pointer.
*/
@@ -23,17 +65,12 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
#endif
)
{
- WT_BTREE *btree;
- WT_CONNECTION_IMPL *conn;
WT_HAZARD *hp;
- int restarts = 0;
- btree = S2BT(session);
- conn = S2C(session);
*busyp = false;
/* If a file can never be evicted, hazard pointers aren't required. */
- if (F_ISSET(btree, WT_BTREE_IN_MEMORY))
+ if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
return (0);
/*
@@ -46,6 +83,45 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
return (0);
}
+ /* If we have filled the current hazard pointer array, grow it. */
+ if (session->nhazard >= session->hazard_size) {
+ WT_ASSERT(session,
+ session->nhazard == session->hazard_size &&
+ session->hazard_inuse == session->hazard_size);
+ WT_RET(hazard_grow(session));
+ }
+
+ /*
+ * If there are no available hazard pointer slots, make another one
+ * visible.
+ */
+ if (session->nhazard >= session->hazard_inuse) {
+ WT_ASSERT(session,
+ session->nhazard == session->hazard_inuse &&
+ session->hazard_inuse < session->hazard_size);
+ hp = &session->hazard[session->hazard_inuse++];
+ } else {
+ WT_ASSERT(session,
+ session->nhazard < session->hazard_inuse &&
+ session->hazard_inuse <= session->hazard_size);
+
+ /*
+ * There must be an empty slot in the array, find it. Skip most
+ * of the active slots by starting after the active count slot;
+ * there may be a free slot before there, but checking is
+ * expensive. If we reach the end of the array, continue the
+ * search from the beginning of the array.
+ */
+ for (hp = session->hazard + session->nhazard;; ++hp) {
+ if (hp >= session->hazard + session->hazard_inuse)
+ hp = session->hazard;
+ if (hp->ref == NULL)
+ break;
+ }
+ }
+
+ WT_ASSERT(session, hp->ref == NULL);
+
/*
* Do the dance:
*
@@ -59,82 +135,43 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
* pointer before it discards the page (the eviction server sets the
* state to WT_REF_LOCKED, then flushes memory and checks the hazard
* pointers).
- *
- * For sessions with many active hazard pointers, skip most of the
- * active slots: there may be a free slot in there, but checking is
- * expensive. Most hazard pointers are released quickly: optimize
- * for that case.
*/
- for (hp = session->hazard + session->nhazard;; ++hp) {
- /*
- * If we get to the end of the array, either:
- * 1. If we know there are free slots somewhere, and this is
- * the first time through, continue the search from the
- * start. Don't actually continue the loop because that
- * will skip the first slot.
- * 2. If we have searched all the way through and we have
- * allocated the maximum number of slots, give up.
- * 3. Allocate another increment of slots, up to the maximum.
- * The slot we are on should now be available.
- */
- if (hp >= session->hazard + session->hazard_size) {
- if (session->nhazard < session->hazard_size &&
- restarts++ == 0)
- hp = session->hazard;
- else if (session->hazard_size >= conn->hazard_max)
- break;
- else
- WT_PUBLISH(session->hazard_size, WT_MIN(
- session->hazard_size + WT_HAZARD_INCR,
- conn->hazard_max));
- }
-
- if (hp->ref != NULL)
- continue;
-
- hp->ref = ref;
+ hp->ref = ref;
#ifdef HAVE_DIAGNOSTIC
- hp->file = file;
- hp->line = line;
+ hp->file = file;
+ hp->line = line;
#endif
- /* Publish the hazard pointer before reading page's state. */
- WT_FULL_BARRIER();
+ /* Publish the hazard pointer before reading page's state. */
+ WT_FULL_BARRIER();
- /*
- * Check if the page state is still valid, where valid means a
- * state of WT_REF_MEM.
- */
- if (ref->state == WT_REF_MEM) {
- ++session->nhazard;
-
- /*
- * Callers require a barrier here so operations holding
- * the hazard pointer see consistent data.
- */
- WT_READ_BARRIER();
- return (0);
- }
+ /*
+ * Check if the page state is still valid, where valid means a
+ * state of WT_REF_MEM.
+ */
+ if (ref->state == WT_REF_MEM) {
+ ++session->nhazard;
/*
- * The page isn't available, it's being considered for eviction
- * (or being evicted, for all we know). If the eviction server
- * sees our hazard pointer before evicting the page, it will
- * return the page to use, no harm done, if it doesn't, it will
- * go ahead and complete the eviction.
- *
- * We don't bother publishing this update: the worst case is we
- * prevent some random page from being evicted.
+ * Callers require a barrier here so operations holding
+ * the hazard pointer see consistent data.
*/
- hp->ref = NULL;
- *busyp = true;
+ WT_READ_BARRIER();
return (0);
}
-#ifdef HAVE_DIAGNOSTIC
- __hazard_dump(session);
-#endif
- WT_RET_MSG(session, ENOMEM,
- "session %p: hazard pointer table full", (void *)session);
+ /*
+ * The page isn't available, it's being considered for eviction
+ * (or being evicted, for all we know). If the eviction server
+ * sees our hazard pointer before evicting the page, it will
+ * return the page to use, no harm done, if it doesn't, it will
+ * go ahead and complete the eviction.
+ *
+ * We don't bother publishing this update: the worst case is we
+ * prevent some random page from being evicted.
+ */
+ hp->ref = NULL;
+ *busyp = true;
+ return (0);
}
/*
@@ -144,20 +181,17 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
int
__wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_BTREE *btree;
WT_HAZARD *hp;
- btree = S2BT(session);
-
/* If a file can never be evicted, hazard pointers aren't required. */
- if (F_ISSET(btree, WT_BTREE_IN_MEMORY))
+ if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
return (0);
/*
* Clear the caller's hazard pointer.
* The common pattern is LIFO, so do a reverse search.
*/
- for (hp = session->hazard + session->hazard_size - 1;
+ for (hp = session->hazard + session->hazard_inuse - 1;
hp >= session->hazard;
--hp)
if (hp->ref == ref) {
@@ -174,9 +208,13 @@ __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref)
/*
* If this was the last hazard pointer in the session,
* reset the size so that checks can skip this session.
+ *
+ * A write-barrier() is necessary before the change to
+ * the in-use value, the number of in-use slots can
+ * never be less than the number of active references.
*/
if (--session->nhazard == 0)
- WT_PUBLISH(session->hazard_size, 0);
+ WT_PUBLISH(session->hazard_inuse, 0);
return (0);
}
@@ -205,7 +243,7 @@ __wt_hazard_close(WT_SESSION_IMPL *session)
* diagnostic.
*/
for (found = false, hp = session->hazard;
- hp < session->hazard + session->hazard_size; ++hp)
+ hp < session->hazard + session->hazard_inuse; ++hp)
if (hp->ref != NULL) {
found = true;
break;
@@ -233,7 +271,7 @@ __wt_hazard_close(WT_SESSION_IMPL *session)
* can't think of a reason it would be).
*/
for (hp = session->hazard;
- hp < session->hazard + session->hazard_size; ++hp)
+ hp < session->hazard + session->hazard_inuse; ++hp)
if (hp->ref != NULL) {
hp->ref = NULL;
--session->nhazard;
@@ -247,6 +285,80 @@ __wt_hazard_close(WT_SESSION_IMPL *session)
}
/*
+ * hazard_get_reference --
+ * Return a consistent reference to a hazard pointer array.
+ */
+static inline void
+hazard_get_reference(
+ WT_SESSION_IMPL *session, WT_HAZARD **hazardp, uint32_t *hazard_inusep)
+{
+ /*
+ * Hazard pointer arrays can be swapped out from under us if they grow.
+ * First, read the current in-use value. The read must precede the read
+ * of the hazard pointer itself (so the in-use value is pessimistic
+ * should the hazard array grow), and additionally ensure we only read
+ * the in-use value once. Then, read the hazard pointer, also ensuring
+ * we only read it once.
+ *
+ * Use a barrier instead of marking the fields volatile because we don't
+ * want to slow down the rest of the hazard pointer functions that don't
+ * need special treatment.
+ */
+ WT_ORDERED_READ(*hazard_inusep, session->hazard_inuse);
+ WT_ORDERED_READ(*hazardp, session->hazard);
+}
+
+/*
+ * __wt_hazard_check --
+ * Return if there's a hazard pointer to the page in the system.
+ */
+WT_HAZARD *
+__wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_HAZARD *hp;
+ WT_SESSION_IMPL *s;
+ uint32_t i, j, hazard_inuse, max, session_cnt, walk_cnt;
+
+ conn = S2C(session);
+
+ WT_STAT_CONN_INCR(session, cache_hazard_checks);
+
+ /*
+ * No lock is required because the session array is fixed size, but it
+ * may contain inactive entries. We must review any active session
+ * that might contain a hazard pointer, so insert a read barrier after
+ * reading the active session count. That way, no matter what sessions
+ * come or go, we'll check the slots for all of the sessions that could
+ * have been active when we started our check.
+ */
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (s = conn->sessions,
+ i = j = max = walk_cnt = 0; i < session_cnt; ++s, ++i) {
+ if (!s->active)
+ continue;
+
+ hazard_get_reference(s, &hp, &hazard_inuse);
+
+ if (hazard_inuse > max) {
+ max = hazard_inuse;
+ WT_STAT_CONN_SET(session, cache_hazard_max, max);
+ }
+
+ for (j = 0; j < hazard_inuse; ++hp, ++j) {
+ ++walk_cnt;
+ if (hp->ref == ref) {
+ WT_STAT_CONN_INCRV(session,
+ cache_hazard_walks, walk_cnt);
+ return (hp);
+ }
+ }
+ }
+ WT_STAT_CONN_INCRV(session, cache_hazard_walks, walk_cnt);
+ return (NULL);
+}
+
+/*
* __wt_hazard_count --
* Count how many hazard pointers this session has on the given page.
*/
@@ -254,11 +366,12 @@ u_int
__wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_HAZARD *hp;
+ uint32_t i, hazard_inuse;
u_int count;
- for (count = 0, hp = session->hazard + session->hazard_size - 1;
- hp >= session->hazard;
- --hp)
+ hazard_get_reference(session, &hp, &hazard_inuse);
+
+ for (count = 0, i = 0; i < hazard_inuse; ++hp, ++i)
if (hp->ref == ref)
++count;
@@ -276,7 +389,7 @@ __hazard_dump(WT_SESSION_IMPL *session)
WT_HAZARD *hp;
for (hp = session->hazard;
- hp < session->hazard + session->hazard_size; ++hp)
+ hp < session->hazard + session->hazard_inuse; ++hp)
if (hp->ref != NULL)
__wt_errx(session,
"session %p: hazard pointer %p: %s, line %d",
diff --git a/test/suite/test_bug011.py b/test/suite/test_bug011.py
index 29bb08ec2e5..969aaeb5b39 100644
--- a/test/suite/test_bug011.py
+++ b/test/suite/test_bug011.py
@@ -30,25 +30,28 @@ import random, wiredtiger, wttest
from wtdataset import SimpleDataSet
# test_bug011.py
-# Eviction working on more files than there are hazard pointers.
+# Eviction working on more trees than the eviction server can walk
+# simultaneously. There is a builtin limit of 1000 trees, we open double
+# that, which makes this a long-running test.
class test_bug011(wttest.WiredTigerTestCase):
"""
Test having eviction working on more files than the number of
allocated hazard pointers.
"""
table_name = 'test_bug011'
- ntables = 50
+ ntables = 2000
nrows = 10000
nops = 10000
# Add connection configuration for this test.
def conn_config(self, dir):
- return 'cache_size=10MB,eviction_dirty_target=99,eviction_dirty_trigger=99,hazard_max=' + str(self.ntables / 2)
+ return 'cache_size=1GB'
+ @wttest.longtest("Eviction copes with lots of files")
def test_eviction(self):
cursors = []
datasets = []
for i in range(0, self.ntables):
- this_uri = 'table:%s-%03d' % (self.table_name, i)
+ this_uri = 'table:%s-%05d' % (self.table_name, i)
ds = SimpleDataSet(self, this_uri, self.nrows,
config='allocation_size=1KB,leaf_page_max=1KB')
ds.populate()
@@ -57,9 +60,9 @@ class test_bug011(wttest.WiredTigerTestCase):
# Switch over to on-disk trees with multiple leaf pages
self.reopen_conn()
- # Make sure we have a cursor for the table so it stays in cache.
+ # Make sure we have a cursor for every table so it stays in cache.
for i in range(0, self.ntables):
- this_uri = 'table:%s-%03d' % (self.table_name, i)
+ this_uri = 'table:%s-%05d' % (self.table_name, i)
cursors.append(self.session.open_cursor(this_uri, None))
# Make use of the cache.
diff --git a/test/suite/test_config03.py b/test/suite/test_config03.py
index 88ca6ae3f39..6699f7d2650 100644
--- a/test/suite/test_config03.py
+++ b/test/suite/test_config03.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python
#
# Public Domain 2014-2016 MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
@@ -48,8 +48,6 @@ class test_config03(test_base03.test_base03):
eviction_trigger_scenarios = wtscenario.quick_scenarios(
's_eviction_trigger',
[50, 90, 95, 99], None)
- hazard_max_scenarios = wtscenario.quick_scenarios('s_hazard_max',
- [15, 50, 500], [0.4, 0.8, 0.8])
multiprocess_scenarios = wtscenario.quick_scenarios('s_multiprocess',
[True,False], [1.0,1.0])
session_max_scenarios = wtscenario.quick_scenarios('s_session_max',
@@ -66,13 +64,13 @@ class test_config03(test_base03.test_base03):
verbose_scenarios = wtscenario.quick_scenarios('s_verbose', [None], None)
config_vars = [ 'cache_size', 'create', 'error_prefix', 'eviction_target',
- 'eviction_trigger', 'hazard_max', 'multiprocess',
- 'session_max', 'verbose' ]
+ 'eviction_trigger', 'multiprocess', 'session_max',
+ 'verbose' ]
scenarios = wtscenario.make_scenarios(
cache_size_scenarios, create_scenarios, error_prefix_scenarios,
eviction_target_scenarios, eviction_trigger_scenarios,
- hazard_max_scenarios, multiprocess_scenarios, session_max_scenarios,
+ multiprocess_scenarios, session_max_scenarios,
transactional_scenarios, verbose_scenarios, prune=1000)
#wttest.WiredTigerTestCase.printVerbose(2, 'test_config03: running ' + \
diff --git a/test/suite/test_config04.py b/test/suite/test_config04.py
index 204aa7e27d5..db8a5f4a16a 100644
--- a/test/suite/test_config04.py
+++ b/test/suite/test_config04.py
@@ -154,10 +154,6 @@ class test_config04(wttest.WiredTigerTestCase):
'eviction_trigger=86'),
"/eviction target must be lower than the eviction trigger/")
- def test_hazard_max(self):
- # Note: There isn't any direct way to know that this was set.
- self.common_test('hazard_max=50')
-
def test_invalid_config(self):
msg = '/Unbalanced brackets/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
diff --git a/test/suite/test_hazard.py b/test/suite/test_hazard.py
new file mode 100644
index 00000000000..f2891fce526
--- /dev/null
+++ b/test/suite/test_hazard.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_hazard.py
+# Hazard pointer tests.
+
+import wiredtiger, wttest
+from wtdataset import SimpleDataSet
+
+# Regression tests.
+class test_hazard(wttest.WiredTigerTestCase):
+
+ # Allocate a large number of hazard pointers in a session, forcing the
+ # hazard pointer array to repeatedly grow.
+ def test_hazard(self):
+ uri = "table:hazard"
+ ds = SimpleDataSet(self, uri, 1000)
+ ds.populate()
+
+ # Open 10,000 cursors and pin a page to set a hazard pointer.
+ cursors = []
+ for i in range(0, 10000):
+ c = self.session.open_cursor(uri, None)
+ c.set_key(ds.key(10))
+ c.search()
+ cursors.append(c)
+
+ # Close the cursors, clearing the hazard pointer.
+ for c in cursors:
+ c.close()
+
+if __name__ == '__main__':
+ wttest.run()