/*- * Copyright (c) 2014-2018 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * * See the file LICENSE for redistribution information. */ #include "wt_internal.h" #ifdef HAVE_DIAGNOSTIC static void __hazard_dump(WT_SESSION_IMPL *); #endif /* * hazard_grow -- * Grow a hazard pointer array. */ static int hazard_grow(WT_SESSION_IMPL *session) { WT_HAZARD *nhazard; size_t size; uint64_t hazard_gen; void *ohazard; /* * Allocate a new, larger hazard pointer array and copy the contents of * the original into place. */ size = session->hazard_size; WT_RET(__wt_calloc_def(session, size * 2, &nhazard)); memcpy(nhazard, session->hazard, size * sizeof(WT_HAZARD)); /* * Swap the new hazard pointer array into place after initialization * is complete (initialization must complete before eviction can see * the new hazard pointer array), then schedule the original to be * freed. */ ohazard = session->hazard; WT_PUBLISH(session->hazard, nhazard); /* * Increase the size of the session's pointer array after swapping it * into place (the session's reference must be updated before eviction * can see the new size). */ WT_PUBLISH(session->hazard_size, (uint32_t)(size * 2)); /* * Threads using the hazard pointer array from now on will use the new * one. Increment the hazard pointer generation number, and schedule a * future free of the old memory. Ignore any failure, leak the memory. */ hazard_gen = __wt_gen_next(session, WT_GEN_HAZARD); WT_IGNORE_RET( __wt_stash_add(session, WT_GEN_HAZARD, hazard_gen, ohazard, 0)); return (0); } /* * __wt_hazard_set -- * Set a hazard pointer. */ int __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp #ifdef HAVE_DIAGNOSTIC , const char *func, int line #endif ) { WT_HAZARD *hp; uint32_t current_state; *busyp = false; /* If a file can never be evicted, hazard pointers aren't required. */ if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY)) return (0); /* * If there isn't a valid page, we're done. This read can race with * eviction and splits, we re-check it after a barrier to make sure * we have a valid reference. */ current_state = ref->state; if (current_state != WT_REF_LIMBO && current_state != WT_REF_MEM) { *busyp = true; return (0); } /* If we have filled the current hazard pointer array, grow it. */ if (session->nhazard >= session->hazard_size) { WT_ASSERT(session, session->nhazard == session->hazard_size && session->hazard_inuse == session->hazard_size); WT_RET(hazard_grow(session)); } /* * If there are no available hazard pointer slots, make another one * visible. */ if (session->nhazard >= session->hazard_inuse) { WT_ASSERT(session, session->nhazard == session->hazard_inuse && session->hazard_inuse < session->hazard_size); hp = &session->hazard[session->hazard_inuse++]; } else { WT_ASSERT(session, session->nhazard < session->hazard_inuse && session->hazard_inuse <= session->hazard_size); /* * There must be an empty slot in the array, find it. Skip most * of the active slots by starting after the active count slot; * there may be a free slot before there, but checking is * expensive. If we reach the end of the array, continue the * search from the beginning of the array. */ for (hp = session->hazard + session->nhazard;; ++hp) { if (hp >= session->hazard + session->hazard_inuse) hp = session->hazard; if (hp->ref == NULL) break; } } WT_ASSERT(session, hp->ref == NULL); /* * Do the dance: * * The memory location which makes a page "real" is the WT_REF's state * of WT_REF_LIMBO or WT_REF_MEM, which can be set to WT_REF_LOCKED * at any time by the page eviction server. * * Add the WT_REF reference to the session's hazard list and flush the * write, then see if the page's state is still valid. If so, we can * use the page because the page eviction server will see our hazard * pointer before it discards the page (the eviction server sets the * state to WT_REF_LOCKED, then flushes memory and checks the hazard * pointers). */ hp->ref = ref; #ifdef HAVE_DIAGNOSTIC hp->func = func; hp->line = line; #endif /* Publish the hazard pointer before reading page's state. */ WT_FULL_BARRIER(); /* * Check if the page state is still valid, where valid means a * state of WT_REF_LIMBO or WT_REF_MEM. */ current_state = ref->state; if (current_state == WT_REF_LIMBO || current_state == WT_REF_MEM) { ++session->nhazard; /* * Callers require a barrier here so operations holding * the hazard pointer see consistent data. */ WT_READ_BARRIER(); return (0); } /* * The page isn't available, it's being considered for eviction * (or being evicted, for all we know). If the eviction server * sees our hazard pointer before evicting the page, it will * return the page to use, no harm done, if it doesn't, it will * go ahead and complete the eviction. * * We don't bother publishing this update: the worst case is we * prevent some random page from being evicted. */ hp->ref = NULL; *busyp = true; return (0); } /* * __wt_hazard_clear -- * Clear a hazard pointer. */ int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref) { WT_HAZARD *hp; /* If a file can never be evicted, hazard pointers aren't required. */ if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY)) return (0); /* * Clear the caller's hazard pointer. * The common pattern is LIFO, so do a reverse search. */ for (hp = session->hazard + session->hazard_inuse - 1; hp >= session->hazard; --hp) if (hp->ref == ref) { /* * We don't publish the hazard pointer clear in the * general case. It's not required for correctness; * it gives an eviction thread faster access to the * page were the page selected for eviction, but the * generation number was just set, it's unlikely the * page will be selected for eviction. */ hp->ref = NULL; /* * If this was the last hazard pointer in the session, * reset the size so that checks can skip this session. * * A write-barrier() is necessary before the change to * the in-use value, the number of active references * can never be less than the number of in-use slots. */ if (--session->nhazard == 0) WT_PUBLISH(session->hazard_inuse, 0); return (0); } /* * A serious error, we should always find the hazard pointer. Panic, * because using a page we didn't have pinned down implies corruption. */ WT_PANIC_RET(session, EINVAL, "session %p: clear hazard pointer: %p: not found", (void *)session, (void *)ref); } /* * __wt_hazard_close -- * Verify that no hazard pointers are set. */ void __wt_hazard_close(WT_SESSION_IMPL *session) { WT_HAZARD *hp; bool found; /* * Check for a set hazard pointer and complain if we find one. We could * just check the session's hazard pointer count, but this is a useful * diagnostic. */ for (found = false, hp = session->hazard; hp < session->hazard + session->hazard_inuse; ++hp) if (hp->ref != NULL) { found = true; break; } if (session->nhazard == 0 && !found) return; __wt_errx(session, "session %p: close hazard pointer table: table not empty", (void *)session); #ifdef HAVE_DIAGNOSTIC __hazard_dump(session); #endif /* * Clear any hazard pointers because it's not a correctness problem * (any hazard pointer we find can't be real because the session is * being closed when we're called). We do this work because session * close isn't that common that it's an expensive check, and we don't * want to let a hazard pointer lie around, keeping a page from being * evicted. * * We don't panic: this shouldn't be a correctness issue (at least, I * can't think of a reason it would be). */ for (hp = session->hazard; hp < session->hazard + session->hazard_inuse; ++hp) if (hp->ref != NULL) { hp->ref = NULL; --session->nhazard; } if (session->nhazard != 0) __wt_errx(session, "session %p: close hazard pointer table: count didn't " "match entries", (void *)session); } /* * hazard_get_reference -- * Return a consistent reference to a hazard pointer array. */ static inline void hazard_get_reference( WT_SESSION_IMPL *session, WT_HAZARD **hazardp, uint32_t *hazard_inusep) { /* * Hazard pointer arrays can be swapped out from under us if they grow. * First, read the current in-use value. The read must precede the read * of the hazard pointer itself (so the in-use value is pessimistic * should the hazard array grow), and additionally ensure we only read * the in-use value once. Then, read the hazard pointer, also ensuring * we only read it once. * * Use a barrier instead of marking the fields volatile because we don't * want to slow down the rest of the hazard pointer functions that don't * need special treatment. */ WT_ORDERED_READ(*hazard_inusep, session->hazard_inuse); WT_ORDERED_READ(*hazardp, session->hazard); } /* * __wt_hazard_check -- * Return if there's a hazard pointer to the page in the system. */ WT_HAZARD * __wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_SESSION_IMPL **sessionp) { WT_CONNECTION_IMPL *conn; WT_HAZARD *hp; WT_SESSION_IMPL *s; uint32_t i, j, hazard_inuse, max, session_cnt, walk_cnt; conn = S2C(session); WT_STAT_CONN_INCR(session, cache_hazard_checks); /* * Hazard pointer arrays might grow and be freed underneath us; enter * the current hazard resource generation for the duration of the walk * to ensure that doesn't happen. */ __wt_session_gen_enter(session, WT_GEN_HAZARD); /* * No lock is required because the session array is fixed size, but it * may contain inactive entries. We must review any active session * that might contain a hazard pointer, so insert a read barrier after * reading the active session count. That way, no matter what sessions * come or go, we'll check the slots for all of the sessions that could * have been active when we started our check. */ WT_ORDERED_READ(session_cnt, conn->session_cnt); for (s = conn->sessions, i = j = max = walk_cnt = 0; i < session_cnt; ++s, ++i) { if (!s->active) continue; hazard_get_reference(s, &hp, &hazard_inuse); if (hazard_inuse > max) { max = hazard_inuse; WT_STAT_CONN_SET(session, cache_hazard_max, max); } for (j = 0; j < hazard_inuse; ++hp, ++j) { ++walk_cnt; if (hp->ref == ref) { WT_STAT_CONN_INCRV(session, cache_hazard_walks, walk_cnt); if (sessionp != NULL) *sessionp = s; goto done; } } } WT_STAT_CONN_INCRV(session, cache_hazard_walks, walk_cnt); hp = NULL; done: /* Leave the current resource generation. */ __wt_session_gen_leave(session, WT_GEN_HAZARD); return (hp); } /* * __wt_hazard_count -- * Count how many hazard pointers this session has on the given page. */ u_int __wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref) { WT_HAZARD *hp; uint32_t i, hazard_inuse; u_int count; hazard_get_reference(session, &hp, &hazard_inuse); for (count = 0, i = 0; i < hazard_inuse; ++hp, ++i) if (hp->ref == ref) ++count; return (count); } #ifdef HAVE_DIAGNOSTIC /* * __wt_hazard_check_assert -- * Assert there's no hazard pointer to the page. */ bool __wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor) { WT_HAZARD *hp; WT_SESSION_IMPL *s; int i; s = NULL; for (i = 0;;) { if ((hp = __wt_hazard_check(session, ref, &s)) == NULL) return (true); if (!waitfor || ++i > 100) break; __wt_sleep(0, 10000); } __wt_errx(session, "hazard pointer reference to discarded object: " "(%p: session %p name %s: %s, line %d)", (void *)hp->ref, (void *)s, s->name == NULL ? "UNKNOWN" : s->name, hp->func, hp->line); return (false); } /* * __hazard_dump -- * Display the list of hazard pointers. */ static void __hazard_dump(WT_SESSION_IMPL *session) { WT_HAZARD *hp; for (hp = session->hazard; hp < session->hazard + session->hazard_inuse; ++hp) if (hp->ref != NULL) __wt_errx(session, "session %p: hazard pointer %p: %s, line %d", (void *)session, (void *)hp->ref, hp->func, hp->line); } #endif