diff options
author | Keith Bostic <keith.bostic@mongodb.com> | 2016-12-07 22:05:26 -0500 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2016-12-08 14:05:26 +1100 |
commit | a5c91bce5ff5147e5e11e1154baa536de1b23506 (patch) | |
tree | 3881634fb47aa014049d25d89d7b50fcdb070f6f | |
parent | 124eed30a0791955a0a5e6edf0c3b57a8c827c55 (diff) | |
download | mongo-a5c91bce5ff5147e5e11e1154baa536de1b23506.tar.gz |
WT-3057 WiredTiger hazard pointers should use the WT_REF, not the WT_PAGE. (#3181)
-rw-r--r-- | src/btree/bt_discard.c | 50 | ||||
-rw-r--r-- | src/btree/bt_read.c | 2 | ||||
-rw-r--r-- | src/btree/bt_split.c | 4 | ||||
-rw-r--r-- | src/evict/evict_page.c | 4 | ||||
-rw-r--r-- | src/include/btmem.h | 3 | ||||
-rw-r--r-- | src/include/btree.i | 6 | ||||
-rw-r--r-- | src/include/extern.h | 4 | ||||
-rw-r--r-- | src/include/session.h | 2 | ||||
-rw-r--r-- | src/support/hazard.c | 57 |
9 files changed, 63 insertions, 69 deletions
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index 162bc07a1c2..7858d2cb14e 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -27,9 +27,35 @@ __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) /* * A version of the page-out function that allows us to make additional * diagnostic checks. + * + * The WT_REF cannot be the eviction thread's location. */ WT_ASSERT(session, S2BT(session)->evict_ref != ref); +#ifdef HAVE_DIAGNOSTIC + { + WT_HAZARD *hp; + int i; + /* + * Make sure no other thread has a hazard pointer on the page we are + * about to discard. This is complicated by the fact that readers + * publish their hazard pointer before re-checking the page state, so + * our check can race with readers without indicating a real problem. + * Wait for up to a second for hazard pointers to be cleared. + */ + for (hp = NULL, i = 0; i < 100; i++) { + if ((hp = __wt_page_hazard_check(session, ref)) == NULL) + break; + __wt_sleep(0, 10000); + } + if (hp != NULL) + __wt_errx(session, + "discarded page has hazard pointer: (%p: %s, line %d)", + (void *)hp->ref, hp->file, hp->line); + WT_ASSERT(session, hp == NULL); + } +#endif + __wt_page_out(session, &ref->page); } @@ -63,30 +89,6 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU)); WT_ASSERT(session, !__wt_rwlock_islocked(session, &page->page_lock)); -#ifdef HAVE_DIAGNOSTIC - { - WT_HAZARD *hp; - int i; - /* - * Make sure no other thread has a hazard pointer on the page we are - * about to discard. This is complicated by the fact that readers - * publish their hazard pointer before re-checking the page state, so - * our check can race with readers without indicating a real problem. - * Wait for up to a second for hazard pointers to be cleared. - */ - for (hp = NULL, i = 0; i < 100; i++) { - if ((hp = __wt_page_hazard_check(session, page)) == NULL) - break; - __wt_sleep(0, 10000); - } - if (hp != NULL) - __wt_errx(session, - "discarded page has hazard pointer: (%p: %s, line %d)", - (void *)hp->page, hp->file, hp->line); - WT_ASSERT(session, hp == NULL); - } -#endif - /* * If a root page split, there may be one or more pages linked from the * page; walk the list, discarding pages. diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 90188498535..39f9e1159cb 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -324,7 +324,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref) * If this session has more than one hazard pointer, eviction will fail * and there is no point trying. */ - if (__wt_hazard_count(session, page) > 1) + if (__wt_hazard_count(session, ref) > 1) return (false); /* If we can do an in-memory split, do it. */ diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 017c820ea29..fe49f937719 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -513,7 +513,7 @@ __split_ref_step2( __split_verify_intl_key_order(session, child)); #endif - WT_ERR(__wt_hazard_clear(session, child)); + WT_ERR(__wt_hazard_clear(session, ref)); } return (0); @@ -1331,7 +1331,7 @@ __split_internal_unlock(WT_SESSION_IMPL *session, WT_PAGE *parent, bool hazard) WT_DECL_RET; if (hazard) - ret = __wt_hazard_clear(session, parent); + ret = __wt_hazard_clear(session, parent->pg_intl_parent_ref); __wt_writeunlock(session, &parent->page_lock); return (ret); diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 893133432bb..56a0fcfc790 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -37,7 +37,7 @@ __evict_exclusive(WT_SESSION_IMPL *session, WT_REF *ref) * Check for a hazard pointer indicating another thread is using the * page, meaning the page cannot be evicted. */ - if (__wt_page_hazard_check(session, ref->page) == NULL) + if (__wt_page_hazard_check(session, ref) == NULL) return (0); WT_STAT_DATA_INCR(session, cache_eviction_hazard); @@ -66,7 +66,7 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) * between. */ locked = __wt_atomic_casv32(&ref->state, WT_REF_MEM, WT_REF_LOCKED); - if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) { + if ((ret = __wt_hazard_clear(session, ref)) != 0 || !locked) { if (locked) ref->state = WT_REF_MEM; return (ret == 0 ? EBUSY : ret); diff --git a/src/include/btmem.h b/src/include/btmem.h index be4e53a1af0..9bd835f5d09 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -444,9 +444,6 @@ struct __wt_page { /* * Internal pages (both column- and row-store). * - * The page record number is only used by column-store, but it's - * simpler having only one kind of internal page. - * * In-memory internal pages have an array of pointers to child * structures, maintained in collated order. When a page is * read into memory, the initial list of children is stored in diff --git a/src/include/btree.i b/src/include/btree.i index 8f44bc4eddd..e591209f39a 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1355,7 +1355,7 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) F_ISSET(session, WT_SESSION_NO_EVICTION) || F_ISSET(btree, WT_BTREE_NO_EVICTION) || !__wt_page_can_evict(session, ref, NULL)) - return (__wt_hazard_clear(session, page)); + return (__wt_hazard_clear(session, ref)); WT_RET_BUSY_OK(__wt_page_release_evict(session, ref)); return (0); @@ -1441,7 +1441,7 @@ __wt_page_swap_func( * Return if there's a hazard pointer to the page in the system. */ static inline WT_HAZARD * -__wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) +__wt_page_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref) { WT_CONNECTION_IMPL *conn; WT_HAZARD *hp; @@ -1472,7 +1472,7 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) } for (hp = s->hazard; hp < s->hazard + hazard_size; ++hp) { ++j; - if (hp->page == page) { + if (hp->ref == ref) { WT_STAT_CONN_INCRV(session, cache_hazard_walks, j); return (hp); diff --git a/src/include/extern.h b/src/include/extern.h index 42b34f75a17..7dce310dabf 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -652,9 +652,9 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp , const char *file, int line #endif ) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_hazard_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern u_int __wt_hazard_count(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern u_int __wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_fill_hex(const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_raw_to_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_raw_to_esc_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/session.h b/src/include/session.h index c4345789520..76cb463602c 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -23,7 +23,7 @@ struct __wt_data_handle_cache { * A hazard pointer. */ struct __wt_hazard { - WT_PAGE *page; /* Page address */ + WT_REF *ref; /* Page reference */ #ifdef HAVE_DIAGNOSTIC const char *file; /* File/line where hazard acquired */ int line; diff --git a/src/support/hazard.c b/src/support/hazard.c index 27508bbdece..b0fa7e129bb 100644 --- a/src/support/hazard.c +++ b/src/support/hazard.c @@ -26,7 +26,6 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp WT_BTREE *btree; WT_CONNECTION_IMPL *conn; WT_HAZARD *hp; - WT_PAGE *page; int restarts = 0; btree = S2BT(session); @@ -38,11 +37,11 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp return (0); /* - * If there isn't a valid page pointer, we're done. This read can race - * with eviction and splits, we re-check it after a barrier to make - * sure we have a valid pointer. + * If there isn't a valid page, we're done. This read can race with + * eviction and splits, we re-check it after a barrier to make sure + * we have a valid reference. */ - if (ref->state != WT_REF_MEM || (page = ref->page) == NULL) { + if (ref->state != WT_REF_MEM) { *busyp = true; return (0); } @@ -90,10 +89,10 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp conn->hazard_max)); } - if (hp->page != NULL) + if (hp->ref != NULL) continue; - hp->page = page; + hp->ref = ref; #ifdef HAVE_DIAGNOSTIC hp->file = file; hp->line = line; @@ -103,21 +102,17 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp /* * Check if the page state is still valid, where valid means a - * state of WT_REF_MEM and the pointer is unchanged. (The - * pointer can change, it means the page was evicted between - * the time we set our hazard pointer and the publication. It - * would theoretically be possible for the page to be evicted - * and a different page read into the same memory, so the - * pointer hasn't changed but the contents have. That's OK, we - * found this page using the tree's key space, whatever page we - * find here is the page for us to use.) + * state of WT_REF_MEM. */ if (ref->state == WT_REF_MEM) { + ++session->nhazard; + + /* + * Callers require a barrier here so operations holding + * the hazard pointer see consistent data. + */ WT_READ_BARRIER(); - if (ref->page == page) { - ++session->nhazard; - return (0); - } + return (0); } /* @@ -130,7 +125,7 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp * We don't bother publishing this update: the worst case is we * prevent some random page from being evicted. */ - hp->page = NULL; + hp->ref = NULL; *busyp = true; return (0); } @@ -147,7 +142,7 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp * Clear a hazard pointer. */ int -__wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page) +__wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref) { WT_BTREE *btree; WT_HAZARD *hp; @@ -165,7 +160,7 @@ __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page) for (hp = session->hazard + session->hazard_size - 1; hp >= session->hazard; --hp) - if (hp->page == page) { + if (hp->ref == ref) { /* * We don't publish the hazard pointer clear in the * general case. It's not required for correctness; @@ -174,7 +169,7 @@ __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page) * generation number was just set, it's unlikely the * page will be selected for eviction. */ - hp->page = NULL; + hp->ref = NULL; /* * If this was the last hazard pointer in the session, @@ -191,7 +186,7 @@ __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page) */ WT_PANIC_RET(session, EINVAL, "session %p: clear hazard pointer: %p: not found", - (void *)session, (void *)page); + (void *)session, (void *)ref); } /* @@ -211,7 +206,7 @@ __wt_hazard_close(WT_SESSION_IMPL *session) */ for (found = false, hp = session->hazard; hp < session->hazard + session->hazard_size; ++hp) - if (hp->page != NULL) { + if (hp->ref != NULL) { found = true; break; } @@ -239,8 +234,8 @@ __wt_hazard_close(WT_SESSION_IMPL *session) */ for (hp = session->hazard; hp < session->hazard + session->hazard_size; ++hp) - if (hp->page != NULL) { - hp->page = NULL; + if (hp->ref != NULL) { + hp->ref = NULL; --session->nhazard; } @@ -256,7 +251,7 @@ __wt_hazard_close(WT_SESSION_IMPL *session) * Count how many hazard pointers this session has on the given page. */ u_int -__wt_hazard_count(WT_SESSION_IMPL *session, WT_PAGE *page) +__wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref) { WT_HAZARD *hp; u_int count; @@ -264,7 +259,7 @@ __wt_hazard_count(WT_SESSION_IMPL *session, WT_PAGE *page) for (count = 0, hp = session->hazard + session->hazard_size - 1; hp >= session->hazard; --hp) - if (hp->page == page) + if (hp->ref == ref) ++count; return (count); @@ -282,10 +277,10 @@ __hazard_dump(WT_SESSION_IMPL *session) for (hp = session->hazard; hp < session->hazard + session->hazard_size; ++hp) - if (hp->page != NULL) + if (hp->ref != NULL) __wt_errx(session, "session %p: hazard pointer %p: %s, line %d", (void *)session, - (void *)hp->page, hp->file, hp->line); + (void *)hp->ref, hp->file, hp->line); } #endif |