diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2017-02-15 18:08:10 +1100 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-02-15 18:08:10 +1100 |
commit | 70b5ab64d84cb8a22553def853ddb1a11393ff73 (patch) | |
tree | 6467bdaa7b2af39bd63d2f12af0757e82630fb7f | |
parent | 7a725a97d281095280515b0609f0e61747fd1b58 (diff) | |
download | mongo-70b5ab64d84cb8a22553def853ddb1a11393ff73.tar.gz |
WT-3149 Make random lookups for eviction more lightweight. (#3302)
Eviction walks don't need to start on leaf pages: just try to descend
through the tree and as soon as we can't swap to a child page, start
the walk from the parent.
-rw-r--r-- | src/btree/bt_random.c | 38 |
1 files changed, 26 insertions, 12 deletions
diff --git a/src/btree/bt_random.c b/src/btree/bt_random.c index 3cc6838c4c8..44de511f787 100644 --- a/src/btree/bt_random.c +++ b/src/btree/bt_random.c @@ -166,7 +166,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) /* * __wt_random_descent -- - * Find a random leaf page in a tree. + * Find a random page in a tree for either sampling or eviction. */ int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) @@ -183,9 +183,11 @@ __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) retry = 100; /* Eviction should not be tapped to do eviction. */ - flags = WT_READ_RESTART_OK; if (eviction) - LF_SET(WT_READ_NO_EVICT); + flags = WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | + WT_READ_NO_WAIT | WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK; + else + flags = WT_READ_RESTART_OK; if (0) { restart: /* @@ -205,6 +207,13 @@ restart: /* WT_INTL_INDEX_GET(session, page, pindex); entries = pindex->entries; + /* Eviction just wants any random child. */ + if (eviction) { + descent = pindex->index[ + __wt_random(&session->rnd) % entries]; + goto descend; + } + /* * There may be empty pages in the tree, and they're useless to * us. If we don't find a non-empty page in "entries" random @@ -212,10 +221,8 @@ restart: /* * search page contains nothing other than empty pages, restart * from the root some number of times before giving up. * - * Eviction is only looking for a place in the cache and so only - * wants in-memory pages (but a deleted page is fine); currently - * our other caller is looking for a key/value pair on a random - * leave page, and so will accept any page that contains a valid + * Random sampling is looking for a key/value pair on a random + * leaf page, and so will accept any page that contains a valid * key/value pair, so on-disk is fine, but deleted is not. */ descent = NULL; @@ -223,15 +230,14 @@ restart: /* descent = pindex->index[__wt_random(&session->rnd) % entries]; if (descent->state == WT_REF_MEM || - (!eviction && descent->state == WT_REF_DISK)) + descent->state == WT_REF_DISK) break; } if (i == entries) for (i = 0; i < entries; ++i) { descent = pindex->index[i]; if (descent->state == WT_REF_MEM || - (!eviction && - descent->state == WT_REF_DISK)) + descent->state == WT_REF_DISK) break; } if (i == entries || descent == NULL) { @@ -249,17 +255,25 @@ restart: /* * On other error, simply return, the swap call ensures we're * holding nothing on failure. */ - if ((ret = +descend: if ((ret = __wt_page_swap(session, current, descent, flags)) == 0) { current = descent; continue; } + if (eviction && (ret == WT_NOTFOUND || ret == WT_RESTART)) + break; if (ret == WT_RESTART) goto restart; return (ret); } - *refp = current; + /* + * There is no point starting with the root page: the walk will exit + * immediately. In that case we aren't holding a hazard pointer so + * there is nothing to release. + */ + if (!eviction || !__wt_ref_is_root(current)) + *refp = current; return (0); } |