diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_random.c')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_random.c | 780 |
1 files changed, 377 insertions, 403 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c index 255c08e0b60..525728b73dc 100644 --- a/src/third_party/wiredtiger/src/btree/bt_random.c +++ b/src/third_party/wiredtiger/src/btree/bt_random.c @@ -10,433 +10,407 @@ /* * __wt_row_random_leaf -- - * Return a random key from a row-store leaf page. + * Return a random key from a row-store leaf page. */ int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) { - WT_INSERT *ins, **start, **stop; - WT_INSERT_HEAD *ins_head; - WT_PAGE *page; - uint64_t samples; - uint32_t choice, entries, i; - int level; - - page = cbt->ref->page; - start = stop = NULL; /* [-Wconditional-uninitialized] */ - entries = 0; /* [-Wconditional-uninitialized] */ - - __cursor_pos_clear(cbt); - - /* If the page has disk-based entries, select from them. */ - if (page->entries != 0) { - cbt->compare = 0; - cbt->slot = __wt_random(&session->rnd) % page->entries; - - /* - * The real row-store search function builds the key, so we - * have to as well. - */ - return (__wt_row_leaf_key(session, - page, page->pg_row + cbt->slot, cbt->tmp, false)); - } - - /* - * If the tree is new (and not empty), it might have a large insert - * list. - * - * Walk down the list until we find a level with at least 50 entries, - * that's where we'll start rolling random numbers. The value 50 is - * used to ignore levels with only a few entries, that is, levels which - * are potentially badly skewed. - */ - F_SET(cbt, WT_CBT_SEARCH_SMALLEST); - if ((ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) - return (WT_NOTFOUND); - for (level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) { - start = &ins_head->head[level]; - for (entries = 0, stop = start; - *stop != NULL; stop = &(*stop)->next[level]) - ++entries; - - if (entries > 50) - break; - } - - /* - * If it's a tiny list and we went all the way to level 0, correct the - * level; entries is correctly set. - */ - if (level < 0) - level = 0; - - /* - * Step down the skip list levels, selecting a random chunk of the name - * space at each level. - */ - for (samples = entries; level > 0; samples += entries) { - /* - * There are (entries) or (entries + 1) chunks of the name space - * considered at each level. They are: between start and the 1st - * element, between the 1st and 2nd elements, and so on to the - * last chunk which is the name space after the stop element on - * the current level. This last chunk of name space may or may - * not be there: as we descend the levels of the skip list, this - * chunk may appear, depending if the next level down has - * entries logically after the stop point in the current level. - * We can't ignore those entries: because of the algorithm used - * to determine the depth of a skiplist, there may be a large - * number of entries "revealed" by descending a level. - * - * If the next level down has more items after the current stop - * point, there are (entries + 1) chunks to consider, else there - * are (entries) chunks. - */ - if (*(stop - 1) == NULL) - choice = __wt_random(&session->rnd) % entries; - else - choice = __wt_random(&session->rnd) % (entries + 1); - - if (choice == entries) { - /* - * We selected the name space after the stop element on - * this level. Set the start point to the current stop - * point, descend a level and move the stop element to - * the end of the list, that is, the end of the newly - * discovered name space, counting entries as we go. - */ - start = stop; - --start; - --level; - for (entries = 0, stop = start; - *stop != NULL; stop = &(*stop)->next[level]) - ++entries; - } else { - /* - * We selected another name space on the level. Move the - * start pointer the selected number of entries forward - * to the start of the selected chunk (if the selected - * number is 0, start won't move). Set the stop pointer - * to the next element in the list and drop both start - * and stop down a level. - */ - for (i = 0; i < choice; ++i) - start = &(*start)->next[level]; - stop = &(*start)->next[level]; - - --start; - --stop; - --level; - - /* Count the entries in the selected name space. */ - for (entries = 0, - ins = *start; ins != *stop; ins = ins->next[level]) - ++entries; - } - } - - /* - * When we reach the bottom level, entries will already be set. Select - * a random entry from the name space and return it. - * - * It should be impossible for the entries count to be 0 at this point, - * but check for it out of paranoia and to quiet static testing tools. - */ - if (entries > 0) - entries = __wt_random(&session->rnd) % entries; - for (ins = *start; entries > 0; --entries) - ins = ins->next[0]; - - cbt->ins = ins; - cbt->ins_head = ins_head; - cbt->compare = 0; - - /* - * Random lookups in newly created collections can be slow if a page - * consists of a large skiplist. Schedule the page for eviction if we - * encounter a large skiplist. This worthwhile because applications - * that take a sample often take many samples, so the overhead of - * traversing the skip list each time accumulates to real time. - */ - if (samples > 5000) - __wt_page_evict_soon(session, cbt->ref); - - return (0); + WT_INSERT *ins, **start, **stop; + WT_INSERT_HEAD *ins_head; + WT_PAGE *page; + uint64_t samples; + uint32_t choice, entries, i; + int level; + + page = cbt->ref->page; + start = stop = NULL; /* [-Wconditional-uninitialized] */ + entries = 0; /* [-Wconditional-uninitialized] */ + + __cursor_pos_clear(cbt); + + /* If the page has disk-based entries, select from them. */ + if (page->entries != 0) { + cbt->compare = 0; + cbt->slot = __wt_random(&session->rnd) % page->entries; + + /* + * The real row-store search function builds the key, so we have to as well. + */ + return (__wt_row_leaf_key(session, page, page->pg_row + cbt->slot, cbt->tmp, false)); + } + + /* + * If the tree is new (and not empty), it might have a large insert + * list. + * + * Walk down the list until we find a level with at least 50 entries, + * that's where we'll start rolling random numbers. The value 50 is + * used to ignore levels with only a few entries, that is, levels which + * are potentially badly skewed. + */ + F_SET(cbt, WT_CBT_SEARCH_SMALLEST); + if ((ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) + return (WT_NOTFOUND); + for (level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) { + start = &ins_head->head[level]; + for (entries = 0, stop = start; *stop != NULL; stop = &(*stop)->next[level]) + ++entries; + + if (entries > 50) + break; + } + + /* + * If it's a tiny list and we went all the way to level 0, correct the level; entries is + * correctly set. + */ + if (level < 0) + level = 0; + + /* + * Step down the skip list levels, selecting a random chunk of the name space at each level. + */ + for (samples = entries; level > 0; samples += entries) { + /* + * There are (entries) or (entries + 1) chunks of the name space + * considered at each level. They are: between start and the 1st + * element, between the 1st and 2nd elements, and so on to the + * last chunk which is the name space after the stop element on + * the current level. This last chunk of name space may or may + * not be there: as we descend the levels of the skip list, this + * chunk may appear, depending if the next level down has + * entries logically after the stop point in the current level. + * We can't ignore those entries: because of the algorithm used + * to determine the depth of a skiplist, there may be a large + * number of entries "revealed" by descending a level. + * + * If the next level down has more items after the current stop + * point, there are (entries + 1) chunks to consider, else there + * are (entries) chunks. + */ + if (*(stop - 1) == NULL) + choice = __wt_random(&session->rnd) % entries; + else + choice = __wt_random(&session->rnd) % (entries + 1); + + if (choice == entries) { + /* + * We selected the name space after the stop element on this level. Set the start point + * to the current stop point, descend a level and move the stop element to the end of + * the list, that is, the end of the newly discovered name space, counting entries as we + * go. + */ + start = stop; + --start; + --level; + for (entries = 0, stop = start; *stop != NULL; stop = &(*stop)->next[level]) + ++entries; + } else { + /* + * We selected another name space on the level. Move the start pointer the selected + * number of entries forward to the start of the selected chunk (if the selected number + * is 0, start won't move). Set the stop pointer to the next element in the list and + * drop both start and stop down a level. + */ + for (i = 0; i < choice; ++i) + start = &(*start)->next[level]; + stop = &(*start)->next[level]; + + --start; + --stop; + --level; + + /* Count the entries in the selected name space. */ + for (entries = 0, ins = *start; ins != *stop; ins = ins->next[level]) + ++entries; + } + } + + /* + * When we reach the bottom level, entries will already be set. Select + * a random entry from the name space and return it. + * + * It should be impossible for the entries count to be 0 at this point, + * but check for it out of paranoia and to quiet static testing tools. + */ + if (entries > 0) + entries = __wt_random(&session->rnd) % entries; + for (ins = *start; entries > 0; --entries) + ins = ins->next[0]; + + cbt->ins = ins; + cbt->ins_head = ins_head; + cbt->compare = 0; + + /* + * Random lookups in newly created collections can be slow if a page consists of a large + * skiplist. Schedule the page for eviction if we encounter a large skiplist. This worthwhile + * because applications that take a sample often take many samples, so the overhead of + * traversing the skip list each time accumulates to real time. + */ + if (samples > 5000) + __wt_page_evict_soon(session, cbt->ref); + + return (0); } /* * __wt_random_descent -- - * Find a random page in a tree for either sampling or eviction. + * Find a random page in a tree for either sampling or eviction. */ int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) { - WT_BTREE *btree; - WT_DECL_RET; - WT_PAGE *page; - WT_PAGE_INDEX *pindex; - WT_REF *current, *descent; - uint32_t i, entries, retry; - bool eviction; - - *refp = NULL; - - btree = S2BT(session); - current = NULL; - retry = 100; - /* - * This function is called by eviction to find a random page in the - * cache. That case is indicated by the WT_READ_CACHE flag. Ordinary - * lookups in a tree will read pages into cache as needed. - */ - eviction = LF_ISSET(WT_READ_CACHE); - - if (0) { + WT_BTREE *btree; + WT_DECL_RET; + WT_PAGE *page; + WT_PAGE_INDEX *pindex; + WT_REF *current, *descent; + uint32_t i, entries, retry; + bool eviction; + + *refp = NULL; + + btree = S2BT(session); + current = NULL; + retry = 100; + /* + * This function is called by eviction to find a random page in the cache. That case is + * indicated by the WT_READ_CACHE flag. Ordinary lookups in a tree will read pages into cache as + * needed. + */ + eviction = LF_ISSET(WT_READ_CACHE); + + if (0) { restart: - /* - * Discard the currently held page and restart the search from - * the root. - */ - WT_RET(__wt_page_release(session, current, flags)); - } - - /* Search the internal pages of the tree. */ - current = &btree->root; - for (;;) { - page = current->page; - if (!WT_PAGE_IS_INTERNAL(page)) - break; - - WT_INTL_INDEX_GET(session, page, pindex); - entries = pindex->entries; - - /* Eviction just wants any random child. */ - if (eviction) { - descent = pindex->index[ - __wt_random(&session->rnd) % entries]; - goto descend; - } - - /* - * There may be empty pages in the tree, and they're useless to - * us. If we don't find a non-empty page in "entries" random - * guesses, take the first non-empty page in the tree. If the - * search page contains nothing other than empty pages, restart - * from the root some number of times before giving up. - * - * Random sampling is looking for a key/value pair on a random - * leaf page, and so will accept any page that contains a valid - * key/value pair, so on-disk is fine, but deleted is not. - */ - descent = NULL; - for (i = 0; i < entries; ++i) { - descent = - pindex->index[__wt_random(&session->rnd) % entries]; - if (descent->state == WT_REF_DISK || - descent->state == WT_REF_LIMBO || - descent->state == WT_REF_LOOKASIDE || - descent->state == WT_REF_MEM) - break; - } - if (i == entries) - for (i = 0; i < entries; ++i) { - descent = pindex->index[i]; - if (descent->state == WT_REF_DISK || - descent->state == WT_REF_LIMBO || - descent->state == WT_REF_LOOKASIDE || - descent->state == WT_REF_MEM) - break; - } - if (i == entries || descent == NULL) { - if (--retry > 0) - goto restart; - - WT_RET(__wt_page_release(session, current, flags)); - return (WT_NOTFOUND); - } - - /* - * Swap the current page for the child page. If the page splits - * while we're retrieving it, restart the search at the root. - * - * On other error, simply return, the swap call ensures we're - * holding nothing on failure. - */ -descend: if ((ret = __wt_page_swap( - session, current, descent, flags)) == 0) { - current = descent; - continue; - } - if (eviction && (ret == WT_NOTFOUND || ret == WT_RESTART)) - break; - if (ret == WT_RESTART) - goto restart; - return (ret); - } - - /* - * There is no point starting with the root page: the walk will exit - * immediately. In that case we aren't holding a hazard pointer so - * there is nothing to release. - */ - if (!eviction || !__wt_ref_is_root(current)) - *refp = current; - return (0); + /* + * Discard the currently held page and restart the search from the root. + */ + WT_RET(__wt_page_release(session, current, flags)); + } + + /* Search the internal pages of the tree. */ + current = &btree->root; + for (;;) { + page = current->page; + if (!WT_PAGE_IS_INTERNAL(page)) + break; + + WT_INTL_INDEX_GET(session, page, pindex); + entries = pindex->entries; + + /* Eviction just wants any random child. */ + if (eviction) { + descent = pindex->index[__wt_random(&session->rnd) % entries]; + goto descend; + } + + /* + * There may be empty pages in the tree, and they're useless to + * us. If we don't find a non-empty page in "entries" random + * guesses, take the first non-empty page in the tree. If the + * search page contains nothing other than empty pages, restart + * from the root some number of times before giving up. + * + * Random sampling is looking for a key/value pair on a random + * leaf page, and so will accept any page that contains a valid + * key/value pair, so on-disk is fine, but deleted is not. + */ + descent = NULL; + for (i = 0; i < entries; ++i) { + descent = pindex->index[__wt_random(&session->rnd) % entries]; + if (descent->state == WT_REF_DISK || descent->state == WT_REF_LIMBO || + descent->state == WT_REF_LOOKASIDE || descent->state == WT_REF_MEM) + break; + } + if (i == entries) + for (i = 0; i < entries; ++i) { + descent = pindex->index[i]; + if (descent->state == WT_REF_DISK || descent->state == WT_REF_LIMBO || + descent->state == WT_REF_LOOKASIDE || descent->state == WT_REF_MEM) + break; + } + if (i == entries || descent == NULL) { + if (--retry > 0) + goto restart; + + WT_RET(__wt_page_release(session, current, flags)); + return (WT_NOTFOUND); + } + + /* + * Swap the current page for the child page. If the page splits + * while we're retrieving it, restart the search at the root. + * + * On other error, simply return, the swap call ensures we're + * holding nothing on failure. + */ +descend: + if ((ret = __wt_page_swap(session, current, descent, flags)) == 0) { + current = descent; + continue; + } + if (eviction && (ret == WT_NOTFOUND || ret == WT_RESTART)) + break; + if (ret == WT_RESTART) + goto restart; + return (ret); + } + + /* + * There is no point starting with the root page: the walk will exit immediately. In that case + * we aren't holding a hazard pointer so there is nothing to release. + */ + if (!eviction || !__wt_ref_is_root(current)) + *refp = current; + return (0); } /* * __wt_btcur_next_random -- - * Move to a random record in the tree. There are two algorithms, one - * where we select a record at random from the whole tree on each - * retrieval and one where we first select a record at random from the - * whole tree, and then subsequently sample forward from that location. - * The sampling approach allows us to select reasonably uniform random - * points from unbalanced trees. + * Move to a random record in the tree. There are two algorithms, one where we select a record + * at random from the whole tree on each retrieval and one where we first select a record at + * random from the whole tree, and then subsequently sample forward from that location. The + * sampling approach allows us to select reasonably uniform random points from unbalanced trees. */ int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) { - WT_BTREE *btree; - WT_CURSOR *cursor; - WT_DECL_RET; - WT_SESSION_IMPL *session; - WT_UPDATE *upd; - wt_off_t size; - uint64_t n, skip; - uint32_t read_flags; - bool valid; - - btree = cbt->btree; - cursor = &cbt->iface; - session = (WT_SESSION_IMPL *)cbt->iface.session; - read_flags = WT_READ_RESTART_OK; - if (F_ISSET(cbt, WT_CBT_READ_ONCE)) - FLD_SET(read_flags, WT_READ_WONT_NEED); - - /* - * Only supports row-store: applications can trivially select a random - * value from a column-store, if there were any reason to do so. - */ - if (btree->type != BTREE_ROW) - WT_RET_MSG(session, ENOTSUP, - "WT_CURSOR.next_random only supported by row-store tables"); - - WT_STAT_CONN_INCR(session, cursor_next); - WT_STAT_DATA_INCR(session, cursor_next); - - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + WT_BTREE *btree; + WT_CURSOR *cursor; + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UPDATE *upd; + wt_off_t size; + uint64_t n, skip; + uint32_t read_flags; + bool valid; + + btree = cbt->btree; + cursor = &cbt->iface; + session = (WT_SESSION_IMPL *)cbt->iface.session; + read_flags = WT_READ_RESTART_OK; + if (F_ISSET(cbt, WT_CBT_READ_ONCE)) + FLD_SET(read_flags, WT_READ_WONT_NEED); + + /* + * Only supports row-store: applications can trivially select a random value from a + * column-store, if there were any reason to do so. + */ + if (btree->type != BTREE_ROW) + WT_RET_MSG(session, ENOTSUP, "WT_CURSOR.next_random only supported by row-store tables"); + + WT_STAT_CONN_INCR(session, cursor_next); + WT_STAT_DATA_INCR(session, cursor_next); + + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); #ifdef HAVE_DIAGNOSTIC - /* - * Under some conditions we end up using the underlying cursor.next to - * walk through the object. Since there are multiple calls, we can hit - * the cursor-order checks, turn them off. - */ - __wt_cursor_key_order_reset(cbt); + /* + * Under some conditions we end up using the underlying cursor.next to walk through the object. + * Since there are multiple calls, we can hit the cursor-order checks, turn them off. + */ + __wt_cursor_key_order_reset(cbt); #endif - /* - * If we don't have a current position in the tree, or if retrieving - * random values without sampling, pick a roughly random leaf page in - * the tree and return an entry from it. - */ - if (cbt->ref == NULL || cbt->next_random_sample_size == 0) { - WT_ERR(__cursor_func_init(cbt, true)); - WT_WITH_PAGE_INDEX(session, - ret = __wt_random_descent(session, &cbt->ref, read_flags)); - if (ret == 0) - goto random_page_entry; - - /* - * Random descent may return not-found: the tree might be empty - * or have so many deleted items we didn't find any valid pages. - * We can't return WT_NOTFOUND to the application unless a tree - * is really empty, fallback to skipping through tree pages. - */ - WT_ERR_NOTFOUND_OK(ret); - } - - /* - * Cursor through the tree, skipping past the sample size of the leaf - * pages in the tree between each random key return to compensate for - * unbalanced trees. - * - * If the random descent attempt failed, we don't have a configured - * sample size, use 100 for no particular reason. - */ - if (cbt->next_random_sample_size == 0) - cbt->next_random_sample_size = 100; - - /* - * If the random descent attempt failed, or it's our first skip attempt, - * we haven't yet set the pages to skip, do it now. - * - * Use the underlying file size divided by its block allocation size as - * our guess of leaf pages in the file (this can be entirely wrong, as - * it depends on how many pages are in this particular checkpoint, how - * large the leaf and internal pages really are, and other factors). - * Then, divide that value by the configured sample size and increment - * the final result to make sure tiny files don't leave us with a skip - * value of 0. - * - * !!! - * Ideally, the number would be prime to avoid restart issues. - */ - if (cbt->next_random_leaf_skip == 0) { - WT_ERR(btree->bm->size(btree->bm, session, &size)); - cbt->next_random_leaf_skip = (uint64_t) - ((size / btree->allocsize) / - cbt->next_random_sample_size) + 1; - } - - /* - * Be paranoid about loop termination: first, if the last leaf page - * skipped was also the last leaf page in the tree, skip may be set to - * zero on return along with the NULL WT_REF end-of-walk condition. - * Second, if a tree has no valid pages at all (the condition after - * initial creation), we might make no progress at all, or finally, if - * a tree has only deleted pages, we'll make progress, but never get a - * useful WT_REF. And, of course, the tree can switch from one of these - * states to another without warning. Decrement skip regardless of what - * is happening in the search, guarantee we eventually quit. - * - * Pages read for data sampling aren't "useful"; don't update the read - * generation of pages already in memory, and if a page is read, set - * its generation to a low value so it is evicted quickly. - */ - for (skip = cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) { - n = skip; - WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip)); - if (n == skip) { - if (skip == 0) - break; - --skip; - } - } - - /* - * We can't return WT_NOTFOUND to the application unless a tree is - * really empty, fallback to a random entry from the first page in the - * tree that has anything at all. - */ - if (cbt->ref == NULL) - WT_ERR(__wt_btcur_next(cbt, false)); + /* + * If we don't have a current position in the tree, or if retrieving random values without + * sampling, pick a roughly random leaf page in the tree and return an entry from it. + */ + if (cbt->ref == NULL || cbt->next_random_sample_size == 0) { + WT_ERR(__cursor_func_init(cbt, true)); + WT_WITH_PAGE_INDEX(session, ret = __wt_random_descent(session, &cbt->ref, read_flags)); + if (ret == 0) + goto random_page_entry; + + /* + * Random descent may return not-found: the tree might be empty or have so many deleted + * items we didn't find any valid pages. We can't return WT_NOTFOUND to the application + * unless a tree is really empty, fallback to skipping through tree pages. + */ + WT_ERR_NOTFOUND_OK(ret); + } + + /* + * Cursor through the tree, skipping past the sample size of the leaf + * pages in the tree between each random key return to compensate for + * unbalanced trees. + * + * If the random descent attempt failed, we don't have a configured + * sample size, use 100 for no particular reason. + */ + if (cbt->next_random_sample_size == 0) + cbt->next_random_sample_size = 100; + + /* + * If the random descent attempt failed, or it's our first skip attempt, + * we haven't yet set the pages to skip, do it now. + * + * Use the underlying file size divided by its block allocation size as + * our guess of leaf pages in the file (this can be entirely wrong, as + * it depends on how many pages are in this particular checkpoint, how + * large the leaf and internal pages really are, and other factors). + * Then, divide that value by the configured sample size and increment + * the final result to make sure tiny files don't leave us with a skip + * value of 0. + * + * !!! + * Ideally, the number would be prime to avoid restart issues. + */ + if (cbt->next_random_leaf_skip == 0) { + WT_ERR(btree->bm->size(btree->bm, session, &size)); + cbt->next_random_leaf_skip = + (uint64_t)((size / btree->allocsize) / cbt->next_random_sample_size) + 1; + } + + /* + * Be paranoid about loop termination: first, if the last leaf page + * skipped was also the last leaf page in the tree, skip may be set to + * zero on return along with the NULL WT_REF end-of-walk condition. + * Second, if a tree has no valid pages at all (the condition after + * initial creation), we might make no progress at all, or finally, if + * a tree has only deleted pages, we'll make progress, but never get a + * useful WT_REF. And, of course, the tree can switch from one of these + * states to another without warning. Decrement skip regardless of what + * is happening in the search, guarantee we eventually quit. + * + * Pages read for data sampling aren't "useful"; don't update the read + * generation of pages already in memory, and if a page is read, set + * its generation to a low value so it is evicted quickly. + */ + for (skip = cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) { + n = skip; + WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip)); + if (n == skip) { + if (skip == 0) + break; + --skip; + } + } + + /* + * We can't return WT_NOTFOUND to the application unless a tree is really empty, fallback to a + * random entry from the first page in the tree that has anything at all. + */ + if (cbt->ref == NULL) + WT_ERR(__wt_btcur_next(cbt, false)); random_page_entry: - /* - * Select a random entry from the leaf page. If it's not valid, move to - * the next entry, if that doesn't work, move to the previous entry. - */ - WT_ERR(__wt_row_random_leaf(session, cbt)); - WT_ERR(__wt_cursor_valid(cbt, &upd, &valid)); - if (valid) - WT_ERR(__cursor_kv_return(session, cbt, upd)); - else { - if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND) - ret = __wt_btcur_prev(cbt, false); - WT_ERR(ret); - } - return (0); - -err: WT_TRET(__cursor_reset(cbt)); - return (ret); + /* + * Select a random entry from the leaf page. If it's not valid, move to the next entry, if that + * doesn't work, move to the previous entry. + */ + WT_ERR(__wt_row_random_leaf(session, cbt)); + WT_ERR(__wt_cursor_valid(cbt, &upd, &valid)); + if (valid) + WT_ERR(__cursor_kv_return(session, cbt, upd)); + else { + if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND) + ret = __wt_btcur_prev(cbt, false); + WT_ERR(ret); + } + return (0); + +err: + WT_TRET(__cursor_reset(cbt)); + return (ret); } |