diff options
author | Dan Pasette <dan@mongodb.com> | 2015-12-16 13:15:31 -0500 |
---|---|---|
committer | Dan Pasette <dan@mongodb.com> | 2015-12-16 13:15:46 -0500 |
commit | 33831818603c3c00dee06dd8bfa2bc9bea06a8af (patch) | |
tree | 3ea23b289efc57d2de1d11208fd179a1494ebdca | |
parent | a014a946bdea5013883bff963ae6fae9fe39e2a7 (diff) | |
download | mongo-33831818603c3c00dee06dd8bfa2bc9bea06a8af.tar.gz |
Import wiredtiger-wiredtiger-mongodb-3.2-rc4-75-gdecd916.tar.gz from wiredtiger branch mongodb-3.2
ref: 197eef0..decd916
48e1343 WT-2262 Have random sampling walk the tree so it isn't biased in skewed trees.
eb838c7 WT-2260 Avoid adding internal pages to the eviction queue.
a695751 WT-2258 WiredTiger preloads pages even when direct-IO is configured.
26 files changed, 378 insertions, 164 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index f58a48b4a0b..ff6d3f3ccb5 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -814,21 +814,19 @@ methods = { 'WT_SESSION.open_cursor' : Method(cursor_runtime_config + [ Config('bulk', 'false', r''' - configure the cursor for bulk-loading, a fast, initial load - path (see @ref tune_bulk_load for more information). Bulk-load - may only be used for newly created objects and cursors - configured for bulk-load only support the WT_CURSOR::insert - and WT_CURSOR::close methods. When bulk-loading row-store - objects, keys must be loaded in sorted order. The value is - usually a true/false flag; when bulk-loading fixed-length - column store objects, the special value \c bitmap allows - chunks of a memory resident bitmap to be loaded directly into - a file by passing a \c WT_ITEM to WT_CURSOR::set_value where - the \c size field indicates the number of records in the - bitmap (as specified by the object's \c value_format - configuration). Bulk-loaded bitmap values must end on a byte - boundary relative to the bit count (except for the last set - of values loaded)'''), + configure the cursor for bulk-loading, a fast, initial load path + (see @ref tune_bulk_load for more information). Bulk-load may + only be used for newly created objects and applications should + use the WT_CURSOR::insert method to insert rows. When + bulk-loading, rows must be loaded in sorted order. The value + is usually a true/false flag; when bulk-loading fixed-length + column store objects, the special value \c bitmap allows chunks + of a memory resident bitmap to be loaded directly into a file + by passing a \c WT_ITEM to WT_CURSOR::set_value where the \c + size field indicates the number of records in the bitmap (as + specified by the object's \c value_format configuration). 
+ Bulk-loaded bitmap values must end on a byte boundary relative + to the bit count (except for the last set of values loaded)'''), Config('checkpoint', '', r''' the name of a checkpoint to open (the reserved name "WiredTigerCheckpoint" opens the most recent internal @@ -843,12 +841,20 @@ methods = { with the @ref util_dump and @ref util_load commands''', choices=['hex', 'json', 'print']), Config('next_random', 'false', r''' - configure the cursor to return a pseudo-random record from - the object; valid only for row-store cursors. Cursors - configured with \c next_random=true only support the - WT_CURSOR::next and WT_CURSOR::close methods. See @ref - cursor_random for details''', + configure the cursor to return a pseudo-random record from the + object when the WT_CURSOR::next method is called; valid only for + row-store cursors. See @ref cursor_random for details''', type='boolean'), + Config('next_random_sample_size', '0', r''' + cursors configured by \c next_random to return pseudo-random + records from the object randomly select from the entire object, + by default. Setting \c next_random_sample_size to a non-zero + value sets the number of samples the application expects to take + using the \c next_random cursor. A cursor configured with both + \c next_random and \c next_random_sample_size attempts to divide + the object into \c next_random_sample_size equal-sized pieces, + and each retrieval returns a record from one of those pieces. See + @ref cursor_random for details'''), Config('raw', 'false', r''' ignore the encodings for the key and value, manage data as if the formats were \c "u". 
See @ref cursor_raw for details''', diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py index 1965dfb7dbe..7d237dd39a4 100644 --- a/src/third_party/wiredtiger/dist/flags.py +++ b/src/third_party/wiredtiger/dist/flags.py @@ -37,6 +37,7 @@ flags = { 'READ_NO_WAIT', 'READ_PREV', 'READ_SKIP_INTL', + 'READ_SKIP_LEAF', 'READ_TRUNCATE', 'READ_WONT_NEED', ], diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c index 4b32e02a73a..ca7797f17af 100644 --- a/src/third_party/wiredtiger/src/block/block_read.c +++ b/src/third_party/wiredtiger/src/block/block_read.c @@ -13,10 +13,11 @@ * Pre-load a page. */ int -__wt_bm_preload(WT_BM *bm, - WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) +__wt_bm_preload( + WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) { WT_BLOCK *block; + WT_DECL_ITEM(tmp); WT_DECL_RET; wt_off_t offset; uint32_t cksum, size; @@ -24,7 +25,15 @@ __wt_bm_preload(WT_BM *bm, WT_UNUSED(addr_size); block = bm->block; - ret = EINVAL; /* Play games due to conditional compilation */ + + /* + * Turn off pre-load when direct I/O is configured for the file, + * the kernel cache isn't interesting. + */ + if (block->fh->direct_io) + return (0); + + WT_STAT_FAST_CONN_INCR(session, block_preload); /* Crack the cookie. */ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum)); @@ -32,26 +41,19 @@ __wt_bm_preload(WT_BM *bm, /* Check for a mapped block. 
*/ mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen; if (mapped) - WT_RET(__wt_mmap_preload( + return (__wt_mmap_preload( session, (uint8_t *)bm->map + offset, size)); - else { + #ifdef HAVE_POSIX_FADVISE - ret = posix_fadvise(block->fh->fd, - (wt_off_t)offset, (wt_off_t)size, POSIX_FADV_WILLNEED); + if (posix_fadvise(block->fh->fd, + (wt_off_t)offset, (wt_off_t)size, POSIX_FADV_WILLNEED) == 0) + return (0); #endif - if (ret != 0) { - WT_DECL_ITEM(tmp); - WT_RET(__wt_scr_alloc(session, size, &tmp)); - ret = __wt_block_read_off( - session, block, tmp, offset, size, cksum); - __wt_scr_free(session, &tmp); - WT_RET(ret); - } - } - WT_STAT_FAST_CONN_INCR(session, block_preload); - - return (0); + WT_RET(__wt_scr_alloc(session, size, &tmp)); + ret = __wt_block_read_off(session, block, tmp, offset, size, cksum); + __wt_scr_free(session, &tmp); + return (ret); } /* diff --git a/src/third_party/wiredtiger/src/btree/bt_compact.c b/src/third_party/wiredtiger/src/btree/bt_compact.c index b2c9e4b67f8..8044d4f852d 100644 --- a/src/third_party/wiredtiger/src/btree/bt_compact.c +++ b/src/third_party/wiredtiger/src/btree/bt_compact.c @@ -45,7 +45,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) * Ignore empty pages, they get merged into the parent. */ if (mod == NULL || mod->rec_result == 0) { - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + __wt_ref_info(ref, &addr, &addr_size, NULL); if (addr == NULL) return (0); WT_RET( @@ -130,7 +130,7 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[]) * read, set its generation to a low value so it is evicted * quickly. 
*/ - WT_ERR(__wt_tree_walk(session, &ref, NULL, + WT_ERR(__wt_tree_walk(session, &ref, WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED)); if (ref == NULL) break; @@ -182,7 +182,7 @@ __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) * address, the page isn't on disk, but we have to read internal pages * to walk the tree regardless; throw up our hands and read it. */ - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, &type)); + __wt_ref_info(ref, &addr, &addr_size, &type); if (addr == NULL) return (0); diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c index 3c96bad39d7..55843d1cae5 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curnext.c +++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c @@ -527,7 +527,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) __wt_page_evict_soon(page); cbt->page_deleted_count = 0; - WT_ERR(__wt_tree_walk(session, &cbt->ref, NULL, flags)); + WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND); } diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c index b7cea561b48..1d23b976edd 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curprev.c +++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c @@ -615,7 +615,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) __wt_page_evict_soon(page); cbt->page_deleted_count = 0; - WT_ERR(__wt_tree_walk(session, &cbt->ref, NULL, flags)); + WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND); } diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index 69512f45933..f2bf2978320 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -816,7 +816,12 @@ err: if (ret == WT_RESTART) { /* * __wt_btcur_next_random -- - * 
Move to a random record in the tree. + * Move to a random record in the tree. There are two algorithms, one + * where we select a record at random from the whole tree on each + * retrieval and one where we first select a record at random from the + * whole tree, and then subsequently sample forward from that location. + * The sampling approach allows us to select reasonably uniform random + * points from unbalanced trees. */ int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) @@ -825,6 +830,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_DECL_RET; WT_SESSION_IMPL *session; WT_UPDATE *upd; + uint64_t skip; session = (WT_SESSION_IMPL *)cbt->iface.session; btree = cbt->btree; @@ -839,11 +845,63 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_STAT_FAST_CONN_INCR(session, cursor_next); WT_STAT_FAST_DATA_INCR(session, cursor_next); - WT_RET(__cursor_func_init(cbt, true)); + /* + * If retrieving random values without sampling, or we don't have a + * page reference, pick a roughly random leaf page in the tree. + */ + if (cbt->ref == NULL || cbt->next_random_sample_size == 0) { + /* + * Skip past the sample size of the leaf pages in the tree + * between each random key return to compensate for unbalanced + * trees. + * + * Use the underlying file size divided by its block allocation + * size as our guess of leaf pages in the file (this can be + * entirely wrong, as it depends on how many pages are in this + * particular checkpoint, how large the leaf and internal pages + * really are, and other factors). Then, divide that value by + * the configured sample size and increment the final result to + * make sure tiny files don't leave us with a skip value of 0. + * + * !!! + * Ideally, the number would be prime to avoid restart issues. 
+ */ + if (cbt->next_random_sample_size != 0) + cbt->next_random_leaf_skip = (uint64_t) + ((btree->bm->block->fh->size / btree->allocsize) / + cbt->next_random_sample_size) + 1; - WT_WITH_PAGE_INDEX(session, - ret = __wt_row_random(session, cbt)); - WT_ERR(ret); + /* + * Choose a leaf page from the tree. + */ + WT_ERR(__cursor_func_init(cbt, true)); + WT_WITH_PAGE_INDEX( + session, ret = __wt_row_random_descent(session, cbt)); + WT_ERR(ret); + } else { + /* + * Read through the tree, skipping leaf pages. Be cautious about + * the skip count: if the last leaf page skipped was also the + * last leaf page in the tree, it may be set to zero on return + * with the end-of-walk condition. + * + * Pages read for data sampling aren't "useful"; don't update + * the read generation of pages already in memory, and if a page + * is read, set its generation to a low value so it is evicted + * quickly. + */ + for (skip = + cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) + WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip, + WT_READ_NO_GEN | + WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); + } + + /* + * Select a random entry from the leaf page. If it's not valid, move to + * the next entry, if that doesn't work, move to the previous entry. 
+ */ + WT_ERR(__wt_row_random_leaf(session, cbt)); if (__cursor_valid(cbt, &upd)) WT_ERR(__wt_kv_return(session, cbt, upd)); else { @@ -851,9 +909,9 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) ret = __wt_btcur_prev(cbt, false); WT_ERR(ret); } + return (0); -err: if (ret != 0) - WT_TRET(__cursor_reset(cbt)); +err: WT_TRET(__cursor_reset(cbt)); return (ret); } diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index 0f47c060daf..d52a94a6da2 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -43,7 +43,7 @@ static int __debug_page_col_var(WT_DBG *, WT_PAGE *); static int __debug_page_metadata(WT_DBG *, WT_PAGE *); static int __debug_page_row_int(WT_DBG *, WT_PAGE *, uint32_t); static int __debug_page_row_leaf(WT_DBG *, WT_PAGE *); -static int __debug_ref(WT_DBG *, WT_REF *); +static void __debug_ref(WT_DBG *, WT_REF *); static void __debug_row_skip(WT_DBG *, WT_INSERT_HEAD *); static int __debug_tree( WT_SESSION_IMPL *, WT_BTREE *, WT_PAGE *, const char *, uint32_t); @@ -769,7 +769,7 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags) WT_INTL_FOREACH_BEGIN(session, page, ref) { __dmsg(ds, "\trecno %" PRIu64 "\n", ref->key.recno); - WT_RET(__debug_ref(ds, ref)); + __debug_ref(ds, ref); } WT_INTL_FOREACH_END; if (LF_ISSET(WT_DEBUG_TREE_WALK)) @@ -843,7 +843,7 @@ __debug_page_row_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags) WT_INTL_FOREACH_BEGIN(session, page, ref) { __wt_ref_key(page, ref, &p, &len); __debug_item(ds, "K", p, len); - WT_RET(__debug_ref(ds, ref)); + __debug_ref(ds, ref); } WT_INTL_FOREACH_END; if (LF_ISSET(WT_DEBUG_TREE_WALK)) @@ -965,7 +965,7 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) * __debug_ref -- * Dump a WT_REF structure. 
*/ -static int +static void __debug_ref(WT_DBG *ds, WT_REF *ref) { WT_SESSION_IMPL *session; @@ -994,14 +994,14 @@ __debug_ref(WT_DBG *ds, WT_REF *ref) case WT_REF_SPLIT: __dmsg(ds, "split"); break; - WT_ILLEGAL_VALUE(session); + default: + __dmsg(ds, "INVALID"); + break; } - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + __wt_ref_info(ref, &addr, &addr_size, NULL); __dmsg(ds, " %s\n", __wt_addr_string(session, addr, addr_size, ds->tmp)); - - return (0); } /* diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 294cc399d65..a6330326954 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -601,7 +601,7 @@ __btree_preload(WT_SESSION_IMPL *session) /* Pre-load the second-level internal pages. */ WT_INTL_FOREACH_BEGIN(session, btree->root.page, ref) { - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + __wt_ref_info(ref, &addr, &addr_size, NULL); if (addr != NULL) WT_RET(bm->preload(bm, session, addr, addr_size)); } WT_INTL_FOREACH_END; @@ -622,7 +622,7 @@ __btree_get_last_recno(WT_SESSION_IMPL *session) btree = S2BT(session); next_walk = NULL; - WT_RET(__wt_tree_walk(session, &next_walk, NULL, WT_READ_PREV)); + WT_RET(__wt_tree_walk(session, &next_walk, WT_READ_PREV)); if (next_walk == NULL) return (WT_NOTFOUND); diff --git a/src/third_party/wiredtiger/src/btree/bt_misc.c b/src/third_party/wiredtiger/src/btree/bt_misc.c index d2b16bb5d21..a60499ef8b7 100644 --- a/src/third_party/wiredtiger/src/btree/bt_misc.c +++ b/src/third_party/wiredtiger/src/btree/bt_misc.c @@ -101,7 +101,7 @@ __wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf) return (buf->data); } - (void)__wt_ref_info(session, ref, &addr, &addr_size, NULL); + __wt_ref_info(ref, &addr, &addr_size, NULL); return (__wt_addr_string(session, addr, addr_size, buf)); } diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c 
b/src/third_party/wiredtiger/src/btree/bt_read.c index 77215474359..c50f97bbe14 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -375,7 +375,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref) * Get the address: if there is no address, the page was deleted, but a * subsequent search or insert is forcing re-creation of the name space. */ - WT_ERR(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + __wt_ref_info(ref, &addr, &addr_size, NULL); if (addr == NULL) { WT_ASSERT(session, previous_state == WT_REF_DELETED); diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c index 2f8759b9d82..5dd75835b0b 100644 --- a/src/third_party/wiredtiger/src/btree/bt_stat.c +++ b/src/third_party/wiredtiger/src/btree/bt_stat.c @@ -59,8 +59,8 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_STAT_SET(session, stats, btree_row_leaf, 0); next_walk = NULL; - while ((ret = __wt_tree_walk(session, &next_walk, NULL, 0)) == 0 && - next_walk != NULL) { + while ((ret = __wt_tree_walk( + session, &next_walk, 0)) == 0 && next_walk != NULL) { WT_WITH_PAGE_INDEX(session, ret = __stat_page(session, next_walk->page, stats)); WT_RET(ret); diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index 07bb2eb3a01..86607d8f187 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -58,7 +58,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) flags |= WT_READ_NO_WAIT | WT_READ_SKIP_INTL; for (walk = NULL;;) { - WT_ERR(__wt_tree_walk(session, &walk, NULL, flags)); + WT_ERR(__wt_tree_walk(session, &walk, flags)); if (walk == NULL) break; @@ -124,7 +124,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) /* Write all dirty in-cache pages. 
*/ flags |= WT_READ_NO_EVICT; for (walk = NULL;;) { - WT_ERR(__wt_tree_walk(session, &walk, NULL, flags)); + WT_ERR(__wt_tree_walk(session, &walk, flags)); if (walk == NULL) break; diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c index c7d83d8dfff..b46c9a03dcf 100644 --- a/src/third_party/wiredtiger/src/btree/bt_walk.c +++ b/src/third_party/wiredtiger/src/btree/bt_walk.c @@ -69,16 +69,36 @@ retry: WT_INTL_INDEX_GET(session, ref->home, pindex); } /* - * __wt_tree_walk -- + * __ref_is_leaf -- + * Check if a reference is for a leaf page. + */ +static inline bool +__ref_is_leaf(WT_REF *ref) +{ + size_t addr_size; + u_int type; + const uint8_t *addr; + + /* + * If the page has a disk address, we can crack it to figure out if + * this page is a leaf page or not. If there's no address, the page + * isn't on disk and we don't know the page type. + */ + __wt_ref_info(ref, &addr, &addr_size, &type); + return (addr == NULL ? + false : type == WT_CELL_ADDR_LEAF || type == WT_CELL_ADDR_LEAF_NO); +} + +/* + * __tree_walk_internal -- * Move to the next/previous page in the tree. */ -int -__wt_tree_walk(WT_SESSION_IMPL *session, - WT_REF **refp, uint64_t *walkcntp, uint32_t flags) +static inline int +__tree_walk_internal(WT_SESSION_IMPL *session, + WT_REF **refp, uint64_t *walkcntp, uint64_t *skipleafcntp, uint32_t flags) { WT_BTREE *btree; WT_DECL_RET; - WT_PAGE *page; WT_PAGE_INDEX *pindex; WT_REF *couple, *couple_orig, *ref; bool empty_internal, prev, skip; @@ -304,6 +324,31 @@ ascend: /* break; } + /* + * Optionally skip leaf pages: skip all leaf pages if + * WT_READ_SKIP_LEAF is set, when the skip-leaf-count + * variable is non-zero, skip some count of leaf pages. + * If this page is disk-based, crack the cell to figure + * out it's a leaf page without reading it. + * + * If skipping some number of leaf pages, decrement the + * count of pages to zero, and then take the next leaf + * page we can. 
Be cautious around the page decrement, + * if for some reason we don't take this particular page, + * we can take the next one, and, there are additional + * tests/decrements when we're about to return a leaf + * page. + */ + if (skipleafcntp != NULL || LF_ISSET(WT_READ_SKIP_LEAF)) + if (__ref_is_leaf(ref)) { + if (LF_ISSET(WT_READ_SKIP_LEAF)) + break; + if (*skipleafcntp > 0) { + --*skipleafcntp; + break; + } + } + ret = __wt_page_swap(session, couple, ref, flags); /* @@ -359,13 +404,29 @@ ascend: /* * A new page: configure for traversal of any internal * page's children, else return the leaf page. */ -descend: couple = ref; - page = ref->page; - if (WT_PAGE_IS_INTERNAL(page)) { - WT_INTL_INDEX_GET(session, page, pindex); + if (WT_PAGE_IS_INTERNAL(ref->page)) { +descend: couple = ref; + WT_INTL_INDEX_GET(session, ref->page, pindex); slot = prev ? pindex->entries - 1 : 0; empty_internal = true; } else { + /* + * Optionally skip leaf pages, the second half. + * We didn't have an on-page cell to figure out + * if it was a leaf page, we had to acquire the + * hazard pointer and look at the page. + */ + if (skipleafcntp != NULL || + LF_ISSET(WT_READ_SKIP_LEAF)) { + couple = ref; + if (LF_ISSET(WT_READ_SKIP_LEAF)) + break; + if (*skipleafcntp > 0) { + --*skipleafcntp; + break; + } + } + *refp = ref; goto done; } @@ -376,3 +437,37 @@ done: err: WT_LEAVE_PAGE_INDEX(session); return (ret); } + +/* + * __wt_tree_walk -- + * Move to the next/previous page in the tree. + */ +int +__wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) +{ + return (__tree_walk_internal(session, refp, NULL, NULL, flags)); +} + +/* + * __wt_tree_walk_count -- + * Move to the next/previous page in the tree, tracking how many + * references were visited to get there. 
+ */ +int +__wt_tree_walk_count(WT_SESSION_IMPL *session, + WT_REF **refp, uint64_t *walkcntp, uint32_t flags) +{ + return (__tree_walk_internal(session, refp, walkcntp, NULL, flags)); +} + +/* + * __wt_tree_walk_skip -- + * Move to the next/previous page in the tree, skipping a certain number + * of leaf pages before returning. + */ +int +__wt_tree_walk_skip(WT_SESSION_IMPL *session, + WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags) +{ + return (__tree_walk_internal(session, refp, NULL, skipleafcntp, flags)); +} diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c index d2d8a4640ca..079f9d3bad1 100644 --- a/src/third_party/wiredtiger/src/btree/row_srch.c +++ b/src/third_party/wiredtiger/src/btree/row_srch.c @@ -536,19 +536,66 @@ err: /* } /* - * __wt_row_random -- - * Return a random key from a row-store tree. + * __wt_row_random_leaf -- + * Return a random key from a row-store leaf page. */ int -__wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +__wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +{ + WT_INSERT *p, *t; + WT_PAGE *page; + uint32_t cnt; + + page = cbt->ref->page; + + if (page->pg_row_entries != 0) { + cbt->compare = 0; + cbt->slot = __wt_random(&session->rnd) % page->pg_row_entries; + + /* + * The real row-store search function builds the key, so we + * have to as well. + */ + return (__wt_row_leaf_key(session, + page, page->pg_row_d + cbt->slot, cbt->tmp, false)); + } + + /* + * If the tree is new (and not empty), it might have a large insert + * list. Count how many records are in the list. + */ + F_SET(cbt, WT_CBT_SEARCH_SMALLEST); + if ((cbt->ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) + return (WT_NOTFOUND); + for (cnt = 1, p = WT_SKIP_FIRST(cbt->ins_head);; ++cnt) + if ((p = WT_SKIP_NEXT(p)) == NULL) + break; + + /* + * Select a random number from 0 to (N - 1), return that record. 
+ */ + cnt = __wt_random(&session->rnd) % cnt; + for (p = t = WT_SKIP_FIRST(cbt->ins_head);; t = p) + if (cnt-- == 0 || (p = WT_SKIP_NEXT(p)) == NULL) + break; + cbt->compare = 0; + cbt->ins = t; + + return (0); +} + +/* + * __wt_row_random_descent -- + * Find a random leaf page in a row-store tree. + */ +int +__wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; WT_DECL_RET; - WT_INSERT *p, *t; WT_PAGE *page; WT_PAGE_INDEX *pindex; WT_REF *current, *descent; - uint32_t cnt; btree = S2BT(session); @@ -585,43 +632,6 @@ restart_root: return (ret); } - if (page->pg_row_entries != 0) { - cbt->ref = current; - cbt->compare = 0; - cbt->slot = __wt_random(&session->rnd) % page->pg_row_entries; - - /* - * The real row-store search function builds the key, so we - * have to as well. - */ - return (__wt_row_leaf_key(session, - page, page->pg_row_d + cbt->slot, cbt->tmp, false)); - } - - /* - * If the tree is new (and not empty), it might have a large insert - * list. Count how many records are in the list. - */ - F_SET(cbt, WT_CBT_SEARCH_SMALLEST); - if ((cbt->ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) - WT_ERR(WT_NOTFOUND); - for (cnt = 1, p = WT_SKIP_FIRST(cbt->ins_head);; ++cnt) - if ((p = WT_SKIP_NEXT(p)) == NULL) - break; - - /* - * Select a random number from 0 to (N - 1), return that record. 
- */ - cnt = __wt_random(&session->rnd) % cnt; - for (p = t = WT_SKIP_FIRST(cbt->ins_head);; t = p) - if (cnt-- == 0 || (p = WT_SKIP_NEXT(p)) == NULL) - break; cbt->ref = current; - cbt->compare = 0; - cbt->ins = t; - return (0); - -err: WT_TRET(__wt_page_release(session, current, 0)); - return (ret); } diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index d79ce6853e6..9d12e953498 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -323,6 +323,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = { NULL, "choices=[\"hex\",\"json\",\"print\"]", NULL, 0 }, { "next_random", "boolean", NULL, NULL, NULL, 0 }, + { "next_random_sample_size", "string", NULL, NULL, NULL, 0 }, { "overwrite", "boolean", NULL, NULL, NULL, 0 }, { "raw", "boolean", NULL, NULL, NULL, 0 }, { "readonly", "boolean", NULL, NULL, NULL, 0 }, @@ -920,9 +921,10 @@ static const WT_CONFIG_ENTRY config_entries[] = { NULL, 0 }, { "WT_SESSION.open_cursor", - "append=0,bulk=0,checkpoint=,dump=,next_random=0,overwrite=,raw=0" - ",readonly=0,skip_sort_check=0,statistics=,target=", - confchk_WT_SESSION_open_cursor, 11 + "append=0,bulk=0,checkpoint=,dump=,next_random=0," + "next_random_sample_size=0,overwrite=,raw=0,readonly=0," + "skip_sort_check=0,statistics=,target=", + confchk_WT_SESSION_open_cursor, 12 }, { "WT_SESSION.reconfigure", "isolation=read-committed", diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c index 63f77248ca8..b955b292292 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_file.c +++ b/src/third_party/wiredtiger/src/cursor/cur_file.c @@ -455,14 +455,24 @@ __wt_curfile_create(WT_SESSION_IMPL *session, } /* - * random_retrieval - * Random retrieval cursors only support next, reset and close. + * Random retrieval, row-store only. 
+ * Random retrieval cursors support a limited set of methods. */ WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval)); if (cval.val != 0) { + if (WT_CURSOR_RECNO(cursor)) + WT_ERR_MSG(session, ENOTSUP, + "next_random configuration not supported for " + "column-store objects"); + __wt_cursor_set_notsup(cursor); cursor->next = __curfile_next_random; cursor->reset = __curfile_reset; + + WT_ERR(__wt_config_gets_def( + session, cfg, "next_random_sample_size", 0, &cval)); + if (cval.val != 0) + cbt->next_random_sample_size = (u_int)cval.val; } /* Underlying btree initialization. */ diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c index f92426355ef..da38988b6c2 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_std.c +++ b/src/third_party/wiredtiger/src/cursor/cur_std.c @@ -40,11 +40,11 @@ void __wt_cursor_set_notsup(WT_CURSOR *cursor) { /* - * Set all of the cursor methods (except for close and reset), to fail. - * Close is unchanged so the cursor can be discarded, reset defaults to + * Set cursor methods other than close, reconfigure and reset, to fail. + * Close is unchanged so the cursor can be discarded; reset is set to * a no-op because session transactional operations reset all of the - * cursors in a session, and random cursors shouldn't block transactions - * or checkpoints. + * cursors in a session. Reconfigure is left open in case it's possible + * in the future to change these configurations. */ cursor->compare = (int (*)(WT_CURSOR *, WT_CURSOR *, int *))__wt_cursor_notsup; diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c index 2b2117ad9fd..c5f6ae3d4d1 100644 --- a/src/third_party/wiredtiger/src/evict/evict_file.c +++ b/src/third_party/wiredtiger/src/evict/evict_file.c @@ -31,8 +31,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) /* Walk the tree, discarding pages. 
*/ next_ref = NULL; - WT_ERR(__wt_tree_walk(session, &next_ref, NULL, - WT_READ_CACHE | WT_READ_NO_EVICT)); + WT_ERR(__wt_tree_walk( + session, &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT)); while ((ref = next_ref) != NULL) { page = ref->page; @@ -68,8 +68,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * the reconciliation, the next walk call could miss a page in * the tree. */ - WT_ERR(__wt_tree_walk(session, &next_ref, NULL, - WT_READ_CACHE | WT_READ_NO_EVICT)); + WT_ERR(__wt_tree_walk(session, + &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT)); switch (syncop) { case WT_SYNC_CLOSE: diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index b5cb850d83c..ac481581c23 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -473,6 +473,15 @@ __evict_update_work(WT_SESSION_IMPL *session) return (false); /* + * Set up the number of refs to consider in each handle, depending + * on how many handles are open. We want to consider fewer candidates + * from each file as more files are open. Handle the case where there + * are no files open by adding 1. + */ + cache->evict_max_refs_per_file = + WT_MAX(100, WT_MILLION / (conn->open_file_count + 1)); + + /* * Page eviction overrides the dirty target and other types of eviction, * that is, we don't care where we are with respect to the dirty target * if page eviction is configured. 
@@ -1214,9 +1223,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) */ for (evict = start, pages_walked = 0; evict < end && !enough && (ret == 0 || ret == WT_NOTFOUND); - ret = __wt_tree_walk( + ret = __wt_tree_walk_count( session, &btree->evict_ref, &pages_walked, walk_flags)) { - enough = pages_walked > WT_EVICT_MAX_PER_FILE; + enough = pages_walked > cache->evict_max_refs_per_file; if ((ref = btree->evict_ref) == NULL) { if (++restarts == 2 || enough) break; @@ -1321,8 +1330,9 @@ fast: /* If the page can't be evicted, give up. */ if (__wt_ref_is_root(ref)) WT_RET(__evict_clear_walk(session)); else if (ref->page->read_gen == WT_READGEN_OLDEST) - WT_RET_NOTFOUND_OK(__wt_tree_walk(session, - &btree->evict_ref, &pages_walked, walk_flags)); + WT_RET_NOTFOUND_OK(__wt_tree_walk_count( + session, &btree->evict_ref, + &pages_walked, walk_flags)); } WT_STAT_FAST_CONN_INCRV(session, cache_eviction_walk, pages_walked); @@ -1602,7 +1612,7 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) next_walk = NULL; session->dhandle = dhandle; - while (__wt_tree_walk(session, &next_walk, NULL, + while (__wt_tree_walk(session, &next_walk, WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 && next_walk != NULL) { page = next_walk->page; diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 3e2e7158e04..23e0dfea2cd 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -948,9 +948,8 @@ __wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value) * __wt_ref_info -- * Return the addr/size and type triplet for a reference. 
*/ -static inline int -__wt_ref_info(WT_SESSION_IMPL *session, - WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep) +static inline void +__wt_ref_info(WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep) { WT_ADDR *addr; WT_CELL_UNPACK *unpack, _unpack; @@ -984,7 +983,9 @@ __wt_ref_info(WT_SESSION_IMPL *session, case WT_ADDR_LEAF_NO: *typep = WT_CELL_ADDR_LEAF_NO; break; - WT_ILLEGAL_VALUE(session); + default: + *typep = 0; + break; } } else { __wt_cell_unpack((WT_CELL *)addr, unpack); @@ -993,7 +994,6 @@ __wt_ref_info(WT_SESSION_IMPL *session, if (typep != NULL) *typep = unpack->type; } - return (0); } /* @@ -1009,7 +1009,7 @@ __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref) if (ref->addr == NULL) return (0); - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + __wt_ref_info(ref, &addr, &addr_size, NULL); WT_RET(__wt_btree_block_free(session, addr, addr_size)); /* Clear the address (so we don't free it twice). */ diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index d8a3829863f..a0440f23a00 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -14,7 +14,6 @@ pages by this many increments of the read generation. */ #define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */ -#define WT_EVICT_MAX_PER_FILE 100 /* Max pages to visit per file */ #define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ #define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ @@ -107,6 +106,7 @@ struct __wt_cache { uint32_t evict_slots; /* LRU list eviction slots */ WT_DATA_HANDLE *evict_file_next; /* LRU next file to search */ + uint32_t evict_max_refs_per_file;/* LRU pages per file per pass */ /* * Cache pool information. 
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 54787d2227b..275e2f2db46 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -104,6 +104,14 @@ struct __wt_cursor_btree { uint64_t recno; /* Record number */ /* + * Next-random cursors can optionally be configured to step through a + * percentage of the total leaf pages to their next value. Note the + * configured value and the calculated number of leaf pages to skip. + */ + uint64_t next_random_leaf_skip; + u_int next_random_sample_size; + + /* * The search function sets compare to: * < 1 if the found key is less than the specified key * 0 if the found key matches the specified key diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index af8a7aa70e9..d84403cc16d 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -51,7 +51,7 @@ extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block); extern int __wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize); extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats); extern int __wt_block_manager_size( WT_SESSION_IMPL *session, const char *filename, WT_DSRC_STATS *stats); -extern int __wt_bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size); +extern int __wt_bm_preload( WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size); extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size); extern int __wt_block_read_off_blind( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset); extern int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t cksum); @@ -166,7 +166,9 
@@ extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, size_t size, bool empty_page_ok); extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf); -extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags); +extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags); +extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags); +extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags); extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove); extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page); @@ -183,7 +185,8 @@ extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); extern int __wt_search_insert( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key); extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert); -extern int __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt); +extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt); +extern int __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt); extern void __wt_las_stats_update(WT_SESSION_IMPL *session); extern int __wt_las_create(WT_SESSION_IMPL *session); extern int __wt_las_destroy(WT_SESSION_IMPL *session); diff --git a/src/third_party/wiredtiger/src/include/flags.h 
b/src/third_party/wiredtiger/src/include/flags.h index 064349125cc..bafff92fbc0 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ b/src/third_party/wiredtiger/src/include/flags.h @@ -45,8 +45,9 @@ #define WT_READ_NO_WAIT 0x00000020 #define WT_READ_PREV 0x00000040 #define WT_READ_SKIP_INTL 0x00000080 -#define WT_READ_TRUNCATE 0x00000100 -#define WT_READ_WONT_NEED 0x00000200 +#define WT_READ_SKIP_LEAF 0x00000100 +#define WT_READ_TRUNCATE 0x00000200 +#define WT_READ_WONT_NEED 0x00000400 #define WT_SESSION_CAN_WAIT 0x00000001 #define WT_SESSION_CLEAR_EVICT_WALK 0x00000002 #define WT_SESSION_INTERNAL 0x00000004 diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 08f73386090..bdd8bb65910 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -896,18 +896,17 @@ struct __wt_session { * boolean flag; default \c false.} * @config{bulk, configure the cursor for bulk-loading\, a fast\, * initial load path (see @ref tune_bulk_load for more information). - * Bulk-load may only be used for newly created objects and cursors - * configured for bulk-load only support the WT_CURSOR::insert and - * WT_CURSOR::close methods. When bulk-loading row-store objects\, keys - * must be loaded in sorted order. The value is usually a true/false - * flag; when bulk-loading fixed-length column store objects\, the - * special value \c bitmap allows chunks of a memory resident bitmap to - * be loaded directly into a file by passing a \c WT_ITEM to - * WT_CURSOR::set_value where the \c size field indicates the number of - * records in the bitmap (as specified by the object's \c value_format - * configuration). 
Bulk-loaded bitmap values must end on a byte boundary - * relative to the bit count (except for the last set of values - * loaded)., a string; default \c false.} + * Bulk-load may only be used for newly created objects and applications + * should use the WT_CURSOR::insert method to insert rows. When + * bulk-loading\, rows must be loaded in sorted order. The value is + * usually a true/false flag; when bulk-loading fixed-length column + * store objects\, the special value \c bitmap allows chunks of a memory + * resident bitmap to be loaded directly into a file by passing a \c + * WT_ITEM to WT_CURSOR::set_value where the \c size field indicates the + * number of records in the bitmap (as specified by the object's \c + * value_format configuration). Bulk-loaded bitmap values must end on a + * byte boundary relative to the bit count (except for the last set of + * values loaded)., a string; default \c false.} * @config{checkpoint, the name of a checkpoint to open (the reserved * name "WiredTigerCheckpoint" opens the most recent internal checkpoint * taken for the object). The cursor does not support data @@ -921,10 +920,19 @@ struct __wt_session { * string\, chosen from the following options: \c "hex"\, \c "json"\, \c * "print"; default empty.} * @config{next_random, configure the cursor to return a pseudo-random - * record from the object; valid only for row-store cursors. Cursors - * configured with \c next_random=true only support the WT_CURSOR::next - * and WT_CURSOR::close methods. See @ref cursor_random for details., a - * boolean flag; default \c false.} + * record from the object when the WT_CURSOR::next method is called; + * valid only for row-store cursors. See @ref cursor_random for + * details., a boolean flag; default \c false.} + * @config{next_random_sample_size, cursors configured by \c next_random + * to return pseudo-random records from the object randomly select from + * the entire object\, by default. 
Setting \c next_random_sample_size + * to a non-zero value sets the number of samples the application + * expects to take using the \c next_random cursor. A cursor configured + * with both \c next_random and \c next_random_sample_size attempts to + * divide the object into \c next_random_sample_size equal-sized + * pieces\, and each retrieval returns a record from one of those + * pieces. See @ref cursor_random for details., a string; default \c + * 0.} * @config{overwrite, configures whether the cursor's insert\, update * and remove methods check the existing state of the record. If \c * overwrite is \c false\, WT_CURSOR::insert fails with |