summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Keith Bostic <keith@wiredtiger.com> 2015-12-12 10:34:59 -0500
committer Keith Bostic <keith@wiredtiger.com> 2015-12-12 10:34:59 -0500
commit f38bd7a3271dff84025131a96761e8d6a19b3eab (patch)
tree 861a44096b4b7e50abc3e6c0f3853836168daf0a
parent e6550aed804c5cbe0d5e5c6d31a954b09afb7173 (diff)
download mongo-f38bd7a3271dff84025131a96761e8d6a19b3eab.tar.gz
Change the parameter to be called "sample_size" and to be a count of the
number of samples the application will perform (which might be 1000, or 0.1%).
-rw-r--r-- dist/api_data.py 18
-rw-r--r-- src/btree/bt_cursor.c 10
-rw-r--r-- src/config/config_def.c 6
-rw-r--r-- src/cursor/cur_file.c 4
-rw-r--r-- src/docs/cursor-random.dox 26
-rw-r--r-- src/include/cursor.h 4
-rw-r--r-- src/include/wiredtiger.in 18
-rw-r--r-- test/suite/test_cursor_random.py 6
8 files changed, 44 insertions, 48 deletions
diff --git a/dist/api_data.py b/dist/api_data.py
index 9334ab7f734..93dd305b1df 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -845,15 +845,17 @@ methods = {
object when the WT_CURSOR::next method is called; valid only for
row-store cursors. See @ref cursor_random for details''',
type='boolean'),
- Config('next_random_sample_percent', '0', r'''
- cursors configured with \c next_random to return pseudo-random
+ Config('next_random_sample_size', '0', r'''
+ cursors configured by \c next_random to return pseudo-random
records from the object randomly select from the entire object,
- by default. Setting \c next_random_sample_percent to a non-zero
- value causes \c next_random cursors to randomly select the first
- returned record from the object as a whole, and on subsequent
- returns to skip forward through the specified percentage of
- pages in the tree, returning a record from the new location.''',
- min='1', max='100'),
+ by default. Setting \c next_random_sample_size to a non-zero
+ value sets the number of samples the application expects to take
+ using the \c next_random cursor. A cursor configured with both
+ \c next_random and \c next_random_sample_size attempts to divide
+ the object into \c next_random_sample_size equal-sized pieces,
+ and each retrieval returns a record from one of those pieces. See
+ @ref cursor_random for details''',
+ min='1'),
Config('raw', 'false', r'''
ignore the encodings for the key and value, manage data as if
the formats were \c "u". See @ref cursor_raw for details''',
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index f06a0c9751e..262cd7d9213 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -830,7 +830,6 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
WT_DECL_RET;
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
- wt_off_t percent;
uint64_t skip;
session = (WT_SESSION_IMPL *)cbt->iface.session;
@@ -853,7 +852,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
* from that location. If the former, it's the same as a first retrieval
* for the latter.
*/
- if (cbt->next_random_sample_percent == 0)
+ if (cbt->next_random_sample_size == 0)
goto no_sample;
/*
@@ -884,10 +883,9 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
* !!!
* Ideally, the number would be prime to avoid restart issues.
*/
- percent =
- ((btree->bm->block->fh->size / btree->allocsize) / 100) + 1;
- cbt->next_random_leaf_skip =
- (uint32_t)(percent * cbt->next_random_sample_percent);
+ cbt->next_random_leaf_skip = (uint64_t)
+ ((btree->bm->block->fh->size / btree->allocsize) /
+ cbt->next_random_sample_size) + 1;
no_sample: /*
* Choose a leaf page from the tree.
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 15615f47f49..d333d2c1898 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -323,9 +323,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
NULL, "choices=[\"hex\",\"json\",\"print\"]",
NULL, 0 },
{ "next_random", "boolean", NULL, NULL, NULL, 0 },
- { "next_random_sample_percent", "int",
- NULL, "min=1,max=100",
- NULL, 0 },
+ { "next_random_sample_size", "int", NULL, "min=1", NULL, 0 },
{ "overwrite", "boolean", NULL, NULL, NULL, 0 },
{ "raw", "boolean", NULL, NULL, NULL, 0 },
{ "readonly", "boolean", NULL, NULL, NULL, 0 },
@@ -924,7 +922,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "WT_SESSION.open_cursor",
"append=0,bulk=0,checkpoint=,dump=,next_random=0,"
- "next_random_sample_percent=0,overwrite=,raw=0,readonly=0,"
+ "next_random_sample_size=0,overwrite=,raw=0,readonly=0,"
"skip_sort_check=0,statistics=,target=",
confchk_WT_SESSION_open_cursor, 12
},
diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c
index f1987826f49..b955b292292 100644
--- a/src/cursor/cur_file.c
+++ b/src/cursor/cur_file.c
@@ -470,9 +470,9 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
cursor->reset = __curfile_reset;
WT_ERR(__wt_config_gets_def(
- session, cfg, "next_random_sample_percent", 0, &cval));
+ session, cfg, "next_random_sample_size", 0, &cval));
if (cval.val != 0)
- cbt->next_random_sample_percent = (u_int)cval.val;
+ cbt->next_random_sample_size = (u_int)cval.val;
}
/* Underlying btree initialization. */
diff --git a/src/docs/cursor-random.dox b/src/docs/cursor-random.dox
index 3fdb0634261..a0a3212be6d 100644
--- a/src/docs/cursor-random.dox
+++ b/src/docs/cursor-random.dox
@@ -10,21 +10,19 @@ from the object, most other cursor methods are not supported. For
example, it's not possible to update using a cursor configured for
random retrieval.
-By default, records are pseudo-randomly selected from the underlying
-Btree as a whole. that can lead to skewed results when the Btree is
-unbalanced or records are not uniformly distributed. In such cases, the
-\c next_random_sample_percent configuration can additionally be
-specified. Setting \c next_random_sample_percent to a non-zero value
-causes \c next_random cursors to randomly select the first returned
-value from the object as a whole, and on subsequent returns to skip
-forward through the specified percentage of pages in the tree, returning
-a value from the new location.
+By default, each returned record is pseudo-randomly selected from the
+underlying object as a whole. That can lead to skewed results when the
+underlying tree structure is unbalanced or records are not uniformly
+distributed. In such cases, the \c next_random_sample_size configuration
+can also be specified. Setting \c next_random_sample_size configures the
+number of samples the application expects to take using the cursor. A
+cursor configured using \c next_random_sample_size divides the object
+into \c next_random_sample_size pieces, and each subsequent retrieval
+returns a record from the next one of those pieces.
For example, setting \c next_random_sample_percent to \c 10 would cause
-the cursor to skip forward through 10% of the underlying object, and
-each retrieved row would represent a sample from 10% of the object.
-Similarly, setting \c next_random_sample_percent to \c 1 would cause the
-cursor to skip forward through 1% of the underlying object, and each
-retrieved row would represent a sample from 1% of the object.
+the cursor to sequentially return records from each tenth part of the
+object. Setting \c next_random_sample_size to \c 1000 would cause the
+cursor to sequentially return records from each 0.1% of the object.
*/
diff --git a/src/include/cursor.h b/src/include/cursor.h
index 6cadeb9c245..13f18adab4a 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -108,8 +108,8 @@ struct __wt_cursor_btree {
* percentage of the total leaf pages to their next value. Note the
* configured value and the calculated number of leaf pages to skip.
*/
- uint32_t next_random_leaf_skip;
- u_int next_random_sample_percent;
+ uint64_t next_random_leaf_skip;
+ u_int next_random_sample_size;
/*
* The search function sets compare to:
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 62f2bb0712f..315d77e3fc5 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -923,14 +923,16 @@ struct __wt_session {
* record from the object when the WT_CURSOR::next method is called;
* valid only for row-store cursors. See @ref cursor_random for
* details., a boolean flag; default \c false.}
- * @config{next_random_sample_percent, cursors configured with \c
- * next_random to return pseudo-random records from the object randomly
- * select from the entire object\, by default. Setting \c
- * next_random_sample_percent to a non-zero value causes \c next_random
- * cursors to randomly select the first returned record from the object
- * as a whole\, and on subsequent returns to skip forward through the
- * specified percentage of pages in the tree\, returning a record from
- * the new location., an integer between 1 and 100; default \c 0.}
+ * @config{next_random_sample_size, cursors configured by \c next_random
+ * to return pseudo-random records from the object randomly select from
+ * the entire object\, by default. Setting \c next_random_sample_size
+ * to a non-zero value sets the number of samples the application
+ * expects to take using the \c next_random cursor. A cursor configured
+ * with both \c next_random and \c next_random_sample_size attempts to
+ * divide the object into \c next_random_sample_size equal-sized
+ * pieces\, and each retrieval returns a record from one of those
+ * pieces. See @ref cursor_random for details., an integer greater than
+ * or equal to 1; default \c 0.}
* @config{overwrite, configures whether the cursor's insert\, update
* and remove methods check the existing state of the record. If \c
* overwrite is \c false\, WT_CURSOR::insert fails with
diff --git a/test/suite/test_cursor_random.py b/test/suite/test_cursor_random.py
index 7e0d9fb8e5f..b424dbbc7e3 100644
--- a/test/suite/test_cursor_random.py
+++ b/test/suite/test_cursor_random.py
@@ -39,8 +39,7 @@ class test_cursor_random(wttest.WiredTigerTestCase):
('table', dict(type='table:random'))
]
config = [
- ('sample',
- dict(config='next_random=true,next_random_sample_percent=35')),
+ ('sample', dict(config='next_random=true,next_random_sample_size=35')),
('not-sample', dict(config='next_random=true'))
]
scenarios =number_scenarios(multiply_scenarios('.', types, config))
@@ -151,8 +150,7 @@ class test_cursor_random_invisible(wttest.WiredTigerTestCase):
('table', dict(type='table:random'))
]
config = [
- ('sample',
- dict(config='next_random=true,next_random_sample_percent=35')),
+ ('sample', dict(config='next_random=true,next_random_sample_size=35')),
('not-sample', dict(config='next_random=true'))
]
scenarios =number_scenarios(multiply_scenarios('.', types, config))