Change the parameter to be called "sample_size" and to be a count of the
number of samples the application will perform (which might be 1000, that
is, one sample from each 0.1% of the object).
author: Keith Bostic <keith@wiredtiger.com> 2015-12-12 10:34:59 -0500
committer: Keith Bostic <keith@wiredtiger.com> 2015-12-12 10:34:59 -0500
commit: f38bd7a3271dff84025131a96761e8d6a19b3eab (patch)
tree: 861a44096b4b7e50abc3e6c0f3853836168daf0a
parent: e6550aed804c5cbe0d5e5c6d31a954b09afb7173 (diff)
download: mongo-f38bd7a3271dff84025131a96761e8d6a19b3eab.tar.gz
8 files changed, 44 insertions, 48 deletions
diff --git a/dist/api_data.py b/dist/api_data.py
index 9334ab7f734..93dd305b1df 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -845,15 +845,17 @@ methods = {
         object when the WT_CURSOR::next method is called; valid only for
         row-store cursors. See @ref cursor_random for details''',
         type='boolean'),
-    Config('next_random_sample_percent', '0', r'''
-        cursors configured with \c next_random to return pseudo-random
+    Config('next_random_sample_size', '0', r'''
+        cursors configured by \c next_random to return pseudo-random
         records from the object randomly select from the entire object,
-        by default. Setting \c next_random_sample_percent to a non-zero
-        value causes \c next_random cursors to randomly select the first
-        returned record from the object as a whole, and on subsequent
-        returns to skip forward through the specified percentage of
-        pages in the tree, returning a record from the new location.''',
-        min='1', max='100'),
+        by default. Setting \c next_random_sample_size to a non-zero
+        value sets the number of samples the application expects to take
+        using the \c next_random cursor. A cursor configured with both
+        \c next_random and \c next_random_sample_size attempts to divide
+        the object into \c next_random_sample_size equal-sized pieces,
+        and each retrieval returns a record from one of those pieces. See
+        @ref cursor_random for details''',
+        min='1'),
     Config('raw', 'false', r'''
         ignore the encodings for the key and value, manage data as if
         the formats were \c "u".  See @ref cursor_raw for details''',
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index f06a0c9751e..262cd7d9213 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -830,7 +830,6 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
 	WT_DECL_RET;
 	WT_SESSION_IMPL *session;
 	WT_UPDATE *upd;
-	wt_off_t percent;
 	uint64_t skip;
 
 	session = (WT_SESSION_IMPL *)cbt->iface.session;
@@ -853,7 +852,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
 	 * from that location. If the former, it's the same as a first retrieval
 	 * for the latter.
 	 */
-	if (cbt->next_random_sample_percent == 0)
+	if (cbt->next_random_sample_size == 0)
 		goto no_sample;
 
 	/*
@@ -884,10 +883,9 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
 		 * !!!
 		 * Ideally, the number would be prime to avoid restart issues.
 		 */
-		percent =
-		    ((btree->bm->block->fh->size / btree->allocsize) / 100) + 1;
-		cbt->next_random_leaf_skip =
-		    (uint32_t)(percent * cbt->next_random_sample_percent);
+		cbt->next_random_leaf_skip = (uint64_t)
+		    ((btree->bm->block->fh->size / btree->allocsize) /
+		    cbt->next_random_sample_size) + 1;
 
 no_sample:	/*
 		 * Choose a leaf page from the tree.
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 15615f47f49..d333d2c1898 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -323,9 +323,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
 	    NULL, "choices=[\"hex\",\"json\",\"print\"]",
 	    NULL, 0 },
 	{ "next_random", "boolean", NULL, NULL, NULL, 0 },
-	{ "next_random_sample_percent", "int",
-	    NULL, "min=1,max=100",
-	    NULL, 0 },
+	{ "next_random_sample_size", "int", NULL, "min=1", NULL, 0 },
 	{ "overwrite", "boolean", NULL, NULL, NULL, 0 },
 	{ "raw", "boolean", NULL, NULL, NULL, 0 },
 	{ "readonly", "boolean", NULL, NULL, NULL, 0 },
@@ -924,7 +922,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
 	},
 	{ "WT_SESSION.open_cursor",
 	  "append=0,bulk=0,checkpoint=,dump=,next_random=0,"
-	  "next_random_sample_percent=0,overwrite=,raw=0,readonly=0,"
+	  "next_random_sample_size=0,overwrite=,raw=0,readonly=0,"
 	  "skip_sort_check=0,statistics=,target=",
 	  confchk_WT_SESSION_open_cursor, 12
 	},
diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c
index f1987826f49..b955b292292 100644
--- a/src/cursor/cur_file.c
+++ b/src/cursor/cur_file.c
@@ -470,9 +470,9 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
 		cursor->reset = __curfile_reset;
 
 		WT_ERR(__wt_config_gets_def(
-		    session, cfg, "next_random_sample_percent", 0, &cval));
+		    session, cfg, "next_random_sample_size", 0, &cval));
 		if (cval.val != 0)
-			cbt->next_random_sample_percent = (u_int)cval.val;
+			cbt->next_random_sample_size = (u_int)cval.val;
 	}
 
 	/* Underlying btree initialization. */
diff --git a/src/docs/cursor-random.dox b/src/docs/cursor-random.dox
index 3fdb0634261..a0a3212be6d 100644
--- a/src/docs/cursor-random.dox
+++ b/src/docs/cursor-random.dox
@@ -10,21 +10,19 @@ from the object, most other cursor methods are not supported. For
 example, it's not possible to update using a cursor configured for
 random retrieval.
 
-By default, records are pseudo-randomly selected from the underlying
-Btree as a whole. that can lead to skewed results when the Btree is
-unbalanced or records are not uniformly distributed. In such cases, the
-\c next_random_sample_percent configuration can additionally be
-specified.  Setting \c next_random_sample_percent to a non-zero value
-causes \c next_random cursors to randomly select the first returned
-value from the object as a whole, and on subsequent returns to skip
-forward through the specified percentage of pages in the tree, returning
-a value from the new location.
+By default, each returned record is pseudo-randomly selected from the
+underlying object as a whole. That can lead to skewed results when the
+underlying tree structure is unbalanced or records are not uniformly
+distributed. In such cases, the \c next_random_sample_size configuration
+can also be specified. Setting \c next_random_sample_size configures the
+number of samples the application expects to take using the cursor. A
+cursor configured using \c next_random_sample_size divides the object
+into \c next_random_sample_size pieces, and each subsequent retrieval
+returns a record from the next one of those pieces.
 
-For example, setting \c next_random_sample_percent to \c 10 would cause
+For example, setting \c next_random_sample_size to \c 10 would cause
-the cursor to skip forward through 10% of the underlying object, and
-each retrieved row would represent a sample from 10% of the object.
-Similarly, setting \c next_random_sample_percent to \c 1 would cause the
-cursor to skip forward through 1% of the underlying object, and each
-retrieved row would represent a sample from 1% of the object.
+the cursor to sequentially return records from each tenth part of the
+object. Setting \c next_random_sample_size to \c 1000 would cause the
+cursor to sequentially return records from each 0.1% of the object.
 
  */
diff --git a/src/include/cursor.h b/src/include/cursor.h
index 6cadeb9c245..13f18adab4a 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -108,8 +108,8 @@ struct __wt_cursor_btree {
 	 * percentage of the total leaf pages to their next value. Note the
 	 * configured value and the calculated number of leaf pages to skip.
 	 */
-	uint32_t next_random_leaf_skip;
-	u_int	 next_random_sample_percent;
+	uint64_t next_random_leaf_skip;
+	u_int	 next_random_sample_size;
 
 	/*
 	 * The search function sets compare to:
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 62f2bb0712f..315d77e3fc5 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -923,14 +923,16 @@ struct __wt_session {
 	 * record from the object when the WT_CURSOR::next method is called;
 	 * valid only for row-store cursors.  See @ref cursor_random for
 	 * details., a boolean flag; default \c false.}
-	 * @config{next_random_sample_percent, cursors configured with \c
-	 * next_random to return pseudo-random records from the object randomly
-	 * select from the entire object\, by default.  Setting \c
-	 * next_random_sample_percent to a non-zero value causes \c next_random
-	 * cursors to randomly select the first returned record from the object
-	 * as a whole\, and on subsequent returns to skip forward through the
-	 * specified percentage of pages in the tree\, returning a record from
-	 * the new location., an integer between 1 and 100; default \c 0.}
+	 * @config{next_random_sample_size, cursors configured by \c next_random
+	 * to return pseudo-random records from the object randomly select from
+	 * the entire object\, by default.  Setting \c next_random_sample_size
+	 * to a non-zero value sets the number of samples the application
+	 * expects to take using the \c next_random cursor.  A cursor configured
+	 * with both \c next_random and \c next_random_sample_size attempts to
+	 * divide the object into \c next_random_sample_size equal-sized
+	 * pieces\, and each retrieval returns a record from one of those
+	 * pieces.  See @ref cursor_random for details., an integer greater than
+	 * or equal to 1; default \c 0.}
 	 * @config{overwrite, configures whether the cursor's insert\, update
 	 * and remove methods check the existing state of the record.  If \c
 	 * overwrite is \c false\, WT_CURSOR::insert fails with
diff --git a/test/suite/test_cursor_random.py b/test/suite/test_cursor_random.py
index 7e0d9fb8e5f..b424dbbc7e3 100644
--- a/test/suite/test_cursor_random.py
+++ b/test/suite/test_cursor_random.py
@@ -39,8 +39,7 @@ class test_cursor_random(wttest.WiredTigerTestCase):
         ('table', dict(type='table:random'))
     ]
     config = [
-        ('sample',
-            dict(config='next_random=true,next_random_sample_percent=35')),
+        ('sample', dict(config='next_random=true,next_random_sample_size=35')),
         ('not-sample', dict(config='next_random=true'))
     ]
     scenarios =number_scenarios(multiply_scenarios('.', types, config))
@@ -151,8 +150,7 @@ class test_cursor_random_invisible(wttest.WiredTigerTestCase):
         ('table', dict(type='table:random'))
     ]
     config = [
-        ('sample',
-            dict(config='next_random=true,next_random_sample_percent=35')),
+        ('sample', dict(config='next_random=true,next_random_sample_size=35')),
         ('not-sample', dict(config='next_random=true'))
     ]
     scenarios =number_scenarios(multiply_scenarios('.', types, config))
author	Keith Bostic <keith@wiredtiger.com>	2015-12-12 10:34:59 -0500
committer	Keith Bostic <keith@wiredtiger.com>	2015-12-12 10:34:59 -0500
commit	f38bd7a3271dff84025131a96761e8d6a19b3eab (patch)
tree	861a44096b4b7e50abc3e6c0f3853836168daf0a
parent	e6550aed804c5cbe0d5e5c6d31a954b09afb7173 (diff)
download	mongo-f38bd7a3271dff84025131a96761e8d6a19b3eab.tar.gz