diff options
author | Keith Bostic <keith@wiredtiger.com> | 2015-12-12 10:34:59 -0500 |
---|---|---|
committer | Keith Bostic <keith@wiredtiger.com> | 2015-12-12 10:34:59 -0500 |
commit | f38bd7a3271dff84025131a96761e8d6a19b3eab (patch) | |
tree | 861a44096b4b7e50abc3e6c0f3853836168daf0a | |
parent | e6550aed804c5cbe0d5e5c6d31a954b09afb7173 (diff) | |
download | mongo-f38bd7a3271dff84025131a96761e8d6a19b3eab.tar.gz |
Change the parameter to be called "next_random_sample_size" and to be a count of the
number of samples the application expects to take (for example, 1000 samples, each drawn from 0.1% of the object).
-rw-r--r-- | dist/api_data.py | 18 | ||||
-rw-r--r-- | src/btree/bt_cursor.c | 10 | ||||
-rw-r--r-- | src/config/config_def.c | 6 | ||||
-rw-r--r-- | src/cursor/cur_file.c | 4 | ||||
-rw-r--r-- | src/docs/cursor-random.dox | 26 | ||||
-rw-r--r-- | src/include/cursor.h | 4 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 18 | ||||
-rw-r--r-- | test/suite/test_cursor_random.py | 6 |
8 files changed, 44 insertions, 48 deletions
diff --git a/dist/api_data.py b/dist/api_data.py index 9334ab7f734..93dd305b1df 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -845,15 +845,17 @@ methods = { object when the WT_CURSOR::next method is called; valid only for row-store cursors. See @ref cursor_random for details''', type='boolean'), - Config('next_random_sample_percent', '0', r''' - cursors configured with \c next_random to return pseudo-random + Config('next_random_sample_size', '0', r''' + cursors configured by \c next_random to return pseudo-random records from the object randomly select from the entire object, - by default. Setting \c next_random_sample_percent to a non-zero - value causes \c next_random cursors to randomly select the first - returned record from the object as a whole, and on subsequent - returns to skip forward through the specified percentage of - pages in the tree, returning a record from the new location.''', - min='1', max='100'), + by default. Setting \c next_random_sample_size to a non-zero + value sets the number of samples the application expects to take + using the \c next_random cursor. A cursor configured with both + \c next_random and \c next_random_sample_size attempts to divide + the object into \c next_random_sample_size equal-sized pieces, + and each retrieval returns a record from one of those pieces. See + @ref cursor_random for details''', + min='1'), Config('raw', 'false', r''' ignore the encodings for the key and value, manage data as if the formats were \c "u". See @ref cursor_raw for details''', diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index f06a0c9751e..262cd7d9213 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -830,7 +830,6 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_DECL_RET; WT_SESSION_IMPL *session; WT_UPDATE *upd; - wt_off_t percent; uint64_t skip; session = (WT_SESSION_IMPL *)cbt->iface.session; @@ -853,7 +852,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) * from that location. 
If the former, it's the same as a first retrieval * for the latter. */ - if (cbt->next_random_sample_percent == 0) + if (cbt->next_random_sample_size == 0) goto no_sample; /* @@ -884,10 +883,9 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) * !!! * Ideally, the number would be prime to avoid restart issues. */ - percent = - ((btree->bm->block->fh->size / btree->allocsize) / 100) + 1; - cbt->next_random_leaf_skip = - (uint32_t)(percent * cbt->next_random_sample_percent); + cbt->next_random_leaf_skip = (uint64_t) + ((btree->bm->block->fh->size / btree->allocsize) / + cbt->next_random_sample_size) + 1; no_sample: /* * Choose a leaf page from the tree. diff --git a/src/config/config_def.c b/src/config/config_def.c index 15615f47f49..d333d2c1898 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -323,9 +323,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = { NULL, "choices=[\"hex\",\"json\",\"print\"]", NULL, 0 }, { "next_random", "boolean", NULL, NULL, NULL, 0 }, - { "next_random_sample_percent", "int", - NULL, "min=1,max=100", - NULL, 0 }, + { "next_random_sample_size", "int", NULL, "min=1", NULL, 0 }, { "overwrite", "boolean", NULL, NULL, NULL, 0 }, { "raw", "boolean", NULL, NULL, NULL, 0 }, { "readonly", "boolean", NULL, NULL, NULL, 0 }, @@ -924,7 +922,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "WT_SESSION.open_cursor", "append=0,bulk=0,checkpoint=,dump=,next_random=0," - "next_random_sample_percent=0,overwrite=,raw=0,readonly=0," + "next_random_sample_size=0,overwrite=,raw=0,readonly=0," "skip_sort_check=0,statistics=,target=", confchk_WT_SESSION_open_cursor, 12 }, diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index f1987826f49..b955b292292 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -470,9 +470,9 @@ __wt_curfile_create(WT_SESSION_IMPL *session, cursor->reset = __curfile_reset; WT_ERR(__wt_config_gets_def( - session, cfg, "next_random_sample_percent", 0, &cval)); + session, 
cfg, "next_random_sample_size", 0, &cval)); if (cval.val != 0) - cbt->next_random_sample_percent = (u_int)cval.val; + cbt->next_random_sample_size = (u_int)cval.val; } /* Underlying btree initialization. */ diff --git a/src/docs/cursor-random.dox b/src/docs/cursor-random.dox index 3fdb0634261..a0a3212be6d 100644 --- a/src/docs/cursor-random.dox +++ b/src/docs/cursor-random.dox @@ -10,21 +10,19 @@ from the object, most other cursor methods are not supported. For example, it's not possible to update using a cursor configured for random retrieval. -By default, records are pseudo-randomly selected from the underlying -Btree as a whole. that can lead to skewed results when the Btree is -unbalanced or records are not uniformly distributed. In such cases, the -\c next_random_sample_percent configuration can additionally be -specified. Setting \c next_random_sample_percent to a non-zero value -causes \c next_random cursors to randomly select the first returned -value from the object as a whole, and on subsequent returns to skip -forward through the specified percentage of pages in the tree, returning -a value from the new location. +By default, each returned record is pseudo-randomly selected from the +underlying object as a whole. That can lead to skewed results when the +underlying tree structure is unbalanced or records are not uniformly +distributed. In such cases, the \c next_random_sample_size configuration +can also be specified. Setting \c next_random_sample_size configures the +number of samples the application expects to take using the cursor. A +cursor configured using \c next_random_sample_size divides the object +into \c next_random_sample_size pieces, and each subsequent retrieval +returns a record from the next one of those pieces. For example, setting \c next_random_sample_percent to \c 10 would cause -the cursor to skip forward through 10% of the underlying object, and -each retrieved row would represent a sample from 10% of the object. 
-Similarly, setting \c next_random_sample_percent to \c 1 would cause the -cursor to skip forward through 1% of the underlying object, and each -retrieved row would represent a sample from 1% of the object. +the cursor to sequentially return records from each tenth part of the +object. Setting \c next_random_sample_percent to \c 1000 would cause the +cursor to sequentially return records from each .1% of the object. */ diff --git a/src/include/cursor.h b/src/include/cursor.h index 6cadeb9c245..13f18adab4a 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -108,8 +108,8 @@ struct __wt_cursor_btree { * percentage of the total leaf pages to their next value. Note the * configured value and the calculated number of leaf pages to skip. */ - uint32_t next_random_leaf_skip; - u_int next_random_sample_percent; + uint64_t next_random_leaf_skip; + u_int next_random_sample_size; /* * The search function sets compare to: diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 62f2bb0712f..315d77e3fc5 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -923,14 +923,16 @@ struct __wt_session { * record from the object when the WT_CURSOR::next method is called; * valid only for row-store cursors. See @ref cursor_random for * details., a boolean flag; default \c false.} - * @config{next_random_sample_percent, cursors configured with \c - * next_random to return pseudo-random records from the object randomly - * select from the entire object\, by default. 
Setting \c - * next_random_sample_percent to a non-zero value causes \c next_random - * cursors to randomly select the first returned record from the object - * as a whole\, and on subsequent returns to skip forward through the - * specified percentage of pages in the tree\, returning a record from - * the new location., an integer between 1 and 100; default \c 0.} + * @config{next_random_sample_size, cursors configured by \c next_random + * to return pseudo-random records from the object randomly select from + * the entire object\, by default. Setting \c next_random_sample_size + * to a non-zero value sets the number of samples the application + * expects to take using the \c next_random cursor. A cursor configured + * with both \c next_random and \c next_random_sample_size attempts to + * divide the object into \c next_random_sample_size equal-sized + * pieces\, and each retrieval returns a record from one of those + * pieces. See @ref cursor_random for details., an integer greater than + * or equal to 1; default \c 0.} * @config{overwrite, configures whether the cursor's insert\, update * and remove methods check the existing state of the record. 
If \c * overwrite is \c false\, WT_CURSOR::insert fails with diff --git a/test/suite/test_cursor_random.py b/test/suite/test_cursor_random.py index 7e0d9fb8e5f..b424dbbc7e3 100644 --- a/test/suite/test_cursor_random.py +++ b/test/suite/test_cursor_random.py @@ -39,8 +39,7 @@ class test_cursor_random(wttest.WiredTigerTestCase): ('table', dict(type='table:random')) ] config = [ - ('sample', - dict(config='next_random=true,next_random_sample_percent=35')), + ('sample', dict(config='next_random=true,next_random_sample_size=35')), ('not-sample', dict(config='next_random=true')) ] scenarios =number_scenarios(multiply_scenarios('.', types, config)) @@ -151,8 +150,7 @@ class test_cursor_random_invisible(wttest.WiredTigerTestCase): ('table', dict(type='table:random')) ] config = [ - ('sample', - dict(config='next_random=true,next_random_sample_percent=35')), + ('sample', dict(config='next_random=true,next_random_sample_size=35')), ('not-sample', dict(config='next_random=true')) ] scenarios =number_scenarios(multiply_scenarios('.', types, config)) |