summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/btree/row_srch.c15
-rw-r--r--src/docs/cursor-random.dox16
-rw-r--r--src/include/serial.i17
-rw-r--r--src/txn/txn.c30
-rw-r--r--test/format/ops.c1
-rw-r--r--test/suite/test_cursor_random.py14
6 files changed, 54 insertions, 39 deletions
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c
index 9803b924355..d83d3253c44 100644
--- a/src/btree/row_srch.c
+++ b/src/btree/row_srch.c
@@ -471,6 +471,7 @@ __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
WT_PAGE *page;
WT_PAGE_INDEX *pindex;
WT_REF *current, *descent;
+ uint32_t cnt;
btree = S2BT(session);
@@ -528,18 +529,22 @@ restart:
/*
* If the tree is new (and not empty), it might have a large insert
- * list, pick the key in the middle of that insert list.
+ * list. Count how many records are in the list.
*/
F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
if ((cbt->ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL)
WT_ERR(WT_NOTFOUND);
- for (p = t = WT_SKIP_FIRST(cbt->ins_head);;) {
+ for (cnt = 1, p = WT_SKIP_FIRST(cbt->ins_head);; ++cnt)
if ((p = WT_SKIP_NEXT(p)) == NULL)
break;
- if ((p = WT_SKIP_NEXT(p)) == NULL)
+
+ /*
+ * Select a random number from 0 to (N - 1), return that record.
+ */
+ cnt = __wt_random(&session->rnd) % cnt;
+ for (p = t = WT_SKIP_FIRST(cbt->ins_head);; t = p)
+ if (cnt-- == 0 || (p = WT_SKIP_NEXT(p)) == NULL)
break;
- t = WT_SKIP_NEXT(t);
- }
cbt->ref = current;
cbt->compare = 0;
cbt->ins = t;
diff --git a/src/docs/cursor-random.dox b/src/docs/cursor-random.dox
index 70a28407ea5..5d0b89d6547 100644
--- a/src/docs/cursor-random.dox
+++ b/src/docs/cursor-random.dox
@@ -2,15 +2,11 @@
The \c next_random configuration to the WT_SESSION::open_cursor method
configures the cursor to return a pseudo-random record from a row-store
-object.
-
-The ability to return a random record was added to support a particular
-application, and as a result has somewhat unusual semantics. First, the
-returned record may not be random at all in the case of objects with only a few
-rows (especially when the object has never been written to the backing store).
-In such objects, the WT_CURSOR::next method for cursors configured with \c
-next_random may return the same row on each call. Additionally, even in larger
-objects, the WT_CURSOR::next method usually returns the first record from a
-random page in the underlying file, not a random record from a random page.
+object (the configuration is not supported on other types of objects).
+The configuration has somewhat unusual semantics: first, the returned
+record may not be very random in the case of objects with only a few
+rows. Additionally, even in larger objects, the WT_CURSOR::next method
+generally returns the first record from a random page in the underlying
+file, not a random record from a random page.
*/
diff --git a/src/include/serial.i b/src/include/serial.i
index 5b290822a10..0fc23348800 100644
--- a/src/include/serial.i
+++ b/src/include/serial.i
@@ -50,11 +50,15 @@ __insert_simple_func(WT_SESSION_IMPL *session,
*
* All structure setup must be flushed before the structure is entered
* into the list. We need a write barrier here, our callers depend on
- * it.
+ * it. Don't pass complex arguments to the macro, some implementations
+ * read the old value multiple times.
*/
- for (i = 0; i < skipdepth; i++)
- if (!WT_ATOMIC_CAS8(*ins_stack[i], new_ins->next[i], new_ins))
+ for (i = 0; i < skipdepth; i++) {
+ WT_INSERT *old_ins = *ins_stack[i];
+ if (old_ins != new_ins->next[i] ||
+ !WT_ATOMIC_CAS8(*ins_stack[i], old_ins, new_ins))
return (i == 0 ? WT_RESTART : 0);
+ }
return (0);
}
@@ -83,10 +87,13 @@ __insert_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head,
*
* All structure setup must be flushed before the structure is entered
* into the list. We need a write barrier here, our callers depend on
- * it.
+ * it. Don't pass complex arguments to the macro, some implementations
+ * read the old value multiple times.
*/
for (i = 0; i < skipdepth; i++) {
- if (!WT_ATOMIC_CAS8(*ins_stack[i], new_ins->next[i], new_ins))
+ WT_INSERT *old_ins = *ins_stack[i];
+ if (old_ins != new_ins->next[i] ||
+ !WT_ATOMIC_CAS8(*ins_stack[i], old_ins, new_ins))
return (i == 0 ? WT_RESTART : 0);
if (ins_head->tail[i] == NULL ||
ins_stack[i] == &ins_head->tail[i]->next[i])
diff --git a/src/txn/txn.c b/src/txn/txn.c
index a9566876c8a..210c5dde5d0 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -208,7 +208,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
WT_SESSION_IMPL *oldest_session;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *s;
- uint64_t current_id, id, oldest_id, prev_oldest_id, snap_min;
+ uint64_t current_id, id, last_running, oldest_id, prev_oldest_id;
uint32_t i, session_cnt;
int32_t count;
int last_running_moved;
@@ -216,7 +216,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
conn = S2C(session);
txn_global = &conn->txn_global;
- current_id = snap_min = txn_global->current;
+ current_id = last_running = txn_global->current;
oldest_session = NULL;
prev_oldest_id = txn_global->oldest_id;
@@ -241,7 +241,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
/* The oldest ID cannot change until the scan count goes to zero. */
prev_oldest_id = txn_global->oldest_id;
- current_id = oldest_id = snap_min = txn_global->current;
+ current_id = oldest_id = last_running = txn_global->current;
/* Walk the array of concurrent transactions. */
WT_ORDERED_READ(session_cnt, conn->session_cnt);
@@ -256,8 +256,8 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
*/
if ((id = s->id) != WT_TXN_NONE &&
WT_TXNID_LE(prev_oldest_id, id) &&
- WT_TXNID_LT(id, snap_min))
- snap_min = id;
+ WT_TXNID_LT(id, last_running))
+ last_running = id;
/*
* !!!
@@ -274,8 +274,8 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
}
}
- if (WT_TXNID_LT(snap_min, oldest_id))
- oldest_id = snap_min;
+ if (WT_TXNID_LT(last_running, oldest_id))
+ oldest_id = last_running;
/* The oldest ID can't move past any named snapshots. */
if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE &&
@@ -283,7 +283,8 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
oldest_id = id;
/* Update the last running ID. */
- last_running_moved = WT_TXNID_LT(txn_global->last_running, snap_min);
+ last_running_moved =
+ WT_TXNID_LT(txn_global->last_running, last_running);
/* Update the oldest ID. */
if ((WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) &&
@@ -291,15 +292,15 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
WT_ORDERED_READ(session_cnt, conn->session_cnt);
for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
if ((id = s->id) != WT_TXN_NONE &&
- WT_TXNID_LT(id, snap_min))
- oldest_id = id;
+ WT_TXNID_LT(id, last_running))
+ last_running = id;
if ((id = s->snap_min) != WT_TXN_NONE &&
WT_TXNID_LT(id, oldest_id))
oldest_id = id;
}
- if (WT_TXNID_LT(snap_min, oldest_id))
- oldest_id = snap_min;
+ if (WT_TXNID_LT(last_running, oldest_id))
+ oldest_id = last_running;
#ifdef HAVE_DIAGNOSTIC
/*
@@ -313,10 +314,11 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
WT_ASSERT(session,
id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
#endif
- if (WT_TXNID_LT(txn_global->last_running, snap_min))
- txn_global->last_running = snap_min;
+ if (WT_TXNID_LT(txn_global->last_running, last_running))
+ txn_global->last_running = last_running;
if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
txn_global->oldest_id = oldest_id;
+ WT_ASSERT(session, txn_global->scan_count == -1);
txn_global->scan_count = 0;
} else {
if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) &&
diff --git a/test/format/ops.c b/test/format/ops.c
index 468b961d182..7c38aec4757 100644
--- a/test/format/ops.c
+++ b/test/format/ops.c
@@ -63,6 +63,7 @@ wts_ops(int lastrun)
session = NULL; /* -Wconditional-uninitialized */
memset(&backup_tid, 0, sizeof(backup_tid));
memset(&compact_tid, 0, sizeof(compact_tid));
+ memset(&lrt_tid, 0, sizeof(lrt_tid));
/*
* There are two mechanisms to specify the length of the run, a number
diff --git a/test/suite/test_cursor_random.py b/test/suite/test_cursor_random.py
index be08c59210f..10a3140a2fd 100644
--- a/test/suite/test_cursor_random.py
+++ b/test/suite/test_cursor_random.py
@@ -92,7 +92,7 @@ class test_cursor_random(wttest.WiredTigerTestCase):
# Check that next_random works in the presence of a larger set of values,
# where the values are in a disk format page.
- def test_cursor_random_multiple_page_records(self):
+ def cursor_random_multiple_page_records(self, reopen):
uri = self.type + 'random'
if self.type == 'file:':
simple_populate(self, uri,
@@ -103,10 +103,10 @@ class test_cursor_random(wttest.WiredTigerTestCase):
'allocation_size=512,leaf_page_max=512,key_format=' +\
self.fmt, 10000)
- # Close the connection so everything is forced to disk (otherwise the
- # values are on an insert list and the underlying engine doesn't make
- # random selections, it selects the middle of the list.
- self.reopen_conn()
+ # Optionally close the connection so everything is forced to disk,
+ # insert lists are an entirely different path in the code.
+ if reopen:
+ self.reopen_conn()
cursor = self.session.open_cursor(uri, None, "next_random=true")
last = ''
@@ -120,6 +120,10 @@ class test_cursor_random(wttest.WiredTigerTestCase):
self.assertLess(match, 5,
'next_random did not return random records, too many matches found')
+ def test_cursor_random_multiple_page_records_reopen(self):
+ self.cursor_random_multiple_page_records(1)
+ def test_cursor_random_multiple_page_records(self):
+ self.cursor_random_multiple_page_records(0)
# Check that opening a random cursor on column-store returns not-supported.
class test_cursor_random_column(wttest.WiredTigerTestCase):