diff options
-rw-r--r-- | src/btree/row_srch.c | 15 | ||||
-rw-r--r-- | src/docs/cursor-random.dox | 16 | ||||
-rw-r--r-- | src/include/serial.i | 17 | ||||
-rw-r--r-- | src/txn/txn.c | 30 | ||||
-rw-r--r-- | test/format/ops.c | 1 | ||||
-rw-r--r-- | test/suite/test_cursor_random.py | 14 |
6 files changed, 54 insertions, 39 deletions
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 9803b924355..d83d3253c44 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -471,6 +471,7 @@ __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_PAGE *page; WT_PAGE_INDEX *pindex; WT_REF *current, *descent; + uint32_t cnt; btree = S2BT(session); @@ -528,18 +529,22 @@ restart: /* * If the tree is new (and not empty), it might have a large insert - * list, pick the key in the middle of that insert list. + * list. Count how many records are in the list. */ F_SET(cbt, WT_CBT_SEARCH_SMALLEST); if ((cbt->ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) WT_ERR(WT_NOTFOUND); - for (p = t = WT_SKIP_FIRST(cbt->ins_head);;) { + for (cnt = 1, p = WT_SKIP_FIRST(cbt->ins_head);; ++cnt) if ((p = WT_SKIP_NEXT(p)) == NULL) break; - if ((p = WT_SKIP_NEXT(p)) == NULL) + + /* + * Select a random number from 0 to (N - 1), return that record. + */ + cnt = __wt_random(&session->rnd) % cnt; + for (p = t = WT_SKIP_FIRST(cbt->ins_head);; t = p) + if (cnt-- == 0 || (p = WT_SKIP_NEXT(p)) == NULL) break; - t = WT_SKIP_NEXT(t); - } cbt->ref = current; cbt->compare = 0; cbt->ins = t; diff --git a/src/docs/cursor-random.dox b/src/docs/cursor-random.dox index 70a28407ea5..5d0b89d6547 100644 --- a/src/docs/cursor-random.dox +++ b/src/docs/cursor-random.dox @@ -2,15 +2,11 @@ The \c next_random configuration to the WT_SESSION::open_cursor method configures the cursor to return a pseudo-random record from a row-store -object. - -The ability to return a random record was added to support a particular -application, and as a result has somewhat unusual semantics. First, the -returned record may not be random at all in the case of objects with only a few -rows (especially when the object has never been written to the backing store). -In such objects, the WT_CURSOR::next method for cursors configured with \c -next_random may return the same row on each call. Additionally, even in larger -objects, the WT_CURSOR::next method usually returns the first record from a -random page in the underlying file, not a random record from a random page. +object (the configuration is not supported on other types of objects). +The configuration has somewhat unusual semantics: first, the returned +record may not be very random in the case of objects with only a few +rows. Additionally, even in larger objects, the WT_CURSOR::next method +generally returns the first record from a random page in the underlying +file, not a random record from a random page. */ diff --git a/src/include/serial.i b/src/include/serial.i index 5b290822a10..0fc23348800 100644 --- a/src/include/serial.i +++ b/src/include/serial.i @@ -50,11 +50,15 @@ __insert_simple_func(WT_SESSION_IMPL *session, * * All structure setup must be flushed before the structure is entered * into the list. We need a write barrier here, our callers depend on - * it. + * it. Don't pass complex arguments to the macro, some implementations + * read the old value multiple times. */ - for (i = 0; i < skipdepth; i++) - if (!WT_ATOMIC_CAS8(*ins_stack[i], new_ins->next[i], new_ins)) + for (i = 0; i < skipdepth; i++) { + WT_INSERT *old_ins = *ins_stack[i]; + if (old_ins != new_ins->next[i] || + !WT_ATOMIC_CAS8(*ins_stack[i], old_ins, new_ins)) return (i == 0 ? WT_RESTART : 0); + } return (0); } @@ -83,10 +87,13 @@ __insert_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head, * * All structure setup must be flushed before the structure is entered * into the list. We need a write barrier here, our callers depend on - * it. + * it. Don't pass complex arguments to the macro, some implementations + * read the old value multiple times. */ for (i = 0; i < skipdepth; i++) { - if (!WT_ATOMIC_CAS8(*ins_stack[i], new_ins->next[i], new_ins)) + WT_INSERT *old_ins = *ins_stack[i]; + if (old_ins != new_ins->next[i] || + !WT_ATOMIC_CAS8(*ins_stack[i], old_ins, new_ins)) return (i == 0 ? WT_RESTART : 0); if (ins_head->tail[i] == NULL || ins_stack[i] == &ins_head->tail[i]->next[i]) diff --git a/src/txn/txn.c b/src/txn/txn.c index a9566876c8a..210c5dde5d0 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -208,7 +208,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) WT_SESSION_IMPL *oldest_session; WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *s; - uint64_t current_id, id, oldest_id, prev_oldest_id, snap_min; + uint64_t current_id, id, last_running, oldest_id, prev_oldest_id; uint32_t i, session_cnt; int32_t count; int last_running_moved; @@ -216,7 +216,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) conn = S2C(session); txn_global = &conn->txn_global; - current_id = snap_min = txn_global->current; + current_id = last_running = txn_global->current; oldest_session = NULL; prev_oldest_id = txn_global->oldest_id; @@ -241,7 +241,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) /* The oldest ID cannot change until the scan count goes to zero. */ prev_oldest_id = txn_global->oldest_id; - current_id = oldest_id = snap_min = txn_global->current; + current_id = oldest_id = last_running = txn_global->current; /* Walk the array of concurrent transactions. */ WT_ORDERED_READ(session_cnt, conn->session_cnt); @@ -256,8 +256,8 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) */ if ((id = s->id) != WT_TXN_NONE && WT_TXNID_LE(prev_oldest_id, id) && - WT_TXNID_LT(id, snap_min)) - snap_min = id; + WT_TXNID_LT(id, last_running)) + last_running = id; /* * !!! @@ -274,8 +274,8 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) } } - if (WT_TXNID_LT(snap_min, oldest_id)) - oldest_id = snap_min; + if (WT_TXNID_LT(last_running, oldest_id)) + oldest_id = last_running; /* The oldest ID can't move past any named snapshots. */ if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE && @@ -283,7 +283,8 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) oldest_id = id; /* Update the last running ID. */ - last_running_moved = WT_TXNID_LT(txn_global->last_running, snap_min); + last_running_moved = + WT_TXNID_LT(txn_global->last_running, last_running); /* Update the oldest ID. */ if ((WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) && @@ -291,15 +292,15 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) WT_ORDERED_READ(session_cnt, conn->session_cnt); for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { if ((id = s->id) != WT_TXN_NONE && - WT_TXNID_LT(id, snap_min)) - oldest_id = id; + WT_TXNID_LT(id, last_running)) + last_running = id; if ((id = s->snap_min) != WT_TXN_NONE && WT_TXNID_LT(id, oldest_id)) oldest_id = id; } - if (WT_TXNID_LT(snap_min, oldest_id)) - oldest_id = snap_min; + if (WT_TXNID_LT(last_running, oldest_id)) + oldest_id = last_running; #ifdef HAVE_DIAGNOSTIC /* @@ -313,10 +314,11 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) WT_ASSERT(session, id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id)); #endif - if (WT_TXNID_LT(txn_global->last_running, snap_min)) - txn_global->last_running = snap_min; + if (WT_TXNID_LT(txn_global->last_running, last_running)) + txn_global->last_running = last_running; if (WT_TXNID_LT(txn_global->oldest_id, oldest_id)) txn_global->oldest_id = oldest_id; + WT_ASSERT(session, txn_global->scan_count == -1); txn_global->scan_count = 0; } else { if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) && diff --git a/test/format/ops.c b/test/format/ops.c index 468b961d182..7c38aec4757 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -63,6 +63,7 @@ wts_ops(int lastrun) session = NULL; /* -Wconditional-uninitialized */ memset(&backup_tid, 0, sizeof(backup_tid)); memset(&compact_tid, 0, sizeof(compact_tid)); + memset(&lrt_tid, 0, sizeof(lrt_tid)); /* * There are two mechanisms to specify the length of the run, a number diff --git a/test/suite/test_cursor_random.py b/test/suite/test_cursor_random.py index be08c59210f..10a3140a2fd 100644 --- a/test/suite/test_cursor_random.py +++ b/test/suite/test_cursor_random.py @@ -92,7 +92,7 @@ class test_cursor_random(wttest.WiredTigerTestCase): # Check that next_random works in the presence of a larger set of values, # where the values are in a disk format page. - def test_cursor_random_multiple_page_records(self): + def cursor_random_multiple_page_records(self, reopen): uri = self.type + 'random' if self.type == 'file:': simple_populate(self, uri, @@ -103,10 +103,10 @@ class test_cursor_random(wttest.WiredTigerTestCase): 'allocation_size=512,leaf_page_max=512,key_format=' +\ self.fmt, 10000) - # Close the connection so everything is forced to disk (otherwise the - # values are on an insert list and the underlying engine doesn't make - # random selections, it selects the middle of the list. - self.reopen_conn() + # Optionally close the connection so everything is forced to disk, + # insert lists are an entirely different path in the code. + if reopen: + self.reopen_conn() cursor = self.session.open_cursor(uri, None, "next_random=true") last = '' @@ -120,6 +120,10 @@ class test_cursor_random(wttest.WiredTigerTestCase): self.assertLess(match, 5, 'next_random did not return random records, too many matches found') + def test_cursor_random_multiple_page_records_reopen(self): + self.cursor_random_multiple_page_records(1) + def test_cursor_random_multiple_page_records(self): + self.cursor_random_multiple_page_records(0) # Check that opening a random cursor on column-store returns not-supported. class test_cursor_random_column(wttest.WiredTigerTestCase): |