diff options
author | Sulabh Mahajan <sulabh.mahajan@mongodb.com> | 2017-08-16 16:31:20 +1000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-08-16 16:31:20 +1000 |
commit | 4b0b370d4e11cd38ef3a3f382cfc1f754a2014f1 (patch) | |
tree | 0555a5f900b476db7cc0da17928b09d235881e27 | |
parent | 02c2ec9b1ae1df9c63a41d0aac4151dfa6c3a96b (diff) | |
download | mongo-4b0b370d4e11cd38ef3a3f382cfc1f754a2014f1.tar.gz |
WT-3460 Implement col store rollback to stable timestamp (#3584)
* Implement col store rollback to stable timestamp
* Add testing for col store fixed timestamp rollback
* Address Keith's comments
* review feedback
-rw-r--r-- | src/txn/txn_rollback_to_stable.c | 77 | ||||
-rw-r--r-- | test/suite/test_timestamp04.py | 26 |
2 files changed, 74 insertions, 29 deletions
diff --git a/src/txn/txn_rollback_to_stable.c b/src/txn/txn_rollback_to_stable.c index e972cc5a684..c9c3d3247c4 100644 --- a/src/txn/txn_rollback_to_stable.c +++ b/src/txn/txn_rollback_to_stable.c @@ -120,11 +120,11 @@ __txn_abort_newer_update(WT_SESSION_IMPL *session, } /* - * __txn_abort_newer_row_skip -- + * __txn_abort_newer_insert -- * Apply the update abort check to each entry in an insert skip list */ static void -__txn_abort_newer_row_skip(WT_SESSION_IMPL *session, +__txn_abort_newer_insert(WT_SESSION_IMPL *session, WT_INSERT_HEAD *head, wt_timestamp_t *rollback_timestamp) { WT_INSERT *ins; @@ -134,6 +134,50 @@ __txn_abort_newer_row_skip(WT_SESSION_IMPL *session, } /* + * __txn_abort_newer_col_var -- + * Abort updates on a variable length col leaf page with timestamps newer + * than the rollback timestamp. + */ +static void +__txn_abort_newer_col_var( + WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t *rollback_timestamp) +{ + WT_COL *cip; + WT_INSERT_HEAD *ins; + uint32_t i; + + /* Review the changes to the original on-page data items */ + WT_COL_FOREACH(page, cip, i) + if ((ins = WT_COL_UPDATE(page, cip)) != NULL) + __txn_abort_newer_insert(session, + ins, rollback_timestamp); + + /* Review the append list */ + if ((ins = WT_COL_APPEND(page)) != NULL) + __txn_abort_newer_insert(session, ins, rollback_timestamp); +} + +/* + * __txn_abort_newer_col_fix -- + * Abort updates on a fixed length col leaf page with timestamps newer than + * the rollback timestamp. 
+ */ +static void +__txn_abort_newer_col_fix( + WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t *rollback_timestamp) +{ + WT_INSERT_HEAD *ins; + + /* Review the changes to the original on-page data items */ + if ((ins = WT_COL_UPDATE_SINGLE(page)) != NULL) + __txn_abort_newer_insert(session, ins, rollback_timestamp); + + /* Review the append list */ + if ((ins = WT_COL_APPEND(page)) != NULL) + __txn_abort_newer_insert(session, ins, rollback_timestamp); +} + +/* * __txn_abort_newer_row_leaf -- * Abort updates on a row leaf page with timestamps newer than the * rollback timestamp. @@ -152,8 +196,7 @@ __txn_abort_newer_row_leaf( * page. */ if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL) - __txn_abort_newer_row_skip( - session, insert, rollback_timestamp); + __txn_abort_newer_insert(session, insert, rollback_timestamp); /* * Review updates that belong to keys that are on the disk image, @@ -165,7 +208,7 @@ __txn_abort_newer_row_leaf( session, upd, rollback_timestamp); if ((insert = WT_ROW_INSERT(page, rip)) != NULL) - __txn_abort_newer_row_skip( + __txn_abort_newer_insert( session, insert, rollback_timestamp); } } @@ -182,6 +225,13 @@ __txn_abort_newer_updates( page = ref->page; switch (page->type) { + case WT_PAGE_COL_FIX: + __txn_abort_newer_col_fix(session, page, rollback_timestamp); + break; + case WT_PAGE_COL_VAR: + __txn_abort_newer_col_var(session, page, rollback_timestamp); + break; + case WT_PAGE_COL_INT: case WT_PAGE_ROW_INT: /* * There is nothing to do for internal pages, since we aren't @@ -193,9 +243,7 @@ __txn_abort_newer_updates( case WT_PAGE_ROW_LEAF: __txn_abort_newer_row_leaf(session, page, rollback_timestamp); break; - default: - WT_RET_MSG(session, EINVAL, "rollback_to_stable " - "is only supported for row store btrees"); + WT_ILLEGAL_VALUE(session); } return (0); @@ -209,14 +257,11 @@ static int __txn_rollback_to_stable_custom_skip( WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp) { - WT_UNUSED(session); 
WT_UNUSED(context); + WT_UNUSED(session); /* Review all pages that are in memory. */ - if (ref->state == WT_REF_MEM || ref->state == WT_REF_DELETED) - *skipp = false; - else - *skipp = true; + *skipp = !(ref->state == WT_REF_MEM || ref->state == WT_REF_DELETED); return (0); } @@ -296,10 +341,6 @@ __txn_rollback_to_stable_btree( if (btree->root.page == NULL) return (0); - if (btree->type != BTREE_ROW) - WT_RET_MSG(session, EINVAL, "rollback_to_stable " - "is only supported for row store btrees"); - /* * Copy the stable timestamp, otherwise we'd need to lock it each time * it's accessed. Even though the stable timestamp isn't supposed to be @@ -368,7 +409,7 @@ __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[]) #ifndef HAVE_TIMESTAMPS WT_UNUSED(cfg); - WT_RET_MSG(session, EINVAL, "rollback_to_stable " + WT_RET_MSG(session, ENOTSUP, "rollback_to_stable " "requires a version of WiredTiger built with timestamp support"); #else WT_CONNECTION_IMPL *conn; diff --git a/test/suite/test_timestamp04.py b/test/suite/test_timestamp04.py index 146326834db..3af0feed31b 100644 --- a/test/suite/test_timestamp04.py +++ b/test/suite/test_timestamp04.py @@ -30,8 +30,6 @@ # Timestamps: Test that rollback_to_stable obeys expected visibility rules # -import datetime -import random from suite_subprocess import suite_subprocess import wiredtiger, wttest from wtscenario import make_scenarios @@ -50,9 +48,10 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess): uri = 'table:' + tablename scenarios = make_scenarios([ - #('col', dict(extra_config=',key_format=r')), - #('lsm', dict(extra_config=',type=lsm')), - ('row', dict(extra_config=',memory_page_max=32k,leaf_page_max=8k,internal_page_max=8k')), + ('col_fix', dict(empty=1, extra_config=',key_format=r, value_format=8t')), + ('col_var', dict(empty=0, extra_config=',key_format=r')), + #('lsm', dict(empty=0, extra_config=',type=lsm')), + ('row', dict(empty=0, extra_config='')), ]) # Rollback only works for 
non-durable tables @@ -65,17 +64,21 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess): session.begin_transaction(txn_config) c = session.open_cursor(self.uri, None) if missing == False: - actual = dict((k, v) for k, v, pad in c if v != 0) + actual = dict((k, v) for k, v in c if v != 0) #print expected #print actual self.assertEqual(actual, expected) # Search for the expected items as well as iterating for k, v in expected.iteritems(): if missing == False: - self.assertEqual(c[k][0], v, "for key " + str(k)) + self.assertEqual(c[k], v, "for key " + str(k)) else: c.set_key(k) - self.assertEqual(c.search(), wiredtiger.WT_NOTFOUND) + if self.empty: + # Fixed-length column-store rows always exist. + self.assertEqual(c.search(), 0) + else: + self.assertEqual(c.search(), wiredtiger.WT_NOTFOUND) c.close() if txn_config: session.commit_transaction() @@ -87,7 +90,8 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess): # Configure small page sizes to ensure eviction comes through and we have a # somewhat complex tree self.session.create(self.uri, - 'key_format=i,value_format=iS,memory_page_max=16k,leaf_page_max=8k' + self.extra_config) + 'key_format=i,value_format=i,memory_page_max=32k,leaf_page_max=8k,internal_page_max=8k' + + self.extra_config) c = self.session.open_cursor(self.uri) # Insert keys each with timestamp=key, in some order @@ -96,7 +100,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess): for k in keys: self.session.begin_transaction() - c[k] = (1, 'the quick brown fox') + c[k] = 1 self.session.commit_transaction('commit_timestamp=' + timestamp_str(k)) # Setup an oldest timestamp to ensure state remains in cache. 
if k == 1: @@ -119,7 +123,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess): # Update the values again in preparation for rolling back more for k in keys: self.session.begin_transaction() - c[k] = (2, 'jumped over the lazy dog') + c[k] = 2 self.session.commit_transaction('commit_timestamp=' + timestamp_str(k + key_range)) # Now we should have: keys 1-100 with value 2 |