summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sulabh Mahajan <sulabh.mahajan@mongodb.com> 2017-08-16 16:31:20 +1000
committer GitHub <noreply@github.com> 2017-08-16 16:31:20 +1000
commit 4b0b370d4e11cd38ef3a3f382cfc1f754a2014f1 (patch)
tree 0555a5f900b476db7cc0da17928b09d235881e27
parent 02c2ec9b1ae1df9c63a41d0aac4151dfa6c3a96b (diff)
download mongo-4b0b370d4e11cd38ef3a3f382cfc1f754a2014f1.tar.gz
WT-3460 Implement col store rollback to stable timestamp (#3584)
* Implement col store rollback to stable timestamp
* Add testing for col store fixed timestamp rollback
* Address Keith's comments
* review feedback
-rw-r--r-- src/txn/txn_rollback_to_stable.c 77
-rw-r--r-- test/suite/test_timestamp04.py 26
2 files changed, 74 insertions, 29 deletions
diff --git a/src/txn/txn_rollback_to_stable.c b/src/txn/txn_rollback_to_stable.c
index e972cc5a684..c9c3d3247c4 100644
--- a/src/txn/txn_rollback_to_stable.c
+++ b/src/txn/txn_rollback_to_stable.c
@@ -120,11 +120,11 @@ __txn_abort_newer_update(WT_SESSION_IMPL *session,
}
/*
- * __txn_abort_newer_row_skip --
+ * __txn_abort_newer_insert --
* Apply the update abort check to each entry in an insert skip list
*/
static void
-__txn_abort_newer_row_skip(WT_SESSION_IMPL *session,
+__txn_abort_newer_insert(WT_SESSION_IMPL *session,
WT_INSERT_HEAD *head, wt_timestamp_t *rollback_timestamp)
{
WT_INSERT *ins;
@@ -134,6 +134,50 @@ __txn_abort_newer_row_skip(WT_SESSION_IMPL *session,
}
/*
+ * __txn_abort_newer_col_var --
+ * Abort updates on a variable length col leaf page with timestamps newer
+ * than the rollback timestamp.
+ *
+ * Visits every insert list attached to the page: first the update list of
+ * each on-page column entry (WT_COL_UPDATE), then the page's append list
+ * (WT_COL_APPEND). Each non-NULL list is passed to
+ * __txn_abort_newer_insert together with the session and the rollback
+ * timestamp; NULL lists are skipped. Nothing is returned.
+ */
+static void
+__txn_abort_newer_col_var(
+ WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t *rollback_timestamp)
+{
+ WT_COL *cip;
+ WT_INSERT_HEAD *ins;
+ uint32_t i;
+
+ /* Review the update list attached to each original on-page data item */
+ WT_COL_FOREACH(page, cip, i)
+ if ((ins = WT_COL_UPDATE(page, cip)) != NULL)
+ __txn_abort_newer_insert(session,
+ ins, rollback_timestamp);
+
+ /* Review the append list, if the page has one */
+ if ((ins = WT_COL_APPEND(page)) != NULL)
+ __txn_abort_newer_insert(session, ins, rollback_timestamp);
+}
+
+/*
+ * __txn_abort_newer_col_fix --
+ * Abort updates on a fixed length col leaf page with timestamps newer than
+ * the rollback timestamp.
+ *
+ * Unlike the variable length case there is no per-entry walk here: the
+ * function checks the one list returned by WT_COL_UPDATE_SINGLE and then
+ * the page's append list (WT_COL_APPEND). Each non-NULL list is passed to
+ * __txn_abort_newer_insert together with the session and the rollback
+ * timestamp; NULL lists are skipped. Nothing is returned.
+ */
+static void
+__txn_abort_newer_col_fix(
+ WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t *rollback_timestamp)
+{
+ WT_INSERT_HEAD *ins;
+
+ /* Review the page's single update list for the on-page data items */
+ if ((ins = WT_COL_UPDATE_SINGLE(page)) != NULL)
+ __txn_abort_newer_insert(session, ins, rollback_timestamp);
+
+ /* Review the append list, if the page has one */
+ if ((ins = WT_COL_APPEND(page)) != NULL)
+ __txn_abort_newer_insert(session, ins, rollback_timestamp);
+}
+
+/*
* __txn_abort_newer_row_leaf --
* Abort updates on a row leaf page with timestamps newer than the
* rollback timestamp.
@@ -152,8 +196,7 @@ __txn_abort_newer_row_leaf(
* page.
*/
if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
- __txn_abort_newer_row_skip(
- session, insert, rollback_timestamp);
+ __txn_abort_newer_insert(session, insert, rollback_timestamp);
/*
* Review updates that belong to keys that are on the disk image,
@@ -165,7 +208,7 @@ __txn_abort_newer_row_leaf(
session, upd, rollback_timestamp);
if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
- __txn_abort_newer_row_skip(
+ __txn_abort_newer_insert(
session, insert, rollback_timestamp);
}
}
@@ -182,6 +225,13 @@ __txn_abort_newer_updates(
page = ref->page;
switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ __txn_abort_newer_col_fix(session, page, rollback_timestamp);
+ break;
+ case WT_PAGE_COL_VAR:
+ __txn_abort_newer_col_var(session, page, rollback_timestamp);
+ break;
+ case WT_PAGE_COL_INT:
case WT_PAGE_ROW_INT:
/*
* There is nothing to do for internal pages, since we aren't
@@ -193,9 +243,7 @@ __txn_abort_newer_updates(
case WT_PAGE_ROW_LEAF:
__txn_abort_newer_row_leaf(session, page, rollback_timestamp);
break;
- default:
- WT_RET_MSG(session, EINVAL, "rollback_to_stable "
- "is only supported for row store btrees");
+ WT_ILLEGAL_VALUE(session);
}
return (0);
@@ -209,14 +257,11 @@ static int
__txn_rollback_to_stable_custom_skip(
WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
{
- WT_UNUSED(session);
WT_UNUSED(context);
+ WT_UNUSED(session);
/* Review all pages that are in memory. */
- if (ref->state == WT_REF_MEM || ref->state == WT_REF_DELETED)
- *skipp = false;
- else
- *skipp = true;
+ *skipp = !(ref->state == WT_REF_MEM || ref->state == WT_REF_DELETED);
return (0);
}
@@ -296,10 +341,6 @@ __txn_rollback_to_stable_btree(
if (btree->root.page == NULL)
return (0);
- if (btree->type != BTREE_ROW)
- WT_RET_MSG(session, EINVAL, "rollback_to_stable "
- "is only supported for row store btrees");
-
/*
* Copy the stable timestamp, otherwise we'd need to lock it each time
* it's accessed. Even though the stable timestamp isn't supposed to be
@@ -368,7 +409,7 @@ __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[])
#ifndef HAVE_TIMESTAMPS
WT_UNUSED(cfg);
- WT_RET_MSG(session, EINVAL, "rollback_to_stable "
+ WT_RET_MSG(session, ENOTSUP, "rollback_to_stable "
"requires a version of WiredTiger built with timestamp support");
#else
WT_CONNECTION_IMPL *conn;
diff --git a/test/suite/test_timestamp04.py b/test/suite/test_timestamp04.py
index 146326834db..3af0feed31b 100644
--- a/test/suite/test_timestamp04.py
+++ b/test/suite/test_timestamp04.py
@@ -30,8 +30,6 @@
# Timestamps: Test that rollback_to_stable obeys expected visibility rules
#
-import datetime
-import random
from suite_subprocess import suite_subprocess
import wiredtiger, wttest
from wtscenario import make_scenarios
@@ -50,9 +48,10 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
uri = 'table:' + tablename
scenarios = make_scenarios([
- #('col', dict(extra_config=',key_format=r')),
- #('lsm', dict(extra_config=',type=lsm')),
- ('row', dict(extra_config=',memory_page_max=32k,leaf_page_max=8k,internal_page_max=8k')),
+ ('col_fix', dict(empty=1, extra_config=',key_format=r, value_format=8t')),
+ ('col_var', dict(empty=0, extra_config=',key_format=r')),
+ #('lsm', dict(empty=0, extra_config=',type=lsm')),
+ ('row', dict(empty=0, extra_config='')),
])
# Rollback only works for non-durable tables
@@ -65,17 +64,21 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
session.begin_transaction(txn_config)
c = session.open_cursor(self.uri, None)
if missing == False:
- actual = dict((k, v) for k, v, pad in c if v != 0)
+ actual = dict((k, v) for k, v in c if v != 0)
#print expected
#print actual
self.assertEqual(actual, expected)
# Search for the expected items as well as iterating
for k, v in expected.iteritems():
if missing == False:
- self.assertEqual(c[k][0], v, "for key " + str(k))
+ self.assertEqual(c[k], v, "for key " + str(k))
else:
c.set_key(k)
- self.assertEqual(c.search(), wiredtiger.WT_NOTFOUND)
+ if self.empty:
+ # Fixed-length column-store rows always exist.
+ self.assertEqual(c.search(), 0)
+ else:
+ self.assertEqual(c.search(), wiredtiger.WT_NOTFOUND)
c.close()
if txn_config:
session.commit_transaction()
@@ -87,7 +90,8 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
# Configure small page sizes to ensure eviction comes through and we have a
# somewhat complex tree
self.session.create(self.uri,
- 'key_format=i,value_format=iS,memory_page_max=16k,leaf_page_max=8k' + self.extra_config)
+ 'key_format=i,value_format=i,memory_page_max=32k,leaf_page_max=8k,internal_page_max=8k'
+ + self.extra_config)
c = self.session.open_cursor(self.uri)
# Insert keys each with timestamp=key, in some order
@@ -96,7 +100,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
for k in keys:
self.session.begin_transaction()
- c[k] = (1, 'the quick brown fox')
+ c[k] = 1
self.session.commit_transaction('commit_timestamp=' + timestamp_str(k))
# Setup an oldest timestamp to ensure state remains in cache.
if k == 1:
@@ -119,7 +123,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
# Update the values again in preparation for rolling back more
for k in keys:
self.session.begin_transaction()
- c[k] = (2, 'jumped over the lazy dog')
+ c[k] = 2
self.session.commit_transaction('commit_timestamp=' + timestamp_str(k + key_range))
# Now we should have: keys 1-100 with value 2