summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author sueloverso <sue@mongodb.com> 2017-07-26 03:26:05 -0400
committer Michael Cahill <michael.cahill@mongodb.com> 2017-07-26 17:26:05 +1000
commit d4cb28503cfb57e6dc1549de344e4db96a5dc8a3 (patch)
tree c3aba07cc66358db83a39b3f30ff7077ccb69b34
parent cca4225d100fb9f192036ba9abb4a5ea1042f578 (diff)
download mongo-d4cb28503cfb57e6dc1549de344e4db96a5dc8a3.tar.gz
WT-3387 Add use_timestamp option for checkpoint (#3503)
* Add stable_timestamp config and basic parsing.
* Change to use stable timestamp: remove calls to timestamp_dump, add get=stable to the timestamp query and have the checkpoint code use it, and add a test component to update the stable timestamp and recheck the backup.
* Add a check for read_timestamp being after stable_timestamp, and test its usage.
-rw-r--r-- dist/api_data.py 13
-rw-r--r-- src/config/config_def.c 11
-rw-r--r-- src/docs/transactions.dox 7
-rw-r--r-- src/include/txn.h 7
-rw-r--r-- src/include/wiredtiger.in 12
-rw-r--r-- src/txn/txn.c 10
-rw-r--r-- src/txn/txn_ckpt.c 24
-rw-r--r-- src/txn/txn_timestamp.c 101
-rw-r--r-- test/suite/test_timestamp02.py 7
-rw-r--r-- test/suite/test_timestamp03.py 33
10 files changed, 183 insertions, 42 deletions
diff --git a/dist/api_data.py b/dist/api_data.py
index ebc2e536aa8..726c79590e5 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -1151,6 +1151,12 @@ methods = {
undoc=True),
Config('target', '', r'''
if non-empty, checkpoint the list of objects''', type='list'),
+ Config('use_timestamp', 'true', r'''
+ by default, create the checkpoint as of the last stable timestamp
+ if timestamps are in use, or all current updates if there is no
+ stable timestamp set. If false, this option generates a checkpoint
+ with all updates including those later than the timestamp''',
+ type='boolean'),
]),
'WT_SESSION.snapshot' : Method([
@@ -1249,7 +1255,12 @@ methods = {
Config('oldest_timestamp', '', r'''
future commits and queries will be no earlier than the specified
timestamp. Supplied values must be monotonically increasing.
- see @ref transaction_timestamps'''),
+ See @ref transaction_timestamps'''),
+ Config('stable_timestamp', '', r'''
+ future checkpoints will be no later than the specified
+ timestamp. Supplied values must be monotonically increasing.
+ The stable timestamp data stability only applies to tables
+ that are not being logged. See @ref transaction_timestamps'''),
]),
'WT_SESSION.reconfigure' : Method(session_config),
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 622db3fb6da..9106385bf9e 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -179,6 +179,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_set_timestamp[] = {
{ "oldest_timestamp", "string", NULL, NULL, NULL, 0 },
+ { "stable_timestamp", "string", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -224,6 +225,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_checkpoint[] = {
{ "name", "string", NULL, NULL, NULL, 0 },
{ "read_timestamp", "string", NULL, NULL, NULL, 0 },
{ "target", "list", NULL, NULL, NULL, 0 },
+ { "use_timestamp", "boolean", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -1141,8 +1143,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
NULL, 0
},
{ "WT_CONNECTION.set_timestamp",
- "oldest_timestamp=",
- confchk_WT_CONNECTION_set_timestamp, 1
+ "oldest_timestamp=,stable_timestamp=",
+ confchk_WT_CONNECTION_set_timestamp, 2
},
{ "WT_CURSOR.close",
"",
@@ -1161,8 +1163,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {
confchk_WT_SESSION_begin_transaction, 6
},
{ "WT_SESSION.checkpoint",
- "drop=,force=false,name=,read_timestamp=,target=",
- confchk_WT_SESSION_checkpoint, 5
+ "drop=,force=false,name=,read_timestamp=,target=,"
+ "use_timestamp=true",
+ confchk_WT_SESSION_checkpoint, 6
},
{ "WT_SESSION.close",
"",
diff --git a/src/docs/transactions.dox b/src/docs/transactions.dox
index 64d1c701acc..d9cc72dcf24 100644
--- a/src/docs/transactions.dox
+++ b/src/docs/transactions.dox
@@ -177,6 +177,13 @@ WiredTiger can discard history before the specified point. It is critical
that the oldest timestamp update frequently or the cache can become full of
updates, reducing performance.
+Setting a stable timestamp with WT_CONNECTION::set_timestamp indicates a
+known stable location that is sufficient for durability. During a checkpoint,
+the state of a table is saved only as of the stable timestamp; newer updates
+are not included. This can be overridden by passing \c use_timestamp=false
+to WT_SESSION::checkpoint. It is expected that the stable timestamp is
+updated frequently.
+
Commit timestamps cannot be set in the past of any read timestamp that has
been used. This is enforced by assertions in diagnostic builds; if
applications violate this rule, data consistency can be violated.
diff --git a/src/include/txn.h b/src/include/txn.h
index b2121532c40..83e9baaac6a 100644
--- a/src/include/txn.h
+++ b/src/include/txn.h
@@ -92,8 +92,13 @@ struct __wt_txn_global {
WT_DECL_TIMESTAMP(commit_timestamp)
WT_DECL_TIMESTAMP(oldest_timestamp)
WT_DECL_TIMESTAMP(pinned_timestamp)
- bool has_commit_timestamp, has_oldest_timestamp, has_pinned_timestamp;
+ WT_DECL_TIMESTAMP(stable_timestamp)
+ bool has_commit_timestamp;
+ bool has_oldest_timestamp;
+ bool has_pinned_timestamp;
+ bool has_stable_timestamp;
bool oldest_is_pinned;
+ bool stable_is_pinned;
WT_SPINLOCK id_lock;
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 07c3e756670..068197842a0 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -1778,6 +1778,11 @@ struct __wt_session {
* empty.}
* @config{target, if non-empty\, checkpoint the list of objects., a
* list of strings; default empty.}
+ * @config{use_timestamp, by default\, create the checkpoint as of the
+ * last stable timestamp if timestamps are in use\, or all current
+ * updates if there is no stable timestamp set. If false\, this option
+ * generates a checkpoint with all updates including those later than
+ * the timestamp., a boolean flag; default \c true.}
* @configend
* @errors
*/
@@ -2239,8 +2244,13 @@ struct __wt_connection {
* @configstart{WT_CONNECTION.set_timestamp, see dist/api_data.py}
* @config{oldest_timestamp, future commits and queries will be no
* earlier than the specified timestamp. Supplied values must be
- * monotonically increasing. see @ref transaction_timestamps., a
+ * monotonically increasing. See @ref transaction_timestamps., a
* string; default empty.}
+ * @config{stable_timestamp, future checkpoints will be no later than
+ * the specified timestamp. Supplied values must be monotonically
+ * increasing. The stable timestamp data stability only applies to
+ * tables that are not being logged. See @ref transaction_timestamps.,
+ * a string; default empty.}
* @configend
* @errors
*/
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 5ef5cc8cd84..c7e7999d887 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -441,19 +441,27 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
if (cval.len > 0) {
#ifdef HAVE_TIMESTAMPS
WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
- wt_timestamp_t oldest_timestamp;
+ wt_timestamp_t oldest_timestamp, stable_timestamp;
WT_RET(__wt_txn_parse_timestamp(
session, "read", &txn->read_timestamp, &cval));
__wt_readlock(session, &txn_global->rwlock);
__wt_timestamp_set(
&oldest_timestamp, &txn_global->oldest_timestamp);
+ __wt_timestamp_set(
+ &stable_timestamp, &txn_global->stable_timestamp);
__wt_readunlock(session, &txn_global->rwlock);
if (__wt_timestamp_cmp(
&txn->read_timestamp, &oldest_timestamp) < 0)
WT_RET_MSG(session, EINVAL,
"read timestamp %.*s older than oldest timestamp",
(int)cval.len, cval.str);
+ if (!__wt_timestamp_iszero(&stable_timestamp) &&
+ __wt_timestamp_cmp(
+ &txn->read_timestamp, &stable_timestamp) > 0)
+ WT_RET_MSG(session, EINVAL,
+ "read timestamp %.*s newer than stable timestamp",
+ (int)cval.len, cval.str);
__wt_txn_set_read_timestamp(session);
txn->isolation = WT_ISO_SNAPSHOT;
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index d21c68f6681..8ea6bf609c4 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -571,7 +571,9 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *txn_state;
- char timestamp_config[100];
+ char timestamp_buf[2 * WT_TIMESTAMP_SIZE + 1], timestamp_config[100];
+ const char *query_cfg[] = { WT_CONFIG_BASE(session,
+ WT_CONNECTION_query_timestamp), "get=stable", NULL };
const char *txn_cfg[] = { WT_CONFIG_BASE(session,
WT_SESSION_begin_transaction), "isolation=snapshot", NULL, NULL };
@@ -580,11 +582,31 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, const char *cfg[])
txn_global = &conn->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
+ /*
+ * Someone giving us a specific timestamp overrides the general
+ * use_timestamp.
+ */
WT_RET(__wt_config_gets(session, cfg, "read_timestamp", &cval));
if (cval.len > 0) {
WT_RET(__wt_snprintf(timestamp_config, sizeof(timestamp_config),
"read_timestamp=%.*s", (int)cval.len, cval.str));
txn_cfg[2] = timestamp_config;
+ } else if (txn_global->has_stable_timestamp) {
+ WT_RET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
+ /*
+ * Get the stable timestamp currently set. Then set that as
+ * the read timestamp for the transaction.
+ */
+ if (cval.val != 0) {
+ if ((ret = __wt_txn_global_query_timestamp(session,
+ timestamp_buf, query_cfg)) != 0 &&
+ ret != WT_NOTFOUND)
+ return (ret);
+ WT_RET(__wt_snprintf(timestamp_config,
+ sizeof(timestamp_config),
+ "read_timestamp=%s", timestamp_buf));
+ txn_cfg[2] = timestamp_config;
+ }
}
/*
diff --git a/src/txn/txn_timestamp.c b/src/txn/txn_timestamp.c
index fbbec33c325..6cb324756e9 100644
--- a/src/txn/txn_timestamp.c
+++ b/src/txn/txn_timestamp.c
@@ -151,6 +151,12 @@ __txn_global_query_timestamp(
__wt_timestamp_cmp(&txn->read_timestamp, &ts) < 0)
__wt_timestamp_set(&ts, &txn->read_timestamp);
__wt_readunlock(session, &txn_global->read_timestamp_rwlock);
+ } else if (WT_STRING_MATCH("stable", cval.str, cval.len)) {
+ if (!txn_global->has_stable_timestamp)
+ return (WT_NOTFOUND);
+ __wt_readlock(session, &txn_global->rwlock);
+ __wt_timestamp_set(&ts, &txn_global->stable_timestamp);
+ __wt_readunlock(session, &txn_global->rwlock);
} else
WT_RET_MSG(session, EINVAL,
"unknown timestamp query %.*s", (int)cval.len, cval.str);
@@ -281,43 +287,98 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session)
int
__wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
+ WT_CONFIG_ITEM oldest_cval, stable_cval;
+ bool has_oldest, has_stable;
/*
* Look for the oldest and stable timestamps.
*/
- WT_RET(
- __wt_config_gets_def(session, cfg, "oldest_timestamp", 0, &cval));
- if (cval.len != 0) {
+ WT_RET(__wt_config_gets_def(session,
+ cfg, "oldest_timestamp", 0, &oldest_cval));
+ WT_RET(__wt_config_gets_def(session,
+ cfg, "stable_timestamp", 0, &stable_cval));
+ if (oldest_cval.len != 0)
+ has_oldest = true;
+ else
+ has_oldest = false;
+ if (stable_cval.len != 0)
+ has_stable = true;
+ else
+ has_stable = false;
+ if (has_oldest || has_stable) {
#ifdef HAVE_TIMESTAMPS
WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t oldest_timestamp;
-
- WT_RET(__wt_txn_parse_timestamp(
- session, "oldest", &oldest_timestamp, &cval));
+ wt_timestamp_t oldest_ts, stable_ts;
+ txn_global = &S2C(session)->txn_global;
/*
- * This method can be called from multiple threads, check that
- * we are moving the global oldest timestamp forwards.
+ * Parsing will initialize the timestamp to zero even if
+ * it is not configured.
*/
- txn_global = &S2C(session)->txn_global;
+ WT_RET(__wt_txn_parse_timestamp(
+ session, "oldest", &oldest_ts, &oldest_cval));
+ WT_RET(__wt_txn_parse_timestamp(
+ session, "stable", &stable_ts, &stable_cval));
__wt_writelock(session, &txn_global->rwlock);
- if (!txn_global->has_oldest_timestamp || __wt_timestamp_cmp(
- &txn_global->oldest_timestamp, &oldest_timestamp) < 0) {
- __wt_timestamp_set(
- &txn_global->oldest_timestamp, &oldest_timestamp);
- txn_global->has_oldest_timestamp = true;
- txn_global->oldest_is_pinned = false;
+ /*
+ * First do error checking on the timestamp values. The
+ * oldest timestamp must always be less than or equal to
+ * the stable timestamp. If we're only setting one
+ * then compare against the system timestamp. If we're
+ * setting both then compare the passed in values.
+ */
+ if ((has_oldest && !has_stable && /* only oldest given */
+ txn_global->has_stable_timestamp &&
+ __wt_timestamp_cmp(&oldest_ts,
+ &txn_global->stable_timestamp) > 0) ||
+ (has_stable && !has_oldest && /* only stable given */
+ txn_global->has_oldest_timestamp &&
+ __wt_timestamp_cmp(&stable_ts,
+ &txn_global->oldest_timestamp) < 0) ||
+ (has_oldest && has_stable && /* both given */
+ __wt_timestamp_cmp(&oldest_ts, &stable_ts) > 0)) {
+ __wt_writeunlock(session, &txn_global->rwlock);
+ WT_RET_MSG(session, EINVAL,
+ "set_timestamp: oldest timestamp must not be "
+ "later than stable timestamp");
+ }
+ if (has_oldest) {
+ /*
+ * This method can be called from multiple threads,
+ * check that we are moving the global oldest
+ * timestamp forwards.
+ */
+ if (!txn_global->has_oldest_timestamp ||
+ __wt_timestamp_cmp(&txn_global->oldest_timestamp,
+ &oldest_ts) < 0) {
+ __wt_timestamp_set(
+ &txn_global->oldest_timestamp, &oldest_ts);
+ txn_global->has_oldest_timestamp = true;
+ txn_global->oldest_is_pinned = false;
+ }
+ }
+ if (has_stable) {
+ /*
+ * This method can be called from multiple threads,
+ * check that we are moving the global stable
+ * timestamp forwards.
+ */
+ if (!txn_global->has_stable_timestamp ||
+ __wt_timestamp_cmp(&txn_global->stable_timestamp,
+ &stable_ts) < 0) {
+ __wt_timestamp_set(
+ &txn_global->stable_timestamp, &stable_ts);
+ txn_global->has_stable_timestamp = true;
+ txn_global->stable_is_pinned = false;
+ }
}
__wt_writeunlock(session, &txn_global->rwlock);
-
WT_RET(__wt_txn_update_pinned_timestamp(session));
#else
- WT_RET_MSG(session, EINVAL, "oldest_timestamp requires a "
+ WT_RET_MSG(session, EINVAL, "set_timestamp requires a "
"version of WiredTiger built with timestamp support");
#endif
}
-
return (0);
}
diff --git a/test/suite/test_timestamp02.py b/test/suite/test_timestamp02.py
index 0ad007ec8e2..735e954fc7f 100644
--- a/test/suite/test_timestamp02.py
+++ b/test/suite/test_timestamp02.py
@@ -89,6 +89,8 @@ class test_timestamp02(wttest.WiredTigerTestCase, suite_subprocess):
c[k] = 1
self.session.commit_transaction('commit_timestamp=' + timestamp_str(k))
+ # Don't set a stable timestamp yet. Make sure we can read with
+ # a timestamp before the stable timestamp has been set.
# Now check that we see the expected state when reading at each
# timestamp
for i, t in enumerate(orig_keys):
@@ -106,6 +108,9 @@ class test_timestamp02(wttest.WiredTigerTestCase, suite_subprocess):
c[k] = 2
self.session.commit_transaction('commit_timestamp=' + timestamp_str(k + 100))
+ # Now set the stable timestamp before we read.
+ self.conn.set_timestamp('stable_timestamp=' + timestamp_str(200))
+
for i, t in enumerate(orig_keys):
self.check(self.session, 'read_timestamp=' + timestamp_str(t + 100),
dict((k, (2 if j <= i else 1)) for j, k in enumerate(orig_keys)))
@@ -121,6 +126,8 @@ class test_timestamp02(wttest.WiredTigerTestCase, suite_subprocess):
del c[k]
self.session.commit_transaction('commit_timestamp=' + timestamp_str(k + 200))
+ # We have to continue to advance the stable timestamp before reading.
+ self.conn.set_timestamp('stable_timestamp=' + timestamp_str(300))
for i, t in enumerate(orig_keys):
self.check(self.session, 'read_timestamp=' + timestamp_str(t + 200),
dict((k, 2) for k in orig_keys[i+1:]))
diff --git a/test/suite/test_timestamp03.py b/test/suite/test_timestamp03.py
index 5b42896fdcd..20f4d532658 100644
--- a/test/suite/test_timestamp03.py
+++ b/test/suite/test_timestamp03.py
@@ -58,25 +58,25 @@ class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
('table-simple', dict(uri='table:', use_cg=False, use_index=False)),
]
- conncfg = [
- ('nolog', dict(conncfg='create')),
- ('V1', dict(conncfg='create,log=(enabled),compatibility=(release="2.9")')),
- ('V2', dict(conncfg='create,log=(enabled)')),
- ]
-
ckpt = [
+ ('use_ts_def', dict(ckptcfg='', val='none')),
+ ('use_ts_false', dict(ckptcfg='use_timestamp=false', val='all')),
+ ('use_ts_true', dict(ckptcfg='use_timestamp=true', val='none')),
('read_ts', dict(ckptcfg='read_timestamp', val='none')),
]
+ conncfg = [
+ ('nolog', dict(conn_config='create')),
+ ('V1', dict(conn_config='create,log=(enabled),compatibility=(release="2.9")')),
+ ('V2', dict(conn_config='create,log=(enabled)')),
+ ]
+
scenarios = make_scenarios(types, ckpt, conncfg)
# Binary values.
value = u'\u0001\u0002abcd\u0003\u0004'
value2 = u'\u0001\u0002dcba\u0003\u0004'
- def conn_config(self):
- return self.conncfg
-
# Check that a cursor (optionally started in a new transaction), sees the
# expected values.
def check(self, session, txn_config, expected):
@@ -193,6 +193,7 @@ class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
self.assertEqual(self.conn.query_timestamp(), timestamp_ret_str(100))
self.oldts = timestamp_str(100)
self.conn.set_timestamp('oldest_timestamp=' + self.oldts)
+ self.conn.set_timestamp('stable_timestamp=' + self.oldts)
# print "Oldest " + self.oldts
# Update them and retry.
@@ -208,7 +209,7 @@ class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
self.session.begin_transaction()
c[k] = self.value2
c3[k] = self.value2
- ts = timestamp_str(k + 101)
+ ts = timestamp_str(k + 100)
self.session.commit_transaction('commit_timestamp=' + ts)
# print "Commit key " + str(k) + " ts " + ts
count += 1
@@ -216,15 +217,21 @@ class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
# Take a checkpoint using the given configuration. Then verify
# whether value2 appears in a copy of that data or not.
+ valcnt2 = nkeys
if self.val == 'all':
valcnt = nkeys
else:
valcnt = 0
- # Table 2 should always see all the keys
- # Table 3 should see whatever table 1 sees.
- valcnt2 = nkeys
+ # XXX adjust when logged + timestamps is fixed and defined.
valcnt3 = valcnt
self.ckpt_backup(valcnt, valcnt2, valcnt3)
+ if self.ckptcfg != 'read_timestamp':
+ # Advance the stable timestamp to the latest, leaving the oldest
+ # timestamp unchanged, and make sure we can see the data. Once the
+ # stable timestamp has moved we should see all keys with value2.
+ self.conn.set_timestamp('stable_timestamp=' + \
+ timestamp_str(100+nkeys))
+ self.ckpt_backup(nkeys, nkeys, nkeys)
if __name__ == '__main__':
wttest.run()