summary refs log tree commit diff
path: root/src/third_party/wiredtiger/src/txn/txn_timestamp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/txn/txn_timestamp.c')
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_timestamp.c 270
1 files changed, 200 insertions, 70 deletions
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index a975341c189..4caf0102e3c 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -15,15 +15,8 @@
*/
int
__wt_txn_parse_timestamp(WT_SESSION_IMPL *session,
- const char *name, uint8_t *timestamp, WT_CONFIG_ITEM *cval)
+ const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval)
{
- WT_DECL_RET;
- WT_ITEM ts;
- wt_timestamp_t tsbuf;
- size_t hexlen;
- const char *hexts;
- char padbuf[2 * WT_TIMESTAMP_SIZE + 1];
-
__wt_timestamp_set_zero(timestamp);
if (cval->len == 0)
@@ -35,6 +28,40 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session,
"Failed to parse %s timestamp '%.*s': too long",
name, (int)cval->len, cval->str);
+#if WT_TIMESTAMP_SIZE == 8
+ {
+ static const u_char hextable[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 0, 0, 0, 0, 0, 0,
+ 0, 10, 11, 12, 13, 14, 15, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 10, 11, 12, 13, 14, 15
+ };
+ wt_timestamp_t ts;
+ size_t len;
+ const char *hex;
+
+ for (ts.val = 0, hex = cval->str, len = cval->len; len > 0; --len)
+ ts.val = (ts.val << 4) | hextable[(int)*hex++];
+ __wt_timestamp_set(timestamp, &ts);
+ }
+#else
+ {
+ WT_DECL_RET;
+ WT_ITEM ts;
+ wt_timestamp_t tsbuf;
+ size_t hexlen;
+ const char *hexts;
+ char padbuf[2 * WT_TIMESTAMP_SIZE + 1];
+
/*
* The decoding function assumes it is decoding data produced by dump
* and so requires an even number of hex digits.
@@ -50,8 +77,8 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session,
}
/* Avoid memory allocation to decode timestamps. */
- ts.data = ts.mem = tsbuf;
- ts.memsize = sizeof(tsbuf);
+ ts.data = ts.mem = tsbuf.ts;
+ ts.memsize = sizeof(tsbuf.ts);
if ((ret = __wt_nhex_to_raw(session, hexts, hexlen, &ts)) != 0)
WT_RET_MSG(session, ret, "Failed to parse %s timestamp '%.*s'",
@@ -59,15 +86,16 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session,
WT_ASSERT(session, ts.size <= WT_TIMESTAMP_SIZE);
/* Copy the raw value to the end of the timestamp. */
- memcpy(timestamp + WT_TIMESTAMP_SIZE - ts.size,
+ memcpy(timestamp->ts + WT_TIMESTAMP_SIZE - ts.size,
ts.data, ts.size);
-
+ }
+#endif
if (__wt_timestamp_iszero(timestamp))
WT_RET_MSG(session, EINVAL,
"Failed to parse %s timestamp '%.*s': zero not permitted",
name, (int)cval->len, cval->str);
- return (ret);
+ return (0);
}
/*
@@ -76,12 +104,13 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session,
*/
static int
__txn_global_query_timestamp(
- WT_SESSION_IMPL *session, uint8_t *ts, const char *cfg[])
+ WT_SESSION_IMPL *session, wt_timestamp_t *tsp, const char *cfg[])
{
WT_CONNECTION_IMPL *conn;
WT_CONFIG_ITEM cval;
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t ts;
conn = S2C(session);
txn_global = &conn->txn_global;
@@ -91,40 +120,51 @@ __txn_global_query_timestamp(
if (!txn_global->has_commit_timestamp)
return (WT_NOTFOUND);
__wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(ts, txn_global->commit_timestamp);
+ __wt_timestamp_set(&ts, &txn_global->commit_timestamp);
+ WT_ASSERT(session, !__wt_timestamp_iszero(&ts));
__wt_readunlock(session, &txn_global->rwlock);
/* Compare with the oldest running transaction. */
__wt_readlock(session, &txn_global->commit_timestamp_rwlock);
txn = TAILQ_FIRST(&txn_global->commit_timestamph);
if (txn != NULL &&
- __wt_timestamp_cmp(txn->commit_timestamp, ts) < 0)
- __wt_timestamp_set(ts, txn->commit_timestamp);
+ __wt_timestamp_cmp(&txn->first_commit_timestamp, &ts) < 0) {
+ __wt_timestamp_set(&ts, &txn->first_commit_timestamp);
+ WT_ASSERT(session, !__wt_timestamp_iszero(&ts));
+ }
__wt_readunlock(session, &txn_global->commit_timestamp_rwlock);
} else if (WT_STRING_MATCH("oldest_reader", cval.str, cval.len)) {
if (!txn_global->has_oldest_timestamp)
return (WT_NOTFOUND);
__wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(ts, txn_global->oldest_timestamp);
+ __wt_timestamp_set(&ts, &txn_global->oldest_timestamp);
/* Check for a running checkpoint */
txn = txn_global->checkpoint_txn;
if (txn_global->checkpoint_state.pinned_id != WT_TXN_NONE &&
- !__wt_timestamp_iszero(txn->read_timestamp) &&
- __wt_timestamp_cmp(txn->read_timestamp, ts) < 0)
- __wt_timestamp_set(ts, txn->read_timestamp);
+ !__wt_timestamp_iszero(&txn->read_timestamp) &&
+ __wt_timestamp_cmp(&txn->read_timestamp, &ts) < 0)
+ __wt_timestamp_set(&ts, &txn->read_timestamp);
__wt_readunlock(session, &txn_global->rwlock);
/* Look for the oldest ordinary reader. */
__wt_readlock(session, &txn_global->read_timestamp_rwlock);
txn = TAILQ_FIRST(&txn_global->read_timestamph);
if (txn != NULL &&
- __wt_timestamp_cmp(txn->read_timestamp, ts) < 0)
- __wt_timestamp_set(ts, txn->read_timestamp);
+ __wt_timestamp_cmp(&txn->read_timestamp, &ts) < 0)
+ __wt_timestamp_set(&ts, &txn->read_timestamp);
__wt_readunlock(session, &txn_global->read_timestamp_rwlock);
+ } else if (WT_STRING_MATCH("stable", cval.str, cval.len)) {
+ if (!txn_global->has_stable_timestamp)
+ return (WT_NOTFOUND);
+ __wt_readlock(session, &txn_global->rwlock);
+ __wt_timestamp_set(&ts, &txn_global->stable_timestamp);
+ __wt_readunlock(session, &txn_global->rwlock);
} else
- return (__wt_illegal_value(session, NULL));
+ WT_RET_MSG(session, EINVAL,
+ "unknown timestamp query %.*s", (int)cval.len, cval.str);
+ __wt_timestamp_set(tsp, &ts);
return (0);
}
#endif
@@ -138,8 +178,28 @@ __wt_txn_global_query_timestamp(
WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[])
{
#ifdef HAVE_TIMESTAMPS
- WT_ITEM hexts;
wt_timestamp_t ts;
+
+ WT_RET(__txn_global_query_timestamp(session, &ts, cfg));
+
+#if WT_TIMESTAMP_SIZE == 8
+ {
+ char *p, v;
+
+ for (p = hex_timestamp; ts.val != 0; ts.val >>= 4)
+ *p++ = (char)__wt_hex((u_char)(ts.val & 0x0f));
+ *p = '\0';
+
+ /* Reverse the string. */
+ for (--p; p > hex_timestamp;) {
+ v = *p;
+ *p-- = *hex_timestamp;
+ *hex_timestamp++ = v;
+ }
+ }
+#else
+ {
+ WT_ITEM hexts;
size_t len;
uint8_t *tsp;
@@ -147,25 +207,28 @@ __wt_txn_global_query_timestamp(
* Keep clang-analyzer happy: it can't tell that ts will be set
* whenever the call below succeeds.
*/
- WT_CLEAR(ts);
- WT_RET(__txn_global_query_timestamp(session, ts, cfg));
+ __wt_timestamp_set_zero(&ts);
+ WT_RET(__txn_global_query_timestamp(session, &ts, cfg));
/* Avoid memory allocation: set up an item guaranteed large enough. */
hexts.data = hexts.mem = hex_timestamp;
hexts.memsize = 2 * WT_TIMESTAMP_SIZE + 1;
/* Trim leading zeros. */
- for (tsp = ts, len = WT_TIMESTAMP_SIZE;
+ for (tsp = ts.ts, len = WT_TIMESTAMP_SIZE;
len > 0 && *tsp == 0;
++tsp, --len)
;
WT_RET(__wt_raw_to_hex(session, tsp, len, &hexts));
+ }
+#endif
return (0);
#else
- WT_UNUSED(session);
WT_UNUSED(hex_timestamp);
WT_UNUSED(cfg);
- return (ENOTSUP);
+ WT_RET_MSG(session, ENOTSUP,
+ "WT_CONNECTION.query_timestamp requires a version of WiredTiger "
+ "built with timestamp support");
#endif
}
@@ -191,28 +254,28 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session)
return (0);
__wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(oldest_timestamp, txn_global->oldest_timestamp);
+ __wt_timestamp_set(&oldest_timestamp, &txn_global->oldest_timestamp);
__wt_readunlock(session, &txn_global->rwlock);
/* Scan to find the global pinned timestamp. */
if ((ret = __txn_global_query_timestamp(
- session, active_timestamp, query_cfg)) != 0)
+ session, &active_timestamp, query_cfg)) != 0)
return (ret == WT_NOTFOUND ? 0 : ret);
- if (__wt_timestamp_cmp(oldest_timestamp, active_timestamp) < 0) {
- __wt_timestamp_set(pinned_timestamp, oldest_timestamp);
+ if (__wt_timestamp_cmp(&oldest_timestamp, &active_timestamp) < 0) {
+ __wt_timestamp_set(&pinned_timestamp, &oldest_timestamp);
} else
- __wt_timestamp_set(pinned_timestamp, active_timestamp);
+ __wt_timestamp_set(&pinned_timestamp, &active_timestamp);
__wt_writelock(session, &txn_global->rwlock);
if (!txn_global->has_pinned_timestamp || __wt_timestamp_cmp(
- txn_global->pinned_timestamp, pinned_timestamp) < 0) {
+ &txn_global->pinned_timestamp, &pinned_timestamp) < 0) {
__wt_timestamp_set(
- txn_global->pinned_timestamp, pinned_timestamp);
+ &txn_global->pinned_timestamp, &pinned_timestamp);
txn_global->has_pinned_timestamp = true;
txn_global->oldest_is_pinned = __wt_timestamp_cmp(
- txn_global->pinned_timestamp,
- txn_global->oldest_timestamp) == 0;
+ &txn_global->pinned_timestamp,
+ &txn_global->oldest_timestamp) == 0;
}
__wt_writeunlock(session, &txn_global->rwlock);
@@ -227,43 +290,98 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session)
int
__wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
+ WT_CONFIG_ITEM oldest_cval, stable_cval;
+ bool has_oldest, has_stable;
/*
* Look for the oldest and stable timestamps.
*/
- WT_RET(
- __wt_config_gets_def(session, cfg, "oldest_timestamp", 0, &cval));
- if (cval.len != 0) {
+ WT_RET(__wt_config_gets_def(session,
+ cfg, "oldest_timestamp", 0, &oldest_cval));
+ WT_RET(__wt_config_gets_def(session,
+ cfg, "stable_timestamp", 0, &stable_cval));
+ if (oldest_cval.len != 0)
+ has_oldest = true;
+ else
+ has_oldest = false;
+ if (stable_cval.len != 0)
+ has_stable = true;
+ else
+ has_stable = false;
+ if (has_oldest || has_stable) {
#ifdef HAVE_TIMESTAMPS
WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t oldest_timestamp;
-
- WT_RET(__wt_txn_parse_timestamp(
- session, "oldest", oldest_timestamp, &cval));
+ wt_timestamp_t oldest_ts, stable_ts;
+ txn_global = &S2C(session)->txn_global;
/*
- * This method can be called from multiple threads, check that
- * we are moving the global oldest timestamp forwards.
+ * Parsing will initialize the timestamp to zero even if
+ * it is not configured.
*/
- txn_global = &S2C(session)->txn_global;
+ WT_RET(__wt_txn_parse_timestamp(
+ session, "oldest", &oldest_ts, &oldest_cval));
+ WT_RET(__wt_txn_parse_timestamp(
+ session, "stable", &stable_ts, &stable_cval));
__wt_writelock(session, &txn_global->rwlock);
- if (!txn_global->has_oldest_timestamp || __wt_timestamp_cmp(
- txn_global->oldest_timestamp, oldest_timestamp) < 0) {
- __wt_timestamp_set(
- txn_global->oldest_timestamp, oldest_timestamp);
- txn_global->has_oldest_timestamp = true;
- txn_global->oldest_is_pinned = false;
+ /*
+ * First do error checking on the timestamp values. The
+ * oldest timestamp must always be less than or equal to
+ * the stable timestamp. If we're only setting one,
+ * compare it against the other's current global value.
+ * If we're setting both, compare the passed-in values.
+ */
+ if ((has_oldest && !has_stable && /* only oldest given */
+ txn_global->has_stable_timestamp &&
+ __wt_timestamp_cmp(&oldest_ts,
+ &txn_global->stable_timestamp) > 0) ||
+ (has_stable && !has_oldest && /* only stable given */
+ txn_global->has_oldest_timestamp &&
+ __wt_timestamp_cmp(&stable_ts,
+ &txn_global->oldest_timestamp) < 0) ||
+ (has_oldest && has_stable && /* both given */
+ __wt_timestamp_cmp(&oldest_ts, &stable_ts) > 0)) {
+ __wt_writeunlock(session, &txn_global->rwlock);
+ WT_RET_MSG(session, EINVAL,
+ "set_timestamp: oldest timestamp must not be "
+ "later than stable timestamp");
+ }
+ if (has_oldest) {
+ /*
+ * This method can be called from multiple threads,
+ * check that we are moving the global oldest
+ * timestamp forwards.
+ */
+ if (!txn_global->has_oldest_timestamp ||
+ __wt_timestamp_cmp(&txn_global->oldest_timestamp,
+ &oldest_ts) < 0) {
+ __wt_timestamp_set(
+ &txn_global->oldest_timestamp, &oldest_ts);
+ txn_global->has_oldest_timestamp = true;
+ txn_global->oldest_is_pinned = false;
+ }
+ }
+ if (has_stable) {
+ /*
+ * This method can be called from multiple threads,
+ * check that we are moving the global stable
+ * timestamp forwards.
+ */
+ if (!txn_global->has_stable_timestamp ||
+ __wt_timestamp_cmp(&txn_global->stable_timestamp,
+ &stable_ts) < 0) {
+ __wt_timestamp_set(
+ &txn_global->stable_timestamp, &stable_ts);
+ txn_global->has_stable_timestamp = true;
+ txn_global->stable_is_pinned = false;
+ }
}
__wt_writeunlock(session, &txn_global->rwlock);
-
WT_RET(__wt_txn_update_pinned_timestamp(session));
#else
- WT_RET_MSG(session, EINVAL, "oldest_timestamp requires a "
+ WT_RET_MSG(session, EINVAL, "set_timestamp requires a "
"version of WiredTiger built with timestamp support");
#endif
}
-
return (0);
}
@@ -286,7 +404,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN *txn = &session->txn;
WT_RET(__wt_txn_parse_timestamp(
- session, "commit", txn->commit_timestamp, &cval));
+ session, "commit", &txn->commit_timestamp, &cval));
__wt_txn_set_commit_timestamp(session);
#else
WT_RET_MSG(session, EINVAL, "commit_timestamp requires a "
@@ -298,6 +416,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
return (0);
}
+#ifdef HAVE_TIMESTAMPS
/*
* __wt_txn_set_commit_timestamp --
* Publish a transaction's commit timestamp.
@@ -305,19 +424,28 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
void
__wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session)
{
+ wt_timestamp_t ts;
WT_TXN *prev, *txn;
WT_TXN_GLOBAL *txn_global;
txn = &session->txn;
txn_global = &S2C(session)->txn_global;
- if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
+ if (F_ISSET(txn, WT_TXN_PUBLIC_TS_COMMIT))
return;
+ /*
+ * Copy the current commit timestamp (which can change while the
+ * transaction is running) into the first_commit_timestamp, which is
+ * fixed.
+ */
+ __wt_timestamp_set(&ts, &txn->commit_timestamp);
+ __wt_timestamp_set(&txn->first_commit_timestamp, &ts);
+
__wt_writelock(session, &txn_global->commit_timestamp_rwlock);
for (prev = TAILQ_LAST(&txn_global->commit_timestamph, __wt_txn_cts_qh);
- prev != NULL && __wt_timestamp_cmp(
- prev->commit_timestamp, txn->commit_timestamp) > 0;
+ prev != NULL &&
+ __wt_timestamp_cmp(&prev->first_commit_timestamp, &ts) > 0;
prev = TAILQ_PREV(prev, __wt_txn_cts_qh, commit_timestampq))
;
if (prev == NULL)
@@ -327,7 +455,7 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session)
TAILQ_INSERT_AFTER(&txn_global->commit_timestamph,
prev, txn, commit_timestampq);
__wt_writeunlock(session, &txn_global->commit_timestamp_rwlock);
- F_SET(txn, WT_TXN_HAS_TS_COMMIT);
+ F_SET(txn, WT_TXN_HAS_TS_COMMIT | WT_TXN_PUBLIC_TS_COMMIT);
}
/*
@@ -343,12 +471,13 @@ __wt_txn_clear_commit_timestamp(WT_SESSION_IMPL *session)
txn = &session->txn;
txn_global = &S2C(session)->txn_global;
- if (!F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
+ if (!F_ISSET(txn, WT_TXN_PUBLIC_TS_COMMIT))
return;
__wt_writelock(session, &txn_global->commit_timestamp_rwlock);
TAILQ_REMOVE(&txn_global->commit_timestamph, txn, commit_timestampq);
__wt_writeunlock(session, &txn_global->commit_timestamp_rwlock);
+ F_CLR(txn, WT_TXN_PUBLIC_TS_COMMIT);
}
/*
@@ -364,13 +493,13 @@ __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session)
txn = &session->txn;
txn_global = &S2C(session)->txn_global;
- if (F_ISSET(txn, WT_TXN_HAS_TS_READ))
+ if (F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
return;
__wt_writelock(session, &txn_global->read_timestamp_rwlock);
for (prev = TAILQ_LAST(&txn_global->read_timestamph, __wt_txn_rts_qh);
prev != NULL && __wt_timestamp_cmp(
- prev->read_timestamp, txn->read_timestamp) > 0;
+ &prev->read_timestamp, &txn->read_timestamp) > 0;
prev = TAILQ_PREV(prev, __wt_txn_rts_qh, read_timestampq))
;
if (prev == NULL)
@@ -380,7 +509,7 @@ __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session)
TAILQ_INSERT_AFTER(
&txn_global->read_timestamph, prev, txn, read_timestampq);
__wt_writeunlock(session, &txn_global->read_timestamp_rwlock);
- F_SET(txn, WT_TXN_HAS_TS_READ);
+ F_SET(txn, WT_TXN_HAS_TS_READ | WT_TXN_PUBLIC_TS_READ);
}
/*
@@ -396,11 +525,12 @@ __wt_txn_clear_read_timestamp(WT_SESSION_IMPL *session)
txn = &session->txn;
txn_global = &S2C(session)->txn_global;
- if (!F_ISSET(txn, WT_TXN_HAS_TS_READ))
+ if (!F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
return;
__wt_writelock(session, &txn_global->read_timestamp_rwlock);
TAILQ_REMOVE(&txn_global->read_timestamph, txn, read_timestampq);
__wt_writeunlock(session, &txn_global->read_timestamp_rwlock);
- F_CLR(txn, WT_TXN_HAS_TS_READ);
+ F_CLR(txn, WT_TXN_PUBLIC_TS_READ);
}
+#endif