summary refs log tree commit diff
path: root/src/third_party/wiredtiger/src
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2019-03-08 08:19:53 +1100
committer Luke Chen <luke.chen@mongodb.com> 2019-03-08 08:19:53 +1100
commit 5a7ccd5f073341f5f98e0ced39a80416869c181b (patch)
tree e4da15db6037ad02ebc7e69e292afe81fdcff53e /src/third_party/wiredtiger/src
parent f386c49be72744d88fbb0950c54d6621a9d242b8 (diff)
download mongo-5a7ccd5f073341f5f98e0ced39a80416869c181b.tar.gz
Import wiredtiger: 82c3f83acae289e0ada5ab4e7fe549e47af1078e from branch mongodb-4.2
ref: afdead1093..82c3f83aca
for: 4.1.9
WT-4526 Allow durable updates to be evicted to the lookaside file
WT-4571 Explore performance improvement in __wt_txn_id_alloc
WT-4602 Clang Tidy and Clang Scan fixes
WT-4607 Allow prepared transaction to commit behind oldest timestamp
WT-4619 Coverity 111398: leaked memory
Diffstat (limited to 'src/third_party/wiredtiger/src')
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_huffman.c 1
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy.c 1
-rw-r--r-- src/third_party/wiredtiger/src/cache/cache_las.c 56
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_capacity.c 1
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_reconfig.c 2
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h 3
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/gcc.h 1
-rw-r--r-- src/third_party/wiredtiger/src/include/posix.h 1
-rw-r--r-- src/third_party/wiredtiger/src/include/swap.h 1
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.h 1
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i 42
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in 1
-rw-r--r-- src/third_party/wiredtiger/src/log/log.c 1
-rw-r--r-- src/third_party/wiredtiger/src/meta/meta_ckpt.c 1
-rw-r--r-- src/third_party/wiredtiger/src/meta/meta_turtle.c 17
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_fs.c 2
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c 71
-rw-r--r-- src/third_party/wiredtiger/src/support/hash_city.c 2
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c 72
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_recover.c 1
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c 4
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_timestamp.c 27
24 files changed, 237 insertions, 76 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c
index 2e282e33e75..fa85a54f532 100644
--- a/src/third_party/wiredtiger/src/btree/bt_huffman.c
+++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c
@@ -336,6 +336,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
WT_ERR(__wt_getline(session, fs, tmp));
if (tmp->size == 0)
break;
+ /* NOLINTNEXTLINE(cert-err34-c) */
n = sscanf(
tmp->data, "%" SCNi64 " %" SCNi64, &symbol, &frequency);
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index c0933d4c4f8..5a653b4a546 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -283,7 +283,7 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
!ref->page_las->has_prepares &&
!S2C(session)->txn_global.has_stable_timestamp &&
__wt_txn_visible_all(session, ref->page_las->unstable_txn,
- ref->page_las->unstable_timestamp)) {
+ ref->page_las->unstable_durable_timestamp)) {
page->modify->rec_max_txn = ref->page_las->max_txn;
page->modify->rec_max_timestamp =
ref->page_las->max_timestamp;
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 235fc76b28a..1a412ace8f9 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -96,6 +96,7 @@ __verify_config_offsets(
* verify because that's where we "dump blocks" for debugging.)
*/
*quitp = true;
+ /* NOLINTNEXTLINE(cert-err34-c) */
if (v.len != 0 || sscanf(k.str, "%" SCNu64, &offset) != 1)
WT_RET_MSG(session, EINVAL,
"unexpected dump offset format");
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index a68c706ad95..4ade406c619 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -426,16 +426,28 @@ __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref)
* versions of data and all the updates are in the past.
*/
if (ref->page_las->skew_newest &&
- txn->read_timestamp > ref->page_las->unstable_timestamp)
+ txn->read_timestamp > ref->page_las->unstable_durable_timestamp)
return (true);
/*
* Skip lookaside pages if reading as of a timestamp, we evicted old
* versions of data and all the unstable updates are in the future.
*/
- if (!ref->page_las->skew_newest &&
- txn->read_timestamp < ref->page_las->unstable_timestamp)
- return (true);
+ if (!ref->page_las->skew_newest) {
+ /*
+ * Skip lookaside pages during checkpoint if all the unstable
+ * durable updates are in the future. Checking for just the
+ * unstable updates during checkpoint would end up reading more
+ * content from lookaside than necessary.
+ */
+ if (WT_SESSION_IS_CHECKPOINT(session) &&
+ txn->read_timestamp <
+ ref->page_las->unstable_durable_timestamp)
+ return (true);
+
+ if (txn->read_timestamp < ref->page_las->unstable_timestamp)
+ return (true);
+ }
return (false);
}
@@ -545,7 +557,7 @@ __las_insert_block_verbose(
double pct_dirty, pct_full;
uint64_t ckpt_gen_current, ckpt_gen_last;
uint32_t btree_id;
- char ts_string[WT_TS_INT_STRING_SIZE];
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
btree_id = btree->id;
@@ -571,19 +583,23 @@ __las_insert_block_verbose(
(void)__wt_eviction_dirty_needed(session, &pct_dirty);
__wt_timestamp_to_string(
multi->page_las.unstable_timestamp,
- ts_string, sizeof(ts_string));
+ ts_string[0], sizeof(ts_string));
+ __wt_timestamp_to_string(
+ multi->page_las.unstable_durable_timestamp,
+ ts_string[1], sizeof(ts_string));
__wt_verbose(session,
WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
"Page reconciliation triggered lookaside write "
"file ID %" PRIu32 ", page ID %" PRIu64 ". "
- "Max txn ID %" PRIu64 ", unstable timestamp %s, %s. "
+ "Max txn ID %" PRIu64 ", unstable timestamp %s,"
+ " unstable durable timestamp %s, %s. "
"Entries now in lookaside file: %" PRId64 ", "
"cache dirty: %2.3f%% , "
"cache use: %2.3f%%",
btree_id, multi->page_las.las_pageid,
multi->page_las.max_txn,
- ts_string,
+ ts_string[0], ts_string[1],
multi->page_las.skew_newest ? "newest" : "not newest",
WT_STAT_READ(conn->stats, cache_lookaside_entries),
pct_dirty, pct_full);
@@ -1128,6 +1144,26 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
session, saved_key, las_key.data, las_key.size));
/*
+ * Never expect an entry with prepare locked state or
+ * with durable timestamp as max timestamp or with
+ * in-progress prepare state and non-zero durable
+ * timestamp. In all other cases the durable timestamp
+ * is higher or same as the las timestamp.
+ */
+ WT_ASSERT(session,
+ prepare_state != WT_PREPARE_LOCKED ||
+ durable_timestamp != WT_TS_MAX ||
+ (prepare_state != WT_PREPARE_INPROGRESS ||
+ durable_timestamp == 0));
+
+ /*
+ * FIXME Disable this assertion until fixed by WT-4598.
+ * WT_ASSERT(session,
+ * (prepare_state == WT_PREPARE_INPROGRESS ||
+ * durable_timestamp >= las_timestamp));
+ */
+
+ /*
* There are several conditions that need to be met
* before we choose to remove a key block:
* * The entries were written with skew newest.
@@ -1136,8 +1172,8 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
* * The entry wasn't from a prepared transaction.
*/
if (upd_type == WT_UPDATE_BIRTHMARK &&
- __wt_txn_visible_all(
- session, las_txnid, las_timestamp) &&
+ __wt_txn_visible_all(session,
+ las_txnid, durable_timestamp) &&
prepare_state != WT_PREPARE_INPROGRESS)
removing_key_block = true;
else
diff --git a/src/third_party/wiredtiger/src/conn/conn_capacity.c b/src/third_party/wiredtiger/src/conn/conn_capacity.c
index 3a6f02a5d02..a75bdd259c4 100644
--- a/src/third_party/wiredtiger/src/conn/conn_capacity.c
+++ b/src/third_party/wiredtiger/src/conn/conn_capacity.c
@@ -306,6 +306,7 @@ __wt_capacity_throttle(WT_SESSION_IMPL *session, uint64_t bytes,
conn = S2C(session);
cap = &conn->capacity;
+ /* NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) */
capacity = steal_capacity = 0;
reservation = steal = NULL;
switch (type) {
diff --git a/src/third_party/wiredtiger/src/conn/conn_reconfig.c b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
index c6d7203f08e..7df9fa212df 100644
--- a/src/third_party/wiredtiger/src/conn/conn_reconfig.c
+++ b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
@@ -23,8 +23,10 @@ __conn_compat_parse(WT_SESSION_IMPL *session,
* release string. We ignore the patch value, but allow it in
* the string.
*/
+ /* NOLINTNEXTLINE(cert-err34-c) */
if (sscanf(cvalp->str,
"%" SCNu16 ".%" SCNu16, majorp, minorp) != 2 &&
+ /* NOLINTNEXTLINE(cert-err34-c) */
sscanf(cvalp->str, "%" SCNu16 ".%" SCNu16 ".%" SCNu16,
majorp, minorp, &unused_patch) != 3)
WT_RET_MSG(session, EINVAL,
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 14d5a04b096..dc1bdc07419 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -247,6 +247,9 @@ struct __wt_page_lookaside {
uint64_t unstable_txn; /* First transaction ID not on page */
wt_timestamp_t max_timestamp; /* Maximum timestamp */
wt_timestamp_t unstable_timestamp;/* First timestamp not on page */
+ wt_timestamp_t unstable_durable_timestamp;
+ /* First durable timestamp not on
+ * page */
bool eviction_to_lookaside; /* Revert to lookaside on eviction */
bool has_prepares; /* One or more updates are prepared */
bool skew_newest; /* Page image has newest versions */
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 2bc60a1a85d..f550f00a88c 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -860,7 +860,7 @@ extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name,
extern int __wt_txn_query_timestamp(WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[], bool global_txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t ts, WT_CONFIG_ITEM *cval, bool compare_stable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_commit_timestamp_validate(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t ts, WT_CONFIG_ITEM *cval, bool durable_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_parse_prepare_timestamp(WT_SESSION_IMPL *session, const char *cfg[], wt_timestamp_t *timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_parse_read_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h
index b824d4eb24d..a9d271ed0bd 100644
--- a/src/third_party/wiredtiger/src/include/gcc.h
+++ b/src/third_party/wiredtiger/src/include/gcc.h
@@ -11,6 +11,7 @@
/* GCC-specific attributes. */
#define WT_PACKED_STRUCT_BEGIN(name) \
+ /* NOLINTNEXTLINE(misc-macro-parentheses) */ \
struct __attribute__ ((__packed__)) name {
#define WT_PACKED_STRUCT_END \
};
diff --git a/src/third_party/wiredtiger/src/include/posix.h b/src/third_party/wiredtiger/src/include/posix.h
index f1e040968fc..4ad8db1f43a 100644
--- a/src/third_party/wiredtiger/src/include/posix.h
+++ b/src/third_party/wiredtiger/src/include/posix.h
@@ -33,6 +33,7 @@ typedef struct {
/*
* Thread callbacks need to match the platform specific callback types
*/
+/* NOLINTNEXTLINE(misc-macro-parentheses) */
#define WT_THREAD_CALLBACK(x) void* (x)
#define WT_THREAD_RET void*
#define WT_THREAD_RET_VALUE NULL
diff --git a/src/third_party/wiredtiger/src/include/swap.h b/src/third_party/wiredtiger/src/include/swap.h
index 6d39afa539f..30cdf0d08d2 100644
--- a/src/third_party/wiredtiger/src/include/swap.h
+++ b/src/third_party/wiredtiger/src/include/swap.h
@@ -47,6 +47,7 @@ static inline uint64_t
__wt_bswap64(uint64_t v)
{
return (
+ /* NOLINTNEXTLINE(misc-redundant-expression) */
((v << 56) & 0xff00000000000000UL) |
((v << 40) & 0x00ff000000000000UL) |
((v << 24) & 0x0000ff0000000000UL) |
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index cce5bd4cbc7..1ac34a4defd 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -98,6 +98,7 @@ struct __wt_txn_state {
volatile uint64_t id;
volatile uint64_t pinned_id;
volatile uint64_t metadata_pinned;
+ volatile bool is_allocating;
WT_CACHE_LINE_PAD_END
};
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 6ce224ec65a..a358e74d925 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -972,14 +972,16 @@ __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish)
/*
* Allocating transaction IDs involves several steps.
*
- * Firstly, we do an atomic increment to allocate a unique ID. The
- * field we increment is not used anywhere else.
+ * Firstly, publish that this transaction is allocating its ID, then
+ * publish the transaction ID as the current global ID. Note that this
+ * transaction ID might not be unique among threads and hence not valid
+ * at this moment. The flag will notify other transactions that are
+ * attempting to get their own snapshot for this transaction ID to
+ * retry.
*
- * Then we optionally publish the allocated ID into the global
- * transaction table. It is critical that this becomes visible before
- * the global current value moves past our ID, or some concurrent
- * reader could get a snapshot that makes our changes visible before we
- * commit.
+ * Then we do an atomic increment to allocate a unique ID. This will
+ * give the valid ID to this transaction that we publish to the global
+ * transaction table.
*
* We want the global value to lead the allocated values, so that any
* allocated transaction ID eventually becomes globally visible. When
@@ -991,21 +993,16 @@ __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish)
* for unlocked reads to be well defined, we must use an atomic
* increment here.
*/
- __wt_spin_lock(session, &txn_global->id_lock);
- id = txn_global->current;
-
if (publish) {
+ WT_PUBLISH(txn_state->is_allocating, true);
+ WT_PUBLISH(txn_state->id, txn_global->current);
+ id = __wt_atomic_addv64(&txn_global->current, 1) - 1;
session->txn.id = id;
WT_PUBLISH(txn_state->id, id);
- }
+ WT_PUBLISH(txn_state->is_allocating, false);
+ } else
+ id = __wt_atomic_addv64(&txn_global->current, 1) - 1;
- /*
- * Even though we are in a spinlock, readers are not. We rely on
- * atomic reads of the current ID to create snapshots, so for unlocked
- * reads to be well defined, we must use an atomic increment here.
- */
- (void)__wt_atomic_addv64(&txn_global->current, 1);
- __wt_spin_unlock(session, &txn_global->id_lock);
return (id);
}
@@ -1193,7 +1190,14 @@ __wt_txn_am_oldest(WT_SESSION_IMPL *session)
WT_ORDERED_READ(session_cnt, conn->session_cnt);
for (i = 0, s = txn_global->states; i < session_cnt; i++, s++)
- if ((id = s->id) != WT_TXN_NONE && WT_TXNID_LT(id, txn->id))
+ /*
+ * We are checking if the transaction is oldest one in the
+ * system. It is safe to ignore any sessions that are
+ * allocating transaction IDs, since we already have an ID,
+ * they are guaranteed to be newer.
+ */
+ if (!s->is_allocating && (id = s->id) != WT_TXN_NONE &&
+ WT_TXNID_LT(id, txn->id))
return (false);
return (true);
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index c2fcb720575..028a7663975 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -36,6 +36,7 @@ extern "C" {
#if defined(DOXYGEN) || defined(SWIG)
#define __F(func) func
#else
+/* NOLINTNEXTLINE(misc-macro-parentheses) */
#define __F(func) (*func)
#endif
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 9e27a996251..10b52246987 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -571,6 +571,7 @@ __wt_log_extract_lognum(
WT_RET_MSG(session, EINVAL,
"unexpected usage: no id or no name");
if ((p = strrchr(name, '.')) == NULL ||
+ /* NOLINTNEXTLINE(cert-err34-c) */
sscanf(++p, "%" SCNu32, id) != 1)
WT_RET_MSG(session, WT_ERROR, "Bad log file name '%s'", name);
return (0);
diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
index ccb61aa4fb0..d42d902d0a4 100644
--- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c
+++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
@@ -340,6 +340,7 @@ __ckpt_load(WT_SESSION_IMPL *session,
goto format;
memcpy(timebuf, a.str, a.len);
timebuf[a.len] = '\0';
+ /* NOLINTNEXTLINE(cert-err34-c) */
if (sscanf(timebuf, "%" SCNuMAX, &ckpt->sec) != 1)
goto format;
diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c
index 3531440e76f..0c89dca92ed 100644
--- a/src/third_party/wiredtiger/src/meta/meta_turtle.c
+++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c
@@ -193,7 +193,6 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
bool exist_backup, exist_incr, exist_isrc, exist_turtle;
bool load, loadTurtle;
- metaconf = NULL;
load = loadTurtle = false;
/*
@@ -226,10 +225,12 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
* Failure to read means a bad turtle file. Remove it and create
* a new turtle file.
*/
- if (F_ISSET(S2C(session), WT_CONN_SALVAGE))
+ if (F_ISSET(S2C(session), WT_CONN_SALVAGE)) {
WT_WITH_TURTLE_LOCK(session,
- ret = __wt_turtle_read(session,
- WT_METAFILE_URI, &unused_value));
+ ret = __wt_turtle_read(
+ session, WT_METAFILE_URI, &unused_value));
+ __wt_free(session, unused_value);
+ }
if (ret != 0) {
WT_RET(__wt_remove_if_exists(
@@ -282,14 +283,12 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
WT_RET(__metadata_config(session, &metaconf));
WT_WITH_TURTLE_LOCK(session, ret =
__wt_turtle_update(session, WT_METAFILE_URI, metaconf));
- WT_ERR(ret);
+ __wt_free(session, metaconf);
+ WT_RET(ret);
}
/* Remove the backup files, we'll never read them again. */
- WT_ERR(__wt_backup_file_remove(session));
-
-err: __wt_free(session, metaconf);
- return (ret);
+ return (__wt_backup_file_remove(session));
}
/*
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c
index 438af2eb58d..7a5c4a07e58 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c
@@ -140,7 +140,7 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path)
fd = 0; /* -Wconditional-uninitialized */
WT_SYSCALL_RETRY((
- (fd = open(dir, O_RDONLY, 0444)) == -1 ? -1 : 0), ret);
+ (fd = open(dir, O_RDONLY | O_CLOEXEC, 0444)) == -1 ? -1 : 0), ret);
if (ret != 0)
WT_ERR_MSG(session, ret, "%s: directory-sync: open", dir);
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index a63d3eab361..1791317eb1b 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -51,6 +51,7 @@ typedef struct {
/* Lookaside boundary tracking. */
uint64_t unstable_txn;
+ wt_timestamp_t unstable_durable_timestamp;
wt_timestamp_t unstable_timestamp;
u_int updates_seen; /* Count of updates seen. */
@@ -59,6 +60,9 @@ typedef struct {
bool update_uncommitted; /* An update was uncommitted */
bool update_used; /* An update could be used */
+ /* All the updates are with prepare in-progress state. */
+ bool all_upd_prepare_in_prog;
+
/*
* When we can't mark the page clean (for example, checkpoint found some
* uncommitted updates), there's a leave-dirty flag.
@@ -926,15 +930,20 @@ __rec_init(WT_SESSION_IMPL *session,
if (r->las_skew_newest) {
r->unstable_txn = WT_TXN_NONE;
r->unstable_timestamp = 0;
+ r->unstable_durable_timestamp = 0;
} else {
r->unstable_txn = WT_TXN_ABORTED;
r->unstable_timestamp = UINT64_MAX;
+ r->unstable_durable_timestamp = UINT64_MAX;
}
/* Track if updates were used and/or uncommitted. */
r->updates_seen = r->updates_unstable = 0;
r->update_uncommitted = r->update_used = false;
+ /* Track if all the updates are with prepare in-progress state. */
+ r->all_upd_prepare_in_prog = true;
+
/* Track if the page can be marked clean. */
r->leave_dirty = false;
@@ -1242,6 +1251,13 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
max_txn = txnid;
/*
+ * Track if all the updates are not with in-progress prepare
+ * state.
+ */
+ if (upd->prepare_state == WT_PREPARE_RESOLVED)
+ r->all_upd_prepare_in_prog = false;
+
+ /*
* Check whether the update was committed before reconciliation
* started. The global commit point can move forward during
* reconciliation so we use a cached copy to avoid races when a
@@ -1268,7 +1284,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
uncommitted = r->update_uncommitted = true;
continue;
}
-
}
/* Track the first update with non-zero timestamp. */
@@ -1296,7 +1311,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
if (upd_select->upd == NULL && r->las_skew_newest)
upd_select->upd = upd;
- /* Consider non durable updates as uncommitted. */
if ((F_ISSET(r, WT_REC_VISIBLE_ALL) ?
!__wt_txn_upd_visible_all(session, upd) :
!__wt_txn_upd_visible(session, upd)) ||
@@ -1395,8 +1409,8 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
r->max_txn = max_txn;
/* Update the maximum timestamp. */
- if (first_ts_upd != NULL && r->max_timestamp < first_ts_upd->start_ts)
- r->max_timestamp = first_ts_upd->start_ts;
+ if (first_ts_upd != NULL && r->max_timestamp < first_ts_upd->durable_ts)
+ r->max_timestamp = first_ts_upd->durable_ts;
/*
* If the update we chose was a birthmark, or we are doing
@@ -1477,9 +1491,23 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
if (F_ISSET(r, WT_REC_LOOKASIDE) && r->las_skew_newest) {
if (WT_TXNID_LT(r->unstable_txn, first_upd->txnid))
r->unstable_txn = first_upd->txnid;
- if (first_ts_upd != NULL &&
- r->unstable_timestamp < first_ts_upd->start_ts)
- r->unstable_timestamp = first_ts_upd->start_ts;
+ if (first_ts_upd != NULL) {
+ /*
+ * FIXME Disable this assertion until fixed by WT-4598.
+ * WT_ASSERT(session,
+ * first_ts_upd->prepare_state ==
+ * WT_PREPARE_INPROGRESS ||
+ * first_ts_upd->start_ts <=
+ * first_ts_upd->durable_ts);
+ */
+ if (r->unstable_timestamp < first_ts_upd->start_ts)
+ r->unstable_timestamp = first_ts_upd->start_ts;
+
+ if (r->unstable_durable_timestamp <
+ first_ts_upd->durable_ts)
+ r->unstable_durable_timestamp =
+ first_ts_upd->durable_ts;
+ }
} else if (F_ISSET(r, WT_REC_LOOKASIDE)) {
for (upd = first_upd; upd != upd_select->upd; upd = upd->next) {
if (upd->txnid == WT_TXN_ABORTED)
@@ -1488,8 +1516,29 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
if (upd->txnid != WT_TXN_NONE &&
WT_TXNID_LT(upd->txnid, r->unstable_txn))
r->unstable_txn = upd->txnid;
+ /*
+ * The durable timestamp is always set, and usually
+ * the same as the start timestamp, which makes it OK
+ * to use the two independently and be confident both
+ * will be set.
+ */
+ /*
+ * FIXME Disable this assertion until fixed by WT-4598.
+ * WT_ASSERT(session,
+ * upd->prepare_state == WT_PREPARE_INPROGRESS ||
+ * upd->durable_ts >= upd->start_ts);
+ */
if (upd->start_ts < r->unstable_timestamp)
r->unstable_timestamp = upd->start_ts;
+ /*
+ * Don't set the unstable durable timestamp with the
+ * durable timestamp of an in-progress prepared update.
+ * An in-progress prepared update will always have a
+ * zero durable timestamp.
+ */
+ if (upd->prepare_state != WT_PREPARE_INPROGRESS &&
+ upd->durable_ts < r->unstable_durable_timestamp)
+ r->unstable_durable_timestamp = upd->durable_ts;
}
}
@@ -2969,7 +3018,14 @@ done: if (F_ISSET(r, WT_REC_LOOKASIDE)) {
multi->page_las.unstable_txn = r->unstable_txn;
WT_ASSERT(session, r->unstable_txn != WT_TXN_NONE);
multi->page_las.max_timestamp = r->max_timestamp;
+ /*
+ * FIXME Disable this assertion until fixed by WT-4598.
+ * WT_ASSERT(session, r->all_upd_prepare_in_prog == true ||
+ * r->unstable_durable_timestamp >= r->unstable_timestamp);
+ */
multi->page_las.unstable_timestamp = r->unstable_timestamp;
+ multi->page_las.unstable_durable_timestamp =
+ r->unstable_durable_timestamp;
}
err: __wt_scr_free(session, &key);
@@ -4177,6 +4233,7 @@ __rec_col_var(WT_SESSION_IMPL *session,
* Set the start/stop values to cause failure if they're not set.
* [-Werror=maybe-uninitialized]
*/
+ /* NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) */
start_ts = stop_ts = WT_TS_NONE;
WT_RET(__rec_split_init(session,
diff --git a/src/third_party/wiredtiger/src/support/hash_city.c b/src/third_party/wiredtiger/src/support/hash_city.c
index b1fa26cd8cf..5b685b5c05a 100644
--- a/src/third_party/wiredtiger/src/support/hash_city.c
+++ b/src/third_party/wiredtiger/src/support/hash_city.c
@@ -306,7 +306,7 @@ static inline uint64_t CityHash64(const char *s, size_t len) {
if (len <= 32) {
if (len <= 16) {
return HashLen0to16(s, len);
- } else {
+ } else { /* NOLINT(readability-else-after-return) */
return HashLen17to32(s, len);
}
} else if (len <= 64) {
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index f3d9f762750..8c5d7ffb826 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -209,13 +209,38 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
* can happen if we race with a thread that is allocating
* an ID -- the ID will not be used because the thread will
* keep spinning until it gets a valid one.
+ * - The ID if it is higher than the current ID we saw. This
+ * can happen if the transaction is already finished. In
+ * this case, we ignore this transaction because it would
+ * not be visible to the current snapshot.
*/
- if (s != txn_state &&
+ while (s != txn_state &&
(id = s->id) != WT_TXN_NONE &&
- WT_TXNID_LE(prev_oldest_id, id)) {
- txn->snapshot[n++] = id;
- if (WT_TXNID_LT(id, pinned_id))
- pinned_id = id;
+ WT_TXNID_LE(prev_oldest_id, id) &&
+ WT_TXNID_LT(id, current_id)) {
+ /*
+ * If the transaction is still allocating its ID, then
+ * we spin here until it gets its valid ID.
+ */
+ WT_READ_BARRIER();
+ if (!s->is_allocating) {
+ /*
+ * There is still a chance that fetched ID is
+ * not valid after ID allocation, so we check
+ * again here. The read of transaction ID
+ * should be carefully ordered: we want to
+ * re-read ID from transaction state after this
+ * transaction completes ID allocation.
+ */
+ WT_READ_BARRIER();
+ if (id == s->id) {
+ txn->snapshot[n++] = id;
+ if (WT_TXNID_LT(id, pinned_id))
+ pinned_id = id;
+ break;
+ }
+ }
+ WT_PAUSE();
}
}
@@ -261,10 +286,31 @@ __txn_oldest_scan(WT_SESSION_IMPL *session,
WT_ORDERED_READ(session_cnt, conn->session_cnt);
for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
/* Update the last running transaction ID. */
- if ((id = s->id) != WT_TXN_NONE &&
+ while ((id = s->id) != WT_TXN_NONE &&
WT_TXNID_LE(prev_oldest_id, id) &&
- WT_TXNID_LT(id, last_running))
- last_running = id;
+ WT_TXNID_LT(id, last_running)) {
+ /*
+ * If the transaction is still allocating its ID, then
+ * we spin here until it gets its valid ID.
+ */
+ WT_READ_BARRIER();
+ if (!s->is_allocating) {
+ /*
+ * There is still a chance that fetched ID is
+ * not valid after ID allocation, so we check
+ * again here. The read of transaction ID
+ * should be carefully ordered: we want to
+ * re-read ID from transaction state after this
+ * transaction completes ID allocation.
+ */
+ WT_READ_BARRIER();
+ if (id == s->id) {
+ last_running = id;
+ break;
+ }
+ }
+ WT_PAUSE();
+ }
/* Update the metadata pinned ID. */
if ((id = s->metadata_pinned) != WT_TXN_NONE &&
@@ -576,11 +622,11 @@ __wt_txn_release(WT_SESSION_IMPL *session)
}
/*
- * __txn_commit_timestamps_validate --
+ * __txn_commit_timestamps_assert --
* Validate that timestamps provided to commit are legal.
*/
static inline int
-__txn_commit_timestamps_validate(WT_SESSION_IMPL *session)
+__txn_commit_timestamps_assert(WT_SESSION_IMPL *session)
{
WT_CURSOR *cursor;
WT_DECL_RET;
@@ -754,7 +800,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
* For prepared transactions commit timestamp could be earlier
* than stable timestamp.
*/
- WT_ERR(__wt_timestamp_validate(
+ WT_ERR(__wt_txn_commit_timestamp_validate(
session, "commit", ts, &cval, !prepare));
txn->commit_timestamp = ts;
__wt_txn_set_commit_timestamp(session);
@@ -787,11 +833,11 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
/* Durable timestamp should be later than stable timestamp. */
F_SET(txn, WT_TXN_HAS_TS_DURABLE);
txn->durable_timestamp = ts;
- WT_ERR(__wt_timestamp_validate(
+ WT_ERR(__wt_txn_commit_timestamp_validate(
session, "durable", ts, &cval, true));
}
- WT_ERR(__txn_commit_timestamps_validate(session));
+ WT_ERR(__txn_commit_timestamps_assert(session));
/*
* The default sync setting is inherited from the connection, but can
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index 2e5e1f61344..77eecbe2e84 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -440,6 +440,7 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
/* If there is checkpoint logged for the file, apply everything. */
if (cval.type != WT_CONFIG_ITEM_STRUCT)
WT_INIT_LSN(&lsn);
+ /* NOLINTNEXTLINE(cert-err34-c) */
else if (sscanf(cval.str,
"(%" SCNu32 ",%" SCNu32 ")", &lsnfile, &lsnoffset) == 2)
WT_SET_LSN(&lsn, lsnfile, lsnoffset);
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 2ff42b7d220..f45069248fd 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -68,7 +68,7 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
* which will fail the following check and cause them to never
* be removed.
*/
- if (rollback_timestamp < las_timestamp) {
+ if (rollback_timestamp < durable_timestamp) {
WT_ERR(cursor->remove(cursor));
WT_STAT_CONN_INCR(session, txn_rollback_las_removed);
--las_total;
@@ -250,7 +250,7 @@ __txn_abort_newer_updates(
local_read = false;
read_flags = WT_READ_WONT_NEED;
if (ref->page_las != NULL && ref->page_las->skew_newest &&
- rollback_timestamp < ref->page_las->unstable_timestamp) {
+ rollback_timestamp < ref->page_las->unstable_durable_timestamp) {
/* Make sure get back a page with history, not limbo page */
WT_ASSERT(session,
!F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT));
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index de474dba222..bf65db45153 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -41,7 +41,9 @@ __wt_timestamp_to_hex_string(wt_timestamp_t ts, char *hex_timestamp)
return;
}
if (ts == WT_TS_MAX) {
- (void)strcpy(hex_timestamp, "ffffffffffffffff");
+#define WT_TS_MAX_HEX_STRING "ffffffffffffffff"
+ (void)memcpy(hex_timestamp,
+ WT_TS_MAX_HEX_STRING, strlen(WT_TS_MAX_HEX_STRING) + 1);
return;
}
@@ -579,14 +581,15 @@ set: __wt_writelock(session, &txn_global->rwlock);
}
/*
- * __wt_timestamp_validate --
- * Validate a timestamp to be not older than the global oldest and global
- * stable and running transaction commit timestamp and running transaction
- * prepare timestamp.
+ * __wt_txn_commit_timestamp_validate --
+ * Validate a timestamp to be not older than running transaction commit
+ * timestamp and running transaction prepare timestamp. Validate a durable
+ * timestamp to be not older than the global oldest and global stable
+ * timestamp.
*/
int
-__wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name,
- wt_timestamp_t ts, WT_CONFIG_ITEM *cval, bool compare_stable)
+__wt_txn_commit_timestamp_validate(WT_SESSION_IMPL *session, const char *name,
+ wt_timestamp_t ts, WT_CONFIG_ITEM *cval, bool durable_ts)
{
WT_TXN *txn = &session->txn;
WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
@@ -609,14 +612,14 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name,
if (has_stable_ts)
stable_ts = txn_global->stable_timestamp;
- if (has_oldest_ts && ts < oldest_ts) {
+ if (durable_ts && has_oldest_ts && ts < oldest_ts) {
__wt_timestamp_to_string(
oldest_ts, ts_string[0], sizeof(ts_string[0]));
WT_RET_MSG(session, EINVAL,
"%s timestamp %.*s older than oldest timestamp %s",
name, (int)cval->len, cval->str, ts_string[0]);
}
- if (compare_stable && has_stable_ts && ts < stable_ts) {
+ if (durable_ts && has_stable_ts && ts < stable_ts) {
__wt_timestamp_to_string(
stable_ts, ts_string[0], sizeof(ts_string[0]));
WT_RET_MSG(session, EINVAL,
@@ -696,10 +699,10 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
* than stable timestamp.
*/
if (prepare)
- WT_RET(__wt_timestamp_validate(
+ WT_RET(__wt_txn_commit_timestamp_validate(
session, "commit", ts, &cval, false));
else
- WT_RET(__wt_timestamp_validate(
+ WT_RET(__wt_txn_commit_timestamp_validate(
session, "commit", ts, &cval, true));
txn->commit_timestamp = ts;
__wt_txn_set_commit_timestamp(session);
@@ -727,7 +730,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
* is required.
*/
if (ret == 0 && cval.len != 0)
- WT_RET(__wt_timestamp_validate(
+ WT_RET(__wt_txn_commit_timestamp_validate(
session, "durable", txn->durable_timestamp, &cval, true));
/*
* We allow setting the commit timestamp and durable timestamp after a