diff options
Diffstat (limited to 'src/third_party')
27 files changed, 303 insertions, 87 deletions
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 1d49ea15866..3868f96c431 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -101,6 +101,7 @@ DUPLICATEV DataSet DbCursor DbEnv +DeadStores Decrement Decrypt DeleteFileW @@ -264,6 +265,8 @@ Mutex MySecret NEEDKEY NEEDVALUE +NOLINT +NOLINTNEXTLINE NOLL NOLOCK NONINFRINGEMENT @@ -639,6 +642,7 @@ dbc dbs dcalloc ddd +deadcode decile deciles decl @@ -1102,6 +1106,7 @@ primary's printf printlog priv +prog progname ps psp diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 0842ac032b3..23d74cd8a2f 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "afdead1093b5c5b41dd54ddddf6f42d92bef1666", + "commit": "82c3f83acae289e0ada5ab4e7fe549e47af1078e", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-4.2" diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c index 2e282e33e75..fa85a54f532 100644 --- a/src/third_party/wiredtiger/src/btree/bt_huffman.c +++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c @@ -336,6 +336,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, WT_ERR(__wt_getline(session, fs, tmp)); if (tmp->size == 0) break; + /* NOLINTNEXTLINE(cert-err34-c) */ n = sscanf( tmp->data, "%" SCNi64 " %" SCNi64, &symbol, &frequency); /* diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index c0933d4c4f8..5a653b4a546 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -283,7 +283,7 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) !ref->page_las->has_prepares && !S2C(session)->txn_global.has_stable_timestamp && __wt_txn_visible_all(session, ref->page_las->unstable_txn, - ref->page_las->unstable_timestamp)) { + ref->page_las->unstable_durable_timestamp)) { page->modify->rec_max_txn = ref->page_las->max_txn; page->modify->rec_max_timestamp = ref->page_las->max_timestamp; diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c index 235fc76b28a..1a412ace8f9 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c @@ -96,6 +96,7 @@ __verify_config_offsets( * verify because that's where we "dump blocks" for debugging.) */ *quitp = true; + /* NOLINTNEXTLINE(cert-err34-c) */ if (v.len != 0 || sscanf(k.str, "%" SCNu64, &offset) != 1) WT_RET_MSG(session, EINVAL, "unexpected dump offset format"); diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c index a68c706ad95..4ade406c619 100644 --- a/src/third_party/wiredtiger/src/cache/cache_las.c +++ b/src/third_party/wiredtiger/src/cache/cache_las.c @@ -426,16 +426,28 @@ __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref) * versions of data and all the updates are in the past. */ if (ref->page_las->skew_newest && - txn->read_timestamp > ref->page_las->unstable_timestamp) + txn->read_timestamp > ref->page_las->unstable_durable_timestamp) return (true); /* * Skip lookaside pages if reading as of a timestamp, we evicted old * versions of data and all the unstable updates are in the future. */ - if (!ref->page_las->skew_newest && - txn->read_timestamp < ref->page_las->unstable_timestamp) - return (true); + if (!ref->page_las->skew_newest) { + /* + * Skip lookaside pages during checkpoint if all the unstable + * durable updates are in the future. Checking for just the + * unstable updates during checkpoint would end up reading more + * content from lookaside than necessary. + */ + if (WT_SESSION_IS_CHECKPOINT(session) && + txn->read_timestamp < + ref->page_las->unstable_durable_timestamp) + return (true); + + if (txn->read_timestamp < ref->page_las->unstable_timestamp) + return (true); + } return (false); } @@ -545,7 +557,7 @@ __las_insert_block_verbose( double pct_dirty, pct_full; uint64_t ckpt_gen_current, ckpt_gen_last; uint32_t btree_id; - char ts_string[WT_TS_INT_STRING_SIZE]; + char ts_string[2][WT_TS_INT_STRING_SIZE]; btree_id = btree->id; @@ -571,19 +583,23 @@ __las_insert_block_verbose( (void)__wt_eviction_dirty_needed(session, &pct_dirty); __wt_timestamp_to_string( multi->page_las.unstable_timestamp, - ts_string, sizeof(ts_string)); + ts_string[0], sizeof(ts_string)); + __wt_timestamp_to_string( + multi->page_las.unstable_durable_timestamp, + ts_string[1], sizeof(ts_string)); __wt_verbose(session, WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY, "Page reconciliation triggered lookaside write " "file ID %" PRIu32 ", page ID %" PRIu64 ". " - "Max txn ID %" PRIu64 ", unstable timestamp %s, %s. " + "Max txn ID %" PRIu64 ", unstable timestamp %s," + " unstable durable timestamp %s, %s. " "Entries now in lookaside file: %" PRId64 ", " "cache dirty: %2.3f%% , " "cache use: %2.3f%%", btree_id, multi->page_las.las_pageid, multi->page_las.max_txn, - ts_string, + ts_string[0], ts_string[1], multi->page_las.skew_newest ? "newest" : "not newest", WT_STAT_READ(conn->stats, cache_lookaside_entries), pct_dirty, pct_full); @@ -1128,6 +1144,26 @@ __wt_las_sweep(WT_SESSION_IMPL *session) session, saved_key, las_key.data, las_key.size)); /* + * Never expect an entry with prepare locked state or + * with durable timestamp as max timestamp or with + * in-progress prepare state and non-zero durable + * timestamp. In all other cases the durable timestamp + * is higher or same as the las timestamp. + */ + WT_ASSERT(session, + prepare_state != WT_PREPARE_LOCKED || + durable_timestamp != WT_TS_MAX || + (prepare_state != WT_PREPARE_INPROGRESS || + durable_timestamp == 0)); + + /* + * FIXME Disable this assertion until fixed by WT-4598. + * WT_ASSERT(session, + * (prepare_state == WT_PREPARE_INPROGRESS || + * durable_timestamp >= las_timestamp)); + */ + + /* * There are several conditions that need to be met * before we choose to remove a key block: * * The entries were written with skew newest. @@ -1136,8 +1172,8 @@ __wt_las_sweep(WT_SESSION_IMPL *session) * * The entry wasn't from a prepared transaction. */ if (upd_type == WT_UPDATE_BIRTHMARK && - __wt_txn_visible_all( - session, las_txnid, las_timestamp) && + __wt_txn_visible_all(session, + las_txnid, durable_timestamp) && prepare_state != WT_PREPARE_INPROGRESS) removing_key_block = true; else diff --git a/src/third_party/wiredtiger/src/conn/conn_capacity.c b/src/third_party/wiredtiger/src/conn/conn_capacity.c index 3a6f02a5d02..a75bdd259c4 100644 --- a/src/third_party/wiredtiger/src/conn/conn_capacity.c +++ b/src/third_party/wiredtiger/src/conn/conn_capacity.c @@ -306,6 +306,7 @@ __wt_capacity_throttle(WT_SESSION_IMPL *session, uint64_t bytes, conn = S2C(session); cap = &conn->capacity; + /* NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) */ capacity = steal_capacity = 0; reservation = steal = NULL; switch (type) { diff --git a/src/third_party/wiredtiger/src/conn/conn_reconfig.c b/src/third_party/wiredtiger/src/conn/conn_reconfig.c index c6d7203f08e..7df9fa212df 100644 --- a/src/third_party/wiredtiger/src/conn/conn_reconfig.c +++ b/src/third_party/wiredtiger/src/conn/conn_reconfig.c @@ -23,8 +23,10 @@ __conn_compat_parse(WT_SESSION_IMPL *session, * release string. We ignore the patch value, but allow it in * the string. */ + /* NOLINTNEXTLINE(cert-err34-c) */ if (sscanf(cvalp->str, "%" SCNu16 ".%" SCNu16, majorp, minorp) != 2 && + /* NOLINTNEXTLINE(cert-err34-c) */ sscanf(cvalp->str, "%" SCNu16 ".%" SCNu16 ".%" SCNu16, majorp, minorp, &unused_patch) != 3) WT_RET_MSG(session, EINVAL, diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 14d5a04b096..dc1bdc07419 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -247,6 +247,9 @@ struct __wt_page_lookaside { uint64_t unstable_txn; /* First transaction ID not on page */ wt_timestamp_t max_timestamp; /* Maximum timestamp */ wt_timestamp_t unstable_timestamp;/* First timestamp not on page */ + wt_timestamp_t unstable_durable_timestamp; + /* First durable timestamp not on + * page */ bool eviction_to_lookaside; /* Revert to lookaside on eviction */ bool has_prepares; /* One or more updates are prepared */ bool skew_newest; /* Page image has newest versions */ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 2bc60a1a85d..f550f00a88c 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -860,7 +860,7 @@ extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name, extern int __wt_txn_query_timestamp(WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[], bool global_txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t ts, WT_CONFIG_ITEM *cval, bool compare_stable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_commit_timestamp_validate(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t ts, WT_CONFIG_ITEM *cval, bool durable_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_parse_prepare_timestamp(WT_SESSION_IMPL *session, const char *cfg[], wt_timestamp_t *timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_parse_read_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h index b824d4eb24d..a9d271ed0bd 100644 --- a/src/third_party/wiredtiger/src/include/gcc.h +++ b/src/third_party/wiredtiger/src/include/gcc.h @@ -11,6 +11,7 @@ /* GCC-specific attributes. */ #define WT_PACKED_STRUCT_BEGIN(name) \ + /* NOLINTNEXTLINE(misc-macro-parentheses) */ \ struct __attribute__ ((__packed__)) name { #define WT_PACKED_STRUCT_END \ }; diff --git a/src/third_party/wiredtiger/src/include/posix.h b/src/third_party/wiredtiger/src/include/posix.h index f1e040968fc..4ad8db1f43a 100644 --- a/src/third_party/wiredtiger/src/include/posix.h +++ b/src/third_party/wiredtiger/src/include/posix.h @@ -33,6 +33,7 @@ typedef struct { /* * Thread callbacks need to match the platform specific callback types */ +/* NOLINTNEXTLINE(misc-macro-parentheses) */ #define WT_THREAD_CALLBACK(x) void* (x) #define WT_THREAD_RET void* #define WT_THREAD_RET_VALUE NULL diff --git a/src/third_party/wiredtiger/src/include/swap.h b/src/third_party/wiredtiger/src/include/swap.h index 6d39afa539f..30cdf0d08d2 100644 --- a/src/third_party/wiredtiger/src/include/swap.h +++ b/src/third_party/wiredtiger/src/include/swap.h @@ -47,6 +47,7 @@ static inline uint64_t __wt_bswap64(uint64_t v) { return ( + /* NOLINTNEXTLINE(misc-redundant-expression) */ ((v << 56) & 0xff00000000000000UL) | ((v << 40) & 0x00ff000000000000UL) | ((v << 24) & 0x0000ff0000000000UL) | diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index cce5bd4cbc7..1ac34a4defd 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -98,6 +98,7 @@ struct __wt_txn_state { volatile uint64_t id; volatile uint64_t pinned_id; volatile uint64_t metadata_pinned; + volatile bool is_allocating; WT_CACHE_LINE_PAD_END }; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 6ce224ec65a..a358e74d925 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -972,14 +972,16 @@ __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish) /* * Allocating transaction IDs involves several steps. * - * Firstly, we do an atomic increment to allocate a unique ID. The - * field we increment is not used anywhere else. + * Firstly, publish that this transaction is allocating its ID, then + * publish the transaction ID as the current global ID. Note that this + * transaction ID might not be unique among threads and hence not valid + * at this moment. The flag will notify other transactions that are + * attempting to get their own snapshot for this transaction ID to + * retry. * - * Then we optionally publish the allocated ID into the global - * transaction table. It is critical that this becomes visible before - * the global current value moves past our ID, or some concurrent - * reader could get a snapshot that makes our changes visible before we - * commit. + * Then we do an atomic increment to allocate a unique ID. This will + * give the valid ID to this transaction that we publish to the global + * transaction table. * * We want the global value to lead the allocated values, so that any * allocated transaction ID eventually becomes globally visible. When @@ -991,21 +993,16 @@ __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish) * for unlocked reads to be well defined, we must use an atomic * increment here. */ - __wt_spin_lock(session, &txn_global->id_lock); - id = txn_global->current; - if (publish) { + WT_PUBLISH(txn_state->is_allocating, true); + WT_PUBLISH(txn_state->id, txn_global->current); + id = __wt_atomic_addv64(&txn_global->current, 1) - 1; session->txn.id = id; WT_PUBLISH(txn_state->id, id); - } + WT_PUBLISH(txn_state->is_allocating, false); + } else + id = __wt_atomic_addv64(&txn_global->current, 1) - 1; - /* - * Even though we are in a spinlock, readers are not. We rely on - * atomic reads of the current ID to create snapshots, so for unlocked - * reads to be well defined, we must use an atomic increment here. - */ - (void)__wt_atomic_addv64(&txn_global->current, 1); - __wt_spin_unlock(session, &txn_global->id_lock); return (id); } @@ -1193,7 +1190,14 @@ __wt_txn_am_oldest(WT_SESSION_IMPL *session) WT_ORDERED_READ(session_cnt, conn->session_cnt); for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) - if ((id = s->id) != WT_TXN_NONE && WT_TXNID_LT(id, txn->id)) + /* + * We are checking if the transaction is oldest one in the + * system. It is safe to ignore any sessions that are + * allocating transaction IDs, since we already have an ID, + * they are guaranteed to be newer. + */ + if (!s->is_allocating && (id = s->id) != WT_TXN_NONE && + WT_TXNID_LT(id, txn->id)) return (false); return (true); diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index c2fcb720575..028a7663975 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -36,6 +36,7 @@ extern "C" { #if defined(DOXYGEN) || defined(SWIG) #define __F(func) func #else +/* NOLINTNEXTLINE(misc-macro-parentheses) */ #define __F(func) (*func) #endif diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 9e27a996251..10b52246987 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -571,6 +571,7 @@ __wt_log_extract_lognum( WT_RET_MSG(session, EINVAL, "unexpected usage: no id or no name"); if ((p = strrchr(name, '.')) == NULL || + /* NOLINTNEXTLINE(cert-err34-c) */ sscanf(++p, "%" SCNu32, id) != 1) WT_RET_MSG(session, WT_ERROR, "Bad log file name '%s'", name); return (0); diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c index ccb61aa4fb0..d42d902d0a4 100644 --- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c +++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c @@ -340,6 +340,7 @@ __ckpt_load(WT_SESSION_IMPL *session, goto format; memcpy(timebuf, a.str, a.len); timebuf[a.len] = '\0'; + /* NOLINTNEXTLINE(cert-err34-c) */ if (sscanf(timebuf, "%" SCNuMAX, &ckpt->sec) != 1) goto format; diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c index 3531440e76f..0c89dca92ed 100644 --- a/src/third_party/wiredtiger/src/meta/meta_turtle.c +++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c @@ -193,7 +193,6 @@ __wt_turtle_init(WT_SESSION_IMPL *session) bool exist_backup, exist_incr, exist_isrc, exist_turtle; bool load, loadTurtle; - metaconf = NULL; load = loadTurtle = false; /* @@ -226,10 +225,12 @@ __wt_turtle_init(WT_SESSION_IMPL *session) * Failure to read means a bad turtle file. Remove it and create * a new turtle file. */ - if (F_ISSET(S2C(session), WT_CONN_SALVAGE)) + if (F_ISSET(S2C(session), WT_CONN_SALVAGE)) { WT_WITH_TURTLE_LOCK(session, - ret = __wt_turtle_read(session, - WT_METAFILE_URI, &unused_value)); + ret = __wt_turtle_read( + session, WT_METAFILE_URI, &unused_value)); + __wt_free(session, unused_value); + } if (ret != 0) { WT_RET(__wt_remove_if_exists( @@ -282,14 +283,12 @@ __wt_turtle_init(WT_SESSION_IMPL *session) WT_RET(__metadata_config(session, &metaconf)); WT_WITH_TURTLE_LOCK(session, ret = __wt_turtle_update(session, WT_METAFILE_URI, metaconf)); - WT_ERR(ret); + __wt_free(session, metaconf); + WT_RET(ret); } /* Remove the backup files, we'll never read them again. */ - WT_ERR(__wt_backup_file_remove(session)); - -err: __wt_free(session, metaconf); - return (ret); + return (__wt_backup_file_remove(session)); } /* diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c index 438af2eb58d..7a5c4a07e58 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fs.c +++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c @@ -140,7 +140,7 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path) fd = 0; /* -Wconditional-uninitialized */ WT_SYSCALL_RETRY(( - (fd = open(dir, O_RDONLY, 0444)) == -1 ? -1 : 0), ret); + (fd = open(dir, O_RDONLY | O_CLOEXEC, 0444)) == -1 ? -1 : 0), ret); if (ret != 0) WT_ERR_MSG(session, ret, "%s: directory-sync: open", dir); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index a63d3eab361..1791317eb1b 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -51,6 +51,7 @@ typedef struct { /* Lookaside boundary tracking. */ uint64_t unstable_txn; + wt_timestamp_t unstable_durable_timestamp; wt_timestamp_t unstable_timestamp; u_int updates_seen; /* Count of updates seen. */ @@ -59,6 +60,9 @@ typedef struct { bool update_uncommitted; /* An update was uncommitted */ bool update_used; /* An update could be used */ + /* All the updates are with prepare in-progress state. */ + bool all_upd_prepare_in_prog; + /* * When we can't mark the page clean (for example, checkpoint found some * uncommitted updates), there's a leave-dirty flag. @@ -926,15 +930,20 @@ __rec_init(WT_SESSION_IMPL *session, if (r->las_skew_newest) { r->unstable_txn = WT_TXN_NONE; r->unstable_timestamp = 0; + r->unstable_durable_timestamp = 0; } else { r->unstable_txn = WT_TXN_ABORTED; r->unstable_timestamp = UINT64_MAX; + r->unstable_durable_timestamp = UINT64_MAX; } /* Track if updates were used and/or uncommitted. */ r->updates_seen = r->updates_unstable = 0; r->update_uncommitted = r->update_used = false; + /* Track if all the updates are with prepare in-progress state. */ + r->all_upd_prepare_in_prog = true; + /* Track if the page can be marked clean. */ r->leave_dirty = false; @@ -1242,6 +1251,13 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, max_txn = txnid; /* + * Track if all the updates are not with in-progress prepare + * state. + */ + if (upd->prepare_state == WT_PREPARE_RESOLVED) + r->all_upd_prepare_in_prog = false; + + /* * Check whether the update was committed before reconciliation * started. The global commit point can move forward during * reconciliation so we use a cached copy to avoid races when a @@ -1268,7 +1284,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, uncommitted = r->update_uncommitted = true; continue; } - } /* Track the first update with non-zero timestamp. */ @@ -1296,7 +1311,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, if (upd_select->upd == NULL && r->las_skew_newest) upd_select->upd = upd; - /* Consider non durable updates as uncommitted. */ if ((F_ISSET(r, WT_REC_VISIBLE_ALL) ? !__wt_txn_upd_visible_all(session, upd) : !__wt_txn_upd_visible(session, upd)) || @@ -1395,8 +1409,8 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, r->max_txn = max_txn; /* Update the maximum timestamp. */ - if (first_ts_upd != NULL && r->max_timestamp < first_ts_upd->start_ts) - r->max_timestamp = first_ts_upd->start_ts; + if (first_ts_upd != NULL && r->max_timestamp < first_ts_upd->durable_ts) + r->max_timestamp = first_ts_upd->durable_ts; /* * If the update we chose was a birthmark, or we are doing @@ -1477,9 +1491,23 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, if (F_ISSET(r, WT_REC_LOOKASIDE) && r->las_skew_newest) { if (WT_TXNID_LT(r->unstable_txn, first_upd->txnid)) r->unstable_txn = first_upd->txnid; - if (first_ts_upd != NULL && - r->unstable_timestamp < first_ts_upd->start_ts) - r->unstable_timestamp = first_ts_upd->start_ts; + if (first_ts_upd != NULL) { + /* + * FIXME Disable this assertion until fixed by WT-4598. + * WT_ASSERT(session, + * first_ts_upd->prepare_state == + * WT_PREPARE_INPROGRESS || + * first_ts_upd->start_ts <= + * first_ts_upd->durable_ts); + */ + if (r->unstable_timestamp < first_ts_upd->start_ts) + r->unstable_timestamp = first_ts_upd->start_ts; + + if (r->unstable_durable_timestamp < + first_ts_upd->durable_ts) + r->unstable_durable_timestamp = + first_ts_upd->durable_ts; + } } else if (F_ISSET(r, WT_REC_LOOKASIDE)) { for (upd = first_upd; upd != upd_select->upd; upd = upd->next) { if (upd->txnid == WT_TXN_ABORTED) @@ -1488,8 +1516,29 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, if (upd->txnid != WT_TXN_NONE && WT_TXNID_LT(upd->txnid, r->unstable_txn)) r->unstable_txn = upd->txnid; + /* + * The durable timestamp is always set, and usually + * the same as the start timestamp, which makes it OK + * to use the two independently and be confident both + * will be set. + */ + /* + * FIXME Disable this assertion until fixed by WT-4598. + * WT_ASSERT(session, + * upd->prepare_state == WT_PREPARE_INPROGRESS || + * upd->durable_ts >= upd->start_ts); + */ if (upd->start_ts < r->unstable_timestamp) r->unstable_timestamp = upd->start_ts; + /* + * Don't set the unstable durable timestamp with the + * durable timestamp of an in-progress prepared update. + * An in-progress prepared update will always have a + * zero durable timestamp. + */ + if (upd->prepare_state != WT_PREPARE_INPROGRESS && + upd->durable_ts < r->unstable_durable_timestamp) + r->unstable_durable_timestamp = upd->durable_ts; } } @@ -2969,7 +3018,14 @@ done: if (F_ISSET(r, WT_REC_LOOKASIDE)) { multi->page_las.unstable_txn = r->unstable_txn; WT_ASSERT(session, r->unstable_txn != WT_TXN_NONE); multi->page_las.max_timestamp = r->max_timestamp; + /* + * FIXME Disable this assertion until fixed by WT-4598. + * WT_ASSERT(session, r->all_upd_prepare_in_prog == true || + * r->unstable_durable_timestamp >= r->unstable_timestamp); + */ multi->page_las.unstable_timestamp = r->unstable_timestamp; + multi->page_las.unstable_durable_timestamp = + r->unstable_durable_timestamp; } err: __wt_scr_free(session, &key); @@ -4177,6 +4233,7 @@ __rec_col_var(WT_SESSION_IMPL *session, * Set the start/stop values to cause failure if they're not set. * [-Werror=maybe-uninitialized] */ + /* NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) */ start_ts = stop_ts = WT_TS_NONE; WT_RET(__rec_split_init(session, diff --git a/src/third_party/wiredtiger/src/support/hash_city.c b/src/third_party/wiredtiger/src/support/hash_city.c index b1fa26cd8cf..5b685b5c05a 100644 --- a/src/third_party/wiredtiger/src/support/hash_city.c +++ b/src/third_party/wiredtiger/src/support/hash_city.c @@ -306,7 +306,7 @@ static inline uint64_t CityHash64(const char *s, size_t len) { if (len <= 32) { if (len <= 16) { return HashLen0to16(s, len); - } else { + } else { /* NOLINT(readability-else-after-return) */ return HashLen17to32(s, len); } } else if (len <= 64) { diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index f3d9f762750..8c5d7ffb826 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -209,13 +209,38 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) * can happen if we race with a thread that is allocating * an ID -- the ID will not be used because the thread will * keep spinning until it gets a valid one. + * - The ID if it is higher than the current ID we saw. This + * can happen if the transaction is already finished. In + * this case, we ignore this transaction because it would + * not be visible to the current snapshot. */ - if (s != txn_state && + while (s != txn_state && (id = s->id) != WT_TXN_NONE && - WT_TXNID_LE(prev_oldest_id, id)) { - txn->snapshot[n++] = id; - if (WT_TXNID_LT(id, pinned_id)) - pinned_id = id; + WT_TXNID_LE(prev_oldest_id, id) && + WT_TXNID_LT(id, current_id)) { + /* + * If the transaction is still allocating its ID, then + * we spin here until it gets its valid ID. + */ + WT_READ_BARRIER(); + if (!s->is_allocating) { + /* + * There is still a chance that fetched ID is + * not valid after ID allocation, so we check + * again here. The read of transaction ID + * should be carefully ordered: we want to + * re-read ID from transaction state after this + * transaction completes ID allocation. + */ + WT_READ_BARRIER(); + if (id == s->id) { + txn->snapshot[n++] = id; + if (WT_TXNID_LT(id, pinned_id)) + pinned_id = id; + break; + } + } + WT_PAUSE(); } } @@ -261,10 +286,31 @@ __txn_oldest_scan(WT_SESSION_IMPL *session, WT_ORDERED_READ(session_cnt, conn->session_cnt); for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { /* Update the last running transaction ID. */ - if ((id = s->id) != WT_TXN_NONE && + while ((id = s->id) != WT_TXN_NONE && WT_TXNID_LE(prev_oldest_id, id) && - WT_TXNID_LT(id, last_running)) - last_running = id; + WT_TXNID_LT(id, last_running)) { + /* + * If the transaction is still allocating its ID, then + * we spin here until it gets its valid ID. + */ + WT_READ_BARRIER(); + if (!s->is_allocating) { + /* + * There is still a chance that fetched ID is + * not valid after ID allocation, so we check + * again here. The read of transaction ID + * should be carefully ordered: we want to + * re-read ID from transaction state after this + * transaction completes ID allocation. + */ + WT_READ_BARRIER(); + if (id == s->id) { + last_running = id; + break; + } + } + WT_PAUSE(); + } /* Update the metadata pinned ID. */ if ((id = s->metadata_pinned) != WT_TXN_NONE && @@ -576,11 +622,11 @@ __wt_txn_release(WT_SESSION_IMPL *session) } /* - * __txn_commit_timestamps_validate -- + * __txn_commit_timestamps_assert -- * Validate that timestamps provided to commit are legal. */ static inline int -__txn_commit_timestamps_validate(WT_SESSION_IMPL *session) +__txn_commit_timestamps_assert(WT_SESSION_IMPL *session) { WT_CURSOR *cursor; WT_DECL_RET; @@ -754,7 +800,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) * For prepared transactions commit timestamp could be earlier * than stable timestamp. */ - WT_ERR(__wt_timestamp_validate( + WT_ERR(__wt_txn_commit_timestamp_validate( session, "commit", ts, &cval, !prepare)); txn->commit_timestamp = ts; __wt_txn_set_commit_timestamp(session); @@ -787,11 +833,11 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) /* Durable timestamp should be later than stable timestamp. */ F_SET(txn, WT_TXN_HAS_TS_DURABLE); txn->durable_timestamp = ts; - WT_ERR(__wt_timestamp_validate( + WT_ERR(__wt_txn_commit_timestamp_validate( session, "durable", ts, &cval, true)); } - WT_ERR(__txn_commit_timestamps_validate(session)); + WT_ERR(__txn_commit_timestamps_assert(session)); /* * The default sync setting is inherited from the connection, but can diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c index 2e5e1f61344..77eecbe2e84 100644 --- a/src/third_party/wiredtiger/src/txn/txn_recover.c +++ b/src/third_party/wiredtiger/src/txn/txn_recover.c @@ -440,6 +440,7 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config) /* If there is checkpoint logged for the file, apply everything. */ if (cval.type != WT_CONFIG_ITEM_STRUCT) WT_INIT_LSN(&lsn); + /* NOLINTNEXTLINE(cert-err34-c) */ else if (sscanf(cval.str, "(%" SCNu32 ",%" SCNu32 ")", &lsnfile, &lsnoffset) == 2) WT_SET_LSN(&lsn, lsnfile, lsnoffset); diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 2ff42b7d220..f45069248fd 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -68,7 +68,7 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session) * which will fail the following check and cause them to never * be removed. */ - if (rollback_timestamp < las_timestamp) { + if (rollback_timestamp < durable_timestamp) { WT_ERR(cursor->remove(cursor)); WT_STAT_CONN_INCR(session, txn_rollback_las_removed); --las_total; @@ -250,7 +250,7 @@ __txn_abort_newer_updates( local_read = false; read_flags = WT_READ_WONT_NEED; if (ref->page_las != NULL && ref->page_las->skew_newest && - rollback_timestamp < ref->page_las->unstable_timestamp) { + rollback_timestamp < ref->page_las->unstable_durable_timestamp) { /* Make sure get back a page with history, not limbo page */ WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT)); diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c index de474dba222..bf65db45153 100644 --- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c +++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c @@ -41,7 +41,9 @@ __wt_timestamp_to_hex_string(wt_timestamp_t ts, char *hex_timestamp) return; } if (ts == WT_TS_MAX) { - (void)strcpy(hex_timestamp, "ffffffffffffffff"); +#define WT_TS_MAX_HEX_STRING "ffffffffffffffff" + (void)memcpy(hex_timestamp, + WT_TS_MAX_HEX_STRING, strlen(WT_TS_MAX_HEX_STRING) + 1); return; } @@ -579,14 +581,15 @@ set: __wt_writelock(session, &txn_global->rwlock); } /* - * __wt_timestamp_validate -- - * Validate a timestamp to be not older than the global oldest and global - * stable and running transaction commit timestamp and running transaction - * prepare timestamp. + * __wt_txn_commit_timestamp_validate -- + * Validate a timestamp to be not older than running transaction commit + * timestamp and running transaction prepare timestamp. Validate a durable + * timestamp to be not older than the global oldest and global stable + * timestamp. */ int -__wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, - wt_timestamp_t ts, WT_CONFIG_ITEM *cval, bool compare_stable) +__wt_txn_commit_timestamp_validate(WT_SESSION_IMPL *session, const char *name, + wt_timestamp_t ts, WT_CONFIG_ITEM *cval, bool durable_ts) { WT_TXN *txn = &session->txn; WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global; @@ -609,14 +612,14 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, if (has_stable_ts) stable_ts = txn_global->stable_timestamp; - if (has_oldest_ts && ts < oldest_ts) { + if (durable_ts && has_oldest_ts && ts < oldest_ts) { __wt_timestamp_to_string( oldest_ts, ts_string[0], sizeof(ts_string[0])); WT_RET_MSG(session, EINVAL, "%s timestamp %.*s older than oldest timestamp %s", name, (int)cval->len, cval->str, ts_string[0]); } - if (compare_stable && has_stable_ts && ts < stable_ts) { + if (durable_ts && has_stable_ts && ts < stable_ts) { __wt_timestamp_to_string( stable_ts, ts_string[0], sizeof(ts_string[0])); WT_RET_MSG(session, EINVAL, @@ -696,10 +699,10 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) * than stable timestamp. */ if (prepare) - WT_RET(__wt_timestamp_validate( + WT_RET(__wt_txn_commit_timestamp_validate( session, "commit", ts, &cval, false)); else - WT_RET(__wt_timestamp_validate( + WT_RET(__wt_txn_commit_timestamp_validate( session, "commit", ts, &cval, true)); txn->commit_timestamp = ts; __wt_txn_set_commit_timestamp(session); @@ -727,7 +730,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) * is required. */ if (ret == 0 && cval.len != 0) - WT_RET(__wt_timestamp_validate( + WT_RET(__wt_txn_commit_timestamp_validate( session, "durable", txn->durable_timestamp, &cval, true)); /* * We allow setting the commit timestamp and durable timestamp after a diff --git a/src/third_party/wiredtiger/test/suite/test_durable_ts03.py b/src/third_party/wiredtiger/test/suite/test_durable_ts03.py index 03b7c1091cd..a43bc786e36 100644 --- a/src/third_party/wiredtiger/test/suite/test_durable_ts03.py +++ b/src/third_party/wiredtiger/test/suite/test_durable_ts03.py @@ -33,21 +33,19 @@ def timestamp_str(t): return '%x' %t # test_durable_ts03.py -# Check that the updates with durable timestamp newer than the stable -# timestamp fill up the cache and leave it stuck. +# Check that the checkpoint honors the durable timestamp of updates. class test_durable_ts03(wttest.WiredTigerTestCase): - # Reducing the cache size to 10MB to will generate a stuck cache. This - # has been kept to a higher size to avoid pull request failure. def conn_config(self): - return 'cache_size=50MB' + return 'cache_size=10MB' def test_durable_ts03(self): # Create a table. uri = 'table:test_durable_ts03' - nrows = 300000 + nrows = 3000 self.session.create(uri, 'key_format=i,value_format=u') - value1 = "aaaaa" * 100 - value2 = "bbbbb" * 100 + valueA = "aaaaa" * 100 + valueB = "bbbbb" * 100 + valueC = "ccccc" * 100 # Start with setting a stable and oldest timestamp. self.conn.set_timestamp('stable_timestamp=' + timestamp_str(1) + \ @@ -58,7 +56,7 @@ class test_durable_ts03(wttest.WiredTigerTestCase): cursor = session.open_cursor(uri, None) for i in range(0, nrows): session.begin_transaction() - cursor[i] = value1 + cursor[i] = valueA session.commit_transaction('commit_timestamp=' + timestamp_str(50)) cursor.close() @@ -72,10 +70,62 @@ class test_durable_ts03(wttest.WiredTigerTestCase): cursor = session.open_cursor(uri, None) for i in range(0, nrows): session.begin_transaction() - cursor[i] = value2 + cursor[i] = valueB session.prepare_transaction('prepare_timestamp=' + timestamp_str(150)) session.commit_transaction('commit_timestamp=' + timestamp_str(200) + \ ',durable_timestamp=' + timestamp_str(220)) + # Check the checkpoint wrote only the durable updates. + cursor2 = self.session.open_cursor( + uri, None, 'checkpoint=WiredTigerCheckpoint') + for key, value in cursor2: + self.assertEqual(value, valueA) + + self.assertEquals(cursor.reset(), 0) + session.begin_transaction('read_timestamp=' + timestamp_str(150)) + for key, value in cursor: + self.assertEqual(value, valueA) + session.commit_transaction() + + # Read the updated data to confirm that it is visible. + self.assertEquals(cursor.reset(), 0) + session.begin_transaction('read_timestamp=' + timestamp_str(210)) + for key, value in cursor: + self.assertEqual(value, valueB) + session.commit_transaction() + + self.session.checkpoint("use_timestamp=true") + cursor.close() + session.close() + + self.reopen_conn() + session = self.conn.open_session() + cursor = session.open_cursor(uri, None) + self.conn.set_timestamp('stable_timestamp=' + timestamp_str(210) + \ + ',oldest_timestamp=' + timestamp_str(210)) + for key, value in cursor: + self.assertEqual(value, valueA) + + self.assertEquals(cursor.reset(), 0) + for i in range(0, nrows): + session.begin_transaction() + cursor[i] = valueC + session.prepare_transaction('prepare_timestamp=' + timestamp_str(220)) + session.commit_transaction('commit_timestamp=' + timestamp_str(230) + \ + ',durable_timestamp=' + timestamp_str(240)) + + self.conn.set_timestamp('stable_timestamp=' + timestamp_str(250)) + self.session.checkpoint() + cursor.close() + session.close() + + self.reopen_conn() + session = self.conn.open_session() + cursor = session.open_cursor(uri, None) + self.conn.set_timestamp('stable_timestamp=' + timestamp_str(250) + \ + ',oldest_timestamp=' + timestamp_str(250)) + for key, value in cursor: + self.assertEqual(value, valueC) + if __name__ == '__main__': wttest.run() |