diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-04-22 10:45:21 +1000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-22 01:01:19 +0000 |
commit | c8279c67d309858027cdb4d079ef9fd7122b1690 (patch) | |
tree | c8b4b404a399e87e3d1743d60c243c882b96fede /src/third_party/wiredtiger/src/include | |
parent | b38c6815cee932098722b72430b3237da3fe312a (diff) | |
download | mongo-c8279c67d309858027cdb4d079ef9fd7122b1690.tar.gz |
Import wiredtiger: 7b994a862e899a12eb7c3ac814c9fada7d8d1ab9 from branch mongodb-4.4 (tag: r4.4.0-rc2)
ref: 9bd1ece797..7b994a862e
for: 4.4.0-rc2
WT-4701 Switch test/format to use WiredTiger locking primitives
WT-5766 Separate out internal and shared transaction data
WT-5791 Prepare checkpoint can finish in 0msec and reset prepare min stat
WT-5794 Remove skew_newest option from reconciliation
WT-5833 Fix caching issue for overflow key/value items
WT-5919 Disallow logging archival testing with log-based incremental backup
WT-5946 Eviction server handles can deadlock when opening HS cursors
WT-5968 Make the WT_SESSION_IMPL.txn field an allocated structure
WT-5986 Create script for emulating multiversion tests
WT-6016 Fill source code comments where lines start with parentheticals
WT-6020 __rec_append_orig_value() cleanup
WT-6026 Fix s_all breakage on format.h
Diffstat (limited to 'src/third_party/wiredtiger/src/include')
-rw-r--r-- | src/third_party/wiredtiger/src/include/api.h | 50 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/btmem.h | 12 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/btree.i | 4 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/cache.h | 6 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/cache.i | 8 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/cursor.i | 5 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/extern.h | 20 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/reconcile.h | 5 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/session.h | 22 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/time.i | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/txn.h | 118 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/txn.i | 105 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/verify_build.h | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/wt_internal.h | 4 |
14 files changed, 182 insertions, 181 deletions
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h index 81118e421d2..e4455d62b03 100644 --- a/src/third_party/wiredtiger/src/include/api.h +++ b/src/third_party/wiredtiger/src/include/api.h @@ -86,39 +86,39 @@ while (0) /* An API call wrapped in a transaction if necessary. */ -#define TXN_API_CALL(s, h, n, bt, config, cfg) \ - do { \ - bool __autotxn = false, __update = false; \ - API_CALL(s, h, n, bt, config, cfg); \ - __wt_txn_timestamp_flags(s); \ - __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING); \ - if (__autotxn) \ - F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \ - __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \ - if (__update) \ - F_SET(&(s)->txn, WT_TXN_UPDATE); +#define TXN_API_CALL(s, h, n, bt, config, cfg) \ + do { \ + bool __autotxn = false, __update = false; \ + API_CALL(s, h, n, bt, config, cfg); \ + __wt_txn_timestamp_flags(s); \ + __autotxn = !F_ISSET((s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING); \ + if (__autotxn) \ + F_SET((s)->txn, WT_TXN_AUTOCOMMIT); \ + __update = !F_ISSET((s)->txn, WT_TXN_UPDATE); \ + if (__update) \ + F_SET((s)->txn, WT_TXN_UPDATE); /* An API call wrapped in a transaction if necessary. 
*/ -#define TXN_API_CALL_NOCONF(s, h, n, dh) \ - do { \ - bool __autotxn = false, __update = false; \ - API_CALL_NOCONF(s, h, n, dh); \ - __wt_txn_timestamp_flags(s); \ - __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING); \ - if (__autotxn) \ - F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \ - __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \ - if (__update) \ - F_SET(&(s)->txn, WT_TXN_UPDATE); +#define TXN_API_CALL_NOCONF(s, h, n, dh) \ + do { \ + bool __autotxn = false, __update = false; \ + API_CALL_NOCONF(s, h, n, dh); \ + __wt_txn_timestamp_flags(s); \ + __autotxn = !F_ISSET((s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING); \ + if (__autotxn) \ + F_SET((s)->txn, WT_TXN_AUTOCOMMIT); \ + __update = !F_ISSET((s)->txn, WT_TXN_UPDATE); \ + if (__update) \ + F_SET((s)->txn, WT_TXN_UPDATE); /* End a transactional API call, optional retry on deadlock. */ #define TXN_API_END_RETRY(s, ret, retry) \ API_END(s, ret); \ if (__update) \ - F_CLR(&(s)->txn, WT_TXN_UPDATE); \ + F_CLR((s)->txn, WT_TXN_UPDATE); \ if (__autotxn) { \ - if (F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT)) \ - F_CLR(&(s)->txn, WT_TXN_AUTOCOMMIT); \ + if (F_ISSET((s)->txn, WT_TXN_AUTOCOMMIT)) \ + F_CLR((s)->txn, WT_TXN_AUTOCOMMIT); \ else if ((ret) == 0) \ (ret) = __wt_txn_commit((s), NULL); \ else { \ diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index bffb93036d6..6985cce0508 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -447,15 +447,6 @@ struct __wt_page_modify { WT_CELL **discard; size_t discard_entries; size_t discard_allocated; - - /* Cached overflow value cell/update address pairs. 
*/ - struct { - WT_CELL *cell; - uint8_t *data; - size_t size; - } * remove; - size_t remove_allocated; - uint32_t remove_next; } * ovfl_track; #define WT_PAGE_LOCK(s, p) __wt_spin_lock((s), &(p)->modify->page_lock) @@ -485,8 +476,7 @@ struct __wt_page_modify { #define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */ uint8_t rec_result; /* Reconciliation state */ -#define WT_PAGE_RS_HS 0x1 -#define WT_PAGE_RS_RESTORED 0x2 +#define WT_PAGE_RS_RESTORED 0x1 uint8_t restore_state; /* Created by restoring updates */ }; diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index b0ff54c70b2..7cbfddbd381 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -555,8 +555,8 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page) } /* Check if this is the largest transaction ID to update the page. */ - if (WT_TXNID_LT(page->modify->update_txn, session->txn.id)) - page->modify->update_txn = session->txn.id; + if (WT_TXNID_LT(page->modify->update_txn, session->txn->id)) + page->modify->update_txn = session->txn->id; } /* diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index 698cea9447c..64f1efe1201 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -170,9 +170,9 @@ struct __wt_cache { uint32_t evict_aggressive_score; /* - * Score of how often LRU queues are empty on refill. This score varies - * between 0 (if the queue hasn't been empty for a long time) and 100 - * (if the queue has been empty the last 10 times we filled up. + * Score of how often LRU queues are empty on refill. This score varies between 0 (if the queue + * hasn't been empty for a long time) and 100 (if the queue has been empty the last 10 times we + * filled up. 
*/ uint32_t evict_empty_score; diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i index b96f079f5bd..d8d11943c94 100644 --- a/src/third_party/wiredtiger/src/include/cache.i +++ b/src/third_party/wiredtiger/src/include/cache.i @@ -374,7 +374,7 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool readonly, bo { WT_BTREE *btree; WT_TXN_GLOBAL *txn_global; - WT_TXN_STATE *txn_state; + WT_TXN_SHARED *txn_shared; double pct_full; if (didworkp != NULL) @@ -387,9 +387,9 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool readonly, bo * sure there is free space in the cache. */ txn_global = &S2C(session)->txn_global; - txn_state = WT_SESSION_TXN_STATE(session); - busy = busy || txn_state->id != WT_TXN_NONE || session->nhazard > 0 || - (txn_state->pinned_id != WT_TXN_NONE && txn_global->current != txn_global->oldest_id); + txn_shared = WT_SESSION_TXN_SHARED(session); + busy = busy || txn_shared->id != WT_TXN_NONE || session->nhazard > 0 || + (txn_shared->pinned_id != WT_TXN_NONE && txn_global->current != txn_global->oldest_id); /* * LSM sets the "ignore cache size" flag when holding the LSM tree lock, in that case, or when diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i index 4d8b83b4d34..14de00f80c3 100644 --- a/src/third_party/wiredtiger/src/include/cursor.i +++ b/src/third_party/wiredtiger/src/include/cursor.i @@ -219,9 +219,8 @@ __cursor_reset(WT_CURSOR_BTREE *cbt) cbt->page_deleted_count = 0; /* - * Release any page references we're holding. This can trigger eviction - * (e.g., forced eviction of big pages), so it's important to do after - * releasing our snapshot above. + * Release any page references we're holding. This can trigger eviction (e.g., forced eviction + * of big pages), so it's important to do after releasing our snapshot above. * * Clear the reference regardless, so we don't try the release twice. 
*/ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 56363846dfe..0888eeee453 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -928,14 +928,15 @@ extern int __wt_logop_row_truncate_unpack(WT_SESSION_IMPL *session, const uint8_ const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_txn_timestamp_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, - uint64_t time_sec, uint64_t time_nsec, uint64_t commit_ts, uint64_t durable_ts, uint64_t first_ts, - uint64_t prepare_ts, uint64_t read_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); + uint64_t time_sec, uint64_t time_nsec, uint64_t commit_ts, uint64_t durable_ts, + uint64_t first_commit_ts, uint64_t prepare_ts, uint64_t read_ts, uint64_t pinned_read_ts) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_txn_timestamp_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_txn_timestamp_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint64_t *time_secp, uint64_t *time_nsecp, uint64_t *commit_tsp, - uint64_t *durable_tsp, uint64_t *first_tsp, uint64_t *prepare_tsp, uint64_t *read_tsp) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); + uint64_t *durable_tsp, uint64_t *first_commit_tsp, uint64_t *prepare_tsp, uint64_t *read_tsp, + uint64_t *pinned_read_tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, @@ -1126,8 +1127,8 @@ extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE 
*page, WT_CEL WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, - bool evicting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uint8_t *addr, size_t addr_size, const void *value, size_t value_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -1530,8 +1531,8 @@ extern int __wt_verbose_dump_sessions(WT_SESSION_IMPL *session, bool show_cursor WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_TXN *txn, int error_code, - const char *error_string) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_SESSION_IMPL *txn_session, + int error_code, const char *error_string) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_verbose_dump_update(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) @@ -1701,7 +1702,6 @@ extern void __wt_optrack_record_funcid( WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp); extern void __wt_os_stdio(WT_SESSION_IMPL *session); extern void __wt_ovfl_discard_free(WT_SESSION_IMPL *session, WT_PAGE *page); -extern void __wt_ovfl_discard_remove(WT_SESSION_IMPL *session, WT_PAGE *page); extern void __wt_ovfl_reuse_free(WT_SESSION_IMPL *session, WT_PAGE *page); extern void 
__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep); extern void __wt_print_huffman_code(void *huffman_arg, uint16_t symbol); @@ -1759,8 +1759,8 @@ extern void __wt_txn_destroy(WT_SESSION_IMPL *session); extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session); extern void __wt_txn_global_destroy(WT_SESSION_IMPL *session); extern void __wt_txn_op_free(WT_SESSION_IMPL *session, WT_TXN_OP *op); +extern void __wt_txn_publish_durable_timestamp(WT_SESSION_IMPL *session); extern void __wt_txn_publish_read_timestamp(WT_SESSION_IMPL *session); -extern void __wt_txn_publish_timestamp(WT_SESSION_IMPL *session); extern void __wt_txn_release(WT_SESSION_IMPL *session); extern void __wt_txn_release_resources(WT_SESSION_IMPL *session); extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session); diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h index 2eefed10cf7..43581c7cc1f 100644 --- a/src/third_party/wiredtiger/src/include/reconcile.h +++ b/src/third_party/wiredtiger/src/include/reconcile.h @@ -25,10 +25,7 @@ struct __wt_reconcile { uint64_t orig_btree_checkpoint_gen; uint64_t orig_txn_checkpoint_gen; - /* - * Track the oldest running transaction and whether to skew history store to the newest update. - */ - bool hs_skew_newest; + /* Track the oldest running transaction. */ uint64_t last_running; /* Track the page's min/maximum transactions. */ diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index 98be0b299ce..bd877622ca1 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -129,7 +129,7 @@ struct __wt_session_impl { WT_ITEM err; /* Error buffer */ WT_TXN_ISOLATION isolation; - WT_TXN txn; /* Transaction state */ + WT_TXN *txn; /* Transaction state */ #define WT_SESSION_BG_SYNC_MSEC 1200000 WT_LSN bg_sync_lsn; /* Background sync operation LSN. 
*/ @@ -145,11 +145,10 @@ struct __wt_session_impl { /* * Operations acting on handles. * - * The preferred pattern is to gather all of the required handles at - * the beginning of an operation, then drop any other locks, perform - * the operation, then release the handles. This cannot be easily - * merged with the list of checkpoint handles because some operations - * (such as compact) do checkpoints internally. + * The preferred pattern is to gather all of the required handles at the beginning of an + * operation, then drop any other locks, perform the operation, then release the handles. This + * cannot be easily merged with the list of checkpoint handles because some operations (such as + * compact) do checkpoints internally. */ WT_DATA_HANDLE **op_handle; /* Handle list */ u_int op_handle_next; /* Next empty slot */ @@ -190,8 +189,9 @@ struct __wt_session_impl { #define WT_SESSION_QUIET_CORRUPT_FILE 0x02000000u #define WT_SESSION_READ_WONT_NEED 0x04000000u #define WT_SESSION_RESOLVING_TXN 0x08000000u -#define WT_SESSION_SCHEMA_TXN 0x10000000u -#define WT_SESSION_SERVER_ASYNC 0x20000000u +#define WT_SESSION_ROLLBACK_TO_STABLE 0x10000000u +#define WT_SESSION_SCHEMA_TXN 0x20000000u +#define WT_SESSION_SERVER_ASYNC 0x40000000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t flags; @@ -271,9 +271,3 @@ struct __wt_session_impl { WT_SESSION_STATS stats; }; - -/* - * Rollback to stable should ignore tombstones in the history store since it needs to scan the - * entire table sequentially. 
- */ -#define WT_SESSION_ROLLBACK_TO_STABLE_FLAGS (WT_SESSION_IGNORE_HS_TOMBSTONE) diff --git a/src/third_party/wiredtiger/src/include/time.i b/src/third_party/wiredtiger/src/include/time.i index 759b8338370..cff5e0850ea 100644 --- a/src/third_party/wiredtiger/src/include/time.i +++ b/src/third_party/wiredtiger/src/include/time.i @@ -163,7 +163,7 @@ __wt_op_timer_start(WT_SESSION_IMPL *session) uint64_t timeout_us; /* Timer can be configured per-transaction, and defaults to per-connection. */ - if ((timeout_us = session->txn.operation_timeout_us) == 0) + if (session->txn == NULL || (timeout_us = session->txn->operation_timeout_us) == 0) timeout_us = S2C(session)->operation_timeout_us; if (timeout_us == 0) session->operation_start_us = session->operation_timeout_us = 0; diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index aedc94a96a2..fd54e279171 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -46,7 +46,7 @@ typedef enum { #define WT_TXNID_LT(t1, t2) ((t1) < (t2)) -#define WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id]) +#define WT_SESSION_TXN_SHARED(s) (&S2C(s)->txn_global.txn_shared_list[(s)->id]) #define WT_SESSION_IS_CHECKPOINT(s) ((s)->id != 0 && (s)->id == S2C(s)->txn_global.checkpoint_id) @@ -72,39 +72,59 @@ typedef enum { /* * Perform an operation at the specified isolation level. * - * This is fiddly: we can't cope with operations that begin transactions - * (leaving an ID allocated), and operations must not move our published - * snap_min forwards (or updates we need could be freed while this operation is - * in progress). Check for those cases: the bugs they cause are hard to debug. + * This is fiddly: we can't cope with operations that begin transactions (leaving an ID allocated), + * and operations must not move our published snap_min forwards (or updates we need could be freed + * while this operation is in progress). 
Check for those cases: the bugs they cause are hard to + * debug. */ -#define WT_WITH_TXN_ISOLATION(s, iso, op) \ - do { \ - WT_TXN_ISOLATION saved_iso = (s)->isolation; \ - WT_TXN_ISOLATION saved_txn_iso = (s)->txn.isolation; \ - WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(s); \ - WT_TXN_STATE saved_state = *txn_state; \ - (s)->txn.forced_iso++; \ - (s)->isolation = (s)->txn.isolation = (iso); \ - op; \ - (s)->isolation = saved_iso; \ - (s)->txn.isolation = saved_txn_iso; \ - WT_ASSERT((s), (s)->txn.forced_iso > 0); \ - (s)->txn.forced_iso--; \ - WT_ASSERT((s), txn_state->id == saved_state.id && \ - (txn_state->metadata_pinned == saved_state.metadata_pinned || \ - saved_state.metadata_pinned == WT_TXN_NONE) && \ - (txn_state->pinned_id == saved_state.pinned_id || \ - saved_state.pinned_id == WT_TXN_NONE)); \ - txn_state->metadata_pinned = saved_state.metadata_pinned; \ - txn_state->pinned_id = saved_state.pinned_id; \ +#define WT_WITH_TXN_ISOLATION(s, iso, op) \ + do { \ + WT_TXN_ISOLATION saved_iso = (s)->isolation; \ + WT_TXN_ISOLATION saved_txn_iso = (s)->txn->isolation; \ + WT_TXN_SHARED *txn_shared = WT_SESSION_TXN_SHARED(s); \ + WT_TXN_SHARED saved_txn_shared = *txn_shared; \ + (s)->txn->forced_iso++; \ + (s)->isolation = (s)->txn->isolation = (iso); \ + op; \ + (s)->isolation = saved_iso; \ + (s)->txn->isolation = saved_txn_iso; \ + WT_ASSERT((s), (s)->txn->forced_iso > 0); \ + (s)->txn->forced_iso--; \ + WT_ASSERT((s), txn_shared->id == saved_txn_shared.id && \ + (txn_shared->metadata_pinned == saved_txn_shared.metadata_pinned || \ + saved_txn_shared.metadata_pinned == WT_TXN_NONE) && \ + (txn_shared->pinned_id == saved_txn_shared.pinned_id || \ + saved_txn_shared.pinned_id == WT_TXN_NONE)); \ + txn_shared->metadata_pinned = saved_txn_shared.metadata_pinned; \ + txn_shared->pinned_id = saved_txn_shared.pinned_id; \ } while (0) -struct __wt_txn_state { +struct __wt_txn_shared { WT_CACHE_LINE_PAD_BEGIN volatile uint64_t id; volatile uint64_t pinned_id; 
volatile uint64_t metadata_pinned; - volatile bool is_allocating; + + /* + * The first commit or durable timestamp used for this transaction. Determines its position in + * the durable queue and prevents the all_durable timestamp moving past this point. + */ + wt_timestamp_t pinned_durable_timestamp; + + /* + * Set to the first read timestamp used in the transaction. As part of our history store + * mechanism, we can move the read timestamp forward so we need to keep track of the original + * read timestamp to know what history should be pinned in front of oldest. + */ + wt_timestamp_t pinned_read_timestamp; + + TAILQ_ENTRY(__wt_txn_shared) read_timestampq; + TAILQ_ENTRY(__wt_txn_shared) durable_timestampq; + /* Set if need to clear from the durable queue */ + + volatile uint8_t is_allocating; + uint8_t clear_durable_q; + uint8_t clear_read_q; /* Set if need to clear from the read queue */ WT_CACHE_LINE_PAD_END }; @@ -144,12 +164,12 @@ struct __wt_txn_global { /* List of transactions sorted by durable timestamp. */ WT_RWLOCK durable_timestamp_rwlock; - TAILQ_HEAD(__wt_txn_dts_qh, __wt_txn) durable_timestamph; + TAILQ_HEAD(__wt_txn_dts_qh, __wt_txn_shared) durable_timestamph; uint32_t durable_timestampq_len; /* List of transactions sorted by read timestamp. 
*/ WT_RWLOCK read_timestamp_rwlock; - TAILQ_HEAD(__wt_txn_rts_qh, __wt_txn) read_timestamph; + TAILQ_HEAD(__wt_txn_rts_qh, __wt_txn_shared) read_timestamph; uint32_t read_timestampq_len; /* @@ -163,14 +183,14 @@ struct __wt_txn_global { */ volatile bool checkpoint_running; /* Checkpoint running */ volatile uint32_t checkpoint_id; /* Checkpoint's session ID */ - WT_TXN_STATE checkpoint_state; /* Checkpoint's txn state */ + WT_TXN_SHARED checkpoint_txn_shared; /* Checkpoint's txn shared state */ wt_timestamp_t checkpoint_timestamp; /* Checkpoint's timestamp */ volatile uint64_t debug_ops; /* Debug mode op counter */ uint64_t debug_rollback; /* Debug mode rollback */ volatile uint64_t metadata_pinned; /* Oldest ID for metadata */ - WT_TXN_STATE *states; /* Per-session transaction states */ + WT_TXN_SHARED *txn_shared_list; /* Per-session shared transaction states */ }; typedef enum __wt_txn_isolation { @@ -288,12 +308,6 @@ struct __wt_txn { /* Read updates committed as of this timestamp. */ wt_timestamp_t read_timestamp; - TAILQ_ENTRY(__wt_txn) durable_timestampq; - TAILQ_ENTRY(__wt_txn) read_timestampq; - /* Set if need to clear from the durable queue */ - bool clear_durable_q; - bool clear_read_q; /* Set if need to clear from the read queue */ - /* Array of modifications by this transaction. */ WT_TXN_OP *mod; size_t mod_alloc; @@ -322,7 +336,7 @@ struct __wt_txn { * WT_TXN_HAS_TS_DURABLE -- * The transaction has an explicitly set durable timestamp (that is, it * hasn't been mirrored from its commit timestamp value). - * WT_TXN_TS_PUBLISHED -- + * WT_TXN_SHARED_TS_DURABLE -- * The transaction has been published to the durable queue. Setting this * flag lets us know that, on release, we need to mark the transaction for * clearing. 
@@ -339,20 +353,26 @@ struct __wt_txn { #define WT_TXN_HAS_TS_READ 0x000080u #define WT_TXN_IGNORE_PREPARE 0x000100u #define WT_TXN_PREPARE 0x000200u -#define WT_TXN_PUBLIC_TS_READ 0x000400u -#define WT_TXN_READONLY 0x000800u -#define WT_TXN_RUNNING 0x001000u -#define WT_TXN_SYNC_SET 0x002000u -#define WT_TXN_TS_COMMIT_ALWAYS 0x004000u -#define WT_TXN_TS_COMMIT_KEYS 0x008000u -#define WT_TXN_TS_COMMIT_NEVER 0x010000u -#define WT_TXN_TS_DURABLE_ALWAYS 0x020000u -#define WT_TXN_TS_DURABLE_KEYS 0x040000u -#define WT_TXN_TS_DURABLE_NEVER 0x080000u -#define WT_TXN_TS_PUBLISHED 0x100000u +#define WT_TXN_READONLY 0x000400u +#define WT_TXN_RUNNING 0x000800u +#define WT_TXN_SHARED_TS_DURABLE 0x001000u +#define WT_TXN_SHARED_TS_READ 0x002000u +#define WT_TXN_SYNC_SET 0x004000u +#define WT_TXN_TS_COMMIT_ALWAYS 0x008000u +#define WT_TXN_TS_COMMIT_KEYS 0x010000u +#define WT_TXN_TS_COMMIT_NEVER 0x020000u +#define WT_TXN_TS_DURABLE_ALWAYS 0x040000u +#define WT_TXN_TS_DURABLE_KEYS 0x080000u +#define WT_TXN_TS_DURABLE_NEVER 0x100000u #define WT_TXN_TS_ROUND_PREPARED 0x200000u #define WT_TXN_TS_ROUND_READ 0x400000u #define WT_TXN_UPDATE 0x800000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t flags; + + /* + * Zero or more uint64_t values (the __snapshot array) immediately follow the WT_TXN structure. We use + * a C99 flexible array member which has the semantics we want. 
+ */ + uint64_t __snapshot[]; }; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 9d154b892ca..574eece2e5f 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -13,7 +13,7 @@ static inline int __wt_txn_context_prepare_check(WT_SESSION_IMPL *session) { - if (F_ISSET(&session->txn, WT_TXN_PREPARE)) + if (F_ISSET(session->txn, WT_TXN_PREPARE)) WT_RET_MSG(session, EINVAL, "not permitted in a prepared transaction"); return (0); } @@ -25,9 +25,9 @@ __wt_txn_context_prepare_check(WT_SESSION_IMPL *session) static inline int __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn) { - if (requires_txn && !F_ISSET(&session->txn, WT_TXN_RUNNING)) + if (requires_txn && !F_ISSET(session->txn, WT_TXN_RUNNING)) WT_RET_MSG(session, EINVAL, "only permitted in a running transaction"); - if (!requires_txn && F_ISSET(&session->txn, WT_TXN_RUNNING)) + if (!requires_txn && F_ISSET(session->txn, WT_TXN_RUNNING)) WT_RET_MSG(session, EINVAL, "not permitted in a running transaction"); return (0); } @@ -41,7 +41,7 @@ __wt_txn_err_set(WT_SESSION_IMPL *session, int ret) { WT_TXN *txn; - txn = &session->txn; + txn = session->txn; /* Ignore standard errors that don't fail the transaction. 
*/ if (ret == WT_NOTFOUND || ret == WT_DUPLICATE_KEY || ret == WT_PREPARE_CONFLICT) @@ -78,17 +78,17 @@ __wt_txn_timestamp_flags(WT_SESSION_IMPL *session) if (btree == NULL) return; if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS)) - F_SET(&session->txn, WT_TXN_TS_COMMIT_ALWAYS); + F_SET(session->txn, WT_TXN_TS_COMMIT_ALWAYS); if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_KEYS)) - F_SET(&session->txn, WT_TXN_TS_COMMIT_KEYS); + F_SET(session->txn, WT_TXN_TS_COMMIT_KEYS); if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER)) - F_SET(&session->txn, WT_TXN_TS_COMMIT_NEVER); + F_SET(session->txn, WT_TXN_TS_COMMIT_NEVER); if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_ALWAYS)) - F_SET(&session->txn, WT_TXN_TS_DURABLE_ALWAYS); + F_SET(session->txn, WT_TXN_TS_DURABLE_ALWAYS); if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_KEYS)) - F_SET(&session->txn, WT_TXN_TS_DURABLE_KEYS); + F_SET(session->txn, WT_TXN_TS_DURABLE_KEYS); if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_NEVER)) - F_SET(&session->txn, WT_TXN_TS_DURABLE_NEVER); + F_SET(session->txn, WT_TXN_TS_DURABLE_NEVER); } /* @@ -101,7 +101,7 @@ __wt_txn_op_set_recno(WT_SESSION_IMPL *session, uint64_t recno) WT_TXN *txn; WT_TXN_OP *op; - txn = &session->txn; + txn = session->txn; WT_ASSERT(session, txn->mod_count > 0 && recno != WT_RECNO_OOB); op = txn->mod + txn->mod_count - 1; @@ -132,7 +132,7 @@ __wt_txn_op_set_key(WT_SESSION_IMPL *session, const WT_ITEM *key) WT_TXN *txn; WT_TXN_OP *op; - txn = &session->txn; + txn = session->txn; WT_ASSERT(session, txn->mod_count > 0 && key->data != NULL); @@ -163,7 +163,7 @@ __txn_resolve_prepared_update(WT_SESSION_IMPL *session, WT_UPDATE *upd) { WT_TXN *txn; - txn = &session->txn; + txn = session->txn; /* * In case of a prepared transaction, the order of modification of the prepare timestamp to * commit timestamp in the update chain will not affect the data visibility, a reader will @@ -190,7 +190,7 @@ 
__txn_next_op(WT_SESSION_IMPL *session, WT_TXN_OP **opp) *opp = NULL; - txn = &session->txn; + txn = session->txn; /* * We're about to perform an update. Make sure we have allocated a transaction ID. @@ -219,7 +219,7 @@ __wt_txn_unmodify(WT_SESSION_IMPL *session) WT_TXN *txn; WT_TXN_OP *op; - txn = &session->txn; + txn = session->txn; if (F_ISSET(txn, WT_TXN_HAS_ID)) { WT_ASSERT(session, txn->mod_count > 0); --txn->mod_count; @@ -241,7 +241,7 @@ __wt_txn_op_apply_prepare_state(WT_SESSION_IMPL *session, WT_REF *ref, bool comm wt_timestamp_t ts; uint8_t prepare_state, previous_state; - txn = &session->txn; + txn = session->txn; /* * Lock the ref to ensure we don't race with eviction freeing the page deleted update list or @@ -285,7 +285,7 @@ __wt_txn_op_delete_commit_apply_timestamps(WT_SESSION_IMPL *session, WT_REF *ref WT_UPDATE **updp; uint8_t previous_state; - txn = &session->txn; + txn = session->txn; /* * Lock the ref to ensure we don't race with eviction freeing the page deleted update list or @@ -314,7 +314,7 @@ __wt_txn_op_set_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op) WT_UPDATE *upd; wt_timestamp_t *timestamp; - txn = &session->txn; + txn = session->txn; /* * Updates in the metadata never get timestamps (either now or at commit): metadata cannot be @@ -366,7 +366,7 @@ __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_TXN *txn; WT_TXN_OP *op; - txn = &session->txn; + txn = session->txn; if (F_ISSET(txn, WT_TXN_READONLY)) { if (F_ISSET(txn, WT_TXN_IGNORE_PREPARE)) @@ -393,7 +393,7 @@ __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd) /* History store bypasses transactions, transaction modify should never be called on it. 
*/ WT_ASSERT(session, !WT_IS_HS(S2BT(session))); - upd->txnid = session->txn.id; + upd->txnid = session->txn->id; __wt_txn_op_set_timestamp(session, op); return (0); @@ -410,7 +410,7 @@ __wt_txn_modify_page_delete(WT_SESSION_IMPL *session, WT_REF *ref) WT_TXN *txn; WT_TXN_OP *op; - txn = &session->txn; + txn = session->txn; WT_RET(__txn_next_op(session, &op)); op->type = WT_TXN_OP_REF_DELETE; @@ -472,7 +472,7 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) * If there is no active checkpoint or this handle is up to date with the active checkpoint then * it's safe to ignore the checkpoint ID in the visibility check. */ - checkpoint_pinned = txn_global->checkpoint_state.pinned_id; + checkpoint_pinned = txn_global->checkpoint_txn_shared.pinned_id; if (checkpoint_pinned == WT_TXN_NONE || WT_TXNID_LT(oldest_id, checkpoint_pinned)) return (oldest_id); @@ -593,7 +593,7 @@ __txn_visible_id(WT_SESSION_IMPL *session, uint64_t id) WT_TXN *txn; bool found; - txn = &session->txn; + txn = session->txn; /* Changes with no associated transaction are always visible. */ if (id == WT_TXN_NONE) @@ -642,13 +642,13 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp { WT_TXN *txn; - txn = &session->txn; + txn = session->txn; if (!__txn_visible_id(session, id)) return (false); /* Transactions read their writes, regardless of timestamps. */ - if (F_ISSET(&session->txn, WT_TXN_HAS_ID) && id == session->txn.id) + if (F_ISSET(session->txn, WT_TXN_HAS_ID) && id == session->txn->id) return (true); /* Timestamp check. */ @@ -694,7 +694,7 @@ __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd) /* Ignore the prepared update, if transaction configuration says so. */ if (prepare_state == WT_PREPARE_INPROGRESS) return ( - F_ISSET(&session->txn, WT_TXN_IGNORE_PREPARE) ? WT_VISIBLE_FALSE : WT_VISIBLE_PREPARE); + F_ISSET(session->txn, WT_TXN_IGNORE_PREPARE) ? 
WT_VISIBLE_FALSE : WT_VISIBLE_PREPARE); return (WT_VISIBLE_TRUE); } @@ -876,7 +876,7 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) { WT_TXN *txn; - txn = &session->txn; + txn = session->txn; txn->isolation = session->isolation; txn->txn_logsync = S2C(session)->txn_logsync; @@ -916,7 +916,7 @@ __wt_txn_autocommit_check(WT_SESSION_IMPL *session) { WT_TXN *txn; - txn = &session->txn; + txn = session->txn; if (F_ISSET(txn, WT_TXN_AUTOCOMMIT)) { F_CLR(txn, WT_TXN_AUTOCOMMIT); return (__wt_txn_begin(session, NULL)); @@ -933,10 +933,10 @@ static inline int __wt_txn_idle_cache_check(WT_SESSION_IMPL *session) { WT_TXN *txn; - WT_TXN_STATE *txn_state; + WT_TXN_SHARED *txn_shared; - txn = &session->txn; - txn_state = WT_SESSION_TXN_STATE(session); + txn = session->txn; + txn_shared = WT_SESSION_TXN_SHARED(session); /* * Check the published snap_min because read-uncommitted never sets WT_TXN_HAS_SNAPSHOT. We @@ -945,7 +945,7 @@ __wt_txn_idle_cache_check(WT_SESSION_IMPL *session) * necessary. */ if (F_ISSET(txn, WT_TXN_RUNNING) && !F_ISSET(txn, WT_TXN_HAS_ID) && - txn_state->pinned_id == WT_TXN_NONE) + txn_shared->pinned_id == WT_TXN_NONE) WT_RET(__wt_cache_eviction_check(session, false, true, NULL)); return (0); @@ -959,11 +959,11 @@ static inline uint64_t __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish) { WT_TXN_GLOBAL *txn_global; - WT_TXN_STATE *txn_state; + WT_TXN_SHARED *txn_shared; uint64_t id; txn_global = &S2C(session)->txn_global; - txn_state = WT_SESSION_TXN_STATE(session); + txn_shared = WT_SESSION_TXN_SHARED(session); /* * Allocating transaction IDs involves several steps. @@ -985,12 +985,12 @@ __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish) * well defined, we must use an atomic increment here. 
*/ if (publish) { - WT_PUBLISH(txn_state->is_allocating, true); - WT_PUBLISH(txn_state->id, txn_global->current); + WT_PUBLISH(txn_shared->is_allocating, true); + WT_PUBLISH(txn_shared->id, txn_global->current); id = __wt_atomic_addv64(&txn_global->current, 1) - 1; - session->txn.id = id; - WT_PUBLISH(txn_state->id, id); - WT_PUBLISH(txn_state->is_allocating, false); + session->txn->id = id; + WT_PUBLISH(txn_shared->id, id); + WT_PUBLISH(txn_shared->is_allocating, false); } else id = __wt_atomic_addv64(&txn_global->current, 1) - 1; @@ -1006,7 +1006,7 @@ __wt_txn_id_check(WT_SESSION_IMPL *session) { WT_TXN *txn; - txn = &session->txn; + txn = session->txn; WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING)); @@ -1038,20 +1038,21 @@ __wt_txn_search_check(WT_SESSION_IMPL *session) WT_BTREE *btree; WT_TXN *txn; - txn = &session->txn; btree = S2BT(session); + txn = session->txn; + /* * If the user says a table should always use a read timestamp, verify this transaction has one. * Same if it should never have a read timestamp. 
*/ if (!F_ISSET(S2C(session), WT_CONN_RECOVERING) && FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS) && - !F_ISSET(txn, WT_TXN_PUBLIC_TS_READ)) + !F_ISSET(txn, WT_TXN_SHARED_TS_READ)) WT_RET_MSG(session, EINVAL, "read_timestamp required and " "none set on this transaction"); if (FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_NEVER) && - F_ISSET(txn, WT_TXN_PUBLIC_TS_READ)) + F_ISSET(txn, WT_TXN_SHARED_TS_READ)) WT_RET_MSG(session, EINVAL, "no read_timestamp required and " "timestamp set on this transaction"); @@ -1072,7 +1073,7 @@ __wt_txn_update_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE bool ignore_prepare_set, rollback; rollback = false; - txn = &session->txn; + txn = session->txn; txn_global = &S2C(session)->txn_global; if (txn->isolation != WT_ISO_SNAPSHOT) @@ -1130,7 +1131,7 @@ __wt_txn_read_last(WT_SESSION_IMPL *session) { WT_TXN *txn; - txn = &session->txn; + txn = session->txn; /* * Release the snap_min ID we put in the global table. @@ -1152,11 +1153,11 @@ __wt_txn_cursor_op(WT_SESSION_IMPL *session) { WT_TXN *txn; WT_TXN_GLOBAL *txn_global; - WT_TXN_STATE *txn_state; + WT_TXN_SHARED *txn_shared; - txn = &session->txn; + txn = session->txn; txn_global = &S2C(session)->txn_global; - txn_state = WT_SESSION_TXN_STATE(session); + txn_shared = WT_SESSION_TXN_SHARED(session); /* * We are about to read data, which means we need to protect against @@ -1176,10 +1177,10 @@ __wt_txn_cursor_op(WT_SESSION_IMPL *session) * positioned on a value, it can't be freed. 
*/ if (txn->isolation == WT_ISO_READ_UNCOMMITTED) { - if (txn_state->pinned_id == WT_TXN_NONE) - txn_state->pinned_id = txn_global->last_running; - if (txn_state->metadata_pinned == WT_TXN_NONE) - txn_state->metadata_pinned = txn_state->pinned_id; + if (txn_shared->pinned_id == WT_TXN_NONE) + txn_shared->pinned_id = txn_global->last_running; + if (txn_shared->metadata_pinned == WT_TXN_NONE) + txn_shared->metadata_pinned = txn_shared->pinned_id; } else if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)) __wt_txn_get_snapshot(session); } diff --git a/src/third_party/wiredtiger/src/include/verify_build.h b/src/third_party/wiredtiger/src/include/verify_build.h index 23f1d16df95..5f131b7ab0c 100644 --- a/src/third_party/wiredtiger/src/include/verify_build.h +++ b/src/third_party/wiredtiger/src/include/verify_build.h @@ -65,7 +65,7 @@ __wt_verify_build(void) WT_STATIC_ASSERT( \ sizeof(s) > WT_CACHE_LINE_ALIGNMENT || sizeof(s) % WT_CACHE_LINE_ALIGNMENT == 0) WT_PADDING_CHECK(WT_LOGSLOT); - WT_PADDING_CHECK(WT_TXN_STATE); + WT_PADDING_CHECK(WT_TXN_SHARED); /* * The btree code encodes key/value pairs in size_t's, and requires at least 8B size_t's. diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index 31b8b740ed9..204e6fd0eb9 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -325,8 +325,8 @@ struct __wt_txn_op; typedef struct __wt_txn_op WT_TXN_OP; struct __wt_txn_printlog_args; typedef struct __wt_txn_printlog_args WT_TXN_PRINTLOG_ARGS; -struct __wt_txn_state; -typedef struct __wt_txn_state WT_TXN_STATE; +struct __wt_txn_shared; +typedef struct __wt_txn_shared WT_TXN_SHARED; struct __wt_update; typedef struct __wt_update WT_UPDATE; union __wt_lsn; |