From 835bfb21d8e67663d84a40aa4f7370a4403725a9 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 1 Aug 2017 16:42:49 +1000 Subject: Import wiredtiger: 2e9744d11a65c63ba7445060dc78371250f04051 from branch mongodb-3.6 ref: 6173a98979..2e9744d11a for: 3.5.11 WT-2309 Add yields and/or sleeps in #DIAGNOSTIC mode WT-3047 Add mode aimed at uncovering race conditions in split code WT-3308 Add statistics tracking around yield loops WT-3316 Add new engineering section to reference guide documentation WT-3338 Optimize cursor modify WT-3380 Special case 8-byte timestamps WT-3387 Add support for a stable timestamp WT-3389 Restructure split code to hold a split generation for the entire operation. WT-3406 Reconciliation is choosing reserved records for writing. WT-3410 Add developer documentation for table rename WT-3412 Add backoff logic to the btree delete and walk yield loops WT-3418 block manager object race WT-3422 WiredTiger upgrading documents out of date WT-3432 workgen needs braces around an "if" body WT-3433 session->alter method should not be supported in read-only mode WT-3439 lint/cleanup WT-3440 Add a log record when starting a checkpoint WT-3442 Coverity 1378213: false positive on diagnostic assignment. WT-3446 Temporarily disable timestamp testing in test/checkpoint WT-3447 test_stat_log02 can assert before table stats are printed WT-3461 Avoid long sleeps when the system clock is adjusted WT-3463 Add recovery of backup to test_timestamp03.py WT-3466 Track the first commit timestamp for each transaction WT-3467 Minor lint/cleanup --- src/third_party/wiredtiger/src/include/bitstring.i | 3 - src/third_party/wiredtiger/src/include/btmem.h | 67 ++++++----- src/third_party/wiredtiger/src/include/btree.i | 25 ++++- src/third_party/wiredtiger/src/include/buf.i | 24 ++-- src/third_party/wiredtiger/src/include/cell.i | 9 +- src/third_party/wiredtiger/src/include/cursor.i | 7 +- src/third_party/wiredtiger/src/include/dhandle.h | 6 +- src/third_party/wiredtiger/src/include/extern.h | 31 +++++- .../wiredtiger/src/include/extern_posix.h | 2 +- .../wiredtiger/src/include/extern_win.h | 2 +- src/third_party/wiredtiger/src/include/flags.h | 2 + src/third_party/wiredtiger/src/include/misc.h | 22 ++-- src/third_party/wiredtiger/src/include/misc.i | 39 ------- src/third_party/wiredtiger/src/include/serial.i | 15 +-- src/third_party/wiredtiger/src/include/stat.h | 8 ++ src/third_party/wiredtiger/src/include/txn.h | 29 ++++- src/third_party/wiredtiger/src/include/txn.i | 100 ++++++++++++----- .../wiredtiger/src/include/verify_build.h | 10 +- .../wiredtiger/src/include/wiredtiger.in | 123 +++++++++++++++------ .../wiredtiger/src/include/wt_internal.h | 2 + 20 files changed, 340 insertions(+), 186 deletions(-) (limited to 'src/third_party/wiredtiger/src/include') diff --git a/src/third_party/wiredtiger/src/include/bitstring.i b/src/third_party/wiredtiger/src/include/bitstring.i index a9ec91d49ff..bd14fa613a8 100644 --- a/src/third_party/wiredtiger/src/include/bitstring.i +++ b/src/third_party/wiredtiger/src/include/bitstring.i @@ -166,8 +166,6 @@ __bit_ffc(uint8_t *bitf, uint64_t nbits, uint64_t *retp) uint8_t lb; uint64_t byte, stopbyte, value; - value = 0; /* -Wuninitialized */ - if (nbits == 0) return (-1); @@ -199,7 +197,6 @@ __bit_ffs(uint8_t *bitf, uint64_t nbits, uint64_t *retp) uint8_t lb; uint64_t byte, stopbyte, value; - value = 0; if (nbits == 0) return (-1); diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index e8d3307b013..01a9179aedc 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -263,17 +263,17 @@ struct __wt_page_modify { void *disk_image; /* - * List of unresolved updates. Updates are either a WT_INSERT - * or a row-store leaf page entry; when creating lookaside - * records, there is an additional value, the committed item's - * transaction ID. + * List of unresolved updates. Updates are either a row-store + * insert or update list, or column-store insert list. When + * creating lookaside records, there is an additional value, + * the committed item's transaction information. * * If there are unresolved updates, the block wasn't written and * there will always be a disk image. */ struct __wt_save_upd { - WT_INSERT *ins; - WT_ROW *rip; + WT_INSERT *ins; /* Insert list reference */ + WT_ROW *ripcip; /* Original on-page reference */ uint64_t onpage_txn; WT_DECL_TIMESTAMP(onpage_timestamp) } *supd; @@ -695,7 +695,7 @@ struct __wt_page { * Related information for fast-delete, on-disk pages. */ struct __wt_page_deleted { - uint64_t txnid; /* Transaction ID */ + volatile uint64_t txnid; /* Transaction ID */ WT_DECL_TIMESTAMP(timestamp) WT_UPDATE **update_list; /* List of updates for abort */ @@ -885,42 +885,59 @@ struct __wt_ikey { * is done for an entry, WT_UPDATE structures are formed into a forward-linked * list. */ -WT_PACKED_STRUCT_BEGIN(__wt_update) - uint64_t txnid; /* transaction */ - WT_DECL_TIMESTAMP(timestamp) +struct __wt_update { + volatile uint64_t txnid; /* transaction ID */ +#if WT_TIMESTAMP_SIZE == 8 + WT_DECL_TIMESTAMP(timestamp) /* aligned uint64_t timestamp */ +#endif WT_UPDATE *next; /* forward-linked list */ uint32_t size; /* data length */ -#define WT_UPDATE_STANDARD 0 -#define WT_UPDATE_DELETED 1 -#define WT_UPDATE_RESERVED 2 +#define WT_UPDATE_DELETED 0 /* deleted */ +#define WT_UPDATE_MODIFIED 1 /* partial-update modify value */ +#define WT_UPDATE_RESERVED 2 /* reserved */ +#define WT_UPDATE_STANDARD 3 /* complete value */ uint8_t type; /* type (one byte to conserve memory) */ - /* The update includes a complete value. */ + /* If the update includes a complete value. */ #define WT_UPDATE_DATA_VALUE(upd) \ ((upd)->type == WT_UPDATE_STANDARD || (upd)->type == WT_UPDATE_DELETED) - /* The untyped value immediately follows the WT_UPDATE structure. */ -#define WT_UPDATE_DATA(upd) \ - ((void *)((uint8_t *)(upd) + sizeof(WT_UPDATE))) +#if WT_TIMESTAMP_SIZE != 8 + WT_DECL_TIMESTAMP(timestamp) /* unaligned uint8_t array timestamp */ +#endif /* - * The memory size of an update: include some padding because this is - * such a common case that overhead of tiny allocations can swamp our - * cache overhead calculation. + * Zero or more bytes of value (the payload) immediately follows the + * WT_UPDATE structure. We use a C99 flexible array member which has + * the semantics we want. */ -#define WT_UPDATE_MEMSIZE(upd) \ - WT_ALIGN(sizeof(WT_UPDATE) + (upd)->size, 32) -WT_PACKED_STRUCT_END + uint8_t data[]; /* start of the data */ +}; /* - * WT_UPDATE_SIZE is the expected structure size -- we verify the build to - * ensure the compiler hasn't inserted padding. + * WT_UPDATE_SIZE is the expected structure size excluding the payload data -- + * we verify the build to ensure the compiler hasn't inserted padding. */ #define WT_UPDATE_SIZE (21 + WT_TIMESTAMP_SIZE) +/* + * The memory size of an update: include some padding because this is such a + * common case that overhead of tiny allocations can swamp our cache overhead + * calculation. + */ +#define WT_UPDATE_MEMSIZE(upd) \ + WT_ALIGN(WT_UPDATE_SIZE + (upd)->size, 32) + +/* + * WT_MAX_MODIFY_UPDATE -- + * Limit update chains to a small value to avoid penalizing reads and + * permit truncation. + */ +#define WT_MAX_MODIFY_UPDATE 100 + /* * WT_INSERT -- * diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 305de509424..216c99b1d9e 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -1341,8 +1341,8 @@ __wt_page_can_evict( * If the page is clean but has modifications that appear too new to * evict, skip it. */ - if (!modified && !__wt_txn_visible_all( - session, mod->rec_max_txn, WT_TIMESTAMP(mod->rec_max_timestamp))) + if (!modified && !__wt_txn_visible_all(session, + mod->rec_max_txn, WT_TIMESTAMP_NULL(&mod->rec_max_timestamp))) return (false); return (true); @@ -1602,3 +1602,24 @@ __wt_split_descent_race( WT_INTL_INDEX_GET(session, ref->home, pindex); return (pindex != saved_pindex); } + +/* + * __wt_ref_state_yield_sleep -- + * sleep while waiting for the wt_ref state after THOUSAND yields. + */ +static inline void +__wt_ref_state_yield_sleep(uint64_t *yield_count, uint64_t *sleep_count) +{ + /* + * We yield before retrying, and if we've yielded enough times, start + * sleeping so we don't burn CPU to no purpose. + */ + if ((*yield_count) < WT_THOUSAND) { + (*yield_count)++; + __wt_yield(); + return; + } + + (*sleep_count) = WT_MIN((*sleep_count) + WT_THOUSAND, 10 * WT_THOUSAND); + __wt_sleep(0, (*sleep_count)); +} diff --git a/src/third_party/wiredtiger/src/include/buf.i b/src/third_party/wiredtiger/src/include/buf.i index 17f67afefce..8ff52f86ced 100644 --- a/src/third_party/wiredtiger/src/include/buf.i +++ b/src/third_party/wiredtiger/src/include/buf.i @@ -116,18 +116,18 @@ __wt_scr_free(WT_SESSION_IMPL *session, WT_ITEM **bufp) { WT_ITEM *buf; - if ((buf = *bufp) != NULL) { - *bufp = NULL; + if ((buf = *bufp) == NULL) + return; + *bufp = NULL; - if (session->scratch_cached + buf->memsize >= - S2C(session)->session_scratch_max) { - __wt_free(session, buf->mem); - buf->memsize = 0; - } else - session->scratch_cached += buf->memsize; + if (session->scratch_cached + buf->memsize >= + S2C(session)->session_scratch_max) { + __wt_free(session, buf->mem); + buf->memsize = 0; + } else + session->scratch_cached += buf->memsize; - buf->data = NULL; - buf->size = 0; - F_CLR(buf, WT_ITEM_INUSE); - } + buf->data = NULL; + buf->size = 0; + F_CLR(buf, WT_ITEM_INUSE); } diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i index 0dbf29d21c3..52e9f3c9637 100644 --- a/src/third_party/wiredtiger/src/include/cell.i +++ b/src/third_party/wiredtiger/src/include/cell.i @@ -730,6 +730,7 @@ __cell_data_ref(WT_SESSION_IMPL *session, { WT_BTREE *btree; void *huffman; + bool decoded; btree = S2BT(session); @@ -749,14 +750,16 @@ __cell_data_ref(WT_SESSION_IMPL *session, huffman = btree->huffman_value; break; case WT_CELL_KEY_OVFL: - WT_RET(__wt_ovfl_read(session, page, unpack, store)); - if (page_type == WT_PAGE_ROW_INT) + WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded)); + if (page_type == WT_PAGE_ROW_INT || decoded) return (0); huffman = btree->huffman_key; break; case WT_CELL_VALUE_OVFL: - WT_RET(__wt_ovfl_read(session, page, unpack, store)); + WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded)); + if (decoded) + return (0); huffman = btree->huffman_value; break; WT_ILLEGAL_VALUE(session); diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i index 75fd935fc91..e49a9258329 100644 --- a/src/third_party/wiredtiger/src/include/cursor.i +++ b/src/third_party/wiredtiger/src/include/cursor.i @@ -441,11 +441,8 @@ value: * caller passes us the update: it has already resolved which one * (if any) is visible. */ - if (upd != NULL) { - vb->data = WT_UPDATE_DATA(upd); - vb->size = upd->size; - return (0); - } + if (upd != NULL) + return (__wt_value_return(session, cbt, upd)); /* Else, simple values have their location encoded in the WT_ROW. */ if (__wt_row_leaf_value(page, rip, vb)) diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h index 0db59d45691..32574f05fa1 100644 --- a/src/third_party/wiredtiger/src/include/dhandle.h +++ b/src/third_party/wiredtiger/src/include/dhandle.h @@ -99,9 +99,9 @@ struct __wt_data_handle { /* Flags values over 0xff are reserved for WT_BTREE_* */ #define WT_DHANDLE_DEAD 0x01 /* Dead, awaiting discard */ -#define WT_DHANDLE_DISCARD 0x02 /* Discard on release */ -#define WT_DHANDLE_DISCARD_FORCE 0x04 /* Force discard on release */ -#define WT_DHANDLE_EXCLUSIVE 0x08 /* Need exclusive access */ +#define WT_DHANDLE_DISCARD 0x02 /* Close on release */ +#define WT_DHANDLE_DISCARD_KILL 0x04 /* Mark dead on release */ +#define WT_DHANDLE_EXCLUSIVE 0x08 /* Exclusive access */ #define WT_DHANDLE_IS_METADATA 0x10 /* Metadata handle */ #define WT_DHANDLE_LOCK_ONLY 0x20 /* Handle only used as a lock */ #define WT_DHANDLE_OPEN 0x40 /* Handle is open */ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 8b48fd587bd..7c9806788bb 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -105,6 +105,7 @@ extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC extern int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -147,7 +148,7 @@ extern const char *__wt_page_type_string(u_int type) WT_GCC_FUNC_DECL_ATTRIBUTE( extern const char *__wt_cell_type_string(uint8_t type); extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf); extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf); -extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd_list, WT_CELL_UNPACK *unpack) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -164,7 +165,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags ); extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_value_return( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -258,11 +259,11 @@ extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_ extern void __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize); extern int __wt_conn_dhandle_alloc( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_conn_dhandle_find( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_btree_sync_and_close( WT_SESSION_IMPL *session, bool final, bool mark_dead) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_conn_btree_open( WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *uri, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool final, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *uri, bool mark_dead) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool final, bool mark_dead) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_connection_destroy(WT_CONNECTION_IMPL *conn); @@ -362,6 +363,7 @@ extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_AT extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session); +extern int __wt_log_printf(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn); extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn); @@ -388,6 +390,9 @@ extern int __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **lo extern void __wt_logrec_free(WT_SESSION_IMPL *session, WT_ITEM **logrecp); extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *rectypep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep, uint32_t *opsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_modify_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_modify_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_modify_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_col_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_col_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -397,6 +402,9 @@ extern int __wt_logop_col_remove_print(WT_SESSION_IMPL *session, const uint8_t * extern int __wt_logop_col_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_col_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_col_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_modify_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_modify_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_modify_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_row_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_row_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_row_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -406,6 +414,12 @@ extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t * extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_checkpoint_start_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_checkpoint_start_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_checkpoint_start_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_prev_lsn_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *prev_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_prev_lsn_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *prev_lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_prev_lsn_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -691,6 +705,9 @@ extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg); extern void __wt_print_huffman_code(void *huffman_arg, uint16_t symbol); extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_modify_pack(WT_SESSION_IMPL *session, WT_ITEM **modifyp, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_modify_apply_api( WT_SESSION_IMPL *session, WT_ITEM *value, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_modify_apply(WT_SESSION_IMPL *session, WT_ITEM *value, const void *modify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -747,6 +764,8 @@ extern int __wt_thread_group_create( WT_SESSION_IMPL *session, WT_THREAD_GROUP * extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_thread_group_start_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool is_locked); extern void __wt_thread_group_stop_one(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group); +extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_seconds(WT_SESSION_IMPL *session, time_t *timep); extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session); extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session); extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -786,7 +805,7 @@ extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_recover(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name, uint8_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_global_query_timestamp( WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/include/extern_posix.h b/src/third_party/wiredtiger/src/include/extern_posix.h index b6b5ac51f73..864a40aa325 100644 --- a/src/third_party/wiredtiger/src/include/extern_posix.h +++ b/src/third_party/wiredtiger/src/include/extern_posix.h @@ -28,5 +28,5 @@ extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, co extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp); extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); diff --git a/src/third_party/wiredtiger/src/include/extern_win.h b/src/third_party/wiredtiger/src/include/extern_win.h index d548ee0b2ec..85db8175615 100644 --- a/src/third_party/wiredtiger/src/include/extern_win.h +++ b/src/third_party/wiredtiger/src/include/extern_win.h @@ -26,7 +26,7 @@ extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, co extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); +extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp); extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern DWORD __wt_getlasterror(void); diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h index ef66a186fa4..4f7b59c7849 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ b/src/third_party/wiredtiger/src/include/flags.h @@ -79,6 +79,8 @@ #define WT_STAT_TYPE_SIZE 0x00000040 #define WT_STAT_TYPE_TREE_WALK 0x00000080 #define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x00000001 +#define WT_TIMING_STRESS_INTERNAL_PAGE_SPLIT_RACE 0x00000002 +#define WT_TIMING_STRESS_PAGE_SPLIT_RACE 0x00000004 #define WT_TXN_LOG_CKPT_CLEANUP 0x00000001 #define WT_TXN_LOG_CKPT_PREPARE 0x00000002 #define WT_TXN_LOG_CKPT_START 0x00000004 diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h index 838086c2ced..bf7d36e19ca 100644 --- a/src/third_party/wiredtiger/src/include/misc.h +++ b/src/third_party/wiredtiger/src/include/misc.h @@ -251,22 +251,28 @@ /* Timestamp type and helper macros. */ #if WT_TIMESTAMP_SIZE > 0 -#define HAVE_TIMESTAMPS 1 +#define HAVE_TIMESTAMPS #else -#undef HAVE_TIMESTAMPS +#undef HAVE_TIMESTAMPS #endif #ifdef HAVE_TIMESTAMPS -#define WT_TIMESTAMP(x) (x) -typedef uint8_t wt_timestamp_t[WT_TIMESTAMP_SIZE]; -#define WT_DECL_TIMESTAMP(x) wt_timestamp_t x; +struct __wt_timestamp_t { +#if WT_TIMESTAMP_SIZE == 8 + uint64_t val; #else -#define WT_TIMESTAMP(x) (NULL) + uint8_t ts[WT_TIMESTAMP_SIZE]; +#endif +}; +typedef struct __wt_timestamp_t wt_timestamp_t; +#define WT_DECL_TIMESTAMP(x) wt_timestamp_t x; +#define WT_TIMESTAMP_NULL(x) (x) +#else +typedef void wt_timestamp_t; +#define WT_TIMESTAMP_NULL(x) (NULL) #define WT_DECL_TIMESTAMP(x) #endif -#define WT_GET_TIMESTAMP(x) WT_TIMESTAMP((x)->timestamp) - /* * In diagnostic mode we track the locations from which hazard pointers and * scratch buffers were acquired. diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i index bb785a63072..dbb921f0946 100644 --- a/src/third_party/wiredtiger/src/include/misc.i +++ b/src/third_party/wiredtiger/src/include/misc.i @@ -40,45 +40,6 @@ __wt_strdup(WT_SESSION_IMPL *session, const char *str, void *retp) session, str, (str == NULL) ? 0 : strlen(str), retp)); } -/* - * __wt_seconds -- - * Return the seconds since the Epoch. - */ -static inline void -__wt_seconds(WT_SESSION_IMPL *session, time_t *timep) -{ - struct timespec t; - - __wt_epoch(session, &t); - - *timep = t.tv_sec; -} - -/* - * __wt_time_check_monotonic -- - * Check and prevent time running backward. If we detect that it has, we - * set the time structure to the previous values, making time stand still - * until we see a time in the future of the highest value seen so far. - */ -static inline void -__wt_time_check_monotonic(WT_SESSION_IMPL *session, struct timespec *tsp) -{ - /* - * Detect time going backward. If so, use the last - * saved timestamp. - */ - if (session == NULL) - return; - - if (tsp->tv_sec < session->last_epoch.tv_sec || - (tsp->tv_sec == session->last_epoch.tv_sec && - tsp->tv_nsec < session->last_epoch.tv_nsec)) { - WT_STAT_CONN_INCR(session, time_travel); - *tsp = session->last_epoch; - } else - session->last_epoch = *tsp; -} - /* * __wt_snprintf -- * snprintf convenience function, ignoring the returned size. diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i index 15d159192f9..d9c72cd2bad 100644 --- a/src/third_party/wiredtiger/src/include/serial.i +++ b/src/third_party/wiredtiger/src/include/serial.i @@ -259,8 +259,7 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, */ static inline int __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, - WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size, - bool exclusive) + WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size, bool exclusive) { WT_DECL_RET; WT_UPDATE *obsolete, *upd = *updp; @@ -290,19 +289,17 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, } /* - * Increment in-memory footprint after releasing the mutex: that's safe - * because the structures we added cannot be discarded while visible to - * any running transaction, and we're a running transaction, which means - * there can be no corresponding delete until we complete. + * Increment in-memory footprint after swapping the update into place. + * Safe because the structures we added cannot be discarded while + * visible to any running transaction, and we're a running transaction, + * which means there can be no corresponding delete until we complete. */ __wt_cache_page_inmem_incr(session, page, upd_size); /* Mark the page dirty after updating the footprint. */ __wt_page_modify_set(session, page); - /* - * If there are no subsequent WT_UPDATE structures we are done here. - */ + /* If there are no subsequent WT_UPDATE structures we are done here. */ if (upd->next == NULL || exclusive) return (0); diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index b340b278684..7ffc1b69c12 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -502,11 +502,19 @@ struct __wt_connection_stats { int64_t thread_write_active; int64_t application_evict_time; int64_t application_cache_time; + int64_t txn_release_blocked; + int64_t conn_close_blocked_lsm; + int64_t dhandle_lock_blocked; + int64_t page_index_slot_ref_blocked; + int64_t log_server_sync_blocked; int64_t page_busy_blocked; int64_t page_forcible_evict_blocked; int64_t page_locked_blocked; int64_t page_read_blocked; int64_t page_sleep; + int64_t page_del_rollback_blocked; + int64_t child_modify_blocked_page; + int64_t tree_descend_blocked; int64_t txn_snapshots_created; int64_t txn_snapshots_dropped; int64_t txn_begin; diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index e4cc0b04046..61ab343151c 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -92,8 +92,13 @@ struct __wt_txn_global { WT_DECL_TIMESTAMP(commit_timestamp) WT_DECL_TIMESTAMP(oldest_timestamp) WT_DECL_TIMESTAMP(pinned_timestamp) - bool has_commit_timestamp, has_oldest_timestamp, has_pinned_timestamp; + WT_DECL_TIMESTAMP(stable_timestamp) + bool has_commit_timestamp; + bool has_oldest_timestamp; + bool has_pinned_timestamp; + bool has_stable_timestamp; bool oldest_is_pinned; + bool stable_is_pinned; WT_SPINLOCK id_lock; @@ -200,7 +205,21 @@ struct __wt_txn { uint32_t snapshot_count; uint32_t txn_logsync; /* Log sync configuration */ + /* + * Timestamp copied into updates created by this transaction. + * + * In some use cases, this can be updated while the transaction is + * running. + */ WT_DECL_TIMESTAMP(commit_timestamp) + + /* + * Set to the first commit timestamp used in the transaction and fixed + * while the transaction is on the public list of committed timestamps. + */ + WT_DECL_TIMESTAMP(first_commit_timestamp) + + /* Read updates committed as of this timestamp. */ WT_DECL_TIMESTAMP(read_timestamp) TAILQ_ENTRY(__wt_txn) commit_timestampq; @@ -230,8 +249,10 @@ struct __wt_txn { #define WT_TXN_HAS_TS_COMMIT 0x010 #define WT_TXN_HAS_TS_READ 0x020 #define WT_TXN_NAMED_SNAPSHOT 0x040 -#define WT_TXN_READONLY 0x080 -#define WT_TXN_RUNNING 0x100 -#define WT_TXN_SYNC_SET 0x200 +#define WT_TXN_PUBLIC_TS_COMMIT 0x080 +#define WT_TXN_PUBLIC_TS_READ 0x100 +#define WT_TXN_READONLY 0x200 +#define WT_TXN_RUNNING 0x400 +#define WT_TXN_SYNC_SET 0x800 uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 6de86eb0aaf..d693633fabe 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -10,14 +10,15 @@ static inline int __wt_txn_id_check(WT_SESSION_IMPL *session); static inline void __wt_txn_read_last(WT_SESSION_IMPL *session); #ifdef HAVE_TIMESTAMPS +#if WT_TIMESTAMP_SIZE == 8 /* * __wt_timestamp_cmp -- * Compare two timestamps. */ static inline int -__wt_timestamp_cmp(const uint8_t *ts1, const uint8_t *ts2) +__wt_timestamp_cmp(const wt_timestamp_t *ts1, const wt_timestamp_t *ts2) { - return (memcmp(ts1, ts2, WT_TIMESTAMP_SIZE)); + return (ts1->val == ts2->val ? 0 : (ts1->val > ts2->val ? 1 : -1)); } /* @@ -25,9 +26,9 @@ __wt_timestamp_cmp(const uint8_t *ts1, const uint8_t *ts2) * Set a timestamp. */ static inline void -__wt_timestamp_set(uint8_t *dest, const uint8_t *src) +__wt_timestamp_set(wt_timestamp_t *dest, const wt_timestamp_t *src) { - (void)memcpy(dest, src, WT_TIMESTAMP_SIZE); + dest->val = src->val; } /* @@ -35,11 +36,62 @@ __wt_timestamp_set(uint8_t *dest, const uint8_t *src) * Check if a timestamp is equal to the special "zero" time. */ static inline bool -__wt_timestamp_iszero(const uint8_t *ts) +__wt_timestamp_iszero(wt_timestamp_t *ts) +{ + return (ts->val == 0); +} + +/* + * __wt_timestamp_set_inf -- + * Set a timestamp to the maximum value. + */ +static inline void +__wt_timestamp_set_inf(wt_timestamp_t *ts) +{ + ts->val = UINT64_MAX; +} + +/* + * __wt_timestamp_set_zero -- + * Zero out a timestamp. + */ +static inline void +__wt_timestamp_set_zero(wt_timestamp_t *ts) +{ + ts->val = 0; +} +#else +/* + * __wt_timestamp_cmp -- + * Compare two timestamps. + */ +static inline int +__wt_timestamp_cmp(const wt_timestamp_t *ts1, const wt_timestamp_t *ts2) +{ + return (memcmp(ts1->ts, ts2->ts, WT_TIMESTAMP_SIZE)); +} + +/* + * __wt_timestamp_set -- + * Set a timestamp. + */ +static inline void +__wt_timestamp_set(wt_timestamp_t *dest, const wt_timestamp_t *src) +{ + (void)memcpy(dest->ts, src->ts, WT_TIMESTAMP_SIZE); +} + +/* + * __wt_timestamp_iszero -- + * Check if a timestamp is equal to the special "zero" time. + */ +static inline bool +__wt_timestamp_iszero(wt_timestamp_t *ts) { static const wt_timestamp_t zero_timestamp; - return (memcmp(ts, zero_timestamp, WT_TIMESTAMP_SIZE) == 0); + return (memcmp(ts->ts, + WT_TIMESTAMP_NULL(&zero_timestamp), WT_TIMESTAMP_SIZE) == 0); } /* @@ -47,9 +99,9 @@ __wt_timestamp_iszero(const uint8_t *ts) * Set a timestamp to the maximum value. */ static inline void -__wt_timestamp_set_inf(uint8_t *ts) +__wt_timestamp_set_inf(wt_timestamp_t *ts) { - memset(ts, 0xff, WT_TIMESTAMP_SIZE); + memset(ts->ts, 0xff, WT_TIMESTAMP_SIZE); } /* @@ -57,11 +109,12 @@ __wt_timestamp_set_inf(uint8_t *ts) * Zero out a timestamp. */ static inline void -__wt_timestamp_set_zero(uint8_t *ts) +__wt_timestamp_set_zero(wt_timestamp_t *ts) { - memset(ts, 0x00, WT_TIMESTAMP_SIZE); + memset(ts->ts, 0x00, WT_TIMESTAMP_SIZE); } -#endif +#endif /* WT_TIMESTAMP_SIZE == 8 */ +#endif /* HAVE_TIMESTAMPS */ /* * __txn_next_op -- @@ -130,7 +183,7 @@ __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_TXN_OP_INMEM : WT_TXN_OP_BASIC; #ifdef HAVE_TIMESTAMPS if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) { - __wt_timestamp_set(upd->timestamp, txn->commit_timestamp); + __wt_timestamp_set(&upd->timestamp, &txn->commit_timestamp); if (!F_ISSET(session, WT_SESSION_LOGGING_INMEM)) op->type = WT_TXN_OP_BASIC_TS; } @@ -214,16 +267,6 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) return (checkpoint_pinned); } -/* - * __wt_txn_committed -- - * Return if a transaction has been committed. - */ -static inline bool -__wt_txn_committed(WT_SESSION_IMPL *session, uint64_t id) -{ - return (WT_TXNID_LT(id, S2C(session)->txn_global.last_running)); -} - /* * __txn_visible_all_id -- * Check if a given transaction ID is "globally visible". This is, if @@ -248,7 +291,7 @@ __txn_visible_all_id(WT_SESSION_IMPL *session, uint64_t id) */ static inline bool __wt_txn_visible_all( - WT_SESSION_IMPL *session, uint64_t id, const uint8_t *timestamp) + WT_SESSION_IMPL *session, uint64_t id, const wt_timestamp_t *timestamp) { if (!__txn_visible_all_id(session, id)) return (false); @@ -263,7 +306,7 @@ __wt_txn_visible_all( return (true); __wt_readlock(session, &txn_global->rwlock); - cmp = __wt_timestamp_cmp(timestamp, txn_global->pinned_timestamp); + cmp = __wt_timestamp_cmp(timestamp, &txn_global->pinned_timestamp); __wt_readunlock(session, &txn_global->rwlock); /* @@ -289,7 +332,7 @@ static inline bool __wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd) { return (__wt_txn_visible_all( - session, upd->txnid, WT_GET_TIMESTAMP(upd))); + session, upd->txnid, WT_TIMESTAMP_NULL(&upd->timestamp))); } /* @@ -351,7 +394,7 @@ __txn_visible_id(WT_SESSION_IMPL *session, uint64_t id) */ static inline bool __wt_txn_visible( - WT_SESSION_IMPL *session, uint64_t id, const uint8_t *timestamp) + WT_SESSION_IMPL *session, uint64_t id, const wt_timestamp_t *timestamp) { if (!__txn_visible_id(session, id)) return (false); @@ -364,7 +407,7 @@ __wt_txn_visible( if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) || timestamp == NULL) return (true); - return (memcmp(timestamp, txn->read_timestamp, WT_TIMESTAMP_SIZE) <= 0); + return (__wt_timestamp_cmp(timestamp, &txn->read_timestamp) <= 0); } #else WT_UNUSED(timestamp); @@ -379,7 +422,8 @@ __wt_txn_visible( static inline bool __wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd) { - return (__wt_txn_visible(session, upd->txnid, WT_GET_TIMESTAMP(upd))); + return (__wt_txn_visible(session, + upd->txnid, WT_TIMESTAMP_NULL(&upd->timestamp))); } /* diff --git a/src/third_party/wiredtiger/src/include/verify_build.h b/src/third_party/wiredtiger/src/include/verify_build.h index 57189b5c2b2..3973f786a90 100644 --- a/src/third_party/wiredtiger/src/include/verify_build.h +++ b/src/third_party/wiredtiger/src/include/verify_build.h @@ -52,7 +52,15 @@ __wt_verify_build(void) /* Check specific structures weren't padded. */ WT_SIZE_CHECK(WT_BLOCK_DESC, WT_BLOCK_DESC_SIZE); WT_SIZE_CHECK(WT_REF, WT_REF_SIZE); - WT_SIZE_CHECK(WT_UPDATE, WT_UPDATE_SIZE); + + /* + * WT_UPDATE is special: we arrange fields to avoid padding within the + * structure but it could be padded at the end depending on the + * timestamp size. Further check that the data field in the update + * structure is where we expect it. + */ + WT_SIZE_CHECK(WT_UPDATE, WT_ALIGN(WT_UPDATE_SIZE, 8)); + WT_STATIC_ASSERT(offsetof(WT_UPDATE, data) == WT_UPDATE_SIZE); /* Check specific structures were padded. */ #define WT_PADDING_CHECK(s) \ diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index d8d8b864766..f9993fbcca3 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -489,6 +489,11 @@ struct __wt_cursor { * The modify method is only supported on raw byte arrays accessed using * a WT_ITEM structure, that is, a format type of \c u. * + * Calling the WT_CURSOR::modify method outside of snapshot isolation + * can lead to unexpected results. While \c read-committed isolation + * is supported with the WT_CURSOR::modify method, \c read-uncommitted + * isolation is not. + * * @snippet ex_all.c Modify an existing record * * On success, the cursor ends positioned at the modified record; to @@ -1387,7 +1392,7 @@ struct __wt_session { * @configstart{WT_SESSION.drop, see dist/api_data.py} * @config{force, return success if the object does not exist., a * boolean flag; default \c false.} - * @config{remove_files, should the underlying files be removed?., a + * @config{remove_files, if the underlying files should be removed., a * boolean flag; default \c true.} * @configend * @ebusy_errors @@ -1778,6 +1783,11 @@ struct __wt_session { * empty.} * @config{target, if non-empty\, checkpoint the list of objects., a * list of strings; default empty.} + * @config{use_timestamp, by default\, create the checkpoint as of the + * last stable timestamp if timestamps are in use\, or all current + * updates if there is no stable timestamp set. If false\, this option + * generates a checkpoint with all updates including those later than + * the timestamp., a boolean flag; default \c true.} * @configend * @errors */ @@ -2239,8 +2249,13 @@ struct __wt_connection { * @configstart{WT_CONNECTION.set_timestamp, see dist/api_data.py} * @config{oldest_timestamp, future commits and queries will be no * earlier than the specified timestamp. Supplied values must be - * monotonically increasing. see @ref transaction_timestamps., a + * monotonically increasing. See @ref transaction_timestamps., a * string; default empty.} + * @config{stable_timestamp, future checkpoints will be no later than + * the specified timestamp. Supplied values must be monotonically + * increasing. The stable timestamp data stability only applies to + * tables that are not being logged. See @ref transaction_timestamps., + * a string; default empty.} * @configend * @errors */ @@ -4491,18 +4506,26 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_LOGREC_SYSTEM 4 /*! invalid operation */ #define WT_LOGOP_INVALID 0 -/*! column put */ +/*! column-store put */ #define WT_LOGOP_COL_PUT 1 -/*! column remove */ +/*! column-store remove */ #define WT_LOGOP_COL_REMOVE 2 -/*! column truncate */ +/*! column-store truncate */ #define WT_LOGOP_COL_TRUNCATE 3 -/*! row put */ +/*! row-store put */ #define WT_LOGOP_ROW_PUT 4 -/*! row remove */ +/*! row-store remove */ #define WT_LOGOP_ROW_REMOVE 5 -/*! row truncate */ +/*! row-store truncate */ #define WT_LOGOP_ROW_TRUNCATE 6 +/*! checkpoint start */ +#define WT_LOGOP_CHECKPOINT_START 7 +/*! previous LSN */ +#define WT_LOGOP_PREV_LSN 8 +/*! column-store modify */ +#define WT_LOGOP_COL_MODIFY 9 +/*! row-store modify */ +#define WT_LOGOP_ROW_MODIFY 10 /*! @} */ /******************************************* @@ -5045,74 +5068,102 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_APPLICATION_EVICT_TIME 1243 /*! thread-yield: application thread time waiting for cache (usecs) */ #define WT_STAT_CONN_APPLICATION_CACHE_TIME 1244 +/*! + * thread-yield: connection close blocked waiting for transaction state + * stabilization + */ +#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1245 +/*! thread-yield: connection close yielded for lsm manager shutdown */ +#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1246 +/*! thread-yield: data handle lock yielded */ +#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1247 +/*! + * thread-yield: get reference for page index and slot time sleeping + * (usecs) + */ +#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1248 +/*! thread-yield: log server sync yielded for log write */ +#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1249 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1245 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1250 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1246 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1251 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1247 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1252 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1248 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1253 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1249 +#define WT_STAT_CONN_PAGE_SLEEP 1254 +/*! + * thread-yield: page delete rollback time sleeping for state change + * (usecs) + */ +#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1255 +/*! thread-yield: page reconciliation yielded due to child modification */ +#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1256 +/*! + * thread-yield: tree descend one level yielded for split page index + * update + */ +#define WT_STAT_CONN_TREE_DESCEND_BLOCKED 1257 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1250 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1258 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1251 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1259 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1252 +#define WT_STAT_CONN_TXN_BEGIN 1260 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1253 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1261 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1254 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1262 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1255 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1263 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1256 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1264 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1257 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1265 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1258 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1266 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1259 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1267 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1260 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1268 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1261 +#define WT_STAT_CONN_TXN_CHECKPOINT 1269 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1262 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1270 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1263 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1271 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1264 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1272 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1265 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1273 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1266 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1274 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1267 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1275 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1268 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1276 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1269 +#define WT_STAT_CONN_TXN_SYNC 1277 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1270 +#define WT_STAT_CONN_TXN_COMMIT 1278 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1271 +#define WT_STAT_CONN_TXN_ROLLBACK 1279 /*! transaction: update conflicts */ -#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1272 +#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1280 /*! * @} diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index 74fdc4c3925..84617dfcab8 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -290,6 +290,8 @@ struct __wt_thread; typedef struct __wt_thread WT_THREAD; struct __wt_thread_group; typedef struct __wt_thread_group WT_THREAD_GROUP; +struct __wt_timestamp_t; + typedef struct __wt_timestamp_t WT_TIMESTAMP_T; struct __wt_txn; typedef struct __wt_txn WT_TXN; struct __wt_txn_global; -- cgit v1.2.1