diff options
author | Luke Chen <luke.chen@mongodb.com> | 2019-09-20 16:23:41 -0400 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2019-09-20 16:23:41 -0400 |
commit | 2b0786f46b63e6210c26fc510575cc4ad039700a (patch) | |
tree | cbe9b6277ce48c43b878b110ab4f124f33dcfafc | |
parent | 8f8fb6f1b84134e8a6ab33e13e840916af0a6280 (diff) | |
download | mongo-2b0786f46b63e6210c26fc510575cc4ad039700a.tar.gz |
Import wiredtiger: 75c84e785fa6a9928299f6501cd85f6de4294c52 from branch mongodb-4.0
ref: 543111d3d8..75c84e785f
for: 4.0.13
WT-4956 Handle the case where 4 billion updates are made to a page without eviction
WT-5074 Fix "make check" on exotic architectures
-rw-r--r-- | src/third_party/wiredtiger/build_posix/Make.base | 6 | ||||
-rwxr-xr-x | src/third_party/wiredtiger/build_posix/reconf | 1 | ||||
-rwxr-xr-x | src/third_party/wiredtiger/dist/s_clang-format | 4 | ||||
-rw-r--r-- | src/third_party/wiredtiger/dist/s_string.ok | 1 | ||||
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_debug.c | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/btmem.h | 21 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/btree.i | 33 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/reconcile.h | 5 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/serial.i | 31 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_write.c | 33 |
11 files changed, 69 insertions, 70 deletions
diff --git a/src/third_party/wiredtiger/build_posix/Make.base b/src/third_party/wiredtiger/build_posix/Make.base index 08d64440ff0..08153e9bd99 100644 --- a/src/third_party/wiredtiger/build_posix/Make.base +++ b/src/third_party/wiredtiger/build_posix/Make.base @@ -58,12 +58,6 @@ $(srcdir)/Makefile.am: $(srcdir)/build_posix/Make.base $(srcdir)/build_posix/mak libtool: $(LIBTOOL_DEPS) $(SHELL) ./config.status libtool -$(srcdir)/src/include/extern.h: auto-includes.chk -$(srcdir)/src/include/wt_internal.h: auto-includes.chk - -auto-includes.chk: $(libwiredtiger_la_SOURCES) - @(cd $(srcdir)/dist && sh s_prototypes && sh s_typedef -b) && touch $@ - $(srcdir)/docs/index.html: @cd $(srcdir)/dist && sh s_docs diff --git a/src/third_party/wiredtiger/build_posix/reconf b/src/third_party/wiredtiger/build_posix/reconf index 16d4002d9b9..ef0c5886b40 100755 --- a/src/third_party/wiredtiger/build_posix/reconf +++ b/src/third_party/wiredtiger/build_posix/reconf @@ -22,7 +22,6 @@ clean() Makefile.am \ Makefile.in \ aclocal.m4 \ - auto-includes.chk \ autom4te.cache \ config.cache \ config.hin \ diff --git a/src/third_party/wiredtiger/dist/s_clang-format b/src/third_party/wiredtiger/dist/s_clang-format index 92d375333d2..58e4b59f077 100755 --- a/src/third_party/wiredtiger/dist/s_clang-format +++ b/src/third_party/wiredtiger/dist/s_clang-format @@ -4,10 +4,10 @@ set -o pipefail download_clang_format() { if [ `uname` = "Linux" ]; then - wget https://s3.amazonaws.com/boxes.10gen.com/build/clang-format-3.8-rhel55.tar.gz -O dist/clang-format.tar.gz + curl https://s3.amazonaws.com/boxes.10gen.com/build/clang-format-3.8-rhel55.tar.gz -o dist/clang-format.tar.gz tar --strip=2 -C dist/ -xf dist/clang-format.tar.gz build/bin/clang-format && rm dist/clang-format.tar.gz elif [ `uname` = "Darwin" ]; then - wget https://s3.amazonaws.com/boxes.10gen.com/build/clang%2Bllvm-3.8.0-x86_64-apple-darwin.tar.xz -O dist/clang-format.tar.gz + curl https://s3.amazonaws.com/boxes.10gen.com/build/clang%2Bllvm-3.8.0-x86_64-apple-darwin.tar.xz -o dist/clang-format.tar.gz tar --strip=2 -C dist/ -xf dist/clang-format.tar.gz clang+llvm-3.8.0-x86_64-apple-darwin/bin/clang-format && rm dist/clang-format.tar.gz else echo "$0: unsupported environment $(uname)" diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index fd9eb3964c9..81d1686a9af 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -1308,6 +1308,7 @@ unmodify unordered unpackv unpadded +unreconciled unreferenced unregister unsized diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 5a534b4c1bf..6b7c23a8ee5 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "543111d3d8737ada1b741b3a25a201feb2ed13a3", + "commit": "75c84e785fa6a9928299f6501cd85f6de4294c52", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-4.0" diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index cd25c0bed41..74f22505f3e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -859,7 +859,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) if (split_gen != 0) WT_RET(ds->f(ds, ", split-gen=%" PRIu64, split_gen)); if (mod != NULL) - WT_RET(ds->f(ds, ", write-gen=%" PRIu32, mod->write_gen)); + WT_RET(ds->f(ds, ", page-state=%" PRIu32, mod->page_state)); WT_RET(ds->f(ds, ", memory-size %" WT_SIZET_FMT, page->memory_footprint)); WT_RET(ds->f(ds, "\n")); diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index c9ed00ddcdb..db775580eaa 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -451,11 +451,22 @@ struct __wt_page_modify { #define WT_PAGE_UNLOCK(s, p) __wt_spin_unlock((s), &(p)->modify->page_lock) WT_SPINLOCK page_lock; /* Page's spinlock */ - /* - * The write generation is incremented when a page is modified, a page is clean if the write - * generation is 0. - */ - uint32_t write_gen; +/* + * The page state is incremented when a page is modified. + * + * WT_PAGE_CLEAN -- + * The page is clean. + * WT_PAGE_DIRTY_FIRST -- + * The page is in this state after the first operation that marks a + * page dirty, or when reconciliation is checking to see if it has + * done enough work to be able to mark the page clean. + * WT_PAGE_DIRTY -- + * Two or more updates have been added to the page. + */ +#define WT_PAGE_CLEAN 0 +#define WT_PAGE_DIRTY_FIRST 1 +#define WT_PAGE_DIRTY 2 + uint32_t page_state; #define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */ #define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */ diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 636599c4941..36c73990d68 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -33,8 +33,8 @@ __wt_page_is_empty(WT_PAGE *page) static inline bool __wt_page_evict_clean(WT_PAGE *page) { - return ( - page->modify == NULL || (page->modify->write_gen == 0 && page->modify->rec_result == 0)); + return (page->modify == NULL || + (page->modify->page_state == WT_PAGE_CLEAN && page->modify->rec_result == 0)); } /* @@ -44,7 +44,7 @@ __wt_page_evict_clean(WT_PAGE *page) static inline bool __wt_page_is_modified(WT_PAGE *page) { - return (page->modify != NULL && page->modify->write_gen != 0); + return (page->modify != NULL && page->modify->page_state != WT_PAGE_CLEAN); } /* @@ -472,19 +472,24 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page) WT_ASSERT(session, !F_ISSET(session->dhandle, WT_DHANDLE_DEAD)); last_running = 0; - if (page->modify->write_gen == 0) + if (page->modify->page_state == WT_PAGE_CLEAN) last_running = S2C(session)->txn_global.last_running; /* - * We depend on atomic-add being a write barrier, that is, a barrier to - * ensure all changes to the page are flushed before updating the page - * write generation and/or marking the tree dirty, otherwise checkpoints + * We depend on the atomic operation being a write barrier, that is, a + * barrier to ensure all changes to the page are flushed before updating + * the page state and/or marking the tree dirty, otherwise checkpoints * and/or page reconciliation might be looking at a clean page/tree. * * Every time the page transitions from clean to dirty, update the cache * and transactional information. + * + * The page state can only ever be incremented above dirty by the number + * of concurrently running threads, so the counter will never approach + * the point where it would wrap. */ - if (__wt_atomic_add32(&page->modify->write_gen, 1) == 1) { + if (page->modify->page_state < WT_PAGE_DIRTY && + __wt_atomic_add32(&page->modify->page_state, 1) == WT_PAGE_DIRTY_FIRST) { __wt_cache_dirty_incr(session, page); /* @@ -555,7 +560,17 @@ __wt_page_modify_clear(WT_SESSION_IMPL *session, WT_PAGE *page) * Allow the call to be made on clean pages. */ if (__wt_page_is_modified(page)) { - page->modify->write_gen = 0; + /* + * The only part where ordering matters is during + * reconciliation where updates on other threads are performing + * writes to the page state that need to be visible to the + * reconciliation thread. + * + * Since clearing of the page state is not going to be happening + * during reconciliation on a separate thread, there's no write + * barrier needed here. + */ + page->modify->page_state = WT_PAGE_CLEAN; __wt_cache_dirty_decr(session, page); } } diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h index b44358e1e58..9917011b176 100644 --- a/src/third_party/wiredtiger/src/include/reconcile.h +++ b/src/third_party/wiredtiger/src/include/reconcile.h @@ -22,11 +22,6 @@ typedef struct { uint32_t flags; /* Caller's configuration */ /* - * Track start/stop write generation to decide if all changes to the page are written. - */ - uint32_t orig_write_gen; - - /* * Track start/stop checkpoint generations to decide if lookaside table records are correct. */ uint64_t orig_btree_checkpoint_gen; diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i index 1e605b15b72..0496bc91a8d 100644 --- a/src/third_party/wiredtiger/src/include/serial.i +++ b/src/third_party/wiredtiger/src/include/serial.i @@ -7,28 +7,6 @@ */ /* - * __page_write_gen_wrapped_check -- - * Confirm the page's write generation number won't wrap. - */ -static inline int -__page_write_gen_wrapped_check(WT_PAGE *page) -{ - /* - * Check to see if the page's write generation is about to wrap (wildly - * unlikely as it implies 4B updates between clean page reconciliations, - * but technically possible), and fail the update. - * - * The check is outside of the serialization mutex because the page's - * write generation is going to be a hot cache line, so technically it's - * possible for the page's write generation to wrap between the test and - * our subsequent modification of it. However, the test is (4B-1M), and - * there cannot be a million threads that have done the test but not yet - * completed their modification. - */ - return (page->modify->write_gen > UINT32_MAX - WT_MILLION ? WT_RESTART : 0); -} - -/* * __insert_simple_func -- * Worker function to add a WT_INSERT entry to the middle of a skiplist. */ @@ -157,9 +135,6 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD * new_ins = *new_insp; *new_insp = NULL; - /* Check for page write generation wrap. */ - WT_RET(__page_write_gen_wrapped_check(page)); - /* * Acquire the page's spinlock unless we already have exclusive access. Then call the worker * function. @@ -207,9 +182,6 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_ new_ins = *new_insp; *new_insp = NULL; - /* Check for page write generation wrap. */ - WT_RET(__page_write_gen_wrapped_check(page)); - simple = true; for (i = 0; i < skipdepth; i++) if (new_ins->next[i] == NULL) @@ -262,9 +234,6 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE **srch_upd upd = *updp; *updp = NULL; - /* Check for page write generation wrap. */ - WT_RET(__page_write_gen_wrapped_check(page)); - /* * All structure setup must be flushed before the structure is entered * into the list. We need a write barrier here, our callers depend on diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 4dd9b0bbb7b..19d8d762456 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -420,14 +420,19 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r) } /* - * The page only might be clean; if the write generation is - * unchanged since reconciliation started, it's clean. + * We set the page state to mark it as having been dirtied for + * the first time prior to reconciliation. A failed atomic cas + * indicates that an update has taken place during + * reconciliation. * - * If the write generation changed, the page has been written - * since reconciliation started and remains dirty (that can't - * happen when evicting, the page is exclusively locked). + * The page only might be clean; if the page state is unchanged + * since reconciliation started, it's clean. + * + * If the page state changed, the page has been written since + * reconciliation started and remains dirty (that can't happen + * when evicting, the page is exclusively locked). */ - if (__wt_atomic_cas32(&mod->write_gen, r->orig_write_gen, 0)) + if (__wt_atomic_cas32(&mod->page_state, WT_PAGE_DIRTY_FIRST, WT_PAGE_CLEAN)) __wt_cache_dirty_decr(session, page); else WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT)); @@ -564,12 +569,22 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO r->page = page; /* - * Save the page's write generation before reading the page. Save the transaction generations - * before reading the page. These are all ordered reads, but we only need one. + * Save the transaction generations before reading the page. These are all ordered reads, but we + * only need one. */ r->orig_btree_checkpoint_gen = btree->checkpoint_gen; r->orig_txn_checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT); - WT_ORDERED_READ(r->orig_write_gen, page->modify->write_gen); + + /* + * Update the page state to indicate that all currently installed + * updates will be included in this reconciliation if it would mark the + * page clean. + * + * Add a write barrier to make it more likely that a thread adding an + * update will see this state change. + */ + page->modify->page_state = WT_PAGE_DIRTY_FIRST; + WT_FULL_BARRIER(); /* * Cache the oldest running transaction ID. This is used to check whether updates seen by |