summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2019-09-20 16:23:41 -0400
committer: Luke Chen <luke.chen@mongodb.com> 2019-09-20 16:23:41 -0400
commit: 2b0786f46b63e6210c26fc510575cc4ad039700a (patch)
tree: cbe9b6277ce48c43b878b110ab4f124f33dcfafc
parent: 8f8fb6f1b84134e8a6ab33e13e840916af0a6280 (diff)
download: mongo-2b0786f46b63e6210c26fc510575cc4ad039700a.tar.gz
Import wiredtiger: 75c84e785fa6a9928299f6501cd85f6de4294c52 from branch mongodb-4.0
ref: 543111d3d8..75c84e785f
for: 4.0.13

WT-4956 Handle the case where 4 billion updates are made to a page without eviction
WT-5074 Fix "make check" on exotic architectures
-rw-r--r-- src/third_party/wiredtiger/build_posix/Make.base | 6
-rwxr-xr-x src/third_party/wiredtiger/build_posix/reconf | 1
-rwxr-xr-x src/third_party/wiredtiger/dist/s_clang-format | 4
-rw-r--r-- src/third_party/wiredtiger/dist/s_string.ok | 1
-rw-r--r-- src/third_party/wiredtiger/import.data | 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h | 21
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i | 33
-rw-r--r-- src/third_party/wiredtiger/src/include/reconcile.h | 5
-rw-r--r-- src/third_party/wiredtiger/src/include/serial.i | 31
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c | 33
11 files changed, 69 insertions, 70 deletions
diff --git a/src/third_party/wiredtiger/build_posix/Make.base b/src/third_party/wiredtiger/build_posix/Make.base
index 08d64440ff0..08153e9bd99 100644
--- a/src/third_party/wiredtiger/build_posix/Make.base
+++ b/src/third_party/wiredtiger/build_posix/Make.base
@@ -58,12 +58,6 @@ $(srcdir)/Makefile.am: $(srcdir)/build_posix/Make.base $(srcdir)/build_posix/mak
libtool: $(LIBTOOL_DEPS)
$(SHELL) ./config.status libtool
-$(srcdir)/src/include/extern.h: auto-includes.chk
-$(srcdir)/src/include/wt_internal.h: auto-includes.chk
-
-auto-includes.chk: $(libwiredtiger_la_SOURCES)
- @(cd $(srcdir)/dist && sh s_prototypes && sh s_typedef -b) && touch $@
-
$(srcdir)/docs/index.html:
@cd $(srcdir)/dist && sh s_docs
diff --git a/src/third_party/wiredtiger/build_posix/reconf b/src/third_party/wiredtiger/build_posix/reconf
index 16d4002d9b9..ef0c5886b40 100755
--- a/src/third_party/wiredtiger/build_posix/reconf
+++ b/src/third_party/wiredtiger/build_posix/reconf
@@ -22,7 +22,6 @@ clean()
Makefile.am \
Makefile.in \
aclocal.m4 \
- auto-includes.chk \
autom4te.cache \
config.cache \
config.hin \
diff --git a/src/third_party/wiredtiger/dist/s_clang-format b/src/third_party/wiredtiger/dist/s_clang-format
index 92d375333d2..58e4b59f077 100755
--- a/src/third_party/wiredtiger/dist/s_clang-format
+++ b/src/third_party/wiredtiger/dist/s_clang-format
@@ -4,10 +4,10 @@ set -o pipefail
download_clang_format() {
if [ `uname` = "Linux" ]; then
- wget https://s3.amazonaws.com/boxes.10gen.com/build/clang-format-3.8-rhel55.tar.gz -O dist/clang-format.tar.gz
+ curl https://s3.amazonaws.com/boxes.10gen.com/build/clang-format-3.8-rhel55.tar.gz -o dist/clang-format.tar.gz
tar --strip=2 -C dist/ -xf dist/clang-format.tar.gz build/bin/clang-format && rm dist/clang-format.tar.gz
elif [ `uname` = "Darwin" ]; then
- wget https://s3.amazonaws.com/boxes.10gen.com/build/clang%2Bllvm-3.8.0-x86_64-apple-darwin.tar.xz -O dist/clang-format.tar.gz
+ curl https://s3.amazonaws.com/boxes.10gen.com/build/clang%2Bllvm-3.8.0-x86_64-apple-darwin.tar.xz -o dist/clang-format.tar.gz
tar --strip=2 -C dist/ -xf dist/clang-format.tar.gz clang+llvm-3.8.0-x86_64-apple-darwin/bin/clang-format && rm dist/clang-format.tar.gz
else
echo "$0: unsupported environment $(uname)"
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index fd9eb3964c9..81d1686a9af 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -1308,6 +1308,7 @@ unmodify
unordered
unpackv
unpadded
+unreconciled
unreferenced
unregister
unsized
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 5a534b4c1bf..6b7c23a8ee5 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "543111d3d8737ada1b741b3a25a201feb2ed13a3",
+ "commit": "75c84e785fa6a9928299f6501cd85f6de4294c52",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.0"
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index cd25c0bed41..74f22505f3e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -859,7 +859,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
if (split_gen != 0)
WT_RET(ds->f(ds, ", split-gen=%" PRIu64, split_gen));
if (mod != NULL)
- WT_RET(ds->f(ds, ", write-gen=%" PRIu32, mod->write_gen));
+ WT_RET(ds->f(ds, ", page-state=%" PRIu32, mod->page_state));
WT_RET(ds->f(ds, ", memory-size %" WT_SIZET_FMT, page->memory_footprint));
WT_RET(ds->f(ds, "\n"));
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index c9ed00ddcdb..db775580eaa 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -451,11 +451,22 @@ struct __wt_page_modify {
#define WT_PAGE_UNLOCK(s, p) __wt_spin_unlock((s), &(p)->modify->page_lock)
WT_SPINLOCK page_lock; /* Page's spinlock */
- /*
- * The write generation is incremented when a page is modified, a page is clean if the write
- * generation is 0.
- */
- uint32_t write_gen;
+/*
+ * The page state is incremented when a page is modified.
+ *
+ * WT_PAGE_CLEAN --
+ * The page is clean.
+ * WT_PAGE_DIRTY_FIRST --
+ * The page is in this state after the first operation that marks a
+ * page dirty, or when reconciliation is checking to see if it has
+ * done enough work to be able to mark the page clean.
+ * WT_PAGE_DIRTY --
+ * Two or more updates have been added to the page.
+ */
+#define WT_PAGE_CLEAN 0
+#define WT_PAGE_DIRTY_FIRST 1
+#define WT_PAGE_DIRTY 2
+ uint32_t page_state;
#define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */
#define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 636599c4941..36c73990d68 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -33,8 +33,8 @@ __wt_page_is_empty(WT_PAGE *page)
static inline bool
__wt_page_evict_clean(WT_PAGE *page)
{
- return (
- page->modify == NULL || (page->modify->write_gen == 0 && page->modify->rec_result == 0));
+ return (page->modify == NULL ||
+ (page->modify->page_state == WT_PAGE_CLEAN && page->modify->rec_result == 0));
}
/*
@@ -44,7 +44,7 @@ __wt_page_evict_clean(WT_PAGE *page)
static inline bool
__wt_page_is_modified(WT_PAGE *page)
{
- return (page->modify != NULL && page->modify->write_gen != 0);
+ return (page->modify != NULL && page->modify->page_state != WT_PAGE_CLEAN);
}
/*
@@ -472,19 +472,24 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_ASSERT(session, !F_ISSET(session->dhandle, WT_DHANDLE_DEAD));
last_running = 0;
- if (page->modify->write_gen == 0)
+ if (page->modify->page_state == WT_PAGE_CLEAN)
last_running = S2C(session)->txn_global.last_running;
/*
- * We depend on atomic-add being a write barrier, that is, a barrier to
- * ensure all changes to the page are flushed before updating the page
- * write generation and/or marking the tree dirty, otherwise checkpoints
+ * We depend on the atomic operation being a write barrier, that is, a
+ * barrier to ensure all changes to the page are flushed before updating
+ * the page state and/or marking the tree dirty, otherwise checkpoints
* and/or page reconciliation might be looking at a clean page/tree.
*
* Every time the page transitions from clean to dirty, update the cache
* and transactional information.
+ *
+ * The page state can only ever be incremented above dirty by the number
+ * of concurrently running threads, so the counter will never approach
+ * the point where it would wrap.
*/
- if (__wt_atomic_add32(&page->modify->write_gen, 1) == 1) {
+ if (page->modify->page_state < WT_PAGE_DIRTY &&
+ __wt_atomic_add32(&page->modify->page_state, 1) == WT_PAGE_DIRTY_FIRST) {
__wt_cache_dirty_incr(session, page);
/*
@@ -555,7 +560,17 @@ __wt_page_modify_clear(WT_SESSION_IMPL *session, WT_PAGE *page)
* Allow the call to be made on clean pages.
*/
if (__wt_page_is_modified(page)) {
- page->modify->write_gen = 0;
+ /*
+ * The only part where ordering matters is during
+ * reconciliation where updates on other threads are performing
+ * writes to the page state that need to be visible to the
+ * reconciliation thread.
+ *
+ * Since clearing of the page state is not going to be happening
+ * during reconciliation on a separate thread, there's no write
+ * barrier needed here.
+ */
+ page->modify->page_state = WT_PAGE_CLEAN;
__wt_cache_dirty_decr(session, page);
}
}
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
index b44358e1e58..9917011b176 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.h
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -22,11 +22,6 @@ typedef struct {
uint32_t flags; /* Caller's configuration */
/*
- * Track start/stop write generation to decide if all changes to the page are written.
- */
- uint32_t orig_write_gen;
-
- /*
* Track start/stop checkpoint generations to decide if lookaside table records are correct.
*/
uint64_t orig_btree_checkpoint_gen;
diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i
index 1e605b15b72..0496bc91a8d 100644
--- a/src/third_party/wiredtiger/src/include/serial.i
+++ b/src/third_party/wiredtiger/src/include/serial.i
@@ -7,28 +7,6 @@
*/
/*
- * __page_write_gen_wrapped_check --
- * Confirm the page's write generation number won't wrap.
- */
-static inline int
-__page_write_gen_wrapped_check(WT_PAGE *page)
-{
- /*
- * Check to see if the page's write generation is about to wrap (wildly
- * unlikely as it implies 4B updates between clean page reconciliations,
- * but technically possible), and fail the update.
- *
- * The check is outside of the serialization mutex because the page's
- * write generation is going to be a hot cache line, so technically it's
- * possible for the page's write generation to wrap between the test and
- * our subsequent modification of it. However, the test is (4B-1M), and
- * there cannot be a million threads that have done the test but not yet
- * completed their modification.
- */
- return (page->modify->write_gen > UINT32_MAX - WT_MILLION ? WT_RESTART : 0);
-}
-
-/*
* __insert_simple_func --
* Worker function to add a WT_INSERT entry to the middle of a skiplist.
*/
@@ -157,9 +135,6 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *
new_ins = *new_insp;
*new_insp = NULL;
- /* Check for page write generation wrap. */
- WT_RET(__page_write_gen_wrapped_check(page));
-
/*
* Acquire the page's spinlock unless we already have exclusive access. Then call the worker
* function.
@@ -207,9 +182,6 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_
new_ins = *new_insp;
*new_insp = NULL;
- /* Check for page write generation wrap. */
- WT_RET(__page_write_gen_wrapped_check(page));
-
simple = true;
for (i = 0; i < skipdepth; i++)
if (new_ins->next[i] == NULL)
@@ -262,9 +234,6 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE **srch_upd
upd = *updp;
*updp = NULL;
- /* Check for page write generation wrap. */
- WT_RET(__page_write_gen_wrapped_check(page));
-
/*
* All structure setup must be flushed before the structure is entered
* into the list. We need a write barrier here, our callers depend on
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 4dd9b0bbb7b..19d8d762456 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -420,14 +420,19 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
}
/*
- * The page only might be clean; if the write generation is
- * unchanged since reconciliation started, it's clean.
+ * We set the page state to mark it as having been dirtied for
+ * the first time prior to reconciliation. A failed atomic cas
+ * indicates that an update has taken place during
+ * reconciliation.
*
- * If the write generation changed, the page has been written
- * since reconciliation started and remains dirty (that can't
- * happen when evicting, the page is exclusively locked).
+ * The page only might be clean; if the page state is unchanged
+ * since reconciliation started, it's clean.
+ *
+ * If the page state changed, the page has been written since
+ * reconciliation started and remains dirty (that can't happen
+ * when evicting, the page is exclusively locked).
*/
- if (__wt_atomic_cas32(&mod->write_gen, r->orig_write_gen, 0))
+ if (__wt_atomic_cas32(&mod->page_state, WT_PAGE_DIRTY_FIRST, WT_PAGE_CLEAN))
__wt_cache_dirty_decr(session, page);
else
WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
@@ -564,12 +569,22 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO
r->page = page;
/*
- * Save the page's write generation before reading the page. Save the transaction generations
- * before reading the page. These are all ordered reads, but we only need one.
+ * Save the transaction generations before reading the page. These are all ordered reads, but we
+ * only need one.
*/
r->orig_btree_checkpoint_gen = btree->checkpoint_gen;
r->orig_txn_checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT);
- WT_ORDERED_READ(r->orig_write_gen, page->modify->write_gen);
+
+ /*
+ * Update the page state to indicate that all currently installed
+ * updates will be included in this reconciliation if it would mark the
+ * page clean.
+ *
+ * Add a write barrier to make it more likely that a thread adding an
+ * update will see this state change.
+ */
+ page->modify->page_state = WT_PAGE_DIRTY_FIRST;
+ WT_FULL_BARRIER();
/*
* Cache the oldest running transaction ID. This is used to check whether updates seen by