diff options
-rw-r--r-- | src/third_party/wiredtiger/dist/s_string.ok | 1
-rw-r--r-- | src/third_party/wiredtiger/import.data | 8
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_debug.c | 2
-rw-r--r-- | src/third_party/wiredtiger/src/include/btmem.h | 17
-rw-r--r-- | src/third_party/wiredtiger/src/include/btree.i | 31
-rw-r--r-- | src/third_party/wiredtiger/src/include/serial.i | 32
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_write.c | 37
7 files changed, 67 insertions, 61 deletions
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 0bfebbf8458..0b414b89894 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -1219,6 +1219,7 @@ unmodify unordered unpackv unpadded +unreconciled unreferenced unregister unsized diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index bfad8007be0..e448a673a88 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,6 +1,6 @@ { - "vendor": "wiredtiger", - "github": "wiredtiger/wiredtiger.git", - "branch": "mongodb-3.4", - "commit": "1d7a748f9f096ebf39e80ea442b6d3be3fc69381" + "commit": "3af8f2dc2c6028b3c18caa6be430d14c4da93c30", + "github": "wiredtiger/wiredtiger.git", + "vendor": "wiredtiger", + "branch": "mongodb-3.4" } diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index fdc33b608ec..6512dcd5d72 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -722,7 +722,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) } if (mod != NULL) WT_RET( - ds->f(ds, ", write generation=%" PRIu32, mod->write_gen)); + ds->f(ds, ", page-state=%" PRIu32, mod->page_state)); WT_RET(ds->f(ds, "\n")); return (0); diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index e965724dffe..b1423d976f2 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -423,10 +423,21 @@ struct __wt_page_modify { WT_SPINLOCK page_lock; /* Page's spinlock */ /* - * The write generation is incremented when a page is modified, a page - * is clean if the write generation is 0. + * The page state is incremented when a page is modified. + * + * WT_PAGE_CLEAN -- + * The page is clean. 
+ * WT_PAGE_DIRTY_FIRST -- + * The page is in this state after the first operation that marks a + * page dirty, or when reconciliation is checking to see if it has + * done enough work to be able to mark the page clean. + * WT_PAGE_DIRTY -- + * Two or more updates have been added to the page. */ - uint32_t write_gen; +#define WT_PAGE_CLEAN 0 +#define WT_PAGE_DIRTY_FIRST 1 +#define WT_PAGE_DIRTY 2 + uint32_t page_state; #define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */ #define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */ diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 1d6fcd6272c..3286c84be3f 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -34,7 +34,8 @@ __wt_page_is_empty(WT_PAGE *page) static inline bool __wt_page_is_modified(WT_PAGE *page) { - return (page->modify != NULL && page->modify->write_gen != 0); + return (page->modify != NULL && + page->modify->page_state != WT_PAGE_CLEAN); } /* @@ -505,19 +506,25 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page) WT_ASSERT(session, !F_ISSET(session->dhandle, WT_DHANDLE_DEAD)); last_running = 0; - if (page->modify->write_gen == 0) + if (page->modify->page_state == WT_PAGE_CLEAN) last_running = S2C(session)->txn_global.last_running; /* - * We depend on atomic-add being a write barrier, that is, a barrier to - * ensure all changes to the page are flushed before updating the page - * write generation and/or marking the tree dirty, otherwise checkpoints + * We depend on the atomic operation being a write barrier, that is, a + * barrier to ensure all changes to the page are flushed before updating + * the page state and/or marking the tree dirty, otherwise checkpoints * and/or page reconciliation might be looking at a clean page/tree. * * Every time the page transitions from clean to dirty, update the cache * and transactional information. 
+ * + * The page state can only ever be incremented above dirty by the number + * of concurrently running threads, so the counter will never approach + * the point where it would wrap. */ - if (__wt_atomic_add32(&page->modify->write_gen, 1) == 1) { + if (page->modify->page_state < WT_PAGE_DIRTY && + __wt_atomic_add32(&page->modify->page_state, 1) == + WT_PAGE_DIRTY_FIRST) { __wt_cache_dirty_incr(session, page); /* @@ -588,7 +595,17 @@ __wt_page_modify_clear(WT_SESSION_IMPL *session, WT_PAGE *page) * Allow the call to be made on clean pages. */ if (__wt_page_is_modified(page)) { - page->modify->write_gen = 0; + /* + * The only part where ordering matters is during + * reconciliation where updates on other threads are performing + * writes to the page state that need to be visible to the + * reconciliation thread. + * + * Since clearing of the page state is not going to be happening + * during reconciliation on a separate thread, there's no write + * barrier needed here. + */ + page->modify->page_state = WT_PAGE_CLEAN; __wt_cache_dirty_decr(session, page); } } diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i index 0134e1a9c20..59072278dcf 100644 --- a/src/third_party/wiredtiger/src/include/serial.i +++ b/src/third_party/wiredtiger/src/include/serial.i @@ -7,29 +7,6 @@ */ /* - * __page_write_gen_wrapped_check -- - * Confirm the page's write generation number won't wrap. - */ -static inline int -__page_write_gen_wrapped_check(WT_PAGE *page) -{ - /* - * Check to see if the page's write generation is about to wrap (wildly - * unlikely as it implies 4B updates between clean page reconciliations, - * but technically possible), and fail the update. - * - * The check is outside of the serialization mutex because the page's - * write generation is going to be a hot cache line, so technically it's - * possible for the page's write generation to wrap between the test and - * our subsequent modification of it. 
However, the test is (4B-1M), and - * there cannot be a million threads that have done the test but not yet - * completed their modification. - */ - return (page->modify->write_gen > - UINT32_MAX - WT_MILLION ? WT_RESTART : 0); -} - -/* * __insert_simple_func -- * Worker function to add a WT_INSERT entry to the middle of a skiplist. */ @@ -159,9 +136,6 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT *new_ins = *new_insp; WT_DECL_RET; - /* Check for page write generation wrap. */ - WT_RET(__page_write_gen_wrapped_check(page)); - /* Clear references to memory we now own and must free on error. */ *new_insp = NULL; @@ -210,9 +184,6 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, u_int i; bool simple; - /* Check for page write generation wrap. */ - WT_RET(__page_write_gen_wrapped_check(page)); - /* Clear references to memory we now own and must free on error. */ *new_insp = NULL; @@ -266,9 +237,6 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *obsolete, *upd = *updp; uint64_t txn; - /* Check for page write generation wrap. */ - WT_RET(__page_write_gen_wrapped_check(page)); - /* Clear references to memory we now own and must free on error. */ *updp = NULL; diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index b76192c0cf9..fd67939d7ca 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -33,12 +33,6 @@ typedef struct { WT_ITEM *interim_buf; /* - * Track start/stop write generation to decide if all changes to the - * page are written. - */ - uint32_t orig_write_gen; - - /* * Track start/stop checkpoint generations to decide if lookaside table * records are correct. 
*/ @@ -708,14 +702,20 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r) btree->rec_max_txn = r->max_txn; /* - * The page only might be clean; if the write generation is - * unchanged since reconciliation started, it's clean. + * We set the page state to mark it as having been dirtied for + * the first time prior to reconciliation. A failed atomic cas + * indicates that an update has taken place during + * reconciliation. + * + * The page only might be clean; if the page state is unchanged + * since reconciliation started, it's clean. * - * If the write generation changed, the page has been written - * since reconciliation started and remains dirty (that can't - * happen when evicting, the page is exclusively locked). + * If the page state changed, the page has been written since + * reconciliation started and remains dirty (that can't happen + * when evicting, the page is exclusively locked). */ - if (__wt_atomic_cas32(&mod->write_gen, r->orig_write_gen, 0)) + if (__wt_atomic_cas32( + &mod->page_state, WT_PAGE_DIRTY_FIRST, WT_PAGE_CLEAN)) __wt_cache_dirty_decr(session, page); else WT_ASSERT(session, !F_ISSET(r, WT_EVICTING)); @@ -898,13 +898,22 @@ __rec_write_init(WT_SESSION_IMPL *session, r->page = page; /* - * Save the page's write generation before reading the page. * Save the transaction generations before reading the page. * These are all ordered reads, but we only need one. */ r->orig_btree_checkpoint_gen = btree->checkpoint_gen; r->orig_txn_checkpoint_gen = conn->txn_global.checkpoint_gen; - WT_ORDERED_READ(r->orig_write_gen, page->modify->write_gen); + + /* + * Update the page state to indicate that all currently installed + * updates will be included in this reconciliation if it would mark the + * page clean. + * + * Add a write barrier to make it more likely that a thread adding an + * update will see this state change. 
+ */ + page->modify->page_state = WT_PAGE_DIRTY_FIRST; + WT_FULL_BARRIER(); /* * Cache the oldest running transaction ID. This is used to check |