diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2015-10-01 16:45:32 +1000 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2015-10-01 16:45:32 +1000 |
commit | 3e911933d058aa5842af77240fb0ac5027b1468d (patch) | |
tree | f8db5f2679bd2bff45d2db1ba1512678da4ec4a5 | |
parent | fc51ae17d2dab4d76e4bac3cbbc1f0eb5bb29b1a (diff) | |
parent | 79f74e505ae7b15c3c695cdc72f71e4f9a105647 (diff) | |
download | mongo-3e911933d058aa5842af77240fb0ac5027b1468d.tar.gz |
Merge branch 'develop' into log-zero
-rw-r--r-- | bench/wtperf/runners/multi-btree-stress.wtperf | 17 | ||||
-rw-r--r-- | bench/wtperf/wtperf.c | 31 | ||||
-rw-r--r-- | dist/s_string.ok | 1 | ||||
-rw-r--r-- | src/btree/bt_compact.c | 6 | ||||
-rw-r--r-- | src/btree/bt_debug.c | 7 | ||||
-rw-r--r-- | src/btree/bt_discard.c | 2 | ||||
-rw-r--r-- | src/btree/bt_split.c | 47 | ||||
-rw-r--r-- | src/conn/conn_log.c | 19 | ||||
-rw-r--r-- | src/evict/evict_lru.c | 66 | ||||
-rw-r--r-- | src/include/btmem.h | 9 | ||||
-rw-r--r-- | src/include/connection.h | 13 | ||||
-rw-r--r-- | src/include/hardware.h | 10 | ||||
-rw-r--r-- | src/include/mutex.h | 18 | ||||
-rw-r--r-- | src/include/mutex.i | 88 | ||||
-rw-r--r-- | src/include/serial.i | 5 | ||||
-rw-r--r-- | src/include/wt_internal.h | 2 | ||||
-rw-r--r-- | src/reconcile/rec_write.c | 4 |
17 files changed, 236 insertions, 109 deletions
diff --git a/bench/wtperf/runners/multi-btree-stress.wtperf b/bench/wtperf/runners/multi-btree-stress.wtperf new file mode 100644 index 00000000000..b10b08f6035 --- /dev/null +++ b/bench/wtperf/runners/multi-btree-stress.wtperf @@ -0,0 +1,17 @@ +# wtperf options file: multi-database configuration attempting to +# trigger slow operations by overloading CPU and disk. +# References Jira WT-2131 +conn_config="cache_size=2GB,eviction=(threads_min=2,threads_max=2),log=(enabled=false),direct_io=(data,checkpoint),buffer_alignment=4096,checkpoint_sync=true,checkpoint=(wait=60)" +table_config="allocation_size=4k,prefix_compression=false,split_pct=75,leaf_page_max=4k,internal_page_max=16k,leaf_item_max=1433,internal_item_max=3100,type=file" +# Divide original icount by database_count. +database_count=5 +icount=50000 +populate_threads=1 +random_range=50000000 +report_interval=5 +run_time=3600 +threads=((count=1,inserts=1),(count=10,reads=1)) +value_sz=100 +max_latency=1000 +sample_interval=5 +sample_rate=1 diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 8dceeab2832..0d30bb6b2b7 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -600,7 +600,34 @@ worker(void *arg) if (ret == WT_NOTFOUND) break; -op_err: lprintf(cfg, ret, 0, +op_err: if (ret == WT_ROLLBACK && ops_per_txn != 0) { + /* + * If we are running with explicit transactions + * configured and we hit a WT_ROLLBACK, then we + * should rollback the current transaction and + * attempt to continue. + * This does break the guarantee of insertion + * order in cases of ordered inserts, as we + * aren't retrying here. + */ + lprintf(cfg, ret, 1, + "%s for: %s, range: %"PRIu64, op_name(op), + key_buf, wtperf_value_range(cfg)); + if ((ret = session->rollback_transaction( + session, NULL)) != 0) { + lprintf(cfg, ret, 0, + "Failed rollback_transaction"); + goto err; + } + if ((ret = session->begin_transaction( + session, NULL)) != 0) { + lprintf(cfg, ret, 0, + "Worker begin transaction failed"); + goto err; + } + break; + } + lprintf(cfg, ret, 0, "%s failed for: %s, range: %"PRIu64, op_name(op), key_buf, wtperf_value_range(cfg)); goto err; @@ -644,7 +671,7 @@ op_err: lprintf(cfg, ret, 0, if ((ret = session->begin_transaction( session, NULL)) != 0) { lprintf(cfg, ret, 0, - "Worker transaction commit failed"); + "Worker begin transaction failed"); goto err; } } diff --git a/dist/s_string.ok b/dist/s_string.ok index 021e222919e..d234a3c101f 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -646,6 +646,7 @@ intrin inuse io ip +islocked ispo iteratively jnr diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c index 18b6860c758..b2c9e4b67f8 100644 --- a/src/btree/bt_compact.c +++ b/src/btree/bt_compact.c @@ -55,10 +55,12 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) * The page's modification information can change underfoot if * the page is being reconciled, serialize with reconciliation. */ - F_CAS_ATOMIC_WAIT(page, WT_PAGE_RECONCILIATION); + WT_RET(__wt_fair_lock(session, &page->page_lock)); + ret = bm->compact_page_skip(bm, session, mod->mod_replace.addr, mod->mod_replace.size, skipp); - F_CLR_ATOMIC(page, WT_PAGE_RECONCILIATION); + + WT_TRET(__wt_fair_unlock(session, &page->page_lock)); WT_RET(ret); } return (0); diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index ee2898f60be..15ae93522a7 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -636,7 +636,10 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page) __dmsg(ds, ": %s\n", __wt_page_type_string(page->type)); __dmsg(ds, "\t" "disk %p, entries %" PRIu32, page->dsk, entries); - __dmsg(ds, "%s", __wt_page_is_modified(page) ? ", dirty" : ", clean"); + __dmsg(ds, ", %s", __wt_page_is_modified(page) ? "dirty" : "clean"); + __dmsg(ds, ", %s", __wt_fair_islocked( + session, &page->page_lock) ? "locked" : "unlocked"); + if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS)) __dmsg(ds, ", keys-built"); if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) @@ -647,8 +650,6 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page) __dmsg(ds, ", evict-lru"); if (F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS)) __dmsg(ds, ", overflow-keys"); - if (F_ISSET_ATOMIC(page, WT_PAGE_RECONCILIATION)) - __dmsg(ds, ", reconciliation"); if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT)) __dmsg(ds, ", split-insert"); diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index c27d42d38f4..998667e3e1f 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -55,7 +55,7 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) */ WT_ASSERT(session, !__wt_page_is_modified(page)); WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU)); - WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_RECONCILIATION)); + WT_ASSERT(session, !__wt_fair_islocked(session, &page->page_lock)); #ifdef HAVE_DIAGNOSTIC { diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 29153ced178..adda9145ee4 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -866,6 +866,18 @@ __split_parent_lock( *parentp = NULL; /* + * A checkpoint reconciling this parent page can deadlock with + * our split. We have an exclusive page lock on the child before + * we acquire the page's reconciliation lock, and reconciliation + * acquires the page's reconciliation lock before it encounters + * the child's exclusive lock (which causes reconciliation to + * loop until the exclusive lock is resolved). If we want to split + * the parent, give up to avoid that deadlock. + */ + if (S2BT(session)->checkpointing != WT_CKPT_OFF) + return (EBUSY); + + /* * Get a page-level lock on the parent to single-thread splits into the * page because we need to single-thread sizing/growing the page index. * It's OK to queue up multiple splits as the child pages split, but the @@ -882,32 +894,11 @@ __split_parent_lock( */ for (;;) { parent = ref->home; - F_CAS_ATOMIC(parent, WT_PAGE_RECONCILIATION, ret); - if (ret == 0) { - /* - * We can race with another thread deepening our parent. - * To deal with that, read the parent pointer each time - * we try to lock it, and check it's still correct after - * it's locked. - */ - if (parent == ref->home) - break; - F_CLR_ATOMIC(parent, WT_PAGE_RECONCILIATION); - continue; - } - - /* - * A checkpoint reconciling this parent page can deadlock with - * our split. We have an exclusive page lock on the child before - * we acquire the page's reconciliation lock, and reconciliation - * acquires the page's reconciliation lock before it encounters - * the child's exclusive lock (which causes reconciliation to - * loop until the exclusive lock is resolved). If we can't lock - * the parent, give up to avoid that deadlock. - */ - if (S2BT(session)->checkpointing != WT_CKPT_OFF) - return (EBUSY); - __wt_yield(); + WT_RET(__wt_fair_lock(session, &parent->page_lock)); + if (parent == ref->home) + break; + /* Try again if the page deepened while we were waiting */ + WT_RET(__wt_fair_unlock(session, &parent->page_lock)); } /* @@ -930,7 +921,7 @@ __split_parent_lock( *parentp = parent; return (0); -err: F_CLR_ATOMIC(parent, WT_PAGE_RECONCILIATION); +err: WT_TRET(__wt_fair_unlock(session, &parent->page_lock)); return (ret); } @@ -946,7 +937,7 @@ __split_parent_unlock(WT_SESSION_IMPL *session, WT_PAGE *parent, bool hazard) if (hazard) ret = __wt_hazard_clear(session, parent); - F_CLR_ATOMIC(parent, WT_PAGE_RECONCILIATION); + WT_TRET(__wt_fair_unlock(session, &parent->page_lock)); return (ret); } diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 41420462f6e..6f9021ccfc3 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -50,15 +50,14 @@ __logmgr_config( conn = S2C(session); - WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval)); /* * If we're reconfiguring, enabled must match the already * existing setting. - */ - /* + * * If it is off and the user it turning it on, or it is on * and the user is turning it off, return an error. */ + WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval)); if (reconfig && ((cval.val != 0 && !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) || @@ -104,20 +103,19 @@ __logmgr_config( log_max_filesize, conn->log_file_max); } - WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval)); /* * If pre-allocation is configured, set the initial number to a few. * We'll adapt as load dictates. */ - if (cval.val != 0) { - FLD_SET(conn->log_flags, WT_CONN_LOG_PREALLOC); + WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval)); + if (cval.val != 0) conn->log_prealloc = 5; - } - WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval)); + /* * Note that it is meaningless to reconfigure this value during * runtime. It only matters on create before recovery runs. */ + WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval)); if (cval.len != 0 && WT_STRING_MATCH("error", cval.str, cval.len)) FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR); @@ -858,11 +856,6 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) &conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session)); conn->log_wrlsn_tid_set = true; - /* If no log thread services are configured, we're done. */ - if (!FLD_ISSET(conn->log_flags, - (WT_CONN_LOG_ARCHIVE | WT_CONN_LOG_PREALLOC))) - return (0); - /* * If a log server thread exists, the user may have reconfigured * archiving or pre-allocation. Signal the thread. Otherwise the diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 3c00ee30896..d0cc60b583d 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1457,15 +1457,12 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) WT_DECL_RET; WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *txn_state; - int count; - bool q_found, txn_busy; + uint64_t init_evict_count, max_pages_evicted; + bool txn_busy; conn = S2C(session); cache = conn->cache; - /* First, wake the eviction server. */ - WT_RET(__wt_evict_server_wake(session)); - /* * If the current transaction is keeping the oldest ID pinned, it is in * the middle of an operation. This may prevent the oldest ID from @@ -1479,11 +1476,15 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) session->nhazard > 0 || (txn_state->snap_min != WT_TXN_NONE && txn_global->current != txn_global->oldest_id); - if (txn_busy) { - if (pct_full < 100) - return (0); - busy = true; - } + + if (txn_busy && pct_full < 100) + return (0); + + if (busy == 1) + txn_busy = 1; + + /* Wake the eviction server if we need to do work. */ + WT_RET(__wt_evict_server_wake(session)); /* * If we're busy, either because of the transaction check we just did, @@ -1491,9 +1492,11 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) * as a page read), limit the work to a single eviction and return. If * that's not the case, we can do more. */ - count = busy ? 1 : 10; + init_evict_count = cache->pages_evict; for (;;) { + max_pages_evicted = txn_busy ? 5 : 20; + /* * A pathological case: if we're the oldest transaction in the * system and the eviction server is stuck trying to find space, @@ -1507,43 +1510,34 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) return (WT_ROLLBACK); } + /* See if eviction is still needed. */ + if (!__wt_eviction_needed(session, NULL) || + cache->pages_evict > init_evict_count + max_pages_evicted) + return (0); + /* Evict a page. */ - q_found = false; switch (ret = __evict_page(session, false)) { case 0: cache->app_evicts++; - if (--count == 0) + if (txn_busy) return (0); - - q_found = true; - break; + /* FALLTHROUGH */ case EBUSY: - continue; + break; case WT_NOTFOUND: + /* Allow the queue to re-populate before retrying. */ + WT_RET(__wt_cond_wait( + session, cache->evict_waiter_cond, 100000)); + cache->app_waits++; break; default: return (ret); } - /* See if eviction is still needed. */ - if (!__wt_eviction_needed(session, NULL)) - return (0); - - /* If we found pages in the eviction queue, continue there. */ - if (q_found) - continue; - - /* Wait for the queue to re-populate before trying again. */ - WT_RET( - __wt_cond_wait(session, cache->evict_waiter_cond, 100000)); - - cache->app_waits++; - /* Check if things have changed so that we are busy. */ - if (!busy && txn_state->snap_min != WT_TXN_NONE && - txn_global->current != txn_global->oldest_id) { - busy = true; - count = 1; - } + /* Check if we have become busy. */ + if (!txn_busy && txn_state->snap_min != WT_TXN_NONE && + txn_global->current != txn_global->oldest_id) + txn_busy = true; } /* NOTREACHED */ } diff --git a/src/include/btmem.h b/src/include/btmem.h index 0302533bb04..41b2c98f9e8 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -578,8 +578,7 @@ struct __wt_page { #define WT_PAGE_DISK_MAPPED 0x04 /* Disk image in mapped memory */ #define WT_PAGE_EVICT_LRU 0x08 /* Page is on the LRU queue */ #define WT_PAGE_OVERFLOW_KEYS 0x10 /* Page has overflow keys */ -#define WT_PAGE_RECONCILIATION 0x20 /* Page reconciliation lock */ -#define WT_PAGE_SPLIT_INSERT 0x40 /* A leaf page was split for append */ +#define WT_PAGE_SPLIT_INSERT 0x20 /* A leaf page was split for append */ uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */ /* @@ -603,6 +602,12 @@ struct __wt_page { #define WT_READGEN_STEP 100 uint64_t read_gen; + /* + * Used to protect and co-ordinate splits for internal pages and + * reconciliation for all pages. + */ + WT_FAIR_LOCK page_lock; + size_t memory_footprint; /* Memory attached to the page */ /* Page's on-disk representation: NULL for pages created in memory. */ diff --git a/src/include/connection.h b/src/include/connection.h index 0273414f42e..61ef9e2391c 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -336,13 +336,12 @@ struct __wt_connection_impl { const char *stat_stamp; /* Statistics log entry timestamp */ uint64_t stat_usecs; /* Statistics log period */ -#define WT_CONN_LOG_ARCHIVE 0x01 /* Archive is enabled */ -#define WT_CONN_LOG_ENABLED 0x02 /* Logging is enabled */ -#define WT_CONN_LOG_EXISTED 0x04 /* Log files found */ -#define WT_CONN_LOG_PREALLOC 0x08 /* Pre-allocation is enabled */ -#define WT_CONN_LOG_RECOVER_DONE 0x10 /* Recovery completed */ -#define WT_CONN_LOG_RECOVER_ERR 0x20 /* Error if recovery required */ -#define WT_CONN_LOG_ZERO_FILL 0x40 /* Manually zero files */ +#define WT_CONN_LOG_ARCHIVE 0x01 /* Archive is enabled */ +#define WT_CONN_LOG_ENABLED 0x02 /* Logging is enabled */ +#define WT_CONN_LOG_EXISTED 0x04 /* Log files found */ +#define WT_CONN_LOG_RECOVER_DONE 0x08 /* Recovery completed */ +#define WT_CONN_LOG_RECOVER_ERR 0x10 /* Error if recovery required */ +#define WT_CONN_LOG_ZERO_FILL 0x20 /* Manually zero files */ uint32_t log_flags; /* Global logging configuration */ WT_CONDVAR *log_cond; /* Log server wait mutex */ WT_SESSION_IMPL *log_session; /* Log server session */ diff --git a/src/include/hardware.h b/src/include/hardware.h index 32353072c5b..c9b72f8a609 100644 --- a/src/include/hardware.h +++ b/src/include/hardware.h @@ -50,16 +50,6 @@ &(p)->flags_atomic, __orig, __orig | (uint8_t)(mask))); \ } while (0) -#define F_CAS_ATOMIC_WAIT(p, mask) do { \ - int __ret; \ - for (;;) { \ - F_CAS_ATOMIC(p, mask, __ret); \ - if (__ret == 0) \ - break; \ - __wt_yield(); \ - } \ -} while (0) - #define F_CLR_ATOMIC(p, mask) do { \ uint8_t __orig; \ do { \ diff --git a/src/include/mutex.h b/src/include/mutex.h index 1f1bb8f4b5c..b67e5e610e8 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -52,6 +52,24 @@ struct __wt_rwlock { }; /* + * A light weight lock that can be used to replace spinlocks if fairness is + * necessary. Implements a ticket-based back off spin lock. + * The fields are available as a union to allow for atomically setting + * the state of the entire lock. + */ +struct __wt_fair_lock { + union { + uint32_t lock; + struct { + uint16_t owner; /* Ticket for current owner */ + uint16_t waiter; /* Last allocated ticket */ + } s; + } u; +#define fair_lock_owner u.s.owner +#define fair_lock_waiter u.s.waiter +}; + +/* * Spin locks: * * WiredTiger uses spinlocks for fast mutual exclusion (where operations done diff --git a/src/include/mutex.i b/src/include/mutex.i index 5ea4583a2ab..54a9cc6f9fd 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -251,3 +251,91 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) #error Unknown spinlock type #endif + +/* + * __wt_fair_trylock -- + * Try to get a lock - give up if it is not immediately available. + */ +static inline int +__wt_fair_trylock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock) +{ + WT_FAIR_LOCK new, old; + + WT_UNUSED(session); + + old = new = *lock; + + /* Exit early if there is no chance we can get the lock. */ + if (old.fair_lock_waiter != old.fair_lock_owner) + return (EBUSY); + + /* The replacement lock value is a result of allocating a new ticket. */ + ++new.fair_lock_waiter; + return (__wt_atomic_cas32( + &lock->u.lock, old.u.lock, new.u.lock) ? 0 : EBUSY); +} + +/* + * __wt_fair_lock -- + * Get a lock. + */ +static inline int +__wt_fair_lock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock) +{ + uint16_t ticket; + int pause_cnt; + + WT_UNUSED(session); + + /* + * Possibly wrap: if we have more than 64K lockers waiting, the ticket + * value will wrap and two lockers will simultaneously be granted the + * lock. + */ + ticket = __wt_atomic_fetch_add16(&lock->fair_lock_waiter, 1); + for (pause_cnt = 0; ticket != lock->fair_lock_owner;) { + /* + * We failed to get the lock; pause before retrying and if we've + * paused enough, sleep so we don't burn CPU to no purpose. This + * situation happens if there are more threads than cores in the + * system and we're thrashing on shared resources. + */ + if (++pause_cnt < 1000) + WT_PAUSE(); + else + __wt_sleep(0, 10); + } + + return (0); +} + +/* + * __wt_fair_unlock -- + * Release a shared lock. + */ +static inline int +__wt_fair_unlock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock) +{ + WT_UNUSED(session); + + /* + * We have exclusive access - the update does not need to be atomic. + */ + ++lock->fair_lock_owner; + + return (0); +} + +#ifdef HAVE_DIAGNOSTIC +/* + * __wt_fair_islocked -- + * Test whether the lock is currently held + */ +static inline bool +__wt_fair_islocked(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock) +{ + WT_UNUSED(session); + + return (lock->fair_lock_waiter != lock->fair_lock_owner); +} +#endif diff --git a/src/include/serial.i b/src/include/serial.i index 5358b874c06..ca22ce12d81 100644 --- a/src/include/serial.i +++ b/src/include/serial.i @@ -316,12 +316,11 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, } /* If we can't lock it, don't scan, that's okay. */ - F_CAS_ATOMIC(page, WT_PAGE_RECONCILIATION, ret); - if (ret != 0) + if (__wt_fair_trylock(session, &page->page_lock) != 0) return (0); obsolete = __wt_update_obsolete_check(session, page, upd->next); - F_CLR_ATOMIC(page, WT_PAGE_RECONCILIATION); + WT_RET(__wt_fair_unlock(session, &page->page_lock)); if (obsolete != NULL) __wt_update_obsolete_free(session, page, obsolete); diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index 4d46a25b63c..3f4e0ada7f1 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -164,6 +164,8 @@ struct __wt_ext; typedef struct __wt_ext WT_EXT; struct __wt_extlist; typedef struct __wt_extlist WT_EXTLIST; +struct __wt_fair_lock; + typedef struct __wt_fair_lock WT_FAIR_LOCK; struct __wt_fh; typedef struct __wt_fh WT_FH; struct __wt_hazard; diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 82264f7c58f..f2c32a434bf 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -394,7 +394,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, * In-memory splits: reconciliation of an internal page cannot handle * a child page splitting during the reconciliation. */ - F_CAS_ATOMIC_WAIT(page, WT_PAGE_RECONCILIATION); + WT_RET(__wt_fair_lock(session, &page->page_lock)); /* Reconcile the page. */ switch (page->type) { @@ -432,7 +432,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_TRET(__rec_write_wrapup_err(session, r, page)); /* Release the reconciliation lock. */ - F_CLR_ATOMIC(page, WT_PAGE_RECONCILIATION); + WT_TRET(__wt_fair_unlock(session, &page->page_lock)); /* Update statistics. */ WT_STAT_FAST_CONN_INCR(session, rec_pages); |