diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2015-12-01 20:10:57 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2015-12-01 20:11:18 +1100 |
commit | 86e7b69a6c52c926d28a60d816faefa6db81eb96 (patch) | |
tree | bd3de846771991c1735f6a22a24d654269587a1e /src/third_party/wiredtiger | |
parent | f45b7c8e34743ba89407d90ee3392acb0d2be255 (diff) | |
download | mongo-86e7b69a6c52c926d28a60d816faefa6db81eb96.tar.gz |
Import wiredtiger-wiredtiger-mongodb-3.2-rc4-56-g7a4f325.tar.gz from wiredtiger branch mongodb-3.2
ref: 8326df6..7a4f325
e731ef8 WT-2251 Free addresses when discarding deleted page references.
0e93d60 SERVER-21691 Avoid insert stalls
264ec21 WT-2249 Keep eviction stuck until cache usage is under 100%.
dca1411 WT-2250 Minor fix: use SET instead of INCRV for stat.
Diffstat (limited to 'src/third_party/wiredtiger')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_discard.c | 6 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_slvg.c | 8 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_split.c | 31 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_lru.c | 42 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_page.c | 12 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/btree.i | 35 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/log/log_slot.c | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_write.c | 14 |
8 files changed, 80 insertions, 70 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c index 7cd97831044..54d9761c487 100644 --- a/src/third_party/wiredtiger/src/btree/bt_discard.c +++ b/src/third_party/wiredtiger/src/btree/bt_discard.c @@ -269,11 +269,7 @@ __wt_free_ref( * Free any address allocation; if there's no linked WT_REF page, it * must be allocated. */ - if (ref->addr != NULL && - (ref->home == NULL || __wt_off_page(ref->home, ref->addr))) { - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } + __wt_ref_free_addr(session, ref); /* Free any page-deleted information. */ if (ref->page_del != NULL) { diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c index 80e467b5707..e4a860bb421 100644 --- a/src/third_party/wiredtiger/src/btree/bt_slvg.c +++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c @@ -1290,9 +1290,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) * would have been lost.) Clear the reference addr so eviction doesn't * free the underlying blocks. */ - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - ref->addr = NULL; + __wt_ref_free_addr(session, ref); /* Write the new version of the leaf page to disk. */ WT_ERR(__slvg_modify_init(session, page)); @@ -2013,9 +2011,7 @@ __slvg_row_build_leaf( * would have been lost.) Clear the reference addr so eviction doesn't * free the underlying blocks. */ - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - ref->addr = NULL; + __wt_ref_free_addr(session, ref); /* Write the new version of the leaf page to disk. */ WT_ERR(__slvg_modify_init(session, page)); diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index a0dfbf32cad..e2031553aed 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -895,6 +895,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, */ WT_ASSERT(session, next_ref->page_del == NULL); + __wt_ref_free_addr(session, next_ref); WT_TRET(__split_safe_free( session, split_gen, exclusive, next_ref, sizeof(WT_REF))); parent_decr += sizeof(WT_REF); @@ -1182,8 +1183,8 @@ err: /* * Lock an internal page. */ static int -__split_internal_lock( - WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE **parentp, bool *hazardp) +__split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock, + WT_PAGE **parentp, bool *hazardp) { WT_DECL_RET; WT_PAGE *parent; @@ -1201,7 +1202,7 @@ __split_internal_lock( * loop until the exclusive lock is resolved). If we want to split * the parent, give up to avoid that deadlock. */ - if (S2BT(session)->checkpointing != WT_CKPT_OFF) + if (!trylock && S2BT(session)->checkpointing != WT_CKPT_OFF) return (EBUSY); /* @@ -1226,7 +1227,10 @@ __split_internal_lock( if (F_ISSET_ATOMIC(parent, WT_PAGE_SPLIT_BLOCK)) return (EBUSY); - WT_RET(__wt_fair_lock(session, &parent->page_lock)); + if (trylock) + WT_RET(__wt_fair_trylock(session, &parent->page_lock)); + else + WT_RET(__wt_fair_lock(session, &parent->page_lock)); if (parent == ref->home) break; WT_RET(__wt_fair_unlock(session, &parent->page_lock)); @@ -1370,7 +1374,7 @@ __split_parent_climb(WT_SESSION_IMPL *session, WT_PAGE *page, bool page_hazard) * locks, lock-coupling up the tree. */ WT_ERR(__split_internal_lock( - session, ref, &parent, &parent_hazard)); + session, ref, true, &parent, &parent_hazard)); ret = __split_internal(session, parent, page); WT_TRET(__split_internal_unlock(session, page, page_hazard)); @@ -1668,6 +1672,12 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) child->addr = ref->addr; /* + * The address has moved to the replacement WT_REF. Make sure it isn't + * freed when the original ref is discarded. + */ + ref->addr = NULL; + + /* * Copy the first key from the original page into first ref in the new * parent. Pages created in memory always have a "smallest" insert * list, so look there first. If we don't find one, get the first key @@ -1856,6 +1866,11 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) return (0); err: if (split_ref[0] != NULL) { + /* + * The address was moved to the replacement WT_REF, restore it. + */ + ref->addr = split_ref[0]->addr; + __wt_free(session, split_ref[0]->key.ikey); __wt_free(session, split_ref[0]); } @@ -1883,7 +1898,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) WT_RET(__wt_verbose( session, WT_VERB_SPLIT, "%p: split-insert", ref->page)); - WT_RET(__split_internal_lock(session, ref, &parent, &hazard)); + WT_RET(__split_internal_lock(session, ref, true, &parent, &hazard)); if ((ret = __split_insert(session, ref)) != 0) { WT_TRET(__split_internal_unlock(session, parent, hazard)); return (ret); @@ -1975,7 +1990,7 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing) WT_RET(__wt_verbose( session, WT_VERB_SPLIT, "%p: split-multi", ref->page)); - WT_RET(__split_internal_lock(session, ref, &parent, &hazard)); + WT_RET(__split_internal_lock(session, ref, false, &parent, &hazard)); if ((ret = __split_multi(session, ref, closing)) != 0 || closing) { WT_TRET(__split_internal_unlock(session, parent, hazard)); return (ret); @@ -2004,7 +2019,7 @@ __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref) WT_RET(__wt_verbose( session, WT_VERB_SPLIT, "%p: reverse-split", ref->page)); - WT_RET(__split_internal_lock(session, ref, &parent, &hazard)); + WT_RET(__split_internal_lock(session, ref, false, &parent, &hazard)); ret = __split_parent(session, ref, NULL, 0, 0, false, true); WT_TRET(__split_internal_unlock(session, parent, hazard)); return (ret); diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 306362de57f..f2784890ab7 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -159,7 +159,8 @@ __evict_server(void *arg) WT_DECL_RET; WT_SESSION_IMPL *session; #ifdef HAVE_DIAGNOSTIC - struct timespec now, stuck_ts = { 0, 0 }; + struct timespec now, stuck_ts; + uint64_t pages_evicted = 0; #endif u_int spins; @@ -204,10 +205,11 @@ __evict_server(void *arg) /* Next time we wake up, reverse the sweep direction. */ cache->flags ^= WT_CACHE_WALK_REVERSE; #ifdef HAVE_DIAGNOSTIC - stuck_ts.tv_sec = 0; - } else if (stuck_ts.tv_sec == 0) + pages_evicted = 0; + } else if (pages_evicted != cache->pages_evict) { WT_ERR(__wt_epoch(session, &stuck_ts)); - else { + pages_evicted = cache->pages_evict; + } else { /* After being stuck for 5 minutes, give up. */ WT_ERR(__wt_epoch(session, &now)); if (WT_TIMEDIFF_SEC(now, stuck_ts) > 300) { @@ -481,6 +483,13 @@ __evict_update_work(WT_SESSION_IMPL *session) goto done; } + /* + * If the cache has been stuck and is now under control, clear the + * stuck flag. + */ + if (bytes_inuse < bytes_max) + F_CLR(cache, WT_CACHE_STUCK); + dirty_inuse = __wt_cache_dirty_inuse(cache); if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100) { FLD_SET(cache->state, WT_EVICT_PASS_DIRTY); @@ -498,6 +507,7 @@ __evict_update_work(WT_SESSION_IMPL *session) F_CLR(cache, WT_CACHE_WOULD_BLOCK); goto done; } + return (false); done: if (F_ISSET(cache, WT_CACHE_STUCK)) @@ -1284,19 +1294,6 @@ fast: /* If the page can't be evicted, give up. */ continue; } - /* - * If the oldest transaction hasn't changed since the last time - * this page was written, it's unlikely we can make progress. - * Similarly, if the most recent update on the page is not yet - * globally visible, eviction will fail. These heuristics - * attempt to avoid repeated attempts to evict the same page. - */ - if (modified && !would_split && - !FLD_ISSET(cache->state, WT_CACHE_STUCK) && - (mod->last_oldest_id == __wt_txn_oldest_id(session) || - !__wt_txn_visible_all(session, mod->update_txn))) - continue; - WT_ASSERT(session, evict->ref == NULL); __evict_init_candidate(session, evict, ref); ++evict; @@ -1419,7 +1416,6 @@ static int __evict_page(WT_SESSION_IMPL *session, bool is_server) { WT_BTREE *btree; - WT_CACHE *cache; WT_DECL_RET; WT_PAGE *page; WT_REF *ref; @@ -1458,12 +1454,6 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) (void)__wt_atomic_subv32(&btree->evict_busy, 1); - WT_RET(ret); - - cache = S2C(session)->cache; - if (F_ISSET(cache, WT_CACHE_STUCK)) - F_CLR(cache, WT_CACHE_STUCK); - return (ret); } @@ -1607,8 +1597,8 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) next_walk = NULL; session->dhandle = dhandle; - while (__wt_tree_walk(session, - &next_walk, NULL, WT_READ_CACHE | WT_READ_NO_WAIT) == 0 && + while (__wt_tree_walk(session, &next_walk, NULL, + WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 && next_walk != NULL) { page = next_walk->page; size = page->memory_footprint; diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 26ea9117fae..9281e7cdb2d 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -241,19 +241,14 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) { WT_ADDR *addr; WT_DECL_RET; - WT_PAGE *parent; WT_PAGE_MODIFY *mod; - parent = ref->home; mod = ref->page->modify; switch (mod->rec_result) { case WT_PM_REC_EMPTY: /* Page is empty */ /* Discard the parent's address. */ - if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) { - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } + __wt_ref_free_addr(session, ref); /* * Update the parent to reference a deleted page. The fact that @@ -308,10 +303,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) return (EBUSY); /* Discard the parent's address. */ - if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) { - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } + __wt_ref_free_addr(session, ref); /* * Update the parent to reference the replacement page. diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 3f8dc08a1da..02633c3206a 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -466,6 +466,22 @@ __wt_off_page(WT_PAGE *page, const void *p) } /* + * __wt_ref_free_addr -- + * Free the address in a reference, if necessary. + */ +static inline void +__wt_ref_free_addr(WT_SESSION_IMPL *session, WT_REF *ref) +{ + if (ref->addr != NULL) { + if (ref->home == NULL || __wt_off_page(ref->home, ref->addr)) { + __wt_free(session, ((WT_ADDR *)ref->addr)->addr); + __wt_free(session, ref->addr); + } else + ref->addr = NULL; + } +} + +/* * __wt_ref_key -- * Return a reference to a row-store internal page key as cheaply as * possible. @@ -1046,6 +1062,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) WT_BTREE *btree; WT_PAGE *page; WT_PAGE_MODIFY *mod; + bool modified; if (inmem_splitp != NULL) *inmem_splitp = false; @@ -1070,14 +1087,15 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) return (true); } + modified = __wt_page_is_modified(page); + /* * If the file is being checkpointed, we can't evict dirty pages: * if we write a page and free the previous version of the page, that * previous version might be referenced by an internal page already * been written in the checkpoint, leaving the checkpoint inconsistent. */ - if (btree->checkpointing != WT_CKPT_OFF && - __wt_page_is_modified(page)) { + if (btree->checkpointing != WT_CKPT_OFF && modified) { WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint); WT_STAT_FAST_DATA_INCR(session, cache_eviction_checkpoint); return (false); @@ -1104,6 +1122,19 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK)) return (false); + /* + * If the oldest transaction hasn't changed since the last time + * this page was written, it's unlikely we can make progress. + * Similarly, if the most recent update on the page is not yet + * globally visible, eviction will fail. These heuristics + * attempt to avoid repeated attempts to evict the same page. + */ + if (modified && + !F_ISSET(S2C(session)->cache, WT_CACHE_STUCK) && + (mod->last_oldest_id == __wt_txn_oldest_id(session) || + !__wt_txn_visible_all(session, mod->update_txn))) + return (false); + return (true); } diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index 255551f99a4..8155397d823 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -293,7 +293,7 @@ __wt_log_slot_init(WT_SESSION_IMPL *session) &log->slot_pool[i].slot_buf, log->slot_buf_size)); F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS); } - WT_STAT_FAST_CONN_INCRV(session, + WT_STAT_FAST_CONN_SET(session, log_buffer_size, log->slot_buf_size * WT_SLOT_POOL); /* * Set up the available slot from the pool the first time. diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 8bf81eafac2..0e1e7498568 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -1395,12 +1395,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session, __wt_txn_visible_all(session, page_del->txnid))) { WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); WT_RET(__rec_block_free(session, addr, addr_size)); - - if (__wt_off_page(ref->home, ref->addr)) { - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } - ref->addr = NULL; + __wt_ref_free_addr(session, ref); } /* @@ -5434,12 +5429,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_RET(__wt_ref_info( session, ref, &addr, &addr_size, NULL)); WT_RET(__rec_block_free(session, addr, addr_size)); - if (__wt_off_page(ref->home, ref->addr)) { - __wt_free( - session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } - ref->addr = NULL; + __wt_ref_free_addr(session, ref); } break; case WT_PM_REC_EMPTY: /* Page deleted */ |