summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Cahill <michael.cahill@mongodb.com> 2015-12-01 20:10:57 +1100
committer Michael Cahill <michael.cahill@mongodb.com> 2015-12-01 20:11:18 +1100
commit 86e7b69a6c52c926d28a60d816faefa6db81eb96 (patch)
tree bd3de846771991c1735f6a22a24d654269587a1e
parent f45b7c8e34743ba89407d90ee3392acb0d2be255 (diff)
download mongo-86e7b69a6c52c926d28a60d816faefa6db81eb96.tar.gz
Import wiredtiger-wiredtiger-mongodb-3.2-rc4-56-g7a4f325.tar.gz from wiredtiger branch mongodb-3.2

ref: 8326df6..7a4f325

e731ef8 WT-2251 Free addresses when discarding deleted page references.
0e93d60 SERVER-21691 Avoid insert stalls
264ec21 WT-2249 Keep eviction stuck until cache usage is under 100%.
dca1411 WT-2250 Minor fix: use SET instead of INCRV for stat.
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_discard.c 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_slvg.c 8
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c 31
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c 42
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_page.c 12
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i 35
-rw-r--r-- src/third_party/wiredtiger/src/log/log_slot.c 2
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c 14
8 files changed, 80 insertions, 70 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c
index 7cd97831044..54d9761c487 100644
--- a/src/third_party/wiredtiger/src/btree/bt_discard.c
+++ b/src/third_party/wiredtiger/src/btree/bt_discard.c
@@ -269,11 +269,7 @@ __wt_free_ref(
* Free any address allocation; if there's no linked WT_REF page, it
* must be allocated.
*/
- if (ref->addr != NULL &&
- (ref->home == NULL || __wt_off_page(ref->home, ref->addr))) {
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- }
+ __wt_ref_free_addr(session, ref);
/* Free any page-deleted information. */
if (ref->page_del != NULL) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index 80e467b5707..e4a860bb421 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -1290,9 +1290,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
* would have been lost.) Clear the reference addr so eviction doesn't
* free the underlying blocks.
*/
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- ref->addr = NULL;
+ __wt_ref_free_addr(session, ref);
/* Write the new version of the leaf page to disk. */
WT_ERR(__slvg_modify_init(session, page));
@@ -2013,9 +2011,7 @@ __slvg_row_build_leaf(
* would have been lost.) Clear the reference addr so eviction doesn't
* free the underlying blocks.
*/
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- ref->addr = NULL;
+ __wt_ref_free_addr(session, ref);
/* Write the new version of the leaf page to disk. */
WT_ERR(__slvg_modify_init(session, page));
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index a0dfbf32cad..e2031553aed 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -895,6 +895,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
WT_ASSERT(session, next_ref->page_del == NULL);
+ __wt_ref_free_addr(session, next_ref);
WT_TRET(__split_safe_free(
session, split_gen, exclusive, next_ref, sizeof(WT_REF)));
parent_decr += sizeof(WT_REF);
@@ -1182,8 +1183,8 @@ err: /*
* Lock an internal page.
*/
static int
-__split_internal_lock(
- WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE **parentp, bool *hazardp)
+__split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock,
+ WT_PAGE **parentp, bool *hazardp)
{
WT_DECL_RET;
WT_PAGE *parent;
@@ -1201,7 +1202,7 @@ __split_internal_lock(
* loop until the exclusive lock is resolved). If we want to split
* the parent, give up to avoid that deadlock.
*/
- if (S2BT(session)->checkpointing != WT_CKPT_OFF)
+ if (!trylock && S2BT(session)->checkpointing != WT_CKPT_OFF)
return (EBUSY);
/*
@@ -1226,7 +1227,10 @@ __split_internal_lock(
if (F_ISSET_ATOMIC(parent, WT_PAGE_SPLIT_BLOCK))
return (EBUSY);
- WT_RET(__wt_fair_lock(session, &parent->page_lock));
+ if (trylock)
+ WT_RET(__wt_fair_trylock(session, &parent->page_lock));
+ else
+ WT_RET(__wt_fair_lock(session, &parent->page_lock));
if (parent == ref->home)
break;
WT_RET(__wt_fair_unlock(session, &parent->page_lock));
@@ -1370,7 +1374,7 @@ __split_parent_climb(WT_SESSION_IMPL *session, WT_PAGE *page, bool page_hazard)
* locks, lock-coupling up the tree.
*/
WT_ERR(__split_internal_lock(
- session, ref, &parent, &parent_hazard));
+ session, ref, true, &parent, &parent_hazard));
ret = __split_internal(session, parent, page);
WT_TRET(__split_internal_unlock(session, page, page_hazard));
@@ -1668,6 +1672,12 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
child->addr = ref->addr;
/*
+ * The address has moved to the replacement WT_REF. Make sure it isn't
+ * freed when the original ref is discarded.
+ */
+ ref->addr = NULL;
+
+ /*
* Copy the first key from the original page into first ref in the new
* parent. Pages created in memory always have a "smallest" insert
* list, so look there first. If we don't find one, get the first key
@@ -1856,6 +1866,11 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
return (0);
err: if (split_ref[0] != NULL) {
+ /*
+ * The address was moved to the replacement WT_REF, restore it.
+ */
+ ref->addr = split_ref[0]->addr;
+
__wt_free(session, split_ref[0]->key.ikey);
__wt_free(session, split_ref[0]);
}
@@ -1883,7 +1898,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
WT_RET(__wt_verbose(
session, WT_VERB_SPLIT, "%p: split-insert", ref->page));
- WT_RET(__split_internal_lock(session, ref, &parent, &hazard));
+ WT_RET(__split_internal_lock(session, ref, true, &parent, &hazard));
if ((ret = __split_insert(session, ref)) != 0) {
WT_TRET(__split_internal_unlock(session, parent, hazard));
return (ret);
@@ -1975,7 +1990,7 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing)
WT_RET(__wt_verbose(
session, WT_VERB_SPLIT, "%p: split-multi", ref->page));
- WT_RET(__split_internal_lock(session, ref, &parent, &hazard));
+ WT_RET(__split_internal_lock(session, ref, false, &parent, &hazard));
if ((ret = __split_multi(session, ref, closing)) != 0 || closing) {
WT_TRET(__split_internal_unlock(session, parent, hazard));
return (ret);
@@ -2004,7 +2019,7 @@ __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref)
WT_RET(__wt_verbose(
session, WT_VERB_SPLIT, "%p: reverse-split", ref->page));
- WT_RET(__split_internal_lock(session, ref, &parent, &hazard));
+ WT_RET(__split_internal_lock(session, ref, false, &parent, &hazard));
ret = __split_parent(session, ref, NULL, 0, 0, false, true);
WT_TRET(__split_internal_unlock(session, parent, hazard));
return (ret);
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 306362de57f..f2784890ab7 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -159,7 +159,8 @@ __evict_server(void *arg)
WT_DECL_RET;
WT_SESSION_IMPL *session;
#ifdef HAVE_DIAGNOSTIC
- struct timespec now, stuck_ts = { 0, 0 };
+ struct timespec now, stuck_ts;
+ uint64_t pages_evicted = 0;
#endif
u_int spins;
@@ -204,10 +205,11 @@ __evict_server(void *arg)
/* Next time we wake up, reverse the sweep direction. */
cache->flags ^= WT_CACHE_WALK_REVERSE;
#ifdef HAVE_DIAGNOSTIC
- stuck_ts.tv_sec = 0;
- } else if (stuck_ts.tv_sec == 0)
+ pages_evicted = 0;
+ } else if (pages_evicted != cache->pages_evict) {
WT_ERR(__wt_epoch(session, &stuck_ts));
- else {
+ pages_evicted = cache->pages_evict;
+ } else {
/* After being stuck for 5 minutes, give up. */
WT_ERR(__wt_epoch(session, &now));
if (WT_TIMEDIFF_SEC(now, stuck_ts) > 300) {
@@ -481,6 +483,13 @@ __evict_update_work(WT_SESSION_IMPL *session)
goto done;
}
+ /*
+ * If the cache has been stuck and is now under control, clear the
+ * stuck flag.
+ */
+ if (bytes_inuse < bytes_max)
+ F_CLR(cache, WT_CACHE_STUCK);
+
dirty_inuse = __wt_cache_dirty_inuse(cache);
if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100) {
FLD_SET(cache->state, WT_EVICT_PASS_DIRTY);
@@ -498,6 +507,7 @@ __evict_update_work(WT_SESSION_IMPL *session)
F_CLR(cache, WT_CACHE_WOULD_BLOCK);
goto done;
}
+
return (false);
done: if (F_ISSET(cache, WT_CACHE_STUCK))
@@ -1284,19 +1294,6 @@ fast: /* If the page can't be evicted, give up. */
continue;
}
- /*
- * If the oldest transaction hasn't changed since the last time
- * this page was written, it's unlikely we can make progress.
- * Similarly, if the most recent update on the page is not yet
- * globally visible, eviction will fail. These heuristics
- * attempt to avoid repeated attempts to evict the same page.
- */
- if (modified && !would_split &&
- !FLD_ISSET(cache->state, WT_CACHE_STUCK) &&
- (mod->last_oldest_id == __wt_txn_oldest_id(session) ||
- !__wt_txn_visible_all(session, mod->update_txn)))
- continue;
-
WT_ASSERT(session, evict->ref == NULL);
__evict_init_candidate(session, evict, ref);
++evict;
@@ -1419,7 +1416,6 @@ static int
__evict_page(WT_SESSION_IMPL *session, bool is_server)
{
WT_BTREE *btree;
- WT_CACHE *cache;
WT_DECL_RET;
WT_PAGE *page;
WT_REF *ref;
@@ -1458,12 +1454,6 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
(void)__wt_atomic_subv32(&btree->evict_busy, 1);
- WT_RET(ret);
-
- cache = S2C(session)->cache;
- if (F_ISSET(cache, WT_CACHE_STUCK))
- F_CLR(cache, WT_CACHE_STUCK);
-
return (ret);
}
@@ -1607,8 +1597,8 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
next_walk = NULL;
session->dhandle = dhandle;
- while (__wt_tree_walk(session,
- &next_walk, NULL, WT_READ_CACHE | WT_READ_NO_WAIT) == 0 &&
+ while (__wt_tree_walk(session, &next_walk, NULL,
+ WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 &&
next_walk != NULL) {
page = next_walk->page;
size = page->memory_footprint;
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 26ea9117fae..9281e7cdb2d 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -241,19 +241,14 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
{
WT_ADDR *addr;
WT_DECL_RET;
- WT_PAGE *parent;
WT_PAGE_MODIFY *mod;
- parent = ref->home;
mod = ref->page->modify;
switch (mod->rec_result) {
case WT_PM_REC_EMPTY: /* Page is empty */
/* Discard the parent's address. */
- if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) {
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- }
+ __wt_ref_free_addr(session, ref);
/*
* Update the parent to reference a deleted page. The fact that
@@ -308,10 +303,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
return (EBUSY);
/* Discard the parent's address. */
- if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) {
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- }
+ __wt_ref_free_addr(session, ref);
/*
* Update the parent to reference the replacement page.
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 3f8dc08a1da..02633c3206a 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -466,6 +466,22 @@ __wt_off_page(WT_PAGE *page, const void *p)
}
/*
+ * __wt_ref_free_addr --
+ * Free the address in a reference, if necessary.
+ */
+static inline void
+__wt_ref_free_addr(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ if (ref->addr != NULL) {
+ if (ref->home == NULL || __wt_off_page(ref->home, ref->addr)) {
+ __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
+ __wt_free(session, ref->addr);
+ } else
+ ref->addr = NULL;
+ }
+}
+
+/*
* __wt_ref_key --
* Return a reference to a row-store internal page key as cheaply as
* possible.
@@ -1046,6 +1062,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
WT_BTREE *btree;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
+ bool modified;
if (inmem_splitp != NULL)
*inmem_splitp = false;
@@ -1070,14 +1087,15 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
return (true);
}
+ modified = __wt_page_is_modified(page);
+
/*
* If the file is being checkpointed, we can't evict dirty pages:
* if we write a page and free the previous version of the page, that
* previous version might be referenced by an internal page already
* been written in the checkpoint, leaving the checkpoint inconsistent.
*/
- if (btree->checkpointing != WT_CKPT_OFF &&
- __wt_page_is_modified(page)) {
+ if (btree->checkpointing != WT_CKPT_OFF && modified) {
WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint);
WT_STAT_FAST_DATA_INCR(session, cache_eviction_checkpoint);
return (false);
@@ -1104,6 +1122,19 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK))
return (false);
+ /*
+ * If the oldest transaction hasn't changed since the last time
+ * this page was written, it's unlikely we can make progress.
+ * Similarly, if the most recent update on the page is not yet
+ * globally visible, eviction will fail. These heuristics
+ * attempt to avoid repeated attempts to evict the same page.
+ */
+ if (modified &&
+ !F_ISSET(S2C(session)->cache, WT_CACHE_STUCK) &&
+ (mod->last_oldest_id == __wt_txn_oldest_id(session) ||
+ !__wt_txn_visible_all(session, mod->update_txn)))
+ return (false);
+
return (true);
}
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index 255551f99a4..8155397d823 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -293,7 +293,7 @@ __wt_log_slot_init(WT_SESSION_IMPL *session)
&log->slot_pool[i].slot_buf, log->slot_buf_size));
F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS);
}
- WT_STAT_FAST_CONN_INCRV(session,
+ WT_STAT_FAST_CONN_SET(session,
log_buffer_size, log->slot_buf_size * WT_SLOT_POOL);
/*
* Set up the available slot from the pool the first time.
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 8bf81eafac2..0e1e7498568 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -1395,12 +1395,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session,
__wt_txn_visible_all(session, page_del->txnid))) {
WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL));
WT_RET(__rec_block_free(session, addr, addr_size));
-
- if (__wt_off_page(ref->home, ref->addr)) {
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- }
- ref->addr = NULL;
+ __wt_ref_free_addr(session, ref);
}
/*
@@ -5434,12 +5429,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_RET(__wt_ref_info(
session, ref, &addr, &addr_size, NULL));
WT_RET(__rec_block_free(session, addr, addr_size));
- if (__wt_off_page(ref->home, ref->addr)) {
- __wt_free(
- session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- }
- ref->addr = NULL;
+ __wt_ref_free_addr(session, ref);
}
break;
case WT_PM_REC_EMPTY: /* Page deleted */