summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Michael Cahill <michael.cahill@mongodb.com> 2017-11-28 12:53:30 +1100
committer Alex Gorrod <alexander.gorrod@mongodb.com> 2017-11-28 12:53:30 +1100
commit 42139b18dd8a105d0ccc3882786ae92bcb9a4c18 (patch)
tree 979f8fc8db80fd46794e3c4107a00ef54f7409e5
parent 6b3dee7f0808a7877129c804b95d1e986e4e5fa6 (diff)
download mongo-42139b18dd8a105d0ccc3882786ae92bcb9a4c18.tar.gz
WT-3763 Tune eviction for various MongoDB workloads. (#3804)
In particular, balance primary inserts, overflowing the cache to use the lookaside table, secondary inserts and secondary reads of the oplog (assuming the oplog is at least partially stored in the lookaside table).
-rw-r--r-- src/btree/bt_read.c 3
-rw-r--r-- src/btree/bt_split.c 5
-rw-r--r-- src/cache/cache_las.c 4
-rw-r--r-- src/evict/evict_lru.c 20
-rw-r--r-- src/evict/evict_page.c 10
-rw-r--r-- src/include/btmem.h 1
-rw-r--r-- src/include/btree.i 41
-rw-r--r-- src/reconcile/rec_write.c 20
8 files changed, 63 insertions, 41 deletions
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index 7ce1522daa3..dd39610a3e2 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -297,8 +297,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
* skipping the page indefinitely or large records can lead to
* extremely large memory footprints.
*/
- if (page->modify->update_restored &&
- !__wt_page_evict_retry(session, page))
+ if (!__wt_page_evict_retry(session, page))
return (false);
/* Trigger eviction on the next page release. */
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index a9643ed92a0..bf7ea54adb0 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -1493,9 +1493,10 @@ __split_multi_inmem(
page->modify->first_dirty_txn = WT_TXN_FIRST;
/*
- * If the new page is modified, save the oldest ID from reconciliation
- * to avoid repeatedly attempting eviction on the same page.
+ * If the new page is modified, save the eviction generation to avoid
+ * repeatedly attempting eviction on the same page.
*/
+ page->modify->last_evict_pass_gen = orig->modify->last_evict_pass_gen;
page->modify->last_eviction_id = orig->modify->last_eviction_id;
__wt_timestamp_set(&page->modify->last_eviction_timestamp,
&orig->modify->last_eviction_timestamp);
diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c
index 9f8aeb7cc9e..f7b62b5f809 100644
--- a/src/cache/cache_las.c
+++ b/src/cache/cache_las.c
@@ -64,7 +64,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
dstats = ((WT_CURSOR_BTREE *)
cache->las_session[0]->las_cursor)->btree->dhandle->stats;
- v = WT_STAT_READ(dstats, cursor_insert);
+ v = WT_STAT_READ(dstats, cursor_update);
WT_STAT_SET(session, cstats, cache_lookaside_insert, v);
v = WT_STAT_READ(dstats, cursor_remove);
WT_STAT_SET(session, cstats, cache_lookaside_remove, v);
@@ -433,6 +433,7 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
/* Wrap all the updates in a transaction. */
las_session = (WT_SESSION_IMPL *)cursor->session;
WT_RET(__wt_txn_begin(las_session, NULL));
+ las_session->txn.isolation = WT_TXN_ISO_READ_UNCOMMITTED;
/*
* Make sure there are no leftover entries (e.g., from a handle
@@ -638,6 +639,7 @@ __wt_las_remove_block(WT_SESSION_IMPL *session,
*/
if (local_cursor) {
WT_ERR(__wt_txn_begin(las_session, NULL));
+ las_session->txn.isolation = WT_TXN_ISO_READ_UNCOMMITTED;
local_txn = true;
}
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index b1e42fcf489..fe389b65e4d 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -1864,6 +1864,10 @@ __evict_walk_file(WT_SESSION_IMPL *session,
if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
continue;
+ /* Don't queue dirty pages in trees during checkpoints. */
+ if (modified && btree->checkpointing != WT_CKPT_OFF)
+ continue;
+
/*
* It's possible (but unlikely) to visit a page without a read
* generation, if we race with the read instantiating the page.
@@ -1944,14 +1948,13 @@ __evict_walk_file(WT_SESSION_IMPL *session,
goto fast;
/*
- * If there are active transaction and oldest transaction
- * hasn't changed since the last time this page was written,
- * it's unlikely we can make progress. Similarly, if the most
- * recent update on the page is not yet globally visible,
- * eviction will fail. This heuristic avoids repeated attempts
- * to evict the same page.
+ * If the global transaction state hasn't changed since the
+ * last time we tried eviction, it's unlikely we can make
+ * progress. Similarly, if the most recent update on the page
+ * is not yet globally visible, eviction will fail. This
+ * heuristic avoids repeated attempts to evict the same page.
*/
- if (modified && (!__wt_page_evict_retry(session, page) ||
+ if (!__wt_page_evict_retry(session, page) || (modified &&
!__txn_visible_all_id(session, page->modify->update_txn)))
continue;
@@ -2050,9 +2053,10 @@ __evict_get_ref(
cache = S2C(session)->cache;
is_app = !F_ISSET(session, WT_SESSION_INTERNAL);
server_only = is_server && !WT_EVICT_HAS_WORKERS(session);
+ /* Application threads do eviction when cache is full of dirty data */
urgent_ok = (!is_app && !is_server) ||
!WT_EVICT_HAS_WORKERS(session) ||
- (is_app && __wt_cache_aggressive(session));
+ (is_app && F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD));
urgent_queue = cache->evict_urgent_queue;
WT_STAT_CONN_INCR(session, cache_eviction_get_ref);
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index cf56b8cfe7a..7a84f90eb81 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -567,13 +567,13 @@ __evict_review(
if (F_ISSET(conn, WT_CONN_IN_MEMORY))
LF_SET(WT_REC_IN_MEMORY |
WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
+ else if (WT_SESSION_IS_CHECKPOINT(session))
+ LF_SET(WT_REC_LOOKASIDE);
else if (!WT_IS_METADATA(session->dhandle)) {
- if (!WT_SESSION_IS_CHECKPOINT(session)) {
- LF_SET(WT_REC_UPDATE_RESTORE);
+ LF_SET(WT_REC_UPDATE_RESTORE);
- if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB))
- LF_SET(WT_REC_SCRUB);
- }
+ if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB))
+ LF_SET(WT_REC_SCRUB);
/*
* If the cache is under pressure with many updates
diff --git a/src/include/btmem.h b/src/include/btmem.h
index d45b68d1972..54a0f7c3487 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -215,6 +215,7 @@ struct __wt_page_modify {
uint64_t first_dirty_txn;
/* The transaction state last time eviction was attempted. */
+ uint64_t last_evict_pass_gen;
uint64_t last_eviction_id;
WT_DECL_TIMESTAMP(last_eviction_timestamp)
diff --git a/src/include/btree.i b/src/include/btree.i
index 9941fb038ce..560cc8eb212 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -1279,8 +1279,7 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
/*
* __wt_page_evict_retry --
- * Check if there has been transaction progress since the last eviction
- * attempt.
+ * Avoid busy-spinning attempting to evict the same page all the time.
*/
static inline bool
__wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
@@ -1290,29 +1289,43 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
txn_global = &S2C(session)->txn_global;
- if ((mod = page->modify) == NULL)
+ /*
+ * If the page hasn't been through one round of update/restore, give it
+ * a try.
+ */
+ if ((mod = page->modify) == NULL || !mod->update_restored)
return (true);
- if (txn_global->current != txn_global->oldest_id &&
- mod->last_eviction_id == __wt_txn_oldest_id(session))
- return (false);
+ /*
+ * Retry if a reasonable amount of eviction time has passed, the
+ * choice of 5 eviction passes as a reasonable amount of time is
+ * currently pretty arbitrary.
+ */
+ if (__wt_cache_aggressive(session) ||
+ mod->last_evict_pass_gen + 5 < S2C(session)->cache->evict_pass_gen)
+ return (true);
+
+ /* Retry if the global transaction state has moved forward. */
+ if (txn_global->current == txn_global->oldest_id ||
+ mod->last_eviction_id != __wt_txn_oldest_id(session))
+ return (true);
#ifdef HAVE_TIMESTAMPS
{
bool same_timestamp;
- if (__wt_timestamp_iszero(&mod->last_eviction_timestamp))
+ same_timestamp = false;
+ if (!__wt_timestamp_iszero(&mod->last_eviction_timestamp))
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ same_timestamp = __wt_timestamp_cmp(
+ &mod->last_eviction_timestamp,
+ &txn_global->pinned_timestamp) == 0);
+ if (!same_timestamp)
return (true);
-
- WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
- same_timestamp = __wt_timestamp_cmp(
- &mod->last_eviction_timestamp, &txn_global->pinned_timestamp) == 0);
- if (same_timestamp)
- return (false);
}
#endif
- return (true);
+ return (false);
}
/*
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index 77b8c2a2e78..18e8df4918c 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -428,7 +428,8 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
__wt_timestamp_set(&mod->last_eviction_timestamp,
&S2C(session)->txn_global.pinned_timestamp));
#endif
- }
+ mod->last_evict_pass_gen = S2C(session)->cache->evict_pass_gen;
+ }
#ifdef HAVE_DIAGNOSTIC
/*
@@ -620,10 +621,11 @@ __rec_write_check_complete(
/*
* If we have used the lookaside table, check for a lookaside table and
- * checkpoint collision.
+ * checkpoint collision. If there is no collision, go ahead with the
+ * eviction.
*/
- if (r->cache_write_lookaside && __rec_las_checkpoint_test(session, r))
- return (EBUSY);
+ if (r->cache_write_lookaside)
+ return (__rec_las_checkpoint_test(session, r) ? EBUSY : 0);
/*
* Fall back to lookaside eviction during checkpoints if a page can't
@@ -644,8 +646,11 @@ __rec_write_check_complete(
* likely get to write at least one of the blocks. If we've created a
* page image for a page that previously didn't have one, or we had a
* page image and it is now empty, that's also progress.
+ *
+ * Also check that the current reconciliation applied some updates, in
+ * which case evict/restore should gain us some space.
*/
- if (r->multi_next > 1)
+ if (r->multi_next > 1 && r->update_used)
return (0);
/*
@@ -661,13 +666,10 @@ __rec_write_check_complete(
return (0);
/*
- * Check if the current reconciliation applied some updates, in which
- * case evict/restore should gain us some space.
- *
* Check if lookaside eviction is possible. If any of the updates we
* saw were uncommitted, the lookaside table cannot be used.
*/
- if (r->update_uncommitted || r->update_used)
+ if (r->update_uncommitted)
return (0);
*lookaside_retryp = true;