summaryrefslogtreecommitdiff
path: root/src/reconcile
diff options
context:
space:
mode:
author: Keith Bostic <keith.bostic@mongodb.com> 2016-12-06 21:52:34 -0500
committer: Alex Gorrod <alexander.gorrod@mongodb.com> 2016-12-07 13:52:34 +1100
commit: 1adda6a0a51e51ffa5a3fa9cf86f6f765585a5f4 (patch)
tree: 489278ba31b4a033111e33a0337fd9a545ebdabf /src/reconcile
parent: c68e35c4c8aca43ebfebaf1ed8a68bef71ccb4cc (diff)
download: mongo-1adda6a0a51e51ffa5a3fa9cf86f6f765585a5f4.tar.gz
WT-2960 Reduce likelihood of using the lookaside file, especially when inserting multi-megabyte values (#3171)
Don't configure the lookaside table as soon as eviction is stuck; only configure the lookaside table if update/restore reconciliation fails and there's reason to believe the lookaside table will be effective, based on the updates that were skipped during the update/restore reconciliation. Previously, the evaluation of whether an update/restore reconciliation would make progress was a check for a single block rewrite with no updates to restore or at least one update chain without a skipped entry. Check more deeply: if there's any block without updates to be restored, or at least 10% of the update chains didn't have skipped entries, assume we're making progress.
Diffstat (limited to 'src/reconcile')
-rw-r--r-- src/reconcile/rec_write.c 50
1 files changed, 37 insertions, 13 deletions
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index f71715412af..86749eef2e1 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -44,7 +44,8 @@ typedef struct {
uint64_t max_txn;
/* Track if all updates were skipped. */
- bool all_skipped;
+ uint64_t update_cnt;
+ uint64_t update_skip_cnt;
/*
* When we can't mark the page clean (for example, checkpoint found some
@@ -349,8 +350,8 @@ static void __rec_dictionary_reset(WT_RECONCILE *);
* Reconcile an in-memory page into its on-disk format, and write it.
*/
int
-__wt_reconcile(WT_SESSION_IMPL *session,
- WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags)
+__wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
+ WT_SALVAGE_COOKIE *salvage, uint32_t flags, bool *lookaside_retryp)
{
WT_DECL_RET;
WT_PAGE *page;
@@ -360,6 +361,8 @@ __wt_reconcile(WT_SESSION_IMPL *session,
page = ref->page;
mod = page->modify;
+ if (lookaside_retryp != NULL)
+ *lookaside_retryp = false;
__wt_verbose(session,
WT_VERB_RECONCILE, "%s", __wt_page_type_string(page->type));
@@ -438,6 +441,14 @@ __wt_reconcile(WT_SESSION_IMPL *session,
/* Release the reconciliation lock. */
__wt_writeunlock(session, &page->page_lock);
+ /*
+ * If our caller can configure lookaside table reconciliation, flag if
+ * that's worth trying. The lookaside table doesn't help if we skipped
+ * updates, it can only help with older readers preventing eviction.
+ */
+ if (lookaside_retryp != NULL && r->update_cnt == r->update_skip_cnt)
+ *lookaside_retryp = true;
+
/* Update statistics. */
WT_STAT_CONN_INCR(session, rec_pages);
WT_STAT_DATA_INCR(session, rec_pages);
@@ -545,6 +556,9 @@ __rec_las_checkpoint_test(WT_SESSION_IMPL *session, WT_RECONCILE *r)
static int
__rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
+ WT_BOUNDARY *bnd;
+ size_t i;
+
/*
* If we have used the lookaside table, check for a lookaside table and
* checkpoint collision.
@@ -553,14 +567,18 @@ __rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r)
return (EBUSY);
/*
- * If we are doing eviction and restoring updates, there is only one
- * block and all update were skipped, no progress has been made and
- * there is no point swapping the new page into place.
+ * If we are doing update/restore based eviction, confirm part of the
+ * page is being discarded, or at least 10% of the updates won't have
+ * to be re-instantiated. Otherwise, it isn't progress, don't bother.
*/
- if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && r->all_skipped &&
- r->bnd_next == 1 && r->bnd[0].supd != NULL)
- return (EBUSY);
-
+ if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE)) {
+ for (bnd = r->bnd, i = 0; i < r->bnd_entries; ++bnd, ++i)
+ if (bnd->supd == NULL)
+ break;
+ if (i == r->bnd_entries &&
+ r->update_cnt / 10 >= r->update_skip_cnt)
+ return (EBUSY);
+ }
return (0);
}
@@ -724,7 +742,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
* Fake up a reference structure, and write the next root page.
*/
__wt_root_ref_init(&fake_ref, next, page->type == WT_PAGE_COL_INT);
- return (__wt_reconcile(session, &fake_ref, NULL, flags));
+ return (__wt_reconcile(session, &fake_ref, NULL, flags, NULL));
err: __wt_page_out(session, &next);
return (ret);
@@ -866,7 +884,7 @@ __rec_write_init(WT_SESSION_IMPL *session,
r->max_txn = WT_TXN_NONE;
/* Track if all updates were skipped. */
- r->all_skipped = true;
+ r->update_cnt = r->update_skip_cnt = 0;
/* Track if the page can be marked clean. */
r->leave_dirty = false;
@@ -1109,6 +1127,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
} else
upd_list = ins->upd;
+ ++r->update_cnt;
for (skipped = false,
max_txn = WT_TXN_NONE, min_txn = UINT64_MAX,
upd = upd_list; upd != NULL; upd = upd->next) {
@@ -1199,7 +1218,12 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
txnid != S2C(session)->txn_global.checkpoint_txnid ||
WT_SESSION_IS_CHECKPOINT(session));
#endif
- r->all_skipped = false;
+
+ /*
+ * Track how many update chains we saw vs. how many update
+ * chains had an entry we skipped.
+ */
+ ++r->update_skip_cnt;
return (0);
}