summary | refs | log | tree | commit | diff
path: root/src/third_party/wiredtiger/src/reconcile/rec_write.c
diff options
context:
space:
mode:
author: Alex Gorrod <alexander.gorrod@mongodb.com> 2016-12-12 12:23:13 +1100
committer: Alex Gorrod <alexander.gorrod@mongodb.com> 2016-12-12 12:23:13 +1100
commit: 21a6f07d859c132154166bd3d83bbed238d5d719 (patch)
tree: bc261840853dda4307c68fd1c889caf4c89dd3d3 /src/third_party/wiredtiger/src/reconcile/rec_write.c
parent: 7cf929f25638e4ad9525775c8ea0e18f3c86faf5 (diff)
download: mongo-21a6f07d859c132154166bd3d83bbed238d5d719.tar.gz
Import wiredtiger: 1b6c815a3fd34f14c20d5cd627155799d1de535c from branch mongodb-3.6
ref: ca6eee06ff..1b6c815a3f
for: 3.5.1

WT-2336 Add a test validating schema operations via file system call monitoring
WT-2670 Add option to configure read-ahead per table and change default behavior
WT-2960 Inserting multi-megabyte values can cause pathological lookaside usage
WT-2969 Fix a bug that could cause snapshot corruption during compaction
WT-3014 Add GCC/clang support for ELF symbol visibility.
WT-3021 Fixes needed for Java log cursor example, Java raw mode cursors, log cursors in raw mode
WT-3025 fix error path in log_force_sync
WT-3028 Workloads with all dirty pages could trigger diagnostic stuck check
WT-3030 Test failure indicating invalid key order during traversal
WT-3034 Add support for single-writer named snapshots.
WT-3037 Fix some outdated comments in logging
WT-3048 WiredTiger maximum size warning uses the wrong format.
WT-3051 Remove external __wt_hex symbol.
WT-3052 Improve search if an index hint is wrong
WT-3053 Review Python and Java calls to internal WiredTiger functions
WT-3054 Java PackTest, PackTest03 do not compile
WT-3055 Java AsyncTest faults
WT-3057 WiredTiger hazard pointers should use the WT_REF, not the WT_PAGE.
WT-3064 minor tree cleanups: .gitignore, NEWS misspelling
Diffstat (limited to 'src/third_party/wiredtiger/src/reconcile/rec_write.c')
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c | 94
1 file changed, 73 insertions, 21 deletions
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index fe288beed15..86749eef2e1 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -43,6 +43,10 @@ typedef struct {
/* Track the page's maximum transaction ID. */
uint64_t max_txn;
+ /* Track if all updates were skipped. */
+ uint64_t update_cnt;
+ uint64_t update_skip_cnt;
+
/*
* When we can't mark the page clean (for example, checkpoint found some
* uncommitted updates), there's a leave-dirty flag.
@@ -327,9 +331,10 @@ static int __rec_split_write(WT_SESSION_IMPL *,
WT_RECONCILE *, WT_BOUNDARY *, WT_ITEM *, bool);
static int __rec_update_las(
WT_SESSION_IMPL *, WT_RECONCILE *, uint32_t, WT_BOUNDARY *);
+static int __rec_write_check_complete(WT_SESSION_IMPL *, WT_RECONCILE *);
static int __rec_write_init(WT_SESSION_IMPL *,
WT_REF *, uint32_t, WT_SALVAGE_COOKIE *, void *);
-static int __rec_write_status(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
+static void __rec_write_page_status(WT_SESSION_IMPL *, WT_RECONCILE *);
static int __rec_write_wrapup(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
static int __rec_write_wrapup_err(
WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
@@ -345,8 +350,8 @@ static void __rec_dictionary_reset(WT_RECONCILE *);
* Reconcile an in-memory page into its on-disk format, and write it.
*/
int
-__wt_reconcile(WT_SESSION_IMPL *session,
- WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags)
+__wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
+ WT_SALVAGE_COOKIE *salvage, uint32_t flags, bool *lookaside_retryp)
{
WT_DECL_RET;
WT_PAGE *page;
@@ -356,6 +361,8 @@ __wt_reconcile(WT_SESSION_IMPL *session,
page = ref->page;
mod = page->modify;
+ if (lookaside_retryp != NULL)
+ *lookaside_retryp = false;
__wt_verbose(session,
WT_VERB_RECONCILE, "%s", __wt_page_type_string(page->type));
@@ -421,19 +428,27 @@ __wt_reconcile(WT_SESSION_IMPL *session,
WT_ILLEGAL_VALUE_SET(session);
}
- /* Get the final status for the reconciliation. */
+ /* Checks for a successful reconciliation. */
if (ret == 0)
- ret = __rec_write_status(session, r, page);
+ ret = __rec_write_check_complete(session, r);
/* Wrap up the page reconciliation. */
- if (ret == 0)
- ret = __rec_write_wrapup(session, r, page);
+ if (ret == 0 && (ret = __rec_write_wrapup(session, r, page)) == 0)
+ __rec_write_page_status(session, r);
else
WT_TRET(__rec_write_wrapup_err(session, r, page));
/* Release the reconciliation lock. */
__wt_writeunlock(session, &page->page_lock);
+ /*
+ * If our caller can configure lookaside table reconciliation, flag if
+ * that's worth trying. The lookaside table doesn't help if we skipped
+ * updates, it can only help with older readers preventing eviction.
+ */
+ if (lookaside_retryp != NULL && r->update_cnt == r->update_skip_cnt)
+ *lookaside_retryp = true;
+
/* Update statistics. */
WT_STAT_CONN_INCR(session, rec_pages);
WT_STAT_DATA_INCR(session, rec_pages);
@@ -535,17 +550,14 @@ __rec_las_checkpoint_test(WT_SESSION_IMPL *session, WT_RECONCILE *r)
}
/*
- * __rec_write_status --
- * Return the final status for reconciliation.
+ * __rec_write_check_complete --
+ * Check that reconciliation should complete
*/
static int
-__rec_write_status(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
+__rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
- WT_BTREE *btree;
- WT_PAGE_MODIFY *mod;
-
- btree = S2BT(session);
- mod = page->modify;
+ WT_BOUNDARY *bnd;
+ size_t i;
/*
* If we have used the lookaside table, check for a lookaside table and
@@ -555,6 +567,37 @@ __rec_write_status(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
return (EBUSY);
/*
+ * If we are doing update/restore based eviction, confirm part of the
+ * page is being discarded, or at least 10% of the updates won't have
+ * to be re-instantiated. Otherwise, it isn't progress, don't bother.
+ */
+ if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE)) {
+ for (bnd = r->bnd, i = 0; i < r->bnd_entries; ++bnd, ++i)
+ if (bnd->supd == NULL)
+ break;
+ if (i == r->bnd_entries &&
+ r->update_cnt / 10 >= r->update_skip_cnt)
+ return (EBUSY);
+ }
+ return (0);
+}
+
+/*
+ * __rec_write_page_status --
+ * Set the page status after reconciliation.
+ */
+static void
+__rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
+{
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+
+ btree = S2BT(session);
+ page = r->page;
+ mod = page->modify;
+
+ /*
* Set the page's status based on whether or not we cleaned the page.
*/
if (r->leave_dirty) {
@@ -612,8 +655,6 @@ __rec_write_status(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
else
WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
}
-
- return (0);
}
/*
@@ -675,6 +716,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
* pages in memory; it's not needed here, asserted for safety.
*/
WT_ASSERT(session, mod->mod_multi[i].supd == NULL);
+ WT_ASSERT(session, mod->mod_multi[i].disk_image == NULL);
WT_ERR(__wt_multi_to_ref(session,
next, &mod->mod_multi[i], &pindex->index[i], NULL, false));
@@ -700,7 +742,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
* Fake up a reference structure, and write the next root page.
*/
__wt_root_ref_init(&fake_ref, next, page->type == WT_PAGE_COL_INT);
- return (__wt_reconcile(session, &fake_ref, NULL, flags));
+ return (__wt_reconcile(session, &fake_ref, NULL, flags, NULL));
err: __wt_page_out(session, &next);
return (ret);
@@ -841,6 +883,9 @@ __rec_write_init(WT_SESSION_IMPL *session,
/* Track the page's maximum transaction ID. */
r->max_txn = WT_TXN_NONE;
+ /* Track if all updates were skipped. */
+ r->update_cnt = r->update_skip_cnt = 0;
+
/* Track if the page can be marked clean. */
r->leave_dirty = false;
@@ -1082,6 +1127,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
} else
upd_list = ins->upd;
+ ++r->update_cnt;
for (skipped = false,
max_txn = WT_TXN_NONE, min_txn = UINT64_MAX,
upd = upd_list; upd != NULL; upd = upd->next) {
@@ -1172,6 +1218,12 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
txnid != S2C(session)->txn_global.checkpoint_txnid ||
WT_SESSION_IS_CHECKPOINT(session));
#endif
+
+ /*
+ * Track how many update chains we saw vs. how many update
+ * chains had an entry we skipped.
+ */
+ ++r->update_skip_cnt;
return (0);
}
@@ -3599,7 +3651,7 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
WT_RET(__rec_split_finish(session, r));
WT_RET(__rec_write_wrapup(session, r, r->page));
- WT_RET(__rec_write_status(session, r, r->page));
+ __rec_write_page_status(session, r);
/* Mark the page's parent and the tree dirty. */
parent = r->ref->home;
@@ -4450,8 +4502,8 @@ record_loop: /*
*
* Write a placeholder.
*/
- WT_ASSERT(session,
- F_ISSET(r, WT_EVICT_UPDATE_RESTORE));
+ WT_ASSERT(session,
+ F_ISSET(r, WT_EVICT_UPDATE_RESTORE));
data = "@";
size = 1;