diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/reconcile/rec_write.c')
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_write.c | 97 |
1 files changed, 74 insertions, 23 deletions
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index fe288beed15..e82f449a50d 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -43,6 +43,10 @@ typedef struct { /* Track the page's maximum transaction ID. */ uint64_t max_txn; + /* Track if all updates were skipped. */ + uint64_t update_cnt; + uint64_t update_skip_cnt; + /* * When we can't mark the page clean (for example, checkpoint found some * uncommitted updates), there's a leave-dirty flag. @@ -327,9 +331,10 @@ static int __rec_split_write(WT_SESSION_IMPL *, WT_RECONCILE *, WT_BOUNDARY *, WT_ITEM *, bool); static int __rec_update_las( WT_SESSION_IMPL *, WT_RECONCILE *, uint32_t, WT_BOUNDARY *); +static int __rec_write_check_complete(WT_SESSION_IMPL *, WT_RECONCILE *); static int __rec_write_init(WT_SESSION_IMPL *, WT_REF *, uint32_t, WT_SALVAGE_COOKIE *, void *); -static int __rec_write_status(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); +static void __rec_write_page_status(WT_SESSION_IMPL *, WT_RECONCILE *); static int __rec_write_wrapup(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_write_wrapup_err( WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); @@ -345,8 +350,8 @@ static void __rec_dictionary_reset(WT_RECONCILE *); * Reconcile an in-memory page into its on-disk format, and write it. */ int -__wt_reconcile(WT_SESSION_IMPL *session, - WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags) +__wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, + WT_SALVAGE_COOKIE *salvage, uint32_t flags, bool *lookaside_retryp) { WT_DECL_RET; WT_PAGE *page; @@ -356,6 +361,8 @@ __wt_reconcile(WT_SESSION_IMPL *session, page = ref->page; mod = page->modify; + if (lookaside_retryp != NULL) + *lookaside_retryp = false; __wt_verbose(session, WT_VERB_RECONCILE, "%s", __wt_page_type_string(page->type)); @@ -421,19 +428,27 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_ILLEGAL_VALUE_SET(session); } - /* Get the final status for the reconciliation. */ + /* Checks for a successful reconciliation. */ if (ret == 0) - ret = __rec_write_status(session, r, page); + ret = __rec_write_check_complete(session, r); /* Wrap up the page reconciliation. */ - if (ret == 0) - ret = __rec_write_wrapup(session, r, page); + if (ret == 0 && (ret = __rec_write_wrapup(session, r, page)) == 0) + __rec_write_page_status(session, r); else WT_TRET(__rec_write_wrapup_err(session, r, page)); /* Release the reconciliation lock. */ __wt_writeunlock(session, &page->page_lock); + /* + * If our caller can configure lookaside table reconciliation, flag if + * that's worth trying. The lookaside table doesn't help if we skipped + * updates, it can only help with older readers preventing eviction. + */ + if (lookaside_retryp != NULL && r->update_cnt == r->update_skip_cnt) + *lookaside_retryp = true; + /* Update statistics. */ WT_STAT_CONN_INCR(session, rec_pages); WT_STAT_DATA_INCR(session, rec_pages); @@ -535,17 +550,14 @@ __rec_las_checkpoint_test(WT_SESSION_IMPL *session, WT_RECONCILE *r) } /* - * __rec_write_status -- - * Return the final status for reconciliation. + * __rec_write_check_complete -- + * Check that reconciliation should complete */ static int -__rec_write_status(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) +__rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BTREE *btree; - WT_PAGE_MODIFY *mod; - - btree = S2BT(session); - mod = page->modify; + WT_BOUNDARY *bnd; + size_t i; /* * If we have used the lookaside table, check for a lookaside table and @@ -555,6 +567,37 @@ __rec_write_status(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) return (EBUSY); /* + * If we are doing update/restore based eviction, confirm part of the + * page is being discarded, or at least 10% of the updates won't have + * to be re-instantiated. Otherwise, it isn't progress, don't bother. + */ + if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE)) { + for (bnd = r->bnd, i = 0; i < r->bnd_entries; ++bnd, ++i) + if (bnd->supd == NULL) + break; + if (i == r->bnd_entries && + r->update_cnt / 10 >= r->update_skip_cnt) + return (EBUSY); + } + return (0); +} + +/* + * __rec_write_page_status -- + * Set the page status after reconciliation. + */ +static void +__rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r) +{ + WT_BTREE *btree; + WT_PAGE *page; + WT_PAGE_MODIFY *mod; + + btree = S2BT(session); + page = r->page; + mod = page->modify; + + /* * Set the page's status based on whether or not we cleaned the page. */ if (r->leave_dirty) { @@ -612,8 +655,6 @@ __rec_write_status(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) else WT_ASSERT(session, !F_ISSET(r, WT_EVICTING)); } - - return (0); } /* @@ -675,6 +716,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) * pages in memory; it's not needed here, asserted for safety. */ WT_ASSERT(session, mod->mod_multi[i].supd == NULL); + WT_ASSERT(session, mod->mod_multi[i].disk_image == NULL); WT_ERR(__wt_multi_to_ref(session, next, &mod->mod_multi[i], &pindex->index[i], NULL, false)); @@ -700,7 +742,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) * Fake up a reference structure, and write the next root page. */ __wt_root_ref_init(&fake_ref, next, page->type == WT_PAGE_COL_INT); - return (__wt_reconcile(session, &fake_ref, NULL, flags)); + return (__wt_reconcile(session, &fake_ref, NULL, flags, NULL)); err: __wt_page_out(session, &next); return (ret); @@ -841,6 +883,9 @@ __rec_write_init(WT_SESSION_IMPL *session, /* Track the page's maximum transaction ID. */ r->max_txn = WT_TXN_NONE; + /* Track if all updates were skipped. */ + r->update_cnt = r->update_skip_cnt = 0; + /* Track if the page can be marked clean. */ r->leave_dirty = false; @@ -1082,6 +1127,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, } else upd_list = ins->upd; + ++r->update_cnt; for (skipped = false, max_txn = WT_TXN_NONE, min_txn = UINT64_MAX, upd = upd_list; upd != NULL; upd = upd->next) { @@ -1172,6 +1218,12 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, txnid != S2C(session)->txn_global.checkpoint_txnid || WT_SESSION_IS_CHECKPOINT(session)); #endif + + /* + * Track how many update chains we saw vs. how many update + * chains had an entry we skipped. + */ + ++r->update_skip_cnt; return (0); } @@ -3599,7 +3651,7 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_RET(__rec_split_finish(session, r)); WT_RET(__rec_write_wrapup(session, r, r->page)); - WT_RET(__rec_write_status(session, r, r->page)); + __rec_write_page_status(session, r); /* Mark the page's parent and the tree dirty. */ parent = r->ref->home; @@ -4450,8 +4502,8 @@ record_loop: /* * * Write a placeholder. */ - WT_ASSERT(session, - F_ISSET(r, WT_EVICT_UPDATE_RESTORE)); + WT_ASSERT(session, + F_ISSET(r, WT_EVICT_UPDATE_RESTORE)); data = "@"; size = 1; @@ -5467,7 +5519,6 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) static int __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page) { - WT_DECL_RET; WT_PAGE_MODIFY *mod; WT_MULTI *multi; uint32_t i; @@ -5527,7 +5578,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page) break; } - return (ret); + return (0); } /* |