diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-07-09 17:14:36 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2020-07-09 17:14:36 +1000 |
commit | a92547268894ce0063abfe3e5a08f506645a9a2e (patch) | |
tree | 53a92e5220c4e7226089a2a33782d0646c8d2c89 | |
parent | 6125793d48356118548c7c466a630ea38dc50d97 (diff) | |
download | mongo-a92547268894ce0063abfe3e5a08f506645a9a2e.tar.gz |
Import wiredtiger: 874386744e7560ec9a4f68935360e2c66bbd8268 from branch mongodb-4.4
ref: 1ba46e7a92..874386744e
for: 4.4.0-rc13
WT-6488 Fail update restore eviction if no new updates were written
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 |
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_ext.c | 4 |
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_page.c | 7 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/btmem.h | 17 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/reconcile.h | 10 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/reconcile.i | 9 |
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_row.c | 21 |
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_visibility.c | 7 |
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_write.c | 16 |
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn.c | 8 |
10 files changed, 85 insertions, 16 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index aa07e6071a6..0f1b4911db3 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.4", - "commit": "1ba46e7a92c0679c9099783b88028b102040d47f" + "commit": "874386744e7560ec9a4f68935360e2c66bbd8268" } diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c index 8e854da15c1..b11cdf9efff 100644 --- a/src/third_party/wiredtiger/src/block/block_ext.c +++ b/src/third_party/wiredtiger/src/block/block_ext.c @@ -470,8 +470,8 @@ __block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off block->size += size; WT_STAT_DATA_INCR(session, block_extension); - __wt_verbose(session, WT_VERB_BLOCK, "file extend %" PRIdMAX "B @ %" PRIdMAX, (intmax_t)size, - (intmax_t)*offp); + __wt_verbose(session, WT_VERB_BLOCK, "file extend %" PRIdMAX "-%" PRIdMAX, (intmax_t)*offp, + (intmax_t)(*offp + size)); return (0); } diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 2c4f13578ed..bb9e3604ec8 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -635,6 +635,13 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool */ cache = conn->cache; + /* + * Urgent eviction and forced eviction want two different behaviors for inefficient update + * restore evictions, pass this flag so that reconciliation knows which to use. 
+ */ + if (FLD_ISSET(evict_flags, WT_EVICT_CALL_URGENT)) + LF_SET(WT_REC_CALL_URGENT); + if (closing) LF_SET(WT_REC_VISIBILITY_ERR); else if (F_ISSET(ref, WT_REF_FLAG_INTERNAL) || WT_IS_HS(S2BT(session))) diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 94159fd9c2b..98a0a1016a0 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -26,14 +26,15 @@ /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_REC_CHECKPOINT 0x01u -#define WT_REC_CLEAN_AFTER_REC 0x02u -#define WT_REC_EVICT 0x04u -#define WT_REC_HS 0x08u -#define WT_REC_IN_MEMORY 0x10u -#define WT_REC_SCRUB 0x20u -#define WT_REC_VISIBILITY_ERR 0x40u -#define WT_REC_VISIBLE_ALL 0x80u +#define WT_REC_CALL_URGENT 0x001u +#define WT_REC_CHECKPOINT 0x002u +#define WT_REC_CLEAN_AFTER_REC 0x004u +#define WT_REC_EVICT 0x008u +#define WT_REC_HS 0x010u +#define WT_REC_IN_MEMORY 0x020u +#define WT_REC_SCRUB 0x040u +#define WT_REC_VISIBILITY_ERR 0x080u +#define WT_REC_VISIBLE_ALL 0x100u /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h index 94a94a9dbc9..833ca2729af 100644 --- a/src/third_party/wiredtiger/src/include/reconcile.h +++ b/src/third_party/wiredtiger/src/include/reconcile.h @@ -43,6 +43,14 @@ struct __wt_reconcile { u_int updates_unstable; /* Count of updates not visible_all. */ /* + * When we do not find any update to be written for the whole page, we would like to mark + * eviction failed in the case of update-restore. There is no progress made by eviction in such + * a case, the page size stays the same and considering it a success could force the page + * through eviction repeatedly. 
+ */ + bool update_used; + + /* * When we can't mark the page clean after reconciliation (for example, checkpoint or eviction * found some uncommitted updates), there's a leave-dirty flag. */ @@ -268,6 +276,8 @@ typedef struct { WT_UPDATE *upd; /* Update to write (or NULL) */ WT_TIME_WINDOW tw; + + bool upd_saved; /* An element on the row's update chain was saved */ } WT_UPDATE_SELECT; /* diff --git a/src/third_party/wiredtiger/src/include/reconcile.i b/src/third_party/wiredtiger/src/include/reconcile.i index 6e2839311b9..ba74a54f429 100644 --- a/src/third_party/wiredtiger/src/include/reconcile.i +++ b/src/third_party/wiredtiger/src/include/reconcile.i @@ -13,6 +13,13 @@ (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len)) /* + * WT_REC_SPLIT_MIN_ITEMS_USE_MEM + * The minimum number of page items (entries on the disk image or saved updates) associated with + * a page required to consider in-memory updates in the split calculation. + */ +#define WT_REC_SPLIT_MIN_ITEMS_USE_MEM 10 + +/* * __rec_cell_addr_stats -- * Track statistics for time values associated with an address. */ @@ -212,7 +219,7 @@ __wt_rec_need_split(WT_RECONCILE *r, size_t len) * dominating the calculation and causing excessive splitting. Therefore, we'll limit the impact * to a tenth of the cache usage occupied by those updates. */ - if (r->page->type == WT_PAGE_ROW_LEAF && page_items > 10) + if (r->page->type == WT_PAGE_ROW_LEAF && page_items > WT_REC_SPLIT_MIN_ITEMS_USE_MEM) len += r->supd_memsize / 10; /* Check for the disk image crossing a boundary. 
*/ diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c index 59ea0e1b4fd..c07b7de4546 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_row.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c @@ -568,8 +568,27 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) { WT_RET(__wt_rec_upd_select(session, r, ins, NULL, NULL, &upd_select)); - if ((upd = upd_select.upd) == NULL) + if ((upd = upd_select.upd) == NULL) { + /* + * In cases where a page has grown so large we are trying to force evict it (there is + * content, but none of the content can be evicted), we set up fake split points, to + * allow the page to use update restore eviction and be split into multiple reasonably + * sized pages. Check if we are in this situation. The call to split with zero + * additional size is odd, but split takes into account saved updates in a special way + * for this case already. + */ + if (!upd_select.upd_saved || !__wt_rec_need_split(r, 0)) + continue; + + WT_RET(__wt_buf_set(session, r->cur, WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins))); + WT_RET(__wt_rec_split_crossing_bnd(session, r, 0, false)); + + /* + * Turn off prefix and suffix compression until a full key is written into the new page. + */ + r->key_pfx_compress = r->key_sfx_compress = false; continue; + } WT_TIME_WINDOW_COPY(&tw, &upd_select.tw); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c index 197a3a920cb..6b7f5fff165 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c @@ -253,6 +253,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v * both must be initialized. 
*/ upd_select->upd = NULL; + upd_select->upd_saved = false; select_tw = &upd_select->tw; WT_TIME_WINDOW_INIT(select_tw); @@ -386,6 +387,10 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v return (__wt_set_return(session, EBUSY)); } + /* If an update was selected, record that we're making progress. */ + if (upd != NULL) + r->update_used = true; + /* * The start timestamp is determined by the commit timestamp when the key is first inserted (or * last updated). The end timestamp is set when a key/value pair becomes invalid, either because @@ -526,7 +531,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v upd_select->upd != NULL && upd_select->upd->type == WT_UPDATE_TOMBSTONE ? NULL : upd_select->upd, supd_restore, upd_memsize)); - upd_saved = true; + upd_saved = upd_select->upd_saved = true; } /* diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 7452fea82ca..1b16ddd15fd 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -193,6 +193,19 @@ __reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, u if (F_ISSET(r, WT_REC_EVICT) && !WT_IS_HS(btree)) __wt_cache_update_hs_score(session, r->updates_seen, r->updates_unstable); + /* + * If eviction didn't use any updates and didn't split or delete the page, it didn't make + * progress. Give up rather than silently succeeding in doing no work: this way threads know to + * back off forced eviction rather than spinning. + * + * Do not return an error if we are syncing the file with eviction disabled or as part of a + * checkpoint. 
+ */ + if (ret == 0 && !(btree->evict_disabled > 0 || !F_ISSET(btree->dhandle, WT_DHANDLE_OPEN)) && + F_ISSET(r, WT_REC_EVICT) && !WT_PAGE_IS_INTERNAL(r->page) && r->multi_next == 1 && + F_ISSET(r, WT_REC_CALL_URGENT) && !r->update_used && r->cache_write_restore) + ret = __wt_set_return(session, EBUSY); + /* Wrap up the page reconciliation. */ if (ret == 0 && (ret = __rec_write_wrapup(session, r, page)) == 0) __rec_write_page_status(session, r); @@ -546,6 +559,7 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO /* Track if updates were used and/or uncommitted. */ r->updates_seen = r->updates_unstable = 0; + r->update_used = false; /* Track if the page can be marked clean. */ r->leave_dirty = false; @@ -1000,7 +1014,7 @@ __rec_split_row_promote(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ITEM *key, /* * For a column-store, the promoted key is the recno and we already have a copy. For a - * row-store, it's the first key on the page, a variable- length byte string, get a copy. + * row-store, it's the first key on the page, a variable-length byte string, get a copy. * * This function is called from the split code at each split boundary, but that means we're not * called before the first boundary, and we will eventually have to get the first key explicitly diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index dd8504f7c08..1c0be9f8f11 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -2043,8 +2043,14 @@ __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative) * forced eviction successful. Specifically excuse it if: * * Hasn't done many updates * * Is in the middle of a commit or abort + * + * This threshold that we're comparing the number of updates to is related and must be greater + * than the threshold we use in reconciliation's "need split" helper. 
If we're going to rollback + * a transaction, we need to have considered splitting the page in the case that its updates are + * on a single page. */ - if (conservative && (txn->mod_count < 10 || F_ISSET(session, WT_SESSION_RESOLVING_TXN))) + if (conservative && (txn->mod_count < (10 + WT_REC_SPLIT_MIN_ITEMS_USE_MEM) || + F_ISSET(session, WT_SESSION_RESOLVING_TXN))) return (0); /* |