summary refs log tree commit diff
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2020-07-09 17:14:36 +1000
committer Luke Chen <luke.chen@mongodb.com> 2020-07-09 17:14:36 +1000
commit a92547268894ce0063abfe3e5a08f506645a9a2e (patch)
tree 53a92e5220c4e7226089a2a33782d0646c8d2c89
parent 6125793d48356118548c7c466a630ea38dc50d97 (diff)
download mongo-a92547268894ce0063abfe3e5a08f506645a9a2e.tar.gz
Import wiredtiger: 874386744e7560ec9a4f68935360e2c66bbd8268 from branch mongodb-4.4
ref: 1ba46e7a92..874386744e for: 4.4.0-rc13 WT-6488 Fail update restore eviction if no new updates were written
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/block/block_ext.c 4
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_page.c 7
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h 17
-rw-r--r-- src/third_party/wiredtiger/src/include/reconcile.h 10
-rw-r--r-- src/third_party/wiredtiger/src/include/reconcile.i 9
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_row.c 21
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_visibility.c 7
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c 16
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c 8
10 files changed, 85 insertions, 16 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index aa07e6071a6..0f1b4911db3 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "1ba46e7a92c0679c9099783b88028b102040d47f"
+ "commit": "874386744e7560ec9a4f68935360e2c66bbd8268"
}
diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c
index 8e854da15c1..b11cdf9efff 100644
--- a/src/third_party/wiredtiger/src/block/block_ext.c
+++ b/src/third_party/wiredtiger/src/block/block_ext.c
@@ -470,8 +470,8 @@ __block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off
block->size += size;
WT_STAT_DATA_INCR(session, block_extension);
- __wt_verbose(session, WT_VERB_BLOCK, "file extend %" PRIdMAX "B @ %" PRIdMAX, (intmax_t)size,
- (intmax_t)*offp);
+ __wt_verbose(session, WT_VERB_BLOCK, "file extend %" PRIdMAX "-%" PRIdMAX, (intmax_t)*offp,
+ (intmax_t)(*offp + size));
return (0);
}
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 2c4f13578ed..bb9e3604ec8 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -635,6 +635,13 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool
*/
cache = conn->cache;
+ /*
+ * Urgent eviction and forced eviction want two different behaviors for inefficient update
+ * restore evictions; pass this flag so that reconciliation knows which to use.
+ */
+ if (FLD_ISSET(evict_flags, WT_EVICT_CALL_URGENT))
+ LF_SET(WT_REC_CALL_URGENT);
+
if (closing)
LF_SET(WT_REC_VISIBILITY_ERR);
else if (F_ISSET(ref, WT_REF_FLAG_INTERNAL) || WT_IS_HS(S2BT(session)))
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 94159fd9c2b..98a0a1016a0 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -26,14 +26,15 @@
/* AUTOMATIC FLAG VALUE GENERATION STOP */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_REC_CHECKPOINT 0x01u
-#define WT_REC_CLEAN_AFTER_REC 0x02u
-#define WT_REC_EVICT 0x04u
-#define WT_REC_HS 0x08u
-#define WT_REC_IN_MEMORY 0x10u
-#define WT_REC_SCRUB 0x20u
-#define WT_REC_VISIBILITY_ERR 0x40u
-#define WT_REC_VISIBLE_ALL 0x80u
+#define WT_REC_CALL_URGENT 0x001u
+#define WT_REC_CHECKPOINT 0x002u
+#define WT_REC_CLEAN_AFTER_REC 0x004u
+#define WT_REC_EVICT 0x008u
+#define WT_REC_HS 0x010u
+#define WT_REC_IN_MEMORY 0x020u
+#define WT_REC_SCRUB 0x040u
+#define WT_REC_VISIBILITY_ERR 0x080u
+#define WT_REC_VISIBLE_ALL 0x100u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
/*
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
index 94a94a9dbc9..833ca2729af 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.h
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -43,6 +43,14 @@ struct __wt_reconcile {
u_int updates_unstable; /* Count of updates not visible_all. */
/*
+ * When we do not find any update to be written for the whole page, we would like to mark
+ * eviction failed in the case of update-restore. There is no progress made by eviction in such
+ * a case: the page size stays the same, and considering it a success could force the page
+ * through eviction repeatedly.
+ */
+ bool update_used;
+
+ /*
* When we can't mark the page clean after reconciliation (for example, checkpoint or eviction
* found some uncommitted updates), there's a leave-dirty flag.
*/
@@ -268,6 +276,8 @@ typedef struct {
WT_UPDATE *upd; /* Update to write (or NULL) */
WT_TIME_WINDOW tw;
+
+ bool upd_saved; /* An element on the row's update chain was saved */
} WT_UPDATE_SELECT;
/*
diff --git a/src/third_party/wiredtiger/src/include/reconcile.i b/src/third_party/wiredtiger/src/include/reconcile.i
index 6e2839311b9..ba74a54f429 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.i
+++ b/src/third_party/wiredtiger/src/include/reconcile.i
@@ -13,6 +13,13 @@
(WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len))
/*
+ * WT_REC_SPLIT_MIN_ITEMS_USE_MEM
+ * The minimum number of page items (entries on the disk image or saved updates) associated with
+ * a page required to consider in-memory updates in the split calculation.
+ */
+#define WT_REC_SPLIT_MIN_ITEMS_USE_MEM 10
+
+/*
* __rec_cell_addr_stats --
* Track statistics for time values associated with an address.
*/
@@ -212,7 +219,7 @@ __wt_rec_need_split(WT_RECONCILE *r, size_t len)
* dominating the calculation and causing excessive splitting. Therefore, we'll limit the impact
* to a tenth of the cache usage occupied by those updates.
*/
- if (r->page->type == WT_PAGE_ROW_LEAF && page_items > 10)
+ if (r->page->type == WT_PAGE_ROW_LEAF && page_items > WT_REC_SPLIT_MIN_ITEMS_USE_MEM)
len += r->supd_memsize / 10;
/* Check for the disk image crossing a boundary. */
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index 59ea0e1b4fd..c07b7de4546 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -568,8 +568,27 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) {
WT_RET(__wt_rec_upd_select(session, r, ins, NULL, NULL, &upd_select));
- if ((upd = upd_select.upd) == NULL)
+ if ((upd = upd_select.upd) == NULL) {
+ /*
+ * In cases where a page has grown so large we are trying to force evict it (there is
+ * content, but none of the content can be evicted), we set up fake split points, to
+ * allow the page to use update restore eviction and be split into multiple reasonably
+ * sized pages. Check if we are in this situation. The call to split with zero
+ * additional size is odd, but split takes into account saved updates in a special way
+ * for this case already.
+ */
+ if (!upd_select.upd_saved || !__wt_rec_need_split(r, 0))
+ continue;
+
+ WT_RET(__wt_buf_set(session, r->cur, WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, 0, false));
+
+ /*
+ * Turn off prefix and suffix compression until a full key is written into the new page.
+ */
+ r->key_pfx_compress = r->key_sfx_compress = false;
continue;
+ }
WT_TIME_WINDOW_COPY(&tw, &upd_select.tw);
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 197a3a920cb..6b7f5fff165 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -253,6 +253,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
* both must be initialized.
*/
upd_select->upd = NULL;
+ upd_select->upd_saved = false;
select_tw = &upd_select->tw;
WT_TIME_WINDOW_INIT(select_tw);
@@ -386,6 +387,10 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
return (__wt_set_return(session, EBUSY));
}
+ /* If an update was selected, record that we're making progress. */
+ if (upd != NULL)
+ r->update_used = true;
+
/*
* The start timestamp is determined by the commit timestamp when the key is first inserted (or
* last updated). The end timestamp is set when a key/value pair becomes invalid, either because
@@ -526,7 +531,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
upd_select->upd != NULL && upd_select->upd->type == WT_UPDATE_TOMBSTONE ? NULL :
upd_select->upd,
supd_restore, upd_memsize));
- upd_saved = true;
+ upd_saved = upd_select->upd_saved = true;
}
/*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 7452fea82ca..1b16ddd15fd 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -193,6 +193,19 @@ __reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, u
if (F_ISSET(r, WT_REC_EVICT) && !WT_IS_HS(btree))
__wt_cache_update_hs_score(session, r->updates_seen, r->updates_unstable);
+ /*
+ * If eviction didn't use any updates and didn't split or delete the page, it didn't make
+ * progress. Give up rather than silently succeeding in doing no work: this way threads know to
+ * back off forced eviction rather than spinning.
+ *
+ * Do not return an error if we are syncing the file with eviction disabled or as part of a
+ * checkpoint.
+ */
+ if (ret == 0 && !(btree->evict_disabled > 0 || !F_ISSET(btree->dhandle, WT_DHANDLE_OPEN)) &&
+ F_ISSET(r, WT_REC_EVICT) && !WT_PAGE_IS_INTERNAL(r->page) && r->multi_next == 1 &&
+ F_ISSET(r, WT_REC_CALL_URGENT) && !r->update_used && r->cache_write_restore)
+ ret = __wt_set_return(session, EBUSY);
+
/* Wrap up the page reconciliation. */
if (ret == 0 && (ret = __rec_write_wrapup(session, r, page)) == 0)
__rec_write_page_status(session, r);
@@ -546,6 +559,7 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO
/* Track if updates were used and/or uncommitted. */
r->updates_seen = r->updates_unstable = 0;
+ r->update_used = false;
/* Track if the page can be marked clean. */
r->leave_dirty = false;
@@ -1000,7 +1014,7 @@ __rec_split_row_promote(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ITEM *key,
/*
* For a column-store, the promoted key is the recno and we already have a copy. For a
- * row-store, it's the first key on the page, a variable- length byte string, get a copy.
+ * row-store, it's the first key on the page, a variable-length byte string, get a copy.
*
* This function is called from the split code at each split boundary, but that means we're not
* called before the first boundary, and we will eventually have to get the first key explicitly
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index dd8504f7c08..1c0be9f8f11 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -2043,8 +2043,14 @@ __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative)
* forced eviction successful. Specifically excuse it if:
* * Hasn't done many updates
* * Is in the middle of a commit or abort
+ *
+ * The threshold that we're comparing the number of updates to is related to, and must be
+ * greater than, the threshold we use in reconciliation's "need split" helper. If we're going
+ * to roll back a transaction, we need to have considered splitting the page in the case that
+ * its updates are on a single page.
*/
- if (conservative && (txn->mod_count < 10 || F_ISSET(session, WT_SESSION_RESOLVING_TXN)))
+ if (conservative && (txn->mod_count < (10 + WT_REC_SPLIT_MIN_ITEMS_USE_MEM) ||
+ F_ISSET(session, WT_SESSION_RESOLVING_TXN)))
return (0);
/*