summary refs log tree commit diff
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2020-07-09 16:42:21 +1000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-07-09 07:01:46 +0000
commit: 0e8bb99b4f9576d8e2ab32af4a8ab60efd80a083 (patch)
tree: 927b772f60a1b2c13f65317bb0482c17fd1d864b
parent: 882b737c34fb9ca5707942a8a6ad05d29de0985a (diff)
download: mongo-0e8bb99b4f9576d8e2ab32af4a8ab60efd80a083.tar.gz
Import wiredtiger: 89446427f9525ecb7dd10c1b193d1a3f78999d77 from branch mongodb-4.6
ref: 1ba46e7a92..89446427f9
for: 4.5.1

WT-5970 Update test_wt4333_handle_locks to use 1GB cache size
WT-6479 Don't insert updates after the first globally visible update to the history store
WT-6488 Fail update restore eviction if no new updates were written
-rw-r--r-- src/third_party/wiredtiger/import.data | 4
-rw-r--r-- src/third_party/wiredtiger/src/block/block_ext.c | 4
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_page.c | 7
-rw-r--r-- src/third_party/wiredtiger/src/history/hs.c | 89
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h | 28
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h | 1
-rw-r--r-- src/third_party/wiredtiger/src/include/reconcile.h | 10
-rw-r--r-- src/third_party/wiredtiger/src/include/reconcile.i | 9
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_row.c | 21
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_visibility.c | 7
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c | 16
-rw-r--r-- src/third_party/wiredtiger/src/support/modify.c | 10
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c | 8
-rw-r--r-- src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c | 8
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_hs15.py | 100
15 files changed, 270 insertions, 52 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index aa07e6071a6..d032a503a40 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,6 +1,6 @@
{
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
- "branch": "mongodb-4.4",
- "commit": "1ba46e7a92c0679c9099783b88028b102040d47f"
+ "branch": "mongodb-4.6",
+ "commit": "89446427f9525ecb7dd10c1b193d1a3f78999d77"
}
diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c
index 8e854da15c1..b11cdf9efff 100644
--- a/src/third_party/wiredtiger/src/block/block_ext.c
+++ b/src/third_party/wiredtiger/src/block/block_ext.c
@@ -470,8 +470,8 @@ __block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off
block->size += size;
WT_STAT_DATA_INCR(session, block_extension);
- __wt_verbose(session, WT_VERB_BLOCK, "file extend %" PRIdMAX "B @ %" PRIdMAX, (intmax_t)size,
- (intmax_t)*offp);
+ __wt_verbose(session, WT_VERB_BLOCK, "file extend %" PRIdMAX "-%" PRIdMAX, (intmax_t)*offp,
+ (intmax_t)(*offp + size));
return (0);
}
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 2c4f13578ed..bb9e3604ec8 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -635,6 +635,13 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool
*/
cache = conn->cache;
+ /*
+ * Urgent eviction and forced eviction want two different behaviors for inefficient update
+ * restore evictions; pass this flag so that reconciliation knows which to use.
+ */
+ if (FLD_ISSET(evict_flags, WT_EVICT_CALL_URGENT))
+ LF_SET(WT_REC_CALL_URGENT);
+
if (closing)
LF_SET(WT_REC_VISIBILITY_ERR);
else if (F_ISSET(ref, WT_REF_FLAG_INTERNAL) || WT_IS_HS(S2BT(session)))
diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c
index cb93b42e56a..e365f6bb69f 100644
--- a/src/third_party/wiredtiger/src/history/hs.c
+++ b/src/third_party/wiredtiger/src/history/hs.c
@@ -694,7 +694,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
WT_MODIFY entries[MAX_REVERSE_MODIFY_NUM];
WT_MODIFY_VECTOR modifies;
WT_SAVE_UPD *list;
- WT_UPDATE *first_non_ts_upd, *non_aborted_upd, *oldest_upd, *prev_upd, *upd;
+ WT_UPDATE *first_globally_visible_upd, *first_non_ts_upd;
+ WT_UPDATE *non_aborted_upd, *oldest_upd, *prev_upd, *upd;
WT_HS_TIME_POINT stop_time_point;
wt_off_t hs_size;
wt_timestamp_t min_insert_ts;
@@ -727,6 +728,19 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
if (list->onpage_upd == NULL)
continue;
+ /* Skip aborted updates. */
+ for (upd = list->onpage_upd->next; upd != NULL && upd->txnid == WT_TXN_ABORTED;
+ upd = upd->next)
+ ;
+
+ /* No update to insert to history store. */
+ if (upd == NULL)
+ continue;
+
+ /* Updates have already been inserted to the history store. */
+ if (F_ISSET(upd, WT_UPDATE_HS))
+ continue;
+
/* History store table key component: source key. */
switch (page->type) {
case WT_PAGE_COL_FIX:
@@ -757,10 +771,12 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
__wt_free_update_list(session, &upd);
upd = list->onpage_upd;
- first_non_ts_upd = NULL;
+ first_globally_visible_upd = first_non_ts_upd = NULL;
ts_updates_in_hs = false;
enable_reverse_modify = true;
+ __wt_modify_vector_clear(&modifies);
+
/*
* The algorithm assumes the oldest update on the update chain in memory is either a full
* update or a tombstone.
@@ -814,10 +830,21 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
__wt_timestamp_to_string(min_insert_ts, ts_string[2]));
upd->start_ts = upd->durable_ts = min_insert_ts;
WT_STAT_CONN_INCR(session, cache_hs_order_fixup_insert);
- } else
+ } else if (upd->start_ts != WT_TS_NONE)
+ /*
+ * Don't reset to WT_TS_NONE as we don't want to clear the timestamps for updates
+ * older than the update without timestamp.
+ */
min_insert_ts = upd->start_ts;
+
WT_ERR(__wt_modify_vector_push(&modifies, upd));
+ /* Track the first update that is globally visible. */
+ if (first_globally_visible_upd == NULL && __wt_txn_upd_visible_all(session, upd))
+ first_globally_visible_upd = upd;
+ else if (first_globally_visible_upd != NULL)
+ F_SET(upd, WT_UPDATE_OBSOLETE);
+
/*
* Always insert full update to the history store if we write a prepared update to the
* data store.
@@ -835,19 +862,26 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
enable_reverse_modify = false;
/* Find the first update without timestamp. */
- if (first_non_ts_upd == NULL && upd->start_ts == WT_TS_NONE) {
+ if (first_non_ts_upd == NULL && upd->start_ts == WT_TS_NONE)
first_non_ts_upd = upd;
- } else if (first_non_ts_upd != NULL && upd->start_ts != WT_TS_NONE) {
+ else if (first_non_ts_upd != NULL && upd->start_ts != WT_TS_NONE) {
/*
* Don't insert updates with timestamps after updates without timestamps to the
* history store.
*/
- F_SET(upd, WT_UPDATE_MASKED_BY_NON_TS_UPDATE);
+ F_SET(upd, WT_UPDATE_OBSOLETE);
if (F_ISSET(upd, WT_UPDATE_HS))
ts_updates_in_hs = true;
}
/*
+ * No need to continue if we see the first self contained value after the first globally
+ * visible value.
+ */
+ if (first_globally_visible_upd != NULL && WT_UPDATE_DATA_VALUE(upd))
+ break;
+
+ /*
* If we've reached a full update and it's in the history store we don't need to
* continue as anything beyond this point won't help with calculating deltas.
*/
@@ -857,21 +891,6 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
prev_upd = upd = NULL;
- /*
- * Trim from the end until there is a full update. We need this if we are dealing with
- * updates without timestamps, and there are timestamped modify updates at the end of update
- * chain that are not relevant due to newer full updates without timestamps.
- */
- for (; modifies.size > 0;) {
- __wt_modify_vector_peek(&modifies, &upd);
- if (upd->type == WT_UPDATE_MODIFY) {
- WT_ASSERT(session, F_ISSET(upd, WT_UPDATE_MASKED_BY_NON_TS_UPDATE));
- __wt_modify_vector_pop(&modifies, &upd);
- } else
- break;
- }
- upd = NULL;
-
/* Construct the oldest full update. */
WT_ASSERT(session, modifies.size > 0);
@@ -886,11 +905,13 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
* the correct place to delete the history store records when inserting the first update and
* it may be skipped if there is nothing to insert to the history store.
*/
- if (oldest_upd->type == WT_UPDATE_TOMBSTONE && oldest_upd == first_non_ts_upd) {
+ if (oldest_upd->type == WT_UPDATE_TOMBSTONE && oldest_upd == first_non_ts_upd &&
+ !F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS)) {
/* We can only delete history store entries that have timestamps. */
WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1));
WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
clear_hs = false;
+ F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
} else
/*
* Clear the content with timestamps in the history store if we see updates without
@@ -900,7 +921,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
* list and there are no updates moved to the history store by checkpoint or a failed
* eviction.
*/
- clear_hs = first_non_ts_upd != NULL && !F_ISSET(first_non_ts_upd, WT_UPDATE_HS) &&
+ clear_hs = first_non_ts_upd != NULL &&
+ !F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS) &&
(list->ins == NULL || ts_updates_in_hs);
WT_ERR(__hs_next_upd_full_value(session, &modifies, NULL, full_value, &upd));
@@ -951,8 +973,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
continue;
}
- /* Skip updates already in the history store or masked by updates without timestamps. */
- if (F_ISSET(upd, WT_UPDATE_HS | WT_UPDATE_MASKED_BY_NON_TS_UPDATE))
+ /* Skip updates that are already in the history store or are obsolete. */
+ if (F_ISSET(upd, WT_UPDATE_HS | WT_UPDATE_OBSOLETE))
continue;
/*
@@ -972,6 +994,19 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
}
/*
+ * Clear history store content if we skip inserting the updates without timestamp. e.g.,
+ * if we have an update chain U@30 -> M@20 -> U@0 and M@20 is globally visible, we skip
+ * writing U@0 to the history store.
+ */
+ if (clear_hs && upd->start_ts != WT_TS_NONE) {
+ /* We can only delete history store entries that have timestamps. */
+ WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1));
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
+ clear_hs = false;
+ F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
+ }
+
+ /*
* Calculate reverse modify and clear the history store records with timestamps when
* inserting the first update.
*/
@@ -987,6 +1022,9 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
WT_ERR(__hs_insert_record(session, cursor, btree, key, upd, WT_UPDATE_STANDARD,
full_value, &stop_time_point, clear_hs));
+ if (clear_hs)
+ F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
+
clear_hs = false;
/* Flag the update as now in the history store. */
F_SET(upd, WT_UPDATE_HS);
@@ -1020,6 +1058,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
/* We can only delete history store entries that have timestamps. */
WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1));
WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
+ F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
}
}
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 94159fd9c2b..e61a5813a59 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -26,14 +26,15 @@
/* AUTOMATIC FLAG VALUE GENERATION STOP */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_REC_CHECKPOINT 0x01u
-#define WT_REC_CLEAN_AFTER_REC 0x02u
-#define WT_REC_EVICT 0x04u
-#define WT_REC_HS 0x08u
-#define WT_REC_IN_MEMORY 0x10u
-#define WT_REC_SCRUB 0x20u
-#define WT_REC_VISIBILITY_ERR 0x40u
-#define WT_REC_VISIBLE_ALL 0x80u
+#define WT_REC_CALL_URGENT 0x001u
+#define WT_REC_CHECKPOINT 0x002u
+#define WT_REC_CLEAN_AFTER_REC 0x004u
+#define WT_REC_EVICT 0x008u
+#define WT_REC_HS 0x010u
+#define WT_REC_IN_MEMORY 0x020u
+#define WT_REC_SCRUB 0x040u
+#define WT_REC_VISIBILITY_ERR 0x080u
+#define WT_REC_VISIBLE_ALL 0x100u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
/*
@@ -1074,11 +1075,12 @@ struct __wt_update {
volatile uint8_t prepare_state; /* prepare state */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_UPDATE_HS 0x01u /* Update has been written to history store. */
-#define WT_UPDATE_MASKED_BY_NON_TS_UPDATE 0x02u /* Update masked by updates without timestamp. */
-#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x04u /* Prepared update restored from data store. */
-#define WT_UPDATE_RESTORED_FROM_DS 0x08u /* Update restored from data store. */
-#define WT_UPDATE_RESTORED_FROM_HS 0x10u /* Update restored from history store. */
+#define WT_UPDATE_CLEARED_HS 0x01u /* Update that cleared the history store. */
+#define WT_UPDATE_HS 0x02u /* Update has been written to history store. */
+#define WT_UPDATE_OBSOLETE 0x04u /* Update that is obsolete. */
+#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x08u /* Prepared update restored from data store. */
+#define WT_UPDATE_RESTORED_FROM_DS 0x10u /* Update restored from data store. */
+#define WT_UPDATE_RESTORED_FROM_HS 0x20u /* Update restored from history store. */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint8_t flags;
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 513013eab3c..56ca47e4e79 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -1709,6 +1709,7 @@ extern void __wt_meta_track_discard(WT_SESSION_IMPL *session);
extern void __wt_meta_track_sub_on(WT_SESSION_IMPL *session);
extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase)
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_modify_vector_clear(WT_MODIFY_VECTOR *modifies);
extern void __wt_modify_vector_free(WT_MODIFY_VECTOR *modifies);
extern void __wt_modify_vector_init(WT_SESSION_IMPL *session, WT_MODIFY_VECTOR *modifies);
extern void __wt_modify_vector_peek(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp);
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
index 94a94a9dbc9..833ca2729af 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.h
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -43,6 +43,14 @@ struct __wt_reconcile {
u_int updates_unstable; /* Count of updates not visible_all. */
/*
+ * When we do not find any update to be written for the whole page, we would like to mark
+ * eviction failed in the case of update-restore. Eviction makes no progress in such a case:
+ * the page size stays the same, and considering it a success could force the page through
+ * eviction repeatedly.
+ */
+ bool update_used;
+
+ /*
* When we can't mark the page clean after reconciliation (for example, checkpoint or eviction
* found some uncommitted updates), there's a leave-dirty flag.
*/
@@ -268,6 +276,8 @@ typedef struct {
WT_UPDATE *upd; /* Update to write (or NULL) */
WT_TIME_WINDOW tw;
+
+ bool upd_saved; /* An element on the row's update chain was saved */
} WT_UPDATE_SELECT;
/*
diff --git a/src/third_party/wiredtiger/src/include/reconcile.i b/src/third_party/wiredtiger/src/include/reconcile.i
index 6e2839311b9..ba74a54f429 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.i
+++ b/src/third_party/wiredtiger/src/include/reconcile.i
@@ -13,6 +13,13 @@
(WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len))
/*
+ * WT_REC_SPLIT_MIN_ITEMS_USE_MEM
+ * The minimum number of page items (entries on the disk image or saved updates) associated with
+ * a page required to consider in-memory updates in the split calculation.
+ */
+#define WT_REC_SPLIT_MIN_ITEMS_USE_MEM 10
+
+/*
* __rec_cell_addr_stats --
* Track statistics for time values associated with an address.
*/
@@ -212,7 +219,7 @@ __wt_rec_need_split(WT_RECONCILE *r, size_t len)
* dominating the calculation and causing excessive splitting. Therefore, we'll limit the impact
* to a tenth of the cache usage occupied by those updates.
*/
- if (r->page->type == WT_PAGE_ROW_LEAF && page_items > 10)
+ if (r->page->type == WT_PAGE_ROW_LEAF && page_items > WT_REC_SPLIT_MIN_ITEMS_USE_MEM)
len += r->supd_memsize / 10;
/* Check for the disk image crossing a boundary. */
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index 59ea0e1b4fd..c07b7de4546 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -568,8 +568,27 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) {
WT_RET(__wt_rec_upd_select(session, r, ins, NULL, NULL, &upd_select));
- if ((upd = upd_select.upd) == NULL)
+ if ((upd = upd_select.upd) == NULL) {
+ /*
+ * In cases where a page has grown so large we are trying to force evict it (there is
+ * content, but none of the content can be evicted), we set up fake split points, to
+ * allow the page to use update restore eviction and be split into multiple reasonably
+ * sized pages. Check if we are in this situation. The call to split with zero
+ * additional size is odd, but split takes into account saved updates in a special way
+ * for this case already.
+ */
+ if (!upd_select.upd_saved || !__wt_rec_need_split(r, 0))
+ continue;
+
+ WT_RET(__wt_buf_set(session, r->cur, WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, 0, false));
+
+ /*
+ * Turn off prefix and suffix compression until a full key is written into the new page.
+ */
+ r->key_pfx_compress = r->key_sfx_compress = false;
continue;
+ }
WT_TIME_WINDOW_COPY(&tw, &upd_select.tw);
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 197a3a920cb..6b7f5fff165 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -253,6 +253,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
* both must be initialized.
*/
upd_select->upd = NULL;
+ upd_select->upd_saved = false;
select_tw = &upd_select->tw;
WT_TIME_WINDOW_INIT(select_tw);
@@ -386,6 +387,10 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
return (__wt_set_return(session, EBUSY));
}
+ /* If an update was selected, record that we're making progress. */
+ if (upd != NULL)
+ r->update_used = true;
+
/*
* The start timestamp is determined by the commit timestamp when the key is first inserted (or
* last updated). The end timestamp is set when a key/value pair becomes invalid, either because
@@ -526,7 +531,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
upd_select->upd != NULL && upd_select->upd->type == WT_UPDATE_TOMBSTONE ? NULL :
upd_select->upd,
supd_restore, upd_memsize));
- upd_saved = true;
+ upd_saved = upd_select->upd_saved = true;
}
/*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 7452fea82ca..1b16ddd15fd 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -193,6 +193,19 @@ __reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, u
if (F_ISSET(r, WT_REC_EVICT) && !WT_IS_HS(btree))
__wt_cache_update_hs_score(session, r->updates_seen, r->updates_unstable);
+ /*
+ * If eviction didn't use any updates and didn't split or delete the page, it didn't make
+ * progress. Give up rather than silently succeeding in doing no work: this way threads know to
+ * back off forced eviction rather than spinning.
+ *
+ * Do not return an error if we are syncing the file with eviction disabled or as part of a
+ * checkpoint.
+ */
+ if (ret == 0 && !(btree->evict_disabled > 0 || !F_ISSET(btree->dhandle, WT_DHANDLE_OPEN)) &&
+ F_ISSET(r, WT_REC_EVICT) && !WT_PAGE_IS_INTERNAL(r->page) && r->multi_next == 1 &&
+ F_ISSET(r, WT_REC_CALL_URGENT) && !r->update_used && r->cache_write_restore)
+ ret = __wt_set_return(session, EBUSY);
+
/* Wrap up the page reconciliation. */
if (ret == 0 && (ret = __rec_write_wrapup(session, r, page)) == 0)
__rec_write_page_status(session, r);
@@ -546,6 +559,7 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO
/* Track if updates were used and/or uncommitted. */
r->updates_seen = r->updates_unstable = 0;
+ r->update_used = false;
/* Track if the page can be marked clean. */
r->leave_dirty = false;
@@ -1000,7 +1014,7 @@ __rec_split_row_promote(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ITEM *key,
/*
* For a column-store, the promoted key is the recno and we already have a copy. For a
- * row-store, it's the first key on the page, a variable- length byte string, get a copy.
+ * row-store, it's the first key on the page, a variable-length byte string, get a copy.
*
* This function is called from the split code at each split boundary, but that means we're not
* called before the first boundary, and we will eventually have to get the first key explicitly
diff --git a/src/third_party/wiredtiger/src/support/modify.c b/src/third_party/wiredtiger/src/support/modify.c
index 7c9f2a373c3..0d22afd615d 100644
--- a/src/third_party/wiredtiger/src/support/modify.c
+++ b/src/third_party/wiredtiger/src/support/modify.c
@@ -517,6 +517,16 @@ __wt_modify_vector_peek(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp)
}
/*
+ * __wt_modify_vector_clear --
+ * Reset a modify vector to empty by setting its size to zero; nothing else in it is touched.
+ */
+void
+__wt_modify_vector_clear(WT_MODIFY_VECTOR *modifies)
+{
+ modifies->size = 0;
+}
+
+/*
* __wt_modify_vector_free --
* Free any resources associated with a modify vector. If we exceeded the allowed stack space on
* the vector and had to fallback to dynamic allocations, we'll be doing a free here.
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index dd8504f7c08..1c0be9f8f11 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -2043,8 +2043,14 @@ __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative)
* forced eviction successful. Specifically excuse it if:
* * Hasn't done many updates
* * Is in the middle of a commit or abort
+ *
+ * The threshold we compare the number of updates against is related to, and must be greater
+ * than, the threshold used in reconciliation's "need split" helper. If we're going to roll back
+ * a transaction, we need to have considered splitting the page in the case that its updates are
+ * on a single page.
*/
- if (conservative && (txn->mod_count < 10 || F_ISSET(session, WT_SESSION_RESOLVING_TXN)))
+ if (conservative && (txn->mod_count < (10 + WT_REC_SPLIT_MIN_ITEMS_USE_MEM) ||
+ F_ISSET(session, WT_SESSION_RESOLVING_TXN)))
return (0);
/*
diff --git a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c
index 9a263eb9ef6..b91d3c2222e 100644
--- a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c
@@ -259,7 +259,7 @@ runone(bool config_cache)
testutil_check(__wt_snprintf(buf, sizeof(buf),
"create"
", cache_cursors=%s"
- ", cache_size=5GB"
+ ", cache_size=1GB"
", checkpoint_sync=true"
", eviction=(threads_max=5)"
", file_manager=("
@@ -306,10 +306,8 @@ run(int argc, char *argv[])
bool cache_cursors;
} runs[] = {
{1, 1, false}, {1, 1, true}, {8, 1, false}, {8, 1, true}, {16, 1, false}, {16, 1, true},
- {16, WT_ELEMENTS(uri_list), false}, {16, WT_ELEMENTS(uri_list), true}, {200, 100, false},
- {200, 100, true}, {200, WT_ELEMENTS(uri_list), false}, {200, WT_ELEMENTS(uri_list), true},
- {300, 100, false}, {300, 100, true}, {600, WT_ELEMENTS(uri_list), false},
- {600, WT_ELEMENTS(uri_list), true},
+ {16, WT_ELEMENTS(uri_list), false}, {16, WT_ELEMENTS(uri_list), true}, {64, 100, false},
+ {64, 100, true}, {64, WT_ELEMENTS(uri_list), false}, {64, WT_ELEMENTS(uri_list), true},
};
WT_RAND_STATE rnd;
u_int i, n;
diff --git a/src/third_party/wiredtiger/test/suite/test_hs15.py b/src/third_party/wiredtiger/test/suite/test_hs15.py
new file mode 100644
index 00000000000..8124f1d5f1e
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_hs15.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import time, wiredtiger, wttest
+
+def timestamp_str(t):  # Format timestamp t as lowercase hexadecimal text via '%x'.
+ return '%x' % t
+
+# test_hs15.py
+# Ensure eviction doesn't clear the history store again after checkpoint has done so because of the same update without timestamp.
+class test_hs14(wttest.WiredTigerTestCase):  # NOTE(review): named test_hs14 although this file is test_hs15.py -- looks like a copy/paste slip; confirm.
+ conn_config = 'cache_size=5MB'
+ session_config = 'isolation=snapshot'
+
+ def test_hs15(self):
+ uri = 'table:test_hs15'
+ self.session.create(uri, 'key_format=S,value_format=S')
+ cursor = self.session.open_cursor(uri)
+
+ value1 = 'a' * 500
+ value2 = 'b' * 500
+ value3 = 'c' * 500
+
+ # Insert value1 at key '0' and commit it without a commit timestamp
+ self.session.begin_transaction()
+ cursor[str(0)] = value1
+ self.session.commit_transaction()
+
+ # Insert a bunch of other contents (keys '1'..'999', value2, timestamp 3) to trigger eviction
+ for i in range(1, 1000):
+ self.session.begin_transaction()
+ cursor[str(i)] = value2
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(3))
+
+ # Do a modify on key '0' at timestamp 1, then a full update to value2 at timestamp 2
+ self.session.begin_transaction()
+ cursor.set_key(str(0))
+ mods = [wiredtiger.Modify('B', 100, 1)]  # The read at timestamp 1 below expects index 100 to become 'B'.
+ self.assertEqual(cursor.modify(mods), 0)
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(1))
+
+ self.session.begin_transaction()
+ cursor[str(0)] = value2
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(2))
+
+ # Make the modify with timestamp and the update without timestamp obsolete by moving the oldest timestamp to 1
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1))
+
+ # Do a checkpoint
+ self.session.checkpoint()
+
+ self.session.begin_transaction()
+ cursor[str(0)] = value3  # Newer update on key '0', committed at timestamp 3 below.
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(3))
+
+ # Insert a bunch of other contents (keys '1'..'999', value3, timestamp 3) to trigger eviction
+ for i in range(1, 1000):
+ self.session.begin_transaction()
+ cursor[str(i)] = value3
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(3))
+
+ expected = list(value1)  # Expected result of the modify: value1 with index 100 replaced by 'B'.
+ expected[100] = 'B'
+ expected = str().join(expected)
+ self.session.begin_transaction('read_timestamp=' + timestamp_str(1))  # Read key '0' at timestamps 1, 2 and 3 in turn.
+ self.assertEqual(cursor[str(0)], expected)
+ self.session.rollback_transaction()
+
+ self.session.begin_transaction('read_timestamp=' + timestamp_str(2))
+ self.assertEqual(cursor[str(0)], value2)
+ self.session.rollback_transaction()
+
+ self.session.begin_transaction('read_timestamp=' + timestamp_str(3))
+ self.assertEqual(cursor[str(0)], value3)
+ self.session.rollback_transaction()