diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-07-09 16:42:21 +1000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-07-09 07:01:46 +0000 |
commit | 0e8bb99b4f9576d8e2ab32af4a8ab60efd80a083 (patch) | |
tree | 927b772f60a1b2c13f65317bb0482c17fd1d864b /src/third_party | |
parent | 882b737c34fb9ca5707942a8a6ad05d29de0985a (diff) | |
download | mongo-0e8bb99b4f9576d8e2ab32af4a8ab60efd80a083.tar.gz |
Import wiredtiger: 89446427f9525ecb7dd10c1b193d1a3f78999d77 from branch mongodb-4.6
ref: 1ba46e7a92..89446427f9
for: 4.5.1
WT-5970 Update test_wt4333_handle_locks to use 1GB cache size
WT-6479 Don't insert updates after the first globally visible update to the history store
WT-6488 Fail update restore eviction if no new updates were written
Diffstat (limited to 'src/third_party')
-rw-r--r-- | src/third_party/wiredtiger/import.data | 4 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_ext.c | 4 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_page.c | 7 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/history/hs.c | 89 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/btmem.h | 28 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/extern.h | 1 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/reconcile.h | 10 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/reconcile.i | 9 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_row.c | 21 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_visibility.c | 7 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_write.c | 16 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/support/modify.c | 10 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn.c | 8 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c | 8 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/suite/test_hs15.py | 100 |
15 files changed, 270 insertions, 52 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index aa07e6071a6..d032a503a40 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,6 +1,6 @@ { "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", - "branch": "mongodb-4.4", - "commit": "1ba46e7a92c0679c9099783b88028b102040d47f" + "branch": "mongodb-4.6", + "commit": "89446427f9525ecb7dd10c1b193d1a3f78999d77" } diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c index 8e854da15c1..b11cdf9efff 100644 --- a/src/third_party/wiredtiger/src/block/block_ext.c +++ b/src/third_party/wiredtiger/src/block/block_ext.c @@ -470,8 +470,8 @@ __block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off block->size += size; WT_STAT_DATA_INCR(session, block_extension); - __wt_verbose(session, WT_VERB_BLOCK, "file extend %" PRIdMAX "B @ %" PRIdMAX, (intmax_t)size, - (intmax_t)*offp); + __wt_verbose(session, WT_VERB_BLOCK, "file extend %" PRIdMAX "-%" PRIdMAX, (intmax_t)*offp, + (intmax_t)(*offp + size)); return (0); } diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 2c4f13578ed..bb9e3604ec8 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -635,6 +635,13 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool */ cache = conn->cache; + /* + * Urgent eviction and forced eviction want two different behaviors for inefficient update + * restore evictions, pass this flag so that reconciliation knows which to use. + */ + if (FLD_ISSET(evict_flags, WT_EVICT_CALL_URGENT)) + LF_SET(WT_REC_CALL_URGENT); + if (closing) LF_SET(WT_REC_VISIBILITY_ERR); else if (F_ISSET(ref, WT_REF_FLAG_INTERNAL) || WT_IS_HS(S2BT(session))) diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c index cb93b42e56a..e365f6bb69f 100644 --- a/src/third_party/wiredtiger/src/history/hs.c +++ b/src/third_party/wiredtiger/src/history/hs.c @@ -694,7 +694,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) WT_MODIFY entries[MAX_REVERSE_MODIFY_NUM]; WT_MODIFY_VECTOR modifies; WT_SAVE_UPD *list; - WT_UPDATE *first_non_ts_upd, *non_aborted_upd, *oldest_upd, *prev_upd, *upd; + WT_UPDATE *first_globally_visible_upd, *first_non_ts_upd; + WT_UPDATE *non_aborted_upd, *oldest_upd, *prev_upd, *upd; WT_HS_TIME_POINT stop_time_point; wt_off_t hs_size; wt_timestamp_t min_insert_ts; @@ -727,6 +728,19 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) if (list->onpage_upd == NULL) continue; + /* Skip aborted updates. */ + for (upd = list->onpage_upd->next; upd != NULL && upd->txnid == WT_TXN_ABORTED; + upd = upd->next) + ; + + /* No update to insert to history store. */ + if (upd == NULL) + continue; + + /* Updates have already been inserted to the history store. */ + if (F_ISSET(upd, WT_UPDATE_HS)) + continue; + /* History store table key component: source key. */ switch (page->type) { case WT_PAGE_COL_FIX: @@ -757,10 +771,12 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) __wt_free_update_list(session, &upd); upd = list->onpage_upd; - first_non_ts_upd = NULL; + first_globally_visible_upd = first_non_ts_upd = NULL; ts_updates_in_hs = false; enable_reverse_modify = true; + __wt_modify_vector_clear(&modifies); + /* * The algorithm assumes the oldest update on the update chain in memory is either a full * update or a tombstone. @@ -814,10 +830,21 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) __wt_timestamp_to_string(min_insert_ts, ts_string[2])); upd->start_ts = upd->durable_ts = min_insert_ts; WT_STAT_CONN_INCR(session, cache_hs_order_fixup_insert); - } else + } else if (upd->start_ts != WT_TS_NONE) + /* + * Don't reset to WT_TS_NONE as we don't want to clear the timestamps for updates + * older than the update without timestamp. + */ min_insert_ts = upd->start_ts; + WT_ERR(__wt_modify_vector_push(&modifies, upd)); + /* Track the first update that is globally visible. */ + if (first_globally_visible_upd == NULL && __wt_txn_upd_visible_all(session, upd)) + first_globally_visible_upd = upd; + else if (first_globally_visible_upd != NULL) + F_SET(upd, WT_UPDATE_OBSOLETE); + /* * Always insert full update to the history store if we write a prepared update to the * data store. @@ -835,19 +862,26 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) enable_reverse_modify = false; /* Find the first update without timestamp. */ - if (first_non_ts_upd == NULL && upd->start_ts == WT_TS_NONE) { + if (first_non_ts_upd == NULL && upd->start_ts == WT_TS_NONE) first_non_ts_upd = upd; - } else if (first_non_ts_upd != NULL && upd->start_ts != WT_TS_NONE) { + else if (first_non_ts_upd != NULL && upd->start_ts != WT_TS_NONE) { /* * Don't insert updates with timestamps after updates without timestamps to the * history store. */ - F_SET(upd, WT_UPDATE_MASKED_BY_NON_TS_UPDATE); + F_SET(upd, WT_UPDATE_OBSOLETE); if (F_ISSET(upd, WT_UPDATE_HS)) ts_updates_in_hs = true; } /* + * No need to continue if we see the first self contained value after the first globally + * visible value. + */ + if (first_globally_visible_upd != NULL && WT_UPDATE_DATA_VALUE(upd)) + break; + + /* * If we've reached a full update and it's in the history store we don't need to * continue as anything beyond this point won't help with calculating deltas. */ @@ -857,21 +891,6 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) prev_upd = upd = NULL; - /* - * Trim from the end until there is a full update. We need this if we are dealing with - * updates without timestamps, and there are timestamped modify updates at the end of update - * chain that are not relevant due to newer full updates without timestamps. - */ - for (; modifies.size > 0;) { - __wt_modify_vector_peek(&modifies, &upd); - if (upd->type == WT_UPDATE_MODIFY) { - WT_ASSERT(session, F_ISSET(upd, WT_UPDATE_MASKED_BY_NON_TS_UPDATE)); - __wt_modify_vector_pop(&modifies, &upd); - } else - break; - } - upd = NULL; - /* Construct the oldest full update. */ WT_ASSERT(session, modifies.size > 0); @@ -886,11 +905,13 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) * the correct place to delete the history store records when inserting the first update and * it may be skipped if there is nothing to insert to the history store. */ - if (oldest_upd->type == WT_UPDATE_TOMBSTONE && oldest_upd == first_non_ts_upd) { + if (oldest_upd->type == WT_UPDATE_TOMBSTONE && oldest_upd == first_non_ts_upd && + !F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS)) { /* We can only delete history store entries that have timestamps. */ WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1)); WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts); clear_hs = false; + F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS); } else /* * Clear the content with timestamps in the history store if we see updates without @@ -900,7 +921,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) * list and there are no updates moved to the history store by checkpoint or a failed * eviction. */ - clear_hs = first_non_ts_upd != NULL && !F_ISSET(first_non_ts_upd, WT_UPDATE_HS) && + clear_hs = first_non_ts_upd != NULL && + !F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS) && (list->ins == NULL || ts_updates_in_hs); WT_ERR(__hs_next_upd_full_value(session, &modifies, NULL, full_value, &upd)); @@ -951,8 +973,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) continue; } - /* Skip updates already in the history store or masked by updates without timestamps. */ - if (F_ISSET(upd, WT_UPDATE_HS | WT_UPDATE_MASKED_BY_NON_TS_UPDATE)) + /* Skip updates that are already in the history store or are obsolete. */ + if (F_ISSET(upd, WT_UPDATE_HS | WT_UPDATE_OBSOLETE)) continue; /* @@ -972,6 +994,19 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) } /* + * Clear history store content if we skip inserting the updates without timestamp. e.g., + * if we have an update chain U@30 -> M@20 -> U@0 and M@20 is globally visible, we skip + * writing U@0 to the history store. + */ + if (clear_hs && upd->start_ts != WT_TS_NONE) { + /* We can only delete history store entries that have timestamps. */ + WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1)); + WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts); + clear_hs = false; + F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS); + } + + /* * Calculate reverse modify and clear the history store records with timestamps when * inserting the first update. */ @@ -987,6 +1022,9 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) WT_ERR(__hs_insert_record(session, cursor, btree, key, upd, WT_UPDATE_STANDARD, full_value, &stop_time_point, clear_hs)); + if (clear_hs) + F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS); + clear_hs = false; /* Flag the update as now in the history store. */ F_SET(upd, WT_UPDATE_HS); @@ -1020,6 +1058,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) /* We can only delete history store entries that have timestamps. */ WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1)); WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts); + F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS); } } diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 94159fd9c2b..e61a5813a59 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -26,14 +26,15 @@ /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_REC_CHECKPOINT 0x01u -#define WT_REC_CLEAN_AFTER_REC 0x02u -#define WT_REC_EVICT 0x04u -#define WT_REC_HS 0x08u -#define WT_REC_IN_MEMORY 0x10u -#define WT_REC_SCRUB 0x20u -#define WT_REC_VISIBILITY_ERR 0x40u -#define WT_REC_VISIBLE_ALL 0x80u +#define WT_REC_CALL_URGENT 0x001u +#define WT_REC_CHECKPOINT 0x002u +#define WT_REC_CLEAN_AFTER_REC 0x004u +#define WT_REC_EVICT 0x008u +#define WT_REC_HS 0x010u +#define WT_REC_IN_MEMORY 0x020u +#define WT_REC_SCRUB 0x040u +#define WT_REC_VISIBILITY_ERR 0x080u +#define WT_REC_VISIBLE_ALL 0x100u /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* @@ -1074,11 +1075,12 @@ struct __wt_update { volatile uint8_t prepare_state; /* prepare state */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_UPDATE_HS 0x01u /* Update has been written to history store. */ -#define WT_UPDATE_MASKED_BY_NON_TS_UPDATE 0x02u /* Update masked by updates without timestamp. */ -#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x04u /* Prepared update restored from data store. */ -#define WT_UPDATE_RESTORED_FROM_DS 0x08u /* Update restored from data store. */ -#define WT_UPDATE_RESTORED_FROM_HS 0x10u /* Update restored from history store. */ +#define WT_UPDATE_CLEARED_HS 0x01u /* Update that cleared the history store. */ +#define WT_UPDATE_HS 0x02u /* Update has been written to history store. */ +#define WT_UPDATE_OBSOLETE 0x04u /* Update that is obsolete. */ +#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x08u /* Prepared update restored from data store. */ +#define WT_UPDATE_RESTORED_FROM_DS 0x10u /* Update restored from data store. */ +#define WT_UPDATE_RESTORED_FROM_HS 0x20u /* Update restored from history store. */ /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint8_t flags; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 513013eab3c..56ca47e4e79 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -1709,6 +1709,7 @@ extern void __wt_meta_track_discard(WT_SESSION_IMPL *session); extern void __wt_meta_track_sub_on(WT_SESSION_IMPL *session); extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_modify_vector_clear(WT_MODIFY_VECTOR *modifies); extern void __wt_modify_vector_free(WT_MODIFY_VECTOR *modifies); extern void __wt_modify_vector_init(WT_SESSION_IMPL *session, WT_MODIFY_VECTOR *modifies); extern void __wt_modify_vector_peek(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp); diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h index 94a94a9dbc9..833ca2729af 100644 --- a/src/third_party/wiredtiger/src/include/reconcile.h +++ b/src/third_party/wiredtiger/src/include/reconcile.h @@ -43,6 +43,14 @@ struct __wt_reconcile { u_int updates_unstable; /* Count of updates not visible_all. */ /* + * When we do not find any update to be written for the whole page, we would like to mark + * eviction failed in the case of update-restore. There is no progress made by eviction in such + * a case, the page size stays the same and considering it a success could force the page + * through eviction repeatedly. + */ + bool update_used; + + /* * When we can't mark the page clean after reconciliation (for example, checkpoint or eviction * found some uncommitted updates), there's a leave-dirty flag. */ @@ -268,6 +276,8 @@ typedef struct { WT_UPDATE *upd; /* Update to write (or NULL) */ WT_TIME_WINDOW tw; + + bool upd_saved; /* An element on the row's update chain was saved */ } WT_UPDATE_SELECT; /* diff --git a/src/third_party/wiredtiger/src/include/reconcile.i b/src/third_party/wiredtiger/src/include/reconcile.i index 6e2839311b9..ba74a54f429 100644 --- a/src/third_party/wiredtiger/src/include/reconcile.i +++ b/src/third_party/wiredtiger/src/include/reconcile.i @@ -13,6 +13,13 @@ (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len)) /* + * WT_REC_SPLIT_MIN_ITEMS_USE_MEM + * The minimum number of page items (entries on the disk image or saved updates) associated with + * a page required to consider in-memory updates in the split calculation. + */ +#define WT_REC_SPLIT_MIN_ITEMS_USE_MEM 10 + +/* * __rec_cell_addr_stats -- * Track statistics for time values associated with an address. */ @@ -212,7 +219,7 @@ __wt_rec_need_split(WT_RECONCILE *r, size_t len) * dominating the calculation and causing excessive splitting. Therefore, we'll limit the impact * to a tenth of the cache usage occupied by those updates. */ - if (r->page->type == WT_PAGE_ROW_LEAF && page_items > 10) + if (r->page->type == WT_PAGE_ROW_LEAF && page_items > WT_REC_SPLIT_MIN_ITEMS_USE_MEM) len += r->supd_memsize / 10; /* Check for the disk image crossing a boundary. */ diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c index 59ea0e1b4fd..c07b7de4546 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_row.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c @@ -568,8 +568,27 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) { WT_RET(__wt_rec_upd_select(session, r, ins, NULL, NULL, &upd_select)); - if ((upd = upd_select.upd) == NULL) + if ((upd = upd_select.upd) == NULL) { + /* + * In cases where a page has grown so large we are trying to force evict it (there is + * content, but none of the content can be evicted), we set up fake split points, to + * allow the page to use update restore eviction and be split into multiple reasonably + * sized pages. Check if we are in this situation. The call to split with zero + * additional size is odd, but split takes into account saved updates in a special way + * for this case already. + */ + if (!upd_select.upd_saved || !__wt_rec_need_split(r, 0)) + continue; + + WT_RET(__wt_buf_set(session, r->cur, WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins))); + WT_RET(__wt_rec_split_crossing_bnd(session, r, 0, false)); + + /* + * Turn off prefix and suffix compression until a full key is written into the new page. + */ + r->key_pfx_compress = r->key_sfx_compress = false; continue; + } WT_TIME_WINDOW_COPY(&tw, &upd_select.tw); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c index 197a3a920cb..6b7f5fff165 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c @@ -253,6 +253,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v * both must be initialized. */ upd_select->upd = NULL; + upd_select->upd_saved = false; select_tw = &upd_select->tw; WT_TIME_WINDOW_INIT(select_tw); @@ -386,6 +387,10 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v return (__wt_set_return(session, EBUSY)); } + /* If an update was selected, record that we're making progress. */ + if (upd != NULL) + r->update_used = true; + /* * The start timestamp is determined by the commit timestamp when the key is first inserted (or * last updated). The end timestamp is set when a key/value pair becomes invalid, either because @@ -526,7 +531,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v upd_select->upd != NULL && upd_select->upd->type == WT_UPDATE_TOMBSTONE ? NULL : upd_select->upd, supd_restore, upd_memsize)); - upd_saved = true; + upd_saved = upd_select->upd_saved = true; } /* diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 7452fea82ca..1b16ddd15fd 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -193,6 +193,19 @@ __reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, u if (F_ISSET(r, WT_REC_EVICT) && !WT_IS_HS(btree)) __wt_cache_update_hs_score(session, r->updates_seen, r->updates_unstable); + /* + * If eviction didn't use any updates and didn't split or delete the page, it didn't make + * progress. Give up rather than silently succeeding in doing no work: this way threads know to + * back off forced eviction rather than spinning. + * + * Do not return an error if we are syncing the file with eviction disabled or as part of a + * checkpoint. + */ + if (ret == 0 && !(btree->evict_disabled > 0 || !F_ISSET(btree->dhandle, WT_DHANDLE_OPEN)) && + F_ISSET(r, WT_REC_EVICT) && !WT_PAGE_IS_INTERNAL(r->page) && r->multi_next == 1 && + F_ISSET(r, WT_REC_CALL_URGENT) && !r->update_used && r->cache_write_restore) + ret = __wt_set_return(session, EBUSY); + /* Wrap up the page reconciliation. */ if (ret == 0 && (ret = __rec_write_wrapup(session, r, page)) == 0) __rec_write_page_status(session, r); @@ -546,6 +559,7 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO /* Track if updates were used and/or uncommitted. */ r->updates_seen = r->updates_unstable = 0; + r->update_used = false; /* Track if the page can be marked clean. */ r->leave_dirty = false; @@ -1000,7 +1014,7 @@ __rec_split_row_promote(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ITEM *key, /* * For a column-store, the promoted key is the recno and we already have a copy. For a - * row-store, it's the first key on the page, a variable- length byte string, get a copy. + * row-store, it's the first key on the page, a variable-length byte string, get a copy. * * This function is called from the split code at each split boundary, but that means we're not * called before the first boundary, and we will eventually have to get the first key explicitly diff --git a/src/third_party/wiredtiger/src/support/modify.c b/src/third_party/wiredtiger/src/support/modify.c index 7c9f2a373c3..0d22afd615d 100644 --- a/src/third_party/wiredtiger/src/support/modify.c +++ b/src/third_party/wiredtiger/src/support/modify.c @@ -517,6 +517,16 @@ __wt_modify_vector_peek(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp) } /* + * __wt_modify_vector_clear -- + * Clear a modify vector. + */ +void +__wt_modify_vector_clear(WT_MODIFY_VECTOR *modifies) +{ + modifies->size = 0; +} + +/* * __wt_modify_vector_free -- * Free any resources associated with a modify vector. If we exceeded the allowed stack space on * the vector and had to fallback to dynamic allocations, we'll be doing a free here. diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index dd8504f7c08..1c0be9f8f11 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -2043,8 +2043,14 @@ __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative) * forced eviction successful. Specifically excuse it if: * * Hasn't done many updates * * Is in the middle of a commit or abort + * + * This threshold that we're comparing the number of updates to is related and must be greater + * than the threshold we use in reconciliation's "need split" helper. If we're going to rollback + * a transaction, we need to have considered splitting the page in the case that its updates are + * on a single page. */ - if (conservative && (txn->mod_count < 10 || F_ISSET(session, WT_SESSION_RESOLVING_TXN))) + if (conservative && (txn->mod_count < (10 + WT_REC_SPLIT_MIN_ITEMS_USE_MEM) || + F_ISSET(session, WT_SESSION_RESOLVING_TXN))) return (0); /* diff --git a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c index 9a263eb9ef6..b91d3c2222e 100644 --- a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c @@ -259,7 +259,7 @@ runone(bool config_cache) testutil_check(__wt_snprintf(buf, sizeof(buf), "create" ", cache_cursors=%s" - ", cache_size=5GB" + ", cache_size=1GB" ", checkpoint_sync=true" ", eviction=(threads_max=5)" ", file_manager=(" @@ -306,10 +306,8 @@ run(int argc, char *argv[]) bool cache_cursors; } runs[] = { {1, 1, false}, {1, 1, true}, {8, 1, false}, {8, 1, true}, {16, 1, false}, {16, 1, true}, - {16, WT_ELEMENTS(uri_list), false}, {16, WT_ELEMENTS(uri_list), true}, {200, 100, false}, - {200, 100, true}, {200, WT_ELEMENTS(uri_list), false}, {200, WT_ELEMENTS(uri_list), true}, - {300, 100, false}, {300, 100, true}, {600, WT_ELEMENTS(uri_list), false}, - {600, WT_ELEMENTS(uri_list), true}, + {16, WT_ELEMENTS(uri_list), false}, {16, WT_ELEMENTS(uri_list), true}, {64, 100, false}, + {64, 100, true}, {64, WT_ELEMENTS(uri_list), false}, {64, WT_ELEMENTS(uri_list), true}, }; WT_RAND_STATE rnd; u_int i, n; diff --git a/src/third_party/wiredtiger/test/suite/test_hs15.py b/src/third_party/wiredtiger/test/suite/test_hs15.py new file mode 100644 index 00000000000..8124f1d5f1e --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_hs15.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2020 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import time, wiredtiger, wttest + +def timestamp_str(t): + return '%x' % t + +# test_hs15.py +# Ensure eviction doesn't clear the history store again after checkpoint has done so because of the same update without timestamp. +class test_hs14(wttest.WiredTigerTestCase): + conn_config = 'cache_size=5MB' + session_config = 'isolation=snapshot' + + def test_hs15(self): + uri = 'table:test_hs15' + self.session.create(uri, 'key_format=S,value_format=S') + cursor = self.session.open_cursor(uri) + + value1 = 'a' * 500 + value2 = 'b' * 500 + value3 = 'c' * 500 + + # Insert an update without timestamp + self.session.begin_transaction() + cursor[str(0)] = value1 + self.session.commit_transaction() + + # Insert a bunch of other contents to trigger eviction + for i in range(1, 1000): + self.session.begin_transaction() + cursor[str(i)] = value2 + self.session.commit_transaction('commit_timestamp=' + timestamp_str(3)) + + # Do a modify and an update with timestamps + self.session.begin_transaction() + cursor.set_key(str(0)) + mods = [wiredtiger.Modify('B', 100, 1)] + self.assertEqual(cursor.modify(mods), 0) + self.session.commit_transaction('commit_timestamp=' + timestamp_str(1)) + + self.session.begin_transaction() + cursor[str(0)] = value2 + self.session.commit_transaction('commit_timestamp=' + timestamp_str(2)) + + # Make the modify with timestamp and the update without timestamp obsolete + self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1)) + + # Do a checkpoint + self.session.checkpoint() + + self.session.begin_transaction() + cursor[str(0)] = value3 + self.session.commit_transaction('commit_timestamp=' + timestamp_str(3)) + + # Insert a bunch of other contents to trigger eviction + for i in range(1, 1000): + self.session.begin_transaction() + cursor[str(i)] = value3 + self.session.commit_transaction('commit_timestamp=' + timestamp_str(3)) + + expected = list(value1) + expected[100] = 'B' + expected = str().join(expected) + self.session.begin_transaction('read_timestamp=' + timestamp_str(1)) + self.assertEqual(cursor[str(0)], expected) + self.session.rollback_transaction() + + self.session.begin_transaction('read_timestamp=' + timestamp_str(2)) + self.assertEqual(cursor[str(0)], value2) + self.session.rollback_transaction() + + self.session.begin_transaction('read_timestamp=' + timestamp_str(3)) + self.assertEqual(cursor[str(0)], value3) + self.session.rollback_transaction() |