diff options
author | Ramon Fernandez <ramon@mongodb.com> | 2017-11-29 07:59:13 -0500 |
---|---|---|
committer | Ramon Fernandez <ramon@mongodb.com> | 2017-11-29 07:59:13 -0500 |
commit | aadbdfb13bdb628ebf6a2afa5307e8c0e9ecc0b5 (patch) | |
tree | 9b72590f6acc708dd24b564e0dcb75db4f1d2619 /src/third_party/wiredtiger | |
parent | 1c8800e3b730d2f574a4b3439d141849d24f4919 (diff) | |
download | mongo-aadbdfb13bdb628ebf6a2afa5307e8c0e9ecc0b5.tar.gz |
Import wiredtiger: 6dcff54e40ce18729d14a9e96b1cbcb4fbc331fe from branch mongodb-3.8
ref: bc0337ed00..6dcff54e40
for: 3.7.1
WT-3763 Tune eviction algorithm when cache overflow is in use
WT-3764 Evict pages faster when doing non-cache-disruptive walks
WT-3765 Python tests failed due to crash
WT-3773 Reconciling a page with modify records and lookaside can choose wrong values
Diffstat (limited to 'src/third_party/wiredtiger')
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 |
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_compact.c | 2 |
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_read.c | 11 |
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_ret.c | 13 |
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_split.c | 5 |
-rw-r--r-- | src/third_party/wiredtiger/src/cache/cache_las.c | 6 |
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_lru.c | 20 |
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_page.c | 14 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/btmem.h | 3 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/btree.i | 75 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/extern.h | 1 |
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_write.c | 47 |
12 files changed, 127 insertions, 72 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 1b680914853..74b1e8caef3 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "bc0337ed0085ea2d7b00f73deb2f726b4ffbee1b", + "commit": "6dcff54e40ce18729d14a9e96b1cbcb4fbc331fe", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-3.8" diff --git a/src/third_party/wiredtiger/src/btree/bt_compact.c b/src/third_party/wiredtiger/src/btree/bt_compact.c index b3e23a8251c..63015312232 100644 --- a/src/third_party/wiredtiger/src/btree/bt_compact.c +++ b/src/third_party/wiredtiger/src/btree/bt_compact.c @@ -35,7 +35,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) * If the page is a replacement, test the replacement addresses. * Ignore empty pages, they get merged into the parent. */ - if (mod == NULL || mod->rec_result == 0) { + if (__wt_page_evict_clean(page)) { __wt_ref_info(ref, &addr, &addr_size, NULL); if (addr == NULL) return (0); diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index fd9a7597d73..19ff15fb21e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -221,7 +221,7 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) */ page->modify->first_dirty_txn = WT_TXN_FIRST; - if (!ref->page_las->las_skew_oldest && + if (ref->page_las->las_skew_newest && !S2C(session)->txn_global.has_stable_timestamp && __wt_txn_visible_all(session, ref->page_las->las_max_txn, WT_TIMESTAMP_NULL(&ref->page_las->onpage_timestamp))) { @@ -268,7 +268,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref) * It's hard to imagine a page with a huge memory footprint that has * never been modified, but check to be sure. 
*/ - if (page->modify == NULL) + if (__wt_page_evict_clean(page)) return (false); /* Pages are usually small enough, check that first. */ @@ -297,8 +297,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref) * skipping the page indefinitely or large records can lead to * extremely large memory footprints. */ - if (page->modify->update_restored && - !__wt_page_evict_retry(session, page)) + if (!__wt_page_evict_retry(session, page)) return (false); /* Trigger eviction on the next page release. */ @@ -496,7 +495,7 @@ __las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) goto done; if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) && - !ref->page_las->las_skew_oldest) { + ref->page_las->las_skew_newest) { skip = true; goto done; } @@ -512,7 +511,7 @@ __las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) &session->txn.read_timestamp) <= 0); if (F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) && - ref->page_las->las_skew_oldest && + !ref->page_las->las_skew_newest && __wt_timestamp_cmp( &ref->page_las->min_timestamp, &session->txn.read_timestamp) > 0) { skip = true; diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c index d63b5884fef..b24a4e1db45 100644 --- a/src/third_party/wiredtiger/src/btree/bt_ret.c +++ b/src/third_party/wiredtiger/src/btree/bt_ret.c @@ -137,13 +137,13 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) #define WT_MODIFY_ARRAY_SIZE (WT_MAX_MODIFY_UPDATE + 10) /* - * __value_return_upd -- + * __wt_value_return_upd -- * Change the cursor to reference an internal update structure return * value. */ -static inline int -__value_return_upd( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +int +__wt_value_return_upd(WT_SESSION_IMPL *session, + WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility) { WT_CURSOR *cursor; WT_DECL_RET; @@ -173,7 +173,8 @@ __value_return_upd( * that are visible to us. 
*/ for (i = 0, listp = list; upd != NULL; upd = upd->next) { - if (!__wt_txn_upd_visible(session, upd)) + if (upd->txnid == WT_TXN_ABORTED || + (!ignore_visibility && !__wt_txn_upd_visible(session, upd))) continue; if (WT_UPDATE_DATA_VALUE(upd)) @@ -273,7 +274,7 @@ __wt_value_return( if (upd == NULL) WT_RET(__value_return(session, cbt)); else - WT_RET(__value_return_upd(session, cbt, upd)); + WT_RET(__wt_value_return_upd(session, cbt, upd, false)); F_SET(cursor, WT_CURSTD_VALUE_INT); return (0); } diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index a9643ed92a0..bf7ea54adb0 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -1493,9 +1493,10 @@ __split_multi_inmem( page->modify->first_dirty_txn = WT_TXN_FIRST; /* - * If the new page is modified, save the oldest ID from reconciliation - * to avoid repeatedly attempting eviction on the same page. + * If the new page is modified, save the eviction generation to avoid + * repeatedly attempting eviction on the same page. 
*/ + page->modify->last_evict_pass_gen = orig->modify->last_evict_pass_gen; page->modify->last_eviction_id = orig->modify->last_eviction_id; __wt_timestamp_set(&page->modify->last_eviction_timestamp, &orig->modify->last_eviction_timestamp); diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c index 9f8aeb7cc9e..e0d90ad836d 100644 --- a/src/third_party/wiredtiger/src/cache/cache_las.c +++ b/src/third_party/wiredtiger/src/cache/cache_las.c @@ -64,7 +64,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) dstats = ((WT_CURSOR_BTREE *) cache->las_session[0]->las_cursor)->btree->dhandle->stats; - v = WT_STAT_READ(dstats, cursor_insert); + v = WT_STAT_READ(dstats, cursor_update); WT_STAT_SET(session, cstats, cache_lookaside_insert, v); v = WT_STAT_READ(dstats, cursor_remove); WT_STAT_SET(session, cstats, cache_lookaside_remove, v); @@ -385,7 +385,7 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi) btree_id, multi->page_las.las_pageid, multi->page_las.las_max_txn, hex_timestamp, - multi->page_las.las_skew_oldest? "oldest" : "youngest", + multi->page_las.las_skew_newest? "newest" : "oldest", WT_STAT_READ(conn->stats, cache_lookaside_entries), pct_dirty, pct_full); } @@ -433,6 +433,7 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, /* Wrap all the updates in a transaction. 
*/ las_session = (WT_SESSION_IMPL *)cursor->session; WT_RET(__wt_txn_begin(las_session, NULL)); + las_session->txn.isolation = WT_TXN_ISO_READ_UNCOMMITTED; /* * Make sure there are no leftover entries (e.g., from a handle @@ -638,6 +639,7 @@ __wt_las_remove_block(WT_SESSION_IMPL *session, */ if (local_cursor) { WT_ERR(__wt_txn_begin(las_session, NULL)); + las_session->txn.isolation = WT_TXN_ISO_READ_UNCOMMITTED; local_txn = true; } diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index b1e42fcf489..fe389b65e4d 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -1864,6 +1864,10 @@ __evict_walk_file(WT_SESSION_IMPL *session, if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU)) continue; + /* Don't queue dirty pages in trees during checkpoints. */ + if (modified && btree->checkpointing != WT_CKPT_OFF) + continue; + /* * It's possible (but unlikely) to visit a page without a read * generation, if we race with the read instantiating the page. @@ -1944,14 +1948,13 @@ __evict_walk_file(WT_SESSION_IMPL *session, goto fast; /* - * If there are active transaction and oldest transaction - * hasn't changed since the last time this page was written, - * it's unlikely we can make progress. Similarly, if the most - * recent update on the page is not yet globally visible, - * eviction will fail. This heuristic avoids repeated attempts - * to evict the same page. + * If the global transaction state hasn't changed since the + * last time we tried eviction, it's unlikely we can make + * progress. Similarly, if the most recent update on the page + * is not yet globally visible, eviction will fail. This + * heuristic avoids repeated attempts to evict the same page. 
*/ - if (modified && (!__wt_page_evict_retry(session, page) || + if (!__wt_page_evict_retry(session, page) || (modified && !__txn_visible_all_id(session, page->modify->update_txn))) continue; @@ -2050,9 +2053,10 @@ __evict_get_ref( cache = S2C(session)->cache; is_app = !F_ISSET(session, WT_SESSION_INTERNAL); server_only = is_server && !WT_EVICT_HAS_WORKERS(session); + /* Application threads do eviction when cache is full of dirty data */ urgent_ok = (!is_app && !is_server) || !WT_EVICT_HAS_WORKERS(session) || - (is_app && __wt_cache_aggressive(session)); + (is_app && F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD)); urgent_queue = cache->evict_urgent_queue; WT_STAT_CONN_INCR(session, cache_eviction_get_ref); diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 65009dc3449..7a84f90eb81 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -121,7 +121,6 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_PAGE *page; - WT_PAGE_MODIFY *mod; bool clean_page, inmem_split, tree_dead; conn = S2C(session); @@ -166,8 +165,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) conn->cache->evict_max_page_size = page->memory_footprint; /* Figure out whether reconciliation was done on the page */ - mod = page->modify; - clean_page = mod == NULL || mod->rec_result == 0; + clean_page = __wt_page_evict_clean(page); /* Update the reference and discard the page. 
*/ if (__wt_ref_is_root(ref)) @@ -569,13 +567,13 @@ __evict_review( if (F_ISSET(conn, WT_CONN_IN_MEMORY)) LF_SET(WT_REC_IN_MEMORY | WT_REC_SCRUB | WT_REC_UPDATE_RESTORE); + else if (WT_SESSION_IS_CHECKPOINT(session)) + LF_SET(WT_REC_LOOKASIDE); else if (!WT_IS_METADATA(session->dhandle)) { - if (!WT_SESSION_IS_CHECKPOINT(session)) { - LF_SET(WT_REC_UPDATE_RESTORE); + LF_SET(WT_REC_UPDATE_RESTORE); - if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB)) - LF_SET(WT_REC_SCRUB); - } + if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB)) + LF_SET(WT_REC_SCRUB); /* * If the cache is under pressure with many updates diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index d45b68d1972..c5cdfe5850a 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -203,7 +203,7 @@ struct __wt_page_lookaside { lookaside */ WT_DECL_TIMESTAMP(min_timestamp) /* Min timestamp in lookaside */ WT_DECL_TIMESTAMP(onpage_timestamp) /* Max timestamp on page */ - bool las_skew_oldest; /* On-page skewed to oldest */ + bool las_skew_newest; /* On-page skewed to newest */ }; /* @@ -215,6 +215,7 @@ struct __wt_page_modify { uint64_t first_dirty_txn; /* The transaction state last time eviction was attempted. */ + uint64_t last_evict_pass_gen; uint64_t last_eviction_id; WT_DECL_TIMESTAMP(last_eviction_timestamp) diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 19b300908b1..560cc8eb212 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -28,6 +28,17 @@ __wt_page_is_empty(WT_PAGE *page) } /* + * __wt_page_evict_clean -- + * Return if the page can be evicted without dirtying the tree. 
+ */ +static inline bool +__wt_page_evict_clean(WT_PAGE *page) +{ + return (page->modify == NULL || (page->modify->write_gen == 0 && + page->modify->rec_result == 0)); +} + +/* * __wt_page_is_modified -- * Return if the page is dirty. */ @@ -1268,8 +1279,7 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) /* * __wt_page_evict_retry -- - * Check if there has been transaction progress since the last eviction - * attempt. + * Avoid busy-spinning attempting to evict the same page all the time. */ static inline bool __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page) @@ -1279,29 +1289,43 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page) txn_global = &S2C(session)->txn_global; - if ((mod = page->modify) == NULL) + /* + * If the page hasn't been through one round of update/restore, give it + * a try. + */ + if ((mod = page->modify) == NULL || !mod->update_restored) return (true); - if (txn_global->current != txn_global->oldest_id && - mod->last_eviction_id == __wt_txn_oldest_id(session)) - return (false); + /* + * Retry if a reasonable amount of eviction time has passed, the + * choice of 5 eviction passes as a reasonable amount of time is + * currently pretty arbitrary. + */ + if (__wt_cache_aggressive(session) || + mod->last_evict_pass_gen + 5 < S2C(session)->cache->evict_pass_gen) + return (true); + + /* Retry if the global transaction state has moved forward. 
*/ + if (txn_global->current == txn_global->oldest_id || + mod->last_eviction_id != __wt_txn_oldest_id(session)) + return (true); #ifdef HAVE_TIMESTAMPS { bool same_timestamp; - if (__wt_timestamp_iszero(&mod->last_eviction_timestamp)) + same_timestamp = false; + if (!__wt_timestamp_iszero(&mod->last_eviction_timestamp)) + WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock, + same_timestamp = __wt_timestamp_cmp( + &mod->last_eviction_timestamp, + &txn_global->pinned_timestamp) == 0); + if (!same_timestamp) return (true); - - WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock, - same_timestamp = __wt_timestamp_cmp( - &mod->last_eviction_timestamp, &txn_global->pinned_timestamp) == 0); - if (same_timestamp) - return (false); } #endif - return (true); + return (false); } /* @@ -1336,6 +1360,14 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) return (false); /* + * If the page was restored after a truncate, it can't be evicted until + * the truncate completes. + */ + if (ref->page_del != NULL && !__wt_txn_visible_all(session, + ref->page_del->txnid, WT_TIMESTAMP_NULL(&ref->page_del->timestamp))) + return (false); + + /* * Check for in-memory splits before other eviction tests. If the page * should split in-memory, return success immediately and skip more * detailed eviction tests. We don't need further tests since the page @@ -1423,15 +1455,22 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) * * Fast checks if eviction is disabled for this handle, operation or * tree, then perform a general check if eviction will be possible. + * + * Checkpoint should not queue pages for urgent eviction if it cannot + * evict them immediately: there is a special exemption that allows + * checkpoint to evict dirty pages in a tree that is being + * checkpointed, and no other thread can help with that. 
*/ page = ref->page; if (WT_READGEN_EVICT_SOON(page->read_gen) && btree->evict_disabled == 0 && __wt_page_can_evict(session, ref, &inmem_split)) { - if ((LF_ISSET(WT_READ_NO_SPLIT) || (!inmem_split && - F_ISSET(session, WT_SESSION_NO_RECONCILE)))) - __wt_page_evict_urgent(session, ref); - else { + if (!__wt_page_evict_clean(page) && + (LF_ISSET(WT_READ_NO_SPLIT) || (!inmem_split && + F_ISSET(session, WT_SESSION_NO_RECONCILE)))) { + if (!WT_SESSION_IS_CHECKPOINT(session)) + __wt_page_evict_urgent(session, ref); + } else { WT_RET_BUSY_OK(__wt_page_release_evict(session, ref)); return (0); } diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index ce9e1e57a47..272544b08f7 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -166,6 +166,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags #endif ); extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_value_return_upd(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_value_return( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 77b8c2a2e78..233e0ec61f6 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -40,9 +40,9 @@ typedef struct { /* * Track the oldest 
running transaction and whether to skew lookaside - * to the newest or oldest update. + * to the newest update. */ - bool las_skew_oldest; + bool las_skew_newest; uint64_t last_running; /* Track the page's min/maximum transactions. */ @@ -428,7 +428,8 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, __wt_timestamp_set(&mod->last_eviction_timestamp, &S2C(session)->txn_global.pinned_timestamp)); #endif - } + mod->last_evict_pass_gen = S2C(session)->cache->evict_pass_gen; + } #ifdef HAVE_DIAGNOSTIC /* @@ -906,6 +907,7 @@ __rec_init(WT_SESSION_IMPL *session, WT_PAGE *page; WT_RECONCILE *r; WT_TXN_GLOBAL *txn_global; + bool las_skew_oldest; btree = S2BT(session); page = ref->page; @@ -951,10 +953,13 @@ __rec_init(WT_SESSION_IMPL *session, */ txn_global = &S2C(session)->txn_global; if (__wt_btree_immediately_durable(session)) - r->las_skew_oldest = false; + las_skew_oldest = false; else - WT_ORDERED_READ(r->las_skew_oldest, + WT_ORDERED_READ(las_skew_oldest, txn_global->has_stable_timestamp); + r->las_skew_newest = LF_ISSET(WT_REC_LOOKASIDE) && + LF_ISSET(WT_REC_VISIBLE_ALL) && !las_skew_oldest; + WT_ORDERED_READ(r->last_running, txn_global->last_running); /* @@ -1343,8 +1348,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, * version (but we save enough information that checkpoint can * fix things up if we choose an update that is too new). */ - if (*updp == NULL && F_ISSET(r, WT_REC_LOOKASIDE) && - F_ISSET(r, WT_REC_VISIBLE_ALL) && !r->las_skew_oldest) + if (*updp == NULL && r->las_skew_newest) *updp = upd; if (F_ISSET(r, WT_REC_VISIBLE_ALL) ? @@ -1479,7 +1483,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, * unresolved updates, move the entire update list. */ WT_RET(__rec_update_save(session, r, ins, ripcip, *updp, upd_memsize)); - if (upd_savedp != NULL) *upd_savedp = true; @@ -1522,7 +1525,7 @@ check_original_value: * - or any reconciliation of a backing overflow record that will be * physically removed once it's no longer needed. 
*/ - if (*updp != NULL && ((*updp)->type == WT_UPDATE_MODIFIED || + if (*updp != NULL && (!WT_UPDATE_DATA_VALUE(*updp) || F_ISSET(r, WT_REC_LOOKASIDE) || (vpack != NULL && vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM))) WT_RET( @@ -3385,7 +3388,7 @@ __rec_split_write_supd(WT_SESSION_IMPL *session, } done: /* Track the oldest timestamp seen so far. */ - multi->page_las.las_skew_oldest = r->las_skew_oldest; + multi->page_las.las_skew_newest = r->las_skew_newest; multi->page_las.las_max_txn = r->max_txn; WT_ASSERT(session, r->max_txn != WT_TXN_NONE); #ifdef HAVE_TIMESTAMPS @@ -4628,8 +4631,9 @@ record_loop: /* break; case WT_UPDATE_MODIFIED: cbt->slot = WT_COL_SLOT(page, cip); - WT_ERR(__wt_value_return( - session, cbt, upd)); + WT_ERR(__wt_value_return_upd( + session, cbt, upd, + F_ISSET(r, WT_REC_VISIBLE_ALL))); data = cbt->iface.value.data; size = (uint32_t)cbt->iface.value.size; update_no_copy = false; @@ -4872,8 +4876,9 @@ compare: /* * on-page item. */ cbt->slot = UINT32_MAX; - WT_ERR(__wt_value_return( - session, cbt, upd)); + WT_ERR(__wt_value_return_upd( + session, cbt, upd, + F_ISSET(r, WT_REC_VISIBLE_ALL))); data = cbt->iface.value.data; size = (uint32_t)cbt->iface.value.size; update_no_copy = false; @@ -5470,7 +5475,9 @@ __rec_row_leaf(WT_SESSION_IMPL *session, goto leaf_insert; case WT_UPDATE_MODIFIED: cbt->slot = WT_ROW_SLOT(page, rip); - WT_ERR(__wt_value_return(session, cbt, upd)); + WT_ERR(__wt_value_return_upd( + session, cbt, upd, + F_ISSET(r, WT_REC_VISIBLE_ALL))); WT_ERR(__rec_cell_build_val(session, r, cbt->iface.value.data, cbt->iface.value.size, (uint64_t)0)); @@ -5673,10 +5680,10 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) session, r, WT_INSERT_KEY_SIZE(ins))); /* - * Turn off prefix compression until a full key is - * written into the new page. + * Turn off prefix and suffix compression until a full + * key is written into the new page. 
*/ - r->key_pfx_compress = false; + r->key_pfx_compress = r->key_sfx_compress = false; continue; } @@ -5689,7 +5696,9 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) * item. */ cbt->slot = UINT32_MAX; - WT_RET(__wt_value_return(session, cbt, upd)); + WT_RET(__wt_value_return_upd( + session, cbt, upd, + F_ISSET(r, WT_REC_VISIBLE_ALL))); WT_RET(__rec_cell_build_val(session, r, cbt->iface.value.data, cbt->iface.value.size, (uint64_t)0)); |