summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alex Gorrod <alexander.gorrod@mongodb.com> 2017-11-29 17:31:12 +1100
committer: Alex Gorrod <alexander.gorrod@mongodb.com> 2017-11-29 17:31:12 +1100
commit: 827882209411acf9c96d99c122053283ecb53055 (patch)
tree: 58494372b075b8c48c081a77479124ba12b59716
parent: 780a2fb15d6c0acb9f5fd942c90d0f0774a37628 (diff)
parent: 6dcff54e40ce18729d14a9e96b1cbcb4fbc331fe (diff)
download: mongo-827882209411acf9c96d99c122053283ecb53055.tar.gz
Merge branch 'mongodb-3.8' into mongodb-3.6 (ref: mongodb-3.6.0)
-rw-r--r-- src/btree/bt_compact.c | 2
-rw-r--r-- src/btree/bt_read.c | 11
-rw-r--r-- src/btree/bt_ret.c | 13
-rw-r--r-- src/btree/bt_split.c | 5
-rw-r--r-- src/cache/cache_las.c | 6
-rw-r--r-- src/evict/evict_lru.c | 20
-rw-r--r-- src/evict/evict_page.c | 14
-rw-r--r-- src/include/btmem.h | 3
-rw-r--r-- src/include/btree.i | 75
-rw-r--r-- src/include/extern.h | 1
-rw-r--r-- src/reconcile/rec_write.c | 47
11 files changed, 126 insertions, 71 deletions
diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c
index b3e23a8251c..63015312232 100644
--- a/src/btree/bt_compact.c
+++ b/src/btree/bt_compact.c
@@ -35,7 +35,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
* If the page is a replacement, test the replacement addresses.
* Ignore empty pages, they get merged into the parent.
*/
- if (mod == NULL || mod->rec_result == 0) {
+ if (__wt_page_evict_clean(page)) {
__wt_ref_info(ref, &addr, &addr_size, NULL);
if (addr == NULL)
return (0);
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index fd9a7597d73..19ff15fb21e 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -221,7 +221,7 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
*/
page->modify->first_dirty_txn = WT_TXN_FIRST;
- if (!ref->page_las->las_skew_oldest &&
+ if (ref->page_las->las_skew_newest &&
!S2C(session)->txn_global.has_stable_timestamp &&
__wt_txn_visible_all(session, ref->page_las->las_max_txn,
WT_TIMESTAMP_NULL(&ref->page_las->onpage_timestamp))) {
@@ -268,7 +268,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
* It's hard to imagine a page with a huge memory footprint that has
* never been modified, but check to be sure.
*/
- if (page->modify == NULL)
+ if (__wt_page_evict_clean(page))
return (false);
/* Pages are usually small enough, check that first. */
@@ -297,8 +297,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
* skipping the page indefinitely or large records can lead to
* extremely large memory footprints.
*/
- if (page->modify->update_restored &&
- !__wt_page_evict_retry(session, page))
+ if (!__wt_page_evict_retry(session, page))
return (false);
/* Trigger eviction on the next page release. */
@@ -496,7 +495,7 @@ __las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
goto done;
if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) &&
- !ref->page_las->las_skew_oldest) {
+ ref->page_las->las_skew_newest) {
skip = true;
goto done;
}
@@ -512,7 +511,7 @@ __las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
&session->txn.read_timestamp) <= 0);
if (F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) &&
- ref->page_las->las_skew_oldest &&
+ !ref->page_las->las_skew_newest &&
__wt_timestamp_cmp(
&ref->page_las->min_timestamp, &session->txn.read_timestamp) > 0) {
skip = true;
diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c
index d63b5884fef..b24a4e1db45 100644
--- a/src/btree/bt_ret.c
+++ b/src/btree/bt_ret.c
@@ -137,13 +137,13 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
#define WT_MODIFY_ARRAY_SIZE (WT_MAX_MODIFY_UPDATE + 10)
/*
- * __value_return_upd --
+ * __wt_value_return_upd --
* Change the cursor to reference an internal update structure return
* value.
*/
-static inline int
-__value_return_upd(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+int
+__wt_value_return_upd(WT_SESSION_IMPL *session,
+ WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility)
{
WT_CURSOR *cursor;
WT_DECL_RET;
@@ -173,7 +173,8 @@ __value_return_upd(
* that are visible to us.
*/
for (i = 0, listp = list; upd != NULL; upd = upd->next) {
- if (!__wt_txn_upd_visible(session, upd))
+ if (upd->txnid == WT_TXN_ABORTED ||
+ (!ignore_visibility && !__wt_txn_upd_visible(session, upd)))
continue;
if (WT_UPDATE_DATA_VALUE(upd))
@@ -273,7 +274,7 @@ __wt_value_return(
if (upd == NULL)
WT_RET(__value_return(session, cbt));
else
- WT_RET(__value_return_upd(session, cbt, upd));
+ WT_RET(__wt_value_return_upd(session, cbt, upd, false));
F_SET(cursor, WT_CURSTD_VALUE_INT);
return (0);
}
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index a9643ed92a0..bf7ea54adb0 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -1493,9 +1493,10 @@ __split_multi_inmem(
page->modify->first_dirty_txn = WT_TXN_FIRST;
/*
- * If the new page is modified, save the oldest ID from reconciliation
- * to avoid repeatedly attempting eviction on the same page.
+ * If the new page is modified, save the eviction generation to avoid
+ * repeatedly attempting eviction on the same page.
*/
+ page->modify->last_evict_pass_gen = orig->modify->last_evict_pass_gen;
page->modify->last_eviction_id = orig->modify->last_eviction_id;
__wt_timestamp_set(&page->modify->last_eviction_timestamp,
&orig->modify->last_eviction_timestamp);
diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c
index 9f8aeb7cc9e..e0d90ad836d 100644
--- a/src/cache/cache_las.c
+++ b/src/cache/cache_las.c
@@ -64,7 +64,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
dstats = ((WT_CURSOR_BTREE *)
cache->las_session[0]->las_cursor)->btree->dhandle->stats;
- v = WT_STAT_READ(dstats, cursor_insert);
+ v = WT_STAT_READ(dstats, cursor_update);
WT_STAT_SET(session, cstats, cache_lookaside_insert, v);
v = WT_STAT_READ(dstats, cursor_remove);
WT_STAT_SET(session, cstats, cache_lookaside_remove, v);
@@ -385,7 +385,7 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
btree_id, multi->page_las.las_pageid,
multi->page_las.las_max_txn,
hex_timestamp,
- multi->page_las.las_skew_oldest? "oldest" : "youngest",
+ multi->page_las.las_skew_newest? "newest" : "oldest",
WT_STAT_READ(conn->stats, cache_lookaside_entries),
pct_dirty, pct_full);
}
@@ -433,6 +433,7 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
/* Wrap all the updates in a transaction. */
las_session = (WT_SESSION_IMPL *)cursor->session;
WT_RET(__wt_txn_begin(las_session, NULL));
+ las_session->txn.isolation = WT_TXN_ISO_READ_UNCOMMITTED;
/*
* Make sure there are no leftover entries (e.g., from a handle
@@ -638,6 +639,7 @@ __wt_las_remove_block(WT_SESSION_IMPL *session,
*/
if (local_cursor) {
WT_ERR(__wt_txn_begin(las_session, NULL));
+ las_session->txn.isolation = WT_TXN_ISO_READ_UNCOMMITTED;
local_txn = true;
}
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index b1e42fcf489..fe389b65e4d 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -1864,6 +1864,10 @@ __evict_walk_file(WT_SESSION_IMPL *session,
if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
continue;
+ /* Don't queue dirty pages in trees during checkpoints. */
+ if (modified && btree->checkpointing != WT_CKPT_OFF)
+ continue;
+
/*
* It's possible (but unlikely) to visit a page without a read
* generation, if we race with the read instantiating the page.
@@ -1944,14 +1948,13 @@ __evict_walk_file(WT_SESSION_IMPL *session,
goto fast;
/*
- * If there are active transaction and oldest transaction
- * hasn't changed since the last time this page was written,
- * it's unlikely we can make progress. Similarly, if the most
- * recent update on the page is not yet globally visible,
- * eviction will fail. This heuristic avoids repeated attempts
- * to evict the same page.
+ * If the global transaction state hasn't changed since the
+ * last time we tried eviction, it's unlikely we can make
+ * progress. Similarly, if the most recent update on the page
+ * is not yet globally visible, eviction will fail. This
+ * heuristic avoids repeated attempts to evict the same page.
*/
- if (modified && (!__wt_page_evict_retry(session, page) ||
+ if (!__wt_page_evict_retry(session, page) || (modified &&
!__txn_visible_all_id(session, page->modify->update_txn)))
continue;
@@ -2050,9 +2053,10 @@ __evict_get_ref(
cache = S2C(session)->cache;
is_app = !F_ISSET(session, WT_SESSION_INTERNAL);
server_only = is_server && !WT_EVICT_HAS_WORKERS(session);
+ /* Application threads do eviction when cache is full of dirty data */
urgent_ok = (!is_app && !is_server) ||
!WT_EVICT_HAS_WORKERS(session) ||
- (is_app && __wt_cache_aggressive(session));
+ (is_app && F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD));
urgent_queue = cache->evict_urgent_queue;
WT_STAT_CONN_INCR(session, cache_eviction_get_ref);
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 65009dc3449..7a84f90eb81 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -121,7 +121,6 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
bool clean_page, inmem_split, tree_dead;
conn = S2C(session);
@@ -166,8 +165,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
conn->cache->evict_max_page_size = page->memory_footprint;
/* Figure out whether reconciliation was done on the page */
- mod = page->modify;
- clean_page = mod == NULL || mod->rec_result == 0;
+ clean_page = __wt_page_evict_clean(page);
/* Update the reference and discard the page. */
if (__wt_ref_is_root(ref))
@@ -569,13 +567,13 @@ __evict_review(
if (F_ISSET(conn, WT_CONN_IN_MEMORY))
LF_SET(WT_REC_IN_MEMORY |
WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
+ else if (WT_SESSION_IS_CHECKPOINT(session))
+ LF_SET(WT_REC_LOOKASIDE);
else if (!WT_IS_METADATA(session->dhandle)) {
- if (!WT_SESSION_IS_CHECKPOINT(session)) {
- LF_SET(WT_REC_UPDATE_RESTORE);
+ LF_SET(WT_REC_UPDATE_RESTORE);
- if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB))
- LF_SET(WT_REC_SCRUB);
- }
+ if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB))
+ LF_SET(WT_REC_SCRUB);
/*
* If the cache is under pressure with many updates
diff --git a/src/include/btmem.h b/src/include/btmem.h
index d45b68d1972..c5cdfe5850a 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -203,7 +203,7 @@ struct __wt_page_lookaside {
lookaside */
WT_DECL_TIMESTAMP(min_timestamp) /* Min timestamp in lookaside */
WT_DECL_TIMESTAMP(onpage_timestamp) /* Max timestamp on page */
- bool las_skew_oldest; /* On-page skewed to oldest */
+ bool las_skew_newest; /* On-page skewed to newest */
};
/*
@@ -215,6 +215,7 @@ struct __wt_page_modify {
uint64_t first_dirty_txn;
/* The transaction state last time eviction was attempted. */
+ uint64_t last_evict_pass_gen;
uint64_t last_eviction_id;
WT_DECL_TIMESTAMP(last_eviction_timestamp)
diff --git a/src/include/btree.i b/src/include/btree.i
index 19b300908b1..560cc8eb212 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -28,6 +28,17 @@ __wt_page_is_empty(WT_PAGE *page)
}
/*
+ * __wt_page_evict_clean --
+ * Return if the page can be evicted without dirtying the tree.
+ */
+static inline bool
+__wt_page_evict_clean(WT_PAGE *page)
+{
+ return (page->modify == NULL || (page->modify->write_gen == 0 &&
+ page->modify->rec_result == 0));
+}
+
+/*
* __wt_page_is_modified --
* Return if the page is dirty.
*/
@@ -1268,8 +1279,7 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
/*
* __wt_page_evict_retry --
- * Check if there has been transaction progress since the last eviction
- * attempt.
+ * Avoid busy-spinning attempting to evict the same page all the time.
*/
static inline bool
__wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
@@ -1279,29 +1289,43 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
txn_global = &S2C(session)->txn_global;
- if ((mod = page->modify) == NULL)
+ /*
+ * If the page hasn't been through one round of update/restore, give it
+ * a try.
+ */
+ if ((mod = page->modify) == NULL || !mod->update_restored)
return (true);
- if (txn_global->current != txn_global->oldest_id &&
- mod->last_eviction_id == __wt_txn_oldest_id(session))
- return (false);
+ /*
+ * Retry if a reasonable amount of eviction time has passed, the
+ * choice of 5 eviction passes as a reasonable amount of time is
+ * currently pretty arbitrary.
+ */
+ if (__wt_cache_aggressive(session) ||
+ mod->last_evict_pass_gen + 5 < S2C(session)->cache->evict_pass_gen)
+ return (true);
+
+ /* Retry if the global transaction state has moved forward. */
+ if (txn_global->current == txn_global->oldest_id ||
+ mod->last_eviction_id != __wt_txn_oldest_id(session))
+ return (true);
#ifdef HAVE_TIMESTAMPS
{
bool same_timestamp;
- if (__wt_timestamp_iszero(&mod->last_eviction_timestamp))
+ same_timestamp = false;
+ if (!__wt_timestamp_iszero(&mod->last_eviction_timestamp))
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ same_timestamp = __wt_timestamp_cmp(
+ &mod->last_eviction_timestamp,
+ &txn_global->pinned_timestamp) == 0);
+ if (!same_timestamp)
return (true);
-
- WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
- same_timestamp = __wt_timestamp_cmp(
- &mod->last_eviction_timestamp, &txn_global->pinned_timestamp) == 0);
- if (same_timestamp)
- return (false);
}
#endif
- return (true);
+ return (false);
}
/*
@@ -1336,6 +1360,14 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
return (false);
/*
+ * If the page was restored after a truncate, it can't be evicted until
+ * the truncate completes.
+ */
+ if (ref->page_del != NULL && !__wt_txn_visible_all(session,
+ ref->page_del->txnid, WT_TIMESTAMP_NULL(&ref->page_del->timestamp)))
+ return (false);
+
+ /*
* Check for in-memory splits before other eviction tests. If the page
* should split in-memory, return success immediately and skip more
* detailed eviction tests. We don't need further tests since the page
@@ -1423,15 +1455,22 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
*
* Fast checks if eviction is disabled for this handle, operation or
* tree, then perform a general check if eviction will be possible.
+ *
+ * Checkpoint should not queue pages for urgent eviction if it cannot
+ * evict them immediately: there is a special exemption that allows
+ * checkpoint to evict dirty pages in a tree that is being
+ * checkpointed, and no other thread can help with that.
*/
page = ref->page;
if (WT_READGEN_EVICT_SOON(page->read_gen) &&
btree->evict_disabled == 0 &&
__wt_page_can_evict(session, ref, &inmem_split)) {
- if ((LF_ISSET(WT_READ_NO_SPLIT) || (!inmem_split &&
- F_ISSET(session, WT_SESSION_NO_RECONCILE))))
- __wt_page_evict_urgent(session, ref);
- else {
+ if (!__wt_page_evict_clean(page) &&
+ (LF_ISSET(WT_READ_NO_SPLIT) || (!inmem_split &&
+ F_ISSET(session, WT_SESSION_NO_RECONCILE)))) {
+ if (!WT_SESSION_IS_CHECKPOINT(session))
+ __wt_page_evict_urgent(session, ref);
+ } else {
WT_RET_BUSY_OK(__wt_page_release_evict(session, ref));
return (0);
}
diff --git a/src/include/extern.h b/src/include/extern.h
index ce9e1e57a47..272544b08f7 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -166,6 +166,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
#endif
);
extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_value_return_upd(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_value_return( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index 77b8c2a2e78..233e0ec61f6 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -40,9 +40,9 @@ typedef struct {
/*
* Track the oldest running transaction and whether to skew lookaside
- * to the newest or oldest update.
+ * to the newest update.
*/
- bool las_skew_oldest;
+ bool las_skew_newest;
uint64_t last_running;
/* Track the page's min/maximum transactions. */
@@ -428,7 +428,8 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
__wt_timestamp_set(&mod->last_eviction_timestamp,
&S2C(session)->txn_global.pinned_timestamp));
#endif
- }
+ mod->last_evict_pass_gen = S2C(session)->cache->evict_pass_gen;
+ }
#ifdef HAVE_DIAGNOSTIC
/*
@@ -906,6 +907,7 @@ __rec_init(WT_SESSION_IMPL *session,
WT_PAGE *page;
WT_RECONCILE *r;
WT_TXN_GLOBAL *txn_global;
+ bool las_skew_oldest;
btree = S2BT(session);
page = ref->page;
@@ -951,10 +953,13 @@ __rec_init(WT_SESSION_IMPL *session,
*/
txn_global = &S2C(session)->txn_global;
if (__wt_btree_immediately_durable(session))
- r->las_skew_oldest = false;
+ las_skew_oldest = false;
else
- WT_ORDERED_READ(r->las_skew_oldest,
+ WT_ORDERED_READ(las_skew_oldest,
txn_global->has_stable_timestamp);
+ r->las_skew_newest = LF_ISSET(WT_REC_LOOKASIDE) &&
+ LF_ISSET(WT_REC_VISIBLE_ALL) && !las_skew_oldest;
+
WT_ORDERED_READ(r->last_running, txn_global->last_running);
/*
@@ -1343,8 +1348,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* version (but we save enough information that checkpoint can
* fix things up if we choose an update that is too new).
*/
- if (*updp == NULL && F_ISSET(r, WT_REC_LOOKASIDE) &&
- F_ISSET(r, WT_REC_VISIBLE_ALL) && !r->las_skew_oldest)
+ if (*updp == NULL && r->las_skew_newest)
*updp = upd;
if (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
@@ -1479,7 +1483,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* unresolved updates, move the entire update list.
*/
WT_RET(__rec_update_save(session, r, ins, ripcip, *updp, upd_memsize));
-
if (upd_savedp != NULL)
*upd_savedp = true;
@@ -1522,7 +1525,7 @@ check_original_value:
* - or any reconciliation of a backing overflow record that will be
* physically removed once it's no longer needed.
*/
- if (*updp != NULL && ((*updp)->type == WT_UPDATE_MODIFIED ||
+ if (*updp != NULL && (!WT_UPDATE_DATA_VALUE(*updp) ||
F_ISSET(r, WT_REC_LOOKASIDE) || (vpack != NULL &&
vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)))
WT_RET(
@@ -3385,7 +3388,7 @@ __rec_split_write_supd(WT_SESSION_IMPL *session,
}
done: /* Track the oldest timestamp seen so far. */
- multi->page_las.las_skew_oldest = r->las_skew_oldest;
+ multi->page_las.las_skew_newest = r->las_skew_newest;
multi->page_las.las_max_txn = r->max_txn;
WT_ASSERT(session, r->max_txn != WT_TXN_NONE);
#ifdef HAVE_TIMESTAMPS
@@ -4628,8 +4631,9 @@ record_loop: /*
break;
case WT_UPDATE_MODIFIED:
cbt->slot = WT_COL_SLOT(page, cip);
- WT_ERR(__wt_value_return(
- session, cbt, upd));
+ WT_ERR(__wt_value_return_upd(
+ session, cbt, upd,
+ F_ISSET(r, WT_REC_VISIBLE_ALL)));
data = cbt->iface.value.data;
size = (uint32_t)cbt->iface.value.size;
update_no_copy = false;
@@ -4872,8 +4876,9 @@ compare: /*
* on-page item.
*/
cbt->slot = UINT32_MAX;
- WT_ERR(__wt_value_return(
- session, cbt, upd));
+ WT_ERR(__wt_value_return_upd(
+ session, cbt, upd,
+ F_ISSET(r, WT_REC_VISIBLE_ALL)));
data = cbt->iface.value.data;
size = (uint32_t)cbt->iface.value.size;
update_no_copy = false;
@@ -5470,7 +5475,9 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
goto leaf_insert;
case WT_UPDATE_MODIFIED:
cbt->slot = WT_ROW_SLOT(page, rip);
- WT_ERR(__wt_value_return(session, cbt, upd));
+ WT_ERR(__wt_value_return_upd(
+ session, cbt, upd,
+ F_ISSET(r, WT_REC_VISIBLE_ALL)));
WT_ERR(__rec_cell_build_val(session, r,
cbt->iface.value.data,
cbt->iface.value.size, (uint64_t)0));
@@ -5673,10 +5680,10 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
session, r, WT_INSERT_KEY_SIZE(ins)));
/*
- * Turn off prefix compression until a full key is
- * written into the new page.
+ * Turn off prefix and suffix compression until a full
+ * key is written into the new page.
*/
- r->key_pfx_compress = false;
+ r->key_pfx_compress = r->key_sfx_compress = false;
continue;
}
@@ -5689,7 +5696,9 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
* item.
*/
cbt->slot = UINT32_MAX;
- WT_RET(__wt_value_return(session, cbt, upd));
+ WT_RET(__wt_value_return_upd(
+ session, cbt, upd,
+ F_ISSET(r, WT_REC_VISIBLE_ALL)));
WT_RET(__rec_cell_build_val(session, r,
cbt->iface.value.data,
cbt->iface.value.size, (uint64_t)0));