summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2017-10-18 15:19:18 +1100
committer: Luke Chen <luke.chen@mongodb.com> 2017-10-18 15:19:18 +1100
commit: 4ecdb8f1a327067a178258ad025806eeefc1267b (patch)
tree: 7ad782300fe35ffa4136e3dbbdb110b68248f462
parent: 4b5ade6072d548fdebe3b376f94e0d672eea5359 (diff)
parent: 65fdf75f869af41b9f080c36bc3379675589e4b9 (diff)
download: mongo-4ecdb8f1a327067a178258ad025806eeefc1267b.tar.gz

Merge branch 'develop' into mongodb-3.6
-rw-r--r-- dist/flags.py | 1
-rw-r--r-- dist/function.py | 40
-rw-r--r-- src/btree/bt_compact.c | 104
-rw-r--r-- src/btree/bt_debug.c | 2
-rw-r--r-- src/btree/bt_read.c | 8
-rw-r--r-- src/btree/bt_split.c | 34
-rw-r--r-- src/cache/cache_las.c | 193
-rw-r--r-- src/config/config.c | 7
-rw-r--r-- src/cursor/cur_backup.c | 9
-rw-r--r-- src/docs/basic-api.dox | 2
-rw-r--r-- src/evict/evict_file.c | 10
-rw-r--r-- src/evict/evict_lru.c | 22
-rw-r--r-- src/evict/evict_page.c | 20
-rw-r--r-- src/include/btree.i | 12
-rw-r--r-- src/include/connection.h | 4
-rw-r--r-- src/include/extern.h | 3
-rw-r--r-- src/include/flags.h | 13
-rw-r--r-- src/include/packing.i | 6
-rw-r--r-- src/log/log.c | 6
-rw-r--r-- src/log/log_sys.c | 10
-rw-r--r-- src/reconcile/rec_write.c | 573
-rw-r--r-- src/session/session_compact.c | 65
-rw-r--r-- src/support/pow.c | 3
-rw-r--r-- src/txn/txn.c | 6
-rw-r--r-- src/txn/txn_ckpt.c | 6
-rw-r--r-- src/txn/txn_log.c | 20
-rw-r--r-- src/utilities/util_backup.c | 2
-rw-r--r-- test/format/compact.c | 8
-rw-r--r-- test/format/config.c | 54
-rw-r--r-- test/format/config.h | 16
-rw-r--r-- test/format/format.h | 11
-rw-r--r-- test/format/ops.c | 6
-rw-r--r-- test/format/wts.c | 12
-rw-r--r-- test/suite/test_las.py | 7
-rw-r--r-- test/utility/misc.c | 7
35 files changed, 774 insertions, 528 deletions
diff --git a/dist/flags.py b/dist/flags.py
index df897bcb91e..7ddbff62a63 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -38,7 +38,6 @@ flags = {
'rec_write' : [
'REC_CHECKPOINT',
'REC_EVICT',
- 'REC_INMEM_SPLIT',
'REC_IN_MEMORY',
'REC_LOOKASIDE',
'REC_SCRUB',
diff --git a/dist/function.py b/dist/function.py
index f7118e91874..0e36a539cc4 100644
--- a/dist/function.py
+++ b/dist/function.py
@@ -20,11 +20,12 @@ def missing_comment():
(f, s[:m.start(2)].count('\n'), m.group(2))
# Sort helper function, discard * operators so a pointer doesn't necessarily
-# sort before non-pointers, ignore const/volatile keywords.
+# sort before non-pointers, ignore const/static/volatile keywords.
def function_args_alpha(text):
s = text.strip()
s = re.sub("[*]","", s)
s = re.sub("^const ","", s)
+ s = re.sub("^static ","", s)
s = re.sub("^volatile ","", s)
return s
@@ -68,9 +69,10 @@ types = [
# Return the sort order of a variable declaration, or no-match.
# This order isn't defensible: it's roughly how WiredTiger looked when we
# settled on a style, and it's roughly what the KNF/BSD styles look like.
-def function_args(line):
+def function_args(name, line):
line = line.strip()
line = re.sub("^const ", "", line)
+ line = re.sub("^static ", "", line)
line = re.sub("^volatile ", "", line)
# Let WT_UNUSED terminate the parse. It often appears at the beginning
@@ -86,8 +88,7 @@ def function_args(line):
# Check for illegal types.
for m in illegal_types:
if re.search('^' + m + "\s*[\w(*]", line):
- print >>sys.stderr, \
- m + ": illegal declaration: " + line.strip()
+ print >>sys.stderr, name + ": illegal type: " + line.strip()
sys.exit(1)
# Check for matching types.
@@ -117,17 +118,36 @@ def function_declaration():
if not tracking:
tfile.write(line)
if re.search('^{$', line):
- r = [[] for i in range(len(types))]
+ list = [[] for i in range(len(types))]
+ static_list = [[] for i in range(len(types))]
tracking = True;
continue
- found,n = function_args(line)
+ found,n = function_args(name, line)
if found:
- r[n].append(line)
- else :
+ # List statics first.
+ if re.search("^\sstatic", line):
+ static_list[n].append(line)
+ continue
+
+ # Disallow assignments in the declaration. Ignore braces
+ # to allow automatic array initialization using constant
+ # initializers (and we've already skipped statics, which
+ # are also typically initialized in the declaration).
+ if re.search("\s=\s[-\w]", line):
+ print >>sys.stderr, \
+ name + ": assignment in string: " + line.strip()
+ sys.exit(1);
+
+ list[n].append(line)
+ else:
# Sort the resulting lines (we don't yet sort declarations
- # within a single line).
- for arg in filter(None, r):
+ # within a single line). It's two passes, first to catch
+ # the statics, then to catch everything else.
+ for arg in filter(None, static_list):
+ for p in sorted(arg, key=function_args_alpha):
+ tfile.write(p)
+ for arg in filter(None, list):
for p in sorted(arg, key=function_args_alpha):
tfile.write(p)
tfile.write(line)
diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c
index fe860034f88..75bb46aaf89 100644
--- a/src/btree/bt_compact.c
+++ b/src/btree/bt_compact.c
@@ -31,17 +31,6 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
mod = page->modify;
/*
- * Ignore the root: it may not have a replacement address, and besides,
- * if anything else gets written, so will it.
- */
- if (__wt_ref_is_root(ref))
- return (0);
-
- /* Ignore currently dirty pages, they will be written regardless. */
- if (__wt_page_is_modified(page))
- return (0);
-
- /*
* If the page is clean, test the original addresses.
* If the page is a replacement, test the replacement addresses.
* Ignore empty pages, they get merged into the parent.
@@ -86,6 +75,45 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
}
/*
+ * __compact_rewrite_lock --
+ * Lock out checkpoints and return if a page needs to be re-written.
+ */
+static int
+__compact_rewrite_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
+{
+ WT_BTREE *btree;
+ WT_DECL_RET;
+
+ *skipp = true; /* Default skip. */
+
+ btree = S2BT(session);
+
+ /*
+ * Reviewing in-memory pages requires looking at page reconciliation
+ * results, because we care about where the page is stored now, not
+ * where the page was stored when we first read it into the cache.
+ * We need to ensure we don't race with page reconciliation as it's
+ * writing the page modify information.
+ *
+ * There are two ways we call reconciliation: checkpoints and eviction.
+ * Get the tree's flush lock which blocks threads writing pages for
+ * checkpoints. If checkpoint is holding the lock, quit working this
+ * file, we'll visit it again in our next pass.
+ *
+ * Serializing with eviction is not quite as simple, and it gets done
+ * in the underlying function that checks modification information.
+ */
+ WT_RET(__wt_spin_trylock(session, &btree->flush_lock));
+
+ ret = __compact_rewrite(session, ref, skipp);
+
+ /* Unblock threads writing leaf pages. */
+ __wt_spin_unlock(session, &btree->flush_lock);
+
+ return (ret);
+}
+
+/*
* __wt_compact --
* Compact a file.
*/
@@ -93,14 +121,12 @@ int
__wt_compact(WT_SESSION_IMPL *session)
{
WT_BM *bm;
- WT_BTREE *btree;
WT_DECL_RET;
WT_REF *ref;
u_int i;
bool skip;
- btree = S2BT(session);
- bm = btree->bm;
+ bm = S2BT(session)->bm;
ref = NULL;
WT_STAT_DATA_INCR(session, session_compact);
@@ -114,28 +140,29 @@ __wt_compact(WT_SESSION_IMPL *session)
if (skip)
return (0);
- /*
- * Reviewing in-memory pages requires looking at page reconciliation
- * results, because we care about where the page is stored now, not
- * where the page was stored when we first read it into the cache.
- * We need to ensure we don't race with page reconciliation as it's
- * writing the page modify information.
- *
- * There are two ways we call reconciliation: checkpoints and eviction.
- * Get the tree's flush lock which blocks threads writing pages for
- * checkpoints.
- */
- __wt_spin_lock(session, &btree->flush_lock);
-
/* Walk the tree reviewing pages to see if they should be re-written. */
for (i = 0;;) {
- /* Periodically check if we've run out of time. */
+ /*
+ * Periodically check if we've timed out or eviction is stuck.
+ * Quit if eviction is stuck, we're making the problem worse.
+ */
if (++i > 100) {
WT_ERR(__wt_session_compact_check_timeout(session));
+
+ if (__wt_cache_stuck(session))
+ WT_ERR(EBUSY);
+
i = 0;
}
/*
+ * Compact pulls pages into cache during the walk without
+ * checking whether the cache is full. Check now to throttle
+ * compact to match eviction speed.
+ */
+ WT_ERR(__wt_cache_eviction_check(session, false, NULL));
+
+ /*
* Pages read for compaction aren't "useful"; don't update the
* read generation of pages already in memory, and if a page is
* read, set its generation to a low value so it is evicted
@@ -147,25 +174,34 @@ __wt_compact(WT_SESSION_IMPL *session)
if (ref == NULL)
break;
- WT_ERR(__compact_rewrite(session, ref, &skip));
- if (skip)
+ /*
+ * Cheap checks that don't require locking.
+ *
+ * Ignore the root: it may not have a replacement address, and
+ * besides, if anything else gets written, so will it.
+ *
+ * Ignore dirty pages, checkpoint writes them regardless.
+ */
+ if (__wt_ref_is_root(ref))
+ continue;
+ if (__wt_page_is_modified(ref->page))
continue;
- session->compact_state = WT_COMPACT_SUCCESS;
+ WT_ERR(__compact_rewrite_lock(session, ref, &skip));
+ if (skip)
+ continue;
/* Rewrite the page: mark the page and tree dirty. */
WT_ERR(__wt_page_modify_init(session, ref->page));
__wt_page_modify_set(session, ref->page);
+ session->compact_state = WT_COMPACT_SUCCESS;
WT_STAT_DATA_INCR(session, btree_compact_rewrite);
}
err: if (ref != NULL)
WT_TRET(__wt_page_release(session, ref, 0));
- /* Unblock threads writing leaf pages. */
- __wt_spin_unlock(session, &btree->flush_lock);
-
return (ret);
}
diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c
index f0388bd1f07..caa960d78ae 100644
--- a/src/btree/bt_debug.c
+++ b/src/btree/bt_debug.c
@@ -478,8 +478,8 @@ __debug_dsk_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
static char *
__debug_tree_shape_info(WT_PAGE *page)
{
- uint64_t v;
static char buf[128];
+ uint64_t v;
const char *unit;
v = page->memory_footprint;
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index 9d4e860f8fd..0246c1eca66 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -300,12 +300,11 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref)
{
struct timespec start, stop;
WT_BTREE *btree;
- WT_CURSOR *las_cursor;
WT_DECL_RET;
WT_ITEM tmp;
WT_PAGE *page;
size_t addr_size;
- uint32_t new_state, previous_state, session_flags;
+ uint32_t new_state, previous_state;
const uint8_t *addr;
bool timer;
@@ -411,12 +410,9 @@ skip_read:
* entries. Note that we are discarding updates so the page
* must be marked available even if these operations fail.
*/
- __wt_las_cursor(session, &las_cursor, &session_flags);
WT_TRET(__wt_las_remove_block(
- session, las_cursor, btree->id, ref->page_las->las_pageid));
+ session, NULL, btree->id, ref->page_las->las_pageid));
__wt_free(session, ref->page_las);
- WT_TRET(__wt_las_cursor_close(
- session, &las_cursor, session_flags));
}
done: WT_PUBLISH(ref->state, WT_REF_MEM);
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 71007e76dfd..1ccb27c2296 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -1495,8 +1495,8 @@ __split_multi_inmem(
* tombstone away: we may need it to correctly resolve
* modifications.
*/
- if (supd->onpage_upd->type == WT_UPDATE_DELETED &&
- prev_upd != NULL)
+ if (prev_upd != NULL &&
+ prev_upd->type == WT_UPDATE_DELETED)
prev_upd = prev_upd->next;
if (prev_upd != NULL) {
__wt_update_obsolete_free(
@@ -1620,8 +1620,11 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
break;
}
- /* There should be an address or a disk image (or both). */
- WT_ASSERT(session,
+ /*
+ * There can be an address or a disk image or both, but if there is
+ * neither, there must be a backing lookaside page.
+ */
+ WT_ASSERT(session, multi->las_pageid != 0 ||
multi->addr.addr != NULL || multi->disk_image != NULL);
/* If closing the file, there better be an address. */
@@ -1652,16 +1655,23 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
addr->type = multi->addr.type;
WT_RET(__wt_memdup(session,
multi->addr.addr, addr->size, &addr->addr));
- if (multi->las_pageid != 0) {
- WT_RET(__wt_calloc_one(session, &ref->page_las));
- ref->page_las->las_pageid = multi->las_pageid;
+
+ ref->state = WT_REF_DISK;
+ }
+
+ /*
+ * Copy any associated lookaside reference, potentially resetting
+ * WT_REF.state. Regardless of a backing address, WT_REF_LOOKASIDE
+ * overrides WT_REF_DISK.
+ */
+ if (multi->las_pageid != 0) {
+ WT_RET(__wt_calloc_one(session, &ref->page_las));
+ ref->page_las->las_pageid = multi->las_pageid;
#ifdef HAVE_TIMESTAMPS
- __wt_timestamp_set(&ref->page_las->min_timestamp,
- &multi->las_min_timestamp);
+ __wt_timestamp_set(
+ &ref->page_las->min_timestamp, &multi->las_min_timestamp);
#endif
- ref->state = WT_REF_LOOKASIDE;
- } else
- ref->state = WT_REF_DISK;
+ ref->state = WT_REF_LOOKASIDE;
}
/*
diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c
index ccf16674a68..00dafb680da 100644
--- a/src/cache/cache_las.c
+++ b/src/cache/cache_las.c
@@ -259,6 +259,184 @@ __wt_las_cursor_close(
}
/*
+ * __las_insert_block_verbose --
+ * Display a verbose message once per checkpoint with details about the
+ * cache state when performing a lookaside table write.
+ */
+static void
+__las_insert_block_verbose(
+ WT_SESSION_IMPL *session, uint32_t btree_id, uint64_t las_pageid)
+{
+#ifdef HAVE_VERBOSE
+ WT_CONNECTION_IMPL *conn;
+ uint64_t ckpt_gen_current, ckpt_gen_last;
+ uint32_t pct_dirty, pct_full;
+
+ if (!WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE))
+ return;
+
+ conn = S2C(session);
+ ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT);
+ ckpt_gen_last = conn->las_verb_gen_write;
+
+ /*
+ * This message is throttled to one per checkpoint. To do this we
+ * track the generation of the last checkpoint for which the message
+ * was printed and check against the current checkpoint generation.
+ */
+ if (ckpt_gen_current > ckpt_gen_last) {
+ /*
+ * Attempt to atomically replace the last checkpoint generation
+ * for which this message was printed. If the atomic swap fails
+ * we have raced and the winning thread will print the message.
+ */
+ if (__wt_atomic_casv64(&conn->las_verb_gen_write,
+ ckpt_gen_last, ckpt_gen_current)) {
+ (void)__wt_eviction_clean_needed(session, &pct_full);
+ (void)__wt_eviction_dirty_needed(session, &pct_dirty);
+
+ __wt_verbose(session, WT_VERB_LOOKASIDE,
+ "Page reconciliation triggered lookaside write"
+ "file ID %" PRIu32 ", page ID %" PRIu64 ". "
+ "Entries now in lookaside file: %" PRId64 ", "
+ "cache dirty: %" PRIu32 "%% , "
+ "cache use: %" PRIu32 "%%",
+ btree_id, las_pageid,
+ WT_STAT_READ(conn->stats, cache_lookaside_entries),
+ pct_dirty, pct_full);
+ }
+ }
+#else
+ WT_UNUSED(session);
+ WT_UNUSED(btree_id);
+ WT_UNUSED(las_pageid);
+#endif
+}
+
+/*
+ * __wt_las_insert_block --
+ * Copy one set of saved updates into the database's lookaside buffer.
+ */
+int
+__wt_las_insert_block(WT_SESSION_IMPL *session,
+ WT_PAGE *page, WT_CURSOR *cursor, WT_MULTI *multi, WT_ITEM *key)
+{
+ WT_ITEM las_timestamp, las_value;
+ WT_SAVE_UPD *list;
+ WT_UPDATE *upd;
+ uint64_t insert_cnt, las_counter, las_pageid;
+ uint32_t btree_id, i, slot;
+ uint8_t *p;
+
+ WT_CLEAR(las_timestamp);
+ WT_CLEAR(las_value);
+ insert_cnt = 0;
+
+ btree_id = S2BT(session)->id;
+ las_pageid = multi->las_pageid =
+ __wt_atomic_add64(&S2BT(session)->las_pageid, 1);
+
+ /*
+ * Make sure there are no leftover entries (e.g., from a handle
+ * reopen).
+ */
+ WT_RET(__wt_las_remove_block(session, cursor, btree_id, las_pageid));
+
+ /* Enter each update in the boundary's list into the lookaside store. */
+ for (las_counter = 0, i = 0,
+ list = multi->supd; i < multi->supd_entries; ++i, ++list) {
+ /* Lookaside table key component: source key. */
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ p = key->mem;
+ WT_RET(
+ __wt_vpack_uint(&p, 0, WT_INSERT_RECNO(list->ins)));
+ key->size = WT_PTRDIFF(p, key->data);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ if (list->ins == NULL)
+ WT_RET(__wt_row_leaf_key(
+ session, page, list->ripcip, key, false));
+ else {
+ key->data = WT_INSERT_KEY(list->ins);
+ key->size = WT_INSERT_KEY_SIZE(list->ins);
+ }
+ break;
+ WT_ILLEGAL_VALUE(session);
+ }
+
+ /*
+ * Lookaside table value component: update reference. Updates
+ * come from the row-store insert list (an inserted item), or
+ * update array (an update to an original on-page item), or from
+ * a column-store insert list (column-store format has no update
+ * array, the insert list contains both inserted items and
+ * updates to original on-page items). When rolling forward a
+ * modify update from an original on-page item, we need an
+ * on-page slot so we can find the original on-page item. When
+ * rolling forward from an inserted item, no on-page slot is
+ * possible.
+ */
+ slot = UINT32_MAX; /* Impossible slot */
+ if (list->ripcip != NULL)
+ slot = page->type == WT_PAGE_ROW_LEAF ?
+ WT_ROW_SLOT(page, list->ripcip) :
+ WT_COL_SLOT(page, list->ripcip);
+ upd = list->ins == NULL ?
+ page->modify->mod_row_update[slot] : list->ins->upd;
+
+ /*
+ * Walk the list of updates, storing each key/value pair into
+ * the lookaside table. Skip aborted items (there's no point
+ * to restoring them), and assert we never see a reserved item.
+ */
+ do {
+ if (upd->txnid == WT_TXN_ABORTED)
+ continue;
+
+ switch (upd->type) {
+ case WT_UPDATE_DELETED:
+ las_value.size = 0;
+ break;
+ case WT_UPDATE_MODIFIED:
+ case WT_UPDATE_STANDARD:
+ las_value.data = upd->data;
+ las_value.size = upd->size;
+ break;
+ case WT_UPDATE_RESERVED:
+ WT_ASSERT(session,
+ upd->type != WT_UPDATE_RESERVED);
+ continue;
+ }
+
+ cursor->set_key(cursor,
+ btree_id, las_pageid, ++las_counter, key);
+
+#ifdef HAVE_TIMESTAMPS
+ las_timestamp.data = &upd->timestamp;
+ las_timestamp.size = WT_TIMESTAMP_SIZE;
+#endif
+ cursor->set_value(cursor,
+ upd->txnid, &las_timestamp, upd->type, &las_value);
+
+ WT_RET(cursor->insert(cursor));
+ ++insert_cnt;
+ } while ((upd = upd->next) != NULL);
+ }
+
+ __wt_free(session, multi->supd);
+ multi->supd_entries = 0;
+
+ if (insert_cnt > 0) {
+ WT_STAT_CONN_INCRV(
+ session, cache_lookaside_entries, insert_cnt);
+ __las_insert_block_verbose(session, btree_id, las_pageid);
+ }
+ return (0);
+}
+
+/*
* __wt_las_remove_block --
* Remove all records matching a key prefix from the lookaside store.
*/
@@ -269,10 +447,18 @@ __wt_las_remove_block(WT_SESSION_IMPL *session,
WT_DECL_RET;
WT_ITEM las_key;
uint64_t las_counter, las_pageid, remove_cnt;
- uint32_t las_id;
+ uint32_t las_id, session_flags;
int exact;
+ bool local_cursor;
remove_cnt = 0;
+ session_flags = 0; /* [-Wconditional-uninitialized] */
+
+ local_cursor = false;
+ if (cursor == NULL) {
+ __wt_las_cursor(session, &cursor, &session_flags);
+ local_cursor = true;
+ }
/*
* Search for the block's unique prefix and step through all matching
@@ -301,6 +487,9 @@ __wt_las_remove_block(WT_SESSION_IMPL *session,
}
WT_ERR_NOTFOUND_OK(ret);
-err: WT_STAT_CONN_DECRV(session, cache_lookaside_entries, remove_cnt);
+err: if (local_cursor)
+ WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+
+ WT_STAT_CONN_DECRV(session, cache_lookaside_entries, remove_cnt);
return (ret);
}
diff --git a/src/config/config.c b/src/config/config.c
index 9669d5bb39f..dd46aa55ad1 100644
--- a/src/config/config.c
+++ b/src/config/config.c
@@ -340,12 +340,15 @@ static const int8_t goesc[256] = {
static int
__config_next(WT_CONFIG *conf, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
{
- WT_CONFIG_ITEM *out = key;
- int utf8_remain = 0;
+ WT_CONFIG_ITEM *out;
+ int utf8_remain;
static const WT_CONFIG_ITEM true_value = {
"", 0, 1, WT_CONFIG_ITEM_BOOL
};
+ out = key;
+ utf8_remain = 0;
+
key->len = 0;
/* Keys with no value default to true. */
*value = true_value;
diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c
index 9781688217f..ecb0e02929f 100644
--- a/src/cursor/cur_backup.c
+++ b/src/cursor/cur_backup.c
@@ -253,16 +253,11 @@ __backup_start(
WT_ERR(__wt_fopen(session, WT_BACKUP_TMP,
WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs));
/*
- * If a list of targets was specified, work our way through them.
- * Else, generate a list of all database objects.
- *
- * Include log files if doing a full backup, and copy them before
- * copying data files to avoid rolling the metadata forward across
- * a checkpoint that completes during the backup.
+ * If targets were specified, add them to the list. Otherwise it is a
+ * full backup, add all database objects and log files to the list.
*/
target_list = false;
WT_ERR(__backup_uri(session, cfg, &target_list, &log_only));
-
if (!target_list) {
WT_ERR(__backup_log_append(session, cb, true));
WT_ERR(__backup_all(session));
diff --git a/src/docs/basic-api.dox b/src/docs/basic-api.dox
index 2b810e6676b..dceb82b06ba 100644
--- a/src/docs/basic-api.dox
+++ b/src/docs/basic-api.dox
@@ -106,7 +106,7 @@ by a previous run of the example). No data extraction or conversion is
required in the application.
Because the cursor was positioned in the table after the WT_CURSOR::insert
-call, we had to re-position it using the WT_CURSOR::first call; if we
+call, we had to re-position it using the WT_CURSOR::reset call; if we
weren't using the cursor for the call to WT_CURSOR::insert above, this loop
would simplify to:
diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c
index f2a09a0a769..bd70de8bddb 100644
--- a/src/evict/evict_file.c
+++ b/src/evict/evict_file.c
@@ -16,12 +16,11 @@ int
__wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
{
WT_BTREE *btree;
- WT_CURSOR *las_cursor;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
WT_PAGE *page;
WT_REF *next_ref, *ref;
- uint32_t session_flags, walk_flags;
+ uint32_t walk_flags;
dhandle = session->dhandle;
btree = dhandle->handle;
@@ -58,12 +57,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
!F_ISSET(btree, WT_BTREE_LOOKASIDE)) {
WT_ASSERT(session, !WT_IS_METADATA(dhandle));
- __wt_las_cursor(session, &las_cursor, &session_flags);
- WT_TRET(__wt_las_remove_block(
- session, las_cursor, btree->id, 0));
- WT_TRET(__wt_las_cursor_close(
- session, &las_cursor, session_flags));
- WT_RET(ret);
+ WT_RET(__wt_las_remove_block(session, NULL, btree->id, 0));
} else
FLD_SET(walk_flags, WT_READ_LOOKASIDE);
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 2bc359df4ae..02208e0f84a 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -113,9 +113,11 @@ __evict_entry_priority(WT_SESSION_IMPL *session, WT_REF *ref)
static int WT_CDECL
__evict_lru_cmp(const void *a_arg, const void *b_arg)
{
- const WT_EVICT_ENTRY *a = a_arg, *b = b_arg;
+ const WT_EVICT_ENTRY *a, *b;
uint64_t a_score, b_score;
+ a = a_arg;
+ b = b_arg;
a_score = (a->ref == NULL ? UINT64_MAX : a->score);
b_score = (b->ref == NULL ? UINT64_MAX : b->score);
@@ -1884,24 +1886,6 @@ __evict_walk_file(WT_SESSION_IMPL *session,
F_ISSET(btree, WT_BTREE_LOOKASIDE))
goto fast;
- /*
- * If application threads are blocked waiting for eviction (so
- * we are going to consider lookaside), and the only thing
- * preventing a clean page from being evicted is that it
- * contains historical data, mark it dirty so we can do
- * lookaside eviction.
- */
- if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD |
- WT_CACHE_EVICT_DIRTY_HARD) &&
- !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE) &&
- !modified && page->modify != NULL &&
- !__wt_txn_visible_all(session, page->modify->rec_max_txn,
- WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp))) {
- __wt_page_only_modify_set(session, page);
- modified = true;
- goto fast;
- }
-
/* Skip clean pages if appropriate. */
if (!modified && !F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
continue;
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 4b7c71c19ee..edf80ec4460 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -10,7 +10,7 @@
static int __evict_page_clean_update(WT_SESSION_IMPL *, WT_REF *, bool);
static int __evict_page_dirty_update(WT_SESSION_IMPL *, WT_REF *, bool);
-static int __evict_review(WT_SESSION_IMPL *, WT_REF *, bool, uint32_t *);
+static int __evict_review(WT_SESSION_IMPL *, WT_REF *, bool, bool *);
/*
* __evict_exclusive_clear --
@@ -122,8 +122,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
- uint32_t flags;
- bool clean_page, tree_dead;
+ bool clean_page, inmem_split, tree_dead;
conn = S2C(session);
@@ -143,13 +142,13 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* to make this check for clean pages, too: while unlikely eviction
* would choose an internal page with children, it's not disallowed.
*/
- WT_ERR(__evict_review(session, ref, closing, &flags));
+ WT_ERR(__evict_review(session, ref, closing, &inmem_split));
/*
* If there was an in-memory split, the tree has been left in the state
* we want: there is nothing more to do.
*/
- if (LF_ISSET(WT_REC_INMEM_SPLIT))
+ if (inmem_split)
goto done;
/* Count evictions of internal pages during normal operation. */
@@ -428,7 +427,7 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent)
*/
static int
__evict_review(
- WT_SESSION_IMPL *session, WT_REF *ref, bool closing, uint32_t *flagsp)
+ WT_SESSION_IMPL *session, WT_REF *ref, bool closing, bool *inmem_splitp)
{
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
@@ -437,11 +436,12 @@ __evict_review(
uint32_t flags;
bool lookaside_retry, *lookaside_retryp, modified;
+ *inmem_splitp = false;
+
conn = S2C(session);
flags = WT_REC_EVICT;
if (!WT_SESSION_IS_CHECKPOINT(session))
LF_SET(WT_REC_VISIBLE_ALL);
- *flagsp = flags;
/*
* Get exclusive access to the page if our caller doesn't have the tree
@@ -508,9 +508,8 @@ __evict_review(
WT_RET(__wt_txn_update_oldest(
session, WT_TXN_OLDEST_STRICT));
- if (!__wt_page_can_evict(session, ref, flagsp))
+ if (!__wt_page_can_evict(session, ref, inmem_splitp))
return (EBUSY);
- flags = *flagsp;
/*
* Check for an append-only workload needing an in-memory
@@ -519,7 +518,7 @@ __evict_review(
* the page stays in memory and the tree is left in the desired
* state: avoid the usual cleanup.
*/
- if (LF_ISSET(WT_REC_INMEM_SPLIT))
+ if (*inmem_splitp)
return (__wt_split_insert(session, ref));
}
@@ -609,7 +608,6 @@ __evict_review(
ret = __wt_reconcile(session, ref, NULL, flags, NULL);
}
- *flagsp = flags;
WT_RET(ret);
/*
diff --git a/src/include/btree.i b/src/include/btree.i
index 35c7d5d5a1a..8803f3b907d 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -462,7 +462,7 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page, bool rewrite)
* real progress.
*/
if (rewrite)
- (void)__wt_atomic_subv64(&cache->pages_inmem, 1);
+ (void)__wt_atomic_sub64(&cache->pages_inmem, 1);
else
(void)__wt_atomic_addv64(&cache->pages_evict, 1);
}
@@ -1287,13 +1287,15 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
* Check whether a page can be evicted.
*/
static inline bool
-__wt_page_can_evict(
- WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *evict_flagsp)
+__wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
{
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
bool modified;
+ if (inmem_splitp != NULL)
+ *inmem_splitp = false;
+
page = ref->page;
mod = page->modify;
@@ -1318,8 +1320,8 @@ __wt_page_can_evict(
* won't be written or discarded from the cache.
*/
if (__wt_leaf_page_can_split(session, page)) {
- if (evict_flagsp != NULL)
- FLD_SET(*evict_flagsp, WT_REC_INMEM_SPLIT);
+ if (inmem_splitp != NULL)
+ *inmem_splitp = true;
return (true);
}
diff --git a/src/include/connection.h b/src/include/connection.h
index 0b9e82ee1ef..3f890a50d2b 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -372,8 +372,8 @@ struct __wt_connection_impl {
* checkpoint. To accomplish this we track the checkpoint generation
* for the most recent read and write verbose messages.
*/
- volatile uint64_t las_verb_gen_read;
- volatile uint64_t las_verb_gen_write;
+ uint64_t las_verb_gen_read;
+ uint64_t las_verb_gen_write;
/* Set of btree IDs not being rolled back */
uint8_t *stable_rollback_bitstring;
diff --git a/src/include/extern.h b/src/include/extern.h
index 23897a05dfb..71bda687659 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -206,6 +206,7 @@ extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE
extern int __wt_las_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags);
extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CURSOR *cursor, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_checksum_init(void);
@@ -434,7 +435,7 @@ extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTR
extern void __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot);
extern int64_t __wt_log_slot_release(WT_MYSLOT *myslot, int64_t size);
extern void __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
-extern int __wt_log_system_record( WT_SESSION_IMPL *session, WT_FH *log_fh, WT_LSN *lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_system_record(WT_SESSION_IMPL *session, WT_FH *log_fh, WT_LSN *lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_log_recover_system(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_verbose_dump_log(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/include/flags.h b/src/include/flags.h
index 65b4ce34752..ec0ef0a5311 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -45,13 +45,12 @@
#define WT_READ_WONT_NEED 0x00000800
#define WT_REC_CHECKPOINT 0x00000001
#define WT_REC_EVICT 0x00000002
-#define WT_REC_INMEM_SPLIT 0x00000004
-#define WT_REC_IN_MEMORY 0x00000008
-#define WT_REC_LOOKASIDE 0x00000010
-#define WT_REC_SCRUB 0x00000020
-#define WT_REC_UPDATE_RESTORE 0x00000040
-#define WT_REC_VISIBILITY_ERR 0x00000080
-#define WT_REC_VISIBLE_ALL 0x00000100
+#define WT_REC_IN_MEMORY 0x00000004
+#define WT_REC_LOOKASIDE 0x00000008
+#define WT_REC_SCRUB 0x00000010
+#define WT_REC_UPDATE_RESTORE 0x00000020
+#define WT_REC_VISIBILITY_ERR 0x00000040
+#define WT_REC_VISIBLE_ALL 0x00000080
#define WT_SESSION_CAN_WAIT 0x00000001
#define WT_SESSION_INTERNAL 0x00000002
#define WT_SESSION_LOCKED_CHECKPOINT 0x00000004
diff --git a/src/include/packing.i b/src/include/packing.i
index 01023b1ba88..e1cf158c660 100644
--- a/src/include/packing.i
+++ b/src/include/packing.i
@@ -719,8 +719,10 @@ __wt_struct_unpackv(WT_SESSION_IMPL *session,
static inline void
__wt_struct_size_adjust(WT_SESSION_IMPL *session, size_t *sizep)
{
- size_t curr_size = *sizep;
- size_t field_size, prev_field_size = 1;
+ size_t curr_size, field_size, prev_field_size;
+
+ curr_size = *sizep;
+ prev_field_size = 1;
while ((field_size = __wt_vsize_uint(curr_size)) != prev_field_size) {
curr_size += field_size - prev_field_size;
diff --git a/src/log/log.c b/src/log/log.c
index fb22d7b013f..89fe64c6f18 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -2577,11 +2577,13 @@ __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap)
WT_DECL_ITEM(logrec);
WT_DECL_RET;
size_t header_size, len;
- uint32_t rectype = WT_LOGREC_MESSAGE;
- const char *rec_fmt = WT_UNCHECKED_STRING(I);
+ uint32_t rectype;
+ const char *rec_fmt;
va_list ap_copy;
conn = S2C(session);
+ rectype = WT_LOGREC_MESSAGE;
+ rec_fmt = WT_UNCHECKED_STRING(I);
if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
return (0);
diff --git a/src/log/log_sys.c b/src/log/log_sys.c
index 32297fd6280..ad65aaf9a8a 100644
--- a/src/log/log_sys.c
+++ b/src/log/log_sys.c
@@ -13,8 +13,7 @@
* Write a system log record for the previous LSN.
*/
int
-__wt_log_system_record(
- WT_SESSION_IMPL *session, WT_FH *log_fh, WT_LSN *lsn)
+__wt_log_system_record(WT_SESSION_IMPL *session, WT_FH *log_fh, WT_LSN *lsn)
{
WT_DECL_ITEM(logrec_buf);
WT_DECL_RET;
@@ -23,10 +22,13 @@ __wt_log_system_record(
WT_LOG_RECORD *logrec;
WT_MYSLOT myslot;
size_t recsize;
- uint32_t rectype = WT_LOGREC_SYSTEM;
- const char *fmt = WT_UNCHECKED_STRING(I);
+ uint32_t rectype;
+ const char *fmt;
log = S2C(session)->log;
+ rectype = WT_LOGREC_SYSTEM;
+ fmt = WT_UNCHECKED_STRING(I);
+
WT_RET(__wt_logrec_alloc(session, log->allocsize, &logrec_buf));
memset((uint8_t *)logrec_buf->mem, 0, log->allocsize);
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index a62489cb661..4056722a13c 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -199,6 +199,7 @@ typedef struct {
WT_SAVE_UPD *supd; /* Saved updates */
uint32_t supd_next;
size_t supd_allocated;
+ size_t supd_memsize; /* Size of saved update structures */
/* List of pages we've written so far. */
WT_MULTI *multi;
@@ -313,6 +314,8 @@ static int __rec_col_var_helper(WT_SESSION_IMPL *, WT_RECONCILE *,
static int __rec_destroy_session(WT_SESSION_IMPL *);
static int __rec_init(WT_SESSION_IMPL *,
WT_REF *, uint32_t, WT_SALVAGE_COOKIE *, void *);
+static int __rec_las_wrapup(WT_SESSION_IMPL *, WT_RECONCILE *);
+static int __rec_las_wrapup_err(WT_SESSION_IMPL *, WT_RECONCILE *);
static uint32_t __rec_min_split_page_size(WT_BTREE *, uint32_t);
static int __rec_root_write(WT_SESSION_IMPL *, WT_PAGE *, uint32_t);
static int __rec_row_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
@@ -326,8 +329,6 @@ static int __rec_split_row_promote(
WT_SESSION_IMPL *, WT_RECONCILE *, WT_ITEM *, uint8_t);
static int __rec_split_write(
WT_SESSION_IMPL *, WT_RECONCILE *, WT_CHUNK *, WT_ITEM *, bool);
-static int __rec_update_las(
- WT_SESSION_IMPL *, WT_RECONCILE *, uint32_t, WT_MULTI *);
static int __rec_write_check_complete(
WT_SESSION_IMPL *, WT_RECONCILE *, int, bool *);
static void __rec_write_page_status(WT_SESSION_IMPL *, WT_RECONCILE *);
@@ -340,8 +341,6 @@ static int __rec_dictionary_init(WT_SESSION_IMPL *, WT_RECONCILE *, u_int);
static int __rec_dictionary_lookup(
WT_SESSION_IMPL *, WT_RECONCILE *, WT_KV *, WT_DICTIONARY **);
static void __rec_dictionary_reset(WT_RECONCILE *);
-static void __rec_verbose_lookaside_write(
- WT_SESSION_IMPL *, uint32_t, uint64_t);
/*
* __wt_reconcile --
@@ -629,7 +628,7 @@ __rec_write_check_complete(
* Check if lookaside eviction is possible. If any of the updates we
* saw were uncommitted, the lookaside table cannot be used.
*/
- if (r->update_used || r->update_uncommitted)
+ if (r->update_uncommitted || r->update_used)
return (0);
*lookaside_retryp = true;
@@ -808,8 +807,8 @@ err: __wt_page_out(session, &next);
* Configure raw compression.
*/
static inline bool
-__rec_raw_compression_config(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage)
+__rec_raw_compression_config(WT_SESSION_IMPL *session,
+ uint32_t flags, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage)
{
WT_BTREE *btree;
@@ -825,6 +824,14 @@ __rec_raw_compression_config(
return (false);
/*
+ * XXX
+ * Turn off if lookaside is configured: lookaside potentially writes
+ * blocks without entries and raw compression isn't ready for that.
+ */
+ if (LF_ISSET(WT_REC_LOOKASIDE))
+ return (false);
+
+ /*
* Raw compression cannot support dictionary compression. (Technically,
* we could still use the raw callback on column-store variable length
* internal pages with dictionary compression configured, because
@@ -956,14 +963,14 @@ __rec_init(WT_SESSION_IMPL *session,
#endif
/* Track if updates were used and/or uncommitted. */
- r->update_used = r->update_uncommitted = false;
+ r->update_uncommitted = r->update_used = false;
/* Track if the page can be marked clean. */
r->leave_dirty = false;
/* Raw compression. */
r->raw_compression =
- __rec_raw_compression_config(session, page, salvage);
+ __rec_raw_compression_config(session, flags, page, salvage);
r->raw_destination.flags = WT_ITEM_ALIGNED;
/* Track overflow items. */
@@ -975,6 +982,7 @@ __rec_init(WT_SESSION_IMPL *session,
/* The list of saved updates is reused. */
r->supd_next = 0;
+ r->supd_memsize = 0;
/* The list of pages we've written. */
r->multi = NULL;
@@ -1125,8 +1133,8 @@ __rec_destroy_session(WT_SESSION_IMPL *session)
* Save a WT_UPDATE list for later restoration.
*/
static int
-__rec_update_save(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_INSERT *ins, void *ripcip, WT_UPDATE *onpage_upd)
+__rec_update_save(WT_SESSION_IMPL *session, WT_RECONCILE *r,
+ WT_INSERT *ins, void *ripcip, WT_UPDATE *onpage_upd, size_t upd_memsize)
{
WT_RET(__wt_realloc_def(
session, &r->supd_allocated, r->supd_next + 1, &r->supd));
@@ -1134,6 +1142,7 @@ __rec_update_save(WT_SESSION_IMPL *session,
r->supd[r->supd_next].ripcip = ripcip;
r->supd[r->supd_next].onpage_upd = onpage_upd;
++r->supd_next;
+ r->supd_memsize += upd_memsize;
return (0);
}
@@ -1143,22 +1152,24 @@ __rec_update_save(WT_SESSION_IMPL *session,
*/
static int
__rec_append_orig_value(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_UPDATE *first_upd, WT_CELL_UNPACK *unpack)
+ WT_PAGE *page, WT_UPDATE *upd, WT_CELL_UNPACK *unpack)
{
WT_DECL_ITEM(tmp);
WT_DECL_RET;
- WT_UPDATE *append, *upd;
+ WT_UPDATE *append;
size_t size;
- /*
- * If at least one self-contained update is globally visible, we're
- * done.
- */
- for (upd = first_upd; upd != NULL; upd = upd->next)
+ /* Done if at least one self-contained update is globally visible. */
+ for (;; upd = upd->next) {
if (WT_UPDATE_DATA_VALUE(upd) &&
__wt_txn_upd_visible_all(session, upd))
return (0);
+ /* Leave reference at the last item in the chain. */
+ if (upd->next == NULL)
+ break;
+ }
+
/*
* We need the original on-page value for some reader: get a copy and
* append it to the end of the update list with a transaction ID that
@@ -1187,8 +1198,6 @@ __rec_append_orig_value(WT_SESSION_IMPL *session,
*
* Append the new entry to the update list.
*/
- for (upd = first_upd; upd->next != NULL; upd = upd->next)
- ;
WT_PUBLISH(upd->next, append);
__wt_cache_page_inmem_incr(session, page, size);
@@ -1206,15 +1215,22 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
WT_INSERT *ins, void *ripcip, WT_CELL_UNPACK *vpack, WT_UPDATE **updp)
{
WT_PAGE *page;
- WT_UPDATE *first_ts_upd, *first_txn_upd, *first_upd, *upd;
+ WT_UPDATE *first_txn_upd, *first_upd, *upd;
wt_timestamp_t *timestampp;
+ size_t upd_memsize;
uint64_t max_txn, txnid;
bool all_visible, uncommitted;
+#ifdef HAVE_TIMESTAMPS
+ WT_UPDATE *first_ts_upd;
+ first_ts_upd = NULL;
+#endif
+
*updp = NULL;
page = r->page;
- first_ts_upd = first_txn_upd = NULL;
+ first_txn_upd = NULL;
+ upd_memsize = 0;
max_txn = WT_TXN_NONE;
uncommitted = false;
@@ -1253,6 +1269,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (WT_TXNID_LE(r->last_running, txnid))
uncommitted = r->update_uncommitted = true;
+ upd_memsize += WT_UPDATE_MEMSIZE(upd);
+
/*
* Find the first update we can use.
*
@@ -1340,7 +1358,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
#ifdef HAVE_TIMESTAMPS
timestampp = first_ts_upd == NULL ? NULL : &first_ts_upd->timestamp;
#else
- WT_UNUSED(first_ts_upd);
timestampp = NULL;
#endif
all_visible = *updp == first_txn_upd &&
@@ -1388,12 +1405,14 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* The order of the updates on the list matters, we can't move only the
* unresolved updates, move the entire update list.
*/
- WT_RET(__rec_update_save(session, r, ins, ripcip, *updp));
+ WT_RET(__rec_update_save(session, r, ins, ripcip, *updp, upd_memsize));
#ifdef HAVE_TIMESTAMPS
/* Track the oldest saved timestamp for lookaside. */
- if (F_ISSET(r, WT_REC_LOOKASIDE))
- for (upd = first_upd; upd->next != NULL; upd = upd->next)
+ if (first_ts_upd == NULL)
+ __wt_timestamp_set_zero(&r->min_saved_timestamp);
+ else if (F_ISSET(r, WT_REC_LOOKASIDE))
+ for (upd = first_upd; upd != NULL; upd = upd->next)
if (upd->txnid != WT_TXN_ABORTED &&
upd->txnid != WT_TXN_NONE &&
__wt_timestamp_cmp(
@@ -1410,9 +1429,8 @@ check_original_value:
* image is rewritten), or any reconciliation of a backing overflow
* record that will be physically removed once it's no longer needed.
*/
- if (*updp != NULL &&
- (F_ISSET(r, WT_REC_LOOKASIDE) ||
- (*updp != NULL && vpack != NULL &&
+ if (*updp != NULL && (F_ISSET(r, WT_REC_LOOKASIDE) ||
+ (vpack != NULL &&
vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)))
WT_RET(
__rec_append_orig_value(session, page, first_upd, vpack));
@@ -1994,25 +2012,32 @@ __rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r)
return (page_size * 2);
}
-#define WT_REC_MAX_SAVED_UPDATES 100
-
/*
* __rec_need_split --
* Check whether adding some bytes to the page requires a split.
- *
- * This takes into account the disk image growing across a boundary, and
- * also triggers a split for row store leaf pages when a threshold number
- * of saved updates is reached. This allows pages to split for update /
- * restore and lookaside eviction when there is no visible data that
- * causes the disk image to grow.
*/
static bool
__rec_need_split(WT_RECONCILE *r, size_t len)
{
- if (r->page->type == WT_PAGE_ROW_LEAF &&
- r->supd_next >= WT_REC_MAX_SAVED_UPDATES)
- return (true);
-
+ /*
+ * In the case of a row-store leaf page, trigger a split if a threshold
+ * number of saved updates is reached. This allows pages to split for
+ * update/restore and lookaside eviction when there is no visible data
+ * causing the disk image to grow.
+ *
+ * In the case of small pages or large keys, we might try to split when
+ * a page has no updates or entries, which isn't possible. To consider
+ * update/restore or lookaside information, require either page entries
+ * or updates that will be attached to the image. The limit is one of
+ * either, but it doesn't make sense to create pages or images with few
+ * entries or updates, even where page sizes are small (especially as
+ * updates that will eventually become overflow items can throw off our
+ * calculations). Bound the combination at something reasonable.
+ */
+ if (r->page->type == WT_PAGE_ROW_LEAF && r->entries + r->supd_next > 10)
+ len += r->supd_memsize;
+
+ /* Check for the disk image crossing a boundary. */
return (r->raw_compression ?
len > r->space_avail : WT_CHECK_CROSSING_BND(r, len));
}
@@ -2619,11 +2644,11 @@ __rec_split_crossing_bnd(
}
/*
- * __rec_split_raw_worker --
- * Handle the raw compression page reconciliation bookkeeping.
+ * __rec_split_raw --
+ * Raw compression.
*/
static int
-__rec_split_raw_worker(WT_SESSION_IMPL *session,
+__rec_split_raw(WT_SESSION_IMPL *session,
WT_RECONCILE *r, size_t next_len, bool no_more_rows)
{
WT_BM *bm;
@@ -3003,16 +3028,6 @@ split_grow: /*
}
/*
- * __rec_split_raw --
- * Raw compression split routine.
- */
-static inline int
-__rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
-{
- return (__rec_split_raw_worker(session, r, next_len, false));
-}
-
-/*
* __rec_split_finish_process_prev --
* If the two split chunks together fit in a single page, merge them into
* one. If they do not fit in a single page but the last is smaller than
@@ -3129,7 +3144,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
WT_PTRDIFF(r->first_free, r->cur_ptr->image.mem);
if (data_size <= btree->allocsize)
break;
- WT_RET(__rec_split_raw_worker(session, r, 0, true));
+ WT_RET(__rec_split_raw(session, r, 0, true));
}
if (r->entries == 0)
return (0);
@@ -3193,6 +3208,7 @@ __rec_split_write_supd(WT_SESSION_IMPL *session,
WT_DECL_RET;
WT_PAGE *page;
WT_SAVE_UPD *supd;
+ WT_UPDATE *upd;
uint32_t i, j;
int cmp;
@@ -3208,6 +3224,7 @@ __rec_split_write_supd(WT_SESSION_IMPL *session,
if (last_block) {
WT_RET(__rec_supd_move(session, multi, r->supd, r->supd_next));
r->supd_next = 0;
+ r->supd_memsize = 0;
return (0);
}
@@ -3252,8 +3269,19 @@ __rec_split_write_supd(WT_SESSION_IMPL *session,
* saved updates in sorted order, new saved updates must be
* appended to the list).
*/
- for (j = 0; i < r->supd_next; ++j, ++i)
+ r->supd_memsize = 0;
+ for (j = 0; i < r->supd_next; ++j, ++i) {
+ /* Account for the remaining update memory. */
+ if (r->supd[i].ins == NULL)
+ upd = page->modify->mod_row_update[
+ page->type == WT_PAGE_ROW_LEAF ?
+ WT_ROW_SLOT(page, r->supd[i].ripcip) :
+ WT_COL_SLOT(page, r->supd[i].ripcip)];
+ else
+ upd = r->supd[i].ins->upd;
+ r->supd_memsize += __wt_update_list_memsize(upd);
r->supd[j] = r->supd[i];
+ }
r->supd_next = j;
}
@@ -3296,10 +3324,8 @@ __rec_split_write_header(WT_SESSION_IMPL *session,
* and we found updates that weren't globally visible when reconciling
* this page.
*/
- if (F_ISSET(r, WT_REC_LOOKASIDE) && multi->supd != NULL) {
+ if (F_ISSET(r, WT_REC_LOOKASIDE) && multi->supd != NULL)
F_SET(dsk, WT_PAGE_LAS_UPDATE);
- r->cache_write_lookaside = true;
- }
dsk->unused[0] = dsk->unused[1] = 0;
@@ -3492,28 +3518,40 @@ __rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r,
goto copy_image;
/*
- * If there are saved updates, we are either doing update/restore
- * eviction or lookaside eviction. Update/restore never writes the
- * disk image.
- *
- * Lookaside does write disk images, but also needs to cope with the
- * case where no updates could be written, which means there are no
- * entries in the page image to write.
+ * If there are saved updates, this is either update/restore eviction
+ * or lookaside eviction.
*/
- if (multi->supd != NULL &&
- (F_ISSET(r, WT_REC_UPDATE_RESTORE) || chunk->entries == 0)) {
+ if (multi->supd != NULL) {
/*
+ * XXX
* If no entries were used, the page is empty and we can only
- * restore updates against an empty row store leaf page.
- * (Column store modify will attempt to allocate a zero-length
- * array).
+ * restore update/restore or lookaside updates against empty
+ * row-store leaf pages (column-store modify will attempt to
+ * allocate a zero-length array).
*/
- if (r->page->type != WT_PAGE_ROW_LEAF &&
- chunk->entries == 0 && multi->supd != NULL)
+ if (r->page->type != WT_PAGE_ROW_LEAF && chunk->entries == 0)
return (EBUSY);
- r->cache_write_restore = true;
- goto update_las;
+ if (F_ISSET(r, WT_REC_LOOKASIDE)) {
+ r->cache_write_lookaside = true;
+
+ /*
+ * Lookaside eviction writes disk images, but if no
+ * entries were used, there's no disk image to write.
+ * There's no more work to do in this case, lookaside
+ * eviction doesn't copy disk images.
+ */
+ if (chunk->entries == 0)
+ return (0);
+ } else {
+ r->cache_write_restore = true;
+
+ /*
+ * Update/restore never writes a disk image, but always
+ * copies a disk image.
+ */
+ goto copy_image;
+ }
}
/*
@@ -3525,6 +3563,7 @@ __rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r,
last_block))
goto copy_image;
+ /* Write the disk image and get an address. */
WT_RET(__wt_bt_write(session,
compressed_image == NULL ? &chunk->image : compressed_image,
addr, &addr_size, false, F_ISSET(r, WT_REC_CHECKPOINT),
@@ -3535,15 +3574,6 @@ __rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r,
WT_RET(__wt_memdup(session, addr, addr_size, &multi->addr.addr));
multi->addr.size = (uint8_t)addr_size;
-update_las:
- /*
- * If using the lookaside table eviction path and we found updates that
- * weren't globally visible when reconciling this page, copy them into
- * the database's lookaside store.
- */
- if (F_ISSET(r, WT_REC_LOOKASIDE) && multi->supd != NULL)
- WT_RET(__rec_update_las(session, r, btree->id, multi));
-
copy_image:
#ifdef HAVE_DIAGNOSTIC
/*
@@ -3569,150 +3599,6 @@ copy_image:
}
/*
- * __rec_update_las --
- * Copy a set of updates into the database's lookaside buffer.
- */
-static int
-__rec_update_las(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, uint32_t btree_id, WT_MULTI *multi)
-{
- WT_CURSOR *cursor;
- WT_DECL_ITEM(key);
- WT_DECL_RET;
- WT_ITEM las_timestamp, las_value;
- WT_PAGE *page;
- WT_SAVE_UPD *list;
- WT_UPDATE *upd;
- uint64_t insert_cnt, las_counter, las_pageid;
- uint32_t i, session_flags, slot;
- uint8_t *p;
-
- cursor = NULL;
- WT_CLEAR(las_timestamp);
- WT_CLEAR(las_value);
- page = r->page;
- insert_cnt = las_pageid = 0;
-
- __wt_las_cursor(session, &cursor, &session_flags);
-
- /* Ensure enough room for a column-store key without checking. */
- WT_ERR(__wt_scr_alloc(session, WT_INTPACK64_MAXSIZE, &key));
-
- /*
- * Each key in the lookaside table is associated with a unique
- * identifier, allocated sequentially per tree.
- */
- las_pageid = multi->las_pageid =
- __wt_atomic_add64(&S2BT(session)->las_pageid, 1);
-
- /* The zero page ID is reserved, check we don't see it. */
- WT_ASSERT(session, las_pageid != 0);
-
- /*
- * Make sure there are no left over entries (e.g., from a handle
- * reopen).
- */
- WT_ERR(__wt_las_remove_block(session, cursor, btree_id, las_pageid));
-
- /* Enter each update in the boundary's list into the lookaside store. */
- for (las_counter = 0, i = 0,
- list = multi->supd; i < multi->supd_entries; ++i, ++list) {
- /* Lookaside table key component: source key. */
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- p = key->mem;
- WT_ERR(
- __wt_vpack_uint(&p, 0, WT_INSERT_RECNO(list->ins)));
- key->size = WT_PTRDIFF(p, key->data);
- break;
- case WT_PAGE_ROW_LEAF:
- if (list->ins == NULL)
- WT_ERR(__wt_row_leaf_key(
- session, page, list->ripcip, key, false));
- else {
- key->data = WT_INSERT_KEY(list->ins);
- key->size = WT_INSERT_KEY_SIZE(list->ins);
- }
- break;
- WT_ILLEGAL_VALUE_ERR(session);
- }
-
- /*
- * Lookaside table value component: update reference. Updates
- * come from the row-store insert list (an inserted item), or
- * update array (an update to an original on-page item), or from
- * a column-store insert list (column-store format has no update
- * array, the insert list contains both inserted items and
- * updates to original on-page items). When rolling forward a
- * modify update from an original on-page item, we need an
- * on-page slot so we can find the original on-page item. When
- * rolling forward from an inserted item, no on-page slot is
- * possible.
- */
- slot = UINT32_MAX; /* Impossible slot */
- if (list->ripcip != NULL)
- slot = page->type == WT_PAGE_ROW_LEAF ?
- WT_ROW_SLOT(page, list->ripcip) :
- WT_COL_SLOT(page, list->ripcip);
- upd = list->ins == NULL ?
- page->modify->mod_row_update[slot] : list->ins->upd;
-
- /*
- * Walk the list of updates, storing each key/value pair into
- * the lookaside table. Skip aborted items (there's no point
- * to restoring them), and assert we never see a reserved item.
- */
- do {
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
-
- switch (upd->type) {
- case WT_UPDATE_DELETED:
- las_value.size = 0;
- break;
- case WT_UPDATE_MODIFIED:
- case WT_UPDATE_STANDARD:
- las_value.data = upd->data;
- las_value.size = upd->size;
- break;
- case WT_UPDATE_RESERVED:
- WT_ASSERT(session,
- upd->type != WT_UPDATE_RESERVED);
- continue;
- }
-
- cursor->set_key(cursor,
- btree_id, las_pageid, ++las_counter, key);
-
-#ifdef HAVE_TIMESTAMPS
- las_timestamp.data = &upd->timestamp;
- las_timestamp.size = WT_TIMESTAMP_SIZE;
-#endif
- cursor->set_value(cursor,
- upd->txnid, &las_timestamp, upd->type, &las_value);
-
- WT_ERR(cursor->insert(cursor));
- ++insert_cnt;
- } while ((upd = upd->next) != NULL);
- }
-
- __wt_free(session, multi->supd);
- multi->supd_entries = 0;
-
-err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
-
- if (insert_cnt > 0) {
- WT_STAT_CONN_INCRV(
- session, cache_lookaside_entries, insert_cnt);
- __rec_verbose_lookaside_write(session, btree_id, las_pageid);
- }
-
- __wt_scr_free(session, &key);
- return (ret);
-}
-
-/*
* __wt_bulk_init --
* Bulk insert initialization.
*/
@@ -3825,7 +3711,7 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
if (r->raw_compression) {
if (key->len + val->len > r->space_avail)
WT_RET(__rec_split_raw(
- session, r, key->len + val->len));
+ session, r, key->len + val->len, false));
} else
if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) {
/*
@@ -3992,7 +3878,7 @@ __wt_bulk_insert_var(
/* Boundary: split or write the page. */
if (r->raw_compression) {
if (val->len > r->space_avail)
- WT_RET(__rec_split_raw(session, r, val->len));
+ WT_RET(__rec_split_raw(session, r, val->len, false));
} else
if (WT_CROSSING_SPLIT_BND(r, val->len))
WT_RET(__rec_split_crossing_bnd(session, r, val->len));
@@ -4133,7 +4019,8 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
/* Boundary: split or write the page. */
if (__rec_need_split(r, val->len)) {
if (r->raw_compression)
- WT_ERR(__rec_split_raw(session, r, val->len));
+ WT_ERR(__rec_split_raw(
+ session, r, val->len, false));
else
WT_ERR(__rec_split_crossing_bnd(
session, r, val->len));
@@ -4181,7 +4068,8 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
/* Boundary: split or write the page. */
if (__rec_need_split(r, val->len)) {
if (r->raw_compression)
- WT_RET(__rec_split_raw(session, r, val->len));
+ WT_RET(__rec_split_raw(
+ session, r, val->len, false));
else
WT_RET(__rec_split_crossing_bnd(
session, r, val->len));
@@ -4454,7 +4342,7 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
/* Boundary: split or write the page. */
if (__rec_need_split(r, val->len)) {
if (r->raw_compression)
- WT_RET(__rec_split_raw(session, r, val->len));
+ WT_RET(__rec_split_raw(session, r, val->len, false));
else
WT_RET(__rec_split_crossing_bnd(session, r, val->len));
}
@@ -5156,7 +5044,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
if (__rec_need_split(r, key->len + val->len)) {
if (r->raw_compression)
WT_ERR(__rec_split_raw(
- session, r, key->len + val->len));
+ session, r, key->len + val->len, false));
else {
/*
* In one path above, we copied address blocks
@@ -5226,7 +5114,7 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
if (__rec_need_split(r, key->len + val->len)) {
if (r->raw_compression)
WT_RET(__rec_split_raw(
- session, r, key->len + val->len));
+ session, r, key->len + val->len, false));
else
WT_RET(__rec_split_crossing_bnd(
session, r, key->len + val->len));
@@ -5573,7 +5461,7 @@ build:
if (__rec_need_split(r, key->len + val->len)) {
if (r->raw_compression)
WT_ERR(__rec_split_raw(
- session, r, key->len + val->len));
+ session, r, key->len + val->len, false));
else {
/*
* If we copied address blocks from the page
@@ -5656,59 +5544,34 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) {
WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, &upd));
- if (upd == NULL) {
- /*
- * Look for an update. If nothing is visible and not in
- * evict/restore, there's no work to do.
- */
- if (!F_ISSET(r, WT_REC_UPDATE_RESTORE))
- continue;
+ /* If no updates are visible there's no work to do. */
+ if (upd == NULL)
+ continue;
+ switch (upd->type) {
+ case WT_UPDATE_DELETED:
+ continue;
+ case WT_UPDATE_MODIFIED:
/*
- * When doing evict/restore, move the insert key to the
- * page, with an empty value (this allows us to split
- * the page if there's a huge, pinned insert list). The
- * on-page key must never be read, make sure there is a
- * globally visible update in the chain.
- *
- * __rec_txn_read also returns a NULL update when all of
- * the updates were aborted, without saving the update
- * list to the evict/restore array, so we can't append
- * a delete update. Ugly, but the alternative is another
- * parameter to __rec_txn_read.
+ * Impossible slot, there's no backing on-page
+ * item.
*/
- if (r->supd_next == 0 ||
- r->supd[r->supd_next - 1].ins != ins)
- continue;
-
- WT_RET(__rec_append_orig_value(
- session, r->page, ins->upd, NULL));
- val->len = 0;
- } else
- switch (upd->type) {
- case WT_UPDATE_DELETED:
- continue;
- case WT_UPDATE_MODIFIED:
- /*
- * Impossible slot, there's no backing on-page
- * item.
- */
- cbt->slot = UINT32_MAX;
- WT_RET(__wt_value_return(session, cbt, upd));
- WT_RET(__rec_cell_build_val(session, r,
- cbt->iface.value.data,
- cbt->iface.value.size, (uint64_t)0));
- break;
- case WT_UPDATE_STANDARD:
- if (upd->size == 0)
- val->len = 0;
- else
- WT_RET(__rec_cell_build_val(session,
- r, upd->data, upd->size,
- (uint64_t)0));
- break;
- WT_ILLEGAL_VALUE(session);
- }
+ cbt->slot = UINT32_MAX;
+ WT_RET(__wt_value_return(session, cbt, upd));
+ WT_RET(__rec_cell_build_val(session, r,
+ cbt->iface.value.data,
+ cbt->iface.value.size, (uint64_t)0));
+ break;
+ case WT_UPDATE_STANDARD:
+ if (upd->size == 0)
+ val->len = 0;
+ else
+ WT_RET(__rec_cell_build_val(session,
+ r, upd->data, upd->size,
+ (uint64_t)0));
+ break;
+ WT_ILLEGAL_VALUE(session);
+ }
/* Build key cell. */
WT_RET(__rec_cell_build_leaf_key(session, r,
WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key));
@@ -5717,7 +5580,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
if (__rec_need_split(r, key->len + val->len)) {
if (r->raw_compression)
WT_RET(__rec_split_raw(
- session, r, key->len + val->len));
+ session, r, key->len + val->len, false));
else {
/*
* Turn off prefix compression until a full key
@@ -5923,6 +5786,14 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
mod->rec_result = 0;
/*
+ * If using the lookaside table eviction path and we found updates that
+ * weren't globally visible when reconciling this page, copy them into
+ * the database's lookaside store.
+ */
+ if (F_ISSET(r, WT_REC_LOOKASIDE))
+ WT_RET(__rec_las_wrapup(session, r));
+
+ /*
* Wrap up overflow tracking. If we are about to create a checkpoint,
* the system must be entirely consistent at that point (the underlying
* block manager is presumably going to do some action to resolve the
@@ -5932,8 +5803,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_RET(__wt_ovfl_track_wrapup(session, page));
__wt_verbose(session, WT_VERB_RECONCILE,
- "%p reconciled into %" PRIu32 " pages",
- (void *)ref, r->multi_next);
+ "%p reconciled into %" PRIu32 " pages", (void *)ref, r->multi_next);
switch (r->multi_next) {
case 0: /* Page delete */
@@ -5988,7 +5858,6 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
__wt_timestamp_set(&mod->mod_replace_las_min_timestamp,
&r->min_saved_timestamp);
#endif
- r->multi->las_pageid = 0;
} else
WT_RET(__wt_bt_write(session, r->wrapup_checkpoint,
NULL, NULL, true, F_ISSET(r, WT_REC_CHECKPOINT),
@@ -6066,7 +5935,77 @@ __rec_write_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
multi->addr.addr, multi->addr.size));
}
+ /*
+ * If using the lookaside table eviction path and we found updates that
+ * weren't globally visible when reconciling this page, we might have
+ * already copied them into the database's lookaside store. Remove them.
+ */
+ if (F_ISSET(r, WT_REC_LOOKASIDE))
+ WT_TRET(__rec_las_wrapup_err(session, r));
+
WT_TRET(__wt_ovfl_track_wrapup_err(session, page));
+
+ return (ret);
+}
+
+/*
+ * __rec_las_wrapup --
+ * Copy all of the saved updates into the database's lookaside buffer.
+ */
+static int
+__rec_las_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
+{
+ WT_CURSOR *cursor;
+ WT_DECL_ITEM(key);
+ WT_DECL_RET;
+ WT_MULTI *multi;
+ uint32_t i, session_flags;
+
+ /* Check if there's work to do. */
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
+ if (multi->supd != NULL)
+ break;
+ if (i == r->multi_next)
+ return (0);
+
+ /* Ensure enough room for a column-store key without checking. */
+ WT_RET(__wt_scr_alloc(session, WT_INTPACK64_MAXSIZE, &key));
+
+ __wt_las_cursor(session, &cursor, &session_flags);
+
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
+ if (multi->supd != NULL)
+ WT_ERR(__wt_las_insert_block(
+ session, r->page, cursor, multi, key));
+
+err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+
+ __wt_scr_free(session, &key);
+ return (ret);
+}
+
+/*
+ * __rec_las_wrapup_err --
+ * Discard any saved updates from the database's lookaside buffer.
+ */
+static int
+__rec_las_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r)
+{
+ WT_DECL_RET;
+ WT_MULTI *multi;
+ uint32_t btree_id, i;
+
+ btree_id = S2BT(session)->id;
+
+ /*
+ * Note the additional check for a non-zero lookaside page ID: it flags
+ * whether lookaside table entries for this page have been written.
+ */
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
+ if (multi->supd != NULL && multi->las_pageid != 0)
+ WT_TRET(__wt_las_remove_block(
+ session, NULL, btree_id, multi->las_pageid));
+
return (ret);
}
@@ -6552,57 +6491,3 @@ __rec_dictionary_lookup(
*dpp = next;
return (0);
}
-
-/*
- * __rec_verbose_lookaside_write --
- * Create a verbose message to display once per checkpoint with details
- * about the cache state when performing a lookaside table write.
- */
-static void
-__rec_verbose_lookaside_write(
- WT_SESSION_IMPL *session, uint32_t las_id, uint64_t las_pageid)
-{
-#ifdef HAVE_VERBOSE
- WT_CONNECTION_IMPL *conn;
- uint64_t ckpt_gen_current, ckpt_gen_last;
- uint32_t pct_dirty, pct_full;
-
- if (!WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE)) return;
-
- conn = S2C(session);
- ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT);
- ckpt_gen_last = conn->las_verb_gen_write;
-
- /*
- * This message is throttled to one per checkpoint. To do this we
- * track the generation of the last checkpoint for which the message
- * was printed and check against the current checkpoint generation.
- */
- if (ckpt_gen_current > ckpt_gen_last) {
- /*
- * Attempt to atomically replace the last checkpoint generation
- * for which this message was printed. If the atomic swap fails
- * we have raced and the winning thread will print the message.
- */
- if (__wt_atomic_casv64(&conn->las_verb_gen_write,
- ckpt_gen_last, ckpt_gen_current)) {
- (void)__wt_eviction_clean_needed(session, &pct_full);
- (void)__wt_eviction_dirty_needed(session, &pct_dirty);
-
- __wt_verbose(session, WT_VERB_LOOKASIDE,
- "Page reconciliation triggered lookaside write"
- "file ID %" PRIu32 ", page ID %" PRIu64 ". "
- "Entries now in lookaside file: %" PRId64 ", "
- "cache dirty: %" PRIu32 "%% , "
- "cache use: %" PRIu32 "%%",
- las_id, las_pageid,
- WT_STAT_READ(conn->stats, cache_lookaside_entries),
- pct_dirty, pct_full);
- }
- }
-#else
- WT_UNUSED(session);
- WT_UNUSED(las_id);
- WT_UNUSED(las_pageid);
-#endif
-}
diff --git a/src/session/session_compact.c b/src/session/session_compact.c
index a284b8a2229..6ccf3161229 100644
--- a/src/session/session_compact.c
+++ b/src/session/session_compact.c
@@ -250,7 +250,7 @@ __compact_worker(WT_SESSION_IMPL *session)
{
WT_DECL_RET;
u_int i, loop;
- bool didwork;
+ bool another_pass;
/*
* Reset the handles' compaction skip flag (we don't bother setting
@@ -274,7 +274,8 @@ __compact_worker(WT_SESSION_IMPL *session)
*/
for (loop = 0; loop < 100; ++loop) {
/* Step through the list of files being compacted. */
- for (didwork = false, i = 0; i < session->op_handle_next; ++i) {
+ for (another_pass = false,
+ i = 0; i < session->op_handle_next; ++i) {
/* Skip objects where there's no more work. */
if (session->op_handle[i]->compact_skip)
continue;
@@ -282,15 +283,43 @@ __compact_worker(WT_SESSION_IMPL *session)
session->compact_state = WT_COMPACT_RUNNING;
WT_WITH_DHANDLE(session,
session->op_handle[i], ret = __wt_compact(session));
- WT_ERR(ret);
- /* If we did no work, skip this file in the future. */
- if (session->compact_state == WT_COMPACT_SUCCESS)
- didwork = true;
- else
- session->op_handle[i]->compact_skip = true;
+ /*
+ * If successful and we did work, schedule another pass.
+ * If successful and we did no work, skip this file in
+ * the future.
+ */
+ if (ret == 0) {
+ if (session->
+ compact_state == WT_COMPACT_SUCCESS)
+ another_pass = true;
+ else
+ session->
+ op_handle[i]->compact_skip = true;
+ continue;
+ }
+
+ /*
+ * If compaction failed because checkpoint was running,
+ * continue with the next handle. We might continue to
+ * race with checkpoint on each handle, but that's OK,
+ * we'll step through all the handles, and then we'll
+ * block until a checkpoint completes.
+ *
+ * Just quit if eviction is the problem.
+ */
+ if (ret == EBUSY) {
+ if (__wt_cache_stuck(session)) {
+ WT_ERR_MSG(session, EBUSY,
+ "compaction halted by eviction "
+ "pressure");
+ }
+ ret = 0;
+ another_pass = true;
+ }
+ WT_ERR(ret);
}
- if (!didwork)
+ if (!another_pass)
break;
/*
@@ -320,10 +349,25 @@ __wt_session_compact(
WT_DECL_RET;
WT_SESSION_IMPL *session;
u_int i;
+ bool no_eviction_set;
+
+ no_eviction_set = false;
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, compact, config, cfg);
+ /*
+ * Don't hijack the compaction thread for eviction; it's holding locks
+ * blocking checkpoints and once an application is tapped for eviction,
+ * it can spend a long time doing nothing else. (And, if we're tapping
+ * application threads for eviction, compaction should quit, it's not
+ * making anything better.)
+ */
+ if (!F_ISSET(session, WT_SESSION_NO_EVICTION)) {
+ no_eviction_set = true;
+ F_SET(session, WT_SESSION_NO_EVICTION);
+ }
+
/* In-memory ignores compaction operations. */
if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
goto err;
@@ -393,6 +437,9 @@ err: session->compact = NULL;
*/
WT_TRET(__wt_session_release_resources(session));
+ if (no_eviction_set)
+ F_CLR(session, WT_SESSION_NO_EVICTION);
+
if (ret != 0)
WT_STAT_CONN_INCR(session, session_table_compact_fail);
else
diff --git a/src/support/pow.c b/src/support/pow.c
index cd770a514b2..2fb193afca8 100644
--- a/src/support/pow.c
+++ b/src/support/pow.c
@@ -89,8 +89,9 @@ __wt_nlpo2(uint32_t v)
uint32_t
__wt_log2_int(uint32_t n)
{
- uint32_t l = 0;
+ uint32_t l;
+ l = 0;
while (n >>= 1)
l++;
return (l);
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 3215a372d36..cfdb7d26498 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -15,14 +15,16 @@
static uint32_t
__snapsort_partition(uint64_t *array, uint32_t f, uint32_t l, uint64_t pivot)
{
- uint32_t i = f - 1, j = l + 1;
+ uint32_t i, j;
+ i = f - 1;
+ j = l + 1;
for (;;) {
while (pivot < array[--j])
;
while (array[++i] < pivot)
;
- if (i<j) {
+ if (i < j) {
uint64_t tmp = array[i];
array[i] = array[j];
array[j] = tmp;
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 2137d5b16ef..afb3cba1db6 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -519,7 +519,7 @@ __checkpoint_stats(
conn = S2C(session);
/*
- * Get time diff in microseconds.
+ * Get time diff in milliseconds.
*/
msec = WT_TIMEDIFF_MS(*stop, *start);
@@ -549,11 +549,11 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session,
__wt_epoch(session, &stop);
/*
- * Get time diff in microseconds.
+ * Get time diff in milliseconds.
*/
msec = WT_TIMEDIFF_MS(stop, *start);
__wt_verbose(session,
- WT_VERB_CHECKPOINT, "time: %" PRIu64 " us, gen: %" PRIu64
+ WT_VERB_CHECKPOINT, "time: %" PRIu64 " ms, gen: %" PRIu64
": Full database checkpoint %s",
msec, __wt_gen(session, WT_GEN_CHECKPOINT), msg);
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index c627335283d..bd1b2239e2d 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -187,10 +187,13 @@ __txn_logrec_init(WT_SESSION_IMPL *session)
WT_DECL_RET;
WT_TXN *txn;
size_t header_size;
- uint32_t rectype = WT_LOGREC_COMMIT;
- const char *fmt = WT_UNCHECKED_STRING(Iq);
+ uint32_t rectype;
+ const char *fmt;
txn = &session->txn;
+ rectype = WT_LOGREC_COMMIT;
+ fmt = WT_UNCHECKED_STRING(Iq);
+
if (txn->logrec != NULL)
return (0);
@@ -295,13 +298,14 @@ __txn_log_file_sync(WT_SESSION_IMPL *session, uint32_t flags, WT_LSN *lsnp)
WT_DECL_ITEM(logrec);
WT_DECL_RET;
size_t header_size;
- uint32_t rectype = WT_LOGREC_FILE_SYNC;
- int start;
- const char *fmt = WT_UNCHECKED_STRING(III);
+ uint32_t rectype, start;
+ const char *fmt;
bool need_sync;
btree = S2BT(session);
- start = LF_ISSET(WT_TXN_LOG_CKPT_START);
+ rectype = WT_LOGREC_FILE_SYNC;
+ start = LF_ISSET(WT_TXN_LOG_CKPT_START) ? 1 : 0;
+ fmt = WT_UNCHECKED_STRING(III);
need_sync = LF_ISSET(WT_TXN_LOG_CKPT_SYNC);
WT_RET(__wt_struct_size(
@@ -331,7 +335,9 @@ __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session,
WT_ITEM ckpt_snapshot_unused;
uint32_t ckpt_file, ckpt_offset;
u_int ckpt_nsnapshot_unused;
- const char *fmt = WT_UNCHECKED_STRING(IIIu);
+ const char *fmt;
+
+ fmt = WT_UNCHECKED_STRING(IIIu);
if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
&ckpt_file, &ckpt_offset,
diff --git a/src/utilities/util_backup.c b/src/utilities/util_backup.c
index 7d809c2a624..bf4d7b67671 100644
--- a/src/utilities/util_backup.c
+++ b/src/utilities/util_backup.c
@@ -18,9 +18,9 @@ static int usage(void);
static int
append_target(WT_SESSION *session, const char *target, char **bufp)
{
- static bool first = true;
static size_t len = 0, remain = 0;
static char *buf = NULL;
+ static bool first = true;
/* 20 bytes of slop */
if (buf == NULL || remain < strlen(target) + 20) {
diff --git a/test/format/compact.c b/test/format/compact.c
index 8a558d2b35b..c1a73bea64b 100644
--- a/test/format/compact.c
+++ b/test/format/compact.c
@@ -64,11 +64,11 @@ compact(void *arg)
break;
/*
- * Compact can return EBUSY if concurrent with alter.
+ * Compact can return EBUSY if concurrent with alter, if there is
+ * eviction pressure, or if we collide with checkpoints.
*/
- while ((ret = session->compact(session, g.uri, NULL)) == EBUSY)
- __wt_yield();
- if (ret != 0 && ret != WT_ROLLBACK)
+ ret = session->compact(session, g.uri, NULL);
+ if (ret != 0 && ret != EBUSY && ret != WT_ROLLBACK)
testutil_die(ret, "session.compact");
}
diff --git a/test/format/config.c b/test/format/config.c
index df5dc3e5378..049a655cb79 100644
--- a/test/format/config.c
+++ b/test/format/config.c
@@ -29,6 +29,7 @@
#include "format.h"
#include "config.h"
+static void config_checkpoint(void);
static void config_checksum(void);
static void config_compression(const char *);
static void config_encryption(void);
@@ -39,6 +40,7 @@ static void config_in_memory_reset(void);
static int config_is_perm(const char *);
static void config_isolation(void);
static void config_lrt(void);
+static void config_map_checkpoint(const char *, u_int *);
static void config_map_checksum(const char *, u_int *);
static void config_map_compression(const char *, u_int *);
static void config_map_encryption(const char *, u_int *);
@@ -159,6 +161,7 @@ config_setup(void)
if (!g.replay && g.run_cnt % 20 == 19 && !config_is_perm("threads"))
g.c_threads = 1;
+ config_checkpoint();
config_checksum();
config_compression("compression");
config_compression("logging_compression");
@@ -234,6 +237,28 @@ config_setup(void)
}
/*
+ * config_checkpoint --
+ * Checkpoint configuration.
+ */
+static void
+config_checkpoint(void)
+{
+ /* Choose a checkpoint mode if nothing was specified. */
+ if (!config_is_perm("checkpoints"))
+ switch (mmrand(NULL, 1, 20)) {
+ case 1: case 2: case 3: case 4: /* 20% */
+ config_single("checkpoints=wiredtiger", 0);
+ break;
+ case 5: /* 5 % */
+ config_single("checkpoints=off", 0);
+ break;
+ default: /* 75% */
+ config_single("checkpoints=on", 0);
+ break;
+ }
+}
+
+/*
* config_checksum --
* Checksum configuration.
*/
@@ -823,7 +848,10 @@ config_single(const char *s, int perm)
*cp->vstr = NULL;
}
- if (strncmp(s, "checksum", strlen("checksum")) == 0) {
+ if (strncmp(s, "checkpoints", strlen("checkpoints")) == 0) {
+ config_map_checkpoint(ep, &g.c_checkpoint_flag);
+ *cp->vstr = dstrdup(ep);
+ } else if (strncmp(s, "checksum", strlen("checksum")) == 0) {
config_map_checksum(ep, &g.c_checksum_flag);
*cp->vstr = dstrdup(ep);
} else if (strncmp(
@@ -834,12 +862,12 @@ config_single(const char *s, int perm)
s, "encryption", strlen("encryption")) == 0) {
config_map_encryption(ep, &g.c_encryption_flag);
*cp->vstr = dstrdup(ep);
- } else if (strncmp(s, "isolation", strlen("isolation")) == 0) {
- config_map_isolation(ep, &g.c_isolation_flag);
- *cp->vstr = dstrdup(ep);
} else if (strncmp(s, "file_type", strlen("file_type")) == 0) {
config_map_file_type(ep, &g.type);
*cp->vstr = dstrdup(config_file_type(g.type));
+ } else if (strncmp(s, "isolation", strlen("isolation")) == 0) {
+ config_map_isolation(ep, &g.c_isolation_flag);
+ *cp->vstr = dstrdup(ep);
} else if (strncmp(s, "logging_compression",
strlen("logging_compression")) == 0) {
config_map_compression(ep,
@@ -905,6 +933,24 @@ config_map_file_type(const char *s, u_int *vp)
}
/*
+ * config_map_checkpoint --
+ * Map a checkpoint configuration to a flag.
+ */
+static void
+config_map_checkpoint(const char *s, u_int *vp)
+{
+ /* Checkpoint configuration used to be 1/0, let it continue to work. */
+ if (strcmp(s, "on") == 0 || strcmp(s, "1") == 0)
+ *vp = CHECKPOINT_ON;
+ else if (strcmp(s, "off") == 0 || strcmp(s, "0") == 0)
+ *vp = CHECKPOINT_OFF;
+ else if (strcmp(s, "wiredtiger") == 0)
+ *vp = CHECKPOINT_WIREDTIGER;
+ else
+ testutil_die(EINVAL, "illegal checkpoint configuration: %s", s);
+}
+
+/*
* config_map_checksum --
* Map a checksum configuration to a flag.
*/
diff --git a/test/format/config.h b/test/format/config.h
index 7ac71d7877b..6fb4071074d 100644
--- a/test/format/config.h
+++ b/test/format/config.h
@@ -102,8 +102,16 @@ static CONFIG c[] = {
0x0, 1, 100, 100 * 1024, &g.c_cache, NULL },
{ "checkpoints",
- "if periodic checkpoints are done", /* 95% */
- C_BOOL, 95, 0, 0, &g.c_checkpoints, NULL },
+ "type of checkpoints (on | off | wiredtiger)",
+ C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_checkpoint},
+
+ { "checkpoint_log_size",
+ "MB of log to wait if wiredtiger checkpoints configured",
+ 0x0, 20, 200, 1024, &g.c_checkpoint_log_size, NULL},
+
+ { "checkpoint_wait",
+ "seconds to wait if wiredtiger checkpoints configured",
+ 0x0, 5, 100, 3600, &g.c_checkpoint_wait, NULL},
{ "checksum",
"type of checksums (on | off | uncompressed)",
@@ -222,6 +230,10 @@ static CONFIG c[] = {
"type of logging compression " COMPRESSION_LIST,
C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_logging_compression },
+ { "logging_file_max",
+ "maximum log file size in KB",
+ 0x0, 100, 512000, 2097152, &g.c_logging_file_max, NULL },
+
{ "logging_prealloc",
"if log file pre-allocation configured", /* 50% */
C_BOOL, 50, 0, 0, &g.c_logging_prealloc, NULL },
diff --git a/test/format/format.h b/test/format/format.h
index f35e71f58aa..96e1a0fe335 100644
--- a/test/format/format.h
+++ b/test/format/format.h
@@ -119,7 +119,6 @@ typedef struct {
bool workers_finished; /* Operations completed */
pthread_rwlock_t backup_lock; /* Backup running */
- pthread_rwlock_t checkpoint_lock; /* Checkpoint running */
WT_RAND_STATE rnd; /* Global RNG state */
@@ -151,7 +150,9 @@ typedef struct {
uint32_t c_bloom_hash_count;
uint32_t c_bloom_oldest;
uint32_t c_cache;
- uint32_t c_checkpoints;
+ char *c_checkpoint;
+ uint32_t c_checkpoint_log_size;
+ uint32_t c_checkpoint_wait;
char *c_checksum;
uint32_t c_chunk_size;
uint32_t c_compact;
@@ -182,6 +183,7 @@ typedef struct {
uint32_t c_logging;
uint32_t c_logging_archive;
char *c_logging_compression;
+ uint32_t c_logging_file_max;
uint32_t c_logging_prealloc;
uint32_t c_long_running_txn;
uint32_t c_lsm_worker_threads;
@@ -216,6 +218,11 @@ typedef struct {
#define VAR 3
u_int type; /* File type's flag value */
+#define CHECKPOINT_OFF 1
+#define CHECKPOINT_ON 2
+#define CHECKPOINT_WIREDTIGER 3
+ u_int c_checkpoint_flag; /* Checkpoint flag value */
+
#define CHECKSUM_OFF 1
#define CHECKSUM_ON 2
#define CHECKSUM_UNCOMPRESSED 3
diff --git a/test/format/ops.c b/test/format/ops.c
index 4fed18d12b4..607dd43a8f3 100644
--- a/test/format/ops.c
+++ b/test/format/ops.c
@@ -175,7 +175,7 @@ wts_ops(int lastrun)
if (g.c_backups)
testutil_check(
__wt_thread_create(NULL, &backup_tid, backup, NULL));
- if (g.c_checkpoints)
+ if (g.c_checkpoint_flag == CHECKPOINT_ON)
testutil_check(__wt_thread_create(
NULL, &checkpoint_tid, checkpoint, NULL));
if (g.c_compact)
@@ -252,7 +252,7 @@ wts_ops(int lastrun)
testutil_check(__wt_thread_join(NULL, alter_tid));
if (g.c_backups)
testutil_check(__wt_thread_join(NULL, backup_tid));
- if (g.c_checkpoints)
+ if (g.c_checkpoint_flag == CHECKPOINT_ON)
testutil_check(__wt_thread_join(NULL, checkpoint_tid));
if (g.c_compact)
testutil_check(__wt_thread_join(NULL, compact_tid));
@@ -988,8 +988,8 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
{
static int sn = 0;
WT_SESSION *session;
- int exact, ret;
uint8_t bitfield;
+ int exact, ret;
session = cursor->session;
diff --git a/test/format/wts.c b/test/format/wts.c
index 30493a41912..ddcd14cfd55 100644
--- a/test/format/wts.c
+++ b/test/format/wts.c
@@ -185,6 +185,12 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp)
if (DATASOURCE("lsm") || g.c_cache < 20)
CONFIG_APPEND(p, ",eviction_dirty_trigger=95");
+ /* Checkpoints. */
+ if (g.c_checkpoint_flag == CHECKPOINT_WIREDTIGER)
+ CONFIG_APPEND(p,
+ ",checkpoint=(wait=%" PRIu32 ",log_size=%" PRIu32 ")",
+ g.c_checkpoint_wait, MEGABYTE(g.c_checkpoint_log_size));
+
/* Eviction worker configuration. */
if (g.c_evict_max != 0)
CONFIG_APPEND(p,
@@ -193,12 +199,14 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp)
/* Logging configuration. */
if (g.c_logging)
CONFIG_APPEND(p,
- ",log=(enabled=true,archive=%d,prealloc=%d"
- ",compressor=\"%s\")",
+ ",log=(enabled=true,archive=%d,"
+ "prealloc=%d,file_max=%" PRIu32 ",compressor=\"%s\")",
g.c_logging_archive ? 1 : 0,
g.c_logging_prealloc ? 1 : 0,
+ KILOBYTE(g.c_logging_file_max),
compressor(g.c_logging_compression_flag));
+ /* Encryption. */
if (g.c_encryption)
CONFIG_APPEND(p,
",encryption=(name=%s)", encryptor(g.c_encryption_flag));
diff --git a/test/suite/test_las.py b/test/suite/test_las.py
index 52a0b2d7300..07938c6d80b 100644
--- a/test/suite/test_las.py
+++ b/test/suite/test_las.py
@@ -38,13 +38,13 @@ def timestamp_str(t):
class test_las(wttest.WiredTigerTestCase):
# Force a small cache.
def conn_config(self):
- return 'cache_size=1GB'
+ return 'cache_size=50MB'
def large_updates(self, session, uri, value, ds, nrows, timestamp=False):
# Insert a large number of records, we'll hang if the lookaside table
# isn't doing its thing.
cursor = session.open_cursor(uri)
- for i in range(1, 1000000):
+ for i in range(1, 10000):
if timestamp == True:
session.begin_transaction()
cursor.set_key(ds.key(nrows + i))
@@ -73,7 +73,6 @@ class test_las(wttest.WiredTigerTestCase):
session.close()
conn.close()
- @wttest.longtest('lookaside table smoke test')
def test_las(self):
# Create a small table.
uri = "table:test_las"
@@ -84,7 +83,7 @@ class test_las(wttest.WiredTigerTestCase):
# Initially load huge data
cursor = self.session.open_cursor(uri)
- for i in range(1, 1000000):
+ for i in range(1, 10000):
cursor.set_key(ds.key(nrows + i))
cursor.set_value(bigvalue)
self.assertEquals(cursor.insert(), 0)
diff --git a/test/utility/misc.c b/test/utility/misc.c
index 0d751cd0df8..9d8fa28d3d7 100644
--- a/test/utility/misc.c
+++ b/test/utility/misc.c
@@ -31,8 +31,8 @@ void (*custom_die)(void) = NULL;
const char *progname = "program name not set";
/*
- * die --
- * Report an error and quit.
+ * testutil_die --
+ * Report an error and abort.
*/
void
testutil_die(int e, const char *fmt, ...)
@@ -53,8 +53,9 @@ testutil_die(int e, const char *fmt, ...)
if (e != 0)
fprintf(stderr, ": %s", wiredtiger_strerror(e));
fprintf(stderr, "\n");
+ fprintf(stderr, "process aborting\n");
- exit(EXIT_FAILURE);
+ abort();
}
/*