summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bench/wtperf/runners/medium-lsm-async.wtperf 2
-rw-r--r-- bench/wtperf/wtperf.c 9
-rw-r--r-- dist/api_data.py 20
-rw-r--r-- dist/flags.py 3
-rw-r--r-- examples/c/ex_all.c 15
-rw-r--r-- src/btree/bt_cursor.c 2
-rw-r--r-- src/btree/bt_discard.c 31
-rw-r--r-- src/btree/bt_evict.c 133
-rw-r--r-- src/btree/bt_handle.c 11
-rw-r--r-- src/btree/bt_ret.c 12
-rw-r--r-- src/btree/bt_sync.c 3
-rw-r--r-- src/btree/rec_evict.c 10
-rw-r--r-- src/btree/rec_split.c 39
-rw-r--r-- src/btree/rec_write.c 2
-rw-r--r-- src/btree/row_key.c 36
-rw-r--r-- src/btree/row_srch.c 6
-rw-r--r-- src/config/config_def.c 47
-rw-r--r-- src/conn/conn_api.c 8
-rw-r--r-- src/conn/conn_cache.c 5
-rw-r--r-- src/cursor/cur_backup.c 25
-rw-r--r-- src/docs/file-formats.dox 18
-rw-r--r-- src/docs/programming.dox 1
-rw-r--r-- src/docs/tune-close.dox 13
-rw-r--r-- src/docs/tune-compress.dox 28
-rw-r--r-- src/docs/upgrading.dox 7
-rw-r--r-- src/include/api.h 6
-rw-r--r-- src/include/btree.h 6
-rw-r--r-- src/include/btree.i 7
-rw-r--r-- src/include/cache.h 2
-rw-r--r-- src/include/cell.i 2
-rw-r--r-- src/include/cursor.h 14
-rw-r--r-- src/include/extern.h 4
-rw-r--r-- src/include/flags.h 25
-rw-r--r-- src/include/txn.h 10
-rw-r--r-- src/include/txn.i 20
-rw-r--r-- src/include/wiredtiger.in 42
-rw-r--r-- src/lsm/lsm_cursor.c 5
-rw-r--r-- src/meta/meta_table.c 20
-rw-r--r-- src/txn/txn.c 12
-rw-r--r-- src/txn/txn_ckpt.c 29
-rw-r--r-- src/txn/txn_log.c 5
-rw-r--r-- src/utilities/util_salvage.c 2
-rw-r--r-- test/checkpoint/checkpointer.c 245
-rw-r--r-- test/checkpoint/test_checkpoint.c 51
-rw-r--r-- test/checkpoint/workers.c 143
-rw-r--r-- test/format/wts.c 10
46 files changed, 705 insertions, 441 deletions
diff --git a/bench/wtperf/runners/medium-lsm-async.wtperf b/bench/wtperf/runners/medium-lsm-async.wtperf
index 3343373ff06..85541b6ff8a 100644
--- a/bench/wtperf/runners/medium-lsm-async.wtperf
+++ b/bench/wtperf/runners/medium-lsm-async.wtperf
@@ -2,7 +2,7 @@
conn_config="cache_size=1G"
async_threads=10
table_config="lsm=(chunk_size=100MB,merge_threads=2),type=lsm"
-icount=50000000
+icount=10000000
report_interval=5
run_time=120
populate_threads=1
diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c
index c1270f4d435..62f3ce754e0 100644
--- a/bench/wtperf/wtperf.c
+++ b/bench/wtperf/wtperf.c
@@ -1862,14 +1862,13 @@ main(int argc, char *argv[])
CONFIG *cfg, _cfg;
size_t cc_len, req_len, tc_len;
int ch, monitor_set, ret;
- char *cc_buf, *tc_buf;
const char *opts = "C:H:h:m:O:o:T:";
const char *config_opts, *sep;
- char *user_cconfig, *user_tconfig;
+ char *cc_buf, *tc_buf, *user_cconfig, *user_tconfig;
monitor_set = ret = 0;
- cc_buf = tc_buf = NULL;
- config_opts = user_cconfig = user_tconfig = NULL;
+ config_opts = NULL;
+ cc_buf = tc_buf = user_cconfig = user_tconfig = NULL;
/* Setup the default configuration values. */
cfg = &_cfg;
@@ -2062,6 +2061,8 @@ einval: ret = EINVAL;
err: config_free(cfg);
free(cc_buf);
free(tc_buf);
+ free(user_cconfig);
+ free(user_tconfig);
return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/dist/api_data.py b/dist/api_data.py
index ccd91998121..0c0e610ee22 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -235,9 +235,10 @@ file_config = format_meta + [
Config('memory_page_max', '5MB', r'''
the maximum size a page can grow to in memory before being
reconciled to disk. The specified size will be adjusted to a lower
- bound of <code>50 * leaf_page_max</code>. This limit is soft - it
- is possible for pages to be temporarily larger than this value.
- This setting is ignored for LSM trees, see \c chunk_size''',
+ bound of <code>50 * leaf_page_max</code>, and an upper bound of
+ <code>cache_size / 2</code>. This limit is soft - it is possible
+ for pages to be temporarily larger than this value. This setting
+ is ignored for LSM trees, see \c chunk_size''',
min='512B', max='10TB'),
Config('os_cache_max', '0', r'''
maximum system buffer cache usage, in bytes. If non-zero, evict
@@ -250,7 +251,7 @@ file_config = format_meta + [
system buffer cache after that many bytes from this object are
written into the buffer cache''',
min=0),
- Config('prefix_compression', 'true', r'''
+ Config('prefix_compression', 'false', r'''
configure prefix compression on row-store leaf pages''',
type='boolean'),
Config('prefix_compression_min', '4', r'''
@@ -347,6 +348,9 @@ connection_runtime_config = [
trigger eviction when the cache becomes this full (as a
percentage)''',
min=10, max=99),
+ Config('eviction_workers', '0', r'''
+ additional threads to help evict pages from cache''',
+ min=0, max=20),
Config('statistics', 'none', r'''
Maintain database statistics, which may impact performance.
Choosing "all" maintains all statistics regardless of cost,
@@ -365,6 +369,7 @@ connection_runtime_config = [
enable messages for various events. Options are given as a
list, such as <code>"verbose=[evictserver,read]"</code>''',
type='list', choices=[
+ 'api',
'block',
'checkpoint',
'compact',
@@ -373,6 +378,7 @@ connection_runtime_config = [
'fileops',
'log',
'lsm',
+ 'metadata',
'mutex',
'overflow',
'read',
@@ -601,7 +607,11 @@ methods = {
value. A value of zero disables the timeout''',
type='int'),
]),
-'connection.close' : Method([]),
+'connection.close' : Method([
+ Config('leak_memory', 'false', r'''
+ don't free memory during close''',
+ type='boolean'),
+]),
'connection.reconfigure' : Method(connection_runtime_config),
'connection.load_extension' : Method([
diff --git a/dist/flags.py b/dist/flags.py
index 3dd81ec934b..b9acda1023d 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -55,6 +55,7 @@ flags = {
'TXN_LOG_CKPT_STOP',
],
'verbose' : [
+ 'VERB_api',
'VERB_block',
'VERB_checkpoint',
'VERB_compact',
@@ -63,6 +64,7 @@ flags = {
'VERB_fileops',
'VERB_log',
'VERB_lsm',
+ 'VERB_metadata',
'VERB_mutex',
'VERB_overflow',
'VERB_read',
@@ -83,6 +85,7 @@ flags = {
'CONN_CACHE_POOL',
'CONN_CKPT_SYNC',
'CONN_EVICTION_RUN',
+ 'CONN_LEAK_MEMORY',
'CONN_LSM_MERGE',
'CONN_PANIC',
'CONN_SERVER_RUN',
diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c
index a135b66da19..44608bf920d 100644
--- a/examples/c/ex_all.c
+++ b/examples/c/ex_all.c
@@ -563,10 +563,10 @@ session_ops(WT_SESSION *session)
/*! [Configure dictionary compression on] */
ret = session->drop(session, "table:mytable", NULL);
- /*! [Configure key prefix compression off] */
+ /*! [Configure key prefix compression on] */
ret = session->create(session, "table:mytable",
- "key_format=S,value_format=S,prefix_compression=false");
- /*! [Configure key prefix compression off] */
+ "key_format=S,value_format=S,prefix_compression=true");
+ /*! [Configure key prefix compression on] */
ret = session->drop(session, "table:mytable", NULL);
#ifdef MIGHT_NOT_RUN
@@ -1050,6 +1050,15 @@ main(void)
/*! [Statistics logging with path] */
if (ret == 0)
(void)conn->close(conn, NULL);
+
+ /*
+ * Don't run this code, because memory checkers get very upset when we
+ * leak memory.
+ */
+ (void)wiredtiger_open(home, NULL, "create", &conn);
+ /*! [Connection close leaking memory] */
+ ret = conn->close(conn, "leak_memory=true");
+ /*! [Connection close leaking memory] */
#endif
/*! [Get the WiredTiger library version #1] */
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index c8caf50daa9..4d8d96ab143 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -832,8 +832,8 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt)
session = (WT_SESSION_IMPL *)cbt->iface.session;
ret = __curfile_leave(cbt);
+ __wt_buf_free(session, &cbt->search_key);
__wt_buf_free(session, &cbt->tmp);
- __wt_buf_free(session, &cbt->srch);
return (ret);
}
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index 2c55400fddc..da448703d49 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -41,6 +41,7 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
{
WT_PAGE *page;
WT_PAGE_HEADER *dsk;
+ WT_PAGE_MODIFY *mod;
/*
* Kill our caller's reference, do our best to catch races.
@@ -66,9 +67,30 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
WT_ASSERT(session, hp == NULL);
}
#endif
+
+ /*
+ * If a root page split, there may be one or more pages linked from the
+ * page; walk the list, discarding pages.
+ */
+ switch (page->type) {
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_ROW_INT:
+ mod = page->modify;
+ if (mod != NULL && mod->mod_root_split != NULL)
+ __wt_page_out(session, &mod->mod_root_split);
+ break;
+ }
+
/* Update the cache's information. */
__wt_cache_page_evict(session, page);
+ /*
+ * If discarding the page as part of process exit, the application may
+ * configure to leak the memory rather than do the work.
+ */
+ if (F_ISSET(S2C(session), WT_CONN_LEAK_MEMORY))
+ return;
+
/* Free the page modification information. */
if (page->modify != NULL)
__free_page_modify(session, page);
@@ -139,15 +161,6 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
}
switch (page->type) {
- case WT_PAGE_COL_INT:
- case WT_PAGE_ROW_INT:
- /*
- * If a root page split, there may be one or more pages linked
- * from the page; walk the list, discarding pages.
- */
- if (mod->mod_root_split != NULL)
- __wt_page_out(session, &mod->mod_root_split);
- break;
case WT_PAGE_COL_FIX:
case WT_PAGE_COL_VAR:
/* Free the append array. */
diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c
index 6c26eca3cc4..c4ade741c19 100644
--- a/src/btree/bt_evict.c
+++ b/src/btree/bt_evict.c
@@ -8,11 +8,20 @@
#include "wt_internal.h"
static int __evict_clear_walks(WT_SESSION_IMPL *);
-static int __evict_lru(WT_SESSION_IMPL *, uint32_t);
-static int __evict_lru_cmp(const void *, const void *);
-static int __evict_walk(WT_SESSION_IMPL *, uint32_t *, uint32_t);
-static int __evict_walk_file(WT_SESSION_IMPL *, u_int *, uint32_t);
-static int __evict_worker(WT_SESSION_IMPL *);
+static int __evict_lru(WT_SESSION_IMPL *, uint32_t);
+static int __evict_lru_cmp(const void *, const void *);
+static int __evict_lru_pages(WT_SESSION_IMPL *, int);
+static int __evict_pass(WT_SESSION_IMPL *);
+static int __evict_walk(WT_SESSION_IMPL *, uint32_t *, uint32_t);
+static int __evict_walk_file(WT_SESSION_IMPL *, u_int *, uint32_t);
+static void *__evict_worker(void *);
+
+typedef struct {
+ WT_CONNECTION_IMPL *conn;
+ u_int id;
+
+ pthread_t tid;
+} WT_EVICTION_WORKER;
/*
* __evict_read_gen --
@@ -147,15 +156,29 @@ __wt_cache_evict_server(void *arg)
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_EVICTION_WORKER *workers;
WT_SESSION_IMPL *session;
+ u_int i;
session = arg;
conn = S2C(session);
cache = conn->cache;
+ workers = NULL;
+
+ if (cache->eviction_workers > 0)
+ WT_ERR(__wt_calloc_def(
+ session, cache->eviction_workers, &workers));
+
+ for (i = 0; i < cache->eviction_workers; i++) {
+ workers[i].conn = conn;
+ workers[i].id = i;
+ WT_ERR(__wt_thread_create(session,
+ &workers[i].tid, __evict_worker, &workers[i]));
+ }
while (F_ISSET(conn, WT_CONN_EVICTION_RUN)) {
/* Evict pages from the cache as needed. */
- WT_ERR(__evict_worker(session));
+ WT_ERR(__evict_pass(session));
if (!F_ISSET(conn, WT_CONN_EVICTION_RUN))
break;
@@ -170,6 +193,13 @@ __wt_cache_evict_server(void *arg)
WT_VERBOSE_ERR(session, evictserver, "exiting");
+err: WT_VERBOSE_TRET(session, evictserver, "waiting for helper threads");
+ for (i = 0; i < cache->eviction_workers; i++) {
+ WT_TRET(__wt_cond_signal(session, cache->evict_waiter_cond));
+ WT_TRET(__wt_thread_join(session, workers[i].tid));
+ }
+ __wt_free(session, workers);
+
if (ret == 0) {
if (cache->pages_inmem != cache->pages_evict)
__wt_errx(session,
@@ -187,7 +217,7 @@ __wt_cache_evict_server(void *arg)
" bytes dirty and %" PRIu64 " pages dirty",
cache->bytes_dirty, cache->pages_dirty);
} else
-err: WT_PANIC_ERR(session, ret, "eviction server error");
+ WT_PANIC_ERR(session, ret, "eviction server error");
/* Close the eviction session. */
(void)session->iface.close(&session->iface, NULL);
@@ -197,10 +227,58 @@ err: WT_PANIC_ERR(session, ret, "eviction server error");
/*
* __evict_worker --
+ * Thread to help evict pages from the cache.
+ */
+static void *
+__evict_worker(void *arg)
+{
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_EVICTION_WORKER *worker;
+ WT_SESSION_IMPL *session;
+
+ worker = arg;
+ conn = worker->conn;
+ cache = conn->cache;
+ ret = 0;
+
+ /*
+ * We need a session handle because we're reading/writing pages.
+ * Start with the default session to keep error handling simple.
+ */
+ session = conn->default_session;
+ WT_ERR(__wt_open_session(conn, 1, NULL, NULL, &session));
+
+ while (F_ISSET(conn, WT_CONN_EVICTION_RUN)) {
+ WT_VERBOSE_ERR(session, evictserver, "worker sleeping");
+ WT_ERR(
+ __wt_cond_wait(session, cache->evict_waiter_cond, 100000));
+ if (!F_ISSET(conn, WT_CONN_EVICTION_RUN))
+ break;
+ WT_VERBOSE_ERR(session, evictserver, "worker waking");
+
+ WT_ERR(__evict_lru_pages(session, 1));
+ }
+
+ if (0) {
+err: __wt_err(session, ret, "cache eviction helper error");
+ }
+
+ WT_VERBOSE_TRET(session, evictserver, "helper exiting");
+
+ if (session != conn->default_session)
+ (void)session->iface.close(&session->iface, NULL);
+
+ return (NULL);
+}
+
+/*
+ * __evict_pass --
* Evict pages from memory.
*/
static int
-__evict_worker(WT_SESSION_IMPL *session)
+__evict_pass(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
@@ -463,6 +541,25 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session)
}
/*
+ * __evict_lru_pages --
+ * Get pages from the LRU queue to evict.
+ */
+static int
+__evict_lru_pages(WT_SESSION_IMPL *session, int is_app)
+{
+ WT_DECL_RET;
+
+ /*
+ * Reconcile and discard some pages: EBUSY is returned if a page fails
+ * eviction because it's unavailable, continue in that case.
+ */
+ while ((ret = __wt_evict_lru_page(session, is_app)) == 0 ||
+ ret == EBUSY)
+ ;
+ return (ret == WT_NOTFOUND ? 0 : ret);
+}
+
+/*
* __evict_lru --
* Evict pages from the cache based on their read generation.
*/
@@ -470,7 +567,6 @@ static int
__evict_lru(WT_SESSION_IMPL *session, uint32_t flags)
{
WT_CACHE *cache;
- WT_DECL_RET;
WT_EVICT_ENTRY *evict;
uint64_t cutoff;
uint32_t candidates, entries, i;
@@ -534,18 +630,12 @@ __evict_lru(WT_SESSION_IMPL *session, uint32_t flags)
__wt_spin_unlock(session, &cache->evict_lock);
/*
- * Signal any application threads waiting for the eviction queue to
- * have candidates.
+ * Signal any application or worker threads waiting for the eviction
+ * queue to have candidates.
*/
WT_RET(__wt_cond_signal(session, cache->evict_waiter_cond));
- /*
- * Reconcile and discard some pages: EBUSY is returned if a page fails
- * eviction because it's unavailable, continue in that case.
- */
- while ((ret = __wt_evict_lru_page(session, 0)) == 0 || ret == EBUSY)
- ;
- return (ret == WT_NOTFOUND ? 0 : ret);
+ return (__evict_lru_pages(session, 0));
}
/*
@@ -789,9 +879,12 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
* with the checkpointing thread.
*/
modified = __wt_page_is_modified(page);
+#ifdef EVICTION_DURING_CHECKPOINT
if (modified && btree->checkpointing &&
- page->modify->checkpoint_gen >=
- S2C(session)->txn_global.checkpoint_gen)
+ page->modify->checkpoint_gen >= btree->checkpoint_gen)
+#else
+ if (modified && btree->checkpointing)
+#endif
continue;
/* Optionally ignore clean pages. */
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 6c043ef8a42..7fe3435a9b9 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -599,6 +599,7 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_CONFIG_ITEM cval;
+ uint64_t cache_size;
uint32_t intl_split_size, leaf_split_size;
const char **cfg;
@@ -626,6 +627,16 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval));
btree->maxmempage = WT_MAX((uint64_t)cval.val, 50 * btree->maxleafpage);
+ /*
+ * Don't let pages grow to more than half the cache size. Otherwise,
+ * with very small caches, we can end up in a situation where nothing
+ * can be evicted. Take care getting the cache size: with a shared
+ * cache, it may not have been set.
+ */
+ cache_size = S2C(session)->cache_size;
+ if (cache_size > 0)
+ btree->maxmempage = WT_MIN(btree->maxmempage, cache_size / 2);
+
/* Allocation sizes must be a power-of-two, nothing else makes sense. */
if (!__wt_ispo2(btree->allocsize))
WT_RET_MSG(session,
diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c
index ff6047a544f..b7448669184 100644
--- a/src/btree/bt_ret.c
+++ b/src/btree/bt_ret.c
@@ -74,14 +74,20 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
/*
* If the cursor references a WT_INSERT item, take the key and
- * related WT_UPDATE item. Otherwise, take the key from the
- * original page, and the value from any related WT_UPDATE item,
- * or the page if the key was never updated.
+ * related WT_UPDATE item. Otherwise, if we have an exact
+ * match, we already stashed a copy of the key: use that. If
+ * we don't have an exact match, take the key from the original
+ * page. Use the value from any related WT_UPDATE item, or the
+ * page if the key was never updated.
*/
if (cbt->ins != NULL) {
cursor->key.data = WT_INSERT_KEY(cbt->ins);
cursor->key.size = WT_INSERT_KEY_SIZE(cbt->ins);
upd = __wt_txn_read(session, cbt->ins->upd);
+ } else if (cbt->compare == 0) {
+ cursor->key.data = cbt->search_key.data;
+ cursor->key.size = cbt->search_key.size;
+ upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
} else {
WT_RET(__wt_row_leaf_key(
session, page, rip, &cursor->key, 0));
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 55de99d1509..2f3ad8d6fc5 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -70,7 +70,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
* eviction to complete.
*/
btree->checkpointing = 1;
- checkpoint_gen = S2C(session)->txn_global.checkpoint_gen;
+ checkpoint_gen = ++btree->checkpoint_gen;
if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) {
WT_ERR(__wt_evict_file_exclusive_on(session));
@@ -132,6 +132,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
if (page->modify != NULL)
page->modify->checkpoint_gen = checkpoint_gen;
}
+ WT_ASSERT(session, checkpoint_gen == btree->checkpoint_gen);
break;
WT_ILLEGAL_VALUE_ERR(session);
}
diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c
index e3c80910b36..a1b4e5b9ed1 100644
--- a/src/btree/rec_evict.c
+++ b/src/btree/rec_evict.c
@@ -312,8 +312,12 @@ __rec_review(
* is blocked by the exclusive lock.
*/
mod = page->modify;
+#ifdef EVICTION_DURING_CHECKPOINT
behind_checkpoint = btree->checkpointing && (mod != NULL) &&
- mod->checkpoint_gen >= S2C(session)->txn_global.checkpoint_gen;
+ mod->checkpoint_gen >= btree->checkpoint_gen;
+#else
+ behind_checkpoint = btree->checkpointing && (mod != NULL);
+#endif
if (behind_checkpoint && __wt_page_is_modified(page)) {
WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint);
@@ -322,8 +326,8 @@ __rec_review(
}
/*
- * If we are checkpointing, we can't merge multiblock pages into their
- * parent.
+ * If we are behind a checkpoint, we can't merge multiblock pages into
+ * their parent.
*/
if (behind_checkpoint && F_ISSET(mod, WT_PM_REC_MULTIBLOCK))
return (EBUSY);
diff --git a/src/btree/rec_split.c b/src/btree/rec_split.c
index 2aeb9ffb080..68e429fa921 100644
--- a/src/btree/rec_split.c
+++ b/src/btree/rec_split.c
@@ -12,8 +12,9 @@
* any real understanding of what might be useful to surface to applications.
*/
static u_int __split_deepen_max_internal_image = 100;
-static u_int __split_deepen_min_child = 100;
+static u_int __split_deepen_min_child = 10;
static u_int __split_deepen_per_child = 100;
+static u_int __split_deepen_split_child = 100;
/*
* __split_should_deepen --
@@ -34,22 +35,28 @@ __split_should_deepen(WT_SESSION_IMPL *session, WT_PAGE *page)
pindex = WT_INTL_INDEX_COPY(page);
/*
- * Don't deepen the tree if the page's memory footprint is less than N
- * times the maximum internal page size chunk in the backing file.
+ * Deepen the tree if the page's memory footprint is larger than the
+ * maximum size for a page in memory. We need an absolute minimum
+ * number of entries in order to split the page: if there is a single
+ * huge key, splitting won't help.
*/
- if (page->memory_footprint <
- __split_deepen_max_internal_image * S2BT(session)->maxintlpage)
- return (0);
+ if (page->memory_footprint > S2BT(session)->maxmempage &&
+ pindex->entries >= __split_deepen_min_child)
+ return (1);
/*
- * Don't deepen the tree unless the split will result in at least N
- * children in the newly created intermediate layer.
+ * Deepen the tree if the page's memory footprint is at least N
+ * times the maximum internal page size chunk in the backing file and
+ * the split will result in at least N children in the newly created
+ * intermediate layer.
*/
- if (pindex->entries <
- (__split_deepen_per_child * __split_deepen_min_child))
- return (0);
+ if (page->memory_footprint >
+ __split_deepen_max_internal_image * S2BT(session)->maxintlpage &&
+ pindex->entries >=
+ (__split_deepen_per_child * __split_deepen_split_child))
+ return (1);
- return (1);
+ return (0);
}
/*
@@ -195,7 +202,13 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent)
panic = 0;
pindex = WT_INTL_INDEX_COPY(parent);
- children = pindex->entries / __split_deepen_per_child;
+
+ /*
+ * Create N children, unless we are dealing with a large page without
+ * many entries, in which case split into the minimum number of pages.
+ */
+ children = WT_MAX(pindex->entries / __split_deepen_per_child,
+ __split_deepen_min_child);
WT_STAT_FAST_CONN_INCR(session, cache_eviction_deepen);
WT_VERBOSE_ERR(session, split,
diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c
index d98d6c94faf..09de20a35d3 100644
--- a/src/btree/rec_write.c
+++ b/src/btree/rec_write.c
@@ -4669,7 +4669,7 @@ err: __wt_scr_free(&tkey);
* Set the checkpoint generation, used to determine whether we can skip
* writing this page again.
*/
- mod->checkpoint_gen = S2C(session)->txn_global.checkpoint_gen;
+ mod->checkpoint_gen = btree->checkpoint_gen;
return (0);
}
diff --git a/src/btree/row_key.c b/src/btree/row_key.c
index e7306bc3e86..fd198812fd1 100644
--- a/src/btree/row_key.c
+++ b/src/btree/row_key.c
@@ -109,13 +109,13 @@ __inmem_row_leaf_slots(
*/
int
__wt_row_leaf_key_copy(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *retb)
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb)
{
- WT_RET(__wt_row_leaf_key_work(session, page, rip_arg, retb, 0));
+ WT_RET(__wt_row_leaf_key_work(session, page, rip_arg, keyb, 0));
/* The return buffer may only hold a reference to a key, copy it. */
- if (!WT_DATA_IN_ITEM(retb))
- WT_RET(__wt_buf_set(session, retb, retb->data, retb->size));
+ if (!WT_DATA_IN_ITEM(keyb))
+ WT_RET(__wt_buf_set(session, keyb, keyb->data, keyb->size));
return (0);
}
@@ -127,7 +127,7 @@ __wt_row_leaf_key_copy(
*/
int
__wt_row_leaf_key_work(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *retb, int instantiate)
+ WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, int instantiate)
{
enum { FORWARD, BACKWARD } direction;
WT_BTREE *btree;
@@ -188,8 +188,8 @@ off_page: ikey = key;
* Take a copy and wrap up.
*/
if (slot_offset == 0) {
- retb->data = WT_IKEY_DATA(ikey);
- retb->size = ikey->size;
+ keyb->data = WT_IKEY_DATA(ikey);
+ keyb->size = ikey->size;
/*
* The key is already instantiated, ignore the
@@ -224,8 +224,8 @@ off_page: ikey = key;
* In short: if it's not an overflow key, take a copy
* and roll forward.
*/
- retb->data = WT_IKEY_DATA(ikey);
- retb->size = ikey->size;
+ keyb->data = WT_IKEY_DATA(ikey);
+ keyb->size = ikey->size;
direction = FORWARD;
goto next;
}
@@ -260,7 +260,7 @@ off_page: ikey = key;
goto off_page;
}
ret = __wt_dsk_cell_data_ref(
- session, WT_PAGE_ROW_LEAF, unpack, retb);
+ session, WT_PAGE_ROW_LEAF, unpack, keyb);
WT_TRET(__wt_rwunlock(
session, btree->ovfl_lock));
WT_ERR(ret);
@@ -299,11 +299,11 @@ off_page: ikey = key;
* directions then.
*/
if (btree->huffman_key == NULL) {
- retb->data = unpack->data;
- retb->size = unpack->size;
+ keyb->data = unpack->data;
+ keyb->size = unpack->size;
} else
WT_ERR(__wt_dsk_cell_data_ref(
- session, WT_PAGE_ROW_LEAF, unpack, retb));
+ session, WT_PAGE_ROW_LEAF, unpack, keyb));
if (slot_offset == 0) {
/*
@@ -391,10 +391,10 @@ off_page: ikey = key;
* don't need, truncate the item's data length to the
* prefix bytes.
*/
- retb->size = unpack->prefix;
- WT_ERR(__wt_buf_grow(session, retb, retb->size + size));
- memcpy((uint8_t *)retb->data + retb->size, p, size);
- retb->size += size;
+ keyb->size = unpack->prefix;
+ WT_ERR(__wt_buf_grow(session, keyb, keyb->size + size));
+ memcpy((uint8_t *)keyb->data + keyb->size, p, size);
+ keyb->size += size;
if (slot_offset == 0)
break;
@@ -431,7 +431,7 @@ next: switch (direction) {
if (!__wt_off_page(page, key)) {
WT_ERR(__wt_row_ikey(session,
WT_PAGE_DISK_OFFSET(page, key),
- retb->data, retb->size, &ikey));
+ keyb->data, keyb->size, &ikey));
/*
* Serialize the swap of the key into place: on success,
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c
index 8263c9c89e3..4fa5c691436 100644
--- a/src/btree/row_srch.c
+++ b/src/btree/row_srch.c
@@ -128,7 +128,7 @@ __wt_row_search(WT_SESSION_IMPL *session,
int cmp, depth;
btree = S2BT(session);
- item = &cbt->srch;
+ item = &cbt->search_key;
rip = NULL;
match = 0; /* -Wuninitialized */
@@ -459,7 +459,9 @@ restart:
pindex = WT_INTL_INDEX_COPY(btree->root.page);
cbt->slot = pindex->entries < 2 ?
__wt_random() % page->pg_row_entries : 0;
- return (0);
+
+ return (__wt_row_leaf_key(session,
+ page, page->pg_row_d + cbt->slot, &cbt->search_key, 0));
}
/*
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 7c22b0e9e4e..f2cc4884a8e 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -17,6 +17,11 @@ static const WT_CONFIG_CHECK confchk_connection_async_new_op[] = {
{ NULL, NULL, NULL, NULL }
};
+static const WT_CONFIG_CHECK confchk_connection_close[] = {
+ { "leak_memory", "boolean", NULL, NULL},
+ { NULL, NULL, NULL, NULL }
+};
+
static const WT_CONFIG_CHECK confchk_connection_load_extension[] = {
{ "config", "string", NULL, NULL},
{ "entry", "string", NULL, NULL},
@@ -53,17 +58,18 @@ static const WT_CONFIG_CHECK confchk_connection_reconfigure[] = {
{ "eviction_dirty_target", "int", "min=10,max=99", NULL},
{ "eviction_target", "int", "min=10,max=99", NULL},
{ "eviction_trigger", "int", "min=10,max=99", NULL},
+ { "eviction_workers", "int", "min=0,max=20", NULL},
{ "shared_cache", "category", NULL,
confchk_shared_cache_subconfigs},
{ "statistics", "list",
"choices=[\"all\",\"fast\",\"none\",\"clear\"]",
NULL},
{ "verbose", "list",
- "choices=[\"block\",\"checkpoint\",\"compact\",\"evict\","
- "\"evictserver\",\"fileops\",\"log\",\"lsm\",\"mutex\","
- "\"overflow\",\"read\",\"readserver\",\"reconcile\",\"recovery\","
- "\"salvage\",\"shared_cache\",\"split\",\"verify\",\"version\","
- "\"write\"]",
+ "choices=[\"api\",\"block\",\"checkpoint\",\"compact\",\"evict\""
+ ",\"evictserver\",\"fileops\",\"log\",\"lsm\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"readserver\",\"reconcile\","
+ "\"recovery\",\"salvage\",\"shared_cache\",\"split\",\"verify\","
+ "\"version\",\"write\"]",
NULL},
{ NULL, NULL, NULL, NULL }
};
@@ -275,6 +281,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "eviction_dirty_target", "int", "min=10,max=99", NULL},
{ "eviction_target", "int", "min=10,max=99", NULL},
{ "eviction_trigger", "int", "min=10,max=99", NULL},
+ { "eviction_workers", "int", "min=0,max=20", NULL},
{ "extensions", "list", NULL, NULL},
{ "file_extend", "list", "choices=[\"data\",\"log\"]", NULL},
{ "hazard_max", "int", "min=15", NULL},
@@ -295,11 +302,11 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
NULL},
{ "use_environment_priv", "boolean", NULL, NULL},
{ "verbose", "list",
- "choices=[\"block\",\"checkpoint\",\"compact\",\"evict\","
- "\"evictserver\",\"fileops\",\"log\",\"lsm\",\"mutex\","
- "\"overflow\",\"read\",\"readserver\",\"reconcile\",\"recovery\","
- "\"salvage\",\"shared_cache\",\"split\",\"verify\",\"version\","
- "\"write\"]",
+ "choices=[\"api\",\"block\",\"checkpoint\",\"compact\",\"evict\""
+ ",\"evictserver\",\"fileops\",\"log\",\"lsm\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"readserver\",\"reconcile\","
+ "\"recovery\",\"salvage\",\"shared_cache\",\"split\",\"verify\","
+ "\"version\",\"write\"]",
NULL},
{ NULL, NULL, NULL, NULL }
};
@@ -330,8 +337,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
confchk_connection_async_new_op
},
{ "connection.close",
- "",
- NULL
+ "leak_memory=0",
+ confchk_connection_close
},
{ "connection.load_extension",
"config=,entry=wiredtiger_extension_init,"
@@ -345,8 +352,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
{ "connection.reconfigure",
"async=(enabled=0,ops_max=1024,threads=2),cache_size=100MB,"
"error_prefix=,eviction_dirty_target=80,eviction_target=80,"
- "eviction_trigger=95,shared_cache=(chunk=10MB,name=,reserve=0,"
- "size=500MB),statistics=none,verbose=",
+ "eviction_trigger=95,eviction_workers=0,shared_cache=(chunk=10MB,"
+ "name=,reserve=0,size=500MB),statistics=none,verbose=",
confchk_connection_reconfigure
},
{ "cursor.close",
@@ -361,7 +368,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
",internal_key_truncate=,internal_page_max=4KB,key_format=u,"
"key_gap=10,leaf_item_max=0,leaf_page_max=32KB,"
"memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
- "prefix_compression=,prefix_compression_min=4,split_pct=75,"
+ "prefix_compression=0,prefix_compression_min=4,split_pct=75,"
"value_format=u,version=(major=0,minor=0)",
confchk_file_meta
},
@@ -399,7 +406,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
",bloom_config=,bloom_hash_count=8,bloom_oldest=0,chunk_max=5GB,"
"chunk_size=10MB,merge_max=15,merge_min=0,merge_threads=2),"
"memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
- "prefix_compression=,prefix_compression_min=4,source=,"
+ "prefix_compression=0,prefix_compression_min=4,source=,"
"split_pct=75,type=file,value_format=u",
confchk_session_create
},
@@ -453,10 +460,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"cache_size=100MB,checkpoint=(name=\"WiredTigerCheckpoint\","
"wait=0),checkpoint_sync=,create=0,direct_io=,error_prefix=,"
"eviction_dirty_target=80,eviction_target=80,eviction_trigger=95,"
- "extensions=,file_extend=,hazard_max=1000,log=(archive=,enabled=0"
- ",file_max=100MB,path=\"\"),lsm_merge=,mmap=,multiprocess=0,"
- "session_max=100,shared_cache=(chunk=10MB,name=,reserve=0,"
- "size=500MB),statistics=none,"
+ "eviction_workers=0,extensions=,file_extend=,hazard_max=1000,"
+ "log=(archive=,enabled=0,file_max=100MB,path=\"\"),lsm_merge=,"
+ "mmap=,multiprocess=0,session_max=100,shared_cache=(chunk=10MB,"
+ "name=,reserve=0,size=500MB),statistics=none,"
"statistics_log=(path=\"WiredTigerStat.%d.%H\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=fsync,"
"use_environment_priv=0,verbose=",
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index e6718d3c978..2e540278b75 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -524,6 +524,7 @@ __conn_is_new(WT_CONNECTION *wt_conn)
static int
__conn_close(WT_CONNECTION *wt_conn, const char *config)
{
+ WT_CONFIG_ITEM cval;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_SESSION *wt_session;
@@ -533,7 +534,10 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config)
conn = (WT_CONNECTION_IMPL *)wt_conn;
CONNECTION_API_CALL(conn, session, close, config, cfg);
- WT_UNUSED(cfg);
+
+ WT_ERR(__wt_config_gets(session, cfg, "leak_memory", &cval));
+ if (cval.val != 0)
+ F_SET(conn, WT_CONN_LEAK_MEMORY);
/*
* Rollback all running transactions.
@@ -998,6 +1002,7 @@ __wt_conn_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
const char *name;
uint32_t flag;
} *ft, verbtypes[] = {
+ { "api", WT_VERB_api },
{ "block", WT_VERB_block },
{ "checkpoint", WT_VERB_checkpoint },
{ "compact", WT_VERB_compact },
@@ -1006,6 +1011,7 @@ __wt_conn_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
{ "fileops", WT_VERB_fileops },
{ "log", WT_VERB_log },
{ "lsm", WT_VERB_lsm },
+ { "metadata", WT_VERB_metadata },
{ "mutex", WT_VERB_mutex },
{ "overflow", WT_VERB_overflow },
{ "read", WT_VERB_read },
diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c
index 489c7eff533..4eb6391334e 100644
--- a/src/conn/conn_cache.c
+++ b/src/conn/conn_cache.c
@@ -54,6 +54,11 @@ __wt_cache_config(WT_CONNECTION_IMPL *conn, const char *cfg[])
cache->eviction_dirty_target = (u_int)cval.val;
WT_RET_NOTFOUND_OK(ret);
+ if ((ret =
+ __wt_config_gets(session, cfg, "eviction_workers", &cval)) == 0)
+ cache->eviction_workers = (u_int)cval.val;
+ WT_RET_NOTFOUND_OK(ret);
+
return (0);
}
diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c
index 01cb4f7ccf4..00f3151e1a5 100644
--- a/src/cursor/cur_backup.c
+++ b/src/cursor/cur_backup.c
@@ -201,27 +201,34 @@ __backup_start(
/* Create the hot backup file. */
WT_ERR(__backup_file_create(session, cb));
+ /* Add log files if logging is enabled. */
+
/*
* If a list of targets was specified, work our way through them.
* Else, generate a list of all database objects.
+ *
+ * Include log files if doing a full backup, and copy them before
+ * copying data files to avoid rolling the metadata forward across
+ * a checkpoint that completes during the backup.
*/
target_list = 0;
WT_ERR(__backup_uri(session, cb, cfg, &target_list));
- if (!target_list)
+ if (!target_list) {
+ if (conn->log) {
+ WT_ERR(__wt_log_get_active_files(
+ session, &logfiles, &logcount));
+ for (i = 0; i < logcount; i++)
+ WT_ERR(__backup_list_append(
+ session, cb, logfiles[i]));
+ }
+
WT_ERR(__backup_all(session, cb));
+ }
/* Add the hot backup and single-threading file to the list. */
WT_ERR(__backup_list_append(session, cb, WT_METADATA_BACKUP));
WT_ERR(__backup_list_append(session, cb, WT_SINGLETHREAD));
- /* Add log files if logging is on and we're doing a full backup. */
- if (!target_list && conn->log) {
- WT_ERR(
- __wt_log_get_active_files(session, &logfiles, &logcount));
- for (i = 0; i < logcount; i++)
- WT_ERR(__backup_list_append(session, cb, logfiles[i]));
- }
-
err: /* Close the hot backup file. */
if (cb->bfp != NULL) {
WT_TRET(fclose(cb->bfp) == 0 ? 0 : __wt_errno());
diff --git a/src/docs/file-formats.dox b/src/docs/file-formats.dox
index 29e8bf94da8..46865da4811 100644
--- a/src/docs/file-formats.dox
+++ b/src/docs/file-formats.dox
@@ -45,13 +45,12 @@ dictionary compression, Huffman encoding and block compression.
and on-disk objects by storing any identical key prefix only once per
page.
- The cost is minor additional CPU and some additional memory use when
-operating on the in-memory tree. Specifically, sequential cursor
-movement through prefix-compressed page in reverse (but not forward)
-order, or the random lookup of a key/value pair will allocate sufficient
-memory to hold some number of uncompressed keys. So, for example, if
-key prefix compression only saves a small number of bytes per key, the
-additional memory cost of instantiating the uncompressed key may mean
+ The cost is additional CPU and memory when operating on the in-memory tree.
+Specifically, sequential cursor movement through a prefix-compressed page in
+reverse (but not forward) order, or the random lookup of a key/value pair will
+allocate sufficient memory to hold some number of uncompressed keys. So, for
+example, if key prefix compression only saves a small number of bytes per key,
+the additional memory cost of instantiating the uncompressed key may mean
prefix compression is not worthwhile. Further, in cases where the
on-disk cost is the primary concern, block compression may mean prefix
compression is less useful.
@@ -59,10 +58,9 @@ compression is less useful.
Applications may limit the use of prefix compression by configuring the
minimum number of bytes that must be gained before prefix compression is
used with the WT_SESSION::create method's \c prefix_compression_min
-configuration string, or turn off key prefix compression entirely using
-the WT_SESSION::create method's \c prefix_compression configuration string.
+configuration string.
- Key prefix compression is enabled by default.
+ Key prefix compression is disabled by default.
- Dictionary compression reduces the size requirement of both the
in-memory and on-disk objects by storing any identical value only once
diff --git a/src/docs/programming.dox b/src/docs/programming.dox
index f7ff9e04f02..30cafd9dc3a 100644
--- a/src/docs/programming.dox
+++ b/src/docs/programming.dox
@@ -36,6 +36,7 @@ WiredTiger applications:
- @subpage tune_transparent_huge_pages
- @subpage tune_zone_reclaim
- @subpage tune_statistics
+- @subpage tune_close
@section programming_deployment Deployment considerations
- @subpage checkpoints
diff --git a/src/docs/tune-close.dox b/src/docs/tune-close.dox
new file mode 100644
index 00000000000..dd615e3e460
--- /dev/null
+++ b/src/docs/tune-close.dox
@@ -0,0 +1,13 @@
+/*! @page tune_close Connection close
+
+Closing a WT_CONNECTION handle can be expensive, in part because of
+the cost of freeing memory (for example, the memory allocated for a
+large cache). If the process is exiting regardless, configuring
+WT_CONNECTION::close to leak memory on close can significantly speed up
+the close.
+
+An example of configuring WT_CONNECTION::close to leak memory:
+
+@snippet ex_all.c Connection close leaking memory
+
+ */
diff --git a/src/docs/tune-compress.dox b/src/docs/tune-compress.dox
index 1d39626d0dd..bb675337a0d 100644
--- a/src/docs/tune-compress.dox
+++ b/src/docs/tune-compress.dox
@@ -1,25 +1,25 @@
/*! @page tune_compression Compression
-WiredTiger includes a number of different compression techniques, some
-configured by default and some not. Compression minimizes in-memory
-and on-disk resource requirements and potentially decreases the amount
-of I/O, at some CPU cost, when rows are read and written.
+WiredTiger includes a number of optional compression techniques. Configuring
+compression generally decreases on-disk and in-memory resource requirements
+and the amount of I/O, and increases CPU cost when rows are read and written.
-Configuring compression on or off may change application throughput.
-For example, in applications using solid-state drives (where I/O is less
-expensive), turning off compression may increase application performance
-by reducing CPU costs; in applications where I/O costs are more
-expensive, turning on compression may increase application performance
-by reducing the overall number of I/O operations.
+Configuring compression may change application throughput. For example,
+in applications using solid-state drives (where I/O is less expensive),
+turning off compression may increase application performance by reducing
+CPU costs; in applications where I/O costs are more expensive, turning on
+compression may increase application performance by reducing the overall
+number of I/O operations.
-For example, turning off row-store key prefix compression:
+An example of turning on row-store key prefix compression:
-@snippet ex_all.c Configure key prefix compression off
+@snippet ex_all.c Configure key prefix compression on
-For example, turning on row-store or column-store dictionary compression:
+An example of turning on row-store or column-store dictionary compression:
@snippet ex_all.c Configure dictionary compression on
-See @ref file_formats_compression for more information.
+See @ref file_formats_compression for more information on available
+compression techniques.
*/
diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox
index 2deb4459046..6b89be30791 100644
--- a/src/docs/upgrading.dox
+++ b/src/docs/upgrading.dox
@@ -13,6 +13,13 @@ We are now also enforcing that only one of \c cache_size and \c shared_cache
are specified in the ::wiredtiger_open configuration string.
</dd>
+<dt>WT_SESSION::create prefix_compression disabled by default</dt>
+<dd>
+In the 2.1.2 release, prefix compression defaults to \c false. Applications
+that benefit from prefix compression will need to explicitly set
+\c prefix_compression=true when creating tables.
+</dd>
+
@section version_211 Upgrading to Version 2.1.1
<dl>
diff --git a/src/include/api.h b/src/include/api.h
index cb62bf504b5..82b5b572ef9 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -15,7 +15,8 @@
#define API_CALL_NOCONF(s, h, n, cur, dh) do { \
API_SESSION_INIT(s, h, n, cur, dh); \
- WT_ERR(F_ISSET(S2C(s), WT_CONN_PANIC) ? __wt_panic(s) : 0)
+ WT_ERR(F_ISSET(S2C(s), WT_CONN_PANIC) ? __wt_panic(s) : 0); \
+ WT_VERBOSE_ERR((s), api, "CALL: " #h ":" #n)
#define API_CALL(s, h, n, cur, dh, config, cfg) do { \
const char *cfg[] = \
@@ -24,7 +25,8 @@
WT_ERR(F_ISSET(S2C(s), WT_CONN_PANIC) ? __wt_panic(s) : 0); \
WT_ERR(((config) != NULL) ? \
__wt_config_check((s), \
- WT_CONFIG_REF(session, h##_##n), (config), 0) : 0)
+ WT_CONFIG_REF(session, h##_##n), (config), 0) : 0); \
+ WT_VERBOSE_ERR((s), api, "CALL: " #h ":" #n)
#define API_END(s, ret) \
if ((s) != NULL) { \
diff --git a/src/include/btree.h b/src/include/btree.h
index 2ad0255b4ff..de735340d11 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -117,6 +117,12 @@ struct __wt_btree {
u_int evict_walk_skips; /* Number of walks skipped */
volatile uint32_t evict_busy; /* Count of threads in eviction */
+ /*
+ * The current checkpoint generation. Use a 32-bit count for now: if
+ * we can do 4 billion checkpoints without a restart, we'll be in good
+ * shape.
+ */
+ uint32_t checkpoint_gen;
int checkpointing; /* Checkpoint in progress */
/* Flags values up to 0xff are reserved for WT_DHANDLE_* */
diff --git a/src/include/btree.i b/src/include/btree.i
index e6b173a33e2..2cb2bb9c7e1 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -331,13 +331,14 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
if (F_ISSET(&session->txn, TXN_HAS_SNAPSHOT))
page->modify->disk_snap_min = session->txn.snap_min;
+ txn_global = &S2C(session)->txn_global;
+ page->modify->rec_skipped_txn = txn_global->last_running;
+
/*
* Set the checkpoint generation: if a checkpoint is already
* running, these changes cannot be included, by definition.
*/
- txn_global = &S2C(session)->txn_global;
- page->modify->checkpoint_gen = txn_global->checkpoint_gen;
- page->modify->rec_skipped_txn = txn_global->last_running;
+ page->modify->checkpoint_gen = S2BT(session)->checkpoint_gen;
}
/* Check if this is the largest transaction ID to update the page. */
diff --git a/src/include/cache.h b/src/include/cache.h
index eb081225ea3..bbe4157aadf 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -65,6 +65,8 @@ struct __wt_cache {
u_int eviction_target; /* Percent to end eviction */
u_int eviction_dirty_target; /* Percent to allow dirty */
+ u_int eviction_workers; /* Additional eviction threads */
+
/*
* LRU eviction list information.
*/
diff --git a/src/include/cell.i b/src/include/cell.i
index 91de604890c..00a1e4d0e5b 100644
--- a/src/include/cell.i
+++ b/src/include/cell.i
@@ -529,7 +529,7 @@ __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell)
*
* !!!
* This line of code is really a call to __wt_off_page, but we know the
- * cell we're given with either be on the page or past the end of page,
+ * cell we're given will either be on the page or past the end of page,
* so it's a simpler check. (I wouldn't bother, but the real problem is
* we can't call __wt_off_page directly, it's in btree.i which requires
* this file be included first.)
diff --git a/src/include/cursor.h b/src/include/cursor.h
index 5b4f44377bc..9cd8f043bb7 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -102,6 +102,14 @@ struct __wt_cursor_btree {
int compare;
/*
+ * The key value from a binary search of a row-store file; we keep a
+ * copy of the last key we retrieved in the search, which avoids the
+ * additional work of getting the key again for return to the
+ * application.
+ */
+ WT_ITEM search_key;
+
+ /*
* It's relatively expensive to calculate the last record on a variable-
* length column-store page because of the repeat values. Calculate it
* once per page and cache it. This value doesn't include the skiplist
@@ -151,12 +159,6 @@ struct __wt_cursor_btree {
WT_ITEM tmp;
/*
- * A temporary buffer used for key values during binary searches of row-
- * store files.
- */
- WT_ITEM srch;
-
- /*
* Fixed-length column-store items are a single byte, and it's simpler
* and cheaper to allocate the space for it now than keep checking to
* see if we need to grow the buffer.
diff --git a/src/include/extern.h b/src/include/extern.h
index 5c7458bd905..9285e2c657d 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -466,11 +466,11 @@ extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page);
extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session,
WT_PAGE *page,
WT_ROW *rip_arg,
- WT_ITEM *retb);
+ WT_ITEM *keyb);
extern int __wt_row_leaf_key_work(WT_SESSION_IMPL *session,
WT_PAGE *page,
WT_ROW *rip_arg,
- WT_ITEM *retb,
+ WT_ITEM *keyb,
int instantiate);
extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session,
WT_PAGE *page,
diff --git a/src/include/flags.h b/src/include/flags.h
index 2905cb3c945..34e1e07856a 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -3,9 +3,10 @@
* flags section: BEGIN
*/
#define WT_CACHE_POOL_RUN 0x00000001
-#define WT_CONN_CACHE_POOL 0x00000020
-#define WT_CONN_CKPT_SYNC 0x00000010
-#define WT_CONN_EVICTION_RUN 0x00000008
+#define WT_CONN_CACHE_POOL 0x00000040
+#define WT_CONN_CKPT_SYNC 0x00000020
+#define WT_CONN_EVICTION_RUN 0x00000010
+#define WT_CONN_LEAK_MEMORY 0x00000008
#define WT_CONN_LSM_MERGE 0x00000004
#define WT_CONN_PANIC 0x00000002
#define WT_CONN_SERVER_RUN 0x00000001
@@ -46,14 +47,16 @@
#define WT_TXN_LOG_CKPT_PREPARE 0x00000004
#define WT_TXN_LOG_CKPT_START 0x00000002
#define WT_TXN_LOG_CKPT_STOP 0x00000001
-#define WT_VERB_block 0x00040000
-#define WT_VERB_checkpoint 0x00020000
-#define WT_VERB_compact 0x00010000
-#define WT_VERB_evict 0x00008000
-#define WT_VERB_evictserver 0x00004000
-#define WT_VERB_fileops 0x00002000
-#define WT_VERB_log 0x00001000
-#define WT_VERB_lsm 0x00000800
+#define WT_VERB_api 0x00100000
+#define WT_VERB_block 0x00080000
+#define WT_VERB_checkpoint 0x00040000
+#define WT_VERB_compact 0x00020000
+#define WT_VERB_evict 0x00010000
+#define WT_VERB_evictserver 0x00008000
+#define WT_VERB_fileops 0x00004000
+#define WT_VERB_log 0x00002000
+#define WT_VERB_lsm 0x00001000
+#define WT_VERB_metadata 0x00000800
#define WT_VERB_mutex 0x00000400
#define WT_VERB_overflow 0x00000200
#define WT_VERB_read 0x00000100
diff --git a/src/include/txn.h b/src/include/txn.h
index 16fbe621228..99851d4eca7 100644
--- a/src/include/txn.h
+++ b/src/include/txn.h
@@ -40,13 +40,6 @@ struct __wt_txn_global {
*/
volatile uint64_t oldest_id;
- /*
- * The current checkpoint generation. Use a 32-bit count for now: if
- * we can do 4 billion checkpoints without a restart, we'll be in good
- * shape.
- */
- uint32_t checkpoint_gen;
-
/* Count of scanning threads, or -1 for exclusive access. */
volatile int32_t scan_count;
@@ -137,7 +130,6 @@ struct __wt_txn {
#define TXN_ERROR 0x02
#define TXN_HAS_ID 0x04
#define TXN_HAS_SNAPSHOT 0x08
-#define TXN_OLDEST 0x10
-#define TXN_RUNNING 0x20
+#define TXN_RUNNING 0x10
uint32_t flags;
};
diff --git a/src/include/txn.i b/src/include/txn.i
index 5eeb6fbddeb..4f039bcc408 100644
--- a/src/include/txn.i
+++ b/src/include/txn.i
@@ -210,6 +210,7 @@ __wt_txn_id_check(WT_SESSION_IMPL *session)
txn = &session->txn;
+ WT_ASSERT(session, F_ISSET(txn, TXN_RUNNING));
if (!F_ISSET(txn, TXN_HAS_ID)) {
conn = S2C(session);
txn_global = &conn->txn_global;
@@ -369,32 +370,23 @@ __wt_txn_am_oldest(WT_SESSION_IMPL *session)
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *s;
- uint64_t id, my_id;
+ uint64_t id;
uint32_t i, session_cnt;
- /* Cache the result: if we're the oldest, don't keep checking. */
- txn = &session->txn;
- if (F_ISSET(txn, TXN_OLDEST))
- return (1);
-
conn = S2C(session);
+ txn = &session->txn;
txn_global = &conn->txn_global;
- /*
- * Use this slightly convoluted way to get our ID, in case session->txn
- * has been hijacked for eviction.
- */
- s = &txn_global->states[session->id];
- if ((my_id = s->id) == WT_TXN_NONE)
+ if (txn->id == WT_TXN_NONE)
return (0);
WT_ORDERED_READ(session_cnt, conn->session_cnt);
for (i = 0, s = txn_global->states;
i < session_cnt;
i++, s++)
- if ((id = s->id) != WT_TXN_NONE && TXNID_LT(id, my_id))
+ if ((id = s->id) != WT_TXN_NONE &&
+ TXNID_LT(id, txn->id))
return (0);
- F_SET(txn, TXN_OLDEST);
return (1);
}
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 9dcc8280ba6..b85ef67c2f4 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -1139,10 +1139,11 @@ struct __wt_session {
* @config{ ),,}
* @config{memory_page_max, the maximum size a page can grow to in
* memory before being reconciled to disk. The specified size will be
- * adjusted to a lower bound of <code>50 * leaf_page_max</code>. This
- * limit is soft - it is possible for pages to be temporarily larger
- * than this value. This setting is ignored for LSM trees\, see \c
- * chunk_size., an integer between 512B and 10TB; default \c 5MB.}
+ * adjusted to a lower bound of <code>50 * leaf_page_max</code>\, and an
+ * upper bound of <code>cache_size / 2</code>. This limit is soft - it
+ * is possible for pages to be temporarily larger than this value. This
+ * setting is ignored for LSM trees\, see \c chunk_size., an integer
+ * between 512B and 10TB; default \c 5MB.}
* @config{os_cache_dirty_max, maximum dirty system buffer cache usage\,
* in bytes. If non-zero\, schedule writes for dirty blocks belonging
* to this object in the system buffer cache after that many bytes from
@@ -1153,7 +1154,7 @@ struct __wt_session {
* that many bytes from this object are read or written into the buffer
* cache., an integer greater than or equal to 0; default \c 0.}
* @config{prefix_compression, configure prefix compression on row-store
- * leaf pages., a boolean flag; default \c true.}
+ * leaf pages., a boolean flag; default \c false.}
* @config{prefix_compression_min, minimum gain before prefix
* compression will be used on row-store leaf pages., an integer greater
* than or equal to 0; default \c 4.}
@@ -1519,7 +1520,10 @@ struct __wt_connection {
* @snippet ex_all.c Close a connection
*
* @param connection the connection handle
- * @configempty{connection.close, see dist/api_data.py}
+ * @configstart{connection.close, see dist/api_data.py}
+ * @config{leak_memory, don't free memory during close., a boolean flag;
+ * default \c false.}
+ * @configend
* @errors
*/
int __F(close)(WT_HANDLE_CLOSED(WT_CONNECTION) *connection,
@@ -1558,6 +1562,8 @@ struct __wt_connection {
* @config{eviction_trigger, trigger eviction when the cache becomes
* this full (as a percentage)., an integer between 10 and 99; default
* \c 95.}
+ * @config{eviction_workers, additional threads to help evict pages from
+ * cache., an integer between 0 and 20; default \c 0.}
* @config{shared_cache = (, shared cache configuration options. A
* database should configure either a cache_size or a shared_cache not
* both., a set of related configuration options defined below.}
@@ -1590,11 +1596,12 @@ struct __wt_connection {
* @config{verbose, enable messages for various events. Options are
* given as a list\, such as
* <code>"verbose=[evictserver\,read]"</code>., a list\, with values
- * chosen from the following options: \c "block"\, \c "checkpoint"\, \c
- * "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\, \c "log"\,
- * \c "lsm"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c "readserver"\,
- * \c "reconcile"\, \c "recovery"\, \c "salvage"\, \c "shared_cache"\,
- * \c "split"\, \c "verify"\, \c "version"\, \c "write"; default empty.}
+ * chosen from the following options: \c "api"\, \c "block"\, \c
+ * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evictserver"\, \c
+ * "fileops"\, \c "log"\, \c "lsm"\, \c "metadata"\, \c "mutex"\, \c
+ * "overflow"\, \c "read"\, \c "readserver"\, \c "reconcile"\, \c
+ * "recovery"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c
+ * "verify"\, \c "version"\, \c "write"; default empty.}
* @configend
* @errors
*/
@@ -1842,6 +1849,8 @@ struct __wt_connection {
* integer between 10 and 99; default \c 80.}
* @config{eviction_trigger, trigger eviction when the cache becomes this full
* (as a percentage)., an integer between 10 and 99; default \c 95.}
+ * @config{eviction_workers, additional threads to help evict pages from cache.,
+ * an integer between 0 and 20; default \c 0.}
* @config{extensions, list of shared library extensions to load (using dlopen).
* Any values specified to an library extension are passed to
* WT_CONNECTION::load_extension as the \c config parameter (for example\,
@@ -1934,11 +1943,12 @@ struct __wt_connection {
* information., a boolean flag; default \c false.}
* @config{verbose, enable messages for various events. Options are given as a
* list\, such as <code>"verbose=[evictserver\,read]"</code>., a list\, with
- * values chosen from the following options: \c "block"\, \c "checkpoint"\, \c
- * "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\, \c "log"\, \c
- * "lsm"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c "readserver"\, \c
- * "reconcile"\, \c "recovery"\, \c "salvage"\, \c "shared_cache"\, \c "split"\,
- * \c "verify"\, \c "version"\, \c "write"; default empty.}
+ * values chosen from the following options: \c "api"\, \c "block"\, \c
+ * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\,
+ * \c "log"\, \c "lsm"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c
+ * "read"\, \c "readserver"\, \c "reconcile"\, \c "recovery"\, \c "salvage"\, \c
+ * "shared_cache"\, \c "split"\, \c "verify"\, \c "version"\, \c "write";
+ * default empty.}
* @configend
* Additionally, if a file named \c WiredTiger.config appears in the WiredTiger
* home directory, it is read for configuration values (see @ref config_file
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index ad7b0598e40..a91378cc90f 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -89,6 +89,11 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update)
/* Update the maximum transaction ID in the primary chunk. */
if (update && (chunk = clsm->primary_chunk) != NULL) {
+ /*
+ * Ensure that there is a transaction with an ID
+ * allocated before using that ID in the LSM tree.
+ */
+ WT_RET(__wt_txn_autocommit_check(session));
WT_RET(__wt_txn_id_check(session));
for (id = chunk->txnid_max, myid = session->txn.id;
!TXNID_LE(myid, id);
diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c
index 0078e2cb5bf..49f472e267b 100644
--- a/src/meta/meta_table.c
+++ b/src/meta/meta_table.c
@@ -91,6 +91,11 @@ __wt_metadata_insert(
WT_CURSOR *cursor;
WT_DECL_RET;
+ WT_VERBOSE_RET(session, metadata,
+ "Insert: key: %s, value: %s, tracking: %s, %s" "turtle",
+ key, value, WT_META_TRACKING(session) ? "true" : "false",
+ __metadata_turtle(key) ? "" : "not ");
+
if (__metadata_turtle(key))
WT_RET_MSG(session, EINVAL,
"%s: insert not supported on the turtle file", key);
@@ -117,6 +122,11 @@ __wt_metadata_update(
WT_CURSOR *cursor;
WT_DECL_RET;
+ WT_VERBOSE_RET(session, metadata,
+ "Update: key: %s, value: %s, tracking: %s, %s" "turtle",
+ key, value, WT_META_TRACKING(session) ? "true" : "false",
+ __metadata_turtle(key) ? "" : "not ");
+
if (__metadata_turtle(key))
return (__wt_turtle_update(session, key, value));
@@ -142,6 +152,11 @@ __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key)
WT_CURSOR *cursor;
WT_DECL_RET;
+ WT_VERBOSE_RET(session, metadata,
+ "Remove: key: %s, tracking: %s, %s" "turtle",
+ key, WT_META_TRACKING(session) ? "true" : "false",
+ __metadata_turtle(key) ? "" : "not ");
+
if (__metadata_turtle(key))
WT_RET_MSG(session, EINVAL,
"%s: remove not supported on the turtle file", key);
@@ -172,6 +187,11 @@ __wt_metadata_search(
*valuep = NULL;
+ WT_VERBOSE_RET(session, metadata,
+ "Search: key: %s, tracking: %s, %s" "turtle",
+ key, WT_META_TRACKING(session) ? "true" : "false",
+ __metadata_turtle(key) ? "" : "not ");
+
if (__metadata_turtle(key))
return (__wt_turtle_read(session, key, valuep));
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 1443c409af9..47baeb66282 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -267,7 +267,7 @@ __wt_txn_release(WT_SESSION_IMPL *session)
WT_TXN_STATE *txn_state;
txn = &session->txn;
- txn->mod_count = 0;
+ WT_ASSERT(session, txn->mod_count == 0);
txn->notify = NULL;
txn_global = &S2C(session)->txn_global;
@@ -293,7 +293,7 @@ __wt_txn_release(WT_SESSION_IMPL *session)
if (session->ncursors == 0)
__wt_txn_release_snapshot(session);
txn->isolation = session->isolation;
- F_CLR(txn, TXN_ERROR | TXN_HAS_ID | TXN_OLDEST | TXN_RUNNING);
+ F_CLR(txn, TXN_ERROR | TXN_HAS_ID | TXN_RUNNING);
}
/*
@@ -308,8 +308,6 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN_OP *op;
u_int i;
- WT_UNUSED(cfg);
-
txn = &session->txn;
WT_ASSERT(session, !F_ISSET(txn, TXN_ERROR));
@@ -341,6 +339,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
/* Free memory associated with updates. */
for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++)
__wt_txn_op_free(session, op);
+ txn->mod_count = 0;
/*
* Auto-commit transactions need a new transaction snapshot so that the
@@ -350,7 +349,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
* the cursor. Get the new snapshot before releasing the ID for the
* commit.
*/
- if (session->ncursors > 0 && txn->isolation != TXN_ISO_READ_UNCOMMITTED)
+ if (session->ncursors > 0 &&
+ F_ISSET(txn, TXN_HAS_ID) &&
+ txn->isolation != TXN_ISO_READ_UNCOMMITTED)
__wt_txn_refresh(session, txn->id + 1, 1);
__wt_txn_release(session);
return (0);
@@ -403,6 +404,7 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
/* Free any memory allocated for the operation. */
__wt_txn_op_free(session, op);
}
+ txn->mod_count = 0;
__wt_txn_release(session);
return (ret);
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 8e37fdb6f83..efe444aed21 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -184,9 +184,9 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_DATA_HANDLE *dhandle;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
- WT_SESSION *wt_session;
WT_TXN *txn;
WT_TXN_ISOLATION saved_isolation;
+ const char *txn_cfg[3];
void *saved_meta_next;
int full, started, tracking;
@@ -229,12 +229,17 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_txn_checkpoint_log(
session, full, WT_TXN_LOG_CKPT_PREPARE, NULL));
- /* Start a snapshot transaction for the checkpoint. */
- wt_session = &session->iface;
- WT_ERR(wt_session->begin_transaction(wt_session, "isolation=snapshot"));
-
- /* Increment the global checkpoint generation. */
- ++conn->txn_global.checkpoint_gen;
+ /*
+ * Start a snapshot transaction for the checkpoint.
+ *
+ * Note: we don't go through the public API calls because they have
+ * side effects on cursors, which applications can hold open across
+ * calls to checkpoint.
+ */
+ txn_cfg[0] = WT_CONFIG_BASE(session, session_begin_transaction);
+ txn_cfg[1] = "isolation=snapshot";
+ txn_cfg[2] = NULL;
+ WT_ERR(__wt_txn_begin(session, txn_cfg));
/* Tell logging that we have started a database checkpoint. */
if (S2C(session)->logging && full) {
@@ -245,8 +250,8 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint, NULL));
- /* Release the snapshot transaction, before syncing the file(s). */
- __wt_txn_release(session);
+ /* Commit the transaction before syncing the file(s). */
+ WT_ERR(__wt_txn_commit(session, NULL));
/*
* Checkpoints have to hit disk (it would be reasonable to configure for
@@ -297,10 +302,8 @@ err: /*
if (tracking)
WT_TRET(__wt_meta_track_off(session, ret != 0));
- if (F_ISSET(txn, TXN_HAS_SNAPSHOT))
- __wt_txn_release(session);
- else
- __wt_txn_release_snapshot(session);
+ if (F_ISSET(txn, TXN_RUNNING))
+ WT_TRET(__wt_txn_rollback(session, NULL));
/* Tell logging that we have finished a database checkpoint. */
if (S2C(session)->logging && started)
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index 06e8b123505..01a49e9c583 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -142,6 +142,11 @@ __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
return (0);
txn = &session->txn;
+
+ /* We'd better have a transaction. */
+ WT_ASSERT(session,
+ F_ISSET(txn, TXN_RUNNING) && F_ISSET(txn, TXN_HAS_ID));
+
WT_ASSERT(session, txn->mod_count > 0);
op = txn->mod + txn->mod_count - 1;
diff --git a/src/utilities/util_salvage.c b/src/utilities/util_salvage.c
index f98facabddc..67aed52d534 100644
--- a/src/utilities/util_salvage.c
+++ b/src/utilities/util_salvage.c
@@ -34,7 +34,7 @@ util_salvage(WT_SESSION *session, int argc, char *argv[])
/* The remaining argument is the file name. */
if (argc != 1)
return (usage());
- if ((name = util_name(*argv, "file", UTIL_FILE_OK)) == NULL)
+ if ((name = util_name(*argv, "file", UTIL_ALL_OK)) == NULL)
return (1);
if ((ret = session->salvage(session, name, force)) != 0) {
diff --git a/test/checkpoint/checkpointer.c b/test/checkpoint/checkpointer.c
index 4f8eb748ae2..c712ae7b8e3 100644
--- a/test/checkpoint/checkpointer.c
+++ b/test/checkpoint/checkpointer.c
@@ -28,9 +28,9 @@
#include "test_checkpoint.h"
static void *checkpointer(void *);
-static int compare_cursors(WT_CURSOR *, int, WT_CURSOR *, int);
+static int compare_cursors(
+ WT_CURSOR *, const char *, WT_CURSOR *, const char *);
static int diagnose_key_error(WT_CURSOR *, int, WT_CURSOR *, int);
-static int get_key_int(WT_CURSOR *, int, u_int *);
static int real_checkpointer(void);
static int verify_checkpoint(WT_SESSION *);
@@ -139,6 +139,7 @@ static int
verify_checkpoint(WT_SESSION *session)
{
WT_CURSOR **cursors;
+ const char *type0, *typei;
char next_uri[128], ckpt[128];
int i, ret, t_ret;
uint64_t key_count;
@@ -147,6 +148,8 @@ verify_checkpoint(WT_SESSION *session)
key_count = 0;
snprintf(ckpt, 128, "checkpoint=%s", g.checkpoint_name);
cursors = calloc((size_t)g.ntables, sizeof(*cursors));
+ if (cursors == NULL)
+ return (log_print_err("verify_checkpoint", ENOMEM, 1));
for (i = 0; i < g.ntables; i++) {
/*
@@ -157,17 +160,21 @@ verify_checkpoint(WT_SESSION *session)
continue;
snprintf(next_uri, 128, "table:__wt%04d", i);
if ((ret = session->open_cursor(
- session, next_uri, NULL, ckpt, &cursors[i])) != 0)
- return (log_print_err(
- "verify_checkpoint:session.open_cursor", ret, 1));
+ session, next_uri, NULL, ckpt, &cursors[i])) != 0) {
+ (void)log_print_err(
+ "verify_checkpoint:session.open_cursor", ret, 1);
+ goto err;
+ }
}
while (ret == 0) {
ret = cursors[0]->next(cursors[0]);
if (ret == 0)
++key_count;
- else if (ret != WT_NOTFOUND)
- return (log_print_err("cursor->next", ret, 1));
+ else if (ret != WT_NOTFOUND) {
+ (void)log_print_err("cursor->next", ret, 1);
+ goto err;
+ }
/*
* Check to see that all remaining cursors have the
* same key/value pair.
@@ -180,62 +187,43 @@ verify_checkpoint(WT_SESSION *session)
if (g.cookies[i].type == LSM)
continue;
t_ret = cursors[i]->next(cursors[i]);
- if (t_ret != 0 && t_ret != WT_NOTFOUND)
- return (log_print_err("cursor->next", ret, 1));
+ if (t_ret != 0 && t_ret != WT_NOTFOUND) {
+ (void)log_print_err("cursor->next", ret, 1);
+ goto err;
+ }
if (ret == WT_NOTFOUND && t_ret == WT_NOTFOUND)
continue;
- else if (ret == WT_NOTFOUND || t_ret == WT_NOTFOUND)
- return (log_print_err(
+ else if (ret == WT_NOTFOUND || t_ret == WT_NOTFOUND) {
+ (void)log_print_err(
"verify_checkpoint tables with different"
- " amount of data", EFAULT, 1));
+ " amount of data", EFAULT, 1);
+ goto err;
+ }
+ type0 = type_to_string(g.cookies[0].type);
+ typei = type_to_string(g.cookies[i].type);
if ((ret = compare_cursors(
- cursors[0], 0, cursors[i], i)) != 0) {
- if (ret == ERR_KEY_MISMATCH)
- (void)diagnose_key_error(
- cursors[0], 0, cursors[i], i);
- return (log_print_err(
+ cursors[0], type0, cursors[i], typei)) != 0) {
+ (void)diagnose_key_error(
+ cursors[0], 0, cursors[i], i);
+ (void)log_print_err(
"verify_checkpoint - mismatching data",
- EFAULT, 1));
+ EFAULT, 1);
+ goto err;
}
}
}
- for (i = 0; i < g.ntables; i++) {
- if (cursors[i] != NULL &&
- (ret = cursors[i]->close(cursors[i])) != 0)
- return (log_print_err(
- "verify_checkpoint:cursor close", ret, 1));
- }
- free(cursors);
printf("Finished verifying a checkpoint with %d tables and %" PRIu64
" keys\n", g.ntables, key_count);
- return (0);
-}
-/*
- * get_key_int --
- * Column stores have a different format than all others, but the
- * underlying value should still match. Copy the string out of a
- * non-column store in that case to ensure that it's nul terminated.
- */
-static int
-get_key_int(WT_CURSOR *cursor, int table_index, u_int *rval)
-{
- WT_ITEM key;
- uint64_t val;
- char buf[128];
-
- if (g.cookies[table_index].type == COL)
- cursor->get_key(cursor, &val);
- else {
- cursor->get_key(cursor, &key);
- memset(buf, 0, 128);
- memcpy(buf, key.data, key.size);
- val = (uint64_t)atol(buf);
+err: for (i = 0; i < g.ntables; i++) {
+ if (cursors[i] != NULL &&
+ (ret = cursors[i]->close(cursors[i])) != 0)
+ (void)log_print_err(
+ "verify_checkpoint:cursor close", ret, 1);
}
-
- *rval = (u_int)val;
+ free(cursors);
return (0);
}
@@ -245,46 +233,41 @@ get_key_int(WT_CURSOR *cursor, int table_index, u_int *rval)
*/
static int
compare_cursors(
- WT_CURSOR *first, int first_index,
- WT_CURSOR *second, int second_index)
+ WT_CURSOR *cursor1, const char *type1,
+ WT_CURSOR *cursor2, const char *type2)
{
- u_int first_key_int, second_key_int;
- char *first_value, *second_value;
+ uint64_t key1, key2;
+ char *val1, *val2;
char buf[128];
memset(buf, 0, 128);
- if (get_key_int(first, first_index, &first_key_int) != 0 ||
- get_key_int(second, second_index, &second_key_int) != 0)
- return (log_print_err("Error decoding key", EINVAL, 1));
+ if (cursor1->get_key(cursor1, &key1) != 0 ||
+ cursor2->get_key(cursor2, &key2) != 0)
+ return (log_print_err("Error getting keys", EINVAL, 1));
- if (first_key_int != second_key_int) {
- printf("Key mismatch %" PRIu32 " from a %s table "
- "is not %" PRIu32 " from a %s table\n",
- first_key_int,
- type_to_string(g.cookies[first_index].type),
- second_key_int,
- type_to_string(g.cookies[second_index].type));
+ if (key1 != key2) {
+ printf("Key mismatch %" PRIu64 " from a %s table "
+ "is not %" PRIu64 " from a %s table\n",
+ key1, type1, key2, type2);
return (ERR_KEY_MISMATCH);
}
/* Now check the values. */
- first->get_value(first, &first_value);
- second->get_value(second, &second_value);
+ if (cursor1->get_value(cursor1, &val1) != 0 ||
+ cursor2->get_value(cursor2, &val2) != 0)
+ return (log_print_err("Error getting values", EINVAL, 1));
+
if (g.logfp != NULL)
- fprintf(g.logfp, "k1: %" PRIu32 " k2: %" PRIu32
+ fprintf(g.logfp, "k1: %" PRIu64 " k2: %" PRIu64
" val1: %s val2: %s \n",
- first_key_int, second_key_int,
- first_value, second_value);
- if (strlen(first_value) != strlen(second_value) ||
- strcmp(first_value, second_value) != 0) {
- printf("Value mismatch %s from a %s table "
- "is not %s from a %s table\n",
- first_value,
- type_to_string(g.cookies[first_index].type),
- second_value,
- type_to_string(g.cookies[second_index].type));
+ key1, key2, val1, val2);
+ if (strlen(val1) != strlen(val2) ||
+ strcmp(val1, val2) != 0) {
+ printf("Value mismatch for key %" PRIu64
+ ", %s from a %s table is not %s from a %s table\n",
+ key1, val1, type1, val2, type2);
return (ERR_DATA_MISMATCH);
}
@@ -298,79 +281,101 @@ compare_cursors(
*/
static int
diagnose_key_error(
- WT_CURSOR *first, int first_index,
- WT_CURSOR *second, int second_index)
+ WT_CURSOR *cursor1, int index1,
+ WT_CURSOR *cursor2, int index2)
{
WT_CURSOR *c;
- WT_ITEM first_key, second_key;
WT_SESSION *session;
- u_int key1i, key2i;
+ uint64_t key1, key1_orig, key2, key2_orig;
char next_uri[128], ckpt[128];
int ret;
/* Hack to avoid passing session as parameter. */
- session = first->session;
+ session = cursor1->session;
+ key1_orig = key2_orig = 0;
snprintf(ckpt, 128, "checkpoint=%s", g.checkpoint_name);
/* Save the failed keys. */
- first->get_key(first, &first_key);
- second->get_key(second, &second_key);
+ if (cursor1->get_key(cursor1, &key1_orig) != 0 ||
+ cursor2->get_key(cursor2, &key2_orig) != 0) {
+ (void)log_print_err("Error retrieving key.", EINVAL, 0);
+ goto live_check;
+ }
+
+ if (key1_orig == key2_orig)
+ goto live_check;
/* See if previous values are still valid. */
- if (first->prev(first) != 0 || second->prev(second) != 0)
+ if (cursor1->prev(cursor1) != 0 || cursor2->prev(cursor2) != 0)
return (1);
- if (get_key_int(first, first_index, &key1i) != 0 ||
- get_key_int(second, second_index, &key2i) != 0)
- log_print_err("Error decoding key", EINVAL, 1);
- else if (key1i != key2i)
- log_print_err("Now previous keys don't match", EINVAL, 0);
+ if (cursor1->get_key(cursor1, &key1) != 0 ||
+ cursor2->get_key(cursor2, &key2) != 0)
+ (void)log_print_err("Error decoding key", EINVAL, 1);
+ else if (key1 != key2)
+ (void)log_print_err("Now previous keys don't match", EINVAL, 0);
- if (first->next(first) != 0 || second->next(second) != 0)
+ if (cursor1->next(cursor1) != 0 || cursor2->next(cursor2) != 0)
return (1);
- if (get_key_int(first, first_index, &key1i) != 0 ||
- get_key_int(second, second_index, &key2i) != 0)
- log_print_err("Error decoding key", EINVAL, 1);
- else if (key1i == key2i)
- log_print_err("After prev/next keys match", EINVAL, 0);
+ if (cursor1->get_key(cursor1, &key1) != 0 ||
+ cursor2->get_key(cursor2, &key2) != 0)
+ (void)log_print_err("Error decoding key", EINVAL, 1);
+ else if (key1 == key2)
+ (void)log_print_err("After prev/next keys match", EINVAL, 0);
- if (first->next(first) != 0 || second->next(second) != 0)
+ if (cursor1->next(cursor1) != 0 || cursor2->next(cursor2) != 0)
return (1);
- if (get_key_int(first, first_index, &key1i) != 0 ||
- get_key_int(second, second_index, &key2i) != 0)
- log_print_err("Error decoding key", EINVAL, 1);
- else if (key1i == key2i)
- log_print_err("After prev/next/next keys match", EINVAL, 0);
+ if (cursor1->get_key(cursor1, &key1) != 0 ||
+ cursor2->get_key(cursor2, &key2) != 0)
+ (void)log_print_err("Error decoding key", EINVAL, 1);
+ else if (key1 == key2)
+ (void)log_print_err(
+ "After prev/next/next keys match", EINVAL, 0);
/*
* Now try opening new cursors on the checkpoints and see if we
* get the same missing key via searching.
*/
- snprintf(next_uri, 128, "table:__wt%04d", first_index);
+ snprintf(next_uri, 128, "table:__wt%04d", index1);
if (session->open_cursor(session, next_uri, NULL, ckpt, &c) != 0)
return (1);
- c->set_key(c, &first_key);
+ c->set_key(c, key1_orig);
if ((ret = c->search(c)) != 0)
- log_print_err("1st cursor didn't find 1st key\n", ret, 0);
- if (g.cookies[first_index].type == g.cookies[second_index].type) {
- c->set_key(c, &second_key);
- if ((ret = c->search(c)) != 0)
- log_print_err(
- "1st cursor didn't find 2nd key\n", ret, 0);
- }
+ (void)log_print_err("1st cursor didn't find 1st key\n", ret, 0);
+ c->set_key(c, key2_orig);
+ if ((ret = c->search(c)) != 0)
+ (void)log_print_err("1st cursor didn't find 2nd key\n", ret, 0);
c->close(c);
- snprintf(next_uri, 128, "table:__wt%04d", second_index);
+
+ snprintf(next_uri, 128, "table:__wt%04d", index2);
ret = session->open_cursor(session, next_uri, NULL, ckpt, &c);
- if (g.cookies[first_index].type == g.cookies[second_index].type) {
- c->set_key(c, &first_key);
- if ((ret = c->search(c)) != 0)
- log_print_err(
- "2nd cursor didn't find 1st key\n", ret, 0);
- }
- c->set_key(c, &second_key);
+ c->set_key(c, key1_orig);
if ((ret = c->search(c)) != 0)
- log_print_err("2nd cursor didn't find 2nd key\n", ret, 0);
- c->close(c);
+ (void)log_print_err("2nd cursor didn't find 1st key\n", ret, 0);
+ c->set_key(c, key2_orig);
+ if ((ret = c->search(c)) != 0)
+ (void)log_print_err("2nd cursor didn't find 2nd key\n", ret, 0);
+ (void)c->close(c);
+
+live_check:
+ /*
+ * Now try opening cursors on the live checkpoint to see if we get the
+ * same missing key via searching.
+ */
+ snprintf(next_uri, 128, "table:__wt%04d", index1);
+ if (session->open_cursor(session, next_uri, NULL, NULL, &c) != 0)
+ return (1);
+ c->set_key(c, key1_orig);
+ if ((ret = c->search(c)) != 0)
+ (void)log_print_err("1st cursor didn't find 1st key\n", ret, 0);
+ (void)c->close(c);
+
+ snprintf(next_uri, 128, "table:__wt%04d", index2);
+ ret = session->open_cursor(session, next_uri, NULL, NULL, &c);
+ c->set_key(c, key2_orig);
+ if ((ret = c->search(c)) != 0)
+ (void)log_print_err("2nd cursor didn't find 2nd key\n", ret, 0);
+ (void)c->close(c);
return (0);
}
diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c
index 1e2ccca8a76..1f025dc087b 100644
--- a/test/checkpoint/test_checkpoint.c
+++ b/test/checkpoint/test_checkpoint.c
@@ -35,7 +35,7 @@ static void onint(int);
static int path_setup(const char *);
static int cleanup(void);
static int usage(void);
-static int wt_connect(char *);
+static int wt_connect(const char *);
static int wt_shutdown(void);
int
@@ -43,7 +43,7 @@ main(int argc, char *argv[])
{
table_type ttype;
int ch, cnt, ret, runs;
- char *config_open, *home;
+ const char *config_open, *home;
if ((g.progname = strrchr(argv[0], '/')) == NULL)
g.progname = argv[0];
@@ -117,22 +117,29 @@ main(int argc, char *argv[])
}
argc -= optind;
- argv += optind;
if (argc != 0)
return (usage());
/* Clean up on signal. */
(void)signal(SIGINT, onint);
- path_setup(home);
+ if ((ret = path_setup(home)) != 0)
+ return (ret);
printf("%s: process %" PRIu64 "\n", g.progname, (uint64_t)getpid());
- for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) {
- printf(
- " %d: %u workers, %u tables\n",
+ for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) {
+ printf(" %d: %u workers, %u tables\n",
cnt, g.nworkers, g.ntables);
(void)cleanup(); /* Clean up previous runs */
+
+ /* Setup a fresh set of cookies in the global array. */
+ if ((g.cookies = calloc(
+ (size_t)(g.ntables), sizeof(COOKIE))) == NULL) {
+ (void)log_print_err("No memory", ENOMEM, 1);
+ break;
+ }
+
g.running = 1;
if ((ret = wt_connect(config_open)) != 0) {
@@ -140,7 +147,10 @@ main(int argc, char *argv[])
break;
}
- start_checkpoints();
+ if ((ret = start_checkpoints()) != 0) {
+ (void)log_print_err("Start checkpoints failed", ret, 1);
+ break;
+ }
if ((ret = start_workers(ttype)) != 0) {
(void)log_print_err("Start workers failed", ret, 1);
break;
@@ -160,16 +170,12 @@ main(int argc, char *argv[])
}
}
if (g.logfp != NULL)
- fclose(g.logfp);
- /*
- * Attempt to cleanup on error. Ideally we'd wait to know that the
- * checkpoint and worker threads are all done.
- */
- if (ret != 0) {
- (void)wt_shutdown();
- if (g.cookies != NULL)
- free(g.cookies);
- }
+ (void)fclose(g.logfp);
+
+ /* Ensure that cleanup is done on error. */
+ (void)wt_shutdown();
+ if (g.cookies != NULL)
+ free(g.cookies);
return (g.status);
}
@@ -178,7 +184,7 @@ main(int argc, char *argv[])
* Configure the WiredTiger connection.
*/
static int
-wt_connect(char *config_open)
+wt_connect(const char *config_open)
{
static WT_EVENT_HANDLER event_handler = {
handle_error,
@@ -210,8 +216,13 @@ wt_shutdown(void)
{
int ret;
+ if (g.conn == NULL)
+ return (0);
+
printf("Closing connection\n");
- if ((ret = g.conn->close(g.conn, NULL)) != 0)
+ ret = g.conn->close(g.conn, NULL);
+ g.conn = NULL;
+ if (ret != 0)
return (log_print_err("conn.close", ret, 1));
return (0);
}
diff --git a/test/checkpoint/workers.c b/test/checkpoint/workers.c
index 4d7ea331e88..e5e000d315e 100644
--- a/test/checkpoint/workers.c
+++ b/test/checkpoint/workers.c
@@ -44,7 +44,7 @@ create_table(WT_SESSION *session, COOKIE *cookie)
end = config + sizeof(config);
p += snprintf(p, (size_t)(end - p),
"key_format=%s,value_format=S",
- cookie->type == COL ? "r" : "u");
+ cookie->type == COL ? "r" : "q");
if (cookie->type == LSM)
(void)snprintf(p, (size_t)(end - p), ",type=lsm");
@@ -63,7 +63,6 @@ create_table(WT_SESSION *session, COOKIE *cookie)
int
start_workers(table_type type)
{
- COOKIE *cookies;
WT_SESSION *session;
struct timeval start, stop;
double seconds;
@@ -71,41 +70,40 @@ start_workers(table_type type)
int i, ret;
void *thread_ret;
+ ret = 0;
+
/* Create statistics and thread structures. */
- if ((cookies = calloc(
- (size_t)(g.ntables), sizeof(COOKIE))) == NULL ||
- (tids = calloc((size_t)(g.nworkers), sizeof(*tids))) == NULL)
+ if ((tids = calloc((size_t)(g.nworkers), sizeof(*tids))) == NULL)
return (log_print_err("calloc", errno, 1));
- if ((ret = g.conn->open_session(g.conn, NULL, NULL, &session)) != 0)
- return (log_print_err("conn.open_session", ret, 1));
+ if ((ret = g.conn->open_session(g.conn, NULL, NULL, &session)) != 0) {
+ (void)log_print_err("conn.open_session", ret, 1);
+ goto err;
+ }
/* Setup the cookies */
for (i = 0; i < g.ntables; ++i) {
- cookies[i].id = i;
+ g.cookies[i].id = i;
if (type == MIX)
- cookies[i].type = (i % MAX_TABLE_TYPE) + 1;
+ g.cookies[i].type = (i % MAX_TABLE_TYPE) + 1;
else
- cookies[i].type = type;
- snprintf(cookies[i].uri, 128,
- "%s%04d", URI_BASE, cookies[i].id);
+ g.cookies[i].type = type;
+ (void)snprintf(g.cookies[i].uri, 128,
+ "%s%04d", URI_BASE, g.cookies[i].id);
/* Should probably be atomic to avoid races. */
- if ((ret = create_table(session, &cookies[i])) != 0)
- return (ret);
+ if ((ret = create_table(session, &g.cookies[i])) != 0)
+ goto err;
}
- /*
- * Install the cookies in the global array.
- */
- g.cookies = cookies;
-
(void)gettimeofday(&start, NULL);
/* Create threads. */
for (i = 0; i < g.nworkers; ++i) {
if ((ret = pthread_create(
- &tids[i], NULL, worker, &cookies[i])) != 0)
- return (log_print_err("pthread_create", ret, 1));
+ &tids[i], NULL, worker, &g.cookies[i])) != 0) {
+ (void)log_print_err("pthread_create", ret, 1);
+ goto err;
+ }
}
/* Wait for the threads. */
@@ -117,9 +115,9 @@ start_workers(table_type type)
(stop.tv_usec - start.tv_usec) * 1e-6;
printf("Ran workers for: %f seconds\n", seconds);
- free(tids);
+err: free(tids);
- return (0);
+ return (ret);
}
/*
@@ -127,44 +125,13 @@ start_workers(table_type type)
* Write operation.
*/
static inline int
-worker_op(WT_CURSOR *cursor, COOKIE *cookie, u_int keyno)
+worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val)
{
- WT_ITEM *key, _key, *value, _value;
- u_int new_val;
int ret;
- char *old_val;
- char keybuf[64], valuebuf[64];
-
- key = &_key;
- value = &_value;
-
- if (cookie->type == COL)
- cursor->set_key(cursor, (uint32_t)keyno);
- else {
- key->data = keybuf;
- key->size = (uint32_t)
- snprintf(keybuf, sizeof(keybuf), "%017u", keyno);
- cursor->set_key(cursor, key);
- }
- new_val = keyno;
- if ((ret = cursor->search(cursor)) == 0) {
- cursor->get_value(cursor, &old_val);
- new_val = (u_int)atol(old_val) + 1;
- } else if (ret == WT_DEADLOCK)
- return (ret);
- else if (ret != WT_NOTFOUND)
- return (log_print_err("cursor.search", ret, 1));
- /*
- * The search cleared the key from our cursor - set it again. It would
- * be nice if we didn't need to.
- */
- if (cookie->type == COL)
- cursor->set_key(cursor, (uint32_t)keyno);
- else
- cursor->set_key(cursor, key);
-
- value->data = valuebuf;
- value->size = (uint32_t)snprintf(
+ char valuebuf[64];
+
+ cursor->set_key(cursor, keyno);
+ (void)snprintf(
valuebuf, sizeof(valuebuf), "%037u", new_val);
cursor->set_value(cursor, valuebuf);
if ((ret = cursor->insert(cursor)) != 0) {
@@ -202,41 +169,63 @@ real_worker(void)
WT_CURSOR **cursors;
WT_SESSION *session;
u_int i, keyno;
- int j, ret;
+ int j, ret, t_ret;
+
+ ret = t_ret = 0;
if ((cursors = calloc(
(size_t)(g.ntables), sizeof(WT_CURSOR *))) == NULL)
return (log_print_err("malloc", ENOMEM, 1));
if ((ret = g.conn->open_session(
- g.conn, NULL, "isolation=snapshot", &session)) != 0)
- return (log_print_err("conn.open_session", ret, 1));
+ g.conn, NULL, "isolation=snapshot", &session)) != 0) {
+ (void)log_print_err("conn.open_session", ret, 1);
+ goto err;
+ }
for (j = 0; j < g.ntables; j++)
- if ((ret = session->open_cursor(
- session, g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0)
- return (log_print_err("session.open_cursor", ret, 1));
+ if ((ret = session->open_cursor(session,
+ g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0) {
+ (void)log_print_err("session.open_cursor", ret, 1);
+ goto err;
+ }
for (i = 0; i < g.nops && g.running; ++i, sched_yield()) {
- session->begin_transaction(session, NULL);
+ if ((ret = session->begin_transaction(session, NULL)) != 0) {
+ (void)log_print_err(
+ "real_worker:begin_transaction", ret, 1);
+ goto err;
+ }
keyno = __wt_random() % g.nkeys + 1;
for (j = 0; j < g.ntables; j++) {
- if ((ret = worker_op(
- cursors[j], &g.cookies[j], keyno)) != 0)
+ if ((ret = worker_op(cursors[j], keyno, i)) != 0)
break;
}
- if (ret == 0)
- session->commit_transaction(session, NULL);
- else if (ret == WT_DEADLOCK)
- session->rollback_transaction(session, NULL);
- else {
+ if (ret == 0) {
+ if ((ret = session->commit_transaction(
+ session, NULL)) != 0) {
+ (void)log_print_err(
+ "real_worker:commit_transaction", ret, 1);
+ goto err;
+ }
+ } else if (ret == WT_DEADLOCK) {
+ if ((ret = session->rollback_transaction(
+ session, NULL)) != 0) {
+ (void)log_print_err(
+ "real_worker:rollback_transaction", ret, 1);
+ goto err;
+ }
+ } else {
(void)log_print_err("worker op failed", ret, 1);
- break;
+ goto err;
}
}
+
+err: if ((t_ret = session->close(session, NULL)) != 0 && ret == 0) {
+ ret = t_ret;
+ (void)log_print_err("session.close", ret, 1);
+ }
free(cursors);
- if ((ret = session->close(session, NULL)) != 0)
- return (log_print_err("session.close", ret, 1));
- return (0);
+ return (ret);
}
diff --git a/test/format/wts.c b/test/format/wts.c
index e01dd4edaf2..f4eeeafd224 100644
--- a/test/format/wts.c
+++ b/test/format/wts.c
@@ -35,9 +35,10 @@ handle_message(WT_EVENT_HANDLER *handler,
WT_UNUSED(session);
if (g.logfp != NULL)
- return (fprintf(g.logfp, "%s\n", message) < 0 ? -1 : 0);
+ return (fprintf(
+ g.logfp, "%p:%s\n", session, message) < 0 ? -1 : 0);
- return (printf("%s\n", message) < 0 ? -1 : 0);
+ return (printf("%p:%s\n", session, message) < 0 ? -1 : 0);
}
/*
@@ -328,10 +329,13 @@ wts_close(void)
{
WT_CONNECTION *conn;
int ret;
+ const char *config;
conn = g.wts_conn;
- if ((ret = conn->close(conn, NULL)) != 0)
+ config = MMRAND(0, 1) ? "leak_memory" : NULL;
+
+ if ((ret = conn->close(conn, config)) != 0)
die(ret, "connection.close");
}