summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith Bostic <keith.bostic@mongodb.com>2016-07-21 01:40:24 -0400
committerMichael Cahill <michael.cahill@mongodb.com>2016-07-21 15:40:24 +1000
commit314ef8e74a3f7afcd05117165e8008a5199d4531 (patch)
treef581e254873c16090206d3d4ba9076003568c6cf
parent91c035d7715f116284ff5cc1daaa11de98880a33 (diff)
downloadmongo-314ef8e74a3f7afcd05117165e8008a5199d4531.tar.gz
WT-2737 Scrub dirty pages rather than evicting them (#2889)
* Link/include test/testutils with wtperf so we can use the test utility macros and functions in wtperf. * Minor changes to wtperf variables, sort some options, make some things boolean. * Merge the stub memory allocation functions used by the test code and wtperf. * Remove the WT_EVICT_CLEAN flag, it's never used. * __wt_page_can_evict() doesn't need to set WT_EVICTING, the only place that cares is __evict_review(), and it already sets that flag. * Add a new reconciliation flag WT_EVICT_SCRUB; it causes reconciliation to save disk images it creates. Change the eviction of multi-block pages to optionally re-instantiate pages in memory instead of evicting them. * When instantiating pages in-memory, set the read-generation to the same value as the read-generation of the original page (unless it's set to WT_READGEN_OLDEST, in which case leave the page's read-generation unset). * Simplify the in-memory tests some. * Turn on fixed-length store tests, they're no longer slow (no clue what changed). * Sue suggested removing Helium support from wtperf. * Replace dcalloc() calls before snprintf() calls with dmalloc(). * Cosmetic cleanup: set the boundary structure's entries when initializing at the start of __rec_split_write(), don't set it at some random spot in the code. * After writing a block during checkpoint, we potentially re-use that block during eviction, and during eviction we'll want a disk image for potential re-instantiation in memory. If re-using a block but the boundary structure doesn't already have a disk image, create one. I don't think the boundary structure will ever already have a disk image in the current code, but future versions of reconciliation might, and the test is the same as the one we have to do for raw compression, which has already created the disk image. * If we're closing, don't instantiate any disk images, additionally, free any disk images we don't use. 
Both changes should fix the same problem where we have a disk image when we're truly discarding the page during a close: one should prevent us from ever having a disk image at that point, the other should prevent us from using any disk image we have. * Fixes for wtperf directory create changes: ensure the directory exists before the configuration file is dumped and create the right directories in multi-database mode.
-rw-r--r--SConstruct2
-rw-r--r--bench/wtperf/Makefile.am12
-rw-r--r--bench/wtperf/wtperf.c120
-rw-r--r--bench/wtperf/wtperf.h79
-rw-r--r--build_posix/Make.subdirs8
-rw-r--r--dist/flags.py4
-rwxr-xr-xdist/s_style2
-rw-r--r--src/btree/bt_split.c72
-rw-r--r--src/evict/evict_page.c33
-rw-r--r--src/include/btmem.h23
-rw-r--r--src/include/btree.i8
-rw-r--r--src/include/extern.h2
-rw-r--r--src/include/flags.h8
-rw-r--r--src/reconcile/rec_write.c205
-rw-r--r--test/suite/test_inmem01.py84
-rw-r--r--test/utility/misc.c15
-rw-r--r--test/utility/test_util.h1
17 files changed, 298 insertions, 380 deletions
diff --git a/SConstruct b/SConstruct
index a5dd8761d6c..b0ce771e9bd 100644
--- a/SConstruct
+++ b/SConstruct
@@ -484,7 +484,7 @@ t = env.Program("wtperf", [
"bench/wtperf/wtperf_throttle.c",
"bench/wtperf/wtperf_truncate.c",
],
- LIBS=[wtlib, shim] + wtlibs)
+ LIBS=[wtlib, shim, testutil] + wtlibs)
Default(t)
#Build the Examples
diff --git a/bench/wtperf/Makefile.am b/bench/wtperf/Makefile.am
index cc1f84b5406..57792e3887f 100644
--- a/bench/wtperf/Makefile.am
+++ b/bench/wtperf/Makefile.am
@@ -1,13 +1,17 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include
-
-LDADD = $(top_builddir)/libwiredtiger.la -lm
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = wtperf
-wtperf_LDFLAGS = -static
wtperf_SOURCES =\
config.c idle_table_cycle.c misc.c track.c wtperf.c \
wtperf.h wtperf_opt.i wtperf_throttle.c wtperf_truncate.c
+wtperf_LDADD = $(top_builddir)/test/utility/libtest_util.la
+wtperf_LDADD +=$(top_builddir)/libwiredtiger.la
+wtperf_LDADD +=-lm
+wtperf_LDFLAGS = -static
+
TESTS = smoke.sh
AM_TESTS_ENVIRONMENT = rm -rf WT_TEST ; mkdir WT_TEST ;
# automake 1.11 compatibility
diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c
index a2718bc4290..2dbf14e9f1c 100644
--- a/bench/wtperf/wtperf.c
+++ b/bench/wtperf/wtperf.c
@@ -36,7 +36,6 @@ static const CONFIG default_cfg = {
NULL, /* reopen config */
NULL, /* base_uri */
NULL, /* uris */
- NULL, /* helium_mount */
NULL, /* conn */
NULL, /* logf */
NULL, /* async */
@@ -73,14 +72,14 @@ static const char * const debug_cconfig = "";
static const char * const debug_tconfig = "";
static void *checkpoint_worker(void *);
-static int create_tables(CONFIG *);
-static int drop_all_tables(CONFIG *);
+static int drop_all_tables(CONFIG *);
static int execute_populate(CONFIG *);
static int execute_workload(CONFIG *);
static int find_table_count(CONFIG *);
static void *monitor(void *);
static void *populate_thread(void *);
static void randomize_value(CONFIG_THREAD *, char *);
+static int recreate_dir(const char *);
static int start_all_runs(CONFIG *);
static int start_run(CONFIG *);
static int start_threads(CONFIG *,
@@ -93,10 +92,6 @@ static void *worker(void *);
static uint64_t wtperf_rand(CONFIG_THREAD *);
static uint64_t wtperf_value_range(CONFIG *);
-#define HELIUM_NAME "dev1"
-#define HELIUM_PATH \
- "../../ext/test/helium/.libs/libwiredtiger_helium.so"
-#define HELIUM_CONFIG ",type=helium"
#define INDEX_COL_NAMES ",columns=(key,val)"
/* Retrieve an ID for the next insert operation. */
@@ -1894,9 +1889,6 @@ create_tables(CONFIG *cfg)
int ret;
char buf[512];
- if (cfg->create == 0)
- return (0);
-
if ((ret = cfg->conn->open_session(
cfg->conn, NULL, cfg->sess_config, &session)) != 0) {
lprintf(cfg, ret, 0,
@@ -1988,13 +1980,10 @@ start_all_runs(CONFIG *cfg)
if (strcmp(cfg->monitor_dir, cfg->home) == 0)
next_cfg->monitor_dir = new_home;
- /* Create clean home directories. */
- snprintf(cmd_buf, cmd_len, "rm -rf %s && mkdir %s",
- next_cfg->home, next_cfg->home);
- if ((ret = system(cmd_buf)) != 0) {
- fprintf(stderr, "%s: failed\n", cmd_buf);
- goto err;
- }
+ /* If creating the sub-database, recreate it's home */
+ if (cfg->create != 0)
+ recreate_dir(next_cfg->home);
+
if ((ret = pthread_create(
&threads[i], NULL, thread_run_wtperf, next_cfg)) != 0) {
lprintf(cfg, ret, 0, "Error creating thread");
@@ -2043,7 +2032,6 @@ start_run(CONFIG *cfg)
uint64_t total_ops;
uint32_t run_time;
int monitor_created, ret, t_ret;
- char helium_buf[256];
monitor_created = ret = 0;
/* [-Wconditional-uninitialized] */
@@ -2058,21 +2046,10 @@ start_run(CONFIG *cfg)
goto err;
}
- /* Configure optional Helium volume. */
- if (cfg->helium_mount != NULL) {
- snprintf(helium_buf, sizeof(helium_buf),
- "entry=wiredtiger_extension_init,config=["
- "%s=[helium_devices=\"he://./%s\","
- "helium_o_volume_truncate=1]]",
- HELIUM_NAME, cfg->helium_mount);
- if ((ret = cfg->conn->load_extension(
- cfg->conn, HELIUM_PATH, helium_buf)) != 0)
- lprintf(cfg,
- ret, 0, "Error loading Helium: %s", helium_buf);
- }
-
create_uris(cfg);
- if ((ret = create_tables(cfg)) != 0)
+
+ /* If creating, create the tables. */
+ if (cfg->create != 0 && (ret = create_tables(cfg)) != 0)
goto err;
/* Start the monitor thread. */
@@ -2202,18 +2179,21 @@ err: if (ret == 0)
extern int __wt_optind, __wt_optreset;
extern char *__wt_optarg;
+void (*custom_die)(void) = NULL;
int
main(int argc, char *argv[])
{
CONFIG *cfg, _cfg;
size_t req_len, sreq_len;
- int ch, monitor_set, ret;
- const char *opts = "C:H:h:m:O:o:T:";
+ bool monitor_set;
+ int ch, ret;
+ const char *opts = "C:h:m:O:o:T:";
const char *config_opts;
char *cc_buf, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig;
- monitor_set = ret = 0;
+ monitor_set = false;
+ ret = 0;
config_opts = NULL;
cc_buf = sess_cfg = tc_buf = user_cconfig = user_tconfig = NULL;
@@ -2239,8 +2219,12 @@ main(int argc, char *argv[])
strcat(user_cconfig, __wt_optarg);
}
break;
- case 'H':
- cfg->helium_mount = __wt_optarg;
+ case 'h':
+ cfg->home = __wt_optarg;
+ break;
+ case 'm':
+ cfg->monitor_dir = __wt_optarg;
+ monitor_set = true;
break;
case 'O':
config_opts = __wt_optarg;
@@ -2256,15 +2240,7 @@ main(int argc, char *argv[])
strcat(user_tconfig, __wt_optarg);
}
break;
- case 'h':
- cfg->home = __wt_optarg;
- break;
- case 'm':
- cfg->monitor_dir = __wt_optarg;
- monitor_set = 1;
- break;
case '?':
- fprintf(stderr, "Invalid option\n");
usage();
goto einval;
}
@@ -2320,7 +2296,7 @@ main(int argc, char *argv[])
* to 4096 if needed.
*/
req_len = strlen(",async=(enabled=true,threads=)") + 4;
- cfg->async_config = dcalloc(req_len, 1);
+ cfg->async_config = dmalloc(req_len);
snprintf(cfg->async_config, req_len,
",async=(enabled=true,threads=%" PRIu32 ")",
cfg->async_threads);
@@ -2341,13 +2317,9 @@ main(int argc, char *argv[])
}
/* Build the URI from the table name. */
- req_len = strlen("table:") +
- strlen(HELIUM_NAME) + strlen(cfg->table_name) + 2;
- cfg->base_uri = dcalloc(req_len, 1);
- snprintf(cfg->base_uri, req_len, "table:%s%s%s",
- cfg->helium_mount == NULL ? "" : HELIUM_NAME,
- cfg->helium_mount == NULL ? "" : "/",
- cfg->table_name);
+ req_len = strlen("table:") + strlen(cfg->table_name) + 2;
+ cfg->base_uri = dmalloc(req_len);
+ snprintf(cfg->base_uri, req_len, "table:%s", cfg->table_name);
/* Make stdout line buffered, so verbose output appears quickly. */
__wt_stream_set_line_buffer(stdout);
@@ -2366,13 +2338,13 @@ main(int argc, char *argv[])
if (cfg->session_count_idle > 0) {
sreq_len = strlen(",session_max=") + 6;
req_len += sreq_len;
- sess_cfg = dcalloc(sreq_len, 1);
+ sess_cfg = dmalloc(sreq_len);
snprintf(sess_cfg, sreq_len,
",session_max=%" PRIu32,
cfg->session_count_idle + cfg->workers_cnt +
cfg->populate_threads + 10);
}
- cc_buf = dcalloc(req_len, 1);
+ cc_buf = dmalloc(req_len);
/*
* This is getting hard to parse.
*/
@@ -2388,36 +2360,34 @@ main(int argc, char *argv[])
if ((ret = config_opt_str(cfg, "conn_config", cc_buf)) != 0)
goto err;
}
- if (cfg->verbose > 1 || cfg->index || cfg->helium_mount != NULL ||
+ if (cfg->verbose > 1 || cfg->index ||
user_tconfig != NULL || cfg->compress_table != NULL) {
- req_len = strlen(cfg->table_config) + strlen(HELIUM_CONFIG) +
- strlen(debug_tconfig) + 3;
+ req_len = strlen(cfg->table_config) + strlen(debug_tconfig) + 3;
if (user_tconfig != NULL)
req_len += strlen(user_tconfig);
if (cfg->compress_table != NULL)
req_len += strlen(cfg->compress_table);
if (cfg->index)
req_len += strlen(INDEX_COL_NAMES);
- tc_buf = dcalloc(req_len, 1);
+ tc_buf = dmalloc(req_len);
/*
* This is getting hard to parse.
*/
- snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s%s",
+ snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s",
cfg->table_config,
cfg->index ? INDEX_COL_NAMES : "",
cfg->compress_table ? cfg->compress_table : "",
cfg->verbose > 1 ? ",": "",
cfg->verbose > 1 ? debug_tconfig : "",
user_tconfig ? ",": "",
- user_tconfig ? user_tconfig : "",
- cfg->helium_mount == NULL ? "" : HELIUM_CONFIG);
+ user_tconfig ? user_tconfig : "");
if ((ret = config_opt_str(cfg, "table_config", tc_buf)) != 0)
goto err;
}
if (cfg->log_partial && cfg->table_count > 1) {
req_len = strlen(cfg->table_config) +
strlen(LOG_PARTIAL_CONFIG) + 1;
- cfg->partial_config = dcalloc(req_len, 1);
+ cfg->partial_config = dmalloc(req_len);
snprintf(cfg->partial_config, req_len, "%s%s",
cfg->table_config, LOG_PARTIAL_CONFIG);
}
@@ -2430,7 +2400,7 @@ main(int argc, char *argv[])
strlen(READONLY_CONFIG) + 1;
else
req_len = strlen(cfg->conn_config) + 1;
- cfg->reopen_config = dcalloc(req_len, 1);
+ cfg->reopen_config = dmalloc(req_len);
if (cfg->readonly)
snprintf(cfg->reopen_config, req_len, "%s%s",
cfg->conn_config, READONLY_CONFIG);
@@ -2442,6 +2412,12 @@ main(int argc, char *argv[])
if ((ret = config_sanity(cfg)) != 0)
goto err;
+ /* If creating, remove and re-create the home directory. */
+ if (cfg->create != 0 && (ret = recreate_dir(cfg->home)) != 0) {
+ lprintf(cfg, ret, 0, "Error re-creating home directory");
+ goto err;
+ }
+
/* Write a copy of the config. */
config_to_file(cfg);
@@ -2557,6 +2533,22 @@ stop_threads(CONFIG *cfg, u_int num, CONFIG_THREAD *threads)
}
static int
+recreate_dir(const char *name)
+{
+ char *buf;
+ size_t len;
+
+ len = strlen(name) * 2 + 100;
+ buf = dmalloc(len);
+ (void)snprintf(
+ buf, len, "rm -rf %s && mkdir %s", name, name);
+ testutil_checkfmt(system(buf), "system: %s", buf);
+ free(buf);
+
+ return (0);
+}
+
+static int
drop_all_tables(CONFIG *cfg)
{
struct timespec start, stop;
diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h
index a316b40d7fa..9f37d361755 100644
--- a/bench/wtperf/wtperf.h
+++ b/bench/wtperf/wtperf.h
@@ -29,14 +29,11 @@
#ifndef HAVE_WTPERF_H
#define HAVE_WTPERF_H
-#include <wt_internal.h>
+#include "test_util.h"
+
#include <assert.h>
#include <math.h>
-#ifdef _WIN32
-#include "windows_shim.h"
-#endif
-
#include "config_opt.h"
typedef struct __config CONFIG;
@@ -125,7 +122,6 @@ struct __config { /* Configuration structure */
char *reopen_config; /* Config string for conn reopen */
char *base_uri; /* Object URI */
char **uris; /* URIs if multiple tables */
- const char *helium_mount; /* Optional Helium mount point */
WT_CONNECTION *conn; /* Database connection */
@@ -327,75 +323,4 @@ die(int e, const char *str)
fprintf(stderr, "Call to %s failed: %s", str, wiredtiger_strerror(e));
exit(EXIT_FAILURE);
}
-
-/*
- * dmalloc --
- * Call malloc, dying on failure.
- */
-static inline void *
-dmalloc(size_t len)
-{
- void *p;
-
- if ((p = malloc(len)) == NULL)
- die(errno, "malloc");
- return (p);
-}
-
-/*
- * dcalloc --
- * Call calloc, dying on failure.
- */
-static inline void *
-dcalloc(size_t num, size_t size)
-{
- void *p;
-
- if ((p = calloc(num, size)) == NULL)
- die(errno, "calloc");
- return (p);
-}
-
-/*
- * drealloc --
- * Call realloc, dying on failure.
- */
-static inline void *
-drealloc(void *p, size_t len)
-{
- void *repl;
-
- if ((repl = realloc(p, len)) == NULL)
- die(errno, "realloc");
- return (repl);
-}
-
-/*
- * dstrdup --
- * Call strdup, dying on failure.
- */
-static inline char *
-dstrdup(const char *str)
-{
- char *p;
-
- if ((p = strdup(str)) == NULL)
- die(errno, "strdup");
- return (p);
-}
-
-/*
- * dstrndup --
- * Call emulating strndup, dying on failure. Don't use actual strndup here
- * as it is not supported within MSVC.
- */
-static inline char *
-dstrndup(const char *str, const size_t len)
-{
- char *p;
-
- p = dcalloc(len + 1, sizeof(char));
- memcpy(p, str, len);
- return (p);
-}
#endif
diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs
index 64749378ed1..0b5175e4196 100644
--- a/build_posix/Make.subdirs
+++ b/build_posix/Make.subdirs
@@ -18,14 +18,15 @@ ext/extractors/csv
ext/test/kvs_bdb HAVE_BERKELEY_DB
.
api/leveldb LEVELDB
-bench/wtperf
examples/c
lang/java JAVA
examples/java JAVA
lang/python PYTHON
-# Make the tests
+# Test/Benchmark support library.
test/utility
+
+# Test programs.
test/bloom
test/checkpoint
test/csuite
@@ -39,3 +40,6 @@ test/readonly
test/recovery
test/salvage
test/thread
+
+# Benchmark programs.
+bench/wtperf
diff --git a/dist/flags.py b/dist/flags.py
index 7763c661259..8091283a8c0 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -39,10 +39,10 @@ flags = {
'rec_write' : [
'CHECKPOINTING',
'EVICTING',
- 'EVICT_CLEAN',
- 'EVICT_INMEM_SPLIT',
'EVICT_IN_MEMORY',
+ 'EVICT_INMEM_SPLIT',
'EVICT_LOOKASIDE',
+ 'EVICT_SCRUB',
'EVICT_UPDATE_RESTORE',
'VISIBILITY_ERR',
],
diff --git a/dist/s_style b/dist/s_style
index a222c004cc3..e33db5a5fab 100755
--- a/dist/s_style
+++ b/dist/s_style
@@ -33,7 +33,7 @@ else
exit 1;
fi
- egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in|is is|it it|of of|the the|this this|to to|was was|were were|when when|with with|a an|an a|a the|the a' $f > $t
+ egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in[^-]|is is|it it|of of|the the|this this|to to|was was|were were|when when|with with|a an|an a|a the|the a' $f > $t
test -s $t && {
echo "paired typo"
echo "============================"
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index ac352b79a25..0ad3acaea41 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -1476,7 +1476,7 @@ err: if (parent != NULL)
*/
static int
__split_multi_inmem(
- WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref, WT_MULTI *multi)
+ WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT_REF *ref)
{
WT_CURSOR_BTREE cbt;
WT_DECL_ITEM(key);
@@ -1561,6 +1561,15 @@ __split_multi_inmem(
}
/*
+ * Put the re-instantiated page in the same LRU queue location as the
+ * original page, unless this was a forced eviction, in which case we
+ * leave the new page with the read generation unset. Eviction will
+ * set the read generation next time it visits this page.
+ */
+ if (orig->read_gen != WT_READGEN_OLDEST)
+ page->read_gen = orig->read_gen;
+
+ /*
* If we modified the page above, it will have set the first dirty
* transaction to the last transaction currently running. However, the
* updates we installed may be older than that. Set the first dirty
@@ -1639,19 +1648,17 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref)
*/
int
__wt_multi_to_ref(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp)
+ WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing)
{
WT_ADDR *addr;
WT_IKEY *ikey;
WT_REF *ref;
- size_t incr;
-
- incr = 0;
/* Allocate an underlying WT_REF. */
WT_RET(__wt_calloc_one(session, refp));
ref = *refp;
- incr += sizeof(WT_REF);
+ if (incrp)
+ *incrp += sizeof(WT_REF);
/*
* Set the WT_REF key before (optionally) building the page, underlying
@@ -1663,21 +1670,34 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
ikey = multi->key.ikey;
WT_RET(__wt_row_ikey(
session, 0, WT_IKEY_DATA(ikey), ikey->size, ref));
- incr += sizeof(WT_IKEY) + ikey->size;
+ if (incrp)
+ *incrp += sizeof(WT_IKEY) + ikey->size;
break;
default:
ref->ref_recno = multi->key.recno;
break;
}
- /* If there's a disk image, build a page, otherwise set the address. */
- if (multi->disk_image == NULL) {
- /*
- * Copy the address: we could simply take the buffer, but that
- * would complicate error handling, freeing the reference array
- * would have to avoid freeing the memory, and it's not worth
- * the confusion.
- */
+ /* There should be an address or a disk image (or both). */
+ WT_ASSERT(session,
+ multi->addr.addr != NULL || multi->disk_image != NULL);
+
+ /* If we're closing the file, there better be an address. */
+ WT_ASSERT(session, multi->addr.addr != NULL || !closing);
+
+ /* Verify any disk image we have. */
+ WT_ASSERT(session, multi->disk_image == NULL ||
+ __wt_verify_dsk_image(session,
+ "[page instantiate]", multi->disk_image, 0, false) == 0);
+
+ /*
+ * If there's an address, the page was written, set it.
+ *
+ * Copy the address: we could simply take the buffer, but that would
+ * complicate error handling, freeing the reference array would have
+ * to avoid freeing the memory, and it's not worth the confusion.
+ */
+ if (multi->addr.addr != NULL) {
WT_RET(__wt_calloc_one(session, &addr));
ref->addr = addr;
addr->size = multi->addr.size;
@@ -1685,14 +1705,20 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
WT_RET(__wt_strndup(session,
multi->addr.addr, addr->size, &addr->addr));
ref->state = WT_REF_DISK;
- } else {
- WT_RET(__split_multi_inmem(session, page, ref, multi));
+ }
+
+ /*
+ * If we have a disk image and we're not closing the file,
+ * re-instantiate the page.
+ *
+ * Discard any page image we don't use.
+ */
+ if (multi->disk_image != NULL && !closing) {
+ WT_RET(__split_multi_inmem(session, page, multi, ref));
ref->state = WT_REF_MEM;
}
+ __wt_free(session, multi->disk_image);
- /* Optionally return changes in the memory footprint. */
- if (incrp != NULL)
- *incrp += incr;
return (0);
}
@@ -2096,8 +2122,8 @@ __split_multi(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
*/
WT_RET(__wt_calloc_def(session, new_entries, &ref_new));
for (i = 0; i < new_entries; ++i)
- WT_ERR(__wt_multi_to_ref(session,
- page, &mod->mod_multi[i], &ref_new[i], &parent_incr));
+ WT_ERR(__wt_multi_to_ref(session, page,
+ &mod->mod_multi[i], &ref_new[i], &parent_incr, closing));
/*
* Split into the parent; if we're closing the file, we hold it
@@ -2215,7 +2241,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
WT_RET(__wt_calloc_one(session, &new));
new->ref_recno = ref->ref_recno;
- WT_ERR(__split_multi_inmem(session, page, new, &mod->mod_multi[0]));
+ WT_ERR(__split_multi_inmem(session, page, &mod->mod_multi[0], new));
/*
* The rewrite succeeded, we can no longer fail.
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 53b22953669..5ef6a6370b0 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -156,7 +156,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
/* Update the reference and discard the page. */
if (__wt_ref_is_root(ref))
__wt_ref_out(session, ref);
- else if (tree_dead || (clean_page && !LF_ISSET(WT_EVICT_IN_MEMORY)))
+ else if ((clean_page && !LF_ISSET(WT_EVICT_IN_MEMORY)) || tree_dead)
/*
* Pages that belong to dead trees never write back to disk
* and can't support page splits.
@@ -315,9 +315,10 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* write. Take advantage of the fact we have exclusive access
* to the page and rewrite it in memory.
*/
- if (mod->mod_multi_entries == 1)
+ if (mod->mod_multi_entries == 1) {
+ WT_ASSERT(session, closing == false);
WT_RET(__wt_split_rewrite(session, ref));
- else
+ } else
WT_RET(__wt_split_multi(session, ref, closing));
break;
case WT_PM_REC_REPLACE: /* 1-for-1 page swap */
@@ -476,10 +477,15 @@ __evict_review(
* If we have an exclusive lock (we're discarding the tree), assert
* there are no updates we cannot read.
*
- * Otherwise, if the page we're evicting is a leaf page marked for
- * forced eviction, set the update-restore flag, so reconciliation will
- * write blocks it can write and create a list of skipped updates for
- * blocks it cannot write. This is how forced eviction of active, huge
+ * Don't set any other flags for internal pages: they don't have update
+ * lists to be saved and restored, nor can we re-create them in memory.
+ *
+ * For leaf pages:
+ *
+ * If an in-memory configuration or the page is being forcibly evicted,
+ * set the update-restore flag, so reconciliation will write blocks it
+ * can write and create a list of skipped updates for blocks it cannot
+ * write, along with disk images. This is how eviction of active, huge
* pages works: we take a big page and reconcile it into blocks, some of
* which we write and discard, the rest of which we re-create as smaller
* in-memory pages, (restoring the updates that stopped us from writing
@@ -490,20 +496,27 @@ __evict_review(
* allowing the eviction of pages we'd otherwise have to retain in cache
* to support older readers.
*
- * Don't set the update-restore or lookaside table flags for internal
- * pages, they don't have update lists that can be saved and restored.
+ * Finally, if we don't need to do eviction at the moment, create disk
+ * images of split pages in order to re-instantiate them.
*/
cache = S2C(session)->cache;
if (closing)
LF_SET(WT_VISIBILITY_ERR);
else if (!WT_PAGE_IS_INTERNAL(page)) {
if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- LF_SET(WT_EVICT_IN_MEMORY | WT_EVICT_UPDATE_RESTORE);
+ LF_SET(WT_EVICT_IN_MEMORY |
+ WT_EVICT_UPDATE_RESTORE | WT_EVICT_SCRUB);
else if (page->read_gen == WT_READGEN_OLDEST ||
page->memory_footprint > S2BT(session)->splitmempage)
LF_SET(WT_EVICT_UPDATE_RESTORE);
else if (F_ISSET(cache, WT_CACHE_STUCK))
LF_SET(WT_EVICT_LOOKASIDE);
+ /*
+ * If we aren't trying to free space in the cache, just
+ * scrub the page and keep it around.
+ */
+ if (cache->state != WT_EVICT_STATE_ALL)
+ LF_SET(WT_EVICT_SCRUB);
}
*flagsp = flags;
diff --git a/src/include/btmem.h b/src/include/btmem.h
index 9700b6f4761..e7ebbd06c3a 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -266,14 +266,20 @@ struct __wt_page_modify {
} key;
/*
- * Eviction, but the block wasn't written: either an in-memory
- * configuration or unresolved updates prevented the write.
- * There may be a list of unresolved updates, there's always an
- * associated disk image.
+ * A disk image that may or may not have been written, used to
+ * check for matching blocks to avoid re-writing a page, and to
+ * re-instantiate the page in memory.
+ */
+ void *disk_image;
+
+ /*
+ * List of unresolved updates. Updates are either a WT_INSERT
+ * or a row-store leaf page entry; when creating lookaside
+ * records, there is an additional value, the committed item's
+ * transaction ID.
*
- * Saved updates are either a WT_INSERT, or a row-store leaf
- * page entry; in the case of creating lookaside records, there
- * is an additional value, the committed item's transaction ID.
+ * If there are unresolved updates, the block wasn't written and
+ * there will always be a disk image.
*/
struct __wt_save_upd {
WT_INSERT *ins;
@@ -281,10 +287,9 @@ struct __wt_page_modify {
uint64_t onpage_txn;
} *supd;
uint32_t supd_entries;
- void *disk_image;
/*
- * Block was written: address, size and checksum.
+ * Disk image was written: address, size and checksum.
* On subsequent reconciliations of this page, we avoid writing
* the block if it's unchanged by comparing size and checksum;
* the reuse flag is set when the block is unchanged and we're
diff --git a/src/include/btree.i b/src/include/btree.i
index c7950b4ea26..623ac75c50a 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -1150,19 +1150,13 @@ __wt_page_can_evict(
WT_PAGE_MODIFY *mod;
bool modified;
- if (evict_flagsp != NULL)
- *evict_flagsp = WT_EVICTING;
-
btree = S2BT(session);
page = ref->page;
mod = page->modify;
/* Pages that have never been modified can always be evicted. */
- if (mod == NULL) {
- if (evict_flagsp != NULL)
- FLD_SET(*evict_flagsp, WT_EVICT_CLEAN);
+ if (mod == NULL)
return (true);
- }
/*
* Check for in-memory splits before other eviction tests. If the page
diff --git a/src/include/extern.h b/src/include/extern.h
index 176f0908ffb..37a5e2a636c 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -161,7 +161,7 @@ extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPD
extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]);
extern void __wt_split_stash_discard(WT_SESSION_IMPL *session);
extern void __wt_split_stash_discard_all( WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session);
-extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp);
+extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing);
extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing);
extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref);
diff --git a/src/include/flags.h b/src/include/flags.h
index a411e997793..9346605ed24 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -23,10 +23,10 @@
#define WT_CONN_SERVER_SWEEP 0x00020000
#define WT_CONN_WAS_BACKUP 0x00040000
#define WT_EVICTING 0x00000002
-#define WT_EVICT_CLEAN 0x00000004
-#define WT_EVICT_INMEM_SPLIT 0x00000008
-#define WT_EVICT_IN_MEMORY 0x00000010
-#define WT_EVICT_LOOKASIDE 0x00000020
+#define WT_EVICT_INMEM_SPLIT 0x00000004
+#define WT_EVICT_IN_MEMORY 0x00000008
+#define WT_EVICT_LOOKASIDE 0x00000010
+#define WT_EVICT_SCRUB 0x00000020
#define WT_EVICT_UPDATE_RESTORE 0x00000040
#define WT_LOGSCAN_FIRST 0x00000001
#define WT_LOGSCAN_FROM_CKP 0x00000002
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index 8cd65f29d67..44d41040230 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -159,9 +159,16 @@ typedef struct {
WT_ADDR addr; /* Split's written location */
uint32_t size; /* Split's size */
uint32_t cksum; /* Split's checksum */
+
void *disk_image; /* Split's disk image */
/*
+ * Raw compression, the disk image being written is already
+ * compressed.
+ */
+ bool already_compressed;
+
+ /*
* Saved update list, supporting the WT_EVICT_UPDATE_RESTORE and
* WT_EVICT_LOOKASIDE configurations.
*/
@@ -175,13 +182,6 @@ typedef struct {
* column-store key.
*/
WT_ITEM key; /* Promoted row-store key */
-
- /*
- * During wrapup, after reconciling the root page, we write a
- * final block as part of a checkpoint. If raw compression
- * was configured, that block may have already been compressed.
- */
- bool already_compressed;
} *bnd; /* Saved boundaries */
uint32_t bnd_next; /* Next boundary slot */
uint32_t bnd_next_max; /* Maximum boundary slots used */
@@ -667,7 +667,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
WT_ASSERT(session, mod->mod_multi[i].supd == NULL);
WT_ERR(__wt_multi_to_ref(session,
- next, &mod->mod_multi[i], &pindex->index[i], NULL));
+ next, &mod->mod_multi[i], &pindex->index[i], NULL, false));
pindex->index[i]->home = next;
}
@@ -1860,18 +1860,19 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd)
WT_CLEAR(bnd->addr);
bnd->size = 0;
bnd->cksum = 0;
+
__wt_free(session, bnd->disk_image);
__wt_free(session, bnd->supd);
bnd->supd_next = 0;
bnd->supd_allocated = 0;
+ bnd->already_compressed = false;
+
/*
* Don't touch the key, we re-use that memory in each new
* reconciliation.
*/
-
- bnd->already_compressed = false;
}
/*
@@ -2314,7 +2315,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
/* Hitting a page boundary resets the dictionary, in all cases. */
__rec_dictionary_reset(r);
- inuse = WT_PTRDIFF32(r->first_free, dsk);
+ inuse = WT_PTRDIFF(r->first_free, dsk);
switch (r->bnd_state) {
case SPLIT_BOUNDARY:
/*
@@ -2484,7 +2485,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
WT_COMPRESSOR *compressor;
WT_DECL_RET;
WT_ITEM *dst, *write_ref;
- WT_PAGE_HEADER *dsk, *dsk_dst;
+ WT_PAGE_HEADER *dsk, *dsk_dst, *disk_image;
WT_SESSION *wt_session;
size_t corrected_page_size, extra_skip, len, result_len;
uint64_t recno;
@@ -2770,7 +2771,8 @@ no_slots:
if (result_slots != 0) {
/*
- * We have a block, finalize the header information.
+ * We have a block, finalize the compressed disk image's header
+ * information.
*/
dst->size = result_len + WT_BLOCK_COMPRESS_SKIP;
dsk_dst = dst->mem;
@@ -2780,6 +2782,19 @@ no_slots:
dsk_dst->u.entries = r->raw_entries[result_slots - 1];
/*
+ * Optionally keep the disk image in cache. Update the initial
+ * fields to reflect the actual disk image that was compressed.
+ */
+ if (F_ISSET(r, WT_EVICT_SCRUB)) {
+ WT_RET(__wt_strndup(session, dsk,
+ dsk_dst->mem_size, &last->disk_image));
+ disk_image = last->disk_image;
+ disk_image->recno = last->recno;
+ disk_image->mem_size = dsk_dst->mem_size;
+ disk_image->u.entries = dsk_dst->u.entries;
+ }
+
+ /*
* There is likely a remnant in the working buffer that didn't
* get compressed; copy it down to the start of the buffer and
* update the starting record number, free space and so on.
@@ -2893,48 +2908,6 @@ split_grow: /*
}
/*
- * __rec_raw_decompress --
- * Decompress a raw-compressed image.
- */
-static int
-__rec_raw_decompress(
- WT_SESSION_IMPL *session, const void *image, size_t size, void *retp)
-{
- WT_BTREE *btree;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_PAGE_HEADER const *dsk;
- size_t result_len;
-
- btree = S2BT(session);
- dsk = image;
-
- /*
- * We skipped an update and we can't write a block, but unfortunately,
- * the block has already been compressed. Decompress the block so we
- * can subsequently re-instantiate it in memory.
- */
- WT_RET(__wt_scr_alloc(session, dsk->mem_size, &tmp));
- memcpy(tmp->mem, image, WT_BLOCK_COMPRESS_SKIP);
- WT_ERR(btree->compressor->decompress(btree->compressor,
- &session->iface,
- (uint8_t *)image + WT_BLOCK_COMPRESS_SKIP,
- size - WT_BLOCK_COMPRESS_SKIP,
- (uint8_t *)tmp->mem + WT_BLOCK_COMPRESS_SKIP,
- dsk->mem_size - WT_BLOCK_COMPRESS_SKIP,
- &result_len));
- if (result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP)
- WT_ERR(__wt_illegal_value(session, btree->dhandle->name));
-
- WT_ERR(__wt_strndup(session, tmp->data, dsk->mem_size, retp));
- WT_ASSERT(session, __wt_verify_dsk_image(session,
- "[raw evict split]", tmp->data, dsk->mem_size, false) == 0);
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
-}
-
-/*
* __rec_split_raw --
* Raw compression split routine.
*/
@@ -3041,7 +3014,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
if (r->raw_compression && r->entries != 0) {
while (r->entries != 0) {
data_size =
- WT_PTRDIFF32(r->first_free, r->disk_image.mem);
+ WT_PTRDIFF(r->first_free, r->disk_image.mem);
if (data_size <= btree->allocsize)
break;
WT_RET(__rec_split_raw_worker(session, r, 0, true));
@@ -3170,8 +3143,6 @@ __rec_split_write(WT_SESSION_IMPL *session,
page = r->page;
mod = page->modify;
- WT_RET(__wt_scr_alloc(session, 0, &key));
-
/* Set the zero-length value flag in the page header. */
if (dsk->type == WT_PAGE_ROW_LEAF) {
F_CLR(dsk, WT_PAGE_EMPTY_V_ALL | WT_PAGE_EMPTY_V_NONE);
@@ -3182,6 +3153,8 @@ __rec_split_write(WT_SESSION_IMPL *session,
F_SET(dsk, WT_PAGE_EMPTY_V_NONE);
}
+ bnd->entries = r->entries;
+
/* Initialize the address (set the page type for the parent). */
switch (dsk->type) {
case WT_PAGE_COL_FIX:
@@ -3195,9 +3168,8 @@ __rec_split_write(WT_SESSION_IMPL *session,
case WT_PAGE_ROW_INT:
bnd->addr.type = WT_ADDR_INT;
break;
- WT_ILLEGAL_VALUE_ERR(session);
+ WT_ILLEGAL_VALUE(session);
}
-
bnd->size = (uint32_t)buf->size;
bnd->cksum = 0;
@@ -3209,6 +3181,8 @@ __rec_split_write(WT_SESSION_IMPL *session,
* This code requires a key be filled in for the next block (or the
* last block flag be set, if there's no next block).
*/
+ if (page->type == WT_PAGE_ROW_LEAF)
+ WT_RET(__wt_scr_alloc(session, 0, &key));
for (i = 0, supd = r->supd; i < r->supd_next; ++i, ++supd) {
/* The last block gets all remaining saved updates. */
if (last_block) {
@@ -3273,33 +3247,11 @@ supd_check_complete:
* image, we can't actually write it. Instead, we will re-instantiate
* the page using the disk image and any list of updates we skipped.
*/
- if (F_ISSET(r, WT_EVICT_IN_MEMORY) ||
- (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL)) {
-
- /* Statistics tracking that we used update/restore. */
- if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL)
- r->cache_write_restore = true;
-
- /*
- * If the buffer is compressed (raw compression was configured),
- * we have to decompress it so we can instantiate it later. It's
- * a slow and convoluted path, but it's also a rare one and it's
- * not worth making it faster. Else, the disk image is ready,
- * copy it into place for later. It's possible the disk image
- * has no items; we have to flag that for verification, it's a
- * special case since read/writing empty pages isn't generally
- * allowed.
- */
- if (bnd->already_compressed)
- WT_ERR(__rec_raw_decompress(
- session, buf->data, buf->size, &bnd->disk_image));
- else {
- WT_ERR(__wt_strndup(
- session, buf->data, buf->size, &bnd->disk_image));
- WT_ASSERT(session, __wt_verify_dsk_image(session,
- "[evict split]", buf->data, buf->size, true) == 0);
- }
- goto done;
+ if (F_ISSET(r, WT_EVICT_IN_MEMORY))
+ goto copy_image;
+ if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) {
+ r->cache_write_restore = true;
+ goto copy_image;
}
/*
@@ -3342,14 +3294,15 @@ supd_check_complete:
multi->addr.reuse = 1;
bnd->addr = multi->addr;
+ bnd->disk_image = multi->disk_image;
+ multi->disk_image = NULL;
+
WT_STAT_FAST_DATA_INCR(session, rec_page_match);
- goto done;
+ goto copy_image;
}
}
}
- bnd->entries = r->entries;
-
#ifdef HAVE_VERBOSE
/* Output a verbose message if we create a page without many entries */
if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6)
@@ -3373,9 +3326,17 @@ supd_check_complete:
* the database's lookaside store.
*/
if (F_ISSET(r, WT_EVICT_LOOKASIDE) && bnd->supd != NULL)
- ret = __rec_update_las(session, r, btree->id, bnd);
+ WT_ERR(__rec_update_las(session, r, btree->id, bnd));
+
+copy_image:
+ /*
+ * Optionally keep the disk image in cache (raw compression has already
+ * made a copy).
+ */
+ if (F_ISSET(r, WT_EVICT_SCRUB) && !bnd->disk_image)
+ WT_ERR(__wt_strndup(
+ session, buf->data, buf->size, &bnd->disk_image));
-done:
err: __wt_scr_free(session, &key);
return (ret);
}
@@ -5671,13 +5632,13 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
bnd = &r->bnd[0];
/*
- * If saving/restoring changes for this page and there's only
- * one block, there's nothing to write. This is an in-memory
- * configuration or a special case of forced eviction: set up
+ * If in-memory, or saving/restoring changes for this page and
+ * there's only one block, there's nothing to write. Set up
* a single block as if to split, then use that disk image to
* rewrite the page in memory.
*/
- if (bnd->disk_image != NULL)
+ if (F_ISSET(r, WT_EVICT_IN_MEMORY) ||
+ (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL))
goto split;
/*
@@ -5826,19 +5787,19 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_RET(__wt_row_ikey_alloc(session, 0,
bnd->key.data, bnd->key.size, &multi->key.ikey));
- if (bnd->disk_image == NULL) {
- multi->addr = bnd->addr;
- multi->addr.reuse = 0;
- multi->size = bnd->size;
- multi->cksum = bnd->cksum;
- bnd->addr.addr = NULL;
- } else {
- multi->supd = bnd->supd;
- multi->supd_entries = bnd->supd_next;
- bnd->supd = NULL;
- multi->disk_image = bnd->disk_image;
- bnd->disk_image = NULL;
- }
+ /* Copy any disk image. */
+ multi->supd = bnd->supd;
+ multi->supd_entries = bnd->supd_next;
+ bnd->supd = NULL;
+ multi->disk_image = bnd->disk_image;
+ bnd->disk_image = NULL;
+
+ /* Copy any address. */
+ multi->addr = bnd->addr;
+ multi->addr.reuse = 0;
+ multi->size = bnd->size;
+ multi->cksum = bnd->cksum;
+ bnd->addr.addr = NULL;
}
mod->mod_multi_entries = r->bnd_next;
@@ -5866,19 +5827,19 @@ __rec_split_col(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) {
multi->key.recno = bnd->recno;
- if (bnd->disk_image == NULL) {
- multi->addr = bnd->addr;
- multi->addr.reuse = 0;
- multi->size = bnd->size;
- multi->cksum = bnd->cksum;
- bnd->addr.addr = NULL;
- } else {
- multi->supd = bnd->supd;
- multi->supd_entries = bnd->supd_next;
- bnd->supd = NULL;
- multi->disk_image = bnd->disk_image;
- bnd->disk_image = NULL;
- }
+ /* Copy any disk image. */
+ multi->supd = bnd->supd;
+ multi->supd_entries = bnd->supd_next;
+ bnd->supd = NULL;
+ multi->disk_image = bnd->disk_image;
+ bnd->disk_image = NULL;
+
+ /* Copy any address. */
+ multi->addr = bnd->addr;
+ multi->addr.reuse = 0;
+ multi->size = bnd->size;
+ multi->cksum = bnd->cksum;
+ bnd->addr.addr = NULL;
}
mod->mod_multi_entries = r->bnd_next;
diff --git a/test/suite/test_inmem01.py b/test/suite/test_inmem01.py
index 875ebb2bfa7..fd2b04bf7e3 100644
--- a/test/suite/test_inmem01.py
+++ b/test/suite/test_inmem01.py
@@ -35,90 +35,68 @@ from wtscenario import check_scenarios
# test_inmem01.py
# Test in-memory configuration.
class test_inmem01(wttest.WiredTigerTestCase):
- name = 'inmem01'
- """
- In memory configuration still creates files on disk, but has limits
- in terms of how much data can be written.
- Test various scenarios including:
- - Add a small amount of data, ensure it is present.
- - Add more data than would fit into the configured cache.
- - Fill the cache with data, remove some data, ensure more data can be
- inserted (after a reasonable amount of time for space to be reclaimed)
- - Run queries after adding, removing and re-inserting data.
- - Try out keeping a cursor open while adding new data.
- """
+ uri = 'table:inmem01'
+ conn_config = \
+ 'cache_size=5MB,file_manager=(close_idle_time=0),in_memory=true'
+ table_config = ',memory_page_max=32k,leaf_page_max=4k'
+
scenarios = check_scenarios([
- ('col', dict(tablekind='col')),
- # Fixed length is very slow, disable it for now
- #('fix', dict(tablekind='fix')),
- ('row', dict(tablekind='row'))
+ ('col', dict(fmt='key_format=r,value_format=S')),
+ ('fix', dict(fmt='key_format=r,value_format=8t')),
+ ('row', dict(fmt='key_format=S,value_format=S'))
])
- # create an in-memory database
- conn_config = 'cache_size=5MB,' + \
- 'file_manager=(close_idle_time=0),in_memory=true'
-
- def get_table_config(self):
- kf = 'key_format='
- vf = 'value_format='
- if self.tablekind == 'row':
- kf = kf + 'S'
- else:
- kf = kf + 'r' # record format
- if self.tablekind == 'fix':
- vf = vf + '8t'
- else:
- vf = vf + 'S'
- return 'memory_page_max=32k,leaf_page_max=4k,' + kf + ',' + vf
-
+ # Smoke-test in-memory configurations, add a small amount of data and
+ # ensure it's visible.
def test_insert(self):
- table_config = self.get_table_config()
- simple_populate(self,
- "table:" + self.name, table_config, 1000)
- # Ensure the data is visible.
- simple_populate_check(self, 'table:' + self.name, 1000)
+ config = self.fmt + self.table_config
+ simple_populate(self, self.uri, config, 1000)
+ simple_populate_check(self, self.uri, 1000)
+ # Add more data than fits into the configured cache and verify it fails.
def test_insert_over_capacity(self):
- table_config = self.get_table_config()
+ config = self.fmt + self.table_config
msg = '/WT_CACHE_FULL.*/'
self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
- lambda:simple_populate(self,
- "table:" + self.name, table_config, 10000000), msg)
+ lambda:simple_populate(self, self.uri, config, 10000000), msg)
- # Figure out the last key we inserted.
- cursor = self.session.open_cursor('table:' + self.name, None)
+ # Figure out the last key we successfully inserted, and check all
+ # previous inserts are still there.
+ cursor = self.session.open_cursor(self.uri, None)
cursor.prev()
last_key = int(cursor.get_key())
- simple_populate_check(self, 'table:' + self.name, last_key)
+ simple_populate_check(self, self.uri, last_key)
+ # Fill the cache with data, remove some data, ensure more data can be
+ # inserted (after a reasonable amount of time for space to be reclaimed).
def test_insert_over_delete(self):
- table_config = self.get_table_config()
+ config = self.fmt + self.table_config
msg = '/WT_CACHE_FULL.*/'
self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
- lambda:simple_populate(self,
- "table:" + self.name, table_config, 10000000), msg)
+ lambda:simple_populate(self, self.uri, config, 10000000), msg)
# Now that the database contains as much data as will fit into
# the configured cache, verify removes succeed.
- cursor = self.session.open_cursor('table:' + self.name, None)
+ cursor = self.session.open_cursor(self.uri, None)
for i in range(1, 100):
cursor.set_key(key_populate(cursor, i))
cursor.remove()
+ # Run queries after adding, removing and re-inserting data.
+ # Try out keeping a cursor open while adding new data.
def test_insert_over_delete_replace(self):
- table_config = self.get_table_config()
+ config = self.fmt + self.table_config
msg = '/WT_CACHE_FULL.*/'
self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
- lambda:simple_populate(self,
- "table:" + self.name, table_config, 10000000), msg)
+ lambda:simple_populate(self, self.uri, config, 10000000), msg)
- cursor = self.session.open_cursor('table:' + self.name, None)
+ cursor = self.session.open_cursor(self.uri, None)
cursor.prev()
last_key = int(cursor.get_key())
# Now that the database contains as much data as will fit into
# the configured cache, verify removes succeed.
- cursor = self.session.open_cursor('table:' + self.name, None)
+ cursor = self.session.open_cursor(self.uri, None)
for i in range(1, last_key / 4, 1):
cursor.set_key(key_populate(cursor, i))
cursor.remove()
diff --git a/test/utility/misc.c b/test/utility/misc.c
index dfc655dec1a..dffd29a5b6a 100644
--- a/test/utility/misc.c
+++ b/test/utility/misc.c
@@ -192,3 +192,18 @@ dstrdup(const void *str)
return (p);
testutil_die(errno, "strdup");
}
+
+/*
+ * dstrndup --
+ *	Emulate strndup, dying on failure: return a newly allocated,
+ * NUL-terminated copy of at most len bytes of str. Copying stops at the
+ * first NUL byte found within the first len bytes, so a string shorter than
+ * len is never read past its terminator. Don't use actual strndup here as
+ * it is not supported within MSVC.
+ */
+void *
+dstrndup(const char *str, size_t len)
+{
+	const char *nul;
+	char *p;
+
+	/* Bound the copy by the string's real length, as strndup does. */
+	if ((nul = memchr(str, '\0', len)) != NULL)
+		len = (size_t)(nul - str);
+
+	/* dcalloc zero-fills, so the byte after the copy is the terminator. */
+	p = dcalloc(len + 1, sizeof(char));
+	memcpy(p, str, len);
+	return (p);
+}
diff --git a/test/utility/test_util.h b/test/utility/test_util.h
index 66ff8de2d19..821e06084d2 100644
--- a/test/utility/test_util.h
+++ b/test/utility/test_util.h
@@ -115,6 +115,7 @@ void *dcalloc(size_t, size_t);
void *dmalloc(size_t);
void *drealloc(void *, size_t);
void *dstrdup(const void *);
+void *dstrndup(const char *, size_t);
void testutil_clean_work_dir(char *);
void testutil_cleanup(TEST_OPTS *);
void testutil_make_work_dir(char *);