author    Alex Gorrod <alexg@wiredtiger.com>  2014-03-04 16:04:48 +1100
committer Alex Gorrod <alexg@wiredtiger.com>  2014-03-04 16:04:48 +1100
commit    be98ccec7299820ab9cbfa78581d1400b5f5ec65 (patch)
tree      73f63490811959378dd453de3a3d00d4e1ca334c
parent    006f6b7afec5d29351c6c1bf79268cb0d1c011fc (diff)
parent    4deb7b35e421150a6d92ba3c197eab438b63cef7 (diff)
download  mongo-be98ccec7299820ab9cbfa78581d1400b5f5ec65.tar.gz
Merge branch 'develop'
-rw-r--r--  NEWS | 26
-rw-r--r--  README | 6
-rw-r--r--  RELEASE | 2
-rw-r--r--  bench/wtperf/config.c | 118
-rw-r--r--  bench/wtperf/misc.c | 19
-rw-r--r--  bench/wtperf/runners/fruit-lsm.wtperf | 2
-rw-r--r--  bench/wtperf/runners/fruit-short.wtperf | 2
-rw-r--r--  bench/wtperf/runners/medium-lsm-compact.wtperf | 2
-rw-r--r--  bench/wtperf/runners/multi-btree.wtperf | 13
-rw-r--r--  bench/wtperf/runners/test1-500m-lsm.wtperf | 7
-rw-r--r--  bench/wtperf/runners/test1-50m-lsm.wtperf | 6
-rw-r--r--  bench/wtperf/runners/test2-500m-lsm.wtperf | 7
-rw-r--r--  bench/wtperf/runners/test2-50m-lsm.wtperf | 4
-rw-r--r--  bench/wtperf/runners/test3-500m-lsm.wtperf | 7
-rw-r--r--  bench/wtperf/runners/test3-50m-lsm.wtperf | 9
-rw-r--r--  bench/wtperf/runners/test4-500m-lsm.wtperf | 7
-rw-r--r--  bench/wtperf/runners/test4-50m-lsm.wtperf | 6
-rw-r--r--  bench/wtperf/runners/voxer-10k.wtperf | 19
-rw-r--r--  bench/wtperf/runners/voxer-130k.wtperf | 19
-rwxr-xr-x  bench/wtperf/smoke.sh | 2
-rw-r--r--  bench/wtperf/track.c | 4
-rw-r--r--  bench/wtperf/wtperf.c | 950
-rw-r--r--  bench/wtperf/wtperf.h | 46
-rw-r--r--  bench/wtperf/wtperf_opt.i | 14
-rw-r--r--  build_posix/aclocal/version-set.m4 | 4
-rw-r--r--  build_posix/aclocal/version.m4 | 2
-rw-r--r--  dist/api_data.py | 9
-rw-r--r--  dist/filelist | 1
-rw-r--r--  dist/flags.py | 1
-rw-r--r--  dist/s_funcs.list | 1
-rw-r--r--  dist/s_string.ok | 1
-rw-r--r--  dist/s_symbols.list | 1
-rw-r--r--  dist/stat_data.py | 6
-rw-r--r--  examples/c/Makefile.am | 1
-rw-r--r--  examples/c/ex_config_parse.c | 166
-rw-r--r--  examples/c/ex_data_source.c | 49
-rw-r--r--  ext/datasources/helium/helium.c | 67
-rw-r--r--  lang/java/java_doc.i | 3
-rw-r--r--  lang/python/wiredtiger.i | 25
-rw-r--r--  src/block/block_mgr.c | 9
-rw-r--r--  src/block/block_open.c | 7
-rw-r--r--  src/bloom/bloom.c | 2
-rw-r--r--  src/btree/bt_discard.c | 4
-rw-r--r--  src/btree/bt_evict.c | 115
-rw-r--r--  src/btree/bt_handle.c | 41
-rw-r--r--  src/btree/bt_page.c | 14
-rw-r--r--  src/btree/rec_evict.c | 2
-rw-r--r--  src/btree/rec_write.c | 20
-rw-r--r--  src/config/config.c | 8
-rw-r--r--  src/config/config_api.c | 105
-rw-r--r--  src/config/config_def.c | 4
-rw-r--r--  src/config/config_ext.c | 91
-rw-r--r--  src/conn/conn_api.c | 21
-rw-r--r--  src/conn/conn_cache_pool.c | 6
-rw-r--r--  src/docs/Doxyfile | 1
-rw-r--r--  src/docs/command-line.dox | 3
-rw-r--r--  src/docs/config-strings.dox | 2
-rw-r--r--  src/docs/custom_data.dox | 5
-rw-r--r--  src/docs/helium.dox | 2
-rw-r--r--  src/docs/spell.ok | 1
-rw-r--r--  src/docs/top/main.dox | 6
-rw-r--r--  src/docs/upgrading.dox | 15
-rw-r--r--  src/include/btmem.h | 19
-rw-r--r--  src/include/btree.h | 4
-rw-r--r--  src/include/btree.i | 62
-rw-r--r--  src/include/cache.h | 9
-rw-r--r--  src/include/config.h | 8
-rw-r--r--  src/include/extern.h | 23
-rw-r--r--  src/include/flags.h | 1
-rw-r--r--  src/include/lsm.h | 2
-rw-r--r--  src/include/stat.h | 4
-rw-r--r--  src/include/txn.h | 3
-rw-r--r--  src/include/txn.i | 40
-rw-r--r--  src/include/wiredtiger.in | 242
-rw-r--r--  src/include/wiredtiger_ext.h | 137
-rw-r--r--  src/include/wt_internal.h | 2
-rw-r--r--  src/lsm/lsm_cursor.c | 68
-rw-r--r--  src/lsm/lsm_merge.c | 51
-rw-r--r--  src/lsm/lsm_stat.c | 2
-rw-r--r--  src/lsm/lsm_tree.c | 12
-rw-r--r--  src/lsm/lsm_worker.c | 17
-rw-r--r--  src/os_posix/os_open.c | 9
-rw-r--r--  src/schema/schema_create.c | 3
-rw-r--r--  src/support/scratch.c | 3
-rw-r--r--  src/support/stat.c | 12
-rw-r--r--  tools/wt_nvd3_util.py | 46
-rw-r--r--  tools/wtperf_graph.py | 50
-rw-r--r--  tools/wtperf_stats.py | 173
-rw-r--r--  tools/wtstats.py | 42
89 files changed, 2114 insertions, 1048 deletions
diff --git a/NEWS b/NEWS
index 9f0682f0ecd..a7ad4f8987c 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,29 @@
+WiredTiger release 2.1.1, 2014-03-04
+------------------------------------
+
+The WiredTiger 2.1.1 release contains new features, performance enhancements
+and bug fixes. Significant changes include:
+
+Fix a bug where a page could be marked clean when it contained uncommitted
+changes. This bug could cause undefined behavior in transaction rollback
+under load.
+
+Fix a bug with shared caches when rebalancing between connections.
+
+Add a new public API to WiredTiger that provides the ability to parse
+WiredTiger-compatible configuration strings. See the upgrading documentation
+for further information. [#873]
+
+A number of performance enhancements to the LSM implementation, particularly
+for long-running workloads.
+
+A number of performance enhancements and bug fixes to the cache eviction code.
+
+Add an option to use direct I/O when reading from checkpoints. To enable
+the functionality, add "direct_io=[checkpoint]" to your wiredtiger_open
+configuration string. [#847]
+
+
WiredTiger release 2.1.0, 2014-02-04
------------------------------------
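
The configuration-parser API mentioned in the release notes above is demonstrated
in this commit by the new examples/c/ex_config_parse.c (see the file list). The
sketch below is not taken from that example; it simply follows the same calls the
wtperf changes further down use (wiredtiger_config_parser_open, and the next/close
methods of WT_CONFIG_PARSER), and also passes the new "direct_io=[checkpoint]"
option to wiredtiger_open. The home directory and the configuration string being
parsed are illustrative only.

#include <stdio.h>
#include <string.h>
#include <wiredtiger.h>

int
parse_example(void)
{
	WT_CONFIG_ITEM k, v;
	WT_CONFIG_PARSER *parser;
	WT_CONNECTION *conn;
	const char *config = "cache_size=500MB,eviction_trigger=95";
	int ret, t_ret;

	/* The home directory must already exist; "WT_HOME" is a placeholder. */
	if ((ret = wiredtiger_open("WT_HOME", NULL,
	    "create,direct_io=[checkpoint]", &conn)) != 0)
		return (ret);

	/* Walk a WiredTiger-compatible configuration string. */
	if ((ret = wiredtiger_config_parser_open(
	    NULL, config, strlen(config), &parser)) != 0)
		goto err;
	while ((ret = parser->next(parser, &k, &v)) == 0)
		printf("%.*s = %.*s\n",
		    (int)k.len, k.str, (int)v.len, v.str);
	if (ret == WT_NOTFOUND)			/* End of the string. */
		ret = 0;
	if ((t_ret = parser->close(parser)) != 0 && ret == 0)
		ret = t_ret;

err:	if ((t_ret = conn->close(conn, NULL)) != 0 && ret == 0)
		ret = t_ret;
	return (ret);
}
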
diff --git a/README b/README
index 7fab4d05ea0..711056e27f0 100644
--- a/README
+++ b/README
@@ -1,6 +1,6 @@
-WiredTiger 2.1.0: (February 4, 2014)
+WiredTiger 2.1.1: (March 4, 2014)
-This is version 2.1.0 of WiredTiger.
+This is version 2.1.1 of WiredTiger.
WiredTiger release packages and documentation can be found at:
@@ -9,7 +9,7 @@ WiredTiger release packages and documentation can be found at:
Information on configuring, building and installing WiredTiger can be
found at:
- http://source.wiredtiger.com/2.1.0/install.html
+ http://source.wiredtiger.com/2.1.1/install.html
WiredTiger licensing information can be found at:
diff --git a/RELEASE b/RELEASE
index e555c4a62a7..9254d748a4d 100644
--- a/RELEASE
+++ b/RELEASE
@@ -1,6 +1,6 @@
WIREDTIGER_VERSION_MAJOR=2
WIREDTIGER_VERSION_MINOR=1
-WIREDTIGER_VERSION_PATCH=0
+WIREDTIGER_VERSION_PATCH=1
WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH"
WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"`
diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c
index 21275f69768..c87c3ac75a1 100644
--- a/bench/wtperf/config.c
+++ b/bench/wtperf/config.c
@@ -60,6 +60,23 @@ config_assign(CONFIG *dest, const CONFIG *src)
config_free(dest);
memcpy(dest, src, sizeof(CONFIG));
+ if (src->uris != NULL) {
+ dest->uris = (char **)calloc(src->table_count, sizeof(char *));
+ for (i = 0; i < src->table_count; i++)
+ dest->uris[i] = strdup(src->uris[i]);
+ }
+ dest->ckptthreads = NULL;
+ dest->popthreads = NULL;
+ dest->workers = NULL;
+
+ if (src->base_uri != NULL)
+ dest->base_uri = strdup(src->base_uri);
+ if (src->workload != NULL) {
+ dest->workload = calloc(WORKLOAD_MAX, sizeof(WORKLOAD));
+ memcpy(dest->workload,
+ src->workload, WORKLOAD_MAX * sizeof(WORKLOAD));
+ }
+
for (i = 0; i < sizeof(config_opts) / sizeof(config_opts[0]); i++)
if (config_opts[i].type == STRING_TYPE ||
config_opts[i].type == CONFIG_STRING_TYPE) {
@@ -96,14 +113,53 @@ config_free(CONFIG *cfg)
*pstr = NULL;
}
}
+ if (cfg->uris != NULL) {
+ for (i = 0; i < cfg->table_count; i++)
+ free(cfg->uris[i]);
+ free(cfg->uris);
+ }
+ free(cfg->ckptthreads);
free(cfg->popthreads);
- free(cfg->uri);
+ free(cfg->base_uri);
free(cfg->workers);
free(cfg->workload);
}
/*
+ * config_compress --
+ * Parse the compression configuration.
+ */
+int
+config_compress(CONFIG *cfg)
+{
+ int ret;
+ const char *s;
+
+ ret = 0;
+ s = cfg->compression;
+ if (strcmp(s, "none") == 0) {
+ cfg->compress_ext = NULL;
+ cfg->compress_table = NULL;
+ } else if (strcmp(s, "bzip") == 0) {
+ cfg->compress_ext = BZIP_EXT;
+ cfg->compress_table = BZIP_BLK;
+ } else if (strcmp(s, "snappy") == 0) {
+ cfg->compress_ext = SNAPPY_EXT;
+ cfg->compress_table = SNAPPY_BLK;
+ } else if (strcmp(s, "zlib") == 0) {
+ cfg->compress_ext = ZLIB_EXT;
+ cfg->compress_table = ZLIB_BLK;
+ } else {
+ fprintf(stderr,
+ "invalid compression configuration: %s\n", s);
+ ret = EINVAL;
+ }
+ return (ret);
+
+}
+
+/*
* config_threads --
* Parse the thread configuration.
*/
@@ -112,12 +168,10 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
{
WORKLOAD *workp;
WT_CONFIG_ITEM groupk, groupv, k, v;
- WT_CONFIG_SCAN *group, *scan;
- WT_EXTENSION_API *wt_api;
+ WT_CONFIG_PARSER *group, *scan;
int ret;
- wt_api = cfg->conn->get_extension_api(cfg->conn);
-
+ group = scan = NULL;
/* Allocate the workload array. */
if ((cfg->workload = calloc(WORKLOAD_MAX, sizeof(WORKLOAD))) == NULL)
return (enomem(cfg));
@@ -132,12 +186,11 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
* returned from the original string.
*/
if ((ret =
- wt_api->config_scan_begin(wt_api, NULL, config, len, &group)) != 0)
+ wiredtiger_config_parser_open(NULL, config, len, &group)) != 0)
goto err;
- while ((ret =
- wt_api->config_scan_next(wt_api, group, &groupk, &groupv)) == 0) {
- if ((ret = wt_api-> config_scan_begin(
- wt_api, NULL, groupk.str, groupk.len, &scan)) != 0)
+ while ((ret = group->next(group, &groupk, &groupv)) == 0) {
+ if ((ret = wiredtiger_config_parser_open(
+ NULL, groupk.str, groupk.len, &scan)) != 0)
goto err;
/* Move to the next workload slot. */
@@ -150,8 +203,7 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
}
workp = &cfg->workload[cfg->workload_cnt++];
- while ((ret =
- wt_api->config_scan_next(wt_api, scan, &k, &v)) == 0) {
+ while ((ret = scan->next(scan, &k, &v)) == 0) {
if (STRING_MATCH("count", k.str, k.len)) {
if ((workp->threads = v.val) <= 0)
goto err;
@@ -181,7 +233,9 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
ret = 0;
if (ret != 0 )
goto err;
- if ((ret = wt_api->config_scan_end(wt_api, scan)) != 0)
+ ret = scan->close(scan);
+ scan = NULL;
+ if (ret != 0)
goto err;
if (workp->insert == 0 &&
@@ -190,12 +244,19 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
cfg->workers_cnt += (u_int)workp->threads;
}
- if ((ret = wt_api->config_scan_end(wt_api, group)) != 0)
+ ret = group->close(group);
+ group = NULL;
+ if (ret != 0)
goto err;
return (0);
-err: fprintf(stderr,
+err: if (group != NULL)
+ (void)group->close(group);
+ if (scan != NULL)
+ (void)scan->close(scan);
+
+ fprintf(stderr,
"invalid thread configuration or scan error: %.*s\n",
(int)len, config);
return (EINVAL);
@@ -408,21 +469,17 @@ int
config_opt_line(CONFIG *cfg, const char *optstr)
{
WT_CONFIG_ITEM k, v;
- WT_CONFIG_SCAN *scan;
- WT_EXTENSION_API *wt_api;
+ WT_CONFIG_PARSER *scan;
int ret, t_ret;
- wt_api = cfg->conn->get_extension_api(cfg->conn);
-
- if ((ret = wt_api->config_scan_begin(
- wt_api, NULL, optstr, strlen(optstr), &scan)) != 0) {
+ if ((ret = wiredtiger_config_parser_open(
+ NULL, optstr, strlen(optstr), &scan)) != 0) {
lprintf(cfg, ret, 0, "Error in config_scan_begin");
return (ret);
}
while (ret == 0) {
- if ((ret =
- wt_api->config_scan_next(wt_api, scan, &k, &v)) != 0) {
+ if ((ret = scan->next(scan, &k, &v)) != 0) {
/* Any parse error has already been reported. */
if (ret == WT_NOTFOUND)
ret = 0;
@@ -430,7 +487,7 @@ config_opt_line(CONFIG *cfg, const char *optstr)
}
ret = config_opt(cfg, &k, &v);
}
- if ((t_ret = wt_api->config_scan_end(wt_api, scan)) != 0) {
+ if ((t_ret = scan->close(scan)) != 0) {
lprintf(cfg, ret, 0, "Error in config_scan_end");
if (ret == 0)
ret = t_ret;
@@ -474,6 +531,14 @@ config_sanity(CONFIG *cfg)
fprintf(stderr, "interval value longer than the run-time\n");
return (1);
}
+ if (cfg->table_count > 99) {
+ fprintf(stderr, "table count greater than 99\n");
+ return (1);
+ }
+ if (cfg->database_count > 99) {
+ fprintf(stderr, "database count greater than 99\n");
+ return (1);
+ }
return (0);
}
@@ -600,11 +665,8 @@ config_opt_usage(void)
void
usage(void)
{
- printf("wtperf [-LMS] [-C config] "
+ printf("wtperf [-C config] "
"[-H mount] [-h home] [-O file] [-o option] [-T config]\n");
- printf("\t-L Use a large default configuration\n");
- printf("\t-M Use a medium default configuration\n");
- printf("\t-S Use a small default configuration\n");
printf("\t-C <string> additional connection configuration\n");
printf("\t (added to option conn_config)\n");
printf("\t-H <mount> configure Helium volume mount point\n");
diff --git a/bench/wtperf/misc.c b/bench/wtperf/misc.c
index 4dac172a0c5..4946a260bec 100644
--- a/bench/wtperf/misc.c
+++ b/bench/wtperf/misc.c
@@ -44,24 +44,27 @@ enomem(const CONFIG *cfg)
int
setup_log_file(CONFIG *cfg)
{
+ int ret;
char *fname;
+ ret = 0;
+
if (cfg->verbose < 1)
return (0);
- if ((fname = calloc(strlen(cfg->home) +
+ if ((fname = calloc(strlen(cfg->monitor_dir) +
strlen(cfg->table_name) + strlen(".stat") + 2, 1)) == NULL)
return (enomem(cfg));
- sprintf(fname, "%s/%s.stat", cfg->home, cfg->table_name);
- cfg->logf = fopen(fname, "a");
- free(fname);
-
+ sprintf(fname, "%s/%s.stat", cfg->monitor_dir, cfg->table_name);
+ cfg->logf = fopen(fname, "w");
if (cfg->logf == NULL) {
- fprintf(stderr,
- "Failed to open log file: %s\n", strerror(errno));
- return (EINVAL);
+ ret = errno;
+ fprintf(stderr, "%s: %s\n", fname, strerror(ret));
}
+ free(fname);
+ if (cfg->logf == NULL)
+ return (ret);
/* Use line buffering for the log file. */
(void)setvbuf(cfg->logf, NULL, _IOLBF, 0);
diff --git a/bench/wtperf/runners/fruit-lsm.wtperf b/bench/wtperf/runners/fruit-lsm.wtperf
index 193b29d38bf..a06cc37c33d 100644
--- a/bench/wtperf/runners/fruit-lsm.wtperf
+++ b/bench/wtperf/runners/fruit-lsm.wtperf
@@ -6,7 +6,7 @@
conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024"
compact=true
sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm"
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K"
icount=25000000
key_sz=40
value_sz=800
diff --git a/bench/wtperf/runners/fruit-short.wtperf b/bench/wtperf/runners/fruit-short.wtperf
index 9061e231bbe..f546cbc29b5 100644
--- a/bench/wtperf/runners/fruit-short.wtperf
+++ b/bench/wtperf/runners/fruit-short.wtperf
@@ -6,7 +6,7 @@
conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024"
compact=true
sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm"
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K"
icount=25000000
key_sz=40
value_sz=800
diff --git a/bench/wtperf/runners/medium-lsm-compact.wtperf b/bench/wtperf/runners/medium-lsm-compact.wtperf
index 5393cdbfeba..62ae8cf86ca 100644
--- a/bench/wtperf/runners/medium-lsm-compact.wtperf
+++ b/bench/wtperf/runners/medium-lsm-compact.wtperf
@@ -1,6 +1,6 @@
# wtperf options file: medium lsm configuration
conn_config="cache_size=1G"
-table_config="lsm=(chunk_size=100MB,merge_threads=2),type=lsm"
+table_config="lsm=(chunk_size=100MB,merge_threads=2,chunk_max=1TB),type=lsm"
icount=50000000
populate_threads=1
compact=true
diff --git a/bench/wtperf/runners/multi-btree.wtperf b/bench/wtperf/runners/multi-btree.wtperf
new file mode 100644
index 00000000000..831aa71cdd2
--- /dev/null
+++ b/bench/wtperf/runners/multi-btree.wtperf
@@ -0,0 +1,13 @@
+# wtperf options file: small btree multi-database configuration
+# Original cache was 500MB. Shared cache is 500MB * database_count.
+conn_config="shared_cache=(enable=true,size=2500MB,chunk=10M)"
+database_count=5
+table_config="leaf_page_max=4k,internal_page_max=16k,leaf_item_max=1433,internal_item_max=3100,type=file"
+# Likewise, divide original icount by database_count.
+icount=50000
+populate_threads=1
+random_range=100000000
+report_interval=5
+run_time=3000
+threads=((count=1,reads=1),(count=1,inserts=1))
+value_sz=1024
diff --git a/bench/wtperf/runners/test1-500m-lsm.wtperf b/bench/wtperf/runners/test1-500m-lsm.wtperf
index 116641fc165..7f85b8c13e5 100644
--- a/bench/wtperf/runners/test1-500m-lsm.wtperf
+++ b/bench/wtperf/runners/test1-500m-lsm.wtperf
@@ -1,12 +1,13 @@
-# wtperf options file: simulate riak and its test1 and test2 configuration
+# wtperf options file: simulate riak and its test1 configuration
# The configuration for the connection and table are from riak and the
# specification of the data (count, size, threads) is from basho_bench.
#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
+#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=60)"
conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024"
compact=true
+compression="snappy"
sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm"
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_max=5GB,chunk_size=100MB,merge_threads=3),type=lsm,leaf_page_max=16K"
icount=500000000
key_sz=40
value_sz=1000
diff --git a/bench/wtperf/runners/test1-50m-lsm.wtperf b/bench/wtperf/runners/test1-50m-lsm.wtperf
index 6551b1565a9..548932f3aee 100644
--- a/bench/wtperf/runners/test1-50m-lsm.wtperf
+++ b/bench/wtperf/runners/test1-50m-lsm.wtperf
@@ -1,12 +1,12 @@
-# wtperf options file: simulate riak and its test1 and test2 configuration
+# wtperf options file: simulate riak and its test1 configuration
# The configuration for the connection and table are from riak and the
# specification of the data (count, size, threads) is from basho_bench.
#
-#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=300)"
+#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=30)"
conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024"
compact=true
sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm"
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K"
icount=50000000
key_sz=40
value_sz=1000
diff --git a/bench/wtperf/runners/test2-500m-lsm.wtperf b/bench/wtperf/runners/test2-500m-lsm.wtperf
index 9b8f225f643..db1ee9d469c 100644
--- a/bench/wtperf/runners/test2-500m-lsm.wtperf
+++ b/bench/wtperf/runners/test2-500m-lsm.wtperf
@@ -3,11 +3,12 @@
# specification of the data (count, size, threads) is from basho_bench.
# This test assumes that a test1 populate already completed and exists.
#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
+#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=60)"
conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024"
create=false
-sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm"
+compression="snappy"
+sess_config="isolation=snapshot
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_max=5GB,chunk_size=100MB,merge_threads=3),type=lsm,leaf_page_max=16K"
key_sz=40
value_sz=1000
report_interval=10
diff --git a/bench/wtperf/runners/test2-50m-lsm.wtperf b/bench/wtperf/runners/test2-50m-lsm.wtperf
index 945faade694..c5e77e7e207 100644
--- a/bench/wtperf/runners/test2-50m-lsm.wtperf
+++ b/bench/wtperf/runners/test2-50m-lsm.wtperf
@@ -3,11 +3,11 @@
# specification of the data (count, size, threads) is from basho_bench.
# This test assumes that a test1 populate already completed and exists.
#
-#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=300)"
+#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=30)"
conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024"
create=false
sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm"
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K"
key_sz=40
value_sz=1000
report_interval=10
diff --git a/bench/wtperf/runners/test3-500m-lsm.wtperf b/bench/wtperf/runners/test3-500m-lsm.wtperf
index 2da836d577c..446309c32c8 100644
--- a/bench/wtperf/runners/test3-500m-lsm.wtperf
+++ b/bench/wtperf/runners/test3-500m-lsm.wtperf
@@ -1,13 +1,14 @@
-# wtperf options file: simulate riak and its test2 configuration
+# wtperf options file: simulate riak and its test3 configuration
# The configuration for the connection and table are from riak and the
# specification of the data (count, size, threads) is from basho_bench.
# This test assumes that a test1 populate already completed and exists.
#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
+#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=60)"
conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024"
create=false
+compression="snappy"
sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm"
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_max=5GB,chunk_size=100MB,merge_threads=3),type=lsm,leaf_page_max=16K"
key_sz=40
value_sz=1000
pareto=true
diff --git a/bench/wtperf/runners/test3-50m-lsm.wtperf b/bench/wtperf/runners/test3-50m-lsm.wtperf
index bc54cec3496..00291737545 100644
--- a/bench/wtperf/runners/test3-50m-lsm.wtperf
+++ b/bench/wtperf/runners/test3-50m-lsm.wtperf
@@ -1,17 +1,18 @@
-# wtperf options file: simulate riak and its test2 configuration
+# wtperf options file: simulate riak and its test3 configuration
# The configuration for the connection and table are from riak and the
# specification of the data (count, size, threads) is from basho_bench.
# This test assumes that a test1 populate already completed and exists.
#
-#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=300)"
+#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=30)"
conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024"
create=false
sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm"
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K"
key_sz=40
value_sz=1000
pareto=true
report_interval=10
run_time=1440
sample_interval=10
-threads=((count=10,reads=1,updates=1))
+#threads=((count=10,reads=1,updates=1))
+threads=((count=5,reads=1),(count=5,updates=1))
diff --git a/bench/wtperf/runners/test4-500m-lsm.wtperf b/bench/wtperf/runners/test4-500m-lsm.wtperf
index 1257f473a7d..a959ec9057e 100644
--- a/bench/wtperf/runners/test4-500m-lsm.wtperf
+++ b/bench/wtperf/runners/test4-500m-lsm.wtperf
@@ -1,13 +1,14 @@
-# wtperf options file: simulate riak and its test2 configuration
+# wtperf options file: simulate riak and its test4 configuration
# The configuration for the connection and table are from riak and the
# specification of the data (count, size, threads) is from basho_bench.
# This test assumes that a test1 populate already completed and exists.
#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
+#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=60)"
conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024"
create=false
+compression="snappy"
sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm"
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_max=5GB,chunk_size=100MB,merge_threads=3),type=lsm,leaf_page_max=16K"
key_sz=40
value_sz=1000
report_interval=10
diff --git a/bench/wtperf/runners/test4-50m-lsm.wtperf b/bench/wtperf/runners/test4-50m-lsm.wtperf
index fc7c96c31d3..6536d8a1de7 100644
--- a/bench/wtperf/runners/test4-50m-lsm.wtperf
+++ b/bench/wtperf/runners/test4-50m-lsm.wtperf
@@ -1,13 +1,13 @@
-# wtperf options file: simulate riak and its test2 configuration
+# wtperf options file: simulate riak and its test4 configuration
# The configuration for the connection and table are from riak and the
# specification of the data (count, size, threads) is from basho_bench.
# This test assumes that a test1 populate already completed and exists.
#
-#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=300)"
+#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=30)"
conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024"
create=false
sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm"
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K"
key_sz=40
value_sz=1000
report_interval=10
diff --git a/bench/wtperf/runners/voxer-10k.wtperf b/bench/wtperf/runners/voxer-10k.wtperf
new file mode 100644
index 00000000000..17c46b4ed94
--- /dev/null
+++ b/bench/wtperf/runners/voxer-10k.wtperf
@@ -0,0 +1,19 @@
+# wtperf options file: simulate riak and its test1 and test2 configuration
+# The configuration for the connection and table are from riak and the
+# specification of the data (count, size, threads) is from basho_bench.
+#
+#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
+conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024"
+compact=true
+compression="snappy"
+sess_config="isolation=snapshot"
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K"
+icount=15000
+key_sz=40
+value_sz=10000
+populate_threads=1
+report_interval=10
+random_value=true
+run_time=3600
+sample_interval=10
+threads=((count=20,read=1,update=1))
diff --git a/bench/wtperf/runners/voxer-130k.wtperf b/bench/wtperf/runners/voxer-130k.wtperf
new file mode 100644
index 00000000000..e81510cf067
--- /dev/null
+++ b/bench/wtperf/runners/voxer-130k.wtperf
@@ -0,0 +1,19 @@
+# wtperf options file: simulate riak and its test1 and test2 configuration
+# The configuration for the connection and table are from riak and the
+# specification of the data (count, size, threads) is from basho_bench.
+#
+#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
+conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024"
+compact=true
+compression="snappy"
+sess_config="isolation=snapshot"
+table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K"
+icount=15000
+key_sz=40
+value_sz=130000
+populate_threads=1
+report_interval=10
+random_value=true
+run_time=3600
+sample_interval=10
+threads=((count=20,read=1,update=1))
diff --git a/bench/wtperf/smoke.sh b/bench/wtperf/smoke.sh
index 192d3cd208b..062277d90dc 100755
--- a/bench/wtperf/smoke.sh
+++ b/bench/wtperf/smoke.sh
@@ -1,4 +1,4 @@
#! /bin/sh
# Smoke-test wtperf as part of running "make check".
-./wtperf -S
+./wtperf -O `dirname $0`/runners/small-lsm.wtperf -o "run_time=20"
diff --git a/bench/wtperf/track.c b/bench/wtperf/track.c
index 12f50dd5411..3919d0eb1ab 100644
--- a/bench/wtperf/track.c
+++ b/bench/wtperf/track.c
@@ -279,10 +279,8 @@ latency_print_single(CONFIG *cfg, TRACK *total, const char *name)
return;
}
-#ifdef __WRITE_A_HEADER
fprintf(fp,
- "usecs,operations,cumulative-operations,total-operations\n");
-#endif
+ "#usecs,operations,cumulative-operations,total-operations\n");
cumops = 0;
for (i = 0; i < ELEMENTS(total->us); ++i) {
if (total->us[i] == 0)
diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c
index 5554f2e564e..ab64c00802e 100644
--- a/bench/wtperf/wtperf.c
+++ b/bench/wtperf/wtperf.c
@@ -31,68 +31,36 @@
static const CONFIG default_cfg = {
"WT_TEST", /* home */
"WT_TEST", /* monitor dir */
- NULL, /* uri */
+ NULL, /* base_uri */
+ NULL, /* uris */
+ NULL, /* helium_mount */
NULL, /* conn */
NULL, /* logf */
+ NULL, NULL, /* compressor ext, blk */
NULL, NULL, /* populate, checkpoint threads */
NULL, /* worker threads */
0, /* worker thread count */
NULL, /* workloads */
0, /* workload count */
+ 0, /* checkpoint operations */
+ 0, /* insert operations */
+ 0, /* read operations */
+ 0, /* update operations */
+ 0, /* insert key */
+ 0, /* checkpoint in progress */
+ 0, /* thread error */
+ 0, /* notify threads to stop */
+ 0, /* total seconds running */
#define OPT_DEFINE_DEFAULT
#include "wtperf_opt.i"
#undef OPT_DEFINE_DEFAULT
};
-static const char * const small_config_str =
- "conn_config=\"cache_size=500MB\","
- "table_config=\"lsm=(chunk_size=5MB)\","
- "icount=500000,"
- "value_sz=100,"
- "key_sz=20,"
- "report_interval=5,"
- "run_time=20,"
- "populate_threads=1,"
- "threads=((count=8,read=1)),";
-
-static const char * const med_config_str =
- "conn_config=\"cache_size=1GB\","
- "table_config=\"lsm=(chunk_size=20MB)\","
- "icount=50000000,"
- "value_sz=100,"
- "key_sz=20,"
- "report_interval=5,"
- "run_time=100,"
- "populate_threads=1,"
- "threads=((count=16,read=1)),";
-
-static const char * const large_config_str =
- "conn_config=\"cache_size=2GB\","
- "table_config=\"lsm=(chunk_size=50MB)\","
- "icount=500000000,"
- "value_sz=100,"
- "key_sz=20,"
- "report_interval=5,"
- "run_time=600,"
- "populate_threads=1,"
- "threads=((count=16,read=1)),";
-
static const char * const debug_cconfig = "verbose=[lsm]";
static const char * const debug_tconfig = "";
-static uint64_t g_ckpt_ops; /* checkpoint operations */
-static uint64_t g_insert_ops; /* insert operations */
-static uint64_t g_read_ops; /* read operations */
-static uint64_t g_update_ops; /* update operations */
-
-static uint64_t g_insert_key; /* insert key */
-
-static volatile int g_ckpt; /* checkpoint in progress */
-static volatile int g_error; /* thread error */
-static volatile int g_stop; /* notify threads to stop */
-
/*
* Atomic update where needed.
*/
@@ -103,15 +71,20 @@ static volatile int g_stop; /* notify threads to stop */
#endif
static void *checkpoint_worker(void *);
+static int create_tables(CONFIG *);
+static int create_uris(CONFIG *);
static int execute_populate(CONFIG *);
static int execute_workload(CONFIG *);
static int find_table_count(CONFIG *);
static void *monitor(void *);
static void *populate_thread(void *);
-static void randomize_value(CONFIG *, char *);
+static void randomize_value(CONFIG *, char *);
+static int start_all_runs(CONFIG *);
+static int start_run(CONFIG *);
static int start_threads(CONFIG *,
WORKLOAD *, CONFIG_THREAD *, u_int, void *(*)(void *));
static int stop_threads(CONFIG *, u_int, CONFIG_THREAD *);
+static void *thread_run_wtperf(void *);
static void *worker(void *);
static uint64_t wtperf_rand(CONFIG *);
static uint64_t wtperf_value_range(CONFIG *);
@@ -130,9 +103,9 @@ extern uint32_t __wt_random(void);
/* Retrieve an ID for the next insert operation. */
static inline uint64_t
-get_next_incr(void)
+get_next_incr(CONFIG *cfg)
{
- return (ATOMIC_ADD(g_insert_key, 1));
+ return (ATOMIC_ADD(cfg->insert_key, 1));
}
static void
@@ -219,16 +192,18 @@ worker(void *arg)
CONFIG_THREAD *thread;
TRACK *trk;
WT_CONNECTION *conn;
- WT_CURSOR *cursor;
+ WT_CURSOR **cursors, *cursor;
WT_SESSION *session;
+ size_t i;
uint64_t next_val, usecs;
- int measure_latency, ret;
uint8_t *op, *op_end;
+ int measure_latency, ret;
char *value_buf, *key_buf, *value;
thread = (CONFIG_THREAD *)arg;
cfg = thread->cfg;
conn = cfg->conn;
+ cursors = NULL;
session = NULL;
trk = NULL;
@@ -237,12 +212,22 @@ worker(void *arg)
lprintf(cfg, ret, 0, "worker: WT_CONNECTION.open_session");
goto err;
}
- if ((ret = session->open_cursor(
- session, cfg->uri, NULL, NULL, &cursor)) != 0) {
- lprintf(cfg,
- ret, 0, "worker: WT_SESSION.open_cursor: %s", cfg->uri);
+ cursors = (WT_CURSOR **)calloc(
+ cfg->table_count, sizeof(WT_CURSOR *));
+ if (cursors == NULL) {
+ lprintf(cfg, ENOMEM, 0,
+ "worker: couldn't allocate cursor array");
goto err;
}
+ for (i = 0; i < cfg->table_count; i++) {
+ if ((ret = session->open_cursor(session,
+ cfg->uris[i], NULL, NULL, &cursors[i])) != 0) {
+ lprintf(cfg, ret, 0,
+ "worker: WT_SESSION.open_cursor: %s",
+ cfg->uris[i]);
+ goto err;
+ }
+ }
key_buf = thread->key_buf;
value_buf = thread->value_buf;
@@ -250,7 +235,7 @@ worker(void *arg)
op = thread->workload->ops;
op_end = op + sizeof(thread->workload->ops);
- while (!g_stop) {
+ while (!cfg->stop) {
/*
* Generate the next key and setup operation specific
* statistics tracking objects.
@@ -262,7 +247,7 @@ worker(void *arg)
if (cfg->random_range)
next_val = wtperf_rand(cfg);
else
- next_val = cfg->icount + get_next_incr();
+ next_val = cfg->icount + get_next_incr(cfg);
break;
case WORKER_READ:
trk = &thread->read;
@@ -285,7 +270,18 @@ worker(void *arg)
}
sprintf(key_buf, "%0*" PRIu64, cfg->key_sz, next_val);
- measure_latency = cfg->sample_interval != 0 && (
+
+ /*
+ * Spread the data out around the multiple databases.
+ */
+ cursor = cursors[next_val % cfg->table_count];
+
+ /*
+ * Skip the first time we do an operation, when trk->ops
+ * is 0, to avoid first time latency spikes.
+ */
+ measure_latency =
+ cfg->sample_interval != 0 && trk->ops != 0 && (
trk->ops % cfg->sample_rate == 0);
if (measure_latency &&
(ret = __wt_epoch(NULL, &start)) != 0) {
@@ -385,8 +381,10 @@ op_err: lprintf(cfg, ret, 0,
/* Notify our caller we failed and shut the system down. */
if (0) {
-err: g_error = g_stop = 1;
+err: cfg->error = cfg->stop = 1;
}
+ if (cursors != NULL)
+ free(cursors);
return (NULL);
}
@@ -502,17 +500,20 @@ populate_thread(void *arg)
CONFIG_THREAD *thread;
TRACK *trk;
WT_CONNECTION *conn;
- WT_CURSOR *cursor;
+ WT_CURSOR **cursors, *cursor;
WT_SESSION *session;
- uint32_t opcount;
+ size_t i;
uint64_t op, usecs;
+ uint32_t opcount;
int intxn, measure_latency, ret;
char *value_buf, *key_buf;
+ const char *cursor_config;
thread = (CONFIG_THREAD *)arg;
cfg = thread->cfg;
conn = cfg->conn;
session = NULL;
+ cursors = NULL;
ret = 0;
trk = &thread->insert;
@@ -525,17 +526,30 @@ populate_thread(void *arg)
goto err;
}
- /* Do a bulk load if populate is single-threaded. */
- if ((ret = session->open_cursor(session, cfg->uri, NULL,
- cfg->populate_threads == 1 ? "bulk" : NULL, &cursor)) != 0) {
- lprintf(cfg,
- ret, 0, "populate: WT_SESSION.open_cursor: %s", cfg->uri);
+ /* Do bulk loads if populate is single-threaded. */
+ cursor_config = cfg->populate_threads == 1 ? "bulk" : NULL;
+ /* Create the cursors. */
+ cursors = (WT_CURSOR **)calloc(
+ cfg->table_count, sizeof(WT_CURSOR *));
+ if (cursors == NULL) {
+ lprintf(cfg, ENOMEM, 0,
+		    "populate: couldn't allocate cursor array");
goto err;
}
+ for (i = 0; i < cfg->table_count; i++) {
+ if ((ret = session->open_cursor(
+ session, cfg->uris[i], NULL,
+ cursor_config, &cursors[i])) != 0) {
+ lprintf(cfg, ret, 0,
+ "populate: WT_SESSION.open_cursor: %s",
+ cfg->uris[i]);
+ goto err;
+ }
+ }
- /* Populate the database. */
+ /* Populate the databases. */
for (intxn = 0, opcount = 0;;) {
- op = get_next_incr();
+ op = get_next_incr(cfg);
if (op > cfg->icount)
break;
@@ -548,8 +562,13 @@ populate_thread(void *arg)
}
intxn = 1;
}
+ /*
+ * Figure out which table this op belongs to.
+ */
+ cursor = cursors[op % cfg->table_count];
sprintf(key_buf, "%0*" PRIu64, cfg->key_sz, op);
- measure_latency = cfg->sample_interval != 0 && (
+ measure_latency =
+ cfg->sample_interval != 0 && trk->ops != 0 && (
trk->ops % cfg->sample_rate == 0);
if (measure_latency &&
(ret = __wt_epoch(NULL, &start)) != 0) {
@@ -564,7 +583,12 @@ populate_thread(void *arg)
lprintf(cfg, ret, 0, "Failed inserting");
goto err;
}
- /* Gather statistics */
+ /*
+ * Gather statistics.
+ * We measure the latency of inserting a single key. If there
+ * are multiple tables, it is the time for insertion into all
+ * of them.
+ */
if (measure_latency) {
if ((ret = __wt_epoch(NULL, &stop)) != 0) {
lprintf(cfg, ret, 0,
@@ -601,8 +625,10 @@ populate_thread(void *arg)
/* Notify our caller we failed and shut the system down. */
if (0) {
-err: g_error = g_stop = 1;
+err: cfg->error = cfg->stop = 1;
}
+ if (cursors != NULL)
+ free(cursors);
return (NULL);
}
@@ -614,16 +640,16 @@ monitor(void *arg)
struct tm *tm, _tm;
CONFIG *cfg;
FILE *fp;
- char buf[64], *path;
- int ret;
+ size_t len;
uint64_t reads, inserts, updates;
uint64_t cur_reads, cur_inserts, cur_updates;
uint64_t last_reads, last_inserts, last_updates;
uint32_t read_avg, read_min, read_max;
uint32_t insert_avg, insert_min, insert_max;
uint32_t update_avg, update_min, update_max;
- size_t len;
u_int i;
+ int ret;
+ char buf[64], *path;
cfg = (CONFIG *)arg;
assert(cfg->sample_interval != 0);
@@ -643,28 +669,32 @@ monitor(void *arg)
}
/* Set line buffering for monitor file. */
(void)setvbuf(fp, NULL, _IOLBF, 0);
-#ifdef __WRITE_A_HEADER
fprintf(fp,
"#time,"
- "read operations,insert operations,update operations,"
+ "totalsec,"
+ "read ops per second,"
+ "insert ops per second,"
+ "update ops per second,"
"checkpoints,"
- "read average latency(NS),read minimum latency(NS),"
- "read maximum latency(NS),"
- "insert average latency(NS),insert min latency(NS),"
- "insert maximum latency(NS),"
- "update average latency(NS),update min latency(NS),"
- "update maximum latency(NS)"
+ "read average latency(uS),"
+ "read minimum latency(uS),"
+ "read maximum latency(uS),"
+ "insert average latency(uS),"
+ "insert min latency(uS),"
+ "insert maximum latency(uS),"
+ "update average latency(uS),"
+ "update min latency(uS),"
+ "update maximum latency(uS)"
"\n");
-#endif
last_reads = last_inserts = last_updates = 0;
- while (!g_stop) {
+ while (!cfg->stop) {
for (i = 0; i < cfg->sample_interval; i++) {
sleep(1);
- if (g_stop)
+ if (cfg->stop)
break;
}
/* If the workers are done, don't bother with a final call. */
- if (g_stop)
+ if (cfg->stop)
break;
if ((ret = __wt_epoch(NULL, &t)) != 0) {
@@ -694,18 +724,18 @@ monitor(void *arg)
cur_inserts = inserts - last_inserts;
(void)fprintf(fp,
- "%s"
+ "%s,%" PRIu32
",%" PRIu64 ",%" PRIu64 ",%" PRIu64
",%c"
",%" PRIu32 ",%" PRIu32 ",%" PRIu32
",%" PRIu32 ",%" PRIu32 ",%" PRIu32
",%" PRIu32 ",%" PRIu32 ",%" PRIu32
"\n",
- buf,
+ buf, cfg->totalsec,
cur_reads / cfg->sample_interval,
cur_inserts / cfg->sample_interval,
cur_updates / cfg->sample_interval,
- g_ckpt ? 'Y' : 'N',
+ cfg->ckpt ? 'Y' : 'N',
read_avg, read_min, read_max,
insert_avg, insert_min, insert_max,
update_avg, update_min, update_max);
@@ -717,7 +747,7 @@ monitor(void *arg)
/* Notify our caller we failed and shut the system down. */
if (0) {
-err: g_error = g_stop = 1;
+err: cfg->error = cfg->stop = 1;
}
if (fp != NULL)
@@ -750,27 +780,27 @@ checkpoint_worker(void *arg)
goto err;
}
- while (!g_stop) {
+ while (!cfg->stop) {
/* Break the sleep up, so we notice interrupts faster. */
for (i = 0; i < cfg->checkpoint_interval; i++) {
sleep(1);
- if (g_stop)
+ if (cfg->stop)
break;
}
/* If the workers are done, don't bother with a final call. */
- if (g_stop)
+ if (cfg->stop)
break;
if ((ret = __wt_epoch(NULL, &s)) != 0) {
lprintf(cfg, ret, 0, "Get time failed in checkpoint.");
goto err;
}
- g_ckpt = 1;
+ cfg->ckpt = 1;
if ((ret = session->checkpoint(session, NULL)) != 0) {
lprintf(cfg, ret, 0, "Checkpoint failed.");
goto err;
}
- g_ckpt = 0;
+ cfg->ckpt = 0;
++thread->ckpt.ops;
if ((ret = __wt_epoch(NULL, &e)) != 0) {
@@ -788,7 +818,7 @@ checkpoint_worker(void *arg)
/* Notify our caller we failed and shut the system down. */
if (0) {
-err: g_error = g_stop = 1;
+err: cfg->error = cfg->stop = 1;
}
return (NULL);
@@ -797,13 +827,14 @@ err: g_error = g_stop = 1;
static int
execute_populate(CONFIG *cfg)
{
+ struct timespec start, stop;
CONFIG_THREAD *popth;
WT_SESSION *session;
- struct timespec start, stop;
double secs;
+ size_t i;
uint64_t last_ops;
- uint32_t interval, total;
- int elapsed, ret;
+ uint32_t interval;
+ int elapsed, ret, t_ret;
session = NULL;
lprintf(cfg, 0, 1,
@@ -816,14 +847,14 @@ execute_populate(CONFIG *cfg)
cfg->popthreads, cfg->populate_threads, populate_thread)) != 0)
return (ret);
- g_insert_key = 0;
+ cfg->insert_key = 0;
if ((ret = __wt_epoch(NULL, &start)) != 0) {
lprintf(cfg, ret, 0, "Get time failed in populate.");
return (ret);
}
- for (elapsed = 0, interval = 0, last_ops = 0, total = 0;
- g_insert_key < cfg->icount && g_error == 0;) {
+ for (elapsed = 0, interval = 0, last_ops = 0;
+ cfg->insert_key < cfg->icount && cfg->error == 0;) {
/*
* Sleep for 100th of a second, report_interval is in second
* granularity, each 100th increment of elapsed is a single
@@ -836,14 +867,14 @@ execute_populate(CONFIG *cfg)
if (++interval < cfg->report_interval)
continue;
interval = 0;
- total += cfg->report_interval;
- g_insert_ops = sum_pop_ops(cfg);
+ cfg->totalsec += cfg->report_interval;
+ cfg->insert_ops = sum_pop_ops(cfg);
lprintf(cfg, 0, 1,
"%" PRIu64 " populate inserts (%" PRIu64 " of %"
PRIu32 ") in %" PRIu32 " secs (%" PRIu32 " total secs)",
- g_insert_ops - last_ops, g_insert_ops,
- cfg->icount, cfg->report_interval, total);
- last_ops = g_insert_ops;
+ cfg->insert_ops - last_ops, cfg->insert_ops,
+ cfg->icount, cfg->report_interval, cfg->totalsec);
+ last_ops = cfg->insert_ops;
}
if ((ret = __wt_epoch(NULL, &stop)) != 0) {
lprintf(cfg, ret, 0, "Get time failed in populate.");
@@ -864,7 +895,7 @@ execute_populate(CONFIG *cfg)
return (ret);
/* Report if any worker threads didn't finish. */
- if (g_error != 0) {
+ if (cfg->error != 0) {
lprintf(cfg, WT_ERROR, 0,
"Populate thread(s) exited without finishing.");
return (WT_ERROR);
@@ -879,7 +910,9 @@ execute_populate(CONFIG *cfg)
"Load time: %.2f\n" "load ops/sec: %.2f", secs, cfg->icount / secs);
/*
- * If configured, compact to allow LSM merging to complete.
+ * If configured, compact to allow LSM merging to complete. We
+ * set an unlimited timeout because if we close the connection
+ * then any in-progress compact/merge is aborted.
*/
if (cfg->compact) {
if ((ret = cfg->conn->open_session(
@@ -889,16 +922,34 @@ execute_populate(CONFIG *cfg)
return (ret);
}
lprintf(cfg, 0, 1, "Compact after populate");
- if ((ret = session->compact(session, cfg->uri, NULL)) != 0) {
- lprintf(cfg, ret, 0,
- "execute_populate: WT_SESSION.compact");
- return (ret);
+ if ((ret = __wt_epoch(NULL, &start)) != 0) {
+ lprintf(cfg, ret, 0, "Get time failed in populate.");
+ goto err;
}
- if ((ret = session->close(session, NULL)) != 0) {
- lprintf(cfg, ret, 0,
+ /*
+ * We measure how long it takes to compact all tables for this
+ * workload.
+ */
+ for (i = 0; i < cfg->table_count; i++)
+ if ((ret = session->compact(
+ session, cfg->uris[i], "timeout=0")) != 0) {
+ lprintf(cfg, ret, 0,
+ "execute_populate: WT_SESSION.compact");
+ goto err;
+ }
+ if ((ret = __wt_epoch(NULL, &stop)) != 0) {
+ lprintf(cfg, ret, 0, "Get time failed in populate.");
+ goto err;
+ }
+ secs = stop.tv_sec + stop.tv_nsec / (double)BILLION;
+ secs -= start.tv_sec + start.tv_nsec / (double)BILLION;
+ lprintf(cfg, 0, 1, "Compact completed in %.2f seconds", secs);
+err:
+ if ((t_ret = session->close(session, NULL)) != 0)
+ lprintf(cfg, t_ret, 0,
"execute_populate: WT_SESSION.close");
+ if (ret != 0 || (ret = t_ret) != 0)
return (ret);
- }
}
/*
@@ -926,12 +977,12 @@ execute_workload(CONFIG *cfg)
CONFIG_THREAD *threads;
WORKLOAD *workp;
uint64_t last_ckpts, last_inserts, last_reads, last_updates;
- uint32_t interval, run_ops, run_time, total;
+ uint32_t interval, run_ops, run_time;
u_int i;
int ret, t_ret;
- g_insert_key = 0;
- g_insert_ops = g_read_ops = g_update_ops = 0;
+ cfg->insert_key = 0;
+ cfg->insert_ops = cfg->read_ops = cfg->update_ops = 0;
last_ckpts = last_inserts = last_reads = last_updates = 0;
ret = 0;
@@ -963,8 +1014,8 @@ execute_workload(CONFIG *cfg)
threads += workp->threads;
}
- for (interval = cfg->report_interval, total = 0,
- run_time = cfg->run_time, run_ops = cfg->run_ops; g_error == 0;) {
+ for (interval = cfg->report_interval, run_time = cfg->run_time,
+ run_ops = cfg->run_ops; cfg->error == 0;) {
/*
* Sleep for one second at a time.
* If we are tracking run time, check to see if we're done, and
@@ -979,46 +1030,46 @@ execute_workload(CONFIG *cfg)
}
/* Sum the operations we've done. */
- g_ckpt_ops = sum_ckpt_ops(cfg);
- g_insert_ops = sum_insert_ops(cfg);
- g_read_ops = sum_read_ops(cfg);
- g_update_ops = sum_update_ops(cfg);
+ cfg->ckpt_ops = sum_ckpt_ops(cfg);
+ cfg->insert_ops = sum_insert_ops(cfg);
+ cfg->read_ops = sum_read_ops(cfg);
+ cfg->update_ops = sum_update_ops(cfg);
/* If we're checking total operations, see if we're done. */
- if (run_ops != 0 &&
- run_ops <= g_insert_ops + g_read_ops + g_update_ops)
+ if (run_ops != 0 && run_ops <=
+ cfg->insert_ops + cfg->read_ops + cfg->update_ops)
break;
/* If writing out throughput information, see if it's time. */
if (interval == 0 || --interval > 0)
continue;
interval = cfg->report_interval;
- total += cfg->report_interval;
+ cfg->totalsec += cfg->report_interval;
lprintf(cfg, 0, 1,
"%" PRIu64 " reads, %" PRIu64 " inserts, %" PRIu64
" updates, %" PRIu64 " checkpoints in %" PRIu32
" secs (%" PRIu32 " total secs)",
- g_read_ops - last_reads,
- g_insert_ops - last_inserts,
- g_update_ops - last_updates,
- g_ckpt_ops - last_ckpts,
- cfg->report_interval, total);
- last_reads = g_read_ops;
- last_inserts = g_insert_ops;
- last_updates = g_update_ops;
- last_ckpts = g_ckpt_ops;
+ cfg->read_ops - last_reads,
+ cfg->insert_ops - last_inserts,
+ cfg->update_ops - last_updates,
+ cfg->ckpt_ops - last_ckpts,
+ cfg->report_interval, cfg->totalsec);
+ last_reads = cfg->read_ops;
+ last_inserts = cfg->insert_ops;
+ last_updates = cfg->update_ops;
+ last_ckpts = cfg->ckpt_ops;
}
/* Notify the worker threads they are done. */
-err: g_stop = 1;
+err: cfg->stop = 1;
if ((t_ret = stop_threads(
cfg, (u_int)cfg->workers_cnt, cfg->workers)) != 0 && ret == 0)
ret = t_ret;
/* Report if any worker threads didn't finish. */
- if (g_error != 0) {
+ if (cfg->error != 0) {
lprintf(cfg, WT_ERROR, 0,
"Worker thread(s) exited without finishing.");
if (ret == 0)
@@ -1037,235 +1088,249 @@ find_table_count(CONFIG *cfg)
WT_CONNECTION *conn;
WT_CURSOR *cursor;
WT_SESSION *session;
- char *key;
+ uint32_t i, max_icount, table_icount;
int ret, t_ret;
+ char *key;
conn = cfg->conn;
+ max_icount = 0;
if ((ret = conn->open_session(
conn, NULL, cfg->sess_config, &session)) != 0) {
lprintf(cfg, ret, 0,
- "open_session failed finding existing table count");
- goto err;
- }
- if ((ret = session->open_cursor(session, cfg->uri,
- NULL, NULL, &cursor)) != 0) {
- lprintf(cfg, ret, 0,
- "open_cursor failed finding existing table count");
- goto err;
- }
- if ((ret = cursor->prev(cursor)) != 0) {
- lprintf(cfg, ret, 0,
- "cursor prev failed finding existing table count");
- goto err;
- }
- if ((ret = cursor->get_key(cursor, &key)) != 0) {
- lprintf(cfg, ret, 0,
- "cursor get_key failed finding existing table count");
- goto err;
+ "find_table_count: open_session failed");
+ goto out;
}
- cfg->icount = (uint32_t)atoi(key);
+ for (i = 0; i < cfg->table_count; i++) {
+ if ((ret = session->open_cursor(session, cfg->uris[i],
+ NULL, NULL, &cursor)) != 0) {
+ lprintf(cfg, ret, 0,
+ "find_table_count: open_cursor failed");
+ goto err;
+ }
+ if ((ret = cursor->prev(cursor)) != 0) {
+ lprintf(cfg, ret, 0,
+ "find_table_count: cursor prev failed");
+ goto err;
+ }
+ if ((ret = cursor->get_key(cursor, &key)) != 0) {
+ lprintf(cfg, ret, 0,
+ "find_table_count: cursor get_key failed");
+ goto err;
+ }
+ table_icount = (uint32_t)atoi(key);
+ if (table_icount > max_icount)
+ max_icount = table_icount;
+ if ((ret = cursor->close(cursor)) != 0) {
+ lprintf(cfg, ret, 0,
+ "find_table_count: cursor close failed");
+ goto err;
+ }
+ }
err: if ((t_ret = session->close(session, NULL)) != 0) {
if (ret == 0)
ret = t_ret;
lprintf(cfg, ret, 0,
- "session close failed finding existing table count");
+ "find_table_count: session close failed");
}
- return (ret);
+ cfg->icount = max_icount;
+out: return (ret);
}
-int
-main(int argc, char *argv[])
+/*
+ * Populate the uri array if more than one table is being used.
+ */
+static int
+create_uris(CONFIG *cfg)
{
- CONFIG *cfg, _cfg;
- WT_SESSION *session;
- pthread_t monitor_thread;
- size_t len;
- uint64_t req_len, total_ops;
- int ch, monitor_created, monitor_set, ret, t_ret;
- const char *helium_mount;
- const char *opts = "C:H:h:LMm:O:o:ST:";
- const char *wtperftmp_subdir = "wtperftmp";
- const char *user_cconfig, *user_tconfig;
- char *cmd, *cc_buf, *tc_buf, *tmphome;
-
- session = NULL;
- monitor_created = monitor_set = ret = 0;
- helium_mount = user_cconfig = user_tconfig = NULL;
- cmd = cc_buf = tc_buf = tmphome = NULL;
+ size_t base_uri_len;
+ uint32_t i;
+ int ret;
+ char *uri;
- /* Setup the default configuration values. */
- cfg = &_cfg;
- memset(cfg, 0, sizeof(*cfg));
- if (config_assign(cfg, &default_cfg))
+ ret = 0;
+ base_uri_len = strlen(cfg->base_uri);
+ cfg->uris = (char **)calloc(cfg->table_count, sizeof(char *));
+ if (cfg->uris == NULL) {
+ ret = ENOMEM;
goto err;
-
- /* Do a basic validation of options, and home is needed before open. */
- while ((ch = getopt(argc, argv, opts)) != EOF)
- switch (ch) {
- case 'h':
- cfg->home = optarg;
- break;
- case 'm':
- cfg->monitor_dir = optarg;
- monitor_set = 1;
- break;
- case '?':
- fprintf(stderr, "Invalid option\n");
- usage();
- goto einval;
+ }
+ for (i = 0; i < cfg->table_count; i++) {
+ uri = cfg->uris[i] = (char *)calloc(base_uri_len + 3, 1);
+ if (uri == NULL) {
+ ret = ENOMEM;
+ goto err;
}
+ memcpy(uri, cfg->base_uri, base_uri_len);
+ /*
+ * If there is only one table, just use base name.
+ */
+ if (cfg->table_count > 1) {
+ uri[base_uri_len] = uri[base_uri_len + 1] = '0';
+ uri[base_uri_len] = '0' + (i / 10);
+ uri[base_uri_len + 1] = '0' + (i % 10);
+ }
+ }
+err: if (ret != 0 && cfg->uris != NULL) {
+ for (i = 0; i < cfg->table_count; i++)
+ free(cfg->uris[i]);
+ free(cfg->uris);
+ cfg->uris = NULL;
+ }
+ return (ret);
+}
- /*
- * If the user did not specify a monitor directory
- * then set the monitor directory to the home dir.
- */
- if (!monitor_set)
- cfg->monitor_dir = cfg->home;
+static int
+create_tables(CONFIG *cfg)
+{
+ WT_SESSION *session;
+ size_t i;
+ int ret;
+ char *uri;
- /*
- * Create a temporary directory underneath the test directory in which
- * we do an initial WiredTiger open, because we need a connection in
- * order to use the extension configuration parser. We will open the
- * real WiredTiger database after parsing the options.
- */
- len = strlen(cfg->home) + strlen(wtperftmp_subdir) + 2;
- if ((tmphome = malloc(len)) == NULL) {
- ret = enomem(cfg);
- goto err;
- }
- snprintf(tmphome, len, "%s/%s", cfg->home, wtperftmp_subdir);
- len = len * 2 + 100;
- if ((cmd = malloc(len)) == NULL) {
- ret = enomem(cfg);
- goto err;
+ session = NULL;
+ if (cfg->create == 0)
+ return (0);
+
+ uri = cfg->base_uri;
+ if ((ret = cfg->conn->open_session(
+ cfg->conn, NULL, cfg->sess_config, &session)) != 0) {
+ lprintf(cfg, ret, 0,
+ "Error opening a session on %s", cfg->home);
+ return (ret);
}
- snprintf(cmd, len, "rm -rf %s && mkdir %s", tmphome, tmphome);
- if (system(cmd) != 0) {
- fprintf(stderr, "%s: failed\n", cmd);
- goto einval;
+ for (i = 0; i < cfg->table_count; i++) {
+ uri = cfg->uris[i];
+ if ((ret = session->create(
+ session, uri, cfg->table_config)) != 0) {
+ lprintf(cfg, ret, 0,
+ "Error creating table %s", cfg->uris[i]);
+ return (ret);
+ }
}
- if ((ret = wiredtiger_open(
- tmphome, NULL, "create", &cfg->conn)) != 0) {
- lprintf(cfg, ret, 0, "wiredtiger_open: %s", tmphome);
- goto err;
+ if ((ret = session->close(session, NULL)) != 0) {
+ lprintf(cfg,
+ ret, 0, "Error closing session");
+ return (ret);
}
+ return (ret);
+}
- /*
- * Then parse different config structures - other options override
- * fields within the structure.
- */
- optind = 1;
- while ((ch = getopt(argc, argv, opts)) != EOF)
- switch (ch) {
- case 'S':
- if (config_opt_line(cfg, small_config_str) != 0)
- goto einval;
- break;
- case 'M':
- if (config_opt_line(cfg, med_config_str) != 0)
- goto einval;
- break;
- case 'L':
- if (config_opt_line(cfg, large_config_str) != 0)
- goto einval;
- break;
- case 'O':
- if (config_opt_file(cfg, optarg) != 0)
- goto einval;
- break;
- default:
- /* Validation done previously. */
- break;
- }
+static int
+start_all_runs(CONFIG *cfg)
+{
+ CONFIG *next_cfg, **configs;
+ pthread_t *threads;
+ size_t cmd_len, home_len, i;
+ int ret, t_ret;
+ char *cmd_buf, *new_home;
- /* Parse other options */
- optind = 1;
- while ((ch = getopt(argc, argv, opts)) != EOF)
- switch (ch) {
- case 'C':
- user_cconfig = optarg;
- break;
- case 'H':
- helium_mount = optarg;
- break;
- case 'o':
- /* Allow -o key=value */
- if (config_opt_line(cfg, optarg) != 0)
- goto einval;
- break;
- case 'T':
- user_tconfig = optarg;
- break;
- }
+ ret = 0;
+ configs = NULL;
+ cmd_buf = NULL;
- /* Build the URI from the table name. */
- req_len = strlen("table:") +
- strlen(HELIUM_NAME) + strlen(cfg->table_name) + 2;
- if ((cfg->uri = calloc(req_len, 1)) == NULL) {
- ret = enomem(cfg);
+ if (cfg->database_count == 1)
+ return (start_run(cfg));
+
+ /* Allocate an array to hold our config struct copies. */
+ configs = calloc(cfg->database_count, sizeof(CONFIG *));
+ if (configs == NULL)
+ return (ENOMEM);
+
+ /* Allocate an array to hold our thread IDs. */
+ threads = calloc(cfg->database_count, sizeof(pthread_t));
+ if (threads == NULL) {
+ ret = ENOMEM;
goto err;
}
- snprintf(cfg->uri, req_len, "table:%s%s%s",
- helium_mount == NULL ? "" : HELIUM_NAME,
- helium_mount == NULL ? "" : "/",
- cfg->table_name);
-
- if ((ret = setup_log_file(cfg)) != 0)
- goto err;
- /* Make stdout line buffered, so verbose output appears quickly. */
- (void)setvbuf(stdout, NULL, _IOLBF, 0);
+ home_len = strlen(cfg->home);
+ cmd_len = (home_len * 2) + 30; /* Add some slop. */
+ cmd_buf = calloc(cmd_len, 1);
+ if (cmd_buf == NULL) {
+ ret = ENOMEM;
+ goto err;
+ }
+ for (i = 0; i < cfg->database_count; i++) {
+ next_cfg = calloc(1, sizeof(CONFIG));
+ if ((ret = config_assign(next_cfg, cfg)) != 0)
+ goto err;
- /* Concatenate non-default configuration strings. */
- if (cfg->verbose > 1 || user_cconfig != NULL) {
- req_len = strlen(cfg->conn_config) + strlen(debug_cconfig) + 3;
- if (user_cconfig != NULL)
- req_len += strlen(user_cconfig);
- if ((cc_buf = calloc(req_len, 1)) == NULL) {
- ret = enomem(cfg);
+ /* Setup a unique home directory for each database. */
+ configs[i] = next_cfg;
+ new_home = malloc(home_len + 5);
+ if (new_home == NULL) {
+ ret = ENOMEM;
goto err;
}
- snprintf(cc_buf, req_len, "%s%s%s%s%s",
- cfg->conn_config,
- cfg->verbose > 1 ? "," : "",
- cfg->verbose > 1 ? debug_cconfig : "",
- user_cconfig ? "," : "", user_cconfig ? user_cconfig : "");
- if ((ret = config_opt_str(cfg, "conn_config", cc_buf)) != 0)
- goto err;
- }
- if (cfg->verbose > 1 || helium_mount != NULL || user_tconfig != NULL) {
- req_len = strlen(cfg->table_config) +
- strlen(HELIUM_CONFIG) + strlen(debug_tconfig) + 3;
- if (user_tconfig != NULL)
- req_len += strlen(user_tconfig);
- if ((tc_buf = calloc(req_len, 1)) == NULL) {
- ret = enomem(cfg);
+ sprintf(new_home, "%s/D%02d", cfg->home, (int)i);
+ next_cfg->home = (const char *)new_home;
+
+ /* If the monitor dir is default, update it too. */
+ if (strcmp(cfg->monitor_dir, cfg->home) == 0)
+ next_cfg->monitor_dir = new_home;
+
+ /* Create clean home directories. */
+ snprintf(cmd_buf, cmd_len, "rm -rf %s && mkdir %s",
+ next_cfg->home, next_cfg->home);
+ if ((ret = system(cmd_buf)) != 0) {
+ fprintf(stderr, "%s: failed\n", cmd_buf);
goto err;
}
- snprintf(tc_buf, req_len, "%s%s%s%s%s%s",
- cfg->table_config,
- cfg->verbose > 1 ? "," : "",
- cfg->verbose > 1 ? debug_tconfig : "",
- user_tconfig ? "," : "", user_tconfig ? user_tconfig : "",
- helium_mount == NULL ? "" : HELIUM_CONFIG);
- if ((ret = config_opt_str(cfg, "table_config", tc_buf)) != 0)
+ if ((ret = pthread_create(
+ &threads[i], NULL, thread_run_wtperf, next_cfg)) != 0) {
+ lprintf(cfg, ret, 0, "Error creating thread");
goto err;
+ }
}
- ret = cfg->conn->close(cfg->conn, NULL);
- cfg->conn = NULL;
- if (ret != 0) {
- lprintf(cfg, ret, 0, "WT_CONNECTION.close: %s", tmphome);
- goto err;
+ /* Wait for threads to finish. */
+ for (i = 0; i < cfg->database_count; i++) {
+ if ((t_ret = pthread_join(threads[i], NULL)) != 0) {
+ lprintf(cfg, ret, 0, "Error joining thread");
+ if (ret == 0)
+ ret = t_ret;
+ }
}
- /* Sanity-check the configuration */
- if (config_sanity(cfg) != 0)
- goto err;
- if (cfg->verbose > 1) /* Display the configuration. */
- config_print(cfg);
+err: for (i = 0; i < cfg->database_count && configs[i] != NULL; i++) {
+ free((char *)configs[i]->home);
+ config_free(configs[i]);
+ free(configs[i]);
+ }
+ free(configs);
+ free(threads);
+ free(cmd_buf);
+
+ return (ret);
+}
+
+/* Run an instance of wtperf for a given configuration. */
+static void *
+thread_run_wtperf(void *arg)
+{
+ CONFIG *cfg;
+ int ret;
+
+ cfg = (CONFIG *)arg;
+ if ((ret = start_run(cfg)) != 0)
+ lprintf(cfg, ret, 0, "Run failed for: %s.", cfg->home);
+ return (NULL);
+}
+
+static int
+start_run(CONFIG *cfg)
+{
+ pthread_t monitor_thread;
+ uint64_t total_ops;
+ int monitor_created, ret, t_ret;
+ char helium_buf[256];
+
+ monitor_created = ret = 0;
+
+ if ((ret = setup_log_file(cfg)) != 0)
+ goto err;
if ((ret = wiredtiger_open( /* Open the real connection. */
cfg->home, NULL, cfg->conn_config, &cfg->conn)) != 0) {
@@ -1273,40 +1338,25 @@ main(int argc, char *argv[])
goto err;
}
- if (helium_mount != NULL) { /* Configure optional Helium volume. */
- char helium_buf[256];
+ /* Configure optional Helium volume. */
+ if (cfg->helium_mount != NULL) {
snprintf(helium_buf, sizeof(helium_buf),
"entry=wiredtiger_extension_init,config=["
"%s=[helium_devices=\"he://./%s\","
"helium_o_volume_truncate=1]]",
- HELIUM_NAME, helium_mount);
+ HELIUM_NAME, cfg->helium_mount);
if ((ret = cfg->conn->load_extension(
cfg->conn, HELIUM_PATH, helium_buf)) != 0)
lprintf(cfg,
ret, 0, "Error loading Helium: %s", helium_buf);
}
- if (cfg->create != 0) { /* If creating, create the table. */
- if ((ret = cfg->conn->open_session(
- cfg->conn, NULL, cfg->sess_config, &session)) != 0) {
- lprintf(cfg, ret, 0,
- "Error opening a session on %s", cfg->home);
- goto err;
- }
- if ((ret = session->create(
- session, cfg->uri, cfg->table_config)) != 0) {
- lprintf(cfg,
- ret, 0, "Error creating table %s", cfg->uri);
- goto err;
- }
- if ((ret = session->close(session, NULL)) != 0) {
- lprintf(cfg,
- ret, 0, "Error closing session");
- goto err;
- }
- session = NULL;
- }
- /* Start the monitor thread. */
+ if ((ret = create_uris(cfg)) != 0)
+ goto err;
+ if ((ret = create_tables(cfg)) != 0)
+ goto err;
+
+ /* Start the monitor thread. */
if (cfg->sample_interval != 0) {
if ((ret = pthread_create(
&monitor_thread, NULL, monitor, cfg)) != 0) {
@@ -1316,15 +1366,17 @@ main(int argc, char *argv[])
}
monitor_created = 1;
}
- /* If creating, populate the table. */
+
+ /* If creating, populate the table. */
if (cfg->create != 0 && execute_populate(cfg) != 0)
goto err;
- /* Optional workload. */
+
+ /* Optional workload. */
if (cfg->run_time != 0 || cfg->run_ops != 0) {
- /* Didn't create, set insert count. */
+ /* Didn't create, set insert count. */
if (cfg->create == 0 && find_table_count(cfg) != 0)
goto err;
- /* Start the checkpoint thread. */
+ /* Start the checkpoint thread. */
if (cfg->checkpoint_threads != 0) {
lprintf(cfg, 0, 1,
"Starting %" PRIu32 " checkpoint thread(s)",
@@ -1339,47 +1391,46 @@ main(int argc, char *argv[])
cfg->checkpoint_threads, checkpoint_worker) != 0)
goto err;
}
- /* Execute the workload. */
+ /* Execute the workload. */
if ((ret = execute_workload(cfg)) != 0)
goto err;
/* One final summation of the operations we've completed. */
- g_read_ops = sum_read_ops(cfg);
- g_insert_ops = sum_insert_ops(cfg);
- g_update_ops = sum_update_ops(cfg);
- g_ckpt_ops = sum_ckpt_ops(cfg);
- total_ops = g_read_ops + g_insert_ops + g_update_ops;
+ cfg->read_ops = sum_read_ops(cfg);
+ cfg->insert_ops = sum_insert_ops(cfg);
+ cfg->update_ops = sum_update_ops(cfg);
+ cfg->ckpt_ops = sum_ckpt_ops(cfg);
+ total_ops = cfg->read_ops + cfg->insert_ops + cfg->update_ops;
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " read operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
- g_read_ops, (g_read_ops * 100) / total_ops,
- g_read_ops / cfg->run_time);
+ cfg->read_ops, (cfg->read_ops * 100) / total_ops,
+ cfg->read_ops / cfg->run_time);
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " insert operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
- g_insert_ops, (g_insert_ops * 100) / total_ops,
- g_insert_ops / cfg->run_time);
+ cfg->insert_ops, (cfg->insert_ops * 100) / total_ops,
+ cfg->insert_ops / cfg->run_time);
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " update operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
- g_update_ops, (g_update_ops * 100) / total_ops,
- g_update_ops / cfg->run_time);
+ cfg->update_ops, (cfg->update_ops * 100) / total_ops,
+ cfg->update_ops / cfg->run_time);
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " checkpoint operations",
- g_ckpt_ops);
+ cfg->ckpt_ops);
latency_print(cfg);
}
if (0) {
-einval: ret = EINVAL;
err: if (ret == 0)
ret = EXIT_FAILURE;
}
/* Notify the worker threads they are done. */
- g_stop = 1;
+ cfg->stop = 1;
if ((t_ret = stop_threads(cfg, 1, cfg->ckptthreads)) != 0)
if (ret == 0)
@@ -1415,12 +1466,165 @@ err: if (ret == 0)
if ((t_ret = fclose(cfg->logf)) != 0 && ret == 0)
ret = t_ret;
}
- config_free(cfg);
+ return (ret);
+}
+int
+main(int argc, char *argv[])
+{
+ CONFIG *cfg, _cfg;
+ size_t req_len;
+ int ch, monitor_set, ret;
+ char *cc_buf, *tc_buf;
+ const char *opts = "C:H:h:m:O:o:T:";
+ const char *config_opts, *user_cconfig, *user_tconfig;
+
+ monitor_set = ret = 0;
+ cc_buf = tc_buf = NULL;
+ config_opts = user_cconfig = user_tconfig = NULL;
+
+ /* Setup the default configuration values. */
+ cfg = &_cfg;
+ memset(cfg, 0, sizeof(*cfg));
+ if (config_assign(cfg, &default_cfg))
+ goto err;
+
+ /* Do a basic validation of options, and home is needed before open. */
+ while ((ch = getopt(argc, argv, opts)) != EOF)
+ switch (ch) {
+ case 'C':
+ user_cconfig = optarg;
+ break;
+ case 'H':
+ cfg->helium_mount = optarg;
+ break;
+ case 'O':
+ config_opts = optarg;
+ break;
+ case 'T':
+ user_tconfig = optarg;
+ break;
+ case 'h':
+ cfg->home = optarg;
+ break;
+ case 'm':
+ cfg->monitor_dir = optarg;
+ monitor_set = 1;
+ break;
+ case '?':
+ fprintf(stderr, "Invalid option\n");
+ usage();
+ goto einval;
+ }
+
+ /*
+ * If the user did not specify a monitor directory then set the
+ * monitor directory to the home dir.
+ */
+ if (!monitor_set)
+ cfg->monitor_dir = cfg->home;
+
+ /* Parse configuration settings from configuration file. */
+ if (config_opts != NULL && config_opt_file(cfg, config_opts) != 0)
+ goto einval;
+
+ /* Parse options that override values set via a configuration file. */
+ optind = 1;
+ while ((ch = getopt(argc, argv, opts)) != EOF)
+ switch (ch) {
+ case 'o':
+ /* Allow -o key=value */
+ if (config_opt_line(cfg, optarg) != 0)
+ goto einval;
+ break;
+ }
+
+ if ((ret = config_compress(cfg)) != 0)
+ goto err;
+
+ /* Build the URI from the table name. */
+ req_len = strlen("table:") +
+ strlen(HELIUM_NAME) + strlen(cfg->table_name) + 2;
+ if ((cfg->base_uri = calloc(req_len, 1)) == NULL) {
+ ret = enomem(cfg);
+ goto err;
+ }
+ snprintf(cfg->base_uri, req_len, "table:%s%s%s",
+ cfg->helium_mount == NULL ? "" : HELIUM_NAME,
+ cfg->helium_mount == NULL ? "" : "/",
+ cfg->table_name);
+
+ /* Make stdout line buffered, so verbose output appears quickly. */
+ (void)setvbuf(stdout, NULL, _IOLBF, 0);
+
+ /* Concatenate non-default configuration strings. */
+ if (cfg->verbose > 1 || user_cconfig != NULL ||
+ cfg->compress_ext != NULL) {
+ req_len = strlen(cfg->conn_config) + strlen(debug_cconfig) + 3;
+ if (user_cconfig != NULL)
+ req_len += strlen(user_cconfig);
+ if (cfg->compress_ext != NULL)
+ req_len += strlen(cfg->compress_ext);
+ if ((cc_buf = calloc(req_len, 1)) == NULL) {
+ ret = enomem(cfg);
+ goto err;
+ }
+ /*
+ * This is getting hard to parse.
+ */
+ snprintf(cc_buf, req_len, "%s%s%s%s%s%s",
+ cfg->conn_config,
+ cfg->compress_ext ? cfg->compress_ext : "",
+ cfg->verbose > 1 ? ",": "",
+ cfg->verbose > 1 ? debug_cconfig : "",
+ user_cconfig ? ",": "",
+ user_cconfig ? user_cconfig : "");
+ if ((ret = config_opt_str(cfg, "conn_config", cc_buf)) != 0)
+ goto err;
+ }
+ if (cfg->verbose > 1 || cfg->helium_mount != NULL ||
+ user_tconfig != NULL || cfg->compress_table != NULL) {
+ req_len = strlen(cfg->table_config) + strlen(HELIUM_CONFIG) +
+ strlen(debug_tconfig) + 3;
+ if (user_tconfig != NULL)
+ req_len += strlen(user_tconfig);
+ if (cfg->compress_table != NULL)
+ req_len += strlen(cfg->compress_table);
+ if ((tc_buf = calloc(req_len, 1)) == NULL) {
+ ret = enomem(cfg);
+ goto err;
+ }
+ /*
+ * This is getting hard to parse.
+ */
+ snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s",
+ cfg->table_config,
+ cfg->compress_table ? cfg->compress_table : "",
+ cfg->verbose > 1 ? ",": "",
+ cfg->verbose > 1 ? debug_tconfig : "",
+ user_tconfig ? ",": "",
+ user_tconfig ? user_tconfig : "",
+ cfg->helium_mount == NULL ? "" : HELIUM_CONFIG);
+ if ((ret = config_opt_str(cfg, "table_config", tc_buf)) != 0)
+ goto err;
+ }
+
+ /* Sanity-check the configuration. */
+ if (config_sanity(cfg) != 0)
+ goto err;
+
+ /* Display the configuration. */
+ if (cfg->verbose > 1)
+ config_print(cfg);
+
+ if ((ret = start_all_runs(cfg)) != 0)
+ goto err;
+
+ if (0)
+einval: ret = EINVAL;
+err: config_free(cfg);
free(cc_buf);
- free(cmd);
free(tc_buf);
- free(tmphome);
return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
@@ -1507,7 +1711,7 @@ wtperf_value_range(CONFIG *cfg)
if (cfg->random_range)
return (cfg->icount + cfg->random_range);
- return (cfg->icount + g_insert_key - (u_int)(cfg->workers_cnt + 1));
+ return (cfg->icount + cfg->insert_key - (u_int)(cfg->workers_cnt + 1));
}
static uint64_t
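The start_all_runs/thread_run_wtperf pair added above fans a single wtperf invocation out into database_count independent runs, each in its own D<nn> home directory on its own thread, joining them all and keeping the first error. Below is a minimal, self-contained sketch of that fan-out pattern only; run_one() and the fixed NDB count are illustrative stand-ins for start_run() and the real CONFIG handling, not wtperf code.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NDB 4                          /* stand-in for database_count */

struct run {                           /* stand-in for one CONFIG copy */
    char home[256];
    int ret;
};

static void *
run_one(void *arg)                     /* stand-in for thread_run_wtperf */
{
    struct run *r = arg;

    /* A real run would call start_run() here. */
    printf("running in %s\n", r->home);
    r->ret = 0;
    return (NULL);
}

int
main(void)
{
    struct run runs[NDB];
    pthread_t tids[NDB];
    int i, ret = 0;

    for (i = 0; i < NDB; i++) {        /* one home directory per database */
        snprintf(runs[i].home, sizeof(runs[i].home), "WT_TEST/D%02d", i);
        if (pthread_create(&tids[i], NULL, run_one, &runs[i]) != 0)
            return (EXIT_FAILURE);
    }
    for (i = 0; i < NDB; i++) {        /* join, keeping the first error */
        (void)pthread_join(tids[i], NULL);
        if (ret == 0)
            ret = runs[i].ret;
    }
    return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}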
diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h
index 54c4334a2ac..c94ec10d8fd 100644
--- a/bench/wtperf/wtperf.h
+++ b/bench/wtperf/wtperf.h
@@ -48,6 +48,21 @@
typedef struct __config CONFIG;
typedef struct __config_thread CONFIG_THREAD;
+#define EXT_PFX ",extensions=("
+#define EXT_SFX ")"
+#define EXTPATH "../../ext/compressors/" /* Extensions path */
+#define BLKCMP_PFX ",block_compressor="
+
+#define BZIP_BLK BLKCMP_PFX "bzip2"
+#define BZIP_EXT \
+ EXT_PFX EXTPATH "bzip2/.libs/libwiredtiger_bzip2.so" EXT_SFX
+#define SNAPPY_BLK BLKCMP_PFX "snappy"
+#define SNAPPY_EXT \
+ EXT_PFX EXTPATH "snappy/.libs/libwiredtiger_snappy.so" EXT_SFX
+#define ZLIB_BLK BLKCMP_PFX "zlib"
+#define ZLIB_EXT \
+ EXT_PFX EXTPATH "zlib/.libs/libwiredtiger_zlib.so" EXT_SFX
+
typedef struct {
int64_t threads; /* Thread count */
int64_t insert; /* Insert ratio */
@@ -61,15 +76,24 @@ typedef struct {
uint8_t ops[100]; /* Operation schedule */
} WORKLOAD;
+/*
+ * NOTE: If you add any fields to this structure here, you must also add
+ * an initialization in wtperf.c in the default_cfg.
+ */
struct __config { /* Configuration struction */
const char *home; /* WiredTiger home */
const char *monitor_dir; /* Monitor output dir */
- char *uri; /* Object URI */
+ char *base_uri; /* Object URI */
+ char **uris; /* URIs if multiple tables */
+ const char *helium_mount; /* Optional Helium mount point */
WT_CONNECTION *conn; /* Database connection */
FILE *logf; /* Logging handle */
+ const char *compress_ext; /* Compression extension for conn */
+ const char *compress_table; /* Compression arg to table create */
+
CONFIG_THREAD *ckptthreads, *popthreads;
#define WORKLOAD_MAX 50
@@ -79,6 +103,21 @@ struct __config { /* Configuration struction */
WORKLOAD *workload; /* Workloads */
u_int workload_cnt;
+ /* State tracking variables. */
+
+ uint64_t ckpt_ops; /* checkpoint operations */
+ uint64_t insert_ops; /* insert operations */
+ uint64_t read_ops; /* read operations */
+ uint64_t update_ops; /* update operations */
+
+ uint64_t insert_key; /* insert key */
+
+ volatile int ckpt; /* checkpoint in progress */
+ volatile int error; /* thread error */
+ volatile int stop; /* notify threads to stop */
+
+ volatile uint32_t totalsec; /* total seconds running */
+
/* Fields changeable on command line are listed in wtperf_opt.i */
#define OPT_DECLARE_STRUCT
#include "wtperf_opt.i"
@@ -141,8 +180,8 @@ typedef struct {
* Minimum/maximum latency, shared with the monitor thread, that is, the
* monitor thread clears it so it's recalculated again for each period.
*/
- uint32_t min_latency; /* Minimum latency (NS) */
- uint32_t max_latency; /* Maximum latency (NS) */
+ uint32_t min_latency; /* Minimum latency (uS) */
+ uint32_t max_latency; /* Maximum latency (uS) */
/*
* Latency buckets.
@@ -168,6 +207,7 @@ struct __config_thread { /* Per-thread structure */
};
int config_assign(CONFIG *, const CONFIG *);
+int config_compress(CONFIG *);
void config_free(CONFIG *);
int config_opt_file(CONFIG *, const char *);
int config_opt_line(CONFIG *, const char *);
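The compression macros added above exist so the new config_compress() step can append an extensions=(...) clause to the connection configuration and a block_compressor=... clause to the table configuration. A small sketch of how the snappy pair expands, reusing the same macro definitions (the paths are relative to the bench/wtperf build directory):

#include <stdio.h>

#define EXT_PFX ",extensions=("
#define EXT_SFX ")"
#define EXTPATH "../../ext/compressors/"       /* Extensions path */
#define BLKCMP_PFX ",block_compressor="

#define SNAPPY_BLK BLKCMP_PFX "snappy"
#define SNAPPY_EXT \
    EXT_PFX EXTPATH "snappy/.libs/libwiredtiger_snappy.so" EXT_SFX

int
main(void)
{
    /* Appended to conn_config and table_config respectively. */
    printf("conn_config suffix:  %s\n", SNAPPY_EXT);
    printf("table_config suffix: %s\n", SNAPPY_BLK);
    return (0);
}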
diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i
index 4e11799781f..9ee7072497c 100644
--- a/bench/wtperf/wtperf_opt.i
+++ b/bench/wtperf/wtperf_opt.i
@@ -82,9 +82,18 @@ DEF_OPT_AS_UINT32(checkpoint_threads, 0, "number of checkpoint threads")
DEF_OPT_AS_CONFIG_STRING(conn_config, "create",
"connection configuration string")
DEF_OPT_AS_BOOL(compact, 0, "post-populate compact for LSM merging activity")
+DEF_OPT_AS_STRING(compression, "none",
+ "compression extension. Allowed configuration values are: "
+ "'none' (default), 'bzip', 'snappy', 'zlib'")
DEF_OPT_AS_BOOL(create, 1,
"do population phase; false to use existing database")
-DEF_OPT_AS_UINT32(icount, 5000, "number of records to initially populate")
+DEF_OPT_AS_UINT32(database_count, 1,
+ "number of WiredTiger databases to use. Each database will execute the"
+ " workload using a separate home directory and complete set of worker"
+ " threads")
+DEF_OPT_AS_UINT32(icount, 5000,
+ "number of records to initially populate. If multiple tables are "
+ "configured, each table has this many items inserted.")
DEF_OPT_AS_BOOL(insert_rmw, 0,
"execute a read prior to each insert in workload phase")
DEF_OPT_AS_UINT32(key_sz, 20, "key size")
@@ -114,6 +123,9 @@ DEF_OPT_AS_CONFIG_STRING(table_config,
"key_format=S,value_format=S,type=lsm,exclusive=true,"
"leaf_page_max=4kb,internal_page_max=64kb,allocation_size=4kb,",
"table configuration string")
+DEF_OPT_AS_UINT32(table_count, 1,
+ "number of tables to run operations over. Keys are divided evenly "
+ "over the tables. Default 1, maximum 99.")
DEF_OPT_AS_STRING(threads, "", "workload configuration: each 'count' "
"entry is the total number of threads, and the 'insert', 'read' and "
"'update' entries are the ratios of insert, read and update operations "
diff --git a/build_posix/aclocal/version-set.m4 b/build_posix/aclocal/version-set.m4
index 2f2770d84cf..0c7436fad81 100644
--- a/build_posix/aclocal/version-set.m4
+++ b/build_posix/aclocal/version-set.m4
@@ -2,8 +2,8 @@ dnl build by dist/s_version
VERSION_MAJOR=2
VERSION_MINOR=1
-VERSION_PATCH=0
-VERSION_STRING='"WiredTiger 2.1.0: (February 4, 2014)"'
+VERSION_PATCH=1
+VERSION_STRING='"WiredTiger 2.1.1: (March 4, 2014)"'
AC_SUBST(VERSION_MAJOR)
AC_SUBST(VERSION_MINOR)
diff --git a/build_posix/aclocal/version.m4 b/build_posix/aclocal/version.m4
index b47038041e0..80fe1bb193f 100644
--- a/build_posix/aclocal/version.m4
+++ b/build_posix/aclocal/version.m4
@@ -1,2 +1,2 @@
dnl WiredTiger product version for AC_INIT. Maintained by dist/s_version
-2.1.0
+2.1.1
diff --git a/dist/api_data.py b/dist/api_data.py
index 5509032d148..412f0603efd 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -613,8 +613,13 @@ methods = {
Use \c O_DIRECT to access files. Options are given as a list,
such as <code>"direct_io=[data]"</code>. Configuring
\c direct_io requires care, see @ref
- tuning_system_buffer_cache_direct_io for important warnings''',
- type='list', choices=['data', 'log']),
+ tuning_system_buffer_cache_direct_io for important warnings.
+ Including \c "data" will cause WiredTiger data files to use
+ \c O_DIRECT, including \c "log" will cause WiredTiger log files
+ to use \c O_DIRECT, and including \c "checkpoint" will cause
+ WiredTiger data files opened at a checkpoint (i.e: read only) to
+ use \c O_DIRECT''',
+ type='list', choices=['checkpoint', 'data', 'log']),
Config('extensions', '', r'''
list of shared library extensions to load (using dlopen).
Any values specified to an library extension are passed to
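The new "checkpoint" choice lets data files opened read-only at a checkpoint use O_DIRECT independently of live data files. A hedged sketch of a wiredtiger_open call exercising the extended list (the WT_HOME directory is assumed to already exist and the platform to support O_DIRECT):

#include <stdio.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    int ret;

    /* Direct I/O for live data files and for checkpoint handles. */
    if ((ret = wiredtiger_open("WT_HOME", NULL,
        "create,direct_io=[data,checkpoint]", &conn)) != 0) {
        fprintf(stderr, "wiredtiger_open: %s\n", wiredtiger_strerror(ret));
        return (1);
    }
    return (conn->close(conn, NULL));
}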
diff --git a/dist/filelist b/dist/filelist
index 87b75d0b7cf..58b5aa2ca92 100644
--- a/dist/filelist
+++ b/dist/filelist
@@ -50,6 +50,7 @@ src/btree/row_key.c
src/btree/row_modify.c
src/btree/row_srch.c
src/config/config.c
+src/config/config_api.c
src/config/config_check.c
src/config/config_collapse.c
src/config/config_concat.c
diff --git a/dist/flags.py b/dist/flags.py
index 7ca1ee3144d..2a6adfccf43 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -15,6 +15,7 @@ flags = {
'SYNC_WRITE_LEAVES',
],
'file_types' : [
+ 'FILE_TYPE_CHECKPOINT',
'FILE_TYPE_DATA',
'FILE_TYPE_LOG'
],
diff --git a/dist/s_funcs.list b/dist/s_funcs.list
index b34564adda7..6ffb956f59e 100644
--- a/dist/s_funcs.list
+++ b/dist/s_funcs.list
@@ -22,6 +22,7 @@ __wt_log_scan
__wt_nlpo2
__wt_nlpo2_round
__wt_print_huffman_code
+wiredtiger_config_parser_open
wiredtiger_pack_int
wiredtiger_pack_item
wiredtiger_pack_str
diff --git a/dist/s_string.ok b/dist/s_string.ok
index a71cca62a8c..5f0d331e105 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -688,6 +688,7 @@ openfile
os
ovfl
packv
+parserp
patchp
pathname
pathnames
diff --git a/dist/s_symbols.list b/dist/s_symbols.list
index a66af1f8994..d3803bc3afa 100644
--- a/dist/s_symbols.list
+++ b/dist/s_symbols.list
@@ -1,4 +1,5 @@
# List of OK external symbols.
+wiredtiger_config_parser_open
wiredtiger_open
wiredtiger_pack_close
wiredtiger_pack_int
diff --git a/dist/stat_data.py b/dist/stat_data.py
index 1007cf71a11..72babeb881a 100644
--- a/dist/stat_data.py
+++ b/dist/stat_data.py
@@ -153,6 +153,9 @@ connection_stats = [
##########################################
# LSM statistics
##########################################
+ Stat('lsm_checkpoint_throttle',
+ 'sleep for LSM checkpoint throttle'),
+ Stat('lsm_merge_throttle', 'sleep for LSM merge throttle'),
Stat('lsm_rows_merged', 'rows merged in an LSM tree'),
##########################################
@@ -244,6 +247,8 @@ dsrc_stats = [
'bloom filter pages evicted from cache'),
Stat('bloom_page_read', 'bloom filter pages read into cache'),
Stat('bloom_size', 'total size of bloom filters', 'no_scale'),
+ Stat('lsm_checkpoint_throttle',
+ 'sleep for LSM checkpoint throttle'),
Stat('lsm_chunk_count',
'chunks in the LSM tree', 'no_aggregate,no_scale'),
Stat('lsm_generation_max',
@@ -252,6 +257,7 @@ dsrc_stats = [
Stat('lsm_lookup_no_bloom',
'queries that could have benefited ' +
'from a Bloom filter that did not exist'),
+ Stat('lsm_merge_throttle', 'sleep for LSM merge throttle'),
##########################################
# Block manager statistics
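The two LSM throttle counters are added to both the connection and the data-source statistics, so they can be read through an ordinary statistics cursor. A sketch of scanning for them, assuming statistics are enabled at open (e.g. statistics=(all)) and the usual statistics-cursor value format of (description, printable value, raw value):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    WT_CURSOR *cursor;
    const char *desc, *pvalue;
    int64_t value;

    if (wiredtiger_open("WT_HOME", NULL, "create,statistics=(all)", &conn) != 0)
        return (1);
    if (conn->open_session(conn, NULL, NULL, &session) != 0 ||
        session->open_cursor(session, "statistics:", NULL, NULL, &cursor) != 0)
        return (1);

    /* Walk every statistic, printing only the throttle counters. */
    while (cursor->next(cursor) == 0) {
        if (cursor->get_value(cursor, &desc, &pvalue, &value) != 0)
            break;
        if (strstr(desc, "throttle") != NULL)
            printf("%s: %lld\n", desc, (long long)value);
        (void)pvalue;                  /* printable form unused here */
    }
    return (conn->close(conn, NULL));
}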
diff --git a/examples/c/Makefile.am b/examples/c/Makefile.am
index bd281df7130..5b43dcc2285 100644
--- a/examples/c/Makefile.am
+++ b/examples/c/Makefile.am
@@ -6,6 +6,7 @@ noinst_PROGRAMS = \
ex_all \
ex_call_center \
ex_config \
+ ex_config_parse \
ex_cursor \
ex_data_source \
ex_extending \
diff --git a/examples/c/ex_config_parse.c b/examples/c/ex_config_parse.c
new file mode 100644
index 00000000000..c6adc327c78
--- /dev/null
+++ b/examples/c/ex_config_parse.c
@@ -0,0 +1,166 @@
+/*-
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ex_config_parse.c
+ * This is an example demonstrating how to parse WiredTiger compatible
+ * configuration strings.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <wiredtiger.h>
+
+const char *home = NULL;
+
+int main(void)
+{
+ int ret;
+
+ /*! [Create a configuration parser] */
+ WT_CONFIG_ITEM k, v;
+ WT_CONFIG_PARSER *parser;
+ const char *config_string =
+ "path=/dev/loop,page_size=1024,log=(archive=true,file_max=20MB)";
+
+ if ((ret = wiredtiger_config_parser_open(
+ NULL, config_string, strlen(config_string), &parser)) != 0) {
+ fprintf(stderr, "Error creating configuration parser: %s\n",
+ wiredtiger_strerror(ret));
+ return (ret);
+ }
+ if ((ret = parser->close(parser)) != 0) {
+ fprintf(stderr, "Error closing configuration parser: %s\n",
+ wiredtiger_strerror(ret));
+ return (ret);
+ }
+ /*! [Create a configuration parser] */
+
+ if ((ret = wiredtiger_config_parser_open(
+ NULL, config_string, strlen(config_string), &parser)) != 0) {
+ fprintf(stderr, "Error creating configuration parser: %s\n",
+ wiredtiger_strerror(ret));
+ return (ret);
+ }
+
+ {
+ /*! [get] */
+ int64_t my_page_size;
+ /*
+ * Retrieve the value of the integer configuration string "page_size".
+ */
+ if ((ret = parser->get(parser, "page_size", &v)) != 0) {
+ fprintf(stderr,
+ "page_size configuration: %s", wiredtiger_strerror(ret));
+ return (ret);
+ }
+ my_page_size = v.val;
+ /*! [get] */
+
+ ret = parser->close(parser);
+
+ (void)my_page_size;
+ }
+
+ {
+ if ((ret = wiredtiger_config_parser_open(
+ NULL, config_string, strlen(config_string), &parser)) != 0) {
+ fprintf(stderr, "Error creating configuration parser: %s\n",
+ wiredtiger_strerror(ret));
+ return (ret);
+ }
+ /*! [next] */
+ /*
+ * Retrieve and print the values of the configuration strings.
+ */
+ while ((ret = parser->next(parser, &k, &v)) == 0) {
+ printf("%.*s:", (int)k.len, k.str);
+ if (v.type == WT_CONFIG_ITEM_NUM)
+ printf("%d\n", (int)v.val);
+ else
+ printf("%.*s\n", (int)v.len, v.str);
+ }
+ /*! [next] */
+ ret = parser->close(parser);
+ }
+
+ if ((ret = wiredtiger_config_parser_open(
+ NULL, config_string, strlen(config_string), &parser)) != 0) {
+ fprintf(stderr, "Error creating configuration parser: %s\n",
+ wiredtiger_strerror(ret));
+ return (ret);
+ }
+
+ /*! [nested get] */
+ /*
+ * Retrieve the value of the nested log file_max configuration string
+ * using dot shorthand. Utilize the configuration parsing automatic
+ * conversion of value strings into an integer.
+ */
+ v.type = WT_CONFIG_ITEM_NUM;
+ if ((ret = parser->get(parser, "log.file_max", &v)) != 0) {
+ fprintf(stderr,
+ "log.file_max configuration: %s", wiredtiger_strerror(ret));
+ return (ret);
+ }
+ printf("log file max: %d\n", (int)v.val);
+ /*! [nested get] */
+ ret = parser->close(parser);
+
+ if ((ret = wiredtiger_config_parser_open(
+ NULL, config_string, strlen(config_string), &parser)) != 0) {
+ fprintf(stderr, "Error creating configuration parser: %s\n",
+ wiredtiger_strerror(ret));
+ return (ret);
+ }
+ /*! [nested traverse] */
+ {
+ WT_CONFIG_PARSER *sub_parser;
+ while ((ret = parser->next(parser, &k, &v)) == 0) {
+ if (v.type == WT_CONFIG_ITEM_STRUCT) {
+ printf("Found nested configuration: %.*s\n",
+ (int)k.len, k.str);
+ if ((ret = wiredtiger_config_parser_open(
+ NULL, v.str, v.len, &sub_parser)) != 0) {
+ fprintf(stderr,
+ "Error creating nested configuration "
+ "parser: %s\n",
+ wiredtiger_strerror(ret));
+ parser->close(parser);
+ return (ret);
+ }
+ while ((ret = sub_parser->next(
+ sub_parser, &k, &v)) == 0)
+ printf("\t%.*s\n", (int)k.len, k.str);
+ sub_parser->close(sub_parser);
+ }
+ }
+ /*! [nested traverse] */
+ parser->close(parser);
+ }
+
+ return (0);
+}
diff --git a/examples/c/ex_data_source.c b/examples/c/ex_data_source.c
index daedce04075..953bb799340 100644
--- a/examples/c/ex_data_source.c
+++ b/examples/c/ex_data_source.c
@@ -339,30 +339,6 @@ my_open_cursor(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
}
{
- /*! [WT_EXTENSION config_strget] */
- WT_CONFIG_ITEM v;
- int64_t my_data_source_page_size;
-
- /*
- * Retrieve the value of the integer type configuration string
- * "page_size" from a local string (as opposed to the provided
- * WT_CONFIG_ARG reference).
- */
- const char *config_string = "path=/dev/loop,page_size=1024";
-
- if ((ret = wt_api->config_strget(
- wt_api, session, config_string, "page_size", &v)) != 0) {
- (void)wt_api->err_printf(wt_api, session,
- "page_size configuration: %s", wiredtiger_strerror(ret));
- return (ret);
- }
- my_data_source_page_size = v.val;
- /*! [WT_EXTENSION config_strget] */
-
- (void)my_data_source_page_size;
- }
-
- {
/*! [WT_EXTENSION config_get] */
WT_CONFIG_ITEM v;
const char *my_data_source_key;
@@ -392,31 +368,6 @@ my_open_cursor(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
}
{
- /*! [WT_EXTENSION config scan] */
- WT_CONFIG_ITEM k, v;
- WT_CONFIG_SCAN *scan;
-
- /*
- * Retrieve the value of the list type configuration string "paths".
- */
- if ((ret = wt_api->config_get(
- wt_api, session, config, "paths", &v)) != 0) {
- (void)wt_api->err_printf(wt_api, session,
- "paths configuration: %s", wiredtiger_strerror(ret));
- return (ret);
- }
-
- /*
- * Step through the list of entries.
- */
- ret = wt_api->config_scan_begin(wt_api, session, v.str, v.len, &scan);
- while ((ret = wt_api->config_scan_next(wt_api, scan, &k, &v)) == 0)
- printf("%.*s\n", (int)k.len, k.str);
- ret = wt_api->config_scan_end(wt_api, scan);
- /*! [WT_EXTENSION config scan] */
- }
-
- {
/*! [WT_EXTENSION collator config] */
/*
* Configure the appropriate collator.
diff --git a/ext/datasources/helium/helium.c b/ext/datasources/helium/helium.c
index 1239c88befa..cc420c89999 100644
--- a/ext/datasources/helium/helium.c
+++ b/ext/datasources/helium/helium.c
@@ -2098,19 +2098,22 @@ helium_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session,
CURSOR *cursor;
DATA_SOURCE *ds;
WT_CONFIG_ITEM v;
+ WT_CONFIG_PARSER *config_parser;
WT_CURSOR *wtcursor;
WT_EXTENSION_API *wtext;
WT_SOURCE *ws;
- int locked, ret = 0;
+ int locked, ret, tret;
const char *value;
*new_cursor = NULL;
+ config_parser = NULL;
cursor = NULL;
ds = (DATA_SOURCE *)wtds;
wtext = ds->wtext;
ws = NULL;
locked = 0;
+ ret = tret = 0;
value = NULL;
/* Allocate and initialize a cursor. */
@@ -2164,23 +2167,28 @@ helium_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session,
if ((ret = master_uri_get(wtds, session, uri, &value)) != 0)
goto err;
- if ((ret = wtext->config_strget(
- wtext, session, value, "key_format", &v)) != 0)
+ if ((ret = wtext->config_parser_open(wtext,
+ session, value, strlen(value), &config_parser)) != 0)
+ EMSG_ERR(wtext, session, ret,
+ "Configuration string parser: %s",
+ wtext->strerror(ret));
+ if ((ret = config_parser->get(
+ config_parser, "key_format", &v)) != 0)
EMSG_ERR(wtext, session, ret,
"key_format configuration: %s",
wtext->strerror(ret));
ws->config_recno = v.len == 1 && v.str[0] == 'r';
- if ((ret = wtext->config_strget(
- wtext, session, value, "value_format", &v)) != 0)
+ if ((ret = config_parser->get(
+ config_parser, "value_format", &v)) != 0)
EMSG_ERR(wtext, session, ret,
"value_format configuration: %s",
wtext->strerror(ret));
ws->config_bitfield =
v.len == 2 && isdigit(v.str[0]) && v.str[1] == 't';
- if ((ret = wtext->config_strget(
- wtext, session, value, "helium_o_compress", &v)) != 0)
+ if ((ret = config_parser->get(
+ config_parser, "helium_o_compress", &v)) != 0)
EMSG_ERR(wtext, session, ret,
"helium_o_compress configuration: %s",
wtext->strerror(ret));
@@ -2219,6 +2227,11 @@ err: if (ws != NULL && locked)
ESET(unlock(wtext, session, &ws->lock));
cursor_destroy(cursor);
}
+ if (config_parser != NULL &&
+ (tret = config_parser->close(config_parser)) != 0)
+ EMSG(wtext, session, tret,
+ "WT_CONFIG_PARSER.close: %s", wtext->strerror(tret));
+
free((void *)value);
return (ret);
}
@@ -2882,19 +2895,19 @@ helium_config_read(WT_EXTENSION_API *wtext, WT_CONFIG_ITEM *config,
char **devicep, HE_ENV *envp, int *env_setp, int *flagsp)
{
WT_CONFIG_ITEM k, v;
- WT_CONFIG_SCAN *scan;
+ WT_CONFIG_PARSER *config_parser;
int ret = 0, tret;
*env_setp = 0;
*flagsp = 0;
- /* Set up the scan of the configuration arguments list. */
- if ((ret = wtext->config_scan_begin(
- wtext, NULL, config->str, config->len, &scan)) != 0)
+ /* Traverse the configuration arguments list. */
+ if ((ret = wtext->config_parser_open(
+ wtext, NULL, config->str, config->len, &config_parser)) != 0)
ERET(wtext, NULL, ret,
- "WT_EXTENSION_API.config_scan_begin: %s",
+ "WT_EXTENSION_API.config_parser_open: %s",
wtext->strerror(ret));
- while ((ret = wtext->config_scan_next(wtext, scan, &k, &v)) == 0) {
+ while ((ret = config_parser->next(config_parser, &k, &v)) == 0) {
if (string_match("helium_devices", k.str, k.len)) {
if ((*devicep = calloc(1, v.len + 1)) == NULL)
return (os_errno());
@@ -2924,13 +2937,11 @@ helium_config_read(WT_EXTENSION_API *wtext, WT_CONFIG_ITEM *config,
ret = 0;
if (ret != 0)
EMSG_ERR(wtext, NULL, ret,
- "WT_EXTENSION_API.config_scan_next: %s",
- wtext->strerror(ret));
+ "WT_CONFIG_PARSER.next: %s", wtext->strerror(ret));
-err: if ((tret = wtext->config_scan_end(wtext, scan)) != 0)
+err: if ((tret = config_parser->close(config_parser)) != 0)
EMSG(wtext, NULL, tret,
- "WT_EXTENSION_API.config_scan_end: %s",
- wtext->strerror(tret));
+ "WT_CONFIG_PARSER.close: %s", wtext->strerror(tret));
return (ret);
}
@@ -3320,11 +3331,12 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
DATA_SOURCE *ds;
HELIUM_SOURCE *hs;
WT_CONFIG_ITEM k, v;
- WT_CONFIG_SCAN *scan;
+ WT_CONFIG_PARSER *config_parser;
WT_EXTENSION_API *wtext;
int vmajor, vminor, ret = 0;
const char **p;
+ config_parser = NULL;
ds = NULL;
wtext = connection->get_extension_api(connection);
@@ -3357,12 +3369,12 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
wtext->strerror(ret));
/* Step through the list of Helium sources, opening each one. */
- if ((ret =
- wtext->config_scan_begin(wtext, NULL, v.str, v.len, &scan)) != 0)
+ if ((ret = wtext->config_parser_open(
+ wtext, NULL, v.str, v.len, &config_parser)) != 0)
EMSG_ERR(wtext, NULL, ret,
- "WT_EXTENSION_API.config_scan_begin: config: %s",
+ "WT_EXTENSION_API.config_parser_open: config: %s",
wtext->strerror(ret));
- while ((ret = wtext->config_scan_next(wtext, scan, &k, &v)) == 0) {
+ while ((ret = config_parser->next(config_parser, &k, &v)) == 0) {
if (string_match("helium_verbose", k.str, k.len)) {
verbose = v.val == 0 ? 0 : 1;
continue;
@@ -3372,12 +3384,13 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
}
if (ret != WT_NOTFOUND)
EMSG_ERR(wtext, NULL, ret,
- "WT_EXTENSION_API.config_scan_next: config: %s",
+ "WT_CONFIG_PARSER.next: config: %s",
wtext->strerror(ret));
- if ((ret = wtext->config_scan_end(wtext, scan)) != 0)
+ if ((ret = config_parser->close(config_parser)) != 0)
EMSG_ERR(wtext, NULL, ret,
- "WT_EXTENSION_API.config_scan_end: config: %s",
+ "WT_CONFIG_PARSER.close: config: %s",
wtext->strerror(ret));
+ config_parser = NULL;
/* Find and open the database transaction store. */
if ((ret = helium_source_open_txn(ds)) != 0)
@@ -3414,6 +3427,8 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
err: if (ds != NULL)
ESET(helium_terminate((WT_DATA_SOURCE *)ds, NULL));
+ if (config_parser != NULL)
+ (void)config_parser->close(config_parser);
return (ret);
}
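The Helium changes above replace the retired config_strget and config_scan_* extension calls with WT_CONFIG_PARSER handles obtained through the extension API. A minimal sketch of that pattern as it might appear in a data source's open-cursor path; the helper name and arguments are invented for illustration:

#include <string.h>
#include <wiredtiger.h>
#include <wiredtiger_ext.h>

/* Pull "key_format" out of a metadata string via the extension API. */
static int
parse_key_format(WT_EXTENSION_API *wtext, WT_SESSION *session,
    const char *value, int *is_recnop)
{
    WT_CONFIG_ITEM v;
    WT_CONFIG_PARSER *parser;
    int ret, tret;

    if ((ret = wtext->config_parser_open(
        wtext, session, value, strlen(value), &parser)) != 0)
        return (ret);
    if ((ret = parser->get(parser, "key_format", &v)) == 0)
        *is_recnop = (v.len == 1 && v.str[0] == 'r');

    /* Always close the parser, preserving the first error. */
    if ((tret = parser->close(parser)) != 0 && ret == 0)
        ret = tret;
    return (ret);
}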
diff --git a/lang/java/java_doc.i b/lang/java/java_doc.i
index fcb580ddbd4..83404b45508 100644
--- a/lang/java/java_doc.i
+++ b/lang/java/java_doc.i
@@ -40,3 +40,6 @@ COPYDOC(__wt_connection, WT_CONNECTION, add_data_source)
COPYDOC(__wt_connection, WT_CONNECTION, add_collator)
COPYDOC(__wt_connection, WT_CONNECTION, add_compressor)
COPYDOC(__wt_connection, WT_CONNECTION, add_extractor)
+COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, close)
+COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, next)
+COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, get)
diff --git a/lang/python/wiredtiger.i b/lang/python/wiredtiger.i
index 31dc159410b..e5f49fe2c63 100644
--- a/lang/python/wiredtiger.i
+++ b/lang/python/wiredtiger.i
@@ -582,19 +582,21 @@ typedef int int_void;
/* Add event handler support. */
%{
+/* Write to and flush the stream. */
static int
writeToPythonStream(const char *streamname, const char *message)
{
- PyObject *sys, *se, *sys_stderr_write, *written, *arglist;
+ PyObject *sys, *se, *write_method, *flush_method, *written,
+ *arglist, *arglist2;
char *msg;
int ret;
size_t msglen;
sys = NULL;
se = NULL;
- sys_stderr_write = NULL;
+ write_method = flush_method = NULL;
written = NULL;
- arglist = NULL;
+ arglist = arglist2 = NULL;
msglen = strlen(message);
msg = malloc(msglen + 2);
strcpy(msg, message);
@@ -608,21 +610,30 @@ writeToPythonStream(const char *streamname, const char *message)
goto err;
if ((se = PyObject_GetAttrString(sys, streamname)) == NULL)
goto err;
- if ((sys_stderr_write = PyObject_GetAttrString(se, "write")) == NULL)
+ if ((write_method = PyObject_GetAttrString(se, "write")) == NULL)
+ goto err;
+ if ((flush_method = PyObject_GetAttrString(se, "flush")) == NULL)
goto err;
if ((arglist = Py_BuildValue("(s)", msg)) == NULL)
goto err;
+ if ((arglist2 = Py_BuildValue("()", msg)) == NULL)
+ goto err;
- written = PyObject_CallObject(sys_stderr_write, arglist);
+ written = PyObject_CallObject(write_method, arglist);
+ (void)PyObject_CallObject(flush_method, arglist2);
ret = 0;
err: /* Release python Global Interpreter Lock */
SWIG_PYTHON_THREAD_END_BLOCK;
+ if (arglist2)
+ Py_XDECREF(arglist2);
if (arglist)
Py_XDECREF(arglist);
- if (sys_stderr_write)
- Py_XDECREF(sys_stderr_write);
+ if (flush_method)
+ Py_XDECREF(flush_method);
+ if (write_method)
+ Py_XDECREF(write_method);
if (se)
Py_XDECREF(se);
if (sys)
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index 04c24a6a3b6..35201008622 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -405,8 +405,9 @@ __bm_method_set(WT_BM *bm, int readonly)
* Open a file.
*/
int
-__wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename,
- const char *cfg[], int forced_salvage, uint32_t allocsize, WT_BM **bmp)
+__wt_block_manager_open(WT_SESSION_IMPL *session,
+ const char *filename, const char *cfg[],
+ int forced_salvage, int readonly, uint32_t allocsize, WT_BM **bmp)
{
WT_BM *bm;
WT_DECL_RET;
@@ -416,8 +417,8 @@ __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename,
WT_RET(__wt_calloc_def(session, 1, &bm));
__bm_method_set(bm, 0);
- WT_ERR(__wt_block_open(
- session, filename, cfg, forced_salvage, allocsize, &bm->block));
+ WT_ERR(__wt_block_open(session, filename, cfg,
+ forced_salvage, readonly, allocsize, &bm->block));
*bmp = bm;
return (0);
diff --git a/src/block/block_open.c b/src/block/block_open.c
index 1132cb85a6c..23e69027718 100644
--- a/src/block/block_open.c
+++ b/src/block/block_open.c
@@ -95,7 +95,7 @@ __block_destroy(WT_SESSION_IMPL *session, WT_BLOCK *block)
int
__wt_block_open(WT_SESSION_IMPL *session,
const char *filename, const char *cfg[],
- int forced_salvage, uint32_t allocsize, WT_BLOCK **blockp)
+ int forced_salvage, int readonly, uint32_t allocsize, WT_BLOCK **blockp)
{
WT_BLOCK *block;
WT_CONFIG_ITEM cval;
@@ -158,8 +158,9 @@ __wt_block_open(WT_SESSION_IMPL *session,
#endif
/* Open the underlying file handle. */
- WT_ERR(__wt_open(
- session, filename, 0, 0, WT_FILE_TYPE_DATA, &block->fh));
+ WT_ERR(__wt_open(session, filename, 0, 0,
+ readonly ? WT_FILE_TYPE_CHECKPOINT : WT_FILE_TYPE_DATA,
+ &block->fh));
/* Initialize the live checkpoint's lock. */
WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager"));
diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c
index 46f74a2b997..3adaa459826 100644
--- a/src/bloom/bloom.c
+++ b/src/bloom/bloom.c
@@ -127,7 +127,7 @@ __bloom_open_cursor(WT_BLOOM *bloom, WT_CURSOR *owner)
WT_RET(__wt_open_cursor(session, bloom->uri, owner, cfg, &c));
/* XXX Layering violation: bump the cache priority for Bloom filters. */
- ((WT_CURSOR_BTREE *)c)->btree->evict_priority = (1 << 19);
+ ((WT_CURSOR_BTREE *)c)->btree->evict_priority = WT_EVICT_INT_SKEW;
bloom->c = c;
return (0);
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index 568e07f7fd1..c0c7a245a9f 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -317,6 +317,10 @@ __free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd)
do {
next = upd->next;
+ /* Everything we free should be visible to everyone. */
+ WT_ASSERT(session,
+ upd->txnid == WT_TXN_ABORTED ||
+ __wt_txn_visible_all(session, upd->txnid));
__wt_free(session, upd);
} while ((upd = next) != NULL);
}
diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c
index d57162c06a9..0b6767df613 100644
--- a/src/btree/bt_evict.c
+++ b/src/btree/bt_evict.c
@@ -229,7 +229,8 @@ __evict_worker(WT_SESSION_IMPL *session)
/* Check to see if the eviction server should run. */
if (bytes_inuse > (cache->eviction_target * bytes_max) / 100)
- flags = WT_EVICT_PASS_ALL;
+ flags = (loop > 10) ?
+ WT_EVICT_PASS_AGGRESSIVE : WT_EVICT_PASS_ALL;
else if (dirty_inuse >
(cache->eviction_dirty_target * bytes_max) / 100)
/* Ignore clean pages unless the cache is too large */
@@ -244,7 +245,7 @@ __evict_worker(WT_SESSION_IMPL *session)
F_SET(cache, WT_EVICT_ACTIVE);
WT_VERBOSE_RET(session, evictserver,
"Eviction pass with: Max: %" PRIu64
- " In use: %" PRIu64 " Dirty: %" PRIu64 " Internal: %s",
+ " In use: %" PRIu64 " Dirty: %" PRIu64 " Merge: %s",
bytes_max, bytes_inuse, dirty_inuse,
LF_ISSET(WT_EVICT_PASS_INTERNAL) ? "yes" : "no");
@@ -436,6 +437,9 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
*/
__wt_evict_file_exclusive_on(session);
+ /* Make sure the oldest transaction ID is up-to-date. */
+ __wt_txn_update_oldest(session);
+
/*
* We can't evict the page just returned to us, it marks our place in
* the tree. So, always walk one page ahead of the page being evicted.
@@ -721,12 +725,23 @@ __evict_walk(WT_SESSION_IMPL *session, u_int *entriesp, uint32_t flags)
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- u_int slot, max_entries, retries;
+ u_int max_entries, old_slot, retries, slot;
conn = S2C(session);
cache = S2C(session)->cache;
retries = 0;
+ /* Increment the shared read generation. */
+ __wt_cache_read_gen_incr(session);
+
+ /*
+ * Update the oldest ID: we use it to decide whether pages are
+ * candidates for eviction. Without this, if all threads are blocked
+ * after a long-running transaction (such as a checkpoint) completes,
+ * we may never start evicting again.
+ */
+ __wt_txn_update_oldest(session);
+
/*
* Set the starting slot in the queue and the maximum pages added
* per walk.
@@ -751,9 +766,7 @@ retry: SLIST_FOREACH(dhandle, &conn->dhlh, l) {
/*
* Each time we reenter this function, start at the next handle
- * on the list. We use the address of the handle's name as the
- * handle's unique identifier, that should be unique, and is
- * unlikely to cause a false positive if freed and reallocated.
+ * on the list.
*/
if (cache->evict_file_next != NULL &&
cache->evict_file_next != dhandle)
@@ -769,9 +782,29 @@ retry: SLIST_FOREACH(dhandle, &conn->dhlh, l) {
*/
btree = dhandle->handle;
if (btree->root_page == NULL ||
- F_ISSET(btree, WT_BTREE_NO_EVICTION) || btree->bulk_load_ok)
+ F_ISSET(btree, WT_BTREE_NO_EVICTION) ||
+ btree->bulk_load_ok)
continue;
+ /*
+ * Also skip files that are configured to stick in cache until
+ * we get aggressive.
+ */
+ if (btree->evict_priority != 0 &&
+ !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE))
+ continue;
+
+ /*
+ * If we are filling the queue, skip files that haven't been
+ * useful in the past.
+ */
+ if (btree->evict_walk_period != 0 &&
+ cache->evict_entries >= WT_EVICT_WALK_BASE &&
+ btree->evict_walk_skips++ < btree->evict_walk_period)
+ continue;
+ btree->evict_walk_skips = 0;
+ old_slot = slot;
+
__wt_spin_lock(session, &cache->evict_walk_lock);
/*
@@ -784,6 +817,17 @@ retry: SLIST_FOREACH(dhandle, &conn->dhlh, l) {
__wt_spin_unlock(session, &cache->evict_walk_lock);
+ /*
+ * If we didn't find enough candidates in the file, skip it
+ * next time.
+ */
+ if (slot >= old_slot + WT_EVICT_WALK_PER_FILE ||
+ slot >= max_entries)
+ btree->evict_walk_period = 0;
+ else
+ btree->evict_walk_period = WT_MIN(
+ WT_MAX(1, 2 * btree->evict_walk_period), 1000);
+
if (ret != 0 || slot >= max_entries)
break;
}
@@ -843,15 +887,15 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
WT_DECL_RET;
WT_EVICT_ENTRY *end, *evict, *start;
WT_PAGE *page;
- int modified, restarts, levels;
+ int internal_pages, levels, modified, restarts;
uint32_t walk_flags;
+ uint64_t pages_walked;
btree = S2BT(session);
cache = S2C(session)->cache;
start = cache->evict + *slotp;
- end = start + WT_EVICT_WALK_PER_FILE;
- if (end > cache->evict + cache->evict_slots)
- end = cache->evict + cache->evict_slots;
+ end = WT_MIN(start + WT_EVICT_WALK_PER_FILE,
+ cache->evict + cache->evict_slots);
WT_ASSERT(session, btree->evict_page == NULL ||
WT_PAGE_IS_ROOT(btree->evict_page) ||
@@ -860,20 +904,21 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
walk_flags = WT_TREE_EVICT;
if (LF_ISSET(WT_EVICT_PASS_INTERNAL))
walk_flags |= WT_TREE_SKIP_LEAF;
+
/*
* Get some more eviction candidate pages.
*/
- for (evict = start, restarts = 0;
+ for (evict = start, pages_walked = 0, internal_pages = restarts = 0;
evict < end && (ret == 0 || ret == WT_NOTFOUND);
- ret = __wt_tree_walk(session, &btree->evict_page, walk_flags)) {
+ ret = __wt_tree_walk(session, &btree->evict_page, walk_flags),
+ ++pages_walked) {
if ((page = btree->evict_page) == NULL) {
ret = 0;
/*
* Take care with terminating this loop.
*
* Don't make an extra call to __wt_tree_walk: that
- * will leave a page in the WT_REF_EVICT_WALK state,
- * unable to be evicted, which may prevent any work
+ * will leave a page pinned, which may prevent any work
* from being done.
*/
if (++restarts == 2)
@@ -881,8 +926,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
continue;
}
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_walk);
-
/* Ignore root pages entirely. */
if (WT_PAGE_IS_ROOT(page))
continue;
@@ -940,6 +983,13 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
} else if (LF_ISSET(WT_EVICT_PASS_INTERNAL))
continue;
+ /* Limit internal pages to 50% unless we get aggressive. */
+ if ((page->type == WT_PAGE_COL_INT ||
+ page->type == WT_PAGE_ROW_INT) &&
+ ++internal_pages > WT_EVICT_WALK_PER_FILE / 2 &&
+ !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE))
+ break;
+
/*
* If this page has never been considered for eviction,
* set its read generation to a little bit in the
@@ -975,6 +1025,15 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
continue;
/*
+ * If the page is clean but has modifications that appear too
+ * new to evict, skip it.
+ */
+ if (!modified && page->modify != NULL &&
+ !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE) &&
+ !__wt_txn_visible_all(session, page->modify->rec_max_txn))
+ continue;
+
+ /*
* If the oldest transaction hasn't changed since the
* last time this page was written, it's unlikely that
* we can make progress. Similarly, if the most recent
@@ -988,7 +1047,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
* since rolled back, or we can help get the checkpoint
* completed sooner.
*/
- if (modified && !F_ISSET(cache, WT_EVICT_STUCK) &&
+ if (modified && !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE) &&
(page->modify->disk_snap_min ==
S2C(session)->txn_global.oldest_id ||
!__wt_txn_visible_all(session,
@@ -1004,6 +1063,7 @@ add: WT_ASSERT(session, evict->page == NULL);
}
*slotp += (u_int)(evict - start);
+ WT_STAT_FAST_CONN_INCRV(session, cache_eviction_walk, pages_walked);
return (ret);
}
@@ -1160,7 +1220,8 @@ __wt_cache_dump(WT_SESSION_IMPL *session)
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_PAGE *page;
- uint64_t file_bytes, file_dirty, file_pages, total_bytes;
+ uint64_t file_intl_pages, file_leaf_pages;
+ uint64_t file_bytes, file_dirty, total_bytes;
conn = S2C(session);
total_bytes = 0;
@@ -1176,22 +1237,28 @@ __wt_cache_dump(WT_SESSION_IMPL *session)
btree->bulk_load_ok)
continue;
- file_bytes = file_dirty = file_pages = 0;
+ file_bytes = file_dirty = file_intl_pages = file_leaf_pages = 0;
page = NULL;
session->dhandle = dhandle;
while (__wt_tree_walk(session, &page, WT_TREE_CACHE) == 0 &&
page != NULL) {
- ++file_pages;
+ if (page->type == WT_PAGE_COL_INT ||
+ page->type == WT_PAGE_ROW_INT)
+ ++file_intl_pages;
+ else
+ ++file_leaf_pages;
file_bytes += page->memory_footprint;
if (__wt_page_is_modified(page))
file_dirty += page->memory_footprint;
}
session->dhandle = NULL;
- printf("cache dump: %s [%s]: %"
- PRIu64 " pages, %" PRIu64 "MB, %" PRIu64 "MB dirty\n",
+ printf("cache dump: %s [%s]:"
+ " %" PRIu64 " intl pages, %" PRIu64 " leaf pages,"
+ " %" PRIu64 "MB, %" PRIu64 "MB dirty\n",
dhandle->name, dhandle->checkpoint,
- file_pages, file_bytes >> 20, file_dirty >> 20);
+ file_intl_pages, file_leaf_pages,
+ file_bytes >> 20, file_dirty >> 20);
total_bytes += file_bytes;
}
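The eviction-walk changes above introduce a per-file back-off: each time a walk of a file yields too few candidates, its evict_walk_period doubles (capped at 1000), and a productive walk resets it to zero so the file is considered on every pass again. A toy, self-contained sketch of just that back-off arithmetic (macro names local to the sketch):

#include <stdio.h>

#define MY_MIN(a, b) ((a) < (b) ? (a) : (b))
#define MY_MAX(a, b) ((a) > (b) ? (a) : (b))

int
main(void)
{
    unsigned period = 0;               /* btree->evict_walk_period */
    int pass, productive;

    for (pass = 0; pass < 8; pass++) {
        productive = (pass == 5);      /* pretend pass 5 found candidates */
        if (productive)
            period = 0;                /* walk the file every pass again */
        else
            period = MY_MIN(MY_MAX(1, 2 * period), 1000);
        printf("pass %d: evict_walk_period=%u\n", pass, period);
    }
    return (0);
}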
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index a970b656cf5..f6cc4cc6fb3 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -11,7 +11,7 @@ static int __btree_conf(WT_SESSION_IMPL *, WT_CKPT *ckpt);
static int __btree_get_last_recno(WT_SESSION_IMPL *);
static int __btree_page_sizes(WT_SESSION_IMPL *);
static int __btree_preload(WT_SESSION_IMPL *);
-static int __btree_tree_open_empty(WT_SESSION_IMPL *, int);
+static int __btree_tree_open_empty(WT_SESSION_IMPL *, int, int);
static int pse1(WT_SESSION_IMPL *, const char *, uint32_t, uint32_t);
static int pse2(WT_SESSION_IMPL *, const char *, uint32_t, uint32_t, int);
@@ -69,8 +69,8 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
if (!WT_PREFIX_SKIP(filename, "file:"))
WT_ERR_MSG(session, EINVAL, "expected a 'file:' URI");
- WT_ERR(__wt_block_manager_open(session, filename,
- dhandle->cfg, forced_salvage, btree->allocsize, &btree->bm));
+ WT_ERR(__wt_block_manager_open(session, filename, dhandle->cfg,
+ forced_salvage, readonly, btree->allocsize, &btree->bm));
bm = btree->bm;
/*
@@ -102,7 +102,8 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
ckpt.raw.data, ckpt.raw.size,
root_addr, &root_addr_size, readonly));
if (creation || root_addr_size == 0)
- WT_ERR(__btree_tree_open_empty(session, creation));
+ WT_ERR(__btree_tree_open_empty(
+ session, creation, readonly));
else {
WT_ERR(__wt_btree_tree_open(
session, root_addr, root_addr_size));
@@ -355,7 +356,7 @@ err: __wt_buf_free(session, &dsk);
* Create an empty in-memory tree.
*/
static int
-__btree_tree_open_empty(WT_SESSION_IMPL *session, int creation)
+__btree_tree_open_empty(WT_SESSION_IMPL *session, int creation, int readonly)
{
WT_BTREE *btree;
WT_DECL_RET;
@@ -423,23 +424,31 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation)
* the root page dirty to force a write, and without reconciling the
* leaf page we won't realize there's no records to write, we'll write
* a root page, which isn't correct for an empty tree.
- * Earlier versions of this code kept the leaf page clean, but with
- * the "empty" flag set in the leaf page's modification structure; in
- * that case, checkpoints works (forced reconciliation of a root with
- * a single "empty" page wouldn't write any blocks). That version had
+ *
+ * Earlier versions of this code kept the leaf page clean, but with the
+ * "empty" flag set in the leaf page's modification structure; in that
+ * case, checkpoints works (forced reconciliation of a root with a
+ * single "empty" page wouldn't write any blocks). That version had
* memory leaks because the eviction code didn't correctly handle pages
* that were "clean" (and so never reconciled), yet "modified" with an
* "empty" flag. The goal of this code is to mimic a real tree that
* simply has no records, for whatever reason, and trust reconciliation
* to figure out it's empty and not write any blocks.
- * We do not set the tree's modified flag because the checkpoint code
- * skips unmodified files in closing checkpoints (checkpoints that don't
- * require a write unless the file is actually dirty). There's no need
- * to reconcile this file unless the application does a real checkpoint
- * or it's actually modified.
+ *
+ * We do not set the tree's modified flag because the checkpoint code
+ * skips unmodified files in closing checkpoints (checkpoints that
+ * don't require a write unless the file is actually dirty). There's
+ * no need to reconcile this file unless the application does a real
+ * checkpoint or it's actually modified.
+ *
+ * Only do this for a live tree, not for checkpoints. If we open an
+ * empty checkpoint, the leaf page cannot be dirty or eviction may try
+ * to write it, which will fail because checkpoints are read-only.
*/
- WT_ERR(__wt_page_modify_init(session, leaf));
- __wt_page_only_modify_set(session, leaf);
+ if (!readonly) {
+ WT_ERR(__wt_page_modify_init(session, leaf));
+ __wt_page_only_modify_set(session, leaf);
+ }
btree->root_page = root;
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index cfc7137b13a..21298ac4722 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -93,21 +93,17 @@ __wt_page_in_func(
}
/*
- * If this page has ever been considered for eviction,
- * and its generation is aging, update it.
- */
- if (page->read_gen != WT_READ_GEN_NOTSET &&
- page->read_gen < __wt_cache_read_gen(session))
- page->read_gen =
- __wt_cache_read_gen_set(session);
-
- /*
* If we read the page and we are configured to not
* trash the cache, set the oldest read generation so
* the page is forcibly evicted as soon as possible.
+ *
+ * Otherwise, update the page's read generation.
*/
if (oldgen && page->read_gen == WT_READ_GEN_NOTSET)
page->read_gen = WT_READ_GEN_OLDEST;
+ else if (page->read_gen < __wt_cache_read_gen(session))
+ page->read_gen =
+ __wt_cache_read_gen_set(session);
return (0);
WT_ILLEGAL_VALUE(session);
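The bt_page.c change above means a page's read generation is now bumped whenever it lags the cache's current generation, while a page read under the "don't trash the cache" hint is set to the oldest generation so it becomes an early eviction candidate. A toy sketch of that decision; the generation constants and the plain return of the cache generation are invented for illustration, not WiredTiger's values:

#include <stdint.h>
#include <stdio.h>

#define GEN_NOTSET 0                   /* illustrative stand-ins */
#define GEN_OLDEST 1

static uint64_t
new_read_gen(uint64_t page_gen, uint64_t cache_gen, int oldgen)
{
    if (oldgen && page_gen == GEN_NOTSET)
        return (GEN_OLDEST);           /* make it an early eviction choice */
    if (page_gen < cache_gen)
        return (cache_gen);            /* otherwise age it with the cache */
    return (page_gen);
}

int
main(void)
{
    printf("%llu\n", (unsigned long long)new_read_gen(GEN_NOTSET, 42, 1));
    printf("%llu\n", (unsigned long long)new_read_gen(GEN_NOTSET, 42, 0));
    return (0);
}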
diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c
index 0713989af58..ce6c04d1283 100644
--- a/src/btree/rec_evict.c
+++ b/src/btree/rec_evict.c
@@ -498,7 +498,7 @@ ckpt: WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint);
* cache.
*/
if (!exclusive && mod != NULL &&
- !__wt_txn_visible_all(session, mod->disk_txn))
+ !__wt_txn_visible_all(session, mod->rec_max_txn))
return (EBUSY);
/*
diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c
index 81a4ec7a025..dd237693465 100644
--- a/src/btree/rec_write.c
+++ b/src/btree/rec_write.c
@@ -668,26 +668,12 @@ static inline int
__rec_txn_read(
WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE *upd, WT_UPDATE **updp)
{
- uint64_t txnid;
int skip, retried;
retried = 0;
-retry: *updp = __wt_txn_read_skip(session, upd, &skip);
- if (!skip) {
- /*
- * Track the largest transaction ID written to disk for this
- * page. We store this in the page at the end of
- * reconciliation if no updates are skipped. It is used to
- * avoid evicting a clean page from memory with changes that
- * are required to satisfy a snapshot read.
- */
- if (*updp != NULL) {
- txnid = (*updp)->txnid;
- if (TXNID_LT(r->max_txn, txnid))
- r->max_txn = txnid;
- }
+retry: *updp = __wt_txn_read_skip(session, upd, &r->max_txn, &skip);
+ if (!skip)
return (0);
- }
/*
* If skipping this update will cause reconciliation to quit, update
@@ -4093,7 +4079,7 @@ err: __wt_scr_free(&tkey);
* cache's dirty statistics.
*/
if (!r->upd_skipped) {
- mod->disk_txn = r->max_txn;
+ mod->rec_max_txn = r->max_txn;
if (WT_ATOMIC_CAS(mod->write_gen, r->orig_write_gen, 0))
__wt_cache_dirty_decr(session, page);
diff --git a/src/config/config.c b/src/config/config.c
index c268d4e053a..c0eb672015f 100644
--- a/src/config/config.c
+++ b/src/config/config.c
@@ -118,7 +118,7 @@ static const int8_t gostruct[256] = {
A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_LOOP, A_BAD, A_QUP,
A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UP, A_DOWN, A_BAD, A_BAD,
- A_NEXT, A_NUMBARE, A_BAD, A_BARE, A_NUMBARE, A_NUMBARE,
+ A_NEXT, A_NUMBARE, A_BARE, A_BARE, A_NUMBARE, A_NUMBARE,
A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE,
A_NUMBARE, A_NUMBARE, A_NUMBARE, A_VALUE, A_BAD, A_BAD,
A_VALUE, A_BAD, A_BAD, A_BAD, A_BARE, A_BARE, A_BARE, A_BARE,
@@ -738,10 +738,8 @@ __wt_config_subgetraw(WT_SESSION_IMPL *session,
__wt_config_subgets(WT_SESSION_IMPL *session,
WT_CONFIG_ITEM *cfg, const char *key, WT_CONFIG_ITEM *value)
{
- WT_CONFIG_ITEM key_item;
-
- key_item.str = key;
- key_item.len = strlen(key);
+ WT_CONFIG_ITEM key_item =
+ { key, strlen(key), 0, WT_CONFIG_ITEM_STRING };
return (__wt_config_subgetraw(session, cfg, &key_item, value));
}
diff --git a/src/config/config_api.c b/src/config/config_api.c
new file mode 100644
index 00000000000..42f4c117b81
--- /dev/null
+++ b/src/config/config_api.c
@@ -0,0 +1,105 @@
+/*-
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __config_parser_close --
+ * WT_CONFIG_PARSER->close method.
+ */
+static int
+__config_parser_close(WT_CONFIG_PARSER *wt_config_parser)
+{
+ WT_CONFIG_PARSER_IMPL *config_parser;
+
+ config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser;
+
+ if (config_parser == NULL)
+ return (EINVAL);
+
+ __wt_free(config_parser->session, config_parser);
+ return (0);
+}
+
+/*
+ * __config_parser_get --
+ * WT_CONFIG_PARSER->search method.
+ */
+static int
+__config_parser_get(WT_CONFIG_PARSER *wt_config_parser,
+ const char *key, WT_CONFIG_ITEM *cval)
+{
+ WT_CONFIG_PARSER_IMPL *config_parser;
+
+ config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser;
+
+ if (config_parser == NULL)
+ return (EINVAL);
+
+ return (__wt_config_subgets(config_parser->session,
+ &config_parser->config_item, key, cval));
+}
+
+/*
+ * __config_parser_next --
+ * WT_CONFIG_PARSER->next method.
+ */
+static int
+__config_parser_next(WT_CONFIG_PARSER *wt_config_parser,
+ WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *cval)
+{
+ WT_CONFIG_PARSER_IMPL *config_parser;
+
+ config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser;
+
+ if (config_parser == NULL)
+ return (EINVAL);
+
+ return (__wt_config_next(&config_parser->config, key, cval));
+}
+
+/*
+ * wiredtiger_config_parser_open --
+ * Create a configuration parser.
+ */
+int
+wiredtiger_config_parser_open(WT_SESSION *wt_session,
+ const char *config, size_t len, WT_CONFIG_PARSER **config_parserp)
+{
+ static const WT_CONFIG_PARSER stds = {
+ __config_parser_close,
+ __config_parser_next,
+ __config_parser_get
+ };
+ WT_CONFIG_ITEM config_item =
+ { config, len, 0, WT_CONFIG_ITEM_STRING };
+ WT_CONFIG_PARSER_IMPL *config_parser;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ *config_parserp = NULL;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ WT_RET(__wt_calloc_def(session, 1, &config_parser));
+ config_parser->iface = stds;
+ config_parser->session = session;
+
+ /*
+	 * Set up a WT_CONFIG_ITEM to be used for get calls and a WT_CONFIG
+	 * structure for iterating through the configuration string.
+ */
+ memcpy(&config_parser->config_item, &config_item, sizeof(config_item));
+ WT_ERR(__wt_config_initn(
+ session, &config_parser->config, config, len));
+
+ if (ret == 0)
+ *config_parserp = (WT_CONFIG_PARSER *)config_parser;
+ else
+err: __wt_free(session, config_parser);
+
+ return (ret);
+}
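For illustration only (an editorial sketch, not part of the change; the cache_size key and the minimal error handling are assumptions): application code can drive the new handle as below, given <wiredtiger.h>, <stdio.h>, <string.h> and a valid WT_SESSION named session.

    /* Sketch: dump a configuration string with the new parser API. */
    static int
    dump_config(WT_SESSION *session, const char *config)
    {
        WT_CONFIG_ITEM k, v;
        WT_CONFIG_PARSER *parser;
        int ret;

        if ((ret = wiredtiger_config_parser_open(
            session, config, strlen(config), &parser)) != 0)
            return (ret);

        /* Direct lookup of a single key. */
        if (parser->get(parser, "cache_size", &v) == 0)
            printf("cache_size=%.*s\n", (int)v.len, v.str);

        /* Linear traversal of every key/value pair. */
        while ((ret = parser->next(parser, &k, &v)) == 0)
            printf("%.*s -> %.*s\n",
                (int)k.len, k.str, (int)v.len, v.str);

        /* next() returns WT_NOTFOUND once the string is exhausted. */
        return (parser->close(parser));
    }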
diff --git a/src/config/config_def.c b/src/config/config_def.c
index abe6713696c..2c01cac1a85 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -250,7 +250,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
,
{ "checkpoint_sync", "boolean", NULL, NULL},
{ "create", "boolean", NULL, NULL},
- { "direct_io", "list", "choices=[\"data\",\"log\"]", NULL},
+ { "direct_io", "list",
+ "choices=[\"checkpoint\",\"data\",\"log\"]",
+ NULL},
{ "error_prefix", "string", NULL, NULL},
{ "eviction_dirty_target", "int", "min=10,max=99", NULL},
{ "eviction_target", "int", "min=10,max=99", NULL},
diff --git a/src/config/config_ext.c b/src/config/config_ext.c
index 7dd5445cc3c..26b3799d61c 100644
--- a/src/config/config_ext.c
+++ b/src/config/config_ext.c
@@ -8,6 +8,19 @@
#include "wt_internal.h"
/*
+ * __wt_ext_config_parser_open --
+ *	WT_EXTENSION_API->config_parser_open implementation.
+ */
+int
+__wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session,
+ const char *config, size_t len, WT_CONFIG_PARSER **config_parserp)
+{
+ WT_UNUSED(wt_ext);
+ return (wiredtiger_config_parser_open(
+ wt_session, config, len, config_parserp));
+}
+
+/*
* __wt_ext_config_get --
* Given a NULL-terminated list of configuration strings, find the final
* value for a given string key (external API version).
@@ -29,81 +42,3 @@ __wt_ext_config_get(WT_EXTENSION_API *wt_api,
return (WT_NOTFOUND);
return (__wt_config_gets(session, cfg, key, cval));
}
-
-/*
- * __wt_ext_config_strget --
- * Given a single configuration string, find the final value for a given
- * string key (external API version).
- */
-int
-__wt_ext_config_strget(WT_EXTENSION_API *wt_api,
- WT_SESSION *wt_session, const char *config, const char *key,
- WT_CONFIG_ITEM *cval)
-{
- const char *cfg_arg[] = { config, NULL };
-
- return (__wt_ext_config_get(
- wt_api, wt_session, (WT_CONFIG_ARG *)cfg_arg, key, cval));
-}
-
-/*
- * __wt_ext_config_scan_begin --
- * Start a scan of a config string.
- * (external API only).
- */
-int
-__wt_ext_config_scan_begin(
- WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
- const char *str, size_t len, WT_CONFIG_SCAN **scanp)
-{
- WT_CONFIG config, *scan;
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *session;
-
- conn = (WT_CONNECTION_IMPL *)wt_api->conn;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = conn->default_session;
-
- /* Note: allocate memory last to avoid cleanup. */
- WT_CLEAR(config);
- WT_RET(__wt_config_initn(session, &config, str, len));
- WT_RET(__wt_calloc_def(session, 1, &scan));
- *scan = config;
- *scanp = (WT_CONFIG_SCAN *)scan;
- return (0);
-}
-
-/*
- * __wt_ext_config_scan_end --
- * End a scan of a config string.
- * (external API only).
- */
-int
-__wt_ext_config_scan_end(WT_EXTENSION_API *wt_api, WT_CONFIG_SCAN *scan)
-{
- WT_CONFIG *conf;
-
- WT_UNUSED(wt_api);
-
- conf = (WT_CONFIG *)scan;
- __wt_free(conf->session, scan);
- return (0);
-}
-
-/*
- * __wt_ext_config_scan_next --
- * Get the next key/value pair from a config scan.
- * (external API only).
- */
-int
-__wt_ext_config_scan_next(
- WT_EXTENSION_API *wt_api, WT_CONFIG_SCAN *scan,
- WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
-{
- WT_CONFIG *conf;
-
- WT_UNUSED(wt_api);
-
- conf = (WT_CONFIG *)scan;
- return (__wt_config_next(conf, key, value));
-}
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 780ae6f6be5..b1992793827 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -105,11 +105,8 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn)
conn->extension_api.scr_free = __wt_ext_scr_free;
conn->extension_api.collator_config = ext_collator_config;
conn->extension_api.collate = ext_collate;
+ conn->extension_api.config_parser_open = __wt_ext_config_parser_open;
conn->extension_api.config_get = __wt_ext_config_get;
- conn->extension_api.config_strget = __wt_ext_config_strget;
- conn->extension_api.config_scan_begin = __wt_ext_config_scan_begin;
- conn->extension_api.config_scan_end = __wt_ext_config_scan_end;
- conn->extension_api.config_scan_next = __wt_ext_config_scan_next;
conn->extension_api.metadata_insert = __wt_ext_metadata_insert;
conn->extension_api.metadata_remove = __wt_ext_metadata_remove;
conn->extension_api.metadata_search = __wt_ext_metadata_search;
@@ -123,6 +120,7 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn)
conn->extension_api.transaction_notify = __wt_ext_transaction_notify;
conn->extension_api.transaction_oldest = __wt_ext_transaction_oldest;
conn->extension_api.transaction_visible = __wt_ext_transaction_visible;
+ conn->extension_api.version = wiredtiger_version;
return (&conn->extension_api);
}
@@ -498,6 +496,20 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config)
CONNECTION_API_CALL(conn, session, close, config, cfg);
WT_UNUSED(cfg);
+ /*
+	 * Roll back all running transactions.
+ * We do this as a separate pass because an active transaction in one
+ * session could cause trouble when closing a file, even if that
+ * session never referenced that file.
+ */
+ for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i)
+ if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL) &&
+ F_ISSET(&s->txn, TXN_RUNNING)) {
+ wt_session = &s->iface;
+ WT_TRET(wt_session->rollback_transaction(
+ wt_session, NULL));
+ }
+
/* Close open, external sessions. */
for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i)
if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL)) {
@@ -1012,6 +1024,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
const char *name;
uint32_t flag;
} *ft, file_types[] = {
+ { "checkpoint", WT_FILE_TYPE_CHECKPOINT },
{ "data", WT_FILE_TYPE_DATA },
{ "log", WT_FILE_TYPE_LOG },
{ NULL, 0 }
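As a side note on the connection-close change above (an editorial sketch, not part of the patch; the WT_HOME directory is hypothetical): a connection can now be closed while an external session still has a transaction running, and that transaction is rolled back as part of WT_CONNECTION::close.

    WT_CONNECTION *conn;
    WT_SESSION *session;
    int ret;

    ret = wiredtiger_open("WT_HOME", NULL, "create", &conn);
    ret = conn->open_session(conn, NULL, NULL, &session);
    ret = session->begin_transaction(session, NULL);
    /* ... uncommitted updates ... */

    /* The running transaction is rolled back during close. */
    ret = conn->close(conn, NULL);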
diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c
index 55faeefbde0..34b6a51570b 100644
--- a/src/conn/conn_cache_pool.c
+++ b/src/conn/conn_cache_pool.c
@@ -464,8 +464,10 @@ __cache_pool_adjust(uint64_t highest, uint64_t bump_threshold)
* it.
*/
grew = 0;
- adjusted = (cp->chunk > entry->cache_size - reserved) ?
- cp->chunk : (entry->cache_size - reserved);
+ if (entry->cache_size - cp->chunk > reserved)
+ adjusted = cp->chunk;
+ else
+ adjusted = entry->cache_size - reserved;
} else if (highest > 1 &&
entry->cache_size < cp->size &&
cache->bytes_inmem >=
diff --git a/src/docs/Doxyfile b/src/docs/Doxyfile
index 5f7e5016892..d8f753b269b 100644
--- a/src/docs/Doxyfile
+++ b/src/docs/Doxyfile
@@ -1576,6 +1576,7 @@ PREDEFINED = DOXYGEN \
__wt_compressor:=WT_COMPRESSOR \
__wt_config_arg:=WT_CONFIG_ARG \
__wt_config_item:=WT_CONFIG_ITEM \
+ __wt_config_parser:=WT_CONFIG_PARSER \
__wt_config_scan:=WT_CONFIG_SCAN \
__wt_connection:=WT_CONNECTION \
__wt_cursor:=WT_CURSOR \
diff --git a/src/docs/command-line.dox b/src/docs/command-line.dox
index 52be68a1074..04daa4050cd 100644
--- a/src/docs/command-line.dox
+++ b/src/docs/command-line.dox
@@ -291,9 +291,6 @@ engine, or, if specified, for the URI on the command-line.
<code>wt [-Vv] [-C config] [-h directory] stat [-a] [uri]</code>
@subsection util_stat_options Options
-The \c stat command has no command-specific options.
-
-@subsection util_stat_options Options
The following are command-specific options for the \c stat command:
@par <code>-a</code>
diff --git a/src/docs/config-strings.dox b/src/docs/config-strings.dox
index 542e86f620d..295316fe039 100644
--- a/src/docs/config-strings.dox
+++ b/src/docs/config-strings.dox
@@ -16,7 +16,7 @@ specified directly. More precisely, keys or values that match this regular
expression do not require quotes:
<pre>
- [-_0-9A-Za-z/][^\\t\\r\\n :=,\\])}]*
+ [-_0-9A-Za-z./][^\\t\\r\\n :=,\\])}]*
</pre>
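For example (an editorial sketch; the backup_path key is hypothetical), adding '.' to the first-character class means a value that begins with a dot, such as a relative path, no longer needs quoting:

    WT_CONFIG_ITEM v;
    WT_CONFIG_PARSER *parser;
    int ret;

    /* "./backup" is a legal bare value now that '.' may start one. */
    const char *cfg = "backup_path=./backup";

    ret = wiredtiger_config_parser_open(NULL, cfg, strlen(cfg), &parser);
    ret = parser->get(parser, "backup_path", &v);  /* v.str points at "./backup" */
    ret = parser->close(parser);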
More complex keys and values can be specified by quoting them with double
diff --git a/src/docs/custom_data.dox b/src/docs/custom_data.dox
index b1ee8dfbd9e..22dd75dcc26 100644
--- a/src/docs/custom_data.dox
+++ b/src/docs/custom_data.dox
@@ -165,11 +165,6 @@ of a configuration string as follows:
@snippet ex_data_source.c WT_EXTENSION config_get
-The WT_DATA_SOURCE::open_cursor method might retrieve the list value
-of a configuration string as follows:
-
-@snippet ex_data_source.c WT_EXTENSION config scan
-
@subsection custom_ds_config_add Creating data-specific configuration strings
Applications can add their own configuration strings to WiredTiger
diff --git a/src/docs/helium.dox b/src/docs/helium.dox
index cd6b47fb968..35e3886a8d6 100644
--- a/src/docs/helium.dox
+++ b/src/docs/helium.dox
@@ -103,7 +103,7 @@ WT_SESSION *session;
/* Create and truncate the access table. */
ret = session->create(session, "table:dev1/access",
- "key_format=S,value_format=S,type=helium,helium_open_o_truncate=1");
+ "key_format=S,value_format=S,type=helium,helium_o_truncate=1");
@endcode
@section helium_notes Helium notes
diff --git a/src/docs/spell.ok b/src/docs/spell.ok
index 6d24c474e19..58701aaa5bf 100644
--- a/src/docs/spell.ok
+++ b/src/docs/spell.ok
@@ -313,6 +313,7 @@ statlog
str
strerror
strftime
+strget
struct
structs
subdatabases
diff --git a/src/docs/top/main.dox b/src/docs/top/main.dox
index 5481d2deae5..f36f1887a73 100644
--- a/src/docs/top/main.dox
+++ b/src/docs/top/main.dox
@@ -6,9 +6,9 @@ WiredTiger is an high performance, scalable, production quality, NoSQL,
@section releases Releases
<table>
-@row{<b>WiredTiger 2.1.0</b> (current),
- <a href="releases/wiredtiger-2.1.0.tar.bz2"><b>[Release package]</b></a>,
- <a href="2.1.0/index.html"><b>[Documentation]</b></a>}
+@row{<b>WiredTiger 2.1.1</b> (current),
+ <a href="releases/wiredtiger-2.1.1.tar.bz2"><b>[Release package]</b></a>,
+ <a href="2.1.1/index.html"><b>[Documentation]</b></a>}
@row{<b>WiredTiger 1.6.6</b> (previous),
<a href="releases/wiredtiger-1.6.6.tar.bz2"><b>[Release package]</b></a>,
<a href="1.6.6/index.html"><b>[Documentation]</b></a>}
diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox
index e59b031a1ff..3cb0b96b0ef 100644
--- a/src/docs/upgrading.dox
+++ b/src/docs/upgrading.dox
@@ -1,5 +1,20 @@
/*! @page upgrading Upgrading WiredTiger applications
+@section version_211 Upgrading to Version 2.1.1
+<dl>
+
+<dt>WT_EXTENSION_API::config methods</dt>
+<dd>
+In the WiredTiger 2.1.1 release, the configuration string parsing API
+has been reworked and moved to a new public handle. The
+WT_EXTENSION_API::config_strget, WT_EXTENSION_API::config_scan_begin,
+WT_EXTENSION_API::config_scan_next and WT_EXTENSION_API::config_scan_end
+methods have been removed. They are replaced by the
+WT_EXTENSION_API::config_parser_open method, which returns a handle used
+to parse configuration strings. See the WT_CONFIG_PARSER documentation
+for examples of how to use the updated API; a brief migration sketch
+follows below.
+</dd>
+
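A brief migration sketch (editorial illustration, not part of the documentation change), assuming the extension holds a WT_EXTENSION_API handle named wt_api, a WT_SESSION named session, and a configuration string str of length len:

    /*
     * 2.1 and earlier stepped the string with the scan interface:
     *	ret = wt_api->config_scan_begin(wt_api, session, str, len, &scan);
     *	while ((ret = wt_api->config_scan_next(wt_api, scan, &k, &v)) == 0)
     *		... handle k and v ...;
     *	ret = wt_api->config_scan_end(wt_api, scan);
     */

    /* 2.1.1: open a parser handle and use it directly. */
    WT_CONFIG_ITEM k, v;
    WT_CONFIG_PARSER *parser;
    int ret;

    ret = wt_api->config_parser_open(wt_api, session, str, len, &parser);
    while ((ret = parser->next(parser, &k, &v)) == 0) {
        /* ... handle k and v ... */
    }
    ret = parser->close(parser);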
@section version_21 Upgrading to Version 2.1
<dl>
diff --git a/src/include/btmem.h b/src/include/btmem.h
index 7f0bf280d5c..42d7ecfa9e2 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -197,8 +197,8 @@ struct __wt_page_modify {
*/
uint64_t disk_snap_min;
- /* The largest transaction ID written to disk for the page. */
- uint64_t disk_txn;
+ /* The largest transaction ID seen on the page by reconciliation. */
+ uint64_t rec_max_txn;
/* The largest update transaction ID (approximate). */
uint64_t update_txn;
@@ -345,22 +345,17 @@ struct __wt_page {
 * The read generation is a 64-bit value; if incremented frequently, a
 * 32-bit value could overflow.
*
- * The read generation is a piece of shared memory potentially accessed
+ * The read generation is a piece of shared memory potentially read
* by many threads. We don't want to update page read generations for
* in-cache workloads and suffer the cache misses, so we don't simply
* increment the read generation value on every access. Instead, the
- * read generation is initialized to 0, then set to a real value if the
- * page is ever considered for eviction. Once set to a real value, the
- * read generation is potentially incremented every time the page is
- * accessed. To try and avoid incrementing the page at a fast rate in
- * this case, the read generation is incremented to a future point.
- *
- * The read generation is not declared volatile or published: the read
- * generation is set a lot, and we don't want to write it that much.
+ * read generation is incremented by the eviction server each time it
+ * becomes active. To avoid incrementing a page's read generation too
+ * frequently, it is set to a future point.
*/
#define WT_READ_GEN_NOTSET 0
#define WT_READ_GEN_OLDEST 1
-#define WT_READ_GEN_STEP 1000
+#define WT_READ_GEN_STEP 100
uint64_t read_gen;
uint64_t memory_footprint; /* Memory attached to the page */
diff --git a/src/include/btree.h b/src/include/btree.h
index 84da462417a..b40fc8a7f80 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -112,7 +112,9 @@ struct __wt_btree {
uint64_t write_gen; /* Write generation */
WT_PAGE *evict_page; /* Eviction thread's location */
- uint64_t evict_priority; /* Relative priority of cached pages. */
+ uint64_t evict_priority; /* Relative priority of cached pages */
+ u_int evict_walk_period; /* Skip this many LRU walks */
+ u_int evict_walk_skips; /* Number of walks skipped */
volatile uint32_t lru_count; /* Count of threads in LRU eviction */
volatile int checkpointing; /* Checkpoint in progress */
diff --git a/src/include/btree.i b/src/include/btree.i
index f09d05178ab..94f8133d187 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -159,6 +159,12 @@ __wt_cache_read_gen(WT_SESSION_IMPL *session)
return (S2C(session)->cache->read_gen);
}
+static inline void
+__wt_cache_read_gen_incr(WT_SESSION_IMPL *session)
+{
+ ++S2C(session)->cache->read_gen;
+}
+
static inline uint64_t
__wt_cache_read_gen_set(WT_SESSION_IMPL *session)
{
@@ -171,7 +177,7 @@ __wt_cache_read_gen_set(WT_SESSION_IMPL *session)
* page. In other words, the goal is to avoid some number of updates
* immediately after each update we have to make.
*/
- return (++S2C(session)->cache->read_gen + WT_READ_GEN_STEP);
+ return (__wt_cache_read_gen(session) + WT_READ_GEN_STEP);
}
/*
@@ -563,6 +569,30 @@ __wt_eviction_force_check(WT_SESSION_IMPL *session, WT_PAGE *page)
}
/*
+ * __wt_eviction_force_txn_check --
+ * Check if the current transaction permits forced eviction of a page.
+ */
+static inline int
+__wt_eviction_force_txn_check(WT_SESSION_IMPL *session, WT_PAGE *page)
+{
+ WT_TXN_STATE *txn_state;
+
+ /*
+ * Only try if there is a chance of success. If the page has already
+ * been split and this transaction is already pinning the oldest ID so
+ * that the page can't be evicted, it has to complete before eviction
+ * can succeed.
+ */
+ txn_state = &S2C(session)->txn_global.states[session->id];
+ if (!F_ISSET_ATOMIC(page, WT_PAGE_WAS_SPLIT) ||
+ txn_state->snap_min == WT_TXN_NONE ||
+ TXNID_LT(page->modify->update_txn, txn_state->snap_min))
+ return (1);
+
+ return (0);
+}
+
+/*
* __wt_page_release --
* Release a reference to a page.
*/
@@ -582,10 +612,8 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_PAGE *page)
* Try to immediately evict pages if they have the special "oldest"
* read generation and we have some chance of succeeding.
*/
- if (!WT_TXN_ACTIVE(&session->txn) &&
- (page->modify == NULL ||
- !F_ISSET(page->modify, WT_PM_REC_SPLIT_MERGE)) &&
- page->read_gen == WT_READ_GEN_OLDEST &&
+ if (page->read_gen == WT_READ_GEN_OLDEST &&
+ __wt_eviction_force_txn_check(session, page) &&
WT_ATOMIC_CAS(page->ref->state, WT_REF_MEM, WT_REF_LOCKED)) {
if ((ret = __wt_hazard_clear(session, page)) != 0) {
page->ref->state = WT_REF_MEM;
@@ -678,30 +706,6 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page)
/*
* __wt_eviction_force --
- * Check if the current transaction permits forced eviction of a page.
- */
-static inline int
-__wt_eviction_force_txn_check(WT_SESSION_IMPL *session, WT_PAGE *page)
-{
- WT_TXN_STATE *txn_state;
-
- /*
- * Only try if there is a chance of success. If the page has already
- * been split and this transaction is already pinning the oldest ID so
- * that the page can't be evicted, it has to complete before eviction
- * can succeed.
- */
- txn_state = &S2C(session)->txn_global.states[session->id];
- if (!F_ISSET_ATOMIC(page, WT_PAGE_WAS_SPLIT) ||
- txn_state->snap_min == WT_TXN_NONE ||
- TXNID_LT(page->modify->update_txn, txn_state->snap_min))
- return (1);
-
- return (0);
-}
-
-/*
- * __wt_eviction_force --
* Forcefully evict a page, if possible.
*/
static inline int
diff --git a/src/include/cache.h b/src/include/cache.h
index 055041b7e6c..717191d19dd 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -9,16 +9,17 @@
* Tuning constants: I hesitate to call this tuning, but we want to review some
* number of pages from each file's in-memory tree for each page we evict.
*/
-#define WT_EVICT_INT_SKEW (1<<12) /* Prefer leaf pages over internal
+#define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal
pages by this many increments of the
read generation. */
#define WT_EVICT_WALK_PER_FILE 10 /* Pages to visit per file */
#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
-#define WT_EVICT_PASS_ALL 0x01
-#define WT_EVICT_PASS_DIRTY 0x02
-#define WT_EVICT_PASS_INTERNAL 0x04
+#define WT_EVICT_PASS_AGGRESSIVE 0x01
+#define WT_EVICT_PASS_ALL 0x02
+#define WT_EVICT_PASS_DIRTY 0x04
+#define WT_EVICT_PASS_INTERNAL 0x08
/*
* WT_EVICT_ENTRY --
diff --git a/src/include/config.h b/src/include/config.h
index d8837f0f368..c83d96c8a5e 100644
--- a/src/include/config.h
+++ b/src/include/config.h
@@ -33,6 +33,14 @@ struct __wt_config_entry {
const WT_CONFIG_CHECK *checks; /* check array */
};
+struct __wt_config_parser_impl {
+ WT_CONFIG_PARSER iface;
+
+ WT_SESSION_IMPL *session;
+ WT_CONFIG config;
+ WT_CONFIG_ITEM config_item;
+};
+
/*
* DO NOT EDIT: automatically built by dist/api_config.py.
* configuration section: BEGIN
diff --git a/src/include/extern.h b/src/include/extern.h
index d1662717345..ba026253a5b 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -126,6 +126,7 @@ extern int __wt_block_manager_open(WT_SESSION_IMPL *session,
const char *filename,
const char *cfg[],
int forced_salvage,
+ int readonly,
uint32_t allocsize,
WT_BM **bmp);
extern int __wt_block_manager_truncate( WT_SESSION_IMPL *session,
@@ -138,6 +139,7 @@ extern int __wt_block_open(WT_SESSION_IMPL *session,
const char *filename,
const char *cfg[],
int forced_salvage,
+ int readonly,
uint32_t allocsize,
WT_BLOCK **blockp);
extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block);
@@ -529,27 +531,16 @@ extern int __wt_config_concat( WT_SESSION_IMPL *session,
const char **config_ret);
extern int __wt_conn_config_init(WT_SESSION_IMPL *session);
extern void __wt_conn_config_discard(WT_SESSION_IMPL *session);
+extern int __wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext,
+ WT_SESSION *wt_session,
+ const char *config,
+ size_t len,
+ WT_CONFIG_PARSER **config_parserp);
extern int __wt_ext_config_get(WT_EXTENSION_API *wt_api,
WT_SESSION *wt_session,
WT_CONFIG_ARG *cfg_arg,
const char *key,
WT_CONFIG_ITEM *cval);
-extern int __wt_ext_config_strget(WT_EXTENSION_API *wt_api,
- WT_SESSION *wt_session,
- const char *config,
- const char *key,
- WT_CONFIG_ITEM *cval);
-extern int __wt_ext_config_scan_begin( WT_EXTENSION_API *wt_api,
- WT_SESSION *wt_session,
- const char *str,
- size_t len,
- WT_CONFIG_SCAN **scanp);
-extern int __wt_ext_config_scan_end(WT_EXTENSION_API *wt_api,
- WT_CONFIG_SCAN *scan);
-extern int __wt_ext_config_scan_next( WT_EXTENSION_API *wt_api,
- WT_CONFIG_SCAN *scan,
- WT_CONFIG_ITEM *key,
- WT_CONFIG_ITEM *value);
extern int __wt_collator_config( WT_SESSION_IMPL *session,
const char **cfg,
WT_COLLATOR **collatorp);
diff --git a/src/include/flags.h b/src/include/flags.h
index 31eed83e351..89f2450f3af 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -10,6 +10,7 @@
#define WT_CONN_PANIC 0x00000002
#define WT_CONN_SERVER_RUN 0x00000001
#define WT_EVICTION_SERVER_LOCKED 0x00000004
+#define WT_FILE_TYPE_CHECKPOINT 0x00000004
#define WT_FILE_TYPE_DATA 0x00000002
#define WT_FILE_TYPE_LOG 0x00000001
#define WT_LOGSCAN_FIRST 0x00000008
diff --git a/src/include/lsm.h b/src/include/lsm.h
index 4d3b14c5f74..b5c3859605c 100644
--- a/src/include/lsm.h
+++ b/src/include/lsm.h
@@ -30,6 +30,8 @@ struct __wt_cursor_lsm {
uint64_t *txnid_max; /* Maximum txn for each chunk */
size_t txnid_alloc;
+ u_int update_count; /* Updates performed. */
+
#define WT_CLSM_ACTIVE 0x01 /* Incremented the session count */
#define WT_CLSM_ITERATE_NEXT 0x02 /* Forward iteration */
#define WT_CLSM_ITERATE_PREV 0x04 /* Backward iteration */
diff --git a/src/include/stat.h b/src/include/stat.h
index 6717b4d081f..ea2a4068f96 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -182,6 +182,8 @@ struct __wt_connection_stats {
WT_STATS log_slot_transitions;
WT_STATS log_sync;
WT_STATS log_writes;
+ WT_STATS lsm_checkpoint_throttle;
+ WT_STATS lsm_merge_throttle;
WT_STATS lsm_rows_merged;
WT_STATS memory_allocation;
WT_STATS memory_free;
@@ -275,9 +277,11 @@ struct __wt_dsrc_stats {
WT_STATS cursor_search_near;
WT_STATS cursor_update;
WT_STATS cursor_update_bytes;
+ WT_STATS lsm_checkpoint_throttle;
WT_STATS lsm_chunk_count;
WT_STATS lsm_generation_max;
WT_STATS lsm_lookup_no_bloom;
+ WT_STATS lsm_merge_throttle;
WT_STATS rec_dictionary;
WT_STATS rec_overflow_key_internal;
WT_STATS rec_overflow_key_leaf;
diff --git a/src/include/txn.h b/src/include/txn.h
index e71b693928b..17e2c0c632e 100644
--- a/src/include/txn.h
+++ b/src/include/txn.h
@@ -131,6 +131,3 @@ struct __wt_txn {
#define TXN_RUNNING 0x08
uint32_t flags;
};
-
-#define WT_TXN_ACTIVE(txn) \
- (F_ISSET((txn), TXN_RUNNING) && (txn)->mod_count > 0)
diff --git a/src/include/txn.i b/src/include/txn.i
index cdfe697ee51..2543d5ff21f 100644
--- a/src/include/txn.i
+++ b/src/include/txn.i
@@ -166,20 +166,42 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id)
/*
* __wt_txn_read_skip --
- * Get the first visible update in a list (or NULL if none are visible),
- * and report whether uncommitted changes were skipped.
+ * Get the first visible update in a list (or NULL if none are visible).
+ * Report the maximum transaction ID in the list and whether any updates
+ * were skipped to find the visible update.
*/
static inline WT_UPDATE *
-__wt_txn_read_skip(WT_SESSION_IMPL *session, WT_UPDATE *upd, int *skipp)
+__wt_txn_read_skip(
+ WT_SESSION_IMPL *session, WT_UPDATE *upd, uint64_t *max_txn, int *skipp)
{
+ WT_UPDATE *first_upd;
+
+ /*
+ * Track the largest transaction ID on this page. We store this in the
+ * page at the end of reconciliation if no updates are skipped. It is
+ * used to avoid evicting a clean page from memory with changes that
+ * are required to satisfy a snapshot read.
+ *
+ * Record whether any updates were skipped on the way to finding the
+ * first visible update. That determines whether a future read with no
+ * intervening modifications to the page could see a different value.
+ * If not, the page can safely be marked clean, and does not need to be
+ * reconciled until it is modified again.
+ */
*skipp = 0;
- while (upd != NULL && !__wt_txn_visible(session, upd->txnid)) {
- if (upd->txnid != WT_TXN_ABORTED)
- *skipp = 1;
- upd = upd->next;
- }
+ for (first_upd = NULL; upd != NULL; upd = upd->next)
+ if (upd->txnid != WT_TXN_ABORTED) {
+ if (TXNID_LT(*max_txn, upd->txnid))
+ *max_txn = upd->txnid;
+ if (first_upd == NULL) {
+ if (__wt_txn_visible(session, upd->txnid))
+ first_upd = upd;
+ else
+ *skipp = 1;
+ }
+ }
- return (upd);
+ return (first_upd);
}
/*
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 4c7ee4917e9..6334bca2061 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -71,6 +71,9 @@ extern "C" {
*******************************************/
struct __wt_collator; typedef struct __wt_collator WT_COLLATOR;
struct __wt_compressor; typedef struct __wt_compressor WT_COMPRESSOR;
+struct __wt_config_item; typedef struct __wt_config_item WT_CONFIG_ITEM;
+struct __wt_config_parser;
+ typedef struct __wt_config_parser WT_CONFIG_PARSER;
struct __wt_connection; typedef struct __wt_connection WT_CONNECTION;
struct __wt_cursor; typedef struct __wt_cursor WT_CURSOR;
struct __wt_data_source; typedef struct __wt_data_source WT_DATA_SOURCE;
@@ -1490,8 +1493,12 @@ struct __wt_connection {
* @config{direct_io, Use \c O_DIRECT to access files. Options are given as a
* list\, such as <code>"direct_io=[data]"</code>. Configuring \c direct_io
* requires care\, see @ref tuning_system_buffer_cache_direct_io for important
- * warnings., a list\, with values chosen from the following options: \c
- * "data"\, \c "log"; default empty.}
+ * warnings. Including \c "data" will cause WiredTiger data files to use \c
+ * O_DIRECT\, including \c "log" will cause WiredTiger log files to use \c
+ * O_DIRECT\, and including \c "checkpoint" will cause WiredTiger data files
+ * opened at a checkpoint (i.e.\, read-only) to use \c O_DIRECT., a list\, with
+ * values chosen from the following options: \c "checkpoint"\, \c "data"\, \c
+ * "log"; default empty.}
* @config{error_prefix, prefix string for error messages., a string; default
* empty.}
* @config{eviction_dirty_target, continue evicting until the cache has less
@@ -1890,6 +1897,153 @@ int wiredtiger_unpack_str(WT_PACK_STREAM *ps, const char **sp);
*/
int wiredtiger_unpack_uint(WT_PACK_STREAM *ps, uint64_t *up);
+/*!
+ * @name Configuration string parsing
+ * @{
+ */
+
+/*!
+ * The configuration information returned by the WiredTiger configuration
+ * parsing functions in the WT_EXTENSION_API and the public API.
+ */
+struct __wt_config_item {
+ /*!
+ * The value of a configuration string.
+ *
+ * Regardless of the type of the configuration string (boolean, int,
+ * list or string), the \c str field will reference the value of the
+ * configuration string.
+ *
+	 * The bytes referenced by \c str are <b>not</b> nul-terminated;
+ * use the \c len field instead of a terminating nul byte.
+ */
+ const char *str;
+
+ /*! The number of bytes in the value referenced by \c str. */
+ size_t len;
+
+ /*!
+ * The value of a configuration boolean or integer.
+ *
+ * If the configuration string's value is "true" or "false", the
+	 * \c val field will be set to 1 (true) or 0 (false).
+ *
+ * If the configuration string can be legally interpreted as an integer,
+ * using the strtoll function rules as specified in ISO/IEC 9899:1990
+ * ("ISO C90"), that integer will be stored in the \c val field.
+ */
+ int64_t val;
+
+ /*! Permitted values of the \c type field. */
+ enum {
+ /*! A string value with quotes stripped. */
+ WT_CONFIG_ITEM_STRING,
+ /*! A boolean literal ("true" or "false"). */
+ WT_CONFIG_ITEM_BOOL,
+ /*! An unquoted identifier: a string value without quotes. */
+ WT_CONFIG_ITEM_ID,
+ /*! A numeric value. */
+ WT_CONFIG_ITEM_NUM,
+ /*! A nested structure or list, including brackets. */
+ WT_CONFIG_ITEM_STRUCT
+ }
+ /*!
+ * The type of value determined by the parser. In all cases,
+ * the \c str and \c len fields are set.
+ */
+ type;
+};
+
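A short sketch of consuming a returned item (editorial illustration; the parser handle and the cache_size key are assumed):

    WT_CONFIG_ITEM cval;

    if (parser->get(parser, "cache_size", &cval) == 0) {
        /* str/len always describe the raw text; it is not nul-terminated. */
        printf("text: %.*s\n", (int)cval.len, cval.str);

        /* For numeric and boolean values, val holds the parsed result. */
        if (cval.type == WT_CONFIG_ITEM_NUM)
            printf("number: %lld\n", (long long)cval.val);
    }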
+/*!
+ * Create a handle that can be used to parse or create configuration strings
+ * compatible with WiredTiger APIs.
+ * This API is outside the scope of a WiredTiger connection handle, since
+ * applications may need to generate configuration strings prior to calling
+ * ::wiredtiger_open.
+ * @param session the session handle to be used for error reporting. If NULL,
+ * error messages will be written to stdout.
+ * @param config the configuration string being parsed. The string must
+ * remain valid for the lifetime of the parser handle.
+ * @param len the number of valid bytes in \c config
+ * @param[out] config_parserp A pointer to the newly opened handle
+ * @errors
+ */
+int wiredtiger_config_parser_open(WT_SESSION *session,
+ const char *config, size_t len, WT_CONFIG_PARSER **config_parserp);
+
+/*!
+ * A handle that can be used to search and traverse configuration strings
+ * compatible with WiredTiger APIs.
+ * To parse the contents of a list or nested configuration string, use a new
+ * configuration parser handle based on the content of the ::WT_CONFIG_ITEM
+ * retrieved from the parent configuration string.
+ *
+ * @section config_parse_examples Configuration String Parsing examples
+ *
+ * This could be used in C to create a configuration parser as follows:
+ *
+ * @snippet ex_config_parse.c Create a configuration parser
+ *
+ * Once the parser has been created the content can be queried directly:
+ *
+ * @snippet ex_config_parse.c get
+ *
+ * Or the content can be traversed linearly:
+ *
+ * @snippet ex_config_parse.c next
+ *
+ * Nested configuration values can be queried using a shorthand notation:
+ *
+ * @snippet ex_config_parse.c nested get
+ *
+ * Nested configuration values can be traversed using multiple
+ * ::WT_CONFIG_PARSER handles:
+ *
+ * @snippet ex_config_parse.c nested traverse
+ */
+struct __wt_config_parser {
+
+ /*!
+	 * Close the configuration parser, releasing any resources.
+ *
+ * @param config_parser the configuration parser handle
+ * @errors
+ *
+ */
+ int __F(close)(WT_CONFIG_PARSER *config_parser);
+
+ /*!
+ * Return the next key/value pair.
+ *
+	 * When iteration would pass the end of the configuration string,
+ * ::WT_NOTFOUND will be returned.
+ *
+ * If an item has no explicitly assigned value, the item will be
+ * returned in \c key and the \c value will be set to the boolean
+ * \c "true" value.
+ *
+ * @param config_parser the configuration parser handle
+ * @param key the returned key
+ * @param value the returned value
+ * @errors
+ *
+ */
+ int __F(next)(WT_CONFIG_PARSER *config_parser,
+ WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value);
+
+ /*!
+ * Return the value of an item in the configuration string.
+ *
+ * @param config_parser the configuration parser handle
+ * @param key configuration key string
+ * @param value the returned value
+ * @errors
+ *
+ */
+ int __F(get)(WT_CONFIG_PARSER *config_parser,
+ const char *key, WT_CONFIG_ITEM *value);
+};
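A hedged sketch of the multi-handle pattern described above (modeled on, not copied from, the referenced ex_config_parse.c snippets; the outer parser, session handle and log key are assumed): an item of type WT_CONFIG_ITEM_STRUCT can seed a second parser for its nested content.

    WT_CONFIG_ITEM k, v;
    WT_CONFIG_PARSER *sub_parser;
    int ret;

    if (parser->get(parser, "log", &v) == 0 &&
        v.type == WT_CONFIG_ITEM_STRUCT) {
        ret = wiredtiger_config_parser_open(
            session, v.str, v.len, &sub_parser);
        while ((ret = sub_parser->next(sub_parser, &k, &v)) == 0)
            printf("log.%.*s\n", (int)k.len, k.str);
        ret = sub_parser->close(sub_parser);
    }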
+
#endif /* !defined(SWIG) */
/*!
* @}
@@ -1985,9 +2139,7 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp);
/*******************************************
* Forward structure declarations for the extension API
*******************************************/
-struct __wt_config_arg; typedef struct __wt_config_arg WT_CONFIG_ARG;
-struct __wt_config_item; typedef struct __wt_config_item WT_CONFIG_ITEM;
-struct __wt_config_scan; typedef struct __wt_config_scan WT_CONFIG_SCAN;
+struct __wt_config_arg; typedef struct __wt_config_arg WT_CONFIG_ARG;
/*!
* The interface implemented by applications to provide custom ordering of
@@ -2588,42 +2740,46 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_LOG_SYNC 1063
/*! log: log write operations */
#define WT_STAT_CONN_LOG_WRITES 1064
+/*! sleep for LSM checkpoint throttle */
+#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1065
+/*! sleep for LSM merge throttle */
+#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1066
/*! rows merged in an LSM tree */
-#define WT_STAT_CONN_LSM_ROWS_MERGED 1065
+#define WT_STAT_CONN_LSM_ROWS_MERGED 1067
/*! memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1066
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1068
/*! memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1067
+#define WT_STAT_CONN_MEMORY_FREE 1069
/*! memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1068
+#define WT_STAT_CONN_MEMORY_GROW 1070
/*! total read I/Os */
-#define WT_STAT_CONN_READ_IO 1069
+#define WT_STAT_CONN_READ_IO 1071
/*! page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1070
+#define WT_STAT_CONN_REC_PAGES 1072
/*! page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1071
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1073
/*! reconciliation failed because an update could not be included */
-#define WT_STAT_CONN_REC_SKIPPED_UPDATE 1072
+#define WT_STAT_CONN_REC_SKIPPED_UPDATE 1074
/*! pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1073
+#define WT_STAT_CONN_RWLOCK_READ 1075
/*! pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1074
+#define WT_STAT_CONN_RWLOCK_WRITE 1076
/*! open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1075
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1077
/*! transactions */
-#define WT_STAT_CONN_TXN_BEGIN 1076
+#define WT_STAT_CONN_TXN_BEGIN 1078
/*! transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1077
+#define WT_STAT_CONN_TXN_CHECKPOINT 1079
/*! transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1078
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1080
/*! transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1079
+#define WT_STAT_CONN_TXN_COMMIT 1081
/*! transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1080
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1082
/*! transactions rolled-back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1081
+#define WT_STAT_CONN_TXN_ROLLBACK 1083
/*! total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1082
+#define WT_STAT_CONN_WRITE_IO 1084
/*!
* @}
@@ -2767,43 +2923,47 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CURSOR_UPDATE 2066
/*! cursor-update value bytes updated */
#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2067
+/*! sleep for LSM checkpoint throttle */
+#define WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE 2068
/*! chunks in the LSM tree */
-#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2068
+#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2069
/*! highest merge generation in the LSM tree */
-#define WT_STAT_DSRC_LSM_GENERATION_MAX 2069
+#define WT_STAT_DSRC_LSM_GENERATION_MAX 2070
/*! queries that could have benefited from a Bloom filter that did not
* exist */
-#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2070
+#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2071
+/*! sleep for LSM merge throttle */
+#define WT_STAT_DSRC_LSM_MERGE_THROTTLE 2072
/*! reconciliation dictionary matches */
-#define WT_STAT_DSRC_REC_DICTIONARY 2071
+#define WT_STAT_DSRC_REC_DICTIONARY 2073
/*! reconciliation internal-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2072
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2074
/*! reconciliation leaf-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2073
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2075
/*! reconciliation overflow values written */
-#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2074
+#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2076
/*! reconciliation pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2075
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2077
/*! reconciliation pages merged */
-#define WT_STAT_DSRC_REC_PAGE_MERGE 2076
+#define WT_STAT_DSRC_REC_PAGE_MERGE 2078
/*! page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2077
+#define WT_STAT_DSRC_REC_PAGES 2079
/*! page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2078
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2080
/*! reconciliation failed because an update could not be included */
-#define WT_STAT_DSRC_REC_SKIPPED_UPDATE 2079
+#define WT_STAT_DSRC_REC_SKIPPED_UPDATE 2081
/*! reconciliation internal pages split */
-#define WT_STAT_DSRC_REC_SPLIT_INTERNAL 2080
+#define WT_STAT_DSRC_REC_SPLIT_INTERNAL 2082
/*! reconciliation leaf pages split */
-#define WT_STAT_DSRC_REC_SPLIT_LEAF 2081
+#define WT_STAT_DSRC_REC_SPLIT_LEAF 2083
/*! reconciliation maximum splits for a page */
-#define WT_STAT_DSRC_REC_SPLIT_MAX 2082
+#define WT_STAT_DSRC_REC_SPLIT_MAX 2084
/*! object compaction */
-#define WT_STAT_DSRC_SESSION_COMPACT 2083
+#define WT_STAT_DSRC_SESSION_COMPACT 2085
/*! open cursor count */
-#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2084
+#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2086
/*! update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2085
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2087
/*! @} */
/*
* Statistics section: END
diff --git a/src/include/wiredtiger_ext.h b/src/include/wiredtiger_ext.h
index 88ddecdade1..acf7efad3d9 100644
--- a/src/include/wiredtiger_ext.h
+++ b/src/include/wiredtiger_ext.h
@@ -184,6 +184,12 @@ struct __wt_extension_api {
WT_ITEM *first, WT_ITEM *second, int *cmp);
/*!
+ * @copydoc wiredtiger_config_parser_open
+ */
+ int (*config_parser_open)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
+ const char *config, size_t len, WT_CONFIG_PARSER **config_parserp);
+
+ /*!
* Return the value of a configuration string.
*
* @param wt_api the extension handle
@@ -199,69 +205,6 @@ struct __wt_extension_api {
WT_CONFIG_ARG *config, const char *key, WT_CONFIG_ITEM *value);
/*!
- * Return the value of a configuration string.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param config a configuration string
- * @param key configuration key string
- * @param value the returned value
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION config_strget
- */
- int (*config_strget)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
- const char *config, const char *key, WT_CONFIG_ITEM *value);
-
- /*!
- * Return the list entries of a configuration string value.
- * This method steps through the entries found in the last returned
- * value from WT_EXTENSION_API::config_get. The last returned value
- * should be of type "list".
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param str the configuration string to scan
- * @param len the number of valid bytes in \c str
- * @param[out] scanp a handle used to scan the config string
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION config scan
- */
- int (*config_scan_begin)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
- const char *str, size_t len, WT_CONFIG_SCAN **scanp);
-
- /*!
- * Release any resources allocated by
- * WT_EXTENSION_API::config_scan_begin.
- *
- * @param wt_api the extension handle
- * @param scan the configuration scanner, invalid after this call
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION config scan
- */
- int (*config_scan_end)(WT_EXTENSION_API *wt_api, WT_CONFIG_SCAN *scan);
-
- /*!
- * Return the next key/value pair from a config string scan.
- *
- * If the string contains a list of items with no assigned value, the
- * items will be returned in \c key and the \c value will be set to the
- * boolean \c "true" value.
- *
- * @param wt_api the extension handle
- * @param scan the configuration scanner
- * @param key the returned key
- * @param value the returned value
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION config scan
- */
- int (*config_scan_next)(WT_EXTENSION_API *wt_api,
- WT_CONFIG_SCAN *scan, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value);
-
- /*!
* Insert a row into the metadata if it does not already exist.
*
* @param wt_api the extension handle
@@ -428,75 +371,19 @@ struct __wt_extension_api {
*/
int (*transaction_visible)(WT_EXTENSION_API *wt_api,
WT_SESSION *session, uint64_t transaction_id);
-};
-
-/*!
- * @typedef WT_CONFIG_ARG
- *
- * A configuration object passed to some extension interfaces. This is an
- * opaque type: configuration values can be queried using
- * WT_EXTENSION_API::config_get.
- */
-/*!
- * The configuration information returned by the WiredTiger extension function
- * WT_EXTENSION_API::config_get.
- */
-struct __wt_config_item {
/*!
- * The value of a configuration string.
- *
- * Regardless of the type of the configuration string (boolean, int,
- * list or string), the \c str field will reference the value of the
- * configuration string.
- *
- * The bytes referenced by \c str are <b>not</b> be nul-terminated,
- * use the \c len field instead of a terminating nul byte.
+ * @copydoc wiredtiger_version
*/
- const char *str;
-
- /*! The number of bytes in the value referenced by \c str. */
- size_t len;
-
- /*!
- * The value of a configuration boolean or integer.
- *
- * If the configuration string's value is "true" or "false", the
- * \c val field will be set to 1 (true), or 0 (false).
- *
- * If the configuration string can be legally interpreted as an integer,
- * using the strtoll function rules as specified in ISO/IEC 9899:1990
- * ("ISO C90"), that integer will be stored in the \c val field.
- */
- int64_t val;
-
- /*! Permitted values of the \c type field. */
- enum {
- /*! A string value with quotes stripped. */
- WT_CONFIG_ITEM_STRING,
- /*! A boolean literal ("true" or "false"). */
- WT_CONFIG_ITEM_BOOL,
- /*! An unquoted identifier: a string value without quotes. */
- WT_CONFIG_ITEM_ID,
- /*! A numeric value. */
- WT_CONFIG_ITEM_NUM,
- /*! A nested structure or list, including brackets. */
- WT_CONFIG_ITEM_STRUCT
- }
- /*!
- * The type of value determined by the parser. In all cases,
- * the \c str and \c len fields are set.
- */
- type;
+ const char *(*version)(int *majorp, int *minorp, int *patchp);
};
/*!
- * @typedef WT_CONFIG_SCAN
+ * @typedef WT_CONFIG_ARG
*
- * A handle for a scan through a configuration string.
- * This is an opaque type returned by WT_EXTENSION_API::config_scan_begin.
- * Configuration values can be queried using WT_EXTENSION_API::config_scan_next.
- * Call WT_EXTENSION_API::config_scan_end when finished to release resources.
+ * A configuration object passed to some extension interfaces. This is an
+ * opaque type: configuration values can be queried using
+ * WT_EXTENSION_API::config_get.
*/
/*! @} */
diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h
index 3ae89fc211e..19a805aa25f 100644
--- a/src/include/wt_internal.h
+++ b/src/include/wt_internal.h
@@ -93,6 +93,8 @@ struct __wt_config_check;
typedef struct __wt_config_check WT_CONFIG_CHECK;
struct __wt_config_entry;
typedef struct __wt_config_entry WT_CONFIG_ENTRY;
+struct __wt_config_parser_impl;
+ typedef struct __wt_config_parser_impl WT_CONFIG_PARSER_IMPL;
struct __wt_connection_impl;
typedef struct __wt_connection_impl WT_CONNECTION_IMPL;
struct __wt_connection_stats;
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index 618257469ee..c1824df24ac 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -17,15 +17,15 @@
/*
* LSM API enter: check that the cursor is in sync with the tree.
*/
-#define WT_LSM_ENTER(clsm, cursor, session, n) \
+#define WT_LSM_ENTER(clsm, cursor, session, n, reset) \
clsm = (WT_CURSOR_LSM *)cursor; \
CURSOR_API_CALL(cursor, session, n, NULL); \
- WT_ERR(__clsm_enter(clsm, 0))
+ WT_ERR(__clsm_enter(clsm, reset, 0))
#define WT_LSM_UPDATE_ENTER(clsm, cursor, session, n) \
clsm = (WT_CURSOR_LSM *)cursor; \
CURSOR_UPDATE_API_CALL(cursor, session, n, NULL); \
- WT_ERR(__clsm_enter(clsm, 1))
+ WT_ERR(__clsm_enter(clsm, 0, 1))
#define WT_LSM_LEAVE(session) \
API_END(session); \
@@ -35,16 +35,18 @@
CURSOR_UPDATE_API_END(session, ret); \
WT_TRET(__clsm_leave(clsm))
-static int __clsm_open_cursors(WT_CURSOR_LSM *, int, u_int, uint32_t);
static int __clsm_lookup(WT_CURSOR_LSM *);
+static int __clsm_open_cursors(WT_CURSOR_LSM *, int, u_int, uint32_t);
+static int __clsm_reset_cursors(WT_CURSOR_LSM *, WT_CURSOR *);
/*
* __clsm_enter --
* Start an operation on an LSM cursor, update if the tree has changed.
*/
static inline int
-__clsm_enter(WT_CURSOR_LSM *clsm, int update)
+__clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update)
{
+ WT_CURSOR *c;
WT_DECL_RET;
WT_LSM_CHUNK *chunk;
WT_SESSION_IMPL *session;
@@ -57,6 +59,20 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int update)
if (F_ISSET(clsm, WT_CLSM_MERGE))
return (0);
+ if (reset) {
+ c = &clsm->iface;
+ /* Copy out data before resetting chunk cursors. */
+ if (F_ISSET(c, WT_CURSTD_KEY_INT) &&
+ !WT_DATA_IN_ITEM(&c->key))
+ WT_RET(__wt_buf_set(
+ session, &c->key, c->key.data, c->key.size));
+ if (F_ISSET(c, WT_CURSTD_VALUE_INT) &&
+ !WT_DATA_IN_ITEM(&c->value))
+ WT_RET(__wt_buf_set(
+ session, &c->value, c->value.data, c->value.size));
+ WT_RET(__clsm_reset_cursors(clsm, NULL));
+ }
+
for (;;) {
/*
* If the cursor looks up-to-date, check if the cache is full.
@@ -369,7 +385,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
* generation number and retry if it has changed under us.
*/
if (clsm->cursors != NULL && (ngood < clsm->nchunks ||
- (F_ISSET(clsm, WT_CLSM_OPEN_READ) && nupdates > 0))) {
+ (!F_ISSET(clsm, WT_CLSM_OPEN_READ) && nupdates > 0))) {
saved_gen = lsm_tree->dsk_gen;
locked = 0;
WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree));
@@ -607,7 +623,7 @@ __clsm_next(WT_CURSOR *cursor)
u_int i;
int check, cmp, deleted;
- WT_LSM_ENTER(clsm, cursor, session, next);
+ WT_LSM_ENTER(clsm, cursor, session, next, 0);
/* If we aren't positioned for a forward scan, get started. */
if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_NEXT)) {
@@ -689,7 +705,7 @@ __clsm_prev(WT_CURSOR *cursor)
u_int i;
int check, cmp, deleted;
- WT_LSM_ENTER(clsm, cursor, session, prev);
+ WT_LSM_ENTER(clsm, cursor, session, prev, 0);
/* If we aren't positioned for a reverse scan, get started. */
if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_PREV)) {
@@ -875,13 +891,14 @@ __clsm_lookup(WT_CURSOR_LSM *clsm)
}
WT_ERR(WT_NOTFOUND);
-done: WT_TRET(__clsm_reset_cursors(clsm, c));
+done:
err: if (ret == 0) {
clsm->current = c;
F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
} else {
- WT_TRET(__clsm_reset_cursors(clsm, NULL));
+ if (c != NULL)
+ WT_TRET(c->reset(c));
F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
}
@@ -899,7 +916,7 @@ __clsm_search(WT_CURSOR *cursor)
WT_DECL_RET;
WT_SESSION_IMPL *session;
- WT_LSM_ENTER(clsm, cursor, session, search);
+ WT_LSM_ENTER(clsm, cursor, session, search, 1);
WT_CURSOR_NEEDKEY(cursor);
ret = __clsm_lookup(clsm);
@@ -925,13 +942,10 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp)
larger = smaller = NULL;
- WT_LSM_ENTER(clsm, cursor, session, search_near);
+ WT_LSM_ENTER(clsm, cursor, session, search_near, 1);
WT_CURSOR_NEEDKEY(cursor);
F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
- /* Reset any positioned cursor(s) to release pinned resources. */
- WT_ERR(__clsm_reset_cursors(clsm, NULL));
-
/*
* search_near is somewhat fiddly: we can't just return a nearby key
* from the in-memory chunk because there could be a closer key on
@@ -1111,13 +1125,29 @@ __clsm_put(WT_SESSION_IMPL *session,
}
/*
- * The count is in a shared structure, but it's only approximate, so
- * don't worry about protecting access.
+ * Update the record count. It is in a shared structure, but it's only
+ * approximate, so don't worry about protecting access.
+ *
+ * Throttle if necessary. Every 100 update operations on each cursor,
+ * check if throttling is required. Don't rely only on the shared
+ * counter because it can race, and because for some workloads, there
+ * may not be enough records per chunk to get effective throttling.
*/
- if (++clsm->primary_chunk->count % 100 == 0 &&
- lsm_tree->merge_throttle + lsm_tree->ckpt_throttle > 0)
+ if ((++clsm->primary_chunk->count % 100 == 0 ||
+ ++clsm->update_count >= 100) &&
+ lsm_tree->merge_throttle + lsm_tree->ckpt_throttle > 0) {
+ clsm->update_count = 0;
+ WT_STAT_FAST_INCRV(session, &clsm->lsm_tree->stats,
+ lsm_checkpoint_throttle, (uint64_t)lsm_tree->ckpt_throttle);
+ WT_STAT_FAST_CONN_INCRV(session,
+ lsm_checkpoint_throttle, (uint64_t)lsm_tree->ckpt_throttle);
+ WT_STAT_FAST_INCRV(session, &clsm->lsm_tree->stats,
+ lsm_merge_throttle, (uint64_t)lsm_tree->merge_throttle);
+ WT_STAT_FAST_CONN_INCRV(session,
+ lsm_merge_throttle, (uint64_t)lsm_tree->merge_throttle);
__wt_sleep(0,
lsm_tree->ckpt_throttle + lsm_tree->merge_throttle);
+ }
/*
* In LSM there are multiple btrees active at one time. The tree
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index 792adf0773f..1f70072b41f 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -82,7 +82,7 @@ __wt_lsm_merge(
aggressive = 10;
merge_min = (aggressive > 5) ? 2 : lsm_tree->merge_min;
max_gap = (aggressive + 4) / 5;
- max_level = (id == 0 ? 0 : id - 1) + aggressive;
+ max_level = (lsm_tree->merge_throttle > 0) ? 0 : id + aggressive;
/*
* If there aren't any chunks to merge, or some of the chunks aren't
@@ -101,24 +101,19 @@ __wt_lsm_merge(
WT_RET(__wt_lsm_tree_lock(session, lsm_tree, 1));
/*
- * Only include chunks that already have a Bloom filter and not
- * involved in a merge.
+ * Only include chunks that already have a Bloom filter or are the
+ * result of a merge and not involved in a merge.
*/
- end_chunk = lsm_tree->nchunks - 1;
- while (end_chunk > 0 &&
- ((chunk = lsm_tree->chunk[end_chunk]) == NULL ||
- !F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) ||
- F_ISSET(chunk, WT_LSM_CHUNK_MERGING))) {
- --end_chunk;
-
- /*
- * If we find a chunk on disk without a Bloom filter, give up.
- * We may have waited a while to lock the tree, and new chunks
- * may have been created in the meantime.
- */
- if (chunk != NULL &&
+ for (end_chunk = lsm_tree->nchunks - 1; end_chunk > 0; --end_chunk) {
+ chunk = lsm_tree->chunk[end_chunk];
+ WT_ASSERT(session, chunk != NULL);
+ if (F_ISSET(chunk, WT_LSM_CHUNK_MERGING))
+ continue;
+ if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) || chunk->generation > 0)
+ break;
+ else if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
- end_chunk = 0;
+ break;
}
/*
@@ -150,8 +145,11 @@ __wt_lsm_merge(
youngest = lsm_tree->chunk[end_chunk];
nchunks = (end_chunk + 1) - start_chunk;
- /* If the chunk is already involved in a merge, stop. */
- if (F_ISSET(chunk, WT_LSM_CHUNK_MERGING))
+ /*
+ * If the chunk is already involved in a merge or a Bloom
+ * filter is being built for it, stop.
+ */
+ if (F_ISSET(chunk, WT_LSM_CHUNK_MERGING) || chunk->bloom_busy)
break;
/*
@@ -163,10 +161,14 @@ __wt_lsm_merge(
/*
* If the size of the chunks selected so far exceeds the
- * configured maximum chunk size, stop.
+ * configured maximum chunk size, stop. Keep going if we can
+ * slide the window further into the tree: we don't want to
+ * leave small chunks in the middle.
*/
if ((chunk_size += chunk->size) > lsm_tree->chunk_max)
- break;
+ if (nchunks < merge_min ||
+ chunk_size - youngest->size > lsm_tree->chunk_max)
+ break;
/*
* If we have enough chunks for a merge and the next chunk is
@@ -184,7 +186,12 @@ __wt_lsm_merge(
record_count += chunk->count;
--start_chunk;
- if (nchunks == lsm_tree->merge_max) {
+ /*
+ * If we have a full window, or the merge would be too big,
+ * remove the youngest chunk.
+ */
+ if (nchunks == lsm_tree->merge_max ||
+ chunk_size > lsm_tree->chunk_max) {
WT_ASSERT(session,
F_ISSET(youngest, WT_LSM_CHUNK_MERGING));
F_CLR(youngest, WT_LSM_CHUNK_MERGING);
diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c
index 30148de9a9f..a626032fe8a 100644
--- a/src/lsm/lsm_stat.c
+++ b/src/lsm/lsm_stat.c
@@ -39,7 +39,7 @@ __lsm_stat_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst)
"statistics=(%s%s%s)",
cst->stat_clear ? "clear," : "",
cst->stat_all ? "all," : "",
- cst->stat_fast ? "fast," : "");
+ !cst->stat_all && cst->stat_fast ? "fast," : "");
cfg[1] = disk_cfg[1] = config;
}
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index a830295908f..eb81acecaf4 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -692,9 +692,9 @@ __wt_lsm_tree_throttle(
else if (!decrease_only)
WT_LSM_MERGE_THROTTLE_INCREASE(lsm_tree->merge_throttle);
- /* Put an upper bound of 100ms on both throttle calculations. */
- lsm_tree->ckpt_throttle = WT_MIN(100000, lsm_tree->ckpt_throttle);
- lsm_tree->merge_throttle = WT_MIN(100000, lsm_tree->merge_throttle);
+ /* Put an upper bound of 1s on both throttle calculations. */
+ lsm_tree->ckpt_throttle = WT_MIN(1000000, lsm_tree->ckpt_throttle);
+ lsm_tree->merge_throttle = WT_MIN(1000000, lsm_tree->merge_throttle);
/*
* Update our estimate of how long each in-memory chunk stays active.
@@ -1035,7 +1035,13 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip)
WT_RET(__wt_seconds(session, &begin));
+ /*
+ * Set the compacting flag and clear the current merge throttle
+ * setting, so that all merge threads look for merges at all levels of
+ * the tree.
+ */
F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
+ lsm_tree->merge_throttle = 0;
/* Wake up the merge threads. */
WT_RET(__wt_cond_signal(session, lsm_tree->work_cond));
diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c
index 8b939529b33..29f6ea31271 100644
--- a/src/lsm/lsm_worker.c
+++ b/src/lsm/lsm_worker.c
@@ -104,7 +104,7 @@ __wt_lsm_merge_worker(void *vargs)
session = lsm_tree->worker_sessions[id];
__wt_free(session, args);
- aggressive = stallms = 0;
+ aggressive = chunk_wait = stallms = 0;
while (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) {
/*
@@ -127,7 +127,7 @@ __wt_lsm_merge_worker(void *vargs)
progress = 1;
/* If we didn't create a Bloom filter, try to merge. */
- if (progress == 0 &&
+ if ((id != 0 || progress == 0) &&
__wt_lsm_merge(session, lsm_tree, id, aggressive) == 0)
progress = 1;
@@ -185,7 +185,8 @@ err: __wt_err(session, ret, "LSM merge worker failed");
/*
* __lsm_bloom_work --
- * Try to create a Bloom filter for the newest on-disk chunk.
+ * Try to create a Bloom filter for the newest on-disk chunk that doesn't
+ * have one.
*/
static int
__lsm_bloom_work(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
@@ -216,10 +217,14 @@ __lsm_bloom_work(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
chunk->count == 0)
continue;
- /* See if we win the race to switch on the "busy" flag. */
+ /*
+ * See if we win the race to switch on the "busy" flag and
+ * recheck that the chunk still needs a Bloom filter.
+ */
if (WT_ATOMIC_CAS(chunk->bloom_busy, 0, 1)) {
- ret = __lsm_bloom_create(
- session, lsm_tree, chunk, (u_int)i);
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
+ ret = __lsm_bloom_create(
+ session, lsm_tree, chunk, (u_int)i);
chunk->bloom_busy = 0;
break;
}
diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c
index 6ef4caadd0a..c6938dad9fd 100644
--- a/src/os_posix/os_open.c
+++ b/src/os_posix/os_open.c
@@ -107,7 +107,8 @@ __wt_open(WT_SESSION_IMPL *session,
#endif
#ifdef O_NOATIME
/* Avoid updating metadata for read-only workloads. */
- if (dio_type == WT_FILE_TYPE_DATA)
+ if (dio_type == WT_FILE_TYPE_DATA ||
+ dio_type == WT_FILE_TYPE_CHECKPOINT)
f |= O_NOATIME;
#endif
@@ -157,7 +158,8 @@ __wt_open(WT_SESSION_IMPL *session,
#if defined(HAVE_POSIX_FADVISE)
/* Disable read-ahead on trees: it slows down random read workloads. */
- if (dio_type == WT_FILE_TYPE_DATA)
+ if (dio_type == WT_FILE_TYPE_DATA ||
+ dio_type == WT_FILE_TYPE_CHECKPOINT)
WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM));
#endif
@@ -174,7 +176,8 @@ __wt_open(WT_SESSION_IMPL *session,
WT_ERR(__wt_filesize(session, fh, &fh->size));
/* Configure file extension. */
- if (dio_type == WT_FILE_TYPE_DATA)
+ if (dio_type == WT_FILE_TYPE_DATA ||
+ dio_type == WT_FILE_TYPE_CHECKPOINT)
fh->extend_len = conn->data_extend_len;
/*
diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c
index 8d620198c09..8bd78ce0ac4 100644
--- a/src/schema/schema_create.c
+++ b/src/schema/schema_create.c
@@ -33,7 +33,8 @@ __wt_direct_io_size_check(WT_SESSION_IMPL *session,
* if you configure direct I/O and then don't do I/O in alignments and
* units of its happy place.
*/
- if (FLD_ISSET(conn->direct_io, WT_FILE_TYPE_DATA)) {
+ if (FLD_ISSET(conn->direct_io,
+ WT_FILE_TYPE_CHECKPOINT | WT_FILE_TYPE_DATA)) {
align = (int64_t)conn->buffer_alignment;
if (align != 0 && (cval.val < align || cval.val % align != 0))
WT_RET_MSG(session, EINVAL,
diff --git a/src/support/scratch.c b/src/support/scratch.c
index 4b342977372..1aae8649901 100644
--- a/src/support/scratch.c
+++ b/src/support/scratch.c
@@ -127,7 +127,8 @@ __wt_buf_set(
/* Ensure the buffer is large enough. */
WT_RET(__wt_buf_initsize(session, buf, size));
- memcpy(buf->mem, data, size);
+ /* Copy, allowing for overlapping strings. */
+ memmove(buf->mem, data, size);
return (0);
}
diff --git a/src/support/stat.c b/src/support/stat.c
index 621c79220a4..c0caecbe606 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -93,11 +93,14 @@ __wt_stat_init_dsrc_stats(WT_DSRC_STATS *stats)
stats->cursor_search_near.desc = "cursor search near calls";
stats->cursor_update.desc = "cursor update calls";
stats->cursor_update_bytes.desc = "cursor-update value bytes updated";
+ stats->lsm_checkpoint_throttle.desc =
+ "sleep for LSM checkpoint throttle";
stats->lsm_chunk_count.desc = "chunks in the LSM tree";
stats->lsm_generation_max.desc =
"highest merge generation in the LSM tree";
stats->lsm_lookup_no_bloom.desc =
"queries that could have benefited from a Bloom filter that did not exist";
+ stats->lsm_merge_throttle.desc = "sleep for LSM merge throttle";
stats->rec_dictionary.desc = "reconciliation dictionary matches";
stats->rec_overflow_key_internal.desc =
"reconciliation internal-page overflow keys";
@@ -194,9 +197,11 @@ __wt_stat_refresh_dsrc_stats(void *stats_arg)
stats->cursor_search_near.v = 0;
stats->cursor_update.v = 0;
stats->cursor_update_bytes.v = 0;
+ stats->lsm_checkpoint_throttle.v = 0;
stats->lsm_chunk_count.v = 0;
stats->lsm_generation_max.v = 0;
stats->lsm_lookup_no_bloom.v = 0;
+ stats->lsm_merge_throttle.v = 0;
stats->rec_dictionary.v = 0;
stats->rec_overflow_key_internal.v = 0;
stats->rec_overflow_key_leaf.v = 0;
@@ -280,9 +285,11 @@ __wt_stat_aggregate_dsrc_stats(const void *child, const void *parent)
p->cursor_search_near.v += c->cursor_search_near.v;
p->cursor_update.v += c->cursor_update.v;
p->cursor_update_bytes.v += c->cursor_update_bytes.v;
+ p->lsm_checkpoint_throttle.v += c->lsm_checkpoint_throttle.v;
if (c->lsm_generation_max.v > p->lsm_generation_max.v)
p->lsm_generation_max.v = c->lsm_generation_max.v;
p->lsm_lookup_no_bloom.v += c->lsm_lookup_no_bloom.v;
+ p->lsm_merge_throttle.v += c->lsm_merge_throttle.v;
p->rec_dictionary.v += c->rec_dictionary.v;
p->rec_overflow_key_internal.v += c->rec_overflow_key_internal.v;
p->rec_overflow_key_leaf.v += c->rec_overflow_key_leaf.v;
@@ -389,6 +396,9 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
"log: consolidated slot join transitions";
stats->log_sync.desc = "log: log sync operations";
stats->log_writes.desc = "log: log write operations";
+ stats->lsm_checkpoint_throttle.desc =
+ "sleep for LSM checkpoint throttle";
+ stats->lsm_merge_throttle.desc = "sleep for LSM merge throttle";
stats->lsm_rows_merged.desc = "rows merged in an LSM tree";
stats->memory_allocation.desc = "memory allocations";
stats->memory_free.desc = "memory frees";
@@ -479,6 +489,8 @@ __wt_stat_refresh_connection_stats(void *stats_arg)
stats->log_slot_transitions.v = 0;
stats->log_sync.v = 0;
stats->log_writes.v = 0;
+ stats->lsm_checkpoint_throttle.v = 0;
+ stats->lsm_merge_throttle.v = 0;
stats->lsm_rows_merged.v = 0;
stats->memory_allocation.v = 0;
stats->memory_free.v = 0;
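
The stat.c hunks register the two new throttle counters in each generated helper: a description string, a reset to zero on refresh, and aggregation from child to parent handles. As the aggregate hunk shows, throttle sleeps are summed like ordinary counters, while lsm_generation_max keeps the larger value. A compact sketch of that aggregation rule over an assumed, simplified stats struct:

/*
 * Sketch (not the generated stat.c code) of the aggregation rule the
 * hunks extend.
 */
#include <stdint.h>

struct dsrc_stats {
	int64_t lsm_checkpoint_throttle;
	int64_t lsm_merge_throttle;
	int64_t lsm_generation_max;
};

static void
aggregate_stats(const struct dsrc_stats *c, struct dsrc_stats *p)
{
	p->lsm_checkpoint_throttle += c->lsm_checkpoint_throttle; /* sum */
	p->lsm_merge_throttle += c->lsm_merge_throttle;           /* sum */
	if (c->lsm_generation_max > p->lsm_generation_max)        /* maximum */
		p->lsm_generation_max = c->lsm_generation_max;
}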
diff --git a/tools/wt_nvd3_util.py b/tools/wt_nvd3_util.py
new file mode 100644
index 00000000000..6bf1396b0ff
--- /dev/null
+++ b/tools/wt_nvd3_util.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+#
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+from datetime import datetime
+from nvd3 import lineChart
+
+# Add a multiChart type so we can overlay line graphs
+class multiChart(lineChart):
+ def __init__(self, **kwargs):
+ lineChart.__init__(self, **kwargs)
+
+ # Fix the axes
+ del self.axislist['yAxis']
+ self.create_y_axis('yAxis1', format=kwargs.get('y_axis_format', '.02f'))
+ self.create_y_axis('yAxis2', format=kwargs.get('y_axis_format', '.02f'))
+
+TIMEFMT = "%b %d %H:%M:%S"
+
+thisyear = datetime.today().year
+def parsetime(s):
+ return datetime.strptime(s, TIMEFMT).replace(year=thisyear)
+
diff --git a/tools/wtperf_graph.py b/tools/wtperf_graph.py
index 8bd11433014..f45145cf801 100644
--- a/tools/wtperf_graph.py
+++ b/tools/wtperf_graph.py
@@ -33,22 +33,26 @@ from subprocess import call
TIMEFMT = "%b %d %H:%M:%S"
-def process_monitor(fname, ckptlist, opdict):
+def process_monitor(fname, sfx, ckptlist, opdict):
# Read the monitor file and figure out when a checkpoint was running.
in_ckpt = 'N'
ckptlist=[]
+
+ ofname = 'monitor%s.png' % (sfx)
# Monitor output format currently is:
- # time,read,insert,update,ckpt,...latencies...
+ # time,totalsec,read,insert,update,ckpt,...latencies...
ops = ('read', 'insert', 'update')
- csvcol = (1, 2, 3)
+ csvcol = (2, 3, 4)
with open(fname, 'r') as csvfile:
reader = csv.reader(csvfile)
for row in reader:
+ if row[0].lstrip().startswith('#'):
+ continue
# Look for checkpoints and operations.
- if row[4] != in_ckpt:
+ if row[5] != in_ckpt:
ckptlist.append(row[0])
- in_ckpt = row[4]
+ in_ckpt = row[5]
for op, col in zip(ops, csvcol):
if row[col] != '0' and opdict[op] == 0:
opdict[op] = 1
@@ -77,14 +81,14 @@ set yrange [0:]\n''' % {
})
it = iter(ckptlist)
for start, stop in zip(it, it):
- of.write('set object rectangle from first \'' + start +\
- '\', graph 0 ' + ' to first \'' + stop +\
- '\', graph 1 fc rgb "gray" back\n')
- of.write('set output "' + fname + '.png"\n')
- of.write('plot "' + fname + '" using 1:($2/1000) title "Reads", "' +\
- fname + '" using 1:($3/1000) title "Inserts", "' +\
- fname + '" using 1:($4/1000) title "Updates", "' +\
- fname + '" using 1:(($2+$3+$4)/1000) title "Total"\n')
+ of.write("set object rectangle from first '%s',\
+ graph 0 to first '%s',\
+ graph 1 fc rgb \"gray\" back\n" % (start, stop))
+ of.write('set output "%s"\n' % (ofname))
+ of.write("""plot "{name}" using 1:($3/1000) title "Reads", \\
+ "{name}" using 1:($4/1000) title "Inserts",\\
+ "{name}" using 1:($5/1000) title "Updates"
+ """.format(name=fname))
of.close()
call(["gnuplot", gcmd])
os.remove(gcmd)
@@ -118,10 +122,12 @@ set yrange [1:]\n''' % {
'\', graph 1 fc rgb "gray" back\n')
ofname = name + sfx + '.latency1.png'
of.write('set output "' + ofname + '"\n')
- of.write('plot "' + fname + '" using 1:($' + repr(col_avg) +\
+ of.write('plot "' +\
+ fname + '" using 1:($' + repr(col_avg) +\
') title "Average Latency", "' + fname +'" using 1:($' +\
repr(col_min) + ') title "Minimum Latency", "' +\
- fname + '" using 1:($' + repr(col_max) + ') title "Maximum Latency"\n')
+ fname + '" using 1:($' + repr(col_max) +\
+ ') title "Maximum Latency"\n')
of.close()
call(["gnuplot", gcmd])
os.remove(gcmd)
@@ -129,6 +135,9 @@ set yrange [1:]\n''' % {
# Graph latency vs. % operations
def plot_latency_percent(name, dirname, sfx, ckptlist):
+ lfile = os.path.join(dirname, 'latency.' + name)
+ if not os.path.exists(lfile):
+ return
gcmd = "gnuplot." + name + ".l2.cmd"
of = open(gcmd, "w")
of.write('''
@@ -147,7 +156,7 @@ set ylabel "%% operations"
set yrange [0:]\n''')
ofname = name + sfx + '.latency2.png'
of.write('set output "' + ofname + '"\n')
- of.write('plot "' + os.path.join(dirname, 'latency.' + name) + sfx +\
+ of.write('plot "' + lfile + sfx +\
'" using (($2 * 100)/$4) title "' + name + '"\n')
of.close()
call(["gnuplot", gcmd])
@@ -156,6 +165,9 @@ set yrange [0:]\n''')
# Graph latency vs. % operations (cumulative)
def plot_latency_cumulative_percent(name, dirname, sfx, ckptlist):
+ lfile = os.path.join(dirname, 'latency.' + name)
+ if not os.path.exists(lfile):
+ return
# Latency plot: cumulative operations vs. latency
gcmd = "gnuplot." + name + ".l3.cmd"
of = open(gcmd, "w")
@@ -176,7 +188,7 @@ set yrange [0:]\n''' % {
})
ofname = name + sfx + '.latency3.png'
of.write('set output "' + ofname + '"\n')
- of.write('plot "' + os.path.join(dirname, 'latency.' + name) + sfx +\
+ of.write('plot "' + lfile + sfx +\
'" using 1:(($3 * 100)/$4) title "' + name + '"\n')
of.close()
call(["gnuplot", gcmd])
@@ -187,14 +199,14 @@ def process_file(fname):
# NOTE: The operations below must be in this exact order to match
# the operation latency output in the monitor file.
opdict={'read':0, 'insert':0, 'update':0}
- process_monitor(fname, ckptlist, opdict)
# This assumes the monitor file has the string "monitor"
# and any other (optional) characters in the filename are a suffix.
sfx = os.path.basename(fname).replace('monitor','')
dirname = os.path.dirname(fname)
- column = 6 # average, minimum, maximum start in column 6
+ process_monitor(fname, sfx, ckptlist, opdict)
+ column = 7 # average, minimum, maximum start in column 7
for k, v in opdict.items():
if v != 0:
plot_latency_operation(
diff --git a/tools/wtperf_stats.py b/tools/wtperf_stats.py
new file mode 100644
index 00000000000..d9743055fff
--- /dev/null
+++ b/tools/wtperf_stats.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python
+#
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+
+import os, csv, operator, sys
+from time import mktime
+
+try:
+ from wt_nvd3_util import multiChart, parsetime
+except ImportError:
+ print >>sys.stderr, "Could not import wt_nvd3_util.py, it should be\
+ in the same directory as %s" % sys.argv[0]
+ sys.exit(-1)
+
+try:
+ from stat_data import no_scale_per_second_list
+except ImportError:
+ print >>sys.stderr, "Could not import stat_data.py, it should be in\
+ the same directory as %s" % sys.argv[0]
+ sys.exit(-1)
+
+# Fixup the names and values in a dictionary read in from a csv file. One
+# field must be "#time" - which is used to calculate the interval.
+# Input is a dictionary, output is a list of dictionaries with a single entry.
+def munge_dict(values_dict, abstime):
+ sorted_values = sorted(values_dict, key=operator.itemgetter('#time'))
+ start_time = parsetime(sorted_values[0]['#time'])
+
+ ret = []
+ for v in sorted_values:
+ if abstime:
+ # Build the time series, milliseconds since the epoch
+ v['#time'] = int(mktime(parsetime(v['#time']).timetuple())) * 1000
+ else:
+ # Build the time series as seconds since the start of the data
+ v['#time'] = (parsetime(v['#time']) - start_time).seconds
+ next_val = {}
+ for title, value in v.items():
+ if title.find('uS') != -1:
+ title = title.replace('uS', 'ms')
+ value = float(value) / 1000
+ if title == 'totalsec':
+ value = 0
+ if title == 'checkpoints' and value == 'N':
+ value = 0
+ elif title.find('time') != -1:
+ title = 'time'
+ elif title.find('latency') == -1 and \
+ title.find('checkpoints') == -1:
+ title = title + ' (thousands)'
+ value = float(value) / 1000
+ next_val[title] = value
+ ret.append(next_val)
+
+ # After building the series, eliminate constants
+ d0 = ret[0]
+ for t0, v0 in d0.items():
+ skip = True
+ for d in ret:
+ v = d[t0]
+ if v != v0:
+ skip = False
+ break
+ if skip:
+ for dicts in ret:
+ del dicts[t0]
+
+ return ret
+
+def addPlotsToChart(chart, graph_data, wtstat_chart = False):
+ # Extract the times - they are the same for all lines.
+ times = []
+ for v in graph_data:
+ times.append(v['time'])
+
+ # Add a line to the graph for each field in the CSV file in alphabetical
+ # order, so the key is sorted.
+ for field in sorted(graph_data[0].keys()):
+ if field == 'time':
+ continue
+ # Split the latency and non-latency measurements onto different scales
+ axis = "1"
+ if not wtstat_chart and field.find('latency') == -1:
+ axis="2"
+ ydata = []
+ for v in graph_data:
+ ydata.append(v[field])
+ chart.add_serie(x=times, y=ydata, name=field, type="line", yaxis=axis)
+
+# Input parameters are a chart populated with WiredTiger statistics and
+# the directory where the wtperf monitor file can be found.
+def addPlotsToStatsChart(chart, dirname, abstime):
+ fname = os.path.join(dirname, 'monitor')
+ try:
+ with open(fname, 'rb') as csvfile:
+ reader = csv.DictReader(csvfile)
+ # Transform the data into something NVD3 can digest
+ graph_data = munge_dict(reader, abstime)
+ except IOError:
+ print >>sys.stderr, "Could not open wtperf monitor file."
+ sys.exit(-1)
+ addPlotsToChart(chart, graph_data, 1)
+
+def main():
+ # Parse the command line
+ import argparse
+
+ parser = argparse.ArgumentParser(description='Create graphs from WiredTiger statistics.')
+ parser.add_argument('--abstime', action='store_true',
+ help='use absolute time on the x axis')
+ parser.add_argument('--output', '-o', metavar='file',
+ default='wtperf_stats.html', help='HTML output file')
+ parser.add_argument('files', metavar='file', nargs='+',
+ help='input monitor file generated by WiredTiger wtperf application')
+ args = parser.parse_args()
+
+ output_file = open(args.output, 'w')
+
+ if len(args.files) != 1:
+ print 'Script currently only supports a single monitor file'
+ exit (1)
+
+ chart_extra = {}
+ # Add in the x axis if the user wants time.
+ if args.abstime:
+ chart_extra['x_axis_format'] = '%H:%M:%S'
+
+ for f in args.files:
+ with open(f, 'rb') as csvfile:
+ reader = csv.DictReader(csvfile)
+ # Transform the data into something NVD3 can digest
+ graph_data = munge_dict(reader, args.abstime)
+
+ chart = multiChart(name='wtperf',
+ height=450 + 10*len(graph_data[0].keys()),
+ resize=True,
+ x_is_date=args.abstime,
+ assets_directory='http://source.wiredtiger.com/graphs/',
+ **chart_extra)
+
+ addPlotsToChart(chart, graph_data)
+
+ chart.buildhtml()
+ output_file.write(chart.htmlcontent)
+ output_file.close()
+
+if __name__ == '__main__':
+ main()
+
diff --git a/tools/wtstats.py b/tools/wtstats.py
index 766c49989b6..37b28e6c13a 100644
--- a/tools/wtstats.py
+++ b/tools/wtstats.py
@@ -28,38 +28,35 @@
import fileinput, os, re, shutil, sys, textwrap
from collections import defaultdict
-from datetime import datetime
from time import mktime
from subprocess import call
try:
from stat_data import no_scale_per_second_list
except ImportError:
- print >>sys.stderr, "Could not import stat_data.py, it should be in the same directory as %s" % sys.argv[0]
+ print >>sys.stderr, "Could not import stat_data.py, it should be\
+ in the same directory as %s" % sys.argv[0]
sys.exit(-1)
try:
- from nvd3 import lineChart, lineWithFocusChart
+ from wtperf_stats import addPlotsToStatsChart
except ImportError:
- print >>sys.stderr, "Could not import nvd3. Please install it *from source* (other versions may be missing features that we rely on). Run these commands: git clone https://github.com/areski/python-nvd3.git ; cd python-nvd3 ; sudo python setup.py install"
+ print >>sys.stderr, "Could not import wtperf_stats.py, it should be\
+ in the same directory as %s" % sys.argv[0]
sys.exit(-1)
+try:
+ from wt_nvd3_util import multiChart, parsetime
+except ImportError:
+ print >>sys.stderr, "Could not import wt_nvd3_util.py, it should be\
+ in the same directory as %s" % sys.argv[0]
+ sys.exit(-1)
-# Add a multiChart type so we can overlay line graphs
-class multiChart(lineChart):
- def __init__(self, **kwargs):
- lineChart.__init__(self, **kwargs)
-
- # Fix the axes
- del self.axislist['yAxis']
- self.create_y_axis('yAxis1', format=kwargs.get('y_axis_format', '.02f'))
- self.create_y_axis('yAxis2', format=kwargs.get('y_axis_format', '.02f'))
-
-TIMEFMT = "%b %d %H:%M:%S"
-
-thisyear = datetime.today().year
-def parsetime(s):
- return datetime.strptime(s, TIMEFMT).replace(year=thisyear)
+try:
+ from nvd3 import lineChart, lineWithFocusChart
+except ImportError:
+ print >>sys.stderr, "Could not import nvd3. Please install it *from source* (other versions may be missing features that we rely on). Run these commands: git clone https://github.com/areski/python-nvd3.git ; cd python-nvd3 ; sudo python setup.py install"
+ sys.exit(-1)
# Plot a set of entries for a title.
def munge(title, values):
@@ -93,7 +90,7 @@ def munge(title, values):
# Parse the command line
import argparse
-parser = argparse.ArgumentParser(description='Create graphs from WiredTIger statistics.')
+parser = argparse.ArgumentParser(description='Create graphs from WiredTiger statistics.')
parser.add_argument('--abstime', action='store_true',
help='use absolute time on the x axis')
parser.add_argument('--focus', action='store_true',
@@ -108,6 +105,8 @@ parser.add_argument('--output', '-o', metavar='file', default='wtstats.html',
parser.add_argument('--right', '-R', metavar='regexp',
type=re.compile, action='append',
    help='use the right axis for series with titles matching the specified regexp')
+parser.add_argument('--wtperf', '-w', action='store_true',
+ help='Plot wtperf statistics on the same graph')
parser.add_argument('files', metavar='file', nargs='+',
help='input files generated by WiredTiger statistics logging')
args = parser.parse_args()
@@ -211,6 +210,9 @@ for title, yaxis, ydata in results:
chart.add_serie(x=xdata, y=(ydata.get(x, 0) for x in xdata), name=title,
type="line", yaxis="2" if yaxis else "1")
+if args.wtperf:
+ addPlotsToStatsChart(chart, os.path.dirname(args.files[0]), args.abstime)
+
chart.buildhtml()
output_file.write(chart.htmlcontent)