summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bench/wtperf/config.c91
-rw-r--r--bench/wtperf/runners/evict-btree-readonly.wtperf12
-rw-r--r--bench/wtperf/runners/evict-btree.wtperf2
-rw-r--r--bench/wtperf/runners/evict-lsm-readonly.wtperf13
-rw-r--r--bench/wtperf/runners/evict-lsm.wtperf2
-rw-r--r--bench/wtperf/runners/update-delta-mix1.wtperf18
-rw-r--r--bench/wtperf/runners/update-delta-mix2.wtperf18
-rw-r--r--bench/wtperf/runners/update-delta-mix3.wtperf18
-rw-r--r--bench/wtperf/runners/update-grow-stress.wtperf15
-rw-r--r--bench/wtperf/runners/update-shrink-stress.wtperf15
-rw-r--r--bench/wtperf/wtperf.c134
-rw-r--r--bench/wtperf/wtperf.h22
-rw-r--r--bench/wtperf/wtperf_opt.i12
-rw-r--r--build_posix/Make.subdirs4
-rw-r--r--build_posix/configure.ac.in24
-rwxr-xr-xbuild_posix/reconf1
-rw-r--r--build_win/filelist.win1
-rw-r--r--dist/api_data.py34
-rw-r--r--dist/api_err.py2
-rw-r--r--dist/filelist3
-rw-r--r--dist/flags.py2
-rwxr-xr-xdist/s_export9
-rw-r--r--dist/s_funcs.list2
-rwxr-xr-xdist/s_longlines5
-rw-r--r--dist/s_string.ok32
-rwxr-xr-xdist/s_style4
-rwxr-xr-xdist/s_whitespace7
-rwxr-xr-xdist/s_win6
-rw-r--r--dist/stat.py12
-rw-r--r--dist/stat_data.py91
-rw-r--r--examples/c/Makefile.am1
-rw-r--r--examples/c/ex_all.c6
-rw-r--r--examples/c/ex_async.c2
-rw-r--r--examples/c/ex_config_parse.c5
-rw-r--r--examples/c/ex_event_handler.c136
-rw-r--r--examples/c/ex_extractor.c13
-rw-r--r--examples/c/ex_schema.c45
-rw-r--r--examples/c/ex_stat.c37
-rw-r--r--examples/java/com/wiredtiger/examples/ex_all.java16
-rw-r--r--examples/java/com/wiredtiger/examples/ex_schema.java11
-rw-r--r--examples/java/com/wiredtiger/examples/ex_stat.java32
-rw-r--r--ext/collators/revint/Makefile.am10
-rw-r--r--ext/collators/revint/revint_collator.c153
-rw-r--r--ext/compressors/lz4/lz4_compress.c41
-rw-r--r--ext/compressors/snappy/snappy_compress.c31
-rw-r--r--src/async/async_op.c12
-rw-r--r--src/block/block_ckpt.c3
-rw-r--r--src/block/block_mgr.c291
-rw-r--r--src/block/block_open.c2
-rw-r--r--src/block/block_write.c8
-rw-r--r--src/btree/bt_compact.c24
-rw-r--r--src/btree/bt_curprev.c3
-rw-r--r--src/btree/bt_cursor.c41
-rw-r--r--src/btree/bt_discard.c3
-rw-r--r--src/btree/bt_handle.c5
-rw-r--r--src/btree/bt_read.c47
-rw-r--r--src/btree/bt_rebalance.c9
-rw-r--r--src/btree/bt_slvg.c6
-rw-r--r--src/btree/bt_split.c63
-rw-r--r--src/btree/bt_sync.c84
-rw-r--r--src/btree/bt_vrfy.c2
-rw-r--r--src/btree/bt_walk.c5
-rw-r--r--src/btree/col_modify.c22
-rw-r--r--src/btree/col_srch.c52
-rw-r--r--src/btree/row_key.c1
-rw-r--r--src/btree/row_srch.c147
-rw-r--r--src/cache/cache_las.c4
-rw-r--r--src/config/config.c4
-rw-r--r--src/config/config_def.c188
-rw-r--r--src/conn/api_strerror.c2
-rw-r--r--src/conn/conn_api.c205
-rw-r--r--src/conn/conn_cache.c12
-rw-r--r--src/conn/conn_dhandle.c196
-rw-r--r--src/conn/conn_handle.c5
-rw-r--r--src/conn/conn_log.c91
-rw-r--r--src/conn/conn_open.c6
-rw-r--r--src/conn/conn_stat.c106
-rw-r--r--src/conn/conn_sweep.c8
-rw-r--r--src/cursor/cur_backup.c180
-rw-r--r--src/cursor/cur_config.c30
-rw-r--r--src/cursor/cur_ds.c32
-rw-r--r--src/cursor/cur_dump.c32
-rw-r--r--src/cursor/cur_file.c32
-rw-r--r--src/cursor/cur_index.c32
-rw-r--r--src/cursor/cur_join.c538
-rw-r--r--src/cursor/cur_log.c34
-rw-r--r--src/cursor/cur_metadata.c90
-rw-r--r--src/cursor/cur_stat.c74
-rw-r--r--src/cursor/cur_std.c102
-rw-r--r--src/cursor/cur_table.c64
-rw-r--r--src/docs/checkpoint.dox34
-rw-r--r--src/docs/command-line.dox94
-rw-r--r--src/docs/data-sources.dox12
-rw-r--r--src/docs/error-handling.dox22
-rw-r--r--src/docs/license.dox36
-rw-r--r--src/docs/programming.dox1
-rw-r--r--src/docs/readonly.dox55
-rw-r--r--src/docs/spell.ok6
-rw-r--r--src/docs/statistics.dox9
-rw-r--r--src/docs/upgrading.dox43
-rw-r--r--src/docs/wtperf.dox15
-rw-r--r--src/evict/evict_file.c6
-rw-r--r--src/evict/evict_lru.c273
-rw-r--r--src/evict/evict_page.c3
-rw-r--r--src/include/btmem.h5
-rw-r--r--src/include/btree.h9
-rw-r--r--src/include/btree.i2
-rw-r--r--src/include/cache.h6
-rw-r--r--src/include/cache.i52
-rw-r--r--src/include/column.i30
-rw-r--r--src/include/config.h16
-rw-r--r--src/include/connection.h8
-rw-r--r--src/include/cursor.h34
-rw-r--r--src/include/extern.h53
-rw-r--r--src/include/flags.h44
-rw-r--r--src/include/gcc.h1
-rw-r--r--src/include/lint.h1
-rw-r--r--src/include/lsm.h23
-rw-r--r--src/include/meta.h1
-rw-r--r--src/include/misc.h6
-rw-r--r--src/include/msvc.h1
-rw-r--r--src/include/mutex.h7
-rw-r--r--src/include/packing.i4
-rw-r--r--src/include/schema.h18
-rw-r--r--src/include/session.h20
-rw-r--r--src/include/stat.h262
-rw-r--r--src/include/txn.i2
-rw-r--r--src/include/wiredtiger.in856
-rw-r--r--src/include/wiredtiger_ext.h138
-rw-r--r--src/log/log.c124
-rw-r--r--src/log/log_slot.c2
-rw-r--r--src/lsm/lsm_cursor.c34
-rw-r--r--src/lsm/lsm_manager.c17
-rw-r--r--src/lsm/lsm_merge.c13
-rw-r--r--src/lsm/lsm_meta.c336
-rw-r--r--src/lsm/lsm_tree.c257
-rw-r--r--src/lsm/lsm_work_unit.c22
-rw-r--r--src/lsm/lsm_worker.c11
-rw-r--r--src/meta/meta_apply.c52
-rw-r--r--src/meta/meta_ckpt.c5
-rw-r--r--src/meta/meta_table.c16
-rw-r--r--src/meta/meta_track.c11
-rw-r--r--src/meta/meta_turtle.c32
-rw-r--r--src/os_posix/os_alloc.c107
-rw-r--r--src/os_posix/os_errno.c16
-rw-r--r--src/os_posix/os_fallocate.c1
-rw-r--r--src/os_posix/os_fsync.c6
-rw-r--r--src/os_posix/os_ftruncate.c1
-rw-r--r--src/os_posix/os_open.c14
-rw-r--r--src/os_posix/os_remove.c1
-rw-r--r--src/os_posix/os_rename.c1
-rw-r--r--src/os_posix/os_rw.c3
-rw-r--r--src/os_posix/os_stdio.c3
-rw-r--r--src/os_win/os_errno.c31
-rw-r--r--src/os_win/os_fallocate.c1
-rw-r--r--src/os_win/os_fsync.c6
-rw-r--r--src/os_win/os_ftruncate.c1
-rw-r--r--src/os_win/os_open.c15
-rw-r--r--src/os_win/os_remove.c1
-rw-r--r--src/os_win/os_rename.c1
-rw-r--r--src/os_win/os_rw.c3
-rw-r--r--src/packing/pack_impl.c87
-rw-r--r--src/packing/pack_stream.c139
-rw-r--r--src/reconcile/rec_write.c161
-rw-r--r--src/schema/schema_create.c16
-rw-r--r--src/schema/schema_open.c3
-rw-r--r--src/schema/schema_plan.c12
-rw-r--r--src/schema/schema_truncate.c11
-rw-r--r--src/schema/schema_worker.c19
-rw-r--r--src/session/session_api.c221
-rw-r--r--src/session/session_compact.c112
-rw-r--r--src/session/session_dhandle.c26
-rw-r--r--src/support/cksum.c19
-rw-r--r--src/support/cond_auto.c136
-rw-r--r--src/support/huffman.c9
-rw-r--r--src/support/power8/LICENSE.TXT476
-rw-r--r--src/support/power8/README.md208
-rw-r--r--src/support/power8/crc32.S771
-rw-r--r--src/support/power8/crc32_constants.h901
-rw-r--r--src/support/power8/crc32_wrapper.c66
-rw-r--r--src/support/power8/ppc-opcode.h23
-rw-r--r--src/support/scratch.c2
-rw-r--r--src/support/stat.c411
-rw-r--r--src/txn/txn.c2
-rw-r--r--src/txn/txn_ckpt.c401
-rw-r--r--src/txn/txn_recover.c40
-rw-r--r--src/utilities/util_dump.c230
-rw-r--r--test/bloom/test_bloom.c55
-rw-r--r--test/checkpoint/test_checkpoint.c4
-rw-r--r--test/cursor_order/Makefile.am13
-rw-r--r--test/cursor_order/cursor_order.c307
-rw-r--r--test/cursor_order/cursor_order.h54
-rw-r--r--test/cursor_order/cursor_order_file.c132
-rw-r--r--test/cursor_order/cursor_order_ops.c370
-rw-r--r--test/fops/file.c2
-rw-r--r--test/fops/fops.c2
-rw-r--r--test/fops/t.c2
-rw-r--r--test/format/backup.c55
-rw-r--r--test/format/bdb.c8
-rw-r--r--test/format/bulk.c18
-rw-r--r--test/format/compact.c8
-rw-r--r--test/format/config.c29
-rw-r--r--test/format/config.h4
-rw-r--r--test/format/format.h9
-rw-r--r--test/format/lrt.c40
-rw-r--r--test/format/ops.c186
-rw-r--r--test/format/rebalance.c20
-rw-r--r--test/format/salvage.c22
-rw-r--r--test/format/t.c64
-rw-r--r--test/format/util.c28
-rw-r--r--test/format/wts.c95
-rw-r--r--test/huge/huge.c2
-rw-r--r--test/manydbs/Makefile.am13
-rw-r--r--test/manydbs/manydbs.c264
-rwxr-xr-xtest/manydbs/smoke.sh18
-rw-r--r--test/readonly/Makefile.am13
-rw-r--r--test/readonly/readonly.c409
-rwxr-xr-xtest/readonly/smoke.sh8
-rw-r--r--test/recovery/random-abort.c23
-rw-r--r--test/recovery/truncated-log.c36
-rw-r--r--test/salvage/salvage.c2
-rw-r--r--test/suite/helper.py40
-rw-r--r--test/suite/test_backup05.py14
-rw-r--r--test/suite/test_bug008.py237
-rw-r--r--test/suite/test_bulk02.py10
-rw-r--r--test/suite/test_checkpoint01.py12
-rw-r--r--test/suite/test_collator.py161
-rw-r--r--test/suite/test_compact02.py5
-rw-r--r--test/suite/test_cursor06.py5
-rw-r--r--test/suite/test_cursor_random.py26
-rw-r--r--test/suite/test_drop.py15
-rw-r--r--test/suite/test_drop02.py47
-rw-r--r--test/suite/test_dump.py51
-rw-r--r--test/suite/test_join01.py81
-rw-r--r--test/suite/test_join02.py11
-rw-r--r--test/suite/test_join05.py66
-rw-r--r--test/suite/test_join06.py158
-rw-r--r--test/suite/test_lsm03.py60
-rw-r--r--test/suite/test_readonly01.py146
-rw-r--r--test/suite/test_readonly02.py116
-rw-r--r--test/suite/test_readonly03.py125
-rw-r--r--test/suite/test_rebalance.py2
-rw-r--r--test/suite/test_schema07.py54
-rw-r--r--test/suite/test_sweep01.py21
-rw-r--r--test/suite/test_txn04.py17
-rw-r--r--test/suite/test_util13.py188
-rw-r--r--test/suite/wttest.py8
-rw-r--r--test/thread/t.c2
-rw-r--r--test/utility/test_util.i37
-rw-r--r--test/windows/windows_shim.h5
-rw-r--r--tools/wtstats/stat_data.py4
251 files changed, 12175 insertions, 3765 deletions
diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c
index 3cb20ff2b26..e83d6fcceed 100644
--- a/bench/wtperf/config.c
+++ b/bench/wtperf/config.c
@@ -134,9 +134,11 @@ config_free(CONFIG *cfg)
}
cleanup_truncate_config(cfg);
+ free(cfg->base_uri);
free(cfg->ckptthreads);
+ free(cfg->partial_config);
free(cfg->popthreads);
- free(cfg->base_uri);
+ free(cfg->reopen_config);
free(cfg->workers);
free(cfg->workload);
}
@@ -157,13 +159,19 @@ config_compress(CONFIG *cfg)
cfg->compress_ext = NULL;
cfg->compress_table = NULL;
} else if (strcmp(s, "lz4") == 0) {
+#ifndef HAVE_BUILTIN_EXTENSION_LZ4
cfg->compress_ext = LZ4_EXT;
+#endif
cfg->compress_table = LZ4_BLK;
} else if (strcmp(s, "snappy") == 0) {
+#ifndef HAVE_BUILTIN_EXTENSION_SNAPPY
cfg->compress_ext = SNAPPY_EXT;
+#endif
cfg->compress_table = SNAPPY_BLK;
} else if (strcmp(s, "zlib") == 0) {
+#ifndef HAVE_BUILTIN_EXTENSION_ZLIB
cfg->compress_ext = ZLIB_EXT;
+#endif
cfg->compress_table = ZLIB_BLK;
} else {
fprintf(stderr,
@@ -233,10 +241,6 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
goto err;
continue;
}
- if (STRING_MATCH("throttle", k.str, k.len)) {
- workp->throttle = (uint64_t)v.val;
- continue;
- }
if (STRING_MATCH("insert", k.str, k.len) ||
STRING_MATCH("inserts", k.str, k.len)) {
if ((workp->insert = v.val) < 0)
@@ -254,20 +258,17 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
goto err;
continue;
}
- if (STRING_MATCH("update", k.str, k.len) ||
- STRING_MATCH("updates", k.str, k.len)) {
- if ((workp->update = v.val) < 0)
- goto err;
+ if (STRING_MATCH("throttle", k.str, k.len)) {
+ workp->throttle = (uint64_t)v.val;
continue;
}
if (STRING_MATCH("truncate", k.str, k.len)) {
if ((workp->truncate = v.val) != 1)
goto err;
/* There can only be one Truncate thread. */
- if (cfg->has_truncate != 0) {
+ if (F_ISSET(cfg, CFG_TRUNCATE))
goto err;
- }
- cfg->has_truncate = 1;
+ F_SET(cfg, CFG_TRUNCATE);
continue;
}
if (STRING_MATCH("truncate_pct", k.str, k.len)) {
@@ -282,6 +283,29 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
workp->truncate_count = (uint64_t)v.val;
continue;
}
+ if (STRING_MATCH("update", k.str, k.len) ||
+ STRING_MATCH("updates", k.str, k.len)) {
+ if ((workp->update = v.val) < 0)
+ goto err;
+ continue;
+ }
+ if (STRING_MATCH("update_delta", k.str, k.len)) {
+ if (v.type == WT_CONFIG_ITEM_STRING ||
+ v.type == WT_CONFIG_ITEM_ID) {
+ if (strncmp(v.str, "rand", 4) != 0)
+ goto err;
+ /* Special random value */
+ workp->update_delta = INT64_MAX;
+ F_SET(cfg, CFG_GROW);
+ } else {
+ workp->update_delta = v.val;
+ if (v.val > 0)
+ F_SET(cfg, CFG_GROW);
+ if (v.val < 0)
+ F_SET(cfg, CFG_SHRINK);
+ }
+ continue;
+ }
goto err;
}
if (ret == WT_NOTFOUND)
@@ -401,7 +425,12 @@ config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v)
*(uint32_t *)valueloc = (uint32_t)v->val;
break;
case CONFIG_STRING_TYPE:
- if (v->type != WT_CONFIG_ITEM_STRING) {
+ /*
+ * Configuration parsing uses string/ID to distinguish
+ * between quoted and unquoted values.
+ */
+ if (v->type != WT_CONFIG_ITEM_STRING &&
+ v->type != WT_CONFIG_ITEM_ID) {
fprintf(stderr, "wtperf: Error: "
"bad string value for \'%.*s=%.*s\'\n",
(int)k->len, k->str, (int)v->len, v->str);
@@ -430,7 +459,8 @@ config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v)
STRING_MATCH("threads", k->str, k->len))
return (config_threads(cfg, v->str, v->len));
- if (v->type != WT_CONFIG_ITEM_STRING) {
+ if (v->type != WT_CONFIG_ITEM_STRING &&
+ v->type != WT_CONFIG_ITEM_ID) {
fprintf(stderr, "wtperf: Error: "
"bad string value for \'%.*s=%.*s\'\n",
(int)k->len, k->str, (int)v->len, v->str);
@@ -634,6 +664,9 @@ config_opt_str(CONFIG *cfg, const char *name, const char *value)
int
config_sanity(CONFIG *cfg)
{
+ WORKLOAD *workp;
+ u_int i;
+
/* Various intervals should be less than the run-time. */
if (cfg->run_time > 0 &&
((cfg->checkpoint_threads != 0 &&
@@ -660,6 +693,36 @@ config_sanity(CONFIG *cfg)
"Invalid pareto distribution - should be a percentage\n");
return (EINVAL);
}
+
+ if (cfg->value_sz_max < cfg->value_sz) {
+ if (F_ISSET(cfg, CFG_GROW)) {
+ fprintf(stderr, "value_sz_max %" PRIu32
+ " must be greater than or equal to value_sz %"
+ PRIu32 "\n", cfg->value_sz_max, cfg->value_sz);
+ return (EINVAL);
+ } else
+ cfg->value_sz_max = cfg->value_sz;
+ }
+ if (cfg->value_sz_min > cfg->value_sz) {
+ if (F_ISSET(cfg, CFG_SHRINK)) {
+ fprintf(stderr, "value_sz_min %" PRIu32
+ " must be less than or equal to value_sz %"
+ PRIu32 "\n", cfg->value_sz_min, cfg->value_sz);
+ return (EINVAL);
+ } else
+ cfg->value_sz_min = cfg->value_sz;
+ }
+
+ if (cfg->readonly && cfg->workload != NULL)
+ for (i = 0, workp = cfg->workload;
+ i < cfg->workload_cnt; ++i, ++workp)
+ if (workp->insert != 0 || workp->update != 0 ||
+ workp->truncate != 0) {
+ fprintf(stderr,
+ "Invalid workload: insert, update or "
+ "truncate specified with readonly\n");
+ return (EINVAL);
+ }
return (0);
}
diff --git a/bench/wtperf/runners/evict-btree-readonly.wtperf b/bench/wtperf/runners/evict-btree-readonly.wtperf
new file mode 100644
index 00000000000..25599fadd8d
--- /dev/null
+++ b/bench/wtperf/runners/evict-btree-readonly.wtperf
@@ -0,0 +1,12 @@
+# wtperf options file: evict btree configuration
+conn_config="cache_size=50M,eviction=(threads_max=4),mmap=false"
+table_config="type=file"
+icount=10000000
+report_interval=5
+run_time=120
+populate_threads=1
+readonly=true
+threads=((count=16,reads=1))
+# Add throughput/latency monitoring
+max_latency=2000
+sample_interval=5
diff --git a/bench/wtperf/runners/evict-btree.wtperf b/bench/wtperf/runners/evict-btree.wtperf
index 24da4dd7902..e7d967e5c63 100644
--- a/bench/wtperf/runners/evict-btree.wtperf
+++ b/bench/wtperf/runners/evict-btree.wtperf
@@ -1,5 +1,5 @@
# wtperf options file: evict btree configuration
-conn_config="cache_size=50M"
+conn_config="cache_size=50M,eviction=(threads_max=4)"
table_config="type=file"
icount=10000000
report_interval=5
diff --git a/bench/wtperf/runners/evict-lsm-readonly.wtperf b/bench/wtperf/runners/evict-lsm-readonly.wtperf
new file mode 100644
index 00000000000..661b8e21924
--- /dev/null
+++ b/bench/wtperf/runners/evict-lsm-readonly.wtperf
@@ -0,0 +1,13 @@
+# wtperf options file: evict lsm configuration
+conn_config="cache_size=50M,lsm_manager=(worker_thread_max=6),eviction=(threads_max=4)"
+table_config="type=lsm,lsm=(chunk_size=2M),os_cache_dirty_max=16MB"
+compact=true
+icount=10000000
+report_interval=5
+run_time=120
+populate_threads=1
+readonly=true
+threads=((count=16,reads=1))
+# Add throughput/latency monitoring
+max_latency=2000
+sample_interval=5
diff --git a/bench/wtperf/runners/evict-lsm.wtperf b/bench/wtperf/runners/evict-lsm.wtperf
index ad885d98eb7..b872d429046 100644
--- a/bench/wtperf/runners/evict-lsm.wtperf
+++ b/bench/wtperf/runners/evict-lsm.wtperf
@@ -1,5 +1,5 @@
# wtperf options file: evict lsm configuration
-conn_config="cache_size=50M,lsm_manager=(worker_thread_max=6)"
+conn_config="cache_size=50M,lsm_manager=(worker_thread_max=6),eviction=(threads_max=4)"
table_config="type=lsm,lsm=(chunk_size=2M),os_cache_dirty_max=16MB"
compact=true
icount=10000000
diff --git a/bench/wtperf/runners/update-delta-mix1.wtperf b/bench/wtperf/runners/update-delta-mix1.wtperf
new file mode 100644
index 00000000000..0f5e75f5347
--- /dev/null
+++ b/bench/wtperf/runners/update-delta-mix1.wtperf
@@ -0,0 +1,18 @@
+# wtperf options file: Mixed workload where we grow some values and shrink
+# others. Mixed load leaning toward growing the dataset.
+#
+conn_config="cache_size=2GB,checkpoint=(wait=30)"
+table_config="type=file,leaf_page_max=32k,leaf_value_max=128k,split_pct=90"
+# The values are starting small, insert a lot so our database grows larger than
+# cache quickly.
+icount=200000
+report_interval=5
+run_time=300
+populate_threads=1
+#
+# Run more grow workload threads than shrink threads.
+#
+threads=((count=4,update=1,update_delta=100),(count=2,update=1,update_delta=-150))
+value_sz=20000
+value_sz_min=1000
+value_sz_max=65536
diff --git a/bench/wtperf/runners/update-delta-mix2.wtperf b/bench/wtperf/runners/update-delta-mix2.wtperf
new file mode 100644
index 00000000000..f3ce2a455cc
--- /dev/null
+++ b/bench/wtperf/runners/update-delta-mix2.wtperf
@@ -0,0 +1,18 @@
+# wtperf options file: Mixed workload where we grow some values and shrink
+# others. Mixed load leaning toward shrinking the dataset.
+#
+conn_config="cache_size=2GB,checkpoint=(wait=30)"
+table_config="type=file,leaf_page_max=32k,leaf_value_max=128k,split_pct=90"
+# The values are starting small, insert a lot so our database grows larger than
+# cache quickly.
+icount=200000
+report_interval=5
+run_time=300
+populate_threads=1
+#
+# Run more shrink workload threads than grow threads.
+#
+threads=((count=2,update=1,update_delta=150),(count=4,update=1,update_delta=-100))
+value_sz=20000
+value_sz_min=1000
+value_sz_max=65536
diff --git a/bench/wtperf/runners/update-delta-mix3.wtperf b/bench/wtperf/runners/update-delta-mix3.wtperf
new file mode 100644
index 00000000000..606eb727eef
--- /dev/null
+++ b/bench/wtperf/runners/update-delta-mix3.wtperf
@@ -0,0 +1,18 @@
+# wtperf options file: Mixed workload where we grow some values and shrink
+# others. Mixed load that is roughly balanced between the two.
+#
+conn_config="cache_size=2GB,checkpoint=(wait=30)"
+table_config="type=file,leaf_page_max=32k,leaf_value_max=128k,split_pct=90"
+# The values are starting small, insert a lot so our database grows larger than
+# cache quickly.
+icount=200000
+report_interval=5
+run_time=300
+populate_threads=1
+#
+# Run a balance of threads.
+#
+threads=((count=3,update=1,update_delta=100),(count=3,update=1,update_delta=-100))
+value_sz=20000
+value_sz_min=1000
+value_sz_max=65536
diff --git a/bench/wtperf/runners/update-grow-stress.wtperf b/bench/wtperf/runners/update-grow-stress.wtperf
new file mode 100644
index 00000000000..f7403e1578d
--- /dev/null
+++ b/bench/wtperf/runners/update-grow-stress.wtperf
@@ -0,0 +1,15 @@
+# wtperf options file: Grow the size of documents while there is cache
+# pressure and appends are happening as well.
+conn_config="cache_size=2GB,checkpoint=(wait=30)"
+table_config="type=file,leaf_page_max=32k,leaf_value_max=128k,split_pct=90"
+# The values are starting small, insert a lot so our database grows larger than
+# cache quickly.
+icount=200000
+report_interval=5
+run_time=240
+populate_threads=1
+# Continue inserting new records.
+threads=((count=1,inserts=1,throttle=1000),(count=4,update=1,update_delta=100))
+# Start with small values and let them grow slowly to large values.
+value_sz=10000
+value_sz_max=65536
diff --git a/bench/wtperf/runners/update-shrink-stress.wtperf b/bench/wtperf/runners/update-shrink-stress.wtperf
new file mode 100644
index 00000000000..bbdd9593b59
--- /dev/null
+++ b/bench/wtperf/runners/update-shrink-stress.wtperf
@@ -0,0 +1,15 @@
+# wtperf options file: Shrink the size of values. Checkpoint frequently
+# and insert new records too.
+#
+conn_config="cache_size=2GB,checkpoint=(wait=30)"
+table_config="type=file,leaf_page_max=32k,leaf_value_max=128k,split_pct=90"
+# Since we're continually inserting, start with a smaller number initially.
+icount=200000
+report_interval=5
+run_time=240
+populate_threads=1
+# Continue inserting new records.
+threads=((count=1,inserts=1,throttle=1000),(count=4,update=1,update_delta=-100))
+# Start with moderate values and let them shrink slowly.
+value_sz_min=1000
+value_sz=10000
diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c
index b2e68198e9a..340c400ba7e 100644
--- a/bench/wtperf/wtperf.c
+++ b/bench/wtperf/wtperf.c
@@ -33,6 +33,7 @@ static const CONFIG default_cfg = {
"WT_TEST", /* home */
"WT_TEST", /* monitor dir */
NULL, /* partial logging */
+ NULL, /* reopen config */
NULL, /* base_uri */
NULL, /* uris */
NULL, /* helium_mount */
@@ -59,7 +60,7 @@ static const CONFIG default_cfg = {
0, /* in warmup phase */
false, /* Signal for idle cycle thread */
0, /* total seconds running */
- 0, /* has truncate */
+ 0, /* flags */
{NULL, NULL}, /* the truncate queue */
{NULL, NULL}, /* the config queue */
@@ -86,6 +87,7 @@ static int start_threads(CONFIG *,
WORKLOAD *, CONFIG_THREAD *, u_int, void *(*)(void *));
static int stop_threads(CONFIG *, u_int, CONFIG_THREAD *);
static void *thread_run_wtperf(void *);
+static void update_value_delta(CONFIG_THREAD *);
static void *worker(void *);
static uint64_t wtperf_rand(CONFIG_THREAD *);
@@ -104,24 +106,93 @@ get_next_incr(CONFIG *cfg)
return (__wt_atomic_add64(&cfg->insert_key, 1));
}
+/*
+ * Each time this function is called we will overwrite the first and one
+ * other element in the value buffer.
+ */
static void
randomize_value(CONFIG_THREAD *thread, char *value_buf)
{
uint8_t *vb;
- uint32_t i;
+ uint32_t i, max_range, rand_val;
+
+ /*
+ * Limit how much of the buffer we validate for length. This means
+ * that only threads that do growing updates will ever make changes to
+ * values outside of the initial value size, but that's a fair
+ * trade-off for avoiding figuring out how long the value is more
+ * accurately in this performance-sensitive function.
+ */
+ if (thread->workload == NULL || thread->workload->update_delta == 0)
+ max_range = thread->cfg->value_sz;
+ else if (thread->workload->update_delta > 0)
+ max_range = thread->cfg->value_sz_max;
+ else
+ max_range = thread->cfg->value_sz_min;
/*
- * Each time we're called overwrite value_buf[0] and one other
- * randomly chosen byte (other than the trailing NUL).
- * Make sure we don't write a NUL: keep the value the same length.
+ * Generate a single random value and re-use it. We generally only
+ * have small ranges in this function, so avoiding a bunch of calls
+ * is worthwhile.
+ */
+ rand_val = __wt_random(&thread->rnd);
+ i = rand_val % (max_range - 1);
+
+ /*
+ * Ensure we don't write past the end of a value when configured for
+ * randomly sized values.
*/
- i = __wt_random(&thread->rnd) % (thread->cfg->value_sz - 1);
while (value_buf[i] == '\0' && i > 0)
--i;
- if (i > 0) {
- vb = (uint8_t *)value_buf;
- vb[0] = (__wt_random(&thread->rnd) % 255) + 1;
- vb[i] = (__wt_random(&thread->rnd) % 255) + 1;
+
+ vb = (uint8_t *)value_buf;
+ vb[0] = ((rand_val >> 8) % 255) + 1;
+ /*
+ * If i happened to be 0, we'll be re-writing the same value
+ * twice, but that doesn't matter.
+ */
+ vb[i] = ((rand_val >> 16) % 255) + 1;
+}
+
+/*
+ * Figure out and adjust the size of the value string, used for growing and
+ * shrinking updates. We know that the value to be updated is in the thread's
+ * value scratch buffer.
+ */
+static inline void
+update_value_delta(CONFIG_THREAD *thread)
+{
+ CONFIG *cfg;
+ char * value;
+ int64_t delta, len, new_len;
+
+ cfg = thread->cfg;
+ value = thread->value_buf;
+ delta = thread->workload->update_delta;
+ len = (int64_t)strlen(value);
+
+ if (delta == INT64_MAX)
+ delta = __wt_random(&thread->rnd) %
+ (cfg->value_sz_max - cfg->value_sz);
+
+ /* Ensure we aren't changing across boundaries */
+ if (delta > 0 && len + delta > cfg->value_sz_max)
+ delta = cfg->value_sz_max - len;
+ else if (delta < 0 && len + delta < cfg->value_sz_min)
+ delta = cfg->value_sz_min - len;
+
+ /* Bail if there isn't anything to do */
+ if (delta == 0)
+ return;
+
+ if (delta < 0)
+ value[len + delta] = '\0';
+ else {
+ /* Extend the value by the configured amount. */
+ for (new_len = len;
+ new_len < cfg->value_sz_max && new_len - len < delta;
+ new_len++)
+ value[new_len] = 'a';
}
}
@@ -623,8 +694,10 @@ worker(void *arg)
* Copy as much of the previous value as is
* safe, and be sure to NUL-terminate.
*/
- strncpy(value_buf, value, cfg->value_sz);
- value_buf[cfg->value_sz - 1] = '\0';
+ strncpy(value_buf,
+ value, cfg->value_sz_max - 1);
+ if (thread->workload->update_delta != 0)
+ update_value_delta(thread);
if (value_buf[0] == 'a')
value_buf[0] = 'b';
else
@@ -1517,7 +1590,7 @@ close_reopen(CONFIG *cfg)
{
int ret;
- if (!cfg->reopen_connection)
+ if (!cfg->readonly && !cfg->reopen_connection)
return (0);
/*
* Reopen the connection. We do this so that the workload phase always
@@ -1533,7 +1606,7 @@ close_reopen(CONFIG *cfg)
return (ret);
}
if ((ret = wiredtiger_open(
- cfg->home, NULL, cfg->conn_config, &cfg->conn)) != 0) {
+ cfg->home, NULL, cfg->reopen_config, &cfg->conn)) != 0) {
lprintf(cfg, ret, 0, "Re-opening the connection failed");
return (ret);
}
@@ -1595,7 +1668,7 @@ execute_workload(CONFIG *cfg)
for (threads = cfg->workers, i = 0,
workp = cfg->workload; i < cfg->workload_cnt; ++i, ++workp) {
lprintf(cfg, 0, 1,
- "Starting workload #%d: %" PRId64 " threads, inserts=%"
+ "Starting workload #%u: %" PRId64 " threads, inserts=%"
PRId64 ", reads=%" PRId64 ", updates=%" PRId64
", truncate=%" PRId64 ", throttle=%" PRId64,
i + 1, workp->threads, workp->insert,
@@ -2194,7 +2267,7 @@ main(int argc, char *argv[])
* the compact operation, but not for the workloads.
*/
if (cfg->async_threads > 0) {
- if (cfg->has_truncate > 0) {
+ if (F_ISSET(cfg, CFG_TRUNCATE)) {
lprintf(cfg, 1, 0, "Cannot run truncate and async\n");
goto err;
}
@@ -2212,20 +2285,20 @@ main(int argc, char *argv[])
req_len = strlen(",async=(enabled=true,threads=)") + 4;
cfg->async_config = dcalloc(req_len, 1);
snprintf(cfg->async_config, req_len,
- ",async=(enabled=true,threads=%d)",
+ ",async=(enabled=true,threads=%" PRIu32 ")",
cfg->async_threads);
}
if ((ret = config_compress(cfg)) != 0)
goto err;
/* You can't have truncate on a random collection. */
- if (cfg->has_truncate && cfg->random_range) {
+ if (F_ISSET(cfg, CFG_TRUNCATE) && cfg->random_range) {
lprintf(cfg, 1, 0, "Cannot run truncate and random_range\n");
goto err;
}
/* We can't run truncate with more than one table. */
- if (cfg->has_truncate && cfg->table_count > 1) {
+ if (F_ISSET(cfg, CFG_TRUNCATE) && cfg->table_count > 1) {
lprintf(cfg, 1, 0, "Cannot truncate more than 1 table\n");
goto err;
}
@@ -2297,9 +2370,25 @@ main(int argc, char *argv[])
req_len = strlen(cfg->table_config) +
strlen(LOG_PARTIAL_CONFIG) + 1;
cfg->partial_config = dcalloc(req_len, 1);
- snprintf((char *)cfg->partial_config, req_len, "%s%s",
- (char *)cfg->table_config, LOG_PARTIAL_CONFIG);
+ snprintf(cfg->partial_config, req_len, "%s%s",
+ cfg->table_config, LOG_PARTIAL_CONFIG);
}
+ /*
+ * Set the config for reopen. If readonly add in that string.
+ * If not readonly then just copy the original conn_config.
+ */
+ if (cfg->readonly)
+ req_len = strlen(cfg->conn_config) +
+ strlen(READONLY_CONFIG) + 1;
+ else
+ req_len = strlen(cfg->conn_config) + 1;
+ cfg->reopen_config = dcalloc(req_len, 1);
+ if (cfg->readonly)
+ snprintf(cfg->reopen_config, req_len, "%s%s",
+ cfg->conn_config, READONLY_CONFIG);
+ else
+ snprintf(cfg->reopen_config, req_len, "%s",
+ cfg->conn_config);
/* Sanity-check the configuration. */
if ((ret = config_sanity(cfg)) != 0)
@@ -2357,7 +2446,8 @@ start_threads(CONFIG *cfg,
* strings: trailing NUL is included in the size.
*/
thread->key_buf = dcalloc(cfg->key_sz, 1);
- thread->value_buf = dcalloc(cfg->value_sz, 1);
+ thread->value_buf = dcalloc(cfg->value_sz_max, 1);
+
/*
* Initialize and then toss in a bit of random values if needed.
*/
diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h
index 929880b0aef..a2b497b3142 100644
--- a/bench/wtperf/wtperf.h
+++ b/bench/wtperf/wtperf.h
@@ -94,6 +94,7 @@ typedef struct {
int64_t truncate; /* Truncate ratio */
uint64_t truncate_pct; /* Truncate Percent */
uint64_t truncate_count; /* Truncate Count */
+ int64_t update_delta; /* Value size change on update */
#define WORKER_INSERT 1 /* Insert */
#define WORKER_INSERT_RMW 2 /* Insert with read-modify-write */
@@ -138,6 +139,7 @@ typedef struct {
} THROTTLE_CONFIG;
#define LOG_PARTIAL_CONFIG ",log=(enabled=false)"
+#define READONLY_CONFIG ",readonly=true"
/*
* NOTE: If you add any fields to this structure here, you must also add
* an initialization in wtperf.c in the default_cfg.
@@ -145,7 +147,8 @@ typedef struct {
struct __config { /* Configuration structure */
const char *home; /* WiredTiger home */
const char *monitor_dir; /* Monitor output dir */
- const char *partial_config; /* Config string for partial logging */
+ char *partial_config; /* Config string for partial logging */
+ char *reopen_config; /* Config string for conn reopen */
char *base_uri; /* Object URI */
char **uris; /* URIs if multiple tables */
const char *helium_mount; /* Optional Helium mount point */
@@ -188,7 +191,10 @@ struct __config { /* Configuration structure */
volatile uint32_t totalsec; /* total seconds running */
- u_int has_truncate; /* if there is a truncate workload */
+#define CFG_GROW 0x0001 /* There is a grow workload */
+#define CFG_SHRINK 0x0002 /* There is a shrink workload */
+#define CFG_TRUNCATE 0x0004 /* There is a truncate workload */
+ uint32_t flags; /* flags */
/* Queue head for use with the Truncate Logic */
TAILQ_HEAD(__truncate_qh, __truncate_queue_entry) stone_head;
@@ -331,7 +337,7 @@ generate_key(CONFIG *cfg, char *key_buf, uint64_t keyno)
static inline void
extract_key(char *key_buf, uint64_t *keynop)
{
- sscanf(key_buf, "%" SCNu64, keynop);
+ (void)sscanf(key_buf, "%" SCNu64, keynop);
}
/*
@@ -364,11 +370,11 @@ dmalloc(size_t len)
* Call calloc, dying on failure.
*/
static inline void *
-dcalloc(size_t num, size_t len)
+dcalloc(size_t num, size_t size)
{
void *p;
- if ((p = calloc(len, num)) == NULL)
+ if ((p = calloc(num, size)) == NULL)
die(errno, "calloc");
return (p);
}
@@ -410,11 +416,9 @@ static inline char *
dstrndup(const char *str, const size_t len)
{
char *p;
- p = dcalloc(len + 1, 1);
- strncpy(p, str, len);
- if (p == NULL)
- die(errno, "dstrndup");
+ p = dcalloc(len + 1, sizeof(char));
+ memcpy(p, str, len);
return (p);
}
#endif
diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i
index 60bbaff56e5..b5e274a17c2 100644
--- a/bench/wtperf/wtperf_opt.i
+++ b/bench/wtperf/wtperf_opt.i
@@ -145,6 +145,10 @@ DEF_OPT_AS_UINT32(random_range, 0,
"insert operations")
DEF_OPT_AS_BOOL(random_value, 0, "generate random content for the value")
DEF_OPT_AS_UINT32(read_range, 0, "scan a range of keys after each search")
+DEF_OPT_AS_BOOL(readonly, 0,
+ "reopen the connection between populate and workload phases in readonly "
+ "mode. Requires reopen_connection turned on (default). Requires that "
+ "read be the only workload specified")
DEF_OPT_AS_BOOL(reopen_connection, 1,
"close and reopen the connection between populate and workload phases")
DEF_OPT_AS_UINT32(report_interval, 2,
@@ -180,13 +184,17 @@ DEF_OPT_AS_STRING(threads, "", "workload configuration: each 'count' "
"'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' "
"which would create 2 threads doing nothing but reads and 8 threads "
"each doing 50% inserts and 25% reads and updates. Allowed configuration "
- "values are 'count', 'throttle', 'reads', 'inserts', 'updates', 'truncate',"
- " 'truncate_pct' and 'truncate_count'. There are "
+ "values are 'count', 'throttle', 'update_delta', 'reads', 'inserts', "
+ "'updates', 'truncate', 'truncate_pct' and 'truncate_count'. There are "
"also behavior modifiers, supported modifiers are 'ops_per_txn'")
DEF_OPT_AS_CONFIG_STRING(transaction_config, "",
"transaction configuration string, relevant when populate_opts_per_txn "
"is nonzero")
DEF_OPT_AS_STRING(table_name, "test", "table name")
+DEF_OPT_AS_UINT32(value_sz_max, 1000,
+ "maximum value size when delta updates are present. Default disabled")
+DEF_OPT_AS_UINT32(value_sz_min, 1,
+ "minimum value size when delta updates are present. Default disabled")
DEF_OPT_AS_UINT32(value_sz, 100, "value size")
DEF_OPT_AS_UINT32(verbose, 1, "verbosity")
DEF_OPT_AS_UINT32(warmup, 0,
diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs
index e1f8a05c613..4e1f829c0c5 100644
--- a/build_posix/Make.subdirs
+++ b/build_posix/Make.subdirs
@@ -6,6 +6,7 @@
# If the directory exists, it is added to AUTO_SUBDIRS.
# If a condition is included, the subdir is made conditional via AM_CONDITIONAL
ext/collators/reverse
+ext/collators/revint
ext/compressors/lz4 LZ4
ext/compressors/nop
ext/compressors/snappy SNAPPY
@@ -26,10 +27,13 @@ lang/python PYTHON
# Make the tests
test/bloom
test/checkpoint
+test/cursor_order
test/fops
test/format
test/huge
+test/manydbs
test/packing
+test/readonly
test/recovery
test/salvage
test/thread
diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in
index 875c8b436a8..9251873be73 100644
--- a/build_posix/configure.ac.in
+++ b/build_posix/configure.ac.in
@@ -32,6 +32,23 @@ AC_SUBST([LIBTOOL_DEPS])
AC_PROG_CC(cc gcc)
AC_PROG_CXX(c++ g++)
+AM_PROG_AS(as gas)
+
+# This is a workaround as part of WT-2459. Currently, clang (v3.7) does not
+# support compiling the ASM code we have to perform the CRC checks on PowerPC.
+# To compile with clang we need to override the ASM compiler with CCAS to use
+# gcc. Unfortunately, doing the compilation in this manner means libtool can't
+# determine what tag to use for that one .S file. If we catch that we are using
+# two different compilers for CC and CCAS and we are on a PowerPC system we
+# overload the libtool flags to provide CC by default.
+if test "$CC" != "$CCAS"; then
+ AS_CASE([$host_cpu],
+ [ppc64*], [AM_LIBTOOLFLAGS+="--tag=CC"],
+ [elf64lppc], [AM_LIBTOOLFLAGS+="--tag=CC"],
+ [powerpc*], [AM_LIBTOOLFLAGS+="--tag=CC"],
+ [])
+fi
+AC_SUBST(AM_LIBTOOLFLAGS)
if test "$GCC" = "yes"; then
# The Solaris gcc compiler gets the additional -pthreads flag.
@@ -96,6 +113,13 @@ AC_SYS_LARGEFILE
AC_C_BIGENDIAN
+AC_MSG_CHECKING([for a 64-bit build])
+AC_COMPUTE_INT(ac_cv_sizeof_void_p, [sizeof(void *)])
+if test "$ac_cv_sizeof_void_p" != "8" ; then
+ AC_MSG_ERROR([WiredTiger requires a 64-bit build.])
+fi
+AC_MSG_RESULT(yes)
+
# Linux requires _GNU_SOURCE to be defined
case "$host_os" in
linux*) AM_CFLAGS="$AM_CFLAGS -D_GNU_SOURCE" ;;
diff --git a/build_posix/reconf b/build_posix/reconf
index 8700c5da43d..16d4002d9b9 100755
--- a/build_posix/reconf
+++ b/build_posix/reconf
@@ -24,6 +24,7 @@ clean()
aclocal.m4 \
auto-includes.chk \
autom4te.cache \
+ config.cache \
config.hin \
config.hin~ \
config.log \
diff --git a/build_win/filelist.win b/build_win/filelist.win
index 0a313026793..b6a9caf4a74 100644
--- a/build_win/filelist.win
+++ b/build_win/filelist.win
@@ -155,6 +155,7 @@ src/session/session_compact.c
src/session/session_dhandle.c
src/session/session_salvage.c
src/support/cksum.c
+src/support/cond_auto.c
src/support/crypto.c
src/support/err.c
src/support/filename.c
diff --git a/dist/api_data.py b/dist/api_data.py
index c386c0b345d..02aee1e8825 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -76,12 +76,12 @@ lsm_config = [
Config('bloom', 'true', r'''
create bloom filters on LSM tree chunks as they are merged''',
type='boolean'),
- Config('bloom_config', '', r'''
- config string used when creating Bloom filter files, passed
- to WT_SESSION::create'''),
Config('bloom_bit_count', '16', r'''
the number of bits used per item for LSM bloom filters''',
min='2', max='1000'),
+ Config('bloom_config', '', r'''
+ config string used when creating Bloom filter files, passed
+ to WT_SESSION::create'''),
Config('bloom_hash_count', '8', r'''
the number of hash values per item used for LSM bloom
filters''',
@@ -299,6 +299,15 @@ file_meta = file_config + [
the file version'''),
]
+lsm_meta = file_config + lsm_config + [
+ Config('last', '', r'''
+ the last allocated chunk ID'''),
+ Config('chunks', '', r'''
+ active chunks in the LSM tree'''),
+ Config('old_chunks', '', r'''
+ obsolete chunks in the LSM tree'''),
+]
+
table_only_config = [
Config('colgroups', '', r'''
comma-separated list of names of column groups. Each column
@@ -522,6 +531,9 @@ connection_runtime_config = [
the statistics log server uses a session from the configured
session_max''',
type='category', subconfig=[
+ Config('json', 'false', r'''
+ encode statistics in JSON format''',
+ type='boolean'),
Config('on_close', 'false', r'''log statistics on database close''',
type='boolean'),
Config('path', '"WiredTigerStat.%d.%H"', r'''
@@ -538,7 +550,8 @@ connection_runtime_config = [
type='list'),
Config('timestamp', '"%b %d %H:%M:%S"', r'''
a timestamp prepended to each log record, may contain strftime
- conversion specifications'''),
+ conversion specifications, when \c json is configured, defaults
+ to \c "%FT%Y.000Z"'''),
Config('wait', '0', r'''
seconds to wait between each write of the log records; setting
this value above 0 configures statistics logging''',
@@ -655,6 +668,11 @@ wiredtiger_open_common = connection_runtime_config + [
RPC server for primary processes and use RPC for secondary
processes). <b>Not yet supported in WiredTiger</b>''',
type='boolean'),
+ Config('readonly', 'false', r'''
+ open connection in read-only mode. The database must exist. All
+ methods that may modify a database are disabled. See @ref readonly
+ for more information''',
+ type='boolean'),
Config('session_max', '100', r'''
maximum expected number of sessions (including server
threads)''',
@@ -732,12 +750,16 @@ cursor_runtime_config = [
]
methods = {
-'file.meta' : Method(file_meta),
-
'colgroup.meta' : Method(colgroup_meta),
+'file.config' : Method(file_config),
+
+'file.meta' : Method(file_meta),
+
'index.meta' : Method(index_meta),
+'lsm.meta' : Method(lsm_meta),
+
'table.meta' : Method(table_meta),
'WT_CURSOR.close' : Method([]),
diff --git a/dist/api_err.py b/dist/api_err.py
index 09332d508a2..a17c68ee196 100644
--- a/dist/api_err.py
+++ b/dist/api_err.py
@@ -56,6 +56,8 @@ errors = [
This error is generated when wiredtiger_open is configured
to run in-memory, and an insert or update operation requires more
than the configured cache size to complete.''', undoc=True),
+ Error('WT_PERM_DENIED', -31808,
+ 'permission denied (internal)', undoc=True),
]
# Update the #defines in the wiredtiger.in file.
diff --git a/dist/filelist b/dist/filelist
index edd59435841..350e0c50087 100644
--- a/dist/filelist
+++ b/dist/filelist
@@ -153,6 +153,7 @@ src/session/session_compact.c
src/session/session_dhandle.c
src/session/session_salvage.c
src/support/cksum.c
+src/support/cond_auto.c
src/support/crypto.c
src/support/err.c
src/support/filename.c
@@ -163,6 +164,8 @@ src/support/hazard.c
src/support/hex.c
src/support/huffman.c
src/support/pow.c
+src/support/power8/crc32.S
+src/support/power8/crc32_wrapper.c
src/support/rand.c
src/support/scratch.c
src/support/stat.c
diff --git a/dist/flags.py b/dist/flags.py
index b97235b965a..f500e3b1ae1 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -99,6 +99,7 @@ flags = {
'CONN_LOG_SERVER_RUN',
'CONN_LSM_MERGE',
'CONN_PANIC',
+ 'CONN_READONLY',
'CONN_SERVER_ASYNC',
'CONN_SERVER_CHECKPOINT',
'CONN_SERVER_LSM',
@@ -114,6 +115,7 @@ flags = {
'SESSION_LOCK_NO_WAIT',
'SESSION_LOCKED_CHECKPOINT',
'SESSION_LOCKED_HANDLE_LIST',
+ 'SESSION_LOCKED_METADATA',
'SESSION_LOCKED_SCHEMA',
'SESSION_LOCKED_SLOT',
'SESSION_LOCKED_TABLE',
diff --git a/dist/s_export b/dist/s_export
index 1212b5b2c1f..8a2c701d27f 100755
--- a/dist/s_export
+++ b/dist/s_export
@@ -12,10 +12,7 @@ Darwin)
*)
# We require GNU nm, which may not be installed.
type nm > /dev/null 2>&1 &&
- (nm --version | grep 'GNU nm') > /dev/null 2>&1 || {
- echo 'skipped: GNU nm not found'
- exit 0
- }
+ (nm --version | grep 'GNU nm') > /dev/null 2>&1 || exit 0
NM='nm --extern-only --defined-only --print-file-name $f'
;;
esac
@@ -28,7 +25,9 @@ check()
sed 's/.* //' |
egrep -v '^__wt') |
sort |
- uniq -u > $t
+ uniq -u |
+ egrep -v \
+ 'zlib_extension_init|lz4_extension_init|snappy_extension_init' > $t
test -s $t && {
echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
diff --git a/dist/s_funcs.list b/dist/s_funcs.list
index ed6cf43bb2f..8d32eecdfb7 100644
--- a/dist/s_funcs.list
+++ b/dist/s_funcs.list
@@ -1,4 +1,6 @@
# List of functions that aren't found by s_funcs, but that's OK.
+FUNC_END
+FUNC_START
WT_CURDUMP_PASS
__bit_ffs
__bit_nclr
diff --git a/dist/s_longlines b/dist/s_longlines
index decedb58f44..000f33d51d5 100755
--- a/dist/s_longlines
+++ b/dist/s_longlines
@@ -9,8 +9,9 @@ l=`(cd .. &&
find dist -name '*.py' &&
find src -name '*.in') |
sed -e '/dist\/stat_data\.py/d' \
- -e '/support\/stat\.c/d' \
- -e '/include\/extern\.h/d'`
+ -e '/include\/extern\.h/d' \
+ -e '/support\/power8/d' \
+ -e '/support\/stat\.c/d'`
for f in $l ; do
expand -t8 < ../$f | awk -- \
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 19fa27cd719..6762521ca76 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -107,11 +107,13 @@ FALLTHROUGH
FH
FLD
FLSv
+FLv
FNV
FORALL
FOREACH
FULLFSYNC
FindFirstFile
+Fixup
Fk
FlushFileBuffers
Fprintf
@@ -189,6 +191,7 @@ MALLOC
MEM
MEMALIGN
MERCHANTABILITY
+METADATA
MONGODB
MSVC
MULTIBLOCK
@@ -210,6 +213,7 @@ MySecret
NEEDKEY
NEEDVALUE
NOLL
+NOLOCK
NONINFRINGEMENT
NOTFOUND
NOTREACHED
@@ -243,6 +247,7 @@ Preload
Prepend
Qsort
RCS
+RDNOLOCK
RECNO
REF's
REFs
@@ -255,6 +260,7 @@ RNG
RPC
RUNDIR
Radu
+Readonly
Rebalance
RedHat
Redistributions
@@ -328,6 +334,7 @@ VxWorks
WAL
WIREDTIGER
WRLSN
+WRNOLOCK
WakeAllConditionVariable
Wconditional
WeakHashLen
@@ -430,6 +437,8 @@ cfg
cfkos
change's
changelog
+chdir
+checkfmt
checkpointed
checkpointer
checkpointing
@@ -437,6 +446,7 @@ checksum
checksums
children's
chk
+chmod
chongo
cip
cjoin
@@ -501,6 +511,7 @@ datasets
datasource
datastore
dbc
+dbs
dcalloc
decile
deciles
@@ -670,6 +681,7 @@ inline
inmem
insertK
insertV
+inserters
instantiation
intl
intnum
@@ -686,6 +698,7 @@ jnr
jrx
json
kb
+kbits
keycmp
keyid
keyv
@@ -715,6 +728,7 @@ libwiredtiger
llll
llu
loadtext
+localTime
localtime
logf
logmgr
@@ -744,6 +758,8 @@ majorp
malloc
marshall
marshalled
+maxcpu
+maxdbs
mbll
mbss
mem
@@ -756,6 +772,7 @@ memset
memsize
metaconf
metadata
+metadata's
metafile
mfence
minorp
@@ -792,7 +809,9 @@ nfilename
nhex
nlpo
nocase
+noclear
nocrypto
+nolock
nonliteral
noop
nop
@@ -829,8 +848,11 @@ parserp
patchp
pathname
pathnames
+pclose
+pcpu
perf
pfx
+popen
poptable
popthreads
portably
@@ -838,6 +860,7 @@ pos
posint
posix
postsize
+powerpc
pragmas
pre
prealloc
@@ -855,6 +878,7 @@ ps
psp
pthread
ptr
+ptrdiff
pushms
putK
putV
@@ -869,6 +893,7 @@ rS
rb
rbrace
rbracket
+rdonly
rduppo
readlock
readonly
@@ -891,6 +916,7 @@ resize
resizing
ret
retp
+revint
rf
rle
rmw
@@ -898,6 +924,7 @@ rng
rocksdb
rotN
rotn
+rp
rpc
run's
runtime
@@ -965,10 +992,12 @@ superset
sw
sy
sys
+sz
t's
tV
tablename
tcbench
+td
testutil
th
tid
@@ -1054,9 +1083,12 @@ vsize
vsnprintf
vtype
vunpack
+vw
+waitpid
walk's
warmup
wb
+wiredTiger
wiredtiger
workFactor
wrapup
diff --git a/dist/s_style b/dist/s_style
index 44a5bdda741..78fb7a6eb03 100755
--- a/dist/s_style
+++ b/dist/s_style
@@ -18,7 +18,9 @@ if [ $# -ne 1 ]; then
find bench examples ext src test \
-name '*.[chisy]' -o -name '*.in' -o -name '*.dox' |
- sed -e '/Makefile.in/d' -e '/build_win\/wiredtiger_config.h/d' |
+ sed -e '/Makefile.in/d' \
+ -e '/build_win\/wiredtiger_config.h/d' \
+ -e '/support\/power8/d' |
xargs $xp -n 1 -I{} sh ./dist/s_style {}
else
# General style correction and cleanup for a single file
diff --git a/dist/s_whitespace b/dist/s_whitespace
index d13de4b5989..74820a4f0e9 100755
--- a/dist/s_whitespace
+++ b/dist/s_whitespace
@@ -36,10 +36,9 @@ for f in `find bench examples ext src test \
-name '*.[chi]' -o \
-name '*.dox' -o \
-name '*.in' -o \
- -name 'Makefile.am'`; do
- if expr "$f" : ".*/Makefile.in" > /dev/null; then
- continue
- fi
+ -name 'Makefile.am' |
+ sed -e '/Makefile.in/d' \
+ -e '/support\/power8/d'`; do
whitespace_and_empty_line $f
done
diff --git a/dist/s_win b/dist/s_win
index 1eb4702d517..0b7d5184037 100755
--- a/dist/s_win
+++ b/dist/s_win
@@ -44,7 +44,7 @@ win_filelist()
f='../build_win/filelist.win'
# Process the files for which there's a Windows-specific version, then
- # append Windows-only files. (There aren't yet any POSIX-only files.)
+ # append Windows-only files and discard POSIX-only files.
(sed \
-e 's;os_posix/os_dir.c;os_win/os_dir.c;' \
-e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \
@@ -71,7 +71,9 @@ win_filelist()
-e 's;os_posix/os_sleep.c;os_win/os_sleep.c;' \
-e 's;os_posix/os_thread.c;os_win/os_thread.c;' \
-e 's;os_posix/os_time.c;os_win/os_time.c;' \
- -e 's;os_posix/os_yield.c;os_win/os_yield.c;'
+ -e 's;os_posix/os_yield.c;os_win/os_yield.c;' \
+ -e '/src\/support\/power8\/crc32.S/d' \
+ -e '/src\/support\/power8\/crc32_wrapper.c/d'
echo 'src/os_win/os_snprintf.c'
echo 'src/os_win/os_vsnprintf.c') < filelist | sort > $t
cmp $t $f > /dev/null 2>&1 ||
diff --git a/dist/stat.py b/dist/stat.py
index 6dcfccfeab5..7961bf7053f 100644
--- a/dist/stat.py
+++ b/dist/stat.py
@@ -98,11 +98,11 @@ for line in open('../src/include/wiredtiger.in', 'r'):
f.close()
compare_srcfile(tmp_file, '../src/include/wiredtiger.in')
-def print_func(name, handle, list):
+def print_func(name, handle, statlist):
'''Print the structures/functions for the stat.c file.'''
f.write('\n')
f.write('static const char * const __stats_' + name + '_desc[] = {\n')
- for l in list:
+ for l in statlist:
f.write('\t"' + l.desc + '",\n')
f.write('};\n')
@@ -143,7 +143,7 @@ void
__wt_stat_''' + name + '_clear_single(WT_' + name.upper() + '''_STATS *stats)
{
''')
- for l in sorted(list):
+ for l in statlist:
# no_clear: don't clear the value.
if 'no_clear' in l.flags:
f.write('\t\t/* not clearing ' + l.name + ' */\n')
@@ -170,7 +170,7 @@ __wt_stat_''' + name + '''_aggregate_single(
WT_''' + name.upper() + '_STATS *from, WT_' + name.upper() + '''_STATS *to)
{
''')
- for l in sorted(list):
+ for l in statlist:
if 'max_aggregate' in l.flags:
o = '\tif (from->' + l.name + ' > to->' + l.name + ')\n' +\
'\t\tto->' + l.name + ' = from->' + l.name + ';\n'
@@ -190,11 +190,11 @@ __wt_stat_''' + name + '''_aggregate(
# Connection level aggregation does not currently have any computation
# of a maximum value; I'm leaving in support for it, but don't declare
# a temporary variable until it's needed.
- for l in sorted(list):
+ for l in statlist:
if 'max_aggregate' in l.flags:
f.write('\tint64_t v;\n\n')
break;
- for l in sorted(list):
+ for l in statlist:
if 'max_aggregate' in l.flags:
o = '\tif ((v = WT_STAT_READ(from, ' + l.name + ')) > ' +\
'to->' + l.name + ')\n'
diff --git a/dist/stat_data.py b/dist/stat_data.py
index 41a93961079..bd951e64999 100644
--- a/dist/stat_data.py
+++ b/dist/stat_data.py
@@ -12,6 +12,7 @@
# max_aggregate Take the maximum value when aggregating statistics
# no_clear Value not cleared when statistics cleared
# no_scale Don't scale value per second in the logging tool script
+# size Used by timeseries tool, indicates value is a byte count
#
# The no_clear and no_scale flags are normally always set together (values that
# are maintained over time are normally not scaled per second).
@@ -108,6 +109,8 @@ connection_stats = [
##########################################
# System statistics
##########################################
+ ConnStat('cond_auto_wait', 'auto adjusting condition wait calls'),
+ ConnStat('cond_auto_wait_reset', 'auto adjusting condition resets'),
ConnStat('cond_wait', 'pthread mutex condition wait calls'),
ConnStat('file_open', 'files currently open', 'no_clear,no_scale'),
ConnStat('memory_allocation', 'memory allocations'),
@@ -123,7 +126,7 @@ connection_stats = [
##########################################
AsyncStat('async_alloc_race', 'number of allocation state races'),
AsyncStat('async_alloc_view', 'number of operation slots viewed for allocation'),
- AsyncStat('async_cur_queue', 'current work queue length'),
+ AsyncStat('async_cur_queue', 'current work queue length', 'no_scale'),
AsyncStat('async_flush', 'number of flush calls'),
AsyncStat('async_full', 'number of times operation allocation failed'),
AsyncStat('async_max_queue', 'maximum work queue length', 'no_clear,no_scale'),
@@ -138,9 +141,9 @@ connection_stats = [
##########################################
# Block manager statistics
##########################################
- BlockStat('block_byte_map_read', 'mapped bytes read'),
- BlockStat('block_byte_read', 'bytes read'),
- BlockStat('block_byte_write', 'bytes written'),
+ BlockStat('block_byte_map_read', 'mapped bytes read', 'size'),
+ BlockStat('block_byte_read', 'bytes read', 'size'),
+ BlockStat('block_byte_write', 'bytes written', 'size'),
BlockStat('block_map_read', 'mapped blocks read'),
BlockStat('block_preload', 'blocks pre-loaded'),
BlockStat('block_read', 'blocks read'),
@@ -149,14 +152,15 @@ connection_stats = [
##########################################
# Cache and eviction statistics
##########################################
- CacheStat('cache_bytes_dirty', 'tracked dirty bytes in the cache', 'no_clear,no_scale'),
- CacheStat('cache_bytes_internal', 'tracked bytes belonging to internal pages in the cache', 'no_clear,no_scale'),
- CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale'),
- CacheStat('cache_bytes_leaf', 'tracked bytes belonging to leaf pages in the cache', 'no_clear,no_scale'),
- CacheStat('cache_bytes_max', 'maximum bytes configured', 'no_clear,no_scale'),
- CacheStat('cache_bytes_overflow', 'tracked bytes belonging to overflow pages in the cache', 'no_clear,no_scale'),
- CacheStat('cache_bytes_read', 'bytes read into cache'),
- CacheStat('cache_bytes_write', 'bytes written from cache'),
+ CacheStat('cache_bytes_dirty', 'tracked dirty bytes in the cache', 'no_clear,no_scale,size'),
+ CacheStat('cache_bytes_internal', 'tracked bytes belonging to internal pages in the cache', 'no_clear,no_scale,size'),
+ CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'),
+ CacheStat('cache_bytes_leaf', 'tracked bytes belonging to leaf pages in the cache', 'no_clear,no_scale,size'),
+ CacheStat('cache_bytes_max', 'maximum bytes configured', 'no_clear,no_scale,size'),
+ CacheStat('cache_bytes_overflow', 'tracked bytes belonging to overflow pages in the cache', 'no_clear,no_scale,size'),
+ CacheStat('cache_bytes_read', 'bytes read into cache', 'size'),
+ CacheStat('cache_bytes_write', 'bytes written from cache', 'size'),
+ CacheStat('cache_eviction_aggressive_set', 'eviction currently operating in aggressive mode', 'no_clear,no_scale'),
CacheStat('cache_eviction_app', 'pages evicted by application threads'),
CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'),
CacheStat('cache_eviction_clean', 'unmodified pages evicted'),
@@ -168,7 +172,7 @@ connection_stats = [
CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum'),
CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'),
CacheStat('cache_eviction_internal', 'internal pages evicted'),
- CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale'),
+ CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale,size'),
CacheStat('cache_eviction_queue_empty', 'eviction server candidate queue empty when topping up'),
CacheStat('cache_eviction_queue_not_empty', 'eviction server candidate queue not empty when topping up'),
CacheStat('cache_eviction_server_evicting', 'eviction server evicting pages'),
@@ -206,17 +210,19 @@ connection_stats = [
##########################################
# Logging statistics
##########################################
- LogStat('log_buffer_size', 'total log buffer size', 'no_clear,no_scale'),
- LogStat('log_bytes_payload', 'log bytes of payload data'),
- LogStat('log_bytes_written', 'log bytes written'),
+ LogStat('log_buffer_size', 'total log buffer size', 'no_clear,no_scale,size'),
+ LogStat('log_bytes_payload', 'log bytes of payload data', 'size'),
+ LogStat('log_bytes_written', 'log bytes written', 'size'),
LogStat('log_close_yields', 'yields waiting for previous log file close'),
- LogStat('log_compress_len', 'total size of compressed records'),
- LogStat('log_compress_mem', 'total in-memory size of compressed records'),
+ LogStat('log_compress_len', 'total size of compressed records', 'size'),
+ LogStat('log_compress_mem', 'total in-memory size of compressed records', 'size'),
LogStat('log_compress_small', 'log records too small to compress'),
LogStat('log_compress_write_fails', 'log records not compressed'),
LogStat('log_compress_writes', 'log records compressed'),
LogStat('log_flush', 'log flush operations'),
- LogStat('log_max_filesize', 'maximum log file size', 'no_clear,no_scale'),
+ LogStat('log_force_write', 'log force write operations'),
+ LogStat('log_force_write_skip', 'log force write operations skipped'),
+ LogStat('log_max_filesize', 'maximum log file size', 'no_clear,no_scale,size'),
LogStat('log_prealloc_files', 'pre-allocated log files prepared'),
LogStat('log_prealloc_max', 'number of pre-allocated log files to create', 'no_clear,no_scale'),
LogStat('log_prealloc_missed', 'pre-allocated log files not ready and missed'),
@@ -227,7 +233,7 @@ connection_stats = [
LogStat('log_scans', 'log scan operations'),
LogStat('log_slot_closes', 'consolidated slot closures'),
LogStat('log_slot_coalesced', 'written slots coalesced'),
- LogStat('log_slot_consolidated', 'logging bytes consolidated'),
+ LogStat('log_slot_consolidated', 'logging bytes consolidated', 'size'),
LogStat('log_slot_joins', 'consolidated slot joins'),
LogStat('log_slot_races', 'consolidated slot join races'),
LogStat('log_slot_switch_busy', 'busy returns attempting to switch slots'),
@@ -236,6 +242,7 @@ connection_stats = [
LogStat('log_sync', 'log sync operations'),
LogStat('log_sync_dir', 'log sync_dir operations'),
LogStat('log_write_lsn', 'log server thread advances write LSN'),
+ LogStat('log_write_lsn_skip', 'log server thread write LSN walk skipped'),
LogStat('log_writes', 'log write operations'),
LogStat('log_zero_fills', 'log files manually zero-filled'),
@@ -246,7 +253,7 @@ connection_stats = [
RecStat('rec_page_delete_fast', 'fast-path pages deleted'),
RecStat('rec_pages', 'page reconciliation calls'),
RecStat('rec_pages_eviction', 'page reconciliation calls for eviction'),
- RecStat('rec_split_stashed_bytes', 'split bytes currently awaiting free', 'no_clear,no_scale'),
+ RecStat('rec_split_stashed_bytes', 'split bytes currently awaiting free', 'no_clear,no_scale,size'),
RecStat('rec_split_stashed_objects', 'split objects currently awaiting free', 'no_clear,no_scale'),
##########################################
@@ -315,7 +322,7 @@ connection_stats = [
YieldStat('page_sleep', 'page acquire time sleeping (usecs)'),
]
-connection_stats = sorted(connection_stats, key=attrgetter('name'))
+connection_stats = sorted(connection_stats, key=attrgetter('desc'))
##########################################
# Data source statistics
@@ -333,18 +340,18 @@ dsrc_stats = [
CursorStat('cursor_create', 'create calls'),
CursorStat('cursor_insert', 'insert calls'),
CursorStat('cursor_insert_bulk', 'bulk-loaded cursor-insert calls'),
- CursorStat('cursor_insert_bytes', 'cursor-insert key and value bytes inserted'),
+ CursorStat('cursor_insert_bytes', 'cursor-insert key and value bytes inserted', 'size'),
CursorStat('cursor_next', 'next calls'),
CursorStat('cursor_prev', 'prev calls'),
CursorStat('cursor_remove', 'remove calls'),
- CursorStat('cursor_remove_bytes', 'cursor-remove key bytes removed'),
+ CursorStat('cursor_remove_bytes', 'cursor-remove key bytes removed', 'size'),
CursorStat('cursor_reset', 'reset calls'),
CursorStat('cursor_restart', 'restarted searches'),
CursorStat('cursor_search', 'search calls'),
CursorStat('cursor_search_near', 'search near calls'),
CursorStat('cursor_truncate', 'truncate calls'),
CursorStat('cursor_update', 'update calls'),
- CursorStat('cursor_update_bytes', 'cursor-update value bytes updated'),
+ CursorStat('cursor_update_bytes', 'cursor-update value bytes updated', 'size'),
##########################################
# Btree statistics
@@ -357,13 +364,13 @@ dsrc_stats = [
BtreeStat('btree_column_variable', 'column-store variable-size leaf pages', 'no_scale'),
BtreeStat('btree_compact_rewrite', 'pages rewritten by compaction'),
BtreeStat('btree_entries', 'number of key/value pairs', 'no_scale'),
- BtreeStat('btree_fixed_len', 'fixed-record size', 'max_aggregate,no_scale'),
+ BtreeStat('btree_fixed_len', 'fixed-record size', 'max_aggregate,no_scale,size'),
BtreeStat('btree_maximum_depth', 'maximum tree depth', 'max_aggregate,no_scale'),
- BtreeStat('btree_maxintlkey', 'maximum internal page key size', 'max_aggregate,no_scale'),
- BtreeStat('btree_maxintlpage', 'maximum internal page size', 'max_aggregate,no_scale'),
- BtreeStat('btree_maxleafkey', 'maximum leaf page key size', 'max_aggregate,no_scale'),
- BtreeStat('btree_maxleafpage', 'maximum leaf page size', 'max_aggregate,no_scale'),
- BtreeStat('btree_maxleafvalue', 'maximum leaf page value size', 'max_aggregate,no_scale'),
+ BtreeStat('btree_maxintlkey', 'maximum internal page key size', 'max_aggregate,no_scale,size'),
+ BtreeStat('btree_maxintlpage', 'maximum internal page size', 'max_aggregate,no_scale,size'),
+ BtreeStat('btree_maxleafkey', 'maximum leaf page key size', 'max_aggregate,no_scale,size'),
+ BtreeStat('btree_maxleafpage', 'maximum leaf page size', 'max_aggregate,no_scale,size'),
+ BtreeStat('btree_maxleafvalue', 'maximum leaf page value size', 'max_aggregate,no_scale,size'),
BtreeStat('btree_overflow', 'overflow pages', 'no_scale'),
BtreeStat('btree_row_internal', 'row-store internal pages', 'no_scale'),
BtreeStat('btree_row_leaf', 'row-store leaf pages', 'no_scale'),
@@ -377,7 +384,7 @@ dsrc_stats = [
LSMStat('bloom_miss', 'bloom filter misses'),
LSMStat('bloom_page_evict', 'bloom filter pages evicted from cache'),
LSMStat('bloom_page_read', 'bloom filter pages read into cache'),
- LSMStat('bloom_size', 'total size of bloom filters', 'no_scale'),
+ LSMStat('bloom_size', 'total size of bloom filters', 'no_scale,size'),
LSMStat('lsm_checkpoint_throttle', 'sleep for LSM checkpoint throttle'),
LSMStat('lsm_chunk_count', 'chunks in the LSM tree', 'no_scale'),
LSMStat('lsm_generation_max', 'highest merge generation in the LSM tree', 'max_aggregate,no_scale'),
@@ -387,22 +394,22 @@ dsrc_stats = [
##########################################
# Block manager statistics
##########################################
- BlockStat('allocation_size', 'file allocation unit size', 'max_aggregate,no_scale'),
+ BlockStat('allocation_size', 'file allocation unit size', 'max_aggregate,no_scale,size'),
BlockStat('block_alloc', 'blocks allocated'),
- BlockStat('block_checkpoint_size', 'checkpoint size', 'no_scale'),
+ BlockStat('block_checkpoint_size', 'checkpoint size', 'no_scale,size'),
BlockStat('block_extension', 'allocations requiring file extension'),
BlockStat('block_free', 'blocks freed'),
BlockStat('block_magic', 'file magic number', 'max_aggregate,no_scale'),
BlockStat('block_major', 'file major version number', 'max_aggregate,no_scale'),
BlockStat('block_minor', 'minor version number', 'max_aggregate,no_scale'),
- BlockStat('block_reuse_bytes', 'file bytes available for reuse'),
- BlockStat('block_size', 'file size in bytes', 'no_scale'),
+ BlockStat('block_reuse_bytes', 'file bytes available for reuse', 'no_scale,size'),
+ BlockStat('block_size', 'file size in bytes', 'no_scale,size'),
##########################################
# Cache and eviction statistics
##########################################
- CacheStat('cache_bytes_read', 'bytes read into cache'),
- CacheStat('cache_bytes_write', 'bytes written from cache'),
+ CacheStat('cache_bytes_read', 'bytes read into cache', 'size'),
+ CacheStat('cache_bytes_write', 'bytes written from cache', 'size'),
CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'),
CacheStat('cache_eviction_clean', 'unmodified pages evicted'),
CacheStat('cache_eviction_deepen', 'page split during eviction deepened the tree'),
@@ -448,8 +455,8 @@ dsrc_stats = [
RecStat('rec_page_match', 'page checksum matches'),
RecStat('rec_pages', 'page reconciliation calls'),
RecStat('rec_pages_eviction', 'page reconciliation calls for eviction'),
- RecStat('rec_prefix_compression', 'leaf page key bytes discarded using prefix compression'),
- RecStat('rec_suffix_compression', 'internal page key bytes discarded using suffix compression'),
+ RecStat('rec_prefix_compression', 'leaf page key bytes discarded using prefix compression', 'size'),
+ RecStat('rec_suffix_compression', 'internal page key bytes discarded using suffix compression', 'size'),
##########################################
# Transaction statistics
@@ -457,7 +464,7 @@ dsrc_stats = [
TxnStat('txn_update_conflict', 'update conflicts'),
]
-dsrc_stats = sorted(dsrc_stats, key=attrgetter('name'))
+dsrc_stats = sorted(dsrc_stats, key=attrgetter('desc'))
##########################################
# Cursor Join statistics
@@ -468,4 +475,4 @@ join_stats = [
JoinStat('bloom_false_positive', 'bloom filter false positives'),
]
-join_stats = sorted(join_stats, key=attrgetter('name'))
+join_stats = sorted(join_stats, key=attrgetter('desc'))
diff --git a/examples/c/Makefile.am b/examples/c/Makefile.am
index 587204efff1..72fd98aff7b 100644
--- a/examples/c/Makefile.am
+++ b/examples/c/Makefile.am
@@ -12,6 +12,7 @@ noinst_PROGRAMS = \
ex_cursor \
ex_data_source \
ex_encrypt \
+ ex_event_handler \
ex_extending \
ex_extractor \
ex_hello \
diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c
index 418c99ad6a3..1c036b75461 100644
--- a/examples/c/ex_all.c
+++ b/examples/c/ex_all.c
@@ -346,8 +346,7 @@ cursor_ops(WT_SESSION *session)
cursor->set_key(cursor, key);
if ((ret = cursor->remove(cursor)) != 0) {
fprintf(stderr,
- "cursor.remove: %s\n",
- cursor->session->strerror(cursor->session, ret));
+ "cursor.remove: %s\n", wiredtiger_strerror(ret));
return (ret);
}
/*! [Display an error] */
@@ -359,7 +358,8 @@ cursor_ops(WT_SESSION *session)
cursor->set_key(cursor, key);
if ((ret = cursor->remove(cursor)) != 0) {
fprintf(stderr,
- "cursor.remove: %s\n", session->strerror(session, ret));
+ "cursor.remove: %s\n",
+ cursor->session->strerror(cursor->session, ret));
return (ret);
}
/*! [Display an error thread safe] */
diff --git a/examples/c/ex_async.c b/examples/c/ex_async.c
index 584c3e54b87..ecdbd2f4fea 100644
--- a/examples/c/ex_async.c
+++ b/examples/c/ex_async.c
@@ -218,7 +218,7 @@ main(void)
*/
ret = conn->close(conn, NULL);
- printf("Searched for %d keys\n", ex_asynckeys.num_keys);
+ printf("Searched for %" PRIu32 " keys\n", ex_asynckeys.num_keys);
return (ret);
}
diff --git a/examples/c/ex_config_parse.c b/examples/c/ex_config_parse.c
index 124eff21130..be3c78bedd4 100644
--- a/examples/c/ex_config_parse.c
+++ b/examples/c/ex_config_parse.c
@@ -30,6 +30,7 @@
* configuration strings.
*/
+#include <inttypes.h>
#include <stdio.h>
#include <string.h>
@@ -99,7 +100,7 @@ main(void)
while ((ret = parser->next(parser, &k, &v)) == 0) {
printf("%.*s:", (int)k.len, k.str);
if (v.type == WT_CONFIG_ITEM_NUM)
- printf("%d\n", (int)v.val);
+ printf("%" PRId64 "\n", v.val);
else
printf("%.*s\n", (int)v.len, v.str);
}
@@ -126,7 +127,7 @@ main(void)
"log.file_max configuration: %s", wiredtiger_strerror(ret));
return (ret);
}
- printf("log file max: %d\n", (int)v.val);
+ printf("log file max: %" PRId64 "\n", v.val);
/*! [nested get] */
ret = parser->close(parser);
diff --git a/examples/c/ex_event_handler.c b/examples/c/ex_event_handler.c
new file mode 100644
index 00000000000..ba6807cd56d
--- /dev/null
+++ b/examples/c/ex_event_handler.c
@@ -0,0 +1,136 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ex_event_handler.c
+ * Demonstrate how to use the WiredTiger event handler mechanism.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <wiredtiger.h>
+
+static const char *home;
+
+int handle_wiredtiger_error(
+ WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
+int handle_wiredtiger_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
+
+/*! [Function event_handler] */
+/*
+ * Create our own event handler structure to allow us to pass context through
+ * to event handler callbacks. For this to work the WiredTiger event handler
+ * must appear first in our custom event handler structure: the structure is
+ * handed to wiredtiger_open() cast to a WT_EVENT_HANDLER pointer, and the
+ * callbacks cast the WT_EVENT_HANDLER pointer they receive back to this type
+ * to reach app_id.
+ */
+typedef struct {
+ WT_EVENT_HANDLER h;
+ const char *app_id;
+} CUSTOM_EVENT_HANDLER;
+
+/*
+ * handle_wiredtiger_error --
+ *	Function to handle error callbacks from WiredTiger. Writes the
+ *	application ID stored in our custom handler, the session pointer, the
+ *	error code and the message to stderr, and always returns 0.
+ */
+int
+handle_wiredtiger_error(WT_EVENT_HANDLER *handler,
+    WT_SESSION *session, int error, const char *message)
+{
+	CUSTOM_EVENT_HANDLER *custom_handler;
+
+	/* Cast the handler back to our custom handler. */
+	custom_handler = (CUSTOM_EVENT_HANDLER *)handler;
+
+	/*
+	 * Report the error on the console. The %p conversion requires a
+	 * pointer to void, so cast the session handle explicitly.
+	 */
+	fprintf(stderr,
+	    "app_id %s, thread context %p, error %d, message %s\n",
+	    custom_handler->app_id, (void *)session, error, message);
+
+	return (0);
+}
+
+/*
+ * handle_wiredtiger_message --
+ *	Function to handle message callbacks from WiredTiger. Writes the
+ *	application ID stored in our custom handler, the session pointer and
+ *	the message to stdout, and always returns 0.
+ */
+int
+handle_wiredtiger_message(
+    WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
+{
+	/*
+	 * Cast the handler back to our custom handler to reach the
+	 * application ID. The %p conversion requires a pointer to void, so
+	 * cast the session handle explicitly.
+	 */
+	printf("app id %s, thread context %p, message %s\n",
+	    ((CUSTOM_EVENT_HANDLER *)handler)->app_id,
+	    (void *)session, message);
+
+	return (0);
+}
+/*! [Function event_handler] */
+
+/*
+ * config_event_handler --
+ *	Open a connection with a custom event handler installed, make one
+ *	deliberately invalid API call so the handler is exercised, and close
+ *	the connection. Returns the error from wiredtiger_open if the open
+ *	fails, otherwise the result of closing the connection.
+ */
+static int
+config_event_handler(void)
+{
+	WT_CONNECTION *conn;
+	WT_SESSION *session;
+	int ret;
+
+	/*! [Configure event_handler] */
+	CUSTOM_EVENT_HANDLER event_handler;
+
+	event_handler.h.handle_error = handle_wiredtiger_error;
+	event_handler.h.handle_message = handle_wiredtiger_message;
+	/* Set handlers to NULL to use the default handler. */
+	event_handler.h.handle_progress = NULL;
+	event_handler.h.handle_close = NULL;
+	event_handler.app_id = "example_event_handler";
+
+	ret = wiredtiger_open(home,
+	    (WT_EVENT_HANDLER *)&event_handler, "create", &conn);
+	/*! [Configure event_handler] */
+
+	/*
+	 * conn is an uninitialized local: it must not be dereferenced unless
+	 * the open succeeded.
+	 */
+	if (ret != 0)
+		return (ret);
+
+	/* Make an invalid API call, to ensure the event handler works. */
+	(void)conn->open_session(conn, NULL, "isolation=invalid", &session);
+
+	return (conn->close(conn, NULL));
+}
+
+int
+main(void)
+{
+	/*
+	 * If the WIREDTIGER_HOME environment variable is already set (as is
+	 * done by make check), leave home NULL. Otherwise use WT_HOME and
+	 * create it as a clean test directory for this run of the program.
+	 */
+	home = getenv("WIREDTIGER_HOME") == NULL ? "WT_HOME" : NULL;
+	if (home != NULL)
+		(void)system("rm -rf WT_HOME && mkdir WT_HOME");
+
+	return (config_event_handler());
+}
diff --git a/examples/c/ex_extractor.c b/examples/c/ex_extractor.c
index fff9c79f8e0..8623f4759fc 100644
--- a/examples/c/ex_extractor.c
+++ b/examples/c/ex_extractor.c
@@ -99,11 +99,13 @@ my_extract(WT_EXTRACTOR *extractor, WT_SESSION *session,
* key(s). WiredTiger will perform the required operation
* (such as a remove()).
*/
- fprintf(stderr, "EXTRACTOR: index op for year %d: %s %s\n",
+ fprintf(stderr,
+ "EXTRACTOR: index op for year %" PRIu16 ": %s %s\n",
year, first_name, last_name);
result_cursor->set_key(result_cursor, year);
if ((ret = result_cursor->insert(result_cursor)) != 0) {
- fprintf(stderr, "EXTRACTOR: op year %d: error %d\n",
+ fprintf(stderr,
+ "EXTRACTOR: op year %" PRIu16 ": error %d\n",
year, ret);
return (ret);
}
@@ -157,7 +159,7 @@ read_index(WT_SESSION *session)
*/
for (i = 0; i < 10 && RET_OK(ret); i++) {
year = (uint16_t)((rand() % YEAR_SPAN) + YEAR_BASE);
- printf("Year %d:\n", year);
+ printf("Year %" PRIu16 ":\n", year);
cursor->set_key(cursor, year);
if ((ret = cursor->search(cursor)) != 0)
break;
@@ -181,7 +183,7 @@ read_index(WT_SESSION *session)
}
}
if (!RET_OK(ret))
- fprintf(stderr, "Error %d for year %d\n", ret, year);
+ fprintf(stderr, "Error %d for year %" PRIu16 "\n", ret, year);
ret = cursor->close(cursor);
return (ret);
@@ -245,7 +247,8 @@ setup_table(WT_SESSION *session)
cursor->set_key(cursor, p.id);
cursor->set_value(cursor,
p.last_name, p.first_name, p.term_start, p.term_end);
- fprintf(stderr, "SETUP: table insert %d-%d: %s %s\n",
+ fprintf(stderr,
+ "SETUP: table insert %" PRIu16 "-%" PRIu16 ": %s %s\n",
p.term_start, p.term_end,
p.first_name, p.last_name);
ret = cursor->insert(cursor);
diff --git a/examples/c/ex_schema.c b/examples/c/ex_schema.c
index fdf02d12302..70fc7eb2e62 100644
--- a/examples/c/ex_schema.c
+++ b/examples/c/ex_schema.c
@@ -69,7 +69,7 @@ main(void)
{
POP_RECORD *p;
WT_CONNECTION *conn;
- WT_CURSOR *cursor, *cursor2, *join_cursor;
+ WT_CURSOR *cursor, *cursor2, *join_cursor, *stat_cursor;
WT_SESSION *session;
const char *country;
uint64_t recno, population;
@@ -86,7 +86,8 @@ main(void)
} else
home = NULL;
- if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) {
+ if ((ret = wiredtiger_open(
+ home, NULL, "create,statistics=(fast)", &conn)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
home, wiredtiger_strerror(ret));
return (ret);
@@ -164,7 +165,8 @@ main(void)
ret = cursor->get_key(cursor, &recno);
ret = cursor->get_value(cursor, &country, &year, &population);
printf("ID %" PRIu64, recno);
- printf(": country %s, year %u, population %" PRIu64 "\n",
+ printf(
+ ": country %s, year %" PRIu16 ", population %" PRIu64 "\n",
country, year, population);
}
ret = cursor->close(cursor);
@@ -185,7 +187,8 @@ main(void)
ret = wiredtiger_struct_unpack(session,
value.data, value.size,
"5sHQ", &country, &year, &population);
- printf(": country %s, year %u, population %" PRIu64 "\n",
+ printf(
+ ": country %s, year %" PRIu16 ", population %" PRIu64 "\n",
country, year, population);
}
/*! [List the records in the table using raw mode.] */
@@ -201,7 +204,9 @@ main(void)
cursor->set_key(cursor, 2);
if ((ret = cursor->search(cursor)) == 0) {
ret = cursor->get_value(cursor, &country, &year, &population);
- printf("ID 2: country %s, year %u, population %" PRIu64 "\n",
+ printf(
+ "ID 2: "
+ "country %s, year %" PRIu16 ", population %" PRIu64 "\n",
country, year, population);
}
/*! [Read population from the primary column group] */
@@ -229,8 +234,8 @@ main(void)
cursor->set_key(cursor, "AU\0\0\0");
ret = cursor->search(cursor);
ret = cursor->get_value(cursor, &country, &year, &population);
- printf("AU: country %s, year %u, population %" PRIu64 "\n",
- country, (unsigned int)year, population);
+ printf("AU: country %s, year %" PRIu16 ", population %" PRIu64 "\n",
+ country, year, population);
/*! [Search in a simple index] */
ret = cursor->close(cursor);
@@ -241,8 +246,9 @@ main(void)
cursor->set_key(cursor, "USA\0\0", (uint16_t)1900);
ret = cursor->search(cursor);
ret = cursor->get_value(cursor, &country, &year, &population);
- printf("US 1900: country %s, year %u, population %" PRIu64 "\n",
- country, (unsigned int)year, population);
+ printf(
+ "US 1900: country %s, year %" PRIu16 ", population %" PRIu64 "\n",
+ country, year, population);
/*! [Search in a composite index] */
ret = cursor->close(cursor);
@@ -255,7 +261,7 @@ main(void)
"table:poptable(country,year)", NULL, NULL, &cursor);
while ((ret = cursor->next(cursor)) == 0) {
ret = cursor->get_value(cursor, &country, &year);
- printf("country %s, year %u\n", country, year);
+ printf("country %s, year %" PRIu16 "\n", country, year);
}
/*! [Return a subset of values from the table] */
ret = cursor->close(cursor);
@@ -273,7 +279,7 @@ main(void)
ret = cursor->get_value(cursor, &value);
ret = wiredtiger_struct_unpack(
session, value.data, value.size, "5sH", &country, &year);
- printf("country %s, year %u\n", country, year);
+ printf("country %s, year %" PRIu16 "\n", country, year);
}
/*! [Return a subset of values from the table using raw mode] */
ret = cursor->close(cursor);
@@ -288,7 +294,7 @@ main(void)
while ((ret = cursor->next(cursor)) == 0) {
ret = cursor->get_key(cursor, &country, &year);
ret = cursor->get_value(cursor, &recno);
- printf("row ID %" PRIu64 ": country %s, year %u\n",
+ printf("row ID %" PRIu64 ": country %s, year %" PRIu16 "\n",
recno, country, year);
}
/*! [Return the table's record number key using an index] */
@@ -305,7 +311,7 @@ main(void)
while ((ret = cursor->next(cursor)) == 0) {
ret = cursor->get_key(cursor, &country, &year);
ret = cursor->get_value(cursor, &population);
- printf("population %" PRIu64 ": country %s, year %u\n",
+ printf("population %" PRIu64 ": country %s, year %" PRIu16 "\n",
population, country, year);
}
/*! [Return a subset of the value columns from an index] */
@@ -320,7 +326,7 @@ main(void)
"index:poptable:country_plus_year()", NULL, NULL, &cursor);
while ((ret = cursor->next(cursor)) == 0) {
ret = cursor->get_key(cursor, &country, &year);
- printf("country %s, year %u\n", country, year);
+ printf("country %s, year %" PRIu16 "\n", country, year);
}
/*! [Access only the index] */
ret = cursor->close(cursor);
@@ -350,10 +356,19 @@ main(void)
ret = join_cursor->get_value(join_cursor, &country, &year,
&population);
printf("ID %" PRIu64, recno);
- printf(": country %s, year %u, population %" PRIu64 "\n",
+ printf(
+ ": country %s, year %" PRIu16 ", population %" PRIu64 "\n",
country, year, population);
}
/*! [Join cursors] */
+
+ /*! [Statistics cursor join cursor] */
+ ret = session->open_cursor(session,
+ "statistics:join",
+ join_cursor, NULL, &stat_cursor);
+ /*! [Statistics cursor join cursor] */
+
+ ret = stat_cursor->close(stat_cursor);
ret = join_cursor->close(join_cursor);
ret = cursor2->close(cursor2);
ret = cursor->close(cursor);
diff --git a/examples/c/ex_stat.c b/examples/c/ex_stat.c
index 65402230eb8..6c5c15aacc6 100644
--- a/examples/c/ex_stat.c
+++ b/examples/c/ex_stat.c
@@ -39,6 +39,7 @@
int print_cursor(WT_CURSOR *);
int print_database_stats(WT_SESSION *);
int print_file_stats(WT_SESSION *);
+int print_join_cursor_stats(WT_SESSION *);
int print_overflow_pages(WT_SESSION *);
int get_stat(WT_CURSOR *cursor, int stat_field, uint64_t *valuep);
int print_derived_stats(WT_SESSION *);
@@ -99,6 +100,37 @@ print_file_stats(WT_SESSION *session)
}
+/*
+ * print_join_cursor_stats --
+ * Create an index on column "v" of table "access", open a cursor on it
+ * and advance it once, join it (compare=gt) into a join cursor opened on
+ * the table, then open a statistics cursor on the join cursor and print
+ * it with print_cursor().
+ *
+ * The return values of the setup calls are overwritten without being
+ * checked. The value returned is the error from opening the statistics
+ * cursor, or otherwise the result of the final cursor close.
+ *
+ * NOTE(review): the early return after a failed statistics cursor open
+ * leaves join_cursor and idx_cursor open -- confirm they are released
+ * when the session or connection is closed.
+ */
int
+print_join_cursor_stats(WT_SESSION *session)
+{
+ WT_CURSOR *idx_cursor, *join_cursor, *stat_cursor;
+ int ret;
+
+ ret = session->create(
+ session, "index:access:idx", "columns=(v)");
+ ret = session->open_cursor(
+ session, "index:access:idx", NULL, NULL, &idx_cursor);
+ ret = idx_cursor->next(idx_cursor);
+ ret = session->open_cursor(
+ session, "join:table:access", NULL, NULL, &join_cursor);
+ ret = session->join(session, join_cursor, idx_cursor, "compare=gt");
+ ret = join_cursor->next(join_cursor);
+
+ /*! [statistics join cursor function] */
+ if ((ret = session->open_cursor(session,
+ "statistics:join", join_cursor, NULL, &stat_cursor)) != 0)
+ return (ret);
+
+ ret = print_cursor(stat_cursor);
+ ret = stat_cursor->close(stat_cursor);
+ /*! [statistics join cursor function] */
+
+ ret = join_cursor->close(join_cursor);
+ ret = idx_cursor->close(idx_cursor);
+
+ return (ret);
+}
+
+int
print_overflow_pages(WT_SESSION *session)
{
/*! [statistics retrieve by key] */
@@ -204,7 +236,8 @@ main(void)
ret = wiredtiger_open(home, NULL, "create,statistics=(all)", &conn);
ret = conn->open_session(conn, NULL, NULL, &session);
ret = session->create(
- session, "table:access", "key_format=S,value_format=S");
+ session, "table:access",
+ "key_format=S,value_format=S,columns=(k,v)");
ret = session->open_cursor(
session, "table:access", NULL, NULL, &cursor);
@@ -219,6 +252,8 @@ main(void)
ret = print_file_stats(session);
+ ret = print_join_cursor_stats(session);
+
ret = print_overflow_pages(session);
ret = print_derived_stats(session);
diff --git a/examples/java/com/wiredtiger/examples/ex_all.java b/examples/java/com/wiredtiger/examples/ex_all.java
index 09db8e0fd56..5fe767d49bf 100644
--- a/examples/java/com/wiredtiger/examples/ex_all.java
+++ b/examples/java/com/wiredtiger/examples/ex_all.java
@@ -326,6 +326,22 @@ public static int cursor_ops(Session session)
/*! [Display an error] */
}
+ {
+ /*! [Display an error thread safe] */
+ try {
+ String key = "non-existent key";
+ cursor.putKeyString(key);
+ if ((ret = cursor.remove()) != 0) {
+ System.err.println(
+ "cursor.remove: " + wiredtiger.wiredtiger_strerror(ret));
+ return (ret);
+ }
+ } catch (WiredTigerException wte) { /* Catch severe errors. */
+ System.err.println("cursor.remove exception: " + wte);
+ }
+ /*! [Display an error thread safe] */
+ }
+
/*! [Close the cursor] */
ret = cursor.close();
/*! [Close the cursor] */
diff --git a/examples/java/com/wiredtiger/examples/ex_schema.java b/examples/java/com/wiredtiger/examples/ex_schema.java
index be1077ee2df..7cc26acb479 100644
--- a/examples/java/com/wiredtiger/examples/ex_schema.java
+++ b/examples/java/com/wiredtiger/examples/ex_schema.java
@@ -76,7 +76,7 @@ public class ex_schema {
throws WiredTigerException
{
Connection conn;
- Cursor cursor, cursor2, join_cursor;
+ Cursor cursor, cursor2, join_cursor, stat_cursor;
Session session;
String country;
long recno, population;
@@ -106,7 +106,7 @@ public class ex_schema {
home = null;
try {
- conn = wiredtiger.open(home, "create");
+ conn = wiredtiger.open(home, "create,statistics=(fast)");
session = conn.open_session(null);
} catch (WiredTigerException wte) {
System.err.println("WiredTigerException: " + wte);
@@ -368,6 +368,13 @@ public class ex_schema {
", population " + population);
}
/*! [Join cursors] */
+
+ /*! [Statistics cursor join cursor] */
+ stat_cursor = session.open_cursor(
+ "statistics:join", join_cursor, null);
+ /*! [Statistics cursor join cursor] */
+
+ ret = stat_cursor.close();
ret = join_cursor.close();
ret = cursor2.close();
ret = cursor.close();
diff --git a/examples/java/com/wiredtiger/examples/ex_stat.java b/examples/java/com/wiredtiger/examples/ex_stat.java
index b0b83a2d3b2..f8877a4620e 100644
--- a/examples/java/com/wiredtiger/examples/ex_stat.java
+++ b/examples/java/com/wiredtiger/examples/ex_stat.java
@@ -92,6 +92,33 @@ public class ex_stat {
}
+ /**
+ * Create an index on column "v" of table "access", open a cursor on it
+ * and advance it once, join it (compare=gt) into a join cursor opened on
+ * the table, then open a statistics cursor on the join cursor and print
+ * it with print_cursor().
+ *
+ * The return values of the intermediate calls are overwritten without
+ * being checked; the value returned is the result of the final cursor
+ * close.
+ */
int
+ print_join_cursor_stats(Session session)
+ throws WiredTigerException
+ {
+ Cursor idx_cursor, join_cursor, stat_cursor;
+ int ret;
+
+ ret = session.create("index:access:idx", "columns=(v)");
+ idx_cursor = session.open_cursor("index:access:idx", null, null);
+ ret = idx_cursor.next();
+ join_cursor = session.open_cursor("join:table:access", null, null);
+ ret = session.join(join_cursor, idx_cursor, "compare=gt");
+ ret = join_cursor.next();
+
+ /*! [statistics join cursor function] */
+ stat_cursor = session.open_cursor("statistics:join", join_cursor, null);
+
+ ret = print_cursor(stat_cursor);
+ ret = stat_cursor.close();
+ /*! [statistics join cursor function] */
+
+ ret = join_cursor.close();
+ ret = idx_cursor.close();
+
+ return (ret);
+ }
+
+ int
print_overflow_pages(Session session)
throws WiredTigerException
{
@@ -220,7 +247,8 @@ public class ex_stat {
conn = wiredtiger.open(home, "create,statistics=(all)");
session = conn.open_session(null);
- ret = session.create("table:access", "key_format=S,value_format=S");
+ ret = session.create("table:access",
+ "key_format=S,value_format=S,columns=(k,v)");
cursor = session.open_cursor("table:access", null, null);
cursor.putKeyString("key");
@@ -234,6 +262,8 @@ public class ex_stat {
ret = print_file_stats(session);
+ ret = print_join_cursor_stats(session);
+
ret = print_overflow_pages(session);
ret = print_derived_stats(session);
diff --git a/ext/collators/revint/Makefile.am b/ext/collators/revint/Makefile.am
new file mode 100644
index 00000000000..8c85c6a4701
--- /dev/null
+++ b/ext/collators/revint/Makefile.am
@@ -0,0 +1,10 @@
+AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include
+
+noinst_LTLIBRARIES = libwiredtiger_revint_collator.la
+libwiredtiger_revint_collator_la_SOURCES = revint_collator.c
+
+# libtool hack: noinst_LTLIBRARIES turns off building shared libraries as well
+# as installation, it will only build static libraries. As far as I can tell,
+# the "approved" libtool way to turn them back on is by adding -rpath.
+libwiredtiger_revint_collator_la_LDFLAGS = \
+ -avoid-version -module -rpath /nowhere
diff --git a/ext/collators/revint/revint_collator.c b/ext/collators/revint/revint_collator.c
new file mode 100644
index 00000000000..30b5dc67556
--- /dev/null
+++ b/ext/collators/revint/revint_collator.c
@@ -0,0 +1,153 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <stdint.h>
+#include <wiredtiger_ext.h>
+
+/*
+ * A simple WiredTiger collator for indices having a single integer key,
+ * where the ordering is descending (reversed). This collator also
+ * requires that the primary key be an integer.
+ */
+
+/*
+ * Local collator structure. The WT_COLLATOR member must come first: the
+ * compare callback casts the WT_COLLATOR pointer it is given back to this
+ * structure in order to reach the extension API handle.
+ */
+typedef struct {
+ WT_COLLATOR collator; /* Must come first */
+ WT_EXTENSION_API *wt_api; /* Extension API */
+} REVINT_COLLATOR;
+
+/*
+ * revint_unpack --
+ *	Unpack the index key and, when present, the primary key from a
+ *	collator item. A missing primary key is reported as INT64_MIN. Once
+ *	the unpack stream has been opened it is closed on every path, and the
+ *	first error encountered is the one returned.
+ */
+static int
+revint_unpack(WT_EXTENSION_API *wtapi, WT_SESSION *session,
+    const WT_ITEM *key, int64_t *indexp, int64_t *primaryp)
+{
+	WT_PACK_STREAM *pstream;
+	int ret, tret;
+
+	if ((ret = wtapi->unpack_start(
+	    wtapi, session, "ii", key->data, key->size, &pstream)) != 0)
+		return (ret);
+
+	/* The index key is required; the primary key is optional. */
+	if ((ret = wtapi->unpack_int(wtapi, pstream, indexp)) == 0 &&
+	    wtapi->unpack_int(wtapi, pstream, primaryp) != 0)
+		/* A missing primary key is OK and sorts first. */
+		*primaryp = INT64_MIN;
+
+	/* Close the stream even after a failure, keeping the first error. */
+	if ((tret = wtapi->pack_close(wtapi, pstream, NULL)) != 0 && ret == 0)
+		ret = tret;
+	return (ret);
+}
+
+/*
+ * revint_compare --
+ *	WiredTiger reverse integer collation, used for tests. Sets *cmp to a
+ *	negative, zero or positive value and returns 0, or returns a non-zero
+ *	error without setting *cmp if either key cannot be unpacked.
+ */
+static int
+revint_compare(WT_COLLATOR *collator,
+    WT_SESSION *session, const WT_ITEM *k1, const WT_ITEM *k2, int *cmp)
+{
+	const REVINT_COLLATOR *revint_collator;
+	WT_EXTENSION_API *wtapi;
+	int ret;
+	int64_t i1, i2, p1, p2;
+
+	i1 = i2 = p1 = p2 = 0;
+	revint_collator = (const REVINT_COLLATOR *)collator;
+	wtapi = revint_collator->wt_api;
+
+	/*
+	 * All indices using this collator have an integer key, and the
+	 * primary key is also an integer. A collator is usually passed the
+	 * concatenation of index key and primary key (when available),
+	 * hence we unpack using "ii".
+	 *
+	 * A collator may also be called with an item that includes an index
+	 * key and no primary key. Among items having the same index key,
+	 * an item with no primary key should sort before an item with a
+	 * primary key. The reason is that if the application calls
+	 * WT_CURSOR::search on an index key for which there is more than
+	 * one value, the search key will not yet have a primary key. We
+	 * want to position the cursor at the 'first' matching index key so
+	 * that repeated calls to WT_CURSOR::next will see them all.
+	 *
+	 * To keep this code simple, we do not reverse the ordering
+	 * when comparing primary keys.
+	 */
+	if ((ret = revint_unpack(wtapi, session, k1, &i1, &p1)) != 0 ||
+	    (ret = revint_unpack(wtapi, session, k2, &i2, &p2)) != 0)
+		return (ret);
+
+	/* sorting is reversed */
+	if (i1 < i2)
+		*cmp = 1;
+	else if (i1 > i2)
+		*cmp = -1;
+	/* compare primary keys next, not reversed */
+	else if (p1 < p2)
+		*cmp = -1;
+	else if (p1 > p2)
+		*cmp = 1;
+	else
+		*cmp = 0; /* index key and primary key are same */
+
+	return (0);
+}
+
+/*
+ * revint_terminate --
+ *	Terminate callback: free the collator structure. Only the structure
+ *	itself is released, and the session argument is not used.
+ */
+static int
+revint_terminate(WT_COLLATOR *collator, WT_SESSION *session)
+{
+	/* Free the allocated memory. */
+	free(collator);
+
+	(void)session;				/* Unused parameters */
+	return (0);
+}
+
+/*
+ * wiredtiger_extension_init --
+ *	WiredTiger revint collation extension. Allocates the collator
+ *	structure, fills in its callbacks and extension API handle, and
+ *	registers it under the name "revint". Returns 0 on success, errno if
+ *	the allocation fails, or the error from add_collator.
+ */
+int
+wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
+{
+	REVINT_COLLATOR *revint_collator;
+	int ret;
+
+	(void)config;				/* Unused parameters */
+
+	if ((revint_collator = calloc(1, sizeof(REVINT_COLLATOR))) == NULL)
+		return (errno);
+
+	revint_collator->collator.compare = revint_compare;
+	revint_collator->collator.terminate = revint_terminate;
+	revint_collator->wt_api = connection->get_extension_api(connection);
+
+	/*
+	 * If registration fails the collator was not handed over to
+	 * WiredTiger, so release it here rather than leaking it.
+	 */
+	if ((ret = connection->add_collator(
+	    connection, "revint", &revint_collator->collator, NULL)) != 0)
+		free(revint_collator);
+	return (ret);
+}
diff --git a/ext/compressors/lz4/lz4_compress.c b/ext/compressors/lz4/lz4_compress.c
index 062307b721a..35159d0fa76 100644
--- a/ext/compressors/lz4/lz4_compress.c
+++ b/ext/compressors/lz4/lz4_compress.c
@@ -26,13 +26,15 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <wt_internal.h>
-
#include <lz4.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
+#include <wiredtiger_config.h>
+#include <wiredtiger.h>
+#include <wiredtiger_ext.h>
+
/* Local compressor structure. */
typedef struct {
WT_COMPRESSOR compressor; /* Must come first */
@@ -62,6 +64,22 @@ typedef struct {
uint32_t unused; /* Guaranteed to be 0 */
} LZ4_PREFIX;
+#ifdef WORDS_BIGENDIAN
+/*
+ * lz4_bswap32 --
+ *	Reverse the byte order of a 32-bit unsigned value, converting it
+ *	between little-endian and big-endian representations.
+ */
+static inline uint32_t
+lz4_bswap32(uint32_t v)
+{
+	uint32_t swapped;
+
+	swapped = (v << 24) & 0xff000000;	/* lowest byte to the top */
+	swapped |= (v << 8) & 0x00ff0000;
+	swapped |= (v >> 8) & 0x0000ff00;
+	swapped |= (v >> 24) & 0x000000ff;	/* highest byte to the bottom */
+	return (swapped);
+}
+
/*
* lz4_prefix_swap --
* The additional information is written in little-endian format, handle
@@ -70,15 +88,12 @@ typedef struct {
static inline void
lz4_prefix_swap(LZ4_PREFIX *prefix)
{
-#ifdef WORDS_BIGENDIAN
- prefix->compressed_len = __wt_bswap32(prefix->compressed_len);
- prefix->uncompressed_len = __wt_bswap32(prefix->uncompressed_len);
- prefix->useful_len = __wt_bswap32(prefix->useful_len);
- prefix->unused = __wt_bswap32(prefix->unused);
-#else
- WT_UNUSED(prefix);
-#endif
+ prefix->compressed_len = lz4_bswap32(prefix->compressed_len);
+ prefix->uncompressed_len = lz4_bswap32(prefix->uncompressed_len);
+ prefix->useful_len = lz4_bswap32(prefix->useful_len);
+ prefix->unused = lz4_bswap32(prefix->unused);
}
+#endif
/*
* lz4_error --
@@ -127,7 +142,9 @@ lz4_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
prefix.uncompressed_len = (uint32_t)src_len;
prefix.useful_len = (uint32_t)src_len;
prefix.unused = 0;
+#ifdef WORDS_BIGENDIAN
lz4_prefix_swap(&prefix);
+#endif
memcpy(dst, &prefix, sizeof(LZ4_PREFIX));
*result_lenp = (size_t)lz4_len + sizeof(LZ4_PREFIX);
@@ -163,7 +180,9 @@ lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
* decompressed bytes to return from the start of the source buffer.
*/
memcpy(&prefix, src, sizeof(LZ4_PREFIX));
+#ifdef WORDS_BIGENDIAN
lz4_prefix_swap(&prefix);
+#endif
/*
* Decompress, starting after the prefix bytes. Use safe decompression:
@@ -278,7 +297,9 @@ lz4_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
prefix.uncompressed_len = (uint32_t)sourceSize;
prefix.useful_len = offsets[slot];
prefix.unused = 0;
+#ifdef WORDS_BIGENDIAN
lz4_prefix_swap(&prefix);
+#endif
memcpy(dst, &prefix, sizeof(LZ4_PREFIX));
*result_slotsp = slot;
diff --git a/ext/compressors/snappy/snappy_compress.c b/ext/compressors/snappy/snappy_compress.c
index fcefb8bb575..981e334a2de 100644
--- a/ext/compressors/snappy/snappy_compress.c
+++ b/ext/compressors/snappy/snappy_compress.c
@@ -26,13 +26,15 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <wt_internal.h>
-
#include <snappy-c.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
+#include <wiredtiger_config.h>
+#include <wiredtiger.h>
+#include <wiredtiger_ext.h>
+
/* Local compressor structure. */
typedef struct {
WT_COMPRESSOR compressor; /* Must come first */
@@ -40,6 +42,27 @@ typedef struct {
WT_EXTENSION_API *wt_api; /* Extension API */
} SNAPPY_COMPRESSOR;
+#ifdef WORDS_BIGENDIAN
+/*
+ * snappy_bswap64 --
+ *	Reverse the byte order of a 64-bit unsigned value, converting it
+ *	between little-endian and big-endian representations.
+ */
+static inline uint64_t
+snappy_bswap64(uint64_t v)
+{
+	uint64_t swapped;
+	int i;
+
+	/* Peel bytes off the low end of v and push them onto the result. */
+	for (swapped = 0, i = 0; i < 8; ++i, v >>= 8)
+		swapped = (swapped << 8) | (v & 0xff);
+	return (swapped);
+}
+#endif
+
/*
* wt_snappy_error --
* Output an error message, and return a standard error code.
@@ -109,7 +132,7 @@ wt_snappy_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
* Store the value in little-endian format.
*/
#ifdef WORDS_BIGENDIAN
- snaplen = __wt_bswap64(snaplen);
+ snaplen = snappy_bswap64(snaplen);
#endif
*(size_t *)dst = snaplen;
} else
@@ -142,7 +165,7 @@ wt_snappy_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
*/
snaplen = *(size_t *)src;
#ifdef WORDS_BIGENDIAN
- snaplen = __wt_bswap64(snaplen);
+ snaplen = snappy_bswap64(snaplen);
#endif
if (snaplen + sizeof(size_t) > src_len) {
(void)wt_api->err_printf(wt_api,
diff --git a/src/async/async_op.c b/src/async/async_op.c
index 130c704757b..970c33c3360 100644
--- a/src/async/async_op.c
+++ b/src/async/async_op.c
@@ -349,14 +349,8 @@ __wt_async_op_init(WT_SESSION_IMPL *session)
WT_ERR(__async_op_init(conn, op, i));
}
return (0);
-err:
- if (async->async_ops != NULL) {
- __wt_free(session, async->async_ops);
- async->async_ops = NULL;
- }
- if (async->async_queue != NULL) {
- __wt_free(session, async->async_queue);
- async->async_queue = NULL;
- }
+
+err: __wt_free(session, async->async_ops);
+ __wt_free(session, async->async_queue);
return (ret);
}
diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c
index 03059c8f23a..812bf99acfb 100644
--- a/src/block/block_ckpt.c
+++ b/src/block/block_ckpt.c
@@ -812,8 +812,7 @@ __ckpt_string(WT_SESSION_IMPL *session,
WT_RET(__wt_block_buffer_to_ckpt(session, block, addr, ci));
WT_RET(__wt_buf_fmt(session, buf,
- "version=%d",
- ci->version));
+ "version=%" PRIu8, ci->version));
if (ci->root_offset == WT_BLOCK_INVALID_OFFSET)
WT_RET(__wt_buf_catfmt(session, buf, ", root=[Empty]"));
else
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index dceaae8bb99..0bb75d129e1 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -69,6 +69,21 @@ __bm_checkpoint(WT_BM *bm,
}
/*
+ * __bm_checkpoint_readonly --
+ * Write a buffer into a block, creating a checkpoint; readonly version.
+ * The buffer, checkpoint list and checksum flag are ignored and the
+ * result of __bm_readonly() is returned.
+ * NOTE(review): __bm_readonly() is not visible in this hunk; presumably
+ * it fails the operation because the file is read-only -- confirm.
+ */
+static int
+__bm_checkpoint_readonly(WT_BM *bm,
+ WT_SESSION_IMPL *session, WT_ITEM *buf, WT_CKPT *ckptbase, bool data_cksum)
+{
+ WT_UNUSED(buf);
+ WT_UNUSED(ckptbase);
+ WT_UNUSED(data_cksum);
+
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_checkpoint_load --
* Load a checkpoint.
*/
@@ -113,6 +128,16 @@ __bm_checkpoint_resolve(WT_BM *bm, WT_SESSION_IMPL *session)
}
/*
+ * __bm_checkpoint_resolve_readonly --
+ * Resolve the checkpoint; readonly version.
+ * Does no work of its own and returns the result of __bm_readonly().
+ * NOTE(review): __bm_readonly() is not visible in this hunk; presumably
+ * it fails the operation because the file is read-only -- confirm.
+ */
+static int
+__bm_checkpoint_resolve_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
+{
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_checkpoint_unload --
* Unload a checkpoint point.
*/
@@ -161,6 +186,16 @@ __bm_compact_end(WT_BM *bm, WT_SESSION_IMPL *session)
}
/*
+ * __bm_compact_end_readonly --
+ * End a block manager compaction; readonly version.
+ * Does no work of its own and returns the result of __bm_readonly().
+ * NOTE(review): __bm_readonly() is not visible in this hunk; presumably
+ * it fails the operation because the file is read-only -- confirm.
+ */
+static int
+__bm_compact_end_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
+{
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_compact_page_skip --
* Return if a page is useful for compaction.
*/
@@ -173,6 +208,21 @@ __bm_compact_page_skip(WT_BM *bm, WT_SESSION_IMPL *session,
}
/*
+ * __bm_compact_page_skip_readonly --
+ * Return if a page is useful for compaction; readonly version.
+ * The address, its size and the skip output are ignored (skipp is not
+ * written) and the result of __bm_readonly() is returned.
+ * NOTE(review): __bm_readonly() is not visible in this hunk; presumably
+ * it fails the operation because the file is read-only -- confirm.
+ */
+static int
+__bm_compact_page_skip_readonly(WT_BM *bm, WT_SESSION_IMPL *session,
+ const uint8_t *addr, size_t addr_size, bool *skipp)
+{
+ WT_UNUSED(addr);
+ WT_UNUSED(addr_size);
+ WT_UNUSED(skipp);
+
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_compact_skip --
* Return if a file can be compacted.
*/
@@ -183,6 +233,18 @@ __bm_compact_skip(WT_BM *bm, WT_SESSION_IMPL *session, bool *skipp)
}
/*
+ * __bm_compact_skip_readonly --
+ * Return if a file can be compacted; readonly version.
+ * The skip output is ignored (skipp is not written) and the result of
+ * __bm_readonly() is returned.
+ * NOTE(review): __bm_readonly() is not visible in this hunk; presumably
+ * it fails the operation because the file is read-only -- confirm.
+ */
+static int
+__bm_compact_skip_readonly(WT_BM *bm, WT_SESSION_IMPL *session, bool *skipp)
+{
+ WT_UNUSED(skipp);
+
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_compact_start --
* Start a block manager compaction.
*/
@@ -193,6 +255,16 @@ __bm_compact_start(WT_BM *bm, WT_SESSION_IMPL *session)
}
/*
+ * __bm_compact_start_readonly --
+ * Start a block manager compaction; readonly version.
+ * Does no work of its own and returns the result of __bm_readonly().
+ * NOTE(review): __bm_readonly() is not visible in this hunk; presumably
+ * it fails the operation because the file is read-only -- confirm.
+ */
+static int
+__bm_compact_start_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
+{
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_free --
* Free a block of space to the underlying file.
*/
@@ -204,6 +276,20 @@ __bm_free(WT_BM *bm,
}
/*
+ * __bm_free_readonly --
+ * Free a block of space to the underlying file; readonly version.
+ */
+static int
+__bm_free_readonly(WT_BM *bm,
+ WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+{
+ WT_UNUSED(addr);
+ WT_UNUSED(addr_size);
+
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_is_mapped --
* Return if the file is mapped into memory.
*/
@@ -226,6 +312,31 @@ __bm_salvage_end(WT_BM *bm, WT_SESSION_IMPL *session)
}
/*
+ * __bm_salvage_end_readonly --
+ * End a block manager salvage; readonly version.
+ */
+static int
+__bm_salvage_end_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
+{
+ return (__bm_readonly(bm, session));
+}
+
+/*
+ * __bm_salvage_next_readonly --
+ * Return the next block from the file; readonly version.
+ */
+static int
+__bm_salvage_next_readonly(WT_BM *bm,
+ WT_SESSION_IMPL *session, uint8_t *addr, size_t *addr_sizep, bool *eofp)
+{
+ WT_UNUSED(addr);
+ WT_UNUSED(addr_sizep);
+ WT_UNUSED(eofp);
+
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_salvage_next --
* Return the next block from the file.
*/
@@ -248,6 +359,16 @@ __bm_salvage_start(WT_BM *bm, WT_SESSION_IMPL *session)
}
/*
+ * __bm_salvage_start_readonly --
+ * Start a block manager salvage; readonly version.
+ */
+static int
+__bm_salvage_start_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
+{
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_salvage_valid --
* Inform salvage a block is valid.
*/
@@ -260,6 +381,21 @@ __bm_salvage_valid(WT_BM *bm,
}
/*
+ * __bm_salvage_valid_readonly --
+ * Inform salvage a block is valid; readonly version.
+ */
+static int
+__bm_salvage_valid_readonly(WT_BM *bm,
+ WT_SESSION_IMPL *session, uint8_t *addr, size_t addr_size, bool valid)
+{
+ WT_UNUSED(addr);
+ WT_UNUSED(addr_size);
+ WT_UNUSED(valid);
+
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_stat --
* Block-manager statistics.
*/
@@ -283,6 +419,18 @@ __bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool async)
}
/*
+ * __bm_sync_readonly --
+ * Flush a file to disk; readonly version.
+ */
+static int
+__bm_sync_readonly(WT_BM *bm, WT_SESSION_IMPL *session, bool async)
+{
+ WT_UNUSED(async);
+
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_verify_addr --
* Verify an address.
*/
@@ -327,6 +475,23 @@ __bm_write(WT_BM *bm, WT_SESSION_IMPL *session,
}
/*
+ * __bm_write_readonly --
+ * Write a buffer into a block, returning the block's address cookie;
+ * readonly version.
+ */
+static int
+__bm_write_readonly(WT_BM *bm, WT_SESSION_IMPL *session,
+ WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum)
+{
+ WT_UNUSED(buf);
+ WT_UNUSED(addr);
+ WT_UNUSED(addr_sizep);
+ WT_UNUSED(data_cksum);
+
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_write_size --
* Return the buffer size required to write a block.
*/
@@ -337,84 +502,68 @@ __bm_write_size(WT_BM *bm, WT_SESSION_IMPL *session, size_t *sizep)
}
/*
+ * __bm_write_size_readonly --
+ * Return the buffer size required to write a block; readonly version.
+ */
+static int
+__bm_write_size_readonly(WT_BM *bm, WT_SESSION_IMPL *session, size_t *sizep)
+{
+ WT_UNUSED(sizep);
+
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_method_set --
* Set up the legal methods.
*/
static void
__bm_method_set(WT_BM *bm, bool readonly)
{
+ bm->addr_invalid = __bm_addr_invalid;
+ bm->addr_string = __bm_addr_string;
+ bm->block_header = __bm_block_header;
+ bm->checkpoint = __bm_checkpoint;
+ bm->checkpoint_load = __bm_checkpoint_load;
+ bm->checkpoint_resolve = __bm_checkpoint_resolve;
+ bm->checkpoint_unload = __bm_checkpoint_unload;
+ bm->close = __bm_close;
+ bm->compact_end = __bm_compact_end;
+ bm->compact_page_skip = __bm_compact_page_skip;
+ bm->compact_skip = __bm_compact_skip;
+ bm->compact_start = __bm_compact_start;
+ bm->free = __bm_free;
+ bm->is_mapped = __bm_is_mapped;
+ bm->preload = __wt_bm_preload;
+ bm->read = __wt_bm_read;
+ bm->salvage_end = __bm_salvage_end;
+ bm->salvage_next = __bm_salvage_next;
+ bm->salvage_start = __bm_salvage_start;
+ bm->salvage_valid = __bm_salvage_valid;
+ bm->size = __wt_block_manager_size;
+ bm->stat = __bm_stat;
+ bm->sync = __bm_sync;
+ bm->verify_addr = __bm_verify_addr;
+ bm->verify_end = __bm_verify_end;
+ bm->verify_start = __bm_verify_start;
+ bm->write = __bm_write;
+ bm->write_size = __bm_write_size;
+
if (readonly) {
- bm->addr_invalid = __bm_addr_invalid;
- bm->addr_string = __bm_addr_string;
- bm->block_header = __bm_block_header;
- bm->checkpoint = (int (*)(WT_BM *, WT_SESSION_IMPL *,
- WT_ITEM *, WT_CKPT *, bool))__bm_readonly;
- bm->checkpoint_load = __bm_checkpoint_load;
- bm->checkpoint_resolve =
- (int (*)(WT_BM *, WT_SESSION_IMPL *))__bm_readonly;
- bm->checkpoint_unload = __bm_checkpoint_unload;
- bm->close = __bm_close;
- bm->compact_end =
- (int (*)(WT_BM *, WT_SESSION_IMPL *))__bm_readonly;
- bm->compact_page_skip = (int (*)(WT_BM *, WT_SESSION_IMPL *,
- const uint8_t *, size_t, bool *))__bm_readonly;
- bm->compact_skip = (int (*)
- (WT_BM *, WT_SESSION_IMPL *, bool *))__bm_readonly;
- bm->compact_start =
- (int (*)(WT_BM *, WT_SESSION_IMPL *))__bm_readonly;
- bm->free = (int (*)(WT_BM *,
- WT_SESSION_IMPL *, const uint8_t *, size_t))__bm_readonly;
- bm->is_mapped = __bm_is_mapped;
- bm->preload = __wt_bm_preload;
- bm->read = __wt_bm_read;
- bm->salvage_end = (int (*)
- (WT_BM *, WT_SESSION_IMPL *))__bm_readonly;
- bm->salvage_next = (int (*)(WT_BM *, WT_SESSION_IMPL *,
- uint8_t *, size_t *, bool *))__bm_readonly;
- bm->salvage_start = (int (*)
- (WT_BM *, WT_SESSION_IMPL *))__bm_readonly;
- bm->salvage_valid = (int (*)(WT_BM *,
- WT_SESSION_IMPL *, uint8_t *, size_t, bool))__bm_readonly;
- bm->size = __wt_block_manager_size;
- bm->stat = __bm_stat;
- bm->sync =
- (int (*)(WT_BM *, WT_SESSION_IMPL *, bool))__bm_readonly;
- bm->verify_addr = __bm_verify_addr;
- bm->verify_end = __bm_verify_end;
- bm->verify_start = __bm_verify_start;
- bm->write = (int (*)(WT_BM *, WT_SESSION_IMPL *,
- WT_ITEM *, uint8_t *, size_t *, bool))__bm_readonly;
- bm->write_size = (int (*)
- (WT_BM *, WT_SESSION_IMPL *, size_t *))__bm_readonly;
- } else {
- bm->addr_invalid = __bm_addr_invalid;
- bm->addr_string = __bm_addr_string;
- bm->block_header = __bm_block_header;
- bm->checkpoint = __bm_checkpoint;
- bm->checkpoint_load = __bm_checkpoint_load;
- bm->checkpoint_resolve = __bm_checkpoint_resolve;
- bm->checkpoint_unload = __bm_checkpoint_unload;
- bm->close = __bm_close;
- bm->compact_end = __bm_compact_end;
- bm->compact_page_skip = __bm_compact_page_skip;
- bm->compact_skip = __bm_compact_skip;
- bm->compact_start = __bm_compact_start;
- bm->free = __bm_free;
- bm->is_mapped = __bm_is_mapped;
- bm->preload = __wt_bm_preload;
- bm->read = __wt_bm_read;
- bm->salvage_end = __bm_salvage_end;
- bm->salvage_next = __bm_salvage_next;
- bm->salvage_start = __bm_salvage_start;
- bm->salvage_valid = __bm_salvage_valid;
- bm->size = __wt_block_manager_size;
- bm->stat = __bm_stat;
- bm->sync = __bm_sync;
- bm->verify_addr = __bm_verify_addr;
- bm->verify_end = __bm_verify_end;
- bm->verify_start = __bm_verify_start;
- bm->write = __bm_write;
- bm->write_size = __bm_write_size;
+ bm->checkpoint = __bm_checkpoint_readonly;
+ bm->checkpoint_resolve = __bm_checkpoint_resolve_readonly;
+ bm->compact_end = __bm_compact_end_readonly;
+ bm->compact_page_skip = __bm_compact_page_skip_readonly;
+ bm->compact_skip = __bm_compact_skip_readonly;
+ bm->compact_start = __bm_compact_start_readonly;
+ bm->free = __bm_free_readonly;
+ bm->salvage_end = __bm_salvage_end_readonly;
+ bm->salvage_next = __bm_salvage_next_readonly;
+ bm->salvage_start = __bm_salvage_start_readonly;
+ bm->salvage_valid = __bm_salvage_valid_readonly;
+ bm->sync = __bm_sync_readonly;
+ bm->write = __bm_write_readonly;
+ bm->write_size = __bm_write_size_readonly;
}
}
diff --git a/src/block/block_open.c b/src/block/block_open.c
index d9b2f908737..adb745c99e7 100644
--- a/src/block/block_open.c
+++ b/src/block/block_open.c
@@ -369,7 +369,7 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block)
WT_ERR_MSG(session, WT_ERROR,
"unsupported WiredTiger file version: this build only "
"supports major/minor versions up to %d/%d, and the file "
- "is version %d/%d",
+ "is version %" PRIu16 "/%" PRIu16,
WT_BLOCK_MAJOR_VERSION, WT_BLOCK_MINOR_VERSION,
desc->majorv, desc->minorv);
diff --git a/src/block/block_write.c b/src/block/block_write.c
index 4c6ac198fe4..e05a430832e 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -206,10 +206,16 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
uint32_t cksum;
bool local_locked;
- blk = WT_BLOCK_HEADER_REF(buf->mem);
fh = block->fh;
/*
+ * Clear the block header to ensure all of it is initialized, even the
+ * unused fields.
+ */
+ blk = WT_BLOCK_HEADER_REF(buf->mem);
+ memset(blk, 0, sizeof(*blk));
+
+ /*
* Swap the page-header as needed; this doesn't belong here, but it's
* the best place to catch all callers.
*/
diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c
index 12df19a7e04..9cc56c56452 100644
--- a/src/btree/bt_compact.c
+++ b/src/btree/bt_compact.c
@@ -96,14 +96,13 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
WT_BTREE *btree;
WT_DECL_RET;
WT_REF *ref;
- bool block_manager_begin, skip;
+ bool skip;
WT_UNUSED(cfg);
btree = S2BT(session);
bm = btree->bm;
ref = NULL;
- block_manager_begin = false;
WT_STAT_FAST_DATA_INCR(session, session_compact);
@@ -123,24 +122,12 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
* We need to ensure we don't race with page reconciliation as it's
* writing the page modify information.
*
- * There are three ways we call reconciliation: checkpoints, threads
- * writing leaf pages (usually in preparation for a checkpoint or if
- * closing a file), and eviction.
- *
- * We're holding the schema lock which serializes with checkpoints.
- */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
-
- /*
- * Get the tree handle's flush lock which blocks threads writing leaf
- * pages.
+ * There are two ways we call reconciliation: checkpoints and eviction.
+ * Get the tree's flush lock which blocks threads writing pages for
+ * checkpoints.
*/
__wt_spin_lock(session, &btree->flush_lock);
- /* Start compaction. */
- WT_ERR(bm->compact_start(bm, session));
- block_manager_begin = true;
-
/* Walk the tree reviewing pages to see if they should be re-written. */
for (;;) {
/*
@@ -170,9 +157,6 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
err: if (ref != NULL)
WT_TRET(__wt_page_release(session, ref, 0));
- if (block_manager_begin)
- WT_TRET(bm->compact_end(bm, session));
-
/* Unblock threads writing leaf pages. */
__wt_spin_unlock(session, &btree->flush_lock);
diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c
index a083ec4016e..7475c0f1312 100644
--- a/src/btree/bt_curprev.c
+++ b/src/btree/bt_curprev.c
@@ -51,7 +51,8 @@ restart:
if (cbt->btree->type == BTREE_ROW) {
key.data = WT_INSERT_KEY(current);
key.size = WT_INSERT_KEY_SIZE(current);
- WT_RET(__wt_search_insert(session, cbt, &key));
+ WT_RET(__wt_search_insert(
+ session, cbt, cbt->ins_head, &key));
} else
cbt->ins = __col_insert_search(cbt->ins_head,
cbt->ins_stack, cbt->next_stack,
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index c11b7d35de6..1f3ac443495 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -173,13 +173,18 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
*/
break;
case BTREE_COL_VAR:
+ /* The search function doesn't check for empty pages. */
+ if (page->pg_var_entries == 0)
+ return (false);
+ WT_ASSERT(session, cbt->slot < page->pg_var_entries);
+
/*
- * If search returned an insert object, there may or may not be
- * a matching on-page object, we have to check. Variable-length
- * column-store pages don't map one-to-one to keys, but have
- * "slots", check if search returned a valid slot.
+ * Column-store updates aren't stored on the page, instead they
+ * are stored as "insert" objects. If search returned an insert
+ * object we can't return, the returned on-page object must be
+ * checked for a match.
*/
- if (cbt->slot >= page->pg_var_entries)
+ if (cbt->ins != NULL && !F_ISSET(cbt, WT_CBT_VAR_ONPAGE_MATCH))
return (false);
/*
@@ -194,6 +199,11 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
return (false);
break;
case BTREE_ROW:
+ /* The search function doesn't check for empty pages. */
+ if (page->pg_row_entries == 0)
+ return (false);
+ WT_ASSERT(session, cbt->slot < page->pg_row_entries);
+
/*
* See above: for row-store, no insert object can have the same
* key as an on-page object, we're done.
@@ -201,15 +211,6 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
if (cbt->ins != NULL)
return (false);
- /*
- * Check if searched returned a valid slot (the failure mode is
- * an empty page, the search function doesn't check, and so the
- * more exact test is "page->pg_row_entries == 0", but this test
- * mirrors the column-store test).
- */
- if (cbt->slot >= page->pg_row_entries)
- return (false);
-
/* Updates are stored on the page, check for a delete. */
if (page->pg_row_upd != NULL && (upd = __wt_txn_read(
session, page->pg_row_upd[cbt->slot])) != NULL) {
@@ -1162,22 +1163,14 @@ int
__wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
{
WT_BTREE *btree;
- WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- cbt = (start != NULL) ? start : stop;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- btree = cbt->btree;
+ session = (WT_SESSION_IMPL *)start->iface.session;
+ btree = start->btree;
WT_STAT_FAST_DATA_INCR(session, cursor_truncate);
/*
- * We always delete in a forward direction because it's faster, assert
- * our caller provided us with a start cursor.
- */
- WT_ASSERT(session, start != NULL);
-
- /*
* For recovery, log the start and stop keys for a truncate operation,
* not the individual records removed. On the other hand, for rollback
* we need to keep track of all the in-memory operations.
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index 795111d53f9..1f739c9572e 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -337,8 +337,7 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
copy = WT_ROW_KEY_COPY(rip);
(void)__wt_row_leaf_key_info(
page, copy, &ikey, NULL, NULL, NULL);
- if (ikey != NULL)
- __wt_free(session, ikey);
+ __wt_free(session, ikey);
}
/*
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 2db3ca7d984..1d33a7e7c9a 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -36,7 +36,8 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
btree = S2BT(session);
/* Checkpoint files are readonly. */
- readonly = dhandle->checkpoint != NULL;
+ readonly = (dhandle->checkpoint != NULL ||
+ F_ISSET(S2C(session), WT_CONN_READONLY));
/* Get the checkpoint information for this name/checkpoint pair. */
WT_CLEAR(ckpt);
@@ -349,7 +350,7 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
/* Initialize locks. */
WT_RET(__wt_rwlock_alloc(
session, &btree->ovfl_lock, "btree overflow lock"));
- WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush lock"));
+ WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush"));
btree->checkpointing = WT_CKPT_OFF; /* Not checkpointing */
btree->modified = 0; /* Clean */
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index ac9faef4ff2..5cf6a9bf2bc 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -281,10 +281,8 @@ err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
* On error, upd points to a single unlinked WT_UPDATE structure,
* first_upd points to a list.
*/
- if (upd != NULL)
- __wt_free(session, upd);
- if (first_upd != NULL)
- __wt_free_update_list(session, first_upd);
+ __wt_free(session, upd);
+ __wt_free_update_list(session, first_upd);
__wt_scr_free(session, &current_key);
__wt_scr_free(session, &las_addr);
@@ -460,12 +458,12 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
WT_DECL_RET;
WT_PAGE *page;
u_int sleep_cnt, wait_cnt;
- bool busy, cache_work, oldgen, stalled;
+ bool busy, cache_work, evict_soon, stalled;
int force_attempts;
btree = S2BT(session);
- for (oldgen = stalled = false,
+ for (evict_soon = stalled = false,
force_attempts = 0, sleep_cnt = wait_cnt = 0;;) {
switch (ref->state) {
case WT_REF_DELETED:
@@ -486,7 +484,16 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
WT_RET(__wt_cache_eviction_check(
session, 1, NULL));
WT_RET(__page_read(session, ref));
- oldgen = LF_ISSET(WT_READ_WONT_NEED) ||
+
+ /*
+ * If configured to not trash the cache, leave the page
+ * generation unset, we'll set it before returning to
+ * the oldest read generation, so the page is forcibly
+ * evicted as soon as possible. We don't do that set
+ * here because we don't want to evict the page before
+ * we "acquire" it.
+ */
+ evict_soon = LF_ISSET(WT_READ_WONT_NEED) ||
F_ISSET(session, WT_SESSION_NO_CACHE);
continue;
case WT_REF_READING:
@@ -575,20 +582,24 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
}
/*
- * If we read the page and we are configured to not
- * trash the cache, set the oldest read generation so
- * the page is forcibly evicted as soon as possible.
+ * If we read the page and are configured to not trash
+ * the cache, and no other thread has already used the
+ * page, set the oldest read generation so the page is
+ * forcibly evicted as soon as possible.
*
- * Otherwise, update the page's read generation.
+ * Otherwise, if we read the page, or, if configured to
+ * update the page's read generation and the page isn't
+ * already flagged for forced eviction, update the page
+ * read generation.
*/
page = ref->page;
- if (oldgen && page->read_gen == WT_READGEN_NOTSET)
- __wt_page_evict_soon(page);
- else if (!LF_ISSET(WT_READ_NO_GEN) &&
- page->read_gen != WT_READGEN_OLDEST &&
- page->read_gen < __wt_cache_read_gen(session))
- page->read_gen =
- __wt_cache_read_gen_bump(session);
+ if (page->read_gen == WT_READGEN_NOTSET) {
+ if (evict_soon)
+ __wt_page_evict_soon(page);
+ else
+ __wt_cache_read_gen_new(session, page);
+ } else if (!LF_ISSET(WT_READ_NO_GEN))
+ __wt_cache_read_gen_bump(session, page);
skip_evict:
/*
* Check if we need an autocommit transaction.
diff --git a/src/btree/bt_rebalance.c b/src/btree/bt_rebalance.c
index 86360e83ddf..d94eb2ddd80 100644
--- a/src/btree/bt_rebalance.c
+++ b/src/btree/bt_rebalance.c
@@ -412,6 +412,7 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[])
WT_UNUSED(cfg);
btree = S2BT(session);
+ evict_reset = false;
/*
* If the tree has never been written to disk, we're done, rebalance
@@ -438,7 +439,8 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[])
* cache is the root page, and that cannot be evicted; however, this way
* eviction ignores the tree entirely.)
*/
- WT_ERR(__wt_evict_file_exclusive_on(session, &evict_reset));
+ WT_ERR(__wt_evict_file_exclusive_on(session));
+ evict_reset = true;
/* Recursively walk the tree. */
switch (rs->type) {
@@ -470,7 +472,10 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[])
btree->root.page = rs->root;
rs->root = NULL;
-err: /* Discard any leftover root page we created. */
+err: if (evict_reset)
+ __wt_evict_file_exclusive_off(session);
+
+ /* Discard any leftover root page we created. */
if (rs->root != NULL) {
__wt_page_modify_clear(session, rs->root);
__wt_page_out(session, &rs->root);
diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c
index 8d78bda79fb..0e064d306b6 100644
--- a/src/btree/bt_slvg.c
+++ b/src/btree/bt_slvg.c
@@ -1206,8 +1206,7 @@ __slvg_col_build_internal(
__wt_root_ref_init(&ss->root_ref, page, true);
if (0) {
-err: if (addr != NULL)
- __wt_free(session, addr);
+err: __wt_free(session, addr);
__wt_page_out(session, &page);
}
return (ret);
@@ -1868,8 +1867,7 @@ __slvg_row_build_internal(
__wt_root_ref_init(&ss->root_ref, page, false);
if (0) {
-err: if (addr != NULL)
- __wt_free(session, addr);
+err: __wt_free(session, addr);
__wt_page_out(session, &page);
}
return (ret);
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index bd38451d5d1..4f16a290958 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -151,8 +151,7 @@ __wt_split_stash_discard_all(
for (i = 0, stash = session->split_stash;
i < session->split_stash_cnt;
++i, ++stash)
- if (stash->p != NULL)
- __wt_free(session_safe, stash->p);
+ __wt_free(session_safe, stash->p);
__wt_free(session_safe, session->split_stash);
session->split_stash_cnt = session->split_stash_alloc = 0;
@@ -1383,11 +1382,27 @@ __split_internal_should_split(WT_SESSION_IMPL *session, WT_REF *ref)
static int
__split_parent_climb(WT_SESSION_IMPL *session, WT_PAGE *page, bool page_hazard)
{
+ WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE *parent;
WT_REF *ref;
bool parent_hazard;
+ btree = S2BT(session);
+
+ /*
+ * Disallow internal splits during the final pass of a checkpoint. Most
+ * splits are already disallowed during checkpoints, but an important
+ * exception is insert splits. The danger is an insert split creates a
+ * new chunk of the namespace, and then the internal split will move it
+ * to a different part of the tree where it will be written; in other
+ * words, in one part of the tree we'll skip the newly created insert
+ * split chunk, but we'll write it upon finding it in a different part
+ * of the tree.
+ */
+ if (btree->checkpointing != WT_CKPT_OFF)
+ return (__split_internal_unlock(session, page, page_hazard));
+
/*
* Page splits trickle up the tree, that is, as leaf pages grow large
* enough and are evicted, they'll split into their parent. And, as
@@ -1771,8 +1786,8 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
type, WT_INSERT_RECNO(moved_ins), 0, false, &right));
/*
- * The new page is dirty by definition, column-store splits update the
- * page-modify structure, so create it now.
+ * The new page is dirty by definition, plus column-store splits update
+ * the page-modify structure, so create it now.
*/
WT_ERR(__wt_page_modify_init(session, right));
__wt_page_modify_set(session, right);
@@ -1813,15 +1828,6 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
- * We modified the page above, which will have set the first dirty
- * transaction to the last transaction current running. However, the
- * updates we installed may be older than that. Set the first dirty
- * transaction to an impossibly old value so this page is never skipped
- * in a checkpoint.
- */
- right->modify->first_dirty_txn = WT_TXN_FIRST;
-
- /*
* Calculate how much memory we're moving: figure out how deep the skip
* list stack is for the element we are moving, and the memory used by
* the item's list of updates.
@@ -1919,6 +1925,24 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
#endif
/*
+ * We perform insert splits concurrently with checkpoints, where the
+ * requirement is a checkpoint must include either the original page
+ * or both new pages. The page we're splitting is dirty, but that's
+ * insufficient: set the first dirty transaction to an impossibly old
+ * value so this page is not skipped by a checkpoint.
+ */
+ page->modify->first_dirty_txn = WT_TXN_FIRST;
+
+ /*
+ * We modified the page above, which will have set the first dirty
+ * transaction to the last transaction currently running. However, the
+ * updates we installed may be older than that. Set the first dirty
+ * transaction to an impossibly old value so this page is never skipped
+ * in a checkpoint.
+ */
+ right->modify->first_dirty_txn = WT_TXN_FIRST;
+
+ /*
* Update the page accounting.
*
* XXX
@@ -1928,10 +1952,14 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
__wt_cache_page_inmem_incr(session, right, right_incr);
/*
- * Split into the parent. On successful return, the original page is no
- * longer locked, so we cannot safely look at it.
+ * The act of splitting into the parent releases the pages for eviction;
+ * ensure the page contents are consistent.
+ */
+ WT_WRITE_BARRIER();
+
+ /*
+ * Split into the parent.
*/
- page = NULL;
if ((ret = __split_parent(
session, ref, split_ref, 2, parent_incr, false, true)) == 0)
return (0);
@@ -1941,7 +1969,8 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
*
* Reset the split column-store page record.
*/
- page->modify->mod_split_recno = WT_RECNO_OOB;
+ if (type != WT_PAGE_ROW_LEAF)
+ page->modify->mod_split_recno = WT_RECNO_OOB;
/*
* Clear the allocated page's reference to the moved insert list element
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 5cbd8d1e996..57056eb5c99 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -17,18 +17,18 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
{
struct timespec end, start;
WT_BTREE *btree;
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
WT_REF *walk;
WT_TXN *txn;
uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
- uint64_t saved_snap_min;
+ uint64_t oldest_id, saved_snap_min;
uint32_t flags;
- bool evict_reset;
+ conn = S2C(session);
btree = S2BT(session);
-
walk = NULL;
txn = &session->txn;
saved_snap_min = WT_SESSION_TXN_STATE(session)->snap_min;
@@ -56,6 +56,15 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
return (0);
}
+ /*
+ * Save the oldest transaction ID we need to keep around.
+ * Otherwise, in a busy system, we could be updating pages so
+ * fast that write leaves never catches up. We deliberately
+ * have no transaction running at this point that would keep
+ * the oldest ID from moving forwards as we walk the tree.
+ */
+ oldest_id = __wt_txn_oldest_id(session);
+
flags |= WT_READ_NO_WAIT | WT_READ_SKIP_INTL;
for (walk = NULL;;) {
WT_ERR(__wt_tree_walk(session, &walk, flags));
@@ -64,13 +73,13 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
/*
* Write dirty pages if nobody beat us to it. Don't
- * try to write the hottest pages: checkpoint will have
- * to visit them anyway.
+ * try to write hot pages (defined as pages that have
+ * been updated since the write leaves phase started):
+ * checkpoint will have to visit them anyway.
*/
page = walk->page;
if (__wt_page_is_modified(page) &&
- __wt_txn_visible_all(
- session, page->modify->update_txn)) {
+ WT_TXNID_LT(page->modify->update_txn, oldest_id)) {
if (txn->isolation == WT_ISO_READ_COMMITTED)
__wt_txn_get_snapshot(session);
leaf_bytes += page->memory_footprint;
@@ -105,19 +114,18 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
__wt_spin_lock(session, &btree->flush_lock);
/*
- * When internal pages are being reconciled by checkpoint their
- * child pages cannot disappear from underneath them or be split
- * into them, nor can underlying blocks be freed until the block
- * lists for the checkpoint are stable. Set the checkpointing
- * flag to block eviction of dirty pages until the checkpoint's
- * internal page pass is complete, then wait for any existing
- * eviction to complete.
+ * In the final checkpoint pass, child pages cannot be evicted
+ * from underneath internal pages nor can underlying blocks be
+ * freed until the checkpoint's block lists are stable. Also,
+ * we cannot split child pages into parents unless we know the
+ * final pass will write a consistent view of that namespace.
+ * Set the checkpointing flag to block such actions and wait for
+ * any problematic eviction or page splits to complete.
*/
WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE);
- WT_ERR(__wt_evict_file_exclusive_on(session, &evict_reset));
- if (evict_reset)
- __wt_evict_file_exclusive_off(session);
+ WT_ERR(__wt_evict_file_exclusive_on(session));
+ __wt_evict_file_exclusive_off(session);
WT_PUBLISH(btree->checkpointing, WT_CKPT_RUNNING);
@@ -215,7 +223,7 @@ err: /* On error, clear any left-over tree walk. */
* so that eviction knows that the checkpoint has completed.
*/
WT_PUBLISH(btree->checkpoint_gen,
- S2C(session)->txn_global.checkpoint_gen);
+ conn->txn_global.checkpoint_gen);
WT_STAT_FAST_DATA_SET(session,
btree_checkpoint_generation, btree->checkpoint_gen);
@@ -249,7 +257,8 @@ err: /* On error, clear any left-over tree walk. */
* before checkpointing the file). Start a flush to stable storage,
* but don't wait for it.
*/
- if (ret == 0 && syncop == WT_SYNC_WRITE_LEAVES)
+ if (ret == 0 &&
+ syncop == WT_SYNC_WRITE_LEAVES && F_ISSET(conn, WT_CONN_CKPT_SYNC))
WT_RET(btree->bm->sync(btree->bm, session, true));
return (ret);
@@ -260,24 +269,18 @@ err: /* On error, clear any left-over tree walk. */
* Cache operations.
*/
int
-__wt_cache_op(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_CACHE_OP op)
+__wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op)
{
- WT_DECL_RET;
- WT_BTREE *btree;
-
- btree = S2BT(session);
-
switch (op) {
case WT_SYNC_CHECKPOINT:
case WT_SYNC_CLOSE:
/*
- * Set the checkpoint reference for reconciliation; it's ugly,
- * but drilling a function parameter path from our callers to
- * the reconciliation of the tree's root page is going to be
- * worse.
+ * Make sure the checkpoint reference is set for
+ * reconciliation; it's ugly, but drilling a function parameter
+ * path from our callers to the reconciliation of the tree's
+ * root page is going to be worse.
*/
- WT_ASSERT(session, btree->ckpt == NULL);
- btree->ckpt = ckptbase;
+ WT_ASSERT(session, S2BT(session)->ckpt != NULL);
break;
case WT_SYNC_DISCARD:
case WT_SYNC_WRITE_LEAVES:
@@ -287,23 +290,10 @@ __wt_cache_op(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_CACHE_OP op)
switch (op) {
case WT_SYNC_CHECKPOINT:
case WT_SYNC_WRITE_LEAVES:
- WT_ERR(__sync_file(session, op));
- break;
+ return (__sync_file(session, op));
case WT_SYNC_CLOSE:
case WT_SYNC_DISCARD:
- WT_ERR(__wt_evict_file(session, op));
- break;
+ return (__wt_evict_file(session, op));
+ WT_ILLEGAL_VALUE(session);
}
-
-err: switch (op) {
- case WT_SYNC_CHECKPOINT:
- case WT_SYNC_CLOSE:
- btree->ckpt = NULL;
- break;
- case WT_SYNC_DISCARD:
- case WT_SYNC_WRITE_LEAVES:
- break;
- }
-
- return (ret);
}
diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c
index ae2c20be1b6..952298f2456 100644
--- a/src/btree/bt_vrfy.c
+++ b/src/btree/bt_vrfy.c
@@ -226,7 +226,7 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
WT_WITH_PAGE_INDEX(session,
ret = __verify_tree(session, &btree->root, vs));
- WT_TRET(__wt_cache_op(session, NULL, WT_SYNC_DISCARD));
+ WT_TRET(__wt_cache_op(session, WT_SYNC_DISCARD));
}
/* Unload the checkpoint. */
diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c
index 55b11d7b2d1..bb8a750d848 100644
--- a/src/btree/bt_walk.c
+++ b/src/btree/bt_walk.c
@@ -583,14 +583,14 @@ restart: /*
break;
}
WT_ERR(ret);
+ couple = ref;
/*
* A new page: configure for traversal of any internal
* page's children, else return the leaf page.
*/
if (WT_PAGE_IS_INTERNAL(ref->page)) {
-descend: couple = ref;
- empty_internal = true;
+descend: empty_internal = true;
/*
* There's a split race when a cursor is setting
@@ -649,7 +649,6 @@ descend: couple = ref;
*/
if (skipleafcntp != NULL ||
LF_ISSET(WT_READ_SKIP_LEAF)) {
- couple = ref;
if (LF_ISSET(WT_READ_SKIP_LEAF))
break;
if (*skipleafcntp > 0) {
diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c
index 645d98d9c9b..fd60b12538a 100644
--- a/src/btree/col_modify.c
+++ b/src/btree/col_modify.c
@@ -25,6 +25,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
WT_INSERT_HEAD *ins_head, **ins_headp;
WT_ITEM _value;
WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
WT_UPDATE *old_upd, *upd;
size_t ins_size, upd_size;
u_int i, skipdepth;
@@ -60,6 +61,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
/* If we don't yet have a modify structure, we'll need one. */
WT_RET(__wt_page_modify_init(session, page));
+ mod = page->modify;
/*
* Delete, insert or update a column-store entry.
@@ -105,17 +107,17 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
/* Allocate the append/update list reference as necessary. */
if (append) {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, page->modify->mod_append, ins_headp, 1);
- ins_headp = &page->modify->mod_append[0];
+ page, mod->mod_append, ins_headp, 1);
+ ins_headp = &mod->mod_append[0];
} else if (page->type == WT_PAGE_COL_FIX) {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, page->modify->mod_update, ins_headp, 1);
- ins_headp = &page->modify->mod_update[0];
+ page, mod->mod_update, ins_headp, 1);
+ ins_headp = &mod->mod_update[0];
} else {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, page->modify->mod_update, ins_headp,
+ page, mod->mod_update, ins_headp,
page->pg_var_entries);
- ins_headp = &page->modify->mod_update[cbt->slot];
+ ins_headp = &mod->mod_update[cbt->slot];
}
/* Allocate the WT_INSERT_HEAD structure as necessary. */
@@ -135,6 +137,14 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
cbt->ins_head = ins_head;
cbt->ins = ins;
+ /*
+ * Check for insert split and checkpoint races in column-store:
+ * it's easy (as opposed to in row-store) and a difficult bug to
+ * otherwise diagnose.
+ */
+ WT_ASSERT(session, mod->mod_split_recno == WT_RECNO_OOB ||
+ (recno != WT_RECNO_OOB && mod->mod_split_recno > recno));
+
if (upd_arg == NULL) {
WT_ERR(
__wt_update_alloc(session, value, &upd, &upd_size));
diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c
index 3aa31044b82..4730267a545 100644
--- a/src/btree/col_srch.c
+++ b/src/btree/col_srch.c
@@ -77,6 +77,7 @@ __wt_col_search(WT_SESSION_IMPL *session,
int depth;
btree = S2BT(session);
+ current = NULL;
__cursor_pos_clear(cbt);
@@ -116,12 +117,19 @@ __wt_col_search(WT_SESSION_IMPL *session,
goto leaf_only;
}
-restart_root:
+ if (0) {
+restart: /*
+ * Discard the currently held page and restart the search from
+ * the root.
+ */
+ WT_RET(__wt_page_release(session, current, 0));
+ }
+
/* Search the internal pages of the tree. */
current = &btree->root;
for (depth = 2, pindex = NULL;; ++depth) {
parent_pindex = pindex;
-restart_page: page = current->page;
+ page = current->page;
if (page->type != WT_PAGE_COL_INT)
break;
@@ -138,10 +146,8 @@ restart_page: page = current->page;
* on the page), check for an internal page split race.
*/
if (__wt_split_descent_race(
- session, current, parent_pindex)) {
- WT_RET(__wt_page_release(session, current, 0));
- goto restart_root;
- }
+ session, current, parent_pindex))
+ goto restart;
goto descend;
}
@@ -178,8 +184,14 @@ descend: /*
/*
* Swap the current page for the child page. If the page splits
- * while we're retrieving it, restart the search in the current
- * page; otherwise return on error, the swap call ensures we're
+ * while we're retrieving it, restart the search at the root.
+ * We cannot restart in the "current" page; for example, if a
+ * thread is appending to the tree, the page it's waiting for
+ * did an insert-split into the parent, then the parent split
+ * into its parent, the name space we are searching for may have
+ * moved above the current page in the tree.
+ *
+ * On other error, simply return, the swap call ensures we're
* holding nothing on failure.
*/
if ((ret = __wt_page_swap(
@@ -188,7 +200,7 @@ descend: /*
continue;
}
if (ret == WT_RESTART)
- goto restart_page;
+ goto restart;
return (ret);
}
@@ -199,7 +211,6 @@ descend: /*
leaf_only:
page = current->page;
cbt->ref = current;
- cbt->recno = recno;
/*
* Don't bother searching if the caller is appending a new record where
@@ -213,13 +224,6 @@ leaf_only:
}
/*
- * Set the on-page slot to an impossible value larger than any possible
- * slot (it's used to interpret the search function's return after the
- * search returns an insert list for a page that has no entries).
- */
- cbt->slot = UINT32_MAX;
-
- /*
* Search the leaf page.
*
* Search after a page is pinned does a search of the pinned page before
@@ -232,28 +236,38 @@ leaf_only:
* that's impossibly large for the page. We do have additional setup to
* do in that case, the record may be appended to the page.
*/
- cbt->compare = 0;
if (page->type == WT_PAGE_COL_FIX) {
if (recno < page->pg_fix_recno) {
+ cbt->recno = page->pg_fix_recno;
cbt->compare = 1;
return (0);
}
if (recno >= page->pg_fix_recno + page->pg_fix_entries) {
cbt->recno = page->pg_fix_recno + page->pg_fix_entries;
goto past_end;
- } else
+ } else {
+ cbt->recno = recno;
+ cbt->compare = 0;
ins_head = WT_COL_UPDATE_SINGLE(page);
+ }
} else {
if (recno < page->pg_var_recno) {
+ cbt->recno = page->pg_var_recno;
+ cbt->slot = 0;
cbt->compare = 1;
return (0);
}
if ((cip = __col_var_search(page, recno, NULL)) == NULL) {
cbt->recno = __col_var_last_recno(page);
+ cbt->slot = page->pg_var_entries == 0 ?
+ 0 : page->pg_var_entries - 1;
goto past_end;
} else {
+ cbt->recno = recno;
cbt->slot = WT_COL_SLOT(page, cip);
+ cbt->compare = 0;
ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
+ F_SET(cbt, WT_CBT_VAR_ONPAGE_MATCH);
}
}
diff --git a/src/btree/row_key.c b/src/btree/row_key.c
index 8b9e858ec18..9fff092d079 100644
--- a/src/btree/row_key.c
+++ b/src/btree/row_key.c
@@ -52,6 +52,7 @@ __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_RET(__wt_scr_alloc(session, 0, &key));
WT_RET(__wt_scr_alloc(session,
(uint32_t)__bitstr_size(page->pg_row_entries), &tmp));
+ memset(tmp->mem, 0, tmp->memsize);
if ((gap = btree->key_gap) == 0)
gap = 1;
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c
index 28c55a4ccd0..6169a0a810a 100644
--- a/src/btree/row_srch.c
+++ b/src/btree/row_srch.c
@@ -9,18 +9,17 @@
#include "wt_internal.h"
/*
- * __wt_search_insert_append --
+ * __search_insert_append --
* Fast append search of a row-store insert list, creating a skiplist stack
* as we go.
*/
static inline int
-__wt_search_insert_append(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key, bool *donep)
+__search_insert_append(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
+ WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key, bool *donep)
{
WT_BTREE *btree;
WT_COLLATOR *collator;
WT_INSERT *ins;
- WT_INSERT_HEAD *inshead;
WT_ITEM key;
int cmp, i;
@@ -28,8 +27,7 @@ __wt_search_insert_append(WT_SESSION_IMPL *session,
collator = btree->collator;
*donep = 0;
- inshead = cbt->ins_head;
- if ((ins = WT_SKIP_LAST(inshead)) == NULL)
+ if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
return (0);
key.data = WT_INSERT_KEY(ins);
key.size = WT_INSERT_KEY_SIZE(ins);
@@ -48,12 +46,13 @@ __wt_search_insert_append(WT_SESSION_IMPL *session,
*/
for (i = WT_SKIP_MAXDEPTH - 1; i >= 0; i--) {
cbt->ins_stack[i] = (i == 0) ? &ins->next[0] :
- (inshead->tail[i] != NULL) ?
- &inshead->tail[i]->next[i] : &inshead->head[i];
+ (ins_head->tail[i] != NULL) ?
+ &ins_head->tail[i]->next[i] : &ins_head->head[i];
cbt->next_stack[i] = NULL;
}
cbt->compare = -cmp;
cbt->ins = ins;
+ cbt->ins_head = ins_head;
*donep = 1;
}
return (0);
@@ -64,20 +63,18 @@ __wt_search_insert_append(WT_SESSION_IMPL *session,
* Search a row-store insert list, creating a skiplist stack as we go.
*/
int
-__wt_search_insert(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key)
+__wt_search_insert(WT_SESSION_IMPL *session,
+ WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key)
{
WT_BTREE *btree;
WT_COLLATOR *collator;
WT_INSERT *ins, **insp, *last_ins;
- WT_INSERT_HEAD *inshead;
WT_ITEM key;
size_t match, skiphigh, skiplow;
int cmp, i;
btree = S2BT(session);
collator = btree->collator;
- inshead = cbt->ins_head;
cmp = 0; /* -Wuninitialized */
/*
@@ -86,7 +83,7 @@ __wt_search_insert(
*/
match = skiphigh = skiplow = 0;
ins = last_ins = NULL;
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0;) {
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) {
if ((ins = *insp) == NULL) {
cbt->next_stack[i] = NULL;
cbt->ins_stack[i--] = insp--;
@@ -128,6 +125,7 @@ __wt_search_insert(
*/
cbt->compare = -cmp;
cbt->ins = (ins != NULL) ? ins : last_ins;
+ cbt->ins_head = ins_head;
return (0);
}
@@ -212,6 +210,7 @@ __wt_row_search(WT_SESSION_IMPL *session,
WT_BTREE *btree;
WT_COLLATOR *collator;
WT_DECL_RET;
+ WT_INSERT_HEAD *ins_head;
WT_ITEM *item;
WT_PAGE *page;
WT_PAGE_INDEX *pindex, *parent_pindex;
@@ -276,12 +275,20 @@ __wt_row_search(WT_SESSION_IMPL *session,
goto leaf_only;
}
+ if (0) {
+restart: /*
+ * Discard the currently held page and restart the search from
+ * the root.
+ */
+ WT_RET(__wt_page_release(session, current, 0));
+ skiphigh = skiplow = 0;
+ }
+
/* Search the internal pages of the tree. */
-restart_root:
current = &btree->root;
for (depth = 2, pindex = NULL;; ++depth) {
parent_pindex = pindex;
-restart_page: page = current->page;
+ page = current->page;
if (page->type != WT_PAGE_ROW_INT)
break;
@@ -419,20 +426,20 @@ restart_page: page = current->page;
*/
if (pindex->entries == base) {
append: if (__wt_split_descent_race(
- session, current, parent_pindex)) {
- if ((ret = __wt_page_release(
- session, current, 0)) != 0)
- return (ret);
-
- skiplow = skiphigh = 0;
- goto restart_root;
- }
+ session, current, parent_pindex))
+ goto restart;
}
descend: /*
* Swap the current page for the child page. If the page splits
- * while we're retrieving it, restart the search in the current
- * page; otherwise return on error, the swap call ensures we're
+ * while we're retrieving it, restart the search at the root.
+ * We cannot restart in the "current" page; for example, if a
+ * thread is appending to the tree, the page it's waiting for
+ * did an insert-split into the parent, then the parent split
+ * into its parent, the name space we are searching for may have
+ * moved above the current page in the tree.
+ *
+ * On other error, simply return, the swap call ensures we're
* holding nothing on failure.
*/
if ((ret = __wt_page_swap(
@@ -440,10 +447,8 @@ descend: /*
current = descent;
continue;
}
- if (ret == WT_RESTART) {
- skiphigh = skiplow = 0;
- goto restart_page;
- }
+ if (ret == WT_RESTART)
+ goto restart;
return (ret);
}
@@ -456,6 +461,12 @@ leaf_only:
cbt->ref = current;
/*
+ * Clear current now that we have moved the reference into the btree
+ * cursor, so that cleanup never releases twice.
+ */
+ current = NULL;
+
+ /*
* In the case of a right-side tree descent during an insert, do a fast
* check for an append to the page, try to catch cursors appending data
* into the tree.
@@ -479,24 +490,18 @@ leaf_only:
cbt->slot = WT_ROW_SLOT(page, page->pg_row_d);
F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
- cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
+ ins_head = WT_ROW_INSERT_SMALLEST(page);
} else {
cbt->slot = WT_ROW_SLOT(page,
page->pg_row_d + (page->pg_row_entries - 1));
- cbt->ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
+ ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
}
- WT_ERR(
- __wt_search_insert_append(session, cbt, srch_key, &done));
+ WT_ERR(__search_insert_append(
+ session, cbt, ins_head, srch_key, &done));
if (done)
return (0);
-
- /*
- * Don't leave the insert list head set, code external to the
- * search uses it.
- */
- cbt->ins_head = NULL;
}
/*
@@ -589,16 +594,16 @@ leaf_match: cbt->compare = 0;
cbt->slot = WT_ROW_SLOT(page, page->pg_row_d);
F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
- cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
+ ins_head = WT_ROW_INSERT_SMALLEST(page);
} else {
cbt->compare = -1;
cbt->slot = WT_ROW_SLOT(page, page->pg_row_d + (base - 1));
- cbt->ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
+ ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
}
/* If there's no insert list, we're done. */
- if (WT_SKIP_FIRST(cbt->ins_head) == NULL)
+ if (WT_SKIP_FIRST(ins_head) == NULL)
return (0);
/*
@@ -606,23 +611,16 @@ leaf_match: cbt->compare = 0;
* catch cursors repeatedly inserting at a single point.
*/
if (insert) {
- WT_ERR(
- __wt_search_insert_append(session, cbt, srch_key, &done));
+ WT_ERR(__search_insert_append(
+ session, cbt, ins_head, srch_key, &done));
if (done)
return (0);
}
- WT_ERR(__wt_search_insert(session, cbt, srch_key));
+ WT_ERR(__wt_search_insert(session, cbt, ins_head, srch_key));
return (0);
-err: /*
- * Release the current page if the search started at the root. If the
- * search didn't start at the root we should never have gone looking
- * beyond the start page.
- */
- WT_ASSERT(session, leaf == NULL || leaf == current);
- if (leaf == NULL)
- WT_TRET(__wt_page_release(session, current, 0));
+err: WT_TRET(__wt_page_release(session, current, 0));
return (ret);
}
@@ -660,19 +658,16 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
/*
* If the tree is new (and not empty), it might have a large insert
* list.
- */
- F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
- if ((cbt->ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL)
- return (WT_NOTFOUND);
-
- /*
+ *
* Walk down the list until we find a level with at least 50 entries,
* that's where we'll start rolling random numbers. The value 50 is
* used to ignore levels with only a few entries, that is, levels which
* are potentially badly skewed.
*/
- for (ins_head = cbt->ins_head,
- level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) {
+ F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
+ if ((ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL)
+ return (WT_NOTFOUND);
+ for (level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) {
start = &ins_head->head[level];
for (entries = 0, stop = start;
*stop != NULL; stop = &(*stop)->next[level])
@@ -767,6 +762,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
ins = ins->next[0];
cbt->ins = ins;
+ cbt->ins_head = ins_head;
cbt->compare = 0;
return (0);
@@ -786,11 +782,19 @@ __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
WT_REF *current, *descent;
btree = S2BT(session);
+ current = NULL;
__cursor_pos_clear(cbt);
-restart_root:
- /* Walk the internal pages of the tree. */
+ if (0) {
+restart: /*
+ * Discard the currently held page and restart the search from
+ * the root.
+ */
+ WT_RET(__wt_page_release(session, current, 0));
+ }
+
+ /* Search the internal pages of the tree. */
current = &btree->root;
for (;;) {
page = current->page;
@@ -802,22 +806,19 @@ restart_root:
__wt_random(&session->rnd) % pindex->entries];
/*
- * Swap the parent page for the child page; return on error,
- * the swap function ensures we're holding nothing on failure.
+ * Swap the current page for the child page. If the page splits
+ * while we're retrieving it, restart the search at the root.
+ *
+ * On other error, simply return, the swap call ensures we're
+ * holding nothing on failure.
*/
if ((ret = __wt_page_swap(
session, current, descent, WT_READ_RESTART_OK)) == 0) {
current = descent;
continue;
}
- /*
- * Restart is returned if we find a page that's been split; the
- * held page isn't discarded when restart is returned, discard
- * it and restart the search from the top of the tree.
- */
- if (ret == WT_RESTART &&
- (ret = __wt_page_release(session, current, 0)) == 0)
- goto restart_root;
+ if (ret == WT_RESTART)
+ goto restart;
return (ret);
}
diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c
index 1ef8dd32bb4..8796ec6b2fc 100644
--- a/src/cache/cache_las.c
+++ b/src/cache/cache_las.c
@@ -58,6 +58,8 @@ __wt_las_create(WT_SESSION_IMPL *session)
conn = S2C(session);
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ return (0);
/*
* Done at startup: we cannot do it on demand because we require the
* schema lock to create and drop the table, and it may not always be
@@ -203,7 +205,7 @@ __wt_las_cursor(
* useful more than once.
*/
*session_flags =
- F_ISSET(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
+ F_MASK(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
conn = S2C(session);
diff --git a/src/config/config.c b/src/config/config.c
index f480ab83dbd..96ef7a4e62a 100644
--- a/src/config/config.c
+++ b/src/config/config.c
@@ -16,9 +16,9 @@ static int
__config_err(WT_CONFIG *conf, const char *msg, int err)
{
WT_RET_MSG(conf->session, err,
- "Error parsing '%.*s' at byte %u: %s",
+ "Error parsing '%.*s' at offset %" WT_PTRDIFFT_FMT ": %s",
(int)(conf->end - conf->orig), conf->orig,
- (u_int)(conf->cur - conf->orig), msg);
+ conf->cur - conf->orig, msg);
}
/*
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 879de670695..c752e5eb265 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -99,6 +99,7 @@ static const WT_CONFIG_CHECK
static const WT_CONFIG_CHECK
confchk_wiredtiger_open_statistics_log_subconfigs[] = {
+ { "json", "boolean", NULL, NULL, NULL, 0 },
{ "on_close", "boolean", NULL, NULL, NULL, 0 },
{ "path", "string", NULL, NULL, NULL, 0 },
{ "sources", "list", NULL, NULL, NULL, 0 },
@@ -146,7 +147,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
- confchk_wiredtiger_open_statistics_log_subconfigs, 5 },
+ confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
"\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\","
@@ -390,6 +391,61 @@ static const WT_CONFIG_CHECK confchk_colgroup_meta[] = {
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
+static const WT_CONFIG_CHECK confchk_file_config[] = {
+ { "allocation_size", "int",
+ NULL, "min=512B,max=128MB",
+ NULL, 0 },
+ { "app_metadata", "string", NULL, NULL, NULL, 0 },
+ { "block_allocation", "string",
+ NULL, "choices=[\"first\",\"best\"]",
+ NULL, 0 },
+ { "block_compressor", "string", NULL, NULL, NULL, 0 },
+ { "cache_resident", "boolean", NULL, NULL, NULL, 0 },
+ { "checksum", "string",
+ NULL, "choices=[\"on\",\"off\",\"uncompressed\"]",
+ NULL, 0 },
+ { "collator", "string", NULL, NULL, NULL, 0 },
+ { "columns", "list", NULL, NULL, NULL, 0 },
+ { "dictionary", "int", NULL, "min=0", NULL, 0 },
+ { "encryption", "category",
+ NULL, NULL,
+ confchk_WT_SESSION_create_encryption_subconfigs, 2 },
+ { "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
+ { "huffman_key", "string", NULL, NULL, NULL, 0 },
+ { "huffman_value", "string", NULL, NULL, NULL, 0 },
+ { "internal_item_max", "int", NULL, "min=0", NULL, 0 },
+ { "internal_key_max", "int", NULL, "min=0", NULL, 0 },
+ { "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
+ { "internal_page_max", "int",
+ NULL, "min=512B,max=512MB",
+ NULL, 0 },
+ { "key_format", "format", __wt_struct_confchk, NULL, NULL, 0 },
+ { "key_gap", "int", NULL, "min=0", NULL, 0 },
+ { "leaf_item_max", "int", NULL, "min=0", NULL, 0 },
+ { "leaf_key_max", "int", NULL, "min=0", NULL, 0 },
+ { "leaf_page_max", "int",
+ NULL, "min=512B,max=512MB",
+ NULL, 0 },
+ { "leaf_value_max", "int", NULL, "min=0", NULL, 0 },
+ { "log", "category",
+ NULL, NULL,
+ confchk_WT_SESSION_create_log_subconfigs, 1 },
+ { "memory_page_max", "int",
+ NULL, "min=512B,max=10TB",
+ NULL, 0 },
+ { "os_cache_dirty_max", "int", NULL, "min=0", NULL, 0 },
+ { "os_cache_max", "int", NULL, "min=0", NULL, 0 },
+ { "prefix_compression", "boolean", NULL, NULL, NULL, 0 },
+ { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 },
+ { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 },
+ { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 },
+ { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 },
+ { "value_format", "format",
+ __wt_struct_confchk, NULL,
+ NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
static const WT_CONFIG_CHECK confchk_file_meta[] = {
{ "allocation_size", "int",
NULL, "min=512B,max=128MB",
@@ -465,6 +521,67 @@ static const WT_CONFIG_CHECK confchk_index_meta[] = {
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
+static const WT_CONFIG_CHECK confchk_lsm_meta[] = {
+ { "allocation_size", "int",
+ NULL, "min=512B,max=128MB",
+ NULL, 0 },
+ { "app_metadata", "string", NULL, NULL, NULL, 0 },
+ { "block_allocation", "string",
+ NULL, "choices=[\"first\",\"best\"]",
+ NULL, 0 },
+ { "block_compressor", "string", NULL, NULL, NULL, 0 },
+ { "cache_resident", "boolean", NULL, NULL, NULL, 0 },
+ { "checksum", "string",
+ NULL, "choices=[\"on\",\"off\",\"uncompressed\"]",
+ NULL, 0 },
+ { "chunks", "string", NULL, NULL, NULL, 0 },
+ { "collator", "string", NULL, NULL, NULL, 0 },
+ { "columns", "list", NULL, NULL, NULL, 0 },
+ { "dictionary", "int", NULL, "min=0", NULL, 0 },
+ { "encryption", "category",
+ NULL, NULL,
+ confchk_WT_SESSION_create_encryption_subconfigs, 2 },
+ { "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
+ { "huffman_key", "string", NULL, NULL, NULL, 0 },
+ { "huffman_value", "string", NULL, NULL, NULL, 0 },
+ { "internal_item_max", "int", NULL, "min=0", NULL, 0 },
+ { "internal_key_max", "int", NULL, "min=0", NULL, 0 },
+ { "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
+ { "internal_page_max", "int",
+ NULL, "min=512B,max=512MB",
+ NULL, 0 },
+ { "key_format", "format", __wt_struct_confchk, NULL, NULL, 0 },
+ { "key_gap", "int", NULL, "min=0", NULL, 0 },
+ { "last", "string", NULL, NULL, NULL, 0 },
+ { "leaf_item_max", "int", NULL, "min=0", NULL, 0 },
+ { "leaf_key_max", "int", NULL, "min=0", NULL, 0 },
+ { "leaf_page_max", "int",
+ NULL, "min=512B,max=512MB",
+ NULL, 0 },
+ { "leaf_value_max", "int", NULL, "min=0", NULL, 0 },
+ { "log", "category",
+ NULL, NULL,
+ confchk_WT_SESSION_create_log_subconfigs, 1 },
+ { "lsm", "category",
+ NULL, NULL,
+ confchk_WT_SESSION_create_lsm_subconfigs, 11 },
+ { "memory_page_max", "int",
+ NULL, "min=512B,max=10TB",
+ NULL, 0 },
+ { "old_chunks", "string", NULL, NULL, NULL, 0 },
+ { "os_cache_dirty_max", "int", NULL, "min=0", NULL, 0 },
+ { "os_cache_max", "int", NULL, "min=0", NULL, 0 },
+ { "prefix_compression", "boolean", NULL, NULL, NULL, 0 },
+ { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 },
+ { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 },
+ { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 },
+ { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 },
+ { "value_format", "format",
+ __wt_struct_confchk, NULL,
+ NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
static const WT_CONFIG_CHECK confchk_table_meta[] = {
{ "app_metadata", "string", NULL, NULL, NULL, 0 },
{ "colgroups", "list", NULL, NULL, NULL, 0 },
@@ -544,6 +661,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "lsm_merge", "boolean", NULL, NULL, NULL, 0 },
{ "mmap", "boolean", NULL, NULL, NULL, 0 },
{ "multiprocess", "boolean", NULL, NULL, NULL, 0 },
+ { "readonly", "boolean", NULL, NULL, NULL, 0 },
{ "session_max", "int", NULL, "min=1", NULL, 0 },
{ "session_scratch_max", "int", NULL, NULL, NULL, 0 },
{ "shared_cache", "category",
@@ -554,7 +672,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
- confchk_wiredtiger_open_statistics_log_subconfigs, 5 },
+ confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
{ "transaction_sync", "category",
NULL, NULL,
confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
@@ -624,6 +742,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "lsm_merge", "boolean", NULL, NULL, NULL, 0 },
{ "mmap", "boolean", NULL, NULL, NULL, 0 },
{ "multiprocess", "boolean", NULL, NULL, NULL, 0 },
+ { "readonly", "boolean", NULL, NULL, NULL, 0 },
{ "session_max", "int", NULL, "min=1", NULL, 0 },
{ "session_scratch_max", "int", NULL, NULL, NULL, 0 },
{ "shared_cache", "category",
@@ -634,7 +753,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
- confchk_wiredtiger_open_statistics_log_subconfigs, 5 },
+ confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
{ "transaction_sync", "category",
NULL, NULL,
confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
@@ -701,6 +820,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{ "lsm_merge", "boolean", NULL, NULL, NULL, 0 },
{ "mmap", "boolean", NULL, NULL, NULL, 0 },
{ "multiprocess", "boolean", NULL, NULL, NULL, 0 },
+ { "readonly", "boolean", NULL, NULL, NULL, 0 },
{ "session_max", "int", NULL, "min=1", NULL, 0 },
{ "session_scratch_max", "int", NULL, NULL, NULL, 0 },
{ "shared_cache", "category",
@@ -711,7 +831,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
- confchk_wiredtiger_open_statistics_log_subconfigs, 5 },
+ confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
{ "transaction_sync", "category",
NULL, NULL,
confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
@@ -776,6 +896,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{ "lsm_merge", "boolean", NULL, NULL, NULL, 0 },
{ "mmap", "boolean", NULL, NULL, NULL, 0 },
{ "multiprocess", "boolean", NULL, NULL, NULL, 0 },
+ { "readonly", "boolean", NULL, NULL, NULL, 0 },
{ "session_max", "int", NULL, "min=1", NULL, 0 },
{ "session_scratch_max", "int", NULL, NULL, NULL, 0 },
{ "shared_cache", "category",
@@ -786,7 +907,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
- confchk_wiredtiger_open_statistics_log_subconfigs, 5 },
+ confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
{ "transaction_sync", "category",
NULL, NULL,
confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
@@ -853,7 +974,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"file_max=100MB,path=,prealloc=,recover=on,zero_fill=0),"
"lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,"
"shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(on_close=0,"
+ "statistics=none,statistics_log=(json=0,on_close=0,"
"path=\"WiredTigerStat.%d.%H\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=",
confchk_WT_CONNECTION_reconfigure, 18
@@ -980,6 +1101,20 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"app_metadata=,collator=,columns=,source=,type=file",
confchk_colgroup_meta, 5
},
+ { "file.config",
+ "allocation_size=4KB,app_metadata=,block_allocation=best,"
+ "block_compressor=,cache_resident=0,checksum=uncompressed,"
+ "collator=,columns=,dictionary=0,encryption=(keyid=,name=),"
+ "format=btree,huffman_key=,huffman_value=,internal_item_max=0,"
+ "internal_key_max=0,internal_key_truncate=,internal_page_max=4KB,"
+ "key_format=u,key_gap=10,leaf_item_max=0,leaf_key_max=0,"
+ "leaf_page_max=32KB,leaf_value_max=0,log=(enabled=),"
+ "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
+ "prefix_compression=0,prefix_compression_min=4,"
+ "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75,"
+ "value_format=u",
+ confchk_file_config, 33
+ },
{ "file.meta",
"allocation_size=4KB,app_metadata=,block_allocation=best,"
"block_compressor=,cache_resident=0,checkpoint=,checkpoint_lsn=,"
@@ -1000,6 +1135,23 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"index_key_columns=,key_format=u,source=,type=file,value_format=u",
confchk_index_meta, 10
},
+ { "lsm.meta",
+ "allocation_size=4KB,app_metadata=,block_allocation=best,"
+ "block_compressor=,cache_resident=0,checksum=uncompressed,chunks="
+ ",collator=,columns=,dictionary=0,encryption=(keyid=,name=),"
+ "format=btree,huffman_key=,huffman_value=,internal_item_max=0,"
+ "internal_key_max=0,internal_key_truncate=,internal_page_max=4KB,"
+ "key_format=u,key_gap=10,last=,leaf_item_max=0,leaf_key_max=0,"
+ "leaf_page_max=32KB,leaf_value_max=0,log=(enabled=),"
+ "lsm=(auto_throttle=,bloom=,bloom_bit_count=16,bloom_config=,"
+ "bloom_hash_count=8,bloom_oldest=0,chunk_count_limit=0,"
+ "chunk_max=5GB,chunk_size=10MB,merge_max=15,merge_min=0),"
+ "memory_page_max=5MB,old_chunks=,os_cache_dirty_max=0,"
+ "os_cache_max=0,prefix_compression=0,prefix_compression_min=4,"
+ "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75,"
+ "value_format=u",
+ confchk_lsm_meta, 37
+ },
{ "table.meta",
"app_metadata=,colgroups=,collator=,columns=,key_format=u,"
"value_format=u",
@@ -1017,14 +1169,14 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
"in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB,"
"path=,prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
- "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,"
+ "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
"session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\","
+ "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\","
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"transaction_sync=(enabled=0,method=fsync),use_environment=,"
"use_environment_priv=0,verbose=,write_through=",
- confchk_wiredtiger_open, 37
+ confchk_wiredtiger_open, 38
},
{ "wiredtiger_open_all",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
@@ -1038,15 +1190,15 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
"in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB,"
"path=,prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
- "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,"
+ "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
"session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\","
+ "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\","
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"transaction_sync=(enabled=0,method=fsync),use_environment=,"
"use_environment_priv=0,verbose=,version=(major=0,minor=0),"
"write_through=",
- confchk_wiredtiger_open_all, 38
+ confchk_wiredtiger_open_all, 39
},
{ "wiredtiger_open_basecfg",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
@@ -1059,14 +1211,14 @@ static const WT_CONFIG_ENTRY config_entries[] = {
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
"log=(archive=,compressor=,enabled=0,file_max=100MB,path=,"
"prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
- "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,"
+ "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
"session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\","
+ "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\","
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"transaction_sync=(enabled=0,method=fsync),verbose=,"
"version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_basecfg, 32
+ confchk_wiredtiger_open_basecfg, 33
},
{ "wiredtiger_open_usercfg",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
@@ -1079,14 +1231,14 @@ static const WT_CONFIG_ENTRY config_entries[] = {
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
"log=(archive=,compressor=,enabled=0,file_max=100MB,path=,"
"prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
- "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,"
+ "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
"session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\","
+ "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\","
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"transaction_sync=(enabled=0,method=fsync),verbose=,"
"write_through=",
- confchk_wiredtiger_open_usercfg, 31
+ confchk_wiredtiger_open_usercfg, 32
},
{ NULL, NULL, NULL, 0 }
};
diff --git a/src/conn/api_strerror.c b/src/conn/api_strerror.c
index edb11957556..87864f7f4b0 100644
--- a/src/conn/api_strerror.c
+++ b/src/conn/api_strerror.c
@@ -40,6 +40,8 @@ __wt_wiredtiger_error(int error)
return ("WT_RUN_RECOVERY: recovery must be run to continue");
case WT_CACHE_FULL:
return ("WT_CACHE_FULL: operation would overflow cache");
+ case WT_PERM_DENIED:
+ return ("WT_PERM_DENIED: permission denied (internal)");
}
/*
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 27977de63b2..6d115c8fdcd 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -772,6 +772,19 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn)
conn->extension_api.transaction_visible = __wt_ext_transaction_visible;
conn->extension_api.version = wiredtiger_version;
+ /* Streaming pack/unpack API */
+ conn->extension_api.pack_start = __wt_ext_pack_start;
+ conn->extension_api.unpack_start = __wt_ext_unpack_start;
+ conn->extension_api.pack_close = __wt_ext_pack_close;
+ conn->extension_api.pack_item = __wt_ext_pack_item;
+ conn->extension_api.pack_int = __wt_ext_pack_int;
+ conn->extension_api.pack_str = __wt_ext_pack_str;
+ conn->extension_api.pack_uint = __wt_ext_pack_uint;
+ conn->extension_api.unpack_item = __wt_ext_unpack_item;
+ conn->extension_api.unpack_int = __wt_ext_unpack_int;
+ conn->extension_api.unpack_str = __wt_ext_unpack_str;
+ conn->extension_api.unpack_uint = __wt_ext_unpack_uint;
+
return (&conn->extension_api);
}
@@ -1109,6 +1122,29 @@ __conn_config_append(const char *cfg[], const char *config)
}
/*
+ * __conn_config_readonly --
+ * Append an entry to a config stack that overrides some settings
+ * when read-only is configured.
+ */
+static void
+__conn_config_readonly(const char *cfg[])
+{
+ const char *readonly;
+
+ /*
+ * Override certain settings. In general we override the options
+ * whose default conflicts. Other settings at odds will return
+ * an error and will be checked when those settings are processed.
+ */
+ readonly="checkpoint=(wait=0),"
+ "config_base=false,"
+ "create=false,"
+ "log=(archive=false,prealloc=false),"
+ "lsm_manager=(merge=false),";
+ __conn_config_append(cfg, readonly);
+}
+
+/*
* __conn_config_check_version --
* Check if a configuration version isn't compatible.
*/
@@ -1382,7 +1418,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
WT_FH *fh;
size_t len;
wt_off_t size;
- bool exist, is_create;
+ bool bytelock, exist, is_create;
char buf[256];
conn = S2C(session);
@@ -1391,6 +1427,10 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_config_gets(session, cfg, "create", &cval));
is_create = cval.val != 0;
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ is_create = false;
+
+ bytelock = true;
__wt_spin_lock(session, &__wt_process.spinlock);
/*
@@ -1448,47 +1488,89 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
exist = false;
if (!is_create)
WT_ERR(__wt_exist(session, WT_WIREDTIGER, &exist));
- WT_ERR(__wt_open(session,
- WT_SINGLETHREAD, is_create || exist, false, 0, &conn->lock_fh));
+ ret = __wt_open(session,
+ WT_SINGLETHREAD, is_create || exist, false, 0, &conn->lock_fh);
/*
- * Lock a byte of the file: if we don't get the lock, some other process
- * is holding it, we're done. The file may be zero-length, and that's
- * OK, the underlying call supports locking past the end-of-file.
+ * If this is a read-only connection and we cannot grab the lock
+ * file, check if it is because there is no write permission or
+ * because the file does not exist. If so, then ignore the error.
+ * XXX Ignoring the error does allow multiple read-only
+ * connections to exist at the same time on a read-only directory.
*/
- if (__wt_bytelock(conn->lock_fh, (wt_off_t)0, true) != 0)
- WT_ERR_MSG(session, EBUSY,
- "WiredTiger database is already being managed by another "
- "process");
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ /*
+ * If we got an expected permission or non-existence error
+ * then skip the byte lock.
+ */
+ ret = __wt_map_error_rdonly(ret);
+ if (ret == WT_NOTFOUND || ret == WT_PERM_DENIED) {
+ bytelock = false;
+ ret = 0;
+ }
+ }
+ WT_ERR(ret);
+ if (bytelock) {
+ /*
+ * Lock a byte of the file: if we don't get the lock, some other
+ * process is holding it, we're done. The file may be
+ * zero-length, and that's OK, the underlying call supports
+ * locking past the end-of-file.
+ */
+ if (__wt_bytelock(conn->lock_fh, (wt_off_t)0, true) != 0)
+ WT_ERR_MSG(session, EBUSY,
+ "WiredTiger database is already being managed by "
+ "another process");
- /*
- * If the size of the lock file is non-zero, we created it (or won a
- * locking race with the thread that created it, it doesn't matter).
- *
- * Write something into the file, zero-length files make me nervous.
- *
- * The test against the expected length is sheer paranoia (the length
- * should be 0 or correct), but it shouldn't hurt.
- */
+ /*
+ * If the size of the lock file is non-zero, we created it (or
+ * won a locking race with the thread that created it, it
+ * doesn't matter).
+ *
+ * Write something into the file, zero-length files make me
+ * nervous.
+ *
+ * The test against the expected length is sheer paranoia (the
+ * length should be 0 or correct), but it shouldn't hurt.
+ */
#define WT_SINGLETHREAD_STRING "WiredTiger lock file\n"
- WT_ERR(__wt_filesize(session, conn->lock_fh, &size));
- if (size != strlen(WT_SINGLETHREAD_STRING))
- WT_ERR(__wt_write(session, conn->lock_fh, (wt_off_t)0,
- strlen(WT_SINGLETHREAD_STRING), WT_SINGLETHREAD_STRING));
+ WT_ERR(__wt_filesize(session, conn->lock_fh, &size));
+ if (size != strlen(WT_SINGLETHREAD_STRING))
+ WT_ERR(__wt_write(session, conn->lock_fh, (wt_off_t)0,
+ strlen(WT_SINGLETHREAD_STRING),
+ WT_SINGLETHREAD_STRING));
+
+ }
/* We own the lock file, optionally create the WiredTiger file. */
- WT_ERR(__wt_open(session, WT_WIREDTIGER, is_create, false, 0, &fh));
+ ret = __wt_open(session, WT_WIREDTIGER, is_create, false, 0, &fh);
/*
- * Lock the WiredTiger file (for backward compatibility reasons as
- * described above). Immediately release the lock, it's just a test.
+ * If we're read-only, check for success as well as handled errors.
+ * Even if we're able to open the WiredTiger file successfully, we
+ * do not try to lock it. The lock file test above is the only
+ * one we do for read-only.
*/
- if (__wt_bytelock(fh, (wt_off_t)0, true) != 0) {
- WT_ERR_MSG(session, EBUSY,
- "WiredTiger database is already being managed by another "
- "process");
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ ret = __wt_map_error_rdonly(ret);
+ if (ret == 0 || ret == WT_NOTFOUND || ret == WT_PERM_DENIED)
+ ret = 0;
+ WT_ERR(ret);
+ } else {
+ WT_ERR(ret);
+
+ /*
+ * Lock the WiredTiger file (for backward compatibility reasons
+ * as described above). Immediately release the lock, it's
+ * just a test.
+ */
+ if (__wt_bytelock(fh, (wt_off_t)0, true) != 0) {
+ WT_ERR_MSG(session, EBUSY,
+ "WiredTiger database is already being managed by "
+ "another process");
+ }
+ WT_ERR(__wt_bytelock(fh, (wt_off_t)0, false));
}
- WT_ERR(__wt_bytelock(fh, (wt_off_t)0, false));
/*
* We own the database home, figure out if we're creating it. There are
@@ -1502,11 +1584,21 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
conn->is_new = exist ? 0 : 1;
if (conn->is_new) {
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ WT_ERR_MSG(session, EINVAL, "Creating a new database is"
+ " incompatible with read-only configuration.");
len = (size_t)snprintf(buf, sizeof(buf),
"%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING);
WT_ERR(__wt_write(session, fh, (wt_off_t)0, len, buf));
WT_ERR(__wt_fsync(session, fh));
} else {
+ /*
+ * Although exclusive and the read-only configuration settings
+ * are at odds, we do not have to check against read-only here
+ * because it falls out from earlier code in this function
+ * preventing creation and confirming the database
+ * already exists.
+ */
WT_ERR(__wt_config_gets(session, cfg, "exclusive", &cval));
if (cval.val != 0)
WT_ERR_MSG(session, EEXIST,
@@ -1602,6 +1694,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
{ "fileops", WT_VERB_FILEOPS },
{ "log", WT_VERB_LOG },
{ "lsm", WT_VERB_LSM },
+ { "lsm_manager", WT_VERB_LSM_MANAGER },
{ "metadata", WT_VERB_METADATA },
{ "mutex", WT_VERB_MUTEX },
{ "overflow", WT_VERB_OVERFLOW },
@@ -1736,6 +1829,7 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
"exclusive=,"
"in_memory=,"
"log=(recover=),"
+ "readonly=,"
"use_environment_priv=,"
"verbose=,", &base_config));
WT_ERR(__wt_config_init(session, &parser, base_config));
@@ -1808,7 +1902,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
const WT_NAME_FLAG *ft;
WT_SESSION_IMPL *session;
bool config_base_set;
- const char *enc_cfg[] = { NULL, NULL };
+ const char *enc_cfg[] = { NULL, NULL }, *merge_cfg;
char version[64];
/* Leave lots of space for optional additional configuration. */
@@ -1819,6 +1913,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
conn = NULL;
session = NULL;
+ merge_cfg = NULL;
WT_RET(__wt_library_init());
@@ -1860,6 +1955,16 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
session, cval.str, cval.len, &conn->error_prefix));
/*
+ * We need to look for read-only early so that we can use it in
+ * __conn_single and when deciding whether to use the base config file.
+ * XXX that means we can only make the choice in __conn_single if the
+ * user passes it in via the config string to wiredtiger_open.
+ */
+ WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval));
+ if (cval.val)
+ F_SET(conn, WT_CONN_READONLY);
+
+ /*
* XXX ideally, we would check "in_memory" here, so we could completely
* avoid having a database directory. However, it can be convenient to
* pass "in_memory" via the WIREDTIGER_CONFIG environment variable, and
@@ -1883,6 +1988,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
* 4. the config passed in by the application
* 5. user configuration file (optional)
* 6. environment variable settings (optional)
+ * 7. overrides for a read-only connection
*
* Clear the entries we added to the stack, we're going to build it in
* order.
@@ -1898,8 +2004,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
(int)sizeof(version), ENOMEM);
__conn_config_append(cfg, version);
- /* Ignore the base_config file if we config_base set to false. */
- if (config_base_set)
+ /* Ignore the base_config file if config_base_set is false. */
+ if (config_base_set || F_ISSET(conn, WT_CONN_READONLY))
WT_ERR(
__conn_config_file(session, WT_BASECONFIG, false, cfg, i1));
__conn_config_append(cfg, config);
@@ -1909,7 +2015,35 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
/*
* Merge the full configuration stack and save it for reconfiguration.
*/
- WT_ERR(__wt_config_merge(session, cfg, NULL, &conn->cfg));
+ WT_ERR(__wt_config_merge(session, cfg, NULL, &merge_cfg));
+ /*
+ * The read-only setting may have been set in a configuration file.
+ * Get it again so that we can override other configuration settings
+ * before they are processed by the subsystems.
+ */
+ WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval));
+ if (cval.val)
+ F_SET(conn, WT_CONN_READONLY);
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ /*
+ * Create a new stack with the merged configuration as the
+ * base. The read-only string will use entry 1 and then
+ * we'll merge it again.
+ */
+ cfg[0] = merge_cfg;
+ cfg[1] = NULL;
+ cfg[2] = NULL;
+ /*
+ * We override some configuration settings for read-only.
+ * Other settings that conflict with and are an error with
+ * read-only are tested in their individual locations later.
+ */
+ __conn_config_readonly(cfg);
+ WT_ERR(__wt_config_merge(session, cfg, NULL, &conn->cfg));
+ } else {
+ conn->cfg = merge_cfg;
+ merge_cfg = NULL;
+ }
/*
* Configuration ...
@@ -2082,6 +2216,7 @@ err: /* Discard the scratch buffers. */
__wt_scr_free(session, &i2);
__wt_scr_free(session, &i3);
+ __wt_free(session, merge_cfg);
/*
* We may have allocated scratch memory when using the dummy session or
* the subsequently created real session, and we don't want to tie down
diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c
index 1831aad5895..9a2c394e9a6 100644
--- a/src/conn/conn_cache.c
+++ b/src/conn/conn_cache.c
@@ -140,6 +140,12 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_cache_config(session, false, cfg));
/*
+ * The lowest possible page read-generation has a special meaning, it
+ * marks a page for forcible eviction; don't let it happen by accident.
+ */
+ cache->read_gen = WT_READGEN_START_VALUE;
+
+ /*
* The target size must be lower than the trigger size or we will never
* get any work done.
*/
@@ -147,8 +153,8 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR_MSG(session, EINVAL,
"eviction target must be lower than the eviction trigger");
- WT_ERR(__wt_cond_alloc(session,
- "cache eviction server", false, &cache->evict_cond));
+ WT_ERR(__wt_cond_auto_alloc(session, "cache eviction server",
+ false, 10000, WT_MILLION, &cache->evict_cond));
WT_ERR(__wt_cond_alloc(session,
"eviction waiters", false, &cache->evict_waiter_cond));
WT_ERR(__wt_spin_init(session, &cache->evict_lock, "cache eviction"));
@@ -246,7 +252,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
" bytes dirty and %" PRIu64 " pages dirty",
cache->bytes_dirty, cache->pages_dirty);
- WT_TRET(__wt_cond_destroy(session, &cache->evict_cond));
+ WT_TRET(__wt_cond_auto_destroy(session, &cache->evict_cond));
WT_TRET(__wt_cond_destroy(session, &cache->evict_waiter_cond));
__wt_spin_destroy(session, &cache->evict_lock);
__wt_spin_destroy(session, &cache->evict_walk_lock);
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 60136a71b99..5019ab59fe3 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -129,7 +129,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force)
WT_BTREE *btree;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- bool evict_reset, marked_dead, no_schema_lock;
+ bool marked_dead, no_schema_lock;
btree = S2BT(session);
bm = btree->bm;
@@ -139,8 +139,8 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force)
if (!F_ISSET(dhandle, WT_DHANDLE_OPEN))
return (0);
- /* Ensure that we aren't racing with the eviction server */
- WT_RET(__wt_evict_file_exclusive_on(session, &evict_reset));
+ /* Turn off eviction. */
+ WT_RET(__wt_evict_file_exclusive_on(session));
/*
* If we don't already have the schema lock, make it an error to try
@@ -176,23 +176,19 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force)
WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) {
if (force && (bm == NULL || !bm->is_mapped(bm, session))) {
F_SET(session->dhandle, WT_DHANDLE_DEAD);
+ marked_dead = true;
- /*
- * Reset the tree's eviction priority, and the tree is
- * evictable by definition.
- */
+ /* Reset the tree's eviction priority (if any). */
__wt_evict_priority_clear(session);
- F_CLR(S2BT(session), WT_BTREE_NO_EVICTION);
-
- marked_dead = true;
}
if (!marked_dead || final)
WT_ERR(__wt_checkpoint_close(session, final));
}
WT_TRET(__wt_btree_close(session));
+
/*
- * If we marked a handle as dead it will be closed by sweep, via
+ * If we marked a handle dead it will be closed by sweep, via
* another call to sync and close.
*/
if (!marked_dead) {
@@ -206,12 +202,11 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force)
err: __wt_spin_unlock(session, &dhandle->close_lock);
- if (evict_reset)
- __wt_evict_file_exclusive_off(session);
-
if (no_schema_lock)
F_CLR(session, WT_SESSION_NO_SCHEMA_LOCK);
+ __wt_evict_file_exclusive_off(session);
+
return (ret);
}
@@ -355,42 +350,52 @@ err: F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
/*
* __conn_btree_apply_internal --
- * Apply a function to the open btree handles.
+ * Apply a function to an open data handle.
*/
static int
__conn_btree_apply_internal(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle,
- int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
+ const char *cfg[])
{
WT_DECL_RET;
+ bool skip;
+
+ /* Always apply the name function, if supplied. */
+ skip = false;
+ if (name_func != NULL)
+ WT_RET(name_func(session, dhandle->name, &skip));
+
+ /* With no file function, or if told to skip, don't lock the handle. */
+ if (file_func == NULL || skip)
+ return (0);
/*
* We need to pull the handle into the session handle cache and make
* sure it's referenced to stop other internal code dropping the handle
* (e.g in LSM when cleaning up obsolete chunks).
*/
- ret = __wt_session_get_btree(session,
- dhandle->name, dhandle->checkpoint, NULL, 0);
- if (ret == 0) {
- WT_SAVE_DHANDLE(session,
- ret = func(session, cfg));
- if (WT_META_TRACKING(session))
- WT_TRET(__wt_meta_track_handle_lock(session, false));
- else
- WT_TRET(__wt_session_release_btree(session));
- } else if (ret == EBUSY)
- ret = __wt_conn_btree_apply_single(session, dhandle->name,
- dhandle->checkpoint, func, cfg);
+ if ((ret = __wt_session_get_btree(session,
+ dhandle->name, dhandle->checkpoint, NULL, 0)) != 0)
+ return (ret == EBUSY ? 0 : ret);
+
+ WT_SAVE_DHANDLE(session, ret = file_func(session, cfg));
+ if (WT_META_TRACKING(session))
+ WT_TRET(__wt_meta_track_handle_lock(session, false));
+ else
+ WT_TRET(__wt_session_release_btree(session));
return (ret);
}
/*
* __wt_conn_btree_apply --
- * Apply a function to all open btree handles apart from the metadata.
+ * Apply a function to all open btree handles, optionally for one URI.
*/
int
-__wt_conn_btree_apply(WT_SESSION_IMPL *session,
- bool apply_checkpoints, const char *uri,
- int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
+__wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri,
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
+ const char *cfg[])
{
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
@@ -407,116 +412,27 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session,
if (uri != NULL) {
bucket =
__wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
- TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq)
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
- !F_ISSET(dhandle, WT_DHANDLE_DEAD) &&
- strcmp(uri, dhandle->name) == 0 &&
- (apply_checkpoints || dhandle->checkpoint == NULL))
- WT_RET(__conn_btree_apply_internal(
- session, dhandle, func, cfg));
+ TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) {
+ if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
+ F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
+ dhandle->checkpoint != NULL ||
+ strcmp(uri, dhandle->name) != 0)
+ continue;
+ WT_RET(__conn_btree_apply_internal(
+ session, dhandle, file_func, name_func, cfg));
+ }
} else {
- TAILQ_FOREACH(dhandle, &conn->dhqh, q)
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
- !F_ISSET(dhandle, WT_DHANDLE_DEAD) &&
- (apply_checkpoints ||
- dhandle->checkpoint == NULL) &&
- WT_PREFIX_MATCH(dhandle->name, "file:") &&
- !WT_IS_METADATA(session, dhandle))
- WT_RET(__conn_btree_apply_internal(
- session, dhandle, func, cfg));
- }
-
- return (0);
-}
-
-/*
- * __wt_conn_btree_apply_single_ckpt --
- * Decode any checkpoint information from the configuration string then
- * call btree apply single.
- */
-int
-__wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session,
- const char *uri,
- int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
-{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- const char *checkpoint;
-
- checkpoint = NULL;
-
- /*
- * This function exists to handle checkpoint configuration. Callers
- * that never open a checkpoint call the underlying function directly.
- */
- WT_RET_NOTFOUND_OK(
- __wt_config_gets_def(session, cfg, "checkpoint", 0, &cval));
- if (cval.len != 0) {
- /*
- * The internal checkpoint name is special, find the last
- * unnamed checkpoint of the object.
- */
- if (WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
- WT_RET(__wt_meta_checkpoint_last_name(
- session, uri, &checkpoint));
- } else
- WT_RET(__wt_strndup(
- session, cval.str, cval.len, &checkpoint));
- }
-
- ret = __wt_conn_btree_apply_single(session, uri, checkpoint, func, cfg);
-
- __wt_free(session, checkpoint);
-
- return (ret);
-}
-
-/*
- * __wt_conn_btree_apply_single --
- * Apply a function to a single btree handle that couldn't be locked
- * (attempting to get the handle returned EBUSY).
- */
-int
-__wt_conn_btree_apply_single(WT_SESSION_IMPL *session,
- const char *uri, const char *checkpoint,
- int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
-{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- uint64_t bucket, hash;
-
- conn = S2C(session);
-
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
-
- hash = __wt_hash_city64(uri, strlen(uri));
- bucket = hash % WT_HASH_ARRAY_SIZE;
- TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq)
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
- !F_ISSET(dhandle, WT_DHANDLE_DEAD) &&
- (hash == dhandle->name_hash &&
- strcmp(uri, dhandle->name) == 0) &&
- ((dhandle->checkpoint == NULL && checkpoint == NULL) ||
- (dhandle->checkpoint != NULL && checkpoint != NULL &&
- strcmp(dhandle->checkpoint, checkpoint) == 0))) {
- /*
- * We're holding the handle list lock which locks out
- * handle open (which might change the state of the
- * underlying object). However, closing a handle
- * doesn't require the handle list lock, lock out
- * closing the handle and then confirm the handle is
- * still open.
- */
- __wt_spin_lock(session, &dhandle->close_lock);
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
- !F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
- WT_WITH_DHANDLE(session, dhandle,
- ret = func(session, cfg));
- }
- __wt_spin_unlock(session, &dhandle->close_lock);
- WT_RET(ret);
+ TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
+ if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
+ F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
+ dhandle->checkpoint != NULL ||
+ !WT_PREFIX_MATCH(dhandle->name, "file:") ||
+ WT_IS_METADATA(session, dhandle))
+ continue;
+ WT_RET(__conn_btree_apply_internal(
+ session, dhandle, file_func, name_func, cfg));
}
+ }
return (0);
}
diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c
index 12b4e87e921..16717597f4d 100644
--- a/src/conn/conn_handle.c
+++ b/src/conn/conn_handle.c
@@ -56,6 +56,7 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
WT_RET(__wt_rwlock_alloc(session,
&conn->hot_backup_lock, "hot backup"));
WT_RET(__wt_spin_init(session, &conn->las_lock, "lookaside table"));
+ WT_RET(__wt_spin_init(session, &conn->metadata_lock, "metadata"));
WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
WT_RET(__wt_spin_init(session, &conn->schema_lock, "schema"));
WT_RET(__wt_spin_init(session, &conn->table_lock, "table creation"));
@@ -123,7 +124,8 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
* underlying file-close code uses the mutex to guard lists of
* open files.
*/
- WT_TRET(__wt_close(session, &conn->lock_fh));
+ if (conn->lock_fh)
+ WT_TRET(__wt_close(session, &conn->lock_fh));
/* Remove from the list of connections. */
__wt_spin_lock(session, &__wt_process.spinlock);
@@ -143,6 +145,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
__wt_spin_destroy(session, &conn->fh_lock);
WT_TRET(__wt_rwlock_destroy(session, &conn->hot_backup_lock));
__wt_spin_destroy(session, &conn->las_lock);
+ __wt_spin_destroy(session, &conn->metadata_lock);
__wt_spin_destroy(session, &conn->reconfig_lock);
__wt_spin_destroy(session, &conn->schema_lock);
__wt_spin_destroy(session, &conn->table_lock);
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index 60f46288072..757d69bf240 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -133,10 +133,17 @@ __logmgr_config(
FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
- if (cval.val != 0)
+ if (cval.val != 0) {
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ WT_RET_MSG(session, EINVAL,
+ "Read-only configuration incompatible with "
+ "zero-filling log files");
FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
+ }
WT_RET(__logmgr_sync_cfg(session, cfg));
+ if (conn->log_cond != NULL)
+ WT_RET(__wt_cond_auto_signal(session, conn->log_cond));
return (0);
}
@@ -463,7 +470,7 @@ __log_file_server(void *arg)
locked = false;
__wt_spin_unlock(session, &log->log_sync_lock);
} else {
- WT_ERR(__wt_cond_signal(
+ WT_ERR(__wt_cond_auto_signal(
session, conn->log_wrlsn_cond));
/*
* We do not want to wait potentially a second
@@ -633,7 +640,7 @@ restart:
if (slot->slot_start_lsn.l.offset !=
slot->slot_last_offset)
slot->slot_start_lsn.l.offset =
- slot->slot_last_offset;
+ (uint32_t)slot->slot_last_offset;
log->write_start_lsn = slot->slot_start_lsn;
log->write_lsn = slot->slot_end_lsn;
WT_ERR(__wt_cond_signal(
@@ -662,31 +669,54 @@ __log_wrlsn_server(void *arg)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_LOG *log;
+ WT_LSN prev;
WT_SESSION_IMPL *session;
int yield;
+ bool did_work;
session = arg;
conn = S2C(session);
+ log = conn->log;
yield = 0;
+ WT_INIT_LSN(&prev);
+ did_work = false;
while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
/*
- * Write out any log record buffers.
+ * Write out any log record buffers if anything was done
+ * since last time. Only call the function to walk the
+ * slots if the system is not idle. On an idle system
+ * the alloc_lsn will not advance and the written lsn will
+ * match the alloc_lsn.
*/
- WT_ERR(__wt_log_wrlsn(session, &yield));
+ if (__wt_log_cmp(&prev, &log->alloc_lsn) != 0 ||
+ __wt_log_cmp(&log->write_lsn, &log->alloc_lsn) != 0)
+ WT_ERR(__wt_log_wrlsn(session, &yield));
+ else
+ WT_STAT_FAST_CONN_INCR(session, log_write_lsn_skip);
+ prev = log->alloc_lsn;
+ if (yield == 0)
+ did_work = true;
+ else
+ did_work = false;
/*
* If __wt_log_wrlsn did work we want to yield instead of sleep.
*/
if (yield++ < WT_THOUSAND)
__wt_yield();
else
- WT_ERR(__wt_cond_wait(
- session, conn->log_wrlsn_cond, 10000));
+ /*
+ * did_work is always false here: if we did any work
+ * we would not be on this path.
+ */
+ WT_ERR(__wt_cond_auto_wait(
+ session, conn->log_wrlsn_cond, did_work));
}
/*
* On close we need to do this one more time because there could
* be straggling log writes that need to be written.
*/
- WT_ERR(__wt_log_force_write(session, 1));
+ WT_ERR(__wt_log_force_write(session, 1, NULL));
WT_ERR(__wt_log_wrlsn(session, NULL));
if (0) {
err: __wt_err(session, ret, "log wrlsn server error");
@@ -701,12 +731,13 @@ err: __wt_err(session, ret, "log wrlsn server error");
static WT_THREAD_RET
__log_server(void *arg)
{
+ struct timespec start, now;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_LOG *log;
WT_SESSION_IMPL *session;
- int freq_per_sec;
- bool locked, signalled;
+ uint64_t timediff;
+ bool did_work, locked, signalled;
session = arg;
conn = S2C(session);
@@ -714,11 +745,10 @@ __log_server(void *arg)
locked = signalled = false;
/*
- * Set this to the number of times per second we want to force out the
- * log slot buffer.
+ * Set this to the number of milliseconds we want to run archive and
+ * pre-allocation. Start it so that we run on the first time through.
*/
-#define WT_FORCE_PER_SECOND 20
- freq_per_sec = WT_FORCE_PER_SECOND;
+ timediff = WT_THOUSAND;
/*
* The log server thread does a variety of work. It forces out any
@@ -731,6 +761,7 @@ __log_server(void *arg)
* don't want log records sitting in the buffer over the time it
* takes to sync out an earlier file.
*/
+ did_work = true;
while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
/*
* Slots depend on future activity. Force out buffered
@@ -739,15 +770,14 @@ __log_server(void *arg)
* and a buffer may need to wait for the write_lsn to advance
* in the case of a synchronous buffer. We end up with a hang.
*/
- WT_ERR_BUSY_OK(__wt_log_force_write(session, 0));
+ WT_ERR_BUSY_OK(__wt_log_force_write(session, 0, &did_work));
/*
* We don't want to archive or pre-allocate files as often as
* we want to force out log buffers. Only do it once per second
* or if the condition was signalled.
*/
- if (--freq_per_sec <= 0 || signalled) {
- freq_per_sec = WT_FORCE_PER_SECOND;
+ if (timediff >= WT_THOUSAND || signalled) {
/*
* Perform log pre-allocation.
@@ -788,8 +818,12 @@ __log_server(void *arg)
}
/* Wait until the next event. */
- WT_ERR(__wt_cond_wait_signal(session, conn->log_cond,
- WT_MILLION / WT_FORCE_PER_SECOND, &signalled));
+
+ WT_ERR(__wt_epoch(session, &start));
+ WT_ERR(__wt_cond_auto_wait_signal(session, conn->log_cond,
+ did_work, &signalled));
+ WT_ERR(__wt_epoch(session, &now));
+ timediff = WT_TIMEDIFF_MS(now, start);
}
if (0) {
@@ -901,8 +935,9 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
*/
WT_RET(__wt_open_internal_session(conn, "log-wrlsn-server",
false, session_flags, &conn->log_wrlsn_session));
- WT_RET(__wt_cond_alloc(conn->log_wrlsn_session,
- "log write lsn server", false, &conn->log_wrlsn_cond));
+ WT_RET(__wt_cond_auto_alloc(conn->log_wrlsn_session,
+ "log write lsn server", false, 10000, WT_MILLION,
+ &conn->log_wrlsn_cond));
WT_RET(__wt_thread_create(conn->log_wrlsn_session,
&conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session));
conn->log_wrlsn_tid_set = true;
@@ -916,13 +951,13 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
if (conn->log_session != NULL) {
WT_ASSERT(session, conn->log_cond != NULL);
WT_ASSERT(session, conn->log_tid_set == true);
- WT_RET(__wt_cond_signal(session, conn->log_cond));
+ WT_RET(__wt_cond_auto_signal(session, conn->log_cond));
} else {
/* The log server gets its own session. */
WT_RET(__wt_open_internal_session(conn,
"log-server", false, session_flags, &conn->log_session));
- WT_RET(__wt_cond_alloc(conn->log_session,
- "log server", false, &conn->log_cond));
+ WT_RET(__wt_cond_auto_alloc(conn->log_session,
+ "log server", false, 50000, WT_MILLION, &conn->log_cond));
/*
* Start the thread.
@@ -958,7 +993,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session)
return (0);
}
if (conn->log_tid_set) {
- WT_TRET(__wt_cond_signal(session, conn->log_cond));
+ WT_TRET(__wt_cond_auto_signal(session, conn->log_cond));
WT_TRET(__wt_thread_join(session, conn->log_tid));
conn->log_tid_set = false;
}
@@ -973,7 +1008,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session)
conn->log_file_session = NULL;
}
if (conn->log_wrlsn_tid_set) {
- WT_TRET(__wt_cond_signal(session, conn->log_wrlsn_cond));
+ WT_TRET(__wt_cond_auto_signal(session, conn->log_wrlsn_cond));
WT_TRET(__wt_thread_join(session, conn->log_wrlsn_tid));
conn->log_wrlsn_tid_set = false;
}
@@ -994,9 +1029,9 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session)
}
/* Destroy the condition variables now that all threads are stopped */
- WT_TRET(__wt_cond_destroy(session, &conn->log_cond));
+ WT_TRET(__wt_cond_auto_destroy(session, &conn->log_cond));
WT_TRET(__wt_cond_destroy(session, &conn->log_file_cond));
- WT_TRET(__wt_cond_destroy(session, &conn->log_wrlsn_cond));
+ WT_TRET(__wt_cond_auto_destroy(session, &conn->log_wrlsn_cond));
WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond));
WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond));
diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c
index 58577b4587d..aff422654d7 100644
--- a/src/conn/conn_open.c
+++ b/src/conn/conn_open.c
@@ -210,10 +210,8 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
/*
* If hash arrays were allocated, free them now.
*/
- if (s->dhhash != NULL)
- __wt_free(session, s->dhhash);
- if (s->tablehash != NULL)
- __wt_free(session, s->tablehash);
+ __wt_free(session, s->dhhash);
+ __wt_free(session, s->tablehash);
__wt_free(session, s->hazard);
}
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index 08ad105c725..d6e59a50da5 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -86,6 +86,11 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
conn->stat_usecs = (uint64_t)cval.val * WT_MILLION;
WT_RET(__wt_config_gets(
+ session, cfg, "statistics_log.json", &cval));
+ if (cval.val != 0)
+ FLD_SET(conn->stat_flags, WT_CONN_STAT_JSON);
+
+ WT_RET(__wt_config_gets(
session, cfg, "statistics_log.on_close", &cval));
if (cval.val != 0)
FLD_SET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE);
@@ -97,6 +102,10 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
if (!*runp && !FLD_ISSET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE))
return (0);
+ /*
+ * If any statistics logging is done, this must not be a read-only
+ * connection.
+ */
WT_RET(__wt_config_gets(session, cfg, "statistics_log.sources", &cval));
WT_RET(__wt_config_subinit(session, &objectconf, &cval));
for (cnt = 0; (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt)
@@ -132,9 +141,24 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
WT_ERR(__wt_config_gets(session, cfg, "statistics_log.path", &cval));
WT_ERR(__wt_nfilename(session, cval.str, cval.len, &conn->stat_path));
- WT_ERR(__wt_config_gets(
- session, cfg, "statistics_log.timestamp", &cval));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &conn->stat_format));
+ /*
+ * When using JSON format, use the same timestamp format as MongoDB by
+ * default.
+ */
+ if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) {
+ ret = __wt_config_gets(
+ session, &cfg[1], "statistics_log.timestamp", &cval);
+ if (ret == WT_NOTFOUND)
+ WT_ERR(__wt_strdup(
+ session, "%FT%T.000Z", &conn->stat_format));
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+ if (conn->stat_format == NULL) {
+ WT_ERR(__wt_config_gets(
+ session, cfg, "statistics_log.timestamp", &cval));
+ WT_ERR(__wt_strndup(
+ session, cval.str, cval.len, &conn->stat_format));
+ }
err: __stat_sources_free(session, &sources);
return (ret);
@@ -149,22 +173,25 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats)
{
WT_CONNECTION_IMPL *conn;
WT_CURSOR *cursor;
- WT_CURSOR_STAT *cst;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
- int64_t *stats;
- int i;
- const char *desc, *uri;
+ int64_t val;
+ size_t prefixlen;
+ const char *desc, *endprefix, *valstr, *uri;
const char *cfg[] = {
WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL };
+ bool first, groupfirst;
conn = S2C(session);
+ cursor = NULL;
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ first = groupfirst = true;
/* Build URI and configuration string. */
if (conn_stats)
uri = "statistics:";
else {
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
WT_ERR(__wt_buf_fmt(session, tmp, "statistics:%s", name));
uri = tmp->data;
}
@@ -175,31 +202,54 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats)
* If we don't find an underlying object, silently ignore it, the object
* may exist only intermittently.
*/
- switch (ret = __wt_curstat_open(session, uri, NULL, cfg, &cursor)) {
- case 0:
- cst = (WT_CURSOR_STAT *)cursor;
- for (stats = cst->stats, i = 0; i < cst->stats_count; ++i) {
- if (conn_stats)
- WT_ERR(__wt_stat_connection_desc(cst, i,
- &desc));
- else
- WT_ERR(__wt_stat_dsrc_desc(cst, i, &desc));
+ if ((ret = __wt_curstat_open(session, uri, NULL, cfg, &cursor)) != 0) {
+ if (ret == EBUSY || ret == ENOENT || ret == WT_NOTFOUND)
+ ret = 0;
+ goto err;
+ }
+
+ if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) {
+ WT_ERR(__wt_fprintf(conn->stat_fp,
+ "{\"version\":\"%s\",\"localTime\":\"%s\"",
+ WIREDTIGER_VERSION_STRING, conn->stat_stamp));
+ WT_ERR(__wt_fprintf(conn->stat_fp, ",\"wiredTiger\":{"));
+ while ((ret = cursor->next(cursor)) == 0) {
+ WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val));
+ /* Check if the "prefix:" of this statistic starts a new section. */
+ endprefix = strchr(desc, ':');
+ WT_ASSERT(session, endprefix != NULL);
+ prefixlen = WT_PTRDIFF(endprefix, desc);
+ if (first ||
+ tmp->size != prefixlen ||
+ strncmp(desc, tmp->data, tmp->size) != 0) {
+ WT_ERR(__wt_buf_set(
+ session, tmp, desc, prefixlen));
+ WT_ERR(__wt_fprintf(conn->stat_fp,
+ "%s\"%.*s\":{", first ? "" : "},",
+ (int)prefixlen, desc));
+ first = false;
+ groupfirst = true;
+ }
+ WT_ERR(__wt_fprintf(conn->stat_fp,
+ "%s\"%s\":%" PRId64,
+ groupfirst ? "" : ",", endprefix + 2, val));
+ groupfirst = false;
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ WT_ERR(__wt_fprintf(conn->stat_fp, "}}}\n"));
+ } else {
+ while ((ret = cursor->next(cursor)) == 0) {
+ WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val));
WT_ERR(__wt_fprintf(conn->stat_fp,
"%s %" PRId64 " %s %s\n",
- conn->stat_stamp, stats[i], name, desc));
+ conn->stat_stamp, val, name, desc));
}
- WT_ERR(cursor->close(cursor));
- break;
- case EBUSY:
- case ENOENT:
- case WT_NOTFOUND:
- ret = 0;
- break;
- default:
- break;
+ WT_ERR_NOTFOUND_OK(ret);
}
err: __wt_scr_free(session, &tmp);
+ if (cursor != NULL)
+ WT_TRET(cursor->close(cursor));
return (ret);
}
@@ -342,7 +392,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
if (conn->stat_sources != NULL) {
WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_conn_btree_apply(
- session, false, NULL, __statlog_apply, NULL));
+ session, NULL, __statlog_apply, NULL, NULL));
WT_RET(ret);
}
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c
index 7628076e605..cc0aa5a1322 100644
--- a/src/conn/conn_sweep.c
+++ b/src/conn/conn_sweep.c
@@ -91,9 +91,9 @@ __sweep_expire_one(WT_SESSION_IMPL *session)
goto err;
/*
- * Mark the handle as dead and close the underlying file
- * handle. Closing the handle decrements the open file count,
- * meaning the close loop won't overrun the configured minimum.
+ * Mark the handle dead and close the underlying file handle.
+ * Closing the handle decrements the open file count, meaning the close
+ * loop won't overrun the configured minimum.
*/
ret = __wt_conn_btree_sync_and_close(session, false, true);
@@ -163,7 +163,7 @@ __sweep_discard_trees(WT_SESSION_IMPL *session, u_int *dead_handlesp)
!F_ISSET(dhandle, WT_DHANDLE_DEAD))
continue;
- /* If the handle is marked "dead", flush it from cache. */
+ /* If the handle is marked dead, flush it from cache. */
WT_WITH_DHANDLE(session, dhandle, ret =
__wt_conn_btree_sync_and_close(session, false, false));
diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c
index d7d74da48d4..2fb0c464a76 100644
--- a/src/cursor/cur_backup.c
+++ b/src/cursor/cur_backup.c
@@ -8,12 +8,12 @@
#include "wt_internal.h"
-static int __backup_all(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *);
+static int __backup_all(WT_SESSION_IMPL *);
static int __backup_cleanup_handles(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *);
static int __backup_file_create(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, bool);
-static int __backup_list_all_append(WT_SESSION_IMPL *, const char *[]);
static int __backup_list_append(
WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *);
+static int __backup_list_uri_append(WT_SESSION_IMPL *, const char *, bool *);
static int __backup_start(
WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *[]);
static int __backup_stop(WT_SESSION_IMPL *);
@@ -103,22 +103,22 @@ __wt_curbackup_open(WT_SESSION_IMPL *session,
const char *uri, const char *cfg[], WT_CURSOR **cursorp)
{
WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_notsup, /* get-value */
- __wt_cursor_notsup, /* set-key */
- __wt_cursor_notsup, /* set-value */
- __wt_cursor_notsup, /* compare */
- __wt_cursor_notsup, /* equals */
- __curbackup_next, /* next */
- __wt_cursor_notsup, /* prev */
- __curbackup_reset, /* reset */
- __wt_cursor_notsup, /* search */
- __wt_cursor_notsup, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reconfigure */
- __curbackup_close); /* close */
+ __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value_notsup, /* get-value */
+ __wt_cursor_set_key_notsup, /* set-key */
+ __wt_cursor_set_value_notsup, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __curbackup_next, /* next */
+ __wt_cursor_notsup, /* prev */
+ __curbackup_reset, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __curbackup_close); /* close */
WT_CURSOR *cursor;
WT_CURSOR_BACKUP *cb;
WT_DECL_RET;
@@ -140,8 +140,9 @@ __wt_curbackup_open(WT_SESSION_IMPL *session,
* Start the backup and fill in the cursor's list. Acquire the schema
* lock, we need a consistent view when creating a copy.
*/
- WT_WITH_SCHEMA_LOCK(session, ret,
- ret = __backup_start(session, cb, cfg));
+ WT_WITH_CHECKPOINT_LOCK(session, ret,
+ WT_WITH_SCHEMA_LOCK(session, ret,
+ ret = __backup_start(session, cb, cfg)));
WT_ERR(ret);
/* __wt_cursor_init is last so we don't have to clean up on error. */
@@ -241,7 +242,7 @@ __backup_start(
if (!target_list) {
WT_ERR(__backup_log_append(session, cb, true));
- WT_ERR(__backup_all(session, cb));
+ WT_ERR(__backup_all(session));
}
/* Add the hot backup and standard WiredTiger files to the list. */
@@ -332,55 +333,14 @@ __backup_stop(WT_SESSION_IMPL *session)
* Backup all objects in the database.
*/
static int
-__backup_all(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
+__backup_all(WT_SESSION_IMPL *session)
{
- WT_CONFIG_ITEM cval;
- WT_CURSOR *cursor;
WT_DECL_RET;
- const char *key, *value;
-
- cursor = NULL;
-
- /* Copy all of the metadata entries to the hot backup file. */
- WT_RET(__wt_metadata_cursor(session, &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- WT_ERR(cursor->get_key(cursor, &key));
- WT_ERR(cursor->get_value(cursor, &value));
- WT_ERR(__wt_fprintf(cb->bfp, "%s\n%s\n", key, value));
-
- /*
- * While reading the metadata file, check there are no "sources"
- * or "types" which can't support hot backup. This checks for
- * a data source that's non-standard, which can't be backed up,
- * but is also sanity checking: if there's an entry backed by
- * anything other than a file or lsm entry, we're confused.
- */
- if ((ret = __wt_config_getones(
- session, value, "type", &cval)) == 0 &&
- !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "file") &&
- !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "lsm"))
- WT_ERR_MSG(session, ENOTSUP,
- "hot backup is not supported for objects of "
- "type %.*s", (int)cval.len, cval.str);
- WT_ERR_NOTFOUND_OK(ret);
- if ((ret =__wt_config_getones(
- session, value, "source", &cval)) == 0 &&
- !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "file:") &&
- !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "lsm:"))
- WT_ERR_MSG(session, ENOTSUP,
- "hot backup is not supported for objects of "
- "source %.*s", (int)cval.len, cval.str);
- WT_ERR_NOTFOUND_OK(ret);
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- WT_ERR(__wt_metadata_cursor_release(session, &cursor));
/* Build a list of the file objects that need to be copied. */
WT_WITH_HANDLE_LIST_LOCK(session, ret =
- __wt_meta_btree_apply(session, __backup_list_all_append, NULL));
+ __wt_meta_apply_all(session, NULL, __backup_list_uri_append, NULL));
-err: WT_TRET(__wt_metadata_cursor_release(session, &cursor));
return (ret);
}
@@ -430,11 +390,11 @@ __backup_uri(WT_SESSION_IMPL *session,
*/
if (WT_PREFIX_MATCH(uri, "log:")) {
*log_only = !target_list;
- WT_ERR(__wt_backup_list_uri_append(session, uri, NULL));
+ WT_ERR(__backup_list_uri_append(session, uri, NULL));
} else {
*log_only = false;
WT_ERR(__wt_schema_worker(session,
- uri, NULL, __wt_backup_list_uri_append, cfg, 0));
+ uri, NULL, __backup_list_uri_append, cfg, 0));
}
}
WT_ERR_NOTFOUND_OK(ret);
@@ -471,12 +431,12 @@ __wt_backup_file_remove(WT_SESSION_IMPL *session)
}
/*
- * __wt_backup_list_uri_append --
+ * __backup_list_uri_append --
* Append a new file name to the list, allocate space as necessary.
* Called via the schema_worker function.
*/
-int
-__wt_backup_list_uri_append(
+static int
+__backup_list_uri_append(
WT_SESSION_IMPL *session, const char *name, bool *skip)
{
WT_CURSOR_BACKUP *cb;
@@ -485,11 +445,31 @@ __wt_backup_list_uri_append(
cb = session->bkp_cursor;
WT_UNUSED(skip);
+ /*
+ * While reading the metadata file, check there are no data sources
+ * that can't support hot backup. This checks for a data source that's
+ * non-standard, which can't be backed up, but is also sanity checking:
+ * if there's an entry backed by anything other than a file or lsm
+ * entry, we're confused.
+ */
if (WT_PREFIX_MATCH(name, "log:")) {
WT_RET(__backup_log_append(session, cb, false));
return (0);
}
+ if (!WT_PREFIX_MATCH(name, "file:") &&
+ !WT_PREFIX_MATCH(name, "colgroup:") &&
+ !WT_PREFIX_MATCH(name, "index:") &&
+ !WT_PREFIX_MATCH(name, "lsm:") &&
+ !WT_PREFIX_MATCH(name, "table:"))
+ WT_RET_MSG(session, ENOTSUP,
+ "hot backup is not supported for objects of type %s",
+ name);
+
+ /* Ignore the lookaside table. */
+ if (strcmp(name, WT_LAS_URI) == 0)
+ return (0);
+
/* Add the metadata entry to the backup file. */
WT_RET(__wt_metadata_search(session, name, &value));
WT_RET(__wt_fprintf(cb->bfp, "%s\n%s\n", name, value));
@@ -503,34 +483,6 @@ __wt_backup_list_uri_append(
}
/*
- * __backup_list_all_append --
- * Append a new file name to the list, allocate space as necessary.
- * Called via the __wt_meta_btree_apply function.
- */
-static int
-__backup_list_all_append(WT_SESSION_IMPL *session, const char *cfg[])
-{
- WT_CURSOR_BACKUP *cb;
- const char *name;
-
- WT_UNUSED(cfg);
-
- cb = session->bkp_cursor;
- name = session->dhandle->name;
-
- /* Ignore files in the process of being bulk-loaded. */
- if (F_ISSET(S2BT(session), WT_BTREE_BULK))
- return (0);
-
- /* Ignore the lookaside table. */
- if (strcmp(name, WT_LAS_URI) == 0)
- return (0);
-
- /* Add the file to the list of files to be copied. */
- return (__backup_list_append(session, cb, name));
-}
-
-/*
* __backup_list_append --
* Append a new file name to the list, allocate space as necessary.
*/
@@ -541,7 +493,6 @@ __backup_list_append(
WT_CURSOR_BACKUP_ENTRY *p;
WT_DATA_HANDLE *old_dhandle;
WT_DECL_RET;
- bool need_handle;
const char *name;
/* Leave a NULL at the end to mark the end of the list. */
@@ -551,11 +502,26 @@ __backup_list_append(
p[0].name = p[1].name = NULL;
p[0].handle = p[1].handle = NULL;
- need_handle = false;
name = uri;
+
+ /*
+ * If it's a file in the database, get a handle for the underlying
+ * object (this handle blocks schema level operations, for example
+ * WT_SESSION.drop or an LSM file discard after level merging).
+ *
+ * If the handle is busy (e.g., it is being bulk-loaded), silently skip
+ * it. We have a special fake checkpoint in the metadata, and recovery
+ * will recreate an empty file.
+ */
if (WT_PREFIX_MATCH(uri, "file:")) {
- need_handle = true;
name += strlen("file:");
+
+ old_dhandle = session->dhandle;
+ ret = __wt_session_get_btree(session, uri, NULL, NULL, 0);
+ p->handle = session->dhandle;
+ session->dhandle = old_dhandle;
+ if (ret != 0)
+ return (ret == EBUSY ? 0 : ret);
}
/*
@@ -569,20 +535,6 @@ __backup_list_append(
*/
WT_RET(__wt_strdup(session, name, &p->name));
- /*
- * If it's a file in the database, get a handle for the underlying
- * object (this handle blocks schema level operations, for example
- * WT_SESSION.drop or an LSM file discard after level merging).
- */
- if (need_handle) {
- old_dhandle = session->dhandle;
- if ((ret =
- __wt_session_get_btree(session, uri, NULL, NULL, 0)) == 0)
- p->handle = session->dhandle;
- session->dhandle = old_dhandle;
- WT_RET(ret);
- }
-
++cb->list_next;
return (0);
}
diff --git a/src/cursor/cur_config.c b/src/cursor/cur_config.c
index 1b2fec0eb89..e0d270e4245 100644
--- a/src/cursor/cur_config.c
+++ b/src/cursor/cur_config.c
@@ -27,21 +27,21 @@ __wt_curconfig_open(WT_SESSION_IMPL *session,
const char *uri, const char *cfg[], WT_CURSOR **cursorp)
{
WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __wt_cursor_notsup, /* compare */
- __wt_cursor_notsup, /* equals */
- __wt_cursor_notsup, /* next */
- __wt_cursor_notsup, /* prev */
- __wt_cursor_noop, /* reset */
- __wt_cursor_notsup, /* search */
- __wt_cursor_notsup, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reconfigure */
+ __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __wt_cursor_notsup, /* next */
+ __wt_cursor_notsup, /* prev */
+ __wt_cursor_noop, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
__curconfig_close);
WT_CURSOR_CONFIG *cconfig;
WT_CURSOR *cursor;
diff --git a/src/cursor/cur_ds.c b/src/cursor/cur_ds.c
index 2a598c99523..804c24a3d2e 100644
--- a/src/cursor/cur_ds.c
+++ b/src/cursor/cur_ds.c
@@ -449,22 +449,22 @@ __wt_curds_open(
const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp)
{
WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __curds_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __curds_next, /* next */
- __curds_prev, /* prev */
- __curds_reset, /* reset */
- __curds_search, /* search */
- __curds_search_near, /* search-near */
- __curds_insert, /* insert */
- __curds_update, /* update */
- __curds_remove, /* remove */
- __wt_cursor_notsup, /* reconfigure */
- __curds_close); /* close */
+ __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __curds_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __curds_next, /* next */
+ __curds_prev, /* prev */
+ __curds_reset, /* reset */
+ __curds_search, /* search */
+ __curds_search_near, /* search-near */
+ __curds_insert, /* insert */
+ __curds_update, /* update */
+ __curds_remove, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __curds_close); /* close */
WT_CONFIG_ITEM cval, metadata;
WT_CURSOR *cursor, *source;
WT_CURSOR_DATA_SOURCE *data_source;
diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c
index 3324efd96cc..a7b1c98871a 100644
--- a/src/cursor/cur_dump.c
+++ b/src/cursor/cur_dump.c
@@ -348,22 +348,22 @@ int
__wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp)
{
WT_CURSOR_STATIC_INIT(iface,
- __curdump_get_key, /* get-key */
- __curdump_get_value, /* get-value */
- __curdump_set_key, /* set-key */
- __curdump_set_value, /* set-value */
- __wt_cursor_notsup, /* compare */
- __wt_cursor_notsup, /* equals */
- __curdump_next, /* next */
- __curdump_prev, /* prev */
- __curdump_reset, /* reset */
- __curdump_search, /* search */
- __curdump_search_near, /* search-near */
- __curdump_insert, /* insert */
- __curdump_update, /* update */
- __curdump_remove, /* remove */
- __wt_cursor_notsup, /* reconfigure */
- __curdump_close); /* close */
+ __curdump_get_key, /* get-key */
+ __curdump_get_value, /* get-value */
+ __curdump_set_key, /* set-key */
+ __curdump_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __curdump_next, /* next */
+ __curdump_prev, /* prev */
+ __curdump_reset, /* reset */
+ __curdump_search, /* search */
+ __curdump_search_near, /* search-near */
+ __curdump_insert, /* insert */
+ __curdump_update, /* update */
+ __curdump_remove, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __curdump_close); /* close */
WT_CURSOR *cursor;
WT_CURSOR_DUMP *cdump;
WT_CURSOR_JSON *json;
diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c
index 8bbe1cc8eda..fac903b4770 100644
--- a/src/cursor/cur_file.c
+++ b/src/cursor/cur_file.c
@@ -397,22 +397,22 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
WT_CURSOR **cursorp)
{
WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __curfile_compare, /* compare */
- __curfile_equals, /* equals */
- __curfile_next, /* next */
- __curfile_prev, /* prev */
- __curfile_reset, /* reset */
- __curfile_search, /* search */
- __curfile_search_near, /* search-near */
- __curfile_insert, /* insert */
- __curfile_update, /* update */
- __curfile_remove, /* remove */
- __wt_cursor_reconfigure, /* reconfigure */
- __curfile_close); /* close */
+ __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __curfile_compare, /* compare */
+ __curfile_equals, /* equals */
+ __curfile_next, /* next */
+ __curfile_prev, /* prev */
+ __curfile_reset, /* reset */
+ __curfile_search, /* search */
+ __curfile_search_near, /* search-near */
+ __curfile_insert, /* insert */
+ __curfile_update, /* update */
+ __curfile_remove, /* remove */
+ __wt_cursor_reconfigure, /* reconfigure */
+ __curfile_close); /* close */
WT_BTREE *btree;
WT_CONFIG_ITEM cval;
WT_CURSOR *cursor;
diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c
index 6822055131a..dbe8046ca21 100644
--- a/src/cursor/cur_index.c
+++ b/src/cursor/cur_index.c
@@ -386,22 +386,22 @@ __wt_curindex_open(WT_SESSION_IMPL *session,
const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __curindex_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __curindex_set_value, /* set-value */
- __curindex_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __curindex_next, /* next */
- __curindex_prev, /* prev */
- __curindex_reset, /* reset */
- __curindex_search, /* search */
- __curindex_search_near, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reconfigure */
- __curindex_close); /* close */
+ __wt_cursor_get_key, /* get-key */
+ __curindex_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __curindex_set_value, /* set-value */
+ __curindex_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __curindex_next, /* next */
+ __curindex_prev, /* prev */
+ __curindex_reset, /* reset */
+ __curindex_search, /* search */
+ __curindex_search_near, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __curindex_close); /* close */
WT_CURSOR_INDEX *cindex;
WT_CURSOR *cursor;
WT_DECL_ITEM(tmp);
diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c
index 2cbefa68c5e..38a83217933 100644
--- a/src/cursor/cur_join.c
+++ b/src/cursor/cur_join.c
@@ -8,6 +8,9 @@
#include "wt_internal.h"
+static int __curjoin_insert_endpoint(WT_SESSION_IMPL *,
+ WT_CURSOR_JOIN_ENTRY *, u_int, WT_CURSOR_JOIN_ENDPOINT **);
+
/*
* __curjoin_entry_iter_init --
* Initialize an iteration for the index managed by a join entry.
@@ -17,49 +20,56 @@ static int
__curjoin_entry_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ITER **iterp)
{
- WT_CURSOR *newcur;
WT_CURSOR *to_dup;
WT_DECL_RET;
const char *raw_cfg[] = { WT_CONFIG_BASE(
session, WT_SESSION_open_cursor), "raw", NULL };
const char *def_cfg[] = { WT_CONFIG_BASE(
session, WT_SESSION_open_cursor), NULL };
- const char *uri, **config;
- char *uribuf;
+ const char *urimain, **config;
+ char *mainbuf, *uri;
WT_CURSOR_JOIN_ITER *iter;
size_t size;
iter = NULL;
- uribuf = NULL;
+ mainbuf = uri = NULL;
to_dup = entry->ends[0].cursor;
- uri = to_dup->uri;
if (F_ISSET((WT_CURSOR *)cjoin, WT_CURSTD_RAW))
config = &raw_cfg[0];
else
config = &def_cfg[0];
+ size = strlen(to_dup->internal_uri) + 3;
+ WT_ERR(__wt_calloc(session, size, 1, &uri));
+ snprintf(uri, size, "%s()", to_dup->internal_uri);
+ urimain = cjoin->table->name;
if (cjoin->projection != NULL) {
- size = strlen(uri) + strlen(cjoin->projection) + 1;
- WT_ERR(__wt_calloc(session, size, 1, &uribuf));
- snprintf(uribuf, size, "%s%s", uri, cjoin->projection);
- uri = uribuf;
+ size = strlen(urimain) + strlen(cjoin->projection) + 1;
+ WT_ERR(__wt_calloc(session, size, 1, &mainbuf));
+ snprintf(mainbuf, size, "%s%s", urimain, cjoin->projection);
+ urimain = mainbuf;
}
- WT_ERR(__wt_open_cursor(session, uri, (WT_CURSOR *)cjoin, config,
- &newcur));
- WT_ERR(__wt_cursor_dup_position(to_dup, newcur));
+
WT_ERR(__wt_calloc_one(session, &iter));
+ WT_ERR(__wt_open_cursor(session, uri, (WT_CURSOR *)cjoin, config,
+ &iter->cursor));
+ WT_ERR(__wt_cursor_dup_position(to_dup, iter->cursor));
+ WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config,
+ &iter->main));
iter->cjoin = cjoin;
iter->session = session;
iter->entry = entry;
- iter->cursor = newcur;
- iter->advance = false;
+ iter->positioned = false;
+ iter->isequal = (entry->ends_next == 1 &&
+ WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ);
*iterp = iter;
if (0) {
err: __wt_free(session, iter);
}
- __wt_free(session, uribuf);
+ __wt_free(session, mainbuf);
+ __wt_free(session, uri);
return (ret);
}
@@ -72,18 +82,70 @@ static int
__curjoin_pack_recno(WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf,
size_t bufsize, WT_ITEM *item)
{
- WT_DECL_RET;
WT_SESSION *wtsession;
size_t sz;
wtsession = (WT_SESSION *)session;
- WT_ERR(wiredtiger_struct_size(wtsession, &sz, "r", r));
+ WT_RET(wiredtiger_struct_size(wtsession, &sz, "r", r));
WT_ASSERT(session, sz < bufsize);
- WT_ERR(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r));
+ WT_RET(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r));
item->size = sz;
item->data = buf;
+ return (0);
+}
+
+/*
+ * __curjoin_split_key --
+ * Copy the primary key from a cursor (either main table or index)
+ * to another cursor. When copying from an index file, the index
+ * key is also returned.
+ *
+ */
+static int
+__curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
+ WT_ITEM *idxkey, WT_CURSOR *tocur, WT_CURSOR *fromcur,
+ const char *repack_fmt, bool isindex)
+{
+ WT_CURSOR *firstcg_cur;
+ WT_CURSOR_INDEX *cindex;
+ WT_ITEM *keyp;
+ const uint8_t *p;
-err: return (ret);
+ if (isindex) {
+ cindex = ((WT_CURSOR_INDEX *)fromcur);
+ /*
+ * Repack tells us where the index key ends; advance past
+ * that to get where the raw primary key starts.
+ */
+ WT_RET(__wt_struct_repack(session, cindex->child->key_format,
+ repack_fmt != NULL ? repack_fmt : cindex->iface.key_format,
+ &cindex->child->key, idxkey));
+ WT_ASSERT(session, cindex->child->key.size > idxkey->size);
+ tocur->key.data = (uint8_t *)idxkey->data + idxkey->size;
+ tocur->key.size = cindex->child->key.size - idxkey->size;
+ if (WT_CURSOR_RECNO(tocur)) {
+ p = (const uint8_t *)tocur->key.data;
+ WT_RET(__wt_vunpack_uint(&p, tocur->key.size,
+ &tocur->recno));
+ } else
+ tocur->recno = 0;
+ } else {
+ firstcg_cur = ((WT_CURSOR_TABLE *)fromcur)->cg_cursors[0];
+ keyp = &firstcg_cur->key;
+ if (WT_CURSOR_RECNO(tocur)) {
+ WT_ASSERT(session, keyp->size == sizeof(uint64_t));
+ tocur->recno = *(uint64_t *)keyp->data;
+ WT_RET(__curjoin_pack_recno(session, tocur->recno,
+ cjoin->recno_buf, sizeof(cjoin->recno_buf),
+ &tocur->key));
+ } else {
+ WT_ITEM_SET(tocur->key, *keyp);
+ tocur->recno = 0;
+ }
+ idxkey->data = NULL;
+ idxkey->size = 0;
+ }
+ return (0);
}
/*
@@ -92,45 +154,24 @@ err: return (ret);
*
*/
static int
-__curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_ITEM *primkey,
- uint64_t *rp)
+__curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor)
{
- WT_CURSOR *firstcg_cur;
- WT_CURSOR_JOIN *cjoin;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t r;
-
- if (iter->advance)
- WT_ERR(iter->cursor->next(iter->cursor));
+ if (iter->positioned)
+ WT_RET(iter->cursor->next(iter->cursor));
else
- iter->advance = true;
-
- session = iter->session;
- cjoin = iter->cjoin;
+ iter->positioned = true;
/*
* Set our key to the primary key, we'll also need this
* to check membership.
*/
- if (iter->entry->index != NULL)
- firstcg_cur = ((WT_CURSOR_INDEX *)iter->cursor)->cg_cursors[0];
- else
- firstcg_cur = ((WT_CURSOR_TABLE *)iter->cursor)->cg_cursors[0];
- if (WT_CURSOR_RECNO(&cjoin->iface)) {
- r = *(uint64_t *)firstcg_cur->key.data;
- WT_ERR(__curjoin_pack_recno(session, r, cjoin->recno_buf,
- sizeof(cjoin->recno_buf), primkey));
- *rp = r;
- } else {
- WT_ITEM_SET(*primkey, firstcg_cur->key);
- *rp = 0;
- }
- iter->curkey = primkey;
+ WT_RET(__curjoin_split_key(iter->session, iter->cjoin, &iter->idxkey,
+ cursor, iter->cursor, iter->entry->repack_format,
+ iter->entry->index != NULL));
+ iter->curkey = &cursor->key;
iter->entry->stats.actual_count++;
iter->entry->stats.accesses++;
-
-err: return (ret);
+ return (0);
}
/*
@@ -141,17 +182,15 @@ err: return (ret);
static int
__curjoin_entry_iter_reset(WT_CURSOR_JOIN_ITER *iter)
{
- WT_DECL_RET;
-
- if (iter->advance) {
- WT_ERR(iter->cursor->reset(iter->cursor));
- WT_ERR(__wt_cursor_dup_position(
+ if (iter->positioned) {
+ WT_RET(iter->cursor->reset(iter->cursor));
+ WT_RET(iter->main->reset(iter->main));
+ WT_RET(__wt_cursor_dup_position(
iter->cjoin->entries[0].ends[0].cursor, iter->cursor));
- iter->advance = false;
+ iter->positioned = false;
iter->entry->stats.actual_count = 0;
}
-
-err: return (ret);
+ return (0);
}
/*
@@ -162,7 +201,7 @@ err: return (ret);
static bool
__curjoin_entry_iter_ready(WT_CURSOR_JOIN_ITER *iter)
{
- return (iter->advance);
+ return (iter->positioned);
}
/*
@@ -177,6 +216,8 @@ __curjoin_entry_iter_close(WT_CURSOR_JOIN_ITER *iter)
if (iter->cursor != NULL)
WT_TRET(iter->cursor->close(iter->cursor));
+ if (iter->main != NULL)
+ WT_TRET(iter->main->close(iter->main));
__wt_free(iter->session, iter);
return (ret);
@@ -232,10 +273,8 @@ __curjoin_get_value(WT_CURSOR *cursor, ...)
!__curjoin_entry_iter_ready(iter))
WT_ERR_MSG(session, EINVAL,
"join cursor must be advanced with next()");
- if (iter->entry->index != NULL)
- WT_ERR(__wt_curindex_get_valuev(iter->cursor, ap));
- else
- WT_ERR(__wt_curtable_get_valuev(iter->cursor, ap));
+
+ WT_ERR(__wt_curtable_get_valuev(iter->main, ap));
err: va_end(ap);
API_END_RET(session, ret);
@@ -251,43 +290,26 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
{
WT_COLLATOR *collator;
WT_CURSOR *c;
- WT_CURSOR_INDEX *cindex;
WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
WT_DECL_RET;
WT_DECL_ITEM(uribuf);
- WT_ITEM curkey, curvalue, *k;
- WT_TABLE *maintable;
+ WT_ITEM curkey, curvalue;
const char *raw_cfg[] = { WT_CONFIG_BASE(
session, WT_SESSION_open_cursor), "raw", NULL };
- const char *mainkey_str, *p;
- void *allocbuf;
- size_t mainkey_len, size;
- u_int i;
+ const char *uri;
+ size_t size;
int cmp, skip;
c = NULL;
- allocbuf = NULL;
skip = 0;
- if (entry->index != NULL) {
+ if (entry->index != NULL)
/*
- * Open a cursor having a projection of the keys of the
- * index we're comparing against. Open it raw, we're
- * going to compare it to the raw keys of the
- * reference cursors.
+ * Open the raw index. We're avoiding any references
+ * to the main table, they may be expensive.
*/
- maintable = ((WT_CURSOR_TABLE *)entry->main)->table;
- mainkey_str = maintable->colconf.str + 1;
- for (p = mainkey_str, i = 0;
- p != NULL && i < maintable->nkey_columns; i++)
- p = strchr(p + 1, ',');
- WT_ASSERT(session, p != 0);
- mainkey_len = WT_PTRDIFF(p, mainkey_str);
- size = strlen(entry->index->name) + mainkey_len + 3;
- WT_ERR(__wt_scr_alloc(session, size, &uribuf));
- WT_ERR(__wt_buf_fmt(session, uribuf, "%s(%.*s)",
- entry->index->name, (int)mainkey_len, mainkey_str));
- } else {
+ uri = entry->index->source;
+ else {
/*
* For joins on the main table, we just need the primary
* key for comparison, we don't need any values.
@@ -296,35 +318,38 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
WT_ERR(__wt_scr_alloc(session, size, &uribuf));
WT_ERR(__wt_buf_fmt(session, uribuf, "%s()",
cjoin->table->name));
+ uri = uribuf->data;
}
- WT_ERR(__wt_open_cursor(
- session, uribuf->data, &cjoin->iface, raw_cfg, &c));
+ WT_ERR(__wt_open_cursor(session, uri, &cjoin->iface, raw_cfg, &c));
/* Initially position the cursor if necessary. */
endmax = &entry->ends[entry->ends_next];
- if ((end = &entry->ends[0]) < endmax &&
- F_ISSET(end, WT_CURJOIN_END_GE)) {
- WT_ERR(__wt_cursor_dup_position(end->cursor, c));
- if (end->flags == WT_CURJOIN_END_GE)
- skip = 1;
+ if ((end = &entry->ends[0]) < endmax) {
+ if (F_ISSET(end, WT_CURJOIN_END_GT) ||
+ WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ) {
+ WT_ERR(__wt_cursor_dup_position(end->cursor, c));
+ if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE)
+ skip = 1;
+ } else if (F_ISSET(end, WT_CURJOIN_END_LT)) {
+ if ((ret = c->next(c)) == WT_NOTFOUND)
+ goto done;
+ WT_ERR(ret);
+ } else
+ WT_ERR(__wt_illegal_value(session, NULL));
}
collator = (entry->index == NULL) ? NULL : entry->index->collator;
while (ret == 0) {
WT_ERR(c->get_key(c, &curkey));
if (entry->index != NULL) {
- cindex = (WT_CURSOR_INDEX *)c;
- if (cindex->index->extractor == NULL) {
- /*
- * Repack so it's comparable to the
- * reference endpoints.
- */
- k = &cindex->child->key;
- WT_ERR(__wt_struct_repack(session,
- cindex->child->key_format,
- entry->main->value_format, k, &curkey,
- &allocbuf));
- } else
- curkey = cindex->child->key;
+ /*
+ * Repack so it's comparable to the
+ * reference endpoints.
+ */
+ WT_ERR(__wt_struct_repack(session,
+ c->key_format,
+ (entry->repack_format != NULL ?
+ entry->repack_format : entry->index->idxkey_format),
+ &c->key, &curkey));
}
for (end = &entry->ends[skip]; end < endmax; end++) {
WT_ERR(__wt_compare(session, collator, &curkey,
@@ -345,8 +370,12 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
goto done;
}
}
- if (entry->index != NULL)
- WT_ERR(c->get_value(c, &curvalue));
+ if (entry->index != NULL) {
+ curvalue.data =
+ (unsigned char *)curkey.data + curkey.size;
+ WT_ASSERT(session, c->key.size > curkey.size);
+ curvalue.size = c->key.size - curkey.size;
+ }
else
WT_ERR(c->get_key(c, &curvalue));
WT_ERR(__wt_bloom_insert(bloom, &curvalue));
@@ -361,7 +390,6 @@ done:
err: if (c != NULL)
WT_TRET(c->close(c));
__wt_scr_free(session, &uribuf);
- __wt_free(session, allocbuf);
return (ret);
}
@@ -375,27 +403,23 @@ __curjoin_endpoint_init_key(WT_SESSION_IMPL *session,
{
WT_CURSOR *cursor;
WT_CURSOR_INDEX *cindex;
- WT_DECL_RET;
WT_ITEM *k;
uint64_t r;
- void *allocbuf;
- allocbuf = NULL;
if ((cursor = endpoint->cursor) != NULL) {
if (entry->index != NULL) {
/* Extract and save the index's logical key. */
cindex = (WT_CURSOR_INDEX *)endpoint->cursor;
- WT_ERR(__wt_struct_repack(session,
+ WT_RET(__wt_struct_repack(session,
cindex->child->key_format,
- cindex->iface.key_format,
- &cindex->child->key, &endpoint->key, &allocbuf));
- if (allocbuf != NULL)
- F_SET(endpoint, WT_CURJOIN_END_OWN_KEY);
+ (entry->repack_format != NULL ?
+ entry->repack_format : cindex->iface.key_format),
+ &cindex->child->key, &endpoint->key));
} else {
k = &((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]->key;
if (WT_CURSOR_RECNO(cursor)) {
r = *(uint64_t *)k->data;
- WT_ERR(__curjoin_pack_recno(session, r,
+ WT_RET(__curjoin_pack_recno(session, r,
endpoint->recno_buf,
sizeof(endpoint->recno_buf),
&endpoint->key));
@@ -404,10 +428,7 @@ __curjoin_endpoint_init_key(WT_SESSION_IMPL *session,
endpoint->key = *k;
}
}
- if (0) {
-err: __wt_free(session, allocbuf);
- }
- return (ret);
+ return (0);
}
/*
@@ -419,8 +440,13 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin)
{
WT_BLOOM *bloom;
WT_DECL_RET;
+ WT_CURSOR *origcur;
WT_CURSOR_JOIN_ENTRY *je, *jeend, *je2;
WT_CURSOR_JOIN_ENDPOINT *end;
+ const char *def_cfg[] = { WT_CONFIG_BASE(
+ session, WT_SESSION_open_cursor), NULL };
+ const char *raw_cfg[] = { WT_CONFIG_BASE(
+ session, WT_SESSION_open_cursor), "raw", NULL };
uint32_t f, k;
if (cjoin->entries_next == 0)
@@ -429,9 +455,27 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin)
"cursors");
je = &cjoin->entries[0];
+ jeend = &cjoin->entries[cjoin->entries_next];
+
+ /*
+ * For a single compare=le endpoint in the first iterated entry,
+ * construct a companion compare=ge endpoint that will actually
+ * be iterated.
+ */
+ if (((je = cjoin->entries) != jeend) &&
+ je->ends_next == 1 && F_ISSET(&je->ends[0], WT_CURJOIN_END_LT)) {
+ origcur = je->ends[0].cursor;
+ WT_RET(__curjoin_insert_endpoint(session, je, 0, &end));
+ WT_RET(__wt_open_cursor(session, origcur->uri,
+ (WT_CURSOR *)cjoin,
+ F_ISSET(origcur, WT_CURSTD_RAW) ? raw_cfg : def_cfg,
+ &end->cursor));
+ WT_RET(end->cursor->next(end->cursor));
+ end->flags = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ |
+ WT_CURJOIN_END_OWN_CURSOR;
+ }
WT_RET(__curjoin_entry_iter_init(session, cjoin, je, &cjoin->iter));
- jeend = &cjoin->entries[cjoin->entries_next];
for (je = cjoin->entries; je < jeend; je++) {
__wt_stat_join_init_single(&je->stats);
for (end = &je->ends[0]; end < &je->ends[je->ends_next];
@@ -449,6 +493,10 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin)
F_SET(cjoin, WT_CURJOIN_SKIP_FIRST_LEFT);
if (F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) {
+ if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED)
+ WT_RET_MSG(session, EINVAL,
+ "join cursors with Bloom filters cannot be "
+ "used with read-uncommitted isolation");
if (je->bloom == NULL) {
/*
* Look for compatible filters to be shared,
@@ -520,35 +568,34 @@ __curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
{
WT_COLLATOR *collator;
WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
- WT_DECL_RET;
int cmp;
collator = (entry->index != NULL) ? entry->index->collator : NULL;
endmax = &entry->ends[entry->ends_next];
for (end = &entry->ends[skip_left ? 1 : 0]; end < endmax; end++) {
- WT_ERR(__wt_compare(session, collator, curkey, &end->key,
+ WT_RET(__wt_compare(session, collator, curkey, &end->key,
&cmp));
if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
if (cmp < 0 ||
(cmp == 0 &&
!F_ISSET(end, WT_CURJOIN_END_EQ)) ||
(cmp > 0 && !F_ISSET(end, WT_CURJOIN_END_GT)))
- WT_ERR(WT_NOTFOUND);
+ WT_RET(WT_NOTFOUND);
} else {
if (cmp > 0 ||
(cmp == 0 &&
!F_ISSET(end, WT_CURJOIN_END_EQ)) ||
(cmp < 0 && !F_ISSET(end, WT_CURJOIN_END_LT)))
- WT_ERR(WT_NOTFOUND);
+ WT_RET(WT_NOTFOUND);
}
}
-err: return (ret);
+ return (0);
}
typedef struct {
WT_CURSOR iface;
WT_CURSOR_JOIN_ENTRY *entry;
- int ismember;
+ bool ismember;
} WT_CURJOIN_EXTRACTOR;
/*
@@ -584,8 +631,8 @@ __curjoin_extract_insert(WT_CURSOR *cursor) {
ret = __curjoin_entry_in_range(session, cextract->entry, &ikey, false);
if (ret == WT_NOTFOUND)
ret = 0;
- else
- cextract->ismember = 1;
+ else if (ret == 0)
+ cextract->ismember = true;
return (ret);
}
@@ -602,27 +649,29 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
WT_CURJOIN_EXTRACTOR extract_cursor;
WT_CURSOR *c;
WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __wt_cursor_notsup, /* compare */
- __wt_cursor_notsup, /* equals */
- __wt_cursor_notsup, /* next */
- __wt_cursor_notsup, /* prev */
- __wt_cursor_notsup, /* reset */
- __wt_cursor_notsup, /* search */
- __wt_cursor_notsup, /* search-near */
- __curjoin_extract_insert, /* insert */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* reconfigure */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup); /* close */
+ __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __wt_cursor_notsup, /* next */
+ __wt_cursor_notsup, /* prev */
+ __wt_cursor_notsup, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __curjoin_extract_insert, /* insert */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup); /* close */
WT_DECL_RET;
WT_INDEX *idx;
WT_ITEM *key, v;
bool bloom_found;
+ if (skip_left && entry->ends_next == 1)
+ return (0); /* no checks to make */
key = cjoin->iter->curkey;
entry->stats.accesses++;
bloom_found = false;
@@ -645,24 +694,35 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
bloom_found = true;
}
if (entry->index != NULL) {
- memset(&v, 0, sizeof(v)); /* Keep lint quiet. */
- c = entry->main;
- c->set_key(c, key);
- if ((ret = c->search(c)) == 0)
- ret = c->get_value(c, &v);
- else if (ret == WT_NOTFOUND)
- WT_ERR_MSG(session, WT_ERROR,
- "main table for join is missing entry.");
- WT_TRET(c->reset(c));
- WT_ERR(ret);
+ /*
+ * If this entry is used by the iterator, then we already
+ * have the index key, and we won't have to do any extraction
+ * either.
+ */
+ if (entry == cjoin->iter->entry)
+ WT_ITEM_SET(v, cjoin->iter->idxkey);
+ else {
+ memset(&v, 0, sizeof(v)); /* Keep lint quiet. */
+ c = entry->main;
+ c->set_key(c, key);
+ if ((ret = c->search(c)) == 0)
+ ret = c->get_value(c, &v);
+ else if (ret == WT_NOTFOUND)
+ WT_ERR_MSG(session, WT_ERROR,
+ "main table for join is missing entry");
+ WT_TRET(c->reset(c));
+ WT_ERR(ret);
+ }
} else
- v = *key;
+ WT_ITEM_SET(v, *key);
- if ((idx = entry->index) != NULL && idx->extractor != NULL) {
+ if ((idx = entry->index) != NULL && idx->extractor != NULL &&
+ entry != cjoin->iter->entry) {
+ WT_CLEAR(extract_cursor);
extract_cursor.iface = iface;
extract_cursor.iface.session = &session->iface;
extract_cursor.iface.key_format = idx->exkey_format;
- extract_cursor.ismember = 0;
+ extract_cursor.ismember = false;
extract_cursor.entry = entry;
WT_ERR(idx->extractor->extract(idx->extractor,
&session->iface, key, &v, &extract_cursor.iface));
@@ -685,7 +745,9 @@ err: if (ret == WT_NOTFOUND && bloom_found)
static int
__curjoin_next(WT_CURSOR *cursor)
{
+ WT_CURSOR *c;
WT_CURSOR_JOIN *cjoin;
+ WT_CURSOR_JOIN_ITER *iter;
WT_DECL_RET;
WT_SESSION_IMPL *session;
bool skip_left;
@@ -701,9 +763,11 @@ __curjoin_next(WT_CURSOR *cursor)
if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED))
WT_ERR(__curjoin_init_iter(session, cjoin));
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ iter = cjoin->iter;
+
nextkey:
- if ((ret = __curjoin_entry_iter_next(cjoin->iter, &cursor->key,
- &cursor->recno)) == 0) {
+ if ((ret = __curjoin_entry_iter_next(iter, cursor)) == 0) {
F_SET(cursor, WT_CURSTD_KEY_EXT);
/*
@@ -715,11 +779,31 @@ nextkey:
for (i = 0; i < cjoin->entries_next; i++) {
ret = __curjoin_entry_member(session, cjoin,
&cjoin->entries[i], skip_left);
- if (ret == WT_NOTFOUND)
+ if (ret == WT_NOTFOUND) {
+ /*
+ * If this is compare=eq on our outer iterator,
+ * and we've moved past it, we're done.
+ */
+ if (iter->isequal && i == 0)
+ break;
goto nextkey;
+ }
skip_left = false;
WT_ERR(ret);
}
+ } else if (ret != WT_NOTFOUND)
+ WT_ERR(ret);
+
+ if (ret == 0) {
+ /*
+ * Position the 'main' cursor, this will be used to
+ * retrieve values from the cursor join.
+ */
+ c = iter->main;
+ c->set_key(c, iter->curkey);
+ if ((ret = c->search(c)) != 0)
+ WT_ERR(c->search(c));
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
}
if (0) {
@@ -785,10 +869,11 @@ __curjoin_close(WT_CURSOR *cursor)
for (end = &entry->ends[0];
end < &entry->ends[entry->ends_next]; end++) {
F_CLR(end->cursor, WT_CURSTD_JOINED);
- if (F_ISSET(end, WT_CURJOIN_END_OWN_KEY))
- __wt_free(session, end->key.data);
+ if (F_ISSET(end, WT_CURJOIN_END_OWN_CURSOR))
+ WT_TRET(end->cursor->close(end->cursor));
}
__wt_free(session, entry->ends);
+ __wt_free(session, entry->repack_format);
}
if (cjoin->iter != NULL)
@@ -810,22 +895,22 @@ __wt_curjoin_open(WT_SESSION_IMPL *session,
const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
WT_CURSOR_STATIC_INIT(iface,
- __curjoin_get_key, /* get-key */
- __curjoin_get_value, /* get-value */
- __wt_cursor_notsup, /* set-key */
- __wt_cursor_notsup, /* set-value */
- __wt_cursor_notsup, /* compare */
- __wt_cursor_notsup, /* equals */
- __curjoin_next, /* next */
- __wt_cursor_notsup, /* prev */
- __curjoin_reset, /* reset */
- __wt_cursor_notsup, /* search */
- __wt_cursor_notsup, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reconfigure */
- __curjoin_close); /* close */
+ __curjoin_get_key, /* get-key */
+ __curjoin_get_value, /* get-value */
+ __wt_cursor_set_key_notsup, /* set-key */
+ __wt_cursor_set_value_notsup, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __curjoin_next, /* next */
+ __wt_cursor_notsup, /* prev */
+ __curjoin_reset, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __curjoin_close); /* close */
WT_CURSOR *cursor;
WT_CURSOR_JOIN *cjoin;
WT_DECL_ITEM(tmp);
@@ -891,22 +976,22 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
WT_INDEX *idx, WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range,
uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count)
{
+ WT_CURSOR_INDEX *cindex;
+ WT_CURSOR_JOIN_ENDPOINT *end;
WT_CURSOR_JOIN_ENTRY *entry;
WT_DECL_RET;
- WT_CURSOR_JOIN_ENDPOINT *end, *newend;
bool hasins, needbloom, range_eq;
- u_int i, ins, nonbloom;
+ char *main_uri, *newformat;
const char *raw_cfg[] = { WT_CONFIG_BASE(
session, WT_SESSION_open_cursor), "raw", NULL };
- char *main_uri;
- size_t namesize, newsize;
+ size_t len, newsize;
+ u_int i, ins, nonbloom;
entry = NULL;
hasins = needbloom = false;
ins = 0; /* -Wuninitialized */
main_uri = NULL;
nonbloom = 0; /* -Wuninitialized */
- namesize = strlen(cjoin->table->name);
for (i = 0; i < cjoin->entries_next; i++) {
if (cjoin->entries[i].index == idx) {
@@ -982,13 +1067,13 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
((range & WT_CURJOIN_END_GT) != 0 || range_eq)) ||
(F_ISSET(end, WT_CURJOIN_END_LT) &&
((range & WT_CURJOIN_END_LT) != 0 || range_eq)) ||
- (end->flags == WT_CURJOIN_END_EQ &&
+ (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ &&
(range & (WT_CURJOIN_END_LT | WT_CURJOIN_END_GT))
!= 0))
WT_ERR_MSG(session, EINVAL,
"join has overlapping ranges");
if (range == WT_CURJOIN_END_EQ &&
- end->flags == WT_CURJOIN_END_EQ &&
+ WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ &&
!F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
WT_ERR_MSG(session, EINVAL,
"compare=eq can only be combined "
@@ -1013,31 +1098,70 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
entry->bloom_hash_count =
WT_MAX(entry->bloom_hash_count, bloom_hash_count);
}
- WT_ERR(__wt_realloc_def(session, &entry->ends_allocated,
- entry->ends_next + 1, &entry->ends));
- if (!hasins)
- ins = entry->ends_next;
- newend = &entry->ends[ins];
- memmove(newend + 1, newend,
- (entry->ends_next - ins) * sizeof(WT_CURSOR_JOIN_ENDPOINT));
- memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT));
- entry->ends_next++;
- newend->cursor = ref_cursor;
- F_SET(newend, range);
+ WT_ERR(__curjoin_insert_endpoint(session, entry,
+ hasins ? ins : entry->ends_next, &end));
+ end->cursor = ref_cursor;
+ F_SET(end, range);
/* Open the main file with a projection of the indexed columns. */
- if (entry->main == NULL && entry->index != NULL) {
- namesize = strlen(cjoin->table->name);
- newsize = namesize + entry->index->colconf.len + 1;
+ if (entry->main == NULL && idx != NULL) {
+ newsize = strlen(cjoin->table->name) + idx->colconf.len + 1;
WT_ERR(__wt_calloc(session, 1, newsize, &main_uri));
snprintf(main_uri, newsize, "%s%.*s",
- cjoin->table->name, (int)entry->index->colconf.len,
- entry->index->colconf.str);
+ cjoin->table->name, (int)idx->colconf.len,
+ idx->colconf.str);
WT_ERR(__wt_open_cursor(session, main_uri,
(WT_CURSOR *)cjoin, raw_cfg, &entry->main));
+ if (idx->extractor == NULL) {
+ /*
+ * Add no-op padding so trailing 'u' formats are not
+ * transformed to 'U'. This matches what happens in
+ * the index. We don't do this when we have an
+ * extractor, extractors already use the padding
+ * byte trick.
+ */
+ len = strlen(entry->main->value_format) + 3;
+ WT_ERR(__wt_calloc(session, len, 1, &newformat));
+ snprintf(newformat, len, "%s0x",
+ entry->main->value_format);
+ __wt_free(session, entry->main->value_format);
+ entry->main->value_format = newformat;
+ }
+
+ /*
+ * When we are repacking index keys to remove the primary
+ * key, we never want to transform trailing 'u'. Use no-op
+ * padding to force this.
+ */
+ cindex = (WT_CURSOR_INDEX *)ref_cursor;
+ len = strlen(cindex->iface.key_format) + 3;
+ WT_ERR(__wt_calloc(session, len, 1, &entry->repack_format));
+ snprintf(entry->repack_format, len, "%s0x",
+ cindex->iface.key_format);
}
-err: if (main_uri != NULL)
- __wt_free(session, main_uri);
+err: __wt_free(session, main_uri);
return (ret);
}
+
+/*
+ * __curjoin_insert_endpoint --
+ * Insert a new entry into the endpoint array for the join entry.
+ */
+static int
+__curjoin_insert_endpoint(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
+ u_int pos, WT_CURSOR_JOIN_ENDPOINT **newendp)
+{
+ WT_CURSOR_JOIN_ENDPOINT *newend;
+
+ WT_RET(__wt_realloc_def(session, &entry->ends_allocated,
+ entry->ends_next + 1, &entry->ends));
+ newend = &entry->ends[pos];
+ memmove(newend + 1, newend,
+ (entry->ends_next - pos) * sizeof(WT_CURSOR_JOIN_ENDPOINT));
+ memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT));
+ entry->ends_next++;
+ *newendp = newend;
+
+ return (0);
+}
diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c
index 3fcd8a86066..0a13803da5d 100644
--- a/src/cursor/cur_log.c
+++ b/src/cursor/cur_log.c
@@ -347,22 +347,22 @@ __wt_curlog_open(WT_SESSION_IMPL *session,
{
WT_CONNECTION_IMPL *conn;
WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __curlog_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __curlog_next, /* next */
- __wt_cursor_notsup, /* prev */
- __curlog_reset, /* reset */
- __curlog_search, /* search */
- __wt_cursor_notsup, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reconfigure */
- __curlog_close); /* close */
+ __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __curlog_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __curlog_next, /* next */
+ __wt_cursor_notsup, /* prev */
+ __curlog_reset, /* reset */
+ __curlog_search, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __curlog_close); /* close */
WT_CURSOR *cursor;
WT_CURSOR_LOG *cl;
WT_DECL_RET;
@@ -397,7 +397,7 @@ __wt_curlog_open(WT_SESSION_IMPL *session,
* The user may be trying to read a log record they just wrote.
* Log records may be buffered, so force out any now.
*/
- WT_ERR(__wt_log_force_write(session, 1));
+ WT_ERR(__wt_log_force_write(session, 1, NULL));
/* Log cursors block archiving. */
WT_ERR(__wt_readlock(session, log->log_archive_lock));
diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c
index df66ef34ddd..3d702e2ea8c 100644
--- a/src/cursor/cur_metadata.c
+++ b/src/cursor/cur_metadata.c
@@ -31,6 +31,58 @@
} while (0)
/*
+ * __wt_schema_create_final --
+ * Create a single configuration line from a set of configuration strings,
+ * including all of the defaults declared for a session.create, and stripping
+ * any configuration strings that don't belong in a session.create. Here for
+ * the wt dump command utility, which reads a set of configuration strings and
+ * needs to add in the defaults and then collapse them into a single string for
+ * a subsequent load.
+ */
+int
+__wt_schema_create_final(
+ WT_SESSION_IMPL *session, char *cfg_arg[], char **value_ret)
+{
+ WT_DECL_RET;
+ u_int i;
+ const char **cfg;
+
+ /*
+ * Count the entries in the original,
+ * Allocate a copy with the defaults as the first entry,
+ * Collapse the whole thing into a single configuration string (which
+ * also strips any entries that don't appear in the first entry).
+ */
+ for (i = 0; cfg_arg[i] != NULL; ++i)
+ ;
+ WT_RET(__wt_calloc_def(session, i + 2, &cfg));
+ cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_create);
+ for (i = 0; cfg_arg[i] != NULL; ++i)
+ cfg[i + 1] = cfg_arg[i];
+ cfg[i + 1] = NULL;
+
+ ret = __wt_config_collapse(session, cfg, value_ret);
+
+ __wt_free(session, cfg);
+ return (ret);
+}
+
+/*
+ * __schema_create_strip --
+ * Discard any configuration information from a schema entry that is not
+ * applicable to a session.create call. Here for the metadata:create URI.
+ */
+static int
+__schema_create_strip(
+ WT_SESSION_IMPL *session, const char *value, char **value_ret)
+{
+ const char *cfg[] =
+ { WT_CONFIG_BASE(session, WT_SESSION_create), value, NULL };
+
+ return (__wt_config_collapse(session, cfg, value_ret));
+}
+
+/*
* __curmetadata_setkv --
* Copy key/value into the public cursor, stripping internal metadata for
* "create-only" cursors.
@@ -49,8 +101,7 @@ __curmetadata_setkv(WT_CURSOR_METADATA *mdc, WT_CURSOR *fc)
c->key.data = fc->key.data;
c->key.size = fc->key.size;
if (F_ISSET(mdc, WT_MDC_CREATEONLY)) {
- WT_RET(__wt_schema_create_strip(
- session, fc->value.data, NULL, &value));
+ WT_RET(__schema_create_strip(session, fc->value.data, &value));
ret = __wt_buf_set(
session, &c->value, value, strlen(value) + 1);
__wt_free(session, value);
@@ -92,8 +143,7 @@ __curmetadata_metadata_search(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
WT_RET(__wt_metadata_search(session, WT_METAFILE_URI, &value));
if (F_ISSET(mdc, WT_MDC_CREATEONLY)) {
- ret = __wt_schema_create_strip(
- session, value, NULL, &stripped);
+ ret = __schema_create_strip(session, value, &stripped);
__wt_free(session, value);
WT_RET(ret);
value = stripped;
@@ -448,22 +498,22 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session,
const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __curmetadata_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __curmetadata_next, /* next */
- __curmetadata_prev, /* prev */
- __curmetadata_reset, /* reset */
- __curmetadata_search, /* search */
- __curmetadata_search_near, /* search-near */
- __curmetadata_insert, /* insert */
- __curmetadata_update, /* update */
- __curmetadata_remove, /* remove */
- __wt_cursor_notsup, /* reconfigure */
- __curmetadata_close); /* close */
+ __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __curmetadata_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __curmetadata_next, /* next */
+ __curmetadata_prev, /* prev */
+ __curmetadata_reset, /* reset */
+ __curmetadata_search, /* search */
+ __curmetadata_search_near, /* search-near */
+ __curmetadata_insert, /* insert */
+ __curmetadata_update, /* update */
+ __curmetadata_remove, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __curmetadata_close); /* close */
WT_CURSOR *cursor;
WT_CURSOR_METADATA *mdc;
WT_DECL_RET;
diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c
index 00a6ade21c6..f7a8f5fc866 100644
--- a/src/cursor/cur_stat.c
+++ b/src/cursor/cur_stat.c
@@ -200,8 +200,6 @@ __curstat_next(WT_CURSOR *cursor)
if (cst->notinitialized) {
WT_ERR(__wt_curstat_init(
session, cursor->internal_uri, NULL, cst->cfg, cst));
- if (cst->next_set != NULL)
- WT_ERR((*cst->next_set)(session, cst, true, true));
cst->notinitialized = false;
}
@@ -209,6 +207,8 @@ __curstat_next(WT_CURSOR *cursor)
if (cst->notpositioned) {
cst->notpositioned = false;
cst->key = WT_STAT_KEY_MIN(cst);
+ if (cst->next_set != NULL)
+ WT_ERR((*cst->next_set)(session, cst, true, true));
} else if (cst->key < WT_STAT_KEY_MAX(cst))
++cst->key;
else if (cst->next_set != NULL)
@@ -244,8 +244,6 @@ __curstat_prev(WT_CURSOR *cursor)
if (cst->notinitialized) {
WT_ERR(__wt_curstat_init(
session, cursor->internal_uri, NULL, cst->cfg, cst));
- if (cst->next_set != NULL)
- WT_ERR((*cst->next_set)(session, cst, false, true));
cst->notinitialized = false;
}
@@ -253,6 +251,8 @@ __curstat_prev(WT_CURSOR *cursor)
if (cst->notpositioned) {
cst->notpositioned = false;
cst->key = WT_STAT_KEY_MAX(cst);
+ if (cst->next_set != NULL)
+ WT_ERR((*cst->next_set)(session, cst, false, true));
} else if (cst->key > WT_STAT_KEY_MIN(cst))
--cst->key;
else if (cst->next_set != NULL)
@@ -449,7 +449,6 @@ __curstat_join_next_set(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst,
WT_JOIN_STATS_GROUP *join_group;
ssize_t pos;
- WT_ASSERT(session, WT_STREQ(cst->iface.uri, "statistics:join"));
join_group = &cst->u.join_stats_group;
cjoin = join_group->join_cursor;
if (init)
@@ -504,14 +503,13 @@ __curstat_join_init(WT_SESSION_IMPL *session,
WT_CURSOR *curjoin, const char *cfg[], WT_CURSOR_STAT *cst)
{
WT_CURSOR_JOIN *cjoin;
- WT_DECL_RET;
WT_UNUSED(cfg);
if (curjoin == NULL && cst->u.join_stats_group.join_cursor != NULL)
curjoin = &cst->u.join_stats_group.join_cursor->iface;
if (curjoin == NULL || !WT_PREFIX_MATCH(curjoin->uri, "join:"))
- WT_ERR_MSG(session, EINVAL,
+ WT_RET_MSG(session, EINVAL,
"join cursor must be used with statistics:join");
cjoin = (WT_CURSOR_JOIN *)curjoin;
memset(&cst->u.join_stats_group, 0, sizeof(WT_JOIN_STATS_GROUP));
@@ -522,8 +520,7 @@ __curstat_join_init(WT_SESSION_IMPL *session,
cst->stats_count = sizeof(WT_JOIN_STATS) / sizeof(int64_t);
cst->stats_desc = __curstat_join_desc;
cst->next_set = __curstat_join_next_set;
-
-err: return (ret);
+ return (0);
}
/*
@@ -544,25 +541,28 @@ __wt_curstat_init(WT_SESSION_IMPL *session,
dsrc_uri = uri + strlen("statistics:");
if (WT_STREQ(dsrc_uri, "join"))
- return (__curstat_join_init(session, curjoin, cfg, cst));
+ WT_RET(__curstat_join_init(session, curjoin, cfg, cst));
- if (WT_PREFIX_MATCH(dsrc_uri, "colgroup:"))
- return (
+ else if (WT_PREFIX_MATCH(dsrc_uri, "colgroup:"))
+ WT_RET(
__wt_curstat_colgroup_init(session, dsrc_uri, cfg, cst));
- if (WT_PREFIX_MATCH(dsrc_uri, "file:"))
- return (__curstat_file_init(session, dsrc_uri, cfg, cst));
+ else if (WT_PREFIX_MATCH(dsrc_uri, "file:"))
+ WT_RET(__curstat_file_init(session, dsrc_uri, cfg, cst));
- if (WT_PREFIX_MATCH(dsrc_uri, "index:"))
- return (__wt_curstat_index_init(session, dsrc_uri, cfg, cst));
+ else if (WT_PREFIX_MATCH(dsrc_uri, "index:"))
+ WT_RET(__wt_curstat_index_init(session, dsrc_uri, cfg, cst));
- if (WT_PREFIX_MATCH(dsrc_uri, "lsm:"))
- return (__wt_curstat_lsm_init(session, dsrc_uri, cst));
+ else if (WT_PREFIX_MATCH(dsrc_uri, "lsm:"))
+ WT_RET(__wt_curstat_lsm_init(session, dsrc_uri, cst));
- if (WT_PREFIX_MATCH(dsrc_uri, "table:"))
- return (__wt_curstat_table_init(session, dsrc_uri, cfg, cst));
+ else if (WT_PREFIX_MATCH(dsrc_uri, "table:"))
+ WT_RET(__wt_curstat_table_init(session, dsrc_uri, cfg, cst));
- return (__wt_bad_object_type(session, uri));
+ else
+ return (__wt_bad_object_type(session, uri));
+
+ return (0);
}
/*
@@ -575,22 +575,22 @@ __wt_curstat_open(WT_SESSION_IMPL *session,
{
WT_CONNECTION_IMPL *conn;
WT_CURSOR_STATIC_INIT(iface,
- __curstat_get_key, /* get-key */
- __curstat_get_value, /* get-value */
- __curstat_set_key, /* set-key */
- __curstat_set_value, /* set-value */
- __wt_cursor_notsup, /* compare */
- __wt_cursor_notsup, /* equals */
- __curstat_next, /* next */
- __curstat_prev, /* prev */
- __curstat_reset, /* reset */
- __curstat_search, /* search */
- __wt_cursor_notsup, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reconfigure */
- __curstat_close); /* close */
+ __curstat_get_key, /* get-key */
+ __curstat_get_value, /* get-value */
+ __curstat_set_key, /* set-key */
+ __curstat_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __curstat_next, /* next */
+ __curstat_prev, /* prev */
+ __curstat_reset, /* reset */
+ __curstat_search, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __curstat_close); /* close */
WT_CONFIG_ITEM cval, sval;
WT_CURSOR *cursor;
WT_CURSOR_STAT *cst;
diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c
index 051f36c8854..7839971f975 100644
--- a/src/cursor/cur_std.c
+++ b/src/cursor/cur_std.c
@@ -9,27 +9,108 @@
#include "wt_internal.h"
/*
+ * __wt_cursor_noop --
+ * Cursor noop.
+ */
+int
+__wt_cursor_noop(WT_CURSOR *cursor)
+{
+ WT_UNUSED(cursor);
+
+ return (0);
+}
+
+/*
* __wt_cursor_notsup --
* Unsupported cursor actions.
*/
int
__wt_cursor_notsup(WT_CURSOR *cursor)
{
- WT_UNUSED(cursor);
+ WT_SESSION_IMPL *session;
- return (ENOTSUP);
+ session = (WT_SESSION_IMPL *)cursor->session;
+ WT_RET_MSG(session, ENOTSUP, "Unsupported cursor operation");
}
/*
- * __wt_cursor_noop --
- * Cursor noop.
+ * __wt_cursor_get_value_notsup --
+ * WT_CURSOR.get_value not-supported.
*/
int
-__wt_cursor_noop(WT_CURSOR *cursor)
+__wt_cursor_get_value_notsup(WT_CURSOR *cursor, ...)
{
- WT_UNUSED(cursor);
+ return (__wt_cursor_notsup(cursor));
+}
- return (0);
+/*
+ * __wt_cursor_set_key_notsup --
+ * WT_CURSOR.set_key not-supported.
+ */
+void
+__wt_cursor_set_key_notsup(WT_CURSOR *cursor, ...)
+{
+ (void)__wt_cursor_notsup(cursor);
+}
+
+/*
+ * __wt_cursor_set_value_notsup --
+ * WT_CURSOR.set_value not-supported.
+ */
+void
+__wt_cursor_set_value_notsup(WT_CURSOR *cursor, ...)
+{
+ (void)__wt_cursor_notsup(cursor);
+}
+
+/*
+ * __wt_cursor_compare_notsup --
+ * Unsupported cursor comparison.
+ */
+int
+__wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
+{
+ WT_UNUSED(b);
+ WT_UNUSED(cmpp);
+
+ return (__wt_cursor_notsup(a));
+}
+
+/*
+ * __wt_cursor_equals_notsup --
+ * Unsupported cursor equality.
+ */
+int
+__wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp)
+{
+ WT_UNUSED(other);
+ WT_UNUSED(equalp);
+
+ return (__wt_cursor_notsup(cursor));
+}
+
+/*
+ * __wt_cursor_search_near_notsup --
+ * Unsupported cursor search-near.
+ */
+int
+__wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact)
+{
+ WT_UNUSED(exact);
+
+ return (__wt_cursor_notsup(cursor));
+}
+
+/*
+ * __wt_cursor_reconfigure_notsup --
+ * Unsupported cursor reconfiguration.
+ */
+int
+__wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config)
+{
+ WT_UNUSED(config);
+
+ return (__wt_cursor_notsup(cursor));
}
/*
@@ -46,13 +127,12 @@ __wt_cursor_set_notsup(WT_CURSOR *cursor)
* cursors in a session. Reconfigure is left open in case it's possible
* in the future to change these configurations.
*/
- cursor->compare =
- (int (*)(WT_CURSOR *, WT_CURSOR *, int *))__wt_cursor_notsup;
+ cursor->compare = __wt_cursor_compare_notsup;
cursor->next = __wt_cursor_notsup;
cursor->prev = __wt_cursor_notsup;
cursor->reset = __wt_cursor_noop;
cursor->search = __wt_cursor_notsup;
- cursor->search_near = (int (*)(WT_CURSOR *, int *))__wt_cursor_notsup;
+ cursor->search_near = __wt_cursor_search_near_notsup;
cursor->insert = __wt_cursor_notsup;
cursor->update = __wt_cursor_notsup;
cursor->remove = __wt_cursor_notsup;
@@ -628,7 +708,7 @@ __wt_cursor_init(WT_CURSOR *cursor,
} else {
WT_RET(
__wt_config_gets_def(session, cfg, "readonly", 0, &cval));
- if (cval.val != 0) {
+ if (cval.val != 0 || F_ISSET(S2C(session), WT_CONN_READONLY)) {
cursor->insert = __wt_cursor_notsup;
cursor->update = __wt_cursor_notsup;
cursor->remove = __wt_cursor_notsup;
diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c
index d986577f640..9eb88ec6fcd 100644
--- a/src/cursor/cur_table.c
+++ b/src/cursor/cur_table.c
@@ -79,22 +79,22 @@ __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx,
WT_CURSOR *cur, WT_CURSOR_TABLE *ctable, int (*f)(WT_CURSOR *))
{
WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __wt_cursor_notsup, /* compare */
- __wt_cursor_notsup, /* equals */
- __wt_cursor_notsup, /* next */
- __wt_cursor_notsup, /* prev */
- __wt_cursor_notsup, /* reset */
- __wt_cursor_notsup, /* search */
- __wt_cursor_notsup, /* search-near */
- __curextract_insert, /* insert */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* reconfigure */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup); /* close */
+ __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __wt_cursor_notsup, /* next */
+ __wt_cursor_notsup, /* prev */
+ __wt_cursor_notsup, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __curextract_insert, /* insert */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup); /* close */
WT_CURSOR_EXTRACTOR extract_cursor;
WT_DECL_RET;
WT_ITEM key, value;
@@ -842,22 +842,22 @@ __wt_curtable_open(WT_SESSION_IMPL *session,
const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
WT_CURSOR_STATIC_INIT(iface,
- __wt_curtable_get_key, /* get-key */
- __wt_curtable_get_value, /* get-value */
- __wt_curtable_set_key, /* set-key */
- __wt_curtable_set_value, /* set-value */
- __curtable_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __curtable_next, /* next */
- __curtable_prev, /* prev */
- __curtable_reset, /* reset */
- __curtable_search, /* search */
- __curtable_search_near, /* search-near */
- __curtable_insert, /* insert */
- __curtable_update, /* update */
- __curtable_remove, /* remove */
- __wt_cursor_reconfigure, /* reconfigure */
- __curtable_close); /* close */
+ __wt_curtable_get_key, /* get-key */
+ __wt_curtable_get_value, /* get-value */
+ __wt_curtable_set_key, /* set-key */
+ __wt_curtable_set_value, /* set-value */
+ __curtable_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __curtable_next, /* next */
+ __curtable_prev, /* prev */
+ __curtable_reset, /* reset */
+ __curtable_search, /* search */
+ __curtable_search_near, /* search-near */
+ __curtable_insert, /* insert */
+ __curtable_update, /* update */
+ __curtable_remove, /* remove */
+ __wt_cursor_reconfigure, /* reconfigure */
+ __curtable_close); /* close */
WT_CONFIG_ITEM cval;
WT_CURSOR *cursor;
WT_CURSOR_TABLE *ctable;
diff --git a/src/docs/checkpoint.dox b/src/docs/checkpoint.dox
index 523c0887859..ec28fea13c3 100644
--- a/src/docs/checkpoint.dox
+++ b/src/docs/checkpoint.dox
@@ -23,11 +23,16 @@ All transactional updates committed before a checkpoint are made durable
by the checkpoint, therefore the frequency of checkpoints limits the
volume of data that may be lost due to application or system failure.
-When WiredTiger data sources are first opened, they are opened in the
-state of the most recent checkpoint taken on the file, in other words,
-updates after the most recent checkpoint will not appear in the data
-source. If no checkpoint is found when the data source is opened, the
-data source will appear empty.
+Data sources that are involved in an exclusive operation when the
+checkpoint starts, including bulk load, verify or salvage, will be skipped
+by the checkpoint. Operations requiring exclusive access may fail with
+an \c EBUSY error if attempted during a checkpoint.
+
+When data sources are first opened, they are opened in the state of the
+most recent checkpoint taken on the file, in other words, updates after the
+most recent checkpoint will not appear in the data source. If no
+checkpoint is found when the data source is opened, the data source will
+appear empty.
@section checkpoint_server Automatic checkpoints
@@ -54,15 +59,16 @@ checkpoint cursor is closed.
@section checkpoint_naming Checkpoint naming
-Additionally, checkpoints that do not include LSM trees may optionally
-be given names by the application. Checkpoints named by the application
-persist until explicitly discarded or the application creates a new
-checkpoint with the same name (which replaces the previous checkpoint
-of that name). If the previous checkpoint cannot be replaced, either
-because a cursor is reading from the previous checkpoint, or backups are
-in progress, the checkpoint will fail. Because named checkpoints
-persist until discarded or replaced, they can be used to periodically
-snapshot data for later use.
+Additionally, checkpoints that do not include LSM trees may optionally be
+given names by the application. Because named checkpoints persist until
+discarded or replaced, they can be used to periodically snapshot data for
+later use.
+
+Checkpoints named by the application persist until explicitly discarded or
+the application creates a new checkpoint with the same name (which replaces
+the previous checkpoint of that name). If the previous checkpoint cannot be
+replaced, either because a cursor is reading from the previous checkpoint,
+or backups are in progress, the checkpoint will fail.
Internal checkpoints (that is, checkpoints not named by the application)
use the reserved name "WiredTigerCheckpoint". Applications can open the
diff --git a/src/docs/command-line.dox b/src/docs/command-line.dox
index e2b376d5e3f..0f5c56d25ce 100644
--- a/src/docs/command-line.dox
+++ b/src/docs/command-line.dox
@@ -41,7 +41,7 @@ by default and commands that only read data will not run recovery.
Perform a backup of a database or set of data sources.
The \c backup command performs a backup of the database, copying the
-database files to a \c specified directory, which can be subsequently
+underlying files to a \c specified directory, which can be subsequently
opened as a WiredTiger database. See @ref backup for more information,
and @ref file_permissions for specifics on the copied file permissions.
@@ -58,10 +58,10 @@ the named data sources.
<hr>
@section util_compact wt compact
-Compact a table or file.
+Compact a table.
-The \c compact command attempts to rewrite the specified table or file
-to consume less disk space.
+The \c compact command attempts to rewrite the specified table to
+consume less disk space.
@subsection util_compact_synopsis Synopsis
<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] compact uri</code>
@@ -71,7 +71,7 @@ The \c compact command has no command-specific options.
<hr>
@section util_create wt create
-Create a table or file.
+Create a table.
The \c create command creates the specified \c uri with the specified
configuration. It is equivalent to a call to WT_SESSION::create with
@@ -88,7 +88,7 @@ Include a configuration string to be passed to WT_SESSION::create.
<hr>
@section util_drop wt drop
-Drop a table or file.
+Drop a table.
The \c drop command drops the specified \c uri. It is equivalent to a
call to WT_SESSION::drop with the "force" configuration argument.
@@ -136,10 +136,10 @@ printable characters unencoded).
<hr>
@section util_list wt list
-List the tables and files in the database.
+List the tables in the database.
-By default, the \c list command prints out the tables and files stored in
-the database. If a URI is specified as an argument, only information about
+By default, the \c list command prints out the tables stored in the
+database. If a URI is specified as an argument, only information about
that data source is printed.
@subsection util_list_synopsis Synopsis
@@ -158,16 +158,16 @@ value is printed.
<hr>
@section util_load wt load
-Load a table or file from dump output.
+Load a table from dump output.
The \c load command reads the standard input for data and loads it into
-a table or file, creating the table or file if it does not yet exist.
-The data should be the format produced by the \c dump command; see
-@ref dump_formats for details.
+a table, creating the table if it does not yet exist. The data should
+be the format produced by the \c dump command; see @ref dump_formats for
+details.
-By default, if the table or file already exists, data in the file or
-table will be overwritten by the new data (use the \c -n option to
-make an attempt to overwrite existing data return an error).
+By default, if the table already exists, data in the table will be
+overwritten by the new data (use the \c -n option to make an attempt to
+overwrite existing data return an error).
@subsection util_load_synopsis Synopsis
<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] load [-ajn] [-f input] [-r name] [uri configuration ...]</code>
@@ -182,8 +182,8 @@ number keys. The \c -a option is only applicable when loading into a
column store.
@par <code>-f</code>
-By default, the \c load command reads from the standard input; the \c
--f option reads the input from the specified file.
+By default, the \c load command reads from the standard input; the \c -f
+option reads the input from the specified file.
@par <code>-j</code>
Load input in the JSON (<a href="http://www.json.org">JavaScript Object
@@ -196,7 +196,7 @@ load command to fail if there's an attempt to overwrite already existing
data.
@par <code>-r</code>
-By default, the \c load command uses the table or file name taken from the
+By default, the \c load command uses the table name taken from the
input; the \c -r option renames the data source.
Additionally, \c uri and \c configuration pairs may be specified to the
@@ -227,24 +227,23 @@ table:xxx block_allocation=first table:xxx prefix_compress=false
<hr>
@section util_loadtext wt loadtext
-Load text into a table or file.
+Load text into a table.
The \c loadtext command reads the standard input for text and loads it
-into a table or file. The input data should be printable characters,
-with newline delimiters for each key or value.
+into a table. The input data should be printable characters, with
+newline delimiters for each key or value.
-The \c loadtext command does not create the file if it does not yet
+The \c loadtext command does not create the object if it does not yet
exist.
-In the case of inserting values into a column-store table or file, each
-value is appended to the table or file; in the case of inserting values
-into a row-store table or file, lines are handled in pairs, where the
-first line is the key and the second line is the value. If the
-row-store table or file already exists, data in the table or file will
-be overwritten by the new data.
+In the case of inserting values into a column-store table, each value
+is appended to the table; in the case of inserting values into a
+row-store table, lines are handled in pairs, where the first line is the
+key and the second line is the value. If the row-store table already
+exists, data in the table will be overwritten by the new data.
@subsection util_loadtext_synopsis Synopsis
-<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] loadtext [-f input]</code>
+<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] loadtext [-f input] uri</code>
@subsection util_loadtext_options Options
The following are command-specific options for the \c loadtext command:
@@ -275,7 +274,7 @@ to the default string format.
<hr>
@section util_read wt read
-Read records from a table or file.
+Read records from a table.
The \c read command prints out the records associated with the specified
keys from the specified data source. The data source must be configured
@@ -291,9 +290,9 @@ The \c read command has no command-specific options.
<hr>
@section util_rename wt rename
-Rename a table or file.
+Rename a table.
-The \c rename command renames the specified table or file.
+The \c rename command renames the specified table.
@subsection util_rename_synopsis Synopsis
<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] rename uri name</code>
@@ -303,11 +302,11 @@ The \c rename command has no command-specific options.
<hr>
@section util_salvage wt salvage
-Recover data from a corrupted file.
+Recover data from a corrupted table.
The \c salvage command salvages the specified data source, discarding any
-data that cannot be recovered. Underlying files are re-written in
-place, overwriting the original file contents.
+data that cannot be recovered. Underlying files are re-written in place,
+overwriting the original file contents.
@subsection util_salvage_synopsis Synopsis
<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] salvage [-F force] uri</code>
@@ -316,9 +315,9 @@ place, overwriting the original file contents.
The following are command-specific options for the \c salvage command:
@par <code>-F</code>
-By default, salvage will refuse to salvage files that fail basic tests
-(for example, files that don't appear to be in a WiredTiger format).
-The \c -F option forces the salvage of the file, regardless.
+By default, salvage will refuse to salvage tables that fail basic tests
+(for example, tables that don't appear to be in a WiredTiger format).
+The \c -F option forces the salvage of the table, regardless.
<hr>
@section util_stat wt stat
@@ -339,11 +338,11 @@ Include only "fast" statistics in the output (equivalent to passing
<hr>
@section util_upgrade wt upgrade
-Upgrade a table or file.
+Upgrade a table.
-The \c upgrade command upgrades the specified table or file, exiting
-success if the data source is up-to-date, and failure if the data source
-cannot be upgraded.
+The \c upgrade command upgrades the specified table, exiting success if
+the data source is up-to-date, and failure if the data source cannot be
+upgraded.
@subsection util_upgrade_synopsis Synopsis
<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] upgrade uri</code>
@@ -353,11 +352,10 @@ The \c upgrade command has no command-specific options.
<hr>
@section util_verify wt verify
-Check the structural integrity of a table or file.
+Check the structural integrity of a table.
-The \c verify command verifies the specified table or file, exiting
-success if the data source is correct, and failure if the data source is
-corrupted.
+The \c verify command verifies the specified table, exiting success if
+the data source is correct, and failure if the data source is corrupted.
@subsection util_verify_synopsis Synopsis
<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] verify uri</code>
@@ -367,7 +365,7 @@ The \c verify command has no command-specific options.
<hr>
@section util_write wt write
-Write records to a table or file.
+Write records to a table.
The \c write command stores records into the specified data source.
The data source must be configured with string or record number keys and
diff --git a/src/docs/data-sources.dox b/src/docs/data-sources.dox
index d09d1cbc1b8..7f1879e0ffe 100644
--- a/src/docs/data-sources.dox
+++ b/src/docs/data-sources.dox
@@ -38,7 +38,7 @@ cursor types that give access to data managed by WiredTiger:
key=<code>string</code>\, value=<code>string</code>\,<br>
see @ref metadata for details}
@row{<tt>statistics:[\<data source URI\>]</tt>,
- database or data source statistics cursor,
+ database, data source or join statistics cursor,
key=<code>int id</code>\,<br>
value=<code>(string description\,
string value\, uint64_t value)</code>\,<br>
@@ -106,7 +106,9 @@ WiredTiger database as well as statistics for individual data sources.
The statistics are at two levels: per-database and per-individual data
source. Database-wide statistics are retrieved with the \c "statistics:"
URI; individual data source statistics are available by specifying
-\c "statistics:<data source URI>".
+\c "statistics:<data source URI>". Additionally, statistics about a
+join cursor can be retrieved by specifying \c "statistics:join" and
+supplying the join cursor as an argument in the WT_SESSION::open_cursor call.
The statistic key is an integer from the list of keys in
@ref_single statistics_keys "Statistics Keys". Statistics cursors return
@@ -127,7 +129,11 @@ The following is an example of printing statistics about a table:
@snippet ex_stat.c statistics table function
-Both examples can use a common display routine that iterates through the
+The following is an example of printing statistics about a join cursor:
+
+@snippet ex_stat.c statistics join cursor function
+
+These three examples can use a common display routine that iterates through the
statistics until the cursor returns the end of the list.
@snippet ex_stat.c statistics display function
diff --git a/src/docs/error-handling.dox b/src/docs/error-handling.dox
index d1291e38ff0..d91a126ee21 100644
--- a/src/docs/error-handling.dox
+++ b/src/docs/error-handling.dox
@@ -55,14 +55,32 @@ This error is generated when wiredtiger_open is configured to return an error if
@if IGNORE_BUILT_BY_API_ERR_END
@endif
-The ::wiredtiger_strerror function returns the standard message
-associated with any WiredTiger, ISO C99, or POSIX 1003.1-2001 function:
+@section error_translation Translating errors
+
+The WT_SESSION::strerror and ::wiredtiger_strerror functions return the
+standard text message associated with any WiredTiger, ISO C, or POSIX
+standard error code.
+
+@snippet ex_all.c Display an error thread safe
@snippet ex_all.c Display an error
+Note that ::wiredtiger_strerror is not thread-safe.
+
@m_if{c}
+@section error_handling_event Error handling using the WT_EVENT_HANDLER
+
More complex error handling can be configured by passing an implementation
of WT_EVENT_HANDLER to ::wiredtiger_open or WT_CONNECTION::open_session.
+
+For example, both informational and error messages might be passed to an
+application-specific logging function that added a timestamp and logged
+the message to a file, and error messages might additionally be output to
+the \c stderr file stream.
+
+@snippet ex_event_handler.c Function event_handler
+@snippet ex_event_handler.c Configure event_handler
+
@m_endif
*/
diff --git a/src/docs/license.dox b/src/docs/license.dox
index febced2c6af..d7814d04fd6 100644
--- a/src/docs/license.dox
+++ b/src/docs/license.dox
@@ -2,16 +2,16 @@
The complete WiredTiger software package is Open Source software: you
are welcome to modify and redistribute it under the terms of
-<a href="http://www.gnu.org/licenses/gpl-2.0-standalone.html">
-<b>version 2</b></a> or
-<a href="http://www.gnu.org/licenses/gpl-3.0-standalone.html">
-<b>version 3</b></a> of the
-<b>GNU General Public License</b></a>
+<a href="http://www.gnu.org/licenses/gpl-2.0-standalone.html">version 2</a>
+or
+<a href="http://www.gnu.org/licenses/gpl-3.0-standalone.html">version 3</a>
+of the
+<b>GNU General Public License</b>
as published by the Free Software Foundation. This program is
distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the
-<b>GNU General Public License</b></a> for details.
+<b>GNU General Public License</b> for details.
Additionally, portions of the WiredTiger distribution are distributed
under the terms of the
@@ -31,10 +31,10 @@ those described above, or for technical support for this software, please
contact MongoDB, Inc. at
<a mailto="info@wiredtiger.com">info@wiredtiger.com</a>.
-@section license_library 3rd party software included in the WiredTiger library
+@section license_library 3rd party software always included in the WiredTiger library
Every build of the WiredTiger library binary includes the following 3rd
-party software, distributed under their license terms. Redistribution
+party software, distributed under separate license terms. Redistribution
of the WiredTiger library should comply with these copyrights.
<table>
@@ -46,14 +46,26 @@ of the WiredTiger library should comply with these copyrights.
@row{\c src/support/hash_fnv.c, Authors, Public Domain}
</table>
+@section license_crc32-vpmsum 3rd party software optionally included in the WiredTiger library: PPC64
+
+PPC64 and PPC64LE builds of the WiredTiger library binary include additional
+3rd party software, distributed under separate license terms. Redistribution
+of the WiredTiger library PPC64 and PPC64LE builds should comply with these
+copyrights.
+
+<table>
+@hrow{Distribution Files, Copyright Holder, License}
+@row{\c src/support/power8/*, Anton Blanchard, <a href="http://opensource.org/licenses/Apache-2.0">Apache License\, Version 2.0</a> or the <a href="http://www.gnu.org/licenses/gpl-2.0-standalone.html">GNU General Public License\, version 2 or later</a>}
+</table>
+
@section license_leveldb 3rd party software optionally included in the WiredTiger library: LevelDB
If the \c --enable-leveldb configuration option is specified when
configuring the WiredTiger build, additional 3rd party software is
-included in the WiredTiger LevelDB library binary, distributed under
-their license terms. Redistribution of the WiredTiger library built
-with the \c --enable-leveldb configuration option should comply with
-these copyrights.
+included in the WiredTiger library binary, distributed under separate
+license terms. Redistribution of the WiredTiger library built with the
+\c --enable-leveldb configuration option should comply with these
+copyrights.
<table>
@hrow{Distribution Files, Copyright Holder, License}
diff --git a/src/docs/programming.dox b/src/docs/programming.dox
index 5d79edd660b..f717f4ed1fe 100644
--- a/src/docs/programming.dox
+++ b/src/docs/programming.dox
@@ -30,6 +30,7 @@ each of which is ordered by one or more columns.
<h2>Programming notes</h2>
- @subpage threads
- @subpage namespace
+- @subpage readonly
@m_if{c}
- @subpage signals
@m_endif
diff --git a/src/docs/readonly.dox b/src/docs/readonly.dox
new file mode 100644
index 00000000000..ad4a94a73f1
--- /dev/null
+++ b/src/docs/readonly.dox
@@ -0,0 +1,55 @@
+/*! @m_page{{c,java},readonly,Database read-only mode}
+
+WiredTiger supports read-only mode databases. When a database is opened
+in read-only mode, all modifications are disabled on the WT_CONNECTION
+handle, any sessions opened in that connection and any cursors opened
+in any of those sessions. For example, all cursor or session handle
+methods that modify the database will instead return errors.
+
+When a database is opened in read-only mode, the database directory and
+content must already exist and have been shut down cleanly.
+
+@section readonly_config Database read-only configuration considerations
+
+The \c readonly configuration affects other configuration settings.
+Where a default setting contradicts read-only operation, WiredTiger
+defaults are overridden to perform in a read-only mode. For example, LSM
+tree merges are turned off when LSM trees are configured, and log file
+archiving is disabled when logging is configured.
+
+Where a user-configured setting contradicts read-only operation, WiredTiger
+will return an error. For example, zero-filling
+log files is not allowed in read-only mode, and attempting to configure
+it will return an error.
+
+@section readonly_recovery Readonly configuration and recovery
+
+Because recovery modifies the database, recovery cannot be done in
+read-only mode. A ::wiredtiger_open call to open a database in read-only
+mode will fail if the database was not cleanly shut down and recovery is
+required.
+
+@section readonly_logging Readonly configuration and logging
+
+If logging is enabled on the database when opened in read-only mode, log
+file archiving and log file pre-allocation are disabled and the log files
+will not be modified in any way.
+
+@section readonly_lsm Readonly configuration and LSM trees
+
+If LSM trees are in use, read-only mode turns off all modification.
+Internal LSM operations such as merging, creating new chunks, creating
+bloom filters and dropping old chunks are disabled.
+
+@section readonly_handles Readonly configuration and multiple database handles
+
+One unusual effect of read-only operations is the potential for multiple
+read-only database handles open on the same database at the same time.
+WiredTiger prevents multiple connection handles by writing a lock file,
+and this locking is done even in read-only mode. However, if the lock
+file cannot be written, opening in read-only mode is still allowed to
+proceed. For that reason, multiple read-only connection handles could
+be open at the same time. Normal locking occurs if the lock file can be
+written in read-only mode, preventing multiple database connections.
+
+*/
diff --git a/src/docs/spell.ok b/src/docs/spell.ok
index 80597302cbb..efc306568cd 100644
--- a/src/docs/spell.ok
+++ b/src/docs/spell.ok
@@ -7,6 +7,7 @@ Atomicity
BLOBs
CFLAGS
CPPFLAGS
+CRC
Cheng
Christoph
Collet's
@@ -64,6 +65,7 @@ NOTFOUND
NUMA
NoSQL
OPTYPE
+PPC
PRELOAD
README
Rebalance
@@ -151,6 +153,7 @@ control's
copydoc
cpp
crashless
+crc
cursortype
customerABC
cv
@@ -377,6 +380,7 @@ rVv
rdbms
rdlock
readlock
+readonly
realclean
realloc
realloc'd
@@ -419,6 +423,7 @@ src
ssd
startsync
statlog
+stderr
str
strerror
strftime
@@ -475,6 +480,7 @@ valuefmt
vec
versa
vm
+vpmsum
warmup
whitespace
wiredtiger
diff --git a/src/docs/statistics.dox b/src/docs/statistics.dox
index 453da34c51a..0a29e351e4e 100644
--- a/src/docs/statistics.dox
+++ b/src/docs/statistics.dox
@@ -79,6 +79,15 @@ or logged:
@snippet ex_all.c Statistics clear configuration
+The following example opens a statistics cursor on an open join cursor:
+
+@snippet ex_schema.c Statistics cursor join cursor
+
+The statistics gathered will be organized by reference cursors participating
+in the join (see WT_SESSION::join); the URI of each reference cursor appears
+as a prefix in the description field returned as a value by the statistics
+cursor.
+
@section statistics_log Statistics logging
WiredTiger will optionally log database statistics into a file when the
diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox
index e4d85003a1e..8b3d61e4c19 100644
--- a/src/docs/upgrading.dox
+++ b/src/docs/upgrading.dox
@@ -2,27 +2,34 @@
@section version_271 Upgrading to Version 2.7.1
<dl>
+<dt>LSM metadata</dt>
+<dd>
+There is a change to the format of LSM metadata in this release to fix bugs
+in dump / load of tables of type LSM. Tables created with the old LSM metadata
+format will be upgraded automatically, but once updated to the new version
+<b>are no longer compatible with older releases of WiredTiger</b>.
+</dd>
+
<dt>Column-store bulk-load cursors</dt>
<dd>
-Historically, bulk-load of a column-store object ignored any key set in
-the cursor and automatically assigned each inserted row the next
-sequential record number for its key. In the 2.7.1 release, column-store
-objects match row-store behavior and require the cursor key be set
-before an insert. (This also allows allows sparse tables to be created
-in column-store objects, any skipped records are created as
-already-deleted rows.) To match the previous behavior, specify the
-\c append configuration string when opening the column-store bulk-load
-cursor; this causes the cursor's key to be ignored and each inserted row
-will be assigned the next record number.
+Historically, bulk-load of a column-store object ignored any key set in the
+cursor and automatically assigned each inserted row the next sequential
+record number for its key. In the 2.7.1 release, column-store objects match
+row-store behavior and require the cursor key be set before an insert.
+(This allows sparse tables to be created in column-store objects; any
+skipped records are created as already-deleted rows.) To match the previous
+behavior, specify the \c append configuration string when opening the
+column-store bulk-load cursor; this causes the cursor's key to be ignored
+and each inserted row will be assigned the next record number.
</dd>
<dt>Change to WT_SESSION::truncate with URI</dt>
<dd>
If using the WT_SESSION::truncate API with a file: URI for a full table
-truncate, underlying algorithmic changes result in some visible differences.
-This call can now return WT_ROLLBACK. Applications should be prepared to
-handle this error. This method no longer requires exclusive access to the
-table. Also the underlying disk space may not be immediately
+truncate, underlying algorithmic changes result in some visible
+differences. This call can now return WT_ROLLBACK. Applications should be
+prepared to handle this error. This method no longer requires exclusive
+access to the table. Also the underlying disk space may not be immediately
reclaimed when the call returns. The performance of this API may differ
from earlier releases.
</dd>
@@ -34,6 +41,14 @@ from the WiredTiger release; remaining compression engines include LZ4,
snappy and zlib.
</dd>
+<dt>Change to named checkpoints with bulk loads</dt>
+<dd>
+Previous versions of WiredTiger created empty named checkpoints in files
+being bulk-loaded. In this release, checkpoint skips files being
+bulk-loaded, so they do not get named checkpoints that complete during the
+bulk load.
+</dd>
+
</dl><hr>
@section version_270 Upgrading to Version 2.7.0
diff --git a/src/docs/wtperf.dox b/src/docs/wtperf.dox
index 1f0d1533ac4..6d8dcab8f65 100644
--- a/src/docs/wtperf.dox
+++ b/src/docs/wtperf.dox
@@ -212,6 +212,10 @@ insert operations
generate random content for the value
@par read_range (unsigned int, default=0)
scan a range of keys after each search
+@par readonly (boolean, default=false)
+reopen the connection between populate and workload phases in readonly
+mode. Requires reopen_connection turned on (default). Requires that
+read be the only workload specified
@par reopen_connection (boolean, default=true)
close and reopen the connection between populate and workload phases
@par report_interval (unsigned int, default=2)
@@ -247,14 +251,19 @@ threads configuration might be
'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))'
which would create 2 threads doing nothing but reads and 8 threads
each doing 50% inserts and 25% reads and updates. Allowed
-configuration values are 'count', 'throttle', 'reads', 'inserts',
-'updates', 'truncate', 'truncate_pct' and 'truncate_count'. There are
-also behavior modifiers, supported modifiers are 'ops_per_txn'
+configuration values are 'count', 'throttle', 'update_delta', 'reads',
+'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'.
+There are also behavior modifiers, supported modifiers are
+'ops_per_txn'
@par transaction_config (string, default=)
transaction configuration string, relevant when populate_opts_per_txn
is nonzero
@par table_name (string, default=test)
table name
+@par value_sz_max (unsigned int, default=1000)
+maximum value size when delta updates are present. Default disabled
+@par value_sz_min (unsigned int, default=1)
+minimum value size when delta updates are present. Default disabled
@par value_sz (unsigned int, default=100)
value size
@par verbose (unsigned int, default=1)
diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c
index 641864a8baa..ca98b1bd62a 100644
--- a/src/evict/evict_file.c
+++ b/src/evict/evict_file.c
@@ -18,13 +18,12 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
WT_DECL_RET;
WT_PAGE *page;
WT_REF *next_ref, *ref;
- bool evict_reset;
/*
* We need exclusive access to the file -- disable ordinary eviction
* and drain any blocks already queued.
*/
- WT_RET(__wt_evict_file_exclusive_on(session, &evict_reset));
+ WT_RET(__wt_evict_file_exclusive_on(session));
/* Make sure the oldest transaction ID is up-to-date. */
__wt_txn_update_oldest(session, true);
@@ -98,8 +97,7 @@ err: /* On error, clear any left-over tree walk. */
session, next_ref, WT_READ_NO_EVICT));
}
- if (evict_reset)
- __wt_evict_file_exclusive_off(session);
+ __wt_evict_file_exclusive_off(session);
return (ret);
}
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 0536a06bc22..50a00787f35 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -159,7 +159,7 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session)
bytes_max / WT_MEGABYTE));
}
- return (__wt_cond_signal(session, cache->evict_cond));
+ return (__wt_cond_auto_signal(session, cache->evict_cond));
}
/*
@@ -175,8 +175,8 @@ __evict_server(void *arg)
WT_SESSION_IMPL *session;
#ifdef HAVE_DIAGNOSTIC
struct timespec now, stuck_ts;
- uint64_t pages_evicted = 0;
#endif
+ uint64_t pages_evicted = 0;
u_int spins;
session = arg;
@@ -219,11 +219,11 @@ __evict_server(void *arg)
/* Next time we wake up, reverse the sweep direction. */
cache->flags ^= WT_CACHE_WALK_REVERSE;
-#ifdef HAVE_DIAGNOSTIC
pages_evicted = 0;
} else if (pages_evicted != cache->pages_evict) {
- WT_ERR(__wt_epoch(session, &stuck_ts));
pages_evicted = cache->pages_evict;
+#ifdef HAVE_DIAGNOSTIC
+ WT_ERR(__wt_epoch(session, &stuck_ts));
} else {
/* After being stuck for 5 minutes, give up. */
WT_ERR(__wt_epoch(session, &now));
@@ -238,7 +238,8 @@ __evict_server(void *arg)
WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping"));
/* Don't rely on signals: check periodically. */
- WT_ERR(__wt_cond_wait(session, cache->evict_cond, 100000));
+ WT_ERR(__wt_cond_auto_wait(
+ session, cache->evict_cond, pages_evicted != 0));
WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "waking"));
}
@@ -477,6 +478,7 @@ __evict_update_work(WT_SESSION_IMPL *session)
conn = S2C(session);
cache = conn->cache;
+ WT_STAT_FAST_CONN_SET(session, cache_eviction_aggressive_set, 0);
/* Clear previous state. */
cache->state = 0;
@@ -534,8 +536,11 @@ __evict_update_work(WT_SESSION_IMPL *session)
return (false);
-done: if (F_ISSET(cache, WT_CACHE_STUCK))
+done: if (F_ISSET(cache, WT_CACHE_STUCK)) {
+ WT_STAT_FAST_CONN_SET(session,
+ cache_eviction_aggressive_set, 1);
FLD_SET(cache->state, WT_EVICT_PASS_AGGRESSIVE);
+ }
return (true);
}
@@ -594,8 +599,11 @@ __evict_pass(WT_SESSION_IMPL *session)
if (!__evict_update_work(session))
break;
- if (loop > 10)
+ if (loop > 10) {
+ WT_STAT_FAST_CONN_SET(session,
+ cache_eviction_aggressive_set, 1);
FLD_SET(cache->state, WT_EVICT_PASS_AGGRESSIVE);
+ }
/*
* Start a worker if we have capacity and we haven't reached
@@ -713,12 +721,32 @@ __evict_clear_walks(WT_SESSION_IMPL *session)
}
/*
- * __evict_request_walk_clear --
+ * __evict_clear_all_walks --
+ * Clear the eviction walk points for all files a session is waiting on.
+ */
+static int
+__evict_clear_all_walks(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ TAILQ_FOREACH(dhandle, &conn->dhqh, q)
+ if (WT_PREFIX_MATCH(dhandle->name, "file:"))
+ WT_WITH_DHANDLE(session,
+ dhandle, WT_TRET(__evict_clear_walk(session)));
+ return (ret);
+}
+
+/*
+ * __evict_request_clear_walk --
* Request that the eviction server clear the tree's current eviction
* point.
*/
static int
-__evict_request_walk_clear(WT_SESSION_IMPL *session)
+__evict_request_clear_walk(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_CACHE *cache;
@@ -746,32 +774,12 @@ __evict_request_walk_clear(WT_SESSION_IMPL *session)
}
/*
- * __evict_clear_all_walks --
- * Clear the eviction walk points for all files a session is waiting on.
- */
-static int
-__evict_clear_all_walks(WT_SESSION_IMPL *session)
-{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
-
- conn = S2C(session);
-
- TAILQ_FOREACH(dhandle, &conn->dhqh, q)
- if (WT_PREFIX_MATCH(dhandle->name, "file:"))
- WT_WITH_DHANDLE(session,
- dhandle, WT_TRET(__evict_clear_walk(session)));
- return (ret);
-}
-
-/*
* __wt_evict_file_exclusive_on --
* Get exclusive eviction access to a file and discard any of the file's
* blocks queued for eviction.
*/
int
-__wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp)
+__wt_evict_file_exclusive_on(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_CACHE *cache;
@@ -779,33 +787,39 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp)
WT_EVICT_ENTRY *evict;
u_int i, elem;
- *evict_resetp = false;
-
btree = S2BT(session);
cache = S2C(session)->cache;
- /* If the file wasn't evictable, there's no work to do. */
- if (F_ISSET(btree, WT_BTREE_NO_EVICTION))
+ /*
+ * Hold the walk lock to set the no-eviction flag.
+ *
+ * The no-eviction flag can be set permanently, in which case we never
+ * increment the no-eviction count.
+ */
+ __wt_spin_lock(session, &cache->evict_walk_lock);
+ if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) {
+ if (btree->evict_disabled != 0)
+ ++btree->evict_disabled;
+ __wt_spin_unlock(session, &cache->evict_walk_lock);
return (0);
+ }
+ ++btree->evict_disabled;
/*
- * Hold the walk lock to set the "no eviction" flag: no new pages from
- * the file will be queued for eviction after this point.
+ * Ensure no new pages from the file will be queued for eviction after
+ * this point.
*/
- __wt_spin_lock(session, &cache->evict_walk_lock);
F_SET(btree, WT_BTREE_NO_EVICTION);
- __wt_spin_unlock(session, &cache->evict_walk_lock);
+ WT_FULL_BARRIER();
/* Clear any existing LRU eviction walk for the file. */
- WT_ERR(__evict_request_walk_clear(session));
-
- /* Hold the evict lock to remove any queued pages from this file. */
- __wt_spin_lock(session, &cache->evict_lock);
+ WT_ERR(__evict_request_clear_walk(session));
/*
* The eviction candidate list might reference pages from the file,
- * clear it.
+ * clear it. Hold the evict lock to remove queued pages from a file.
*/
+ __wt_spin_lock(session, &cache->evict_lock);
elem = cache->evict_max;
for (i = 0, evict = cache->evict_queue; i < elem; i++, evict++)
if (evict->btree == btree)
@@ -819,10 +833,11 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp)
while (btree->evict_busy > 0)
__wt_yield();
- *evict_resetp = true;
- return (0);
-
-err: F_CLR(btree, WT_BTREE_NO_EVICTION);
+ if (0) {
+err: --btree->evict_disabled;
+ F_CLR(btree, WT_BTREE_NO_EVICTION);
+ }
+ __wt_spin_unlock(session, &cache->evict_walk_lock);
return (ret);
}
@@ -834,12 +849,28 @@ void
__wt_evict_file_exclusive_off(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
+ WT_CACHE *cache;
btree = S2BT(session);
+ cache = S2C(session)->cache;
- WT_ASSERT(session, btree->evict_ref == NULL);
+ /*
+ * We have seen subtle bugs with multiple threads racing to turn
+ * eviction on/off. Make races more likely in diagnostic builds.
+ */
+ WT_DIAGNOSTIC_YIELD;
- F_CLR(btree, WT_BTREE_NO_EVICTION);
+ WT_ASSERT(session,
+ btree->evict_ref == NULL && F_ISSET(btree, WT_BTREE_NO_EVICTION));
+
+ /*
+ * The no-eviction flag can be set permanently, in which case we never
+ * increment the no-eviction count.
+ */
+ __wt_spin_lock(session, &cache->evict_walk_lock);
+ if (btree->evict_disabled > 0 && --btree->evict_disabled == 0)
+ F_CLR(btree, WT_BTREE_NO_EVICTION);
+ __wt_spin_unlock(session, &cache->evict_walk_lock);
}
/*
@@ -869,7 +900,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
WT_DECL_RET;
- uint64_t cutoff;
+ uint64_t cutoff, read_gen_oldest;
uint32_t candidates, entries;
cache = S2C(session)->cache;
@@ -910,34 +941,62 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
return (0);
}
- WT_ASSERT(session, cache->evict_queue[0].ref != NULL);
-
- /* Track the oldest read generation we have in the queue. */
- cache->read_gen_oldest = cache->evict_queue[0].ref->page->read_gen;
-
+ /* Decide how many of the candidates we're going to try and evict. */
if (FLD_ISSET(cache->state,
- WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK))
+ WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) {
/*
* Take all candidates if we only gathered pages with an oldest
* read generation set.
*/
cache->evict_candidates = entries;
- else {
- /* Find the bottom 25% of read generations. */
- cutoff = (3 * __evict_read_gen(&cache->evict_queue[0]) +
- __evict_read_gen(&cache->evict_queue[entries - 1])) / 4;
+ } else {
/*
- * Don't take less than 10% or more than 50% of entries,
- * regardless. That said, if there is only one entry, which is
- * normal when populating an empty file, don't exclude it.
+ * Find the oldest read generation we have in the queue, used
+ * to set the initial value for pages read into the system.
+ * The queue is sorted, find the first "normal" generation.
*/
- for (candidates = 1 + entries / 10;
- candidates < entries / 2;
- candidates++)
- if (__evict_read_gen(
- &cache->evict_queue[candidates]) > cutoff)
+ read_gen_oldest = WT_READGEN_OLDEST;
+ for (candidates = 0; candidates < entries; ++candidates) {
+ read_gen_oldest =
+ __evict_read_gen(&cache->evict_queue[candidates]);
+ if (read_gen_oldest != WT_READGEN_OLDEST)
break;
- cache->evict_candidates = candidates;
+ }
+
+ /*
+ * Take all candidates if we only gathered pages with an oldest
+ * read generation set.
+ *
+ * We normally never take more than 50% of the entries; if 50%
+ * of the entries were at the oldest read generation, take them.
+ */
+ if (read_gen_oldest == WT_READGEN_OLDEST)
+ cache->evict_candidates = entries;
+ else if (candidates >= entries / 2)
+ cache->evict_candidates = candidates;
+ else {
+ /* Save the calculated oldest generation. */
+ cache->read_gen_oldest = read_gen_oldest;
+
+ /* Find the bottom 25% of read generations. */
+ cutoff =
+ (3 * read_gen_oldest + __evict_read_gen(
+ &cache->evict_queue[entries - 1])) / 4;
+
+ /*
+ * Don't take less than 10% or more than 50% of entries,
+ * regardless. That said, if there is only one entry,
+ * which is normal when populating an empty file, don't
+ * exclude it.
+ */
+ for (candidates = 1 + entries / 10;
+ candidates < entries / 2;
+ candidates++)
+ if (__evict_read_gen(
+ &cache->evict_queue[candidates]) > cutoff)
+ break;
+ cache->evict_candidates = candidates;
+ }
}
cache->evict_current = cache->evict_queue;
@@ -1106,23 +1165,27 @@ retry: while (slot < max_entries && ret == 0) {
__wt_spin_unlock(session, &conn->dhandle_lock);
dhandle_locked = false;
- __wt_spin_lock(session, &cache->evict_walk_lock);
-
/*
- * Re-check the "no eviction" flag -- it is used to enforce
- * exclusive access when a handle is being closed.
+ * Re-check the "no eviction" flag, used to enforce exclusive
+ * access when a handle is being closed. If not set, remember
+ * the file to visit first, next loop.
+ *
+ * Only try to acquire the lock and simply continue if we fail;
+ * the lock is held while the thread turning off eviction clears
+ * the tree's current eviction point, and part of the process is
+ * waiting on this thread to acknowledge that action.
*/
- if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) {
- /* Remember the file to visit first, next loop. */
- cache->evict_file_next = dhandle;
-
- WT_WITH_DHANDLE(session, dhandle,
- ret = __evict_walk_file(session, &slot));
- WT_ASSERT(session, session->split_gen == 0);
+ if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) &&
+ !__wt_spin_trylock(session, &cache->evict_walk_lock)) {
+ if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) {
+ cache->evict_file_next = dhandle;
+ WT_WITH_DHANDLE(session, dhandle,
+ ret = __evict_walk_file(session, &slot));
+ WT_ASSERT(session, session->split_gen == 0);
+ }
+ __wt_spin_unlock(session, &cache->evict_walk_lock);
}
- __wt_spin_unlock(session, &cache->evict_walk_lock);
-
/*
* If we didn't find any candidates in the file, skip it next
* time.
@@ -1209,7 +1272,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
uint64_t pages_walked;
uint32_t walk_flags;
int internal_pages, restarts;
- bool enough, modified, would_split;
+ bool enough, modified;
conn = S2C(session);
btree = S2BT(session);
@@ -1265,9 +1328,22 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
continue;
+ /*
+ * It's possible (but unlikely) to visit a page without a read
+ * generation, if we race with the read instantiating the page.
+ * Ignore those pages, but set the page's read generation here
+ * to ensure a bug doesn't somehow leave a page without a read
+ * generation.
+ */
+ if (page->read_gen == WT_READGEN_NOTSET) {
+ __wt_cache_read_gen_new(session, page);
+ continue;
+ }
+
/* Pages we no longer need (clean or dirty), are found money. */
if (__wt_page_is_empty(page) ||
- F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
+ F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
+ page->read_gen == WT_READGEN_OLDEST)
goto fast;
/* Skip clean pages if appropriate. */
@@ -1280,25 +1356,17 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
* eviction, skip anything that isn't marked.
*/
if (FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) &&
- page->memory_footprint < btree->splitmempage &&
- page->read_gen != WT_READGEN_OLDEST)
+ page->memory_footprint < btree->splitmempage)
continue;
/* Limit internal pages to 50% unless we get aggressive. */
if (WT_PAGE_IS_INTERNAL(page) &&
- ++internal_pages > WT_EVICT_WALK_PER_FILE / 2 &&
- !FLD_ISSET(cache->state, WT_EVICT_PASS_AGGRESSIVE))
+ !FLD_ISSET(cache->state, WT_EVICT_PASS_AGGRESSIVE) &&
+ internal_pages >= (int)(evict - start) / 2)
continue;
- /*
- * If this page has never been considered for eviction, set its
- * read generation to somewhere in the middle of the LRU list.
- */
- if (page->read_gen == WT_READGEN_NOTSET)
- page->read_gen = __wt_cache_read_gen_new(session);
-
fast: /* If the page can't be evicted, give up. */
- if (!__wt_page_can_evict(session, ref, &would_split))
+ if (!__wt_page_can_evict(session, ref, NULL))
continue;
/*
@@ -1332,6 +1400,9 @@ fast: /* If the page can't be evicted, give up. */
__evict_init_candidate(session, evict, ref);
++evict;
+ if (WT_PAGE_IS_INTERNAL(page))
+ ++internal_pages;
+
WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER,
"select: %p, size %" PRIu64, page, page->memory_footprint));
}
@@ -1392,8 +1463,9 @@ __evict_get_ref(
}
/*
- * The eviction server only tries to evict half of the pages before
- * looking for more.
+ * Only evict half of the pages before looking for more. The remainder
+ * are left to eviction workers (if configured), or application threads
+ * if necessary.
*/
candidates = cache->evict_candidates;
if (is_server && candidates > 1)
@@ -1452,7 +1524,6 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
{
WT_BTREE *btree;
WT_DECL_RET;
- WT_PAGE *page;
WT_REF *ref;
WT_RET(__evict_get_ref(session, is_server, &btree, &ref));
@@ -1481,9 +1552,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
* the page and some other thread may have evicted it by the time we
* look at it.
*/
- page = ref->page;
- if (page->read_gen != WT_READGEN_OLDEST)
- page->read_gen = __wt_cache_read_gen_bump(session);
+ __wt_cache_read_gen_bump(session, ref->page);
WT_WITH_BTREE(session, btree, ret = __wt_evict(session, ref, false));
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 72c07eaa05d..f0d4752cc83 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -471,8 +471,7 @@ __evict_review(
LF_SET(WT_EVICT_IN_MEMORY | WT_EVICT_UPDATE_RESTORE);
else if (page->read_gen == WT_READGEN_OLDEST)
LF_SET(WT_EVICT_UPDATE_RESTORE);
- else if (F_ISSET(session, WT_SESSION_INTERNAL) &&
- F_ISSET(S2C(session)->cache, WT_CACHE_STUCK))
+ else if (F_ISSET(S2C(session)->cache, WT_CACHE_STUCK))
LF_SET(WT_EVICT_LOOKASIDE);
}
diff --git a/src/include/btmem.h b/src/include/btmem.h
index ee495c52fc8..7cdf2bef43a 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -598,9 +598,14 @@ struct __wt_page {
* read generation is incremented by the eviction server each time it
* becomes active. To avoid incrementing a page's read generation too
* frequently, it is set to a future point.
+ *
+ * Because low read generation values have special meaning, and there
+ * are places where we manipulate the value, use an initial value well
+ * outside of the special range.
*/
#define WT_READGEN_NOTSET 0
#define WT_READGEN_OLDEST 1
+#define WT_READGEN_START_VALUE 100
#define WT_READGEN_STEP 100
uint64_t read_gen;
diff --git a/src/include/btree.h b/src/include/btree.h
index 703de0f2fc6..fd921677751 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -129,10 +129,11 @@ struct __wt_btree {
uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
uint64_t write_gen; /* Write generation */
- WT_REF *evict_ref; /* Eviction thread's location */
- uint64_t evict_priority; /* Relative priority of cached pages */
- u_int evict_walk_period; /* Skip this many LRU walks */
- u_int evict_walk_skips; /* Number of walks skipped */
+ WT_REF *evict_ref; /* Eviction thread's location */
+ uint64_t evict_priority; /* Relative priority of cached pages */
+ u_int evict_walk_period; /* Skip this many LRU walks */
+ u_int evict_walk_skips; /* Number of walks skipped */
+ u_int evict_disabled; /* Eviction disabled count */
volatile uint32_t evict_busy; /* Count of threads in eviction */
enum {
diff --git a/src/include/btree.i b/src/include/btree.i
index b4b4d7f25a2..6df7f87073f 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -1149,7 +1149,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
* parent frees the backing blocks for any no-longer-used overflow keys,
* which will corrupt the checkpoint's block management.
*/
- if (btree->checkpointing &&
+ if (btree->checkpointing != WT_CKPT_OFF &&
F_ISSET_ATOMIC(ref->home, WT_PAGE_OVERFLOW_KEYS))
return (false);
diff --git a/src/include/cache.h b/src/include/cache.h
index a3961d6043e..9184a2fe6ed 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -75,9 +75,9 @@ struct __wt_cache {
/*
* Read information.
*/
- uint64_t read_gen; /* Page read generation (LRU) */
- uint64_t read_gen_oldest; /* The oldest read generation that
- eviction knows about */
+ uint64_t read_gen; /* Current page read generation */
+ uint64_t read_gen_oldest; /* Oldest read generation the eviction
+ * server saw in its last queue load */
/*
* Eviction thread information.
diff --git a/src/include/cache.i b/src/include/cache.i
index ee13eee84c5..8cf7555e716 100644
--- a/src/include/cache.i
+++ b/src/include/cache.i
@@ -28,34 +28,43 @@ __wt_cache_read_gen_incr(WT_SESSION_IMPL *session)
/*
* __wt_cache_read_gen_bump --
- * Get the read generation to keep a page in memory.
+ * Update the page's read generation.
*/
-static inline uint64_t
-__wt_cache_read_gen_bump(WT_SESSION_IMPL *session)
+static inline void
+__wt_cache_read_gen_bump(WT_SESSION_IMPL *session, WT_PAGE *page)
{
+ /* Ignore pages set for forcible eviction. */
+ if (page->read_gen == WT_READGEN_OLDEST)
+ return;
+
+ /* Ignore pages already in the future. */
+ if (page->read_gen > __wt_cache_read_gen(session))
+ return;
+
/*
- * We return read-generations from the future (where "the future" is
- * measured by increments of the global read generation). The reason
- * is because when acquiring a new hazard pointer for a page, we can
- * check its read generation, and if the read generation isn't less
- * than the current global generation, we don't bother updating the
- * page. In other words, the goal is to avoid some number of updates
- * immediately after each update we have to make.
+ * We set read-generations in the future (where "the future" is measured
+ * by increments of the global read generation). The reason is because
+ * when acquiring a new hazard pointer for a page, we can check its read
+ * generation, and if the read generation isn't less than the current
+ * global generation, we don't bother updating the page. In other
+ * words, the goal is to avoid some number of updates immediately after
+ * each update we have to make.
*/
- return (__wt_cache_read_gen(session) + WT_READGEN_STEP);
+ page->read_gen = __wt_cache_read_gen(session) + WT_READGEN_STEP;
}
/*
* __wt_cache_read_gen_new --
* Set the read generation for a new page in memory.
*/
-static inline uint64_t
-__wt_cache_read_gen_new(WT_SESSION_IMPL *session)
+static inline void
+__wt_cache_read_gen_new(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_CACHE *cache;
cache = S2C(session)->cache;
- return (__wt_cache_read_gen(session) + cache->read_gen_oldest) / 2;
+ page->read_gen =
+ (__wt_cache_read_gen(session) + cache->read_gen_oldest) / 2;
}
/*
@@ -119,12 +128,11 @@ __wt_session_can_wait(WT_SESSION_IMPL *session)
return (0);
/*
- * LSM sets the no-eviction flag when holding the LSM tree lock,
- * in that case, or when holding the schema lock, we don't want to
- * highjack the thread for eviction.
+ * LSM sets the no-eviction flag when holding the LSM tree lock, in that
+ * case, or when holding the schema lock, we don't want to highjack the
+ * thread for eviction.
*/
- if (F_ISSET(session,
- WT_SESSION_NO_EVICTION | WT_SESSION_LOCKED_SCHEMA))
+ if (F_ISSET(session, WT_SESSION_NO_EVICTION | WT_SESSION_LOCKED_SCHEMA))
return (0);
return (1);
@@ -224,11 +232,11 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp)
return (0);
/*
- * Threads operating on trees that cannot be evicted are ignored,
- * mostly because they're not contributing to the problem.
+ * Threads operating on cache-resident trees are ignored because they're
+ * not contributing to the problem.
*/
btree = S2BT_SAFE(session);
- if (btree != NULL && F_ISSET(btree, WT_BTREE_NO_EVICTION))
+ if (btree != NULL && F_ISSET(btree, WT_BTREE_IN_MEMORY))
return (0);
/* Check if eviction is needed. */
diff --git a/src/include/column.i b/src/include/column.i
index 9f3e2101f6f..d64e68420a5 100644
--- a/src/include/column.i
+++ b/src/include/column.i
@@ -11,13 +11,13 @@
* Search a column-store insert list for the next larger record.
*/
static inline WT_INSERT *
-__col_insert_search_gt(WT_INSERT_HEAD *inshead, uint64_t recno)
+__col_insert_search_gt(WT_INSERT_HEAD *ins_head, uint64_t recno)
{
WT_INSERT *ins, **insp;
int i;
/* If there's no insert chain to search, we're done. */
- if ((ins = WT_SKIP_LAST(inshead)) == NULL)
+ if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
return (NULL);
/* Fast path check for targets past the end of the skiplist. */
@@ -29,7 +29,7 @@ __col_insert_search_gt(WT_INSERT_HEAD *inshead, uint64_t recno)
* go as far as possible at each level before stepping down to the next.
*/
ins = NULL;
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0;)
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;)
if (*insp != NULL && recno >= WT_INSERT_RECNO(*insp)) {
ins = *insp; /* GTE: keep going at this level */
insp = &(*insp)->next[i];
@@ -50,7 +50,7 @@ __col_insert_search_gt(WT_INSERT_HEAD *inshead, uint64_t recno)
* such a record exists before searching.
*/
if (ins == NULL)
- ins = WT_SKIP_FIRST(inshead);
+ ins = WT_SKIP_FIRST(ins_head);
while (recno >= WT_INSERT_RECNO(ins))
ins = WT_SKIP_NEXT(ins);
return (ins);
@@ -61,13 +61,13 @@ __col_insert_search_gt(WT_INSERT_HEAD *inshead, uint64_t recno)
* Search a column-store insert list for the next smaller record.
*/
static inline WT_INSERT *
-__col_insert_search_lt(WT_INSERT_HEAD *inshead, uint64_t recno)
+__col_insert_search_lt(WT_INSERT_HEAD *ins_head, uint64_t recno)
{
WT_INSERT *ins, **insp;
int i;
/* If there's no insert chain to search, we're done. */
- if ((ins = WT_SKIP_FIRST(inshead)) == NULL)
+ if ((ins = WT_SKIP_FIRST(ins_head)) == NULL)
return (NULL);
/* Fast path check for targets before the skiplist. */
@@ -78,7 +78,7 @@ __col_insert_search_lt(WT_INSERT_HEAD *inshead, uint64_t recno)
* The insert list is a skip list: start at the highest skip level, then
* go as far as possible at each level before stepping down to the next.
*/
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0;)
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;)
if (*insp != NULL && recno > WT_INSERT_RECNO(*insp)) {
ins = *insp; /* GT: keep going at this level */
insp = &(*insp)->next[i];
@@ -95,14 +95,14 @@ __col_insert_search_lt(WT_INSERT_HEAD *inshead, uint64_t recno)
* Search a column-store insert list for an exact match.
*/
static inline WT_INSERT *
-__col_insert_search_match(WT_INSERT_HEAD *inshead, uint64_t recno)
+__col_insert_search_match(WT_INSERT_HEAD *ins_head, uint64_t recno)
{
WT_INSERT **insp, *ret_ins;
uint64_t ins_recno;
int cmp, i;
/* If there's no insert chain to search, we're done. */
- if ((ret_ins = WT_SKIP_LAST(inshead)) == NULL)
+ if ((ret_ins = WT_SKIP_LAST(ins_head)) == NULL)
return (NULL);
/* Fast path the check for values at the end of the skiplist. */
@@ -115,7 +115,7 @@ __col_insert_search_match(WT_INSERT_HEAD *inshead, uint64_t recno)
* The insert list is a skip list: start at the highest skip level, then
* go as far as possible at each level before stepping down to the next.
*/
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0; ) {
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0; ) {
if (*insp == NULL) {
--i;
--insp;
@@ -143,7 +143,7 @@ __col_insert_search_match(WT_INSERT_HEAD *inshead, uint64_t recno)
* Search a column-store insert list, creating a skiplist stack as we go.
*/
static inline WT_INSERT *
-__col_insert_search(WT_INSERT_HEAD *inshead,
+__col_insert_search(WT_INSERT_HEAD *ins_head,
WT_INSERT ***ins_stack, WT_INSERT **next_stack, uint64_t recno)
{
WT_INSERT **insp, *ret_ins;
@@ -151,15 +151,15 @@ __col_insert_search(WT_INSERT_HEAD *inshead,
int cmp, i;
/* If there's no insert chain to search, we're done. */
- if ((ret_ins = WT_SKIP_LAST(inshead)) == NULL)
+ if ((ret_ins = WT_SKIP_LAST(ins_head)) == NULL)
return (NULL);
/* Fast path appends. */
if (recno >= WT_INSERT_RECNO(ret_ins)) {
for (i = 0; i < WT_SKIP_MAXDEPTH; i++) {
ins_stack[i] = (i == 0) ? &ret_ins->next[0] :
- (inshead->tail[i] != NULL) ?
- &inshead->tail[i]->next[i] : &inshead->head[i];
+ (ins_head->tail[i] != NULL) ?
+ &ins_head->tail[i]->next[i] : &ins_head->head[i];
next_stack[i] = NULL;
}
return (ret_ins);
@@ -169,7 +169,7 @@ __col_insert_search(WT_INSERT_HEAD *inshead,
* The insert list is a skip list: start at the highest skip level, then
* go as far as possible at each level before stepping down to the next.
*/
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0; ) {
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0; ) {
if ((ret_ins = *insp) == NULL) {
next_stack[i] = NULL;
ins_stack[i--] = insp--;
diff --git a/src/include/config.h b/src/include/config.h
index e63db0e76cf..48a255134af 100644
--- a/src/include/config.h
+++ b/src/include/config.h
@@ -85,13 +85,15 @@ struct __wt_config_parser_impl {
#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 33
#define WT_CONFIG_ENTRY_WT_SESSION_verify 34
#define WT_CONFIG_ENTRY_colgroup_meta 35
-#define WT_CONFIG_ENTRY_file_meta 36
-#define WT_CONFIG_ENTRY_index_meta 37
-#define WT_CONFIG_ENTRY_table_meta 38
-#define WT_CONFIG_ENTRY_wiredtiger_open 39
-#define WT_CONFIG_ENTRY_wiredtiger_open_all 40
-#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 41
-#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 42
+#define WT_CONFIG_ENTRY_file_config 36
+#define WT_CONFIG_ENTRY_file_meta 37
+#define WT_CONFIG_ENTRY_index_meta 38
+#define WT_CONFIG_ENTRY_lsm_meta 39
+#define WT_CONFIG_ENTRY_table_meta 40
+#define WT_CONFIG_ENTRY_wiredtiger_open 41
+#define WT_CONFIG_ENTRY_wiredtiger_open_all 42
+#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 43
+#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 44
/*
* configuration section: END
* DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/include/connection.h b/src/include/connection.h
index 88797e83ad6..2255056fcf6 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -175,6 +175,7 @@ struct __wt_connection_impl {
WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */
WT_SPINLOCK dhandle_lock; /* Data handle list spinlock */
WT_SPINLOCK fh_lock; /* File handle queue spinlock */
+ WT_SPINLOCK metadata_lock; /* Metadata update spinlock */
WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */
WT_SPINLOCK schema_lock; /* Schema operation spinlock */
WT_SPINLOCK table_lock; /* Table creation spinlock */
@@ -298,9 +299,10 @@ struct __wt_connection_impl {
#define WT_CONN_STAT_ALL 0x01 /* "all" statistics configured */
#define WT_CONN_STAT_CLEAR 0x02 /* clear after gathering */
#define WT_CONN_STAT_FAST 0x04 /* "fast" statistics configured */
-#define WT_CONN_STAT_NONE 0x08 /* don't gather statistics */
-#define WT_CONN_STAT_ON_CLOSE 0x10 /* output statistics on close */
-#define WT_CONN_STAT_SIZE 0x20 /* "size" statistics configured */
+#define WT_CONN_STAT_JSON 0x08 /* output JSON format */
+#define WT_CONN_STAT_NONE 0x10 /* don't gather statistics */
+#define WT_CONN_STAT_ON_CLOSE 0x20 /* output statistics on close */
+#define WT_CONN_STAT_SIZE 0x40 /* "size" statistics configured */
uint32_t stat_flags;
/* Connection statistics */
diff --git a/src/include/cursor.h b/src/include/cursor.h
index 7f7b5dceb79..4b35daf106e 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -31,22 +31,22 @@
NULL, /* uri */ \
NULL, /* key_format */ \
NULL, /* value_format */ \
- (int (*)(WT_CURSOR *, ...))(get_key), \
- (int (*)(WT_CURSOR *, ...))(get_value), \
- (void (*)(WT_CURSOR *, ...))(set_key), \
- (void (*)(WT_CURSOR *, ...))(set_value), \
- (int (*)(WT_CURSOR *, WT_CURSOR *, int *))(compare), \
- (int (*)(WT_CURSOR *, WT_CURSOR *, int *))(equals), \
+ get_key, \
+ get_value, \
+ set_key, \
+ set_value, \
+ compare, \
+ equals, \
next, \
prev, \
reset, \
search, \
- (int (*)(WT_CURSOR *, int *))(search_near), \
+ search_near, \
insert, \
update, \
remove, \
close, \
- (int (*)(WT_CURSOR *, const char *))(reconfigure), \
+ reconfigure, \
{ NULL, NULL }, /* TAILQ_ENTRY q */ \
0, /* recno key */ \
{ 0 }, /* recno raw buffer */ \
@@ -213,10 +213,11 @@ struct __wt_cursor_btree {
#define WT_CBT_NO_TXN 0x10 /* Non-transactional cursor
(e.g. on a checkpoint) */
#define WT_CBT_SEARCH_SMALLEST 0x20 /* Row-store: small-key insert list */
+#define WT_CBT_VAR_ONPAGE_MATCH 0x40 /* Var-store: on-page recno match */
#define WT_CBT_POSITION_MASK /* Flags associated with position */ \
(WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \
- WT_CBT_SEARCH_SMALLEST)
+ WT_CBT_SEARCH_SMALLEST | WT_CBT_VAR_ONPAGE_MATCH)
uint8_t flags;
};
@@ -287,9 +288,12 @@ struct __wt_cursor_join_iter {
WT_SESSION_IMPL *session;
WT_CURSOR_JOIN *cjoin;
WT_CURSOR_JOIN_ENTRY *entry;
- WT_CURSOR *cursor;
- WT_ITEM *curkey;
- bool advance;
+ WT_CURSOR *cursor; /* has null projection */
+ WT_CURSOR *main; /* main table with projection */
+ WT_ITEM *curkey; /* primary key */
+ WT_ITEM idxkey;
+ bool positioned;
+ bool isequal; /* advancing means we're done */
};
struct __wt_cursor_join_endpoint {
@@ -302,14 +306,18 @@ struct __wt_cursor_join_endpoint {
#define WT_CURJOIN_END_GT 0x04 /* include values > cursor */
#define WT_CURJOIN_END_GE (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ)
#define WT_CURJOIN_END_LE (WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ)
-#define WT_CURJOIN_END_OWN_KEY 0x08 /* must free key's data */
+#define WT_CURJOIN_END_OWN_CURSOR 0x08 /* must close cursor */
uint8_t flags; /* range for this endpoint */
};
+#define WT_CURJOIN_END_RANGE(endp) \
+ ((endp)->flags & \
+ (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | WT_CURJOIN_END_LT))
struct __wt_cursor_join_entry {
WT_INDEX *index;
WT_CURSOR *main; /* raw main table cursor */
WT_BLOOM *bloom; /* Bloom filter handle */
+ char *repack_format; /* target format for repack */
uint32_t bloom_bit_count; /* bits per item in bloom */
uint32_t bloom_hash_count; /* hash functions in bloom */
uint64_t count; /* approx number of matches */
diff --git a/src/include/extern.h b/src/include/extern.h
index 1999ff6b732..48c52d4a109 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -168,7 +168,7 @@ extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing);
extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst);
-extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_CACHE_OP op);
+extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op);
extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, size_t size, bool empty_page_ok);
@@ -190,7 +190,7 @@ extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, WT_ITEM *key, u_int s
extern int __wt_update_alloc( WT_SESSION_IMPL *session, WT_ITEM *value, WT_UPDATE **updp, size_t *sizep);
extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
-extern int __wt_search_insert( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key);
+extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key);
extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert);
extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt);
extern int __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt);
@@ -252,9 +252,7 @@ extern int __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize);
extern int __wt_conn_dhandle_find( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint);
extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force);
extern int __wt_conn_btree_open( WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags);
-extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, bool apply_checkpoints, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
-extern int __wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
-extern int __wt_conn_btree_apply_single(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
+extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]);
extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *uri, bool force);
extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool final, bool force);
extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session);
@@ -278,7 +276,6 @@ extern int __wt_sweep_create(WT_SESSION_IMPL *session);
extern int __wt_sweep_destroy(WT_SESSION_IMPL *session);
extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp);
extern int __wt_backup_file_remove(WT_SESSION_IMPL *session);
-extern int __wt_backup_list_uri_append( WT_SESSION_IMPL *session, const char *name, bool *skip);
extern int __wt_curbulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool bitmap, bool skip_sort_check);
extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp);
extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp);
@@ -300,12 +297,20 @@ extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const c
extern ssize_t __wt_json_strlen(const char *src, size_t srclen);
extern int __wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen);
extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp);
+extern int __wt_schema_create_final( WT_SESSION_IMPL *session, char *cfg_arg[], char **value_ret);
extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
extern void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst);
extern int __wt_curstat_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *curjoin, const char *cfg[], WT_CURSOR_STAT *cst);
extern int __wt_curstat_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp);
-extern int __wt_cursor_notsup(WT_CURSOR *cursor);
extern int __wt_cursor_noop(WT_CURSOR *cursor);
+extern int __wt_cursor_notsup(WT_CURSOR *cursor);
+extern int __wt_cursor_get_value_notsup(WT_CURSOR *cursor, ...);
+extern void __wt_cursor_set_key_notsup(WT_CURSOR *cursor, ...);
+extern void __wt_cursor_set_value_notsup(WT_CURSOR *cursor, ...);
+extern int __wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp);
+extern int __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp);
+extern int __wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact);
+extern int __wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config);
extern void __wt_cursor_set_notsup(WT_CURSOR *cursor);
extern int __wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key);
extern int __wt_cursor_get_key(WT_CURSOR *cursor, ...);
@@ -337,7 +342,7 @@ extern void __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_evict_server_wake(WT_SESSION_IMPL *session);
extern int __wt_evict_create(WT_SESSION_IMPL *session);
extern int __wt_evict_destroy(WT_SESSION_IMPL *session);
-extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp);
+extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session);
extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session);
extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full);
extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v);
@@ -360,7 +365,7 @@ extern int __wt_log_open(WT_SESSION_IMPL *session);
extern int __wt_log_close(WT_SESSION_IMPL *session);
extern int __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep);
extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp, void *cookie, int firstrecord), void *cookie);
-extern int __wt_log_force_write(WT_SESSION_IMPL *session, bool retry);
+extern int __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work);
extern int __wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags);
extern int __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap);
extern int __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags);
@@ -441,7 +446,7 @@ extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree);
extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk);
extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree);
extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args);
-extern int __wt_meta_btree_apply(WT_SESSION_IMPL *session, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
+extern int __wt_meta_apply_all(WT_SESSION_IMPL *session, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]);
extern int __wt_meta_checkpoint(WT_SESSION_IMPL *session, const char *fname, const char *checkpoint, WT_CKPT *ckpt);
extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char *fname, const char **namep);
extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname);
@@ -481,7 +486,9 @@ extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **va
extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value);
extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp);
+extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp);
extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
+extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp);
extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg);
@@ -490,6 +497,7 @@ extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp
extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret);
extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh);
extern int __wt_errno(void);
+extern int __wt_map_error_rdonly(int error);
extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen);
extern int __wt_exist(WT_SESSION_IMPL *session, const char *filename, bool *existp);
extern void __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh);
@@ -552,8 +560,18 @@ extern int __wt_struct_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v);
extern int __wt_struct_size(WT_SESSION_IMPL *session, size_t *sizep, const char *fmt, ...);
extern int __wt_struct_pack(WT_SESSION_IMPL *session, void *buffer, size_t size, const char *fmt, ...);
extern int __wt_struct_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, ...);
-extern int __wt_struct_unpack_size(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, size_t *resultp);
-extern int __wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf, void **reallocp);
+extern int __wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf);
+extern int __wt_ext_pack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, void *buffer, size_t size, WT_PACK_STREAM **psp);
+extern int __wt_ext_unpack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp);
+extern int __wt_ext_pack_close(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp);
+extern int __wt_ext_pack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item);
+extern int __wt_ext_pack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i);
+extern int __wt_ext_pack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s);
+extern int __wt_ext_pack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u);
+extern int __wt_ext_unpack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item);
+extern int __wt_ext_unpack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip);
+extern int __wt_ext_unpack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp);
+extern int __wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up);
extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell);
extern void __wt_ovfl_discard_free(WT_SESSION_IMPL *session, WT_PAGE *page);
extern int __wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page, uint8_t **addrp, size_t *addr_sizep, const void *value, size_t value_size);
@@ -572,7 +590,6 @@ extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
extern int __wt_bulk_insert_fix( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted);
extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk);
extern int __wt_bulk_insert_var( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted);
-extern int __wt_schema_create_strip(WT_SESSION_IMPL *session, const char *v1, const char *v2, char **value_ret);
extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep);
extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf);
extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf);
@@ -612,6 +629,7 @@ extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session, const ch
extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str);
extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len);
extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags);
+extern int __wt_session_notsup(WT_SESSION *wt_session);
extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers);
extern int __wt_session_copy_values(WT_SESSION_IMPL *session);
extern int __wt_session_release_resources(WT_SESSION_IMPL *session);
@@ -621,8 +639,8 @@ extern int __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const ch
extern int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop);
extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp);
extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp);
-extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp);
extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config);
+extern int __wt_session_compact_readonly( WT_SESSION *wt_session, const char *uri, const char *config);
extern int __wt_session_lock_dhandle( WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp);
extern int __wt_session_release_btree(WT_SESSION_IMPL *session);
extern int __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], uint32_t flags);
@@ -632,6 +650,11 @@ extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *ch
extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]);
extern uint32_t __wt_cksum(const void *chunk, size_t len);
extern void __wt_cksum_init(void);
+extern int __wt_cond_auto_alloc( WT_SESSION_IMPL *session, const char *name, bool is_signalled, uint64_t min, uint64_t max, WT_CONDVAR **condp);
+extern int __wt_cond_auto_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
+extern int __wt_cond_auto_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool *signalled);
+extern int __wt_cond_auto_wait( WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress);
+extern int __wt_cond_auto_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in, WT_ITEM *out);
extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out);
extern void __wt_encrypt_size(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep);
@@ -731,7 +754,7 @@ extern void __wt_txn_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_global_destroy(WT_SESSION_IMPL *session);
extern int __wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len);
-extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]);
+extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]);
diff --git a/src/include/flags.h b/src/include/flags.h
index 24fae4abccd..a6f42a9938f 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -12,13 +12,14 @@
#define WT_CONN_LOG_SERVER_RUN 0x00000080
#define WT_CONN_LSM_MERGE 0x00000100
#define WT_CONN_PANIC 0x00000200
-#define WT_CONN_SERVER_ASYNC 0x00000400
-#define WT_CONN_SERVER_CHECKPOINT 0x00000800
-#define WT_CONN_SERVER_LSM 0x00001000
-#define WT_CONN_SERVER_RUN 0x00002000
-#define WT_CONN_SERVER_STATISTICS 0x00004000
-#define WT_CONN_SERVER_SWEEP 0x00008000
-#define WT_CONN_WAS_BACKUP 0x00010000
+#define WT_CONN_READONLY 0x00000400
+#define WT_CONN_SERVER_ASYNC 0x00000800
+#define WT_CONN_SERVER_CHECKPOINT 0x00001000
+#define WT_CONN_SERVER_LSM 0x00002000
+#define WT_CONN_SERVER_RUN 0x00004000
+#define WT_CONN_SERVER_STATISTICS 0x00008000
+#define WT_CONN_SERVER_SWEEP 0x00010000
+#define WT_CONN_WAS_BACKUP 0x00020000
#define WT_EVICTING 0x00000001
#define WT_EVICT_IN_MEMORY 0x00000002
#define WT_EVICT_LOOKASIDE 0x00000004
@@ -55,20 +56,21 @@
#define WT_SESSION_INTERNAL 0x00000004
#define WT_SESSION_LOCKED_CHECKPOINT 0x00000008
#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000010
-#define WT_SESSION_LOCKED_SCHEMA 0x00000020
-#define WT_SESSION_LOCKED_SLOT 0x00000040
-#define WT_SESSION_LOCKED_TABLE 0x00000080
-#define WT_SESSION_LOCKED_TURTLE 0x00000100
-#define WT_SESSION_LOCK_NO_WAIT 0x00000200
-#define WT_SESSION_LOGGING_INMEM 0x00000400
-#define WT_SESSION_LOOKASIDE_CURSOR 0x00000800
-#define WT_SESSION_NO_CACHE 0x00001000
-#define WT_SESSION_NO_DATA_HANDLES 0x00002000
-#define WT_SESSION_NO_EVICTION 0x00004000
-#define WT_SESSION_NO_LOGGING 0x00008000
-#define WT_SESSION_NO_SCHEMA_LOCK 0x00010000
-#define WT_SESSION_QUIET_CORRUPT_FILE 0x00020000
-#define WT_SESSION_SERVER_ASYNC 0x00040000
+#define WT_SESSION_LOCKED_METADATA 0x00000020
+#define WT_SESSION_LOCKED_SCHEMA 0x00000040
+#define WT_SESSION_LOCKED_SLOT 0x00000080
+#define WT_SESSION_LOCKED_TABLE 0x00000100
+#define WT_SESSION_LOCKED_TURTLE 0x00000200
+#define WT_SESSION_LOCK_NO_WAIT 0x00000400
+#define WT_SESSION_LOGGING_INMEM 0x00000800
+#define WT_SESSION_LOOKASIDE_CURSOR 0x00001000
+#define WT_SESSION_NO_CACHE 0x00002000
+#define WT_SESSION_NO_DATA_HANDLES 0x00004000
+#define WT_SESSION_NO_EVICTION 0x00008000
+#define WT_SESSION_NO_LOGGING 0x00010000
+#define WT_SESSION_NO_SCHEMA_LOCK 0x00020000
+#define WT_SESSION_QUIET_CORRUPT_FILE 0x00040000
+#define WT_SESSION_SERVER_ASYNC 0x00080000
#define WT_TXN_LOG_CKPT_CLEANUP 0x00000001
#define WT_TXN_LOG_CKPT_PREPARE 0x00000002
#define WT_TXN_LOG_CKPT_START 0x00000004
diff --git a/src/include/gcc.h b/src/include/gcc.h
index 6ccc0de3c03..ce6afdd6e9c 100644
--- a/src/include/gcc.h
+++ b/src/include/gcc.h
@@ -6,6 +6,7 @@
* See the file LICENSE for redistribution information.
*/
+#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */
#define WT_SIZET_FMT "zu" /* size_t format string */
/* Add GCC-specific attributes to types and function declarations. */
diff --git a/src/include/lint.h b/src/include/lint.h
index f8b17022968..1b64186cbab 100644
--- a/src/include/lint.h
+++ b/src/include/lint.h
@@ -6,6 +6,7 @@
* See the file LICENSE for redistribution information.
*/
+#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */
#define WT_SIZET_FMT "zu" /* size_t format string */
#define WT_COMPILER_TYPE_ALIGN(x)
diff --git a/src/include/lsm.h b/src/include/lsm.h
index 7cb3ccc895d..444073087df 100644
--- a/src/include/lsm.h
+++ b/src/include/lsm.h
@@ -179,7 +179,7 @@ struct __wt_lsm_tree {
int collator_owned;
uint32_t refcnt; /* Number of users of the tree */
- uint8_t exclusive; /* Tree is locked exclusively */
+ WT_SESSION_IMPL *excl_session; /* Session has exclusive lock */
#define LSM_TREE_MAX_QUEUE 100
uint32_t queue_ref;
@@ -215,7 +215,7 @@ struct __wt_lsm_tree {
size_t chunk_alloc; /* Space allocated for chunks */
uint32_t nchunks; /* Number of active chunks */
uint32_t last; /* Last allocated ID */
- int modified; /* Have there been updates? */
+ bool modified; /* Have there been updates? */
WT_LSM_CHUNK **old_chunks; /* Array of old LSM chunks */
size_t old_alloc; /* Space allocated for old chunks */
@@ -242,13 +242,18 @@ struct __wt_lsm_tree {
int64_t lsm_lookup_no_bloom;
int64_t lsm_merge_throttle;
-#define WT_LSM_TREE_ACTIVE 0x01 /* Workers are active */
-#define WT_LSM_TREE_AGGRESSIVE_TIMER 0x02 /* Timer for merge aggression */
-#define WT_LSM_TREE_COMPACTING 0x04 /* Tree being compacted */
-#define WT_LSM_TREE_MERGES 0x08 /* Tree should run merges */
-#define WT_LSM_TREE_NEED_SWITCH 0x10 /* New chunk needs creating */
-#define WT_LSM_TREE_OPEN 0x20 /* The tree is open */
-#define WT_LSM_TREE_THROTTLE 0x40 /* Throttle updates */
+ /*
+ * The tree is open for business. This used to be a flag, but the flag
+ * was susceptible to races.
+ */
+ bool active;
+
+#define WT_LSM_TREE_AGGRESSIVE_TIMER 0x01 /* Timer for merge aggression */
+#define WT_LSM_TREE_COMPACTING 0x02 /* Tree being compacted */
+#define WT_LSM_TREE_MERGES 0x04 /* Tree should run merges */
+#define WT_LSM_TREE_NEED_SWITCH 0x08 /* New chunk needs creating */
+#define WT_LSM_TREE_OPEN 0x10 /* The tree is open */
+#define WT_LSM_TREE_THROTTLE 0x20 /* Throttle updates */
uint32_t flags;
};
diff --git a/src/include/meta.h b/src/include/meta.h
index d61022c0c44..ac0f5fedac4 100644
--- a/src/include/meta.h
+++ b/src/include/meta.h
@@ -21,6 +21,7 @@
#define WT_METADATA_TURTLE_SET "WiredTiger.turtle.set" /* Turtle temp file */
#define WT_METADATA_URI "metadata:" /* Metadata alias */
+#define WT_METAFILE "WiredTiger.wt" /* Metadata table */
#define WT_METAFILE_URI "file:WiredTiger.wt" /* Metadata table URI */
#define WT_LAS_URI "file:WiredTigerLAS.wt" /* Lookaside table URI*/
diff --git a/src/include/misc.h b/src/include/misc.h
index 5dadb1b1484..07d52c61eac 100644
--- a/src/include/misc.h
+++ b/src/include/misc.h
@@ -198,13 +198,9 @@
/* Check if a string matches a prefix. */
#define WT_PREFIX_MATCH(str, pfx) \
- (((const char *)str)[0] == ((const char *)pfx)[0] && \
+ (((const char *)(str))[0] == ((const char *)(pfx))[0] && \
strncmp((str), (pfx), strlen(pfx)) == 0)
-/* Check if a non-nul-terminated string matches a prefix. */
-#define WT_PREFIX_MATCH_LEN(str, len, pfx) \
- ((len) >= strlen(pfx) && WT_PREFIX_MATCH(str, pfx))
-
/* Check if a string matches a prefix, and move past it. */
#define WT_PREFIX_SKIP(str, pfx) \
(WT_PREFIX_MATCH(str, pfx) ? ((str) += strlen(pfx), 1) : 0)
diff --git a/src/include/msvc.h b/src/include/msvc.h
index 99260a44875..d5be5bd8c60 100644
--- a/src/include/msvc.h
+++ b/src/include/msvc.h
@@ -13,6 +13,7 @@
#define inline __inline
+#define WT_PTRDIFFT_FMT "Id" /* ptrdiff_t format string */
#define WT_SIZET_FMT "Iu" /* size_t format string */
/*
diff --git a/src/include/mutex.h b/src/include/mutex.h
index f798bfb3ece..04679884930 100644
--- a/src/include/mutex.h
+++ b/src/include/mutex.h
@@ -20,6 +20,13 @@ struct __wt_condvar {
int waiters; /* Numbers of waiters, or
-1 if signalled with no waiters. */
+ /*
+ * The following fields are only used for automatically adjusting
+ * condition variables. They could be in a separate structure.
+ */
+ uint64_t min_wait; /* Minimum wait duration */
+ uint64_t max_wait; /* Maximum wait duration */
+ uint64_t prev_wait; /* Wait duration used last time */
};
/*
diff --git a/src/include/packing.i b/src/include/packing.i
index 784a55ef2ae..35b2ddc43db 100644
--- a/src/include/packing.i
+++ b/src/include/packing.i
@@ -677,8 +677,8 @@ __wt_struct_unpackv(WT_SESSION_IMPL *session,
if (fmt[0] != '\0' && fmt[1] == '\0') {
pv.type = fmt[0];
- if ((ret = __unpack_read(session, &pv, &p, size)) == 0)
- WT_UNPACK_PUT(session, pv, ap);
+ WT_RET(__unpack_read(session, &pv, &p, size));
+ WT_UNPACK_PUT(session, pv, ap);
return (0);
}
diff --git a/src/include/schema.h b/src/include/schema.h
index a51030870c1..f93c596e2ca 100644
--- a/src/include/schema.h
+++ b/src/include/schema.h
@@ -133,6 +133,14 @@ struct __wt_table {
&S2C(session)->dhandle_lock, WT_SESSION_LOCKED_HANDLE_LIST, op)
/*
+ * WT_WITH_METADATA_LOCK --
+ * Acquire the metadata lock, perform an operation, drop the lock.
+ */
+#define WT_WITH_METADATA_LOCK(session, ret, op) \
+ WT_WITH_LOCK(session, ret, \
+ &S2C(session)->metadata_lock, WT_SESSION_LOCKED_METADATA, op)
+
+/*
* WT_WITH_SCHEMA_LOCK --
* Acquire the schema lock, perform an operation, drop the lock.
* Check that we are not already holding some other lock: the schema lock
@@ -166,6 +174,8 @@ struct __wt_table {
*/
#define WT_WITHOUT_LOCKS(session, op) do { \
WT_CONNECTION_IMPL *__conn = S2C(session); \
+ bool __checkpoint_locked = \
+ F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \
bool __handle_locked = \
F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST); \
bool __table_locked = \
@@ -184,7 +194,15 @@ struct __wt_table {
F_CLR(session, WT_SESSION_LOCKED_SCHEMA); \
__wt_spin_unlock(session, &__conn->schema_lock); \
} \
+ if (__checkpoint_locked) { \
+ F_CLR(session, WT_SESSION_LOCKED_CHECKPOINT); \
+ __wt_spin_unlock(session, &__conn->checkpoint_lock); \
+ } \
op; \
+ if (__checkpoint_locked) { \
+ __wt_spin_lock(session, &__conn->checkpoint_lock); \
+ F_SET(session, WT_SESSION_LOCKED_CHECKPOINT); \
+ } \
if (__schema_locked) { \
__wt_spin_lock(session, &__conn->schema_lock); \
F_SET(session, WT_SESSION_LOCKED_SCHEMA); \
diff --git a/src/include/session.h b/src/include/session.h
index 5c3291230b4..7fdb7fc2548 100644
--- a/src/include/session.h
+++ b/src/include/session.h
@@ -126,14 +126,24 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
void *block_manager; /* Block-manager support */
int (*block_manager_cleanup)(WT_SESSION_IMPL *);
- /* Checkpoint support */
- struct {
- WT_DATA_HANDLE *dhandle;
- const char *name;
- } *ckpt_handle; /* Handle list */
+ /* Checkpoint handles */
+ WT_DATA_HANDLE **ckpt_handle; /* Handle list */
u_int ckpt_handle_next; /* Next empty slot */
size_t ckpt_handle_allocated; /* Bytes allocated */
+ /*
+ * Operations acting on handles.
+ *
+ * The preferred pattern is to gather all of the required handles at
+ * the beginning of an operation, then drop any other locks, perform
+ * the operation, then release the handles. This cannot be easily
+ * merged with the list of checkpoint handles because some operations
+ * (such as compact) do checkpoints internally.
+ */
+ WT_DATA_HANDLE **op_handle; /* Handle list */
+ u_int op_handle_next; /* Next empty slot */
+ size_t op_handle_allocated; /* Bytes allocated */
+
void *reconcile; /* Reconciliation support */
int (*reconcile_cleanup)(WT_SESSION_IMPL *);
diff --git a/src/include/stat.h b/src/include/stat.h
index 51d2fa332e7..f9170dc1a79 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -227,12 +227,22 @@ __wt_stats_clear(void *stats_arg, int slot)
*/
#define WT_CONNECTION_STATS_BASE 1000
struct __wt_connection_stats {
- int64_t async_alloc_race;
- int64_t async_alloc_view;
+ int64_t lsm_work_queue_app;
+ int64_t lsm_work_queue_manager;
+ int64_t lsm_rows_merged;
+ int64_t lsm_checkpoint_throttle;
+ int64_t lsm_merge_throttle;
+ int64_t lsm_work_queue_switch;
+ int64_t lsm_work_units_discarded;
+ int64_t lsm_work_units_done;
+ int64_t lsm_work_units_created;
+ int64_t lsm_work_queue_max;
int64_t async_cur_queue;
+ int64_t async_max_queue;
+ int64_t async_alloc_race;
int64_t async_flush;
+ int64_t async_alloc_view;
int64_t async_full;
- int64_t async_max_queue;
int64_t async_nowork;
int64_t async_op_alloc;
int64_t async_op_compact;
@@ -240,55 +250,66 @@ struct __wt_connection_stats {
int64_t async_op_remove;
int64_t async_op_search;
int64_t async_op_update;
- int64_t block_byte_map_read;
- int64_t block_byte_read;
- int64_t block_byte_write;
- int64_t block_map_read;
int64_t block_preload;
int64_t block_read;
int64_t block_write;
- int64_t cache_bytes_dirty;
- int64_t cache_bytes_internal;
+ int64_t block_byte_read;
+ int64_t block_byte_write;
+ int64_t block_map_read;
+ int64_t block_byte_map_read;
int64_t cache_bytes_inuse;
- int64_t cache_bytes_leaf;
- int64_t cache_bytes_max;
- int64_t cache_bytes_overflow;
int64_t cache_bytes_read;
int64_t cache_bytes_write;
- int64_t cache_eviction_app;
int64_t cache_eviction_checkpoint;
- int64_t cache_eviction_clean;
- int64_t cache_eviction_deepen;
- int64_t cache_eviction_dirty;
- int64_t cache_eviction_fail;
- int64_t cache_eviction_force;
- int64_t cache_eviction_force_delete;
- int64_t cache_eviction_force_fail;
- int64_t cache_eviction_hazard;
- int64_t cache_eviction_internal;
- int64_t cache_eviction_maximum_page_size;
+ int64_t cache_eviction_aggressive_set;
int64_t cache_eviction_queue_empty;
int64_t cache_eviction_queue_not_empty;
int64_t cache_eviction_server_evicting;
int64_t cache_eviction_server_not_evicting;
int64_t cache_eviction_slow;
- int64_t cache_eviction_split_internal;
- int64_t cache_eviction_split_leaf;
- int64_t cache_eviction_walk;
int64_t cache_eviction_worker_evicting;
- int64_t cache_inmem_split;
+ int64_t cache_eviction_force_fail;
+ int64_t cache_eviction_hazard;
int64_t cache_inmem_splittable;
+ int64_t cache_inmem_split;
+ int64_t cache_eviction_internal;
+ int64_t cache_eviction_split_internal;
+ int64_t cache_eviction_split_leaf;
int64_t cache_lookaside_insert;
int64_t cache_lookaside_remove;
- int64_t cache_overhead;
- int64_t cache_pages_dirty;
+ int64_t cache_bytes_max;
+ int64_t cache_eviction_maximum_page_size;
+ int64_t cache_eviction_dirty;
+ int64_t cache_eviction_deepen;
+ int64_t cache_write_lookaside;
int64_t cache_pages_inuse;
+ int64_t cache_eviction_force;
+ int64_t cache_eviction_force_delete;
+ int64_t cache_eviction_app;
int64_t cache_read;
int64_t cache_read_lookaside;
+ int64_t cache_eviction_fail;
+ int64_t cache_eviction_walk;
int64_t cache_write;
- int64_t cache_write_lookaside;
int64_t cache_write_restore;
+ int64_t cache_overhead;
+ int64_t cache_bytes_internal;
+ int64_t cache_bytes_leaf;
+ int64_t cache_bytes_overflow;
+ int64_t cache_bytes_dirty;
+ int64_t cache_pages_dirty;
+ int64_t cache_eviction_clean;
+ int64_t cond_auto_wait_reset;
+ int64_t cond_auto_wait;
+ int64_t file_open;
+ int64_t memory_allocation;
+ int64_t memory_free;
+ int64_t memory_grow;
int64_t cond_wait;
+ int64_t rwlock_read;
+ int64_t rwlock_write;
+ int64_t read_io;
+ int64_t write_io;
int64_t cursor_create;
int64_t cursor_insert;
int64_t cursor_next;
@@ -298,96 +319,81 @@ struct __wt_connection_stats {
int64_t cursor_restart;
int64_t cursor_search;
int64_t cursor_search_near;
- int64_t cursor_truncate;
int64_t cursor_update;
+ int64_t cursor_truncate;
int64_t dh_conn_handle_count;
- int64_t dh_session_handles;
- int64_t dh_session_sweeps;
- int64_t dh_sweep_close;
int64_t dh_sweep_ref;
+ int64_t dh_sweep_close;
int64_t dh_sweep_remove;
int64_t dh_sweep_tod;
int64_t dh_sweeps;
- int64_t file_open;
- int64_t log_buffer_size;
+ int64_t dh_session_handles;
+ int64_t dh_session_sweeps;
+ int64_t log_slot_switch_busy;
+ int64_t log_slot_closes;
+ int64_t log_slot_races;
+ int64_t log_slot_transitions;
+ int64_t log_slot_joins;
+ int64_t log_slot_unbuffered;
int64_t log_bytes_payload;
int64_t log_bytes_written;
- int64_t log_close_yields;
- int64_t log_compress_len;
- int64_t log_compress_mem;
- int64_t log_compress_small;
- int64_t log_compress_write_fails;
- int64_t log_compress_writes;
+ int64_t log_zero_fills;
int64_t log_flush;
+ int64_t log_force_write;
+ int64_t log_force_write_skip;
+ int64_t log_compress_writes;
+ int64_t log_compress_write_fails;
+ int64_t log_compress_small;
+ int64_t log_release_write_lsn;
+ int64_t log_scans;
+ int64_t log_scan_rereads;
+ int64_t log_write_lsn;
+ int64_t log_write_lsn_skip;
+ int64_t log_sync;
+ int64_t log_sync_dir;
+ int64_t log_writes;
+ int64_t log_slot_consolidated;
int64_t log_max_filesize;
- int64_t log_prealloc_files;
int64_t log_prealloc_max;
int64_t log_prealloc_missed;
+ int64_t log_prealloc_files;
int64_t log_prealloc_used;
- int64_t log_release_write_lsn;
int64_t log_scan_records;
- int64_t log_scan_rereads;
- int64_t log_scans;
- int64_t log_slot_closes;
+ int64_t log_compress_mem;
+ int64_t log_buffer_size;
+ int64_t log_compress_len;
int64_t log_slot_coalesced;
- int64_t log_slot_consolidated;
- int64_t log_slot_joins;
- int64_t log_slot_races;
- int64_t log_slot_switch_busy;
- int64_t log_slot_transitions;
- int64_t log_slot_unbuffered;
- int64_t log_sync;
- int64_t log_sync_dir;
- int64_t log_write_lsn;
- int64_t log_writes;
- int64_t log_zero_fills;
- int64_t lsm_checkpoint_throttle;
- int64_t lsm_merge_throttle;
- int64_t lsm_rows_merged;
- int64_t lsm_work_queue_app;
- int64_t lsm_work_queue_manager;
- int64_t lsm_work_queue_max;
- int64_t lsm_work_queue_switch;
- int64_t lsm_work_units_created;
- int64_t lsm_work_units_discarded;
- int64_t lsm_work_units_done;
- int64_t memory_allocation;
- int64_t memory_free;
- int64_t memory_grow;
- int64_t page_busy_blocked;
- int64_t page_forcible_evict_blocked;
- int64_t page_locked_blocked;
- int64_t page_read_blocked;
- int64_t page_sleep;
- int64_t read_io;
- int64_t rec_page_delete;
+ int64_t log_close_yields;
int64_t rec_page_delete_fast;
int64_t rec_pages;
int64_t rec_pages_eviction;
+ int64_t rec_page_delete;
int64_t rec_split_stashed_bytes;
int64_t rec_split_stashed_objects;
- int64_t rwlock_read;
- int64_t rwlock_write;
int64_t session_cursor_open;
int64_t session_open;
+ int64_t page_busy_blocked;
+ int64_t page_forcible_evict_blocked;
+ int64_t page_locked_blocked;
+ int64_t page_read_blocked;
+ int64_t page_sleep;
+ int64_t txn_snapshots_created;
+ int64_t txn_snapshots_dropped;
int64_t txn_begin;
- int64_t txn_checkpoint;
- int64_t txn_checkpoint_generation;
int64_t txn_checkpoint_running;
+ int64_t txn_checkpoint_generation;
int64_t txn_checkpoint_time_max;
int64_t txn_checkpoint_time_min;
int64_t txn_checkpoint_time_recent;
int64_t txn_checkpoint_time_total;
- int64_t txn_commit;
+ int64_t txn_checkpoint;
int64_t txn_fail_cache;
- int64_t txn_pinned_checkpoint_range;
int64_t txn_pinned_range;
+ int64_t txn_pinned_checkpoint_range;
int64_t txn_pinned_snapshot_range;
- int64_t txn_rollback;
- int64_t txn_snapshots_created;
- int64_t txn_snapshots_dropped;
int64_t txn_sync;
- int64_t write_io;
+ int64_t txn_commit;
+ int64_t txn_rollback;
};
/*
@@ -395,102 +401,102 @@ struct __wt_connection_stats {
*/
#define WT_DSRC_STATS_BASE 2000
struct __wt_dsrc_stats {
- int64_t allocation_size;
- int64_t block_alloc;
- int64_t block_checkpoint_size;
- int64_t block_extension;
- int64_t block_free;
- int64_t block_magic;
- int64_t block_major;
- int64_t block_minor;
- int64_t block_reuse_bytes;
- int64_t block_size;
- int64_t bloom_count;
int64_t bloom_false_positive;
int64_t bloom_hit;
int64_t bloom_miss;
int64_t bloom_page_evict;
int64_t bloom_page_read;
+ int64_t bloom_count;
+ int64_t lsm_chunk_count;
+ int64_t lsm_generation_max;
+ int64_t lsm_lookup_no_bloom;
+ int64_t lsm_checkpoint_throttle;
+ int64_t lsm_merge_throttle;
int64_t bloom_size;
+ int64_t block_extension;
+ int64_t block_alloc;
+ int64_t block_free;
+ int64_t block_checkpoint_size;
+ int64_t allocation_size;
+ int64_t block_reuse_bytes;
+ int64_t block_magic;
+ int64_t block_major;
+ int64_t block_size;
+ int64_t block_minor;
int64_t btree_checkpoint_generation;
- int64_t btree_column_deleted;
int64_t btree_column_fix;
int64_t btree_column_internal;
int64_t btree_column_rle;
+ int64_t btree_column_deleted;
int64_t btree_column_variable;
- int64_t btree_compact_rewrite;
- int64_t btree_entries;
int64_t btree_fixed_len;
- int64_t btree_maximum_depth;
int64_t btree_maxintlkey;
int64_t btree_maxintlpage;
int64_t btree_maxleafkey;
int64_t btree_maxleafpage;
int64_t btree_maxleafvalue;
+ int64_t btree_maximum_depth;
+ int64_t btree_entries;
int64_t btree_overflow;
+ int64_t btree_compact_rewrite;
int64_t btree_row_internal;
int64_t btree_row_leaf;
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
- int64_t cache_eviction_clean;
- int64_t cache_eviction_deepen;
- int64_t cache_eviction_dirty;
int64_t cache_eviction_fail;
int64_t cache_eviction_hazard;
+ int64_t cache_inmem_splittable;
+ int64_t cache_inmem_split;
int64_t cache_eviction_internal;
int64_t cache_eviction_split_internal;
int64_t cache_eviction_split_leaf;
- int64_t cache_inmem_split;
- int64_t cache_inmem_splittable;
+ int64_t cache_eviction_dirty;
+ int64_t cache_read_overflow;
int64_t cache_overflow_value;
+ int64_t cache_eviction_deepen;
+ int64_t cache_write_lookaside;
int64_t cache_read;
int64_t cache_read_lookaside;
- int64_t cache_read_overflow;
int64_t cache_write;
- int64_t cache_write_lookaside;
int64_t cache_write_restore;
- int64_t compress_raw_fail;
- int64_t compress_raw_fail_temporary;
- int64_t compress_raw_ok;
+ int64_t cache_eviction_clean;
int64_t compress_read;
int64_t compress_write;
int64_t compress_write_fail;
int64_t compress_write_too_small;
- int64_t cursor_create;
- int64_t cursor_insert;
+ int64_t compress_raw_fail_temporary;
+ int64_t compress_raw_fail;
+ int64_t compress_raw_ok;
int64_t cursor_insert_bulk;
+ int64_t cursor_create;
int64_t cursor_insert_bytes;
+ int64_t cursor_remove_bytes;
+ int64_t cursor_update_bytes;
+ int64_t cursor_insert;
int64_t cursor_next;
int64_t cursor_prev;
int64_t cursor_remove;
- int64_t cursor_remove_bytes;
int64_t cursor_reset;
int64_t cursor_restart;
int64_t cursor_search;
int64_t cursor_search_near;
int64_t cursor_truncate;
int64_t cursor_update;
- int64_t cursor_update_bytes;
- int64_t lsm_checkpoint_throttle;
- int64_t lsm_chunk_count;
- int64_t lsm_generation_max;
- int64_t lsm_lookup_no_bloom;
- int64_t lsm_merge_throttle;
int64_t rec_dictionary;
+ int64_t rec_page_delete_fast;
+ int64_t rec_suffix_compression;
int64_t rec_multiblock_internal;
- int64_t rec_multiblock_leaf;
- int64_t rec_multiblock_max;
int64_t rec_overflow_key_internal;
+ int64_t rec_prefix_compression;
+ int64_t rec_multiblock_leaf;
int64_t rec_overflow_key_leaf;
+ int64_t rec_multiblock_max;
int64_t rec_overflow_value;
- int64_t rec_page_delete;
- int64_t rec_page_delete_fast;
int64_t rec_page_match;
int64_t rec_pages;
int64_t rec_pages_eviction;
- int64_t rec_prefix_compression;
- int64_t rec_suffix_compression;
+ int64_t rec_page_delete;
int64_t session_compact;
int64_t session_cursor_open;
int64_t txn_update_conflict;
diff --git a/src/include/txn.i b/src/include/txn.i
index 46f2ff3e5f1..40e2a6175d6 100644
--- a/src/include/txn.i
+++ b/src/include/txn.i
@@ -266,6 +266,8 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
}
F_SET(txn, WT_TXN_RUNNING);
+ if (F_ISSET(S2C(session), WT_CONN_READONLY))
+ F_SET(txn, WT_TXN_READONLY);
return (false);
}
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 767c176b53f..1e263f22880 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -828,7 +828,8 @@ struct __wt_session {
* @snippet ex_all.c Display an error thread safe
*
* @param session the session handle
- * @param error a return value from a WiredTiger function
+ * @param error a return value from a WiredTiger, ISO C, or POSIX
+ * standard API
* @returns a string representation of the error
*/
const char *__F(strerror)(WT_SESSION *session, int error);
@@ -873,7 +874,7 @@ struct __wt_session {
* updates). See @ref data_sources for more information.
* <br>
* @copydoc doc_cursor_types
- * @param to_dup a cursor to duplicate
+ * @param to_dup a cursor to duplicate or gather statistics on
* @configstart{WT_SESSION.open_cursor, see dist/api_data.py}
* @config{append, append the value as a new record\, creating a new
* record number key; valid only for cursors with record number keys., a
@@ -1409,7 +1410,7 @@ struct __wt_session {
* if <code>NULL</code>, the truncate continues to the end of the
* object
* @configempty{WT_SESSION.truncate, see dist/api_data.py}
- * @ebusy_errors
+ * @errors
*/
int __F(truncate)(WT_SESSION *session,
const char *name,
@@ -1893,8 +1894,10 @@ struct __wt_connection {
* information. Enabling the statistics log server uses a session from
* the configured session_max., a set of related configuration options
* defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close, log
- * statistics on database close., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;json, encode
+ * statistics in JSON format., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close, log statistics on database
+ * close., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the pathname to a file into
* which the log records are written\, may contain ISO C standard
* strftime conversion specifications. If the value is not an absolute
@@ -1908,7 +1911,8 @@ struct __wt_connection {
* empty.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;timestamp, a timestamp
* prepended to each log record\, may contain strftime conversion
- * specifications., a string; default \c "%b %d %H:%M:%S".}
+ * specifications\, when \c json is configured\, defaults to \c
+ * "%FT%T.000Z"., a string; default \c "%b %d %H:%M:%S".}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between each
* write of the log records; setting this value above 0 configures
* statistics logging., an integer between 0 and 100000; default \c 0.}
@@ -1982,7 +1986,8 @@ struct __wt_connection {
*
* @param connection the connection handle
* @param errhandler An error handler. If <code>NULL</code>, the
- * connection's error handler is used
+ * connection's error handler is used. See @ref error_handling_event
+ * for more information.
* @configstart{WT_CONNECTION.open_session, see dist/api_data.py}
* @config{isolation, the default isolation level for operations in this
* session., a string\, chosen from the following options: \c
@@ -2143,7 +2148,8 @@ struct __wt_connection {
* @param home The path to the database home directory. See @ref home
* for more information.
* @param errhandler An error handler. If <code>NULL</code>, a builtin error
- * handler is installed that writes error messages to stderr
+ * handler is installed that writes error messages to stderr. See
+ * @ref error_handling_event for more information.
* @configstart{wiredtiger_open, see dist/api_data.py}
* @config{async = (, asynchronous operations configuration options., a set of
* related configuration options defined below.}
@@ -2326,6 +2332,9 @@ struct __wt_connection {
* start an RPC server for primary processes and use RPC for secondary
* processes). <b>Not yet supported in WiredTiger</b>., a boolean flag; default
* \c false.}
+ * @config{readonly, open connection in read-only mode. The database must
+ * exist. All methods that may modify a database are disabled. See @ref
+ * readonly for more information., a boolean flag; default \c false.}
* @config{session_max, maximum expected number of sessions (including server
* threads)., an integer greater than or equal to 1; default \c 100.}
* @config{shared_cache = (, shared cache configuration options. A database
@@ -2363,23 +2372,26 @@ struct __wt_connection {
* maintain\, to a file. See @ref statistics for more information. Enabling
* the statistics log server uses a session from the configured session_max., a
* set of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close, log statistics on database close.,
- * a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the
- * pathname to a file into which the log records are written\, may contain ISO C
- * standard strftime conversion specifications. If the value is not an absolute
- * path name\, the file is created relative to the database home., a string;
- * default \c "WiredTigerStat.%d.%H".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;sources,
- * if non-empty\, include statistics for the list of data source URIs\, if they
- * are open at the time of the statistics logging. The list may include URIs
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;json, encode statistics in JSON format., a
+ * boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close,
+ * log statistics on database close., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the pathname to a file into which the
+ * log records are written\, may contain ISO C standard strftime conversion
+ * specifications. If the value is not an absolute path name\, the file is
+ * created relative to the database home., a string; default \c
+ * "WiredTigerStat.%d.%H".}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;sources, if
+ * non-empty\, include statistics for the list of data source URIs\, if they are
+ * open at the time of the statistics logging. The list may include URIs
* matching a single data source ("table:mytable")\, or a URI matching all data
* sources of a particular type ("table:")., a list of strings; default empty.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;timestamp, a timestamp prepended to each log
- * record\, may contain strftime conversion specifications., a string; default
- * \c "%b %d %H:%M:%S".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait
- * between each write of the log records; setting this value above 0 configures
+ * record\, may contain strftime conversion specifications\, when \c json is
+ * configured\, defaults to \c "%FT%T.000Z"., a string; default \c "%b %d
+ * %H:%M:%S".}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between
+ * each write of the log records; setting this value above 0 configures
* statistics logging., an integer between 0 and 100000; default \c 0.}
* @config{
* ),,}
@@ -2431,11 +2443,12 @@ int wiredtiger_open(const char *home,
WT_CONNECTION **connectionp);
/*!
- * Return information about a WiredTiger error as a string, not thread-safe.
+ * Return information about a WiredTiger error as a string (see
+ * WT_SESSION::strerror for a thread-safe API).
*
* @snippet ex_all.c Display an error
*
- * @param error a return value from a WiredTiger call
+ * @param error a return value from a WiredTiger, ISO C, or POSIX standard API
* @returns a string representation of the error
*/
const char *wiredtiger_strerror(int error);
@@ -2474,7 +2487,7 @@ struct __wt_async_callback {
struct __wt_event_handler {
/*!
* Callback to handle error messages; by default, error messages are
- * written to the stderr stream.
+ * written to the stderr stream. See @ref error_handling.
*
* Errors that require the application to exit and restart will have
* their \c error value set to \c WT_PANIC. The application can exit
@@ -2488,8 +2501,9 @@ struct __wt_event_handler {
* @param session the WiredTiger session handle in use when the error
* was generated. The handle may have been created by the application
* or automatically by WiredTiger.
- * @param error a WiredTiger, C99 or POSIX error code, which can
- * be converted to a string using ::wiredtiger_strerror
+ * @param error a return value from a WiredTiger, ISO C, or
+ * POSIX standard API, which can be converted to a string using
+ * WT_SESSION::strerror
* @param message an error string
*/
int (*handle_error)(WT_EVENT_HANDLER *handler,
@@ -2497,7 +2511,7 @@ struct __wt_event_handler {
/*!
* Callback to handle informational messages; by default, informational
- * messages are written to the stdout stream.
+ * messages are written to the stdout stream. See @ref error_handling.
*
* Message handler returns are not ignored: if the handler returns
* non-zero, the error may cause the WiredTiger function posting the
@@ -2513,7 +2527,7 @@ struct __wt_event_handler {
/*!
* Callback to handle progress messages; by default, no progress
- * messages are written.
+ * messages are written. See @ref error_handling.
*
* Progress handler returns are not ignored: if the handler returns
* non-zero, the error may cause the WiredTiger function posting the
@@ -2998,6 +3012,10 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp);
*/
#define WT_CACHE_FULL -31807
/*! @endcond */
+/*! @cond internal */
+/*! Permission denied (internal). */
+#define WT_PERM_DENIED -31808
+/*! @endcond */
/*
* Error return section: END
* DO NOT EDIT: automatically built by dist/api_err.py.
@@ -3688,329 +3706,341 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
* keys. See @ref data_statistics for more information.
* @{
*/
-/*! async: number of allocation state races */
-#define WT_STAT_CONN_ASYNC_ALLOC_RACE 1000
-/*! async: number of operation slots viewed for allocation */
-#define WT_STAT_CONN_ASYNC_ALLOC_VIEW 1001
+/*! LSM: application work units currently queued */
+#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1000
+/*! LSM: merge work units currently queued */
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1001
+/*! LSM: rows merged in an LSM tree */
+#define WT_STAT_CONN_LSM_ROWS_MERGED 1002
+/*! LSM: sleep for LSM checkpoint throttle */
+#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1003
+/*! LSM: sleep for LSM merge throttle */
+#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1004
+/*! LSM: switch work units currently queued */
+#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1005
+/*! LSM: tree maintenance operations discarded */
+#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1006
+/*! LSM: tree maintenance operations executed */
+#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1007
+/*! LSM: tree maintenance operations scheduled */
+#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1008
+/*! LSM: tree queue hit maximum */
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1009
/*! async: current work queue length */
-#define WT_STAT_CONN_ASYNC_CUR_QUEUE 1002
+#define WT_STAT_CONN_ASYNC_CUR_QUEUE 1010
+/*! async: maximum work queue length */
+#define WT_STAT_CONN_ASYNC_MAX_QUEUE 1011
+/*! async: number of allocation state races */
+#define WT_STAT_CONN_ASYNC_ALLOC_RACE 1012
/*! async: number of flush calls */
-#define WT_STAT_CONN_ASYNC_FLUSH 1003
+#define WT_STAT_CONN_ASYNC_FLUSH 1013
+/*! async: number of operation slots viewed for allocation */
+#define WT_STAT_CONN_ASYNC_ALLOC_VIEW 1014
/*! async: number of times operation allocation failed */
-#define WT_STAT_CONN_ASYNC_FULL 1004
-/*! async: maximum work queue length */
-#define WT_STAT_CONN_ASYNC_MAX_QUEUE 1005
+#define WT_STAT_CONN_ASYNC_FULL 1015
/*! async: number of times worker found no work */
-#define WT_STAT_CONN_ASYNC_NOWORK 1006
+#define WT_STAT_CONN_ASYNC_NOWORK 1016
/*! async: total allocations */
-#define WT_STAT_CONN_ASYNC_OP_ALLOC 1007
+#define WT_STAT_CONN_ASYNC_OP_ALLOC 1017
/*! async: total compact calls */
-#define WT_STAT_CONN_ASYNC_OP_COMPACT 1008
+#define WT_STAT_CONN_ASYNC_OP_COMPACT 1018
/*! async: total insert calls */
-#define WT_STAT_CONN_ASYNC_OP_INSERT 1009
+#define WT_STAT_CONN_ASYNC_OP_INSERT 1019
/*! async: total remove calls */
-#define WT_STAT_CONN_ASYNC_OP_REMOVE 1010
+#define WT_STAT_CONN_ASYNC_OP_REMOVE 1020
/*! async: total search calls */
-#define WT_STAT_CONN_ASYNC_OP_SEARCH 1011
+#define WT_STAT_CONN_ASYNC_OP_SEARCH 1021
/*! async: total update calls */
-#define WT_STAT_CONN_ASYNC_OP_UPDATE 1012
-/*! block-manager: mapped bytes read */
-#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1013
-/*! block-manager: bytes read */
-#define WT_STAT_CONN_BLOCK_BYTE_READ 1014
-/*! block-manager: bytes written */
-#define WT_STAT_CONN_BLOCK_BYTE_WRITE 1015
-/*! block-manager: mapped blocks read */
-#define WT_STAT_CONN_BLOCK_MAP_READ 1016
+#define WT_STAT_CONN_ASYNC_OP_UPDATE 1022
/*! block-manager: blocks pre-loaded */
-#define WT_STAT_CONN_BLOCK_PRELOAD 1017
+#define WT_STAT_CONN_BLOCK_PRELOAD 1023
/*! block-manager: blocks read */
-#define WT_STAT_CONN_BLOCK_READ 1018
+#define WT_STAT_CONN_BLOCK_READ 1024
/*! block-manager: blocks written */
-#define WT_STAT_CONN_BLOCK_WRITE 1019
-/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1020
-/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1021
+#define WT_STAT_CONN_BLOCK_WRITE 1025
+/*! block-manager: bytes read */
+#define WT_STAT_CONN_BLOCK_BYTE_READ 1026
+/*! block-manager: bytes written */
+#define WT_STAT_CONN_BLOCK_BYTE_WRITE 1027
+/*! block-manager: mapped blocks read */
+#define WT_STAT_CONN_BLOCK_MAP_READ 1028
+/*! block-manager: mapped bytes read */
+#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1029
/*! cache: bytes currently in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INUSE 1022
-/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1023
-/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 1024
-/*! cache: tracked bytes belonging to overflow pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1025
+#define WT_STAT_CONN_CACHE_BYTES_INUSE 1030
/*! cache: bytes read into cache */
-#define WT_STAT_CONN_CACHE_BYTES_READ 1026
+#define WT_STAT_CONN_CACHE_BYTES_READ 1031
/*! cache: bytes written from cache */
-#define WT_STAT_CONN_CACHE_BYTES_WRITE 1027
-/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1028
+#define WT_STAT_CONN_CACHE_BYTES_WRITE 1032
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1029
-/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1030
-/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1031
-/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1032
-/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1033
-/*! cache: pages evicted because they exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1034
-/*! cache: pages evicted because they had chains of deleted items */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1035
-/*! cache: failed eviction of pages that exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1036
-/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1037
-/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1038
-/*! cache: maximum page size at eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1039
+#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1033
+/*! cache: eviction currently operating in aggressive mode */
+#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1034
/*! cache: eviction server candidate queue empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1040
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1035
/*! cache: eviction server candidate queue not empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1041
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1036
/*! cache: eviction server evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1042
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1037
/*! cache: eviction server populating queue, but not evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1043
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1038
/*! cache: eviction server unable to reach eviction goal */
-#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1044
-/*! cache: internal pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1045
-/*! cache: leaf pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1046
-/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1047
+#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1039
/*! cache: eviction worker thread evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1048
-/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1049
+#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1040
+/*! cache: failed eviction of pages that exceeded the in-memory maximum */
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1041
+/*! cache: hazard pointer blocked page eviction */
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1042
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1050
+#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1043
+/*! cache: in-memory page splits */
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1044
+/*! cache: internal pages evicted */
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1045
+/*! cache: internal pages split during eviction */
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1046
+/*! cache: leaf pages split during eviction */
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1047
/*! cache: lookaside table insert calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1051
+#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1048
/*! cache: lookaside table remove calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1052
-/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1053
-/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1054
+#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1049
+/*! cache: maximum bytes configured */
+#define WT_STAT_CONN_CACHE_BYTES_MAX 1050
+/*! cache: maximum page size at eviction */
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1051
+/*! cache: modified pages evicted */
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1052
+/*! cache: page split during eviction deepened the tree */
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1053
+/*! cache: page written requiring lookaside records */
+#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1054
/*! cache: pages currently held in the cache */
#define WT_STAT_CONN_CACHE_PAGES_INUSE 1055
+/*! cache: pages evicted because they exceeded the in-memory maximum */
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1056
+/*! cache: pages evicted because they had chains of deleted items */
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1057
+/*! cache: pages evicted by application threads */
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1058
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1056
+#define WT_STAT_CONN_CACHE_READ 1059
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1057
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1060
+/*! cache: pages selected for eviction unable to be evicted */
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1061
+/*! cache: pages walked for eviction */
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1062
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1058
-/*! cache: page written requiring lookaside records */
-#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1059
+#define WT_STAT_CONN_CACHE_WRITE 1063
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1060
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1064
+/*! cache: percentage overhead */
+#define WT_STAT_CONN_CACHE_OVERHEAD 1065
+/*! cache: tracked bytes belonging to internal pages in the cache */
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1066
+/*! cache: tracked bytes belonging to leaf pages in the cache */
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1067
+/*! cache: tracked bytes belonging to overflow pages in the cache */
+#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1068
+/*! cache: tracked dirty bytes in the cache */
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1069
+/*! cache: tracked dirty pages in the cache */
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1070
+/*! cache: unmodified pages evicted */
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1071
+/*! connection: auto adjusting condition resets */
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1072
+/*! connection: auto adjusting condition wait calls */
+#define WT_STAT_CONN_COND_AUTO_WAIT 1073
+/*! connection: files currently open */
+#define WT_STAT_CONN_FILE_OPEN 1074
+/*! connection: memory allocations */
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1075
+/*! connection: memory frees */
+#define WT_STAT_CONN_MEMORY_FREE 1076
+/*! connection: memory re-allocations */
+#define WT_STAT_CONN_MEMORY_GROW 1077
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1061
+#define WT_STAT_CONN_COND_WAIT 1078
+/*! connection: pthread mutex shared lock read-lock calls */
+#define WT_STAT_CONN_RWLOCK_READ 1079
+/*! connection: pthread mutex shared lock write-lock calls */
+#define WT_STAT_CONN_RWLOCK_WRITE 1080
+/*! connection: total read I/Os */
+#define WT_STAT_CONN_READ_IO 1081
+/*! connection: total write I/Os */
+#define WT_STAT_CONN_WRITE_IO 1082
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1062
+#define WT_STAT_CONN_CURSOR_CREATE 1083
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1063
+#define WT_STAT_CONN_CURSOR_INSERT 1084
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1064
+#define WT_STAT_CONN_CURSOR_NEXT 1085
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1065
+#define WT_STAT_CONN_CURSOR_PREV 1086
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1066
+#define WT_STAT_CONN_CURSOR_REMOVE 1087
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1067
+#define WT_STAT_CONN_CURSOR_RESET 1088
/*! cursor: cursor restarted searches */
-#define WT_STAT_CONN_CURSOR_RESTART 1068
+#define WT_STAT_CONN_CURSOR_RESTART 1089
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1069
+#define WT_STAT_CONN_CURSOR_SEARCH 1090
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1070
-/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1071
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1091
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1072
+#define WT_STAT_CONN_CURSOR_UPDATE 1092
+/*! cursor: truncate calls */
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1093
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1073
-/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1074
-/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1075
-/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1076
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1094
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1077
+#define WT_STAT_CONN_DH_SWEEP_REF 1095
+/*! data-handle: connection sweep dhandles closed */
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1096
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1078
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1097
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1079
+#define WT_STAT_CONN_DH_SWEEP_TOD 1098
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1080
-/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1081
-/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1082
+#define WT_STAT_CONN_DH_SWEEPS 1099
+/*! data-handle: session dhandles swept */
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1100
+/*! data-handle: session sweep attempts */
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1101
+/*! log: busy returns attempting to switch slots */
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1102
+/*! log: consolidated slot closures */
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1103
+/*! log: consolidated slot join races */
+#define WT_STAT_CONN_LOG_SLOT_RACES 1104
+/*! log: consolidated slot join transitions */
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1105
+/*! log: consolidated slot joins */
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1106
+/*! log: consolidated slot unbuffered writes */
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1107
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1083
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1108
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1084
-/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1085
-/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1086
-/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1087
-/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1088
-/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1089
-/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1090
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1109
+/*! log: log files manually zero-filled */
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1110
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1091
+#define WT_STAT_CONN_LOG_FLUSH 1111
+/*! log: log force write operations */
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1112
+/*! log: log force write operations skipped */
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1113
+/*! log: log records compressed */
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1114
+/*! log: log records not compressed */
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1115
+/*! log: log records too small to compress */
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1116
+/*! log: log release advances write LSN */
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1117
+/*! log: log scan operations */
+#define WT_STAT_CONN_LOG_SCANS 1118
+/*! log: log scan records requiring two reads */
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1119
+/*! log: log server thread advances write LSN */
+#define WT_STAT_CONN_LOG_WRITE_LSN 1120
+/*! log: log server thread write LSN walk skipped */
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1121
+/*! log: log sync operations */
+#define WT_STAT_CONN_LOG_SYNC 1122
+/*! log: log sync_dir operations */
+#define WT_STAT_CONN_LOG_SYNC_DIR 1123
+/*! log: log write operations */
+#define WT_STAT_CONN_LOG_WRITES 1124
+/*! log: logging bytes consolidated */
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1125
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1092
-/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1093
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1126
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1094
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1127
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1095
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1128
+/*! log: pre-allocated log files prepared */
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1129
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1096
-/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1097
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1130
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1098
-/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1099
-/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1100
-/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1101
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1131
+/*! log: total in-memory size of compressed records */
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1132
+/*! log: total log buffer size */
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1133
+/*! log: total size of compressed records */
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1134
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1102
-/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1103
-/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1104
-/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1105
-/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1106
-/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1107
-/*! log: consolidated slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1108
-/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1109
-/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1110
-/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1111
-/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1112
-/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1113
-/*! LSM: sleep for LSM checkpoint throttle */
-#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1114
-/*! LSM: sleep for LSM merge throttle */
-#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1115
-/*! LSM: rows merged in an LSM tree */
-#define WT_STAT_CONN_LSM_ROWS_MERGED 1116
-/*! LSM: application work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1117
-/*! LSM: merge work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1118
-/*! LSM: tree queue hit maximum */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1119
-/*! LSM: switch work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1120
-/*! LSM: tree maintenance operations scheduled */
-#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1121
-/*! LSM: tree maintenance operations discarded */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1122
-/*! LSM: tree maintenance operations executed */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1123
-/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1124
-/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1125
-/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1126
-/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1127
-/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1128
-/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1129
-/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1130
-/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1131
-/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1132
-/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1133
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1135
+/*! log: yields waiting for previous log file close */
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1136
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1134
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1137
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1135
+#define WT_STAT_CONN_REC_PAGES 1138
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1136
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1139
+/*! reconciliation: pages deleted */
+#define WT_STAT_CONN_REC_PAGE_DELETE 1140
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1137
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1141
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1138
-/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1139
-/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1140
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1142
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1141
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1143
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1142
+#define WT_STAT_CONN_SESSION_OPEN 1144
+/*! thread-yield: page acquire busy blocked */
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1145
+/*! thread-yield: page acquire eviction blocked */
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1146
+/*! thread-yield: page acquire locked blocked */
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1147
+/*! thread-yield: page acquire read blocked */
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1148
+/*! thread-yield: page acquire time sleeping (usecs) */
+#define WT_STAT_CONN_PAGE_SLEEP 1149
+/*! transaction: number of named snapshots created */
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1150
+/*! transaction: number of named snapshots dropped */
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1151
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1143
-/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1144
-/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1145
+#define WT_STAT_CONN_TXN_BEGIN 1152
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1146
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1153
+/*! transaction: transaction checkpoint generation */
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1154
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1147
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1155
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1148
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1156
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1149
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1157
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1150
-/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1151
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1158
+/*! transaction: transaction checkpoints */
+#define WT_STAT_CONN_TXN_CHECKPOINT 1159
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1152
-/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1153
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1160
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1154
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1161
+/*! transaction: transaction range of IDs currently pinned by a checkpoint */
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1162
/*! transaction: transaction range of IDs currently pinned by named
* snapshots */
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1155
-/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1156
-/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1157
-/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1158
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1163
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1159
-/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1160
+#define WT_STAT_CONN_TXN_SYNC 1164
+/*! transaction: transactions committed */
+#define WT_STAT_CONN_TXN_COMMIT 1165
+/*! transaction: transactions rolled back */
+#define WT_STAT_CONN_TXN_ROLLBACK 1166
/*!
* @}
@@ -4018,200 +4048,200 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
* @anchor statistics_dsrc
* @{
*/
-/*! block-manager: file allocation unit size */
-#define WT_STAT_DSRC_ALLOCATION_SIZE 2000
-/*! block-manager: blocks allocated */
-#define WT_STAT_DSRC_BLOCK_ALLOC 2001
-/*! block-manager: checkpoint size */
-#define WT_STAT_DSRC_BLOCK_CHECKPOINT_SIZE 2002
-/*! block-manager: allocations requiring file extension */
-#define WT_STAT_DSRC_BLOCK_EXTENSION 2003
-/*! block-manager: blocks freed */
-#define WT_STAT_DSRC_BLOCK_FREE 2004
-/*! block-manager: file magic number */
-#define WT_STAT_DSRC_BLOCK_MAGIC 2005
-/*! block-manager: file major version number */
-#define WT_STAT_DSRC_BLOCK_MAJOR 2006
-/*! block-manager: minor version number */
-#define WT_STAT_DSRC_BLOCK_MINOR 2007
-/*! block-manager: file bytes available for reuse */
-#define WT_STAT_DSRC_BLOCK_REUSE_BYTES 2008
-/*! block-manager: file size in bytes */
-#define WT_STAT_DSRC_BLOCK_SIZE 2009
-/*! LSM: bloom filters in the LSM tree */
-#define WT_STAT_DSRC_BLOOM_COUNT 2010
/*! LSM: bloom filter false positives */
-#define WT_STAT_DSRC_BLOOM_FALSE_POSITIVE 2011
+#define WT_STAT_DSRC_BLOOM_FALSE_POSITIVE 2000
/*! LSM: bloom filter hits */
-#define WT_STAT_DSRC_BLOOM_HIT 2012
+#define WT_STAT_DSRC_BLOOM_HIT 2001
/*! LSM: bloom filter misses */
-#define WT_STAT_DSRC_BLOOM_MISS 2013
+#define WT_STAT_DSRC_BLOOM_MISS 2002
/*! LSM: bloom filter pages evicted from cache */
-#define WT_STAT_DSRC_BLOOM_PAGE_EVICT 2014
+#define WT_STAT_DSRC_BLOOM_PAGE_EVICT 2003
/*! LSM: bloom filter pages read into cache */
-#define WT_STAT_DSRC_BLOOM_PAGE_READ 2015
+#define WT_STAT_DSRC_BLOOM_PAGE_READ 2004
+/*! LSM: bloom filters in the LSM tree */
+#define WT_STAT_DSRC_BLOOM_COUNT 2005
+/*! LSM: chunks in the LSM tree */
+#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2006
+/*! LSM: highest merge generation in the LSM tree */
+#define WT_STAT_DSRC_LSM_GENERATION_MAX 2007
+/*! LSM: queries that could have benefited from a Bloom filter that did
+ * not exist */
+#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2008
+/*! LSM: sleep for LSM checkpoint throttle */
+#define WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE 2009
+/*! LSM: sleep for LSM merge throttle */
+#define WT_STAT_DSRC_LSM_MERGE_THROTTLE 2010
/*! LSM: total size of bloom filters */
-#define WT_STAT_DSRC_BLOOM_SIZE 2016
+#define WT_STAT_DSRC_BLOOM_SIZE 2011
+/*! block-manager: allocations requiring file extension */
+#define WT_STAT_DSRC_BLOCK_EXTENSION 2012
+/*! block-manager: blocks allocated */
+#define WT_STAT_DSRC_BLOCK_ALLOC 2013
+/*! block-manager: blocks freed */
+#define WT_STAT_DSRC_BLOCK_FREE 2014
+/*! block-manager: checkpoint size */
+#define WT_STAT_DSRC_BLOCK_CHECKPOINT_SIZE 2015
+/*! block-manager: file allocation unit size */
+#define WT_STAT_DSRC_ALLOCATION_SIZE 2016
+/*! block-manager: file bytes available for reuse */
+#define WT_STAT_DSRC_BLOCK_REUSE_BYTES 2017
+/*! block-manager: file magic number */
+#define WT_STAT_DSRC_BLOCK_MAGIC 2018
+/*! block-manager: file major version number */
+#define WT_STAT_DSRC_BLOCK_MAJOR 2019
+/*! block-manager: file size in bytes */
+#define WT_STAT_DSRC_BLOCK_SIZE 2020
+/*! block-manager: minor version number */
+#define WT_STAT_DSRC_BLOCK_MINOR 2021
/*! btree: btree checkpoint generation */
-#define WT_STAT_DSRC_BTREE_CHECKPOINT_GENERATION 2017
-/*! btree: column-store variable-size deleted values */
-#define WT_STAT_DSRC_BTREE_COLUMN_DELETED 2018
+#define WT_STAT_DSRC_BTREE_CHECKPOINT_GENERATION 2022
/*! btree: column-store fixed-size leaf pages */
-#define WT_STAT_DSRC_BTREE_COLUMN_FIX 2019
+#define WT_STAT_DSRC_BTREE_COLUMN_FIX 2023
/*! btree: column-store internal pages */
-#define WT_STAT_DSRC_BTREE_COLUMN_INTERNAL 2020
+#define WT_STAT_DSRC_BTREE_COLUMN_INTERNAL 2024
/*! btree: column-store variable-size RLE encoded values */
-#define WT_STAT_DSRC_BTREE_COLUMN_RLE 2021
+#define WT_STAT_DSRC_BTREE_COLUMN_RLE 2025
+/*! btree: column-store variable-size deleted values */
+#define WT_STAT_DSRC_BTREE_COLUMN_DELETED 2026
/*! btree: column-store variable-size leaf pages */
-#define WT_STAT_DSRC_BTREE_COLUMN_VARIABLE 2022
-/*! btree: pages rewritten by compaction */
-#define WT_STAT_DSRC_BTREE_COMPACT_REWRITE 2023
-/*! btree: number of key/value pairs */
-#define WT_STAT_DSRC_BTREE_ENTRIES 2024
+#define WT_STAT_DSRC_BTREE_COLUMN_VARIABLE 2027
/*! btree: fixed-record size */
-#define WT_STAT_DSRC_BTREE_FIXED_LEN 2025
-/*! btree: maximum tree depth */
-#define WT_STAT_DSRC_BTREE_MAXIMUM_DEPTH 2026
+#define WT_STAT_DSRC_BTREE_FIXED_LEN 2028
/*! btree: maximum internal page key size */
-#define WT_STAT_DSRC_BTREE_MAXINTLKEY 2027
+#define WT_STAT_DSRC_BTREE_MAXINTLKEY 2029
/*! btree: maximum internal page size */
-#define WT_STAT_DSRC_BTREE_MAXINTLPAGE 2028
+#define WT_STAT_DSRC_BTREE_MAXINTLPAGE 2030
/*! btree: maximum leaf page key size */
-#define WT_STAT_DSRC_BTREE_MAXLEAFKEY 2029
+#define WT_STAT_DSRC_BTREE_MAXLEAFKEY 2031
/*! btree: maximum leaf page size */
-#define WT_STAT_DSRC_BTREE_MAXLEAFPAGE 2030
+#define WT_STAT_DSRC_BTREE_MAXLEAFPAGE 2032
/*! btree: maximum leaf page value size */
-#define WT_STAT_DSRC_BTREE_MAXLEAFVALUE 2031
+#define WT_STAT_DSRC_BTREE_MAXLEAFVALUE 2033
+/*! btree: maximum tree depth */
+#define WT_STAT_DSRC_BTREE_MAXIMUM_DEPTH 2034
+/*! btree: number of key/value pairs */
+#define WT_STAT_DSRC_BTREE_ENTRIES 2035
/*! btree: overflow pages */
-#define WT_STAT_DSRC_BTREE_OVERFLOW 2032
+#define WT_STAT_DSRC_BTREE_OVERFLOW 2036
+/*! btree: pages rewritten by compaction */
+#define WT_STAT_DSRC_BTREE_COMPACT_REWRITE 2037
/*! btree: row-store internal pages */
-#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2033
+#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038
/*! btree: row-store leaf pages */
-#define WT_STAT_DSRC_BTREE_ROW_LEAF 2034
+#define WT_STAT_DSRC_BTREE_ROW_LEAF 2039
/*! cache: bytes read into cache */
-#define WT_STAT_DSRC_CACHE_BYTES_READ 2035
+#define WT_STAT_DSRC_CACHE_BYTES_READ 2040
/*! cache: bytes written from cache */
-#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2036
+#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2041
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2037
-/*! cache: unmodified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2038
-/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2039
-/*! cache: modified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2040
+#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2042
/*! cache: data source pages selected for eviction unable to be evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2041
+#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2043
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2042
+#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2044
+/*! cache: in-memory page passed criteria to be split */
+#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2045
+/*! cache: in-memory page splits */
+#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2046
/*! cache: internal pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2043
+#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2047
/*! cache: internal pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2044
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2048
/*! cache: leaf pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2045
-/*! cache: in-memory page splits */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2046
-/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2047
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2049
+/*! cache: modified pages evicted */
+#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2050
+/*! cache: overflow pages read into cache */
+#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2051
/*! cache: overflow values cached in memory */
-#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2048
+#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2052
+/*! cache: page split during eviction deepened the tree */
+#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2053
+/*! cache: page written requiring lookaside records */
+#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2054
/*! cache: pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ 2049
+#define WT_STAT_DSRC_CACHE_READ 2055
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2050
-/*! cache: overflow pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2051
+#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2056
/*! cache: pages written from cache */
-#define WT_STAT_DSRC_CACHE_WRITE 2052
-/*! cache: page written requiring lookaside records */
-#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2053
+#define WT_STAT_DSRC_CACHE_WRITE 2057
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2054
-/*! compression: raw compression call failed, no additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2055
-/*! compression: raw compression call failed, additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2056
-/*! compression: raw compression call succeeded */
-#define WT_STAT_DSRC_COMPRESS_RAW_OK 2057
+#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2058
+/*! cache: unmodified pages evicted */
+#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2059
/*! compression: compressed pages read */
-#define WT_STAT_DSRC_COMPRESS_READ 2058
+#define WT_STAT_DSRC_COMPRESS_READ 2060
/*! compression: compressed pages written */
-#define WT_STAT_DSRC_COMPRESS_WRITE 2059
+#define WT_STAT_DSRC_COMPRESS_WRITE 2061
/*! compression: page written failed to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2060
+#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2062
/*! compression: page written was too small to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2061
-/*! cursor: create calls */
-#define WT_STAT_DSRC_CURSOR_CREATE 2062
-/*! cursor: insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT 2063
+#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2063
+/*! compression: raw compression call failed, additional data available */
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2064
+/*! compression: raw compression call failed, no additional data available */
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2065
+/*! compression: raw compression call succeeded */
+#define WT_STAT_DSRC_COMPRESS_RAW_OK 2066
/*! cursor: bulk-loaded cursor-insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2064
+#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2067
+/*! cursor: create calls */
+#define WT_STAT_DSRC_CURSOR_CREATE 2068
/*! cursor: cursor-insert key and value bytes inserted */
-#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2065
+#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2069
+/*! cursor: cursor-remove key bytes removed */
+#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2070
+/*! cursor: cursor-update value bytes updated */
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2071
+/*! cursor: insert calls */
+#define WT_STAT_DSRC_CURSOR_INSERT 2072
/*! cursor: next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT 2066
+#define WT_STAT_DSRC_CURSOR_NEXT 2073
/*! cursor: prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV 2067
+#define WT_STAT_DSRC_CURSOR_PREV 2074
/*! cursor: remove calls */
-#define WT_STAT_DSRC_CURSOR_REMOVE 2068
-/*! cursor: cursor-remove key bytes removed */
-#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2069
+#define WT_STAT_DSRC_CURSOR_REMOVE 2075
/*! cursor: reset calls */
-#define WT_STAT_DSRC_CURSOR_RESET 2070
+#define WT_STAT_DSRC_CURSOR_RESET 2076
/*! cursor: restarted searches */
-#define WT_STAT_DSRC_CURSOR_RESTART 2071
+#define WT_STAT_DSRC_CURSOR_RESTART 2077
/*! cursor: search calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH 2072
+#define WT_STAT_DSRC_CURSOR_SEARCH 2078
/*! cursor: search near calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2073
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2079
/*! cursor: truncate calls */
-#define WT_STAT_DSRC_CURSOR_TRUNCATE 2074
+#define WT_STAT_DSRC_CURSOR_TRUNCATE 2080
/*! cursor: update calls */
-#define WT_STAT_DSRC_CURSOR_UPDATE 2075
-/*! cursor: cursor-update value bytes updated */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2076
-/*! LSM: sleep for LSM checkpoint throttle */
-#define WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE 2077
-/*! LSM: chunks in the LSM tree */
-#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2078
-/*! LSM: highest merge generation in the LSM tree */
-#define WT_STAT_DSRC_LSM_GENERATION_MAX 2079
-/*! LSM: queries that could have benefited from a Bloom filter that did
- * not exist */
-#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2080
-/*! LSM: sleep for LSM merge throttle */
-#define WT_STAT_DSRC_LSM_MERGE_THROTTLE 2081
+#define WT_STAT_DSRC_CURSOR_UPDATE 2081
/*! reconciliation: dictionary matches */
#define WT_STAT_DSRC_REC_DICTIONARY 2082
+/*! reconciliation: fast-path pages deleted */
+#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2083
+/*! reconciliation: internal page key bytes discarded using suffix
+ * compression */
+#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2084
/*! reconciliation: internal page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2083
-/*! reconciliation: leaf page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2084
-/*! reconciliation: maximum blocks required for a page */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2085
+#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2085
/*! reconciliation: internal-page overflow keys */
#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2086
+/*! reconciliation: leaf page key bytes discarded using prefix compression */
+#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2087
+/*! reconciliation: leaf page multi-block writes */
+#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2088
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2087
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2089
+/*! reconciliation: maximum blocks required for a page */
+#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2090
/*! reconciliation: overflow values written */
-#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2088
-/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2089
-/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2090
+#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2091
/*! reconciliation: page checksum matches */
-#define WT_STAT_DSRC_REC_PAGE_MATCH 2091
+#define WT_STAT_DSRC_REC_PAGE_MATCH 2092
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2092
+#define WT_STAT_DSRC_REC_PAGES 2093
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2093
-/*! reconciliation: leaf page key bytes discarded using prefix compression */
-#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2094
-/*! reconciliation: internal page key bytes discarded using suffix
- * compression */
-#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2095
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2094
+/*! reconciliation: pages deleted */
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2095
/*! session: object compaction */
#define WT_STAT_DSRC_SESSION_COMPACT 2096
/*! session: open cursor count */
diff --git a/src/include/wiredtiger_ext.h b/src/include/wiredtiger_ext.h
index 0db876b56f3..7d97d97dcf5 100644
--- a/src/include/wiredtiger_ext.h
+++ b/src/include/wiredtiger_ext.h
@@ -268,8 +268,9 @@ struct __wt_extension_api {
WT_SESSION *session, const char *key, const char *value);
/*!
- * Pack a structure into a buffer.
- * See ::wiredtiger_struct_pack for details.
+ * Pack a structure into a buffer. Deprecated in favor of stream
+ * based pack and unpack API. See WT_EXTENSION_API::pack_start for
+ * details.
*
* @param wt_api the extension handle
* @param session the session handle
@@ -282,8 +283,8 @@ struct __wt_extension_api {
void *buffer, size_t size, const char *format, ...);
/*!
- * Calculate the size required to pack a structure.
- * See ::wiredtiger_struct_size for details.
+ * Calculate the size required to pack a structure. Deprecated in
+ * favor of stream based pack and unpack API.
*
* @param wt_api the extension handle
* @param session the session handle
@@ -296,8 +297,9 @@ struct __wt_extension_api {
size_t *sizep, const char *format, ...);
/*!
- * Unpack a structure from a buffer.
- * See ::wiredtiger_struct_unpack for details.
+ * Unpack a structure from a buffer. Deprecated in favor of stream
+ * based pack and unpack API. See WT_EXTENSION_API::unpack_start for
+ * details.
*
* @param wt_api the extension handle
* @param session the session handle
@@ -309,6 +311,130 @@ struct __wt_extension_api {
int (*struct_unpack)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
const void *buffer, size_t size, const char *format, ...);
+ /*
+ * Streaming pack/unpack API.
+ */
+ /*!
+ * Start a packing operation into a buffer.
+ * See ::wiredtiger_pack_start for details.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle
+ * @param format the data format, see @ref packing
+ * @param buffer a pointer to memory to hold the packed data
+ * @param size the size of the buffer
+ * @param[out] psp the new packing stream handle
+ * @errors
+ */
+ int (*pack_start)(WT_EXTENSION_API *wt_api,
+ WT_SESSION *session, const char *format,
+ void *buffer, size_t size, WT_PACK_STREAM **psp);
+
+ /*!
+ * Start an unpacking operation from a buffer.
+ * See ::wiredtiger_unpack_start for details.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle
+ * @param format the data format, see @ref packing
+ * @param buffer a pointer to memory holding the packed data
+ * @param size the size of the buffer
+ * @param[out] psp the new packing stream handle
+ * @errors
+ */
+ int (*unpack_start)(WT_EXTENSION_API *wt_api,
+ WT_SESSION *session, const char *format,
+ const void *buffer, size_t size, WT_PACK_STREAM **psp);
+
+ /*!
+ * Close a packing stream.
+ *
+ * @param wt_api the extension handle
+ * @param ps the packing stream handle
+ * @param[out] usedp the number of bytes in the buffer used by the
+ * stream
+ * @errors
+ */
+ int (*pack_close)(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, size_t *usedp);
+
+ /*!
+ * Pack an item into a packing stream.
+ *
+ * @param wt_api the extension handle
+ * @param ps the packing stream handle
+ * @param item an item to pack
+ * @errors
+ */
+ int (*pack_item)(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, WT_ITEM *item);
+
+ /*!
+ * Pack a signed integer into a packing stream.
+ *
+ * @param wt_api the extension handle
+ * @param ps the packing stream handle
+ * @param i a signed integer to pack
+ * @errors
+ */
+ int (*pack_int)(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, int64_t i);
+
+ /*!
+ * Pack a string into a packing stream.
+ *
+ * @param wt_api the extension handle
+ * @param ps the packing stream handle
+ * @param s a string to pack
+ * @errors
+ */
+ int (*pack_str)(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, const char *s);
+
+ /*!
+ * Pack an unsigned integer into a packing stream.
+ *
+ * @param wt_api the extension handle
+ * @param ps the packing stream handle
+ * @param u an unsigned integer to pack
+ * @errors
+ */
+ int (*pack_uint)(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, uint64_t u);
+
+ /*!
+ * Unpack an item from a packing stream.
+ *
+ * @param wt_api the extension handle
+ * @param ps the packing stream handle
+ * @param item an item to unpack
+ * @errors
+ */
+ int (*unpack_item)(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, WT_ITEM *item);
+
+ /*!
+ * Unpack a signed integer from a packing stream.
+ *
+ * @param wt_api the extension handle
+ * @param ps the packing stream handle
+ * @param[out] ip the unpacked signed integer
+ * @errors
+ */
+ int (*unpack_int)(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, int64_t *ip);
+
+ /*!
+ * Unpack a string from a packing stream.
+ *
+ * @param wt_api the extension handle
+ * @param ps the packing stream handle
+ * @param[out] sp the unpacked string
+ * @errors
+ */
+ int (*unpack_str)(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, const char **sp);
+
+ /*!
+ * Unpack an unsigned integer from a packing stream.
+ *
+ * @param wt_api the extension handle
+ * @param ps the packing stream handle
+ * @param[out] up the unpacked unsigned integer
+ * @errors
+ */
+ int (*unpack_uint)(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, uint64_t *up);
+
/*!
* Return the current transaction ID.
*
diff --git a/src/log/log.c b/src/log/log.c
index ce2d7191491..e41073299a8 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -29,7 +29,7 @@ __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn)
log = conn->log;
log->ckpt_lsn = *ckp_lsn;
if (conn->log_cond != NULL)
- WT_RET(__wt_cond_signal(session, conn->log_cond));
+ WT_RET(__wt_cond_auto_signal(session, conn->log_cond));
return (0);
}
@@ -46,7 +46,7 @@ __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start)
conn = S2C(session);
log = conn->log;
- WT_RET(__wt_log_force_write(session, 1));
+ WT_RET(__wt_log_force_write(session, 1, NULL));
WT_RET(__wt_log_wrlsn(session, NULL));
if (start)
*lsn = log->write_start_lsn;
@@ -118,9 +118,9 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
*/
if (log->sync_dir_lsn.l.file < min_lsn->l.file) {
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
- "log_force_sync: sync directory %s to LSN %d/%lu",
- log->log_dir_fh->name,
- min_lsn->l.file, min_lsn->l.offset));
+ "log_force_sync: sync directory %s to LSN %" PRIu32
+ "/%" PRIu32,
+ log->log_dir_fh->name, min_lsn->l.file, min_lsn->l.offset));
WT_ERR(__wt_directory_sync_fh(session, log->log_dir_fh));
log->sync_dir_lsn = *min_lsn;
WT_STAT_FAST_CONN_INCR(session, log_sync_dir);
@@ -130,7 +130,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
*/
if (__wt_log_cmp(&log->sync_lsn, min_lsn) < 0) {
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
- "log_force_sync: sync %s to LSN %d/%lu",
+ "log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
log->log_fh->name, min_lsn->l.file, min_lsn->l.offset));
WT_ERR(__wt_fsync(session, log->log_fh));
log->sync_lsn = *min_lsn;
@@ -273,7 +273,7 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session,
* These may be files needed by backup. Force the current slot
* to get written to the file.
*/
- WT_RET(__wt_log_force_write(session, 1));
+ WT_RET(__wt_log_force_write(session, 1, NULL));
WT_RET(__log_get_files(session, WT_LOG_FILENAME, &files, &count));
/* Filter out any files that are below the checkpoint LSN. */
@@ -697,7 +697,7 @@ __log_openfile(WT_SESSION_IMPL *session,
WT_ERR_MSG(session, WT_ERROR,
"unsupported WiredTiger file version: this build "
" only supports major/minor versions up to %d/%d, "
- " and the file is version %d/%d",
+ " and the file is version %" PRIu16 "/%" PRIu16,
WT_LOG_MAJOR_VERSION, WT_LOG_MINOR_VERSION,
desc->majorv, desc->minorv);
}
@@ -824,7 +824,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
if (create_log) {
WT_STAT_FAST_CONN_INCR(session, log_prealloc_missed);
if (conn->log_cond != NULL)
- WT_RET(__wt_cond_signal(
+ WT_RET(__wt_cond_auto_signal(
session, conn->log_cond));
}
}
@@ -1088,28 +1088,36 @@ __wt_log_open(WT_SESSION_IMPL *session)
WT_RET(__wt_open(session, conn->log_path,
false, false, WT_FILE_TYPE_DIRECTORY, &log->log_dir_fh));
}
- /*
- * Clean up any old interim pre-allocated files.
- * We clean up these files because settings have changed upon reboot
- * and we want those settings to take effect right away.
- */
- WT_ERR(__log_get_files(session,
- WT_LOG_TMPNAME, &logfiles, &logcount));
- for (i = 0; i < logcount; i++) {
- WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
- WT_ERR(__wt_log_remove(session, WT_LOG_TMPNAME, lognum));
- }
- __wt_log_files_free(session, logfiles, logcount);
- logfiles = NULL;
- logcount = 0;
- WT_ERR(__log_get_files(session,
- WT_LOG_PREPNAME, &logfiles, &logcount));
- for (i = 0; i < logcount; i++) {
- WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
- WT_ERR(__wt_log_remove(session, WT_LOG_PREPNAME, lognum));
+
+ if (!F_ISSET(conn, WT_CONN_READONLY)) {
+ /*
+ * Clean up any old interim pre-allocated files. We clean
+ * up these files because settings have changed upon reboot
+ * and we want those settings to take effect right away.
+ */
+ WT_ERR(__log_get_files(session,
+ WT_LOG_TMPNAME, &logfiles, &logcount));
+ for (i = 0; i < logcount; i++) {
+ WT_ERR(__wt_log_extract_lognum(
+ session, logfiles[i], &lognum));
+ WT_ERR(__wt_log_remove(
+ session, WT_LOG_TMPNAME, lognum));
+ }
+ __wt_log_files_free(session, logfiles, logcount);
+ logfiles = NULL;
+ logcount = 0;
+ WT_ERR(__log_get_files(session,
+ WT_LOG_PREPNAME, &logfiles, &logcount));
+ for (i = 0; i < logcount; i++) {
+ WT_ERR(__wt_log_extract_lognum(
+ session, logfiles[i], &lognum));
+ WT_ERR(__wt_log_remove(
+ session, WT_LOG_PREPNAME, lognum));
+ }
+ __wt_log_files_free(session, logfiles, logcount);
+ logfiles = NULL;
}
- __wt_log_files_free(session, logfiles, logcount);
- logfiles = NULL;
+
/*
* Now look at the log files and set our LSNs.
*/
@@ -1121,7 +1129,8 @@ __wt_log_open(WT_SESSION_IMPL *session)
}
log->fileid = lastlog;
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
- "log_open: first log %d last log %d", firstlog, lastlog));
+ "log_open: first log %" PRIu32 " last log %" PRIu32,
+ firstlog, lastlog));
if (firstlog == UINT32_MAX) {
WT_ASSERT(session, logcount == 0);
WT_INIT_LSN(&log->first_lsn);
@@ -1132,9 +1141,11 @@ __wt_log_open(WT_SESSION_IMPL *session)
* Start logging at the beginning of the next log file, no matter
* where the previous log file ends.
*/
- WT_WITH_SLOT_LOCK(session, log, ret,
- ret = __log_newfile(session, true, NULL));
- WT_ERR(ret);
+ if (!F_ISSET(conn, WT_CONN_READONLY)) {
+ WT_WITH_SLOT_LOCK(session, log, ret,
+ ret = __log_newfile(session, true, NULL));
+ WT_ERR(ret);
+ }
/* If we found log files, save the new state. */
if (logcount > 0) {
@@ -1163,20 +1174,24 @@ __wt_log_close(WT_SESSION_IMPL *session)
if (log->log_close_fh != NULL && log->log_close_fh != log->log_fh) {
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"closing old log %s", log->log_close_fh->name));
- WT_RET(__wt_fsync(session, log->log_close_fh));
+ if (!F_ISSET(conn, WT_CONN_READONLY))
+ WT_RET(__wt_fsync(session, log->log_close_fh));
WT_RET(__wt_close(session, &log->log_close_fh));
}
if (log->log_fh != NULL) {
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"closing log %s", log->log_fh->name));
- WT_RET(__wt_fsync(session, log->log_fh));
+ if (!F_ISSET(conn, WT_CONN_READONLY))
+ WT_RET(__wt_fsync(session, log->log_fh));
WT_RET(__wt_close(session, &log->log_fh));
log->log_fh = NULL;
}
if (log->log_dir_fh != NULL) {
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"closing log directory %s", log->log_dir_fh->name));
- WT_RET(__wt_directory_sync_fh(session, log->log_dir_fh));
+ if (!F_ISSET(conn, WT_CONN_READONLY))
+ WT_RET(
+ __wt_directory_sync_fh(session, log->log_dir_fh));
WT_RET(__wt_close(session, &log->log_dir_fh));
log->log_dir_fh = NULL;
}
@@ -1237,10 +1252,8 @@ __log_has_hole(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, bool *hole)
}
}
-err: if (buf != NULL)
- __wt_free(session, buf);
- if (zerobuf != NULL)
- __wt_free(session, zerobuf);
+err: __wt_free(session, buf);
+ __wt_free(session, zerobuf);
return (ret);
}
@@ -1324,7 +1337,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
*/
if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
__wt_spin_unlock(session, &log->log_slot_lock);
- WT_ERR(__wt_cond_signal(session, conn->log_wrlsn_cond));
+ WT_ERR(__wt_cond_auto_signal(session, conn->log_wrlsn_cond));
if (++yield_count < WT_THOUSAND)
__wt_yield();
else
@@ -1381,7 +1394,8 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
(log->sync_dir_lsn.l.file < sync_lsn.l.file)) {
WT_ASSERT(session, log->log_dir_fh != NULL);
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
- "log_release: sync directory %s to LSN %u/%lu",
+ "log_release: sync directory %s to LSN %" PRIu32
+ "/%" PRIu32,
log->log_dir_fh->name,
sync_lsn.l.file, sync_lsn.l.offset));
WT_ERR(__wt_directory_sync_fh(
@@ -1396,7 +1410,8 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
if (F_ISSET(slot, WT_SLOT_SYNC) &&
__wt_log_cmp(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
- "log_release: sync log %s to LSN %u/%lu",
+ "log_release: sync log %s to LSN %" PRIu32
+ "/%" PRIu32,
log->log_fh->name,
sync_lsn.l.file, sync_lsn.l.offset));
WT_STAT_FAST_CONN_INCR(session, log_sync);
@@ -1463,7 +1478,7 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
if (LF_ISSET(WT_LOGSCAN_RECOVER))
WT_RET(__wt_verbose(session, WT_VERB_LOG,
- "__wt_log_scan truncating to %u/%u",
+ "__wt_log_scan truncating to %" PRIu32 "/%" PRIu32,
log->trunc_lsn.l.file, log->trunc_lsn.l.offset));
if (log != NULL) {
@@ -1744,14 +1759,25 @@ err: WT_STAT_FAST_CONN_INCR(session, log_scans);
* Wrapper function that takes the lock.
*/
int
-__wt_log_force_write(WT_SESSION_IMPL *session, bool retry)
+__wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work)
{
WT_LOG *log;
WT_MYSLOT myslot;
+ uint32_t joined; /* WT_LOG_SLOT_JOINED() of the active slot's state */
log = S2C(session)->log;
memset(&myslot, 0, sizeof(myslot));
+ WT_STAT_FAST_CONN_INCR(session, log_force_write); /* counted on every call */
+ if (did_work != NULL) /* did_work is optional: callers may pass NULL */
+ *did_work = true; /* default answer; cleared below on the skip path */
myslot.slot = log->active_slot;
+ joined = WT_LOG_SLOT_JOINED(log->active_slot->slot_state);
+ if (joined == 0) { /* joined value is zero: return without a slot switch */
+ WT_STAT_FAST_CONN_INCR(session, log_force_write_skip);
+ if (did_work != NULL)
+ *did_work = false; /* tell the caller no switch was attempted */
+ return (0);
+ }
return (__wt_log_slot_switch(session, &myslot, retry, true));
}
@@ -1984,10 +2010,10 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
* XXX I've seen times when conditions are NULL.
*/
if (conn->log_cond != NULL) {
- WT_ERR(__wt_cond_signal(session, conn->log_cond));
+ WT_ERR(__wt_cond_auto_signal(session, conn->log_cond));
__wt_yield();
} else
- WT_ERR(__wt_log_force_write(session, 1));
+ WT_ERR(__wt_log_force_write(session, 1, NULL));
}
if (LF_ISSET(WT_LOG_FLUSH)) {
/* Wait for our writes to reach the OS */
@@ -2114,7 +2140,7 @@ __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags)
WT_RET(__wt_log_flush_lsn(session, &lsn, false));
WT_RET(__wt_verbose(session, WT_VERB_LOG,
- "log_flush: flags %d LSN %u/%lu",
+ "log_flush: flags %#" PRIx32 " LSN %" PRIu32 "/%" PRIu32,
flags, lsn.l.file, lsn.l.offset));
/*
* If the user wants write-no-sync, there is nothing more to do.
diff --git a/src/log/log_slot.c b/src/log/log_slot.c
index 2844516e78f..570d1c9ce48 100644
--- a/src/log/log_slot.c
+++ b/src/log/log_slot.c
@@ -253,7 +253,7 @@ __wt_log_slot_new(WT_SESSION_IMPL *session)
/*
* If we didn't find any free slots signal the worker thread.
*/
- (void)__wt_cond_signal(session, conn->log_wrlsn_cond);
+ (void)__wt_cond_auto_signal(session, conn->log_wrlsn_cond);
__wt_yield();
}
/* NOTREACHED */
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index f76b2bfd9ac..e023b2b407e 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -1501,22 +1501,22 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
{
WT_CONFIG_ITEM cval;
WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __clsm_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __clsm_next, /* next */
- __clsm_prev, /* prev */
- __clsm_reset, /* reset */
- __clsm_search, /* search */
- __clsm_search_near, /* search-near */
- __clsm_insert, /* insert */
- __clsm_update, /* update */
- __clsm_remove, /* remove */
- __wt_cursor_reconfigure, /* reconfigure */
- __wt_clsm_close); /* close */
+ __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __clsm_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __clsm_next, /* next */
+ __clsm_prev, /* prev */
+ __clsm_reset, /* reset */
+ __clsm_search, /* search */
+ __clsm_search_near, /* search-near */
+ __clsm_insert, /* insert */
+ __clsm_update, /* update */
+ __clsm_remove, /* remove */
+ __wt_cursor_reconfigure, /* reconfigure */
+ __wt_clsm_close); /* close */
WT_CURSOR *cursor;
WT_CURSOR_LSM *clsm;
WT_DECL_RET;
@@ -1556,7 +1556,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
WT_ERR(ret);
/* Make sure we have exclusive access if and only if we want it */
- WT_ASSERT(session, !bulk || lsm_tree->exclusive);
+ WT_ASSERT(session, !bulk || lsm_tree->excl_session != NULL);
WT_ERR(__wt_calloc_one(session, &clsm));
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c
index dac8d987328..943a5894ab3 100644
--- a/src/lsm/lsm_manager.c
+++ b/src/lsm/lsm_manager.c
@@ -212,6 +212,10 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session)
conn = S2C(session);
manager = &conn->lsm_manager;
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ manager->lsm_workers = 0;
+ return (0);
+ }
/*
* We need at least a manager, a switch thread and a generic
* worker.
@@ -284,6 +288,8 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session)
manager = &conn->lsm_manager;
removed = 0;
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) ||
+ manager->lsm_workers == 0);
if (manager->lsm_workers > 0) {
/*
* Stop the main LSM manager thread first.
@@ -384,7 +390,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST);
dhandle_locked = true;
TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) {
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE))
+ if (!lsm_tree->active)
continue;
WT_ERR(__wt_epoch(session, &now));
pushms = lsm_tree->work_push_ts.tv_sec == 0 ? 0 :
@@ -427,8 +433,10 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
session, WT_LSM_WORK_BLOOM, 0, lsm_tree));
WT_ERR(__wt_verbose(session,
WT_VERB_LSM_MANAGER,
- "MGR %s: queue %d mod %d nchunks %d"
- " flags 0x%x aggressive %d pushms %" PRIu64
+ "MGR %s: queue %" PRIu32 " mod %d "
+ "nchunks %" PRIu32
+ " flags %#" PRIx32 " aggressive %" PRIu32
+ " pushms %" PRIu64
" fillms %" PRIu64,
lsm_tree->name, lsm_tree->queue_ref,
lsm_tree->modified, lsm_tree->nchunks,
@@ -616,6 +624,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session,
manager = &S2C(session)->lsm_manager;
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
/*
* Don't add merges or bloom filter creates if merges
* or bloom filters are disabled in the tree.
@@ -641,7 +650,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session,
* is checked.
*/
(void)__wt_atomic_add32(&lsm_tree->queue_ref, 1);
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) {
+ if (!lsm_tree->active) {
(void)__wt_atomic_sub32(&lsm_tree->queue_ref, 1);
return (0);
}
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index 29325066da7..6d907284546 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -60,10 +60,11 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
struct timespec now;
uint64_t msec_since_last_merge, msec_to_create_merge;
- u_int new_aggressive;
+ uint32_t new_aggressive;
new_aggressive = 0;
+ WT_ASSERT(session, lsm_tree->merge_min != 0);
/*
* If the tree is open read-only or we are compacting, be very
* aggressive. Otherwise, we can spend a long time waiting for merges
@@ -124,8 +125,9 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if (new_aggressive > lsm_tree->merge_aggressiveness) {
WT_RET(__wt_verbose(session, WT_VERB_LSM,
- "LSM merge %s got aggressive (old %u new %u), "
- "merge_min %d, %u / %" PRIu64,
+ "LSM merge %s got aggressive "
+ "(old %" PRIu32 " new %" PRIu32 "), "
+ "merge_min %u, %" PRIu64 " / %" PRIu64,
lsm_tree->name, lsm_tree->merge_aggressiveness,
new_aggressive, lsm_tree->merge_min,
msec_since_last_merge, lsm_tree->chunk_fill_ms));
@@ -410,7 +412,8 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
start_chunk, end_chunk, dest_id, record_count, generation));
for (verb = start_chunk; verb <= end_chunk; verb++)
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "Merging %s: Chunk[%u] id %u, gen: %" PRIu32
+ "Merging %s: Chunk[%u] id %" PRIu32
+ ", gen: %" PRIu32
", size: %" PRIu64 ", records: %" PRIu64,
lsm_tree->name, verb, lsm_tree->chunk[verb]->id,
lsm_tree->chunk[verb]->generation,
@@ -460,7 +463,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
#define LSM_MERGE_CHECK_INTERVAL WT_THOUSAND
for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) {
if (insert_count % LSM_MERGE_CHECK_INTERVAL == 0) {
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE))
+ if (!lsm_tree->active)
WT_ERR(EINTR);
WT_STAT_FAST_CONN_INCRV(session,
diff --git a/src/lsm/lsm_meta.c b/src/lsm/lsm_meta.c
index d76b2a48aa7..e19e2cd0126 100644
--- a/src/lsm/lsm_meta.c
+++ b/src/lsm/lsm_meta.c
@@ -9,17 +9,17 @@
#include "wt_internal.h"
/*
- * __wt_lsm_meta_read --
- * Read the metadata for an LSM tree.
+ * __lsm_meta_read_v0 --
+ * Read v0 of LSM metadata.
*/
-int
-__wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+static int
+__lsm_meta_read_v0(
+ WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf)
{
WT_CONFIG cparser, lparser;
WT_CONFIG_ITEM ck, cv, fileconf, lk, lv, metadata;
WT_DECL_RET;
WT_LSM_CHUNK *chunk;
- char *lsmconfig;
u_int nchunks;
chunk = NULL; /* -Wconditional-uninitialized */
@@ -28,8 +28,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
F_SET(lsm_tree, WT_LSM_TREE_MERGES);
- WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconfig));
- WT_ERR(__wt_config_init(session, &cparser, lsmconfig));
+ WT_ERR(__wt_config_init(session, &cparser, lsmconf));
while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) {
if (WT_STRING_MATCH("key_format", ck.str, ck.len)) {
__wt_free(session, lsm_tree->key_format);
@@ -48,7 +47,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
* from the file configuration.
*/
WT_ERR(__wt_config_getones(
- session, lsmconfig, "file_config", &fileconf));
+ session, lsmconf, "file_config", &fileconf));
WT_CLEAR(metadata);
WT_ERR_NOTFOUND_OK(__wt_config_subgets(
session, &fileconf, "app_metadata", &metadata));
@@ -160,16 +159,292 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
*/
}
WT_ERR_NOTFOUND_OK(ret);
+err: return (ret);
+}
+
+/*
+ * __lsm_meta_read_v1 --
+ * Read v1 of LSM metadata: fill in the LSM tree from the configuration
+ * string lsmconf. Sets the key/value formats, the optional collator, the
+ * lsm.* settings (throttle, bloom, chunk and merge values), "last", the
+ * "chunks" and "old_chunks" lists, and the tree's file_config string.
+ * Failures go through WT_ERR to the err label, which releases the scratch
+ * buffer before ret is returned; EINVAL is raised for the inconsistent
+ * bloom, chunk-size and merge settings checked below.
+ *
+ * NOTE(review): in the "chunks" loop every key other than "id"
+ * dereferences chunk, and in the "old_chunks" loop the "bloom" key does
+ * too; this presumably relies on each entry's identifying key coming
+ * first -- confirm against the code that writes these lists.
+ */
+static int
+__lsm_meta_read_v1(
+ WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf)
+{
+ WT_CONFIG lparser;
+ WT_CONFIG_ITEM cv, lk, lv, metadata;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ const char *file_cfg[] = {
+ WT_CONFIG_BASE(session, file_config), NULL, NULL, NULL };
+ char *fileconf;
+ u_int nchunks;
+
+ chunk = NULL; /* -Wconditional-uninitialized */
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "key_format", &cv));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->key_format));
+ WT_ERR(__wt_config_getones(session, lsmconf, "value_format", &cv));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->value_format));
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "collator", &cv));
+ if (cv.len != 0 && !WT_STRING_MATCH("none", cv.str, cv.len)) {
+ /* Extract the application-supplied metadata (if any). */
+ WT_CLEAR(metadata);
+ WT_ERR_NOTFOUND_OK(__wt_config_getones(
+ session, lsmconf, "app_metadata", &metadata));
+ WT_ERR(__wt_collator_config(session, lsm_tree->name,
+ &cv, &metadata,
+ &lsm_tree->collator, &lsm_tree->collator_owned));
+ WT_ERR(__wt_strndup(session,
+ cv.str, cv.len, &lsm_tree->collator_name));
+ }
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.auto_throttle", &cv));
+ if (cv.val)
+ F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
+ else
+ F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom", &cv));
+ FLD_SET(lsm_tree->bloom,
+ (cv.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_oldest", &cv));
+ if (cv.val != 0)
+ FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);
+
+ if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
+ FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
+ WT_ERR_MSG(session, EINVAL,
+ "Bloom filters can only be created on newest and oldest "
+ "chunks if bloom filters are enabled");
+
+ WT_ERR(__wt_config_getones(
+ session, lsmconf, "lsm.bloom_bit_count", &cv));
+ lsm_tree->bloom_bit_count = (uint32_t)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_config", &cv));
+ /*
+ * Don't include the brackets: for a structured value, skip the first
+ * byte and shorten the length by two.
+ */
+ if (cv.type == WT_CONFIG_ITEM_STRUCT) {
+ cv.str++;
+ cv.len -= 2;
+ }
+ WT_ERR(__wt_config_check(session,
+ WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->bloom_config));
+ WT_ERR(__wt_config_getones(
+ session, lsmconf, "lsm.bloom_hash_count", &cv));
+ lsm_tree->bloom_hash_count = (uint32_t)cv.val;
+
+ WT_ERR(__wt_config_getones(
+ session, lsmconf, "lsm.chunk_count_limit", &cv));
+ lsm_tree->chunk_count_limit = (uint32_t)cv.val;
+ if (cv.val == 0) /* no chunk count limit: merges flag is set */
+ F_SET(lsm_tree, WT_LSM_TREE_MERGES);
+ else
+ F_CLR(lsm_tree, WT_LSM_TREE_MERGES);
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_max", &cv));
+ lsm_tree->chunk_max = (uint64_t)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_size", &cv));
+ lsm_tree->chunk_size = (uint64_t)cv.val;
+
+ if (lsm_tree->chunk_size > lsm_tree->chunk_max)
+ WT_ERR_MSG(session, EINVAL,
+ "Chunk size (chunk_size) must be smaller than or equal to "
+ "the maximum chunk size (chunk_max)");
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_max", &cv));
+ lsm_tree->merge_max = (uint32_t)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_min", &cv));
+ lsm_tree->merge_min = (uint32_t)cv.val;
+
+ if (lsm_tree->merge_min > lsm_tree->merge_max)
+ WT_ERR_MSG(session, EINVAL,
+ "LSM merge_min must be less than or equal to merge_max");
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "last", &cv));
+ lsm_tree->last = (u_int)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "chunks", &cv));
+ WT_ERR(__wt_config_subinit(session, &lparser, &cv));
+ for (nchunks = 0; (ret =
+ __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
+ if (WT_STRING_MATCH("id", lk.str, lk.len)) { /* "id" starts a new chunk */
+ WT_ERR(__wt_realloc_def(session,
+ &lsm_tree->chunk_alloc,
+ nchunks + 1, &lsm_tree->chunk));
+ WT_ERR(__wt_calloc_one(session, &chunk));
+ lsm_tree->chunk[nchunks++] = chunk;
+ chunk->id = (uint32_t)lv.val;
+ WT_ERR(__wt_lsm_tree_chunk_name(session,
+ lsm_tree, chunk->id, &chunk->uri));
+ F_SET(chunk,
+ WT_LSM_CHUNK_ONDISK |
+ WT_LSM_CHUNK_STABLE);
+ } else if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
+ WT_ERR(__wt_lsm_tree_bloom_name(
+ session, lsm_tree, chunk->id, &chunk->bloom_uri));
+ F_SET(chunk, WT_LSM_CHUNK_BLOOM);
+ continue;
+ } else if (WT_STRING_MATCH("chunk_size", lk.str, lk.len)) {
+ chunk->size = (uint64_t)lv.val;
+ continue;
+ } else if (WT_STRING_MATCH("count", lk.str, lk.len)) {
+ chunk->count = (uint64_t)lv.val;
+ continue;
+ } else if (WT_STRING_MATCH("generation", lk.str, lk.len)) {
+ chunk->generation = (uint32_t)lv.val;
+ continue;
+ }
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ lsm_tree->nchunks = nchunks;
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "old_chunks", &cv));
+ WT_ERR(__wt_config_subinit(session, &lparser, &cv));
+ for (nchunks = 0; (ret =
+ __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
+ if (WT_STRING_MATCH("bloom", lk.str, lk.len)) { /* applies to the chunk allocated last */
+ WT_ERR(__wt_strndup(session,
+ lv.str, lv.len, &chunk->bloom_uri));
+ F_SET(chunk, WT_LSM_CHUNK_BLOOM);
+ continue;
+ }
+ WT_ERR(__wt_realloc_def(session,
+ &lsm_tree->old_alloc, nchunks + 1,
+ &lsm_tree->old_chunks));
+ WT_ERR(__wt_calloc_one(session, &chunk));
+ lsm_tree->old_chunks[nchunks++] = chunk;
+ WT_ERR(__wt_strndup(session,
+ lk.str, lk.len, &chunk->uri));
+ F_SET(chunk, WT_LSM_CHUNK_ONDISK);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ lsm_tree->nold_chunks = nchunks;
+
+ /*
+ * Set up the config for each chunk.
+ *
+ * Make the memory_page_max double the chunk size, so application
+ * threads don't immediately try to force evict the chunk when the
+ * worker thread clears the NO_EVICTION flag.
+ *
+ * The stack handed to the collapse call is the file_config base,
+ * then lsmconf, then the string formatted below; presumably later
+ * entries take precedence -- confirm. The collapsed string is stored
+ * in lsm_tree->file_config.
+ */
+ file_cfg[1] = lsmconf;
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_buf_fmt(session, buf,
+ "key_format=u,value_format=u,memory_page_max=%" PRIu64,
+ 2 * lsm_tree->chunk_max));
+ file_cfg[2] = buf->data;
+ WT_ERR(__wt_config_collapse(session, file_cfg, &fileconf));
+ lsm_tree->file_config = fileconf;
+
+ /*
+ * Ignore any other values: the metadata entry might have been
+ * created by a future release, with unknown options.
+ */
+err: __wt_scr_free(session, &buf);
+ return (ret);
+}
+
+/*
+ * __lsm_meta_upgrade_v1 --
+ * Upgrade to v1 of LSM metadata: build lsm_tree->config by merging the
+ * lsm_meta base configuration, the tree's file_config string, and a
+ * string formatted here from the in-memory tree (key/value formats,
+ * collator name, and an lsm=(...) group holding the throttle, bloom,
+ * chunk and merge settings). The scratch buffer is released on every
+ * path before ret is returned.
+ *
+ * NOTE(review): the auto_throttle/bloom/bloom_oldest values are printed
+ * with %d straight from F_ISSET/FLD_ISSET -- confirm those macros yield
+ * 0/1 rather than the raw mask bits.
+ * NOTE(review): chunk_size is not emitted in the lsm group although
+ * __lsm_meta_read_v1 reads lsm.chunk_size; presumably it is supplied by
+ * file_config or the base configuration -- confirm.
+ */
+static int
+__lsm_meta_upgrade_v1(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+{
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ const char *new_cfg[] = {
+ WT_CONFIG_BASE(session, lsm_meta), NULL, NULL, NULL };
+
+ /* Include the custom config that used to be embedded in file_config. */
+ new_cfg[1] = lsm_tree->file_config;
+
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_buf_fmt(session, buf,
+ "key_format=%s,value_format=%s",
+ lsm_tree->key_format, lsm_tree->value_format));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ",collator=%s",
+ lsm_tree->collator_name != NULL ? lsm_tree->collator_name : ""));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ",lsm=("));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, "auto_throttle=%d",
+ F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE)));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom=%d",
+ FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_MERGED)));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_oldest=%d",
+ FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_bit_count=%" PRIu32,
+ lsm_tree->bloom_bit_count));
+ if (lsm_tree->bloom_config != NULL && /* non-empty: wrap in parentheses */
+ strlen(lsm_tree->bloom_config) > 0)
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config=(%s)",
+ lsm_tree->bloom_config));
+ else
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config="));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_hash_count=%" PRIu32,
+ lsm_tree->bloom_hash_count));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_count_limit=%" PRIu32,
+ lsm_tree->chunk_count_limit));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_max=%" PRIu64,
+ lsm_tree->chunk_max));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",merge_max=%" PRIu32,
+ lsm_tree->merge_max));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",merge_min=%" PRIu32,
+ lsm_tree->merge_min));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ")"));
+
+ new_cfg[2] = buf->data;
+ WT_ERR(__wt_config_merge(session, new_cfg, NULL, &lsm_tree->config));
+
+err: __wt_scr_free(session, &buf);
+ return (ret);
+}
+/*
+ * __wt_lsm_meta_read --
+ * Read the metadata for an LSM tree. The tree's metadata string is
+ * looked up by name; if it contains a "file_config" key it is parsed
+ * with the v0 reader and the tree is afterwards upgraded to the v1
+ * layout, otherwise (WT_NOTFOUND) the string is stored in
+ * lsm_tree->config and parsed with the v1 reader. A merge_min below 2
+ * is replaced by max(2, merge_max / 2) before any upgrade.
+ *
+ * NOTE(review): if the "file_config" lookup fails with an error other
+ * than WT_NOTFOUND, neither branch runs and that error is returned, but
+ * lsmconf appears to be neither freed nor handed to the tree -- confirm
+ * whether this leaks.
+ */
+int
+__wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+{
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ char *lsmconf;
+ bool upgrade;
+
+ /* LSM trees inherit the merge setting from the connection. */
+ if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
+ F_SET(lsm_tree, WT_LSM_TREE_MERGES);
+
+ WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconf));
+ upgrade = false;
+ ret = __wt_config_getones(session, lsmconf, "file_config", &cval);
+ if (ret == 0) { /* "file_config" present: v0 layout */
+ ret = __lsm_meta_read_v0(session, lsm_tree, lsmconf);
+ __wt_free(session, lsmconf); /* freed whether or not the read succeeded */
+ WT_RET(ret);
+ upgrade = true;
+ } else if (ret == WT_NOTFOUND) { /* no "file_config": v1 layout */
+ lsm_tree->config = lsmconf; /* stored on the tree, not freed here */
+ ret = 0;
+ WT_RET(__lsm_meta_read_v1(session, lsm_tree, lsmconf));
+ }
/*
- * If the default merge_min was not overridden, calculate it now. We
- * do this here so that trees created before merge_min was added get a
- * sane value.
+ * If the default merge_min was not overridden, calculate it now.
*/
if (lsm_tree->merge_min < 2)
lsm_tree->merge_min = WT_MAX(2, lsm_tree->merge_max / 2);
-
-err: __wt_free(session, lsmconfig);
+ /*
+ * If needed, upgrade the configuration. We need to do this after
+ * we have fixed the merge_min value.
+ */
+ if (upgrade)
+ WT_RET(__lsm_meta_upgrade_v1(session, lsm_tree));
return (ret);
}
@@ -184,32 +459,15 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
WT_DECL_RET;
WT_LSM_CHUNK *chunk;
u_int i;
+ const char *new_cfg[] = { NULL, NULL, NULL };
+ char *new_metadata;
bool first;
+ new_metadata = NULL;
+
WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_buf_fmt(session, buf,
- "key_format=%s,value_format=%s,bloom_config=(%s),file_config=(%s)",
- lsm_tree->key_format, lsm_tree->value_format,
- lsm_tree->bloom_config, lsm_tree->file_config));
- if (lsm_tree->collator_name != NULL)
- WT_ERR(__wt_buf_catfmt(
- session, buf, ",collator=%s", lsm_tree->collator_name));
WT_ERR(__wt_buf_catfmt(session, buf,
- ",last=%" PRIu32
- ",chunk_count_limit=%" PRIu32
- ",chunk_max=%" PRIu64
- ",chunk_size=%" PRIu64
- ",auto_throttle=%" PRIu32
- ",merge_max=%" PRIu32
- ",merge_min=%" PRIu32
- ",bloom=%" PRIu32
- ",bloom_bit_count=%" PRIu32
- ",bloom_hash_count=%" PRIu32,
- lsm_tree->last, lsm_tree->chunk_count_limit,
- lsm_tree->chunk_max, lsm_tree->chunk_size,
- F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE) ? 1 : 0,
- lsm_tree->merge_max, lsm_tree->merge_min, lsm_tree->bloom,
- lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count));
+ ",last=%" PRIu32, lsm_tree->last));
WT_ERR(__wt_buf_catfmt(session, buf, ",chunks=["));
for (i = 0; i < lsm_tree->nchunks; i++) {
chunk = lsm_tree->chunk[i];
@@ -243,9 +501,15 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
session, buf, ",bloom=\"%s\"", chunk->bloom_uri));
}
WT_ERR(__wt_buf_catfmt(session, buf, "]"));
- ret = __wt_metadata_update(session, lsm_tree->name, buf->data);
+
+ /* Update the existing configuration with the new values. */
+ new_cfg[0] = lsm_tree->config;
+ new_cfg[1] = buf->data;
+ WT_ERR(__wt_config_collapse(session, new_cfg, &new_metadata));
+ ret = __wt_metadata_update(session, lsm_tree->name, new_metadata);
WT_ERR(ret);
err: __wt_scr_free(session, &buf);
+ __wt_free(session, new_metadata);
return (ret);
}
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index ab18e41a2f5..cb1ddf22f84 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -27,6 +27,7 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
WT_UNUSED(final); /* Only used in diagnostic builds */
+ WT_ASSERT(session, !lsm_tree->active);
/*
* The work unit queue should be empty, but it's worth checking
* since work units use a different locking scheme to regular tree
@@ -85,19 +86,27 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
* Close an LSM tree structure.
*/
static int
-__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
{
WT_DECL_RET;
int i;
- /* Stop any active merges. */
- F_CLR(lsm_tree, WT_LSM_TREE_ACTIVE);
+ /*
+ * Stop any new work units being added. The barrier is necessary
+ * because we rely on the state change being visible before checking
+ * the tree queue state.
+ */
+ lsm_tree->active = false;
+ WT_READ_BARRIER();
/*
- * Wait for all LSM operations and work units that were in flight to
- * finish.
+ * Wait for all LSM operations to drain. If WiredTiger is shutting
+ * down also wait for the tree reference count to go to zero, otherwise
+ * we know a user is holding a reference to the tree, so exclusive
+ * access is not available.
*/
- for (i = 0; lsm_tree->refcnt > 1 || lsm_tree->queue_ref > 0; ++i) {
+ for (i = 0;
+ lsm_tree->queue_ref > 0 || (final && lsm_tree->refcnt > 1); ++i) {
/*
* Remove any work units from the manager queues. Do this step
* repeatedly in case a work unit was in the process of being
@@ -114,11 +123,14 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if (i % WT_THOUSAND == 0) {
WT_WITHOUT_LOCKS(session, ret =
__wt_lsm_manager_clear_tree(session, lsm_tree));
- WT_RET(ret);
+ WT_ERR(ret);
}
__wt_yield();
}
return (0);
+
+err: lsm_tree->active = true;
+ return (ret);
}
/*
@@ -142,7 +154,7 @@ __wt_lsm_tree_close_all(WT_SESSION_IMPL *session)
* is unconditional.
*/
(void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
- WT_TRET(__lsm_tree_close(session, lsm_tree));
+ WT_TRET(__lsm_tree_close(session, lsm_tree, true));
WT_TRET(__lsm_tree_discard(session, lsm_tree, true));
}
@@ -157,9 +169,12 @@ static int
__lsm_tree_set_name(WT_SESSION_IMPL *session,
WT_LSM_TREE *lsm_tree, const char *uri)
{
- if (lsm_tree->name != NULL)
- __wt_free(session, lsm_tree->name);
- WT_RET(__wt_strdup(session, uri, &lsm_tree->name));
+ void *p;
+
+ WT_RET(__wt_strdup(session, uri, &p));
+
+ __wt_free(session, lsm_tree->name);
+ lsm_tree->name = p;
lsm_tree->filename = lsm_tree->name + strlen("lsm:");
return (0);
}
@@ -306,15 +321,15 @@ int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
const char *uri, bool exclusive, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_ITEM(buf);
WT_DECL_RET;
WT_LSM_TREE *lsm_tree;
const char *cfg[] =
- { WT_CONFIG_BASE(session, WT_SESSION_create), config, NULL };
- char *tmpconfig;
+ { WT_CONFIG_BASE(session, lsm_meta), config, NULL };
+ const char *metadata;
- /* If the tree is open, it already exists. */
+ metadata = NULL;
+
+ /* If the tree can be opened, it already exists. */
WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree));
if (ret == 0) {
@@ -323,139 +338,22 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session,
}
WT_RET_NOTFOUND_OK(ret);
- /*
- * If the tree has metadata, it already exists.
- *
- * !!!
- * Use a local variable: we don't care what the existing configuration
- * is, but we don't want to overwrite the real config.
- */
- if (__wt_metadata_search(session, uri, &tmpconfig) == 0) {
- __wt_free(session, tmpconfig);
- return (exclusive ? EEXIST : 0);
+ if (!F_ISSET(S2C(session), WT_CONN_READONLY)) {
+ WT_ERR(__wt_config_merge(session, cfg, NULL, &metadata));
+ WT_ERR(__wt_metadata_insert(session, uri, metadata));
}
- WT_RET_NOTFOUND_OK(ret);
-
- /* In-memory configurations don't make sense for LSM. */
- if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- WT_RET_MSG(session, EINVAL,
- "LSM trees not supported by in-memory configurations");
-
- WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
- if (WT_STRING_MATCH("r", cval.str, cval.len))
- WT_RET_MSG(session, EINVAL,
- "LSM trees cannot be configured as column stores");
-
- WT_RET(__wt_calloc_one(session, &lsm_tree));
-
- WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));
-
- WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &lsm_tree->key_format));
- WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &lsm_tree->value_format));
-
- WT_ERR(__wt_config_gets_none(session, cfg, "collator", &cval));
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &lsm_tree->collator_name));
-
- WT_ERR(__wt_config_gets(session, cfg, "cache_resident", &cval));
- if (cval.val != 0)
- WT_ERR_MSG(session, EINVAL,
- "The cache_resident flag is not compatible with LSM");
-
- WT_ERR(__wt_config_gets(session, cfg, "lsm.auto_throttle", &cval));
- if (cval.val)
- F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
- else
- F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
- WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom", &cval));
- FLD_SET(lsm_tree->bloom,
- (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
- WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_oldest", &cval));
- if (cval.val != 0)
- FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);
-
- if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
- FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
- WT_ERR_MSG(session, EINVAL,
- "Bloom filters can only be created on newest and oldest "
- "chunks if bloom filters are enabled");
-
- WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_config", &cval));
- if (cval.type == WT_CONFIG_ITEM_STRUCT) {
- cval.str++;
- cval.len -= 2;
- }
- WT_ERR(__wt_config_check(session,
- WT_CONFIG_REF(session, WT_SESSION_create), cval.str, cval.len));
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &lsm_tree->bloom_config));
-
- WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_bit_count", &cval));
- lsm_tree->bloom_bit_count = (uint32_t)cval.val;
- WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_hash_count", &cval));
- lsm_tree->bloom_hash_count = (uint32_t)cval.val;
- WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_count_limit", &cval));
- lsm_tree->chunk_count_limit = (uint32_t)cval.val;
- if (cval.val == 0)
- F_SET(lsm_tree, WT_LSM_TREE_MERGES);
- else
- F_CLR(lsm_tree, WT_LSM_TREE_MERGES);
- WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_max", &cval));
- lsm_tree->chunk_max = (uint64_t)cval.val;
- WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_size", &cval));
- lsm_tree->chunk_size = (uint64_t)cval.val;
- if (lsm_tree->chunk_size > lsm_tree->chunk_max)
- WT_ERR_MSG(session, EINVAL,
- "Chunk size (chunk_size) must be smaller than or equal to "
- "the maximum chunk size (chunk_max)");
- WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_max", &cval));
- lsm_tree->merge_max = (uint32_t)cval.val;
- WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_min", &cval));
- lsm_tree->merge_min = (uint32_t)cval.val;
- if (lsm_tree->merge_min > lsm_tree->merge_max)
- WT_ERR_MSG(session, EINVAL,
- "LSM merge_min must be less than or equal to merge_max");
-
- /*
- * Set up the config for each chunk.
- *
- * Make the memory_page_max double the chunk size, so application
- * threads don't immediately try to force evict the chunk when the
- * worker thread clears the NO_EVICTION flag.
- */
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_buf_fmt(session, buf,
- "%s,key_format=u,value_format=u,memory_page_max=%" PRIu64,
- config, 2 * lsm_tree->chunk_max));
- WT_ERR(__wt_strndup(
- session, buf->data, buf->size, &lsm_tree->file_config));
-
- /* Create the first chunk and flush the metadata. */
- WT_ERR(__wt_lsm_meta_write(session, lsm_tree));
-
- /* Discard our partially populated handle. */
- ret = __lsm_tree_discard(session, lsm_tree, false);
- lsm_tree = NULL;
/*
* Open our new tree and add it to the handle cache. Don't discard on
* error: the returned handle is NULL on error, and the metadata
* tracking macros handle cleaning up on failure.
*/
- if (ret == 0)
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __lsm_tree_open(session, uri, true, &lsm_tree));
+ WT_WITH_HANDLE_LIST_LOCK(session,
+ ret = __lsm_tree_open(session, uri, true, &lsm_tree));
if (ret == 0)
__wt_lsm_tree_release(session, lsm_tree);
- if (0) {
-err: WT_TRET(__lsm_tree_discard(session, lsm_tree, false));
- }
- __wt_scr_free(session, &buf);
+err: __wt_free(session, metadata);
return (ret);
}
@@ -477,27 +375,26 @@ __lsm_tree_find(WT_SESSION_IMPL *session,
/* See if the tree is already open. */
TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q)
if (strcmp(uri, lsm_tree->name) == 0) {
- /*
- * Short circuit if the handle is already held
- * exclusively or exclusive access is requested and
- * there are references held.
- */
- if ((exclusive && lsm_tree->refcnt > 0) ||
- lsm_tree->exclusive)
- return (EBUSY);
-
if (exclusive) {
/*
* Make sure we win the race to switch on the
* exclusive flag.
*/
- if (!__wt_atomic_cas8(
- &lsm_tree->exclusive, 0, 1))
+ if (!__wt_atomic_cas_ptr(
+ &lsm_tree->excl_session, NULL, session))
return (EBUSY);
- /* Make sure there are no readers */
- if (!__wt_atomic_cas32(
- &lsm_tree->refcnt, 0, 1)) {
- lsm_tree->exclusive = 0;
+
+ /*
+ * Drain the work queue before checking for
+ * open cursors - otherwise we can generate
+ * spurious busy returns.
+ */
+ (void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
+ if (__lsm_tree_close(
+ session, lsm_tree, false) != 0 ||
+ lsm_tree->refcnt != 1) {
+ __wt_lsm_tree_release(
+ session, lsm_tree);
return (EBUSY);
}
} else {
@@ -507,11 +404,11 @@ __lsm_tree_find(WT_SESSION_IMPL *session,
* We got a reference, check if an exclusive
* lock beat us to it.
*/
- if (lsm_tree->exclusive) {
+ if (lsm_tree->excl_session != NULL) {
WT_ASSERT(session,
lsm_tree->refcnt > 0);
- (void)__wt_atomic_sub32(
- &lsm_tree->refcnt, 1);
+ __wt_lsm_tree_release(
+ session, lsm_tree);
return (EBUSY);
}
}
@@ -603,7 +500,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session,
* with getting handles exclusive.
*/
lsm_tree->refcnt = 1;
- lsm_tree->exclusive = exclusive ? 1 : 0;
+ lsm_tree->excl_session = exclusive ? session : NULL;
lsm_tree->queue_ref = 0;
/* Set a flush timestamp as a baseline. */
@@ -611,7 +508,9 @@ __lsm_tree_open(WT_SESSION_IMPL *session,
/* Now the tree is setup, make it visible to others. */
TAILQ_INSERT_HEAD(&S2C(session)->lsmqh, lsm_tree, q);
- F_SET(lsm_tree, WT_LSM_TREE_ACTIVE | WT_LSM_TREE_OPEN);
+ if (!exclusive)
+ lsm_tree->active = true;
+ F_SET(lsm_tree, WT_LSM_TREE_OPEN);
*treep = lsm_tree;
@@ -638,7 +537,7 @@ __wt_lsm_tree_get(WT_SESSION_IMPL *session,
ret = __lsm_tree_open(session, uri, exclusive, treep);
WT_ASSERT(session, ret != 0 ||
- (exclusive ? 1 : 0) == (*treep)->exclusive);
+ (*treep)->excl_session == (exclusive ? session : NULL));
return (ret);
}
@@ -650,8 +549,11 @@ void
__wt_lsm_tree_release(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
WT_ASSERT(session, lsm_tree->refcnt > 0);
- if (lsm_tree->exclusive)
- lsm_tree->exclusive = 0;
+ if (lsm_tree->excl_session == session) {
+ /* We cleared the active flag when getting exclusive access. */
+ lsm_tree->active = true;
+ lsm_tree->excl_session = NULL;
+ }
(void)__wt_atomic_sub32(&lsm_tree->refcnt, 1);
}
@@ -868,7 +770,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
F_CLR(lsm_tree, WT_LSM_TREE_NEED_SWITCH);
++lsm_tree->dsk_gen;
- lsm_tree->modified = 1;
+ lsm_tree->modified = true;
/*
* Set the switch transaction in the previous chunk unless this is
@@ -964,9 +866,7 @@ __wt_lsm_tree_drop(
WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, name, true, &lsm_tree));
WT_RET(ret);
-
- /* Shut down the LSM worker. */
- WT_ERR(__lsm_tree_close(session, lsm_tree));
+ WT_ASSERT(session, !lsm_tree->active);
/* Prevent any new opens. */
WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
@@ -995,6 +895,7 @@ __wt_lsm_tree_drop(
WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree));
ret = __wt_metadata_remove(session, name);
+ WT_ASSERT(session, !lsm_tree->active);
err: if (locked)
WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
WT_WITH_HANDLE_LIST_LOCK(session,
@@ -1027,9 +928,6 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session,
ret = __wt_lsm_tree_get(session, olduri, true, &lsm_tree));
WT_RET(ret);
- /* Shut down the LSM worker. */
- WT_ERR(__lsm_tree_close(session, lsm_tree));
-
/* Prevent any new opens. */
WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
locked = true;
@@ -1067,8 +965,8 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session,
err: if (locked)
WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
- if (old != NULL)
- __wt_free(session, old);
+ __wt_free(session, old);
+
/*
* Discard this LSM tree structure. The first operation on the renamed
* tree will create a new one.
@@ -1102,9 +1000,6 @@ __wt_lsm_tree_truncate(
ret = __wt_lsm_tree_get(session, name, true, &lsm_tree));
WT_RET(ret);
- /* Shut down the LSM worker. */
- WT_ERR(__lsm_tree_close(session, lsm_tree));
-
/* Prevent any new opens. */
WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
locked = true;
@@ -1308,8 +1203,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
if (chunk != NULL) {
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
"Compact force flush %s flags 0x%" PRIx32
- " chunk %u flags 0x%"
- PRIx32, name, lsm_tree->flags, chunk->id, chunk->flags));
+ " chunk %" PRIu32 " flags 0x%" PRIx32,
+ name, lsm_tree->flags, chunk->id, chunk->flags));
flushing = true;
/*
* Make sure the in-memory chunk gets flushed do not push a
@@ -1331,7 +1226,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
}
/* Wait for the work unit queues to drain. */
- while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) {
+ while (lsm_tree->active) {
/*
* The flush flag is cleared when the chunk has been flushed.
* Continue to push forced flushes until the chunk is on disk.
@@ -1342,7 +1237,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
WT_ERR(__wt_verbose(session,
WT_VERB_LSM,
- "Compact flush done %s chunk %u. "
+ "Compact flush done %s chunk %" PRIu32 ". "
"Start compacting progress %" PRIu64,
name, chunk->id,
lsm_tree->merge_progressing));
@@ -1353,7 +1248,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
progress = lsm_tree->merge_progressing;
} else {
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "Compact flush retry %s chunk %u",
+ "Compact flush retry %s chunk %" PRIu32,
name, chunk->id));
WT_ERR(__wt_lsm_manager_push_entry(session,
WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE,
@@ -1413,7 +1308,6 @@ err:
__wt_lsm_tree_release(session, lsm_tree);
return (ret);
-
}
/*
@@ -1455,8 +1349,7 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session,
continue;
WT_ERR(__wt_schema_worker(session, chunk->uri,
file_func, name_func, cfg, open_flags));
- if (name_func == __wt_backup_list_uri_append &&
- F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
+ if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
WT_ERR(__wt_schema_worker(session, chunk->bloom_uri,
file_func, name_func, cfg, open_flags));
}
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index d5d81df6785..87771e2cb6c 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -29,7 +29,7 @@ __lsm_copy_chunks(WT_SESSION_IMPL *session,
cookie->nchunks = 0;
WT_RET(__wt_lsm_tree_readlock(session, lsm_tree));
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE))
+ if (!lsm_tree->active)
return (__wt_lsm_tree_readunlock(session, lsm_tree));
/* Take a copy of the current state of the LSM tree. */
@@ -72,14 +72,14 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
{
WT_DECL_RET;
WT_LSM_CHUNK *chunk, *evict_chunk, *flush_chunk;
- u_int i;
+ uint32_t i;
*chunkp = NULL;
chunk = evict_chunk = flush_chunk = NULL;
WT_ASSERT(session, lsm_tree->queue_ref > 0);
WT_RET(__wt_lsm_tree_readlock(session, lsm_tree));
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE) || lsm_tree->nchunks == 0)
+ if (!lsm_tree->active || lsm_tree->nchunks == 0)
return (__wt_lsm_tree_readunlock(session, lsm_tree));
/* Search for a chunk to evict and/or a chunk to flush. */
@@ -118,7 +118,7 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
if (chunk != NULL) {
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "Flush%s: return chunk %u of %u: %s",
+ "Flush%s: return chunk %" PRIu32 " of %" PRIu32 ": %s",
force ? " w/ force" : "",
i, lsm_tree->nchunks, chunk->uri));
@@ -322,7 +322,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
*/
saved_isolation = session->txn.isolation;
session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
- ret = __wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES);
+ ret = __wt_cache_op(session, WT_SYNC_WRITE_LEAVES);
session->txn.isolation = saved_isolation;
WT_TRET(__wt_session_release_btree(session));
}
@@ -334,11 +334,17 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
/*
* Turn on metadata tracking to ensure the checkpoint gets the
* necessary handle locks.
+ *
+ * Ensure that we don't race with a running checkpoint: the checkpoint
+ * lock protects against us racing with an application checkpoint in
+ * this chunk. Don't wait for it, though: checkpoints can take a long
+ * time, and our checkpoint operation should be very quick.
*/
WT_ERR(__wt_meta_track_on(session));
- WT_WITH_SCHEMA_LOCK(session, ret,
- ret = __wt_schema_worker(
- session, chunk->uri, __wt_checkpoint, NULL, NULL, 0));
+ WT_WITH_CHECKPOINT_LOCK(session, ret,
+ WT_WITH_SCHEMA_LOCK(session, ret,
+ ret = __wt_schema_worker(
+ session, chunk->uri, __wt_checkpoint, NULL, NULL, 0)));
WT_TRET(__wt_meta_track_off(session, false, ret != 0));
if (ret != 0)
WT_ERR_MSG(session, ret, "LSM checkpoint");
diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c
index 7562cb1cae3..0874da8db13 100644
--- a/src/lsm/lsm_worker.c
+++ b/src/lsm/lsm_worker.c
@@ -20,7 +20,7 @@ int
__wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args)
{
WT_RET(__wt_verbose(session, WT_VERB_LSM_MANAGER,
- "Start LSM worker %d type 0x%x", args->id, args->type));
+ "Start LSM worker %u type %#" PRIx32, args->id, args->type));
return (__wt_thread_create(session, &args->tid, __lsm_worker, args));
}
@@ -59,9 +59,8 @@ __lsm_worker_general_op(
*/
if (chunk != NULL) {
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "Flush%s chunk %d %s",
- force ? " w/ force" : "",
- chunk->id, chunk->uri));
+ "Flush%s chunk %" PRIu32 " %s",
+ force ? " w/ force" : "", chunk->id, chunk->uri));
ret = __wt_lsm_checkpoint_chunk(
session, entry->lsm_tree, chunk);
WT_ASSERT(session, chunk->refcnt > 0);
@@ -140,7 +139,7 @@ __lsm_worker(void *arg)
if (ret == WT_NOTFOUND) {
F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACTING);
ret = 0;
- } else if (ret == EBUSY)
+ } else if (ret == EBUSY || ret == EINTR)
ret = 0;
/* Paranoia: clear session state. */
@@ -164,7 +163,7 @@ __lsm_worker(void *arg)
if (ret != 0) {
err: __wt_lsm_manager_free_work_unit(session, entry);
WT_PANIC_MSG(session, ret,
- "Error in LSM worker thread %d", cookie->id);
+ "Error in LSM worker thread %u", cookie->id);
}
return (WT_THREAD_RET_VALUE);
}
diff --git a/src/meta/meta_apply.c b/src/meta/meta_apply.c
index 92766213b33..fb483c21dd9 100644
--- a/src/meta/meta_apply.c
+++ b/src/meta/meta_apply.c
@@ -15,39 +15,41 @@
*/
static inline int
__meta_btree_apply(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
- int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
+ const char *cfg[])
{
WT_DECL_RET;
const char *uri;
- int cmp;
+ bool skip;
- cursor->set_key(cursor, "file:");
- if ((ret = cursor->search_near(cursor, &cmp)) == 0 && cmp < 0)
- ret = cursor->next(cursor);
- for (; ret == 0; ret = cursor->next(cursor)) {
+ while ((ret = cursor->next(cursor)) == 0) {
WT_RET(cursor->get_key(cursor, &uri));
- if (!WT_PREFIX_MATCH(uri, "file:"))
- break;
if (strcmp(uri, WT_METAFILE_URI) == 0)
continue;
+ skip = false;
+ if (name_func != NULL)
+ WT_RET(name_func(session, uri, &skip));
+
+ if (file_func == NULL || skip || !WT_PREFIX_MATCH(uri, "file:"))
+ continue;
+
/*
* We need to pull the handle into the session handle cache
* and make sure it's referenced to stop other internal code
* dropping the handle (e.g in LSM when cleaning up obsolete
* chunks). Holding the metadata lock isn't enough.
*/
- ret = __wt_session_get_btree(session, uri, NULL, NULL, 0);
- if (ret == 0) {
- WT_SAVE_DHANDLE(session, ret = func(session, cfg));
- if (WT_META_TRACKING(session))
- WT_TRET(__wt_meta_track_handle_lock(
- session, false));
- else
- WT_TRET(__wt_session_release_btree(session));
- } else if (ret == EBUSY)
- ret = __wt_conn_btree_apply_single(
- session, uri, NULL, func, cfg);
+ if ((ret = __wt_session_get_btree(
+ session, uri, NULL, NULL, 0)) != 0)
+ return (ret == EBUSY ? 0 : ret);
+ WT_SAVE_DHANDLE(session, ret = file_func(session, cfg));
+ if (WT_META_TRACKING(session))
+ WT_TRET(__wt_meta_track_handle_lock(
+ session, false));
+ else
+ WT_TRET(__wt_session_release_btree(session));
WT_RET(ret);
}
WT_RET_NOTFOUND_OK(ret);
@@ -56,20 +58,22 @@ __meta_btree_apply(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
}
/*
- * __wt_meta_btree_apply --
+ * __wt_meta_apply_all --
* Apply a function to all files listed in the metadata, apart from the
* metadata file.
*/
int
-__wt_meta_btree_apply(WT_SESSION_IMPL *session,
- int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
+__wt_meta_apply_all(WT_SESSION_IMPL *session,
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
+ const char *cfg[])
{
WT_CURSOR *cursor;
WT_DECL_RET;
WT_RET(__wt_metadata_cursor(session, &cursor));
- WT_SAVE_DHANDLE(session,
- ret = __meta_btree_apply(session, cursor, func, cfg));
+ WT_SAVE_DHANDLE(session, ret =
+ __meta_btree_apply(session, cursor, file_func, name_func, cfg));
WT_TRET(__wt_metadata_cursor_release(session, &cursor));
return (ret);
diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c
index df4cd2cb4d6..0a864432daf 100644
--- a/src/meta/meta_ckpt.c
+++ b/src/meta/meta_ckpt.c
@@ -212,8 +212,7 @@ __ckpt_last_name(
if (found && a.val < found)
continue;
- if (*namep != NULL)
- __wt_free(session, *namep);
+ __wt_free(session, *namep);
WT_ERR(__wt_strndup(session, k.str, k.len, namep));
found = a.val;
}
@@ -221,7 +220,7 @@ __ckpt_last_name(
ret = WT_NOTFOUND;
if (0) {
-err: __wt_free(session, namep);
+err: __wt_free(session, *namep);
}
return (ret);
}
diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c
index 61cc009c983..e5f2727b5b6 100644
--- a/src/meta/meta_table.c
+++ b/src/meta/meta_table.c
@@ -67,18 +67,16 @@ __wt_metadata_cursor_open(
btree = ((WT_CURSOR_BTREE *)(*cursorp))->btree;
/*
- * Set special flags for the metadata file: eviction (the metadata file
- * is in-memory and never evicted), logging (the metadata file is always
- * logged if possible).
+ * Special settings for metadata: skew eviction so metadata almost
+ * always stays in cache and make sure metadata is logged if possible.
*
- * Test flags before setting them so updates can't race in subsequent
- * opens (the first update is safe because it's single-threaded from
+ * Test before setting so updates can't race in subsequent opens (the
+ * first update is safe because it's single-threaded from
* wiredtiger_open).
*/
- if (!F_ISSET(btree, WT_BTREE_IN_MEMORY))
- F_SET(btree, WT_BTREE_IN_MEMORY);
- if (!F_ISSET(btree, WT_BTREE_NO_EVICTION))
- F_SET(btree, WT_BTREE_NO_EVICTION);
+ if (btree->evict_priority == 0)
+ WT_WITH_BTREE(session, btree,
+ __wt_evict_priority_set(session, WT_EVICT_INT_SKEW));
if (F_ISSET(btree, WT_BTREE_NO_LOGGING))
F_CLR(btree, WT_BTREE_NO_LOGGING);
diff --git a/src/meta/meta_track.c b/src/meta/meta_track.c
index 1baab2deae1..a73b7e09d37 100644
--- a/src/meta/meta_track.c
+++ b/src/meta/meta_track.c
@@ -284,11 +284,12 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
* should be included in the checkpoint.
*/
ckpt_session->txn.id = session->txn.id;
- F_SET(ckpt_session, WT_SESSION_LOCKED_SCHEMA);
- WT_WITH_DHANDLE(ckpt_session,
- WT_SESSION_META_DHANDLE(session),
- ret = __wt_checkpoint(ckpt_session, NULL));
- F_CLR(ckpt_session, WT_SESSION_LOCKED_SCHEMA);
+ F_SET(ckpt_session, WT_SESSION_LOCKED_METADATA);
+ WT_WITH_METADATA_LOCK(session, ret,
+ WT_WITH_DHANDLE(ckpt_session,
+ WT_SESSION_META_DHANDLE(session),
+ ret = __wt_checkpoint(ckpt_session, NULL)));
+ F_CLR(ckpt_session, WT_SESSION_LOCKED_METADATA);
ckpt_session->txn.id = WT_TXN_NONE;
WT_RET(ret);
WT_WITH_DHANDLE(session,
diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c
index 7182bb0fe5f..471bb65cac0 100644
--- a/src/meta/meta_turtle.c
+++ b/src/meta/meta_turtle.c
@@ -113,8 +113,9 @@ __metadata_load_bulk(WT_SESSION_IMPL *session)
WT_DECL_RET;
uint32_t allocsize;
bool exist;
- const char *filecfg[] = { WT_CONFIG_BASE(session, file_meta), NULL };
- const char *key;
+ const char *filecfg[] = {
+ WT_CONFIG_BASE(session, file_meta), NULL, NULL };
+ const char *key, *value;
/*
* If a file was being bulk-loaded during the hot backup, it will appear
@@ -135,6 +136,8 @@ __metadata_load_bulk(WT_SESSION_IMPL *session)
* If the file doesn't exist, assume it's a bulk-loaded file;
* retrieve the allocation size and re-create the file.
*/
+ WT_ERR(cursor->get_value(cursor, &value));
+ filecfg[1] = value;
WT_ERR(__wt_direct_io_size_check(
session, filecfg, "allocation_size", &allocsize));
WT_ERR(__wt_block_manager_create(session, key, allocsize));
@@ -153,10 +156,11 @@ int
__wt_turtle_init(WT_SESSION_IMPL *session)
{
WT_DECL_RET;
- bool exist, exist_incr;
+ bool exist_backup, exist_incr, exist_turtle, load;
char *metaconf;
metaconf = NULL;
+ load = false;
/*
* Discard any turtle setup file left-over from previous runs. This
@@ -179,13 +183,29 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
* done.
*/
WT_RET(__wt_exist(session, WT_INCREMENTAL_BACKUP, &exist_incr));
- WT_RET(__wt_exist(session, WT_METADATA_TURTLE, &exist));
- if (exist) {
+ WT_RET(__wt_exist(session, WT_METADATA_BACKUP, &exist_backup));
+ WT_RET(__wt_exist(session, WT_METADATA_TURTLE, &exist_turtle));
+ if (exist_turtle) {
if (exist_incr)
WT_RET_MSG(session, EINVAL,
"Incremental backup after running recovery "
"is not allowed.");
- } else {
+ /*
+ * If we have a backup file and metadata and turtle files,
+ * we want to recreate the metadata from the backup.
+ */
+ if (exist_backup) {
+ WT_RET(__wt_msg(session, "Both %s and %s exist. "
+ "Recreating metadata from backup.",
+ WT_METADATA_TURTLE, WT_METADATA_BACKUP));
+ WT_RET(__wt_remove_if_exists(session, WT_METAFILE));
+ WT_RET(__wt_remove_if_exists(
+ session, WT_METADATA_TURTLE));
+ load = true;
+ }
+ } else
+ load = true;
+ if (load) {
if (exist_incr)
F_SET(S2C(session), WT_CONN_WAS_BACKUP);
diff --git a/src/os_posix/os_alloc.c b/src/os_posix/os_alloc.c
index 3876f9a1afe..cfc7b80450e 100644
--- a/src/os_posix/os_alloc.c
+++ b/src/os_posix/os_alloc.c
@@ -18,22 +18,13 @@
#include <gperftools/tcmalloc.h>
#define calloc tc_calloc
+#define malloc tc_malloc
#define realloc tc_realloc
#define posix_memalign tc_posix_memalign
#define free tc_free
#endif
/*
- * There's no malloc interface, WiredTiger never calls malloc.
- *
- * The problem is an application might allocate memory, write secret stuff in
- * it, free the memory, then WiredTiger allocates the memory and uses it for a
- * file page or log record, then writes it to disk, without having overwritten
- * it fully. That results in the secret stuff being protected by WiredTiger's
- * permission mechanisms, potentially inappropriate for the secret stuff.
- */
-
-/*
* __wt_calloc --
* ANSI calloc function.
*/
@@ -67,12 +58,46 @@ __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp)
}
/*
- * __wt_realloc --
- * ANSI realloc function.
+ * __wt_malloc --
+ * ANSI malloc function.
*/
int
-__wt_realloc(WT_SESSION_IMPL *session,
- size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
+__wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp)
+{
+ void *p;
+
+ /*
+ * Defensive: if our caller doesn't handle errors correctly, ensure a
+ * free won't fail.
+ */
+ *(void **)retp = NULL;
+
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ */
+ WT_ASSERT(session, bytes_to_allocate != 0);
+
+ if (session != NULL)
+ WT_STAT_FAST_CONN_INCR(session, memory_allocation);
+
+ if ((p = malloc(bytes_to_allocate)) == NULL)
+ WT_RET_MSG(session, __wt_errno(),
+ "memory allocation of %" WT_SIZET_FMT " bytes failed",
+ bytes_to_allocate);
+
+ *(void **)retp = p;
+ return (0);
+}
+
+/*
+ * __realloc_func --
+ * ANSI realloc function.
+ */
+static int
+__realloc_func(WT_SESSION_IMPL *session,
+ size_t *bytes_allocated_ret, size_t bytes_to_allocate, bool clear_memory,
+ void *retp)
{
void *p;
size_t bytes_allocated;
@@ -107,15 +132,12 @@ __wt_realloc(WT_SESSION_IMPL *session,
bytes_to_allocate);
/*
- * Clear the allocated memory -- an application might: allocate memory,
- * write secret stuff into it, free the memory, then we re-allocate the
- * memory and use it for a file page or log record, and then write it to
- * disk. That would result in the secret stuff being protected by the
- * WiredTiger permission mechanisms, potentially inappropriate for the
- * secret stuff.
+ * Clear the allocated memory, parts of WiredTiger depend on allocated
+ * memory being cleared.
*/
- memset((uint8_t *)
- p + bytes_allocated, 0, bytes_to_allocate - bytes_allocated);
+ if (clear_memory)
+ memset((uint8_t *)p + bytes_allocated,
+ 0, bytes_to_allocate - bytes_allocated);
/* Update caller's bytes allocated value. */
if (bytes_allocated_ret != NULL)
@@ -126,9 +148,33 @@ __wt_realloc(WT_SESSION_IMPL *session,
}
/*
+ * __wt_realloc --
+ * WiredTiger's realloc API.
+ */
+int
+__wt_realloc(WT_SESSION_IMPL *session,
+ size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
+{
+ return (__realloc_func(
+ session, bytes_allocated_ret, bytes_to_allocate, true, retp));
+}
+
+/*
+ * __wt_realloc_noclear --
+ * WiredTiger's realloc API, not clearing allocated memory.
+ */
+int
+__wt_realloc_noclear(WT_SESSION_IMPL *session,
+ size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
+{
+ return (__realloc_func(
+ session, bytes_allocated_ret, bytes_to_allocate, false, retp));
+}
+
+/*
* __wt_realloc_aligned --
* ANSI realloc function that aligns to buffer boundaries, configured with
- * the "buffer_alignment" key to wiredtiger_open.
+ * the "buffer_alignment" key to wiredtiger_open.
*/
int
__wt_realloc_aligned(WT_SESSION_IMPL *session,
@@ -184,10 +230,6 @@ __wt_realloc_aligned(WT_SESSION_IMPL *session,
__wt_free(session, p);
p = newp;
- /* Clear the allocated memory (see above). */
- memset((uint8_t *)p + bytes_allocated, 0,
- bytes_to_allocate - bytes_allocated);
-
/* Update caller's bytes allocated value. */
if (bytes_allocated_ret != NULL)
*bytes_allocated_ret = bytes_to_allocate;
@@ -200,11 +242,11 @@ __wt_realloc_aligned(WT_SESSION_IMPL *session,
* If there is no posix_memalign function, or no alignment configured,
* fall back to realloc.
*
- * Windows note: Visual C CRT memalign does not match Posix behavior
- * and would also double each allocation so it is bad for memory use
+ * Windows note: Visual C CRT memalign does not match POSIX behavior
+ * and would also double each allocation so it is bad for memory use.
*/
- return (__wt_realloc(
- session, bytes_allocated_ret, bytes_to_allocate, retp));
+ return (__realloc_func(
+ session, bytes_allocated_ret, bytes_to_allocate, false, retp));
}
/*
@@ -221,13 +263,14 @@ __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp)
return (0);
}
- WT_RET(__wt_calloc(session, len + 1, 1, &p));
+ WT_RET(__wt_malloc(session, len + 1, &p));
/*
* Don't change this to strncpy, we rely on this function to duplicate
* "strings" that contain nul bytes.
*/
memcpy(p, str, len);
+ ((uint8_t *)p)[len] = '\0';
*(void **)retp = p;
return (0);
diff --git a/src/os_posix/os_errno.c b/src/os_posix/os_errno.c
index a58ae88447e..a0f1202c6ef 100644
--- a/src/os_posix/os_errno.c
+++ b/src/os_posix/os_errno.c
@@ -23,6 +23,22 @@ __wt_errno(void)
}
/*
+ * __wt_map_error_rdonly --
+ * Map a system error into the WiredTiger error code used for
+ * read-only operation: ENOENT becomes WT_NOTFOUND, EACCES becomes
+ * WT_PERM_DENIED, and any other error is returned unchanged.
+ */
+int
+__wt_map_error_rdonly(int error)
+{
+ if (error == ENOENT)
+ return (WT_NOTFOUND);
+ else if (error == EACCES)
+ return (WT_PERM_DENIED);
+ return (error);
+}
+
+/*
* __wt_strerror --
* POSIX implementation of WT_SESSION.strerror and wiredtiger_strerror.
*/
diff --git a/src/os_posix/os_fallocate.c b/src/os_posix/os_fallocate.c
index 9d160afd179..bf20a99bdef 100644
--- a/src/os_posix/os_fallocate.c
+++ b/src/os_posix/os_fallocate.c
@@ -115,6 +115,7 @@ __wt_fallocate(
{
WT_DECL_RET;
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
switch (fh->fallocate_available) {
/*
* Check for already configured handles and make the configured call.
diff --git a/src/os_posix/os_fsync.c b/src/os_posix/os_fsync.c
index f5afddc557b..0bd0359338b 100644
--- a/src/os_posix/os_fsync.c
+++ b/src/os_posix/os_fsync.c
@@ -60,6 +60,7 @@ __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh)
#ifdef __linux__
WT_DECL_RET;
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
if ((ret = __wt_handle_sync(fh->fd)) == 0)
return (0);
WT_RET_MSG(session, ret, "%s: fsync", fh->name);
@@ -108,6 +109,7 @@ __wt_directory_sync(WT_SESSION_IMPL *session, const char *path)
if (ret != 0)
WT_RET_MSG(session, ret, "%s: open", path);
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
if ((ret = __wt_handle_sync(fd)) != 0)
WT_ERR_MSG(session, ret, "%s: fsync", path);
@@ -134,6 +136,9 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh)
WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: fsync", fh->name));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
+ WT_STRING_MATCH(fh->name, WT_SINGLETHREAD,
+ strlen(WT_SINGLETHREAD)));
if ((ret = __wt_handle_sync(fh->fd)) == 0)
return (0);
WT_RET_MSG(session, ret, "%s fsync error", fh->name);
@@ -149,6 +154,7 @@ __wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh)
#ifdef HAVE_SYNC_FILE_RANGE
WT_DECL_RET;
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
WT_RET(__wt_verbose(
session, WT_VERB_FILEOPS, "%s: sync_file_range", fh->name));
diff --git a/src/os_posix/os_ftruncate.c b/src/os_posix/os_ftruncate.c
index 2af90512f26..94d6cba3bf5 100644
--- a/src/os_posix/os_ftruncate.c
+++ b/src/os_posix/os_ftruncate.c
@@ -17,6 +17,7 @@ __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
{
WT_DECL_RET;
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret);
if (ret == 0)
return (0);
diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c
index b085676c53b..219b26c2fa1 100644
--- a/src/os_posix/os_open.c
+++ b/src/os_posix/os_open.c
@@ -73,7 +73,16 @@ __wt_open(WT_SESSION_IMPL *session,
goto setupfh;
}
- f = O_RDWR;
+ /*
+ * If this is a read-only connection, open all files read-only
+ * except the lock file.
+ */
+ if (F_ISSET(conn, WT_CONN_READONLY) &&
+ !WT_STRING_MATCH(name, WT_SINGLETHREAD,
+ strlen(WT_SINGLETHREAD)))
+ f = O_RDONLY;
+ else
+ f = O_RDWR;
#ifdef O_BINARY
/* Windows clones: we always want to treat the file as a binary. */
f |= O_BINARY;
@@ -94,6 +103,9 @@ __wt_open(WT_SESSION_IMPL *session,
#endif
if (ok_create) {
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) ||
+ WT_STRING_MATCH(name, WT_SINGLETHREAD,
+ strlen(WT_SINGLETHREAD)));
f |= O_CREAT;
if (exclusive)
f |= O_EXCL;
diff --git a/src/os_posix/os_remove.c b/src/os_posix/os_remove.c
index bc244c12e46..eb2e37fdc38 100644
--- a/src/os_posix/os_remove.c
+++ b/src/os_posix/os_remove.c
@@ -21,6 +21,7 @@ __remove_file_check(WT_SESSION_IMPL *session, const char *name)
uint64_t bucket;
conn = S2C(session);
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY));
fh = NULL;
bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE;
diff --git a/src/os_posix/os_rename.c b/src/os_posix/os_rename.c
index 301190305c4..8ec4ee3aa23 100644
--- a/src/os_posix/os_rename.c
+++ b/src/os_posix/os_rename.c
@@ -21,6 +21,7 @@ __wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
WT_RET(__wt_verbose(
session, WT_VERB_FILEOPS, "rename %s to %s", from, to));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
from_path = to_path = NULL;
WT_RET(__wt_filename(session, from, &from_path));
diff --git a/src/os_posix/os_rw.c b/src/os_posix/os_rw.c
index 8733bfe0f53..3d49fa7e712 100644
--- a/src/os_posix/os_rw.c
+++ b/src/os_posix/os_rw.c
@@ -65,6 +65,9 @@ __wt_write(WT_SESSION_IMPL *session,
"%s: write %" WT_SIZET_FMT " bytes at offset %" PRIuMAX,
fh->name, len, (uintmax_t)offset));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
+ WT_STRING_MATCH(fh->name, WT_SINGLETHREAD,
+ strlen(WT_SINGLETHREAD)));
/* Assert direct I/O is aligned and a multiple of the alignment. */
WT_ASSERT(session,
!fh->direct_io ||
diff --git a/src/os_posix/os_stdio.c b/src/os_posix/os_stdio.c
index 7ab107eda1e..65a0f40a659 100644
--- a/src/os_posix/os_stdio.c
+++ b/src/os_posix/os_stdio.c
@@ -46,8 +46,7 @@ __wt_fopen(WT_SESSION_IMPL *session,
if (*fpp == NULL)
ret = __wt_errno();
- if (pathbuf != NULL)
- __wt_free(session, pathbuf);
+ __wt_free(session, pathbuf);
if (ret == 0)
return (0);
diff --git a/src/os_win/os_errno.c b/src/os_win/os_errno.c
index 6a9daf8443f..590fcdc9d44 100644
--- a/src/os_win/os_errno.c
+++ b/src/os_win/os_errno.c
@@ -17,11 +17,13 @@ static const int windows_error_offset = -29000;
* Windows errors are from 0 - 15999 according to the documentation
*/
static DWORD
-__wt_map_error_to_windows_error(int error) {
- /* Ensure we do not exceed the error range
- Also validate he do not get any COM errors
- (which are negative integers)
- */
+__wt_map_error_to_windows_error(int error)
+{
+ /*
+ * Ensure we do not exceed the error range.
+ * Also validate we do not get any COM errors
+ * (which are negative integers).
+ */
WT_ASSERT(NULL, error < 0);
return (error + -(windows_error_offset));
@@ -32,11 +34,28 @@ __wt_map_error_to_windows_error(int error) {
* Return a positive integer, a decoded Windows error
*/
static int
-__wt_map_windows_error_to_error(DWORD winerr) {
+__wt_map_windows_error_to_error(DWORD winerr)
+{
return (winerr + windows_error_offset);
}
/*
+ * __wt_map_error_rdonly --
+ * Map a Windows error into the WiredTiger error code used for
+ * read-only operation: ERROR_FILE_NOT_FOUND becomes WT_NOTFOUND,
+ * ERROR_ACCESS_DENIED becomes WT_PERM_DENIED, others are unchanged.
+ */
+int
+__wt_map_error_rdonly(int winerr)
+{
+ if (winerr == ERROR_FILE_NOT_FOUND)
+ return (WT_NOTFOUND);
+ else if (winerr == ERROR_ACCESS_DENIED)
+ return (WT_PERM_DENIED);
+ return (winerr);
+}
+
+/*
* __wt_errno --
* Return errno, or WT_ERROR if errno not set.
*/
diff --git a/src/os_win/os_fallocate.c b/src/os_win/os_fallocate.c
index cdc7a1c46ee..a324687ca73 100644
--- a/src/os_win/os_fallocate.c
+++ b/src/os_win/os_fallocate.c
@@ -35,6 +35,7 @@ int
__wt_fallocate(
WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
WT_UNUSED(session);
WT_UNUSED(fh);
WT_UNUSED(offset);
diff --git a/src/os_win/os_fsync.c b/src/os_win/os_fsync.c
index 913b7ca5a4e..c196fc6c06a 100644
--- a/src/os_win/os_fsync.c
+++ b/src/os_win/os_fsync.c
@@ -15,6 +15,7 @@
int
__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh)
{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
WT_UNUSED(session);
WT_UNUSED(fh);
return (0);
@@ -27,6 +28,7 @@ __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh)
int
__wt_directory_sync(WT_SESSION_IMPL *session, const char *path)
{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
WT_UNUSED(session);
WT_UNUSED(path);
return (0);
@@ -44,6 +46,9 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh)
WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: FlushFileBuffers",
fh->name));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
+ WT_STRING_MATCH(fh->name, WT_SINGLETHREAD,
+ strlen(WT_SINGLETHREAD)));
if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE)
WT_RET_MSG(session,
__wt_errno(), "%s FlushFileBuffers error", fh->name);
@@ -58,6 +63,7 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh)
int
__wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh)
{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
WT_UNUSED(session);
WT_UNUSED(fh);
diff --git a/src/os_win/os_ftruncate.c b/src/os_win/os_ftruncate.c
index 0c11b5509b7..88fcf9542c1 100644
--- a/src/os_win/os_ftruncate.c
+++ b/src/os_win/os_ftruncate.c
@@ -18,6 +18,7 @@ __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
WT_DECL_RET;
LARGE_INTEGER largeint;
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
largeint.QuadPart = len;
if ((ret = SetFilePointerEx(
diff --git a/src/os_win/os_open.c b/src/os_win/os_open.c
index 3ec53daf001..f10582c5bd1 100644
--- a/src/os_win/os_open.c
+++ b/src/os_win/os_open.c
@@ -58,7 +58,17 @@ __wt_open(WT_SESSION_IMPL *session,
WT_RET(__wt_filename(session, name, &path));
- share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
+ /*
+ * If this is a read-only connection, open all files read-only
+ * except the lock file.
+ */
+ if (F_ISSET(conn, WT_CONN_READONLY) &&
+ !WT_STRING_MATCH(name, WT_SINGLETHREAD,
+ strlen(WT_SINGLETHREAD)))
+ share_mode = FILE_SHARE_READ;
+ else
+ share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
+
/*
* Security:
* The application may spawn a new process, and we don't want another
@@ -72,6 +82,9 @@ __wt_open(WT_SESSION_IMPL *session,
dwCreationDisposition = 0;
if (ok_create) {
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) ||
+ WT_STRING_MATCH(name, WT_SINGLETHREAD,
+ strlen(WT_SINGLETHREAD)));
dwCreationDisposition = CREATE_NEW;
if (exclusive)
dwCreationDisposition = CREATE_ALWAYS;
diff --git a/src/os_win/os_remove.c b/src/os_win/os_remove.c
index 5682a25d7f2..84f1dd86674 100644
--- a/src/os_win/os_remove.c
+++ b/src/os_win/os_remove.c
@@ -21,6 +21,7 @@ __remove_file_check(WT_SESSION_IMPL *session, const char *name)
uint64_t bucket;
conn = S2C(session);
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY));
fh = NULL;
bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE;
diff --git a/src/os_win/os_rename.c b/src/os_win/os_rename.c
index 829ab1d16e9..b4be2dba24c 100644
--- a/src/os_win/os_rename.c
+++ b/src/os_win/os_rename.c
@@ -22,6 +22,7 @@ __wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
WT_RET(__wt_verbose(
session, WT_VERB_FILEOPS, "rename %s to %s", from, to));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
from_path = to_path = NULL;
WT_RET(__wt_filename(session, from, &from_path));
diff --git a/src/os_win/os_rw.c b/src/os_win/os_rw.c
index 49f011001a4..a9537a648f9 100644
--- a/src/os_win/os_rw.c
+++ b/src/os_win/os_rw.c
@@ -74,6 +74,9 @@ __wt_write(WT_SESSION_IMPL *session,
"%s: write %" WT_SIZET_FMT " bytes at offset %" PRIuMAX,
fh->name, len, (uintmax_t)offset));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
+ WT_STRING_MATCH(fh->name, WT_SINGLETHREAD,
+ strlen(WT_SINGLETHREAD)));
/* Assert direct I/O is aligned and a multiple of the alignment. */
WT_ASSERT(session,
!fh->direct_io ||
diff --git a/src/packing/pack_impl.c b/src/packing/pack_impl.c
index 0e3ed44ba6a..5dbb0f33842 100644
--- a/src/packing/pack_impl.c
+++ b/src/packing/pack_impl.c
@@ -107,36 +107,6 @@ __wt_struct_unpack(WT_SESSION_IMPL *session,
}
/*
- * __wt_struct_unpack_size --
- * Determine the packed size of a buffer matching the format.
- */
-int
-__wt_struct_unpack_size(WT_SESSION_IMPL *session,
- const void *buffer, size_t size, const char *fmt, size_t *resultp)
-{
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_PACK pack;
- const uint8_t *p, *end;
-
- p = buffer;
- end = p + size;
-
- WT_RET(__pack_init(session, &pack, fmt));
- while ((ret = __pack_next(&pack, &pv)) == 0)
- WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
-
- /* Be paranoid - __pack_write should never overflow. */
- WT_ASSERT(session, p <= end);
-
- if (ret != WT_NOTFOUND)
- return (ret);
-
- *resultp = WT_PTRDIFF(p, buffer);
- return (0);
-}
-
-/*
* __wt_struct_repack --
* Return the subset of the packed buffer that represents part of
* the format. If the result is not contiguous in the existing
@@ -144,70 +114,43 @@ __wt_struct_unpack_size(WT_SESSION_IMPL *session,
*/
int
__wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt,
- const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf, void **reallocp)
+ const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf)
{
WT_DECL_PACK_VALUE(pvin);
WT_DECL_PACK_VALUE(pvout);
WT_DECL_RET;
WT_PACK packin, packout;
const uint8_t *before, *end, *p;
- uint8_t *pout;
- size_t len;
const void *start;
start = NULL;
p = inbuf->data;
end = p + inbuf->size;
- /*
- * Handle this non-contiguous case: 'U' -> 'u' at the end of the buf.
- * The former case has the size embedded before the item, the latter
- * does not.
- */
- if ((len = strlen(outfmt)) > 1 && outfmt[len - 1] == 'u' &&
- strlen(infmt) > len && infmt[len - 1] == 'U') {
- WT_ERR(__wt_realloc(session, NULL, inbuf->size, reallocp));
- pout = *reallocp;
- } else
- pout = NULL;
-
- WT_ERR(__pack_init(session, &packout, outfmt));
- WT_ERR(__pack_init(session, &packin, infmt));
+ WT_RET(__pack_init(session, &packout, outfmt));
+ WT_RET(__pack_init(session, &packin, infmt));
/* Outfmt should complete before infmt */
while ((ret = __pack_next(&packout, &pvout)) == 0) {
if (p >= end)
- WT_ERR(EINVAL);
- WT_ERR(__pack_next(&packin, &pvin));
+ WT_RET(EINVAL);
+ if (pvout.type == 'x' && pvout.size == 0 && pvout.havesize)
+ continue;
+ WT_RET(__pack_next(&packin, &pvin));
before = p;
- WT_ERR(__unpack_read(session, &pvin, &p, (size_t)(end - p)));
- if (pvout.type != pvin.type) {
- if (pvout.type == 'u' && pvin.type == 'U') {
- /* Skip the prefixed size, we don't need it */
- WT_ERR(__wt_struct_unpack_size(session, before,
- (size_t)(end - before), "I", &len));
- before += len;
- } else
- WT_ERR(ENOTSUP);
- }
- if (pout != NULL) {
- memcpy(pout, before, WT_PTRDIFF(p, before));
- pout += p - before;
- } else if (start == NULL)
+ WT_RET(__unpack_read(session, &pvin, &p, (size_t)(end - p)));
+ if (pvout.type != pvin.type)
+ WT_RET(ENOTSUP);
+ if (start == NULL)
start = before;
}
- WT_ERR_NOTFOUND_OK(ret);
+ WT_RET_NOTFOUND_OK(ret);
/* Be paranoid - __pack_write should never overflow. */
WT_ASSERT(session, p <= end);
- if (pout != NULL) {
- outbuf->data = *reallocp;
- outbuf->size = WT_PTRDIFF(pout, *reallocp);
- } else {
- outbuf->data = start;
- outbuf->size = WT_PTRDIFF(p, start);
- }
+ outbuf->data = start;
+ outbuf->size = WT_PTRDIFF(p, start);
-err: return (ret);
+ return (0);
}
diff --git a/src/packing/pack_stream.c b/src/packing/pack_stream.c
index 98da5b405c3..1393eb9a9c1 100644
--- a/src/packing/pack_stream.c
+++ b/src/packing/pack_stream.c
@@ -65,8 +65,7 @@ wiredtiger_pack_close(WT_PACK_STREAM *ps, size_t *usedp)
if (usedp != NULL)
*usedp = WT_PTRDIFF(ps->p, ps->start);
- if (ps != NULL)
- __wt_free(ps->pack.session, ps);
+ __wt_free(ps->pack.session, ps);
return (0);
}
@@ -327,3 +326,139 @@ wiredtiger_unpack_uint(WT_PACK_STREAM *ps, uint64_t *up)
}
return (0);
}
+
+/*
+ * __wt_ext_pack_start --
+ * WT_EXTENSION.pack_start method.
+ */
+int
+__wt_ext_pack_start(WT_EXTENSION_API *wt_api,
+ WT_SESSION *wt_session, const char *format,
+ void *buffer, size_t size, WT_PACK_STREAM **psp)
+{
+ WT_CONNECTION_IMPL *conn;
+
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if (wt_session == NULL)
+ wt_session = (WT_SESSION *)conn->default_session;
+ return (wiredtiger_pack_start(wt_session, format, buffer, size, psp));
+}
+
+/*
+ * __wt_ext_unpack_start --
+ * WT_EXTENSION.unpack_start
+ */
+int
+__wt_ext_unpack_start(WT_EXTENSION_API *wt_api,
+ WT_SESSION *wt_session, const char *format,
+ const void *buffer, size_t size, WT_PACK_STREAM **psp)
+{
+ WT_CONNECTION_IMPL *conn;
+
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if (wt_session == NULL)
+ wt_session = (WT_SESSION *)conn->default_session;
+ return (wiredtiger_unpack_start(wt_session, format, buffer, size, psp));
+}
+
+/*
+ * __wt_ext_pack_close --
+ * WT_EXTENSION.pack_close
+ */
+int
+__wt_ext_pack_close(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp)
+{
+ WT_UNUSED(wt_api);
+ return (wiredtiger_pack_close(ps, usedp));
+}
+
+/*
+ * __wt_ext_pack_item --
+ * WT_EXTENSION.pack_item
+ */
+int
+__wt_ext_pack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item)
+{
+ WT_UNUSED(wt_api);
+ return (wiredtiger_pack_item(ps, item));
+}
+
+/*
+ * __wt_ext_pack_int --
+ * WT_EXTENSION.pack_int
+ */
+int
+__wt_ext_pack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i)
+{
+ WT_UNUSED(wt_api);
+ return (wiredtiger_pack_int(ps, i));
+}
+
+/*
+ * __wt_ext_pack_str --
+ * WT_EXTENSION.pack_str
+ */
+int
+__wt_ext_pack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s)
+{
+ WT_UNUSED(wt_api);
+ return (wiredtiger_pack_str(ps, s));
+}
+
+/*
+ * __wt_ext_pack_uint --
+ * WT_EXTENSION.pack_uint
+ */
+int
+__wt_ext_pack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u)
+{
+ WT_UNUSED(wt_api);
+ return (wiredtiger_pack_uint(ps, u));
+}
+
+/*
+ * __wt_ext_unpack_item --
+ * WT_EXTENSION.unpack_item
+ */
+int
+__wt_ext_unpack_item(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, WT_ITEM *item)
+{
+ WT_UNUSED(wt_api);
+ return (wiredtiger_unpack_item(ps, item));
+}
+
+/*
+ * __wt_ext_unpack_int --
+ * WT_EXTENSION.unpack_int
+ */
+int
+__wt_ext_unpack_int(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, int64_t *ip)
+{
+ WT_UNUSED(wt_api);
+ return (wiredtiger_unpack_int(ps, ip));
+}
+
+/*
+ * __wt_ext_unpack_str --
+ * WT_EXTENSION.unpack_str
+ */
+int
+__wt_ext_unpack_str(WT_EXTENSION_API *wt_api,
+ WT_PACK_STREAM *ps, const char **sp)
+{
+ WT_UNUSED(wt_api);
+ return (wiredtiger_unpack_str(ps, sp));
+}
+
+/*
+ * __wt_ext_unpack_uint --
+ * WT_EXTENSION.unpack_uint
+ */
+int
+__wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up)
+{
+ WT_UNUSED(wt_api);
+ return (wiredtiger_unpack_uint(ps, up));
+}
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index c25d7b5e493..a69f335c9b3 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -363,6 +363,17 @@ __wt_reconcile(WT_SESSION_IMPL *session,
WT_ASSERT(session, __wt_page_is_modified(page));
/*
+ * Reconciliation locks the page for three reasons:
+ * Reconciliation reads the lists of page updates, obsolete updates
+ * cannot be discarded while reconciliation is in progress;
+ * The compaction process reads page modification information, which
+ * reconciliation modifies;
+ * In-memory splits: reconciliation of an internal page cannot handle
+ * a child page splitting during the reconciliation.
+ */
+ WT_RET(__wt_fair_lock(session, &page->page_lock));
+
+ /*
* Check that transaction time always moves forward for a given page.
* If this check fails, reconciliation can free something that a future
* reconciliation will need.
@@ -376,17 +387,6 @@ __wt_reconcile(WT_SESSION_IMPL *session,
session, ref, flags, salvage, &session->reconcile));
r = session->reconcile;
- /*
- * Reconciliation locks the page for three reasons:
- * Reconciliation reads the lists of page updates, obsolete updates
- * cannot be discarded while reconciliation is in progress;
- * The compaction process reads page modification information, which
- * reconciliation modifies;
- * In-memory splits: reconciliation of an internal page cannot handle
- * a child page splitting during the reconciliation.
- */
- WT_RET(__wt_fair_lock(session, &page->page_lock));
-
/* Reconcile the page. */
switch (page->type) {
case WT_PAGE_COL_FIX:
@@ -1313,7 +1313,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
} while (0)
typedef enum {
- WT_CHILD_IGNORE, /* Deleted child: ignore */
+ WT_CHILD_IGNORE, /* Ignored child */
WT_CHILD_MODIFIED, /* Modified child */
WT_CHILD_ORIGINAL, /* Original child */
WT_CHILD_PROXY /* Deleted child: proxy */
@@ -1450,16 +1450,15 @@ __rec_child_modify(WT_SESSION_IMPL *session,
/*
* This function is called when walking an internal page to decide how
- * to handle child pages referenced by the internal page, specifically
- * if the child page is to be merged into its parent.
+ * to handle child pages referenced by the internal page.
*
* Internal pages are reconciled for two reasons: first, when evicting
* an internal page, second by the checkpoint code when writing internal
- * pages. During eviction, the subtree is locked down so all pages
- * should be in the WT_REF_DISK or WT_REF_LOCKED state. During
- * checkpoint, any eviction that might affect our review of an internal
- * page is prohibited, however, as the subtree is not reserved for our
- * exclusive use, there are other page states that must be considered.
+ * pages. During eviction, all pages should be in the WT_REF_DISK or
+ * WT_REF_DELETED state. During checkpoint, eviction that might affect
+ * review of an internal page is prohibited, however, as the subtree is
+ * not reserved for our exclusive use, there are other page states that
+ * must be considered.
*/
for (;; __wt_yield())
switch (r->tested_ref_state = ref->state) {
@@ -1488,15 +1487,14 @@ __rec_child_modify(WT_SESSION_IMPL *session,
/*
* Locked.
*
- * If evicting, the evicted page's subtree, including
- * this child, was selected for eviction by us and the
- * state is stable until we reset it, it's an in-memory
- * state. This is the expected state for a child being
- * merged into a page (where the page was selected by
- * the eviction server for eviction).
+ * We should never be here during eviction: active child
+ * pages in an evicted page's subtree cause the eviction
+ * attempt to fail.
*/
- if (F_ISSET(r, WT_EVICTING))
- goto in_memory;
+ if (F_ISSET(r, WT_EVICTING)) {
+ WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+ return (EBUSY);
+ }
/*
* If called during checkpoint, the child is being
@@ -1514,24 +1512,21 @@ __rec_child_modify(WT_SESSION_IMPL *session,
/*
* In memory.
*
- * If evicting, the evicted page's subtree, including
- * this child, was selected for eviction by us and the
- * state is stable until we reset it, it's an in-memory
- * state. This is the expected state for a child being
- * merged into a page (where the page belongs to a file
- * being discarded from the cache during close).
+ * We should never be here during eviction: active child
+ * pages in an evicted page's subtree cause the eviction
+ * attempt to fail.
*/
- if (F_ISSET(r, WT_EVICTING))
- goto in_memory;
+ if (F_ISSET(r, WT_EVICTING)) {
+ WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+ return (EBUSY);
+ }
/*
* If called during checkpoint, acquire a hazard pointer
* so the child isn't evicted, it's an in-memory case.
*
- * This call cannot return split/restart, eviction of
- * pages that split into their parent is shutout during
- * checkpoint, all splits in process will have completed
- * before we walk any pages for checkpoint.
+ * This call cannot return split/restart, we have a lock
+ * on the parent which prevents a child page split.
*/
ret = __wt_page_in(session, ref,
WT_READ_CACHE | WT_READ_NO_EVICT |
@@ -1548,29 +1543,31 @@ __rec_child_modify(WT_SESSION_IMPL *session,
/*
* Being read, not modified by definition.
*
- * We should never be here during eviction, a child page
- * in this state within an evicted page's subtree would
- * have caused normally eviction to fail, and exclusive
- * eviction shouldn't ever see pages being read.
+ * We should never be here during eviction: active child
+ * pages in an evicted page's subtree cause the eviction
+ * attempt to fail.
*/
- WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+ if (F_ISSET(r, WT_EVICTING)) {
+ WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+ return (EBUSY);
+ }
goto done;
case WT_REF_SPLIT:
/*
* The page was split out from under us.
*
- * We should never be here during eviction, a child page
- * in this state within an evicted page's subtree would
- * have caused eviction to fail.
+ * We should never be here during eviction: active child
+ * pages in an evicted page's subtree cause the eviction
+ * attempt to fail.
*
* We should never be here during checkpoint, dirty page
* eviction is shutout during checkpoint, all splits in
* process will have completed before we walk any pages
* for checkpoint.
*/
- WT_ASSERT(session, ref->state != WT_REF_SPLIT);
- /* FALLTHROUGH */
+ WT_ASSERT(session, WT_REF_SPLIT != WT_REF_SPLIT);
+ return (EBUSY);
WT_ILLEGAL_VALUE(session);
}
@@ -1581,11 +1578,21 @@ in_memory:
* modify structure has been instantiated. If the modify structure
* exists and the page has actually been modified, set that state.
* If that's not the case, we would normally use the original cell's
- * disk address as our reference, but, if we're forced to instantiate
- * a deleted child page and it's never modified, we end up here with
- * a page that has a modify structure, no modifications, and no disk
- * address. Ignore those pages, they're not modified and there is no
- * reason to write the cell.
+ * disk address as our reference, however there are two special cases,
+ * both flagged by a missing block address.
+ *
+ * First, if forced to instantiate a deleted child page and it's never
+ * modified, we end up here with a page that has a modify structure, no
+ * modifications, and no disk address. Ignore those pages, they're not
+ * modified and there is no reason to write the cell.
+ *
+ * Second, insert splits are permitted during checkpoint. When doing the
+ * final checkpoint pass, we first walk the internal page's page-index
+ * and write out any dirty pages we find, then we write out the internal
+ * page in post-order traversal. If we found the split page in the first
+ * step, it will have an address; if we didn't find the split page in
+ * the first step, it won't have an address and we ignore it, it's not
+ * part of the checkpoint.
*/
mod = ref->page->modify;
if (mod != NULL && mod->rec_result != 0)
@@ -1953,12 +1960,21 @@ __rec_split_init(WT_SESSION_IMPL *session,
WT_RET(__wt_buf_init(session, &r->disk_image, corrected_page_size));
/*
- * Clear the disk page's header and block-manager space, set the page
- * type (the type doesn't change, and setting it later would require
- * additional code in a few different places).
+ * Clear the disk page header to ensure all of it is initialized, even
+ * the unused fields.
+ *
+ * In the case of fixed-length column-store, clear the entire buffer:
+ * fixed-length column-store sets bits in bytes, where the bytes are
+ * assumed to initially be 0.
+ */
+ memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ?
+ corrected_page_size : WT_PAGE_HEADER_SIZE);
+
+ /*
+ * Set the page type (the type doesn't change, and setting it later
+ * would require additional code in a few different places).
*/
dsk = r->disk_image.mem;
- memset(dsk, 0, WT_PAGE_HEADER_BYTE_SIZE(btree));
dsk->type = page->type;
/*
@@ -3019,13 +3035,13 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
* The data isn't laid out on a page boundary or nul padded; copy it to
* a clean, aligned, padded buffer before writing it.
*
- * Allocate a scratch buffer to hold the new disk image. Copy the
- * WT_PAGE_HEADER header onto the scratch buffer, most of the header
- * information remains unchanged between the pages.
+ * Allocate a scratch buffer to hold the new disk image. Copy the disk
+ * page's header and block-manager space into the scratch buffer, most
+ * of the header information remains unchanged between the pages.
*/
WT_RET(__wt_scr_alloc(session, r->disk_image.memsize, &tmp));
dsk = tmp->mem;
- memcpy(dsk, r->disk_image.mem, WT_PAGE_HEADER_SIZE);
+ memcpy(dsk, r->disk_image.mem, WT_PAGE_HEADER_BYTE_SIZE(btree));
/*
* For each split chunk we've created, update the disk image and copy
@@ -3808,7 +3824,7 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
switch (state) {
case WT_CHILD_IGNORE:
- /* Deleted child we don't have to write. */
+ /* Ignored child. */
WT_CHILD_RELEASE_ERR(session, hazard, ref);
continue;
@@ -3977,7 +3993,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
* record 100 moves to another page. When we reconcile
* the original page, we write record 98, then we don't
* see record 99 for whatever reason. If we've moved
- * record 1000, we don't know to write a deleted record
+ * record 100, we don't know to write a deleted record
* 99 on the page.)
*
* The record number recorded during the split is the
@@ -3999,8 +4015,6 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
} else {
WT_RET(
__rec_txn_read(session, r, ins, NULL, NULL, &upd));
- if (upd == NULL)
- continue;
recno = WT_INSERT_RECNO(ins);
}
for (;;) {
@@ -4536,22 +4550,25 @@ compare: /*
* record 100 moves to another page. When we reconcile
* the original page, we write record 98, then we don't
* see record 99 for whatever reason. If we've moved
- * record 1000, we don't know to write a deleted record
+ * record 100, we don't know to write a deleted record
* 99 on the page.)
*
+ * Assert the recorded record number is past the end of
+ * the page.
+ *
* The record number recorded during the split is the
* first key on the split page, that is, one larger than
* the last key on this page, we have to decrement it.
*/
if ((n = page->modify->mod_split_recno) == WT_RECNO_OOB)
break;
+ WT_ASSERT(session, n >= src_recno);
n -= 1;
+
upd = NULL;
} else {
WT_ERR(
__rec_txn_read(session, r, ins, NULL, NULL, &upd));
- if (upd == NULL)
- continue;
n = WT_INSERT_RECNO(ins);
}
while (src_recno <= n) {
@@ -4734,10 +4751,10 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
switch (state) {
case WT_CHILD_IGNORE:
/*
- * Deleted child we don't have to write.
+ * Ignored child.
*
- * Overflow keys referencing discarded pages are no
- * longer useful, schedule them for discard. Don't
+ * Overflow keys referencing pages we're not writing are
+ * no longer useful, schedule them for discard. Don't
* worry about instantiation, internal page keys are
* always instantiated. Don't worry about reuse,
* reusing this key in this reconciliation is unlikely.
diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c
index 9b3b76b62de..756f1fdcc6c 100644
--- a/src/schema/schema_create.c
+++ b/src/schema/schema_create.c
@@ -9,22 +9,6 @@
#include "wt_internal.h"
/*
- * __wt_schema_create_strip --
- * Discard any configuration information from a schema entry that is not
- * applicable to an session.create call, here for the wt dump command utility,
- * which only wants to dump the schema information needed for load.
- */
-int
-__wt_schema_create_strip(WT_SESSION_IMPL *session,
- const char *v1, const char *v2, char **value_ret)
-{
- const char *cfg[] =
- { WT_CONFIG_BASE(session, WT_SESSION_create), v1, v2, NULL };
-
- return (__wt_config_collapse(session, cfg, value_ret));
-}
-
-/*
* __wt_direct_io_size_check --
* Return a size from the configuration, complaining if it's insufficient
* for direct I/O.
diff --git a/src/schema/schema_open.c b/src/schema/schema_open.c
index 49318f80959..e7ce4e42498 100644
--- a/src/schema/schema_open.c
+++ b/src/schema/schema_open.c
@@ -109,8 +109,7 @@ __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table)
err: __wt_scr_free(session, &buf);
__wt_schema_destroy_colgroup(session, &colgroup);
- if (cgconfig != NULL)
- __wt_free(session, cgconfig);
+ __wt_free(session, cgconfig);
return (ret);
}
diff --git a/src/schema/schema_plan.c b/src/schema/schema_plan.c
index 612a2d2d192..12a1aa9c22f 100644
--- a/src/schema/schema_plan.c
+++ b/src/schema/schema_plan.c
@@ -212,7 +212,7 @@ __wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table,
WT_ASSERT(session, !value_only ||
coltype == WT_PROJ_VALUE);
WT_RET(__wt_buf_catfmt(
- session, plan, "%d%c", cg, coltype));
+ session, plan, "%u%c", cg, coltype));
/*
* Set the current column group and column
@@ -226,7 +226,7 @@ __wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table,
if (current_col < col) {
if (col - current_col > 1)
WT_RET(__wt_buf_catfmt(session,
- plan, "%d", col - current_col));
+ plan, "%u", col - current_col));
WT_RET(__wt_buf_catfmt(session,
plan, "%c", WT_PROJ_SKIP));
}
@@ -375,8 +375,8 @@ __wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table,
pv.type = 'u';
if (pv.havesize)
- WT_RET(__wt_buf_catfmt(
- session, format, "%d%c", (int)pv.size, pv.type));
+ WT_RET(__wt_buf_catfmt(session,
+ format, "%" PRIu32 "%c", pv.size, pv.type));
else
WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
} while (have_next);
@@ -399,8 +399,8 @@ __wt_struct_truncate(WT_SESSION_IMPL *session,
while (ncols-- > 0) {
WT_RET(__pack_next(&pack, &pv));
if (pv.havesize)
- WT_RET(__wt_buf_catfmt(
- session, format, "%d%c", (int)pv.size, pv.type));
+ WT_RET(__wt_buf_catfmt(session,
+ format, "%" PRIu32 "%c", pv.size, pv.type));
else
WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
}
diff --git a/src/schema/schema_truncate.c b/src/schema/schema_truncate.c
index e7752b60ca4..d9a798b6ed8 100644
--- a/src/schema/schema_truncate.c
+++ b/src/schema/schema_truncate.c
@@ -131,22 +131,19 @@ int
__wt_schema_range_truncate(
WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop)
{
- WT_CURSOR *cursor;
WT_DATA_SOURCE *dsrc;
WT_DECL_RET;
const char *uri;
- cursor = (start != NULL) ? start : stop;
- uri = cursor->internal_uri;
+ uri = start->internal_uri;
if (WT_PREFIX_MATCH(uri, "file:")) {
- if (start != NULL)
- WT_CURSOR_NEEDKEY(start);
+ WT_CURSOR_NEEDKEY(start);
if (stop != NULL)
WT_CURSOR_NEEDKEY(stop);
- WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)cursor)->btree,
+ WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)start)->btree,
ret = __wt_btcur_range_truncate(
- (WT_CURSOR_BTREE *)start, (WT_CURSOR_BTREE *)stop));
+ (WT_CURSOR_BTREE *)start, (WT_CURSOR_BTREE *)stop));
} else if (WT_PREFIX_MATCH(uri, "table:"))
ret = __wt_table_range_truncate(
(WT_CURSOR_TABLE *)start, (WT_CURSOR_TABLE *)stop);
diff --git a/src/schema/schema_worker.c b/src/schema/schema_worker.c
index b5ee3bb7f7d..52be76bb7a5 100644
--- a/src/schema/schema_worker.c
+++ b/src/schema/schema_worker.c
@@ -55,18 +55,11 @@ __wt_schema_worker(WT_SESSION_IMPL *session,
WT_ERR(ret);
}
- if ((ret = __wt_session_get_btree_ckpt(
- session, uri, cfg, open_flags)) == 0) {
- WT_SAVE_DHANDLE(session,
- ret = file_func(session, cfg));
- WT_TRET(__wt_session_release_btree(session));
- } else if (ret == EBUSY) {
- WT_ASSERT(session, !FLD_ISSET(
- open_flags, WT_DHANDLE_EXCLUSIVE));
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_conn_btree_apply_single_ckpt(
- session, uri, file_func, cfg));
- }
+ WT_ERR(__wt_session_get_btree_ckpt(
+ session, uri, cfg, open_flags));
+ WT_SAVE_DHANDLE(session,
+ ret = file_func(session, cfg));
+ WT_TRET(__wt_session_release_btree(session));
WT_ERR(ret);
}
} else if (WT_PREFIX_MATCH(uri, "colgroup:")) {
@@ -133,7 +126,7 @@ __wt_schema_worker(WT_SESSION_IMPL *session,
dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
else if (file_func == __wt_checkpoint)
;
- else if (file_func == __wt_checkpoint_list)
+ else if (file_func == __wt_checkpoint_get_handles)
;
else if (file_func == __wt_checkpoint_sync)
;
diff --git a/src/session/session_api.c b/src/session/session_api.c
index c03b5fdc044..bb496494234 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -13,6 +13,20 @@ static int __session_snapshot(WT_SESSION *, const char *);
static int __session_rollback_transaction(WT_SESSION *, const char *);
/*
+ * __wt_session_notsup --
+ * Unsupported session method.
+ */
+int
+__wt_session_notsup(WT_SESSION *wt_session)
+{
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ WT_RET_MSG(session, ENOTSUP, "Unsupported session method");
+}
+
+/*
* __wt_session_reset_cursors --
* Reset all open cursors.
*/
@@ -26,7 +40,8 @@ __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers)
/* Stop when there are no positioned cursors. */
if (session->ncursors == 0)
break;
- WT_TRET(cursor->reset(cursor));
+ if (!F_ISSET(cursor, WT_CURSTD_JOINED))
+ WT_TRET(cursor->reset(cursor));
/* Optionally, free the cursor buffers */
if (free_buffers) {
__wt_buf_free(session, &cursor->key);
@@ -478,10 +493,13 @@ __session_create(WT_SESSION *wt_session, const char *uri, const char *config)
/*
* We can't disallow type entirely, a configuration string might
* innocently include it, for example, a dump/load pair. If the
- * URI type prefix and the type are the same, let it go.
+ * underlying type is "file", it's OK ("file" is the underlying
+ * type for every type); if the URI type prefix and the type are
+ * the same, let it go.
*/
if ((ret =
__wt_config_getones(session, config, "type", &cval)) == 0 &&
+ !WT_STRING_MATCH("file", cval.str, cval.len) &&
(strncmp(uri, cval.str, cval.len) != 0 ||
uri[cval.len] != ':'))
WT_ERR_MSG(session, EINVAL,
@@ -495,6 +513,20 @@ err: API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
+ * __session_create_readonly --
+ * WT_SESSION->create method; readonly version.
+ */
+static int
+__session_create_readonly(
+ WT_SESSION *wt_session, const char *uri, const char *config)
+{
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
+
+ return (__wt_session_notsup(wt_session));
+}
+
+/*
* __session_log_flush --
* WT_SESSION->log_flush method.
*/
@@ -532,6 +564,18 @@ err: API_END_RET(session, ret);
}
/*
+ * __session_log_flush_readonly --
+ * WT_SESSION->log_flush method; readonly version.
+ */
+static int
+__session_log_flush_readonly(WT_SESSION *wt_session, const char *config)
+{
+ WT_UNUSED(config);
+
+ return (__wt_session_notsup(wt_session));
+}
+
+/*
* __session_log_printf --
* WT_SESSION->log_printf method.
*/
@@ -554,6 +598,19 @@ err: API_END_RET(session, ret);
}
/*
+ * __session_log_printf_readonly --
+ * WT_SESSION->log_printf method; readonly version.
+ */
+static int
+__session_log_printf_readonly(WT_SESSION *wt_session, const char *fmt, ...)
+ WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
+{
+ WT_UNUSED(fmt);
+
+ return (__wt_session_notsup(wt_session));
+}
+
+/*
* __session_rebalance --
* WT_SESSION->rebalance method.
*/
@@ -567,9 +624,6 @@ __session_rebalance(WT_SESSION *wt_session, const char *uri, const char *config)
SESSION_API_CALL(session, rebalance, config, cfg);
- if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- WT_ERR(ENOTSUP);
-
/* Block out checkpoints to avoid spurious EBUSY errors. */
WT_WITH_CHECKPOINT_LOCK(session, ret,
WT_WITH_SCHEMA_LOCK(session, ret,
@@ -580,6 +634,20 @@ err: API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
+ * __session_rebalance_readonly --
+ * WT_SESSION->rebalance method; readonly version.
+ */
+static int
+__session_rebalance_readonly(
+ WT_SESSION *wt_session, const char *uri, const char *config)
+{
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
+
+ return (__wt_session_notsup(wt_session));
+}
+
+/*
* __session_rename --
* WT_SESSION->rename method.
*/
@@ -597,14 +665,30 @@ __session_rename(WT_SESSION *wt_session,
WT_ERR(__wt_str_name_check(session, uri));
WT_ERR(__wt_str_name_check(session, newuri));
- WT_WITH_SCHEMA_LOCK(session, ret,
- WT_WITH_TABLE_LOCK(session, ret,
- ret = __wt_schema_rename(session, uri, newuri, cfg)));
+ WT_WITH_CHECKPOINT_LOCK(session, ret,
+ WT_WITH_SCHEMA_LOCK(session, ret,
+ WT_WITH_TABLE_LOCK(session, ret,
+ ret = __wt_schema_rename(session, uri, newuri, cfg))));
err: API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
+ * __session_rename_readonly --
+ * WT_SESSION->rename method; readonly version.
+ */
+static int
+__session_rename_readonly(WT_SESSION *wt_session,
+ const char *uri, const char *newuri, const char *config)
+{
+ WT_UNUSED(uri);
+ WT_UNUSED(newuri);
+ WT_UNUSED(config);
+
+ return (__wt_session_notsup(wt_session));
+}
+
+/*
* __session_reset --
* WT_SESSION->reset method.
*/
@@ -646,9 +730,10 @@ __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
if (!lock_wait)
F_SET(session, WT_SESSION_LOCK_NO_WAIT);
- WT_WITH_SCHEMA_LOCK(session, ret,
- WT_WITH_TABLE_LOCK(session, ret,
- ret = __wt_schema_drop(session, uri, cfg)));
+ WT_WITH_CHECKPOINT_LOCK(session, ret,
+ WT_WITH_SCHEMA_LOCK(session, ret,
+ WT_WITH_TABLE_LOCK(session, ret,
+ ret = __wt_schema_drop(session, uri, cfg))));
if (!lock_wait)
F_CLR(session, WT_SESSION_LOCK_NO_WAIT);
@@ -679,6 +764,20 @@ err: /* Note: drop operations cannot be unrolled (yet?). */
}
/*
+ * __session_drop_readonly --
+ * WT_SESSION->drop method; readonly version.
+ */
+static int
+__session_drop_readonly(
+ WT_SESSION *wt_session, const char *uri, const char *config)
+{
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
+
+ return (__wt_session_notsup(wt_session));
+}
+
+/*
* __session_join --
* WT_SESSION->join method.
*/
@@ -823,6 +922,20 @@ err: API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
+ * __session_salvage_readonly --
+ * WT_SESSION->salvage method; readonly version.
+ */
+static int
+__session_salvage_readonly(
+ WT_SESSION *wt_session, const char *uri, const char *config)
+{
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
+
+ return (__wt_session_notsup(wt_session));
+}
+
+/*
* __wt_session_range_truncate --
* Session handling of a range truncate.
*/
@@ -1004,6 +1117,22 @@ err: TXN_API_END_RETRY(session, ret, 0);
}
/*
+ * __session_truncate_readonly --
+ * WT_SESSION->truncate method; readonly version.
+ */
+static int
+__session_truncate_readonly(WT_SESSION *wt_session,
+ const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
+{
+ WT_UNUSED(uri);
+ WT_UNUSED(start);
+ WT_UNUSED(stop);
+ WT_UNUSED(config);
+
+ return (__wt_session_notsup(wt_session));
+}
+
+/*
* __session_upgrade --
* WT_SESSION->upgrade method.
*/
@@ -1026,6 +1155,20 @@ err: API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
+ * __session_upgrade_readonly --
+ * WT_SESSION->upgrade method; readonly version.
+ */
+static int
+__session_upgrade_readonly(
+ WT_SESSION *wt_session, const char *uri, const char *config)
+{
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
+
+ return (__wt_session_notsup(wt_session));
+}
+
+/*
* __session_verify --
* WT_SESSION->verify method.
*/
@@ -1247,6 +1390,18 @@ err: API_END_RET(session, ret);
}
/*
+ * __session_transaction_sync_readonly --
+ * WT_SESSION->transaction_sync method; readonly version.
+ */
+static int
+__session_transaction_sync_readonly(WT_SESSION *wt_session, const char *config)
+{
+ WT_UNUSED(config);
+
+ return (__wt_session_notsup(wt_session));
+}
+
+/*
* __session_checkpoint --
* WT_SESSION->checkpoint method.
*/
@@ -1295,6 +1450,18 @@ err: API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
+ * __session_checkpoint_readonly --
+ * WT_SESSION->checkpoint method; readonly version.
+ */
+static int
+__session_checkpoint_readonly(WT_SESSION *wt_session, const char *config)
+{
+ WT_UNUSED(config);
+
+ return (__wt_session_notsup(wt_session));
+}
+
+/*
* __session_snapshot --
* WT_SESSION->snapshot method.
*/
@@ -1380,6 +1547,33 @@ __open_session(WT_CONNECTION_IMPL *conn,
__session_snapshot,
__session_transaction_pinned_range,
__session_transaction_sync
+ }, stds_readonly = {
+ NULL,
+ NULL,
+ __session_close,
+ __session_reconfigure,
+ __session_strerror,
+ __session_open_cursor,
+ __session_create_readonly,
+ __wt_session_compact_readonly,
+ __session_drop_readonly,
+ __session_join,
+ __session_log_flush_readonly,
+ __session_log_printf_readonly,
+ __session_rebalance_readonly,
+ __session_rename_readonly,
+ __session_reset,
+ __session_salvage_readonly,
+ __session_truncate_readonly,
+ __session_upgrade_readonly,
+ __session_verify,
+ __session_begin_transaction,
+ __session_commit_transaction,
+ __session_rollback_transaction,
+ __session_checkpoint_readonly,
+ __session_snapshot,
+ __session_transaction_pinned_range,
+ __session_transaction_sync_readonly
};
WT_DECL_RET;
WT_SESSION_IMPL *session, *session_ret;
@@ -1407,7 +1601,7 @@ __open_session(WT_CONNECTION_IMPL *conn,
if (i == conn->session_size)
WT_ERR_MSG(session, ENOMEM,
"only configured to support %" PRIu32 " sessions"
- " (including %d additional internal sessions)",
+ " (including %" PRIu32 " additional internal sessions)",
conn->session_size, WT_EXTRA_INTERNAL_SESSIONS);
/*
@@ -1419,7 +1613,8 @@ __open_session(WT_CONNECTION_IMPL *conn,
conn->session_cnt = i + 1;
session_ret->id = i;
- session_ret->iface = stds;
+ session_ret->iface =
+ F_ISSET(conn, WT_CONN_READONLY) ? stds_readonly : stds;
session_ret->iface.connection = &conn->iface;
WT_ERR(__wt_cond_alloc(session, "session", false, &session_ret->cond));
diff --git a/src/session/session_compact.c b/src/session/session_compact.c
index 5abccbd1366..2a53ad58f52 100644
--- a/src/session/session_compact.c
+++ b/src/session/session_compact.c
@@ -97,13 +97,13 @@
*/
/*
- * __wt_compact_uri_analyze --
+ * __compact_uri_analyze --
* Extract information relevant to deciding what work compact needs to
* do from a URI that is part of a table schema.
* Called via the schema_worker function.
*/
-int
-__wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp)
+static int
+__compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp)
{
/*
* Add references to schema URI objects to the list of objects to be
@@ -120,6 +120,61 @@ __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp)
}
/*
+ * __compact_start --
+ * Start object compaction.
+ */
+static int
+__compact_start(WT_SESSION_IMPL *session)
+{
+ WT_BM *bm;
+
+ bm = S2BT(session)->bm;
+ return (bm->compact_start(bm, session));
+}
+
+/*
+ * __compact_end --
+ * End object compaction.
+ */
+static int
+__compact_end(WT_SESSION_IMPL *session)
+{
+ WT_BM *bm;
+
+ bm = S2BT(session)->bm;
+ return (bm->compact_end(bm, session));
+}
+
+/*
+ * __compact_handle_append --
+ * Gather a file handle to be compacted.
+ * Called via the schema_worker function.
+ */
+static int
+__compact_handle_append(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_DECL_RET;
+
+ WT_UNUSED(cfg);
+
+ /* Make sure there is space for the next entry. */
+ WT_RET(__wt_realloc_def(session, &session->op_handle_allocated,
+ session->op_handle_next + 1, &session->op_handle));
+
+ WT_RET(__wt_session_get_btree(
+ session, session->dhandle->name, NULL, NULL, 0));
+
+ /* Set compact active on the handle. */
+ if ((ret = __compact_start(session)) != 0) {
+ WT_TRET(__wt_session_release_btree(session));
+ return (ret);
+ }
+
+ session->op_handle[session->op_handle_next++] = session->dhandle;
+ return (0);
+}
+
+/*
* __session_compact_check_timeout --
* Check if the timeout has been exceeded.
*/
@@ -143,21 +198,25 @@ __session_compact_check_timeout(
* Function to alternate between checkpoints and compaction calls.
*/
static int
-__compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
+__compact_file(WT_SESSION_IMPL *session, const char *cfg[])
{
struct timespec start_time;
+ WT_DATA_HANDLE *dhandle;
WT_DECL_ITEM(t);
WT_DECL_RET;
int i;
const char *checkpoint_cfg[] = {
WT_CONFIG_BASE(session, WT_SESSION_checkpoint), NULL, NULL };
+ dhandle = session->dhandle;
+
/*
* Force the checkpoint: we don't want to skip it because the work we
* need to have done is done in the underlying block manager.
*/
WT_ERR(__wt_scr_alloc(session, 128, &t));
- WT_ERR(__wt_buf_fmt(session, t, "target=(\"%s\"),force=1", uri));
+ WT_ERR(__wt_buf_fmt(
+ session, t, "target=(\"%s\"),force=1", dhandle->name));
checkpoint_cfg[1] = t->data;
WT_ERR(__wt_epoch(session, &start_time));
@@ -173,9 +232,8 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
WT_ERR(__wt_txn_checkpoint(session, checkpoint_cfg));
session->compact_state = WT_COMPACT_RUNNING;
- WT_WITH_SCHEMA_LOCK(session, ret,
- ret = __wt_schema_worker(
- session, uri, __wt_compact, NULL, cfg, 0));
+ WT_WITH_DHANDLE(session, dhandle,
+ ret = __wt_compact(session, cfg));
WT_ERR(ret);
if (session->compact_state != WT_COMPACT_SUCCESS)
break;
@@ -193,6 +251,7 @@ err: session->compact_state = WT_COMPACT_NONE;
/*
* __wt_session_compact --
+ * WT_SESSION.compact method.
*/
int
__wt_session_compact(
@@ -203,6 +262,7 @@ __wt_session_compact(
WT_DECL_RET;
WT_SESSION_IMPL *session;
WT_TXN *txn;
+ u_int i;
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, compact, config, cfg);
@@ -227,10 +287,10 @@ __wt_session_compact(
WT_ERR(__wt_config_gets(session, cfg, "timeout", &cval));
session->compact->max_time = (uint64_t)cval.val;
- /* Find the types of data sources are being compacted. */
+ /* Find the types of data sources being compacted. */
WT_WITH_SCHEMA_LOCK(session, ret,
- ret = __wt_schema_worker(
- session, uri, NULL, __wt_compact_uri_analyze, cfg, 0));
+ ret = __wt_schema_worker(session, uri,
+ __compact_handle_append, __compact_uri_analyze, cfg, 0));
WT_ERR(ret);
if (session->compact->lsm_count != 0)
@@ -247,11 +307,25 @@ __wt_session_compact(
WT_ERR_MSG(session, EINVAL,
" File compaction not permitted in a transaction");
- WT_ERR(__compact_file(session, uri, cfg));
+ for (i = 0; i < session->op_handle_next; ++i) {
+ WT_WITH_DHANDLE(session, session->op_handle[i],
+ ret = __compact_file(session, cfg));
+ WT_ERR(ret);
+ }
}
err: session->compact = NULL;
+ for (i = 0; i < session->op_handle_next; ++i) {
+ WT_WITH_DHANDLE(session, session->op_handle[i],
+ WT_TRET(__compact_end(session)));
+ WT_WITH_DHANDLE(session, session->op_handle[i],
+ WT_TRET(__wt_session_release_btree(session)));
+ }
+
+ __wt_free(session, session->op_handle);
+ session->op_handle_allocated = session->op_handle_next = 0;
+
/*
* Release common session resources (for example, checkpoint may acquire
* significant reconciliation structures/memory).
@@ -260,3 +334,17 @@ err: session->compact = NULL;
API_END_RET_NOTFOUND_MAP(session, ret);
}
+
+/*
+ * __wt_session_compact_readonly --
+ * WT_SESSION.compact method; readonly version.
+ */
+int
+__wt_session_compact_readonly(
+ WT_SESSION *wt_session, const char *uri, const char *config)
+{
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
+
+ return (__wt_session_notsup(wt_session));
+}
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c
index 1ee3342442c..ddf4d3dfa33 100644
--- a/src/session/session_dhandle.c
+++ b/src/session/session_dhandle.c
@@ -15,24 +15,21 @@ static int __session_dhandle_sweep(WT_SESSION_IMPL *);
* Add a handle to the session's cache.
*/
static int
-__session_add_dhandle(
- WT_SESSION_IMPL *session, WT_DATA_HANDLE_CACHE **dhandle_cachep)
+__session_add_dhandle(WT_SESSION_IMPL *session)
{
WT_DATA_HANDLE_CACHE *dhandle_cache;
uint64_t bucket;
+ /* Allocate a handle cache entry. */
WT_RET(__wt_calloc_one(session, &dhandle_cache));
+
dhandle_cache->dhandle = session->dhandle;
bucket = dhandle_cache->dhandle->name_hash % WT_HASH_ARRAY_SIZE;
TAILQ_INSERT_HEAD(&session->dhandles, dhandle_cache, q);
TAILQ_INSERT_HEAD(&session->dhhash[bucket], dhandle_cache, hashq);
- if (dhandle_cachep != NULL)
- *dhandle_cachep = dhandle_cache;
-
- /* Sweep the handle list to remove any dead handles. */
- return (__session_dhandle_sweep(session));
+ return (0);
}
/*
@@ -450,14 +447,23 @@ __session_get_dhandle(
return (0);
}
+ /* Sweep the handle list to remove any dead handles. */
+ WT_RET(__session_dhandle_sweep(session));
+
/*
* We didn't find a match in the session cache, search the shared
* handle list and cache the handle we find.
*/
WT_WITH_HANDLE_LIST_LOCK(session,
ret = __session_find_shared_dhandle(session, uri, checkpoint));
- if (ret == 0)
- ret = __session_add_dhandle(session, NULL);
+ WT_RET(ret);
+
+ /*
+ * Fix up the reference count on failure (we incremented the reference
+ * count while holding the handle-list lock).
+ */
+ if ((ret = __session_add_dhandle(session)) != 0)
+ (void)__wt_atomic_sub32(&session->dhandle->session_ref, 1);
return (ret);
}
@@ -571,7 +577,7 @@ __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint)
* files, since changes to the underlying file are visible to the in
* memory pages.
*/
- WT_ERR(__wt_cache_op(session, NULL, WT_SYNC_DISCARD));
+ WT_ERR(__wt_cache_op(session, WT_SYNC_DISCARD));
/*
* We lock checkpoint handles that we are overwriting, so the handle
diff --git a/src/support/cksum.c b/src/support/cksum.c
index c2982c40015..0b086753406 100644
--- a/src/support/cksum.c
+++ b/src/support/cksum.c
@@ -1260,6 +1260,23 @@ __wt_cksum_hw(const void *chunk, size_t len)
}
#endif
+#if defined(__powerpc64__)
+
+unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p,
+ unsigned long len);
+
+/*
+ * __wt_cksum_hw --
+ * Return a checksum for a chunk of memory, computed in hardware
+ * using 8 byte steps.
+ */
+static uint32_t
+__wt_cksum_hw(const void *chunk, size_t len)
+{
+ return crc32_vpmsum(0, chunk, len);
+}
+#endif
+
/*
* __wt_cksum --
* Return a checksum for a chunk of memory using the fastest method
@@ -1302,6 +1319,8 @@ __wt_cksum_init(void)
__wt_cksum_func = __wt_cksum_hw;
else
__wt_cksum_func = __wt_cksum_sw;
+#elif defined(__powerpc64__)
+ __wt_cksum_func = __wt_cksum_hw;
#else
__wt_cksum_func = __wt_cksum_sw;
#endif
diff --git a/src/support/cond_auto.c b/src/support/cond_auto.c
new file mode 100644
index 00000000000..ec95622f333
--- /dev/null
+++ b/src/support/cond_auto.c
@@ -0,0 +1,136 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * This is an implementation of condition variables that automatically adjust
+ * the wait time depending on whether the wake is resulting in useful work.
+ */
+
+/*
+ * __wt_cond_auto_alloc --
+ * Allocate and initialize an automatically adjusting condition variable.
+ */
+int
+__wt_cond_auto_alloc(
+ WT_SESSION_IMPL *session, const char *name,
+ bool is_signalled, uint64_t min, uint64_t max, WT_CONDVAR **condp)
+{
+ WT_CONDVAR *cond;
+
+ WT_RET(__wt_cond_alloc(session, name, is_signalled, condp));
+ cond = *condp;
+
+ cond->min_wait = min;
+ cond->max_wait = max;
+ cond->prev_wait = min;
+
+ return (0);
+}
+
+/*
+ * __wt_cond_auto_signal --
+ * Signal a condition variable.
+ */
+int
+__wt_cond_auto_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond)
+{
+
+ WT_ASSERT(session, cond->min_wait != 0);
+ return (__wt_cond_signal(session, cond));
+}
+
+/*
+ * __wt_cond_auto_wait_signal --
+ * Wait on a condition variable, optionally timing out. If it's signalled
+ * before the timeout period expires, let the caller know.
+ * TODO: Can this version of the API be removed, now that we have the
+ * auto adjusting condition variables?
+ */
+int
+__wt_cond_auto_wait_signal(
+ WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool *signalled)
+{
+ uint64_t delta;
+
+ /*
+ * Catch cases where this function is called with a condition variable
+ * that was initialized non-auto.
+ */
+ WT_ASSERT(session, cond->min_wait != 0);
+
+ WT_STAT_FAST_CONN_INCR(session, cond_auto_wait);
+ if (progress)
+ cond->prev_wait = cond->min_wait;
+ else {
+ delta = WT_MAX(1, (cond->max_wait - cond->min_wait) / 10);
+ cond->prev_wait = WT_MIN(
+ cond->max_wait, cond->prev_wait + delta);
+ }
+
+ WT_RET(__wt_cond_wait_signal(
+ session, cond, cond->prev_wait, signalled));
+
+ if (progress || *signalled)
+ WT_STAT_FAST_CONN_INCR(session, cond_auto_wait_reset);
+ if (*signalled)
+ cond->prev_wait = cond->min_wait;
+
+ return (0);
+}
+
+/*
+ * __wt_cond_auto_wait --
+ * Wait on a condition variable, optionally timing out. Whether the wait
+ * was signalled or timed out is not reported to the caller.
+ */
+int
+__wt_cond_auto_wait(
+ WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress)
+{
+ bool signalled;
+
+ /*
+ * Call the signal version so the wait period is reset if the
+ * condition is woken explicitly.
+ */
+ WT_RET(__wt_cond_auto_wait_signal(session, cond, progress, &signalled));
+
+ return (0);
+}
+
+/*
+ * __wt_cond_auto_destroy --
+ * Destroy a condition variable.
+ */
+int
+__wt_cond_auto_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp)
+{
+ return (__wt_cond_destroy(session, condp));
+}
diff --git a/src/support/huffman.c b/src/support/huffman.c
index edd0bc9f648..1e1aaeab5b5 100644
--- a/src/support/huffman.c
+++ b/src/support/huffman.c
@@ -492,11 +492,12 @@ __wt_huffman_open(WT_SESSION_IMPL *session,
uint8_t symbol;
uint32_t weighted_length;
- printf("leaf depth %" PRIu16 "..%" PRIu16 ", memory use: "
- "codes %u# * %uB + code2symbol %u# * %uB\n",
+ printf("leaf depth %" PRIu16 "..%" PRIu16
+ ", memory use: codes %u# * %" WT_SIZET_FMT
+ "B + code2symbol %u# * %" WT_SIZET_FMT "B\n",
huffman->min_depth, huffman->max_depth,
- huffman->numSymbols, (u_int)sizeof(WT_HUFFMAN_CODE),
- 1U << huffman->max_depth, (u_int)sizeof(uint16_t));
+ huffman->numSymbols, sizeof(WT_HUFFMAN_CODE),
+ 1U << huffman->max_depth, sizeof(uint16_t));
/*
* measure quality of computed Huffman codes, for different max bit
diff --git a/src/support/power8/LICENSE.TXT b/src/support/power8/LICENSE.TXT
new file mode 100644
index 00000000000..2f4bb91f574
--- /dev/null
+++ b/src/support/power8/LICENSE.TXT
@@ -0,0 +1,476 @@
+Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
+
+crc32-vpmsum is free software; you can redistribute it and/or
+modify it under the terms of either:
+
+ a) the GNU General Public License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version., or
+ b) the Apache License, Version 2.0
+
+
+
+
+
+
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+
+
+
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
diff --git a/src/support/power8/README.md b/src/support/power8/README.md
new file mode 100644
index 00000000000..3e2976650cd
--- /dev/null
+++ b/src/support/power8/README.md
@@ -0,0 +1,208 @@
+crc32-vpmsum
+============
+
+A set of examples for accelerating CRC32 calculations using the vector
+polynomial multiply sum (vpmsum) instructions introduced in POWER8. These
+instructions implement byte, halfword, word and doubleword carryless
+multiply/add.
+
+Performance
+-----------
+
+An implementation of slice-by-8, one of the fastest lookup table methods
+is included so we can compare performance against it. Testing 5000000
+iterations of a CRC of 32 kB of data (to keep it L1 cache contained):
+
+```
+# time slice_by_8_bench 32768 5000000
+122.220 seconds
+
+# time crc32_bench 32768 5000000
+2.937 seconds
+```
+
+The vpmsum accelerated CRC is just over 41x faster.
+
+This test was run on a 4.1 GHz POWER8, so the algorithm sustains about
+52 GiB/sec or 13.6 bytes/cycle. The theoretical limit is 16 bytes/cycle
+since we can execute a maximum of one vpmsum instruction per cycle.
+
+In another test, a version was added to the kernel and btrfs write
+performance was shown to be 3.8x faster. The test was done to a ramdisk
+to mitigate any I/O induced variability.
+
+Quick start
+-----------
+
+- Modify CRC and OPTIONS in the Makefile. There are examples for the two most
+ common crc32s.
+
+- Type make to create the constants (crc32_constants.h)
+
+- Import the code into your application (crc32.S crc32_wrapper.c
+ crc32_constants.h ppc-opcode.h) and call the CRC:
+
+```
+unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p, unsigned long len);
+```
+
+CRC background
+--------------
+
+For a good background on CRCs, check out:
+
+http://www.ross.net/crc/download/crc_v3.txt
+
+A few key points:
+
+- A CRC is the remainder after dividing a message by the CRC polynomial,
+ ie M mod CRC_POLY
+- multiply/divide is carryless
+- add/subtract is an xor
+- n (where n is the order of the CRC) bits of zeroes are appended to the
+ end of the message.
+
+One more important piece of information - a CRC is a linear function, so:
+
+```
+ CRC(A xor B) = CRC(A) xor CRC(B)
+
+ CRC(A . B) = CRC(A) . CRC(B) (remember this is carryless multiply)
+```
+
+If we take 64bits of data, represented by two 32 bit chunks (AAAAAAAA
+and BBBBBBBB):
+
+```
+CRC(AAAAAAAABBBBBBBB)
+ = CRC(AAAAAAAA00000000 xor BBBBBBBB)
+ = CRC(AAAAAAAA00000000) xor CRC(BBBBBBBB)
+```
+
+If we operate on AAAAAAAA:
+
+```
+CRC(AAAAAAAA00000000)
+ = CRC(AAAAAAAA . 100000000)
+ = CRC(AAAAAAAA) . CRC(100000000)
+```
+
+And CRC(100000000) is a constant which we can pre-calculate:
+
+```
+CRC(100000000)
+ = 100000000 mod CRC_POLY
+ = 2^32 mod CRC_POLY
+```
+
+Finally we can add our modified AAAAAAAA to BBBBBBBB:
+
+```
+CRC(AAAAAAAABBBBBBBB)
+ = ((2^32 mod CRC_POLY) . CRC(AAAAAAAA)) xor CRC(BBBBBBBB)
+```
+
+In other words, with the right constants pre-calculated we can shift the
+input data around and we can also calculate the CRC in as many parallel
+chunks as we want.
+
+No matter how much shifting we do, the final result will be 64 bits of
+data (63 actually, because there is no carry into the top bit). To reduce
+it further we need another trick, and that is Barrett reduction:
+
+http://en.wikipedia.org/wiki/Barrett_reduction
+
+Barrett reduction is a method of calculating a mod n. The idea is to
+calculate q, the multiple of our polynomial that we need to subtract. By
+doing the computation 2x bits higher (ie 64 bits) and shifting the
+result back down 2x bits, we round down to the nearest multiple.
+
+```
+ k = 32
+ m = floor((4^k)/n) = floor((4^32)/n)
+ n = CRC polynomial
+ a = 64 bits of data
+
+ q = floor(ma/(2^64))
+ result = a - qn
+```
+
+An example in the floating point domain makes it clearer how this works:
+
+```
+a mod n = a - floor(am) * n
+```
+
+Let's use it to calculate 22 mod 10:
+
+```
+ a = 22
+ n = 10
+ m = 1/n = 1/10 = 0.1
+
+22 mod 10
+ = 22 - floor(22*0.1) * 10
+ = 22 - 2 * 10
+ = 22 - 20
+ = 2
+```
+
+There is one more issue left - bit reflection. Some CRCs are defined to
+operate on the least significant bit first (eg CRC32c). Let's look at
+how this would get laid out in a register, and let's simplify it to just
+two bytes (vs a 16 byte VMX register):
+
+ [ 8..15 ] [ 0..7 ]
+
+Notice how the bits and bytes are out of order. Since we are doing
+multi word multiplication on these values we need them to both be
+in order.
+
+The simplest way to fix this is to reflect the bits in each byte:
+
+ [ 15..8 ] [ 7..0 ]
+
+However shuffling bits in a byte is expensive on most CPUs. It is
+however relatively cheap to shuffle bytes around. What if we load
+the bytes in reversed:
+
+ [ 0..7 ] [ 8..15 ]
+
+Now the bits and bytes are in order, except the least significant bit
+of the register is now on the left and the most significant bit is on the
+right. We operate as if the register is reflected, which normally we
+cannot do. The reason we get away with this is our multiplies are carryless
+and our addition and subtraction is xor, so our operations never create
+carries.
+
+The only trick is we have to shift the result of multiplies left one
+because the high bit of the multiply is always 0, and we want that high bit
+on the right not the left.
+
+Implementation
+--------------
+
+The vpmsum instructions on POWER8 have a 6 cycle latency and we can
+execute one every cycle. In light of this the main loop has 8 parallel
+streams which consume 8 x 16 B each iteration. At the completion of this
+loop we have taken 32 kB of data and reduced it to 8 x 16 B (128 B).
+
+The next step is to take this 128 B and reduce it to 8 B. At this stage
+we also add 32 bits of 0 to the end.
+
+We then apply Barrett reduction to get our CRC.
+
+Examples
+--------
+- barrett_reduction: An example of Barrett reduction
+
+- final_fold: Starting with 128 bits, add 32 bits of zeros and reduce it to
+ 64 bits, then apply Barrett reduction
+
+- final_fold2: A second method of reduction
+
+Acknowledgements
+----------------
+
+Thanks to Michael Gschwind, Jeff Derby, Lorena Pesantez and Stewart Smith
+for their ideas and assistance.
diff --git a/src/support/power8/crc32.S b/src/support/power8/crc32.S
new file mode 100644
index 00000000000..c0b81143f07
--- /dev/null
+++ b/src/support/power8/crc32.S
@@ -0,0 +1,771 @@
+#if defined(__powerpc64__)
+/*
+ * Calculate the checksum of data that is 16 byte aligned and a multiple of
+ * 16 bytes.
+ *
+ * The first step is to reduce it to 1024 bits. We do this in 8 parallel
+ * chunks in order to mask the latency of the vpmsum instructions. If we
+ * have more than 32 kB of data to checksum we repeat this step multiple
+ * times, passing in the previous 1024 bits.
+ *
+ * The next step is to reduce the 1024 bits to 64 bits. This step adds
+ * 32 bits of 0s to the end - this matches what a CRC does. We just
+ * calculate constants that land the data in these 32 bits.
+ *
+ * We then use fixed point Barrett reduction to compute a mod n over GF(2)
+ * for n = CRC using POWER8 instructions. We use x = 32.
+ *
+ * http://en.wikipedia.org/wiki/Barrett_reduction
+ *
+ * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <ppc-asm.h>
+#include "ppc-opcode.h"
+
+#undef toc
+
+#ifndef r1
+#define r1 1
+#endif
+
+#ifndef r2
+#define r2 2
+#endif
+
+ .section .rodata
+.balign 16
+
+.byteswap_constant:
+ /* byte reverse permute constant */
+ .octa 0x0F0E0D0C0B0A09080706050403020100
+
+#define __ASSEMBLY__
+#include "crc32_constants.h"
+
+ .text
+
+#if defined(__BIG_ENDIAN__) && defined(REFLECT)
+#define BYTESWAP_DATA
+#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
+#define BYTESWAP_DATA
+#else
+#undef BYTESWAP_DATA
+#endif
+
+#define off16 r25
+#define off32 r26
+#define off48 r27
+#define off64 r28
+#define off80 r29
+#define off96 r30
+#define off112 r31
+
+#define const1 v24
+#define const2 v25
+
+#define byteswap v26
+#define mask_32bit v27
+#define mask_64bit v28
+#define zeroes v29
+
+#ifdef BYTESWAP_DATA /* byte swap enabled: VPERM emits a real vperm */
+#define VPERM(A, B, C, D) vperm A, B, C, D
+#else /* no byte swap: VPERM expands to nothing */
+#define VPERM(A, B, C, D)
+#endif /* BYTESWAP_DATA */
+
+/* unsigned int __crc32_vpmsum(unsigned int crc, void *p, unsigned long len) */
+FUNC_START(__crc32_vpmsum)
+ std r31,-8(r1)
+ std r30,-16(r1)
+ std r29,-24(r1)
+ std r28,-32(r1)
+ std r27,-40(r1)
+ std r26,-48(r1)
+ std r25,-56(r1)
+
+ li off16,16
+ li off32,32
+ li off48,48
+ li off64,64
+ li off80,80
+ li off96,96
+ li off112,112
+ li r0,0
+
+ /* Enough room for saving 10 non volatile VMX registers */
+ subi r6,r1,56+10*16
+ subi r7,r1,56+2*16
+
+ stvx v20,0,r6
+ stvx v21,off16,r6
+ stvx v22,off32,r6
+ stvx v23,off48,r6
+ stvx v24,off64,r6
+ stvx v25,off80,r6
+ stvx v26,off96,r6
+ stvx v27,off112,r6
+ stvx v28,0,r7
+ stvx v29,off16,r7
+
+ mr r10,r3
+
+ vxor zeroes,zeroes,zeroes
+ vspltisw v0,-1
+
+ vsldoi mask_32bit,zeroes,v0,4
+ vsldoi mask_64bit,zeroes,v0,8
+
+ /* Get the initial value into v8 */
+ vxor v8,v8,v8
+ MTVRD(v8, r3)
+#ifdef REFLECT
+ vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */
+#else
+ vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */
+#endif
+
+#ifdef BYTESWAP_DATA
+ addis r3,r2,.byteswap_constant@toc@ha
+ addi r3,r3,.byteswap_constant@toc@l
+
+ lvx byteswap,0,r3
+ addi r3,r3,16
+#endif
+
+ cmpdi r5,256
+ blt .Lshort
+
+ rldicr r6,r5,0,56
+
+ /* Checksum in blocks of MAX_SIZE */
+1: lis r7,MAX_SIZE@h
+ ori r7,r7,MAX_SIZE@l
+ mr r9,r7
+ cmpd r6,r7
+ bgt 2f
+ mr r7,r6
+2: subf r6,r7,r6
+
+ /* our main loop does 128 bytes at a time */
+ srdi r7,r7,7
+
+ /*
+ * Work out the offset into the constants table to start at. Each
+ * constant is 16 bytes, and it is used against 128 bytes of input
+ * data - 128 / 16 = 8
+ */
+ sldi r8,r7,4
+ srdi r9,r9,3
+ subf r8,r8,r9
+
+ /* We reduce our final 128 bytes in a separate step */
+ addi r7,r7,-1
+ mtctr r7
+
+ addis r3,r2,.constants@toc@ha
+ addi r3,r3,.constants@toc@l
+
+ /* Find the start of our constants */
+ add r3,r3,r8
+
+ /* zero v0-v7 which will contain our checksums */
+ vxor v0,v0,v0
+ vxor v1,v1,v1
+ vxor v2,v2,v2
+ vxor v3,v3,v3
+ vxor v4,v4,v4
+ vxor v5,v5,v5
+ vxor v6,v6,v6
+ vxor v7,v7,v7
+
+ lvx const1,0,r3
+
+ /*
+ * If we are looping back to consume more data we use the values
+ * already in v16-v23.
+ */
+ cmpdi r0,1
+ beq 2f
+
+ /* First warm up pass */
+ lvx v16,0,r4
+ lvx v17,off16,r4
+ VPERM(v16,v16,v16,byteswap)
+ VPERM(v17,v17,v17,byteswap)
+ lvx v18,off32,r4
+ lvx v19,off48,r4
+ VPERM(v18,v18,v18,byteswap)
+ VPERM(v19,v19,v19,byteswap)
+ lvx v20,off64,r4
+ lvx v21,off80,r4
+ VPERM(v20,v20,v20,byteswap)
+ VPERM(v21,v21,v21,byteswap)
+ lvx v22,off96,r4
+ lvx v23,off112,r4
+ VPERM(v22,v22,v22,byteswap)
+ VPERM(v23,v23,v23,byteswap)
+ addi r4,r4,8*16
+
+ /* xor in initial value */
+ vxor v16,v16,v8
+
+2: bdz .Lfirst_warm_up_done
+
+ addi r3,r3,16
+ lvx const2,0,r3
+
+ /* Second warm up pass */
+ VPMSUMD(v8,v16,const1)
+ lvx v16,0,r4
+ VPERM(v16,v16,v16,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v9,v17,const1)
+ lvx v17,off16,r4
+ VPERM(v17,v17,v17,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v10,v18,const1)
+ lvx v18,off32,r4
+ VPERM(v18,v18,v18,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v11,v19,const1)
+ lvx v19,off48,r4
+ VPERM(v19,v19,v19,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v12,v20,const1)
+ lvx v20,off64,r4
+ VPERM(v20,v20,v20,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v13,v21,const1)
+ lvx v21,off80,r4
+ VPERM(v21,v21,v21,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v14,v22,const1)
+ lvx v22,off96,r4
+ VPERM(v22,v22,v22,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v15,v23,const1)
+ lvx v23,off112,r4
+ VPERM(v23,v23,v23,byteswap)
+
+ addi r4,r4,8*16
+
+ bdz .Lfirst_cool_down
+
+ /*
+ * main loop. We modulo schedule it such that it takes three iterations
+ * to complete - first iteration load, second iteration vpmsum, third
+ * iteration xor.
+ */
+ .balign 16
+4: lvx const1,0,r3
+ addi r3,r3,16
+ ori r2,r2,0
+
+ vxor v0,v0,v8
+ VPMSUMD(v8,v16,const2)
+ lvx v16,0,r4
+ VPERM(v16,v16,v16,byteswap)
+ ori r2,r2,0
+
+ vxor v1,v1,v9
+ VPMSUMD(v9,v17,const2)
+ lvx v17,off16,r4
+ VPERM(v17,v17,v17,byteswap)
+ ori r2,r2,0
+
+ vxor v2,v2,v10
+ VPMSUMD(v10,v18,const2)
+ lvx v18,off32,r4
+ VPERM(v18,v18,v18,byteswap)
+ ori r2,r2,0
+
+ vxor v3,v3,v11
+ VPMSUMD(v11,v19,const2)
+ lvx v19,off48,r4
+ VPERM(v19,v19,v19,byteswap)
+ lvx const2,0,r3
+ ori r2,r2,0
+
+ vxor v4,v4,v12
+ VPMSUMD(v12,v20,const1)
+ lvx v20,off64,r4
+ VPERM(v20,v20,v20,byteswap)
+ ori r2,r2,0
+
+ vxor v5,v5,v13
+ VPMSUMD(v13,v21,const1)
+ lvx v21,off80,r4
+ VPERM(v21,v21,v21,byteswap)
+ ori r2,r2,0
+
+ vxor v6,v6,v14
+ VPMSUMD(v14,v22,const1)
+ lvx v22,off96,r4
+ VPERM(v22,v22,v22,byteswap)
+ ori r2,r2,0
+
+ vxor v7,v7,v15
+ VPMSUMD(v15,v23,const1)
+ lvx v23,off112,r4
+ VPERM(v23,v23,v23,byteswap)
+
+ addi r4,r4,8*16
+
+ bdnz 4b
+
+.Lfirst_cool_down:
+ /* First cool down pass */
+ lvx const1,0,r3
+ addi r3,r3,16
+
+ vxor v0,v0,v8
+ VPMSUMD(v8,v16,const1)
+ ori r2,r2,0
+
+ vxor v1,v1,v9
+ VPMSUMD(v9,v17,const1)
+ ori r2,r2,0
+
+ vxor v2,v2,v10
+ VPMSUMD(v10,v18,const1)
+ ori r2,r2,0
+
+ vxor v3,v3,v11
+ VPMSUMD(v11,v19,const1)
+ ori r2,r2,0
+
+ vxor v4,v4,v12
+ VPMSUMD(v12,v20,const1)
+ ori r2,r2,0
+
+ vxor v5,v5,v13
+ VPMSUMD(v13,v21,const1)
+ ori r2,r2,0
+
+ vxor v6,v6,v14
+ VPMSUMD(v14,v22,const1)
+ ori r2,r2,0
+
+ vxor v7,v7,v15
+ VPMSUMD(v15,v23,const1)
+ ori r2,r2,0
+
+.Lsecond_cool_down:
+ /* Second cool down pass */
+ vxor v0,v0,v8
+ vxor v1,v1,v9
+ vxor v2,v2,v10
+ vxor v3,v3,v11
+ vxor v4,v4,v12
+ vxor v5,v5,v13
+ vxor v6,v6,v14
+ vxor v7,v7,v15
+
+#ifdef REFLECT
+ /*
+ * vpmsumd produces a 96 bit result in the least significant bits
+ * of the register. Since we are bit reflected we have to shift it
+ * left 32 bits so it occupies the least significant bits in the
+ * bit reflected domain.
+ */
+ vsldoi v0,v0,zeroes,4
+ vsldoi v1,v1,zeroes,4
+ vsldoi v2,v2,zeroes,4
+ vsldoi v3,v3,zeroes,4
+ vsldoi v4,v4,zeroes,4
+ vsldoi v5,v5,zeroes,4
+ vsldoi v6,v6,zeroes,4
+ vsldoi v7,v7,zeroes,4
+#endif
+
+ /* xor with last 1024 bits */
+ lvx v8,0,r4
+ lvx v9,off16,r4
+ VPERM(v8,v8,v8,byteswap)
+ VPERM(v9,v9,v9,byteswap)
+ lvx v10,off32,r4
+ lvx v11,off48,r4
+ VPERM(v10,v10,v10,byteswap)
+ VPERM(v11,v11,v11,byteswap)
+ lvx v12,off64,r4
+ lvx v13,off80,r4
+ VPERM(v12,v12,v12,byteswap)
+ VPERM(v13,v13,v13,byteswap)
+ lvx v14,off96,r4
+ lvx v15,off112,r4
+ VPERM(v14,v14,v14,byteswap)
+ VPERM(v15,v15,v15,byteswap)
+
+ addi r4,r4,8*16
+
+ vxor v16,v0,v8
+ vxor v17,v1,v9
+ vxor v18,v2,v10
+ vxor v19,v3,v11
+ vxor v20,v4,v12
+ vxor v21,v5,v13
+ vxor v22,v6,v14
+ vxor v23,v7,v15
+
+ li r0,1
+ cmpdi r6,0
+ addi r6,r6,128
+ bne 1b
+
+ /* Work out how many bytes we have left */
+ andi. r5,r5,127
+
+ /* Calculate where in the constant table we need to start */
+ subfic r6,r5,128
+ add r3,r3,r6
+
+ /* How many 16 byte chunks are in the tail */
+ srdi r7,r5,4
+ mtctr r7
+
+ /*
+ * Reduce the previously calculated 1024 bits to 64 bits, shifting
+ * 32 bits to include the trailing 32 bits of zeros
+ */
+ lvx v0,0,r3
+ lvx v1,off16,r3
+ lvx v2,off32,r3
+ lvx v3,off48,r3
+ lvx v4,off64,r3
+ lvx v5,off80,r3
+ lvx v6,off96,r3
+ lvx v7,off112,r3
+ addi r3,r3,8*16
+
+ VPMSUMW(v0,v16,v0)
+ VPMSUMW(v1,v17,v1)
+ VPMSUMW(v2,v18,v2)
+ VPMSUMW(v3,v19,v3)
+ VPMSUMW(v4,v20,v4)
+ VPMSUMW(v5,v21,v5)
+ VPMSUMW(v6,v22,v6)
+ VPMSUMW(v7,v23,v7)
+
+ /* Now reduce the tail (0 - 112 bytes) */
+ cmpdi r7,0
+ beq 1f
+
+ lvx v16,0,r4
+ lvx v17,0,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off16,r4
+ lvx v17,off16,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off32,r4
+ lvx v17,off32,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off48,r4
+ lvx v17,off48,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off64,r4
+ lvx v17,off64,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off80,r4
+ lvx v17,off80,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off96,r4
+ lvx v17,off96,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+
+ /* Now xor all the parallel chunks together */
+1: vxor v0,v0,v1
+ vxor v2,v2,v3
+ vxor v4,v4,v5
+ vxor v6,v6,v7
+
+ vxor v0,v0,v2
+ vxor v4,v4,v6
+
+ vxor v0,v0,v4
+
+.Lbarrett_reduction:
+ /* Barrett constants */
+ addis r3,r2,.barrett_constants@toc@ha
+ addi r3,r3,.barrett_constants@toc@l
+
+ lvx const1,0,r3
+ lvx const2,off16,r3
+
+ vsldoi v1,v0,v0,8
+ vxor v0,v0,v1 /* xor two 64 bit results together */
+
+#ifdef REFLECT
+ /* shift left one bit */
+ vspltisb v1,1
+ vsl v0,v0,v1
+#endif
+
+ vand v0,v0,mask_64bit
+
+#ifndef REFLECT
+ /*
+ * Now for the Barrett reduction algorithm. The idea is to calculate q,
+ * the multiple of our polynomial that we need to subtract. By
+ * doing the computation 2x bits higher (ie 64 bits) and shifting the
+ * result back down 2x bits, we round down to the nearest multiple.
+ */
+ VPMSUMD(v1,v0,const1) /* ma */
+ vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */
+ VPMSUMD(v1,v1,const2) /* qn */
+ vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
+
+ /*
+ * Get the result into r3. We need to shift it left 8 bytes:
+ * V0 [ 0 1 2 X ]
+ * V0 [ 0 X 2 3 ]
+ */
+ vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */
+#else
+ /*
+ * The reflected version of Barrett reduction. Instead of bit
+ * reflecting our data (which is expensive to do), we bit reflect our
+ * constants and our algorithm, which means the intermediate data in
+ * our vector registers goes from 0-63 instead of 63-0. We can reflect
+ * the algorithm because we don't carry in mod 2 arithmetic.
+ */
+ vand v1,v0,mask_32bit /* bottom 32 bits of a */
+ VPMSUMD(v1,v1,const1) /* ma */
+ vand v1,v1,mask_32bit /* bottom 32bits of ma */
+ VPMSUMD(v1,v1,const2) /* qn */
+ vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
+
+ /*
+ * Since we are bit reflected, the result (ie the low 32 bits) is in
+ * the high 32 bits. We just need to shift it left 4 bytes
+ * V0 [ 0 1 X 3 ]
+ * V0 [ 0 X 2 3 ]
+ */
+ vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of v0 */
+#endif
+
+ /* Get it into r3 */
+ MFVRD(r3, v0)
+
+.Lout:
+ subi r6,r1,56+10*16
+ subi r7,r1,56+2*16
+
+ lvx v20,0,r6
+ lvx v21,off16,r6
+ lvx v22,off32,r6
+ lvx v23,off48,r6
+ lvx v24,off64,r6
+ lvx v25,off80,r6
+ lvx v26,off96,r6
+ lvx v27,off112,r6
+ lvx v28,0,r7
+ lvx v29,off16,r7
+
+ ld r31,-8(r1)
+ ld r30,-16(r1)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ ld r27,-40(r1)
+ ld r26,-48(r1)
+ ld r25,-56(r1)
+
+ blr
+
+.Lfirst_warm_up_done:
+ lvx const1,0,r3
+ addi r3,r3,16
+
+ VPMSUMD(v8,v16,const1)
+ VPMSUMD(v9,v17,const1)
+ VPMSUMD(v10,v18,const1)
+ VPMSUMD(v11,v19,const1)
+ VPMSUMD(v12,v20,const1)
+ VPMSUMD(v13,v21,const1)
+ VPMSUMD(v14,v22,const1)
+ VPMSUMD(v15,v23,const1)
+
+ b .Lsecond_cool_down
+
+.Lshort:
+ cmpdi r5,0
+ beq .Lzero
+
+ addis r3,r2,.short_constants@toc@ha
+ addi r3,r3,.short_constants@toc@l
+
+ /* Calculate where in the constant table we need to start */
+ subfic r6,r5,256
+ add r3,r3,r6
+
+ /* How many 16 byte chunks? */
+ srdi r7,r5,4
+ mtctr r7
+
+ vxor v19,v19,v19
+ vxor v20,v20,v20
+
+ lvx v0,0,r4
+ lvx v16,0,r3
+ VPERM(v0,v0,v16,byteswap)
+ vxor v0,v0,v8 /* xor in initial value */
+ VPMSUMW(v0,v0,v16)
+ bdz .Lv0
+
+ lvx v1,off16,r4
+ lvx v17,off16,r3
+ VPERM(v1,v1,v17,byteswap)
+ VPMSUMW(v1,v1,v17)
+ bdz .Lv1
+
+ lvx v2,off32,r4
+ lvx v16,off32,r3
+ VPERM(v2,v2,v16,byteswap)
+ VPMSUMW(v2,v2,v16)
+ bdz .Lv2
+
+ lvx v3,off48,r4
+ lvx v17,off48,r3
+ VPERM(v3,v3,v17,byteswap)
+ VPMSUMW(v3,v3,v17)
+ bdz .Lv3
+
+ lvx v4,off64,r4
+ lvx v16,off64,r3
+ VPERM(v4,v4,v16,byteswap)
+ VPMSUMW(v4,v4,v16)
+ bdz .Lv4
+
+ lvx v5,off80,r4
+ lvx v17,off80,r3
+ VPERM(v5,v5,v17,byteswap)
+ VPMSUMW(v5,v5,v17)
+ bdz .Lv5
+
+ lvx v6,off96,r4
+ lvx v16,off96,r3
+ VPERM(v6,v6,v16,byteswap)
+ VPMSUMW(v6,v6,v16)
+ bdz .Lv6
+
+ lvx v7,off112,r4
+ lvx v17,off112,r3
+ VPERM(v7,v7,v17,byteswap)
+ VPMSUMW(v7,v7,v17)
+ bdz .Lv7
+
+ addi r3,r3,128
+ addi r4,r4,128
+
+ lvx v8,0,r4
+ lvx v16,0,r3
+ VPERM(v8,v8,v16,byteswap)
+ VPMSUMW(v8,v8,v16)
+ bdz .Lv8
+
+ lvx v9,off16,r4
+ lvx v17,off16,r3
+ VPERM(v9,v9,v17,byteswap)
+ VPMSUMW(v9,v9,v17)
+ bdz .Lv9
+
+ lvx v10,off32,r4
+ lvx v16,off32,r3
+ VPERM(v10,v10,v16,byteswap)
+ VPMSUMW(v10,v10,v16)
+ bdz .Lv10
+
+ lvx v11,off48,r4
+ lvx v17,off48,r3
+ VPERM(v11,v11,v17,byteswap)
+ VPMSUMW(v11,v11,v17)
+ bdz .Lv11
+
+ lvx v12,off64,r4
+ lvx v16,off64,r3
+ VPERM(v12,v12,v16,byteswap)
+ VPMSUMW(v12,v12,v16)
+ bdz .Lv12
+
+ lvx v13,off80,r4
+ lvx v17,off80,r3
+ VPERM(v13,v13,v17,byteswap)
+ VPMSUMW(v13,v13,v17)
+ bdz .Lv13
+
+ lvx v14,off96,r4
+ lvx v16,off96,r3
+ VPERM(v14,v14,v16,byteswap)
+ VPMSUMW(v14,v14,v16)
+ bdz .Lv14
+
+ lvx v15,off112,r4
+ lvx v17,off112,r3
+ VPERM(v15,v15,v17,byteswap)
+ VPMSUMW(v15,v15,v17)
+
+.Lv15: vxor v19,v19,v15
+.Lv14: vxor v20,v20,v14
+.Lv13: vxor v19,v19,v13
+.Lv12: vxor v20,v20,v12
+.Lv11: vxor v19,v19,v11
+.Lv10: vxor v20,v20,v10
+.Lv9: vxor v19,v19,v9
+.Lv8: vxor v20,v20,v8
+.Lv7: vxor v19,v19,v7
+.Lv6: vxor v20,v20,v6
+.Lv5: vxor v19,v19,v5
+.Lv4: vxor v20,v20,v4
+.Lv3: vxor v19,v19,v3
+.Lv2: vxor v20,v20,v2
+.Lv1: vxor v19,v19,v1
+.Lv0: vxor v20,v20,v0
+
+ vxor v0,v19,v20
+
+ b .Lbarrett_reduction
+
+.Lzero:
+ mr r3,r10
+ b .Lout
+
+FUNC_END(__crc32_vpmsum)
+#endif
diff --git a/src/support/power8/crc32_constants.h b/src/support/power8/crc32_constants.h
new file mode 100644
index 00000000000..02c471d1c56
--- /dev/null
+++ b/src/support/power8/crc32_constants.h
@@ -0,0 +1,901 @@
+#define CRC 0x1edc6f41
+#define CRC_XOR
+#define REFLECT
+
+#ifndef __ASSEMBLY__
+#ifdef CRC_TABLE
+static const unsigned int crc_table[] = {
+ 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
+ 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
+ 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
+ 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
+ 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
+ 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
+ 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
+ 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
+ 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
+ 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
+ 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
+ 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
+ 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
+ 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
+ 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
+ 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
+ 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
+ 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
+ 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
+ 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
+ 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
+ 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
+ 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
+ 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
+ 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
+ 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
+ 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
+ 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
+ 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
+ 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
+ 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
+ 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
+ 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
+ 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
+ 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
+ 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
+ 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
+ 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
+ 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
+ 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
+ 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
+ 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
+ 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
+ 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
+ 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
+ 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
+ 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
+ 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
+ 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
+ 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
+ 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
+ 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
+ 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
+ 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
+ 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
+ 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
+ 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
+ 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
+ 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
+ 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
+ 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
+ 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
+ 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
+ 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,};
+
+#endif
+#else
+#define MAX_SIZE 32768
+.constants:
+
+ /* Reduce 262144 bits (32 kB) to 1024 bits */
+ /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
+ .octa 0x00000000b6ca9e20000000009c37c408
+
+ /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
+ .octa 0x00000000350249a800000001b51df26c
+
+ /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
+ .octa 0x00000001862dac54000000000724b9d0
+
+ /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
+ .octa 0x00000001d87fb48c00000001c00532fe
+
+ /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
+ .octa 0x00000001f39b699e00000000f05a9362
+
+ /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
+ .octa 0x0000000101da11b400000001e1007970
+
+ /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
+ .octa 0x00000001cab571e000000000a57366ee
+
+ /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
+ .octa 0x00000000c7020cfe0000000192011284
+
+ /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
+ .octa 0x00000000cdaed1ae0000000162716d9a
+
+ /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
+ .octa 0x00000001e804effc00000000cd97ecde
+
+ /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
+ .octa 0x0000000077c3ea3a0000000058812bc0
+
+ /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
+ .octa 0x0000000068df31b40000000088b8c12e
+
+ /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
+ .octa 0x00000000b059b6c200000001230b234c
+
+ /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
+ .octa 0x0000000145fb8ed800000001120b416e
+
+ /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
+ .octa 0x00000000cbc0916800000001974aecb0
+
+ /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
+ .octa 0x000000005ceeedc2000000008ee3f226
+
+ /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
+ .octa 0x0000000047d74e8600000001089aba9a
+
+ /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
+ .octa 0x00000001407e9e220000000065113872
+
+ /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
+ .octa 0x00000001da967bda000000005c07ec10
+
+ /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
+ .octa 0x000000006c8983680000000187590924
+
+ /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
+ .octa 0x00000000f2d14c9800000000e35da7c6
+
+ /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
+ .octa 0x00000001993c6ad4000000000415855a
+
+ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
+ .octa 0x000000014683d1ac0000000073617758
+
+ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
+ .octa 0x00000001a7c93e6c0000000176021d28
+
+ /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
+ .octa 0x000000010211e90a00000001c358fd0a
+
+ /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
+ .octa 0x000000001119403e00000001ff7a2c18
+
+ /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
+ .octa 0x000000001c3261aa00000000f2d9f7e4
+
+ /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
+ .octa 0x000000014e37a634000000016cf1f9c8
+
+ /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
+ .octa 0x0000000073786c0c000000010af9279a
+
+ /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
+ .octa 0x000000011dc037f80000000004f101e8
+
+ /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
+ .octa 0x0000000031433dfc0000000070bcf184
+
+ /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
+ .octa 0x000000009cde8348000000000a8de642
+
+ /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
+ .octa 0x0000000038d3c2a60000000062ea130c
+
+ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
+ .octa 0x000000011b25f26000000001eb31cbb2
+
+ /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
+ .octa 0x000000001629e6f00000000170783448
+
+ /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
+ .octa 0x0000000160838b4c00000001a684b4c6
+
+ /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
+ .octa 0x000000007a44011c00000000253ca5b4
+
+ /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
+ .octa 0x00000000226f417a0000000057b4b1e2
+
+ /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
+ .octa 0x0000000045eb2eb400000000b6bd084c
+
+ /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
+ .octa 0x000000014459d70c0000000123c2d592
+
+ /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
+ .octa 0x00000001d406ed8200000000159dafce
+
+ /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
+ .octa 0x0000000160c8e1a80000000127e1a64e
+
+ /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
+ .octa 0x0000000027ba80980000000056860754
+
+ /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
+ .octa 0x000000006d92d01800000001e661aae8
+
+ /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
+ .octa 0x000000012ed7e3f200000000f82c6166
+
+ /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
+ .octa 0x000000002dc8778800000000c4f9c7ae
+
+ /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
+ .octa 0x0000000018240bb80000000074203d20
+
+ /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
+ .octa 0x000000001ad381580000000198173052
+
+ /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
+ .octa 0x00000001396b78f200000001ce8aba54
+
+ /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
+ .octa 0x000000011a68133400000001850d5d94
+
+ /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
+ .octa 0x000000012104732e00000001d609239c
+
+ /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
+ .octa 0x00000000a140d90c000000001595f048
+
+ /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
+ .octa 0x00000001b7215eda0000000042ccee08
+
+ /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
+ .octa 0x00000001aaf1df3c000000010a389d74
+
+ /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
+ .octa 0x0000000029d15b8a000000012a840da6
+
+ /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
+ .octa 0x00000000f1a96922000000001d181c0c
+
+ /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
+ .octa 0x00000001ac80d03c0000000068b7d1f6
+
+ /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
+ .octa 0x000000000f11d56a000000005b0f14fc
+
+ /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
+ .octa 0x00000001f1c022a20000000179e9e730
+
+ /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
+ .octa 0x0000000173d00ae200000001ce1368d6
+
+ /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
+ .octa 0x00000001d4ffe4ac0000000112c3a84c
+
+ /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
+ .octa 0x000000016edc5ae400000000de940fee
+
+ /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
+ .octa 0x00000001f1a0214000000000fe896b7e
+
+ /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
+ .octa 0x00000000ca0b28a000000001f797431c
+
+ /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
+ .octa 0x00000001928e30a20000000053e989ba
+
+ /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
+ .octa 0x0000000097b1b002000000003920cd16
+
+ /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
+ .octa 0x00000000b15bf90600000001e6f579b8
+
+ /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
+ .octa 0x00000000411c5d52000000007493cb0a
+
+ /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
+ .octa 0x00000001c36f330000000001bdd376d8
+
+ /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
+ .octa 0x00000001119227e0000000016badfee6
+
+ /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
+ .octa 0x00000000114d47020000000071de5c58
+
+ /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
+ .octa 0x00000000458b5b9800000000453f317c
+
+ /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
+ .octa 0x000000012e31fb8e0000000121675cce
+
+ /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
+ .octa 0x000000005cf619d800000001f409ee92
+
+ /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
+ .octa 0x0000000063f4d8b200000000f36b9c88
+
+ /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
+ .octa 0x000000004138dc8a0000000036b398f4
+
+ /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
+ .octa 0x00000001d29ee8e000000001748f9adc
+
+ /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
+ .octa 0x000000006a08ace800000001be94ec00
+
+ /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
+ .octa 0x0000000127d4201000000000b74370d6
+
+ /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
+ .octa 0x0000000019d76b6200000001174d0b98
+
+ /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
+ .octa 0x00000001b1471f6e00000000befc06a4
+
+ /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
+ .octa 0x00000001f64c19cc00000001ae125288
+
+ /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
+ .octa 0x00000000003c0ea00000000095c19b34
+
+ /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
+ .octa 0x000000014d73abf600000001a78496f2
+
+ /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
+ .octa 0x00000001620eb84400000001ac5390a0
+
+ /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
+ .octa 0x0000000147655048000000002a80ed6e
+
+ /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
+ .octa 0x0000000067b5077e00000001fa9b0128
+
+ /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
+ .octa 0x0000000010ffe20600000001ea94929e
+
+ /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
+ .octa 0x000000000fee8f1e0000000125f4305c
+
+ /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
+ .octa 0x00000001da26fbae00000001471e2002
+
+ /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
+ .octa 0x00000001b3a8bd880000000132d2253a
+
+ /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
+ .octa 0x00000000e8f3898e00000000f26b3592
+
+ /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
+ .octa 0x00000000b0d0d28c00000000bc8b67b0
+
+ /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
+ .octa 0x0000000030f2a798000000013a826ef2
+
+ /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
+ .octa 0x000000000fba10020000000081482c84
+
+ /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
+ .octa 0x00000000bdb9bd7200000000e77307c2
+
+ /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
+ .octa 0x0000000075d3bf5a00000000d4a07ec8
+
+ /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
+ .octa 0x00000000ef1f98a00000000017102100
+
+ /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
+ .octa 0x00000000689c760200000000db406486
+
+ /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
+ .octa 0x000000016d5fa5fe0000000192db7f88
+
+ /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
+ .octa 0x00000001d0d2b9ca000000018bf67b1e
+
+ /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
+ .octa 0x0000000041e7b470000000007c09163e
+
+ /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
+ .octa 0x00000001cbb6495e000000000adac060
+
+ /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
+ .octa 0x000000010052a0b000000000bd8316ae
+
+ /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
+ .octa 0x00000001d8effb5c000000019f09ab54
+
+ /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
+ .octa 0x00000001d969853c0000000125155542
+
+ /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
+ .octa 0x00000000523ccce2000000018fdb5882
+
+ /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
+ .octa 0x000000001e2436bc00000000e794b3f4
+
+ /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
+ .octa 0x00000000ddd1c3a2000000016f9bb022
+
+ /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
+ .octa 0x0000000019fcfe3800000000290c9978
+
+ /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
+ .octa 0x00000001ce95db640000000083c0f350
+
+ /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
+ .octa 0x00000000af5828060000000173ea6628
+
+ /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
+ .octa 0x00000001006388f600000001c8b4e00a
+
+ /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
+ .octa 0x0000000179eca00a00000000de95d6aa
+
+ /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
+ .octa 0x0000000122410a6a000000010b7f7248
+
+ /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
+ .octa 0x000000004288e87c00000001326e3a06
+
+ /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
+ .octa 0x000000016c5490da00000000bb62c2e6
+
+ /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
+ .octa 0x00000000d1c71f6e0000000156a4b2c2
+
+ /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
+ .octa 0x00000001b4ce08a6000000011dfe763a
+
+ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
+ .octa 0x00000001466ba60c000000007bcca8e2
+
+ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
+ .octa 0x00000001f6c488a40000000186118faa
+
+ /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
+ .octa 0x000000013bfb06820000000111a65a88
+
+ /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
+ .octa 0x00000000690e9e54000000003565e1c4
+
+ /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
+ .octa 0x00000000281346b6000000012ed02a82
+
+ /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
+ .octa 0x000000015646402400000000c486ecfc
+
+ /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
+ .octa 0x000000016063a8dc0000000001b951b2
+
+ /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
+ .octa 0x0000000116a663620000000048143916
+
+ /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
+ .octa 0x000000017e8aa4d200000001dc2ae124
+
+ /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
+ .octa 0x00000001728eb10c00000001416c58d6
+
+ /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
+ .octa 0x00000001b08fd7fa00000000a479744a
+
+ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
+ .octa 0x00000001092a16e80000000096ca3a26
+
+ /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
+ .octa 0x00000000a505637c00000000ff223d4e
+
+ /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
+ .octa 0x00000000d94869b2000000010e84da42
+
+ /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
+ .octa 0x00000001c8b203ae00000001b61ba3d0
+
+ /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
+ .octa 0x000000005704aea000000000680f2de8
+
+ /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
+ .octa 0x000000012e295fa2000000008772a9a8
+
+ /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
+ .octa 0x000000011d0908bc0000000155f295bc
+
+ /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
+ .octa 0x0000000193ed97ea00000000595f9282
+
+ /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
+ .octa 0x000000013a0f1c520000000164b1c25a
+
+ /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
+ .octa 0x000000010c2c40c000000000fbd67c50
+
+ /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
+ .octa 0x00000000ff6fac3e0000000096076268
+
+ /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
+ .octa 0x000000017b3609c000000001d288e4cc
+
+ /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
+ .octa 0x0000000088c8c92200000001eaac1bdc
+
+ /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
+ .octa 0x00000001751baae600000001f1ea39e2
+
+ /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
+ .octa 0x000000010795297200000001eb6506fc
+
+ /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
+ .octa 0x0000000162b00abe000000010f806ffe
+
+ /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
+ .octa 0x000000000d7b404c000000010408481e
+
+ /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
+ .octa 0x00000000763b13d40000000188260534
+
+ /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
+ .octa 0x00000000f6dc22d80000000058fc73e0
+
+ /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
+ .octa 0x000000007daae06000000000391c59b8
+
+ /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
+ .octa 0x000000013359ab7c000000018b638400
+
+ /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
+ .octa 0x000000008add438a000000011738f5c4
+
+ /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
+ .octa 0x00000001edbefdea000000008cf7c6da
+
+ /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
+ .octa 0x000000004104e0f800000001ef97fb16
+
+ /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
+ .octa 0x00000000b48a82220000000102130e20
+
+ /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
+ .octa 0x00000001bcb4684400000000db968898
+
+ /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
+ .octa 0x000000013293ce0a00000000b5047b5e
+
+ /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
+ .octa 0x00000001710d0844000000010b90fdb2
+
+ /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
+ .octa 0x0000000117907f6e000000004834a32e
+
+ /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
+ .octa 0x0000000087ddf93e0000000059c8f2b0
+
+ /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
+ .octa 0x000000005970e9b00000000122cec508
+
+ /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
+ .octa 0x0000000185b2b7d0000000000a330cda
+
+ /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
+ .octa 0x00000001dcee0efc000000014a47148c
+
+ /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
+ .octa 0x0000000030da27220000000042c61cb8
+
+ /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
+ .octa 0x000000012f925a180000000012fe6960
+
+ /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
+ .octa 0x00000000dd2e357c00000000dbda2c20
+
+ /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
+ .octa 0x00000000071c80de000000011122410c
+
+ /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
+ .octa 0x000000011513140a00000000977b2070
+
+ /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
+ .octa 0x00000001df876e8e000000014050438e
+
+ /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
+ .octa 0x000000015f81d6ce0000000147c840e8
+
+ /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
+ .octa 0x000000019dd94dbe00000001cc7c88ce
+
+ /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
+ .octa 0x00000001373d206e00000001476b35a4
+
+ /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
+ .octa 0x00000000668ccade000000013d52d508
+
+ /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
+ .octa 0x00000001b192d268000000008e4be32e
+
+ /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
+ .octa 0x00000000e30f3a7800000000024120fe
+
+ /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
+ .octa 0x000000010ef1f7bc00000000ddecddb4
+
+ /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
+ .octa 0x00000001f5ac738000000000d4d403bc
+
+ /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
+ .octa 0x000000011822ea7000000001734b89aa
+
+ /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
+ .octa 0x00000000c3a33848000000010e7a58d6
+
+ /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
+ .octa 0x00000001bd151c2400000001f9f04e9c
+
+ /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
+ .octa 0x0000000056002d7600000000b692225e
+
+ /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
+ .octa 0x000000014657c4f4000000019b8d3f3e
+
+ /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
+ .octa 0x0000000113742d7c00000001a874f11e
+
+ /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
+ .octa 0x000000019c5920ba000000010d5a4254
+
+ /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
+ .octa 0x000000005216d2d600000000bbb2f5d6
+
+ /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
+ .octa 0x0000000136f5ad8a0000000179cc0e36
+
+ /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
+ .octa 0x000000018b07beb600000001dca1da4a
+
+ /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
+ .octa 0x00000000db1e93b000000000feb1a192
+
+ /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
+ .octa 0x000000000b96fa3a00000000d1eeedd6
+
+ /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
+ .octa 0x00000001d9968af0000000008fad9bb4
+
+ /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
+ .octa 0x000000000e4a77a200000001884938e4
+
+ /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
+ .octa 0x00000000508c2ac800000001bc2e9bc0
+
+ /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
+ .octa 0x0000000021572a8000000001f9658a68
+
+ /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
+ .octa 0x00000001b859daf2000000001b9224fc
+
+ /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
+ .octa 0x000000016f7884740000000055b2fb84
+
+ /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
+ .octa 0x00000001b438810e000000018b090348
+
+ /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
+ .octa 0x0000000095ddc6f2000000011ccbd5ea
+
+ /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
+ .octa 0x00000001d977c20c0000000007ae47f8
+
+ /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
+ .octa 0x00000000ebedb99a0000000172acbec0
+
+ /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
+ .octa 0x00000001df9e9e9200000001c6e3ff20
+
+ /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
+ .octa 0x00000001a4a3f95200000000e1b38744
+
+ /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
+ .octa 0x00000000e2f5122000000000791585b2
+
+ /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
+ .octa 0x000000004aa01f3e00000000ac53b894
+
+ /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
+ .octa 0x00000000b3e90a5800000001ed5f2cf4
+
+ /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
+ .octa 0x000000000c9ca2aa00000001df48b2e0
+
+ /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
+ .octa 0x000000015168231600000000049c1c62
+
+ /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
+ .octa 0x0000000036fce78c000000017c460c12
+
+ /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
+ .octa 0x000000009037dc10000000015be4da7e
+
+ /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
+ .octa 0x00000000d3298582000000010f38f668
+
+ /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
+ .octa 0x00000001b42e8ad60000000039f40a00
+
+ /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
+ .octa 0x00000000142a983800000000bd4c10c4
+
+ /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
+ .octa 0x0000000109c7f1900000000042db1d98
+
+ /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
+ .octa 0x0000000056ff931000000001c905bae6
+
+ /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
+ .octa 0x00000001594513aa00000000069d40ea
+
+ /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
+ .octa 0x00000001e3b5b1e8000000008e4fbad0
+
+ /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
+ .octa 0x000000011dd5fc080000000047bedd46
+
+ /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
+ .octa 0x00000001675f0cc20000000026396bf8
+
+ /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
+ .octa 0x00000000d1c8dd4400000000379beb92
+
+ /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
+ .octa 0x0000000115ebd3d8000000000abae54a
+
+ /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
+ .octa 0x00000001ecbd0dac0000000007e6a128
+
+ /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
+ .octa 0x00000000cdf67af2000000000ade29d2
+
+ /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
+ .octa 0x000000004c01ff4c00000000f974c45c
+
+ /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
+ .octa 0x00000000f2d8657e00000000e77ac60a
+
+ /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
+ .octa 0x000000006bae74c40000000145895816
+
+ /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
+ .octa 0x0000000152af8aa00000000038e362be
+
+ /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
+ .octa 0x0000000004663802000000007f991a64
+
+ /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
+ .octa 0x00000001ab2f5afc00000000fa366d3a
+
+ /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
+ .octa 0x0000000074a4ebd400000001a2bb34f0
+
+ /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
+ .octa 0x00000001d7ab3a4c0000000028a9981e
+
+ /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
+ .octa 0x00000001a8da60c600000001dbc672be
+
+ /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
+ .octa 0x000000013cf6382000000000b04d77f6
+
+ /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
+ .octa 0x00000000bec12e1e0000000124400d96
+
+ /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
+ .octa 0x00000001c6368010000000014ca4b414
+
+ /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
+ .octa 0x00000001e6e78758000000012fe2c938
+
+ /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
+ .octa 0x000000008d7f2b3c00000001faed01e6
+
+ /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
+ .octa 0x000000016b4a156e000000007e80ecfe
+
+ /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
+ .octa 0x00000001c63cfeb60000000098daee94
+
+ /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
+ .octa 0x000000015f902670000000010a04edea
+
+ /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
+ .octa 0x00000001cd5de11e00000001c00b4524
+
+ /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
+ .octa 0x000000001acaec540000000170296550
+
+ /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
+ .octa 0x000000002bd0ca780000000181afaa48
+
+ /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
+ .octa 0x0000000032d63d5c0000000185a31ffa
+
+ /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
+ .octa 0x000000001c6d4e4c000000002469f608
+
+ /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
+ .octa 0x0000000106a60b92000000006980102a
+
+ /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
+ .octa 0x00000000d3855e120000000111ea9ca8
+
+ /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
+ .octa 0x00000000e312563600000001bd1d29ce
+
+ /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
+ .octa 0x000000009e8f7ea400000001b34b9580
+
+ /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
+ .octa 0x00000001c82e562c000000003076054e
+
+ /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
+ .octa 0x00000000ca9f09ce000000012a608ea4
+
+ /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
+ .octa 0x00000000c63764e600000000784d05fe
+
+ /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
+ .octa 0x0000000168d2e49e000000016ef0d82a
+
+ /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
+ .octa 0x00000000e986c1480000000075bda454
+
+ /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
+ .octa 0x00000000cfb65894000000003dc0a1c4
+
+ /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
+ .octa 0x0000000111cadee400000000e9a5d8be
+
+ /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
+ .octa 0x0000000171fb63ce00000001609bc4b4
+
+.short_constants:
+
+ /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
+ /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */
+ .octa 0x7fec2963e5bf80485cf015c388e56f72
+
+ /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */
+ .octa 0x38e888d4844752a9963a18920246e2e6
+
+ /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */
+ .octa 0x42316c00730206ad419a441956993a31
+
+ /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */
+ .octa 0x543d5c543e65ddf9924752ba2b830011
+
+ /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */
+ .octa 0x78e87aaf56767c9255bd7f9518e4a304
+
+ /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */
+ .octa 0x8f68fcec1903da7f6d76739fe0553f1e
+
+ /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */
+ .octa 0x3f4840246791d588c133722b1fe0b5c3
+
+ /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */
+ .octa 0x34c96751b04de25a64b67ee0e55ef1f3
+
+ /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */
+ .octa 0x156c8e180b4a395b069db049b8fdb1e7
+
+ /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */
+ .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e
+
+ /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */
+ .octa 0x041d37768cd75659817cdc5119b29a35
+
+ /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */
+ .octa 0x3a0777818cfaa9651ce9d94b36c41f1c
+
+ /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */
+ .octa 0x0e148e8252377a554f256efcb82be955
+
+ /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */
+ .octa 0x9c25531d19e65ddeec1631edb2dea967
+
+ /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */
+ .octa 0x790606ff9957c0a65d27e147510ac59a
+
+ /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */
+ .octa 0x82f63b786ea2d55ca66805eb18b8ea18
+
+
+.barrett_constants:
+ /* 33 bit reflected Barrett constant m - (4^32)/n */
+ .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */
+ /* 33 bit reflected Barrett constant n */
+ .octa 0x00000000000000000000000105ec76f1
+#endif
diff --git a/src/support/power8/crc32_wrapper.c b/src/support/power8/crc32_wrapper.c
new file mode 100644
index 00000000000..34ac4150338
--- /dev/null
+++ b/src/support/power8/crc32_wrapper.c
@@ -0,0 +1,66 @@
+#if defined(__powerpc64__)
+#define CRC_TABLE /* set before the include; presumably makes crc32_constants.h provide crc_table[] (TODO confirm) */
+#include "crc32_constants.h"
+/* Alignment, in bytes, that crc32_vpmsum() uses to split a buffer into head, aligned body and tail. */
+#define VMX_ALIGN 16
+#define VMX_ALIGN_MASK (VMX_ALIGN-1) /* mask selecting the misaligned low bits of an address or length */
+
+#ifdef REFLECT /* reflected variant: input is folded into the low byte of crc */
+static unsigned int crc32_align(unsigned int crc, unsigned char *p, /* byte-at-a-time, table-driven CRC update */
+ unsigned long len) /* len: number of bytes to consume starting at p */
+{
+ while (len--) /* one table lookup per input byte */
+ crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8); /* index: low byte of crc XOR input byte; crc shifts right */
+ return crc; /* updated crc; no inversion is applied in this helper */
+}
+#else /* non-reflected variant: input is folded into bits 24..31 of crc */
+static unsigned int crc32_align(unsigned int crc, unsigned char *p, /* byte-at-a-time, table-driven CRC update */
+ unsigned long len) /* len: number of bytes to consume starting at p */
+{
+ while (len--) /* one table lookup per input byte */
+ crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8); /* index: (crc >> 24) XOR input byte; crc shifts left */
+ return crc; /* updated crc; no inversion is applied in this helper */
+}
+#endif
+
+unsigned int __crc32_vpmsum(unsigned int crc, unsigned char *p, /* declared only; the definition is not in this file */
+ unsigned long len); /* NOTE(review): presumably the vpmsum assembly routine; crc32_vpmsum() passes it a 16-byte-aligned p and a len that is a multiple of 16 */
+
+unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p, /* CRC over len bytes at p, continuing from crc */
+ unsigned long len) /* returns the updated CRC value */
+{
+ unsigned int prealign; /* bytes from p up to the next VMX_ALIGN boundary */
+ unsigned int tail; /* bytes left over after the last whole VMX_ALIGN-sized chunk */
+/* When CRC_XOR is defined, crc is inverted on entry here and inverted again at "out". */
+#ifdef CRC_XOR
+ crc ^= 0xffffffff;
+#endif
+/* Under 31 bytes: byte-wise routine only. At 31 or more, at least 16 bytes remain after the (at most 15-byte) head. */
+ if (len < VMX_ALIGN + VMX_ALIGN_MASK) {
+ crc = crc32_align(crc, p, len);
+ goto out;
+ }
+/* Head: consume bytes one at a time until p reaches a VMX_ALIGN boundary. */
+ if ((unsigned long)p & VMX_ALIGN_MASK) {
+ prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
+ crc = crc32_align(crc, p, prealign);
+ len -= prealign;
+ p += prealign;
+ }
+/* Body: hand the aligned pointer and len rounded down to a multiple of VMX_ALIGN to __crc32_vpmsum(). */
+ crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+/* Tail: the remaining len % VMX_ALIGN bytes go through the byte-wise routine. */
+ tail = len & VMX_ALIGN_MASK;
+ if (tail) {
+ p += len & ~VMX_ALIGN_MASK; /* skip the bytes already handled by __crc32_vpmsum() */
+ crc = crc32_align(crc, p, tail);
+ }
+
+out: /* shared exit, so the short-buffer path also gets the CRC_XOR inversion */
+#ifdef CRC_XOR
+ crc ^= 0xffffffff;
+#endif
+
+ return crc;
+}
+#endif
diff --git a/src/support/power8/ppc-opcode.h b/src/support/power8/ppc-opcode.h
new file mode 100644
index 00000000000..b63feea60a0
--- /dev/null
+++ b/src/support/power8/ppc-opcode.h
@@ -0,0 +1,23 @@
+#ifndef __OPCODES_H /* include guard */
+#define __OPCODES_H
+/* Field encoders: each masks a register number and shifts it into its position in an instruction word. */
+#define __PPC_RA(a) (((a) & 0x1f) << 16) /* low 5 bits of a, placed at bit 16 */
+#define __PPC_RB(b) (((b) & 0x1f) << 11) /* low 5 bits of b, placed at bit 11 */
+#define __PPC_XA(a) ((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3)) /* as RA, plus bit 5 of a moved to bit 2 */
+#define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4)) /* as RB, plus bit 5 of b moved to bit 1 */
+#define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5)) /* low 5 bits at bit 21, bit 5 of s moved to bit 0 */
+#define __PPC_XT(s) __PPC_XS(s) /* target field uses the same encoding as XS */
+#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b)) /* three 6-bit register operands */
+#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b)) /* one 6-bit and two 5-bit register operands */
+/* Base instruction words; the operand fields built above are ORed into them by the macros further down. */
+#define PPC_INST_VPMSUMW 0x10000488
+#define PPC_INST_VPMSUMD 0x100004c8
+#define PPC_INST_MFVSRD 0x7c000066
+#define PPC_INST_MTVSRD 0x7c000166
+/* Each macro below expands to a ".long" directive holding the fully encoded instruction word. */
+#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b)
+#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b)
+#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t)+32, a, 0) /* encodes (t)+32 in the XS field; presumably maps a vector register to its VSX number (TODO confirm) */
+#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t)+32, a, 0) /* encodes (t)+32 in the XS field; presumably maps a vector register to its VSX number (TODO confirm) */
+
+#endif
diff --git a/src/support/scratch.c b/src/support/scratch.c
index 94020ba2621..aea98dc49ef 100644
--- a/src/support/scratch.c
+++ b/src/support/scratch.c
@@ -45,7 +45,7 @@ __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
WT_RET(__wt_realloc_aligned(
session, &buf->memsize, size, &buf->mem));
else
- WT_RET(__wt_realloc(
+ WT_RET(__wt_realloc_noclear(
session, &buf->memsize, size, &buf->mem));
}
diff --git a/src/support/stat.c b/src/support/stat.c
index 7a615131628..2a826eda962 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -3,102 +3,102 @@
#include "wt_internal.h"
static const char * const __stats_dsrc_desc[] = {
- "block-manager: file allocation unit size",
- "block-manager: blocks allocated",
- "block-manager: checkpoint size",
- "block-manager: allocations requiring file extension",
- "block-manager: blocks freed",
- "block-manager: file magic number",
- "block-manager: file major version number",
- "block-manager: minor version number",
- "block-manager: file bytes available for reuse",
- "block-manager: file size in bytes",
- "LSM: bloom filters in the LSM tree",
"LSM: bloom filter false positives",
"LSM: bloom filter hits",
"LSM: bloom filter misses",
"LSM: bloom filter pages evicted from cache",
"LSM: bloom filter pages read into cache",
+ "LSM: bloom filters in the LSM tree",
+ "LSM: chunks in the LSM tree",
+ "LSM: highest merge generation in the LSM tree",
+ "LSM: queries that could have benefited from a Bloom filter that did not exist",
+ "LSM: sleep for LSM checkpoint throttle",
+ "LSM: sleep for LSM merge throttle",
"LSM: total size of bloom filters",
+ "block-manager: allocations requiring file extension",
+ "block-manager: blocks allocated",
+ "block-manager: blocks freed",
+ "block-manager: checkpoint size",
+ "block-manager: file allocation unit size",
+ "block-manager: file bytes available for reuse",
+ "block-manager: file magic number",
+ "block-manager: file major version number",
+ "block-manager: file size in bytes",
+ "block-manager: minor version number",
"btree: btree checkpoint generation",
- "btree: column-store variable-size deleted values",
"btree: column-store fixed-size leaf pages",
"btree: column-store internal pages",
"btree: column-store variable-size RLE encoded values",
+ "btree: column-store variable-size deleted values",
"btree: column-store variable-size leaf pages",
- "btree: pages rewritten by compaction",
- "btree: number of key/value pairs",
"btree: fixed-record size",
- "btree: maximum tree depth",
"btree: maximum internal page key size",
"btree: maximum internal page size",
"btree: maximum leaf page key size",
"btree: maximum leaf page size",
"btree: maximum leaf page value size",
+ "btree: maximum tree depth",
+ "btree: number of key/value pairs",
"btree: overflow pages",
+ "btree: pages rewritten by compaction",
"btree: row-store internal pages",
"btree: row-store leaf pages",
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
- "cache: unmodified pages evicted",
- "cache: page split during eviction deepened the tree",
- "cache: modified pages evicted",
"cache: data source pages selected for eviction unable to be evicted",
"cache: hazard pointer blocked page eviction",
+ "cache: in-memory page passed criteria to be split",
+ "cache: in-memory page splits",
"cache: internal pages evicted",
"cache: internal pages split during eviction",
"cache: leaf pages split during eviction",
- "cache: in-memory page splits",
- "cache: in-memory page passed criteria to be split",
+ "cache: modified pages evicted",
+ "cache: overflow pages read into cache",
"cache: overflow values cached in memory",
+ "cache: page split during eviction deepened the tree",
+ "cache: page written requiring lookaside records",
"cache: pages read into cache",
"cache: pages read into cache requiring lookaside entries",
- "cache: overflow pages read into cache",
"cache: pages written from cache",
- "cache: page written requiring lookaside records",
"cache: pages written requiring in-memory restoration",
- "compression: raw compression call failed, no additional data available",
- "compression: raw compression call failed, additional data available",
- "compression: raw compression call succeeded",
+ "cache: unmodified pages evicted",
"compression: compressed pages read",
"compression: compressed pages written",
"compression: page written failed to compress",
"compression: page written was too small to compress",
- "cursor: create calls",
- "cursor: insert calls",
+ "compression: raw compression call failed, additional data available",
+ "compression: raw compression call failed, no additional data available",
+ "compression: raw compression call succeeded",
"cursor: bulk-loaded cursor-insert calls",
+ "cursor: create calls",
"cursor: cursor-insert key and value bytes inserted",
+ "cursor: cursor-remove key bytes removed",
+ "cursor: cursor-update value bytes updated",
+ "cursor: insert calls",
"cursor: next calls",
"cursor: prev calls",
"cursor: remove calls",
- "cursor: cursor-remove key bytes removed",
"cursor: reset calls",
"cursor: restarted searches",
"cursor: search calls",
"cursor: search near calls",
"cursor: truncate calls",
"cursor: update calls",
- "cursor: cursor-update value bytes updated",
- "LSM: sleep for LSM checkpoint throttle",
- "LSM: chunks in the LSM tree",
- "LSM: highest merge generation in the LSM tree",
- "LSM: queries that could have benefited from a Bloom filter that did not exist",
- "LSM: sleep for LSM merge throttle",
"reconciliation: dictionary matches",
+ "reconciliation: fast-path pages deleted",
+ "reconciliation: internal page key bytes discarded using suffix compression",
"reconciliation: internal page multi-block writes",
- "reconciliation: leaf page multi-block writes",
- "reconciliation: maximum blocks required for a page",
"reconciliation: internal-page overflow keys",
+ "reconciliation: leaf page key bytes discarded using prefix compression",
+ "reconciliation: leaf page multi-block writes",
"reconciliation: leaf-page overflow keys",
+ "reconciliation: maximum blocks required for a page",
"reconciliation: overflow values written",
- "reconciliation: pages deleted",
- "reconciliation: fast-path pages deleted",
"reconciliation: page checksum matches",
"reconciliation: page reconciliation calls",
"reconciliation: page reconciliation calls for eviction",
- "reconciliation: leaf page key bytes discarded using prefix compression",
- "reconciliation: internal page key bytes discarded using suffix compression",
+ "reconciliation: pages deleted",
"session: object compaction",
"session: open cursor count",
"transaction: update conflicts",
@@ -132,6 +132,18 @@ __wt_stat_dsrc_init(WT_DATA_HANDLE *handle)
void
__wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
{
+ stats->bloom_false_positive = 0;
+ stats->bloom_hit = 0;
+ stats->bloom_miss = 0;
+ stats->bloom_page_evict = 0;
+ stats->bloom_page_read = 0;
+ stats->bloom_count = 0;
+ stats->lsm_chunk_count = 0;
+ stats->lsm_generation_max = 0;
+ stats->lsm_lookup_no_bloom = 0;
+ stats->lsm_checkpoint_throttle = 0;
+ stats->lsm_merge_throttle = 0;
+ stats->bloom_size = 0;
stats->block_extension = 0;
stats->block_alloc = 0;
stats->block_free = 0;
@@ -145,9 +157,9 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
/* not clearing btree_checkpoint_generation */
stats->btree_column_fix = 0;
stats->btree_column_internal = 0;
+ stats->btree_column_rle = 0;
stats->btree_column_deleted = 0;
stats->btree_column_variable = 0;
- stats->btree_column_rle = 0;
stats->btree_fixed_len = 0;
stats->btree_maxintlkey = 0;
stats->btree_maxintlpage = 0;
@@ -202,18 +214,6 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cursor_search_near = 0;
stats->cursor_truncate = 0;
stats->cursor_update = 0;
- stats->bloom_false_positive = 0;
- stats->bloom_hit = 0;
- stats->bloom_miss = 0;
- stats->bloom_page_evict = 0;
- stats->bloom_page_read = 0;
- stats->bloom_count = 0;
- stats->lsm_chunk_count = 0;
- stats->lsm_generation_max = 0;
- stats->lsm_lookup_no_bloom = 0;
- stats->lsm_checkpoint_throttle = 0;
- stats->lsm_merge_throttle = 0;
- stats->bloom_size = 0;
stats->rec_dictionary = 0;
stats->rec_page_delete_fast = 0;
stats->rec_suffix_compression = 0;
@@ -246,6 +246,19 @@ void
__wt_stat_dsrc_aggregate_single(
WT_DSRC_STATS *from, WT_DSRC_STATS *to)
{
+ to->bloom_false_positive += from->bloom_false_positive;
+ to->bloom_hit += from->bloom_hit;
+ to->bloom_miss += from->bloom_miss;
+ to->bloom_page_evict += from->bloom_page_evict;
+ to->bloom_page_read += from->bloom_page_read;
+ to->bloom_count += from->bloom_count;
+ to->lsm_chunk_count += from->lsm_chunk_count;
+ if (from->lsm_generation_max > to->lsm_generation_max)
+ to->lsm_generation_max = from->lsm_generation_max;
+ to->lsm_lookup_no_bloom += from->lsm_lookup_no_bloom;
+ to->lsm_checkpoint_throttle += from->lsm_checkpoint_throttle;
+ to->lsm_merge_throttle += from->lsm_merge_throttle;
+ to->bloom_size += from->bloom_size;
to->block_extension += from->block_extension;
to->block_alloc += from->block_alloc;
to->block_free += from->block_free;
@@ -263,9 +276,9 @@ __wt_stat_dsrc_aggregate_single(
to->btree_checkpoint_generation += from->btree_checkpoint_generation;
to->btree_column_fix += from->btree_column_fix;
to->btree_column_internal += from->btree_column_internal;
+ to->btree_column_rle += from->btree_column_rle;
to->btree_column_deleted += from->btree_column_deleted;
to->btree_column_variable += from->btree_column_variable;
- to->btree_column_rle += from->btree_column_rle;
if (from->btree_fixed_len > to->btree_fixed_len)
to->btree_fixed_len = from->btree_fixed_len;
if (from->btree_maxintlkey > to->btree_maxintlkey)
@@ -328,19 +341,6 @@ __wt_stat_dsrc_aggregate_single(
to->cursor_search_near += from->cursor_search_near;
to->cursor_truncate += from->cursor_truncate;
to->cursor_update += from->cursor_update;
- to->bloom_false_positive += from->bloom_false_positive;
- to->bloom_hit += from->bloom_hit;
- to->bloom_miss += from->bloom_miss;
- to->bloom_page_evict += from->bloom_page_evict;
- to->bloom_page_read += from->bloom_page_read;
- to->bloom_count += from->bloom_count;
- to->lsm_chunk_count += from->lsm_chunk_count;
- if (from->lsm_generation_max > to->lsm_generation_max)
- to->lsm_generation_max = from->lsm_generation_max;
- to->lsm_lookup_no_bloom += from->lsm_lookup_no_bloom;
- to->lsm_checkpoint_throttle += from->lsm_checkpoint_throttle;
- to->lsm_merge_throttle += from->lsm_merge_throttle;
- to->bloom_size += from->bloom_size;
to->rec_dictionary += from->rec_dictionary;
to->rec_page_delete_fast += from->rec_page_delete_fast;
to->rec_suffix_compression += from->rec_suffix_compression;
@@ -367,6 +367,21 @@ __wt_stat_dsrc_aggregate(
{
int64_t v;
+ to->bloom_false_positive += WT_STAT_READ(from, bloom_false_positive);
+ to->bloom_hit += WT_STAT_READ(from, bloom_hit);
+ to->bloom_miss += WT_STAT_READ(from, bloom_miss);
+ to->bloom_page_evict += WT_STAT_READ(from, bloom_page_evict);
+ to->bloom_page_read += WT_STAT_READ(from, bloom_page_read);
+ to->bloom_count += WT_STAT_READ(from, bloom_count);
+ to->lsm_chunk_count += WT_STAT_READ(from, lsm_chunk_count);
+ if ((v = WT_STAT_READ(from, lsm_generation_max)) >
+ to->lsm_generation_max)
+ to->lsm_generation_max = v;
+ to->lsm_lookup_no_bloom += WT_STAT_READ(from, lsm_lookup_no_bloom);
+ to->lsm_checkpoint_throttle +=
+ WT_STAT_READ(from, lsm_checkpoint_throttle);
+ to->lsm_merge_throttle += WT_STAT_READ(from, lsm_merge_throttle);
+ to->bloom_size += WT_STAT_READ(from, bloom_size);
to->block_extension += WT_STAT_READ(from, block_extension);
to->block_alloc += WT_STAT_READ(from, block_alloc);
to->block_free += WT_STAT_READ(from, block_free);
@@ -387,10 +402,10 @@ __wt_stat_dsrc_aggregate(
to->btree_column_fix += WT_STAT_READ(from, btree_column_fix);
to->btree_column_internal +=
WT_STAT_READ(from, btree_column_internal);
+ to->btree_column_rle += WT_STAT_READ(from, btree_column_rle);
to->btree_column_deleted += WT_STAT_READ(from, btree_column_deleted);
to->btree_column_variable +=
WT_STAT_READ(from, btree_column_variable);
- to->btree_column_rle += WT_STAT_READ(from, btree_column_rle);
if ((v = WT_STAT_READ(from, btree_fixed_len)) > to->btree_fixed_len)
to->btree_fixed_len = v;
if ((v = WT_STAT_READ(from, btree_maxintlkey)) > to->btree_maxintlkey)
@@ -467,21 +482,6 @@ __wt_stat_dsrc_aggregate(
to->cursor_search_near += WT_STAT_READ(from, cursor_search_near);
to->cursor_truncate += WT_STAT_READ(from, cursor_truncate);
to->cursor_update += WT_STAT_READ(from, cursor_update);
- to->bloom_false_positive += WT_STAT_READ(from, bloom_false_positive);
- to->bloom_hit += WT_STAT_READ(from, bloom_hit);
- to->bloom_miss += WT_STAT_READ(from, bloom_miss);
- to->bloom_page_evict += WT_STAT_READ(from, bloom_page_evict);
- to->bloom_page_read += WT_STAT_READ(from, bloom_page_read);
- to->bloom_count += WT_STAT_READ(from, bloom_count);
- to->lsm_chunk_count += WT_STAT_READ(from, lsm_chunk_count);
- if ((v = WT_STAT_READ(from, lsm_generation_max)) >
- to->lsm_generation_max)
- to->lsm_generation_max = v;
- to->lsm_lookup_no_bloom += WT_STAT_READ(from, lsm_lookup_no_bloom);
- to->lsm_checkpoint_throttle +=
- WT_STAT_READ(from, lsm_checkpoint_throttle);
- to->lsm_merge_throttle += WT_STAT_READ(from, lsm_merge_throttle);
- to->bloom_size += WT_STAT_READ(from, bloom_size);
to->rec_dictionary += WT_STAT_READ(from, rec_dictionary);
to->rec_page_delete_fast += WT_STAT_READ(from, rec_page_delete_fast);
to->rec_suffix_compression +=
@@ -509,12 +509,22 @@ __wt_stat_dsrc_aggregate(
}
static const char * const __stats_connection_desc[] = {
- "async: number of allocation state races",
- "async: number of operation slots viewed for allocation",
+ "LSM: application work units currently queued",
+ "LSM: merge work units currently queued",
+ "LSM: rows merged in an LSM tree",
+ "LSM: sleep for LSM checkpoint throttle",
+ "LSM: sleep for LSM merge throttle",
+ "LSM: switch work units currently queued",
+ "LSM: tree maintenance operations discarded",
+ "LSM: tree maintenance operations executed",
+ "LSM: tree maintenance operations scheduled",
+ "LSM: tree queue hit maximum",
"async: current work queue length",
+ "async: maximum work queue length",
+ "async: number of allocation state races",
"async: number of flush calls",
+ "async: number of operation slots viewed for allocation",
"async: number of times operation allocation failed",
- "async: maximum work queue length",
"async: number of times worker found no work",
"async: total allocations",
"async: total compact calls",
@@ -522,55 +532,66 @@ static const char * const __stats_connection_desc[] = {
"async: total remove calls",
"async: total search calls",
"async: total update calls",
- "block-manager: mapped bytes read",
- "block-manager: bytes read",
- "block-manager: bytes written",
- "block-manager: mapped blocks read",
"block-manager: blocks pre-loaded",
"block-manager: blocks read",
"block-manager: blocks written",
- "cache: tracked dirty bytes in the cache",
- "cache: tracked bytes belonging to internal pages in the cache",
+ "block-manager: bytes read",
+ "block-manager: bytes written",
+ "block-manager: mapped blocks read",
+ "block-manager: mapped bytes read",
"cache: bytes currently in the cache",
- "cache: tracked bytes belonging to leaf pages in the cache",
- "cache: maximum bytes configured",
- "cache: tracked bytes belonging to overflow pages in the cache",
"cache: bytes read into cache",
"cache: bytes written from cache",
- "cache: pages evicted by application threads",
"cache: checkpoint blocked page eviction",
- "cache: unmodified pages evicted",
- "cache: page split during eviction deepened the tree",
- "cache: modified pages evicted",
- "cache: pages selected for eviction unable to be evicted",
- "cache: pages evicted because they exceeded the in-memory maximum",
- "cache: pages evicted because they had chains of deleted items",
- "cache: failed eviction of pages that exceeded the in-memory maximum",
- "cache: hazard pointer blocked page eviction",
- "cache: internal pages evicted",
- "cache: maximum page size at eviction",
+ "cache: eviction currently operating in aggressive mode",
"cache: eviction server candidate queue empty when topping up",
"cache: eviction server candidate queue not empty when topping up",
"cache: eviction server evicting pages",
"cache: eviction server populating queue, but not evicting pages",
"cache: eviction server unable to reach eviction goal",
- "cache: internal pages split during eviction",
- "cache: leaf pages split during eviction",
- "cache: pages walked for eviction",
"cache: eviction worker thread evicting pages",
- "cache: in-memory page splits",
+ "cache: failed eviction of pages that exceeded the in-memory maximum",
+ "cache: hazard pointer blocked page eviction",
"cache: in-memory page passed criteria to be split",
+ "cache: in-memory page splits",
+ "cache: internal pages evicted",
+ "cache: internal pages split during eviction",
+ "cache: leaf pages split during eviction",
"cache: lookaside table insert calls",
"cache: lookaside table remove calls",
- "cache: percentage overhead",
- "cache: tracked dirty pages in the cache",
+ "cache: maximum bytes configured",
+ "cache: maximum page size at eviction",
+ "cache: modified pages evicted",
+ "cache: page split during eviction deepened the tree",
+ "cache: page written requiring lookaside records",
"cache: pages currently held in the cache",
+ "cache: pages evicted because they exceeded the in-memory maximum",
+ "cache: pages evicted because they had chains of deleted items",
+ "cache: pages evicted by application threads",
"cache: pages read into cache",
"cache: pages read into cache requiring lookaside entries",
+ "cache: pages selected for eviction unable to be evicted",
+ "cache: pages walked for eviction",
"cache: pages written from cache",
- "cache: page written requiring lookaside records",
"cache: pages written requiring in-memory restoration",
+ "cache: percentage overhead",
+ "cache: tracked bytes belonging to internal pages in the cache",
+ "cache: tracked bytes belonging to leaf pages in the cache",
+ "cache: tracked bytes belonging to overflow pages in the cache",
+ "cache: tracked dirty bytes in the cache",
+ "cache: tracked dirty pages in the cache",
+ "cache: unmodified pages evicted",
+ "connection: auto adjusting condition resets",
+ "connection: auto adjusting condition wait calls",
+ "connection: files currently open",
+ "connection: memory allocations",
+ "connection: memory frees",
+ "connection: memory re-allocations",
"connection: pthread mutex condition wait calls",
+ "connection: pthread mutex shared lock read-lock calls",
+ "connection: pthread mutex shared lock write-lock calls",
+ "connection: total read I/Os",
+ "connection: total write I/Os",
"cursor: cursor create calls",
"cursor: cursor insert calls",
"cursor: cursor next calls",
@@ -580,96 +601,81 @@ static const char * const __stats_connection_desc[] = {
"cursor: cursor restarted searches",
"cursor: cursor search calls",
"cursor: cursor search near calls",
- "cursor: truncate calls",
"cursor: cursor update calls",
+ "cursor: truncate calls",
"data-handle: connection data handles currently active",
- "data-handle: session dhandles swept",
- "data-handle: session sweep attempts",
- "data-handle: connection sweep dhandles closed",
"data-handle: connection sweep candidate became referenced",
+ "data-handle: connection sweep dhandles closed",
"data-handle: connection sweep dhandles removed from hash list",
"data-handle: connection sweep time-of-death sets",
"data-handle: connection sweeps",
- "connection: files currently open",
- "log: total log buffer size",
+ "data-handle: session dhandles swept",
+ "data-handle: session sweep attempts",
+ "log: busy returns attempting to switch slots",
+ "log: consolidated slot closures",
+ "log: consolidated slot join races",
+ "log: consolidated slot join transitions",
+ "log: consolidated slot joins",
+ "log: consolidated slot unbuffered writes",
"log: log bytes of payload data",
"log: log bytes written",
- "log: yields waiting for previous log file close",
- "log: total size of compressed records",
- "log: total in-memory size of compressed records",
- "log: log records too small to compress",
- "log: log records not compressed",
- "log: log records compressed",
+ "log: log files manually zero-filled",
"log: log flush operations",
+ "log: log force write operations",
+ "log: log force write operations skipped",
+ "log: log records compressed",
+ "log: log records not compressed",
+ "log: log records too small to compress",
+ "log: log release advances write LSN",
+ "log: log scan operations",
+ "log: log scan records requiring two reads",
+ "log: log server thread advances write LSN",
+ "log: log server thread write LSN walk skipped",
+ "log: log sync operations",
+ "log: log sync_dir operations",
+ "log: log write operations",
+ "log: logging bytes consolidated",
"log: maximum log file size",
- "log: pre-allocated log files prepared",
"log: number of pre-allocated log files to create",
"log: pre-allocated log files not ready and missed",
+ "log: pre-allocated log files prepared",
"log: pre-allocated log files used",
- "log: log release advances write LSN",
"log: records processed by log scan",
- "log: log scan records requiring two reads",
- "log: log scan operations",
- "log: consolidated slot closures",
+ "log: total in-memory size of compressed records",
+ "log: total log buffer size",
+ "log: total size of compressed records",
"log: written slots coalesced",
- "log: logging bytes consolidated",
- "log: consolidated slot joins",
- "log: consolidated slot join races",
- "log: busy returns attempting to switch slots",
- "log: consolidated slot join transitions",
- "log: consolidated slot unbuffered writes",
- "log: log sync operations",
- "log: log sync_dir operations",
- "log: log server thread advances write LSN",
- "log: log write operations",
- "log: log files manually zero-filled",
- "LSM: sleep for LSM checkpoint throttle",
- "LSM: sleep for LSM merge throttle",
- "LSM: rows merged in an LSM tree",
- "LSM: application work units currently queued",
- "LSM: merge work units currently queued",
- "LSM: tree queue hit maximum",
- "LSM: switch work units currently queued",
- "LSM: tree maintenance operations scheduled",
- "LSM: tree maintenance operations discarded",
- "LSM: tree maintenance operations executed",
- "connection: memory allocations",
- "connection: memory frees",
- "connection: memory re-allocations",
- "thread-yield: page acquire busy blocked",
- "thread-yield: page acquire eviction blocked",
- "thread-yield: page acquire locked blocked",
- "thread-yield: page acquire read blocked",
- "thread-yield: page acquire time sleeping (usecs)",
- "connection: total read I/Os",
- "reconciliation: pages deleted",
+ "log: yields waiting for previous log file close",
"reconciliation: fast-path pages deleted",
"reconciliation: page reconciliation calls",
"reconciliation: page reconciliation calls for eviction",
+ "reconciliation: pages deleted",
"reconciliation: split bytes currently awaiting free",
"reconciliation: split objects currently awaiting free",
- "connection: pthread mutex shared lock read-lock calls",
- "connection: pthread mutex shared lock write-lock calls",
"session: open cursor count",
"session: open session count",
+ "thread-yield: page acquire busy blocked",
+ "thread-yield: page acquire eviction blocked",
+ "thread-yield: page acquire locked blocked",
+ "thread-yield: page acquire read blocked",
+ "thread-yield: page acquire time sleeping (usecs)",
+ "transaction: number of named snapshots created",
+ "transaction: number of named snapshots dropped",
"transaction: transaction begins",
- "transaction: transaction checkpoints",
- "transaction: transaction checkpoint generation",
"transaction: transaction checkpoint currently running",
+ "transaction: transaction checkpoint generation",
"transaction: transaction checkpoint max time (msecs)",
"transaction: transaction checkpoint min time (msecs)",
"transaction: transaction checkpoint most recent time (msecs)",
"transaction: transaction checkpoint total time (msecs)",
- "transaction: transactions committed",
+ "transaction: transaction checkpoints",
"transaction: transaction failures due to cache overflow",
- "transaction: transaction range of IDs currently pinned by a checkpoint",
"transaction: transaction range of IDs currently pinned",
+ "transaction: transaction range of IDs currently pinned by a checkpoint",
"transaction: transaction range of IDs currently pinned by named snapshots",
- "transaction: transactions rolled back",
- "transaction: number of named snapshots created",
- "transaction: number of named snapshots dropped",
"transaction: transaction sync calls",
- "connection: total write I/Os",
+ "transaction: transactions committed",
+ "transaction: transactions rolled back",
};
int
@@ -700,6 +706,16 @@ __wt_stat_connection_init(WT_CONNECTION_IMPL *handle)
void
__wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
{
+ /* not clearing lsm_work_queue_app */
+ /* not clearing lsm_work_queue_manager */
+ stats->lsm_rows_merged = 0;
+ stats->lsm_checkpoint_throttle = 0;
+ stats->lsm_merge_throttle = 0;
+ /* not clearing lsm_work_queue_switch */
+ stats->lsm_work_units_discarded = 0;
+ stats->lsm_work_units_done = 0;
+ stats->lsm_work_units_created = 0;
+ stats->lsm_work_queue_max = 0;
stats->async_cur_queue = 0;
/* not clearing async_max_queue */
stats->async_alloc_race = 0;
@@ -724,6 +740,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
+ /* not clearing cache_eviction_aggressive_set */
stats->cache_eviction_queue_empty = 0;
stats->cache_eviction_queue_not_empty = 0;
stats->cache_eviction_server_evicting = 0;
@@ -761,6 +778,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing cache_bytes_dirty */
/* not clearing cache_pages_dirty */
stats->cache_eviction_clean = 0;
+ stats->cond_auto_wait_reset = 0;
+ stats->cond_auto_wait = 0;
/* not clearing file_open */
stats->memory_allocation = 0;
stats->memory_free = 0;
@@ -799,6 +818,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->log_bytes_written = 0;
stats->log_zero_fills = 0;
stats->log_flush = 0;
+ stats->log_force_write = 0;
+ stats->log_force_write_skip = 0;
stats->log_compress_writes = 0;
stats->log_compress_write_fails = 0;
stats->log_compress_small = 0;
@@ -806,6 +827,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->log_scans = 0;
stats->log_scan_rereads = 0;
stats->log_write_lsn = 0;
+ stats->log_write_lsn_skip = 0;
stats->log_sync = 0;
stats->log_sync_dir = 0;
stats->log_writes = 0;
@@ -821,16 +843,6 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->log_compress_len = 0;
stats->log_slot_coalesced = 0;
stats->log_close_yields = 0;
- /* not clearing lsm_work_queue_app */
- /* not clearing lsm_work_queue_manager */
- stats->lsm_rows_merged = 0;
- stats->lsm_checkpoint_throttle = 0;
- stats->lsm_merge_throttle = 0;
- /* not clearing lsm_work_queue_switch */
- stats->lsm_work_units_discarded = 0;
- stats->lsm_work_units_done = 0;
- stats->lsm_work_units_created = 0;
- stats->lsm_work_queue_max = 0;
stats->rec_page_delete_fast = 0;
stats->rec_pages = 0;
stats->rec_pages_eviction = 0;
@@ -876,6 +888,21 @@ void
__wt_stat_connection_aggregate(
WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to)
{
+ to->lsm_work_queue_app += WT_STAT_READ(from, lsm_work_queue_app);
+ to->lsm_work_queue_manager +=
+ WT_STAT_READ(from, lsm_work_queue_manager);
+ to->lsm_rows_merged += WT_STAT_READ(from, lsm_rows_merged);
+ to->lsm_checkpoint_throttle +=
+ WT_STAT_READ(from, lsm_checkpoint_throttle);
+ to->lsm_merge_throttle += WT_STAT_READ(from, lsm_merge_throttle);
+ to->lsm_work_queue_switch +=
+ WT_STAT_READ(from, lsm_work_queue_switch);
+ to->lsm_work_units_discarded +=
+ WT_STAT_READ(from, lsm_work_units_discarded);
+ to->lsm_work_units_done += WT_STAT_READ(from, lsm_work_units_done);
+ to->lsm_work_units_created +=
+ WT_STAT_READ(from, lsm_work_units_created);
+ to->lsm_work_queue_max += WT_STAT_READ(from, lsm_work_queue_max);
to->async_cur_queue += WT_STAT_READ(from, async_cur_queue);
to->async_max_queue += WT_STAT_READ(from, async_max_queue);
to->async_alloc_race += WT_STAT_READ(from, async_alloc_race);
@@ -901,6 +928,8 @@ __wt_stat_connection_aggregate(
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint +=
WT_STAT_READ(from, cache_eviction_checkpoint);
+ to->cache_eviction_aggressive_set +=
+ WT_STAT_READ(from, cache_eviction_aggressive_set);
to->cache_eviction_queue_empty +=
WT_STAT_READ(from, cache_eviction_queue_empty);
to->cache_eviction_queue_not_empty +=
@@ -955,6 +984,8 @@ __wt_stat_connection_aggregate(
to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty);
to->cache_pages_dirty += WT_STAT_READ(from, cache_pages_dirty);
to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean);
+ to->cond_auto_wait_reset += WT_STAT_READ(from, cond_auto_wait_reset);
+ to->cond_auto_wait += WT_STAT_READ(from, cond_auto_wait);
to->file_open += WT_STAT_READ(from, file_open);
to->memory_allocation += WT_STAT_READ(from, memory_allocation);
to->memory_free += WT_STAT_READ(from, memory_free);
@@ -993,6 +1024,8 @@ __wt_stat_connection_aggregate(
to->log_bytes_written += WT_STAT_READ(from, log_bytes_written);
to->log_zero_fills += WT_STAT_READ(from, log_zero_fills);
to->log_flush += WT_STAT_READ(from, log_flush);
+ to->log_force_write += WT_STAT_READ(from, log_force_write);
+ to->log_force_write_skip += WT_STAT_READ(from, log_force_write_skip);
to->log_compress_writes += WT_STAT_READ(from, log_compress_writes);
to->log_compress_write_fails +=
WT_STAT_READ(from, log_compress_write_fails);
@@ -1002,6 +1035,7 @@ __wt_stat_connection_aggregate(
to->log_scans += WT_STAT_READ(from, log_scans);
to->log_scan_rereads += WT_STAT_READ(from, log_scan_rereads);
to->log_write_lsn += WT_STAT_READ(from, log_write_lsn);
+ to->log_write_lsn_skip += WT_STAT_READ(from, log_write_lsn_skip);
to->log_sync += WT_STAT_READ(from, log_sync);
to->log_sync_dir += WT_STAT_READ(from, log_sync_dir);
to->log_writes += WT_STAT_READ(from, log_writes);
@@ -1018,21 +1052,6 @@ __wt_stat_connection_aggregate(
to->log_compress_len += WT_STAT_READ(from, log_compress_len);
to->log_slot_coalesced += WT_STAT_READ(from, log_slot_coalesced);
to->log_close_yields += WT_STAT_READ(from, log_close_yields);
- to->lsm_work_queue_app += WT_STAT_READ(from, lsm_work_queue_app);
- to->lsm_work_queue_manager +=
- WT_STAT_READ(from, lsm_work_queue_manager);
- to->lsm_rows_merged += WT_STAT_READ(from, lsm_rows_merged);
- to->lsm_checkpoint_throttle +=
- WT_STAT_READ(from, lsm_checkpoint_throttle);
- to->lsm_merge_throttle += WT_STAT_READ(from, lsm_merge_throttle);
- to->lsm_work_queue_switch +=
- WT_STAT_READ(from, lsm_work_queue_switch);
- to->lsm_work_units_discarded +=
- WT_STAT_READ(from, lsm_work_units_discarded);
- to->lsm_work_units_done += WT_STAT_READ(from, lsm_work_units_done);
- to->lsm_work_units_created +=
- WT_STAT_READ(from, lsm_work_units_created);
- to->lsm_work_queue_max += WT_STAT_READ(from, lsm_work_queue_max);
to->rec_page_delete_fast += WT_STAT_READ(from, rec_page_delete_fast);
to->rec_pages += WT_STAT_READ(from, rec_pages);
to->rec_pages_eviction += WT_STAT_READ(from, rec_pages_eviction);
diff --git a/src/txn/txn.c b/src/txn/txn.c
index e8fd8c0c119..7a768a8fe20 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -344,7 +344,7 @@ retry:
current_id - oldest_id > 10000 && oldest_session != NULL) {
(void)__wt_verbose(session, WT_VERB_TRANSACTION,
"old snapshot %" PRIu64
- " pinned in session %d [%s]"
+ " pinned in session %" PRIu32 " [%s]"
" with snap_min %" PRIu64 "\n",
oldest_id, oldest_session->id,
oldest_session->lastop,
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 6a2c1eef826..1eebc9e9d04 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -8,6 +8,10 @@
#include "wt_internal.h"
+static int __checkpoint_lock_tree(
+ WT_SESSION_IMPL *, bool, bool, const char *[]);
+static int __checkpoint_tree_helper(WT_SESSION_IMPL *, const char *[]);
+
/*
* __wt_checkpoint_name_ok --
* Complain if the checkpoint name isn't acceptable.
@@ -155,8 +159,8 @@ __checkpoint_apply_all(WT_SESSION_IMPL *session, const char *cfg[],
ckpt_closed = cval.len != 0;
}
WT_ERR(ckpt_closed ?
- __wt_meta_btree_apply(session, op, cfg) :
- __wt_conn_btree_apply(session, false, NULL, op, cfg));
+ __wt_meta_apply_all(session, op, NULL, cfg) :
+ __wt_conn_btree_apply(session, NULL, op, NULL, cfg));
}
if (fullp != NULL)
@@ -179,14 +183,8 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[],
/* If we have already locked the handles, apply the operation. */
for (i = 0; i < session->ckpt_handle_next; ++i) {
- if (session->ckpt_handle[i].dhandle != NULL)
- WT_WITH_DHANDLE(session,
- session->ckpt_handle[i].dhandle,
- ret = (*op)(session, cfg));
- else
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_conn_btree_apply_single(session,
- session->ckpt_handle[i].name, NULL, op, cfg));
+ WT_WITH_DHANDLE(session, session->ckpt_handle[i],
+ ret = (*op)(session, cfg));
WT_RET(ret);
}
@@ -230,11 +228,11 @@ __checkpoint_data_source(WT_SESSION_IMPL *session, const char *cfg[])
}
/*
- * __wt_checkpoint_list --
+ * __wt_checkpoint_get_handles --
* Get a list of handles to flush.
*/
int
-__wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[])
+__wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_DECL_RET;
const char *name;
@@ -257,15 +255,18 @@ __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[])
name = session->dhandle->name;
session->dhandle = NULL;
- /* Record busy file names, we'll deal with them in the checkpoint. */
- if ((ret = __wt_session_get_btree(session, name, NULL, NULL, 0)) == 0)
- session->ckpt_handle[session->ckpt_handle_next++].dhandle =
- session->dhandle;
- else if (ret == EBUSY)
- ret = __wt_strdup(session, name,
- &session->ckpt_handle[session->ckpt_handle_next++].name);
+ if ((ret = __wt_session_get_btree(session, name, NULL, NULL, 0)) != 0)
+ return (ret == EBUSY ? 0 : ret);
- return (ret);
+ WT_SAVE_DHANDLE(session,
+ ret = __checkpoint_lock_tree(session, true, true, cfg));
+ if (ret != 0) {
+ WT_TRET(__wt_session_release_btree(session));
+ return (ret);
+ }
+
+ session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle;
+ return (0);
}
/*
@@ -277,7 +278,7 @@ __checkpoint_write_leaves(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_UNUSED(cfg);
- return (__wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES));
+ return (__wt_cache_op(session, WT_SYNC_WRITE_LEAVES));
}
/*
@@ -381,15 +382,20 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
/* Configure logging only if doing a full checkpoint. */
logging = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED);
+ /* Keep track of handles acquired for locking. */
+ WT_ERR(__wt_meta_track_on(session));
+ tracking = true;
+
/*
* Get a list of handles we want to flush; this may pull closed objects
* into the session cache, but we're going to do that eventually anyway.
*/
+ WT_ASSERT(session, session->ckpt_handle_next == 0);
WT_WITH_SCHEMA_LOCK(session, ret,
WT_WITH_TABLE_LOCK(session, ret,
WT_WITH_HANDLE_LIST_LOCK(session,
ret = __checkpoint_apply_all(
- session, cfg, __wt_checkpoint_list, NULL))));
+ session, cfg, __wt_checkpoint_get_handles, NULL))));
WT_ERR(ret);
/*
@@ -418,15 +424,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* completion. Do it after flushing the pages to give the
* asynchronous flush as much time as possible before we wait.
*/
- if (F_ISSET(conn, WT_CONN_CKPT_SYNC))
- WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
-
- /* Acquire the schema lock. */
- F_SET(session, WT_SESSION_LOCKED_SCHEMA);
- __wt_spin_lock(session, &conn->schema_lock);
-
- WT_ERR(__wt_meta_track_on(session));
- tracking = true;
+ WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
/* Tell logging that we are about to start a database checkpoint. */
if (full && logging)
@@ -440,6 +438,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_epoch(session, &start));
/*
+ * Start the checkpoint for real.
+ *
* Bump the global checkpoint generation, used to figure out whether
* checkpoint has visited a tree. There is no need for this to be
* atomic: it is only written while holding the checkpoint lock.
@@ -503,7 +503,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_txn_checkpoint_log(
session, full, WT_TXN_LOG_CKPT_START, NULL));
- WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint));
+ WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_tree_helper));
/*
* Clear the dhandle so the visibility check doesn't get confused about
@@ -522,8 +522,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* Checkpoints have to hit disk (it would be reasonable to configure for
* lazy checkpoints, but we don't support them yet).
*/
- if (F_ISSET(conn, WT_CONN_CKPT_SYNC))
- WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
+ WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
WT_ERR(__checkpoint_verbose_track(session,
"sync completed", &verb_timer));
@@ -543,16 +542,25 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* Recovery relies on the checkpoint LSN in the metadata only being
* updated by full checkpoints so only checkpoint the metadata for
* full or non-logged checkpoints.
+ *
+ * This is very similar to __wt_meta_track_off; ideally they would be
+ * merged.
*/
if (full || !logging) {
session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
/* Disable metadata tracking during the metadata checkpoint. */
saved_meta_next = session->meta_track_next;
session->meta_track_next = NULL;
+ WT_WITH_METADATA_LOCK(session, ret,
+ WT_WITH_DHANDLE(session,
+ WT_SESSION_META_DHANDLE(session),
+ ret = __wt_checkpoint(session, cfg)));
+ session->meta_track_next = saved_meta_next;
+ WT_ERR(ret);
+
WT_WITH_DHANDLE(session,
WT_SESSION_META_DHANDLE(session),
- ret = __wt_checkpoint(session, cfg));
- session->meta_track_next = saved_meta_next;
+ ret = __wt_checkpoint_sync(session, NULL));
WT_ERR(ret);
WT_ERR(__checkpoint_verbose_track(session,
@@ -610,23 +618,13 @@ err: /*
WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_CLEANUP, NULL));
}
- for (i = 0; i < session->ckpt_handle_next; ++i) {
- if (session->ckpt_handle[i].dhandle == NULL) {
- __wt_free(session, session->ckpt_handle[i].name);
- continue;
- }
- WT_WITH_DHANDLE(session, session->ckpt_handle[i].dhandle,
+ for (i = 0; i < session->ckpt_handle_next; ++i)
+ WT_WITH_DHANDLE(session, session->ckpt_handle[i],
WT_TRET(__wt_session_release_btree(session)));
- }
__wt_free(session, session->ckpt_handle);
session->ckpt_handle_allocated = session->ckpt_handle_next = 0;
- if (F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
- F_CLR(session, WT_SESSION_LOCKED_SCHEMA);
- __wt_spin_unlock(session, &conn->schema_lock);
- }
-
session->isolation = txn->isolation = saved_isolation;
return (ret);
}
@@ -768,14 +766,13 @@ __drop_to(WT_CKPT *ckptbase, const char *name, size_t len)
}
/*
- * __checkpoint_worker --
- * Checkpoint a tree.
+ * __checkpoint_lock_tree --
+ * Acquire the locks required to checkpoint a tree.
*/
static int
-__checkpoint_worker(WT_SESSION_IMPL *session,
- const char *cfg[], bool is_checkpoint, bool need_tracking)
+__checkpoint_lock_tree(WT_SESSION_IMPL *session,
+ bool is_checkpoint, bool need_tracking, const char *cfg[])
{
- WT_BM *bm;
WT_BTREE *btree;
WT_CKPT *ckpt, *ckptbase;
WT_CONFIG dropconf;
@@ -783,19 +780,15 @@ __checkpoint_worker(WT_SESSION_IMPL *session,
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- WT_LSN ckptlsn;
- int deleted, was_modified;
- bool fake_ckpt, force, hot_backup_locked;
- const char *name;
char *name_alloc;
+ const char *name;
+ bool hot_backup_locked;
btree = S2BT(session);
- bm = btree->bm;
conn = S2C(session);
ckpt = ckptbase = NULL;
dhandle = session->dhandle;
- was_modified = btree->modified;
- fake_ckpt = hot_backup_locked = false;
+ hot_backup_locked = false;
name_alloc = NULL;
/*
@@ -814,15 +807,6 @@ __checkpoint_worker(WT_SESSION_IMPL *session,
WT_ASSERT(session, !need_tracking ||
WT_IS_METADATA(session, dhandle) || WT_META_TRACKING(session));
- /*
- * Set the checkpoint LSN to the maximum LSN so that if logging is
- * disabled, recovery will never roll old changes forward over the
- * non-logged changes in this checkpoint. If logging is enabled, a
- * real checkpoint LSN will be assigned later for this checkpoint and
- * overwrite this.
- */
- WT_MAX_LSN(&ckptlsn);
-
/* Get the list of checkpoints for this file. */
WT_RET(__wt_meta_ckptlist_get(session, dhandle->name, &ckptbase));
@@ -873,74 +857,15 @@ __checkpoint_worker(WT_SESSION_IMPL *session,
/* Drop checkpoints with the same name as the one we're taking. */
__drop(ckptbase, name, strlen(name));
- /*
- * Check for clean objects not requiring a checkpoint.
- *
- * If we're closing a handle, and the object is clean, we can skip the
- * checkpoint, whatever checkpoints we have are sufficient. (We might
- * not have any checkpoints if the object was never modified, and that's
- * OK: the object creation code doesn't mark the tree modified so we can
- * skip newly created trees here.)
- *
- * If the application repeatedly checkpoints an object (imagine hourly
- * checkpoints using the same explicit or internal name), there's no
- * reason to repeat the checkpoint for clean objects. The test is if
- * the only checkpoint we're deleting is the last one in the list and
- * it has the same name as the checkpoint we're about to take, skip the
- * work. (We can't skip checkpoints that delete more than the last
- * checkpoint because deleting those checkpoints might free up space in
- * the file.) This means an application toggling between two (or more)
- * checkpoint names will repeatedly take empty checkpoints, but that's
- * not likely enough to make detection worthwhile.
- *
- * Checkpoint read-only objects otherwise: the application must be able
- * to open the checkpoint in a cursor after taking any checkpoint, which
- * means it must exist.
- */
- force = false;
- F_CLR(btree, WT_BTREE_SKIP_CKPT);
- if (!btree->modified && cfg != NULL) {
- ret = __wt_config_gets(session, cfg, "force", &cval);
- if (ret != 0 && ret != WT_NOTFOUND)
- WT_ERR(ret);
- if (ret == 0 && cval.val != 0)
- force = true;
- }
- if (!btree->modified && !force) {
- if (!is_checkpoint)
- goto nockpt;
-
- deleted = 0;
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (F_ISSET(ckpt, WT_CKPT_DELETE))
- ++deleted;
- /*
- * Complicated test: if the last checkpoint in the object has
- * the same name as the checkpoint we're taking (correcting for
- * internal checkpoint names with their generational suffix
- * numbers), we can skip the checkpoint, there's nothing to do.
- * The exception is if we're deleting two or more checkpoints:
- * then we may save space.
- */
- if (ckpt > ckptbase &&
- (strcmp(name, (ckpt - 1)->name) == 0 ||
- (WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
- WT_PREFIX_MATCH((ckpt - 1)->name, WT_CHECKPOINT))) &&
- deleted < 2) {
-nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT);
- WT_PUBLISH(btree->checkpoint_gen,
- S2C(session)->txn_global.checkpoint_gen);
- WT_STAT_FAST_DATA_SET(session,
- btree_checkpoint_generation,
- btree->checkpoint_gen);
- goto done;
- }
- }
-
/* Add a new checkpoint entry at the end of the list. */
WT_CKPT_FOREACH(ckptbase, ckpt)
;
WT_ERR(__wt_strdup(session, name, &ckpt->name));
+ /*
+ * We are now done with the local use of the name. Free the local
+ * allocation, if needed.
+ */
+ __wt_free(session, name_alloc);
F_SET(ckpt, WT_CKPT_ADD);
/*
@@ -1021,32 +946,128 @@ nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT);
* copy instead of forcing checkpoints on clean objects to associate
* names with checkpoints.
*/
- if (is_checkpoint)
- switch (F_MASK(btree, WT_BTREE_SPECIAL_FLAGS)) {
- case 0:
- break;
- case WT_BTREE_BULK:
- /*
- * The only checkpoints a bulk-loaded file should have
- * are fake ones we created without the underlying block
- * manager. I'm leaving this code here because it's a
- * cheap test and a nasty race.
- */
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (!F_ISSET(ckpt, WT_CKPT_ADD | WT_CKPT_FAKE))
- WT_ERR_MSG(session, ret,
- "block-manager checkpoint found "
- "for a bulk-loaded file");
- fake_ckpt = true;
- goto fake;
- case WT_BTREE_REBALANCE:
- case WT_BTREE_SALVAGE:
- case WT_BTREE_UPGRADE:
- case WT_BTREE_VERIFY:
- WT_ERR_MSG(session, EINVAL,
- "checkpoints are blocked during rebalance, "
- "salvage, upgrade or verify operations");
+ WT_ASSERT(session,
+ !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS));
+
+ hot_backup_locked = false;
+ WT_ERR(__wt_readunlock(session, conn->hot_backup_lock));
+
+ WT_ASSERT(session, btree->ckpt == NULL);
+ btree->ckpt = ckptbase;
+
+ return (0);
+
+err: if (hot_backup_locked)
+ WT_TRET(__wt_readunlock(session, conn->hot_backup_lock));
+
+ __wt_meta_ckptlist_free(session, ckptbase);
+ __wt_free(session, name_alloc);
+
+ return (ret);
+}
+
+/*
+ * __checkpoint_tree --
+ * Checkpoint a single tree.
+ * Assumes all necessary locks have been acquired by the caller.
+ */
+static int
+__checkpoint_tree(
+ WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[])
+{
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CKPT *ckpt, *ckptbase;
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ WT_LSN ckptlsn;
+ const char *name;
+ int deleted, was_modified;
+ bool fake_ckpt, force;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ ckptbase = btree->ckpt;
+ conn = S2C(session);
+ dhandle = session->dhandle;
+ fake_ckpt = false;
+ was_modified = btree->modified;
+
+ /*
+ * Set the checkpoint LSN to the maximum LSN so that if logging is
+ * disabled, recovery will never roll old changes forward over the
+ * non-logged changes in this checkpoint. If logging is enabled, a
+ * real checkpoint LSN will be assigned for this checkpoint and
+ * overwrite this.
+ */
+ WT_MAX_LSN(&ckptlsn);
+
+ /*
+ * Check for clean objects not requiring a checkpoint.
+ *
+ * If we're closing a handle, and the object is clean, we can skip the
+ * checkpoint, whatever checkpoints we have are sufficient. (We might
+ * not have any checkpoints if the object was never modified, and that's
+ * OK: the object creation code doesn't mark the tree modified so we can
+ * skip newly created trees here.)
+ *
+ * If the application repeatedly checkpoints an object (imagine hourly
+ * checkpoints using the same explicit or internal name), there's no
+ * reason to repeat the checkpoint for clean objects. The test is if
+ * the only checkpoint we're deleting is the last one in the list and
+ * it has the same name as the checkpoint we're about to take, skip the
+ * work. (We can't skip checkpoints that delete more than the last
+ * checkpoint because deleting those checkpoints might free up space in
+ * the file.) This means an application toggling between two (or more)
+ * checkpoint names will repeatedly take empty checkpoints, but that's
+ * not likely enough to make detection worthwhile.
+ *
+ * Checkpoint read-only objects otherwise: the application must be able
+ * to open the checkpoint in a cursor after taking any checkpoint, which
+ * means it must exist.
+ */
+ force = false;
+ F_CLR(btree, WT_BTREE_SKIP_CKPT);
+ if (!btree->modified && cfg != NULL) {
+ ret = __wt_config_gets(session, cfg, "force", &cval);
+ if (ret != 0 && ret != WT_NOTFOUND)
+ WT_ERR(ret);
+ if (ret == 0 && cval.val != 0)
+ force = true;
+ }
+ if (!btree->modified && !force) {
+ if (!is_checkpoint)
+ goto nockpt;
+
+ deleted = 0;
+ WT_CKPT_FOREACH(ckptbase, ckpt)
+ if (F_ISSET(ckpt, WT_CKPT_DELETE))
+ ++deleted;
+ /*
+ * Complicated test: if the tree is clean and the last two
+ * checkpoints have the same name (correcting for internal
+ * checkpoint names with their generational suffix numbers), we
+ * can skip the checkpoint because there's nothing to do. The
+ * exception is if we're deleting two or more checkpoints: then
+ * we may save space.
+ */
+ name = (ckpt - 1)->name;
+ if (ckpt > ckptbase + 1 && deleted < 2 &&
+ (strcmp(name, (ckpt - 2)->name) == 0 ||
+ (WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
+ WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) {
+nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT);
+ WT_PUBLISH(btree->checkpoint_gen,
+ S2C(session)->txn_global.checkpoint_gen);
+ WT_STAT_FAST_DATA_SET(session,
+ btree_checkpoint_generation,
+ btree->checkpoint_gen);
+ ret = 0;
+ goto err;
}
+ }
/*
* If an object has never been used (in other words, if it could become
@@ -1100,9 +1121,9 @@ nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT);
/* Flush the file from the cache, creating the checkpoint. */
if (is_checkpoint)
- WT_ERR(__wt_cache_op(session, ckptbase, WT_SYNC_CHECKPOINT));
+ WT_ERR(__wt_cache_op(session, WT_SYNC_CHECKPOINT));
else
- WT_ERR(__wt_cache_op(session, ckptbase, WT_SYNC_CLOSE));
+ WT_ERR(__wt_cache_op(session, WT_SYNC_CLOSE));
/*
* All blocks being written have been written; set the object's write
@@ -1134,9 +1155,8 @@ fake: /*
* sync the file here or we could roll forward the metadata in
* recovery and open a checkpoint that isn't yet durable.
*/
- if (F_ISSET(conn, WT_CONN_CKPT_SYNC) &&
- (WT_IS_METADATA(session, dhandle) ||
- !F_ISSET(&session->txn, WT_TXN_RUNNING)))
+ if (WT_IS_METADATA(session, dhandle) ||
+ !F_ISSET(&session->txn, WT_TXN_RUNNING))
WT_ERR(__wt_checkpoint_sync(session, NULL));
WT_ERR(__wt_meta_ckptlist_set(
@@ -1161,7 +1181,6 @@ fake: /*
WT_ERR(__wt_txn_checkpoint_log(
session, false, WT_TXN_LOG_CKPT_STOP, NULL));
-done:
err: /*
* If the checkpoint didn't complete successfully, make sure the
* tree is marked dirty.
@@ -1169,29 +1188,42 @@ err: /*
if (ret != 0 && !btree->modified && was_modified)
btree->modified = 1;
- if (hot_backup_locked)
- WT_TRET(__wt_readunlock(session, conn->hot_backup_lock));
-
__wt_meta_ckptlist_free(session, ckptbase);
- __wt_free(session, name_alloc);
+ btree->ckpt = NULL;
return (ret);
}
/*
+ * __checkpoint_tree_helper --
+ * Checkpoint a tree (suitable for use in *_apply functions).
+ */
+static int
+__checkpoint_tree_helper(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ return (__checkpoint_tree(session, true, cfg));
+}
+
+/*
* __wt_checkpoint --
* Checkpoint a file.
*/
int
__wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
+ WT_DECL_RET;
+
/* Should not be called with a checkpoint handle. */
WT_ASSERT(session, session->dhandle->checkpoint == NULL);
- /* Should be holding the schema lock. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ /* We must hold the metadata lock if checkpointing the metadata. */
+ WT_ASSERT(session, !WT_IS_METADATA(session, session->dhandle) ||
+ F_ISSET(session, WT_SESSION_LOCKED_METADATA));
- return (__checkpoint_worker(session, cfg, true, true));
+ WT_SAVE_DHANDLE(session,
+ ret = __checkpoint_lock_tree(session, true, true, cfg));
+ WT_RET(ret);
+ return (__checkpoint_tree(session, true, cfg));
}
/*
@@ -1210,8 +1242,9 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[])
/* Should not be called with a checkpoint handle. */
WT_ASSERT(session, session->dhandle->checkpoint == NULL);
- /* Should have an underlying block manager reference. */
- WT_ASSERT(session, bm != NULL);
+ /* Unnecessary if checkpoint_sync has been configured "off". */
+ if (!F_ISSET(S2C(session), WT_CONN_CKPT_SYNC))
+ return (0);
return (bm->sync(bm, session, false));
}
@@ -1240,7 +1273,7 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
F_SET(session->dhandle, WT_DHANDLE_DEAD);
if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
- return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD));
+ return (__wt_cache_op(session, WT_SYNC_DISCARD));
/*
* If closing an unmodified file, check that no update is required
@@ -1249,21 +1282,13 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
if (!btree->modified && !bulk) {
__wt_txn_update_oldest(session, true);
return (__wt_txn_visible_all(session, btree->rec_max_txn) ?
- __wt_cache_op(session, NULL, WT_SYNC_DISCARD) : EBUSY);
+ __wt_cache_op(session, WT_SYNC_DISCARD) : EBUSY);
}
/*
- * We should already have the schema lock unless we're finishing a bulk
- * load -- the only other paths to closing files (sweep and LSM) have
- * already checked for read-only trees.
- */
- WT_ASSERT(session,
- final || bulk || F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
-
- /*
* Turn on metadata tracking if:
* - The session is not already doing metadata tracking.
- * - The file was bulk loaded.
+ * - The file was not bulk loaded.
* - The close is not during connection close.
*/
need_tracking = !WT_META_TRACKING(session) && !bulk && !final;
@@ -1271,10 +1296,14 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
if (need_tracking)
WT_RET(__wt_meta_track_on(session));
- WT_TRET(__checkpoint_worker(session, NULL, false, need_tracking));
+ WT_SAVE_DHANDLE(session,
+ ret = __checkpoint_lock_tree(session, false, need_tracking, NULL));
+ WT_ASSERT(session, ret == 0);
+ if (ret == 0)
+ ret = __checkpoint_tree(session, false, NULL);
if (need_tracking)
- WT_RET(__wt_meta_track_off(session, true, ret != 0));
+ WT_TRET(__wt_meta_track_off(session, true, ret != 0));
return (ret);
}
diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c
index e6bd8a8d755..1ea4dba1152 100644
--- a/src/txn/txn_recover.c
+++ b/src/txn/txn_recover.c
@@ -88,11 +88,11 @@ __recovery_cursor(WT_SESSION_IMPL *session, WT_RECOVERY *r,
* Helper to a cursor if this operation is to be applied during recovery.
*/
#define GET_RECOVERY_CURSOR(session, r, lsnp, fileid, cp) \
- WT_ERR(__recovery_cursor( \
- (session), (r), (lsnp), (fileid), false, (cp))); \
- WT_ERR(__wt_verbose((session), WT_VERB_RECOVERY, \
- "%s op %d to file %d at LSN %u/%u", \
- (cursor == NULL) ? "Skipping" : "Applying", \
+ WT_ERR(__recovery_cursor(session, r, lsnp, fileid, false, cp)); \
+ WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY, \
+ "%s op %" PRIu32 " to file %" PRIu32 " at LSN %" PRIu32 \
+ "/%" PRIu32, \
+ cursor == NULL ? "Skipping" : "Applying", \
optype, fileid, lsnp->l.file, lsnp->l.offset)); \
if (cursor == NULL) \
break
@@ -334,7 +334,7 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
r->files[fileid].ckpt_lsn = lsn;
WT_RET(__wt_verbose(r->session, WT_VERB_RECOVERY,
- "Recovering %s with id %u @ (%" PRIu32 ", %" PRIu32 ")",
+ "Recovering %s with id %" PRIu32 " @ (%" PRIu32 ", %" PRIu32 ")",
uri, fileid, lsn.l.file, lsn.l.offset));
return (0);
@@ -449,6 +449,18 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
*/
if (!was_backup) {
r.metadata_only = true;
+ /*
+ * If this is a read-only connection, check if the checkpoint
+ * LSN in the metadata file is up to date, indicating a clean
+ * shutdown.
+ */
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ WT_ERR(__wt_log_needs_recovery(
+ session, &metafile->ckpt_lsn, &needs_rec));
+ if (needs_rec)
+ WT_ERR_MSG(session, WT_RUN_RECOVERY,
+ "Read-only database needs recovery");
+ }
if (WT_IS_INIT_LSN(&metafile->ckpt_lsn))
WT_ERR(__wt_log_scan(session,
NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r));
@@ -484,7 +496,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
*/
r.metadata_only = false;
WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY,
- "Main recovery loop: starting at %u/%u",
+ "Main recovery loop: starting at %" PRIu32 "/%" PRIu32,
r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset));
WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec));
/*
@@ -492,8 +504,17 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
* return an error if the user does not want automatic
* recovery.
*/
- if (needs_rec && FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR))
+ if (needs_rec &&
+ (FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR) ||
+ F_ISSET(conn, WT_CONN_READONLY))) {
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ WT_ERR_MSG(session, WT_RUN_RECOVERY,
+ "Read-only database needs recovery");
WT_ERR(WT_RUN_RECOVERY);
+ }
+
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ goto done;
/*
* Recovery can touch more data than fits in cache, so it relies on
@@ -504,7 +525,8 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
eviction_started = true;
/*
- * Always run recovery even if it was a clean shutdown.
+ * Unless this is a read-only connection, always run recovery,
+ * even if it was a clean shutdown.
* We can consider skipping it in the future.
*/
if (WT_IS_INIT_LSN(&r.ckpt_lsn))
diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c
index ca761a52d8a..aedd9168fbd 100644
--- a/src/utilities/util_dump.c
+++ b/src/utilities/util_dump.c
@@ -22,10 +22,10 @@ static int dump_prefix(WT_SESSION *, bool);
static int dump_record(WT_CURSOR *, bool, bool);
static int dump_suffix(WT_SESSION *);
static int dump_table_config(WT_SESSION *, WT_CURSOR *, const char *);
-static int dump_table_config_type(
+static int dump_table_config_complex(
WT_SESSION *, WT_CURSOR *, WT_CURSOR *, const char *, const char *);
static int dup_json_string(const char *, char **);
-static int print_config(WT_SESSION *, const char *, const char *, const char *);
+static int print_config(WT_SESSION *, const char *, char *[]);
static int usage(void);
int
@@ -150,9 +150,9 @@ dump_config(WT_SESSION *session, const char *uri, bool hex)
/* Open a metadata cursor. */
if ((ret = session->open_cursor(
- session, "metadata:create", NULL, NULL, &cursor)) != 0) {
+ session, "metadata:", NULL, NULL, &cursor)) != 0) {
fprintf(stderr, "%s: %s: session.open_cursor: %s\n", progname,
- "metadata:create", session->strerror(session, ret));
+ "metadata:", session->strerror(session, ret));
return (1);
}
/*
@@ -352,12 +352,23 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0)
static int
dump_json_table_config(WT_SESSION *session, const char *uri)
{
+ WT_CONFIG_ITEM cval;
WT_CURSOR *cursor;
WT_DECL_RET;
+ size_t len;
int tret;
- char *value;
+ const char *name, *value;
+ char *p;
+
+ p = NULL;
+
+ /* Get the table name. */
+ if ((name = strchr(uri, ':')) == NULL) {
+ fprintf(stderr, "%s: %s: corrupted uri\n", progname, uri);
+ return (1);
+ }
+ ++name;
- /* Dump the config. */
/* Open a metadata cursor. */
if ((ret = session->open_cursor(
session, "metadata:create", NULL, NULL, &cursor)) != 0) {
@@ -368,12 +379,41 @@ dump_json_table_config(WT_SESSION *session, const char *uri)
}
/*
- * Search for the object itself, to make sure it
- * exists, and get its config string. This where we
- * find out a table object doesn't exist, use a simple
- * error message.
+ * Search for the object itself, just to make sure it exists: we don't
+ * want to output a header if the user entered the wrong name. This is
+ * where we find out a table doesn't exist; use a simple error message.
+ *
+ * Workaround for WiredTiger "simple" table handling. Simple tables
+ * have column-group entries, but they aren't listed in the metadata's
+ * table entry. Figure out if it's a simple table and in that case,
+ * retrieve the column-group entry and use the value from its "source"
+ * file.
*/
- cursor->set_key(cursor, uri);
+ if (WT_PREFIX_MATCH(uri, "table:")) {
+ len = strlen("colgroup:") + strlen(name) + 1;
+ if ((p = malloc(len)) == NULL)
+ return (util_err(session, errno, NULL));
+ (void)snprintf(p, len, "colgroup:%s", name);
+ cursor->set_key(cursor, p);
+ if ((ret = cursor->search(cursor)) == 0) {
+ if ((ret = cursor->get_value(cursor, &value)) != 0)
+ return (util_cerr(cursor, "get_value", ret));
+ if ((ret = __wt_config_getones(
+ (WT_SESSION_IMPL *)session,
+ value, "source", &cval)) != 0)
+ return (util_err(
+ session, ret, "%s: source entry", p));
+ free(p);
+ len = cval.len + 10;
+ if ((p = malloc(len)) == NULL)
+ return (util_err(session, errno, NULL));
+ (void)snprintf(p, len, "%.*s", (int)cval.len, cval.str);
+ cursor->set_key(cursor, p);
+ } else
+ cursor->set_key(cursor, uri);
+ } else
+ cursor->set_key(cursor, uri);
+
if ((ret = cursor->search(cursor)) == 0) {
if ((ret = cursor->get_value(cursor, &value)) != 0)
ret = util_cerr(cursor, "get_value", ret);
@@ -381,8 +421,7 @@ dump_json_table_config(WT_SESSION *session, const char *uri)
session, cursor, uri, value) != 0)
ret = 1;
} else if (ret == WT_NOTFOUND)
- ret = util_err(
- session, 0, "%s: No such object exists", uri);
+ ret = util_err(session, 0, "%s: No such object exists", uri);
else
ret = util_err(session, ret, "%s", uri);
@@ -392,6 +431,7 @@ dump_json_table_config(WT_SESSION *session, const char *uri)
ret = tret;
}
+ free(p);
return (ret);
}
@@ -414,10 +454,17 @@ dump_json_table_end(WT_SESSION *session)
static int
dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri)
{
+ WT_CONFIG_ITEM cval;
WT_CURSOR *srch;
WT_DECL_RET;
+ size_t len;
int tret;
- const char *key, *name, *value;
+ bool complex_table;
+ const char *name, *v;
+ char *p, **cfg, *_cfg[4] = {NULL, NULL, NULL, NULL};
+
+ p = NULL;
+ cfg = &_cfg[3];
/* Get the table name. */
if ((name = strchr(uri, ':')) == NULL) {
@@ -427,59 +474,111 @@ dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri)
++name;
/*
- * Dump out the config information: first, dump the uri entry itself
- * (requires a lookup).
+ * Dump out the config information: first, dump the uri entry itself,
+ * it overrides all subsequent configurations.
*/
cursor->set_key(cursor, uri);
if ((ret = cursor->search(cursor)) != 0)
return (util_cerr(cursor, "search", ret));
- if ((ret = cursor->get_key(cursor, &key)) != 0)
- return (util_cerr(cursor, "get_key", ret));
- if ((ret = cursor->get_value(cursor, &value)) != 0)
+ if ((ret = cursor->get_value(cursor, &v)) != 0)
return (util_cerr(cursor, "get_value", ret));
- if (print_config(session, key, value, NULL) != 0)
- return (1);
+ if ((*--cfg = strdup(v)) == NULL)
+ return (util_err(session, errno, NULL));
/*
- * The underlying table configuration function needs a second cursor:
- * open one before calling it, it makes error handling hugely simpler.
+ * Workaround for WiredTiger "simple" table handling. Simple tables
+ * have column-group entries, but they aren't listed in the metadata's
+ * table entry, and the name is different from other column-groups.
+ * Figure out if it's a simple table and in that case, retrieve the
+ * column-group's configuration value and the column-group's "source"
+ * entry, where the column-group entry overrides the source's.
*/
- if ((ret =
- session->open_cursor(session, NULL, cursor, NULL, &srch)) != 0)
- return (util_cerr(cursor, "open_cursor", ret));
+ complex_table = false;
+ if (WT_PREFIX_MATCH(uri, "table:")) {
+ len = strlen("colgroup:") + strlen(name) + 1;
+ if ((p = malloc(len)) == NULL)
+ return (util_err(session, errno, NULL));
+ (void)snprintf(p, len, "colgroup:%s", name);
+ cursor->set_key(cursor, p);
+ if ((ret = cursor->search(cursor)) == 0) {
+ if ((ret = cursor->get_value(cursor, &v)) != 0)
+ return (util_cerr(cursor, "get_value", ret));
+ if ((*--cfg = strdup(v)) == NULL)
+ return (util_err(session, errno, NULL));
+ if ((ret =__wt_config_getones(
+ (WT_SESSION_IMPL *)session,
+ *cfg, "source", &cval)) != 0)
+ return (util_err(
+ session, ret, "%s: source entry", p));
+ free(p);
+ len = cval.len + 10;
+ if ((p = malloc(len)) == NULL)
+ return (util_err(session, errno, NULL));
+ (void)snprintf(p, len, "%.*s", (int)cval.len, cval.str);
+ cursor->set_key(cursor, p);
+ if ((ret = cursor->search(cursor)) != 0)
+ return (util_cerr(cursor, "search", ret));
+ if ((ret = cursor->get_value(cursor, &v)) != 0)
+ return (util_cerr(cursor, "get_value", ret));
+ if ((*--cfg = strdup(v)) == NULL)
+ return (util_err(session, errno, NULL));
+ } else
+ complex_table = true;
+ }
- if ((ret = dump_table_config_type(
- session, cursor, srch, name, "colgroup:")) == 0)
- ret = dump_table_config_type(
- session, cursor, srch, name, "index:");
+ if (print_config(session, uri, cfg) != 0)
+ return (1);
- if ((tret = srch->close(srch)) != 0) {
- tret = util_cerr(cursor, "close", tret);
- if (ret == 0)
- ret = tret;
+ if (complex_table) {
+ /*
+ * The underlying table configuration function needs a second
+ * cursor: open one before calling it, it makes error handling
+ * hugely simpler.
+ */
+ if ((ret = session->open_cursor(
+ session, "metadata:", NULL, NULL, &srch)) != 0)
+ return (util_cerr(cursor, "open_cursor", ret));
+
+ if ((ret = dump_table_config_complex(
+ session, cursor, srch, name, "colgroup:")) == 0)
+ ret = dump_table_config_complex(
+ session, cursor, srch, name, "index:");
+
+ if ((tret = srch->close(srch)) != 0) {
+ tret = util_cerr(cursor, "close", tret);
+ if (ret == 0)
+ ret = tret;
+ }
}
+ free(p);
+ free(_cfg[0]);
+ free(_cfg[1]);
+ free(_cfg[2]);
return (ret);
}
/*
- * dump_table_config_type --
+ * dump_table_config_complex --
* Dump the column groups or indices for a table.
*/
static int
-dump_table_config_type(WT_SESSION *session,
+dump_table_config_complex(WT_SESSION *session,
WT_CURSOR *cursor, WT_CURSOR *srch, const char *name, const char *entry)
{
WT_CONFIG_ITEM cval;
WT_DECL_RET;
- const char *key, *skip, *value, *value_source;
+ const char *key;
+ size_t len;
int exact;
- char *p;
+ const char *v;
+ char *p, *cfg[3] = {NULL, NULL, NULL};
/*
* Search the file looking for column group and index key/value pairs:
* for each one, look up the related source information and append it
- * to the base record.
+ * to the base record, where the column group and index configuration
+ * overrides the source configuration.
*/
cursor->set_key(cursor, entry);
if ((ret = cursor->search_near(cursor, &exact)) != 0) {
@@ -497,27 +596,32 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0)
if (!WT_PREFIX_MATCH(key, entry))
return (0);
- /* Check for a table name match. */
- skip = key + strlen(entry);
- if (strncmp(
- skip, name, strlen(name)) != 0 || skip[strlen(name)] != ':')
+ /*
+ * Check for a table name match. This test will match "simple"
+ * table column-groups as well as the more complex ones, but
+ * the previous version of the test was wrong and we're only
+ * in this function in the case of complex tables.
+ */
+ if (!WT_PREFIX_MATCH(key + strlen(entry), name))
continue;
/* Get the value. */
- if ((ret = cursor->get_value(cursor, &value)) != 0)
+ if ((ret = cursor->get_value(cursor, &v)) != 0)
return (util_cerr(cursor, "get_value", ret));
+ if ((cfg[1] = strdup(v)) == NULL)
+ return (util_err(session, errno, NULL));
/* Crack it and get the underlying source. */
if ((ret = __wt_config_getones(
- (WT_SESSION_IMPL *)session, value, "source", &cval)) != 0)
+ (WT_SESSION_IMPL *)session, cfg[1], "source", &cval)) != 0)
return (
util_err(session, ret, "%s: source entry", key));
/* Nul-terminate the source entry. */
- if ((p = malloc(cval.len + 10)) == NULL)
+ len = cval.len + 10;
+ if ((p = malloc(len)) == NULL)
return (util_err(session, errno, NULL));
- (void)strncpy(p, cval.str, cval.len);
- p[cval.len] = '\0';
+ (void)snprintf(p, len, "%.*s", (int)cval.len, cval.str);
srch->set_key(srch, p);
if ((ret = srch->search(srch)) != 0)
ret = util_err(session, ret, "%s: %s", key, p);
@@ -526,16 +630,22 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0)
return (1);
/* Get the source's value. */
- if ((ret = srch->get_value(srch, &value_source)) != 0)
+ if ((ret = srch->get_value(srch, &v)) != 0)
return (util_cerr(cursor, "get_value", ret));
+ if ((cfg[0] = strdup(v)) == NULL)
+ return (util_err(session, errno, NULL));
/*
* The dumped configuration string is the original key plus the
- * source's configuration.
+ * source's configuration, where the values of the original key
+ * override any source configurations of the same name.
*/
- if (print_config(session, key, value, value_source) != 0)
+ if (print_config(session, key, cfg) != 0)
return (util_err(session, EIO, NULL));
}
+ free(cfg[0]);
+ free(cfg[1]);
+
if (ret == 0 || ret == WT_NOTFOUND)
return (0);
return (util_cerr(cursor, "next", ret));
@@ -649,27 +759,21 @@ dup_json_string(const char *str, char **result)
* Output a key/value URI pair by combining v1 and v2.
*/
static int
-print_config(WT_SESSION *session,
- const char *key, const char *v1, const char *v2)
+print_config(WT_SESSION *session, const char *key, char *cfg[])
{
WT_DECL_RET;
char *value_ret;
- const char *cfg[] = { v1, v2, NULL };
/*
- * The underlying call will stop if the first string is NULL -- check
- * here and swap in that case.
+ * We have all of the object configuration, but don't have the default
+ * session.create configuration. Have the underlying library add in the
+ * defaults and collapse it all into one load configuration string.
*/
- if (cfg[0] == NULL) {
- cfg[0] = cfg[1];
- cfg[1] = NULL;
- }
-
- if ((ret = __wt_config_collapse(
+ if ((ret = __wt_schema_create_final(
(WT_SESSION_IMPL *)session, cfg, &value_ret)) != 0)
return (util_err(session, ret, NULL));
ret = printf("%s\n%s\n", key, value_ret);
- free((char *)value_ret);
+ free(value_ret);
if (ret < 0)
return (util_err(session, EIO, NULL));
return (0);
diff --git a/test/bloom/test_bloom.c b/test/bloom/test_bloom.c
index 04fc8d1c371..f95bc7faaf9 100644
--- a/test/bloom/test_bloom.c
+++ b/test/bloom/test_bloom.c
@@ -55,6 +55,8 @@ void usage(void);
extern char *__wt_optarg;
extern int __wt_optind;
+void (*custom_die)(void) = NULL;
+
int
main(int argc, char *argv[])
{
@@ -129,11 +131,9 @@ setup(void)
"create,error_prefix=\"%s\",cache_size=%" PRIu32 "MB,%s",
g.progname, g.c_cache, g.config_open == NULL ? "" : g.config_open);
- if ((ret = wiredtiger_open(NULL, NULL, config, &conn)) != 0)
- testutil_die(ret, "wiredtiger_open");
+ testutil_check(wiredtiger_open(NULL, NULL, config, &conn));
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- testutil_die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
g.wt_conn = conn;
g.wt_session = session;
@@ -153,39 +153,35 @@ run(void)
/* Use the internal session handle to access private APIs. */
sess = (WT_SESSION_IMPL *)g.wt_session;
- if ((ret = __wt_bloom_create(
- sess, uri, NULL, g.c_ops, g.c_factor, g.c_k, &bloomp)) != 0)
- testutil_die(ret, "__wt_bloom_create");
+ testutil_check(__wt_bloom_create(
+ sess, uri, NULL, g.c_ops, g.c_factor, g.c_k, &bloomp));
item.size = g.c_key_max;
for (i = 0; i < g.c_ops; i++) {
item.data = g.entries[i];
if ((ret = __wt_bloom_insert(bloomp, &item)) != 0)
- testutil_die(ret, "__wt_bloom_insert: %d", i);
+ testutil_die(ret, "__wt_bloom_insert: %" PRIu32, i);
}
- if ((ret = __wt_bloom_finalize(bloomp)) != 0)
- testutil_die(ret, "__wt_bloom_finalize");
+ testutil_check(__wt_bloom_finalize(bloomp));
for (i = 0; i < g.c_ops; i++) {
item.data = g.entries[i];
if ((ret = __wt_bloom_get(bloomp, &item)) != 0) {
- fprintf(stderr, "get failed at record: %d\n", i);
+ fprintf(stderr,
+ "get failed at record: %" PRIu32 "\n", i);
testutil_die(ret, "__wt_bloom_get");
}
}
- if ((ret = __wt_bloom_close(bloomp)) != 0)
- testutil_die(ret, "__wt_bloom_close");
-
- if ((ret = g.wt_session->checkpoint(g.wt_session, NULL)) != 0)
- testutil_die(ret, "WT_SESSION.checkpoint");
- if ((ret = __wt_bloom_open(
- sess, uri, g.c_factor, g.c_k, NULL, &bloomp)) != 0)
- testutil_die(ret, "__wt_bloom_open");
+ testutil_check(__wt_bloom_close(bloomp));
+
+ testutil_check(g.wt_session->checkpoint(g.wt_session, NULL));
+ testutil_check(__wt_bloom_open(
+ sess, uri, g.c_factor, g.c_k, NULL, &bloomp));
+
for (i = 0; i < g.c_ops; i++) {
item.data = g.entries[i];
- if ((ret = __wt_bloom_get(bloomp, &item)) != 0)
- testutil_die(ret, "__wt_bloom_get");
+ testutil_check(__wt_bloom_get(bloomp, &item));
}
/*
@@ -194,33 +190,34 @@ run(void)
*/
item.size = g.c_key_max + 10;
item.data = calloc(item.size, 1);
+ if (item.data == NULL)
+ testutil_die(ENOMEM, "value buffer malloc");
memset((void *)item.data, 'a', item.size);
for (i = 0, fp = 0; i < g.c_ops; i++) {
((uint8_t *)item.data)[i % item.size] =
'a' + ((uint8_t)rand() % 26);
if ((ret = __wt_bloom_get(bloomp, &item)) == 0)
++fp;
+ if (ret != 0 && ret != WT_NOTFOUND)
+ testutil_die(ret, "__wt_bloom_get");
}
free((void *)item.data);
- printf("Out of %d ops, got %d false positives, %.4f%%\n",
+ printf(
+ "Out of %" PRIu32 " ops, got %" PRIu32 " false positives, %.4f%%\n",
g.c_ops, fp, 100.0 * fp/g.c_ops);
- if ((ret = __wt_bloom_drop(bloomp, NULL)) != 0)
- testutil_die(ret, "__wt_bloom_drop");
+ testutil_check(__wt_bloom_drop(bloomp, NULL));
}
void
cleanup(void)
{
uint32_t i;
- int ret;
for (i = 0; i < g.c_ops; i++)
free(g.entries[i]);
free(g.entries);
- if ((ret = g.wt_session->close(g.wt_session, NULL)) != 0)
- testutil_die(ret, "WT_SESSION.close");
- if ((g.wt_conn->close(g.wt_conn, NULL)) != 0)
- testutil_die(ret, "WT_CONNECTION.close");
+ testutil_check(g.wt_session->close(g.wt_session, NULL));
+ testutil_check(g.wt_conn->close(g.wt_conn, NULL));
}
/*
diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c
index 1914ad0188a..c5524b3c63e 100644
--- a/test/checkpoint/test_checkpoint.c
+++ b/test/checkpoint/test_checkpoint.c
@@ -41,6 +41,8 @@ static int wt_shutdown(void);
extern int __wt_optind;
extern char *__wt_optarg;
+void (*custom_die)(void) = NULL;
+
int
main(int argc, char *argv[])
{
@@ -134,7 +136,7 @@ main(int argc, char *argv[])
printf("%s: process %" PRIu64 "\n", g.progname, (uint64_t)getpid());
for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) {
- printf(" %d: %u workers, %u tables\n",
+ printf(" %d: %d workers, %d tables\n",
cnt, g.nworkers, g.ntables);
(void)cleanup(); /* Clean up previous runs */
diff --git a/test/cursor_order/Makefile.am b/test/cursor_order/Makefile.am
new file mode 100644
index 00000000000..c0c0ed639bf
--- /dev/null
+++ b/test/cursor_order/Makefile.am
@@ -0,0 +1,13 @@
+AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
+ -I$(top_srcdir)/test/utility
+
+noinst_PROGRAMS = cursor_order
+cursor_order_LDADD = $(top_builddir)/libwiredtiger.la
+
+cursor_order_SOURCES = cursor_order_file.c cursor_order_ops.c cursor_order.c
+cursor_order_LDFLAGS = -static
+
+TESTS = $(noinst_PROGRAMS)
+
+clean-local:
+ rm -rf WiredTiger* wt.* *.core __stats
diff --git a/test/cursor_order/cursor_order.c b/test/cursor_order/cursor_order.c
new file mode 100644
index 00000000000..d8cfc0c1421
--- /dev/null
+++ b/test/cursor_order/cursor_order.c
@@ -0,0 +1,307 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "cursor_order.h"
+
+static char home[512]; /* Program working dir */
+static char *progname; /* Program name */
+static FILE *logfp; /* Log file */
+
+static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
+static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
+static void onint(int);
+static void shutdown(void);
+static int usage(void);
+static void wt_connect(SHARED_CONFIG *, char *);
+static void wt_shutdown(SHARED_CONFIG *);
+
+extern int __wt_optind;
+extern char *__wt_optarg;
+
+void (*custom_die)(void) = NULL;
+
+int
+main(int argc, char *argv[])
+{
+ SHARED_CONFIG _cfg, *cfg;
+ int ch, cnt, runs;
+ char *config_open, *working_dir;
+
+ if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL)
+ progname = argv[0];
+ else
+ ++progname;
+
+ cfg = &_cfg;
+ config_open = NULL;
+ working_dir = NULL;
+ runs = 1;
+
+ /*
+ * Explicitly initialize the shared configuration object before
+ * parsing command line options.
+ */
+ cfg->append_inserters = 1;
+ cfg->conn = NULL;
+ cfg->ftype = ROW;
+ cfg->max_nops = 1000000;
+ cfg->multiple_files = false;
+ cfg->nkeys = 1000;
+ cfg->reverse_scanners = 5;
+ cfg->reverse_scan_ops = 10;
+ cfg->thread_finish = false;
+ cfg->vary_nops = false;
+
+ while ((ch = __wt_getopt(
+ progname, argc, argv, "C:Fk:h:l:n:R:r:t:vw:W:")) != EOF)
+ switch (ch) {
+ case 'C': /* wiredtiger_open config */
+ config_open = __wt_optarg;
+ break;
+ case 'F': /* multiple files */
+ cfg->multiple_files = true;
+ break;
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 'k': /* rows */
+ cfg->nkeys = (uint64_t)atol(__wt_optarg);
+ break;
+ case 'l': /* log */
+ if ((logfp = fopen(__wt_optarg, "w")) == NULL) {
+ fprintf(stderr,
+ "%s: %s\n", __wt_optarg, strerror(errno));
+ return (EXIT_FAILURE);
+ }
+ break;
+ case 'n': /* operations */
+ cfg->max_nops = (uint64_t)atol(__wt_optarg);
+ break;
+ case 'R':
+ cfg->reverse_scanners = (uint64_t)atol(__wt_optarg);
+ break;
+ case 'r': /* runs */
+ runs = atoi(__wt_optarg);
+ break;
+ case 't':
+ switch (__wt_optarg[0]) {
+ case 'f':
+ cfg->ftype = FIX;
+ break;
+ case 'r':
+ cfg->ftype = ROW;
+ break;
+ case 'v':
+ cfg->ftype = VAR;
+ break;
+ default:
+ return (usage());
+ }
+ break;
+ case 'v': /* vary operation count */
+ cfg->vary_nops = true;
+ break;
+ case 'w':
+ cfg->reverse_scan_ops = (uint64_t)atol(__wt_optarg);
+ break;
+ case 'W':
+ cfg->append_inserters = (uint64_t)atol(__wt_optarg);
+ break;
+ default:
+ return (usage());
+ }
+
+ argc -= __wt_optind;
+ argv += __wt_optind;
+ if (argc != 0)
+ return (usage());
+
+ testutil_work_dir_from_path(home, 512, working_dir);
+
+ if (cfg->vary_nops && !cfg->multiple_files) {
+ fprintf(stderr,
+ "Variable op counts only supported with multiple tables\n");
+ return (usage());
+ }
+
+ /* Clean up on signal. */
+ (void)signal(SIGINT, onint);
+
+ printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
+ for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) {
+ printf(
+ " %d: %" PRIu64
+ " reverse scanners, %" PRIu64 " writers\n",
+ cnt, cfg->reverse_scanners, cfg->append_inserters);
+
+ shutdown(); /* Clean up previous runs */
+
+ wt_connect(cfg, config_open); /* WiredTiger connection */
+
+ if (ops_start(cfg))
+ return (EXIT_FAILURE);
+
+ wt_shutdown(cfg); /* WiredTiger shut down */
+ }
+ return (0);
+}
+
+/*
+ * wt_connect --
+ * Recreate the working directory and open the WiredTiger connection.
+ */
+static void
+wt_connect(SHARED_CONFIG *cfg, char *config_open)
+{
+ static WT_EVENT_HANDLER event_handler = {
+ handle_error,
+ handle_message,
+ NULL,
+ NULL /* Close handler. */
+ };
+ int ret;
+ char config[512];
+ size_t print_count;
+
+ testutil_clean_work_dir(home);
+ testutil_make_work_dir(home);
+
+ print_count = (size_t)snprintf(config, sizeof(config),
+ "create,statistics=(all),error_prefix=\"%s\"%s%s",
+ progname,
+ config_open == NULL ? "" : ",",
+ config_open == NULL ? "" : config_open);
+
+ if (print_count >= sizeof(config))
+ testutil_die(EINVAL, "Config string too long");
+
+ if ((ret = wiredtiger_open(
+ home, &event_handler, config, &cfg->conn)) != 0)
+ testutil_die(ret, "wiredtiger_open");
+}
+
+/*
+ * wt_shutdown --
+ * Flush the file to disk and shut down the WiredTiger connection.
+ */
+static void
+wt_shutdown(SHARED_CONFIG *cfg)
+{
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ int ret;
+
+ conn = cfg->conn;
+
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ testutil_die(ret, "conn.session");
+
+ if ((ret = session->checkpoint(session, NULL)) != 0)
+ testutil_die(ret, "session.checkpoint");
+
+ if ((ret = conn->close(conn, NULL)) != 0)
+ testutil_die(ret, "conn.close");
+}
+
+/*
+ * shutdown --
+ * Clean up from previous runs.
+ */
+static void
+shutdown(void)
+{
+ testutil_clean_work_dir(home);
+}
+
+static int
+handle_error(WT_EVENT_HANDLER *handler,
+ WT_SESSION *session, int error, const char *errmsg)
+{
+ (void)(handler);
+ (void)(session);
+ (void)(error);
+
+ return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
+}
+
+static int
+handle_message(WT_EVENT_HANDLER *handler,
+ WT_SESSION *session, const char *message)
+{
+ (void)(handler);
+ (void)(session);
+
+ if (logfp != NULL)
+ return (fprintf(logfp, "%s\n", message) < 0 ? -1 : 0);
+
+ return (printf("%s\n", message) < 0 ? -1 : 0);
+}
+
+/*
+ * onint --
+ * Interrupt signal handler.
+ */
+static void
+onint(int signo)
+{
+ (void)(signo);
+
+ shutdown();
+
+ fprintf(stderr, "\n");
+ exit(EXIT_FAILURE);
+}
+
+/*
+ * usage --
+ * Display usage statement and return failure.
+ */
+static int
+usage(void)
+{
+ fprintf(stderr,
+ "usage: %s "
+ "[-Fv] [-C wiredtiger-config] [-h home] [-k keys] [-l log]\n\t"
+ "[-n ops] [-R reverse_scanners] [-r runs] [-t f|r|v]\n\t"
+ "[-W append_inserters] [-w reverse_scan_ops]\n",
+ progname);
+ fprintf(stderr, "%s",
+ "\t-C specify wiredtiger_open configuration arguments\n"
+ "\t-F create a file per thread\n"
+ "\t-h set the working (home) directory\n"
+ "\t-k set number of keys to load\n"
+ "\t-l specify a log file\n"
+ "\t-n set number of operations each thread does\n"
+ "\t-R set number of reverse scanner threads\n"
+ "\t-r set number of runs (0 for continuous)\n"
+ "\t-t set a file type (fix | row | var)\n"
+ "\t-v do a different number of operations on different tables\n"
+ "\t-w set number of items to walk in a reverse scan\n"
+ "\t-W set number of threads doing append inserts\n");
+ return (EXIT_FAILURE);
+}
diff --git a/test/cursor_order/cursor_order.h b/test/cursor_order/cursor_order.h
new file mode 100644
index 00000000000..dd49fce124b
--- /dev/null
+++ b/test/cursor_order/cursor_order.h
@@ -0,0 +1,54 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <signal.h>
+
+#include "test_util.i"
+
+#define FNAME "file:cursor_order.%03d" /* Object name format, takes an index */
+
+typedef enum { FIX, ROW, VAR } __ftype; /* File type */
+
+typedef struct {
+ uint64_t append_inserters; /* Number of append threads */
+ WT_CONNECTION *conn; /* WiredTiger connection */
+ __ftype ftype; /* File type */
+ uint64_t key_range; /* Current key range */
+ uint64_t max_nops; /* Maximum operations per thread */
+ bool multiple_files; /* File per thread */
+ uint64_t nkeys; /* Keys to load */
+ uint64_t reverse_scanners; /* Number of scan threads */
+ uint64_t reverse_scan_ops; /* Keys to visit per scan */
+ bool thread_finish; /* Signal to finish run. */
+ bool vary_nops; /* Vary operations per thread */
+
+} SHARED_CONFIG;
+
+void load(SHARED_CONFIG *, const char *); /* Create and bulk-load an object */
+int ops_start(SHARED_CONFIG *); /* Run the worker threads */
+void verify(SHARED_CONFIG *, const char *); /* Verify an object */
diff --git a/test/cursor_order/cursor_order_file.c b/test/cursor_order/cursor_order_file.c
new file mode 100644
index 00000000000..5dc7194b5fb
--- /dev/null
+++ b/test/cursor_order/cursor_order_file.c
@@ -0,0 +1,132 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "cursor_order.h"
+
+/*
+ * file_create --
+ *	Create the named object with a key format chosen from the configured
+ *	file type ("S" for row-store, "r" otherwise) and, for fixed-length
+ *	files, a 3-bit value format. An object that already exists (EEXIST)
+ *	is not treated as an error; other failures go to testutil_die.
+ */
+static void
+file_create(SHARED_CONFIG *cfg, const char *name)
+{
+	WT_CONNECTION *conn;
+	WT_SESSION *session;
+	int len, ret;
+	char config[128];
+
+	conn = cfg->conn;
+
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "conn.session");
+
+	/*
+	 * Build the configuration in one call and check the result. A
+	 * negative or too-large return means the string was not completely
+	 * written, so it must never be used to advance a pointer into the
+	 * buffer. The optional value format carries its own leading comma so
+	 * no empty ",," entry is produced.
+	 */
+	len = snprintf(config, sizeof(config),
+	    "key_format=%s,"
+	    "internal_page_max=%d,"
+	    "split_deepen_min_child=200,"
+	    "leaf_page_max=%d"
+	    "%s",
+	    cfg->ftype == ROW ? "S" : "r", 16 * 1024, 128 * 1024,
+	    cfg->ftype == FIX ? ",value_format=3t" : "");
+	if (len < 0 || (size_t)len >= sizeof(config))
+		testutil_die(EINVAL, "file_create: configuration too long");
+
+	if ((ret = session->create(session, name, config)) != 0 &&
+	    ret != EEXIST)
+		testutil_die(ret, "session.create");
+
+	if ((ret = session->close(session, NULL)) != 0)
+		testutil_die(ret, "session.close");
+}
+
+/*
+ * load --
+ *	Create the named object, bulk-load keys 1 through cfg->nkeys, set
+ *	cfg->key_range to cfg->nkeys and checkpoint. Failures are reported
+ *	through testutil_die.
+ */
+void
+load(SHARED_CONFIG *cfg, const char *name)
+{
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor;
+	WT_ITEM *value, _value;
+	WT_SESSION *session;
+	char keybuf[64], valuebuf[64];
+	int64_t keyno;
+	int ret;
+
+	conn = cfg->conn;
+
+	file_create(cfg, name);
+
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "conn.session");
+
+	if ((ret =
+	    session->open_cursor(session, name, NULL, "bulk", &cursor)) != 0)
+		testutil_die(ret, "cursor.open");
+
+	value = &_value;
+	for (keyno = 1; keyno <= (int64_t)cfg->nkeys; ++keyno) {
+		if (cfg->ftype == ROW) {
+			snprintf(keybuf, sizeof(keybuf), "%016u", (u_int)keyno);
+			cursor->set_key(cursor, keybuf);
+		} else {
+			/*
+			 * Record-number ("r") keys are 64-bit and set_key is
+			 * variadic, so the argument must have exactly that
+			 * type; a 32-bit argument is undefined behavior.
+			 */
+			cursor->set_key(cursor, (uint64_t)keyno);
+		}
+		value->data = valuebuf;
+		if (cfg->ftype == FIX)
+			cursor->set_value(cursor, 0x01);
+		else {
+			value->size = (uint32_t)snprintf(
+			    valuebuf, sizeof(valuebuf), "%37u", (u_int)keyno);
+			cursor->set_value(cursor, value);
+		}
+		if ((ret = cursor->insert(cursor)) != 0)
+			testutil_die(ret, "cursor.insert");
+	}
+
+	/* Setup the starting key range for the workload phase. */
+	cfg->key_range = cfg->nkeys;
+	if ((ret = cursor->close(cursor)) != 0)
+		testutil_die(ret, "cursor.close");
+	if ((ret = session->checkpoint(session, NULL)) != 0)
+		testutil_die(ret, "session.checkpoint");
+
+	if ((ret = session->close(session, NULL)) != 0)
+		testutil_die(ret, "session.close");
+}
+
+/*
+ * verify --
+ *	Open a session, run session.verify on the named object and close the
+ *	session. Failures are reported through testutil_die.
+ */
+void
+verify(SHARED_CONFIG *cfg, const char *name)
+{
+	WT_CONNECTION *conn;
+	WT_SESSION *session;
+	int ret;
+
+	conn = cfg->conn;
+
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "conn.session");
+
+	/* Name the call that actually failed in the error message. */
+	if ((ret = session->verify(session, name, NULL)) != 0)
+		testutil_die(ret, "session.verify");
+
+	if ((ret = session->close(session, NULL)) != 0)
+		testutil_die(ret, "session.close");
+}
diff --git a/test/cursor_order/cursor_order_ops.c b/test/cursor_order/cursor_order_ops.c
new file mode 100644
index 00000000000..d44505ab2f3
--- /dev/null
+++ b/test/cursor_order/cursor_order_ops.c
@@ -0,0 +1,370 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "cursor_order.h"
+
+static void *append_insert(void *);
+static void print_stats(SHARED_CONFIG *);
+static void *reverse_scan(void *);
+
+typedef struct {
+ char *name; /* Name of the object the thread operates on */
+ uint64_t nops; /* Thread op count */
+
+ WT_RAND_STATE rnd; /* RNG state, initialized at thread start */
+
+ int append_insert; /* Count of append insert operations */
+ int reverse_scans; /* Count of reverse scans (cursor.prev sequences) */
+ SHARED_CONFIG *cfg; /* Shared run configuration */
+} INFO;
+
+static INFO *run_info;
+
+/*
+ * ops_scaled_nops --
+ *	Return the operation count used when operation counts vary between
+ *	tables: max_nops shifted right by "shift" bits, but never less than
+ *	1000.
+ */
+static uint64_t
+ops_scaled_nops(uint64_t max_nops, uint64_t shift)
+{
+	/* Shifting a 64-bit value by 64 or more bits is undefined. */
+	if (shift >= 64)
+		return (1000);
+	return (WT_MAX(1000, max_nops >> shift));
+}
+
+/*
+ * ops_start --
+ *	Set up the per-thread state, create and load the files, run the
+ *	reverse-scan and append-insert threads to completion, report timing,
+ *	verify the files and print per-thread statistics. Returns 0; failures
+ *	are reported through testutil_die.
+ *
+ *	run_info slots [0, append_inserters) belong to the append threads and
+ *	slots [append_inserters, total) belong to the reverse scanners; each
+ *	thread is started with the index of its own slot.
+ */
+int
+ops_start(SHARED_CONFIG *cfg)
+{
+	struct timeval start, stop;
+	double seconds;
+	pthread_t *tids;
+	uint64_t i, name_index, nthreads, offset, ops_per_second, total_nops;
+	int ret;
+	void *thread_ret;
+
+	tids = NULL;			/* Keep GCC 4.1 happy. */
+	total_nops = 0;
+	nthreads = cfg->reverse_scanners + cfg->append_inserters;
+
+	/* Create per-thread structures. */
+	if ((run_info = calloc((size_t)nthreads, sizeof(*run_info))) == NULL)
+		testutil_die(errno, "calloc");
+
+	if ((tids = calloc((size_t)nthreads, sizeof(*tids))) == NULL)
+		testutil_die(errno, "calloc");
+
+	/* Create the files and load the initial records. */
+	for (i = 0; i < cfg->append_inserters; ++i) {
+		run_info[i].cfg = cfg;
+		if (i == 0 || cfg->multiple_files) {
+			if ((run_info[i].name = malloc(64)) == NULL)
+				testutil_die(errno, "malloc");
+			snprintf(run_info[i].name, 64, FNAME, (int)i);
+
+			/* Vary by orders of magnitude */
+			if (cfg->vary_nops)
+				run_info[i].nops =
+				    ops_scaled_nops(cfg->max_nops, i);
+			load(cfg, run_info[i].name);
+		} else
+			run_info[i].name = run_info[0].name;
+
+		/* Setup op count if not varying ops. */
+		if (run_info[i].nops == 0)
+			run_info[i].nops = cfg->max_nops;
+		total_nops += run_info[i].nops;
+	}
+
+	/* Setup the reverse scanner configurations */
+	for (i = 0; i < cfg->reverse_scanners; ++i) {
+		offset = i + cfg->append_inserters;
+		run_info[offset].cfg = cfg;
+		if (cfg->multiple_files) {
+			if ((run_info[offset].name = malloc(64)) == NULL)
+				testutil_die(errno, "malloc");
+			/* Have reverse scans read from tables with writes. */
+			name_index = i % cfg->append_inserters;
+			snprintf(
+			    run_info[offset].name, 64, FNAME, (int)name_index);
+
+			/* Vary by orders of magnitude */
+			if (cfg->vary_nops)
+				run_info[offset].nops =
+				    ops_scaled_nops(cfg->max_nops, name_index);
+		} else
+			run_info[offset].name = run_info[0].name;
+
+		/* Setup op count if not varying ops. */
+		if (run_info[offset].nops == 0)
+			run_info[offset].nops = cfg->max_nops;
+		total_nops += run_info[offset].nops;
+	}
+
+	(void)gettimeofday(&start, NULL);
+
+	/*
+	 * Create threads: scanners first, then inserters. Each thread gets
+	 * the index of the run_info slot configured for its role above, and
+	 * its thread ID is stored at the same index.
+	 */
+	for (i = 0; i < cfg->reverse_scanners; ++i) {
+		offset = i + cfg->append_inserters;
+		if ((ret = pthread_create(&tids[offset],
+		    NULL, reverse_scan, (void *)(uintptr_t)offset)) != 0)
+			testutil_die(ret, "pthread_create");
+	}
+	for (i = 0; i < cfg->append_inserters; ++i)
+		if ((ret = pthread_create(
+		    &tids[i], NULL, append_insert, (void *)(uintptr_t)i)) != 0)
+			testutil_die(ret, "pthread_create");
+
+	/* Wait for the threads. */
+	for (i = 0; i < nthreads; ++i)
+		(void)pthread_join(tids[i], &thread_ret);
+
+	(void)gettimeofday(&stop, NULL);
+	seconds = (stop.tv_sec - start.tv_sec) +
+	    (stop.tv_usec - start.tv_usec) * 1e-6;
+
+	/*
+	 * total_nops is already the sum over every thread, so it is not
+	 * multiplied by the thread count. Avoid dividing by a zero (or
+	 * negative) elapsed time.
+	 */
+	ops_per_second =
+	    seconds > 0 ? (uint64_t)((double)total_nops / seconds) : 0;
+	fprintf(stderr, "timer: %.2lf seconds (%" PRIu64 " ops/second)\n",
+	    seconds, ops_per_second);
+
+	/* Verify the files. */
+	for (i = 0; i < nthreads; ++i) {
+		verify(cfg, run_info[i].name);
+		if (!cfg->multiple_files)
+			break;
+	}
+
+	/* Output run statistics. */
+	print_stats(cfg);
+
+	/*
+	 * Free allocated memory. Without a file per thread every slot shares
+	 * the first slot's name, so only that one is freed.
+	 */
+	for (i = 0; i < nthreads; ++i) {
+		free(run_info[i].name);
+		if (!cfg->multiple_files)
+			break;
+	}
+
+	free(run_info);
+	free(tids);
+
+	return (0);
+}
+
+/*
+ * reverse_scan_op --
+ * Walk a cursor back from the end of the file.
+ * Resets the cursor, then calls cursor.prev up to cfg->reverse_scan_ops
+ * times, stopping early on WT_NOTFOUND. The first key returned must not
+ * be below the key range sampled before the scan, and every later key
+ * must be strictly smaller than the one before it; either violation is
+ * reported through testutil_die. The session and INFO arguments are
+ * unused.
+ * NOTE(review): the two ordering failures pass ret to testutil_die, and
+ * ret is 0 at that point because get_key has just succeeded.
+ */
+static inline void
+reverse_scan_op(
+ SHARED_CONFIG *cfg, WT_SESSION *session, WT_CURSOR *cursor, INFO *s)
+{
+ uint64_t i, initial_key_range, prev_key, this_key;
+ int ret;
+ char *strkey;
+
+ WT_UNUSED(session);
+ WT_UNUSED(s);
+
+ /* Make GCC 4.1 happy */
+ prev_key = this_key = 0;
+
+ /* Reset the cursor so the scan starts unpositioned */
+ if ((ret = cursor->reset(cursor)) != 0)
+ testutil_die(ret, "cursor.reset");
+
+ /*
+ * Save the key range, less the number of append threads (presumably
+ * to allow for keys allocated but not yet inserted -- confirm).
+ */
+ initial_key_range = cfg->key_range - cfg->append_inserters;
+
+ for (i = 0; i < cfg->reverse_scan_ops; i++) {
+ if ((ret = cursor->prev(cursor)) != 0) {
+ if (ret == WT_NOTFOUND)
+ break;
+ testutil_die(ret, "cursor.prev");
+ }
+
+ if (cfg->ftype == ROW) {
+ if ((ret = cursor->get_key(cursor, &strkey)) != 0)
+ testutil_die(ret, "cursor.get_key");
+ this_key = (uint64_t)atol(strkey);
+ } else
+ if ((ret = cursor->get_key(
+ cursor, (uint64_t *)&this_key)) != 0)
+ testutil_die(ret, "cursor.get_key");
+
+ if (i == 0 && this_key < initial_key_range)
+ testutil_die(ret,
+ "cursor scan start range wrong first prev %" PRIu64
+ " initial range: %" PRIu64,
+ this_key, initial_key_range);
+ if (i != 0 && this_key >= prev_key)
+ testutil_die(ret,
+ "cursor scan out of order this: %" PRIu64
+ " prev: %" PRIu64,
+ this_key, prev_key);
+ prev_key = this_key;
+ }
+}
+
+/*
+ * reverse_scan --
+ * Reader thread start function.
+ * The argument is the thread's index into run_info. Opens a session
+ * (isolation=snapshot) and a cursor on the thread's object, then calls
+ * reverse_scan_op until s->nops scans are done or cfg->thread_finish is
+ * set, counting each scan in s->reverse_scans. Closes the session, sets
+ * cfg->thread_finish and returns NULL.
+ * NOTE(review): thread_finish is a plain bool written and read by
+ * several threads without synchronization -- confirm that is intended.
+ */
+static void *
+reverse_scan(void *arg)
+{
+ INFO *s;
+ SHARED_CONFIG *cfg;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uintmax_t id;
+ uint64_t i;
+ int ret;
+ char tid[128];
+
+ id = (uintmax_t)arg;
+ s = &run_info[id];
+ cfg = s->cfg;
+ __wt_thread_id(tid, sizeof(tid));
+ __wt_random_init(&s->rnd);
+
+ printf(" reverse scan thread %2" PRIuMAX
+ " starting: tid: %s, file: %s\n",
+ id, tid, s->name);
+
+ __wt_yield(); /* Get all the threads created. */
+
+ if ((ret = cfg->conn->open_session(
+ cfg->conn, NULL, "isolation=snapshot", &session)) != 0)
+ testutil_die(ret, "conn.open_session");
+ if ((ret = session->open_cursor(
+ session, s->name, NULL, NULL, &cursor)) != 0)
+ testutil_die(ret, "session.open_cursor");
+ for (i = 0; i < s->nops && !cfg->thread_finish;
+ ++i, ++s->reverse_scans, __wt_yield())
+ reverse_scan_op(cfg, session, cursor, s);
+ if ((ret = session->close(session, NULL)) != 0)
+ testutil_die(ret, "session.close");
+
+ printf(" reverse scan thread %2" PRIuMAX
+ " stopping: tid: %s, file: %s\n",
+ id, tid, s->name);
+
+ /* Notify all other threads to finish once the first thread is done */
+ cfg->thread_finish = true;
+
+ return (NULL);
+}
+
+/*
+ * append_insert_op --
+ *	Write operation: atomically take the next key number from
+ *	cfg->key_range, build the key and value for the configured file type
+ *	and insert the record. Counts the operation in s->append_insert; an
+ *	insert failure is reported through testutil_die. The session argument
+ *	is unused.
+ */
+static inline void
+append_insert_op(
+    SHARED_CONFIG *cfg, WT_SESSION *session, WT_CURSOR *cursor, INFO *s)
+{
+	WT_ITEM *value, _value;
+	uint64_t keyno;
+	int ret;
+	char keybuf[64], valuebuf[64];
+
+	WT_UNUSED(session);
+
+	value = &_value;
+
+	keyno = __wt_atomic_add64(&cfg->key_range, 1);
+	if (cfg->ftype == ROW) {
+		snprintf(keybuf, sizeof(keybuf), "%016u", (u_int)keyno);
+		cursor->set_key(cursor, keybuf);
+	} else {
+		/*
+		 * Record-number ("r") keys are 64-bit and set_key is
+		 * variadic, so the argument must have exactly that type; a
+		 * 32-bit argument is undefined behavior.
+		 */
+		cursor->set_key(cursor, keyno);
+	}
+
+	++s->append_insert;
+	value->data = valuebuf;
+	if (cfg->ftype == FIX)
+		/*
+		 * NOTE(review): 0x10 has no bits set in the low three bits,
+		 * while the bulk-load path stores 0x01; confirm this is the
+		 * intended value for a 3-bit value format.
+		 */
+		cursor->set_value(cursor, 0x10);
+	else {
+		value->size = (uint32_t)snprintf(
+		    valuebuf, sizeof(valuebuf), "XXX %37u", (u_int)keyno);
+		cursor->set_value(cursor, value);
+	}
+	if ((ret = cursor->insert(cursor)) != 0)
+		testutil_die(ret, "cursor.insert");
+}
+
+/*
+ * append_insert --
+ * Writer thread start function.
+ * The argument is the thread's index into run_info. Opens a session
+ * (isolation=snapshot) and a cursor on the thread's object, then calls
+ * append_insert_op until s->nops inserts are done or cfg->thread_finish
+ * is set. Closes the session, sets cfg->thread_finish and returns NULL.
+ * NOTE(review): thread_finish is a plain bool written and read by
+ * several threads without synchronization -- confirm that is intended.
+ */
+static void *
+append_insert(void *arg)
+{
+ INFO *s;
+ SHARED_CONFIG *cfg;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uintmax_t id;
+ uint64_t i;
+ int ret;
+ char tid[128];
+
+ id = (uintmax_t)arg;
+ s = &run_info[id];
+ cfg = s->cfg;
+ __wt_thread_id(tid, sizeof(tid));
+ __wt_random_init(&s->rnd);
+
+ printf("write thread %2" PRIuMAX " starting: tid: %s, file: %s\n",
+ id, tid, s->name);
+
+ __wt_yield(); /* Get all the threads created. */
+
+ if ((ret = cfg->conn->open_session(
+ cfg->conn, NULL, "isolation=snapshot", &session)) != 0)
+ testutil_die(ret, "conn.open_session");
+ if ((ret = session->open_cursor(
+ session, s->name, NULL, NULL, &cursor)) != 0)
+ testutil_die(ret, "session.open_cursor");
+ for (i = 0; i < s->nops && !cfg->thread_finish; ++i, __wt_yield())
+ append_insert_op(cfg, session, cursor, s);
+ if ((ret = session->close(session, NULL)) != 0)
+ testutil_die(ret, "session.close");
+
+ printf("write thread %2" PRIuMAX " stopping: tid: %s, file: %s\n",
+ id, tid, s->name);
+
+ /* Notify all other threads to finish once the first thread is done */
+ cfg->thread_finish = true;
+
+ return (NULL);
+}
+
+/*
+ * print_stats --
+ *	Print one line per thread slot in run_info giving its reverse-scan
+ *	and append-insert counts.
+ */
+static void
+print_stats(SHARED_CONFIG *cfg)
+{
+	uint64_t i, nthreads;
+
+	nthreads = cfg->reverse_scanners + cfg->append_inserters;
+	for (i = 0; i < nthreads; ++i)
+		printf("%3d: reverse scans %6d, append inserts %6d\n",
+		    (int)i,
+		    (int)run_info[i].reverse_scans,
+		    (int)run_info[i].append_insert);
+}
diff --git a/test/fops/file.c b/test/fops/file.c
index 4cd92e7b590..ea15f1ee80d 100644
--- a/test/fops/file.c
+++ b/test/fops/file.c
@@ -147,7 +147,7 @@ obj_create_unique(int force)
/* Generate a unique object name. */
if ((ret = pthread_rwlock_wrlock(&single)) != 0)
testutil_die(ret, "pthread_rwlock_wrlock single");
- (void)snprintf(new_uri, sizeof(new_uri), "%s.%d", uri, ++uid);
+ (void)snprintf(new_uri, sizeof(new_uri), "%s.%u", uri, ++uid);
if ((ret = pthread_rwlock_unlock(&single)) != 0)
testutil_die(ret, "pthread_rwlock_unlock single");
diff --git a/test/fops/fops.c b/test/fops/fops.c
index fbc9d9c6048..3333ff16858 100644
--- a/test/fops/fops.c
+++ b/test/fops/fops.c
@@ -109,7 +109,7 @@ fop(void *arg)
__wt_random_init(&rnd);
for (i = 0; i < nops; ++i, __wt_yield())
- switch (__wt_random(&rnd) % 9) {
+ switch (__wt_random(&rnd) % 10) {
case 0:
++s->bulk;
obj_bulk();
diff --git a/test/fops/t.c b/test/fops/t.c
index 0881c23d7d4..24994404c7c 100644
--- a/test/fops/t.c
+++ b/test/fops/t.c
@@ -50,6 +50,8 @@ static void wt_shutdown(void);
extern int __wt_optind;
extern char *__wt_optarg;
+void (*custom_die)(void) = NULL;
+
int
main(int argc, char *argv[])
{
diff --git a/test/format/backup.c b/test/format/backup.c
index 748494bf841..2b1463bd0e3 100644
--- a/test/format/backup.c
+++ b/test/format/backup.c
@@ -37,20 +37,18 @@ check_copy(void)
{
WT_CONNECTION *conn;
WT_SESSION *session;
- int ret;
wts_open(g.home_backup, 0, &conn);
- if ((ret = conn->open_session(
- conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session: %s", g.home_backup);
+ testutil_checkfmt(
+ conn->open_session(conn, NULL, NULL, &session),
+ "%s", g.home_backup);
- ret = session->verify(session, g.uri, NULL);
- if (ret != 0)
- die(ret, "session.verify: %s: %s", g.home_backup, g.uri);
+ testutil_checkfmt(
+ session->verify(session, g.uri, NULL),
+ "%s: %s", g.home_backup, g.uri);
- if ((ret = conn->close(conn, NULL)) != 0)
- die(ret, "connection.close: %s", g.home_backup);
+ testutil_checkfmt(conn->close(conn, NULL), "%s", g.home_backup);
}
/*
@@ -62,14 +60,19 @@ copy_file(const char *name)
{
size_t len;
char *cmd;
- int ret;
len = strlen(g.home) + strlen(g.home_backup) + strlen(name) * 2 + 20;
cmd = dmalloc(len);
(void)snprintf(cmd, len,
"cp %s/%s %s/%s", g.home, name, g.home_backup, name);
- if ((ret = system(cmd)) != 0)
- die(ret, "backup copy: %s", cmd);
+ testutil_checkfmt(system(cmd), "backup copy: %s", cmd);
+ free(cmd);
+
+ len = strlen(g.home) + strlen(g.home_backup2) + strlen(name) * 2 + 20;
+ cmd = dmalloc(len);
+ (void)snprintf(cmd, len,
+ "cp %s/%s %s/%s", g.home, name, g.home_backup2, name);
+ testutil_checkfmt(system(cmd), "backup copy: %s", cmd);
free(cmd);
}
@@ -96,8 +99,7 @@ backup(void *arg)
return (NULL);
/* Open a session. */
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
/*
* Perform a backup at somewhere under 10 seconds (so we get at
@@ -113,12 +115,12 @@ backup(void *arg)
break;
/* Lock out named checkpoints */
- if ((ret = pthread_rwlock_wrlock(&g.backup_lock)) != 0)
- die(ret, "pthread_rwlock_wrlock: backup lock");
+ testutil_check(pthread_rwlock_wrlock(&g.backup_lock));
/* Re-create the backup directory. */
- if ((ret = system(g.home_backup_init)) != 0)
- die(ret, "backup directory creation failed");
+ testutil_checkfmt(
+ system(g.home_backup_init),
+ "%s", "backup directory creation failed");
/*
* open_cursor can return EBUSY if a metadata operation is
@@ -128,26 +130,21 @@ backup(void *arg)
"backup:", NULL, NULL, &backup_cursor)) == EBUSY)
sleep(1);
if (ret != 0)
- die(ret, "session.open_cursor: backup");
+ testutil_die(ret, "session.open_cursor: backup");
while ((ret = backup_cursor->next(backup_cursor)) == 0) {
- if ((ret =
- backup_cursor->get_key(backup_cursor, &key)) != 0)
- die(ret, "cursor.get_key");
+ testutil_check(
+ backup_cursor->get_key(backup_cursor, &key));
copy_file(key);
}
- if ((ret = backup_cursor->close(backup_cursor)) != 0)
- die(ret, "cursor.close");
-
- if ((ret = pthread_rwlock_unlock(&g.backup_lock)) != 0)
- die(ret, "pthread_rwlock_unlock: backup lock");
+ testutil_check(backup_cursor->close(backup_cursor));
+ testutil_check(pthread_rwlock_unlock(&g.backup_lock));
check_copy();
}
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
return (NULL);
}
diff --git a/test/format/bdb.c b/test/format/bdb.c
index d7b4bca62f2..823fc8ff888 100644
--- a/test/format/bdb.c
+++ b/test/format/bdb.c
@@ -128,7 +128,7 @@ bdb_np(int next,
if ((ret =
dbc->get(dbc, &key, &value, next ? DB_NEXT : DB_PREV)) != 0) {
if (ret != DB_NOTFOUND)
- die(ret, "dbc.get: %s: {%.*s}",
+ testutil_die(ret, "dbc.get: %s: {%.*s}",
next ? "DB_NEXT" : "DB_PREV",
(int)key.size, (char *)key.data);
*notfoundp = 1;
@@ -154,7 +154,7 @@ bdb_read(uint64_t keyno, void *valuep, size_t *valuesizep, int *notfoundp)
*notfoundp = 0;
if ((ret = dbc->get(dbc, &key, &value, DB_SET)) != 0) {
if (ret != DB_NOTFOUND)
- die(ret, "dbc.get: DB_SET: {%.*s}",
+ testutil_die(ret, "dbc.get: DB_SET: {%.*s}",
(int)key.size, (char *)key.data);
*notfoundp = 1;
} else {
@@ -178,7 +178,7 @@ bdb_update(const void *arg_key, size_t arg_key_size,
*notfoundp = 0;
if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0) {
if (ret != DB_NOTFOUND) {
- die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}",
+ testutil_die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}",
(int)key.size, (char *)key.data,
(int)value.size, (char *)value.data);
}
@@ -204,7 +204,7 @@ bdb_remove(uint64_t keyno, int *notfoundp)
if ((ret = dbc->del(dbc, 0)) != 0) {
if (ret != DB_NOTFOUND)
- die(ret, "dbc.del: {%.*s}",
+ testutil_die(ret, "dbc.del: {%.*s}",
(int)key.size, (char *)key.data);
*notfoundp = 1;
}
diff --git a/test/format/bulk.c b/test/format/bulk.c
index 28189e25b65..64b005d294f 100644
--- a/test/format/bulk.c
+++ b/test/format/bulk.c
@@ -37,13 +37,11 @@ wts_load(void)
WT_SESSION *session;
uint8_t *keybuf, *valbuf;
bool is_bulk;
- int ret;
conn = g.wts_conn;
keybuf = valbuf = NULL;
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
if (g.logging != 0)
(void)g.wt_api->msg_printf(g.wt_api, session,
@@ -61,9 +59,8 @@ wts_load(void)
if (g.c_reverse)
is_bulk = false;
- if ((ret = session->open_cursor(session, g.uri, NULL,
- is_bulk ? "bulk,append" : NULL, &cursor)) != 0)
- die(ret, "session.open_cursor");
+ testutil_check(session->open_cursor(session, g.uri, NULL,
+ is_bulk ? "bulk,append" : NULL, &cursor));
/* Set up the key/value buffers. */
key_gen_setup(&keybuf);
@@ -120,8 +117,7 @@ wts_load(void)
break;
}
- if ((ret = cursor->insert(cursor)) != 0)
- die(ret, "cursor.insert");
+ testutil_check(cursor->insert(cursor));
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED)
@@ -129,15 +125,13 @@ wts_load(void)
#endif
}
- if ((ret = cursor->close(cursor)) != 0)
- die(ret, "cursor.close");
+ testutil_check(cursor->close(cursor));
if (g.logging != 0)
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== bulk load stop ===============");
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
free(keybuf);
free(valbuf);
diff --git a/test/format/compact.c b/test/format/compact.c
index fdfa597e07e..a75ee4f2adf 100644
--- a/test/format/compact.c
+++ b/test/format/compact.c
@@ -48,8 +48,7 @@ compact(void *arg)
/* Open a session. */
conn = g.wts_conn;
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
/*
* Perform compaction at somewhere under 15 seconds (so we get at
@@ -66,11 +65,10 @@ compact(void *arg)
if ((ret = session->compact(
session, g.uri, NULL)) != 0 && ret != WT_ROLLBACK)
- die(ret, "session.compact");
+ testutil_die(ret, "session.compact");
}
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
return (NULL);
}
diff --git a/test/format/config.c b/test/format/config.c
index d431546f254..042316d8344 100644
--- a/test/format/config.c
+++ b/test/format/config.c
@@ -138,9 +138,10 @@ config_setup(void)
/* Required shared libraries. */
if (DATASOURCE("helium") && access(HELIUM_PATH, R_OK) != 0)
- die(errno, "Levyx/helium shared library: %s", HELIUM_PATH);
+ testutil_die(errno,
+ "Levyx/helium shared library: %s", HELIUM_PATH);
if (DATASOURCE("kvsbdb") && access(KVS_BDB_PATH, R_OK) != 0)
- die(errno, "kvsbdb shared library: %s", KVS_BDB_PATH);
+ testutil_die(errno, "kvsbdb shared library: %s", KVS_BDB_PATH);
/* Some data-sources don't support user-specified collations. */
if (DATASOURCE("helium") || DATASOURCE("kvsbdb"))
@@ -199,14 +200,15 @@ config_setup(void)
if (!config_is_perm("key_max") && g.c_key_max < g.c_key_min)
g.c_key_max = g.c_key_min;
if (g.c_key_min > g.c_key_max)
- die(EINVAL, "key_min may not be larger than key_max");
+ testutil_die(EINVAL, "key_min may not be larger than key_max");
if (!config_is_perm("value_min") && g.c_value_min > g.c_value_max)
g.c_value_min = g.c_value_max;
if (!config_is_perm("value_max") && g.c_value_max < g.c_value_min)
g.c_value_max = g.c_value_min;
if (g.c_value_min > g.c_value_max)
- die(EINVAL, "value_min may not be larger than value_max");
+ testutil_die(EINVAL,
+ "value_min may not be larger than value_max");
/* Reset the key count. */
g.key_cnt = 0;
@@ -412,7 +414,7 @@ config_lrt(void)
*/
if (g.type == FIX) {
if (g.c_long_running_txn && config_is_perm("long_running_txn"))
- die(EINVAL,
+ testutil_die(EINVAL,
"long_running_txn not supported with fixed-length "
"column store");
g.c_long_running_txn = 0;
@@ -453,7 +455,7 @@ config_print(int error_display)
fp = stdout;
else
if ((fp = fopen(g.home_config, "w")) == NULL)
- die(errno, "fopen: %s", g.home_config);
+ testutil_die(errno, "fopen: %s", g.home_config);
fprintf(fp, "############################################\n");
fprintf(fp, "# RUN PARAMETERS\n");
@@ -487,7 +489,7 @@ config_file(const char *name)
char *p, buf[256];
if ((fp = fopen(name, "r")) == NULL)
- die(errno, "fopen: %s", name);
+ testutil_die(errno, "fopen: %s", name);
while (fgets(buf, sizeof(buf), fp) != NULL) {
for (p = buf; *p != '\0' && *p != '\n'; ++p)
;
@@ -582,7 +584,7 @@ config_single(const char *s, int perm)
*cp->vstr = strdup(ep);
}
if (*cp->vstr == NULL)
- die(errno, "malloc");
+ testutil_die(errno, "malloc");
return;
}
@@ -625,7 +627,7 @@ config_map_file_type(const char *s, u_int *vp)
strcmp(s, "row-store") == 0)
*vp = ROW;
else
- die(EINVAL, "illegal file type configuration: %s", s);
+ testutil_die(EINVAL, "illegal file type configuration: %s", s);
}
/*
@@ -642,7 +644,7 @@ config_map_checksum(const char *s, u_int *vp)
else if (strcmp(s, "uncompressed") == 0)
*vp = CHECKSUM_UNCOMPRESSED;
else
- die(EINVAL, "illegal checksum configuration: %s", s);
+ testutil_die(EINVAL, "illegal checksum configuration: %s", s);
}
/*
@@ -667,7 +669,8 @@ config_map_compression(const char *s, u_int *vp)
else if (strcmp(s, "zlib-noraw") == 0)
*vp = COMPRESS_ZLIB_NO_RAW;
else
- die(EINVAL, "illegal compression configuration: %s", s);
+ testutil_die(EINVAL,
+ "illegal compression configuration: %s", s);
}
/*
@@ -682,7 +685,7 @@ config_map_encryption(const char *s, u_int *vp)
else if (strcmp(s, "rotn-7") == 0)
*vp = ENCRYPT_ROTN_7;
else
- die(EINVAL, "illegal encryption configuration: %s", s);
+ testutil_die(EINVAL, "illegal encryption configuration: %s", s);
}
/*
@@ -701,7 +704,7 @@ config_map_isolation(const char *s, u_int *vp)
else if (strcmp(s, "snapshot") == 0)
*vp = ISOLATION_SNAPSHOT;
else
- die(EINVAL, "illegal isolation configuration: %s", s);
+ testutil_die(EINVAL, "illegal isolation configuration: %s", s);
}
/*
diff --git a/test/format/config.h b/test/format/config.h
index d8b11b005d4..a17614bc044 100644
--- a/test/format/config.h
+++ b/test/format/config.h
@@ -246,6 +246,10 @@ static CONFIG c[] = {
"minimum gain before prefix compression is used",
0x0, 0, 8, 256, &g.c_prefix_compression_min, NULL },
+ { "quiet",
+ "quiet run (same as -q)",
+ C_IGNORE|C_BOOL, 0, 0, 0, &g.c_quiet, NULL },
+
{ "repeat_data_pct",
"percent duplicate values in row- or var-length column-stores",
0x0, 0, 90, 90, &g.c_repeat_data_pct, NULL },
diff --git a/test/format/format.h b/test/format/format.h
index 41c9de3dd30..a129c5395fd 100644
--- a/test/format/format.h
+++ b/test/format/format.h
@@ -109,6 +109,7 @@ typedef struct {
char *home; /* Home directory */
char *home_backup; /* Hot-backup directory */
+ char *home_backup2; /* Saved Hot-backup directory */
char *home_backup_init; /* Initialize backup command */
char *home_bdb; /* BDB directory */
char *home_config; /* Run CONFIG file path */
@@ -142,7 +143,6 @@ typedef struct {
FILE *logfp; /* Log file */
int replay; /* Replaying a run. */
- int track; /* Track progress */
int workers_finished; /* Operations completed */
pthread_rwlock_t backup_lock; /* Hot backup running */
@@ -210,6 +210,7 @@ typedef struct {
uint32_t c_merge_max;
uint32_t c_mmap;
uint32_t c_ops;
+ uint32_t c_quiet;
uint32_t c_prefix_compression;
uint32_t c_prefix_compression_min;
uint32_t c_repeat_data_pct;
@@ -334,12 +335,6 @@ void wts_salvage(void);
void wts_stats(void);
void wts_verify(const char *);
-void die(int, const char *, ...)
-#if defined(__GNUC__)
-__attribute__((__noreturn__))
-#endif
-;
-
/*
* mmrand --
* Return a random value between a min/max pair.
diff --git a/test/format/lrt.c b/test/format/lrt.c
index b7392829d30..451d2f4fa3c 100644
--- a/test/format/lrt.c
+++ b/test/format/lrt.c
@@ -60,11 +60,9 @@ lrt(void *arg)
/* Open a session and cursor. */
conn = g.wts_conn;
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
- if ((ret = session->open_cursor(
- session, g.uri, NULL, NULL, &cursor)) != 0)
- die(ret, "session.open_cursor");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(
+ session, g.uri, NULL, NULL, &cursor));
for (pinned = 0;;) {
if (pinned) {
@@ -73,7 +71,8 @@ lrt(void *arg)
&key, saved_keyno, 1)) == WT_ROLLBACK)
;
if (ret != 0)
- die(ret, "read_row %" PRIu64, saved_keyno);
+ testutil_die(ret,
+ "read_row %" PRIu64, saved_keyno);
/* Compare the previous value with the current one. */
if (g.type == FIX) {
@@ -83,21 +82,19 @@ lrt(void *arg)
} else
ret = cursor->get_value(cursor, &value);
if (ret != 0)
- die(ret,
+ testutil_die(ret,
"cursor.get_value: %" PRIu64, saved_keyno);
if (buf_size != value.size ||
memcmp(buf, value.data, value.size) != 0)
- die(0, "mismatched start/stop values");
+ testutil_die(0, "mismatched start/stop values");
/* End the transaction. */
- if ((ret =
- session->commit_transaction(session, NULL)) != 0)
- die(ret, "session.commit_transaction");
+ testutil_check(
+ session->commit_transaction(session, NULL));
/* Reset the cursor, releasing our pin. */
- if ((ret = cursor->reset(cursor)) != 0)
- die(ret, "cursor.reset");
+ testutil_check(cursor->reset(cursor));
pinned = 0;
} else {
/*
@@ -106,9 +103,8 @@ lrt(void *arg)
* positioned. As soon as the cursor loses its position
* a new snapshot will be allocated.
*/
- if ((ret = session->begin_transaction(
- session, "isolation=snapshot")) != 0)
- die(ret, "session.begin_transaction");
+ testutil_check(session->begin_transaction(
+ session, "isolation=snapshot"));
/* Read a record at the end of the table. */
do {
@@ -120,7 +116,8 @@ lrt(void *arg)
;
} while (ret == WT_NOTFOUND);
if (ret != 0)
- die(ret, "read_row %" PRIu64, saved_keyno);
+ testutil_die(ret,
+ "read_row %" PRIu64, saved_keyno);
/* Copy the cursor's value. */
if (g.type == FIX) {
@@ -130,11 +127,11 @@ lrt(void *arg)
} else
ret = cursor->get_value(cursor, &value);
if (ret != 0)
- die(ret,
+ testutil_die(ret,
"cursor.get_value: %" PRIu64, saved_keyno);
if (buf_len < value.size &&
(buf = realloc(buf, buf_len = value.size)) == NULL)
- die(errno, "malloc");
+ testutil_die(errno, "malloc");
memcpy(buf, value.data, buf_size = value.size);
/*
@@ -149,7 +146,7 @@ lrt(void *arg)
;
} while (ret == WT_NOTFOUND);
if (ret != 0)
- die(ret, "read_row %" PRIu64, keyno);
+ testutil_die(ret, "read_row %" PRIu64, keyno);
pinned = 1;
}
@@ -166,8 +163,7 @@ lrt(void *arg)
break;
}
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
free(keybuf);
free(buf);
diff --git a/test/format/ops.c b/test/format/ops.c
index 36d56df1505..5d66f4d5391 100644
--- a/test/format/ops.c
+++ b/test/format/ops.c
@@ -56,7 +56,7 @@ wts_ops(int lastrun)
pthread_t backup_tid, compact_tid, lrt_tid;
int64_t fourths, thread_ops;
uint32_t i;
- int ret, running;
+ int running;
conn = g.wts_conn;
@@ -97,36 +97,32 @@ wts_ops(int lastrun)
/* Open a session. */
if (g.logging != 0) {
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== thread ops start ===============");
}
/* Create thread structure; start the worker threads. */
if ((tinfo = calloc((size_t)g.c_threads, sizeof(*tinfo))) == NULL)
- die(errno, "calloc");
+ testutil_die(errno, "calloc");
for (i = 0; i < g.c_threads; ++i) {
tinfo[i].id = (int)i + 1;
tinfo[i].state = TINFO_RUNNING;
- if ((ret =
- pthread_create(&tinfo[i].tid, NULL, ops, &tinfo[i])) != 0)
- die(ret, "pthread_create");
+ testutil_check(
+ pthread_create(&tinfo[i].tid, NULL, ops, &tinfo[i]));
}
/*
* If a multi-threaded run, start optional backup, compaction and
* long-running reader threads.
*/
- if (g.c_backups &&
- (ret = pthread_create(&backup_tid, NULL, backup, NULL)) != 0)
- die(ret, "pthread_create: backup");
- if (g.c_compact &&
- (ret = pthread_create(&compact_tid, NULL, compact, NULL)) != 0)
- die(ret, "pthread_create: compaction");
- if (!SINGLETHREADED && g.c_long_running_txn &&
- (ret = pthread_create(&lrt_tid, NULL, lrt, NULL)) != 0)
- die(ret, "pthread_create: long-running reader");
+ if (g.c_backups)
+ testutil_check(pthread_create(&backup_tid, NULL, backup, NULL));
+ if (g.c_compact)
+ testutil_check(
+ pthread_create(&compact_tid, NULL, compact, NULL));
+ if (!SINGLETHREADED && g.c_long_running_txn)
+ testutil_check(pthread_create(&lrt_tid, NULL, lrt, NULL));
/* Spin on the threads, calculating the totals. */
for (;;) {
@@ -192,8 +188,7 @@ wts_ops(int lastrun)
if (g.logging != 0) {
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== thread ops stop ===============");
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
}
}
@@ -234,7 +229,7 @@ ops(void *arg)
uint32_t op;
uint8_t *keybuf, *valbuf;
u_int np;
- int ckpt_available, dir, insert, intxn, notfound, readonly, ret;
+ int ckpt_available, dir, insert, intxn, notfound, readonly;
char *ckpt_config, ckpt_name[64];
tinfo = arg;
@@ -269,9 +264,8 @@ ops(void *arg)
*/
if (intxn &&
(tinfo->ops == ckpt_op || tinfo->ops == session_op)) {
- if ((ret = session->commit_transaction(
- session, NULL)) != 0)
- die(ret, "session.commit_transaction");
+ testutil_check(
+ session->commit_transaction(session, NULL));
++tinfo->commit;
intxn = 0;
}
@@ -279,13 +273,11 @@ ops(void *arg)
/* Open up a new session and cursors. */
if (tinfo->ops == session_op ||
session == NULL || cursor == NULL) {
- if (session != NULL &&
- (ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ if (session != NULL)
+ testutil_check(session->close(session, NULL));
- if ((ret = conn->open_session(conn, NULL,
- ops_session_config(&tinfo->rnd), &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL,
+ ops_session_config(&tinfo->rnd), &session));
/*
* 10% of the time, perform some read-only operations
@@ -300,9 +292,8 @@ ops(void *arg)
*/
if (!SINGLETHREADED && !DATASOURCE("lsm") &&
ckpt_available && mmrand(&tinfo->rnd, 1, 10) == 1) {
- if ((ret = session->open_cursor(session,
- g.uri, NULL, ckpt_name, &cursor)) != 0)
- die(ret, "session.open_cursor");
+ testutil_check(session->open_cursor(session,
+ g.uri, NULL, ckpt_name, &cursor));
/* Pick the next session/cursor close/open. */
session_op += 250;
@@ -323,13 +314,12 @@ ops(void *arg)
* want to have to specify the record number,
* which requires an append configuration.
*/
- if ((ret = session->open_cursor(session, g.uri,
- NULL, "overwrite", &cursor)) != 0)
- die(ret, "session.open_cursor");
- if ((g.type == FIX || g.type == VAR) &&
- (ret = session->open_cursor(session, g.uri,
- NULL, "append", &cursor_insert)) != 0)
- die(ret, "session.open_cursor");
+ testutil_check(session->open_cursor(session,
+ g.uri, NULL, "overwrite", &cursor));
+ if (g.type == FIX || g.type == VAR)
+ testutil_check(session->open_cursor(
+ session, g.uri,
+ NULL, "append", &cursor_insert));
/* Pick the next session/cursor close/open. */
session_op += mmrand(&tinfo->rnd, 100, 5000);
@@ -358,21 +348,17 @@ ops(void *arg)
}
/* Named checkpoints lock out backups */
- if (ckpt_config != NULL &&
- (ret = pthread_rwlock_wrlock(&g.backup_lock)) != 0)
- die(ret,
- "pthread_rwlock_wrlock: backup lock");
-
- if ((ret =
- session->checkpoint(session, ckpt_config)) != 0)
- die(ret, "session.checkpoint%s%s",
- ckpt_config == NULL ? "" : ": ",
- ckpt_config == NULL ? "" : ckpt_config);
-
- if (ckpt_config != NULL &&
- (ret = pthread_rwlock_unlock(&g.backup_lock)) != 0)
- die(ret,
- "pthread_rwlock_wrlock: backup lock");
+ if (ckpt_config != NULL)
+ testutil_check(
+ pthread_rwlock_wrlock(&g.backup_lock));
+
+ testutil_checkfmt(
+ session->checkpoint(session, ckpt_config),
+ "%s", ckpt_config == NULL ? "" : ckpt_config);
+
+ if (ckpt_config != NULL)
+ testutil_check(
+ pthread_rwlock_unlock(&g.backup_lock));
/* Rephrase the checkpoint name for cursor open. */
if (ckpt_config == NULL)
@@ -393,8 +379,7 @@ ops(void *arg)
* have to do the reset outside of a transaction.
*/
if (tinfo->ops > reset_op && !intxn) {
- if ((ret = session->reset(session)) != 0)
- die(ret, "session.reset");
+ testutil_check(session->reset(session));
/* Pick the next reset operation. */
reset_op += mmrand(&tinfo->rnd, 20000, 50000);
@@ -406,9 +391,8 @@ ops(void *arg)
*/
if (!SINGLETHREADED &&
!intxn && mmrand(&tinfo->rnd, 1, 10) >= 8) {
- if ((ret =
- session->begin_transaction(session, NULL)) != 0)
- die(ret, "session.begin_transaction");
+ testutil_check(
+ session->begin_transaction(session, NULL));
intxn = 1;
}
@@ -466,9 +450,8 @@ ops(void *arg)
if (col_insert(tinfo,
cursor_insert, &key, &value, &keyno))
goto deadlock;
- if ((ret =
- cursor_insert->reset(cursor_insert)) != 0)
- die(ret, "cursor.reset");
+ testutil_check(
+ cursor_insert->reset(cursor_insert));
insert = 1;
break;
@@ -518,8 +501,7 @@ skip_insert: if (col_update(tinfo,
goto deadlock;
/* Reset the cursor: there is no reason to keep pages pinned. */
- if ((ret = cursor->reset(cursor)) != 0)
- die(ret, "cursor.reset");
+ testutil_check(cursor->reset(cursor));
/*
* If we're in the transaction, commit 40% of the time and
@@ -528,9 +510,8 @@ skip_insert: if (col_update(tinfo,
if (intxn)
switch (mmrand(&tinfo->rnd, 1, 10)) {
case 1: case 2: case 3: case 4: /* 40% */
- if ((ret = session->commit_transaction(
- session, NULL)) != 0)
- die(ret, "session.commit_transaction");
+ testutil_check(session->commit_transaction(
+ session, NULL));
++tinfo->commit;
intxn = 0;
break;
@@ -538,10 +519,8 @@ skip_insert: if (col_update(tinfo,
if (0) {
deadlock: ++tinfo->deadlock;
}
- if ((ret = session->rollback_transaction(
- session, NULL)) != 0)
- die(ret,
- "session.rollback_transaction");
+ testutil_check(session->rollback_transaction(
+ session, NULL));
++tinfo->rollback;
intxn = 0;
break;
@@ -550,8 +529,8 @@ deadlock: ++tinfo->deadlock;
}
}
- if (session != NULL && (ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ if (session != NULL)
+ testutil_check(session->close(session, NULL));
free(keybuf);
free(valbuf);
@@ -573,7 +552,6 @@ wts_read_scan(void)
WT_SESSION *session;
uint64_t cnt, last_cnt;
uint8_t *keybuf;
- int ret;
conn = g.wts_conn;
@@ -581,12 +559,10 @@ wts_read_scan(void)
key_gen_setup(&keybuf);
/* Open a session and cursor pair. */
- if ((ret = conn->open_session(
- conn, NULL, ops_session_config(NULL), &session)) != 0)
- die(ret, "connection.open_session");
- if ((ret = session->open_cursor(
- session, g.uri, NULL, NULL, &cursor)) != 0)
- die(ret, "session.open_cursor");
+ testutil_check(conn->open_session(
+ conn, NULL, ops_session_config(NULL), &session));
+ testutil_check(session->open_cursor(
+ session, g.uri, NULL, NULL, &cursor));
/* Check a random subset of the records using the key. */
for (last_cnt = cnt = 0; cnt < g.key_cnt;) {
@@ -599,12 +575,11 @@ wts_read_scan(void)
}
key.data = keybuf;
- if ((ret = read_row(cursor, &key, cnt, 0)) != 0)
- die(ret, "read_scan");
+ testutil_checkfmt(
+ read_row(cursor, &key, cnt, 0), "%s", "read_scan");
}
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
free(keybuf);
}
@@ -666,7 +641,7 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err)
return (WT_NOTFOUND);
break;
default:
- die(ret, "read_row: read row %" PRIu64, keyno);
+ testutil_die(ret, "read_row: read row %" PRIu64, keyno);
}
#ifdef HAVE_BERKELEY_DB
@@ -703,7 +678,7 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err)
"read_row: value mismatch %" PRIu64 ":\n", keyno);
print_item("bdb", &bdb_value);
print_item(" wt", &value);
- die(0, NULL);
+ testutil_die(0, NULL);
}
}
#endif
@@ -748,7 +723,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
break;
}
if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "%s", which);
+ testutil_die(ret, "%s", which);
*notfoundp = (ret == WT_NOTFOUND);
#ifdef HAVE_BERKELEY_DB
@@ -777,7 +752,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
fprintf(stderr, "nextprev: %s key mismatch:\n", which);
print_item("bdb-key", &bdb_key);
print_item(" wt-key", &key);
- die(0, NULL);
+ testutil_die(0, NULL);
}
} else {
if (keyno != (uint64_t)atoll(bdb_key.data)) {
@@ -787,7 +762,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
"nextprev: %s key mismatch: %.*s != %" PRIu64 "\n",
which,
(int)bdb_key.size, (char *)bdb_key.data, keyno);
- die(0, NULL);
+ testutil_die(0, NULL);
}
}
if (value.size != bdb_value.size ||
@@ -795,7 +770,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
fprintf(stderr, "nextprev: %s value mismatch:\n", which);
print_item("bdb-value", &bdb_value);
print_item(" wt-value", &value);
- die(0, NULL);
+ testutil_die(0, NULL);
}
if (g.logging == LOG_OPS)
@@ -851,7 +826,8 @@ row_update(TINFO *tinfo,
if (ret == WT_ROLLBACK)
return (WT_ROLLBACK);
if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "row_update: update row %" PRIu64 " by key", keyno);
+ testutil_die(ret,
+ "row_update: update row %" PRIu64 " by key", keyno);
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
@@ -905,7 +881,7 @@ col_update(TINFO *tinfo,
if (ret == WT_ROLLBACK)
return (WT_ROLLBACK);
if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "col_update: %" PRIu64, keyno);
+ testutil_die(ret, "col_update: %" PRIu64, keyno);
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
@@ -937,7 +913,7 @@ table_append_init(void)
free(g.append);
if ((g.append = calloc(g.append_max, sizeof(uint64_t))) == NULL)
- die(errno, "calloc");
+ testutil_die(errno, "calloc");
}
/*
@@ -948,7 +924,7 @@ static void
table_append(uint64_t keyno)
{
uint64_t *p, *ep;
- int done, ret;
+ int done;
ep = g.append + g.append_max;
@@ -979,8 +955,7 @@ table_append(uint64_t keyno)
* and we find a slot.
*/
for (done = 0;;) {
- if ((ret = pthread_rwlock_wrlock(&g.append_lock)) != 0)
- die(ret, "pthread_rwlock_wrlock: append_lock");
+ testutil_check(pthread_rwlock_wrlock(&g.append_lock));
/*
* If this is the thread we've been waiting for, and its record
@@ -1017,8 +992,7 @@ table_append(uint64_t keyno)
break;
}
- if ((ret = pthread_rwlock_unlock(&g.append_lock)) != 0)
- die(ret, "pthread_rwlock_unlock: append_lock");
+ testutil_check(pthread_rwlock_unlock(&g.append_lock));
if (done)
break;
@@ -1055,7 +1029,8 @@ row_insert(TINFO *tinfo,
if (ret == WT_ROLLBACK)
return (WT_ROLLBACK);
if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "row_insert: insert row %" PRIu64 " by key", keyno);
+ testutil_die(ret,
+ "row_insert: insert row %" PRIu64 " by key", keyno);
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
@@ -1094,10 +1069,9 @@ col_insert(TINFO *tinfo,
if ((ret = cursor->insert(cursor)) != 0) {
if (ret == WT_ROLLBACK)
return (WT_ROLLBACK);
- die(ret, "cursor.insert");
+ testutil_die(ret, "cursor.insert");
}
- if ((ret = cursor->get_key(cursor, &keyno)) != 0)
- die(ret, "cursor.get_key");
+ testutil_check(cursor->get_key(cursor, &keyno));
*keynop = (uint32_t)keyno;
table_append(keyno); /* Extend the object. */
@@ -1157,7 +1131,8 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
if (ret == WT_ROLLBACK)
return (WT_ROLLBACK);
if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "row_remove: remove %" PRIu64 " by key", keyno);
+ testutil_die(ret,
+ "row_remove: remove %" PRIu64 " by key", keyno);
*notfoundp = (ret == WT_NOTFOUND);
#ifdef HAVE_BERKELEY_DB
@@ -1200,7 +1175,8 @@ col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
if (ret == WT_ROLLBACK)
return (WT_ROLLBACK);
if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "col_remove: remove %" PRIu64 " by key", keyno);
+ testutil_die(ret,
+ "col_remove: remove %" PRIu64 " by key", keyno);
*notfoundp = (ret == WT_NOTFOUND);
#ifdef HAVE_BERKELEY_DB
@@ -1245,7 +1221,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno)
fprintf(stderr, " row %" PRIu64 ":", keyno);
fprintf(stderr,
" not found in Berkeley DB, found in WiredTiger\n");
- die(0, NULL);
+ testutil_die(0, NULL);
}
if (wt_ret == WT_NOTFOUND) {
fprintf(stderr, "%s: %s:", g.progname, f);
@@ -1253,7 +1229,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno)
fprintf(stderr, " row %" PRIu64 ":", keyno);
fprintf(stderr,
" found in Berkeley DB, not found in WiredTiger\n");
- die(0, NULL);
+ testutil_die(0, NULL);
}
return (0);
}
diff --git a/test/format/rebalance.c b/test/format/rebalance.c
index 8e8fa1a371f..d35dcec1d53 100644
--- a/test/format/rebalance.c
+++ b/test/format/rebalance.c
@@ -33,7 +33,6 @@ wts_rebalance(void)
{
WT_CONNECTION *conn;
WT_SESSION *session;
- int ret;
char cmd[1024];
if (g.c_rebalance == 0)
@@ -45,26 +44,23 @@ wts_rebalance(void)
(void)snprintf(cmd, sizeof(cmd),
"../../wt -h %s dump -f %s/rebalance.orig %s",
g.home, g.home, g.uri);
- if ((ret = system(cmd)) != 0)
- die(ret, "command failed: %s", cmd);
+ testutil_checkfmt(system(cmd), "command failed: %s", cmd);
/* Rebalance, then verify the object. */
wts_reopen();
conn = g.wts_conn;
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
if (g.logging != 0)
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== rebalance start ===============");
- if ((ret = session->rebalance(session, g.uri, NULL)) != 0)
- die(ret, "session.rebalance: %s: %s", g.uri);
+ testutil_checkfmt(
+ session->rebalance(session, g.uri, NULL), "%s", g.uri);
if (g.logging != 0)
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== rebalance stop ===============");
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
wts_verify("post-rebalance verify");
wts_close();
@@ -72,13 +68,11 @@ wts_rebalance(void)
(void)snprintf(cmd, sizeof(cmd),
"../../wt -h %s dump -f %s/rebalance.new %s",
g.home, g.home, g.uri);
- if ((ret = system(cmd)) != 0)
- die(ret, "command failed: %s", cmd);
+ testutil_checkfmt(system(cmd), "command failed: %s", cmd);
/* Compare the old/new versions of the object. */
(void)snprintf(cmd, sizeof(cmd),
"cmp %s/rebalance.orig %s/rebalance.new > /dev/null",
g.home, g.home);
- if ((ret = system(cmd)) != 0)
- die(ret, "command failed: %s", cmd);
+ testutil_checkfmt(system(cmd), "command failed: %s", cmd);
}
diff --git a/test/format/salvage.c b/test/format/salvage.c
index d0358e998b4..526e1563390 100644
--- a/test/format/salvage.c
+++ b/test/format/salvage.c
@@ -42,12 +42,10 @@ salvage(void)
conn = g.wts_conn;
track("salvage", 0ULL, NULL);
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
if ((ret = session->salvage(session, g.uri, "force=true")) != 0)
- die(ret, "session.salvage: %s", g.uri);
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_die(ret, "session.salvage: %s", g.uri);
+ testutil_check(session->close(session, NULL));
}
/*
@@ -101,37 +99,37 @@ corrupt(void)
return (0);
found: if (fstat(fd, &sb) == -1)
- die(errno, "salvage-corrupt: fstat");
+ testutil_die(errno, "salvage-corrupt: fstat");
offset = mmrand(NULL, 0, (u_int)sb.st_size);
len = (size_t)(20 + (sb.st_size / 100) * 2);
(void)snprintf(buf, sizeof(buf), "%s/slvg.corrupt", g.home);
if ((fp = fopen(buf, "w")) == NULL)
- die(errno, "salvage-corrupt: open: %s", buf);
+ testutil_die(errno, "salvage-corrupt: open: %s", buf);
(void)fprintf(fp,
"salvage-corrupt: offset %" PRIuMAX ", length " SIZET_FMT "\n",
(uintmax_t)offset, len);
fclose_and_clear(&fp);
if (lseek(fd, offset, SEEK_SET) == -1)
- die(errno, "salvage-corrupt: lseek");
+ testutil_die(errno, "salvage-corrupt: lseek");
memset(buf, 'z', sizeof(buf));
for (; len > 0; len -= nw) {
nw = (size_t)(len > sizeof(buf) ? sizeof(buf) : len);
if (write(fd, buf, nw) == -1)
- die(errno, "salvage-corrupt: write");
+ testutil_die(errno, "salvage-corrupt: write");
}
if (close(fd) == -1)
- die(errno, "salvage-corrupt: close");
+ testutil_die(errno, "salvage-corrupt: close");
/*
* Save a copy of the corrupted file so we can replay the salvage step
* as necessary.
*/
if ((ret = system(copycmd)) != 0)
- die(ret, "salvage corrupt copy step failed");
+ testutil_die(ret, "salvage corrupt copy step failed");
return (1);
}
@@ -157,7 +155,7 @@ wts_salvage(void)
* step as necessary.
*/
if ((ret = system(g.home_salvage_copy)) != 0)
- die(ret, "salvage copy step failed");
+ testutil_die(ret, "salvage copy step failed");
/* Salvage, then verify. */
wts_open(g.home, 1, &g.wts_conn);
diff --git a/test/format/t.c b/test/format/t.c
index ccbc0442e4a..28c22e23cb8 100644
--- a/test/format/t.c
+++ b/test/format/t.c
@@ -30,17 +30,20 @@
GLOBAL g;
+static void format_die(void);
static void startup(void);
static void usage(void);
extern int __wt_optind;
extern char *__wt_optarg;
+void (*custom_die)(void) = format_die; /* Local death handler. */
+
int
main(int argc, char *argv[])
{
time_t start;
- int ch, i, onerun, reps, ret;
+ int ch, i, onerun, reps;
const char *config, *home;
config = NULL;
@@ -64,7 +67,7 @@ main(int argc, char *argv[])
#endif
/* Track progress unless we're re-directing output to a file. */
- g.track = isatty(1) ? 1 : 0;
+ g.c_quiet = isatty(1) ? 0 : 1;
/* Set values from the command line. */
home = NULL;
@@ -99,7 +102,7 @@ main(int argc, char *argv[])
g.logging = LOG_OPS;
break;
case 'q': /* Quiet */
- g.track = 0;
+ g.c_quiet = 1;
break;
case 'r': /* Replay a run */
g.replay = 1;
@@ -125,9 +128,9 @@ main(int argc, char *argv[])
/* If it's a replay, use the home directory's CONFIG file. */
if (g.replay) {
if (config != NULL)
- die(EINVAL, "-c incompatible with -r");
+ testutil_die(EINVAL, "-c incompatible with -r");
if (access(g.home_config, R_OK) != 0)
- die(ENOENT, "%s", g.home_config);
+ testutil_die(ENOENT, "%s", g.home_config);
config = g.home_config;
}
@@ -176,12 +179,9 @@ main(int argc, char *argv[])
* Initialize locks to single-thread named checkpoints and backups,
* last-record updates, and failures.
*/
- if ((ret = pthread_rwlock_init(&g.append_lock, NULL)) != 0)
- die(ret, "pthread_rwlock_init: append lock");
- if ((ret = pthread_rwlock_init(&g.backup_lock, NULL)) != 0)
- die(ret, "pthread_rwlock_init: backup lock");
- if ((ret = pthread_rwlock_init(&g.death_lock, NULL)) != 0)
- die(ret, "pthread_rwlock_init: death lock");
+ testutil_check(pthread_rwlock_init(&g.append_lock, NULL));
+ testutil_check(pthread_rwlock_init(&g.backup_lock, NULL));
+ testutil_check(pthread_rwlock_init(&g.death_lock, NULL));
printf("%s: process %" PRIdMAX "\n", g.progname, (intmax_t)getpid());
while (++g.run_cnt <= g.c_runs || g.c_runs == 0 ) {
@@ -259,7 +259,7 @@ main(int argc, char *argv[])
wts_salvage();
/* Overwrite the progress line with a completion line. */
- if (g.track)
+ if (!g.c_quiet)
printf("\r%78s\r", " ");
printf("%4d: %s, %s (%.0f seconds)\n",
g.run_cnt, g.c_data_source,
@@ -273,10 +273,8 @@ main(int argc, char *argv[])
config_print(0);
- if ((ret = pthread_rwlock_destroy(&g.append_lock)) != 0)
- die(ret, "pthread_rwlock_destroy: append lock");
- if ((ret = pthread_rwlock_destroy(&g.backup_lock)) != 0)
- die(ret, "pthread_rwlock_destroy: backup lock");
+ testutil_check(pthread_rwlock_destroy(&g.append_lock));
+ testutil_check(pthread_rwlock_destroy(&g.backup_lock));
config_clear();
@@ -298,41 +296,33 @@ startup(void)
/* Create or initialize the home and data-source directories. */
if ((ret = system(g.home_init)) != 0)
- die(ret, "home directory initialization failed");
+ testutil_die(ret, "home directory initialization failed");
/* Open/truncate the logging file. */
if (g.logging != 0 && (g.logfp = fopen(g.home_log, "w")) == NULL)
- die(errno, "fopen: %s", g.home_log);
+ testutil_die(errno, "fopen: %s", g.home_log);
/* Open/truncate the random number logging file. */
if ((g.randfp = fopen(g.home_rand, g.replay ? "r" : "w")) == NULL)
- die(errno, "%s", g.home_rand);
+ testutil_die(errno, "%s", g.home_rand);
}
/*
* format_die --
- * Report an error and quit, dumping the configuration.
+ * Report an error, dumping the configuration.
*/
-void
-die(int e, const char *fmt, ...)
+static void
+format_die(void)
{
- va_list ap;
-
- /* Single-thread error handling. */
+ /*
+ * Single-thread error handling, our caller exits after calling
+ * us - don't release the lock.
+ */
(void)pthread_rwlock_wrlock(&g.death_lock);
/* Try and turn off tracking so it doesn't obscure the error message. */
- if (g.track) {
- g.track = 0;
- fprintf(stderr, "\n");
- }
- if (fmt != NULL) { /* Death message. */
- fprintf(stderr, "%s: ", g.progname);
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
- if (e != 0)
- fprintf(stderr, ": %s", wiredtiger_strerror(e));
+ if (!g.c_quiet) {
+ g.c_quiet = 1;
fprintf(stderr, "\n");
}
@@ -343,8 +333,6 @@ die(int e, const char *fmt, ...)
/* Display the configuration that failed. */
if (g.run_cnt)
config_print(1);
-
- exit(EXIT_FAILURE);
}
/*
diff --git a/test/format/util.c b/test/format/util.c
index 2b6b9d67fc3..2e4c869366c 100644
--- a/test/format/util.c
+++ b/test/format/util.c
@@ -42,7 +42,7 @@ dmalloc(size_t len)
void *p;
if ((p = malloc(len)) == NULL)
- die(errno, "malloc");
+ testutil_die(errno, "malloc");
return (p);
}
@@ -56,7 +56,7 @@ dstrdup(const char *str)
char *p;
if ((p = strdup(str)) == NULL)
- die(errno, "strdup");
+ testutil_die(errno, "strdup");
return (p);
}
@@ -236,7 +236,7 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo)
int len;
char msg[128];
- if (!g.track || tag == NULL)
+ if (g.c_quiet || tag == NULL)
return;
if (tinfo == NULL && cnt == 0)
@@ -268,9 +268,9 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo)
lastlen = len;
if (printf("%s\r", msg) < 0)
- die(EIO, "printf");
+ testutil_die(EIO, "printf");
if (fflush(stdout) == EOF)
- die(errno, "fflush");
+ testutil_die(errno, "fflush");
}
/*
@@ -310,6 +310,10 @@ path_setup(const char *home)
g.home_backup = dmalloc(len);
snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP");
+ len = strlen(g.home) + strlen("BACKUP2") + 2;
+ g.home_backup2 = dmalloc(len);
+ snprintf(g.home_backup2, len, "%s/%s", g.home, "BACKUP2");
+
/* BDB directory. */
len = strlen(g.home) + strlen("bdb") + 2;
g.home_bdb = dmalloc(len);
@@ -340,13 +344,15 @@ path_setup(const char *home)
/* Backup directory initialize command, remove and re-create it. */
#undef CMD
#ifdef _WIN32
-#define CMD "del /s /q >:nul && mkdir %s"
+#define CMD "del /s /q >:nul && mkdir %s %s"
#else
-#define CMD "rm -rf %s && mkdir %s"
+#define CMD "rm -rf %s %s && mkdir %s %s"
#endif
- len = strlen(g.home_backup) * 2 + strlen(CMD) + 1;
+ len = strlen(g.home_backup) * 2 +
+ strlen(g.home_backup2) * 2 + strlen(CMD) + 1;
g.home_backup_init = dmalloc(len);
- snprintf(g.home_backup_init, len, CMD, g.home_backup, g.home_backup);
+ snprintf(g.home_backup_init, len, CMD, g.home_backup, g.home_backup2,
+ g.home_backup, g.home_backup2);
/*
* Salvage command, save the interesting files so we can replay the
@@ -407,7 +413,7 @@ rng(WT_RAND_STATE *rnd)
"\n" "end of random number log reached\n");
exit(EXIT_SUCCESS);
}
- die(errno, "random number log");
+ testutil_die(errno, "random number log");
}
return ((uint32_t)strtoul(buf, NULL, 10));
@@ -435,6 +441,6 @@ fclose_and_clear(FILE **fpp)
return;
*fpp = NULL;
if (fclose(fp) != 0)
- die(errno, "fclose");
+ testutil_die(errno, "fclose");
return;
}
diff --git a/test/format/wts.c b/test/format/wts.c
index 9d4d3fe5cb8..81e484296e2 100644
--- a/test/format/wts.c
+++ b/test/format/wts.c
@@ -53,7 +53,8 @@ compressor(uint32_t compress_flag)
default:
break;
}
- die(EINVAL, "illegal compression flag: 0x%x", compress_flag);
+ testutil_die(EINVAL,
+ "illegal compression flag: %#" PRIx32, compress_flag);
}
/*
@@ -71,7 +72,8 @@ encryptor(uint32_t encrypt_flag)
default:
break;
}
- die(EINVAL, "illegal encryption flag: 0x%x", encrypt_flag);
+ testutil_die(EINVAL,
+ "illegal encryption flag: %#" PRIx32, encrypt_flag);
}
static int
@@ -222,7 +224,8 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
p += snprintf(p, REMAIN(p, end), ",%s", g.config_open);
if (REMAIN(p, end) == 0)
- die(ENOMEM, "wiredtiger_open configuration buffer too small");
+ testutil_die(ENOMEM,
+ "wiredtiger_open configuration buffer too small");
/*
* Direct I/O may not work with backups, doing copies through the buffer
@@ -233,8 +236,8 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
if (strstr(config, "direct_io") != NULL)
g.c_backups = 0;
- if ((ret = wiredtiger_open(home, &event_handler, config, &conn)) != 0)
- die(ret, "wiredtiger_open: %s", home);
+ testutil_checkfmt(
+ wiredtiger_open(home, &event_handler, config, &conn), "%s", home);
if (set_api)
g.wt_api = conn->get_extension_api(conn);
@@ -247,7 +250,7 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
*/
if (DATASOURCE("helium")) {
if (g.helium_mount == NULL)
- die(EINVAL, "no Helium mount point specified");
+ testutil_die(EINVAL, "no Helium mount point specified");
(void)snprintf(helium_config, sizeof(helium_config),
"entry=wiredtiger_extension_init,config=["
"helium_verbose=0,"
@@ -256,7 +259,7 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
g.helium_mount);
if ((ret = conn->load_extension(
conn, HELIUM_PATH, helium_config)) != 0)
- die(ret,
+ testutil_die(ret,
"WT_CONNECTION.load_extension: %s:%s",
HELIUM_PATH, helium_config);
}
@@ -270,11 +273,8 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
void
wts_reopen(void)
{
- int ret;
-
- if ((ret = wiredtiger_open(g.home,
- &event_handler, g.wiredtiger_open_config, &g.wts_conn)) != 0)
- die(ret, "wiredtiger_open: %s", g.home);
+ testutil_checkfmt(wiredtiger_open(g.home, &event_handler,
+ g.wiredtiger_open_config, &g.wts_conn), "%s", g.home);
}
/*
@@ -287,7 +287,6 @@ wts_create(void)
WT_CONNECTION *conn;
WT_SESSION *session;
uint32_t maxintlpage, maxintlkey, maxleafpage, maxleafkey, maxleafvalue;
- int ret;
char config[4096], *end, *p;
conn = g.wts_conn;
@@ -316,7 +315,7 @@ wts_create(void)
p += snprintf(p, REMAIN(p, end),
"key_format=%s,"
"allocation_size=512,%s"
- "internal_page_max=%d,leaf_page_max=%d",
+ "internal_page_max=%" PRIu32 ",leaf_page_max=%" PRIu32,
(g.type == ROW) ? "u" : "r",
g.c_firstfit ? "block_allocation=first," : "",
maxintlpage, maxleafpage);
@@ -328,15 +327,15 @@ wts_create(void)
maxintlkey = mmrand(NULL, maxintlpage / 50, maxintlpage / 40);
if (maxintlkey > 20)
p += snprintf(p, REMAIN(p, end),
- ",internal_key_max=%d", maxintlkey);
+ ",internal_key_max=%" PRIu32, maxintlkey);
maxleafkey = mmrand(NULL, maxleafpage / 50, maxleafpage / 40);
if (maxleafkey > 20)
p += snprintf(p, REMAIN(p, end),
- ",leaf_key_max=%d", maxleafkey);
+ ",leaf_key_max=%" PRIu32, maxleafkey);
maxleafvalue = mmrand(NULL, maxleafpage * 10, maxleafpage / 40);
if (maxleafvalue > 40 && maxleafvalue < 100 * 1024)
p += snprintf(p, REMAIN(p, end),
- ",leaf_value_max=%d", maxleafvalue);
+ ",leaf_value_max=%" PRIu32, maxleafvalue);
switch (g.type) {
case FIX:
@@ -364,7 +363,7 @@ wts_create(void)
",huffman_value=english");
if (g.c_dictionary)
p += snprintf(p, REMAIN(p, end),
- ",dictionary=%d", mmrand(NULL, 123, 517));
+ ",dictionary=%" PRIu32, mmrand(NULL, 123, 517));
break;
}
@@ -431,32 +430,28 @@ wts_create(void)
}
if (REMAIN(p, end) == 0)
- die(ENOMEM, "WT_SESSION.create configuration buffer too small");
+ testutil_die(ENOMEM,
+ "WT_SESSION.create configuration buffer too small");
/*
* Create the underlying store.
*/
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
- if ((ret = session->create(session, g.uri, config)) != 0)
- die(ret, "session.create: %s", g.uri);
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_checkfmt(session->create(session, g.uri, config), "%s", g.uri);
+ testutil_check(session->close(session, NULL));
}
void
wts_close(void)
{
WT_CONNECTION *conn;
- int ret;
const char *config;
conn = g.wts_conn;
config = g.c_leak_memory ? "leak_memory" : NULL;
- if ((ret = conn->close(conn, config)) != 0)
- die(ret, "connection.close");
+ testutil_check(conn->close(conn, config));
g.wts_conn = NULL;
g.wt_api = NULL;
}
@@ -466,7 +461,6 @@ wts_dump(const char *tag, int dump_bdb)
{
#ifdef HAVE_BERKELEY_DB
size_t len;
- int ret;
char *cmd;
/*
@@ -491,8 +485,7 @@ wts_dump(const char *tag, int dump_bdb)
g.uri == NULL ? "" : "-n",
g.uri == NULL ? "" : g.uri);
- if ((ret = system(cmd)) != 0)
- die(ret, "%s: dump comparison failed", tag);
+ testutil_checkfmt(system(cmd), "%s: dump comparison failed", tag);
free(cmd);
#else
(void)tag; /* [-Wunused-variable] */
@@ -513,8 +506,7 @@ wts_verify(const char *tag)
conn = g.wts_conn;
track("verify", 0ULL, NULL);
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
if (g.logging != 0)
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== verify start ===============");
@@ -522,13 +514,12 @@ wts_verify(const char *tag)
/* Session operations for LSM can return EBUSY. */
ret = session->verify(session, g.uri, "strict");
if (ret != 0 && !(ret == EBUSY && DATASOURCE("lsm")))
- die(ret, "session.verify: %s: %s", g.uri, tag);
+ testutil_die(ret, "session.verify: %s: %s", g.uri, tag);
if (g.logging != 0)
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== verify stop ===============");
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
}
/*
@@ -558,49 +549,43 @@ wts_stats(void)
conn = g.wts_conn;
track("stat", 0ULL, NULL);
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
if ((fp = fopen(g.home_stats, "w")) == NULL)
- die(errno, "fopen: %s", g.home_stats);
+ testutil_die(errno, "fopen: %s", g.home_stats);
/* Connection statistics. */
fprintf(fp, "====== Connection statistics:\n");
- if ((ret = session->open_cursor(session,
- "statistics:", NULL, NULL, &cursor)) != 0)
- die(ret, "session.open_cursor");
+ testutil_check(session->open_cursor(
+ session, "statistics:", NULL, NULL, &cursor));
while ((ret = cursor->next(cursor)) == 0 &&
(ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
if (fprintf(fp, "%s=%s\n", desc, pval) < 0)
- die(errno, "fprintf");
+ testutil_die(errno, "fprintf");
if (ret != WT_NOTFOUND)
- die(ret, "cursor.next");
- if ((ret = cursor->close(cursor)) != 0)
- die(ret, "cursor.close");
+ testutil_die(ret, "cursor.next");
+ testutil_check(cursor->close(cursor));
/* Data source statistics. */
fprintf(fp, "\n\n====== Data source statistics:\n");
stat_name = dmalloc(strlen("statistics:") + strlen(g.uri) + 1);
sprintf(stat_name, "statistics:%s", g.uri);
- if ((ret = session->open_cursor(
- session, stat_name, NULL, NULL, &cursor)) != 0)
- die(ret, "session.open_cursor");
+ testutil_check(session->open_cursor(
+ session, stat_name, NULL, NULL, &cursor));
free(stat_name);
while ((ret = cursor->next(cursor)) == 0 &&
(ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
if (fprintf(fp, "%s=%s\n", desc, pval) < 0)
- die(errno, "fprintf");
+ testutil_die(errno, "fprintf");
if (ret != WT_NOTFOUND)
- die(ret, "cursor.next");
- if ((ret = cursor->close(cursor)) != 0)
- die(ret, "cursor.close");
+ testutil_die(ret, "cursor.next");
+ testutil_check(cursor->close(cursor));
fclose_and_clear(&fp);
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
}
diff --git a/test/huge/huge.c b/test/huge/huge.c
index d09f6f375fb..ad19035ff99 100644
--- a/test/huge/huge.c
+++ b/test/huge/huge.c
@@ -167,6 +167,8 @@ run(CONFIG *cp, int bigkey, size_t bytes)
extern int __wt_optind;
extern char *__wt_optarg;
+void (*custom_die)(void) = NULL;
+
int
main(int argc, char *argv[])
{
diff --git a/test/manydbs/Makefile.am b/test/manydbs/Makefile.am
new file mode 100644
index 00000000000..53559b25243
--- /dev/null
+++ b/test/manydbs/Makefile.am
@@ -0,0 +1,13 @@
+AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
+    -I$(top_srcdir)/test/utility
+
+noinst_PROGRAMS = t
+t_SOURCES = manydbs.c
+t_LDADD = $(top_builddir)/libwiredtiger.la
+t_LDFLAGS = -static
+
+# Run this during a "make check" smoke test.
+TESTS = smoke.sh
+
+# manydbs.c uses WT_HOME (its HOME_BASE) as the default working directory
+# name, so remove that as well as stray database files and core dumps.
+clean-local:
+	rm -rf WT_HOME* WiredTiger* *.core __*
diff --git a/test/manydbs/manydbs.c b/test/manydbs/manydbs.c
new file mode 100644
index 00000000000..1d3412a7b06
--- /dev/null
+++ b/test/manydbs/manydbs.c
@@ -0,0 +1,264 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <sys/wait.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include <wiredtiger.h>
+
+#include "test_util.i"
+
+#define HOME_SIZE 512
+#define HOME_BASE "WT_HOME"
+static char home[HOME_SIZE]; /* Base home directory */
+static char hometmp[HOME_SIZE]; /* Each conn home directory */
+static const char *progname; /* Program name */
+static const char * const uri = "table:main";
+
+#define WTOPEN_CFG_COMMON \
+ "create,log=(file_max=10M,archive=false,enabled)," \
+ "statistics=(fast),statistics_log=(wait=5),"
+#define WT_CONFIG0 \
+ WTOPEN_CFG_COMMON \
+ "transaction_sync=(enabled=false)"
+#define WT_CONFIG1 \
+ WTOPEN_CFG_COMMON \
+ "transaction_sync=(enabled,method=none)"
+#define WT_CONFIG2 \
+ WTOPEN_CFG_COMMON \
+ "transaction_sync=(enabled,method=fsync)"
+
+#define MAX_DBS 10
+#define MAX_IDLE_TIME 30
+#define IDLE_INCR 5
+
+#define MAX_KV 100
+#define MAX_VAL 128
+
+/*
+ * usage --
+ *	Write the command-line synopsis to stderr, then exit with
+ *	EXIT_FAILURE.
+ */
+static void
+usage(void)
+{
+	(void)fprintf(
+	    stderr, "usage: %s [-I] [-D maxdbs] [-h dir]\n", progname);
+
+	exit(EXIT_FAILURE);
+}
+
+extern int __wt_optind;
+extern char *__wt_optarg;
+
+void (*custom_die)(void) = NULL;
+
+WT_CONNECTION **connections = NULL;
+WT_CURSOR **cursors = NULL;
+WT_RAND_STATE rnd;
+WT_SESSION **sessions = NULL;
+
+/*
+ * get_stat --
+ *	Look up a single statistic, identified by stat_field, through a
+ *	"statistics:" cursor opened on stat_session, and store its numeric
+ *	value in *valuep.
+ *
+ *	Returns 0 on success, otherwise the non-zero result of the cursor
+ *	search or get_value call.  The results of opening and closing the
+ *	cursor are passed to testutil_check rather than returned.
+ */
+static int
+get_stat(WT_SESSION *stat_session, int stat_field, uint64_t *valuep)
+{
+	WT_CURSOR *statc;
+	const char *desc, *pvalue;
+	int ret;
+
+	testutil_check(stat_session->open_cursor(stat_session,
+	    "statistics:", NULL, NULL, &statc));
+	statc->set_key(statc, stat_field);
+	if ((ret = statc->search(statc)) == 0)
+		ret = statc->get_value(statc, &desc, &pvalue, valuep);
+
+	/*
+	 * Close the cursor on every path: returning directly from a failed
+	 * search would leave it open on the caller's session.
+	 */
+	testutil_check(statc->close(statc));
+	return (ret);
+}
+
+/*
+ * run_ops --
+ *	Insert MAX_KV records into each of dbs / 4 randomly selected
+ *	databases (the same database may be selected more than once).  Each
+ *	value is a random-length prefix, shorter than MAX_VAL bytes, of one
+ *	random buffer.  Uses the file-level cursors array and random state.
+ *
+ *	Always returns 0; the result of each insert is passed to
+ *	testutil_check.
+ */
+static int
+run_ops(int dbs)
+{
+	WT_ITEM data;
+	uint64_t key;
+	uint32_t db;
+	int db_set, i;
+	uint8_t buf[MAX_VAL];
+
+	memset(buf, 0, sizeof(buf));
+	for (i = 0; i < MAX_VAL; ++i)
+		buf[i] = (uint8_t)__wt_random(&rnd);
+	data.data = buf;
+	/*
+	 * Write a small amount of data into a random subset of the databases.
+	 */
+	db_set = dbs / 4;
+	for (i = 0; i < db_set; ++i) {
+		db = __wt_random(&rnd) % (uint32_t)dbs;
+		printf("Write to database %" PRIu32 "\n", db);
+		/*
+		 * The table is created with key_format=Q, so the key has to
+		 * be passed as a uint64_t: set_key takes its key through
+		 * variable arguments and a plain int would not be converted.
+		 */
+		for (key = 0; key < MAX_KV; ++key) {
+			data.size = __wt_random(&rnd) % MAX_VAL;
+			cursors[db]->set_key(cursors[db], key);
+			cursors[db]->set_value(cursors[db], &data);
+			testutil_check(cursors[db]->insert(cursors[db]));
+		}
+	}
+	return (0);
+}
+
+/*
+ * main --
+ *	Open a number of databases (-D, default MAX_DBS) under one working
+ *	directory (-h), optionally leave them idle (-I) or write a little
+ *	data to a random subset every IDLE_INCR seconds, and then compare
+ *	each connection's condition-variable reset statistic with what is
+ *	acceptable for that workload.
+ */
+int
+main(int argc, char *argv[])
+{
+	uint64_t cond_reset, cond_wait;
+	uint64_t *cond_reset_orig;
+	int cfg, ch, dbs, i;
+	bool idle;
+	const char *working_dir, *wt_cfg;
+
+	if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL)
+		progname = argv[0];
+	else
+		++progname;
+	dbs = MAX_DBS;
+	working_dir = HOME_BASE;
+	idle = false;
+	while ((ch = __wt_getopt(progname, argc, argv, "D:h:I")) != EOF)
+		switch (ch) {
+		case 'D':
+			/*
+			 * The count sizes every array below and is used as a
+			 * modulus when choosing databases to write, so only
+			 * a positive number is acceptable.
+			 */
+			if ((dbs = atoi(__wt_optarg)) <= 0)
+				usage();
+			break;
+		case 'h':
+			working_dir = __wt_optarg;
+			break;
+		case 'I':
+			idle = true;
+			break;
+		default:
+			usage();
+		}
+	argc -= __wt_optind;
+	argv += __wt_optind;
+	if (argc != 0)
+		usage();
+
+	/*
+	 * Allocate arrays for connection handles, sessions, statistics
+	 * cursors and, if needed, data cursors.
+	 */
+	if ((connections = calloc(
+	    (size_t)dbs, sizeof(WT_CONNECTION *))) == NULL)
+		testutil_die(ENOMEM, "connection array malloc");
+	if ((sessions = calloc(
+	    (size_t)dbs, sizeof(WT_SESSION *))) == NULL)
+		testutil_die(ENOMEM, "session array malloc");
+	if ((cond_reset_orig = calloc((size_t)dbs, sizeof(uint64_t))) == NULL)
+		testutil_die(ENOMEM, "orig stat malloc");
+	if (!idle && ((cursors = calloc(
+	    (size_t)dbs, sizeof(WT_CURSOR *))) == NULL))
+		testutil_die(ENOMEM, "cursor array malloc");
+	/*
+	 * Set up all the directory names.
+	 */
+	testutil_work_dir_from_path(home, HOME_SIZE, working_dir);
+	testutil_make_work_dir(home);
+	__wt_random_init(&rnd);
+	for (i = 0; i < dbs; ++i) {
+		(void)snprintf(
+		    hometmp, HOME_SIZE, "%s/%s.%d", home, HOME_BASE, i);
+		testutil_make_work_dir(hometmp);
+		/*
+		 * Open each database.  Rotate different configurations
+		 * among them.  Open a session and statistics cursor.
+		 * If writing data, create the table and open a data cursor.
+		 */
+		cfg = i % 3;
+		if (cfg == 0)
+			wt_cfg = WT_CONFIG0;
+		else if (cfg == 1)
+			wt_cfg = WT_CONFIG1;
+		else
+			wt_cfg = WT_CONFIG2;
+		testutil_check(wiredtiger_open(
+		    hometmp, NULL, wt_cfg, &connections[i]));
+		testutil_check(connections[i]->open_session(connections[i],
+		    NULL, NULL, &sessions[i]));
+		if (!idle) {
+			testutil_check(sessions[i]->create(sessions[i],
+			    uri, "key_format=Q,value_format=u"));
+			testutil_check(sessions[i]->open_cursor(sessions[i],
+			    uri, NULL, NULL, &cursors[i]));
+		}
+	}
+
+	sleep(10);
+
+	/*
+	 * Record original reset setting.  There could have been some
+	 * activity during the creation period.
+	 */
+	for (i = 0; i < dbs; ++i)
+		testutil_check(get_stat(sessions[i],
+		    WT_STAT_CONN_COND_AUTO_WAIT_RESET, &cond_reset_orig[i]));
+	for (i = 0; i < MAX_IDLE_TIME; i += IDLE_INCR) {
+		if (!idle)
+			testutil_check(run_ops(dbs));
+		printf("Sleep %d (%d of %d)\n", IDLE_INCR, i, MAX_IDLE_TIME);
+		sleep(IDLE_INCR);
+	}
+	for (i = 0; i < dbs; ++i) {
+		testutil_check(get_stat(sessions[i],
+		    WT_STAT_CONN_COND_AUTO_WAIT_RESET, &cond_reset));
+		testutil_check(get_stat(sessions[i],
+		    WT_STAT_CONN_COND_AUTO_WAIT, &cond_wait));
+		/*
+		 * On an idle workload there should be no resets of condition
+		 * variables during the idle period.  Even with a light
+		 * workload, resets should not be very common.  We look for 5%.
+		 */
+		if (idle && cond_reset != cond_reset_orig[i])
+			testutil_die(ERANGE,
+			    "condition reset on idle connection %d of %" PRIu64,
+			    i, cond_reset);
+		if (!idle && cond_reset > cond_wait / 20)
+			testutil_die(ERANGE, "connection %d condition reset %"
+			    PRIu64 " exceeds 5%% of %" PRIu64,
+			    i, cond_reset, cond_wait);
+		testutil_check(connections[i]->close(connections[i], NULL));
+	}
+
+	/* Cleanup allocated memory; cursors is still NULL when idle. */
+	free(connections);
+	free(sessions);
+	free(cond_reset_orig);
+	free(cursors);
+
+	return (EXIT_SUCCESS);
+}
diff --git a/test/manydbs/smoke.sh b/test/manydbs/smoke.sh
new file mode 100755
index 00000000000..c0e2976f154
--- /dev/null
+++ b/test/manydbs/smoke.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+set -e
+
+# Smoke-test manydbs as part of running "make check".
+# Run with:
+# 1. The defaults (operations turned on).
+# 2. The idle flag set to turn off operations.
+# 3. More databases, with operations.
+# 4. More databases, idle.
+#
+echo "manydbs: default with operations turned on"
+$TEST_WRAPPER ./t
+echo "manydbs: totally idle databases"
+$TEST_WRAPPER ./t -I
+echo "manydbs: 40 databases with operations"
+$TEST_WRAPPER ./t -D 40
+echo "manydbs: 40 idle databases"
+$TEST_WRAPPER ./t -I -D 40
diff --git a/test/readonly/Makefile.am b/test/readonly/Makefile.am
new file mode 100644
index 00000000000..3abcd2386a1
--- /dev/null
+++ b/test/readonly/Makefile.am
@@ -0,0 +1,13 @@
+# Build the read-only test program from readonly.c as "t".
+AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
+ -I$(top_srcdir)/test/utility
+
+noinst_PROGRAMS = t
+t_SOURCES = readonly.c
+t_LDADD = $(top_builddir)/libwiredtiger.la
+t_LDFLAGS = -static
+
+# Run this during a "make check" smoke test.
+TESTS = smoke.sh
+
+# Remove anything matching WT_RD* (readonly.c's default working directory
+# name plus its suffixed copies), database files and core dumps.
+clean-local:
+ rm -rf WT_RD* WiredTiger* *.core __*
diff --git a/test/readonly/readonly.c b/test/readonly/readonly.c
new file mode 100644
index 00000000000..41400da2605
--- /dev/null
+++ b/test/readonly/readonly.c
@@ -0,0 +1,409 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <sys/wait.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include <wiredtiger.h>
+
+#include "test_util.i"
+
+#define HOME_SIZE 512
+static char home[HOME_SIZE]; /* Program working dir lock file */
+#define HOME_WR_SUFFIX ".WRNOLOCK" /* Writable dir copy no lock file */
+static char home_wr[HOME_SIZE + sizeof(HOME_WR_SUFFIX)];
+#define HOME_RD_SUFFIX ".RD" /* Read-only dir */
+static char home_rd[HOME_SIZE + sizeof(HOME_RD_SUFFIX)];
+#define HOME_RD2_SUFFIX ".RDNOLOCK" /* Read-only dir no lock file */
+static char home_rd2[HOME_SIZE + sizeof(HOME_RD2_SUFFIX)];
+
+static const char *progname; /* Program name */
+static const char *saved_argv0; /* Program command */
+static const char * const uri = "table:main";
+
+#define ENV_CONFIG \
+ "create,log=(file_max=10M,archive=false,enabled)," \
+ "transaction_sync=(enabled,method=none)"
+#define ENV_CONFIG_RD "readonly=true"
+#define ENV_CONFIG_WR "readonly=false"
+#define MAX_VAL 4096
+#define MAX_KV 10000
+
+#define EXPECT_ERR 1
+#define EXPECT_SUCCESS 0
+
+#define OP_READ 0
+#define OP_WRITE 1
+
+/*
+ * usage --
+ *	Write the command-line synopsis to stderr, then exit with
+ *	EXIT_FAILURE.
+ */
+static void
+usage(void)
+{
+	(void)fprintf(
+	    stderr, "usage: %s [-h dir]\n", progname);
+
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * run_child --
+ *	Open the database in homedir, read-only when op is OP_READ and
+ *	read-write otherwise, and compare the result of wiredtiger_open with
+ *	the expected outcome (EXPECT_ERR or EXPECT_SUCCESS).  After a
+ *	successful open, walk the table and require exactly MAX_KV records.
+ *
+ *	Mismatches are reported through testutil_die; the function itself
+ *	always returns 0.
+ */
+static int
+run_child(const char *homedir, int op, int expect)
+{
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor;
+	WT_SESSION *session;
+	int i, ret;
+	const char *cfg;
+
+	/*
+	 * We expect the read-only database will allow the second read-only
+	 * handle to succeed because no one can create or set the lock file.
+	 */
+	cfg = op == OP_READ ? ENV_CONFIG_RD : ENV_CONFIG_WR;
+	if ((ret = wiredtiger_open(homedir, NULL, cfg, &conn)) != 0) {
+		if (expect == EXPECT_SUCCESS)
+			testutil_die(
+			    ret, "wiredtiger_open expected success, error");
+		/*
+		 * If we expect an error and got one, we're done.
+		 */
+		return (0);
+	}
+	if (expect == EXPECT_ERR)
+		testutil_die(
+		    ret, "wiredtiger_open expected error, succeeded");
+
+	/*
+	 * Make sure we can read the data.
+	 */
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "WT_CONNECTION:open_session");
+
+	if ((ret =
+	    session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri);
+
+	i = 0;
+	while ((ret = cursor->next(cursor)) == 0)
+		++i;
+	/*
+	 * The walk has to stop because the records ran out, not because
+	 * reading one of them failed.
+	 */
+	if (ret != WT_NOTFOUND)
+		testutil_die(ret, "WT_CURSOR.next");
+	if (i != MAX_KV)
+		testutil_die(EPERM, "cursor walk");
+	if ((ret = conn->close(conn, NULL)) != 0)
+		testutil_die(ret, "conn_close");
+	return (0);
+}
+
+/*
+ * open_dbs --
+ *	Body of a child process: attempt to open each of the four home
+ *	directories with the requested operation (OP_READ or OP_WRITE),
+ *	check every attempt against the expected outcome, then exit with
+ *	EXIT_SUCCESS.
+ */
+static void
+open_dbs(int op, const char *dir,
+    const char *dir_wr, const char *dir_rd, const char *dir_rd2)
+{
+	int rd_expect, ret;
+
+	/*
+	 * The parent has an open connection to all directories, so opening
+	 * either writeable home is expected to return an error whatever the
+	 * operation.  It is a failure if the child successfully opens one.
+	 */
+	if ((ret = run_child(dir, op, EXPECT_ERR)) != 0)
+		testutil_die(ret, "wiredtiger_open readonly allowed");
+	if ((ret = run_child(dir_wr, op, EXPECT_ERR)) != 0)
+		testutil_die(ret, "wiredtiger_open readonly allowed");
+
+	/*
+	 * The parent must hold read-only connections to the read-only
+	 * databases.  A child that also opens read-only should succeed; a
+	 * child that attempts read/write should get an error (permission
+	 * error).
+	 */
+	rd_expect = op == OP_READ ? EXPECT_SUCCESS : EXPECT_ERR;
+	if ((ret = run_child(dir_rd, op, rd_expect)) != 0)
+		testutil_die(ret, "run child 1");
+	if ((ret = run_child(dir_rd2, op, rd_expect)) != 0)
+		testutil_die(ret, "run child 2");
+
+	exit(EXIT_SUCCESS);
+}
+
+extern int __wt_optind;
+extern char *__wt_optarg;
+
+void (*custom_die)(void) = NULL;
+
+/*
+ * spawn_child --
+ *	Run this program again as a separate process on the same working
+ *	directory, passing mode_flag ("-R" or "-W") to select the child's
+ *	operation, and require that it exits normally with a zero status.
+ */
+static void
+spawn_child(const char *mode_flag, const char *working_dir)
+{
+	int len, status;
+	char cmd[512];
+
+	len = snprintf(cmd, sizeof(cmd),
+	    "%s -h %s %s", saved_argv0, working_dir, mode_flag);
+	if (len < 0 || (size_t)len >= sizeof(cmd))
+		testutil_die(ENOMEM, "child command buffer too small");
+
+	/* A negative return is not an error code; report errno instead. */
+	if ((status = system(cmd)) < 0)
+		testutil_die(errno, "system");
+	/*
+	 * The exit status is only meaningful for a child that exited
+	 * normally; the child will exit with success if its test passes.
+	 */
+	if (!WIFEXITED(status))
+		testutil_die(ECHILD, "system: child did not exit normally");
+	if (WEXITSTATUS(status) != 0)
+		testutil_die(WEXITSTATUS(status), "system");
+}
+
+/*
+ * main --
+ *	Without -R or -W, act as the parent: create and populate a database,
+ *	copy it into a writable and two read-only directories, then run the
+ *	four parent/child open scenarios described below.  With -R or -W,
+ *	act as a child and call open_dbs.
+ */
+int
+main(int argc, char *argv[])
+{
+	WT_CONNECTION *conn, *conn2, *conn3, *conn4;
+	WT_CURSOR *cursor;
+	WT_ITEM data;
+	WT_SESSION *session;
+	uint64_t i;
+	int ch, op, ret;
+	bool child;
+	const char *working_dir;
+	char cmd[512];
+	uint8_t buf[MAX_VAL];
+
+	if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL)
+		progname = argv[0];
+	else
+		++progname;
+	/*
+	 * Needed unaltered for system command later.
+	 */
+	saved_argv0 = argv[0];
+
+	working_dir = "WT_RD";
+	child = false;
+	op = OP_READ;
+	while ((ch = __wt_getopt(progname, argc, argv, "Rh:W")) != EOF)
+		switch (ch) {
+		case 'R':
+			child = true;
+			op = OP_READ;
+			break;
+		case 'W':
+			child = true;
+			op = OP_WRITE;
+			break;
+		case 'h':
+			working_dir = __wt_optarg;
+			break;
+		default:
+			usage();
+		}
+	argc -= __wt_optind;
+	argv += __wt_optind;
+	if (argc != 0)
+		usage();
+
+	/*
+	 * Set up all the directory names.
+	 */
+	testutil_work_dir_from_path(home, sizeof(home), working_dir);
+	(void)snprintf(home_wr, sizeof(home_wr), "%s%s", home, HOME_WR_SUFFIX);
+	(void)snprintf(home_rd, sizeof(home_rd), "%s%s", home, HOME_RD_SUFFIX);
+	(void)snprintf(
+	    home_rd2, sizeof(home_rd2), "%s%s", home, HOME_RD2_SUFFIX);
+	if (!child) {
+		testutil_make_work_dir(home);
+		testutil_make_work_dir(home_wr);
+		testutil_make_work_dir(home_rd);
+		testutil_make_work_dir(home_rd2);
+	} else
+		/*
+		 * We are a child process, we just want to call
+		 * the open_dbs with the directories we have.
+		 * The child function will exit.
+		 */
+		open_dbs(op, home, home_wr, home_rd, home_rd2);
+
+	/*
+	 * Parent creates a database and table.  Then cleanly shuts down.
+	 * Then copy database to read-only directory and chmod.
+	 * Also copy database to read-only directory and remove the lock
+	 * file.  One read-only database will have a lock file in the
+	 * file system and the other will not.
+	 * Parent opens all databases with read-only configuration flag.
+	 * Parent spawns a child who tries to also open all databases
+	 * with the read-only flag.  It should error on the writeable
+	 * directory, but allow it on the read-only directories.
+	 * The child then confirms it can read all the data.
+	 */
+	/*
+	 * Run in the home directory and create the table.
+	 */
+	if ((ret = wiredtiger_open(home, NULL, ENV_CONFIG, &conn)) != 0)
+		testutil_die(ret, "wiredtiger_open");
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "WT_CONNECTION:open_session");
+	if ((ret = session->create(session,
+	    uri, "key_format=Q,value_format=u")) != 0)
+		testutil_die(ret, "WT_SESSION.create: %s", uri);
+	if ((ret =
+	    session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri);
+
+	/*
+	 * Write data into the table and then cleanly shut down connection.
+	 */
+	memset(buf, 0, sizeof(buf));
+	data.data = buf;
+	data.size = MAX_VAL;
+	for (i = 0; i < MAX_KV; ++i) {
+		cursor->set_key(cursor, i);
+		cursor->set_value(cursor, &data);
+		if ((ret = cursor->insert(cursor)) != 0)
+			testutil_die(ret, "WT_CURSOR.insert");
+	}
+	if ((ret = conn->close(conn, NULL)) != 0)
+		testutil_die(ret, "WT_CONNECTION:close");
+
+	/*
+	 * Copy the database.  Remove any lock file from one copy
+	 * and chmod the copies to be read-only permissions.
+	 */
+	(void)snprintf(cmd, sizeof(cmd),
+	    "cp -rp %s/* %s; rm -f %s/WiredTiger.lock",
+	    home, home_wr, home_wr);
+	(void)system(cmd);
+
+	(void)snprintf(cmd, sizeof(cmd),
+	    "cp -rp %s/* %s; chmod 0555 %s; chmod -R 0444 %s/*",
+	    home, home_rd, home_rd, home_rd);
+	(void)system(cmd);
+
+	(void)snprintf(cmd, sizeof(cmd),
+	    "cp -rp %s/* %s; rm -f %s/WiredTiger.lock; "
+	    "chmod 0555 %s; chmod -R 0444 %s/*",
+	    home, home_rd2, home_rd2, home_rd2, home_rd2);
+	(void)system(cmd);
+
+	/*
+	 * Run four scenarios.  Sometimes expect errors, sometimes success.
+	 * The writable database directories should always fail to allow the
+	 * child to open due to the lock file.  The read-only ones will only
+	 * succeed when the child attempts read-only.
+	 *
+	 * 1.  Parent has read-only handle to all databases.  Child opens
+	 *     read-only also.
+	 * 2.  Parent has read-only handle to all databases.  Child opens
+	 *     read-write.
+	 * 3.  Parent has read-write handle to writable databases and
+	 *     read-only to read-only databases.  Child opens read-only.
+	 * 4.  Parent has read-write handle to writable databases and
+	 *     read-only to read-only databases.  Child opens read-write.
+	 */
+	/*
+	 * Open a connection handle to all databases.
+	 */
+	fprintf(stderr, " *** Expect several error messages from WT ***\n");
+	/*
+	 * Scenario 1.
+	 */
+	if ((ret = wiredtiger_open(home, NULL, ENV_CONFIG_RD, &conn)) != 0)
+		testutil_die(ret, "wiredtiger_open original home");
+	if ((ret = wiredtiger_open(home_wr, NULL, ENV_CONFIG_RD, &conn2)) != 0)
+		testutil_die(ret, "wiredtiger_open write nolock");
+	if ((ret = wiredtiger_open(home_rd, NULL, ENV_CONFIG_RD, &conn3)) != 0)
+		testutil_die(ret, "wiredtiger_open readonly");
+	if ((ret = wiredtiger_open(home_rd2, NULL, ENV_CONFIG_RD, &conn4)) != 0)
+		testutil_die(ret, "wiredtiger_open readonly nolock");
+
+	/*
+	 * Create a child to also open a connection handle to the databases.
+	 * We cannot use fork here because using fork the child inherits the
+	 * same memory image.  Therefore the WT process structure is set in
+	 * the child even though it should not be.  So use 'system' to spawn
+	 * an entirely new process.
+	 */
+	spawn_child("-R", working_dir);
+
+	/*
+	 * Scenario 2.  Run child with writable config.
+	 */
+	spawn_child("-W", working_dir);
+
+	/*
+	 * Reopen the two writable directories and rerun the child.
+	 *
+	 * NOTE(review): scenarios 3 and 4 above describe read-write parent
+	 * handles on the writable databases, but both are reopened here with
+	 * ENV_CONFIG_RD -- confirm which is intended.
+	 */
+	if ((ret = conn->close(conn, NULL)) != 0)
+		testutil_die(ret, "WT_CONNECTION:close");
+	if ((ret = conn2->close(conn2, NULL)) != 0)
+		testutil_die(ret, "WT_CONNECTION:close");
+	if ((ret = wiredtiger_open(home, NULL, ENV_CONFIG_RD, &conn)) != 0)
+		testutil_die(ret, "wiredtiger_open original home");
+	if ((ret = wiredtiger_open(home_wr, NULL, ENV_CONFIG_RD, &conn2)) != 0)
+		testutil_die(ret, "wiredtiger_open write nolock");
+	/*
+	 * Scenario 3.  Child read-only.
+	 */
+	spawn_child("-R", working_dir);
+
+	/*
+	 * Scenario 4.  Run child with writable config.
+	 */
+	spawn_child("-W", working_dir);
+
+	/*
+	 * Clean-up.
+	 */
+	if ((ret = conn->close(conn, NULL)) != 0)
+		testutil_die(ret, "WT_CONNECTION:close");
+	if ((ret = conn2->close(conn2, NULL)) != 0)
+		testutil_die(ret, "WT_CONNECTION:close");
+	if ((ret = conn3->close(conn3, NULL)) != 0)
+		testutil_die(ret, "WT_CONNECTION:close");
+	if ((ret = conn4->close(conn4, NULL)) != 0)
+		testutil_die(ret, "WT_CONNECTION:close");
+	/*
+	 * We need to chmod the read-only databases back so that they can
+	 * be removed by scripts.
+	 */
+	(void)snprintf(cmd, sizeof(cmd), "chmod 0777 %s %s", home_rd, home_rd2);
+	(void)system(cmd);
+	(void)snprintf(cmd, sizeof(cmd), "chmod -R 0666 %s/* %s/*",
+	    home_rd, home_rd2);
+	(void)system(cmd);
+	printf(" *** Readonly test successful ***\n");
+	return (EXIT_SUCCESS);
+}
diff --git a/test/readonly/smoke.sh b/test/readonly/smoke.sh
new file mode 100755
index 00000000000..740deb5743a
--- /dev/null
+++ b/test/readonly/smoke.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+# The test makes some of its directories read-only while it runs; restore
+# write permission on the way out so they can be removed.  Exit with the
+# status that was current when the trap fired: an unconditional "exit 0"
+# here would replace a failing test's status with success.
+trap 'status=$?; chmod -R u+w WT_*; exit $status' 0 1 2 3 13 15
+
+set -e
+
+# Smoke-test readonly as part of running "make check".
+$TEST_WRAPPER ./t
diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c
index ddcafbc80fd..f9c3ed28814 100644
--- a/test/recovery/random-abort.c
+++ b/test/recovery/random-abort.c
@@ -42,7 +42,7 @@
static char home[512]; /* Program working dir */
static const char *progname; /* Program name */
-static const char *uri = "table:main";
+static const char * const uri = "table:main";
#define RECORDS_FILE "records"
@@ -88,7 +88,8 @@ fill_db(void)
/*
* Run in the home directory so that the records file is in there too.
*/
- chdir(home);
+ if (chdir(home) != 0)
+ testutil_die(errno, "chdir: %s", home);
if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0)
testutil_die(ret, "wiredtiger_open");
if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
@@ -109,7 +110,7 @@ fill_db(void)
/*
* Set to no buffering.
*/
- setvbuf(fp, NULL, _IONBF, 0);
+ (void)setvbuf(fp, NULL, _IONBF, 0);
/*
* Write data into the table until we are killed by the parent.
@@ -135,6 +136,8 @@ fill_db(void)
extern int __wt_optind;
extern char *__wt_optarg;
+void (*custom_die)(void) = NULL;
+
int
main(int argc, char *argv[])
{
@@ -201,13 +204,15 @@ main(int argc, char *argv[])
printf("Kill child\n");
if (kill(pid, SIGKILL) != 0)
testutil_die(errno, "kill");
- waitpid(pid, &status, 0);
+ if (waitpid(pid, &status, 0) == -1)
+ testutil_die(errno, "waitpid");
/*
* !!! If we wanted to take a copy of the directory before recovery,
* this is the place to do it.
*/
- chdir(home);
+ if (chdir(home) != 0)
+ testutil_die(errno, "chdir: %s", home);
printf("Open database, run recovery and verify content\n");
if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0)
testutil_die(ret, "wiredtiger_open");
@@ -239,13 +244,15 @@ main(int argc, char *argv[])
++absent;
}
}
- fclose(fp);
+ if (fclose(fp) != 0)
+ testutil_die(errno, "fclose");
if ((ret = conn->close(conn, NULL)) != 0)
testutil_die(ret, "WT_CONNECTION:close");
if (absent) {
- printf("%u record(s) absent from %u\n", absent, count);
+ printf("%" PRIu32 " record(s) absent from %" PRIu32 "\n",
+ absent, count);
return (EXIT_FAILURE);
}
- printf("%u records verified\n", count);
+ printf("%" PRIu32 " records verified\n", count);
return (EXIT_SUCCESS);
}
diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c
index 4add7a61f66..67fdb932c27 100644
--- a/test/recovery/truncated-log.c
+++ b/test/recovery/truncated-log.c
@@ -45,7 +45,7 @@
static char home[512]; /* Program working dir */
static const char *progname; /* Program name */
-static const char *uri = "table:main";
+static const char * const uri = "table:main";
#define RECORDS_FILE "records"
@@ -54,7 +54,6 @@ static const char *uri = "table:main";
"transaction_sync=(enabled,method=none)"
#define ENV_CONFIG_REC "log=(recover=on)"
#define LOG_FILE_1 "WiredTigerLog.0000000001"
-#define MAX_VAL 4096
#define K_SIZE 16
#define V_SIZE 256
@@ -86,7 +85,8 @@ fill_db(void)
/*
* Run in the home directory so that the records file is in there too.
*/
- chdir(home);
+ if (chdir(home) != 0)
+ testutil_die(errno, "chdir: %s", home);
if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0)
testutil_die(ret, "wiredtiger_open");
if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
@@ -107,7 +107,7 @@ fill_db(void)
/*
* Set to no buffering.
*/
- setvbuf(fp, NULL, _IONBF, 0);
+ (void)setvbuf(fp, NULL, _IONBF, 0);
save_lsn.l.file = 0;
/*
@@ -156,18 +156,23 @@ fill_db(void)
"%" PRIu32 " %" PRIu32 "\n",
save_lsn.l.offset, i - 1) == -1)
testutil_die(errno, "fprintf");
- fclose(fp);
- abort();
+ break;
}
}
first = false;
}
}
+ if (fclose(fp) != 0)
+ testutil_die(errno, "fclose");
+ abort();
+ /* NOTREACHED */
}
extern int __wt_optind;
extern char *__wt_optarg;
+void (*custom_die)(void) = NULL;
+
int
main(int argc, char *argv[])
{
@@ -218,26 +223,32 @@ main(int argc, char *argv[])
/* parent */
/* Wait for child to kill itself. */
- waitpid(pid, &status, 0);
+ if (waitpid(pid, &status, 0) == -1)
+ testutil_die(errno, "waitpid");
/*
* !!! If we wanted to take a copy of the directory before recovery,
* this is the place to do it.
*/
- chdir(home);
+ if (chdir(home) != 0)
+ testutil_die(errno, "chdir: %s", home);
+
printf("Open database, run recovery and verify content\n");
if ((fp = fopen(RECORDS_FILE, "r")) == NULL)
testutil_die(errno, "fopen");
ret = fscanf(fp, "%" SCNu64 " %" SCNu32 "\n", &offset, &max_key);
- fclose(fp);
if (ret != 2)
testutil_die(errno, "fscanf");
+ if (fclose(fp) != 0)
+ testutil_die(errno, "fclose");
/*
* The offset is the beginning of the last record. Truncate to
* the middle of that last record (i.e. ahead of that offset).
*/
+ if (offset > UINT64_MAX - V_SIZE)
+ testutil_die(ERANGE, "offset");
new_offset = offset + V_SIZE;
- printf("Parent: Truncate to %u\n", (uint32_t)new_offset);
+ printf("Parent: Truncate to %" PRIu64 "\n", new_offset);
if ((ret = truncate(LOG_FILE_1, (wt_off_t)new_offset)) != 0)
testutil_die(errno, "truncate");
@@ -260,9 +271,10 @@ main(int argc, char *argv[])
if ((ret = conn->close(conn, NULL)) != 0)
testutil_die(ret, "WT_CONNECTION:close");
if (count > max_key) {
- printf("expected %u records found %u\n", max_key, count);
+ printf("expected %" PRIu32 " records found %" PRIu32 "\n",
+ max_key, count);
return (EXIT_FAILURE);
}
- printf("%u records verified\n", count);
+ printf("%" PRIu32 " records verified\n", count);
return (EXIT_SUCCESS);
}
diff --git a/test/salvage/salvage.c b/test/salvage/salvage.c
index c2ad6224b11..a1517d70787 100644
--- a/test/salvage/salvage.c
+++ b/test/salvage/salvage.c
@@ -64,6 +64,8 @@ static int verbose; /* -v flag */
extern int __wt_optind;
extern char *__wt_optarg;
+void (*custom_die)(void) = NULL;
+
int
main(int argc, char *argv[])
{
diff --git a/test/suite/helper.py b/test/suite/helper.py
index 3c460e23d08..f85d708880f 100644
--- a/test/suite/helper.py
+++ b/test/suite/helper.py
@@ -107,7 +107,10 @@ def copy_wiredtiger_home(olddir, newdir, aligned=True):
for fname in os.listdir(olddir):
fullname = os.path.join(olddir, fname)
# Skip lock file, on Windows it is locked.
- if os.path.isfile(fullname) and "WiredTiger.lock" not in fullname:
+ # Skip temporary log files.
+ if os.path.isfile(fullname) and "WiredTiger.lock" not in fullname and \
+ "WiredTigerTmplog" not in fullname and \
+ "WiredTigerPreplog" not in fullname:
# Use a dd command that does not align on a block boundary.
if aligned:
shutil.copy(fullname, newdir)
@@ -196,31 +199,36 @@ def complex_populate_index_count():
# config: prefix of the session.create configuration string
# rows: entries to insert
def complex_populate(self, uri, config, rows):
- complex_populate_type(self, uri, config, rows, '')
+ complex_populate_type(self, uri, config, '', rows, '')
+def complex_populate_cgconfig(self, uri, config, rows):
+ complex_populate_type(self, uri, config, config, rows, '')
def complex_populate_lsm(self, uri, config, rows):
- complex_populate_type(self, uri, config, rows, 'type=lsm')
-def complex_populate_type(self, uri, config, rows, type):
+ complex_populate_type(self, uri, config, '', rows, 'type=lsm')
+def complex_populate_cgconfig_lsm(self, uri, config, rows):
+ complex_populate_type(self, uri, config, config, rows, 'type=lsm')
+def complex_populate_type(self, uri, config, cgconfig, rows, type):
self.session.create(uri,
config + ',value_format=SiSS,' +
'columns=(record,column2,column3,column4,column5),' +
'colgroups=(cgroup1,cgroup2,cgroup3,cgroup4,cgroup5,cgroup6)')
cgname = 'colgroup:' + uri.split(":")[1]
- self.session.create(cgname + ':cgroup1', 'columns=(column2)' + ',' + type)
- self.session.create(cgname + ':cgroup2', 'columns=(column3)' + ',' + type)
- self.session.create(cgname + ':cgroup3', 'columns=(column4)' + ',' + type)
+ cgcfg = ',' + cgconfig + ',' + type
+ self.session.create(cgname + ':cgroup1', 'columns=(column2)' + ',' + cgcfg)
+ self.session.create(cgname + ':cgroup2', 'columns=(column3)' + ',' + cgcfg)
+ self.session.create(cgname + ':cgroup3', 'columns=(column4)' + ',' + cgcfg)
self.session.create(
- cgname + ':cgroup4', 'columns=(column2,column3)' + ',' + type)
+ cgname + ':cgroup4', 'columns=(column2,column3)' + ',' + cgcfg)
self.session.create(
- cgname + ':cgroup5', 'columns=(column3,column4)' + ',' + type)
+ cgname + ':cgroup5', 'columns=(column3,column4)' + ',' + cgcfg)
self.session.create(
- cgname + ':cgroup6', 'columns=(column2,column4,column5)' + ',' + type)
+ cgname + ':cgroup6', 'columns=(column2,column4,column5)' + ',' + cgcfg)
indxname = 'index:' + uri.split(":")[1]
- self.session.create(indxname + ':indx1', 'columns=(column2)' + ',' + type)
- self.session.create(indxname + ':indx2', 'columns=(column3)' + ',' + type)
- self.session.create(indxname + ':indx3', 'columns=(column4)' + ',' + type)
+ self.session.create(indxname + ':indx1', 'columns=(column2)' + ',' + cgcfg)
+ self.session.create(indxname + ':indx2', 'columns=(column3)' + ',' + cgcfg)
+ self.session.create(indxname + ':indx3', 'columns=(column4)' + ',' + cgcfg)
self.session.create(
- indxname + ':indx4', 'columns=(column2,column4)' + ',' + type)
+ indxname + ':indx4', 'columns=(column2,column4)' + ',' + cgcfg)
cursor = self.session.open_cursor(uri, None)
for i in range(1, rows + 1):
cursor[key_populate(cursor, i)] = \
@@ -228,9 +236,9 @@ def complex_populate_type(self, uri, config, rows, type):
cursor.close()
# add some indices after populating
self.session.create(
- indxname + ':indx5', 'columns=(column3,column5)' + ',' + type)
+ indxname + ':indx5', 'columns=(column3,column5)' + ',' + cgcfg)
self.session.create(
- indxname + ':indx6', 'columns=(column3,column5,column4)' + ',' + type)
+ indxname + ':indx6', 'columns=(column3,column5,column4)' + ',' + cgcfg)
def complex_populate_colgroup_name(self, uri, i):
return 'colgroup:' + uri.split(":")[1] + ':cgroup' + str(i + 1)
diff --git a/test/suite/test_backup05.py b/test/suite/test_backup05.py
index 8b176d0f7d7..991a9f71b19 100644
--- a/test/suite/test_backup05.py
+++ b/test/suite/test_backup05.py
@@ -44,14 +44,6 @@ class test_backup05(wttest.WiredTigerTestCase, suite_subprocess):
create_params = 'key_format=i,value_format=i'
freq = 5
- def copy_windows(self, olddir, newdir):
- os.mkdir(newdir)
- for fname in os.listdir(olddir):
- fullname = os.path.join(olddir, fname)
- # Skip lock file on Windows since it is locked
- if os.path.isfile(fullname) and "WiredTiger.lock" not in fullname:
- shutil.copy(fullname, newdir)
-
def check_manual_backup(self, i, olddir, newdir):
''' Simulate a manual backup from olddir and restart in newdir. '''
self.session.checkpoint()
@@ -71,7 +63,7 @@ class test_backup05(wttest.WiredTigerTestCase, suite_subprocess):
session.verify(self.uri)
conn.close()
- def test_backup(self):
+ def backup(self):
'''Check manual fsyncLock backup strategy'''
# Here's the strategy:
@@ -95,5 +87,9 @@ class test_backup05(wttest.WiredTigerTestCase, suite_subprocess):
else:
self.session.verify(self.uri)
+ def test_backup(self):
+ with self.expectedStdoutPattern('Recreating metadata'):
+ self.backup()
+
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_bug008.py b/test/suite/test_bug008.py
index 8f0526d9cef..0243887e258 100644
--- a/test/suite/test_bug008.py
+++ b/test/suite/test_bug008.py
@@ -33,65 +33,208 @@ import wiredtiger, wttest
from helper import simple_populate, key_populate, value_populate
from wtscenario import check_scenarios
-# Tests for invisible updates.
+# Test search/search-near operations, including invisible values and keys
+# past the end of the table.
class test_bug008(wttest.WiredTigerTestCase):
+ uri = 'file:test_bug008' # This is a btree layer test.
scenarios = check_scenarios([
- ('fix', dict(fmt='key_format=r,value_format=8t', empty=1)),
- ('row', dict(fmt='key_format=S', empty=0)),
- ('var', dict(fmt='key_format=r', empty=0))
+ ('fix', dict(fmt='key_format=r,value_format=8t', empty=1, colvar=0)),
+ ('row', dict(fmt='key_format=S', empty=0, colvar=0)),
+ ('var', dict(fmt='key_format=r', empty=0, colvar=1))
])
+ # Verify cursor search and search-near operations in an empty table.
+ def test_search_empty(self):
+ # Create the object and open a cursor.
+ self.session.create(self.uri, self.fmt)
+ cursor = self.session.open_cursor(self.uri, None)
+
+ # Search for a record past the end of the table, which should fail.
+ cursor.set_key(key_populate(cursor, 100))
+ self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+
+ # Search-near for a record past the end of the table, which should fail.
+ cursor.set_key(key_populate(cursor, 100))
+ self.assertEqual(cursor.search_near(), wiredtiger.WT_NOTFOUND)
+
+ # Verify cursor search and search-near operations at and past the end of
+ # a file, with a set of on-page visible records.
+ def test_search_eot(self):
+ # Populate the tree and reopen the connection, forcing it to disk
+ # and moving the records to an on-page format.
+ simple_populate(self, self.uri, self.fmt, 100)
+ self.reopen_conn()
+
+ # Open a cursor.
+ cursor = self.session.open_cursor(self.uri, None)
+
+ # Search for a record at the end of the table, which should succeed.
+ cursor.set_key(key_populate(cursor, 100))
+ self.assertEqual(cursor.search(), 0)
+ self.assertEqual(cursor.get_key(), key_populate(cursor, 100))
+ self.assertEqual(cursor.get_value(), value_populate(cursor, 100))
+
+ # Search-near for a record at the end of the table, which should
+ # succeed, returning the last record.
+ cursor.set_key(key_populate(cursor, 100))
+ self.assertEqual(cursor.search_near(), 0)
+ self.assertEqual(cursor.get_key(), key_populate(cursor, 100))
+ self.assertEqual(cursor.get_value(), value_populate(cursor, 100))
+
+ # Search for a record past the end of the table, which should fail.
+ cursor.set_key(key_populate(cursor, 200))
+ self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+
+ # Search-near for a record past the end of the table, which should
+ # succeed, returning the last record.
+ cursor.set_key(key_populate(cursor, 200))
+ self.assertEqual(cursor.search_near(), -1)
+ self.assertEqual(cursor.get_key(), key_populate(cursor, 100))
+ self.assertEqual(cursor.get_value(), value_populate(cursor, 100))
+
+ # Verify cursor search-near operations before and after a set of
+ # column-store duplicates.
+ def test_search_duplicate(self):
+ if self.colvar == 0:
+ return
+
+ # Populate the tree.
+ simple_populate(self, self.uri, self.fmt, 105)
+
+ # Set up deleted records before and after a set of duplicate records,
+ # and make sure search/search-near returns the correct record.
+ cursor = self.session.open_cursor(self.uri, None)
+ for i in range(20, 100):
+ cursor[key_populate(cursor, i)] = '=== IDENTICAL VALUE ==='
+ for i in range(15, 25):
+ cursor.set_key(key_populate(cursor, i))
+ self.assertEqual(cursor.remove(), 0)
+ for i in range(95, 106):
+ cursor.set_key(key_populate(cursor, i))
+ self.assertEqual(cursor.remove(), 0)
+ cursor.close()
+
+ # Reopen the connection, forcing it to disk and moving the records to
+ # an on-page format.
+ self.reopen_conn()
+
+ # Open a cursor.
+ cursor = self.session.open_cursor(self.uri, None)
+
+ # Search-near for a record in the deleted set before the duplicate set,
+ # which should succeed, returning the first record in the duplicate set.
+ cursor.set_key(key_populate(cursor, 18))
+ self.assertEqual(cursor.search_near(), 1)
+ self.assertEqual(cursor.get_key(), key_populate(cursor, 25))
+
+ # Search-near for a record in the deleted set after the duplicate set,
+ # which should succeed, returning the last record in the duplicate set.
+ cursor.set_key(key_populate(cursor, 98))
+ self.assertEqual(cursor.search_near(), -1)
+ self.assertEqual(cursor.get_key(), key_populate(cursor, 94))
+
# Verify cursor search and search-near operations on a file with a set of
# on-page visible records, and a set of insert-list invisible records.
def test_search_invisible_one(self):
- uri = 'file:test_bug008' # This is a btree layer test.
+ # Populate the tree.
+ simple_populate(self, self.uri, self.fmt, 100)
- # Populate the tree and reopen the connection, forcing it to disk
- # and moving the records to an on-page format.
- simple_populate(self, uri, self.fmt, 100)
+ # Delete a range of records.
+ for i in range(5, 10):
+ cursor = self.session.open_cursor(self.uri, None)
+ cursor.set_key(key_populate(cursor, i))
+ self.assertEqual(cursor.remove(), 0)
+
+ # Reopen the connection, forcing it to disk and moving the records to
+ # an on-page format.
self.reopen_conn()
- # Begin a transaction, and add some additional records.
+ # Add updates to the existing records (in both the deleted and undeleted
+ # range), as well as some new records after the end. Put the updates in
+ # a separate transaction so they're invisible to another cursor.
self.session.begin_transaction()
- cursor = self.session.open_cursor(uri, None)
+ cursor = self.session.open_cursor(self.uri, None)
+ for i in range(5, 10):
+ cursor[key_populate(cursor, i)] = value_populate(cursor, i + 1000)
+ for i in range(30, 40):
+ cursor[key_populate(cursor, i)] = value_populate(cursor, i + 1000)
for i in range(100, 140):
- cursor[key_populate(cursor, i)] = value_populate(cursor, i)
+ cursor[key_populate(cursor, i)] = value_populate(cursor, i + 1000)
# Open a separate session and cursor.
s = self.conn.open_session()
- cursor = s.open_cursor(uri, None)
+ cursor = s.open_cursor(self.uri, None)
- # Search for an invisible record.
- cursor.set_key(key_populate(cursor, 130))
- if self.empty:
- # Invisible updates to fixed-length column-store objects are
- # invisible to the reader, but the fact that they exist past
- # the end of the initial records causes the instantiation of
- # empty records: confirm successful return of an empty row.
- cursor.search()
- self.assertEqual(cursor.get_key(), 130)
- self.assertEqual(cursor.get_value(), 0)
- else:
- # Otherwise, we should not find any matching records.
- self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+ # Search for an existing record in the deleted range, should not find
+ # it.
+ for i in range(5, 10):
+ cursor.set_key(key_populate(cursor, i))
+ if self.empty:
+ # Fixed-length column-store rows always exist.
+ self.assertEqual(cursor.search(), 0)
+ self.assertEqual(cursor.get_key(), i)
+ self.assertEqual(cursor.get_value(), 0)
+ else:
+ self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
- # Search-near for an invisible record, which should succeed, returning
- # the last visible record.
- cursor.set_key(key_populate(cursor, 130))
- cursor.search_near()
- if self.empty:
- # Invisible updates to fixed-length column-store objects are
- # invisible to the reader, but the fact that they exist past
- # the end of the initial records causes the instantiation of
- # empty records: confirm successful return of an empty row.
- cursor.search()
- self.assertEqual(cursor.get_key(), 130)
- self.assertEqual(cursor.get_value(), 0)
- else:
- # Otherwise, we should find the closest record for which we can see
- # the value.
- self.assertEqual(cursor.get_key(), key_populate(cursor, 100))
- self.assertEqual(cursor.get_value(), value_populate(cursor, 100))
+ # Search for an existing record in the updated range, should see the
+ # original value.
+ for i in range(30, 40):
+ cursor.set_key(key_populate(cursor, i))
+ self.assertEqual(cursor.search(), 0)
+ self.assertEqual(cursor.get_key(), key_populate(cursor, i))
+
+ # Search for an added record, should not find it.
+ for i in range(120, 130):
+ cursor.set_key(key_populate(cursor, i))
+ if self.empty:
+ # Invisible updates to fixed-length column-store objects are
+ # invisible to the reader, but the fact that they exist past
+ # the end of the initial records causes the instantiation of
+ # empty records: confirm successful return of an empty row.
+ self.assertEqual(cursor.search(), 0)
+ self.assertEqual(cursor.get_key(), i)
+ self.assertEqual(cursor.get_value(), 0)
+ else:
+ # Otherwise, we should not find any matching records.
+ self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+
+ # Search-near for an existing record in the deleted range, should find
+ # the next largest record. (This depends on the implementation behavior
+ # which currently includes a bias to prefix search.)
+ for i in range(5, 10):
+ cursor.set_key(key_populate(cursor, i))
+ if self.empty:
+ # Fixed-length column-store rows always exist.
+ self.assertEqual(cursor.search_near(), 0)
+ self.assertEqual(cursor.get_key(), i)
+ self.assertEqual(cursor.get_value(), 0)
+ else:
+ self.assertEqual(cursor.search_near(), 1)
+ self.assertEqual(cursor.get_key(), key_populate(cursor, 10))
+
+ # Search-near for an existing record in the updated range, should see
+ # the original value.
+ for i in range(30, 40):
+ cursor.set_key(key_populate(cursor, i))
+ self.assertEqual(cursor.search_near(), 0)
+ self.assertEqual(cursor.get_key(), key_populate(cursor, i))
+
+ # Search-near for an added record, should find the previous largest
+ # record.
+ for i in range(120, 130):
+ cursor.set_key(key_populate(cursor, i))
+ if self.empty:
+ # Invisible updates to fixed-length column-store objects are
+ # invisible to the reader, but the fact that they exist past
+ # the end of the initial records causes the instantiation of
+ # empty records: confirm successful return of an empty row.
+ self.assertEqual(cursor.search_near(), 0)
+ self.assertEqual(cursor.get_key(), i)
+ self.assertEqual(cursor.get_value(), 0)
+ else:
+ self.assertEqual(cursor.search_near(), -1)
+ self.assertEqual(cursor.get_key(), key_populate(cursor, 100))
# Verify cursor search and search-near operations on a file with a set of
# on-page visible records, a set of insert-list visible records, and a set
@@ -101,28 +244,26 @@ class test_bug008(wttest.WiredTigerTestCase):
# fallback happens, whether the correct position is in the page slots or
# the insert list.)
def test_search_invisible_two(self):
- uri = 'file:test_bug008' # This is a btree layer test.
-
# Populate the tree and reopen the connection, forcing it to disk
# and moving the records to an on-page format.
- simple_populate(self, uri, self.fmt, 100)
+ simple_populate(self, self.uri, self.fmt, 100)
self.reopen_conn()
# Add some additional visible records.
- cursor = self.session.open_cursor(uri, None)
+ cursor = self.session.open_cursor(self.uri, None)
for i in range(100, 120):
cursor[key_populate(cursor, i)] = value_populate(cursor, i)
cursor.close()
# Begin a transaction, and add some additional records.
self.session.begin_transaction()
- cursor = self.session.open_cursor(uri, None)
+ cursor = self.session.open_cursor(self.uri, None)
for i in range(120, 140):
cursor[key_populate(cursor, i)] = value_populate(cursor, i)
# Open a separate session and cursor.
s = self.conn.open_session()
- cursor = s.open_cursor(uri, None)
+ cursor = s.open_cursor(self.uri, None)
# Search for an invisible record.
cursor.set_key(key_populate(cursor, 130))
diff --git a/test/suite/test_bulk02.py b/test/suite/test_bulk02.py
index eeca6a56967..fe8118209f2 100644
--- a/test/suite/test_bulk02.py
+++ b/test/suite/test_bulk02.py
@@ -49,8 +49,7 @@ class test_bulkload_checkpoint(wttest.WiredTigerTestCase, suite_subprocess):
scenarios = number_scenarios(multiply_scenarios('.', types, ckpt_type))
- # Bulk-load handles return EBUSY to the checkpoint code, causing the
- # checkpoint call to find a handle anyway, and create fake checkpoint.
+ # Bulk-load handles are skipped by checkpoints.
# Named and unnamed checkpoint versions.
def test_bulkload_checkpoint(self):
# Open a bulk cursor and insert a few records.
@@ -72,11 +71,8 @@ class test_bulkload_checkpoint(wttest.WiredTigerTestCase, suite_subprocess):
# In the case of named checkpoints, verify they're still there,
# reflecting an empty file.
if self.ckpt_type == 'named':
- cursor = self.session.open_cursor(
- self.uri, None, 'checkpoint=myckpt')
- self.assertEquals(cursor.next(), wiredtiger.WT_NOTFOUND)
- cursor.close()
-
+ self.assertRaises(wiredtiger.WiredTigerError,
+ lambda: self.session.open_cursor(self.uri, None, 'checkpoint=myckpt'))
# test_bulkload_backup
# Test bulk-load with hot-backup.
diff --git a/test/suite/test_checkpoint01.py b/test/suite/test_checkpoint01.py
index 7d4503b84b7..6e1ad7814ed 100644
--- a/test/suite/test_checkpoint01.py
+++ b/test/suite/test_checkpoint01.py
@@ -185,7 +185,7 @@ class test_checkpoint_cursor(wttest.WiredTigerTestCase):
# Check dropping all checkpoints fails.
msg = '/checkpoints cannot be dropped/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.checkpoint("name=checkpoint-2"), msg)
+ lambda: self.session.checkpoint("force,name=checkpoint-2"), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.checkpoint("drop=(checkpoint-2)"), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
@@ -265,9 +265,13 @@ class test_checkpoint_cursor_update(wttest.WiredTigerTestCase):
cursor = self.session.open_cursor(self.uri, None, "checkpoint=ckpt")
cursor.set_key(key_populate(cursor, 10))
cursor.set_value("XXX")
- self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.insert())
- self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.remove())
- self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.update())
+ msg = "/Unsupported cursor/"
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.insert(), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.remove(), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.update(), msg)
cursor.close()
diff --git a/test/suite/test_collator.py b/test/suite/test_collator.py
new file mode 100644
index 00000000000..34b5c20247f
--- /dev/null
+++ b/test/suite/test_collator.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import os
+import wiredtiger, wttest, run
+from wtscenario import check_scenarios, number_scenarios
+
+# test_collator.py
+# Test indices using a custom extractor and collator.
+class test_collator(wttest.WiredTigerTestCase):
+ """
+ Test indices with a custom extractor to create an index,
+ with our own collator.
+ Our set of rows looks like a multiplication table:
+ row '0': '0,0,0,0'
+ row '1': '0,1,2,3'
+ row '2': '0,2,4,6'
+ with the twist that entries are mod 100. So, looking further:
+ row '40': '0,40,80,20'
+
+ Each column is placed into its own index. Our collator reverses
+ the values.
+ """
+ nentries = 100
+ nindices = 4
+
+ # Return the wiredtiger_open extension argument for a shared library.
+ def extensionArg(self, exts):
+ extfiles = []
+ for ext in exts:
+ (dirname, name, libname) = ext
+ if name != None and name != 'none':
+ testdir = os.path.dirname(__file__)
+ extdir = os.path.join(run.wt_builddir, 'ext', dirname)
+ extfile = os.path.join(
+ extdir, name, '.libs', 'libwiredtiger_' + libname + '.so')
+ if not os.path.exists(extfile):
+ self.skipTest('extension "' + extfile + '" not built')
+ if not extfile in extfiles:
+ extfiles.append(extfile)
+ if len(extfiles) == 0:
+ return ''
+ else:
+ return ',extensions=["' + '","'.join(extfiles) + '"]'
+
+ # Override WiredTigerTestCase, we have extensions.
+ def setUpConnectionOpen(self, dir):
+ extarg = self.extensionArg([('extractors', 'csv', 'csv_extractor'),
+ ('collators', 'revint', 'revint_collator')])
+ connarg = 'create,error_prefix="{0}: ",{1}'.format(
+ self.shortid(), extarg)
+ conn = self.wiredtiger_open(dir, connarg)
+ self.pr(`conn`)
+ return conn
+
+ def create_indices(self):
+ # Create self.nindices index files, each with a column from the CSV
+ for i in range(0, self.nindices):
+ si = str(i)
+ self.session.create('index:collator:x' + si,
+ 'key_format=i,columns=(key),' +
+ 'collator=revint,' +
+ 'extractor=csv,app_metadata={"format" : "i",' +
+ '"field" : "' + si + '"}')
+
+ def drop_indices(self):
+ for i in range(0, self.nindices):
+ self.session.drop("index:collator:x" + str(i))
+
+ def csv(self, s, i):
+ return s.split(',')[i]
+
+ def expected_main_value(self, i):
+ return ','.join([str((i*j)%100) for j in range(0, self.nindices)])
+
+ # We split the population into two phases
+ # (in anticipation of future tests that create
+ # indices between the two population steps).
+ def populate(self):
+ cursor = self.session.open_cursor('table:collator', None, None)
+ for i in range(0, self.nentries):
+ cursor[i] = self.expected_main_value(i)
+ cursor.close()
+
+ def check_entries(self):
+ cursor = self.session.open_cursor('table:collator', None, None)
+ icursor = []
+ for i in range(0, self.nindices):
+ icursor.append(self.session.open_cursor('index:collator:x' + str(i),
+ None, None))
+ i = 0
+ for primkey, value in cursor:
+ # Check main table
+ expect = self.expected_main_value(i)
+ self.assertEqual(i, primkey)
+ self.assertEqual(value, expect)
+ for idx in range(0, self.nindices):
+ c = icursor[idx]
+ indexkey = (i*idx)%100
+ c.set_key(indexkey)
+ self.assertEqual(c.search(), 0)
+ value = c.get_value()
+ key = c.get_key()
+ while value != expect and key == indexkey and \
+ self.csv(value, idx) == self.csv(expect, idx):
+ self.assertEqual(0, c.next())
+ value = c.get_value()
+ key = c.get_key()
+ self.assertEqual(value, expect)
+ i += 1
+ self.assertEqual(self.nentries, i)
+ for i in range(0, self.nindices):
+ c = icursor[i]
+ c.reset()
+ expected = set(range(0, self.nentries))
+ for key, val in c:
+ primkey = int(val.split(',')[1])
+ expected.remove(primkey)
+ self.assertEquals(0, len(expected))
+ c.close()
+
+ def test_index(self):
+ self.session.create("table:collator", "key_format=i,value_format=S,"
+ "columns=(primarykey,value)")
+ self.create_indices()
+ self.populate()
+ self.check_entries()
+
+ # Drop and recreate all indices, everything should be there.
+ self.drop_indices()
+ self.create_indices()
+ self.check_entries()
+
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_compact02.py b/test/suite/test_compact02.py
index 14781b0f050..7ad05cd2536 100644
--- a/test/suite/test_compact02.py
+++ b/test/suite/test_compact02.py
@@ -50,11 +50,12 @@ class test_compact02(wttest.WiredTigerTestCase):
# being stored: compaction doesn't work on tables with many overflow items
# because we don't rewrite them. Experimentally, 8KB is as small as the test
# can go. Additionally, we can't set the maximum page size too large because
- # there won't be enough pages to rewrite. Experimentally, 32KB (the default)
- # is as large as the test can go.
+ # there won't be enough pages to rewrite. Experimentally, 128KB works.
fileConfig = [
('default', dict(fileConfig='')),
('8KB', dict(fileConfig='leaf_page_max=8kb')),
+ ('64KB', dict(fileConfig='leaf_page_max=64KB')),
+ ('128KB', dict(fileConfig='leaf_page_max=128KB')),
]
scenarios = \
number_scenarios(multiply_scenarios('.', types, cacheSize, fileConfig))
diff --git a/test/suite/test_cursor06.py b/test/suite/test_cursor06.py
index ff7c1144344..5545c862dd7 100644
--- a/test/suite/test_cursor06.py
+++ b/test/suite/test_cursor06.py
@@ -89,10 +89,11 @@ class test_cursor06(wttest.WiredTigerTestCase):
self.session.drop(uri, "force")
self.populate(uri)
cursor = self.session.open_cursor(uri, None, open_config)
+ msg = '/Unsupported cursor/'
if open_config == "readonly=1":
self.set_kv(cursor)
- self.assertRaises(wiredtiger.WiredTigerError,
- lambda: cursor.update())
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.update(), msg)
else:
self.set_kv(cursor)
cursor.update()
diff --git a/test/suite/test_cursor_random.py b/test/suite/test_cursor_random.py
index 2cef62b218a..16ce5cae685 100644
--- a/test/suite/test_cursor_random.py
+++ b/test/suite/test_cursor_random.py
@@ -51,15 +51,21 @@ class test_cursor_random(wttest.WiredTigerTestCase):
uri = self.type
self.session.create(uri, 'key_format=S,value_format=S')
cursor = self.session.open_cursor(uri, None, self.config)
- self.assertRaises(
- wiredtiger.WiredTigerError, lambda: cursor.compare(cursor))
- self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.insert())
- self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.prev())
- self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.remove())
- self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.search())
- self.assertRaises(
- wiredtiger.WiredTigerError, lambda: cursor.search_near())
- self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.update())
+ msg = "/Unsupported cursor/"
+ self.assertRaisesWithMessage(
+ wiredtiger.WiredTigerError, lambda: cursor.compare(cursor), msg)
+ self.assertRaisesWithMessage(
+ wiredtiger.WiredTigerError, lambda: cursor.insert(), msg)
+ self.assertRaisesWithMessage(
+ wiredtiger.WiredTigerError, lambda: cursor.prev(), msg)
+ self.assertRaisesWithMessage(
+ wiredtiger.WiredTigerError, lambda: cursor.remove(), msg)
+ self.assertRaisesWithMessage(
+ wiredtiger.WiredTigerError, lambda: cursor.search(), msg)
+ self.assertRaisesWithMessage(
+ wiredtiger.WiredTigerError, lambda: cursor.search_near(), msg)
+ self.assertRaisesWithMessage(
+ wiredtiger.WiredTigerError, lambda: cursor.update(), msg)
self.assertTrue(cursor.next(), wiredtiger.WT_NOTFOUND)
self.assertEquals(cursor.reconfigure(), 0)
@@ -137,7 +143,7 @@ class test_cursor_random_column(wttest.WiredTigerTestCase):
def test_cursor_random_column(self):
self.session.create(self.uri, 'key_format=r,value_format=S')
- msg = '/Operation not supported/'
+ msg = '/next_random .* not supported/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
self.session.open_cursor(self.uri, None, "next_random=true"), msg)
diff --git a/test/suite/test_drop.py b/test/suite/test_drop.py
index 5663b85d661..52ea7251ab5 100644
--- a/test/suite/test_drop.py
+++ b/test/suite/test_drop.py
@@ -41,12 +41,11 @@ class test_drop(wttest.WiredTigerTestCase):
scenarios = check_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:')),
- #Not yet: drop failing with an open cursor needs handle locking
- #('table-lsm', dict(uri='table:', extra_config=',type=lsm')),
+ ('table-lsm', dict(uri='table:', extra_config=',type=lsm')),
])
# Populate an object, remove it and confirm it no longer exists.
- def drop(self, populate, with_cursor, close_session, drop_index):
+ def drop(self, populate, with_cursor, reopen, drop_index):
uri = self.uri + self.name
populate(self, uri, 'key_format=S' + self.extra_config, 10)
@@ -57,7 +56,7 @@ class test_drop(wttest.WiredTigerTestCase):
lambda: self.session.drop(uri, None))
cursor.close()
- if close_session:
+ if reopen:
self.reopen_conn()
if drop_index:
@@ -73,17 +72,17 @@ class test_drop(wttest.WiredTigerTestCase):
# Try all combinations except dropping the index, the simple
# case has no indices.
for with_cursor in [False, True]:
- for close_session in [False, True]:
- self.drop(simple_populate, with_cursor, close_session, False)
+ for reopen in [False, True]:
+ self.drop(simple_populate, with_cursor, reopen, False)
# A complex, multi-file table object.
# Try all test combinations.
if self.uri == "table:":
for with_cursor in [False, True]:
- for close_session in [False, True]:
+ for reopen in [False, True]:
for drop_index in [False, True]:
self.drop(complex_populate, with_cursor,
- close_session, drop_index)
+ reopen, drop_index)
# Test drop of a non-existent object: force succeeds, without force fails.
def test_drop_dne(self):
diff --git a/test/suite/test_drop02.py b/test/suite/test_drop02.py
new file mode 100644
index 00000000000..677ba3866b2
--- /dev/null
+++ b/test/suite/test_drop02.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+from helper import simple_populate
+
+# test_drop02.py
+# Test dropping an LSM tree on first open. There was a bug where this
+# would cause an assertion failure: WT-2501
+class test_drop02(wttest.WiredTigerTestCase):
+ name = 'test_drop02'
+
+ # Populate an LSM tree, reopen the connection and drop it on first open.
+ def test_drop(self):
+ uri = 'lsm:' + self.name
+ simple_populate(self, uri, 'key_format=S', 100000)
+ self.reopen_conn()
+
+ self.session.drop(uri, None)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_dump.py b/test/suite/test_dump.py
index c850d1b5d3f..fc1422155e2 100644
--- a/test/suite/test_dump.py
+++ b/test/suite/test_dump.py
@@ -29,8 +29,8 @@
import os
import wiredtiger, wttest
from helper import \
- complex_populate, complex_populate_check_cursor,\
- simple_populate, simple_populate_check_cursor
+ complex_populate, complex_populate_check, \
+ simple_populate, simple_populate_check
from suite_subprocess import suite_subprocess
from wtscenario import multiply_scenarios, number_scenarios
@@ -54,15 +54,24 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
('string', dict(keyfmt='S'))
]
types = [
- ('file', dict(type='file:',
+ ('file', dict(uri='file:', config='', lsm=False,
populate=simple_populate,
- populate_check=simple_populate_check_cursor)),
- ('table-simple', dict(type='table:',
+ populate_check=simple_populate_check)),
+ ('lsm', dict(uri='lsm:', config='', lsm=True,
populate=simple_populate,
- populate_check=simple_populate_check_cursor)),
- ('table-complex', dict(type='table:',
+ populate_check=simple_populate_check)),
+ ('table-simple', dict(uri='table:', config='', lsm=False,
+ populate=simple_populate,
+ populate_check=simple_populate_check)),
+ ('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True,
+ populate=simple_populate,
+ populate_check=simple_populate_check)),
+ ('table-complex', dict(uri='table:', config='', lsm=False,
+ populate=complex_populate,
+ populate_check=complex_populate_check)),
+ ('table-complex-lsm', dict(uri='table:', config='type=lsm', lsm=True,
populate=complex_populate,
- populate_check=complex_populate_check_cursor))
+ populate_check=complex_populate_check))
]
scenarios = number_scenarios(
multiply_scenarios('.', types, keyfmt, dumpfmt))
@@ -94,9 +103,14 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
# Dump, re-load and do a content comparison.
def test_dump(self):
+ # LSM and column-store are not a valid combination.
+ if self.lsm and self.keyfmt == 'r':
+ return
+
# Create the object.
- uri = self.type + self.name
- self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries)
+ uri = self.uri + self.name
+ self.populate(self, uri,
+ self.config + ',key_format=' + self.keyfmt, self.nentries)
# Dump the object.
os.mkdir(self.dir)
@@ -108,11 +122,17 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
# Re-load the object.
self.runWt(['-h', self.dir, 'load', '-f', 'dump.out'])
- # Check the contents
+ # Check the database contents
+ self.runWt(['list'], outfilename='list.out')
+ self.runWt(['-h', self.dir, 'list'], outfilename='list.out.new')
+ s1 = set(open('list.out').read().split())
+ s2 = set(open('list.out.new').read().split())
+ self.assertEqual(not s1.symmetric_difference(s2), True)
+
+ # Check the object's contents
conn = self.wiredtiger_open(self.dir)
session = conn.open_session()
- cursor = session.open_cursor(uri, None, None)
- self.populate_check(self, cursor, self.nentries)
+ self.populate_check(self, uri, self.nentries)
conn.close()
# Re-load the object again.
@@ -121,8 +141,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
# Check the contents, they shouldn't have changed.
conn = self.wiredtiger_open(self.dir)
session = conn.open_session()
- cursor = session.open_cursor(uri, None, None)
- self.populate_check(self, cursor, self.nentries)
+ self.populate_check(self, uri, self.nentries)
conn.close()
# Re-load the object again, but confirm -n (no overwrite) fails.
@@ -130,7 +149,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
'load', '-n', '-f', 'dump.out'], errfilename='errfile.out')
self.check_non_empty_file('errfile.out')
- # If there is are indices, dump one of them and check the output.
+ # If there are indices, dump one of them and check the output.
if self.populate == complex_populate:
indexuri = 'index:' + self.name + ':indx1'
hexopt = ['-x'] if self.hex == 1 else []
diff --git a/test/suite/test_join01.py b/test/suite/test_join01.py
index f03c7c6f06c..4aa2bc6e269 100644
--- a/test/suite/test_join01.py
+++ b/test/suite/test_join01.py
@@ -33,7 +33,6 @@ from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
# Join operations
# Basic tests for join
class test_join01(wttest.WiredTigerTestCase):
- table_name1 = 'test_join01'
nentries = 100
scenarios = [
@@ -75,8 +74,18 @@ class test_join01(wttest.WiredTigerTestCase):
# the join cursor and iterating again.
def stats(self, jc, which):
statcur = self.session.open_cursor('statistics:join', jc, None)
- self.check_stats(statcur, 0, 'join: index:join01:index1: ' +
- 'bloom filter false positives')
+ # pick a stat we always expect to see
+ statdesc = 'bloom filter false positives'
+ expectstats = [
+ 'join: index:join01:index1: ' + statdesc,
+ 'join: index:join01:index2: ' + statdesc ]
+ if self.ref == 'index':
+ expectstats.append('join: index:join01:index0: ' + statdesc)
+ else:
+ expectstats.append('join: table:join01: ' + statdesc)
+ self.check_stats(statcur, expectstats)
+ statcur.reset()
+ self.check_stats(statcur, expectstats)
statcur.close()
def statstr_to_int(self, str):
@@ -87,16 +96,14 @@ class test_join01(wttest.WiredTigerTestCase):
parts = str.rpartition('(')
return int(parts[2].rstrip(')'))
- # string should appear with a minimum value of least "min".
- def check_stats(self, statcursor, min, lookfor):
+ # All of the expected strings should appear
+ def check_stats(self, statcursor, expectstats):
stringclass = ''.__class__
intclass = (0).__class__
# Reset the cursor, we're called multiple times.
statcursor.reset()
- found = False
- foundval = 0
self.printVerbose(3, 'statistics:')
for id, desc, valstr, val in statcursor:
self.assertEqual(type(desc), stringclass)
@@ -105,12 +112,11 @@ class test_join01(wttest.WiredTigerTestCase):
self.assertEqual(val, self.statstr_to_int(valstr))
self.printVerbose(3, ' stat: \'' + desc + '\', \'' +
valstr + '\', ' + str(val))
- if desc == lookfor:
- found = True
- foundval = val
+ if desc in expectstats:
+ expectstats.remove(desc)
- self.assertTrue(found, 'in stats, did not see: ' + lookfor)
- self.assertTrue(foundval >= min)
+ self.assertTrue(len(expectstats) == 0,
+ 'missing expected values in stats: ' + str(expectstats))
# Common function for testing the most basic functionality
# of joins
@@ -142,7 +148,8 @@ class test_join01(wttest.WiredTigerTestCase):
# and examine primary keys 2,5,8,...,95,98,1,4,7,...,94,97.
jc = self.session.open_cursor('join:table:join01' + proj_suffix,
None, None)
- c2 = self.session.open_cursor('index:join01:index2', None, None)
+ # Adding a projection to a reference cursor should be allowed.
+ c2 = self.session.open_cursor('index:join01:index2(v1)', None, None)
c2.set_key(99) # skips all entries w/ primary key divisible by three
self.assertEquals(0, c2.search())
self.session.join(jc, c2, 'compare=gt')
@@ -160,12 +167,12 @@ class test_join01(wttest.WiredTigerTestCase):
# Then select all numbers whose reverse string representation
# is in '20' < x < '40'.
- c1a = self.session.open_cursor('index:join01:index1', None, None)
+ c1a = self.session.open_cursor('index:join01:index1(v1)', None, None)
c1a.set_key('21')
self.assertEquals(0, c1a.search())
self.session.join(jc, c1a, 'compare=gt' + joincfg1)
- c1b = self.session.open_cursor('index:join01:index1', None, None)
+ c1b = self.session.open_cursor('index:join01:index1(v1)', None, None)
c1b.set_key('41')
self.assertEquals(0, c1b.search())
self.session.join(jc, c1b, 'compare=lt' + joincfg1)
@@ -342,11 +349,12 @@ class test_join01(wttest.WiredTigerTestCase):
'/index cursor is being used in a join/')
# Only a small number of operations allowed on a join cursor
- self.assertRaises(wiredtiger.WiredTigerError,
- lambda: jc.search())
+ msg = "/Unsupported cursor/"
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: jc.search(), msg)
- self.assertRaises(wiredtiger.WiredTigerError,
- lambda: jc.prev())
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: jc.prev(), msg)
self.assertEquals(jc.next(), 0)
self.assertEquals(jc.next(), wiredtiger.WT_NOTFOUND)
@@ -390,6 +398,7 @@ class test_join01(wttest.WiredTigerTestCase):
def test_cursor_close2(self):
self.cursor_close_common(False)
+ # test statistics using the framework set up for this test
def test_stats(self):
bloomcfg1000 = ',strategy=bloom,count=1000'
bloomcfg10 = ',strategy=bloom,count=10'
@@ -399,6 +408,40 @@ class test_join01(wttest.WiredTigerTestCase):
# statistics should pick up some false positives.
self.join_common(bloomcfg10, bloomcfg10, False, True)
+ # test statistics with a simple one index join cursor
+ def test_simple_stats(self):
+ self.session.create("table:join01b",
+ "key_format=i,value_format=i,columns=(k,v)")
+ self.session.create("index:join01b:index", "columns=(v)")
+
+ cursor = self.session.open_cursor("table:join01b", None, None)
+ cursor[1] = 11
+ cursor[2] = 12
+ cursor[3] = 13
+ cursor.close()
+
+ cursor = self.session.open_cursor("index:join01b:index", None, None)
+ cursor.set_key(11)
+ cursor.search()
+
+ jcursor = self.session.open_cursor("join:table:join01b", None, None)
+ self.session.join(jcursor, cursor, "compare=gt")
+
+ while jcursor.next() == 0:
+ [k] = jcursor.get_keys()
+ [v] = jcursor.get_values()
+
+ statcur = self.session.open_cursor("statistics:join", jcursor, None)
+ found = False
+ while statcur.next() == 0:
+ [desc, pvalue, value] = statcur.get_values()
+ #self.tty(str(desc) + "=" + str(pvalue))
+ found = True
+ self.assertEquals(found, True)
+
+ jcursor.close()
+ cursor.close()
+
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_join02.py b/test/suite/test_join02.py
index d122de8a0eb..a691c499cf6 100644
--- a/test/suite/test_join02.py
+++ b/test/suite/test_join02.py
@@ -179,15 +179,16 @@ class test_join02(wttest.WiredTigerTestCase):
c.close()
# Use the primary table in one of the joins.
+ # Use various projections, which should not matter for ref cursors
c0a = self.session.open_cursor('table:join02', None, None)
- c0b = self.session.open_cursor('table:join02', None, None)
- c1a = self.session.open_cursor('index:join02:index1', None, None)
+ c0b = self.session.open_cursor('table:join02(v4)', None, None)
+ c1a = self.session.open_cursor('index:join02:index1(v0)', None, None)
c1b = self.session.open_cursor('index:join02:index1', None, None)
c2a = self.session.open_cursor('index:join02:index2', None, None)
c2b = self.session.open_cursor('index:join02:index2', None, None)
- c3a = self.session.open_cursor('index:join02:index3', None, None)
- c3b = self.session.open_cursor('index:join02:index3', None, None)
- c4a = self.session.open_cursor('index:join02:index4', None, None)
+ c3a = self.session.open_cursor('index:join02:index3(v4)', None, None)
+ c3b = self.session.open_cursor('index:join02:index3(v0)', None, None)
+ c4a = self.session.open_cursor('index:join02:index4(v1)', None, None)
# Attach extra properties to each cursor. For cursors that
# may appear on the 'left' side of a range CA < x < CB,
diff --git a/test/suite/test_join05.py b/test/suite/test_join05.py
new file mode 100644
index 00000000000..ef2be4c6460
--- /dev/null
+++ b/test/suite/test_join05.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+
+# test_join05.py
+# Tests based on JIRA reports
+class test_join05(wttest.WiredTigerTestCase):
+
+ # test join having the first index just be lt/le
+ def test_wt_2384(self):
+ self.session.create("table:test_2384",
+ "key_format=i,value_format=i,columns=(k,v)")
+ self.session.create("index:test_2384:index", "columns=(v)")
+ cursor = self.session.open_cursor("table:test_2384", None, None)
+ cursor[1] = 11
+ cursor[2] = 12
+ cursor[3] = 13
+ cursor.close()
+
+ cursor = self.session.open_cursor("index:test_2384:index", None, None)
+ cursor.set_key(13)
+ self.assertEquals(cursor.search(), 0)
+
+ jcursor = self.session.open_cursor("join:table:test_2384", None, None)
+ self.session.join(jcursor, cursor, "compare=lt")
+
+ nr_found = 0
+ while jcursor.next() == 0:
+ [k] = jcursor.get_keys()
+ [v] = jcursor.get_values()
+ #self.tty("jcursor: k=" + str(k) + ", v=" + str(v))
+ nr_found += 1
+
+ self.assertEquals(nr_found, 2)
+ jcursor.close()
+ cursor.close()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_join06.py b/test/suite/test_join06.py
new file mode 100644
index 00000000000..9af6f93792f
--- /dev/null
+++ b/test/suite/test_join06.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import os
+import wiredtiger, wttest, run
+from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+
+# test_join06.py
+# Join operations
+# Joins with read-uncommitted isolation
+class test_join06(wttest.WiredTigerTestCase):
+ nentries = 1000
+
+ isoscen = [
+ ('isolation_read_uncommitted', dict(uncommitted=True)),
+ ('isolation_default', dict(uncommitted=False))
+ ]
+
+ bloomscen = [
+ ('bloom', dict(bloom=True)),
+ ('nobloom', dict(bloom=False))
+ ]
+
+ scenarios = number_scenarios(multiply_scenarios('.', isoscen, bloomscen))
+
+ def gen_values(self, i):
+ s = str(i) # 345 => "345"
+ f = s[0:1] + s[0:1] + s[0:1] # 345 => "333"
+ return [s, f]
+
+ def gen_values2(self, i):
+ s = str(i) # 345 => "345"
+ l = s[-1:] + s[-1:] + s[-1:] # 345 => "555"
+ return [s, l]
+
+ def populate(self, s, gen_values):
+ c = s.open_cursor('table:join06', None, None)
+ for i in range(0, self.nentries):
+ c.set_key(i)
+ c.set_value(*gen_values(i))
+ c.insert()
+ c.close()
+
+ # Common function for testing the most basic functionality
+ # of joins
+ def test_join(self):
+ self.session.create('table:join06',
+ 'columns=(k,v0,v1),key_format=i,value_format=SS')
+ self.session.create('index:join06:index0','columns=(v0)')
+ self.session.create('index:join06:index1','columns=(v1)')
+
+ self.populate(self.session, self.gen_values)
+
+ # TODO: needed?
+ #self.reopen_conn()
+
+ if self.uncommitted:
+ self.session.begin_transaction('isolation=read-uncommitted')
+
+ jc = self.session.open_cursor('join:table:join06', None, None)
+ c0 = self.session.open_cursor('index:join06:index0', None, None)
+ c0.set_key('520')
+ self.assertEquals(0, c0.search())
+ self.session.join(jc, c0, 'compare=ge')
+
+ joinconfig = 'compare=eq'
+ if self.bloom:
+ joinconfig += ',strategy=bloom,count=1000'
+ c1 = self.session.open_cursor('index:join06:index1', None, None)
+ c1.set_key('555')
+ self.assertEquals(0, c1.search())
+ self.session.join(jc, c1, joinconfig)
+
+ if self.uncommitted and self.bloom:
+ # Make sure that read-uncommitted with Bloom is not allowed.
+ # This is detected on the first next() operation.
+ msg = '/cannot be used with read-uncommitted/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: jc.next(), msg)
+ return
+
+ # Changes made in another session may or may not be visible to us,
+ # depending on the isolation level.
+ if self.uncommitted:
+ # isolation level is read-uncommitted, so we will see
+ # additions and deletions made in our other session.
+ mbr = set(range(525,1000,10)) | set(range(55,100,10)) | set([520])
+ else:
+ # default isolation level, so we should see a consistent
+ # set at the time we begin iteration.
+ mbr = set(range(520,600)) | set(range(53,60))
+
+ altered = False
+
+ while jc.next() == 0:
+ [k] = jc.get_keys()
+ [v0,v1] = jc.get_values()
+ #self.tty('GOT: ' + str(k) + ': ' + str(jc.get_values()))
+ if altered and self.uncommitted:
+ self.assertEquals(self.gen_values2(k), [v0, v1])
+ else:
+ self.assertEquals(self.gen_values(k), [v0, v1])
+ if not k in mbr:
+ self.tty('**** ERROR: result ' + str(k) + ' is not in: ' +
+ str(mbr))
+ self.assertTrue(k in mbr)
+ mbr.remove(k)
+
+ # In another session, we remove entries for keys ending in 6,
+ # and add entries for keys ending in 5. Depending on the
+ # isolation level for the transaction, these changes may or
+ # may not be visible for the original session.
+ if not altered:
+ s = self.conn.open_session(None)
+ s.begin_transaction(None)
+ self.populate(s, self.gen_values2)
+ s.commit_transaction()
+ s.close()
+ altered = True
+
+ if len(mbr) != 0:
+ self.tty('**** ERROR: did not see these: ' + str(mbr))
+ self.assertEquals(0, len(mbr))
+
+ jc.close()
+ c1.close()
+ c0.close()
+ if self.uncommitted:
+ self.session.commit_transaction()
+ self.session.drop('table:join06')
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_lsm03.py b/test/suite/test_lsm03.py
new file mode 100644
index 00000000000..448d864c646
--- /dev/null
+++ b/test/suite/test_lsm03.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wtscenario, wttest
+from helper import simple_populate
+
+# test_lsm03.py
+# Check to make sure that LSM schema operations don't get EBUSY when
+# there are no user operations active.
+class test_lsm03(wttest.WiredTigerTestCase):
+ name = 'test_lsm03'
+
+ # Use small pages so we generate some internal layout
+ # Setup LSM so multiple chunks are present
+ config = 'key_format=S,allocation_size=512,internal_page_max=512' + \
+ ',leaf_page_max=1k,lsm=(chunk_size=512k,merge_min=10)'
+
+ # Populate an object then drop it.
+ def test_lsm_drop_active(self):
+ uri = 'lsm:' + self.name
+ simple_populate(self, uri, self.config, 10000)
+
+ # Force to disk
+ self.reopen_conn()
+
+ # An open cursor should cause the drop to fail.
+ cursor = self.session.open_cursor(uri, None, None)
+ self.assertRaises(wiredtiger.WiredTigerError,
+ lambda: self.session.drop(uri, None))
+ cursor.close()
+
+ # Add enough records that a merge should be running
+ simple_populate(self, uri, self.config, 50000)
+ # The drop should succeed even when LSM work units are active
+ self.session.drop(uri)
diff --git a/test/suite/test_readonly01.py b/test/suite/test_readonly01.py
new file mode 100644
index 00000000000..59e9743ab7e
--- /dev/null
+++ b/test/suite/test_readonly01.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+#
+# Public Domain 2016-2016 MongoDB, Inc.
+# Public Domain 2008-2016 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_readonly01.py
+# Readonly: Test readonly mode.
+#
+
+import fnmatch, os, shutil, time
+from suite_subprocess import suite_subprocess
+from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+import wttest
+
+class test_readonly01(wttest.WiredTigerTestCase, suite_subprocess):
+ tablename = 'test_readonly01'
+ create = True
+ entries = 10000
+
+ #
+ # We want to run with the database directory both writable and readonly.
+ #
+ basecfg_list = [
+ ('basecfg', dict(basecfg='config_base=true,')),
+ ('no_basecfg', dict(basecfg='config_base=false,')),
+ ]
+ dir_list = [
+ ('write', dict(dirchmod=False)),
+ ('readonly', dict(dirchmod=True)),
+ ]
+ log_list = [
+ ('logging', dict(logcfg='log=(archive=false,enabled,file_max=100K),')),
+ ('no_logging', dict(logcfg='log=(enabled=false),')),
+ ]
+
+ types = [
+ ('lsm', dict(tabletype='lsm', uri='lsm',
+ create_params = 'key_format=i,value_format=i')),
+ ('file-row', dict(tabletype='row', uri='file',
+ create_params = 'key_format=i,value_format=i')),
+ ('file-var', dict(tabletype='var', uri='file',
+ create_params = 'key_format=r,value_format=i')),
+ ('file-fix', dict(tabletype='fix', uri='file',
+ create_params = 'key_format=r,value_format=8t')),
+ ('table-row', dict(tabletype='row', uri='table',
+ create_params = 'key_format=i,value_format=i')),
+ ('table-var', dict(tabletype='var', uri='table',
+ create_params = 'key_format=r,value_format=i')),
+ ('table-fix', dict(tabletype='fix', uri='table',
+ create_params = 'key_format=r,value_format=8t')),
+ ]
+
+ scenarios = multiply_scenarios('.',
+ basecfg_list, dir_list, log_list, types)
+
+ def conn_config(self, dir):
+ self.home = dir
+ params = \
+ 'error_prefix="%s",' % self.shortid() + \
+ '%s' % self.logcfg + \
+ '%s' % self.basecfg
+ if self.create:
+ conn_params = 'create,' + params
+ else:
+ conn_params = 'readonly=true,' + params
+ return conn_params
+
+ def close_reopen(self):
+ ''' Close the connection and reopen readonly'''
+ #
+ # close the original connection. If needed, chmod the
+ # database directory to readonly mode. Then reopen the
+ # connection with readonly.
+ #
+ self.close_conn()
+ #
+ # The chmod command is not fully portable to Windows.
+ #
+ if self.dirchmod and os.name == 'posix':
+ for f in os.listdir(self.home):
+ if os.path.isfile(f):
+ os.chmod(f, 0444)
+ os.chmod(self.home, 0555)
+ self.conn = self.setUpConnectionOpen(self.home)
+ self.session = self.setUpSessionOpen(self.conn)
+
+ def readonly(self):
+ # Here's the strategy:
+ # - Create a table.
+ # - Insert data into table.
+ # - Close connection.
+ # - Possibly chmod to readonly
+ # - Open connection readonly
+ # - Confirm we can read the data.
+ #
+ tablearg = self.uri + ':' + self.tablename
+ self.session.create(tablearg, self.create_params)
+ c = self.session.open_cursor(tablearg, None, None)
+ for i in range(self.entries):
+ c[i+1] = i % 255
+ # Close the connection. Reopen readonly
+ self.create = False
+ self.close_reopen()
+ c = self.session.open_cursor(tablearg, None, None)
+ i = 0
+ for key, value in c:
+ self.assertEqual(i+1, key)
+ self.assertEqual(i % 255, value)
+ i += 1
+ self.assertEqual(i, self.entries)
+ self.pr('Read %d entries' % i)
+ c.close()
+ self.create = True
+
+ def test_readonly(self):
+ if self.dirchmod and os.name == 'posix':
+ with self.expectedStderrPattern('Permission'):
+ self.readonly()
+ else:
+ self.readonly()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_readonly02.py b/test/suite/test_readonly02.py
new file mode 100644
index 00000000000..0df5465642d
--- /dev/null
+++ b/test/suite/test_readonly02.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+#
+# Public Domain 2016-2016 MongoDB, Inc.
+# Public Domain 2008-2016 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_readonly02.py
+# Readonly: Test readonly mode with illegal config combinations
+# and error checking during updates.
+#
+
+from helper import copy_wiredtiger_home
+from suite_subprocess import suite_subprocess
+import os, wiredtiger, wttest
+
+class test_readonly02(wttest.WiredTigerTestCase, suite_subprocess):
+ tablename = 'table:test_readonly02'
+ create = True
+ create_params = 'key_format=i,value_format=i'
+ entries = 10
+
+ conn_params = \
+ 'create,statistics=(fast),log=(enabled,file_max=100K,zero_fill=true),'
+ conn_params_rd = \
+ 'create,readonly=true,statistics=(fast),log=(enabled,zero_fill=false),'
+ conn_params_rdcfg = \
+ 'create,readonly=true,statistics=(fast),log=(enabled),'
+
+ #
+ # Run to make sure incompatible configuration options return an error.
+ # The situations that cause failures (instead of silent overrides) are:
+ # 1. setting readonly on a new database directory
+ # 2. an unclean shutdown and reopening readonly
+ # 3. logging with zero-fill enabled and readonly
+ #
+ badcfg1 = 'log=(enabled,zero_fill=true)'
+
+ def setUpConnectionOpen(self, dir):
+ self.home = dir
+ rdonlydir = dir + '.rdonly'
+ #
+ # First time through check readonly on a non-existent database.
+ #
+ if self.create:
+ # 1. setting readonly on a new database directory
+ # Setting readonly prevents creation so we should see an
+ # error because the lock file does not exist.
+ msg = '/No such file/'
+ if os.name != 'posix':
+ msg = '/cannot find the file/'
+ os.mkdir(rdonlydir)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.wiredtiger_open(
+ rdonlydir, self.conn_params_rd), msg)
+
+ self.create = False
+ conn = self.wiredtiger_open(dir, self.conn_params)
+ return conn
+
+ def check_unclean(self):
+ backup = "WT_COPYDIR"
+ copy_wiredtiger_home(self.home, backup, True)
+ msg = '/needs recovery/'
+ # 2. an unclean shutdown and reopening readonly
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.wiredtiger_open(backup, self.conn_params_rd), msg)
+
+ def close_checkerror(self, cfg):
+ ''' Close the connection and reopen readonly'''
+ #
+ # Close the original connection. Reopen readonly and also with
+ # the given configuration string.
+ #
+ self.close_conn()
+ conn_params = self.conn_params_rd + cfg
+ msg = '/Invalid argument/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.wiredtiger_open(self.home, conn_params), msg)
+
+ def test_readonly(self):
+ tablearg = self.tablename
+ self.session.create(tablearg, self.create_params)
+ c = self.session.open_cursor(tablearg, None, None)
+ for i in range(self.entries):
+ c[i+1] = i % 255
+ # Check for an error on an unclean recovery/restart.
+ self.check_unclean()
+
+ # Close the connection. Reopen readonly with other bad settings.
+ # 3. logging with zero-fill enabled and readonly
+ self.close_checkerror(self.badcfg1)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_readonly03.py b/test/suite/test_readonly03.py
new file mode 100644
index 00000000000..d9930e8f553
--- /dev/null
+++ b/test/suite/test_readonly03.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+#
+# Public Domain 2016-2016 MongoDB, Inc.
+# Public Domain 2008-2016 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_readonly03.py
+# Readonly: Test connection readonly mode with modifying methods. Confirm
+# all return ENOTSUP.
+#
+
+from helper import simple_populate
+from suite_subprocess import suite_subprocess
+import os, sys, wiredtiger, wttest
+
+class test_readonly03(wttest.WiredTigerTestCase, suite_subprocess):
+ uri = 'table:test_readonly03'
+ uri2 = 'table:test_readonly03_2'
+ create = True
+
+ conn_params = 'create,log=(enabled),'
+ conn_params_rd = 'readonly=true'
+
+ session_ops = [ 'create', 'compact', 'drop', 'log_flush', 'log_printf',
+ 'rebalance', 'rename', 'salvage', 'truncate', 'upgrade', ]
+ cursor_ops = [ 'insert', 'remove', 'update', ]
+
+ def setUpConnectionOpen(self, dir):
+ self.home = dir
+ if self.create:
+ conn_cfg = self.conn_params
+ else:
+ conn_cfg = self.conn_params_rd
+ conn = self.wiredtiger_open(dir, conn_cfg)
+ self.create = False
+ return conn
+
+
+ def test_readonly(self):
+ create_params = 'key_format=i,value_format=i'
+ entries = 10
+ # Create a database and a table.
+ simple_populate(self, self.uri, create_params, entries)
+
+ #
+ # Now close and reopen. Note that the connection function
+ # above will reopen it readonly.
+ self.reopen_conn()
+ msg = '/Unsupported/'
+ c = self.session.open_cursor(self.uri, None, None)
+ for op in self.cursor_ops:
+ c.set_key(1)
+ c.set_value(1)
+ if op == 'insert':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: c.insert(), msg)
+ elif op == 'remove':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: c.remove(), msg)
+ elif op == 'update':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: c.update(), msg)
+ else:
+ self.fail('Unknown cursor operation: ' + op)
+ c.close()
+ for op in self.session_ops:
+ if op == 'create':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.create(self.uri2, create_params),
+ msg)
+ elif op == 'compact':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.compact(self.uri, None), msg)
+ elif op == 'drop':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.drop(self.uri, None), msg)
+ elif op == 'log_flush':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.log_flush(None), msg)
+ elif op == 'log_printf':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.log_printf("test"), msg)
+ elif op == 'rebalance':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.rebalance(self.uri, None), msg)
+ elif op == 'rename':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.rename(self.uri, self.uri2, None), msg)
+ elif op == 'salvage':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.salvage(self.uri, None), msg)
+ elif op == 'truncate':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.truncate(self.uri, None, None, None),
+ msg)
+ elif op == 'upgrade':
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.upgrade(self.uri, None), msg)
+ else:
+ self.fail('Unknown session method: ' + op)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_rebalance.py b/test/suite/test_rebalance.py
index 80cce6ed514..f2167e864c9 100644
--- a/test/suite/test_rebalance.py
+++ b/test/suite/test_rebalance.py
@@ -59,7 +59,7 @@ class test_rebalance(wttest.WiredTigerTestCase):
if with_cursor:
cursor = self.session.open_cursor(uri, None, None)
self.assertRaises(wiredtiger.WiredTigerError,
- lambda: self.session.drop(uri, None))
+ lambda: self.session.rebalance(uri, None))
cursor.close()
self.session.rebalance(uri, None)
diff --git a/test/suite/test_schema07.py b/test/suite/test_schema07.py
new file mode 100644
index 00000000000..ac397c6e1a1
--- /dev/null
+++ b/test/suite/test_schema07.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_schema07.py
+# Test that long-running tests don't fill the cache with metadata
+class test_schema07(wttest.WiredTigerTestCase):
+ tablename = 'table:test_schema07'
+
+ def conn_config(self, dir):
+ return 'cache_size=10MB'
+
+ @wttest.longtest("Creating many tables shouldn't fill the cache")
+ def test_many_tables(self):
+ s = self.session
+ # We have a 10MB cache, metadata is (well) over 512B per table,
+ # if we can create 20K tables, something must be cleaning up.
+ for i in xrange(20000):
+ uri = '%s-%06d' % (self.tablename, i)
+ s.create(uri)
+ c = s.open_cursor(uri)
+ # This will block if the metadata fills the cache
+ c["key"] = "value"
+ c.close()
+ self.session.drop(uri)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_sweep01.py b/test/suite/test_sweep01.py
index f996dbfa06d..bccd2bce012 100644
--- a/test/suite/test_sweep01.py
+++ b/test/suite/test_sweep01.py
@@ -40,7 +40,7 @@ import wttest
class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
tablebase = 'test_sweep01'
uri = 'table:' + tablebase
- numfiles = 50
+ numfiles = 30
numkv = 1000
conn_config = 'file_manager=(close_handle_minimum=0,' + \
'close_idle_time=6,close_scan_interval=2),' + \
@@ -87,7 +87,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
#
# We've configured checkpoints to run every 5 seconds, sweep server to
# run every 2 seconds and idle time to be 6 seconds. It should take
- # about 8 seconds for a handle to be closed. Sleep for 12 seconds to be
+ # about 8 seconds for a handle to be closed. Sleep for double to be
# safe.
#
uri = '%s.test' % self.uri
@@ -105,13 +105,24 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
c = self.session.open_cursor(uri, None)
k = 0
sleep = 0
- while sleep < 12:
+ max = 60
+ final_nfile = 4
+ while sleep < max:
self.session.checkpoint()
k = k+1
c[k] = 1
sleep += 2
time.sleep(2)
+ # Give slow machines time to process files.
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ this_nfile = stat_cursor[stat.conn.file_open][2]
+ stat_cursor.close()
+ self.pr("==== loop " + str(sleep))
+ self.pr("this_nfile " + str(this_nfile))
+ if this_nfile == final_nfile:
+ break
c.close()
+ self.pr("Sweep loop took " + str(sleep))
stat_cursor = self.session.open_cursor('statistics:', None, None)
close2 = stat_cursor[stat.conn.dh_sweep_close][2]
@@ -177,7 +188,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
self.assertEqual(nfile2 < nfile1, True)
# The only files that should be left are the metadata, the lookaside
# file, the lock file, and the active file.
- if (nfile2 != 4):
+ if (nfile2 != final_nfile):
print "close1: " + str(close1) + " close2: " + str(close2)
print "remove1: " + str(remove1) + " remove2: " + str(remove2)
print "sweep1: " + str(sweep1) + " sweep2: " + str(sweep2)
@@ -186,7 +197,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
print "tod1: " + str(tod1) + " tod2: " + str(tod2)
print "ref1: " + str(ref1) + " ref2: " + str(ref2)
print "XX2: nfile1: " + str(nfile1) + " nfile2: " + str(nfile2)
- self.assertEqual(nfile2 == 4, True)
+ self.assertEqual(nfile2 == final_nfile, True)
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_txn04.py b/test/suite/test_txn04.py
index de49c5fe235..bbd6ce8c4e2 100644
--- a/test/suite/test_txn04.py
+++ b/test/suite/test_txn04.py
@@ -121,17 +121,14 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess):
cmd += self.backup_dir
self.runWt(cmd.split())
- self.exception='false'
backup_conn_params = 'log=(enabled,file_max=%s)' % self.logmax
backup_conn = self.wiredtiger_open(self.backup_dir, backup_conn_params)
try:
self.check(backup_conn.open_session(), None, committed)
- except:
- self.exception='true'
finally:
backup_conn.close()
- def test_ops(self):
+ def ops(self):
self.session.create(self.uri, self.create_params)
c = self.session.open_cursor(self.uri, None, 'overwrite')
# Set up the table with entries for 1-5.
@@ -149,7 +146,6 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess):
# The runWt command closes our connection and sessions so
# we need to reopen them here.
self.hot_backup(None, committed)
- self.assertEqual(True, self.exception == 'false')
c = self.session.open_cursor(self.uri, None, 'overwrite')
c.set_value(1)
# Then do the given modification.
@@ -192,14 +188,13 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess):
# Check the state after each commit/rollback.
self.check_all(current, committed)
- # Backup the target we modified. We expect that running
- # recovery now will generate an exception if we committed.
+ # Backup the target we modified and verify the data.
# print 'Call hot_backup with ' + self.uri
self.hot_backup(self.uri, committed)
- if txn == 'commit':
- self.assertEqual(True, self.exception == 'true')
- else:
- self.assertEqual(True, self.exception == 'false')
+
+ def test_ops(self):
+ with self.expectedStdoutPattern('Recreating metadata'):
+ self.ops()
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_util13.py b/test/suite/test_util13.py
new file mode 100644
index 00000000000..222f42cd7f1
--- /dev/null
+++ b/test/suite/test_util13.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import os, re, string
+from suite_subprocess import suite_subprocess
+import itertools, wiredtiger, wttest
+
+from helper import complex_populate_cgconfig, complex_populate_cgconfig_lsm
+from helper import simple_populate
+from helper import complex_populate_check, simple_populate_check
+from wtscenario import multiply_scenarios, number_scenarios
+
+# test_util13.py
+# Utilities: wt dump, as well as the dump cursor
+# Test that dump and load retain table configuration information.
+#
+class test_util13(wttest.WiredTigerTestCase, suite_subprocess):
+ """
+ Test wt dump. We check for specific output and preservation of
+ non-default table create parameters.
+ """
+
+ pfx = 'test_util13'
+ nentries = 100
+ dir = "dump_dir"
+ #
+ # Select table configuration settings that are not the default.
+ #
+ types = [
+ ('file-simple', dict(uri='file:' + pfx, pop=simple_populate,
+ populate_check=simple_populate_check,
+ table_config='prefix_compression_min=3', cfg='')),
+ ('lsm-simple', dict(uri='lsm:' + pfx, pop=simple_populate,
+ populate_check=simple_populate_check,
+ table_config='lsm=(bloom_bit_count=29)',
+ cfg='bloom_bit_count=29')),
+ ('table-simple', dict(uri='table:' + pfx, pop=simple_populate,
+ populate_check=simple_populate_check,
+ table_config='split_pct=50', cfg='')),
+ ('table-complex',
+ dict(uri='table:' + pfx, pop=complex_populate_cgconfig,
+ populate_check=complex_populate_check,
+ table_config='allocation_size=512B', cfg='')),
+ ('table-complex-lsm',
+ dict(uri='table:' + pfx, pop=complex_populate_cgconfig_lsm,
+ populate_check=complex_populate_check,
+ table_config='lsm=(merge_max=5)',
+ cfg='merge_max=5')),
+ ]
+
+ scenarios = number_scenarios(multiply_scenarios('.', types))
+
+ def compare_config(self, expected_cfg, actual_cfg):
+ # Replace '(' characters so configuration groups don't break parsing.
+ # If we ever want to look for config groups this will need to change.
+ #print "compare_config Actual config "
+ #print actual_cfg
+ #print "compare_config Expected config "
+ #print expected_cfg
+ cfg_orig = actual_cfg
+ if self.pop != simple_populate:
+ #
+ # If we have a complex config, strip out the colgroups and
+ # columns from the config. Doing so allows us to keep the
+ # split commands below usable because those two items don't
+ # have assignments in them.
+ #
+ nocolgrp = re.sub("colgroups=\((.+?)\),", '', actual_cfg)
+ cfg_orig = re.sub("columns=\((.+?)\),", '', nocolgrp)
+
+ #print "Using original config "
+ #print cfg_orig
+ da = dict(kv.split('=') for kv in
+ cfg_orig.strip().replace('(',',').split(','))
+ dx = dict(kv.split('=') for kv in
+ expected_cfg.strip().replace('(',',').split(','))
+
+ # Check that all items in our expected config subset are in
+ # the actual configuration and they match.
+ match = all(item in da.items() for item in dx.items())
+ if match == False:
+ print "MISMATCH:"
+ print "Original dict: "
+ print da
+ print "Expected config: "
+ print dx
+ return match
+
+ def compare_files(self, expect_subset, dump_out):
+ inheader = isconfig = False
+ for l1, l2 in zip(open(expect_subset, "rb"), open(dump_out, "rb")):
+ if isconfig:
+ if not self.compare_config(l1, l2):
+ return False
+ if inheader:
+ # This works because the expected subset has a format
+ # of URI and config lines alternating.
+ isconfig = not isconfig
+ if l1.strip() == 'Header':
+ inheader = True
+ if l1.strip() == 'Data':
+ break
+ return True
+
+ def load_recheck(self, expect_subset, dump_out):
+ newdump = "newdump.out"
+ os.mkdir(self.dir)
+ self.runWt(['-h', self.dir, 'load', '-f', dump_out])
+ # Check the contents
+ conn = self.wiredtiger_open(self.dir)
+ session = conn.open_session()
+ cursor = session.open_cursor(self.uri, None, None)
+ self.populate_check
+ conn.close()
+ dumpargs = ["-h"]
+ dumpargs.append(self.dir)
+ dumpargs.append("dump")
+ dumpargs.append(self.uri)
+ self.runWt(dumpargs, outfilename=newdump)
+
+ self.assertTrue(self.compare_files(expect_subset, newdump))
+ return True
+
+ def test_dump_config(self):
+ # The number of btree_entries reported is influenced by the
+ # number of column groups and indices. Each insert will have
+ # a multiplied effect.
+ self.pop(self, self.uri,
+ 'key_format=S,value_format=S,' + self.table_config, self.nentries)
+
+ ver = wiredtiger.wiredtiger_version()
+ verstring = str(ver[1]) + '.' + str(ver[2]) + '.' + str(ver[3])
+ expectfile="expect.out"
+ with open(expectfile, "w") as expectout:
+ # Note: this output is sensitive to the precise output format
+ # generated by wt dump. If this is likely to change, we should
+ # make this test more accommodating.
+ expectout.write(
+ 'WiredTiger Dump (WiredTiger Version ' + verstring + ')\n')
+ expectout.write('Format=print\n')
+ expectout.write('Header\n')
+ expectout.write(self.uri + '\n')
+ # Check the config on the colgroup itself for complex tables.
+ if self.pop != simple_populate:
+ expectout.write('key_format=S\n')
+ expectout.write('colgroup:' + self.pfx + ':cgroup1\n')
+ if self.cfg == '':
+ expectout.write(self.table_config + '\n')
+ else:
+ expectout.write(self.cfg + '\n')
+ expectout.write('Data\n')
+
+ self.pr('calling dump')
+ outfile="dump.out"
+ dumpargs = ["dump"]
+ dumpargs.append(self.uri)
+ self.runWt(dumpargs, outfilename=outfile)
+
+ self.assertTrue(self.compare_files(expectfile, outfile))
+ self.assertTrue(self.load_recheck(expectfile, outfile))
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/wttest.py b/test/suite/wttest.py
index b5a58d1566f..a1945b4325d 100644
--- a/test/suite/wttest.py
+++ b/test/suite/wttest.py
@@ -335,6 +335,14 @@ class WiredTigerTestCase(unittest.TestCase):
# always get back to original directory
os.chdir(self.origcwd)
+ # Make sure no read-only files or directories were left behind
+ os.chmod(self.testdir, 0777)
+ for root, dirs, files in os.walk(self.testdir):
+ for d in dirs:
+ os.chmod(os.path.join(root, d), 0777)
+ for f in files:
+ os.chmod(os.path.join(root, f), 0666)
+
# Clean up unless there's a failure
if (passed or skipped) and not WiredTigerTestCase._preserveFiles:
shutil.rmtree(self.testdir, ignore_errors=True)
diff --git a/test/thread/t.c b/test/thread/t.c
index e72b54bf62a..22334076ee1 100644
--- a/test/thread/t.c
+++ b/test/thread/t.c
@@ -51,6 +51,8 @@ static void wt_shutdown(void);
extern int __wt_optind;
extern char *__wt_optarg;
+void (*custom_die)(void) = NULL;
+
int
main(int argc, char *argv[])
{
diff --git a/test/utility/test_util.i b/test/utility/test_util.i
index 3b88d375381..c5cebadcb5c 100644
--- a/test/utility/test_util.i
+++ b/test/utility/test_util.i
@@ -42,25 +42,60 @@
#define DEFAULT_DIR "WT_TEST"
#define MKDIR_COMMAND "mkdir "
+/* Allow tests to add their own death handling. */
+extern void (*custom_die)(void);
+
+static void testutil_die(int, const char *, ...)
+#if defined(__GNUC__)
+__attribute__((__noreturn__))
+#endif
+;
+
/*
* die --
* Report an error and quit.
*/
-static inline void
+static void
testutil_die(int e, const char *fmt, ...)
{
va_list ap;
+ /* Allow test programs to cleanup on fatal error. */
+ if (custom_die != NULL)
+ (*custom_die)();
+
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
va_end(ap);
if (e != 0)
fprintf(stderr, ": %s", wiredtiger_strerror(e));
fprintf(stderr, "\n");
+
exit(EXIT_FAILURE);
}
/*
+ * testutil_check --
+ * Complain and quit if a function call fails.
+ */
+#define testutil_check(call) do { \
+ int __r; \
+ if ((__r = (call)) != 0) \
+ testutil_die(__r, "%s/%d: %s", __func__, __LINE__, #call);\
+} while (0)
+
+/*
+ * testutil_checkfmt --
+ * Complain and quit if a function call fails, with additional arguments.
+ */
+#define testutil_checkfmt(call, fmt, ...) do { \
+ int __r; \
+ if ((__r = (call)) != 0) \
+ testutil_die(__r, "%s/%d: %s: " fmt, \
+ __func__, __LINE__, #call, __VA_ARGS__); \
+} while (0)
+
+/*
* testutil_work_dir_from_path --
* Takes a buffer, its size and the intended work directory.
* Creates the full intended work directory in buffer.
diff --git a/test/windows/windows_shim.h b/test/windows/windows_shim.h
index c35c27cb7b0..f32edce88e7 100644
--- a/test/windows/windows_shim.h
+++ b/test/windows/windows_shim.h
@@ -44,6 +44,11 @@ typedef int u_int;
#define R_OK 04
#define X_OK R_OK
+/* MSVC doesn't provide __func__; it has __FUNCTION__ instead. */
+#ifdef _MSC_VER
+#define __func__ __FUNCTION__
+#endif
+
/* snprintf does not exist on <= VS 2013 */
#if _MSC_VER < 1900
#define snprintf _wt_snprintf
diff --git a/tools/wtstats/stat_data.py b/tools/wtstats/stat_data.py
index 7cee87e49ed..c75e4f194dd 100644
--- a/tools/wtstats/stat_data.py
+++ b/tools/wtstats/stat_data.py
@@ -1,8 +1,10 @@
# DO NOT EDIT: automatically built by dist/stat.py. */
no_scale_per_second_list = [
+ 'async: current work queue length',
'async: maximum work queue length',
'cache: bytes currently in the cache',
+ 'cache: eviction currently operating in aggressive mode',
'cache: maximum bytes configured',
'cache: maximum page size at eviction',
'cache: pages currently held in the cache',
@@ -35,6 +37,7 @@ no_scale_per_second_list = [
'transaction: transaction range of IDs currently pinned by named snapshots',
'block-manager: checkpoint size',
'block-manager: file allocation unit size',
+ 'block-manager: file bytes available for reuse',
'block-manager: file magic number',
'block-manager: file major version number',
'block-manager: file size in bytes',
@@ -67,6 +70,7 @@ no_scale_per_second_list = [
no_clear_list = [
'async: maximum work queue length',
'cache: bytes currently in the cache',
+ 'cache: eviction currently operating in aggressive mode',
'cache: maximum bytes configured',
'cache: maximum page size at eviction',
'cache: pages currently held in the cache',