author    Keith Bostic <keith@wiredtiger.com>  2014-09-12 07:38:37 -0400
committer Keith Bostic <keith@wiredtiger.com>  2014-09-12 07:38:37 -0400
commit    c43e1200da808eeb1d8b6aaff409e26fe2a96462 (patch)
tree      1875a6324cf443715e78ec664589ee637a2d761a
parent    9d7a071385083340a20da2b9bd1b2ce7c0953c27 (diff)
parent    b82fa31a5c3f27e7d975bc1eb16a76e3181e85cb (diff)
download  mongo-c43e1200da808eeb1d8b6aaff409e26fe2a96462.tar.gz
Merge branch 'develop' into cursor-open-optimization
-rw-r--r--  api/leveldb/Makefile.am | 2
-rw-r--r--  api/leveldb/basho/perf_count.h | 2
-rw-r--r--  api/leveldb/config.hin | 22
-rw-r--r--  api/leveldb/hyperleveldb/replay_iterator.h | 2
-rw-r--r--  api/leveldb/leveldb/include/leveldb/cache.h | 2
-rw-r--r--  api/leveldb/leveldb/include/leveldb/comparator.h | 2
-rw-r--r--  api/leveldb/leveldb/include/leveldb/db.h | 8
-rw-r--r--  api/leveldb/leveldb/include/leveldb/env.h | 2
-rw-r--r--  api/leveldb/leveldb/include/leveldb/filter_policy.h | 2
-rw-r--r--  api/leveldb/leveldb/include/leveldb/iterator.h | 2
-rw-r--r--  api/leveldb/leveldb/include/leveldb/options.h | 2
-rw-r--r--  api/leveldb/leveldb/include/leveldb/slice.h | 5
-rw-r--r--  api/leveldb/leveldb/include/leveldb/status.h | 2
-rw-r--r--  api/leveldb/leveldb/include/leveldb/write_batch.h | 2
-rw-r--r--  api/leveldb/leveldb_wt.cc | 14
-rw-r--r--  api/leveldb/leveldb_wt.h | 3
-rw-r--r--  bench/wtperf/runners/shared-cache-stress.wtperf | 12
-rwxr-xr-x  bench/wtperf/runners/wtperf_run.sh | 26
-rw-r--r--  bench/wtperf/wtperf.c | 17
-rw-r--r--  build_posix/configure.ac.in | 9
-rw-r--r--  dist/s_copyright.list | 1
-rwxr-xr-x  dist/s_docs | 2
-rw-r--r--  examples/c/ex_all.c | 6
-rw-r--r--  ext/compressors/zlib/zlib_compress.c | 9
-rw-r--r--  lang/python/Makefile.am | 18
-rw-r--r--  lang/python/setup.py | 10
-rw-r--r--  src/async/async_api.c | 82
-rw-r--r--  src/btree/bt_debug.c | 2
-rw-r--r--  src/btree/rec_track.c | 2
-rw-r--r--  src/btree/rec_write.c | 29
-rw-r--r--  src/config/config_collapse.c | 326
-rw-r--r--  src/conn/conn_api.c | 100
-rw-r--r--  src/conn/conn_cache.c | 71
-rw-r--r--  src/conn/conn_cache_pool.c | 106
-rw-r--r--  src/conn/conn_ckpt.c | 30
-rw-r--r--  src/conn/conn_handle.c | 13
-rw-r--r--  src/conn/conn_log.c | 6
-rw-r--r--  src/conn/conn_open.c | 14
-rw-r--r--  src/conn/conn_stat.c | 65
-rw-r--r--  src/cursor/cur_file.c | 21
-rw-r--r--  src/docs/Doxyfile | 2
-rw-r--r--  src/docs/file-formats.dox | 31
-rw-r--r--  src/docs/install.dox | 7
-rw-r--r--  src/docs/lsm.dox | 13
-rw-r--r--  src/docs/programming.dox | 2
-rw-r--r--  src/docs/spell.ok | 2
-rw-r--r--  src/docs/tune-bulk-load.dox | 4
-rw-r--r--  src/include/connection.h | 3
-rw-r--r--  src/include/dhandle.h | 2
-rw-r--r--  src/include/extern.h | 38
-rw-r--r--  src/include/lsm.h | 15
-rw-r--r--  src/include/misc.h | 6
-rw-r--r--  src/include/schema.h | 46
-rw-r--r--  src/include/session.h | 2
-rw-r--r--  src/include/txn.i | 21
-rw-r--r--  src/lsm/lsm_cursor.c | 8
-rw-r--r--  src/lsm/lsm_manager.c | 132
-rw-r--r--  src/lsm/lsm_merge.c | 32
-rw-r--r--  src/lsm/lsm_tree.c | 114
-rw-r--r--  src/lsm/lsm_work_unit.c | 46
-rw-r--r--  src/lsm/lsm_worker.c | 28
-rw-r--r--  src/schema/schema_util.c | 26
-rw-r--r--  src/session/session_api.c | 12
-rw-r--r--  src/txn/txn_ckpt.c | 59
-rw-r--r--  test/format/config.c | 21
-rw-r--r--  test/format/config.h | 3
-rw-r--r--  test/format/format.h | 1
-rw-r--r--  test/format/ops.c | 31
-rw-r--r--  test/format/wts.c | 4
-rw-r--r--  test/suite/test_checkpoint01.py | 10
70 files changed, 1202 insertions(+), 570 deletions(-)
diff --git a/api/leveldb/Makefile.am b/api/leveldb/Makefile.am
index 44aa69bbd48..2cfd9d945a5 100644
--- a/api/leveldb/Makefile.am
+++ b/api/leveldb/Makefile.am
@@ -16,7 +16,7 @@ leveldbincludedir = $(includedir)/wiredtiger/leveldb
endif
endif
leveldbinclude_HEADERS = \
- wiredtiger_config.h \
+ leveldb_wt_config.h \
leveldb/include/leveldb/cache.h \
leveldb/include/leveldb/comparator.h\
leveldb/include/leveldb/db.h \
diff --git a/api/leveldb/basho/perf_count.h b/api/leveldb/basho/perf_count.h
index 0edf1b96549..b0f4abf9b66 100644
--- a/api/leveldb/basho/perf_count.h
+++ b/api/leveldb/basho/perf_count.h
@@ -23,7 +23,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_
#define STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#include <stdint.h>
#include <string>
diff --git a/api/leveldb/config.hin b/api/leveldb/config.hin
new file mode 100644
index 00000000000..131b68969d3
--- /dev/null
+++ b/api/leveldb/config.hin
@@ -0,0 +1,22 @@
+/* api/leveldb/config.hin. Generated by autoheader, then hand-edited. */
+
+/* Build the LevelDB API with Basho LevelDB support. */
+#undef HAVE_BASHOLEVELDB
+
+/* Snappy support automatically loaded. */
+#undef HAVE_BUILTIN_EXTENSION_SNAPPY
+
+/* Zlib support automatically loaded. */
+#undef HAVE_BUILTIN_EXTENSION_ZLIB
+
+/* Define to 1 for diagnostic tests. */
+#undef HAVE_DIAGNOSTIC
+
+/* Build the LevelDB API with HyperLevelDB support. */
+#undef HAVE_HYPERLEVELDB
+
+/* Define to 1 if you have the `snappy' library (-lsnappy). */
+#undef HAVE_LIBSNAPPY
+
+/* Build the LevelDB API with RocksDB support. */
+#undef HAVE_ROCKSDB
diff --git a/api/leveldb/hyperleveldb/replay_iterator.h b/api/leveldb/hyperleveldb/replay_iterator.h
index 6e2f562c6c4..397acdfd889 100644
--- a/api/leveldb/hyperleveldb/replay_iterator.h
+++ b/api/leveldb/hyperleveldb/replay_iterator.h
@@ -5,7 +5,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_REPLAY_ITERATOR_H_
#define STORAGE_LEVELDB_INCLUDE_REPLAY_ITERATOR_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#include "slice.h"
#include "status.h"
diff --git a/api/leveldb/leveldb/include/leveldb/cache.h b/api/leveldb/leveldb/include/leveldb/cache.h
index 6ae25122133..94be8e919a8 100644
--- a/api/leveldb/leveldb/include/leveldb/cache.h
+++ b/api/leveldb/leveldb/include/leveldb/cache.h
@@ -18,7 +18,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_CACHE_H_
#define STORAGE_LEVELDB_INCLUDE_CACHE_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#if defined(HAVE_ROCKSDB) && !defined(leveldb)
#define leveldb rocksdb
#endif
diff --git a/api/leveldb/leveldb/include/leveldb/comparator.h b/api/leveldb/leveldb/include/leveldb/comparator.h
index 23e0ba84559..78d83a4d08e 100644
--- a/api/leveldb/leveldb/include/leveldb/comparator.h
+++ b/api/leveldb/leveldb/include/leveldb/comparator.h
@@ -5,7 +5,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_
#define STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#if defined(HAVE_ROCKSDB) && !defined(leveldb)
#define leveldb rocksdb
#endif
diff --git a/api/leveldb/leveldb/include/leveldb/db.h b/api/leveldb/leveldb/include/leveldb/db.h
index c1818d28a7a..df8fcbbe9f8 100644
--- a/api/leveldb/leveldb/include/leveldb/db.h
+++ b/api/leveldb/leveldb/include/leveldb/db.h
@@ -5,7 +5,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_DB_H_
#define STORAGE_LEVELDB_INCLUDE_DB_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#if defined(HAVE_ROCKSDB) && !defined(leveldb)
#define leveldb rocksdb
#endif
@@ -292,6 +292,12 @@ class DB {
// db->CompactRange(NULL, NULL);
virtual void CompactRange(const Slice* begin, const Slice* end) = 0;
+ // Suspends the background compaction thread. This method
+ // returns once suspended.
+ virtual void SuspendCompactions() = 0;
+ // Resumes a suspended background compaction thread.
+ virtual void ResumeCompactions() = 0;
+
#ifdef HAVE_HYPERLEVELDB
// Create a live backup of a live LevelDB instance.
// The backup is stored in a directory named "backup-<name>" under the top
diff --git a/api/leveldb/leveldb/include/leveldb/env.h b/api/leveldb/leveldb/include/leveldb/env.h
index 0d043307736..4ad67d36fea 100644
--- a/api/leveldb/leveldb/include/leveldb/env.h
+++ b/api/leveldb/leveldb/include/leveldb/env.h
@@ -13,7 +13,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_ENV_H_
#define STORAGE_LEVELDB_INCLUDE_ENV_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#if defined(HAVE_ROCKSDB) && !defined(leveldb)
#define leveldb rocksdb
#endif
diff --git a/api/leveldb/leveldb/include/leveldb/filter_policy.h b/api/leveldb/leveldb/include/leveldb/filter_policy.h
index 2d970e709d6..e434ef4b241 100644
--- a/api/leveldb/leveldb/include/leveldb/filter_policy.h
+++ b/api/leveldb/leveldb/include/leveldb/filter_policy.h
@@ -16,7 +16,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
#define STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#if defined(HAVE_ROCKSDB) && !defined(leveldb)
#define leveldb rocksdb
#endif
diff --git a/api/leveldb/leveldb/include/leveldb/iterator.h b/api/leveldb/leveldb/include/leveldb/iterator.h
index 3845d553a4e..2d97d180b17 100644
--- a/api/leveldb/leveldb/include/leveldb/iterator.h
+++ b/api/leveldb/leveldb/include/leveldb/iterator.h
@@ -15,7 +15,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_ITERATOR_H_
#define STORAGE_LEVELDB_INCLUDE_ITERATOR_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#if defined(HAVE_ROCKSDB) && !defined(leveldb)
#define leveldb rocksdb
#endif
diff --git a/api/leveldb/leveldb/include/leveldb/options.h b/api/leveldb/leveldb/include/leveldb/options.h
index a14503fe086..9dcf73fc2a0 100644
--- a/api/leveldb/leveldb/include/leveldb/options.h
+++ b/api/leveldb/leveldb/include/leveldb/options.h
@@ -5,7 +5,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_OPTIONS_H_
#define STORAGE_LEVELDB_INCLUDE_OPTIONS_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#if defined(HAVE_ROCKSDB) && !defined(leveldb)
#define leveldb rocksdb
#endif
diff --git a/api/leveldb/leveldb/include/leveldb/slice.h b/api/leveldb/leveldb/include/leveldb/slice.h
index d7c20cfcaac..1eb66dd825f 100644
--- a/api/leveldb/leveldb/include/leveldb/slice.h
+++ b/api/leveldb/leveldb/include/leveldb/slice.h
@@ -15,7 +15,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_SLICE_H_
#define STORAGE_LEVELDB_INCLUDE_SLICE_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#if defined(HAVE_ROCKSDB) && !defined(leveldb)
#define leveldb rocksdb
#endif
@@ -82,7 +82,8 @@ class Slice {
(memcmp(data_, x.data_, x.size_) == 0));
}
- private:
+// The LevelDB JNI layer peeks in here
+// private:
const char* data_;
size_t size_;
diff --git a/api/leveldb/leveldb/include/leveldb/status.h b/api/leveldb/leveldb/include/leveldb/status.h
index 8b2cbb9b422..3c21f64462b 100644
--- a/api/leveldb/leveldb/include/leveldb/status.h
+++ b/api/leveldb/leveldb/include/leveldb/status.h
@@ -13,7 +13,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_STATUS_H_
#define STORAGE_LEVELDB_INCLUDE_STATUS_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#if defined(HAVE_ROCKSDB) && !defined(leveldb)
#define leveldb rocksdb
#endif
diff --git a/api/leveldb/leveldb/include/leveldb/write_batch.h b/api/leveldb/leveldb/include/leveldb/write_batch.h
index 9184d42c24c..293b41ad818 100644
--- a/api/leveldb/leveldb/include/leveldb/write_batch.h
+++ b/api/leveldb/leveldb/include/leveldb/write_batch.h
@@ -21,7 +21,7 @@
#ifndef STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_
#define STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#if defined(HAVE_ROCKSDB) && !defined(leveldb)
#define leveldb rocksdb
#endif
diff --git a/api/leveldb/leveldb_wt.cc b/api/leveldb/leveldb_wt.cc
index cfeb0549db4..6425a5a8dfd 100644
--- a/api/leveldb/leveldb_wt.cc
+++ b/api/leveldb/leveldb_wt.cc
@@ -755,13 +755,8 @@ IteratorImpl::Next()
int ret;
WT_ITEM item;
- if (!Status().ok())
- return;
-
- if (!valid_) {
- SetError(EINVAL);
+ if (!Status().ok() || !valid_)
return;
- }
ret = cursor_->next(cursor_);
if (ret != 0) {
@@ -791,13 +786,8 @@ IteratorImpl::Prev()
{
WT_ITEM item;
- if (!Status().ok())
- return;
-
- if (!valid_) {
- SetError(EINVAL);
+ if (!Status().ok() || !valid_)
return;
- }
int ret = cursor_->prev(cursor_);
if (ret != 0) {
diff --git a/api/leveldb/leveldb_wt.h b/api/leveldb/leveldb_wt.h
index 301fa250e85..683482ad23c 100644
--- a/api/leveldb/leveldb_wt.h
+++ b/api/leveldb/leveldb_wt.h
@@ -27,7 +27,7 @@
#ifndef _INCLUDE_LEVELDB_WT_H
#define _INCLUDE_LEVELDB_WT_H 1
-#include "wiredtiger_config.h"
+#include "leveldb_wt_config.h"
#include "leveldb/cache.h"
#include "leveldb/comparator.h"
@@ -171,6 +171,7 @@ private:
class CacheImpl : public Cache {
public:
CacheImpl(size_t capacity) : Cache(), capacity_(capacity) {}
+ virtual ~CacheImpl() {}
virtual Handle* Insert(const Slice&, void*, size_t,
void (*)(const Slice&, void*)) { return 0; }
diff --git a/bench/wtperf/runners/shared-cache-stress.wtperf b/bench/wtperf/runners/shared-cache-stress.wtperf
new file mode 100644
index 00000000000..87d14f4f5c1
--- /dev/null
+++ b/bench/wtperf/runners/shared-cache-stress.wtperf
@@ -0,0 +1,12 @@
+# Stress out the shared cache.
+conn_config="statistics=(none),shared_cache=(name=wt-cache,size=536870912,reserve=10MB,chunk=20MB,)"
+table_config="allocation_size=4KB,key_gap=10,split_pct=75,internal_page_max=4KB,internal_key_truncate=false,prefix_compression=false,leaf_item_max=1433,type=file,internal_item_max=1433,exclusive=true,leaf_page_max=4KB,block_compressor=,"
+checkpoint_interval=100
+checkpoint_threads=1
+icount=50000
+random_range=500000
+report_interval=5
+run_time=600
+populate_threads=1
+threads=((count=1,inserts=1),(count=1,reads=1))
+database_count=25
diff --git a/bench/wtperf/runners/wtperf_run.sh b/bench/wtperf/runners/wtperf_run.sh
index 16470f35dfa..3296a4072b5 100755
--- a/bench/wtperf/runners/wtperf_run.sh
+++ b/bench/wtperf/runners/wtperf_run.sh
@@ -79,7 +79,7 @@ while test "$run" -le "$runmax"; do
if test "$?" -ne "0"; then
exit 1
fi
- # Load uses different text. Handle separately.
+ # Load always uses floating point, so handle it separately
l=`grep "^Load time:" ./WT_TEST/test.stat`
if test "$?" -eq "0"; then
load=`echo $l | cut -d ' ' -f 3`
@@ -87,7 +87,7 @@ while test "$run" -le "$runmax"; do
load=0
fi
cur[$loadindex]=$load
- sum[$loadindex]=`expr $load + ${sum[$loadindex]}`
+ sum[$loadindex]=`echo "${sum[$loadindex]} + $load" | bc`
echo "cur ${cur[$loadindex]} sum ${sum[$loadindex]}" >> $outfile
for i in ${!ops[*]}; do
l=`grep "Executed.*${ops[$i]} operations" ./WT_TEST/test.stat`
@@ -109,8 +109,17 @@ while test "$run" -le "$runmax"; do
done
else
for i in ${!cur[*]}; do
- min[$i]=$(getval $getmin ${cur[$i]} ${min[$i]})
- max[$i]=$(getval $getmax ${cur[$i]} ${max[$i]})
+ if test "$i" -eq "$loadindex"; then
+ if (($(bc <<< "${cur[$i]} < ${min[$i]}") )); then
+ min[$i]=${cur[$i]}
+ fi
+ if (($(bc <<< "${cur[$i]} > ${max[$i]}") )); then
+ max[$i]=${cur[$i]}
+ fi
+ else
+ min[$i]=$(getval $getmin ${cur[$i]} ${min[$i]})
+ max[$i]=$(getval $getmax ${cur[$i]} ${max[$i]})
+ fi
done
fi
#
@@ -145,8 +154,13 @@ fi
# Average the remaining and write it out to the file.
#
for i in ${!min[*]}; do
- s=`expr ${sum[$i]} - ${min[$i]} - ${max[$i]}`
- avg[$i]=`expr $s / $numruns`
+ if test "$i" -eq "$loadindex"; then
+ s=`echo "scale=3; ${sum[$i]} - ${min[$i]} - ${max[$i]}" | bc`
+ avg[$i]=`echo "scale=3; $s / $numruns" | bc`
+ else
+ s=`expr ${sum[$i]} - ${min[$i]} - ${max[$i]}`
+ avg[$i]=`expr $s / $numruns`
+ fi
done
for i in ${!outp[*]}; do
echo "${outp[$i]} ${avg[$i]}" >> $outfile
diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c
index ad645e19596..b6d65761b5a 100644
--- a/bench/wtperf/wtperf.c
+++ b/bench/wtperf/wtperf.c
@@ -1200,7 +1200,7 @@ execute_populate(CONFIG *cfg)
CONFIG_THREAD *popth;
WT_ASYNC_OP *asyncop;
size_t i;
- uint64_t last_ops, secs;
+ uint64_t last_ops, msecs;
uint32_t interval, tables;
int elapsed, ret;
void *(*pfunc)(void *);
@@ -1278,12 +1278,11 @@ execute_populate(CONFIG *cfg)
}
lprintf(cfg, 0, 1, "Finished load of %" PRIu32 " items", cfg->icount);
- secs = WT_TIMEDIFF(stop, start) / BILLION;
- if (secs == 0)
- ++secs;
+ msecs = ns_to_ms(WT_TIMEDIFF(stop, start));
lprintf(cfg, 0, 1,
- "Load time: %" PRIu64 "\n" "load ops/sec: %" PRIu64,
- secs, cfg->icount / secs);
+ "Load time: %.2f\n" "load ops/sec: %" PRIu64,
+ (double)msecs / (double)THOUSAND,
+ (uint64_t)((cfg->icount / msecs) / THOUSAND));
/*
* If configured, compact to allow LSM merging to complete. We
@@ -1323,9 +1322,9 @@ execute_populate(CONFIG *cfg)
lprintf(cfg, ret, 0, "Get time failed in populate.");
return (ret);
}
- secs = WT_TIMEDIFF(stop, start) / BILLION;
lprintf(cfg, 0, 1,
- "Compact completed in %" PRIu64 " seconds", secs);
+ "Compact completed in %" PRIu64 " seconds",
+ (uint64_t)(ns_to_sec(WT_TIMEDIFF(stop, start))));
assert(tables == 0);
}
return (0);
@@ -1871,7 +1870,7 @@ err: if (ret == 0)
if (cfg->conn != NULL &&
(t_ret = cfg->conn->close(cfg->conn, NULL)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(cfg, t_ret, 0,
"Error closing connection to %s", cfg->home);
if (ret == 0)
ret = t_ret;
diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in
index dd5bce738ea..6352fa6d0df 100644
--- a/build_posix/configure.ac.in
+++ b/build_posix/configure.ac.in
@@ -9,6 +9,10 @@ AC_CONFIG_AUX_DIR([build_posix/gnu-support])
AC_CONFIG_MACRO_DIR([build_posix/aclocal])
AC_CONFIG_SRCDIR([RELEASE])
+# If CFLAGS/CXXFLAGS were not set on entry, default to "-O3 -g"
+: ${CFLAGS=-O3 -g}
+: ${CXXFLAGS=-O3 -g}
+
AM_INIT_AUTOMAKE([1.11 foreign parallel-tests subdir-objects])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([no])])
@@ -24,9 +28,6 @@ LT_PREREQ(2.2.6)
LT_INIT([pic-only])
AC_SUBST([LIBTOOL_DEPS])
-# If CFLAGS was not set on entry, default to "-O3 -g"
-: ${CFLAGS="-O3 -g"}
-
AC_PROG_CC(cc gcc)
# AC_PROG_CXX(c++ g++)
@@ -165,7 +166,7 @@ AC_CONFIG_HEADERS([wiredtiger_config.h:build_posix/config.hin])
# The LevelDB API needs some configuration knowledge
AM_COND_IF([LEVELDB],
- AC_CONFIG_HEADERS([api/leveldb/wiredtiger_config.h:build_posix/config.hin]))
+ AC_CONFIG_HEADERS([api/leveldb/leveldb_wt_config.h:api/leveldb/config.hin]))
# BEGIN check existence -- maintained by reconf and Make.subdirs
# END check existence
diff --git a/dist/s_copyright.list b/dist/s_copyright.list
index ca2ba425ad5..d66be5a1ba7 100644
--- a/dist/s_copyright.list
+++ b/dist/s_copyright.list
@@ -1,3 +1,4 @@
+skip api/leveldb/leveldb_wt_config.in
skip dist/api_config.py
skip dist/api_data.py
skip dist/api_err.py
diff --git a/dist/s_docs b/dist/s_docs
index 815d27d7b11..c0c8885e1b5 100755
--- a/dist/s_docs
+++ b/dist/s_docs
@@ -113,7 +113,7 @@ valid_build()
}
classf=`ls ../docs/struct___* 2>/dev/null`
for c in $classf; do
- echo "$c: Need to add class to PREDEFINE in src/docs/Doxyfile"
+ echo "$c: Need to add class to PREDEFINED in src/docs/Doxyfile"
done
}
diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c
index 8f9fba093de..ea5d26ce133 100644
--- a/examples/c/ex_all.c
+++ b/examples/c/ex_all.c
@@ -1055,8 +1055,8 @@ main(void)
/*! [Statistics logging with a table] */
ret = wiredtiger_open(home, NULL,
- "create,"
- "statistics_log=(sources=(\"table:table1\",\"table:table2\"))",
+ "create, statistics_log=("
+ "sources=(\"lsm:table1\",\"lsm:table2\"), wait=5)",
&conn);
/*! [Statistics logging with a table] */
if (ret == 0)
@@ -1064,7 +1064,7 @@ main(void)
/*! [Statistics logging with all tables] */
ret = wiredtiger_open(home, NULL,
- "create,statistics_log=(sources=(\"table:\"))",
+ "create, statistics_log=(sources=(\"lsm:\"), wait=5)",
&conn);
/*! [Statistics logging with all tables] */
if (ret == 0)
diff --git a/ext/compressors/zlib/zlib_compress.c b/ext/compressors/zlib/zlib_compress.c
index 33bb9bf8810..3532ecf16cd 100644
--- a/ext/compressors/zlib/zlib_compress.c
+++ b/ext/compressors/zlib/zlib_compress.c
@@ -225,8 +225,15 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
* Strategy: take the available output size and compress that much
* input. Continue until there is no input small enough or the
* compression fails to fit.
+ *
+ * Don't let the compression ratio become insanely good (which can
+ * happen with synthetic workloads). Once we hit a limit, stop so that
+ * the in-memory size of pages isn't totally different to the on-disk
+ * size. Otherwise we can get into trouble where every update to a
+ * page results in forced eviction based on in-memory size, even though
+ * the data fits into a single on-disk block.
*/
- while (zs.avail_out > 0) {
+ while (zs.avail_out > 0 && zs.total_in <= zs.total_out * 20) {
/* Find the slot we will try to compress up to. */
if ((curr_slot = zlib_find_slot(
zs.total_in + zs.avail_out, offsets, slots)) <= last_slot)
diff --git a/lang/python/Makefile.am b/lang/python/Makefile.am
index 0ac56138e29..03c65a57028 100644
--- a/lang/python/Makefile.am
+++ b/lang/python/Makefile.am
@@ -1,5 +1,5 @@
PYSRC = $(top_srcdir)/lang/python
-PY_INCLUDE_DIRS = $(top_srcdir)
+PYDIRS = -t $(abs_builddir) -I $(abs_top_srcdir):$(abs_top_builddir) -L $(abs_top_builddir)/.libs
all-local: _wiredtiger.so
# We keep generated Python sources under lang/python: that's where they live
@@ -10,15 +10,19 @@ $(PYSRC)/wiredtiger_wrap.c: $(top_srcdir)/src/include/wiredtiger.in $(PYSRC)/wir
mv wiredtiger.py wiredtiger/__init__.py)
_wiredtiger.so: $(top_builddir)/libwiredtiger.la $(PYSRC)/wiredtiger_wrap.c
- $(PYTHON) $(PYSRC)/setup.py build_ext -b . -t . -f -I $(PY_INCLUDE_DIRS)
+ (cd $(PYSRC) && \
+ $(PYTHON) setup.py build_ext -f -b $(abs_builddir) $(PYDIRS))
install-exec-local:
- $(PYTHON) $(PYSRC)/setup.py build_py -d build
- $(PYTHON) $(PYSRC)/setup.py build_ext -b build -t . -f -I $(PY_INCLUDE_DIRS)
- $(PYTHON) $(PYSRC)/setup.py install_lib -b build --skip-build $(PYTHON_INSTALL_ARG)
+ (cd $(PYSRC) && \
+ $(PYTHON) setup.py build_py -d $(abs_builddir)/build && \
+ $(PYTHON) setup.py build_ext -f -b $(abs_builddir)/build $(PYDIRS) && \
+ $(PYTHON) setup.py install_lib -b $(abs_builddir)/build --skip-build $(PYTHON_INSTALL_ARG))
+# We build in different places for an install vs running from the tree:
+# clean up both. Don't rely on "setup.py clean" -- everything that should
+# be removed is created under the build directory.
clean-local:
- $(PYTHON) $(PYSRC)/setup.py clean
- rm -rf _wiredtiger.so WT_TEST build wiredtiger
+ rm -rf build _wiredtiger.so wiredtiger_wrap.o WT_TEST
TESTS = run-ex_access
diff --git a/lang/python/setup.py b/lang/python/setup.py
index 1c6ebc71387..1057006ce50 100644
--- a/lang/python/setup.py
+++ b/lang/python/setup.py
@@ -35,9 +35,7 @@ if not 'ARCHFLAGS' in os.environ:
os.environ['ARCHFLAGS'] = ''
# Suppress warnings building SWIG generated code
-extra_cflags = [
- '-w',
-]
+extra_cflags = [ '-w' ]
dir = os.path.dirname(__file__)
@@ -50,12 +48,10 @@ wt_ver = '%d.%d' % (WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR)
setup(name='wiredtiger', version=wt_ver,
ext_modules=[Extension('_wiredtiger',
- [os.path.join(dir, 'wiredtiger_wrap.c')],
- include_dirs=['../..'],
- library_dirs=['../../.libs'],
+ [os.path.join(dir, 'wiredtiger_wrap.c')],
libraries=['wiredtiger'],
extra_compile_args=extra_cflags,
)],
- package_dir={'' : dir},
+ package_dir={'' : dir},
packages=['wiredtiger'],
)
diff --git a/src/async/async_api.c b/src/async/async_api.c
index ae567466e8e..23a176653c7 100644
--- a/src/async/async_api.c
+++ b/src/async/async_api.c
@@ -172,32 +172,24 @@ __async_config(WT_SESSION_IMPL *session,
WT_CONNECTION_IMPL *conn, const char **cfg, int *runp)
{
WT_CONFIG_ITEM cval;
- WT_DECL_RET;
/*
* The async configuration is off by default.
*/
- if ((ret = __wt_config_gets(
- session, cfg, "async.enabled", &cval)) == 0)
- *runp = cval.val != 0;
- WT_RET_NOTFOUND_OK(ret);
+ WT_RET(__wt_config_gets(session, cfg, "async.enabled", &cval));
+ *runp = cval.val != 0;
/*
* Even if async is turned off, we want to parse and store the
* default values so that reconfigure can just enable them.
*/
- if ((ret = __wt_config_gets(
- session, cfg, "async.ops_max", &cval)) == 0)
- conn->async_size = (uint32_t)cval.val;
- WT_RET_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_gets(
- session, cfg, "async.threads", &cval)) == 0) {
- conn->async_workers = (uint32_t)cval.val;
- /* Sanity check that api_data.py is in sync with async.h */
- WT_ASSERT(session, conn->async_workers <= WT_ASYNC_MAX_WORKERS);
- }
- WT_RET_NOTFOUND_OK(ret);
+ WT_RET(__wt_config_gets(session, cfg, "async.ops_max", &cval));
+ conn->async_size = (uint32_t)cval.val;
+
+ WT_RET(__wt_config_gets(session, cfg, "async.threads", &cval));
+ conn->async_workers = (uint32_t)cval.val;
+ /* Sanity check that api_data.py is in sync with async.h */
+ WT_ASSERT(session, conn->async_workers <= WT_ASYNC_MAX_WORKERS);
return (0);
}
@@ -209,8 +201,8 @@ __async_config(WT_SESSION_IMPL *session,
void
__wt_async_stats_update(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
WT_ASYNC *async;
+ WT_CONNECTION_IMPL *conn;
WT_CONNECTION_STATS *stats;
conn = S2C(session);
@@ -224,27 +216,18 @@ __wt_async_stats_update(WT_SESSION_IMPL *session)
}
/*
- * __wt_async_create --
- * Start the async subsystem and worker threads.
+ * __async_start --
+ * Start the async subsystem. All configuration processing has
+ * already been done by the caller.
*/
-int
-__wt_async_create(WT_CONNECTION_IMPL *conn, const char *cfg[])
+static int
+__async_start(WT_SESSION_IMPL *session)
{
WT_ASYNC *async;
- WT_SESSION_IMPL *session;
- int run;
+ WT_CONNECTION_IMPL *conn;
uint32_t i;
- session = conn->default_session;
-
- /* Handle configuration. */
- run = 0;
- WT_RET(__async_config(session, conn, cfg, &run));
-
- /* If async is not configured, we're done. */
- if (!run)
- return (0);
-
+ conn = S2C(session);
conn->async_cfg = 1;
/*
* Async is on, allocate the WT_ASYNC structure and initialize the ops.
@@ -284,21 +267,42 @@ __wt_async_create(WT_CONNECTION_IMPL *conn, const char *cfg[])
}
/*
+ * __wt_async_create --
+ * Start the async subsystem and worker threads.
+ */
+int
+__wt_async_create(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_CONNECTION_IMPL *conn;
+ int run;
+
+ conn = S2C(session);
+
+ /* Handle configuration. */
+ run = 0;
+ WT_RET(__async_config(session, conn, cfg, &run));
+
+ /* If async is not configured, we're done. */
+ if (!run)
+ return (0);
+ return (__async_start(session));
+}
+
+/*
* __wt_async_reconfig --
* Start the async subsystem and worker threads.
*/
int
-__wt_async_reconfig(WT_CONNECTION_IMPL *conn, const char *cfg[])
+__wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_ASYNC *async;
- WT_CONNECTION_IMPL tmp_conn;
+ WT_CONNECTION_IMPL *conn, tmp_conn;
WT_DECL_RET;
WT_SESSION *wt_session;
- WT_SESSION_IMPL *session;
int run;
uint32_t i;
- session = conn->default_session;
+ conn = S2C(session);
async = conn->async;
memset(&tmp_conn, 0, sizeof(tmp_conn));
tmp_conn.async_cfg = conn->async_cfg;
@@ -338,7 +342,7 @@ __wt_async_reconfig(WT_CONNECTION_IMPL *conn, const char *cfg[])
return (ret);
} else if (conn->async_cfg == 0 && run)
/* Case 2 */
- return (__wt_async_create(conn, cfg));
+ return (__async_start(session));
else if (conn->async_cfg == 0)
/* Case 3 */
return (0);
diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c
index 8a069cc4bdf..84c4565eafe 100644
--- a/src/btree/bt_debug.c
+++ b/src/btree/bt_debug.c
@@ -62,7 +62,7 @@ __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v)
snprintf(buf, sizeof(buf), "verbose=[%s]", v);
cfg[0] = buf;
- return (__wt_conn_verbose_config(session, cfg));
+ return (__wt_verbose_config(session, cfg));
}
/*
diff --git a/src/btree/rec_track.c b/src/btree/rec_track.c
index a4ef0aaa100..165df9d61e5 100644
--- a/src/btree/rec_track.c
+++ b/src/btree/rec_track.c
@@ -807,7 +807,7 @@ __wt_ovfl_txnc_add(WT_SESSION_IMPL *session, WT_PAGE *page,
txnc->value_offset = WT_PTRDIFF32(p, txnc);
txnc->value_size = WT_STORE_SIZE(value_size);
memcpy(p, value, value_size);
- txnc->current = __wt_txn_current_id(session);
+ txnc->current = __wt_txn_new_id(session);
__wt_cache_page_inmem_incr(session, page,
WT_OVFL_SIZE(WT_OVFL_TXNC) + addr_size + value_size);
diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c
index 4687505df11..bae29929aa5 100644
--- a/src/btree/rec_write.c
+++ b/src/btree/rec_write.c
@@ -3859,7 +3859,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_REF *ref;
size_t size;
u_int vtype;
- int hazard, onpage_ovfl, ovfl_key, state;
+ int hazard, key_onpage_ovfl, ovfl_key, state;
const void *p;
btree = S2BT(session);
@@ -3907,11 +3907,12 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
ikey = __wt_ref_key_instantiated(ref);
if (ikey == NULL || ikey->cell_offset == 0) {
cell = NULL;
- onpage_ovfl = 0;
+ key_onpage_ovfl = 0;
} else {
cell = WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
__wt_cell_unpack(cell, kpack);
- onpage_ovfl = kpack->ovfl == 1 ? 1 : 0;
+ key_onpage_ovfl =
+ kpack->ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM;
}
WT_ERR(__rec_child_modify(session, r, ref, &hazard, &state));
@@ -3928,7 +3929,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
* always instantiated. Don't worry about reuse,
* reusing this key in this reconciliation is unlikely.
*/
- if (onpage_ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM)
+ if (key_onpage_ovfl)
WT_ERR(__wt_ovfl_discard_add(
session, page, kpack->cell));
CHILD_RELEASE_ERR(session, hazard, ref);
@@ -3954,8 +3955,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
* worry about reuse, reusing this key in this
* reconciliation is unlikely.
*/
- if (onpage_ovfl &&
- kpack->raw != WT_CELL_KEY_OVFL_RM)
+ if (key_onpage_ovfl)
WT_ERR(__wt_ovfl_discard_add(
session, page, kpack->cell));
CHILD_RELEASE_ERR(session, hazard, ref);
@@ -3970,8 +3970,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
* worry about reuse, reusing this key in this
* reconciliation is unlikely.
*/
- if (onpage_ovfl &&
- kpack->raw != WT_CELL_KEY_OVFL_RM)
+ if (key_onpage_ovfl)
WT_ERR(__wt_ovfl_discard_add(
session, page, kpack->cell));
@@ -4011,18 +4010,10 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
CHILD_RELEASE_ERR(session, hazard, ref);
/*
- * If the key is an overflow key, check to see if the backing
- * blocks have been freed; in that case, we have to build a new
- * key.
- */
- if (onpage_ovfl && kpack->raw == WT_CELL_KEY_OVFL_RM)
- onpage_ovfl = 0;
-
- /*
* Build key cell.
* Truncate any 0th key, internal pages don't need 0th keys.
*/
- if (onpage_ovfl) {
+ if (key_onpage_ovfl) {
key->buf.data = cell;
key->buf.size = __wt_cell_total_len(kpack);
key->cell_len = 0;
@@ -4048,10 +4039,10 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
* case, we have to build the actual key now because we
* are about to promote it.
*/
- if (onpage_ovfl) {
+ if (key_onpage_ovfl) {
WT_ERR(__wt_buf_set(session,
r->cur, WT_IKEY_DATA(ikey), ikey->size));
- onpage_ovfl = 0;
+ key_onpage_ovfl = 0;
}
WT_ERR(__rec_split(session, r));
}
diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c
index 8d245680f69..5621ad492f6 100644
--- a/src/config/config_collapse.c
+++ b/src/config/config_collapse.c
@@ -9,9 +9,24 @@
/*
* __wt_config_collapse --
- * Given a NULL-terminated list of configuration strings, where the first
- * one contains all the defaults, collapse them into newly allocated
- * memory.
+ * Collapse a set of configuration strings into newly allocated memory.
+ *
+ * This function takes a NULL-terminated list of configuration strings (where
+ * the first one contains all the defaults and the values are in order from
+ * least to most preferred, that is, the default values are least preferred),
+ * and collapses them into newly allocated memory. The algorithm is to walk
+ * the first of the configuration strings, and for each entry, search all of
+ * the configuration strings for a final value, keeping the last value found.
+ *
+ * Notes:
+ * Any key not appearing in the first configuration string is discarded
+ * from the final result, because we'll never search for it.
+ *
+ * Nested structures aren't parsed. For example, imagine a configuration
+ * string contains "key=(k2=v2,k3=v3)", and a subsequent string has
+ * "key=(k4=v4)", the result will be "key=(k4=v4)", as we search for and
+ * use the final value of "key", regardless of field overlap or missing
+ * fields in the nested value.
*/
int
__wt_config_collapse(
@@ -60,3 +75,308 @@ __wt_config_collapse(
err: __wt_scr_free(&tmp);
return (ret);
}
+
+/*
+ * We need a character that can't appear in a key as a separator.
+ */
+#undef SEP /* separator key, character */
+#define SEP "."
+#undef SEPC
+#define SEPC '.'
+
+/*
+ * Individual configuration entries, including a generation number used to make
+ * the qsort stable.
+ */
+typedef struct {
+ char *k, *v; /* key, value */
+ size_t gen; /* generation */
+} WT_CONFIG_MERGE_ENTRY;
+
+/*
+ * The array of configuration entries.
+ */
+typedef struct {
+ size_t entries_allocated; /* allocated */
+ size_t entries_next; /* next slot */
+
+ WT_CONFIG_MERGE_ENTRY *entries; /* array of entries */
+} WT_CONFIG_MERGE;
+
+/*
+ * __config_merge_scan --
+ * Walk a configuration string, inserting entries into the merged array.
+ */
+static int
+__config_merge_scan(WT_SESSION_IMPL *session,
+ const char *key, const char *value, WT_CONFIG_MERGE *cp)
+{
+ WT_CONFIG cparser;
+ WT_CONFIG_ITEM k, v;
+ WT_DECL_ITEM(kb);
+ WT_DECL_ITEM(vb);
+ WT_DECL_RET;
+ size_t len;
+ const char *str;
+
+ WT_ERR(__wt_scr_alloc(session, 0, &kb));
+ WT_ERR(__wt_scr_alloc(session, 0, &vb));
+
+ WT_ERR(__wt_config_init(session, &cparser, value));
+ while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) {
+ if (k.type != WT_CONFIG_ITEM_STRING &&
+ k.type != WT_CONFIG_ITEM_ID)
+ WT_ERR_MSG(session, EINVAL,
+ "Invalid configuration key found: '%s'\n", k.str);
+
+ /* Include the quotes around string keys/values. */
+ if (k.type == WT_CONFIG_ITEM_STRING) {
+ --k.str;
+ k.len += 2;
+ }
+ if (v.type == WT_CONFIG_ITEM_STRING) {
+ --v.str;
+ v.len += 2;
+ }
+
+ /*
+ * !!!
+ * WiredTiger names its internal checkpoints with a trailing
+ * dot and a number, for example, "WiredTigerCheckpoint.37".
+ * We're using dot to separate names in nested structures,
+ * and there's an obvious conflict. This works for now because
+ * that's the only case of a dot in a key name, and we never
+ * merge configuration strings that contain checkpoint names,
+ * for historic reasons. For now, return an error if there's
+ * ever a problem. (Note, it's probably safe if the dot is in
+ * a quoted key, that is, a key of type WT_CONFIG_ITEM_STRING,
+ * but since this isn't ever supposed to happen, I'm leaving
+ * the test simple.)
+ */
+ for (str = k.str, len = k.len; len > 0; ++str, --len)
+ if (*str == SEPC)
+ WT_ERR_MSG(session, EINVAL,
+ "key %s contains a separator character "
+ "(%s)", (char *)kb->data, SEP);
+
+ /* Build the key/value strings. */
+ WT_ERR(__wt_buf_fmt(session,
+ kb, "%s%s%.*s",
+ key == NULL ? "" : key,
+ key == NULL ? "" : SEP,
+ (int)k.len, k.str));
+ WT_ERR(__wt_buf_fmt(session,
+ vb, "%.*s", (int)v.len, v.str));
+
+ /*
+ * If the value is a structure, recursively parse it.
+ *
+ * !!!
+ * Don't merge unless the structure has field names. WiredTiger
+ * stores checkpoint LSNs in the metadata file using nested
+ * structures without field names: "checkpoint_lsn=(1,0)", not
+ * "checkpoint_lsn=(file=1,offset=0)". The value type is still
+ * WT_CONFIG_ITEM_STRUCT, so we check for a field name in the
+ * value.
+ */
+ if (v.type == WT_CONFIG_ITEM_STRUCT &&
+ strchr(vb->data, '=') != NULL) {
+ WT_ERR(__config_merge_scan(
+ session, kb->data, vb->data, cp));
+ continue;
+ }
+
+ /* Insert the value into the array. */
+ WT_ERR(__wt_realloc_def(session,
+ &cp->entries_allocated,
+ cp->entries_next + 1, &cp->entries));
+ WT_ERR(__wt_strndup(session,
+ kb->data, kb->size, &cp->entries[cp->entries_next].k));
+ WT_ERR(__wt_strndup(session,
+ vb->data, vb->size, &cp->entries[cp->entries_next].v));
+ cp->entries[cp->entries_next].gen = cp->entries_next;
+ ++cp->entries_next;
+ }
+
+err: __wt_scr_free(&kb);
+ __wt_scr_free(&vb);
+ return (0);
+}
+
+/*
+ * __strip_comma --
+ * Strip a trailing comma.
+ */
+static inline void
+__strip_comma(WT_ITEM *buf)
+{
+ if (buf->size != 0 && ((char *)buf->data)[buf->size - 1] == ',')
+ --buf->size;
+}
+
+/*
+ * __config_merge_format_next --
+ * Walk the array, building entries.
+ */
+static int
+__config_merge_format_next(WT_SESSION_IMPL *session, const char *prefix,
+ size_t plen, size_t *enp, WT_CONFIG_MERGE *cp, WT_ITEM *build)
+{
+ WT_CONFIG_MERGE_ENTRY *ep;
+ size_t len1, len2, next;
+ char *p;
+
+ for (; *enp < cp->entries_next; ++*enp) {
+ ep = &cp->entries[*enp];
+ len1 = strlen(ep->k);
+
+ /*
+ * The entries are in sorted order, take the last entry for any
+ * key.
+ */
+ if (*enp < (cp->entries_next - 1)) {
+ len2 = strlen((ep + 1)->k);
+
+ /* Choose the last of identical keys. */
+ if (len1 == len2 &&
+ memcmp(ep->k, (ep + 1)->k, len1) == 0)
+ continue;
+
+ /*
+ * The test is complicated by matching empty entries
+ * "foo=" against nested structures "foo,bar=", where
+ * the latter is a replacement for the former.
+ */
+ if (len2 > len1 &&
+ (ep + 1)->k[len1] == SEPC &&
+ memcmp(ep->k, (ep + 1)->k, len1) == 0)
+ continue;
+ }
+
+ /*
+ * If we're skipping a prefix and this entry doesn't match it,
+ * back off one entry and pop up a level.
+ */
+ if (plen != 0 &&
+ (plen > len1 || memcmp(ep->k, prefix, plen) != 0)) {
+ --*enp;
+ break;
+ }
+
+ /*
+ * If the entry introduces a new level, recurse through that
+ * new level.
+ */
+ if ((p = strchr(ep->k + plen, SEPC)) != NULL) {
+ next = WT_PTRDIFF(p, ep->k);
+ WT_RET(__wt_buf_catfmt(session,
+ build, "%.*s=(", (int)(next - plen), ep->k + plen));
+ WT_RET(__config_merge_format_next(
+ session, ep->k, next + 1, enp, cp, build));
+ __strip_comma(build);
+ WT_RET(__wt_buf_catfmt(session, build, "),"));
+ continue;
+ }
+
+ /* Append the entry to the buffer. */
+ WT_RET(__wt_buf_catfmt(
+ session, build, "%s=%s,", ep->k + plen, ep->v));
+ }
+
+ return (0);
+}
+
+/*
+ * __config_merge_format --
+ * Take the sorted array of entries, and format them into allocated memory.
+ */
+static int
+__config_merge_format(
+ WT_SESSION_IMPL *session, WT_CONFIG_MERGE *cp, const char **config_ret)
+{
+ WT_DECL_ITEM(build);
+ WT_DECL_RET;
+ size_t entries;
+
+ WT_RET(__wt_scr_alloc(session, 4 * 1024, &build));
+
+ entries = 0;
+ WT_ERR(__config_merge_format_next(session, "", 0, &entries, cp, build));
+
+ __strip_comma(build);
+
+ ret = __wt_strndup(session, build->data, build->size, config_ret);
+
+err: __wt_scr_free(&build);
+ return (ret);
+}
+
+/*
+ * __config_merge_cmp --
+ * Qsort function: sort the config merge array.
+ */
+static int
+__config_merge_cmp(const void *a, const void *b)
+{
+ WT_CONFIG_MERGE_ENTRY *ae, *be;
+ int cmp;
+
+ ae = (WT_CONFIG_MERGE_ENTRY *)a;
+ be = (WT_CONFIG_MERGE_ENTRY *)b;
+
+ if ((cmp = strcmp(ae->k, be->k)) != 0)
+ return (cmp);
+ return (ae->gen > be->gen ? 1 : -1);
+}
+
+/*
+ * __wt_config_merge --
+ * Merge a set of configuration strings into newly allocated memory.
+ *
+ * This function takes a NULL-terminated list of configuration strings (where
+ * the values are in order from least to most preferred), and merges them into
+ * newly allocated memory. The algorithm is to walk the configuration strings
+ * and build a table of each key/value pair. The pairs are sorted based on the
+ * name and the configuration string in which they were found, and a final
+ * configuration string is built from the result.
+ *
+ * Note:
+ * Nested structures are parsed and merged. For example, if configuration
+ * strings "key=(k1=v1,k2=v2)" and "key=(k1=v2)" appear, the result will
+ * be "key=(k1=v2,k2=v2)" because the nested values are merged.
+ */
+int
+__wt_config_merge(
+ WT_SESSION_IMPL *session, const char **cfg, const char **config_ret)
+{
+ WT_CONFIG_MERGE merge;
+ WT_DECL_RET;
+ size_t i;
+
+ /* Start out with a reasonable number of entries. */
+ WT_CLEAR(merge);
+
+ WT_RET(__wt_realloc_def(
+ session, &merge.entries_allocated, 100, &merge.entries));
+
+ /* Scan the configuration strings, entering them into the array. */
+ for (; *cfg != NULL; ++cfg)
+ WT_ERR(__config_merge_scan(session, NULL, *cfg, &merge));
+
+ /*
+ * Sort the array by key and, in the case of identical keys, by
+ * generation.
+ */
+ qsort(merge.entries, merge.entries_next,
+ sizeof(WT_CONFIG_MERGE_ENTRY), __config_merge_cmp);
+
+ /* Convert the array of entries into a string. */
+ ret = __config_merge_format(session, &merge, config_ret);
+
+err: for (i = 0; i < merge.entries_next; ++i) {
+ __wt_free(session, merge.entries[i].k);
+ __wt_free(session, merge.entries[i].v);
+ }
+ __wt_free(session, merge.entries);
+ return (ret);
+}
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index e7826e9fd56..6446da4781f 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -626,43 +626,47 @@ err: /*
static int
__conn_reconfigure(WT_CONNECTION *wt_conn, const char *config)
{
- WT_CONFIG_ITEM cval;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_SESSION_IMPL *session;
-
- /*
- * Special version of cfg that doesn't include the default config: used
- * to limit changes to values that the application sets explicitly.
- * Note that any function using this value has to be prepared to handle
- * not-found as a valid option return.
- */
- const char *raw_cfg[] = { config, NULL };
+ const char *p, *config_cfg[] = { NULL, NULL, NULL };
conn = (WT_CONNECTION_IMPL *)wt_conn;
CONNECTION_API_CALL(conn, session, reconfigure, config, cfg);
+ WT_UNUSED(cfg);
- WT_ERR(__wt_conn_cache_pool_config(session, cfg));
- WT_ERR(__wt_cache_config(conn, raw_cfg));
+ /* Serialize reconfiguration. */
+ __wt_spin_lock(session, &conn->reconfig_lock);
- WT_ERR(__wt_async_reconfig(conn, raw_cfg));
- WT_ERR(__conn_statistics_config(session, raw_cfg));
- WT_ERR(__wt_conn_verbose_config(session, raw_cfg));
- WT_ERR(__wt_checkpoint_server_create(conn, cfg));
- WT_ERR(__wt_statlog_create(conn, cfg));
+ /*
+ * The configuration argument has been checked for validity, replace the
+ * previous connection configuration.
+ *
+ * DO NOT merge the configuration before the reconfigure calls. Some
+ * of the underlying reconfiguration functions do explicit checks with
+ * the second element of the configuration array, knowing the defaults
+ * are in slot #1 and the application's modifications are in slot #2.
+ */
+ config_cfg[0] = conn->cfg;
+ config_cfg[1] = config;
- WT_ERR(__wt_config_gets(
- session, cfg, "lsm_manager.worker_thread_max", &cval));
- if (cval.val)
- conn->lsm_manager.lsm_workers_max = (uint32_t)cval.val;
+ WT_ERR(__conn_statistics_config(session, config_cfg));
+ WT_ERR(__wt_async_reconfig(session, config_cfg));
+ WT_ERR(__wt_cache_config(session, config_cfg));
+ WT_ERR(__wt_cache_pool_config(session, config_cfg));
+ WT_ERR(__wt_checkpoint_server_create(session, config_cfg));
+ WT_ERR(__wt_lsm_manager_config(session, config_cfg));
+ WT_ERR(__wt_statlog_create(session, config_cfg));
+ WT_ERR(__wt_verbose_config(session, config_cfg));
+
+ WT_ERR(__wt_config_merge(session, config_cfg, &p));
+ __wt_free(session, conn->cfg);
+ conn->cfg = p;
- /* Wake up the cache pool server so any changes are noticed. */
- if (F_ISSET(conn, WT_CONN_CACHE_POOL))
- WT_ERR(__wt_cond_signal(
- session, __wt_process.cache_pool->cache_pool_cond));
+err: __wt_spin_unlock(session, &conn->reconfig_lock);
-err: API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
/*
@@ -1009,59 +1013,62 @@ __conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[])
WT_CONFIG_ITEM cval, sval;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ uint32_t flags;
int set;
conn = S2C(session);
- if ((ret = __wt_config_gets(session, cfg, "statistics", &cval)) != 0)
- return (ret == WT_NOTFOUND ? 0 : ret);
-
- /* Configuring statistics clears any existing values. */
- conn->stat_flags = 0;
+ WT_RET(__wt_config_gets(session, cfg, "statistics", &cval));
+ flags = 0;
set = 0;
if ((ret = __wt_config_subgets(
session, &cval, "none", &sval)) == 0 && sval.val != 0) {
- FLD_SET(conn->stat_flags, WT_CONN_STAT_NONE);
+ LF_SET(WT_CONN_STAT_NONE);
++set;
}
WT_RET_NOTFOUND_OK(ret);
if ((ret = __wt_config_subgets(
session, &cval, "fast", &sval)) == 0 && sval.val != 0) {
- FLD_SET(conn->stat_flags, WT_CONN_STAT_FAST);
+ LF_SET(WT_CONN_STAT_FAST);
++set;
}
WT_RET_NOTFOUND_OK(ret);
if ((ret = __wt_config_subgets(
session, &cval, "all", &sval)) == 0 && sval.val != 0) {
- FLD_SET(conn->stat_flags, WT_CONN_STAT_ALL | WT_CONN_STAT_FAST);
+ LF_SET(WT_CONN_STAT_ALL | WT_CONN_STAT_FAST);
++set;
}
WT_RET_NOTFOUND_OK(ret);
if ((ret = __wt_config_subgets(
session, &cval, "clear", &sval)) == 0 && sval.val != 0)
- FLD_SET(conn->stat_flags, WT_CONN_STAT_CLEAR);
+ LF_SET(WT_CONN_STAT_CLEAR);
WT_RET_NOTFOUND_OK(ret);
if (set > 1)
WT_RET_MSG(session, EINVAL,
"only one statistics configuration value may be specified");
+
+ /* Configuring statistics clears any existing values. */
+ conn->stat_flags = flags;
+
return (0);
}
/*
- * __wt_conn_verbose_config --
+ * __wt_verbose_config --
* Set verbose configuration.
*/
int
-__wt_conn_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
+__wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_CONFIG_ITEM cval, sval;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ uint32_t flags;
static const struct {
const char *name;
uint32_t flag;
@@ -1092,14 +1099,14 @@ __wt_conn_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
conn = S2C(session);
- if ((ret = __wt_config_gets(session, cfg, "verbose", &cval)) != 0)
- return (ret == WT_NOTFOUND ? 0 : ret);
+ WT_RET(__wt_config_gets(session, cfg, "verbose", &cval));
+ flags = 0;
for (ft = verbtypes; ft->name != NULL; ft++) {
if ((ret = __wt_config_subgets(
session, &cval, ft->name, &sval)) == 0 && sval.val != 0) {
#ifdef HAVE_VERBOSE
- FLD_SET(conn->verbose, ft->flag);
+ LF_SET(ft->flag);
#else
WT_RET_MSG(session, EINVAL,
"Verbose option specified when WiredTiger built "
@@ -1107,11 +1114,11 @@ __wt_conn_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
"configure command and rebuild to include support "
"for verbose messages");
#endif
- } else
- FLD_CLR(conn->verbose, ft->flag);
-
+ }
WT_RET_NOTFOUND_OK(ret);
}
+
+ conn->verbose = flags;
return (0);
}
@@ -1319,7 +1326,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
if (cval.val)
F_SET(conn, WT_CONN_CKPT_SYNC);
- WT_ERR(__wt_conn_verbose_config(session, cfg));
+ WT_ERR(__wt_verbose_config(session, cfg));
WT_ERR(__wt_config_gets(session, cfg, "buffer_alignment", &cval));
if (cval.val == -1)
@@ -1399,13 +1406,12 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
*/
WT_ERR(__wt_connection_workers(session, cfg));
+ /* Merge the final configuration for later reconfiguration. */
+ WT_ERR(__wt_config_merge(session, cfg, &conn->cfg));
+
STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0);
*wt_connp = &conn->iface;
- /*
- * Destroying the connection on error will destroy our session handle,
- * cleanup using the session handle first, then discard the connection.
- */
err: __wt_buf_free(session, &cbbuf);
__wt_buf_free(session, &cubuf);
diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c
index 42e45a9c58b..2c21e5f1fbc 100644
--- a/src/conn/conn_cache.c
+++ b/src/conn/conn_cache.c
@@ -12,65 +12,52 @@
* Configure the underlying cache.
*/
int
-__wt_cache_config(WT_CONNECTION_IMPL *conn, const char *cfg[])
+__wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_CACHE *cache;
WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
- session = conn->default_session;
+ conn = S2C(session);
cache = conn->cache;
/*
* If not using a shared cache configure the cache size, otherwise
* check for a reserved size.
*/
- if (!F_ISSET(conn, WT_CONN_CACHE_POOL) &&
- (ret = __wt_config_gets(session, cfg, "cache_size", &cval)) == 0)
+ if (!F_ISSET(conn, WT_CONN_CACHE_POOL)) {
+ WT_RET(__wt_config_gets(session, cfg, "cache_size", &cval));
conn->cache_size = (uint64_t)cval.val;
-
- if (F_ISSET(conn, WT_CONN_CACHE_POOL) &&
- (ret = __wt_config_gets(session, cfg,
- "shared_cache.reserve", &cval)) == 0 && cval.val != 0)
+ } else {
+ WT_RET(__wt_config_gets(
+ session, cfg, "shared_cache.reserve", &cval));
+ if (cval.val == 0)
+ WT_RET(__wt_config_gets(
+ session, cfg, "shared_cache.chunk", &cval));
cache->cp_reserved = (uint64_t)cval.val;
- else if ((ret = __wt_config_gets(session, cfg,
- "shared_cache.chunk", &cval)) == 0)
- cache->cp_reserved = (uint64_t)cval.val;
- WT_RET_NOTFOUND_OK(ret);
+ }
- if ((ret =
- __wt_config_gets(session, cfg, "eviction_target", &cval)) == 0)
- cache->eviction_target = (u_int)cval.val;
- WT_RET_NOTFOUND_OK(ret);
+ WT_RET(__wt_config_gets(session, cfg, "eviction_target", &cval));
+ cache->eviction_target = (u_int)cval.val;
- if ((ret =
- __wt_config_gets(session, cfg, "eviction_trigger", &cval)) == 0)
- cache->eviction_trigger = (u_int)cval.val;
- WT_RET_NOTFOUND_OK(ret);
+ WT_RET(__wt_config_gets(session, cfg, "eviction_trigger", &cval));
+ cache->eviction_trigger = (u_int)cval.val;
- if ((ret = __wt_config_gets(
- session, cfg, "eviction_dirty_target", &cval)) == 0)
- cache->eviction_dirty_target = (u_int)cval.val;
- WT_RET_NOTFOUND_OK(ret);
+ WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_target", &cval));
+ cache->eviction_dirty_target = (u_int)cval.val;
/*
* The eviction thread configuration options include the main eviction
* thread and workers. Our implementation splits them out. Adjust for
* the difference when parsing the configuration.
*/
- if ((ret = __wt_config_gets(
- session, cfg, "eviction.threads_max", &cval)) == 0) {
- WT_ASSERT(session, cval.val > 0);
- conn->evict_workers_max = (u_int)cval.val - 1;
- }
- WT_RET_NOTFOUND_OK(ret);
- if ((ret = __wt_config_gets(
- session, cfg, "eviction.threads_min", &cval)) == 0) {
- WT_ASSERT(session, cval.val > 0);
- conn->evict_workers_min = (u_int)cval.val - 1;
- }
- WT_RET_NOTFOUND_OK(ret);
+ WT_RET(__wt_config_gets(session, cfg, "eviction.threads_max", &cval));
+ WT_ASSERT(session, cval.val > 0);
+ conn->evict_workers_max = (u_int)cval.val - 1;
+
+ WT_RET(__wt_config_gets(session, cfg, "eviction.threads_min", &cval));
+ WT_ASSERT(session, cval.val > 0);
+ conn->evict_workers_min = (u_int)cval.val - 1;
if (conn->evict_workers_min > conn->evict_workers_max)
WT_RET_MSG(session, EINVAL,
@@ -85,13 +72,13 @@ __wt_cache_config(WT_CONNECTION_IMPL *conn, const char *cfg[])
* Create the underlying cache.
*/
int
-__wt_cache_create(WT_CONNECTION_IMPL *conn, const char *cfg[])
+__wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- WT_SESSION_IMPL *session;
- session = conn->default_session;
+ conn = S2C(session);
WT_ASSERT(session, conn->cache == NULL ||
(F_ISSET(conn, WT_CONN_CACHE_POOL) && conn->cache != NULL));
@@ -101,7 +88,7 @@ __wt_cache_create(WT_CONNECTION_IMPL *conn, const char *cfg[])
cache = conn->cache;
/* Use a common routine for run-time configuration options. */
- WT_RET(__wt_cache_config(conn, cfg));
+ WT_RET(__wt_cache_config(session, cfg));
/* Add the configured cache to the cache pool. */
if (F_ISSET(conn, WT_CONN_CACHE_POOL))
diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c
index ab7e2cc48ee..5148229db51 100644
--- a/src/conn/conn_cache_pool.c
+++ b/src/conn/conn_cache_pool.c
@@ -24,11 +24,11 @@ static int __cache_pool_assess(WT_SESSION_IMPL *, uint64_t *);
static int __cache_pool_balance(WT_SESSION_IMPL *);
/*
- * __wt_conn_cache_pool_config --
+ * __wt_cache_pool_config --
* Parse and setup the cache pool options.
*/
int
-__wt_conn_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
+__wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
{
WT_CACHE_POOL *cp;
WT_CONFIG_ITEM cval;
@@ -102,6 +102,7 @@ __wt_conn_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
pool_name);
cp = __wt_process.cache_pool;
+
/*
* The cache pool requires a reference count to avoid a race between
* configuration/open and destroy.
@@ -110,39 +111,54 @@ __wt_conn_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
++cp->refs;
/*
- * Retrieve the pool configuration options. The values are optional if
- * we are re-configuring.
+ * Cache pool configurations are optional when not creating. If
+ * values aren't being changed, retrieve the current value so that
+ * validation of settings works.
*/
- ret = __wt_config_gets(session, cfg, "shared_cache.size", &cval);
- if (reconfiguring && ret == WT_NOTFOUND)
- /* Not being changed; use the old value. */
- size = cp->size;
- else {
- WT_ERR(ret);
+ if (!created) {
+ if (__wt_config_gets(session, &cfg[1],
+ "shared_cache.size", &cval) == 0 && cval.val != 0)
+ size = (uint64_t)cval.val;
+ else
+ size = cp->size;
+ if (__wt_config_gets(session, &cfg[1],
+ "shared_cache.chunk", &cval) == 0 && cval.val != 0)
+ chunk = (uint64_t)cval.val;
+ else
+ chunk = cp->chunk;
+ } else {
+ /*
+ * The only time shared cache configuration uses default
+ * values is when we are creating the pool.
+ */
+ WT_ERR(__wt_config_gets(
+ session, cfg, "shared_cache.size", &cval));
+ WT_ASSERT(session, cval.val != 0);
size = (uint64_t)cval.val;
- }
- ret = __wt_config_gets(session, cfg, "shared_cache.chunk", &cval);
- if (reconfiguring && ret == WT_NOTFOUND)
- /* Not being changed; use the old value. */
- chunk = cp->chunk;
- else {
- WT_ERR(ret);
+ WT_ERR(__wt_config_gets(
+ session, cfg, "shared_cache.chunk", &cval));
+ WT_ASSERT(session, cval.val != 0);
chunk = (uint64_t)cval.val;
}
+
/*
* Retrieve the reserve size here for validation of configuration.
* Don't save it yet since the connections cache is not created if
* we are opening. Cache configuration is responsible for saving the
* setting.
+ * The different conditions when the reserved size is set are:
+ * - It's part of the user's configuration - use that value.
+ * - We are reconfiguring - keep the previous value.
+ * - We are joining a cache pool for the first time (including
+ * creating the pool) - use the chunk size; that's the default.
*/
- ret = __wt_config_gets(session, cfg, "shared_cache.reserve", &cval);
- if (reconfiguring && ret == WT_NOTFOUND)
- /* It is safe to access the cache during reconfigure. */
- reserve = conn->cache->cp_reserved;
- else {
- WT_ERR(ret);
+ if (__wt_config_gets(session, &cfg[1],
+ "shared_cache.reserve", &cval) == 0 && cval.val != 0)
reserve = (uint64_t)cval.val;
- }
+ else if (reconfiguring)
+ reserve = conn->cache->cp_reserved;
+ else
+ reserve = chunk;
/*
* Validate that size and reserve values don't cause the cache
@@ -163,6 +179,11 @@ __wt_conn_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
cp->size = size;
cp->chunk = chunk;
+ /* Wake up the cache pool server so any changes are noticed. */
+ if (reconfiguring)
+ WT_ERR(__wt_cond_signal(
+ session, __wt_process.cache_pool->cache_pool_cond));
+
WT_ERR(__wt_verbose(session, WT_VERB_SHARED_CACHE,
"Configured cache pool %s. Size: %" PRIu64
", chunk size: %" PRIu64, cp->name, cp->size, cp->chunk));
@@ -488,26 +509,46 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
if (cache->cp_skip_count > 0 && --cache->cp_skip_count > 0)
continue;
/*
- * TODO: Use __wt_cache_bytes_inuse instead of eviction_target
- * which doesn't do the right thing at the moment.
+ * If the entry is currently allocated less than the reserved
+ * size, increase its allocation. This should only happen if:
+ * - It's the first time we've seen this member
+ * - The reserved size has been adjusted
*/
if (entry->cache_size < reserved) {
grew = 1;
adjusted = reserved - entry->cache_size;
+ /*
+ * Conditions for reducing the amount of resources for an
+ * entry:
+ * - If we are forcing and this entry has more than the
+ * minimum amount of space in use.
+ * - If the read pressure in this entry is below the
+ * threshold, other entries need more cache, the entry has
+ * more than the minimum space and there is no available
+ * space in the pool.
+ */
} else if ((force && entry->cache_size > reserved) ||
(read_pressure < WT_CACHE_POOL_REDUCE_THRESHOLD &&
highest > 1 && entry->cache_size > reserved &&
cp->currently_used >= cp->size)) {
+ grew = 0;
/*
- * If a connection isn't actively using it's assigned
- * cache and is assigned a reasonable amount - reduce
- * it.
+ * Shrink by a chunk size if that doesn't drop us
+ * below the reserved size.
*/
- grew = 0;
- if (entry->cache_size - cp->chunk > reserved)
+ if (entry->cache_size > cp->chunk + reserved)
adjusted = cp->chunk;
else
adjusted = entry->cache_size - reserved;
+ /*
+ * Conditions for increasing the amount of resources for an
+ * entry:
+ * - There was some activity across the pool
+ * - This entry is using less than the entire cache pool
+ * - The connection is using enough cache to require eviction
+ * - There is space available in the pool
+ * - Additional cache would benefit the connection
+ */
} else if (highest > 1 &&
entry->cache_size < cp->size &&
cache->bytes_inmem >=
@@ -527,6 +568,9 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
} else {
cache->cp_skip_count =
WT_CACHE_POOL_REDUCE_SKIPS;
+ WT_ASSERT(session,
+ entry->cache_size >= adjusted &&
+ cp->currently_used >= adjusted);
entry->cache_size -= adjusted;
cp->currently_used -= adjusted;
}
diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c
index d1ee647d08a..101877a3ddb 100644
--- a/src/conn/conn_ckpt.c
+++ b/src/conn/conn_ckpt.c
@@ -20,6 +20,7 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, int *startp)
WT_CONNECTION_IMPL *conn;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
+ char *p;
conn = S2C(session);
@@ -40,19 +41,26 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, int *startp)
}
*startp = 1;
+ /*
+ * The application can specify a checkpoint name, which we ignore if
+ * it's our default.
+ */
WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &cval));
+ if (cval.len != 0 &&
+ !WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
+ WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len));
- if (!WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
WT_RET(__wt_scr_alloc(session, cval.len + 20, &tmp));
- strcpy((char *)tmp->data, "name=");
- strncat((char *)tmp->data, cval.str, cval.len);
- ret = __wt_strndup(session,
- tmp->data, strlen("name=") + cval.len, &conn->ckpt_config);
- __wt_scr_free(&tmp);
- WT_RET(ret);
+ WT_ERR(__wt_buf_fmt(
+ session, tmp, "name=%.*s", (int)cval.len, cval.str));
+ WT_ERR(__wt_strdup(session, tmp->data, &p));
+
+ __wt_free(session, conn->ckpt_config);
+ conn->ckpt_config = p;
}
- return (0);
+err: __wt_scr_free(&tmp);
+ return (ret);
}
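A sketch of the configuration this function parses, using the standard wiredtiger_open checkpoint group; the wait period and checkpoint name are illustrative, and only a non-default name is stored in conn->ckpt_config:

@code
WT_CONNECTION *conn;
int ret;

/* Run the checkpoint server every 60 seconds under an application name. */
ret = wiredtiger_open("/path/to/db", NULL,
    "create,checkpoint=(wait=60,name=appdata)", &conn);
@endcode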
/*
@@ -139,17 +147,19 @@ __ckpt_server_start(WT_CONNECTION_IMPL *conn)
* Configure and start the checkpoint server.
*/
int
-__wt_checkpoint_server_create(WT_CONNECTION_IMPL *conn, const char *cfg[])
+__wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[])
{
+ WT_CONNECTION_IMPL *conn;
int start;
+ conn = S2C(session);
start = 0;
/* If there is already a server running, shut it down. */
if (conn->ckpt_session != NULL)
WT_RET(__wt_checkpoint_server_destroy(conn));
- WT_RET(__ckpt_server_config(conn->default_session, cfg, &start));
+ WT_RET(__ckpt_server_config(session, cfg, &start));
if (start)
WT_RET(__ckpt_server_start(conn));
diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c
index 36c53133325..e4f0a6ddd73 100644
--- a/src/conn/conn_handle.c
+++ b/src/conn/conn_handle.c
@@ -45,12 +45,23 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
WT_RET(__wt_spin_init(session, &conn->dhandle_lock, "data handle"));
WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
WT_RET(__wt_spin_init(session, &conn->hot_backup_lock, "hot backup"));
+ WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
WT_RET(__wt_spin_init(session, &conn->schema_lock, "schema"));
WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS(conn), &conn->page_lock));
for (i = 0; i < WT_PAGE_LOCKS(conn); ++i)
WT_RET(
__wt_spin_init(session, &conn->page_lock[i], "btree page"));
+ /* Setup the spin locks for the LSM manager queues. */
+ WT_RET(__wt_spin_init(session,
+ &conn->lsm_manager.app_lock, "LSM application queue lock"));
+ WT_RET(__wt_spin_init(session,
+ &conn->lsm_manager.manager_lock, "LSM manager queue lock"));
+ WT_RET(__wt_spin_init(
+ session, &conn->lsm_manager.switch_lock, "LSM switch queue lock"));
+ WT_RET(__wt_cond_alloc(
+ session, "LSM worker cond", 0, &conn->lsm_manager.work_cond));
+
/*
* Generation numbers.
*
@@ -114,12 +125,14 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
__wt_spin_destroy(session, &conn->dhandle_lock);
__wt_spin_destroy(session, &conn->fh_lock);
__wt_spin_destroy(session, &conn->hot_backup_lock);
+ __wt_spin_destroy(session, &conn->reconfig_lock);
__wt_spin_destroy(session, &conn->schema_lock);
for (i = 0; i < WT_PAGE_LOCKS(conn); ++i)
__wt_spin_destroy(session, &conn->page_lock[i]);
__wt_free(session, conn->page_lock);
/* Free allocated memory. */
+ __wt_free(session, conn->cfg);
__wt_free(session, conn->home);
__wt_free(session, conn->error_prefix);
__wt_free(session, conn->sessions);
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index 0ecf48c6628..114e44ea193 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -166,13 +166,13 @@ err: __wt_err(session, ret, "log archive server error");
* Start the log subsystem and archive server thread.
*/
int
-__wt_logmgr_create(WT_CONNECTION_IMPL *conn, const char *cfg[])
+__wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
WT_LOG *log;
int run;
- session = conn->default_session;
+ conn = S2C(session);
/* Handle configuration. */
WT_RET(__logmgr_config(session, cfg, &run));
diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c
index 3a2f1cb51a4..956b944ec19 100644
--- a/src/conn/conn_open.c
+++ b/src/conn/conn_open.c
@@ -55,10 +55,10 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[])
WT_WRITE_BARRIER();
/* Connect to a cache pool. */
- WT_RET(__wt_conn_cache_pool_config(session, cfg));
+ WT_RET(__wt_cache_pool_config(session, cfg));
/* Create the cache. */
- WT_RET(__wt_cache_create(conn, cfg));
+ WT_RET(__wt_cache_create(session, cfg));
/* Initialize transaction support. */
WT_RET(__wt_txn_global_init(conn, cfg));
@@ -102,7 +102,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
WT_TRET(__wt_async_destroy(conn));
WT_TRET(__wt_lsm_manager_destroy(conn));
WT_TRET(__wt_checkpoint_server_destroy(conn));
- WT_TRET(__wt_statlog_destroy(conn, 1));
+ WT_TRET(__wt_statlog_destroy(session, 1));
WT_TRET(__wt_sweep_destroy(conn));
/* Close open data handles. */
@@ -225,20 +225,20 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
* Start the optional statistics thread. Start statistics first so that
* other optional threads can know if statistics are enabled or not.
*/
- WT_RET(__wt_statlog_create(conn, cfg));
+ WT_RET(__wt_statlog_create(session, cfg));
/* Start the optional async threads. */
- WT_RET(__wt_async_create(conn, cfg));
+ WT_RET(__wt_async_create(session, cfg));
/*
* Start the optional logging/archive thread.
* NOTE: The log manager must be started before checkpoints so that the
* checkpoint server knows if logging is enabled.
*/
- WT_RET(__wt_logmgr_create(conn, cfg));
+ WT_RET(__wt_logmgr_create(session, cfg));
/* Start the optional checkpoint thread. */
- WT_RET(__wt_checkpoint_server_create(conn, cfg));
+ WT_RET(__wt_checkpoint_server_create(session, cfg));
return (0);
}
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index eaee410ad0a..fbd9b3835b4 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -19,6 +19,22 @@
#endif
/*
+ * __stat_sources_free --
+ * Free the array of statistics sources.
+ */
+static void
+__stat_sources_free(WT_SESSION_IMPL *session, char ***sources)
+{
+ char **p;
+
+ if ((p = (*sources)) != NULL) {
+ for (; *p != NULL; ++p)
+ __wt_free(session, *p);
+ __wt_free(session, *sources);
+ }
+}
+
+/*
* __wt_conn_stat_init --
* Initialize the per-connection statistics.
*/
@@ -41,8 +57,10 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, int *runp)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
int cnt;
+ char **sources;
conn = S2C(session);
+ sources = NULL;
WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval));
/* Only start the server if wait time is non-zero */
@@ -67,7 +85,7 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, int *runp)
;
WT_RET_NOTFOUND_OK(ret);
if (cnt != 0) {
- WT_RET(__wt_calloc_def(session, cnt + 1, &conn->stat_sources));
+ WT_RET(__wt_calloc_def(session, cnt + 1, &sources));
WT_RET(__wt_config_subinit(session, &objectconf, &cval));
for (cnt = 0;
(ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt) {
@@ -80,24 +98,28 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, int *runp)
*/
if (!WT_PREFIX_MATCH(k.str, "file:") &&
!WT_PREFIX_MATCH(k.str, "lsm:"))
- WT_RET_MSG(session, EINVAL,
+ WT_ERR_MSG(session, EINVAL,
"statistics_log sources configuration only "
"supports objects of type \"file\" or "
"\"lsm\"");
- WT_RET(__wt_strndup(session,
- k.str, k.len, &conn->stat_sources[cnt]));
+ WT_ERR(
+ __wt_strndup(session, k.str, k.len, &sources[cnt]));
}
- WT_RET_NOTFOUND_OK(ret);
+ WT_ERR_NOTFOUND_OK(ret);
+
+ conn->stat_sources = sources;
+ sources = NULL;
}
- WT_RET(__wt_config_gets(session, cfg, "statistics_log.path", &cval));
- WT_RET(__wt_nfilename(session, cval.str, cval.len, &conn->stat_path));
+ WT_ERR(__wt_config_gets(session, cfg, "statistics_log.path", &cval));
+ WT_ERR(__wt_nfilename(session, cval.str, cval.len, &conn->stat_path));
- WT_RET(__wt_config_gets(
+ WT_ERR(__wt_config_gets(
session, cfg, "statistics_log.timestamp", &cval));
- WT_RET(__wt_strndup(session, cval.str, cval.len, &conn->stat_format));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &conn->stat_format));
- return (0);
+err: __stat_sources_free(session, &sources);
+ return (ret);
}
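A sketch of a statistics-log configuration accepted by this code; the wait value and source prefix are illustrative, and sources must be of type "file:" or "lsm:" as enforced above:

@code
WT_CONNECTION *conn;
int ret;

/* Log statistics every 30 seconds for all file: sources. */
ret = wiredtiger_open("/path/to/db", NULL,
    "create,statistics_log=(wait=30,sources=(\"file:\"))", &conn);
@endcode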
/*
@@ -441,12 +463,12 @@ __statlog_start(WT_CONNECTION_IMPL *conn)
* Start the statistics server thread.
*/
int
-__wt_statlog_create(WT_CONNECTION_IMPL *conn, const char *cfg[])
+__wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
int start;
- session = conn->default_session;
+ conn = S2C(session);
start = 0;
/*
@@ -455,9 +477,9 @@ __wt_statlog_create(WT_CONNECTION_IMPL *conn, const char *cfg[])
* configuration changes - but that makes our lives easier.
*/
if (conn->stat_session != NULL)
- WT_RET(__wt_statlog_destroy(conn, 0));
+ WT_RET(__wt_statlog_destroy(session, 0));
- WT_RET_NOTFOUND_OK(__statlog_config(session, cfg, &start));
+ WT_RET(__statlog_config(session, cfg, &start));
if (start)
WT_RET(__statlog_start(conn));
@@ -469,14 +491,13 @@ __wt_statlog_create(WT_CONNECTION_IMPL *conn, const char *cfg[])
* Destroy the statistics server thread.
*/
int
-__wt_statlog_destroy(WT_CONNECTION_IMPL *conn, int is_close)
+__wt_statlog_destroy(WT_SESSION_IMPL *session, int is_close)
{
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_SESSION *wt_session;
- WT_SESSION_IMPL *session;
- char **p;
- session = conn->default_session;
+ conn = S2C(session);
F_CLR(conn, WT_CONN_SERVER_STATISTICS);
if (conn->stat_tid_set) {
@@ -491,11 +512,7 @@ __wt_statlog_destroy(WT_CONNECTION_IMPL *conn, int is_close)
WT_TRET(__wt_cond_destroy(session, &conn->stat_cond));
- if ((p = conn->stat_sources) != NULL) {
- for (; *p != NULL; ++p)
- __wt_free(session, *p);
- __wt_free(session, conn->stat_sources);
- }
+ __stat_sources_free(session, &conn->stat_sources);
__wt_free(session, conn->stat_path);
__wt_free(session, conn->stat_format);
diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c
index e5a1d8a68b6..08129e668f5 100644
--- a/src/cursor/cur_file.c
+++ b/src/cursor/cur_file.c
@@ -10,29 +10,22 @@
/*
* WT_BTREE_CURSOR_SAVE_AND_RESTORE
* Save the cursor's key/value data/size fields, call an underlying btree
- * function, and then consistently handle failure and success.
+ * function, and then consistently handle failure and success.
*/
#define WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, f, ret) do { \
- const void *__key_data = (cursor)->key.data; \
- const void *__value_data = (cursor)->value.data; \
+ WT_ITEM __key_copy = (cursor)->key; \
uint64_t __recno = (cursor)->recno; \
- size_t __key_size = (cursor)->key.size; \
- size_t __value_size = (cursor)->value.size; \
+ WT_ITEM __value_copy = (cursor)->value; \
if (((ret) = (f)) == 0) { \
F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); \
F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \
- } else if ((ret) == WT_NOTFOUND) \
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); \
- else { \
+ } else { \
if (F_ISSET(cursor, WT_CURSTD_KEY_EXT)) { \
(cursor)->recno = __recno; \
- (cursor)->key.data = __key_data; \
- (cursor)->key.size = __key_size; \
- } \
- if (F_ISSET(cursor, WT_CURSTD_VALUE_EXT)) { \
- (cursor)->value.data = __value_data; \
- (cursor)->value.size = __value_size; \
+ WT_ITEM_SET((cursor)->key, __key_copy); \
} \
+ if (F_ISSET(cursor, WT_CURSTD_VALUE_EXT)) \
+ WT_ITEM_SET((cursor)->value, __value_copy); \
F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \
} \
} while (0)
diff --git a/src/docs/Doxyfile b/src/docs/Doxyfile
index 792b255f4ab..5492905f7e9 100644
--- a/src/docs/Doxyfile
+++ b/src/docs/Doxyfile
@@ -268,7 +268,7 @@ OPTIMIZE_OUTPUT_VHDL = NO
# that for custom extensions you also need to set FILE_PATTERNS otherwise the
# files are not read by doxygen.
-EXTENSION_MAPPING =
+EXTENSION_MAPPING = in=C
# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all
# comments according to the Markdown format, which allows for more readable
diff --git a/src/docs/file-formats.dox b/src/docs/file-formats.dox
index 46865da4811..bc747433172 100644
--- a/src/docs/file-formats.dox
+++ b/src/docs/file-formats.dox
@@ -3,7 +3,8 @@
@section file_formats_formats File formats
WiredTiger supports two underlying file formats: row-store and
-column-store, both are key/value stores.
+column-store, where both are B+tree implementations of key/value stores.
+WiredTiger also supports @ref lsm, implemented as a tree of B+trees.
In a row-store, both keys and data are variable-length byte strings. In
a column-store, keys are 64-bit record numbers (key_format type 'r'),
@@ -28,14 +29,38 @@ deleting a value is the same as storing a value of 0. For the same
reason, storing a value of 0 will cause cursor scans to skip the record.
WiredTiger does not support duplicate data items: there can be only a
-single value for any given key, and applications are responsible for
-creating unique key/value pairs.
+single value associated with any given key, and applications are
+responsible for creating unique key/value pairs.
WiredTiger allocates space from the underlying files in block units.
The minimum file allocation unit WiredTiger supports is 512B and the
maximum file allocation unit is 512MB. File block offsets are 64-bit
(meaning the maximum file size is very, very large).
+@section file_formats_choice Choosing a file format
+
+The row-store format is the default choice for most applications. When
+the primary key is a record number, when there are advantages to storing
+columns in separate files, or when the underlying data is a set of bits,
+the column-store format may be a better choice.
+
+Both row- and column-store formats can maintain high volumes of writes,
+but for data sets requiring sustained, extreme write throughput, @ref
+lsm are usually a better choice. For applications that do not require
+extreme write throughput, row- or column-store is likely to be a better
+choice because the read throughput is better than with LSM trees (an
+effect that becomes more pronounced as additional read threads are added).
+
+Applications with complex schemas may also benefit from using multiple
+storage formats, that is, using a combination of different formats in
+the database, or even within individual tables (for example, a sparse,
+wide table configured with a column-store primary, where the indexes
+are stored in LSM trees).
+
+Finally, as WiredTiger makes it easy to switch back-and-forth between
+storage configurations, it's usually worthwhile benchmarking possible
+configurations when there is any question.
+
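As a minimal illustrative sketch of these choices (the table names and formats here are examples only, and an open WT_SESSION *session is assumed), the format is selected when an object is created:

@code
/* Row-store: variable-length string keys and values (the default). */
ret = session->create(session,
    "table:inventory", "key_format=S,value_format=S");

/* Column-store: 64-bit record number keys. */
ret = session->create(session,
    "table:metrics", "key_format=r,value_format=S");

/* LSM tree: for sustained, high-volume writes. */
ret = session->create(session,
    "lsm:events", "key_format=S,value_format=S");
@endcode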
@section file_formats_compression File formats and compression
Row-stores support four types of compression: key prefix compression,
diff --git a/src/docs/install.dox b/src/docs/install.dox
index f3f5094eb5d..eae566f4291 100644
--- a/src/docs/install.dox
+++ b/src/docs/install.dox
@@ -21,15 +21,14 @@ First, clone the repository:
git clone git://github.com/wiredtiger/wiredtiger.git
@endcode
-Second, run the \c build_posix/reconf script:
+Second, run \c autogen.sh to create the \c configure script:
@code
cd wiredtiger
-sh build_posix/reconf
+sh autogen.sh
@endcode
-This creates the \c configure script, and you can now proceed with @ref
-building.
+Now proceed with @ref building.
@section building Building WiredTiger
diff --git a/src/docs/lsm.dox b/src/docs/lsm.dox
index 0313862afdf..b71fccd7151 100644
--- a/src/docs/lsm.dox
+++ b/src/docs/lsm.dox
@@ -107,17 +107,10 @@ there are chunks in the tree for each cursor that is open on the LSM tree.
The number of hazard pointers is configured with the \c "hazard_max"
configuration key to ::wiredtiger_open.
-@subsection lsm_tombstones Empty values
-
-Internally, WiredTiger's LSM trees use an empty value to represent a
-record that has been removed (also known as a "tombstone"). For this
-reason, applications cannot store records in LSM trees with empty values.
-
@subsection lsm_checkpoints Named checkpoints
-Named checkpoints are not supported on LSM trees, and cursors cannot be opened
-with a non-empty \c "checkpoint" configuration.
-
-We intend to address these limitations in future releases.
+Named checkpoints are not supported on LSM trees, and cursors cannot be
+opened with a non-empty \c "checkpoint" configuration (that is, only the
+most recent standard checkpoint can be read).
*/
diff --git a/src/docs/programming.dox b/src/docs/programming.dox
index 59eeab7705d..4add19c833b 100644
--- a/src/docs/programming.dox
+++ b/src/docs/programming.dox
@@ -18,8 +18,8 @@ each of which is ordered by one or more columns.
<h2>Storage options</h2>
- @subpage schema
-- @subpage lsm
- @subpage file_formats
+- @subpage lsm
- @subpage compression
<h2>Programming notes</h2>
diff --git a/src/docs/spell.ok b/src/docs/spell.ok
index 95bdf58fc06..857f89cef05 100644
--- a/src/docs/spell.ok
+++ b/src/docs/spell.ok
@@ -88,6 +88,7 @@ ar
archiver
arg
async
+autogen
atomicity
autoconf
automake
@@ -339,7 +340,6 @@ realloc'd
recno
recnoN
recnum
-reconf
recoverability
recs
rectype
diff --git a/src/docs/tune-bulk-load.dox b/src/docs/tune-bulk-load.dox
index 9e89fb7ceea..8ee1061c76c 100644
--- a/src/docs/tune-bulk-load.dox
+++ b/src/docs/tune-bulk-load.dox
@@ -11,7 +11,9 @@ be used on newly created objects, and an object being bulk-loaded is not
accessible from other cursors.
Cursors configured for bulk-load only support the WT_CURSOR::insert and
-WT_CURSOR::close methods.
+WT_CURSOR::close methods. Bulk load inserts are non-transactional: they
+cannot be rolled back and ignore the transactional state of the WT_SESSION
+in which they are opened.
When bulk-loading row-store objects, keys must be loaded in sorted
order.
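A minimal sketch of a bulk load (the table name is illustrative, and an open WT_SESSION *session is assumed); the cursor is configured with "bulk" and only WT_CURSOR::insert and WT_CURSOR::close are used:

@code
WT_CURSOR *cursor;
int ret;

/* Open a bulk cursor on a newly created, empty object. */
ret = session->open_cursor(session,
    "table:bulk_target", NULL, "bulk", &cursor);

/* For a row-store, keys must be inserted in sorted order. */
cursor->set_key(cursor, "key1");
cursor->set_value(cursor, "value1");
ret = cursor->insert(cursor);

ret = cursor->close(cursor);
@endcode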
diff --git a/src/include/connection.h b/src/include/connection.h
index 03feef68e56..9af23f95cbf 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -70,9 +70,12 @@ struct __wt_connection_impl {
WT_SESSION_IMPL *default_session;
WT_SESSION_IMPL dummy_session;
+ const char *cfg; /* Connection configuration */
+
WT_SPINLOCK api_lock; /* Connection API spinlock */
WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */
WT_SPINLOCK fh_lock; /* File handle queue spinlock */
+ WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */
WT_SPINLOCK schema_lock; /* Schema operation spinlock */
/*
diff --git a/src/include/dhandle.h b/src/include/dhandle.h
index 9a05620c74c..5556627c74d 100644
--- a/src/include/dhandle.h
+++ b/src/include/dhandle.h
@@ -18,7 +18,7 @@
#define WT_SET_BTREE_IN_SESSION(s, b) ((s)->dhandle = b->dhandle)
#define WT_CLEAR_BTREE_IN_SESSION(s) ((s)->dhandle = NULL)
-#define WT_WITH_DHANDLE(s, d, e) do { \
+#define WT_WITH_DHANDLE(s, d, e) do { \
WT_DATA_HANDLE *__saved_dhandle = (s)->dhandle; \
(s)->dhandle = (d); \
e; \
diff --git a/src/include/extern.h b/src/include/extern.h
index a213a0e4bfa..9783de0a7a6 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -1,8 +1,8 @@
/* DO NOT EDIT: automatically built by dist/s_prototypes. */
extern void __wt_async_stats_update(WT_SESSION_IMPL *session);
-extern int __wt_async_create(WT_CONNECTION_IMPL *conn, const char *cfg[]);
-extern int __wt_async_reconfig(WT_CONNECTION_IMPL *conn, const char *cfg[]);
+extern int __wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]);
+extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_async_destroy(WT_CONNECTION_IMPL *conn);
extern int __wt_async_flush(WT_CONNECTION_IMPL *conn);
extern int __wt_async_new_op(WT_CONNECTION_IMPL *conn,
@@ -575,6 +575,9 @@ extern int __wt_config_check(WT_SESSION_IMPL *session,
extern int __wt_config_collapse( WT_SESSION_IMPL *session,
const char **cfg,
const char **config_ret);
+extern int __wt_config_merge( WT_SESSION_IMPL *session,
+ const char **cfg,
+ const char **config_ret);
extern int __wt_config_concat( WT_SESSION_IMPL *session,
const char **cfg,
const char **config_ret);
@@ -600,18 +603,16 @@ extern int __wt_conn_remove_compressor( WT_CONNECTION_IMPL *conn,
WT_NAMED_COMPRESSOR *ncomp);
extern int __wt_conn_remove_data_source( WT_CONNECTION_IMPL *conn,
WT_NAMED_DATA_SOURCE *ndsrc);
-extern int __wt_conn_verbose_config(WT_SESSION_IMPL *session,
- const char *cfg[]);
-extern int __wt_cache_config(WT_CONNECTION_IMPL *conn, const char *cfg[]);
-extern int __wt_cache_create(WT_CONNECTION_IMPL *conn, const char *cfg[]);
+extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]);
+extern int __wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[]);
+extern int __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]);
extern void __wt_cache_stats_update(WT_SESSION_IMPL *session);
extern int __wt_cache_destroy(WT_CONNECTION_IMPL *conn);
-extern int __wt_conn_cache_pool_config(WT_SESSION_IMPL *session,
- const char **cfg);
+extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg);
extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session);
extern int __wt_conn_cache_pool_destroy(WT_CONNECTION_IMPL *conn);
extern void *__wt_cache_pool_server(void *arg);
-extern int __wt_checkpoint_server_create(WT_CONNECTION_IMPL *conn,
+extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session,
const char *cfg[]);
extern int __wt_checkpoint_server_destroy(WT_CONNECTION_IMPL *conn);
extern int __wt_checkpoint_signal(WT_SESSION_IMPL *session, off_t logsize);
@@ -641,15 +642,15 @@ extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session,
extern int __wt_conn_dhandle_discard(WT_CONNECTION_IMPL *conn);
extern int __wt_connection_init(WT_CONNECTION_IMPL *conn);
extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn);
-extern int __wt_logmgr_create(WT_CONNECTION_IMPL *conn, const char *cfg[]);
+extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_logmgr_destroy(WT_CONNECTION_IMPL *conn);
extern int __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]);
extern int __wt_connection_close(WT_CONNECTION_IMPL *conn);
extern int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]);
extern void __wt_conn_stat_init(WT_SESSION_IMPL *session);
extern int __wt_statlog_log_one(WT_SESSION_IMPL *session);
-extern int __wt_statlog_create(WT_CONNECTION_IMPL *conn, const char *cfg[]);
-extern int __wt_statlog_destroy(WT_CONNECTION_IMPL *conn, int is_close);
+extern int __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]);
+extern int __wt_statlog_destroy(WT_SESSION_IMPL *session, int is_close);
extern int __wt_sweep_create(WT_CONNECTION_IMPL *conn);
extern int __wt_sweep_destroy(WT_CONNECTION_IMPL *conn);
extern int __wt_curbackup_open(WT_SESSION_IMPL *session,
@@ -929,6 +930,7 @@ extern int __wt_clsm_open(WT_SESSION_IMPL *session,
WT_CURSOR *owner,
const char *cfg[],
WT_CURSOR **cursorp);
+extern int __wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg);
extern int __wt_lsm_manager_start(WT_SESSION_IMPL *session);
extern void __wt_lsm_manager_free_work_unit( WT_SESSION_IMPL *session,
WT_LSM_WORK_UNIT *entry);
@@ -1010,8 +1012,10 @@ extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session,
int *),
const char *cfg[],
uint32_t open_flags);
-extern int __wt_lsm_get_chunk_to_flush( WT_SESSION_IMPL *session,
+extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
WT_LSM_TREE *lsm_tree,
+ int force,
+ int *last,
WT_LSM_CHUNK **chunkp);
extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session,
WT_LSM_WORK_UNIT **entryp,
@@ -1375,7 +1379,10 @@ extern int __wt_schema_range_truncate( WT_SESSION_IMPL *session,
WT_CURSOR *stop);
extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session,
const char *name);
-extern int __wt_schema_name_check(WT_SESSION_IMPL *session, const char *uri);
+extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str);
+extern int __wt_name_check(WT_SESSION_IMPL *session,
+ const char *str,
+ size_t len);
extern int __wt_schema_worker(WT_SESSION_IMPL *session,
const char *uri,
int (*file_func)(WT_SESSION_IMPL *,
@@ -1604,6 +1611,9 @@ extern int __wt_txn_init(WT_SESSION_IMPL *session);
extern void __wt_txn_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_global_init(WT_CONNECTION_IMPL *conn, const char *cfg[]);
extern void __wt_txn_global_destroy(WT_CONNECTION_IMPL *conn);
+extern int __wt_checkpoint_name_ok(WT_SESSION_IMPL *session,
+ const char *name,
+ size_t len);
extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
diff --git a/src/include/lsm.h b/src/include/lsm.h
index fceed7987a7..48984399acd 100644
--- a/src/include/lsm.h
+++ b/src/include/lsm.h
@@ -88,6 +88,8 @@ struct __wt_lsm_chunk {
#define WT_LSM_WORK_FLUSH 0x04 /* Flush a chunk to disk */
#define WT_LSM_WORK_MERGE 0x08 /* Look for a tree merge */
#define WT_LSM_WORK_SWITCH 0x10 /* Switch to a new in memory chunk */
+#define WT_LSM_WORK_FORCE 0x10000 /* Force last chunk flush */
+#define WT_LSM_WORK_MASK 0xffff /* Mask for work types */
/*
* WT_LSM_WORK_UNIT --
@@ -125,8 +127,6 @@ struct __wt_lsm_manager {
uint32_t lsm_workers; /* Current number of LSM workers */
uint32_t lsm_workers_max;
WT_LSM_WORKER_ARGS *lsm_worker_cookies;
-#define WT_LSM_MANAGER_RUNNING 0x01
- uint32_t flags;
};
/*
@@ -192,11 +192,12 @@ struct __wt_lsm_tree {
int freeing_old_chunks; /* Whether chunks are being freed */
uint32_t merge_aggressiveness; /* Increase amount of work per merge */
-#define WT_LSM_TREE_ACTIVE 0x01 /* Workers are active */
-#define WT_LSM_TREE_COMPACTING 0x02 /* Tree is being compacted */
-#define WT_LSM_TREE_NEED_SWITCH 0x04 /* A new chunk should be created */
-#define WT_LSM_TREE_OPEN 0x08 /* The tree is open */
-#define WT_LSM_TREE_THROTTLE 0x10 /* Throttle updates */
+#define WT_LSM_TREE_ACTIVE 0x01 /* Workers are active */
+#define WT_LSM_TREE_COMPACT_FLUSH 0x02 /* Flushed for compact */
+#define WT_LSM_TREE_COMPACTING 0x04 /* Tree being compacted */
+#define WT_LSM_TREE_NEED_SWITCH 0x08 /* New chunk needs creating */
+#define WT_LSM_TREE_OPEN 0x10 /* The tree is open */
+#define WT_LSM_TREE_THROTTLE 0x20 /* Throttle updates */
uint32_t flags;
#define WT_LSM_TREE_EXCLUSIVE 0x01 /* Tree is opened exclusively */
diff --git a/src/include/misc.h b/src/include/misc.h
index e50038b2c66..d28de81a6aa 100644
--- a/src/include/misc.h
+++ b/src/include/misc.h
@@ -186,6 +186,12 @@
((i)->mem != NULL && (i)->data >= (i)->mem && \
WT_PTRDIFF((i)->data, (i)->mem) < (i)->memsize)
+/* Copy the data and size fields of an item. */
+#define WT_ITEM_SET(dst, src) do { \
+ (dst).data = (src).data; \
+ (dst).size = (src).size; \
+} while (0)
+
/*
* In diagnostic mode we track the locations from which hazard pointers and
* scratch buffers were acquired.
diff --git a/src/include/schema.h b/src/include/schema.h
index 038404f5ea5..e24a19b03ca 100644
--- a/src/include/schema.h
+++ b/src/include/schema.h
@@ -65,43 +65,37 @@ struct __wt_table {
*/
#define WT_COLGROUPS(t) WT_MAX((t)->ncolgroups, 1)
+/*
+ * WT_WITH_SCHEMA_LOCK --
+ * Acquire the schema lock, perform an operation, drop the lock.
+ */
#define WT_WITH_SCHEMA_LOCK(session, op) do { \
- int __schema_locked = 0; \
- WT_DECL_SPINLOCK_ID(__id); /* Must appear last */ \
WT_ASSERT(session, \
F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) || \
!F_ISSET(session, WT_SESSION_NO_SCHEMA_LOCK)); \
- while (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) \
- if (session->skip_schema_lock || __wt_spin_trylock( \
- session, &S2C(session)->schema_lock, &__id) == 0) { \
- F_SET(session, WT_SESSION_SCHEMA_LOCKED); \
- __schema_locked = 1; \
- } else \
- __wt_yield(); \
- (op); \
- if (__schema_locked) { \
+ if (F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) { \
+ (op); \
+ } else { \
+ __wt_spin_lock(session, &S2C(session)->schema_lock); \
+ F_SET(session, WT_SESSION_SCHEMA_LOCKED); \
+ (op); \
+ __wt_spin_unlock(session, &S2C(session)->schema_lock); \
F_CLR(session, WT_SESSION_SCHEMA_LOCKED); \
- if (!session->skip_schema_lock) \
- __wt_spin_unlock( \
- session, &S2C(session)->schema_lock); \
} \
} while (0)
-/* Drop the schema lock, and re-acquire after operation. */
+/*
+ * WT_WITHOUT_SCHEMA_LOCK --
+ * Drop the schema lock, perform an operation, re-acquire the lock.
+ */
#define WT_WITHOUT_SCHEMA_LOCK(session, op) do { \
- WT_DECL_SPINLOCK_ID(__id); /* Must appear last */ \
- if (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) \
- (op); \
- else { \
+ if (F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) { \
__wt_spin_unlock(session, &S2C(session)->schema_lock); \
F_CLR(session, WT_SESSION_SCHEMA_LOCKED); \
(op); \
- while (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) { \
- if (__wt_spin_trylock(session, \
- &S2C(session)->schema_lock, &__id) == 0) \
- F_SET(session, WT_SESSION_SCHEMA_LOCKED);\
- else \
- __wt_yield(); \
- } \
+ __wt_spin_lock(session, &S2C(session)->schema_lock); \
+ F_SET(session, WT_SESSION_SCHEMA_LOCKED); \
+ } else { \
+ (op); \
} \
} while (0)
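For reference, a minimal usage sketch of the rewritten macros; the drop call mirrors existing callers in session_api.c, while __do_unlocked_work is a hypothetical stand-in for an operation that must run without the lock:

@code
/* Run a schema operation while holding the schema lock. */
WT_WITH_SCHEMA_LOCK(session,
    ret = __wt_schema_drop(session, uri, cfg));

/* Temporarily yield the lock around a nested operation. */
WT_WITHOUT_SCHEMA_LOCK(session,
    ret = __do_unlocked_work(session));
@endcode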
diff --git a/src/include/session.h b/src/include/session.h
index 5d566f8b62d..31d58ff61e5 100644
--- a/src/include/session.h
+++ b/src/include/session.h
@@ -113,8 +113,6 @@ struct __wt_session_impl {
int (*reconcile_cleanup)(WT_SESSION_IMPL *);
int compaction; /* Compaction did some work */
- int skip_schema_lock; /* Another thread holds the schema lock
- * on our behalf */
/*
* The split stash memory and hazard information persist past session
diff --git a/src/include/txn.i b/src/include/txn.i
index 3854429f8e4..81559bfe490 100644
--- a/src/include/txn.i
+++ b/src/include/txn.i
@@ -179,7 +179,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd)
/*
* __wt_txn_autocommit_check --
- * If an auto-commit transaction is required, start one.
+ * If an auto-commit transaction is required, start one.
*/
static inline int
__wt_txn_autocommit_check(WT_SESSION_IMPL *session)
@@ -195,23 +195,20 @@ __wt_txn_autocommit_check(WT_SESSION_IMPL *session)
}
/*
- * __wt_txn_current_id --
- * Get the current transaction ID.
- */
-static inline uint64_t
-__wt_txn_current_id(WT_SESSION_IMPL *session)
-{
- return (S2C(session)->txn_global.current);
-}
-
-/*
* __wt_txn_new_id --
* Allocate a new transaction ID.
*/
static inline uint64_t
__wt_txn_new_id(WT_SESSION_IMPL *session)
{
- return WT_ATOMIC_ADD(S2C(session)->txn_global.current, 1);
+ /*
+	/*
+	 * We want the global value to lead the allocated values, so that any
+	 * allocated transaction ID eventually becomes globally visible. When
+	 * there are no transactions running, the oldest_id will reach the
+ * global current ID, so we want post-increment semantics. Our atomic
+ * add primitive does pre-increment, so adjust the result here.
+ */
+ return WT_ATOMIC_ADD(S2C(session)->txn_global.current, 1) - 1;
}
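A small worked example of the adjustment, using illustrative values and a GCC-style fetch-and-add (which, like WT_ATOMIC_ADD per the comment above, returns the incremented value):

@code
uint64_t current = 10;                                /* global counter */
uint64_t id = __sync_add_and_fetch(&current, 1) - 1;  /* id == 10 */
/* current is now 11: the global value leads every allocated ID. */
@endcode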
/*
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index bbc5de7f13f..df2f7bba271 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -700,7 +700,7 @@ __clsm_get_current(
multiple = 0;
WT_FORALL_CURSORS(clsm, c, i) {
- if (!F_ISSET(c, WT_CURSTD_KEY_SET))
+ if (!F_ISSET(c, WT_CURSTD_KEY_INT))
continue;
if (current == NULL) {
current = c;
@@ -823,7 +823,7 @@ retry: /*
if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) {
check = 0;
WT_FORALL_CURSORS(clsm, c, i) {
- if (!F_ISSET(c, WT_CURSTD_KEY_SET))
+ if (!F_ISSET(c, WT_CURSTD_KEY_INT))
continue;
if (check) {
WT_ERR(WT_LSM_CURCMP(session,
@@ -906,7 +906,7 @@ retry: /*
if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) {
check = 0;
WT_FORALL_CURSORS(clsm, c, i) {
- if (!F_ISSET(c, WT_CURSTD_KEY_SET))
+ if (!F_ISSET(c, WT_CURSTD_KEY_INT))
continue;
if (check) {
WT_ERR(WT_LSM_CURCMP(session,
@@ -958,7 +958,7 @@ __clsm_reset_cursors(WT_CURSOR_LSM *clsm, WT_CURSOR *skip)
WT_FORALL_CURSORS(clsm, c, i) {
if (c == skip)
continue;
- if (F_ISSET(c, WT_CURSTD_KEY_SET))
+ if (F_ISSET(c, WT_CURSTD_KEY_INT))
WT_TRET(c->reset(c));
}
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c
index 7052d16c451..91affe53ef3 100644
--- a/src/lsm/lsm_manager.c
+++ b/src/lsm/lsm_manager.c
@@ -14,8 +14,28 @@ static int __lsm_manager_worker_setup(WT_SESSION_IMPL *);
static void * __lsm_worker_manager(void *);
/*
+ * __wt_lsm_manager_config --
+ * Re-configure the LSM manager.
+ */
+int
+__wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_CONFIG_ITEM cval;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(
+ session, cfg, "lsm_manager.worker_thread_max", &cval));
+ if (cval.val)
+ conn->lsm_manager.lsm_workers_max = (uint32_t)cval.val;
+ return (0);
+}
+
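A sketch of the knob this helper reads, assuming it is passed through WT_CONNECTION::reconfigure on this branch; the thread count is illustrative:

@code
/* Raise the LSM worker thread cap at runtime. */
ret = conn->reconfigure(conn, "lsm_manager=(worker_thread_max=6)");
@endcode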
+/*
* __wt_lsm_manager_start --
- * Start the LSM management infrastructure.
+ * Start the LSM management infrastructure. Our queues and locks were
+ * initialized when the connection was initialized.
*/
int
__wt_lsm_manager_start(WT_SESSION_IMPL *session)
@@ -54,8 +74,6 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session)
WT_ERR(__wt_thread_create(
session, &cookies[0].tid, __lsm_worker_manager, &cookies[0]));
- while (!F_ISSET(manager, WT_LSM_MANAGER_RUNNING))
- __wt_yield();
F_SET(S2C(session), WT_CONN_SERVER_LSM);
if (0) {
@@ -104,57 +122,59 @@ __wt_lsm_manager_destroy(WT_CONNECTION_IMPL *conn)
manager = &conn->lsm_manager;
removed = 0;
- if (manager->lsm_worker_cookies == NULL)
- return (0);
-
- /* Wait for the server to notice and wrap up. */
- while (F_ISSET(conn, WT_CONN_SERVER_LSM))
- __wt_yield();
+ if (manager->lsm_worker_cookies != NULL) {
+ /* Wait for the server to notice and wrap up. */
+ while (F_ISSET(conn, WT_CONN_SERVER_LSM))
+ __wt_yield();
- /* Clean up open LSM handles. */
- ret = __wt_lsm_tree_close_all(conn->default_session);
+ /* Clean up open LSM handles. */
+ ret = __wt_lsm_tree_close_all(conn->default_session);
- WT_TRET(__wt_thread_join(session, manager->lsm_worker_cookies[0].tid));
- manager->lsm_worker_cookies[0].tid = 0;
+ WT_TRET(__wt_thread_join(
+ session, manager->lsm_worker_cookies[0].tid));
+ manager->lsm_worker_cookies[0].tid = 0;
+
+ /* Release memory from any operations left on the queue. */
+ for (current = TAILQ_FIRST(&manager->switchqh);
+ current != NULL; current = next) {
+ next = TAILQ_NEXT(current, q);
+ TAILQ_REMOVE(&manager->switchqh, current, q);
+ ++removed;
+ __wt_lsm_manager_free_work_unit(session, current);
+ }
+ for (current = TAILQ_FIRST(&manager->appqh);
+ current != NULL; current = next) {
+ next = TAILQ_NEXT(current, q);
+ TAILQ_REMOVE(&manager->appqh, current, q);
+ ++removed;
+ __wt_lsm_manager_free_work_unit(session, current);
+ }
+ for (current = TAILQ_FIRST(&manager->managerqh);
+ current != NULL; current = next) {
+ next = TAILQ_NEXT(current, q);
+ TAILQ_REMOVE(&manager->managerqh, current, q);
+ ++removed;
+ __wt_lsm_manager_free_work_unit(session, current);
+ }
- /* Release memory from any operations left on the queue. */
- for (current = TAILQ_FIRST(&manager->switchqh);
- current != NULL; current = next) {
- next = TAILQ_NEXT(current, q);
- TAILQ_REMOVE(&manager->switchqh, current, q);
- ++removed;
- __wt_lsm_manager_free_work_unit(session, current);
- }
- for (current = TAILQ_FIRST(&manager->appqh);
- current != NULL; current = next) {
- next = TAILQ_NEXT(current, q);
- TAILQ_REMOVE(&manager->appqh, current, q);
- ++removed;
- __wt_lsm_manager_free_work_unit(session, current);
- }
- for (current = TAILQ_FIRST(&manager->managerqh);
- current != NULL; current = next) {
- next = TAILQ_NEXT(current, q);
- TAILQ_REMOVE(&manager->managerqh, current, q);
- ++removed;
- __wt_lsm_manager_free_work_unit(session, current);
- }
+ /* Close all LSM worker sessions. */
+ for (i = 0; i < manager->lsm_workers_max; i++) {
+ wt_session =
+ &manager->lsm_worker_cookies[i].session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ }
- /* Close all LSM worker sessions. */
- for (i = 0; i < manager->lsm_workers_max; i++) {
- wt_session = &manager->lsm_worker_cookies[i].session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
+ WT_STAT_FAST_CONN_INCRV(session,
+ lsm_work_units_discarded, removed);
+ __wt_free(session, manager->lsm_worker_cookies);
}
- WT_STAT_FAST_CONN_INCRV(session, lsm_work_units_discarded, removed);
-
+	/* Free resources that were allocated in connection initialization. */
__wt_spin_destroy(session, &manager->switch_lock);
__wt_spin_destroy(session, &manager->app_lock);
__wt_spin_destroy(session, &manager->manager_lock);
WT_TRET(__wt_cond_destroy(session, &manager->work_cond));
- __wt_free(session, manager->lsm_worker_cookies);
-
return (ret);
}
@@ -184,10 +204,10 @@ __lsm_manager_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if (lsm_tree->merge_aggressiveness > old_aggressive)
WT_RET(__wt_verbose(session, WT_VERB_LSM,
- "LSM merge got aggressive (%u), "
- "%u / %" PRIu64,
- lsm_tree->merge_aggressiveness, stallms,
- lsm_tree->chunk_fill_ms));
+ "LSM merge %s got aggressive (%u), "
+ "%u / %" PRIu64,
+ lsm_tree->name, lsm_tree->merge_aggressiveness, stallms,
+ lsm_tree->chunk_fill_ms));
return (0);
}
@@ -208,16 +228,6 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session)
WT_ASSERT(session, manager->lsm_workers == 1);
- /* Setup the spin locks for the queues. */
- WT_RET(__wt_spin_init(
- session, &manager->app_lock, "LSM application queue lock"));
- WT_RET(__wt_spin_init(
- session, &manager->manager_lock, "LSM manager queue lock"));
- WT_RET(__wt_spin_init(
- session, &manager->switch_lock, "LSM switch queue lock"));
- WT_RET(__wt_cond_alloc(
- session, "LSM worker cond", 0, &manager->work_cond));
-
worker_args = &manager->lsm_worker_cookies[1];
worker_args->work_cond = manager->work_cond;
worker_args->id = manager->lsm_workers++;
@@ -252,12 +262,6 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session)
F_SET(worker_args, WT_LSM_WORK_MERGE);
WT_RET(__wt_lsm_worker_start(session, worker_args));
}
- /*
- * Yield to give new threads a chance to get started. Indicate that
- * we have allocated resources and are running now.
- */
- __wt_yield();
- F_SET(manager, WT_LSM_MANAGER_RUNNING);
return (0);
}
@@ -538,7 +542,7 @@ __wt_lsm_manager_push_entry(
(void)WT_ATOMIC_ADD(lsm_tree->queue_ref, 1);
WT_STAT_FAST_CONN_INCR(session, lsm_work_units_created);
- switch (type) {
+ switch (type & WT_LSM_WORK_MASK) {
case WT_LSM_WORK_SWITCH:
__wt_spin_lock(session, &manager->switch_lock);
TAILQ_INSERT_TAIL(&manager->switchqh, entry, q);
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index bf758abd6b1..363fe77b93e 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -61,6 +61,7 @@ __wt_lsm_merge(
uint32_t aggressive, generation, max_gap, max_gen, max_level, start_id;
uint64_t insert_count, record_count, chunk_size;
u_int dest_id, end_chunk, i, merge_max, merge_min, nchunks, start_chunk;
+ u_int verb;
int create_bloom, locked, tret;
const char *cfg[3];
const char *drop_cfg[] =
@@ -72,16 +73,17 @@ __wt_lsm_merge(
dest = src = NULL;
locked = 0;
start_id = 0;
- aggressive = lsm_tree->merge_aggressiveness;
/*
- * If the tree is open read-only be very aggressive. Otherwise, we can
- * spend a long time waiting for merges to start in read-only
- * applications.
+ * If the tree is open read-only or we are compacting, be very
+ * aggressive. Otherwise, we can spend a long time waiting for merges
+ * to start in read-only applications.
*/
- if (!lsm_tree->modified)
+ if (!lsm_tree->modified ||
+ F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING))
lsm_tree->merge_aggressiveness = 10;
+ aggressive = lsm_tree->merge_aggressiveness;
merge_max = (aggressive > 5) ? 100 : lsm_tree->merge_min;
merge_min = (aggressive > 5) ? 2 : lsm_tree->merge_min;
max_gap = (aggressive + 4) / 5;
@@ -249,10 +251,22 @@ __wt_lsm_merge(
/* Allocate an ID for the merge. */
dest_id = WT_ATOMIC_ADD(lsm_tree->last, 1);
- WT_RET(__wt_verbose(session, WT_VERB_LSM,
- "Merging chunks %u-%u into %u (%" PRIu64 " records)"
- ", generation %" PRIu32,
- start_chunk, end_chunk, dest_id, record_count, generation));
+ /*
+ * We only want to do the chunk loop if we're running with verbose,
+ * so we wrap these statements in the conditional. Avoid the loop
+ * in the normal path.
+ */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_LSM)) {
+ WT_RET(__wt_verbose(session, WT_VERB_LSM,
+ "Merging %s chunks %u-%u into %u (%" PRIu64 " records)"
+ ", generation %" PRIu32,
+ lsm_tree->name,
+ start_chunk, end_chunk, dest_id, record_count, generation));
+ for (verb = start_chunk; verb <= end_chunk; verb++)
+ WT_RET(__wt_verbose(session, WT_VERB_LSM,
+ "%s: Chunk[%u] id %u",
+ lsm_tree->name, verb, lsm_tree->chunk[verb]->id));
+ }
WT_RET(__wt_calloc_def(session, 1, &chunk));
chunk->id = dest_id;
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index 30f97821a8c..fac47ff0465 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -733,7 +733,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
/* Set the switch transaction in the previous chunk, if necessary. */
if (chunk != NULL && chunk->switch_txn == WT_TXN_NONE)
- chunk->switch_txn = __wt_txn_current_id(session);
+ chunk->switch_txn = __wt_txn_new_id(session);
/* Update the throttle time. */
__wt_lsm_tree_throttle(session, lsm_tree, 0);
@@ -744,8 +744,8 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
nchunks + 1, &lsm_tree->chunk));
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "Tree switch to: %" PRIu32 ", checkpoint throttle %ld, "
- "merge throttle %ld",
+ "Tree %s switch to: %" PRIu32 ", checkpoint throttle %ld, "
+ "merge throttle %ld", lsm_tree->name,
new_id, lsm_tree->ckpt_throttle, lsm_tree->merge_throttle));
WT_ERR(__wt_calloc_def(session, 1, &chunk));
@@ -1007,9 +1007,10 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip)
WT_LSM_CHUNK *chunk;
WT_LSM_TREE *lsm_tree;
time_t begin, end;
- int i, compacting, locked;
+ int i, compacting, flushing, locked, ref;
- compacting = locked = 0;
+ compacting = flushing = locked = ref = 0;
+ chunk = NULL;
/*
* This function is applied to all matching sources: ignore anything
* that is not an LSM tree.
@@ -1028,6 +1029,19 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip)
WT_ERR(__wt_seconds(session, &begin));
+ /*
+ * Compacting has two distinct phases.
+	 * 1. All in-memory chunks up to and including the current
+	 *    chunk must be flushed. Normally, the flush code
+ * does not flush the last, in-use chunk, so we set a force
+ * flag to include that last chunk. We monitor the state of the
+ * last chunk and periodically push another forced flush work
+ * unit until it is complete.
+ * 2. After all flushing is done, we move onto the merging
+ * phase for compaction. Again, we monitor the state and
+ * continue to push merge work units until all merging is done.
+ */
+
/* Lock the tree: single-thread compaction. */
WT_ERR(__wt_lsm_tree_lock(session, lsm_tree, 1));
locked = 1;
@@ -1036,12 +1050,13 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip)
lsm_tree->merge_throttle = 0;
lsm_tree->merge_aggressiveness = 0;
- /* If another thread started compacting this tree, we're done. */
- if (F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING))
+ /* If another thread started a compact on this tree, we're done. */
+ if (F_ISSET(lsm_tree,
+ WT_LSM_TREE_COMPACT_FLUSH | WT_LSM_TREE_COMPACTING))
goto err;
- compacting = 1;
- F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
+ flushing = 1;
+ F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH);
/*
* Set the switch transaction on the current chunk, if it
@@ -1049,26 +1064,67 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip)
* can be flushed by the checkpoint worker.
*/
if (lsm_tree->nchunks > 0 &&
- (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL &&
- chunk->switch_txn == WT_TXN_NONE)
- chunk->switch_txn = __wt_txn_current_id(session);
+ (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) {
+ if (chunk->switch_txn == WT_TXN_NONE)
+ chunk->switch_txn = __wt_txn_new_id(session);
+ /*
+		 * If we have a chunk, we will be checking whether it has
+		 * reached disk, so add a reference to keep it available.
+ */
+ (void)WT_ATOMIC_ADD(chunk->refcnt, 1);
+ ref = 1;
+ }
locked = 0;
WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree));
- /* Make sure the in-memory chunk gets flushed but not switched. */
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_FLUSH, lsm_tree));
+ if (chunk != NULL)
+ WT_ERR(__wt_verbose(session, WT_VERB_LSM,
+ "Compact force flush %s flags 0x%" PRIx32
+ " chunk %u flags 0x%"
+ PRIx32, name, lsm_tree->flags, chunk->id, chunk->flags));
+ /* Make sure the in-memory chunk gets flushed but not switched. */
+ WT_ERR(__wt_lsm_manager_push_entry(session,
+ WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree));
/* Wait for the work unit queues to drain. */
while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) {
/*
+ * The flush flag is cleared when the chunk has been flushed.
+ * Continue to push forced flushes until the chunk is on disk.
+ * Once it is on disk move to the compacting phase.
+ */
+ if (flushing && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) {
+ if (chunk != NULL &&
+ !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
+ WT_ERR(__wt_verbose(session, WT_VERB_LSM,
+ "Compact flush retry %s chunk %u",
+ name, chunk->id));
+ F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH);
+ WT_ERR(__wt_lsm_manager_push_entry(session,
+ WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE,
+ lsm_tree));
+ } else {
+ if (ref) {
+ WT_ASSERT(session, chunk != NULL);
+ WT_ERR(__wt_verbose(session,
+ WT_VERB_LSM,
+ "Compact flush done %s chunk %u",
+ name, chunk->id));
+ (void)WT_ATOMIC_SUB(chunk->refcnt, 1);
+ }
+ flushing = ref = 0;
+ compacting = 1;
+ F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
+ }
+ }
+ /*
* The compacting flag is cleared when no merges can be done.
* Ensure that we push through some aggressive merges before
* stopping otherwise we might not do merges that would
* span chunks with different generations.
*/
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) {
+ if (compacting && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) {
if (lsm_tree->merge_aggressiveness < 10) {
F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
lsm_tree->merge_aggressiveness = 10;
@@ -1086,21 +1142,29 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip)
* done. If we are pushing merges, make sure they are
* aggressive, to avoid duplicating effort.
*/
+ if (compacting)
#define COMPACT_PARALLEL_MERGES 5
- for (i = lsm_tree->queue_ref;
- i < COMPACT_PARALLEL_MERGES; i++) {
- lsm_tree->merge_aggressiveness = 10;
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_MERGE, lsm_tree));
- }
+ for (i = lsm_tree->queue_ref;
+ i < COMPACT_PARALLEL_MERGES; i++) {
+ lsm_tree->merge_aggressiveness = 10;
+ WT_ERR(__wt_lsm_manager_push_entry(
+ session, WT_LSM_WORK_MERGE, lsm_tree));
+ }
}
-err: if (locked)
- WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree));
- /* Ensure the compacting flag is cleared if we set it. */
+err:
+ /* Ensure anything we set is cleared. */
+ if (flushing)
+ F_CLR(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH);
+ if (ref)
+ (void)WT_ATOMIC_SUB(chunk->refcnt, 1);
if (compacting) {
F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING);
lsm_tree->merge_aggressiveness = 0;
+ if (locked)
+ WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree));
}
+ WT_ERR(__wt_verbose(session, WT_VERB_LSM,
+ "Compact %s complete, return %d", name, ret));
__wt_lsm_tree_release(session, lsm_tree);
return (ret);
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index 5c96c82f84c..eb791f98f5f 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -66,22 +66,39 @@ err: WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree));
* Find and pin a chunk in the LSM tree that is likely to need flushing.
*/
int
-__wt_lsm_get_chunk_to_flush(
- WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK **chunkp)
+__wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
+ WT_LSM_TREE *lsm_tree, int force, int *last, WT_LSM_CHUNK **chunkp)
{
- u_int i;
+ u_int i, end;
*chunkp = NULL;
+ *last = 0;
WT_ASSERT(session, lsm_tree->queue_ref > 0);
WT_RET(__wt_lsm_tree_lock(session, lsm_tree, 0));
if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE))
return (__wt_lsm_tree_unlock(session, lsm_tree));
- for (i = 0; i < lsm_tree->nchunks - 1; i++) {
+ /*
+ * Normally we don't want to force out the last chunk. But if we're
+ * doing a forced flush, likely from a compact call, then we want
+ * to include the final chunk.
+ */
+ end = force ? lsm_tree->nchunks : lsm_tree->nchunks - 1;
+ for (i = 0; i < end; i++) {
if (!F_ISSET(lsm_tree->chunk[i], WT_LSM_CHUNK_ONDISK)) {
(void)WT_ATOMIC_ADD(lsm_tree->chunk[i]->refcnt, 1);
+ WT_RET(__wt_verbose(session, WT_VERB_LSM,
+ "Flush%s: return chunk %u of %u: %s",
+ force ? " w/ force" : "", i, end - 1,
+ lsm_tree->chunk[i]->uri));
*chunkp = lsm_tree->chunk[i];
+ /*
+ * Let the caller know if this is the last chunk we
+ * could have selected or an earlier one.
+ */
+ if (i == end - 1)
+ *last = 1;
break;
}
}
@@ -216,16 +233,25 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
else
WT_RET_MSG(session, ret, "discard handle");
}
- if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
+ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
+ WT_RET(__wt_verbose(session, WT_VERB_LSM,
+ "LSM worker %s already on disk",
+ chunk->uri));
return (0);
+ }
/* Stop if a running transaction needs the chunk. */
__wt_txn_update_oldest(session);
if (chunk->switch_txn == WT_TXN_NONE ||
- !__wt_txn_visible_all(session, chunk->switch_txn))
+ !__wt_txn_visible_all(session, chunk->switch_txn)) {
+ WT_RET(__wt_verbose(session, WT_VERB_LSM,
+ "LSM worker %s: running transaction, return",
+ chunk->uri));
return (0);
+ }
- WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker flushing"));
+ WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker flushing %s",
+ chunk->uri));
/*
* Flush the file before checkpointing: this is the expensive part in
@@ -248,7 +274,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
}
WT_RET(ret);
- WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing"));
+ WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s",
+ chunk->uri));
WT_WITH_SCHEMA_LOCK(session,
ret = __wt_schema_worker(session, chunk->uri,
@@ -289,7 +316,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
/* Make sure we aren't pinning a transaction ID. */
__wt_txn_release_snapshot(session);
- WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointed"));
+ WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointed %s",
+ chunk->uri));
/*
* Schedule a bloom filter create for our newly flushed chunk */
if (!FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF))
diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c
index 0bb6cfb9c08..1f2b76ba720 100644
--- a/src/lsm/lsm_worker.c
+++ b/src/lsm/lsm_worker.c
@@ -32,6 +32,7 @@ __lsm_worker_general_op(
WT_DECL_RET;
WT_LSM_CHUNK *chunk;
WT_LSM_WORK_UNIT *entry;
+ int force, last;
*completed = 0;
if (!F_ISSET(cookie, WT_LSM_WORK_FLUSH) &&
@@ -43,16 +44,34 @@ __lsm_worker_general_op(
cookie->flags, &entry)) != 0 || entry == NULL)
return (ret);
- if (entry->flags == WT_LSM_WORK_FLUSH) {
- WT_ERR(__wt_lsm_get_chunk_to_flush(
- session, entry->lsm_tree, &chunk));
+ if ((entry->flags & WT_LSM_WORK_MASK) == WT_LSM_WORK_FLUSH) {
+ force = F_ISSET(entry, WT_LSM_WORK_FORCE);
+ F_CLR(entry, WT_LSM_WORK_FORCE);
+ last = 0;
+ WT_ERR(__wt_lsm_get_chunk_to_flush(session,
+ entry->lsm_tree, force, &last, &chunk));
+ /*
+ * If we got a chunk to flush, checkpoint it.
+ */
if (chunk != NULL) {
+ WT_ERR(__wt_verbose(session, WT_VERB_LSM,
+ "Flush%s%s chunk %d %s",
+ force ? " w/ force" : "",
+ last ? " last" : "",
+ chunk->id, chunk->uri));
ret = __wt_lsm_checkpoint_chunk(
session, entry->lsm_tree, chunk);
WT_ASSERT(session, chunk->refcnt > 0);
(void)WT_ATOMIC_SUB(chunk->refcnt, 1);
WT_ERR(ret);
}
+ /*
+ * If we flushed the last chunk for a compact, clear the
+		 * flag so compact knows the flush phase is complete.
+ */
+ if (last && force &&
+ F_ISSET(entry->lsm_tree, WT_LSM_TREE_COMPACT_FLUSH))
+ F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACT_FLUSH);
} else if (entry->flags == WT_LSM_WORK_DROP)
WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree));
else if (entry->flags == WT_LSM_WORK_BLOOM) {
@@ -93,7 +112,8 @@ __lsm_worker(void *arg)
(ret = __wt_lsm_manager_pop_entry(
session, WT_LSM_WORK_SWITCH, &entry)) == 0 &&
entry != NULL)
- WT_ERR(__wt_lsm_work_switch(session, &entry, &progress));
+ WT_ERR(
+ __wt_lsm_work_switch(session, &entry, &progress));
/* Flag an error if the pop failed. */
WT_ERR(ret);
diff --git a/src/schema/schema_util.c b/src/schema/schema_util.c
index 90e5fb42dc1..263f56f1c41 100644
--- a/src/schema/schema_util.c
+++ b/src/schema/schema_util.c
@@ -23,11 +23,11 @@ __wt_schema_get_source(WT_SESSION_IMPL *session, const char *name)
}
/*
- * __wt_schema_name_check --
+ * __wt_str_name_check --
* Disallow any use of the WiredTiger name space.
*/
int
-__wt_schema_name_check(WT_SESSION_IMPL *session, const char *uri)
+__wt_str_name_check(WT_SESSION_IMPL *session, const char *str)
{
const char *name, *sep;
int skipped;
@@ -37,7 +37,7 @@ __wt_schema_name_check(WT_SESSION_IMPL *session, const char *uri)
* "bad" if the application truncated the metadata file. Skip any
* leading URI prefix, check and then skip over a table name.
*/
- name = uri;
+ name = str;
for (skipped = 0; skipped < 2; skipped++) {
if ((sep = strchr(name, ':')) == NULL)
break;
@@ -62,3 +62,23 @@ __wt_schema_name_check(WT_SESSION_IMPL *session, const char *uri)
return (0);
}
+
+/*
+ * __wt_name_check --
+ * Disallow any use of the WiredTiger name space.
+ */
+int
+__wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len)
+{
+ WT_DECL_RET;
+ WT_DECL_ITEM(tmp);
+
+ WT_RET(__wt_scr_alloc(session, len, &tmp));
+
+ WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)len, str));
+
+ ret = __wt_str_name_check(session, tmp->data);
+
+err: __wt_scr_free(&tmp);
+ return (ret);
+}
diff --git a/src/session/session_api.c b/src/session/session_api.c
index eff89231d48..d4dc26f6c49 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -362,7 +362,7 @@ __session_create(WT_SESSION *wt_session, const char *uri, const char *config)
WT_UNUSED(cfg);
/* Disallow objects in the WiredTiger name space. */
- WT_ERR(__wt_schema_name_check(session, uri));
+ WT_ERR(__wt_str_name_check(session, uri));
/*
* Type configuration only applies to tables, column groups and indexes.
@@ -430,8 +430,8 @@ __session_rename(WT_SESSION *wt_session,
SESSION_API_CALL(session, rename, config, cfg);
/* Disallow objects in the WiredTiger name space. */
- WT_ERR(__wt_schema_name_check(session, uri));
- WT_ERR(__wt_schema_name_check(session, newuri));
+ WT_ERR(__wt_str_name_check(session, uri));
+ WT_ERR(__wt_str_name_check(session, newuri));
WT_WITH_SCHEMA_LOCK(session,
ret = __wt_schema_rename(session, uri, newuri, cfg));
@@ -451,7 +451,7 @@ __session_compact(WT_SESSION *wt_session, const char *uri, const char *config)
session = (WT_SESSION_IMPL *)wt_session;
/* Disallow objects in the WiredTiger name space. */
- WT_RET(__wt_schema_name_check(session, uri));
+ WT_RET(__wt_str_name_check(session, uri));
if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
!WT_PREFIX_MATCH(uri, "file:") &&
@@ -477,7 +477,7 @@ __session_drop(WT_SESSION *wt_session, const char *uri, const char *config)
SESSION_API_CALL(session, drop, config, cfg);
/* Disallow objects in the WiredTiger name space. */
- WT_ERR(__wt_schema_name_check(session, uri));
+ WT_ERR(__wt_str_name_check(session, uri));
WT_WITH_SCHEMA_LOCK(session,
ret = __wt_schema_drop(session, uri, cfg));
@@ -539,7 +539,7 @@ __session_truncate(WT_SESSION *wt_session,
if (uri != NULL) {
/* Disallow objects in the WiredTiger name space. */
- WT_ERR(__wt_schema_name_check(session, uri));
+ WT_ERR(__wt_str_name_check(session, uri));
WT_WITH_SCHEMA_LOCK(session,
ret = __wt_schema_truncate(session, uri, cfg));
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 71f1c8bb2ae..0bebce927fe 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -8,6 +8,30 @@
#include "wt_internal.h"
/*
+ * __wt_checkpoint_name_ok --
+ * Complain if the checkpoint name isn't acceptable.
+ */
+int
+__wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len)
+{
+ /* Check for characters we don't want to see in a metadata file. */
+ WT_RET(__wt_name_check(session, name, len));
+
+ /*
+ * The internal checkpoint name is special, applications aren't allowed
+ * to use it. Be aggressive and disallow any matching prefix, it makes
+ * things easier when checking in other places.
+ */
+ if (len < strlen(WT_CHECKPOINT))
+ return (0);
+ if (!WT_PREFIX_MATCH(name, WT_CHECKPOINT))
+ return (0);
+
+ WT_RET_MSG(session, EINVAL,
+ "the checkpoint name \"%s\" is reserved", WT_CHECKPOINT);
+}
+
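A sketch of the behavior this enforces; assuming WT_CHECKPOINT expands to the internal name "WiredTigerCheckpoint" and an open WT_SESSION *session, any name sharing that prefix is rejected:

@code
ret = session->checkpoint(session, "name=WiredTigerCheckpoint"); /* EINVAL */
ret = session->checkpoint(session, "name=nightly");              /* succeeds */
@endcode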
+/*
* __checkpoint_name_check --
* Check for an attempt to name a checkpoint that includes anything
* other than a file object.
@@ -75,9 +99,11 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[],
target_list = 0;
- /* Flag if this is a named checkpoint. */
- WT_ERR(__wt_config_gets(session, cfg, "name", &cval));
+ /* Flag if this is a named checkpoint, and check if the name is OK. */
+ WT_RET(__wt_config_gets(session, cfg, "name", &cval));
named = cval.len != 0;
+ if (named)
+ WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len));
/* Step through the targets and optionally operate on each one. */
WT_ERR(__wt_config_gets(session, cfg, "target", &cval));
@@ -413,27 +439,6 @@ err: /*
}
/*
- * __ckpt_name_ok --
- * Complain if our reserved checkpoint name is used.
- */
-static int
-__ckpt_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len)
-{
- /*
- * The internal checkpoint name is special, applications aren't allowed
- * to use it. Be aggressive and disallow any matching prefix, it makes
- * things easier when checking in other places.
- */
- if (len < strlen(WT_CHECKPOINT))
- return (0);
- if (!WT_PREFIX_MATCH(name, WT_CHECKPOINT))
- return (0);
-
- WT_RET_MSG(session, EINVAL,
- "the checkpoint name \"%s\" is reserved", WT_CHECKPOINT);
-}
-
-/*
* __drop --
* Drop all checkpoints with a specific name.
*/
@@ -575,7 +580,7 @@ __checkpoint_worker(
if (cval.len == 0)
name = WT_CHECKPOINT;
else {
- WT_ERR(__ckpt_name_ok(session, cval.str, cval.len));
+ WT_ERR(__wt_checkpoint_name_ok(session, cval.str, cval.len));
WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc));
name = name_alloc;
}
@@ -588,12 +593,12 @@ __checkpoint_worker(
WT_ERR(__wt_config_subinit(session, &dropconf, &cval));
while ((ret =
__wt_config_next(&dropconf, &k, &v)) == 0) {
- /* Disallow the reserved checkpoint name. */
+ /* Disallow unsafe checkpoint names. */
if (v.len == 0)
- WT_ERR(__ckpt_name_ok(
+ WT_ERR(__wt_checkpoint_name_ok(
session, k.str, k.len));
else
- WT_ERR(__ckpt_name_ok(
+ WT_ERR(__wt_checkpoint_name_ok(
session, v.str, v.len));
if (v.len == 0)
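
The reserved-prefix logic in __wt_checkpoint_name_ok boils down to a bounded prefix comparison. A standalone sketch, assuming the internal name WT_CHECKPOINT expands to "WiredTigerCheckpoint" (consistent with the reserved-name strings exercised by the Python test further below):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define	RESERVED_CKPT	"WiredTigerCheckpoint"

/*
 * checkpoint_name_ok --
 *	Return false for any name that begins with the reserved internal
 *	checkpoint name; shorter names cannot match and are accepted.
 */
static bool
checkpoint_name_ok(const char *name, size_t len)
{
	size_t rlen = strlen(RESERVED_CKPT);

	if (len < rlen)
		return (true);
	return (strncmp(name, RESERVED_CKPT, rlen) != 0);
}

int
main(void)
{
	printf("%d\n", checkpoint_name_ok("midnight", strlen("midnight")));
	printf("%d\n", checkpoint_name_ok(
	    "WiredTigerCheckpointX", strlen("WiredTigerCheckpointX")));
	return (0);
}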
diff --git a/test/format/config.c b/test/format/config.c
index 509dc7684ec..519c5db7890 100644
--- a/test/format/config.c
+++ b/test/format/config.c
@@ -243,26 +243,32 @@ config_compression(void)
cp = config_find("compression", strlen("compression"));
if (!(cp->flags & C_PERM)) {
cstr = "compression=none";
- switch (MMRAND(1, 10)) {
- case 1: case 2: case 3: /* 30% */
+ switch (MMRAND(1, 20)) {
+ case 1: case 2: case 3: /* 30% no compression */
+ case 4: case 5: case 6:
break;
- case 4: case 5: /* 20% */
+ case 7: case 8: case 9: case 10: /* 20% bzip */
if (access(BZIP_PATH, R_OK) == 0)
cstr = "compression=bzip";
break;
- case 6: /* 10% */
+ case 11: /* 5% bzip-raw */
if (access(BZIP_PATH, R_OK) == 0)
cstr = "compression=bzip-raw";
break;
- case 7: case 8: /* 20% */
+ case 12: case 13: case 14: case 15: /* 20% snappy */
if (access(SNAPPY_PATH, R_OK) == 0)
cstr = "compression=snappy";
break;
- case 9: case 10: /* 20% */
+ case 16: case 17: case 18: case 19: /* 20% zlib */
if (access(ZLIB_PATH, R_OK) == 0)
cstr = "compression=zlib";
break;
+ case 20: /* 5% zlib-noraw */
+ if (access(ZLIB_PATH, R_OK) == 0)
+ cstr = "compression=zlib-noraw";
+ break;
}
+
config_single(cstr, 0);
}
@@ -281,6 +287,7 @@ config_compression(void)
die(0, "snappy library not found or not readable");
break;
case COMPRESS_ZLIB:
+ case COMPRESS_ZLIB_NO_RAW:
if (access(ZLIB_PATH, R_OK) != 0)
die(0, "zlib library not found or not readable");
break;
@@ -549,6 +556,8 @@ config_map_compression(const char *s, u_int *vp)
*vp = COMPRESS_SNAPPY;
else if (strcmp(s, "zlib") == 0)
*vp = COMPRESS_ZLIB;
+ else if (strcmp(s, "zlib-noraw") == 0)
+ *vp = COMPRESS_ZLIB_NO_RAW;
else
die(EINVAL, "illegal compression configuration: %s", s);
}
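
The enlarged switch above moves the selection from a 1..10 draw to a 1..20 draw so bzip-raw and zlib-noraw can each get a 5% share. A distilled sketch of the resulting distribution, with rand() standing in for the harness's MMRAND macro and the access(2) availability checks omitted:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * pick_compression --
 *	Map a uniform draw in 1..20 onto the distribution used above:
 *	none 30%, bzip 20%, bzip-raw 5%, snappy 20%, zlib 20%,
 *	zlib-noraw 5%.
 */
static const char *
pick_compression(int roll)
{
	if (roll <= 6)
		return ("compression=none");
	if (roll <= 10)
		return ("compression=bzip");
	if (roll == 11)
		return ("compression=bzip-raw");
	if (roll <= 15)
		return ("compression=snappy");
	if (roll <= 19)
		return ("compression=zlib");
	return ("compression=zlib-noraw");
}

int
main(void)
{
	int i;

	srand((unsigned)time(NULL));
	for (i = 0; i < 5; i++)
		printf("%s\n", pick_compression(rand() % 20 + 1));
	return (0);
}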
diff --git a/test/format/config.h b/test/format/config.h
index a32df3de95c..9852fafabf7 100644
--- a/test/format/config.h
+++ b/test/format/config.h
@@ -115,7 +115,8 @@ static CONFIG c[] = {
C_BOOL, 10, 0, 0, &g.c_compact, NULL },
{ "compression",
- "type of compression (none | bzip | bzip-raw | lzo | snappy | zlib)",
+ "type of compression "
+ "(none | bzip | bzip-raw | lzo | snappy | zlib | zlib-noraw)",
C_IGNORE|C_STRING, 1, 5, 5, NULL, &g.c_compression },
{ "data_extend",
diff --git a/test/format/format.h b/test/format/format.h
index 0e45a28b3ef..1f2b363e9a4 100644
--- a/test/format/format.h
+++ b/test/format/format.h
@@ -205,6 +205,7 @@ typedef struct {
#define COMPRESS_LZO 4
#define COMPRESS_SNAPPY 5
#define COMPRESS_ZLIB 6
+#define COMPRESS_ZLIB_NO_RAW 7
u_int c_compression_flag; /* Compression flag value */
#define ISOLATION_RANDOM 1
diff --git a/test/format/ops.c b/test/format/ops.c
index 8c5a75e57a3..e38b75f0deb 100644
--- a/test/format/ops.c
+++ b/test/format/ops.c
@@ -220,21 +220,13 @@ ops(void *arg)
/*
* We can't checkpoint or swap sessions/cursors while in a
* transaction, resolve any running transaction.
- *
- * Reset the cursor regardless: we may block waiting for a lock
- * and there is no reason to keep pages pinned.
*/
- if (cnt == ckpt_op || cnt == session_op) {
- if (intxn) {
- if ((ret = session->commit_transaction(
- session, NULL)) != 0)
- die(ret, "session.commit_transaction");
- ++tinfo->commit;
- intxn = 0;
- }
- if (cursor != NULL &&
- (ret = cursor->reset(cursor)) != 0)
- die(ret, "cursor.reset");
+ if (intxn && (cnt == ckpt_op || cnt == session_op)) {
+ if ((ret = session->commit_transaction(
+ session, NULL)) != 0)
+ die(ret, "session.commit_transaction");
+ ++tinfo->commit;
+ intxn = 0;
}
/* Open up a new session and cursors. */
@@ -372,13 +364,6 @@ ops(void *arg)
if (g.append_cnt >= g.append_max)
goto skip_insert;
- /*
- * Reset the standard cursor so it doesn't keep
- * pages pinned.
- */
- if ((ret = cursor->reset(cursor)) != 0)
- die(ret, "cursor.reset");
-
/* Insert, then reset the insert cursor. */
if (col_insert(
cursor_insert, &key, &value, &keyno))
@@ -431,6 +416,10 @@ skip_insert: if (col_update(cursor, &key, &value, keyno))
if (read_row(cursor, &key, keyno))
goto deadlock;
+ /* Reset the cursor: there is no reason to keep pages pinned. */
+ if (cursor != NULL && (ret = cursor->reset(cursor)) != 0)
+ die(ret, "cursor.reset");
+
/*
* If we're in the transaction, commit 40% of the time and
* rollback 10% of the time.
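
The ops.c rework drops the special-case cursor resets at checkpoint/session-swap time and instead resets the cursor after every operation, so no pages stay pinned between operations. A sketch of that pattern against the public API (op_done and its parameters are illustrative, not part of the test harness):

#include <wiredtiger.h>

/*
 * op_done --
 *	After each operation, reset the cursor so no pages stay pinned;
 *	resolve the transaction only when a checkpoint or session swap
 *	is next. Error handling is trimmed for brevity.
 */
static int
op_done(WT_SESSION *session, WT_CURSOR *cursor, int in_txn, int resolve)
{
	int ret;

	/* Release any pinned pages regardless of transaction state. */
	if (cursor != NULL && (ret = cursor->reset(cursor)) != 0)
		return (ret);

	/* Commit only when a checkpoint or session swap is imminent. */
	if (in_txn && resolve &&
	    (ret = session->commit_transaction(session, NULL)) != 0)
		return (ret);
	return (0);
}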
diff --git a/test/format/wts.c b/test/format/wts.c
index 1a83fa92894..e495956fd2e 100644
--- a/test/format/wts.c
+++ b/test/format/wts.c
@@ -272,6 +272,10 @@ wts_create(void)
p += snprintf(p, (size_t)(end - p),
",block_compressor=\"zlib\"");
break;
+ case COMPRESS_ZLIB_NO_RAW:
+ p += snprintf(p, (size_t)(end - p),
+ ",block_compressor=\"zlib-noraw\"");
+ break;
}
/* Configure Btree internal key truncation. */
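
wts_create() builds its configuration string by advancing a pointer past each snprintf() so options are appended in sequence. A minimal sketch of that idiom (the option values are examples, and the buffer is sized generously so the pointer never passes the end):

#include <stdio.h>

int
main(void)
{
	char config[256], *p, *end;

	p = config;
	end = config + sizeof(config);

	/* Append each option after the previous one. */
	p += snprintf(p, (size_t)(end - p), "key_format=S,value_format=S");
	p += snprintf(p, (size_t)(end - p),
	    ",block_compressor=\"zlib-noraw\"");

	printf("%s\n", config);
	return (0);
}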
diff --git a/test/suite/test_checkpoint01.py b/test/suite/test_checkpoint01.py
index 153ea015cf5..ab4dbe18bd6 100644
--- a/test/suite/test_checkpoint01.py
+++ b/test/suite/test_checkpoint01.py
@@ -308,8 +308,8 @@ class test_checkpoint_last(wttest.WiredTigerTestCase):
# Check we can't use the reserved name as an application checkpoint name.
-class test_checkpoint_last_name(wttest.WiredTigerTestCase):
- def test_checkpoint_last_name(self):
+class test_checkpoint_illegal_name(wttest.WiredTigerTestCase):
+ def test_checkpoint_illegal_name(self):
simple_populate(self, "file:checkpoint", 'key_format=S', 100)
msg = '/the checkpoint name.*is reserved/'
for conf in (
@@ -324,6 +324,12 @@ class test_checkpoint_last_name(wttest.WiredTigerTestCase):
'drop=(to=WiredTigerCheckpointX)'):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.checkpoint(conf), msg)
+ msg = '/WiredTiger objects should not include grouping/'
+ for conf in (
+ 'name=check{point',
+ 'name=check\\point'):
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.checkpoint(conf), msg)
# Check we can't name checkpoints that include LSM tables.