diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2017-03-24 16:00:44 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2017-03-24 16:00:44 +1100 |
commit | 16be8160ea3682f18df9bcc6d5addda0c0114137 (patch) | |
tree | b3460592742413f28654c5cf02e596f807903f22 | |
parent | cc2f15f595b16479affd73791c207da334453bcc (diff) | |
parent | e4edaa7b73ca8583506f23a0c6fe701d6213d836 (diff) | |
download | mongo-16be8160ea3682f18df9bcc6d5addda0c0114137.tar.gz |
Merge branch 'develop' into mongodb-3.6
136 files changed, 2577 insertions, 1570 deletions
diff --git a/.gitignore b/.gitignore index c7b3ade9e87..4611f2aa98c 100644 --- a/.gitignore +++ b/.gitignore @@ -90,24 +90,28 @@ _wiredtiger.pyd **/examples/c/ex_pack **/examples/c/ex_process **/examples/c/ex_schema -**/examples/c/ex_scope **/examples/c/ex_stat **/examples/c/ex_sync **/examples/c/ex_thread **/test/bloom/t **/test/checkpoint/t -**/test/csuite/test_wt1965_col_efficiency -**/test/csuite/test_wt2246_col_append -**/test/csuite/test_wt2323_join_visibility -**/test/csuite/test_wt2403_lsm_workload -**/test/csuite/test_wt2447_join_main_table -**/test/csuite/test_wt2535_insert_race -**/test/csuite/test_wt2592_join_schema -**/test/csuite/test_wt2695_checksum -**/test/csuite/test_wt2719_reconfig -**/test/csuite/test_wt2834_join_bloom_fix -**/test/csuite/test_wt2853_perf -**/test/csuite/test_wt2999_join_extractor +**/test_scope +**/test_wt1965_col_efficiency +**/test_wt2246_col_append +**/test_wt2323_join_visibility +**/test_wt2403_lsm_workload +**/test_wt2447_join_main_table +**/test_wt2535_insert_race +**/test_wt2592_join_schema +**/test_wt2695_checksum +**/test_wt2719_reconfig +**/test_wt2834_join_bloom_fix +**/test_wt2853_perf +**/test_wt2909_checkpoint_integrity +**/test_wt2999_join_extractor +**/test_wt3120_filesys +**/test_wt3135_search_near_collator +**/test_wt3184_dup_index_collator **/test/cursor_order/cursor_order **/test/fops/t **/test/format/s_dumpcmp diff --git a/SConstruct b/SConstruct index e9e72630b11..b397f662be7 100644 --- a/SConstruct +++ b/SConstruct @@ -376,7 +376,6 @@ examples = [ "ex_pack", "ex_process", "ex_schema", - "ex_scope", "ex_stat", "ex_thread", ] diff --git a/bench/wtperf/runners/many-table-stress.wtperf b/bench/wtperf/runners/many-table-stress.wtperf index 51d0bb0dd9d..6cf1d5d2696 100644 --- a/bench/wtperf/runners/many-table-stress.wtperf +++ b/bench/wtperf/runners/many-table-stress.wtperf @@ -1,7 +1,7 @@ # Create a set of tables with uneven distribution of data 
conn_config="cache_size=1G,eviction=(threads_max=8),file_manager=(close_idle_time=100000),checkpoint=(wait=20,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" table_config="type=file" -table_count=5000 +table_count=2000 icount=0 random_range=1000000000 pareto=10 diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 7f5e5ad3373..772dedac8c8 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -32,9 +32,6 @@ #define DEFAULT_HOME "WT_TEST" #define DEFAULT_MONITOR_DIR "WT_TEST" -static const char * const debug_cconfig = ""; -static const char * const debug_tconfig = ""; - static void *checkpoint_worker(void *); static int drop_all_tables(WTPERF *); static int execute_populate(WTPERF *); @@ -1655,6 +1652,9 @@ close_reopen(WTPERF *wtperf) opts = wtperf->opts; + if (opts->in_memory) + return (0); + if (!opts->readonly && !opts->reopen_connection) return (0); /* @@ -2566,9 +2566,9 @@ main(int argc, char *argv[]) __wt_stream_set_line_buffer(stdout); /* Concatenate non-default configuration strings. */ - if ((opts->verbose > 1 && strlen(debug_cconfig) != 0) || - user_cconfig != NULL || opts->session_count_idle > 0 || - wtperf->compress_ext != NULL || wtperf->async_config != NULL) { + if (user_cconfig != NULL || opts->session_count_idle > 0 || + wtperf->compress_ext != NULL || wtperf->async_config != NULL || + opts->in_memory) { req_len = 20; req_len += wtperf->async_config != NULL ? strlen(wtperf->async_config) : 0; @@ -2583,8 +2583,8 @@ main(int argc, char *argv[]) opts->session_count_idle + wtperf->workers_cnt + opts->populate_threads + 10); } + req_len += opts->in_memory ? strlen("in_memory=true") : 0; req_len += user_cconfig != NULL ? strlen(user_cconfig) : 0; - req_len += debug_cconfig != NULL ? 
strlen(debug_cconfig) : 0; cc_buf = dmalloc(req_len); pos = 0; @@ -2603,6 +2603,12 @@ main(int argc, char *argv[]) append_comma, wtperf->compress_ext); append_comma = ","; } + if (opts->in_memory) { + pos += (size_t)snprintf( + cc_buf + pos, req_len - pos, "%s%s", + append_comma, "in_memory=true"); + append_comma = ","; + } if (sess_cfg != NULL && strlen(sess_cfg) != 0) { pos += (size_t)snprintf( cc_buf + pos, req_len - pos, "%s%s", @@ -2615,23 +2621,18 @@ main(int argc, char *argv[]) append_comma, user_cconfig); append_comma = ","; } - if (opts->verbose > 1 && strlen(debug_cconfig) != 0) - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, debug_cconfig); if (strlen(cc_buf) != 0 && (ret = config_opt_name_value(wtperf, "conn_config", cc_buf)) != 0) goto err; } - if ((opts->verbose > 1 && strlen(debug_tconfig) != 0) || opts->index || + if (opts->index || user_tconfig != NULL || wtperf->compress_table != NULL) { req_len = 20; req_len += wtperf->compress_table != NULL ? strlen(wtperf->compress_table) : 0; req_len += opts->index ? strlen(INDEX_COL_NAMES) : 0; req_len += user_tconfig != NULL ? strlen(user_tconfig) : 0; - req_len += debug_tconfig != NULL ? 
strlen(debug_tconfig) : 0; tc_buf = dmalloc(req_len); pos = 0; @@ -2655,10 +2656,6 @@ main(int argc, char *argv[]) append_comma, user_tconfig); append_comma = ","; } - if (opts->verbose > 1 && strlen(debug_tconfig) != 0) - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, debug_tconfig); if (strlen(tc_buf) != 0 && (ret = config_opt_name_value(wtperf, "table_config", tc_buf)) != 0) diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i index 63cef4c28fb..90f70457407 100644 --- a/bench/wtperf/wtperf_opt.i +++ b/bench/wtperf/wtperf_opt.i @@ -110,6 +110,8 @@ DEF_OPT_AS_UINT32(database_count, 1, DEF_OPT_AS_BOOL(drop_tables, 0, "Whether to drop all tables at the end of the run, and report time taken" " to do the drop.") +DEF_OPT_AS_BOOL(in_memory, 0, + "Whether to create the database in-memory.") DEF_OPT_AS_UINT32(icount, 5000, "number of records to initially populate. If multiple tables are " "configured the count is spread evenly across all tables.") diff --git a/build_posix/aclocal/strict.m4 b/build_posix/aclocal/strict.m4 index c107dd017d7..659867fa69e 100644 --- a/build_posix/aclocal/strict.m4 +++ b/build_posix/aclocal/strict.m4 @@ -31,6 +31,7 @@ AC_DEFUN([AM_GCC_WARNINGS], [ w="$w -Wstrict-prototypes" w="$w -Wswitch-enum" w="$w -Wundef" + w="$w -Wuninitialized" w="$w -Wunreachable-code" w="$w -Wunsafe-loop-optimizations" w="$w -Wunused" @@ -66,6 +67,10 @@ AC_DEFUN([AM_CLANG_WARNINGS], [ # w="$w -Wno-error=cast-qual" w="$w -Wno-cast-qual" + # On Centos 7.3.1611, system header files aren't compatible with + # -Wdisabled-macro-expansion. 
+ w="$w -Wno-disabled-macro-expansion" + case "$1" in *Apple*clang*version*4.1*) # Apple clang has its own numbering system, and older OS X diff --git a/dist/api_err.py b/dist/api_err.py index 82f961a4ac9..bd379ac8d70 100644 --- a/dist/api_err.py +++ b/dist/api_err.py @@ -82,7 +82,7 @@ for line in open('../src/include/wiredtiger.in', 'r'): ''.join('\n * ' + l for l in textwrap.wrap( textwrap.dedent(err.long_desc).strip(), 77)) + '\n' if err.long_desc else '')) - tfile.write('#define\t%s\t%d\n' % (err.name, err.value)) + tfile.write('#define\t%s\t(%d)\n' % (err.name, err.value)) if 'undoc' in err.flags: tfile.write('/*! @endcond */\n') tfile.write('/*\n') diff --git a/dist/s_string.ok b/dist/s_string.ok index e033f77327f..1f7f7d9fd3a 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -63,10 +63,12 @@ CPUs CRC CSV CStream +CURFILE CURSORs CURSTD CallsCustDate Castagnoli +CentOS Checkpointing Checksum Checksums @@ -223,8 +225,10 @@ MEMALIGN MERCHANTABILITY METADATA MONGODB +MOVEFILE MRXB MRXBOPC +MSDN MSVC MULTI MULTIBLOCK @@ -238,8 +242,7 @@ Metadata Mewhort Mitzenmacher MongoDB -MoveFile -MoveFileW +MoveFileExW Multi MultiByteToWideChar Multithreaded @@ -1147,6 +1150,7 @@ sw sx sy sys +syscall sz t's tV diff --git a/dist/s_void b/dist/s_void index 90425d5a718..249f043d029 100755 --- a/dist/s_void +++ b/dist/s_void @@ -137,7 +137,7 @@ for f in `find bench ext src test -name '*.[ci]'`; do # form of return assignment or call. 
file_parse $f | sed -e 's/return ([^)]*); }$//' \ - -e '/[A-Z]*_API_CALL[A-Z_]*(/d' \ + -e '/[_A-Z]*_API_CALL[_A-Z]*(/d' \ -e '/WT_CURSOR_NEEDKEY(/d' \ -e '/WT_CURSOR_NEEDVALUE(/d' \ -e '/WT_ERR[A-Z_]*(/d' \ @@ -166,7 +166,7 @@ for f in `find bench ext src test -name '*.[ci]'`; do file_parse $f | grep 'WT_DECL_RET' | sed -e '/ret =/d' \ - -e '/API_END_RET/d' \ + -e '/[_A-Z]*_API_CALL[_A-Z]*(/d' \ -e '/WT_CURSOR_NEEDKEY/d' \ -e '/WT_CURSOR_NEEDVALUE/d' \ -e '/WT_ERR/d' \ diff --git a/examples/c/Makefile.am b/examples/c/Makefile.am index d5305eec5c8..20936661e06 100644 --- a/examples/c/Makefile.am +++ b/examples/c/Makefile.am @@ -20,7 +20,6 @@ noinst_PROGRAMS = \ ex_pack \ ex_process \ ex_schema \ - ex_scope \ ex_stat \ ex_sync \ ex_thread diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index 8a1533011b2..82620673fe1 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -848,8 +848,8 @@ my_compare(WT_COLLATOR *collator, WT_SESSION *session, p1 = (const char *)value1->data; p2 = (const char *)value2->data; - while (*p1 != '\0' && *p1 == *p2) - p1++, p2++; + for (; *p1 != '\0' && *p1 == *p2; ++p1, ++p2) + ; *cmp = (int)*p2 - (int)*p1; return (0); diff --git a/examples/c/ex_scope.c b/examples/c/ex_scope.c deleted file mode 100644 index 795ad85d57b..00000000000 --- a/examples/c/ex_scope.c +++ /dev/null @@ -1,217 +0,0 @@ -/*- - * Public Domain 2014-2016 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. 
We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * ex_scope.c - * demonstrates the scope of buffers holding cursor keys and values. - */ -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <wiredtiger.h> - -#ifdef _WIN32 -/* snprintf is not supported on <= VS2013 */ -#define snprintf _snprintf -#endif - -static const char *home; - -static int -cursor_scope_ops(WT_CURSOR *cursor) -{ - struct { - const char *op; - const char *key; - const char *value; - int (*apply)(WT_CURSOR *); - } *op, ops[] = { - { "insert", "key1", "value1", cursor->insert, }, - { "update", "key1", "value2", cursor->update, }, - { "search", "key1", "value2", cursor->search, }, - { "remove", "key1", "value2", cursor->remove, }, - { NULL, NULL, NULL, NULL } - }; - WT_SESSION *session; - const char *key, *value; - char keybuf[10], valuebuf[10]; - int ret; - - session = cursor->session; - - for (op = ops; op->key != NULL; op++) { - key = value = NULL; - - /*! 
[cursor scope operation] */ - (void)snprintf(keybuf, sizeof(keybuf), "%s", op->key); - cursor->set_key(cursor, keybuf); - (void)snprintf(valuebuf, sizeof(valuebuf), "%s", op->value); - cursor->set_value(cursor, valuebuf); - - /* - * The application must keep key and value memory valid until - * the next operation that positions the cursor, modifies the - * data, or resets or closes the cursor. - * - * Modifying either the key or value buffers is not permitted. - */ - - /* Apply the operation (insert, update, search or remove). */ - if ((ret = op->apply(cursor)) != 0) { - fprintf(stderr, - "%s: error performing the operation: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - - /* - * The cursor no longer references application memory, so - * application buffers can be safely overwritten. - */ - strcpy(keybuf, "no key"); - strcpy(valuebuf, "no value"); - - /* - * Check that get_key/value behave as expected after the - * operation. - */ - if (op->apply == cursor->insert) { - /* - * WT_CURSOR::insert no longer references application - * memory, but as it does not position the cursor, it - * doesn't reference memory owned by the cursor, either. - */ - printf("ex_scope: " - "expect two WiredTiger error messages:\n"); - if ((ret = cursor->get_key(cursor, &key)) == 0 || - (ret = cursor->get_value(cursor, &value)) == 0) { - fprintf(stderr, - "%s: error in get_key/value: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - continue; - } - if (op->apply == cursor->remove) { - /* - * WT_CURSOR::remove no longer references application - * memory; as it does not position the cursor, it will - * reference key memory owned by the cursor, but has no - * value. 
- */ - printf("ex_scope: " - "expect one WiredTiger error message:\n"); - if ((ret = cursor->get_key(cursor, &key)) != 0 || - (ret = cursor->get_value(cursor, &value)) == 0) { - fprintf(stderr, - "%s: error in get_key/value: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - } else /* search, update */{ - /* - * WT_CURSOR::search and WT_CURSOR::update no longer - * reference application memory; as they position the - * cursor, they will reference key/value memory owned - * by the cursor. - */ - if ((ret = cursor->get_key(cursor, &key)) != 0 || - (ret = cursor->get_value(cursor, &value)) != 0) { - fprintf(stderr, - "%s: error in get_key/value: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - } - - /* - * Modifying the memory referenced by either key or value is - * not permitted. - * - * Check that the cursor's key and value are what we expect. - */ - if (key == keybuf || - (op->apply != cursor->remove && value == valuebuf)) { - fprintf(stderr, - "%s: cursor points at application memory!\n", - op->op); - return (EINVAL); - } - - if (strcmp(key, op->key) != 0 || - (op->apply != cursor->remove && - strcmp(value, op->value) != 0)) { - fprintf(stderr, - "%s: unexpected key / value!\n", op->op); - return (EINVAL); - } - /*! [cursor scope operation] */ - } - - return (0); -} - -int -main(void) -{ - WT_CONNECTION *conn; - WT_CURSOR *cursor; - WT_SESSION *session; - int ret; - - /* - * Create a clean test directory for this run of the test program if the - * environment variable isn't already set (as is done by make check). - */ - if (getenv("WIREDTIGER_HOME") == NULL) { - home = "WT_HOME"; - ret = system("rm -rf WT_HOME && mkdir WT_HOME"); - } else - home = NULL; - - /* Open a connection, create a simple table, open a cursor. */ - if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0 || - (ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { - fprintf(stderr, "Error connecting to %s: %s\n", - home == NULL ? "." 
: home, wiredtiger_strerror(ret)); - return (EXIT_FAILURE); - } - - ret = session->create(session, - "table:scope", "key_format=S,value_format=S,columns=(k,v)"); - - ret = session->open_cursor(session, - "table:scope", NULL, NULL, &cursor); - - ret = cursor_scope_ops(cursor); - - /* Close the connection and clean up. */ - ret = conn->close(conn, NULL); - - return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); -} diff --git a/lang/python/setup_pip.py b/lang/python/setup_pip.py new file mode 100644 index 00000000000..636eecab80a --- /dev/null +++ b/lang/python/setup_pip.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# + +# This script builds a Python source distribution that can built be installed +# via pip install. This must be run in a git repository to determine the files +# to package. Also as a prerequisite, SWIG must be run as the generated files +# are part of the package. To create the distribution, in this directory, run +# "python setup_pip.py sdist", this creates a tar.gz file under ./dist . +from __future__ import print_function +import os, os.path, re, shutil, site, sys +from setuptools import setup, Distribution +from distutils.extension import Extension +import distutils.sysconfig +import distutils.ccompiler +from distutils.errors import CompileError, LinkError +import subprocess +from subprocess import call +import setuptools.command.install +import setuptools.command.build_ext + +# msg -- +# Print a message to stderr. +def msg(s): + print(os.path.basename(__file__) + ": " + s, file=sys.stderr) + +# die -- +# For failures, show a message and exit. +def die(s): + msg(s) + sys.exit(1) + +# build_commands -- +# Run a sequence of commands, and die if any fail. +def build_commands(commands, build_dir, build_env): + for command in commands: + callargs = [ 'sh', '-c', command ] + verbose_command = '"' + '" "'.join(callargs) + '"' + print('running: ' + verbose_command) + if call(callargs, cwd=build_dir, env=build_env) != 0: + die('build command failed: ' + verbose_command) + +# check_needed_dependencies -- +# Make a quick check of any needed library dependencies, and +# add to the library path and include path as needed. If a library +# is not found, it is not definitive. 
+def check_needed_dependencies(builtins, inc_paths, lib_paths): + library_dirs = get_library_dirs() + compiler = distutils.ccompiler.new_compiler() + distutils.sysconfig.customize_compiler(compiler) + compiler.set_library_dirs(library_dirs) + missing = [] + for name, libname, instructions in builtins: + found = compiler.find_library_file(library_dirs, libname) + if found is None: + msg(libname + ": missing") + msg(instructions) + msg("after installing it, set LD_LIBRARY_PATH or DYLD_LIBRARY_PATH") + missing.append(libname) + else: + package_top = os.path.dirname(os.path.dirname(found)) + inc_paths.append(os.path.join(package_top, 'include')) + lib_paths.append(os.path.join(package_top, 'lib')) + + # XXX: we are not accounting for other directories that might be + # discoverable via /sbin/ldconfig. It might be better to write a tiny + # compile using -lsnappy, -lz... + # + #if len(missing) > 0: + # die("install packages for: " + str(missing)) + +# find_executable -- +# Locate an executable in the PATH. +def find_executable(exename, path): + p = subprocess.Popen(['which', exename ], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = p.communicate('') + out = str(out) # needed for Python3 + if out == '': + if err != '': + err = ': "' + err + '"' + die('"' + exename + '": not found in path' + err) + dirname = os.path.dirname(out) + if not dirname in path: + path.append(dirname) + +# get_build_path -- +# Create a PATH that can be used for installation. Apparently, +# installation commands are run with a restricted PATH, and +# autoreconf/aclocal will not normally be found. +def get_build_path(): + build_paths = [] + find_executable('autoreconf', build_paths) + find_executable('aclocal', build_paths) + build_path = os.environ['PATH'] + ':' + ':'.join(build_paths) + return build_path + +# get_compile_flags -- +# Get system specific compile flags. Return a triple: C preprocessor +# flags, C compilation flags and linker flags. 
+def get_compile_flags(inc_paths, lib_paths): + # Suppress warnings building SWIG generated code + if sys.platform == 'win32' and cc == 'msvc': + cflags = ['/arch:SSE2', '/EHsc'] + cppflags = [] + ldflags = [] + # Windows untested and incomplete, don't claim that it works. + die('Windows is not supported by this setup script') + else: + cflags = [ '-w', '-Wno-sign-conversion', '-std=c11' ] + cppflags = ['-I' + path for path in inc_paths] + cppflags.append('-DHAVE_CONFIG_H') + ldflags = ['-L' + path for path in lib_paths] + if sys.platform == 'darwin': + cflags.extend([ '-arch', 'x86_64' ]) + return (cppflags, cflags, ldflags) + +# get_sources_curdir -- +# Get a list of sources from the current directory +def get_sources_curdir(): + DEVNULL = open(os.devnull, 'w') + gitproc = subprocess.Popen( + ['git', 'ls-tree', '-r', '--name-only', 'HEAD^{tree}'], + stdin=DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sources = [line.rstrip() for line in gitproc.stdout.readlines()] + err = gitproc.stderr.read() + gitproc.wait() + subret = gitproc.returncode + if subret != 0 or err: + msg("git command to get sources returned " + str(subret) + + ", error=" + str(err)) + die("this command must be run in a git repository") + return sources + +# get_wiredtiger_versions -- +# Read the version information from the RELEASE_INFO file. +def get_wiredtiger_versions(wt_dir): + v = {} + for l in open(os.path.join(wt_dir, 'RELEASE_INFO')): + if re.match(r'WIREDTIGER_VERSION_(?:MAJOR|MINOR|PATCH)=', l): + exec(l, v) + wt_ver = '%d.%d' % (v['WIREDTIGER_VERSION_MAJOR'], + v['WIREDTIGER_VERSION_MINOR']) + wt_full_ver = wt_ver + '.%d' % (v['WIREDTIGER_VERSION_PATCH']) + return (wt_ver, wt_full_ver) + +# get_library_dirs +# Build a plausible set of library directories. 
+def get_library_dirs(): + dirs = [] + dirs.append("/usr/local/lib") + dirs.append("/usr/local/lib64") + dirs.append("/lib/x86_64-linux-gnu") + dirs.append("/opt/local/lib") + dirs.append("/usr/lib") + dirs.append("/usr/lib64") + for path in ['LD_LIBRARY_PATH', 'DYLD_LIBRARY_PATH', 'LIBRARY_PATH']: + if path in os.environ: + dirs.extend(os.environ[path].split(':')) + dirs = list(set(filter(os.path.isdir, dirs))) + return dirs + +# source_filter +# Make any needed changes to the sources list. Any entry that +# needs to be moved is returned in a dictionary. +def source_filter(sources): + result = [] + movers = dict() + py_dir = os.path.join('lang', 'python') + pywt_dir = os.path.join(py_dir, 'wiredtiger') + pywt_prefix = pywt_dir + os.path.sep + for f in sources: + if not re.match(source_regex, f): + continue + src = f + dest = f + # move all lang/python files to the top level. + if dest.startswith(pywt_prefix): + dest = os.path.basename(dest) + if dest == 'pip_init.py': + dest = '__init__.py' + if dest != src: + movers[dest] = src + result.append(dest) + # Add SWIG generated files + result.append('wiredtiger.py') + movers['wiredtiger.py'] = os.path.join(pywt_dir, '__init__.py') + result.append(os.path.join(py_dir, 'wiredtiger_wrap.c')) + return result, movers + +################################################################ +# Do some initial setup and checks. 
+this_abs_script = os.path.abspath(__file__) +this_dir = os.path.dirname(this_abs_script) +pip_command = None +for arg in sys.argv[1:]: + if arg[0] != '-' and pip_command == None: + pip_command = arg + break + +if this_dir.endswith(os.sep + os.path.join('lang', 'python')): + wt_dir = os.path.dirname(os.path.dirname(this_dir)) + os.chdir(wt_dir) +elif os.path.isfile(os.path.join(this_dir, 'LICENSE')): + wt_dir = this_dir +else: + die('running from an unknown directory') + +python3 = (sys.version_info[0] > 2) +if python3: + die('Python3 is not yet supported') + +# Ensure that Extensions won't be built for 32 bit, +# that won't work with WiredTiger. +if sys.maxsize < 2**32: + die('need to be running on a 64 bit system, and have a 64 bit Python') + +python_rel_dir = os.path.join('lang', 'python') +build_dir = os.path.join(wt_dir, 'build_posix') +makefile = os.path.join(build_dir, 'Makefile') +built_sentinal = os.path.join(build_dir, 'built.txt') +conf_make_dir = 'build_posix' +wt_swig_lib_name = os.path.join(python_rel_dir, '_wiredtiger.so') + +################################################################ +# Put together build options for the WiredTiger extension. +short_description = 'high performance, scalable, production quality, ' + \ + 'NoSQL, Open Source extensible platform for data management' +long_description = 'WiredTiger is a ' + short_description + '.\n\n' + \ + open(os.path.join(wt_dir, 'README')).read() + +wt_ver, wt_full_ver = get_wiredtiger_versions(wt_dir) +build_path = get_build_path() + +# We only need a small set of directories to build a WT library, +# we also include any files at the top level. +source_regex = r'^(?:(?:api|build_posix|ext|lang/python|src|dist)/|[^/]*$)' + +# The builtins that we include in this distribution. 
+builtins = [ + # [ name, libname, instructions ] + [ 'snappy', 'snappy', + 'Note: a suitable version of snappy can be found at\n' + \ + ' https://github.com/google/snappy/releases/download/' + \ + '1.1.3/snappy-1.1.3.tar.gz\n' + \ + 'It can be installed via: yum install snappy snappy-devel' + \ + 'or via: apt-get install libsnappy-dev' ], + [ 'zlib', 'z', + 'Need to install zlib\n' + \ + 'It can be installed via: apt-get install zlib1g' ] +] +builtin_names = [b[0] for b in builtins] +builtin_libraries = [b[1] for b in builtins] + +# Here's the configure/make operations we perform before the python extension +# is linked. +configure_cmds = [ + './makemake --clean-and-make', + './reconf', + # force building a position independent library; it will be linked + # into a single shared library with the SWIG interface code. + 'CFLAGS="${CFLAGS:-} -fPIC -DPIC" ' + \ + '../configure --enable-python --with-builtins=' + ','.join(builtin_names) +] + +# build all the builtins, at the moment they are all compressors. +make_cmds = [] +for name in builtin_names: + make_cmds.append('(cd ext/compressors/' + name + '/; make)') +make_cmds.append('make libwiredtiger.la') + +inc_paths = [ os.path.join(build_dir, 'src', 'include'), build_dir, '.' ] +lib_paths = [ '.' ] # wiredtiger.so is moved into the top level directory + +check_needed_dependencies(builtins, inc_paths, lib_paths) + +cppflags, cflags, ldflags = get_compile_flags(inc_paths, lib_paths) + +# If we are creating a source distribution, create a staging directory +# with just the right sources. Put the result in the python dist directory. 
+if pip_command == 'sdist': + sources, movers = source_filter(get_sources_curdir()) + stage_dir = os.path.join(python_rel_dir, 'stage') + shutil.rmtree(stage_dir, True) + os.makedirs(stage_dir) + shutil.copy2(this_abs_script, os.path.join(stage_dir, 'setup.py')) + for f in sources: + d = os.path.join(stage_dir, os.path.dirname(f)) + if not os.path.isdir(d): + os.makedirs(d) + if f in movers: + src = movers[f] + else: + src = f + # Symlinks are not followed in setup, we need to use real files. + shutil.copy2(src, os.path.join(stage_dir, f)) + os.chdir(stage_dir) + sys.argv.append('--dist-dir=' + os.path.join('..', 'dist')) +else: + sources = [ os.path.join(python_rel_dir, 'wiredtiger_wrap.c') ] + +wt_ext = Extension('_wiredtiger', + sources = sources, + extra_compile_args = cflags + cppflags, + extra_link_args = ldflags, + libraries = builtin_libraries, + extra_objects = [ os.path.join(build_dir, '.libs', 'libwiredtiger.a') ], + include_dirs = inc_paths, + library_dirs = lib_paths, +) +extensions = [ wt_ext ] +env = { "CFLAGS" : ' '.join(cflags), + "CPPFLAGS" : ' '.join(cppflags), + "LDFLAGS" : ' '.join(ldflags), + "PATH" : build_path } + +class BinaryDistribution(Distribution): + def is_pure(self): + return False + +class WTInstall(setuptools.command.install.install): + def run(self): + self.run_command("build_ext") + return setuptools.command.install.install.run(self) + +class WTBuildExt(setuptools.command.build_ext.build_ext): + def __init__(self, *args, **kwargs): + setuptools.command.build_ext.build_ext.__init__(self, *args, **kwargs) + + def run(self): + # only run this once + if not os.path.isfile(built_sentinal): + try: + os.remove(makefile) + except OSError: + pass + self.execute( + lambda: build_commands(configure_cmds, conf_make_dir, env), [], + 'wiredtiger configure') + if not os.path.isfile(makefile): + die('configure failed, file does not exist: ' + makefile) + self.execute( + lambda: build_commands(make_cmds, conf_make_dir, env), [], + 'wiredtiger 
make') + open(built_sentinal, 'a').close() + return setuptools.command.build_ext.build_ext.run(self) + +setup( + name = 'wiredtiger', + version = wt_full_ver, + author = 'The WiredTiger Development Team, part of MongoDB', + author_email = 'info@wiredtiger.com', + description = short_description, + license='GPL2,GPL3,Commercial', + long_description = long_description, + url = 'http://source.wiredtiger.com/', + keywords = 'scalable NoSQL database datastore engine open source', + packages = ['wiredtiger'], + ext_package = 'wiredtiger', + ext_modules = extensions, + include_package_data = True, + distclass = BinaryDistribution, + package_dir = { 'wiredtiger' : '.' }, + cmdclass = { 'install': WTInstall, 'build_ext': WTBuildExt }, + package_data = { + 'wiredtiger' : [ wt_swig_lib_name, '*.py' ] + }, + classifiers=[ + 'Intended Audience :: Developers', + 'Programming Language :: C', + 'Programming Language :: C++', + 'Programming Language :: Python', + 'Programming Language :: Java', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: POSIX', + 'Operating System :: POSIX :: BSD', + 'Operating System :: POSIX :: Linux', + 'Operating System :: POSIX :: SunOS/Solaris', + ] +) + +if pip_command == 'sdist': + shutil.rmtree(os.path.join(this_dir, 'stage')) diff --git a/lang/python/wiredtiger/pip_init.py b/lang/python/wiredtiger/pip_init.py new file mode 100644 index 00000000000..d59c8218976 --- /dev/null +++ b/lang/python/wiredtiger/pip_init.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. 
+# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +# pip_init.py +# This is installed as __init__.py, and imports the file created by SWIG. +# This is needed because SWIG's import helper code created by certain SWIG +# versions may be broken, see: https://github.com/swig/swig/issues/769 . +# Importing indirectly seems to avoid these issues. +import os, sys +fname = os.path.basename(__file__) +if fname != '__init__.py' and fname != '__init__.pyc': + print(__file__ + ': this file is not yet installed') + sys.exit(1) + +# After importing the SWIG-generated file, copy all symbols from from it +# to this module so they will appear in the wiredtiger namespace. +me = sys.modules[__name__] +sys.path.append(os.path.dirname(__file__)) # needed for Python3 +import wiredtiger +for name in dir(wiredtiger): + value = getattr(wiredtiger, name) + setattr(me, name, value) diff --git a/src/async/async_api.c b/src/async/async_api.c index 026a008188c..b9cc995f5a5 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -338,17 +338,15 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) * 2. 
If async is off, and the user wants it on, start it. * 3. If not a toggle and async is off, we're done. */ - if (conn->async_cfg && !run) { - /* Case 1 */ + if (conn->async_cfg && !run) { /* Case 1 */ WT_TRET(__wt_async_flush(session)); ret = __wt_async_destroy(session); conn->async_cfg = false; return (ret); - } else if (!conn->async_cfg && run) - /* Case 2 */ + } + if (!conn->async_cfg && run) /* Case 2 */ return (__async_start(session)); - else if (!conn->async_cfg) - /* Case 3 */ + if (!conn->async_cfg) /* Case 3 */ return (0); /* diff --git a/src/block/block_addr.c b/src/block/block_addr.c index 580316bdfc6..a67efca62a3 100644 --- a/src/block/block_addr.c +++ b/src/block/block_addr.c @@ -226,7 +226,7 @@ __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session, ci->discard.offset, ci->discard.size, ci->discard.checksum)); a = (uint64_t)ci->file_size; WT_RET(__wt_vpack_uint(pp, 0, a)); - a = (uint64_t)ci->ckpt_size; + a = ci->ckpt_size; WT_RET(__wt_vpack_uint(pp, 0, a)); return (0); diff --git a/src/block/block_ext.c b/src/block/block_ext.c index 26acc8c560f..e9357d73d1d 100644 --- a/src/block/block_ext.c +++ b/src/block/block_ext.c @@ -634,11 +634,11 @@ __wt_block_off_free( */ if ((ret = __wt_block_off_remove_overlap( session, block, &block->live.alloc, offset, size)) == 0) - ret = __block_merge(session, block, - &block->live.avail, offset, (wt_off_t)size); + ret = __block_merge( + session, block, &block->live.avail, offset, size); else if (ret == WT_NOTFOUND) - ret = __block_merge(session, block, - &block->live.discard, offset, (wt_off_t)size); + ret = __block_merge( + session, block, &block->live.discard, offset, size); return (ret); } @@ -1247,7 +1247,8 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_DECL_RET; WT_EXT *ext; WT_PAGE_HEADER *dsk; - size_t entries, size; + uint32_t entries; + size_t size; uint8_t *p; WT_RET(__block_extlist_dump(session, block, el, "write")); diff --git a/src/block/block_read.c b/src/block/block_read.c index 
869a92b6ae1..8d4aec7df75 100644 --- a/src/block/block_read.c +++ b/src/block/block_read.c @@ -39,7 +39,7 @@ __wt_bm_preload( (uint8_t *)bm->map + offset, size, bm->mapped_cookie); if (!mapped && handle->fh_advise != NULL) ret = handle->fh_advise(handle, (WT_SESSION *)session, - (wt_off_t)offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED); + offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED); if (ret != EBUSY && ret != ENOTSUP) return (ret); diff --git a/src/block/block_vrfy.c b/src/block/block_vrfy.c index 94824ad19f8..154765ed079 100644 --- a/src/block/block_vrfy.c +++ b/src/block/block_vrfy.c @@ -22,7 +22,7 @@ static int __verify_set_file_size(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *); ((off) / (block)->allocsize - 1) #ifdef HAVE_VERBOSE #define WT_FRAG_TO_OFF(block, frag) \ - (((wt_off_t)(frag + 1)) * (block)->allocsize) + (((wt_off_t)((frag) + 1)) * (block)->allocsize) #endif /* diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 5fde2237538..48ae1ad6d76 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -9,6 +9,70 @@ #include "wt_internal.h" /* + * WT_CURFILE_OP_XXX + * If we're going to return an error, we need to restore the cursor to + * a valid state, the upper-level cursor code is likely to retry. The macros + * here are called to save and restore that state. 
+ */ +#define WT_CURFILE_OP_DECL \ + WT_ITEM __key_copy; \ + WT_ITEM __value_copy; \ + uint64_t __recno; \ + uint32_t __flags +#define WT_CURFILE_OP_PUSH do { \ + WT_ITEM_SET(__key_copy, cursor->key); \ + WT_ITEM_SET(__value_copy, cursor->value); \ + __recno = cursor->recno; \ + __flags = cursor->flags; \ +} while (0) +#define WT_CURFILE_OP_POP do { \ + cursor->recno = __recno; \ + if (FLD_ISSET(__flags, WT_CURSTD_KEY_EXT)) \ + WT_ITEM_SET(cursor->key, __key_copy); \ + if (FLD_ISSET(__flags, WT_CURSTD_VALUE_EXT)) \ + WT_ITEM_SET(cursor->value, __value_copy); \ + F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ + F_SET(cursor, \ + FLD_MASK(__flags, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT));\ +} while (0) + +/* + * __cursor_page_pinned -- + * Return if we have a page pinned and it's not been flagged for forced + * eviction (the forced eviction test is so we periodically release pages + * grown too large). + */ +static inline bool +__cursor_page_pinned(WT_CURSOR_BTREE *cbt) +{ + return (F_ISSET(cbt, WT_CBT_ACTIVE) && + cbt->ref->page->read_gen != WT_READGEN_OLDEST); +} + +/* + * __cursor_copy_int_key -- + * If we're pointing into the tree, save the key into local memory. + */ +static inline int +__cursor_copy_int_key(WT_CURSOR *cursor) +{ + /* + * We're about to discard the cursor's position and the cursor layer + * might retry the operation. We discard pinned pages on error, which + * will invalidate pinned keys. Clear WT_CURSTD_KEY_INT in all cases, + * the underlying page is gone whether we can allocate memory or not. + */ + if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + F_CLR(cursor, WT_CURSTD_KEY_INT); + if (!WT_DATA_IN_ITEM(&cursor->key)) + WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, + &cursor->key, cursor->key.data, cursor->key.size)); + F_SET(cursor, WT_CURSTD_KEY_EXT); + } + return (0); +} + +/* * __cursor_size_chk -- * Return if an inserted item is too large. 
*/ @@ -55,6 +119,34 @@ __cursor_size_chk(WT_SESSION_IMPL *session, WT_ITEM *kv) } /* + * __cursor_disable_bulk -- + * Disable bulk loads into a tree. + */ +static inline void +__cursor_disable_bulk(WT_SESSION_IMPL *session, WT_BTREE *btree) +{ + /* + * Once a tree (other than the LSM primary) is no longer empty, eviction + * should pay attention to it, and it's no longer possible to bulk-load + * into it. + */ + if (!btree->original) + return; + if (btree->lsm_primary) { + btree->original = 0; /* Make the next test faster. */ + return; + } + + /* + * We use a compare-and-swap here to avoid races among the first inserts + * into a tree. Eviction is disabled when an empty tree is opened, and + * it must only be enabled once. + */ + if (__wt_atomic_cas8(&btree->original, 1, 0)) + __wt_evict_file_exclusive_off(session); +} + +/* * __cursor_fix_implicit -- * Return if search went past the end of the tree. */ @@ -323,8 +415,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) * from the root. */ valid = false; - if (F_ISSET(cbt, WT_CBT_ACTIVE) && - cbt->ref->page->read_gen != WT_READGEN_OLDEST) { + if (__cursor_page_pinned(cbt)) { __wt_txn_cursor_op(session); WT_ERR(btree->type == BTREE_ROW ? @@ -402,9 +493,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) * existing record. */ valid = false; - if (btree->type == BTREE_ROW && - F_ISSET(cbt, WT_CBT_ACTIVE) && - cbt->ref->page->read_gen != WT_READGEN_OLDEST) { + if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) { __wt_txn_cursor_op(session); WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true)); @@ -506,20 +595,23 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); - /* - * The tree is no longer empty: eviction should pay attention to it, - * and it's no longer possible to bulk-load into it. 
- */ - if (btree->bulk_load_ok) { - btree->bulk_load_ok = false; - __wt_btree_evictable(session, true); - } + /* It's no longer possible to bulk-load into the tree. */ + __cursor_disable_bulk(session, btree); retry: WT_RET(__cursor_func_init(cbt, true)); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, true)); + /* + * If not overwriting, fail if the key exists, else insert the + * key/value pair. + */ + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && + cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_DUPLICATE_KEY); + + ret = __cursor_row_modify(session, cbt, false); + } else { /* * If WT_CURSTD_APPEND is set, insert a new record (ignoring * the application's record number). The real record number @@ -544,19 +636,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); WT_ERR(__cursor_col_modify(session, cbt, false)); if (F_ISSET(cursor, WT_CURSTD_APPEND)) cbt->iface.recno = cbt->recno; - break; - case BTREE_ROW: - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * If not overwriting, fail if the key exists, else insert the - * key/value pair. - */ - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && - cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_DUPLICATE_KEY); - - ret = __cursor_row_modify(session, cbt, false); - break; } err: if (ret == WT_RESTART) { @@ -564,11 +643,9 @@ err: if (ret == WT_RESTART) { WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } + /* Insert doesn't maintain a position across calls, clear resources. 
*/ - if (ret == 0) - WT_TRET(__curfile_leave(cbt)); - if (ret != 0) - WT_TRET(__cursor_reset(cbt)); + WT_TRET(__cursor_reset(cbt)); return (ret); } @@ -626,29 +703,24 @@ __wt_btcur_update_check(WT_CURSOR_BTREE *cbt) retry: WT_RET(__cursor_func_init(cbt, true)); - switch (btree->type) { - case BTREE_ROW: + if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, true)); /* * Just check for conflicts. */ ret = __curfile_update_check(cbt); - break; - case BTREE_COL_FIX: - case BTREE_COL_VAR: + } else WT_ERR(__wt_illegal_value(session, NULL)); - break; - } err: if (ret == WT_RESTART) { WT_STAT_CONN_INCR(session, cursor_restart); WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } - WT_TRET(__curfile_leave(cbt)); - if (ret != 0) - WT_TRET(__cursor_reset(cbt)); + + /* Insert doesn't maintain a position across calls, clear resources. */ + WT_TRET(__cursor_reset(cbt)); return (ret); } @@ -660,23 +732,83 @@ int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_OP_DECL; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + bool positioned; btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; + WT_CURFILE_OP_PUSH; + WT_STAT_CONN_INCR(session, cursor_remove); WT_STAT_DATA_INCR(session, cursor_remove); WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size); -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * WT_CURSOR.remove has a unique semantic, the cursor stays positioned + * if it starts positioned, otherwise clear the cursor on completion. + */ + positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: +retry: + /* + * If removing with overwrite configured, and positioned to an on-page + * key, the update doesn't require another search. The cursor won't be + * positioned on a page with an external key set, but be sure. 
+ */ + if (__cursor_page_pinned(cbt) && + F_ISSET_ALL(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_OVERWRITE)) { + WT_ERR(__wt_txn_autocommit_check(session)); + + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). Correct to an exact match so we can + * remove whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, true) : + __cursor_col_modify(session, cbt, true); + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + if (ret != 0) { + WT_TRET(__cursor_copy_int_key(cursor)); + WT_CURFILE_OP_PUSH; + goto err; + } + goto done; + } + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Re-save the cursor state: we may retry but + * eventually fail. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + WT_CURFILE_OP_PUSH; + + WT_ERR(__cursor_func_init(cbt, true)); + + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, false)); + + /* Check whether an update would conflict. */ + WT_ERR(__curfile_update_check(cbt)); + + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_NOTFOUND); + + ret = __cursor_row_modify(session, cbt, true); + } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); /* @@ -703,19 +835,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); cbt->recno = cursor->recno; } else ret = __cursor_col_modify(session, cbt, true); - break; - case BTREE_ROW: - /* Remove the record if it exists. */ - WT_ERR(__cursor_row_search(session, cbt, NULL, false)); - - /* Check whether an update would conflict. 
*/ - WT_ERR(__curfile_update_check(cbt)); - - if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_NOTFOUND); - - ret = __cursor_row_modify(session, cbt, true); - break; } err: if (ret == WT_RESTART) { @@ -723,15 +842,29 @@ err: if (ret == WT_RESTART) { WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } + /* - * If the cursor is configured to overwrite and the record is not - * found, that is exactly what we want. + * If the cursor is configured to overwrite and the record is not found, + * that is exactly what we want, return success. */ if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) && ret == WT_NOTFOUND) ret = 0; - if (ret != 0) +done: /* + * If the cursor was positioned, it stays positioned, point the cursor + * at an internal copy of the key. Otherwise, there's no position or + * key/value. + */ + if (ret == 0) + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (ret == 0 && positioned) { + WT_TRET(__wt_key_return(session, cbt)); + if (ret == 0) + F_SET(cursor, WT_CURSTD_KEY_INT); + } else WT_TRET(__cursor_reset(cbt)); + if (ret != 0) + WT_CURFILE_OP_POP; return (ret); } @@ -760,20 +893,24 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); - /* - * The tree is no longer empty: eviction should pay attention to it, - * and it's no longer possible to bulk-load into it. - */ - if (btree->bulk_load_ok) { - btree->bulk_load_ok = false; - __wt_btree_evictable(session, true); - } + /* It's no longer possible to bulk-load into the tree. */ + __cursor_disable_bulk(session, btree); retry: WT_RET(__cursor_func_init(cbt, true)); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, true)); + /* + * If not overwriting, check for conflicts and fail if the key + * does not exist. 
+ */ + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { + WT_ERR(__curfile_update_check(cbt)); + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_NOTFOUND); + } + ret = __cursor_row_modify(session, cbt, false); + } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); /* @@ -792,20 +929,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); WT_ERR(WT_NOTFOUND); } ret = __cursor_col_modify(session, cbt, false); - break; - case BTREE_ROW: - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * If not overwriting, check for conflicts and fail if the key - * does not exist. - */ - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { - WT_ERR(__curfile_update_check(cbt)); - if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_NOTFOUND); - } - ret = __cursor_row_modify(session, cbt, false); - break; } err: if (ret == WT_RESTART) { @@ -955,9 +1078,12 @@ __cursor_truncate(WT_SESSION_IMPL *session, WT_DECL_RET; /* - * First, call the standard cursor remove method to do a full search and - * re-position the cursor because we don't have a saved copy of the - * page's write generation information, which we need to remove records. + * First, call the cursor search method to re-position the cursor: we + * may not have a cursor position (if the higher-level truncate code + * switched the cursors to have an "external" cursor key), and because + * we don't save a copy of the page's write generation information, + * which we need to remove records. + * * Once that's done, we can delete records without a full search, unless * we encounter a restart error because the page was modified by some * other thread of control; in that case, repeat the full search to @@ -970,20 +1096,27 @@ __cursor_truncate(WT_SESSION_IMPL *session, * instantiated the end cursor, so we know that page is pinned in memory * and we can proceed without concern.
+ */ -retry: WT_RET(__wt_btcur_remove(start)); +retry: WT_RET(__wt_btcur_search(start)); /* - * Reset ret each time through so that we don't loop forever in - * the cursor equals case. + * XXX KEITH + * When the btree cursor code sets/clears the cursor flags (rather than + * the cursor layer), the set/clear goes away, only the assert remains. */ - for (ret = 0;;) { + F_CLR((WT_CURSOR *)start, WT_CURSTD_KEY_SET); + F_SET((WT_CURSOR *)start, WT_CURSTD_KEY_INT); + WT_ASSERT(session, + F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + + for (;;) { + if ((ret = rmfunc(session, start, 1)) != 0) + break; + if (stop != NULL && __cursor_equals(start, stop)) break; if ((ret = __wt_btcur_next(start, true)) != 0) break; - start->compare = 0; /* Exact match */ - if ((ret = rmfunc(session, start, 1)) != 0) - break; + start->compare = 0; /* Exact match */ } if (ret == WT_RESTART) { @@ -1016,29 +1149,40 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session, * record 37, records 1-36 magically appear. Those records can't be * deleted, which means we have to ignore already "deleted" records. * - * First, call the standard cursor remove method to do a full search and - * re-position the cursor because we don't have a saved copy of the - * page's write generation information, which we need to remove records. + * First, call the cursor search method to re-position the cursor: we + * may not have a cursor position (if the higher-level truncate code + * switched the cursors to have an "external" cursor key), and because + * we don't save a copy of the page's write generation information, + * which we need to remove records. + * * Once that's done, we can delete records without a full search, unless * we encounter a restart error because the page was modified by some * other thread of control; in that case, repeat the full search to * refresh the page's modification information.
*/ -retry: WT_RET(__wt_btcur_remove(start)); +retry: WT_RET(__wt_btcur_search(start)); + /* - * Reset ret each time through so that we don't loop forever in - * the cursor equals case. + * XXX KEITH + * When the btree cursor code sets/clears the cursor flags (rather than + * the cursor layer), the set/clear goes away, only the assert remains. */ - for (ret = 0;;) { + F_CLR((WT_CURSOR *)start, WT_CURSTD_KEY_SET); + F_SET((WT_CURSOR *)start, WT_CURSTD_KEY_INT); + WT_ASSERT(session, + F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + + for (;;) { + value = (const uint8_t *)start->iface.value.data; + if (*value != 0 && + (ret = rmfunc(session, start, 1)) != 0) + break; + if (stop != NULL && __cursor_equals(start, stop)) break; if ((ret = __wt_btcur_next(start, true)) != 0) break; start->compare = 0; /* Exact match */ - value = (const uint8_t *)start->iface.value.data; - if (*value != 0 && - (ret = rmfunc(session, start, 1)) != 0) - break; } if (ret == WT_RESTART) { @@ -1158,7 +1302,7 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) * Skip the usual cursor tear-down in that case. 
*/ if (!lowlevel) - ret = __curfile_leave(cbt); + ret = __cursor_reset(cbt); __wt_buf_free(session, &cbt->_row_key); __wt_buf_free(session, &cbt->_tmp); diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index d664da2ebd3..4989301468f 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -34,7 +34,7 @@ static const /* Output separator */ static int __debug_cell(WT_DBG *, const WT_PAGE_HEADER *, WT_CELL_UNPACK *); static int __debug_cell_data( - WT_DBG *, WT_PAGE *, int type, const char *, WT_CELL_UNPACK *); + WT_DBG *, WT_PAGE *, int, const char *, WT_CELL_UNPACK *); static int __debug_col_skip(WT_DBG *, WT_INSERT_HEAD *, const char *, bool); static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *); static int __debug_dsk_cell(WT_DBG *, const WT_PAGE_HEADER *); diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 6ed70788759..f2bffee06da 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -15,6 +15,44 @@ static int __btree_preload(WT_SESSION_IMPL *); static int __btree_tree_open_empty(WT_SESSION_IMPL *, bool); /* + * __btree_clear -- + * Clear a Btree, either on handle discard or re-open. + */ +static int +__btree_clear(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_DECL_RET; + + btree = S2BT(session); + + /* + * If the tree hasn't gone through an open/close cycle, there's no + * cleanup to be done. + */ + if (!F_ISSET(btree, WT_BTREE_CLOSED)) + return (0); + + /* Close the Huffman tree. */ + __wt_btree_huffman_close(session); + + /* Terminate any associated collator. */ + if (btree->collator_owned && btree->collator->terminate != NULL) + WT_TRET(btree->collator->terminate( + btree->collator, &session->iface)); + + /* Destroy locks. */ + __wt_rwlock_destroy(session, &btree->ovfl_lock); + __wt_spin_destroy(session, &btree->flush_lock); + + /* Free allocated memory. 
*/ + __wt_free(session, btree->key_format); + __wt_free(session, btree->value_format); + + return (ret); +} + +/* * __wt_btree_open -- * Open a Btree. */ @@ -28,12 +66,27 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_DATA_HANDLE *dhandle; WT_DECL_RET; size_t root_addr_size; + uint32_t mask; uint8_t root_addr[WT_BTREE_MAX_ADDR_COOKIE]; const char *filename; bool creation, forced_salvage, readonly; - dhandle = session->dhandle; btree = S2BT(session); + dhandle = session->dhandle; + + /* + * This may be a re-open of an underlying object and we have to clean + * up. We can't clear the operation flags, however, they're set by the + * connection handle software that called us. + */ + WT_RET(__btree_clear(session)); + + mask = F_MASK(btree, WT_BTREE_SPECIAL_FLAGS); + memset(btree, 0, sizeof(*btree)); + btree->flags = mask; + + /* Set the data handle first, our called functions reasonably use it. */ + btree->dhandle = dhandle; /* Checkpoint files are readonly. */ readonly = dhandle->checkpoint != NULL || @@ -126,6 +179,20 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) } } + /* + * Eviction ignores trees until the handle's open flag is set, configure + * eviction before that happens. + * + * Files that can still be bulk-loaded cannot be evicted. + * Permanently cache-resident files can never be evicted. + * Special operations don't enable eviction. (The underlying commands + * may turn on eviction, but it's their decision.) 
+ */ + if (btree->original || + F_ISSET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_REBALANCE | + WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) + WT_ERR(__wt_evict_file_exclusive_on(session)); + if (0) { err: WT_TRET(__wt_btree_close(session)); } @@ -147,7 +214,24 @@ __wt_btree_close(WT_SESSION_IMPL *session) btree = S2BT(session); + /* + * The close process isn't the same as discarding the handle: we might + * re-open the handle, which isn't a big deal, but the backing blocks + * for the handle may not yet have been discarded from the cache, and + * eviction uses WT_BTREE structure elements. Free backing resources + * but leave the rest alone, and we'll discard the structure when we + * discard the data handle. + * + * Handles can be closed multiple times, ignore all but the first. + */ + if (F_ISSET(btree, WT_BTREE_CLOSED)) + return (0); + F_SET(btree, WT_BTREE_CLOSED); + + /* Discard any underlying block manager resources. */ if ((bm = btree->bm) != NULL) { + btree->bm = NULL; + /* Unload the checkpoint, unless it's a special command. */ if (!F_ISSET(btree, WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) @@ -155,33 +239,26 @@ __wt_btree_close(WT_SESSION_IMPL *session) /* Close the underlying block manager reference. */ WT_TRET(bm->close(bm, session)); - - btree->bm = NULL; } - /* Close the Huffman tree. */ - __wt_btree_huffman_close(session); - - /* Destroy locks. */ - __wt_rwlock_destroy(session, &btree->ovfl_lock); - __wt_spin_destroy(session, &btree->flush_lock); - - /* Free allocated memory. */ - __wt_free(session, btree->key_format); - __wt_free(session, btree->value_format); + return (ret); +} - if (btree->collator_owned) { - if (btree->collator->terminate != NULL) - WT_TRET(btree->collator->terminate( - btree->collator, &session->iface)); - btree->collator_owned = 0; - } - btree->collator = NULL; - btree->kencryptor = NULL; +/* + * __wt_btree_discard -- + * Discard a Btree. 
+ */ +int +__wt_btree_discard(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_DECL_RET; - btree->bulk_load_ok = false; + ret = __btree_clear(session); - F_CLR(btree, WT_BTREE_SPECIAL_FLAGS); + btree = S2BT(session); + __wt_overwrite_and_free(session, btree); + session->dhandle->handle = NULL; return (ret); } @@ -267,9 +344,9 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) WT_RET(__wt_config_gets(session, cfg, "cache_resident", &cval)); if (cval.val) - F_SET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION); + F_SET(btree, WT_BTREE_IN_MEMORY); else - F_CLR(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION); + F_CLR(btree, WT_BTREE_IN_MEMORY); WT_RET(__wt_config_gets(session, cfg, "ignore_in_memory_cache_size", &cval)); @@ -482,13 +559,10 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation) /* * Newly created objects can be used for cursor inserts or for bulk * loads; set a flag that's cleared when a row is inserted into the - * tree. Objects being bulk-loaded cannot be evicted, we set it - * globally, there's no point in searching empty trees for eviction. + * tree. */ - if (creation) { - btree->bulk_load_ok = true; - __wt_btree_evictable(session, false); - } + if (creation) + btree->original = 1; /* * A note about empty trees: the initial tree is a single root page. @@ -581,27 +655,6 @@ __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) } /* - * __wt_btree_evictable -- - * Setup or release a cache-resident tree. - */ -void -__wt_btree_evictable(WT_SESSION_IMPL *session, bool on) -{ - WT_BTREE *btree; - - btree = S2BT(session); - - /* Permanently cache-resident files can never be evicted. */ - if (F_ISSET(btree, WT_BTREE_IN_MEMORY)) - return; - - if (on) - F_CLR(btree, WT_BTREE_NO_EVICTION); - else - F_SET(btree, WT_BTREE_NO_EVICTION); -} - -/* * __btree_preload -- * Pre-load internal pages. 
*/ diff --git a/src/btree/bt_io.c b/src/btree/bt_io.c index a8645f79dbe..b5e4d52394a 100644 --- a/src/btree/bt_io.c +++ b/src/btree/bt_io.c @@ -183,7 +183,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t dst_len, len, result_len, size, src_len; int compression_failed; /* Extension API, so not a bool. */ uint8_t *dst, *src; - bool data_checksum, encrypted; + bool data_checksum, encrypted, timer; btree = S2BT(session); bm = btree->bm; @@ -216,7 +216,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, &result_len)); WT_ASSERT(session, dsk->mem_size == result_len + WT_BLOCK_COMPRESS_SKIP); - ctmp->size = (uint32_t)result_len + WT_BLOCK_COMPRESS_SKIP; + ctmp->size = result_len + WT_BLOCK_COMPRESS_SKIP; ip = ctmp; } else { WT_ASSERT(session, dsk->mem_size == buf->size); @@ -357,7 +357,8 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, data_checksum = !compressed; break; } - if (!F_ISSET(session, WT_SESSION_INTERNAL)) + timer = !F_ISSET(session, WT_SESSION_INTERNAL); + if (timer) __wt_epoch(session, &start); /* Call the block manager to write the block. 
*/ @@ -367,7 +368,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, bm, session, ip, addr, addr_sizep, data_checksum, checkpoint_io)); /* Update some statistics now that the write is done */ - if (!F_ISSET(session, WT_SESSION_INTERNAL)) { + if (timer) { __wt_epoch(session, &stop); WT_STAT_CONN_INCR(session, cache_write_app_count); WT_STAT_CONN_INCRV(session, cache_write_app_time, diff --git a/src/btree/bt_random.c b/src/btree/bt_random.c index 4c7ff861d26..25ede0a09ac 100644 --- a/src/btree/bt_random.c +++ b/src/btree/bt_random.c @@ -178,6 +178,8 @@ __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) WT_REF *current, *descent; uint32_t flags, i, entries, retry; + *refp = NULL; + btree = S2BT(session); current = NULL; retry = 100; @@ -201,16 +203,6 @@ restart: /* current = &btree->root; for (;;) { page = current->page; - /* - * When walking a tree for eviction, an exclusive operation may - * be in progress leaving the root page is not valid. Just give - * up in that case. - */ - if (page == NULL) { - WT_ASSERT(session, eviction); - break; - } - if (!WT_PAGE_IS_INTERNAL(page)) break; diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 39f9e1159cb..64874547b9c 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -369,6 +369,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref) size_t addr_size; uint32_t previous_state; const uint8_t *addr; + bool timer; btree = S2BT(session); page = NULL; @@ -408,10 +409,11 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref) * There's an address, read or map the backing disk page and build an * in-memory version of the page. 
*/ - if (!F_ISSET(session, WT_SESSION_INTERNAL)) + timer = !F_ISSET(session, WT_SESSION_INTERNAL); + if (timer) __wt_epoch(session, &start); WT_ERR(__wt_bt_read(session, &tmp, addr, addr_size)); - if (!F_ISSET(session, WT_SESSION_INTERNAL)) { + if (timer) { __wt_epoch(session, &stop); WT_STAT_CONN_INCR(session, cache_read_app_count); WT_STAT_CONN_INCRV(session, cache_read_app_time, @@ -590,8 +592,9 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags */ if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || - (F_ISSET(btree, WT_BTREE_NO_EVICTION) && - !F_ISSET(btree, WT_BTREE_NO_RECONCILE))) + btree->lsm_primary || + (btree->evict_disabled > 0 && + !F_ISSET(btree, WT_BTREE_ALLOW_SPLITS))) goto skip_evict; /* diff --git a/src/btree/bt_rebalance.c b/src/btree/bt_rebalance.c index 24b4f7bb33d..68848c7c8f5 100644 --- a/src/btree/bt_rebalance.c +++ b/src/btree/bt_rebalance.c @@ -406,12 +406,10 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_BTREE *btree; WT_DECL_RET; WT_REBALANCE_STUFF *rs, _rstuff; - bool evict_reset; WT_UNUSED(cfg); btree = S2BT(session); - evict_reset = false; /* * If the tree has never been written to disk, we're done, rebalance @@ -433,14 +431,6 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) /* Set the internal page tree type. */ rs->type = btree->root.page->type; - /* - * Get exclusive access to the file. (Not required, the only page in the - * cache is the root page, and that cannot be evicted; however, this way - * eviction ignores the tree entirely.) - */ - WT_ERR(__wt_evict_file_exclusive_on(session)); - evict_reset = true; - /* Recursively walk the tree. */ switch (rs->type) { case WT_PAGE_ROW_INT: @@ -471,10 +461,7 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) btree->root.page = rs->root; rs->root = NULL; -err: if (evict_reset) - __wt_evict_file_exclusive_off(session); - - /* Discard any leftover root page we created. 
*/ +err: /* Discard any leftover root page we created. */ if (rs->root != NULL) { __wt_page_modify_clear(session, rs->root); __wt_page_out(session, &rs->root); diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c index 6409a1a180c..9fc457e2297 100644 --- a/src/btree/bt_ret.c +++ b/src/btree/bt_ret.c @@ -9,64 +9,21 @@ #include "wt_internal.h" /* - * __wt_kv_return -- - * Return a page referenced key/value pair to the application. + * __key_return -- + * Change the cursor to reference an internal return key. */ -int -__wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +static inline int +__key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) { - WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK unpack; WT_CURSOR *cursor; WT_ITEM *tmp; WT_PAGE *page; WT_ROW *rip; - uint8_t v; - - btree = S2BT(session); page = cbt->ref->page; cursor = &cbt->iface; - switch (page->type) { - case WT_PAGE_COL_FIX: - /* - * The interface cursor's record has usually been set, but that - * isn't universally true, specifically, cursor.search_near may - * call here without first setting the interface cursor. - */ - cursor->recno = cbt->recno; - - /* If the cursor references a WT_UPDATE item, return it. */ - if (upd != NULL) { - cursor->value.data = WT_UPDATE_DATA(upd); - cursor->value.size = upd->size; - return (0); - } - - /* Take the value from the original page. */ - v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt); - return (__wt_buf_set(session, &cursor->value, &v, 1)); - case WT_PAGE_COL_VAR: - /* - * The interface cursor's record has usually been set, but that - * isn't universally true, specifically, cursor.search_near may - * call here without first setting the interface cursor. - */ - cursor->recno = cbt->recno; - - /* If the cursor references a WT_UPDATE item, return it. 
*/ - if (upd != NULL) { - cursor->value.data = WT_UPDATE_DATA(upd); - cursor->value.size = upd->size; - return (0); - } - - /* Take the value from the original page cell. */ - cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]); - break; - case WT_PAGE_ROW_LEAF: + if (page->type == WT_PAGE_ROW_LEAF) { rip = &page->pg_row[cbt->slot]; /* @@ -79,7 +36,10 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) if (cbt->ins != NULL) { cursor->key.data = WT_INSERT_KEY(cbt->ins); cursor->key.size = WT_INSERT_KEY_SIZE(cbt->ins); - } else if (cbt->compare == 0) { + return (0); + } + + if (cbt->compare == 0) { /* * If not in an insert list and there's an exact match, * the row-store search function built the key we want @@ -97,16 +57,51 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) cursor->key.data = cbt->row_key->data; cursor->key.size = cbt->row_key->size; - } else - WT_RET(__wt_row_leaf_key( - session, page, rip, &cursor->key, false)); - - /* If the cursor references a WT_UPDATE item, return it. */ - if (upd != NULL) { - cursor->value.data = WT_UPDATE_DATA(upd); - cursor->value.size = upd->size; return (0); } + return (__wt_row_leaf_key( + session, page, rip, &cursor->key, false)); + } + + /* + * WT_PAGE_COL_FIX, WT_PAGE_COL_VAR: + * The interface cursor's record has usually been set, but that + * isn't universally true, specifically, cursor.search_near may call + * here without first setting the interface cursor. + */ + cursor->recno = cbt->recno; + return (0); +} + +/* + * __value_return -- + * Change the cursor to reference an internal return value. 
+ */ +static inline int +__value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +{ + WT_BTREE *btree; + WT_CELL *cell; + WT_CELL_UNPACK unpack; + WT_CURSOR *cursor; + WT_PAGE *page; + WT_ROW *rip; + uint8_t v; + + btree = S2BT(session); + + page = cbt->ref->page; + cursor = &cbt->iface; + + /* If the cursor references a WT_UPDATE item, return it. */ + if (upd != NULL) { + cursor->value.data = WT_UPDATE_DATA(upd); + cursor->value.size = upd->size; + return (0); + } + + if (page->type == WT_PAGE_ROW_LEAF) { + rip = &page->pg_row[cbt->slot]; /* Simple values have their location encoded in the WT_ROW. */ if (__wt_row_leaf_value(page, rip, &cursor->value)) @@ -121,13 +116,46 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) cursor->value.size = 0; return (0); } - break; - WT_ILLEGAL_VALUE(session); + __wt_cell_unpack(cell, &unpack); + return (__wt_page_cell_data_ref( + session, page, &unpack, &cursor->value)); + + } + + if (page->type == WT_PAGE_COL_VAR) { + /* Take the value from the original page cell. */ + cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]); + __wt_cell_unpack(cell, &unpack); + return (__wt_page_cell_data_ref( + session, page, &unpack, &cursor->value)); } - /* The value is an on-page cell, unpack and expand it as necessary. */ - __wt_cell_unpack(cell, &unpack); - WT_RET(__wt_page_cell_data_ref(session, page, &unpack, &cursor->value)); + /* WT_PAGE_COL_FIX: Take the value from the original page. */ + v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt); + return (__wt_buf_set(session, &cursor->value, &v, 1)); +} + +/* + * __wt_key_return -- + * Change the cursor to reference an internal return key. + */ +int +__wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +{ + WT_RET(__key_return(session, cbt)); + + return (0); +} + +/* + * __wt_kv_return -- + * Return a page referenced key/value pair to the application. 
+ */ +int +__wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +{ + WT_RET(__wt_key_return(session, cbt)); + WT_RET(__value_return(session, cbt, upd)); return (0); } diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index fea979cac6e..165f932afb2 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -166,13 +166,11 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_DECL_RET; WT_STUFF *ss, stuff; uint32_t i, leaf_cnt; - bool evict_reset; WT_UNUSED(cfg); btree = S2BT(session); bm = btree->bm; - evict_reset = false; WT_CLEAR(stuff); ss = &stuff; @@ -184,13 +182,6 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp2)); /* - * Salvage handles its own page eviction; get exclusive access to the - * file, have eviction ignore the tree entirely. - */ - WT_ERR(__wt_evict_file_exclusive_on(session)); - evict_reset = true; - - /* * Step 1: * Inform the underlying block manager that we're salvaging the file. */ @@ -350,9 +341,6 @@ err: WT_TRET(bm->salvage_end(bm, session)); if (ss->root_ref.page != NULL) __wt_ref_out(session, &ss->root_ref); - if (evict_reset) - __wt_evict_file_exclusive_off(session); - /* Discard the leaf and overflow page memory. 
*/ WT_TRET(__slvg_cleanup(session, ss)); diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 6b2100ec7e3..b1bad760826 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -10,8 +10,8 @@ #define WT_MEM_TRANSFER(from_decr, to_incr, len) do { \ size_t __len = (len); \ - from_decr += __len; \ - to_incr += __len; \ + (from_decr) += __len; \ + (to_incr) += __len; \ } while (0) /* @@ -119,7 +119,7 @@ __wt_split_stash_discard(WT_SESSION_IMPL *session) ++i, ++stash) { if (stash->p == NULL) continue; - else if (stash->split_gen >= oldest) + if (stash->split_gen >= oldest) break; /* * It's a bad thing if another thread is in this memory after diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 7bf15baa67f..ead6ccc4ac0 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -78,6 +78,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages; uint64_t oldest_id, saved_pinned_id; uint32_t flags; + bool timer; conn = S2C(session); btree = S2BT(session); @@ -88,7 +89,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) internal_bytes = leaf_bytes = 0; internal_pages = leaf_pages = 0; - if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) + timer = WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT); + if (timer) __wt_epoch(session, &start); switch (syncop) { @@ -186,9 +188,9 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * to grow significantly larger than the configured maximum * size. 
*/ - F_SET(btree, WT_BTREE_NO_RECONCILE); + F_SET(btree, WT_BTREE_ALLOW_SPLITS); ret = __wt_evict_file_exclusive_on(session); - F_CLR(btree, WT_BTREE_NO_RECONCILE); + F_CLR(btree, WT_BTREE_ALLOW_SPLITS); WT_ERR(ret); __wt_evict_file_exclusive_off(session); @@ -242,7 +244,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) break; } - if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) { + if (timer) { __wt_epoch(session, &end); __wt_verbose(session, WT_VERB_CHECKPOINT, "__sync_file WT_SYNC_%s wrote: %" PRIu64 diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c index 05990918215..7475811adc5 100644 --- a/src/btree/bt_vrfy.c +++ b/src/btree/bt_vrfy.c @@ -216,13 +216,11 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) ckpt->raw.data, ckpt->raw.size, root_addr, &root_addr_size, true)); - /* - * Ignore trees with no root page. - * Verify, then discard the checkpoint from the cache. - */ - if (root_addr_size != 0 && - (ret = __wt_btree_tree_open( - session, root_addr, root_addr_size)) == 0) { + /* Skip trees with no root page. */ + if (root_addr_size != 0) { + WT_ERR(__wt_btree_tree_open( + session, root_addr, root_addr_size)); + if (WT_VRFY_DUMP(vs)) WT_ERR(__wt_msg(session, "Root: %s %s", __wt_addr_string(session, @@ -230,14 +228,38 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) __wt_page_type_string( btree->root.page->type))); + __wt_evict_file_exclusive_off(session); + + /* Verify the tree. */ WT_WITH_PAGE_INDEX(session, ret = __verify_tree(session, &btree->root, vs)); + /* + * We have an exclusive lock on the handle, but we're + * swapping root pages in-and-out of that handle, and + * there's a race with eviction entering the tree and + * seeing an invalid root page. Eviction must work on + * trees being verified (else we'd have to do our own + * eviction), lock eviction out whenever we're loading + * a new root page. 
This loop works because we are + * called with eviction locked out, so we release the + * lock at the top of the loop and re-acquire it here. + */ + WT_TRET(__wt_evict_file_exclusive_on(session)); WT_TRET(__wt_cache_op(session, WT_SYNC_DISCARD)); } /* Unload the checkpoint. */ WT_TRET(bm->checkpoint_unload(bm, session)); + + /* + * We've finished one checkpoint's verification (verification, + * then cache eviction and checkpoint unload): if any errors + * occurred, quit. Done this way because otherwise we'd need + * at least two more state variables on error, one to know if + * we need to discard the tree from the cache and one to know + * if we need to unload the checkpoint. + */ WT_ERR(ret); /* Display the tree shape. */ @@ -252,7 +274,7 @@ err: /* Inform the underlying block manager we're done. */ /* Discard the list of checkpoints. */ if (ckptbase != NULL) - __wt_meta_ckptlist_free(session, ckptbase); + __wt_meta_ckptlist_free(session, &ckptbase); /* Free allocated memory. */ __wt_scr_free(session, &vs->max_key); diff --git a/src/btree/bt_vrfy_dsk.c b/src/btree/bt_vrfy_dsk.c index 3a6fd8261ba..a4071c44aee 100644 --- a/src/btree/bt_vrfy_dsk.c +++ b/src/btree/bt_vrfy_dsk.c @@ -203,7 +203,8 @@ __verify_dsk_row( WT_ITEM *last; enum { FIRST, WAS_KEY, WAS_VALUE } last_cell_type; void *huffman; - uint32_t cell_num, cell_type, i, key_cnt, prefix; + size_t prefix; + uint32_t cell_num, cell_type, i, key_cnt; uint8_t *end; int cmp; @@ -343,8 +344,9 @@ __verify_dsk_row( if (cell_num > 1 && prefix > last->size) WT_ERR_VRFY(session, "key %" PRIu32 " on page at %s has a prefix " - "compression count of %" PRIu32 ", larger than " - "the length of the previous key, %" WT_SIZET_FMT, + "compression count of %" WT_SIZET_FMT + ", larger than the length of the previous key, %" + WT_SIZET_FMT, cell_num, tag, prefix, last->size); /* diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index ddaa2e5f70b..86484feb7c9 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@
-348,16 +348,19 @@ __tree_walk_internal(WT_SESSION_IMPL *session, /* If no page is active, begin a walk from the start/end of the tree. */ if (ref == NULL) { restart: /* - * We can reach here with a NULL or root reference; the release + * We can be here with a NULL or root WT_REF; the page release * function handles them internally, don't complicate this code * by calling them out. */ WT_ERR(__wt_page_release(session, couple, flags)); - couple = couple_orig = ref = &btree->root; - if (ref->page == NULL) - goto done; + /* + * We're not supposed to walk trees without root pages. As this + * has not always been the case, assert to debug that change. + */ + WT_ASSERT(session, btree->root.page != NULL); + couple = couple_orig = ref = &btree->root; initial_descent = true; goto descend; } diff --git a/src/config/config_api.c b/src/config/config_api.c index 05c5c1287a7..9f70ba65e9b 100644 --- a/src/config/config_api.c +++ b/src/config/config_api.c @@ -215,7 +215,7 @@ __wt_configure_method(WT_SESSION_IMPL *session, WT_CONFIG_ENTRY *entry; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - size_t cnt; + size_t cnt, len; char *newcheck_name, *p; /* @@ -276,11 +276,9 @@ __wt_configure_method(WT_SESSION_IMPL *session, */ WT_ERR(__wt_calloc_one(session, &entry)); entry->method = (*epp)->method; - WT_ERR(__wt_calloc_def(session, - strlen((*epp)->base) + strlen(",") + strlen(config) + 1, &p)); - (void)strcpy(p, (*epp)->base); - (void)strcat(p, ","); - (void)strcat(p, config); + len = strlen((*epp)->base) + strlen(",") + strlen(config) + 1; + WT_ERR(__wt_calloc_def(session, len, &p)); + snprintf(p, len, "%s,%s", (*epp)->base, config); entry->base = p; /* diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 99213c5b557..c5480897494 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -12,16 +12,20 @@ * __conn_dhandle_destroy -- * Destroy a data handle. 
*/ -static void +static int __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle) { + WT_DECL_RET; + + WT_WITH_DHANDLE(session, dhandle, ret = __wt_btree_discard(session)); + __wt_rwlock_destroy(session, &dhandle->rwlock); __wt_free(session, dhandle->name); __wt_free(session, dhandle->checkpoint); - __wt_free(session, dhandle->handle); __wt_spin_destroy(session, &dhandle->close_lock); __wt_stat_dsrc_discard(session, dhandle); __wt_overwrite_and_free(session, dhandle); + return (ret); } /* @@ -84,7 +88,7 @@ __wt_conn_dhandle_alloc( session->dhandle = dhandle; return (0); -err: __conn_dhandle_destroy(session, dhandle); +err: WT_TRET(__conn_dhandle_destroy(session, dhandle)); return (ret); } @@ -156,11 +160,11 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) WT_RET(__wt_evict_file_exclusive_on(session)); /* - * If we don't already have the schema lock, make it an error to try - * to acquire it. The problem is that we are holding an exclusive - * lock on the handle, and if we attempt to acquire the schema lock - * we might deadlock with a thread that has the schema lock and wants - * a handle lock (specifically, checkpoint). + * If we don't already have the schema lock, make it an error to try to + * acquire it. The problem is that we are holding an exclusive lock on + * the handle, and if we attempt to acquire the schema lock we might + * deadlock with a thread that has the schema lock and wants a handle + * lock. */ no_schema_lock = false; if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) { @@ -200,6 +204,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) } WT_TRET(__wt_btree_close(session)); + F_CLR(btree, WT_BTREE_SPECIAL_FLAGS); /* * If we marked a handle dead it will be closed by sweep, via @@ -403,10 +408,7 @@ __conn_btree_apply_internal(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, return (ret == EBUSY ? 
0 : ret); WT_SAVE_DHANDLE(session, ret = file_func(session, cfg)); - if (WT_META_TRACKING(session)) - WT_TRET(__wt_meta_track_handle_lock(session, false)); - else - WT_TRET(__wt_session_release_btree(session)); + WT_TRET(__wt_session_release_btree(session)); return (ret); } @@ -500,7 +502,12 @@ __wt_conn_dhandle_close_all( session->dhandle = dhandle; - /* Lock the handle exclusively. */ + /* + * Lock the handle exclusively. If this is part of + * schema-changing operation (indicated by metadata tracking + * being enabled), hold the lock for the duration of the + * operation. + */ WT_ERR(__wt_session_get_btree(session, dhandle->name, dhandle->checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY)); @@ -611,7 +618,7 @@ __wt_conn_dhandle_discard_single( */ if (ret == 0 || final) { __conn_btree_config_clear(session); - __conn_dhandle_destroy(session, dhandle); + WT_TRET(__conn_dhandle_destroy(session, dhandle)); session->dhandle = NULL; } diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 8c186c63939..22d90b08438 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -10,7 +10,7 @@ #define WT_DHANDLE_CAN_DISCARD(dhandle) \ (!F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_OPEN) && \ - dhandle->session_inuse == 0 && dhandle->session_ref == 0) + (dhandle)->session_inuse == 0 && (dhandle)->session_ref == 0) /* * __sweep_mark -- diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index 0ec917fbf95..274dc1e8f62 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -325,24 +325,21 @@ __curfile_remove(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_REMOVE_API_CALL(cursor, session, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_remove(cbt), ret); + WT_ERR(__wt_btcur_remove(cbt)); /* - * After a successful remove, copy the key: the value is not available. 
+ * Remove with a search-key is fire-and-forget, no position and no key. + * Remove starting from a position maintains the position and a key. + * We don't know which it was at this layer, so can only assert the key + * is not set at all, or internal. There's never a value. */ - if (ret == 0) { - if (F_ISSET(cursor, WT_CURSTD_KEY_INT) && - !WT_DATA_IN_ITEM(&(cursor)->key)) { - WT_ERR(__wt_buf_set(session, &cursor->key, - cursor->key.data, cursor->key.size)); - F_CLR(cursor, WT_CURSTD_KEY_INT); - F_SET(cursor, WT_CURSTD_KEY_EXT); - } - F_CLR(cursor, WT_CURSTD_VALUE_SET); - } + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == 0 || + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0); err: CURSOR_UPDATE_API_END(session, ret); return (ret); diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 013a64ef2d5..8df8e201173 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -270,7 +270,7 @@ again: iter->positioned = true; return (ret); } - else if (ret == WT_NOTFOUND) { + if (ret == WT_NOTFOUND) { WT_RET(__curjoin_iter_close_all(iter->child)); entry->subjoin->iter = NULL; iter->child = NULL; @@ -518,8 +518,7 @@ __curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, } if (disjunction && end == endmax) return (WT_NOTFOUND); - else - return (0); + return (0); } typedef struct { diff --git a/src/cursor/cur_json.c b/src/cursor/cur_json.c index 5870d14273e..0ad3c4f4201 100644 --- a/src/cursor/cur_json.c +++ b/src/cursor/cur_json.c @@ -23,20 +23,20 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, bool, const char *, size_t *); #define WT_PACK_JSON_GET(session, pv, jstr) do { \ - switch (pv.type) { \ + switch ((pv).type) { \ case 'x': \ break; \ case 's': \ case 'S': \ - WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \ - pv.type = pv.type == 's' ? 
'j' : 'J'; \ + WT_RET(json_string_arg(session, &(jstr), &(pv).u.item));\ + (pv).type = (pv).type == 's' ? 'j' : 'J'; \ break; \ case 'b': \ case 'h': \ case 'i': \ case 'l': \ case 'q': \ - WT_RET(json_int_arg(session, &jstr, &pv.u.i)); \ + WT_RET(json_int_arg(session, &(jstr), &(pv).u.i)); \ break; \ case 'B': \ case 'H': \ @@ -46,11 +46,11 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, case 'r': \ case 'R': \ case 't': \ - WT_RET(json_uint_arg(session, &jstr, &pv.u.u)); \ + WT_RET(json_uint_arg(session, &(jstr), &(pv).u.u)); \ break; \ case 'u': \ - WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \ - pv.type = 'K'; \ + WT_RET(json_string_arg(session, &(jstr), &(pv).u.item));\ + (pv).type = 'K'; \ break; \ /* User format strings have already been validated. */ \ WT_ILLEGAL_VALUE(session); \ @@ -304,7 +304,6 @@ __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor) __wt_free(session, json->value_buf); __wt_free(session, json); } - return; } /* @@ -323,33 +322,32 @@ __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode) if (bufsz >= 1) *buf = ch; return (1); - } else { - abbrev = '\0'; - switch (ch) { - case '\\': - case '"': - abbrev = ch; - break; - case '\f': - abbrev = 'f'; - break; - case '\n': - abbrev = 'n'; - break; - case '\r': - abbrev = 'r'; - break; - case '\t': - abbrev = 't'; - break; - } - if (abbrev != '\0') { - if (bufsz >= 2) { - *buf++ = '\\'; - *buf = abbrev; - } - return (2); + } + abbrev = '\0'; + switch (ch) { + case '\\': + case '"': + abbrev = ch; + break; + case '\f': + abbrev = 'f'; + break; + case '\n': + abbrev = 'n'; + break; + case '\r': + abbrev = 'r'; + break; + case '\t': + abbrev = 't'; + break; + } + if (abbrev != '\0') { + if (bufsz >= 2) { + *buf++ = '\\'; + *buf = abbrev; } + return (2); } } if (bufsz >= 6) { @@ -421,16 +419,16 @@ __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat, #define MATCH_KEYWORD(session, in, result, 
keyword, matchval) do { \ size_t _kwlen = strlen(keyword); \ if (strncmp(in, keyword, _kwlen) == 0 && \ - !__wt_isalnum((u_char)in[_kwlen])) { \ - in += _kwlen; \ - result = matchval; \ + !__wt_isalnum((u_char)(in)[_kwlen])) { \ + (in) += _kwlen; \ + (result) = matchval; \ } else { \ - const char *_bad = in; \ - while (__wt_isalnum((u_char)*in)) \ - in++; \ + const char *_bad = (in); \ + while (__wt_isalnum((u_char)*(in))) \ + (in)++; \ WT_RET_MSG(session, EINVAL, \ "unknown keyword \"%.*s\" in JSON", \ - (int)(in - _bad), _bad); \ + (int)((in) - _bad), _bad); \ } \ } while (0) @@ -692,12 +690,13 @@ json_uint_arg(WT_SESSION_IMPL *session, const char **jstr, uint64_t *up) #define JSON_EXPECT_TOKEN_GET(session, jstr, tokval, start, sz) do { \ int __tok; \ - WT_RET(__wt_json_token((WT_SESSION *)session, jstr, &__tok, &start, &sz));\ - if (__tok != tokval) \ + WT_RET(__wt_json_token( \ + (WT_SESSION *)(session), jstr, &__tok, &(start), &(sz))); \ + if (__tok != (tokval)) \ WT_RET_MSG(session, EINVAL, \ "expected JSON %s, got %s", \ __wt_json_tokname(tokval), __wt_json_tokname(__tok)); \ - jstr = start + sz; \ + (jstr) = (start) + (sz); \ } while (0) #define JSON_EXPECT_TOKEN(session, jstr, tokval) do { \ diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c index 10e2fdf28be..fbfc73956e2 100644 --- a/src/cursor/cur_metadata.c +++ b/src/cursor/cur_metadata.c @@ -16,7 +16,7 @@ WT_CURSOR_NEEDKEY(cursor); \ WT_ERR(__wt_buf_set(session, \ &((WT_CURSOR_METADATA *)(cursor))->file_cursor->key, \ - cursor->key.data, cursor->key.size)); \ + (cursor)->key.data, (cursor)->key.size)); \ F_SET(((WT_CURSOR_METADATA *)(cursor))->file_cursor, \ WT_CURSTD_KEY_EXT); \ } while (0) @@ -25,7 +25,7 @@ WT_CURSOR_NEEDVALUE(cursor); \ WT_ERR(__wt_buf_set(session, \ &((WT_CURSOR_METADATA *)(cursor))->file_cursor->value, \ - cursor->value.data, cursor->value.size)); \ + (cursor)->value.data, (cursor)->value.size)); \ F_SET(((WT_CURSOR_METADATA *)(cursor))->file_cursor, \ 
WT_CURSTD_VALUE_EXT); \ } while (0) diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index 5fde64c74ca..c5ccdb1b649 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -163,7 +163,6 @@ static void __curstat_set_value(WT_CURSOR *cursor, ...) { WT_UNUSED(cursor); - return; } /* diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index 7e8cd153d2d..3b72bb0730f 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -14,8 +14,8 @@ static int __curtable_update(WT_CURSOR *cursor); #define APPLY_CG(ctable, f) do { \ WT_CURSOR **__cp; \ u_int __i; \ - for (__i = 0, __cp = ctable->cg_cursors; \ - __i < WT_COLGROUPS(ctable->table); \ + for (__i = 0, __cp = (ctable)->cg_cursors; \ + __i < WT_COLGROUPS((ctable)->table); \ __i++, __cp++) \ WT_TRET((*__cp)->f(*__cp)); \ } while (0) @@ -511,9 +511,16 @@ __curtable_insert(WT_CURSOR *cursor) */ F_SET(primary, flag_orig | WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); - if (ret == WT_DUPLICATE_KEY && F_ISSET(cursor, WT_CURSTD_OVERWRITE)) + if (ret == WT_DUPLICATE_KEY && F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { WT_ERR(__curtable_update(cursor)); - else { + + /* + * The cursor is no longer positioned. This isn't just cosmetic, + * without a reset, iteration on this cursor won't start at the + * beginning/end of the table. + */ + APPLY_CG(ctable, reset); + } else { WT_ERR(ret); for (i = 1; i < WT_COLGROUPS(ctable->table); i++, cp++) { @@ -601,22 +608,53 @@ err: CURSOR_UPDATE_API_END(session, ret); static int __curtable_remove(WT_CURSOR *cursor) { + WT_CURSOR *primary; WT_CURSOR_TABLE *ctable; WT_DECL_RET; WT_SESSION_IMPL *session; + bool positioned; ctable = (WT_CURSOR_TABLE *)cursor; JOINABLE_CURSOR_REMOVE_API_CALL(cursor, session, NULL); WT_ERR(__curtable_open_indices(ctable)); + /* Check if the cursor was positioned. 
*/ + primary = *ctable->cg_cursors; + positioned = F_ISSET(primary, WT_CURSTD_KEY_INT); + /* Find the old record so it can be removed from indices */ if (ctable->table->nindices > 0) { APPLY_CG(ctable, search); + if (ret == WT_NOTFOUND) + goto notfound; WT_ERR(ret); WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, remove), false)); } APPLY_CG(ctable, remove); + if (ret == WT_NOTFOUND) + goto notfound; + WT_ERR(ret); + +notfound: + /* + * If the cursor is configured to overwrite and the record is not found, + * that is exactly what we want. + */ + if (ret == WT_NOTFOUND && F_ISSET(primary, WT_CURSTD_OVERWRITE)) + ret = 0; + + /* + * If the cursor was positioned, it stays positioned with a key but + * no value, otherwise, there's no position, key or value. This isn't + * just cosmetic, without a reset, iteration on this cursor won't start + * at the beginning/end of the table. + */ + F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (positioned) + F_SET(primary, WT_CURSTD_KEY_INT); + else + APPLY_CG(ctable, reset); err: CURSOR_UPDATE_API_END(session, ret); return (ret); @@ -989,11 +1027,15 @@ __wt_curtable_open(WT_SESSION_IMPL *session, if (0) { err: if (*cursorp != NULL) { - if (*cursorp != cursor) - WT_TRET(__wt_cursor_close(*cursorp)); + /* + * When a dump cursor is opened, then *cursorp, not + * cursor, is the dump cursor. Close the dump cursor, + * and the table cursor will be closed as its child. + */ + cursor = *cursorp; *cursorp = NULL; } - WT_TRET(__curtable_close(cursor)); + WT_TRET(cursor->close(cursor)); } __wt_scr_free(session, &tmp); diff --git a/src/docs/cursor-ops.dox b/src/docs/cursor-ops.dox index b743d81db57..e479ff29191 100644 --- a/src/docs/cursor-ops.dox +++ b/src/docs/cursor-ops.dox @@ -145,9 +145,5 @@ that may not be modified or freed by the application. If a longer scope is required, the application must make a copy of the memory before the cursor is re-used, closed or reset.
-The comments in this example code explain when the application can safely -modify memory passed to WT_CURSOR::set_key or WT_CURSOR::set_value: - -@snippet ex_scope.c cursor scope operation @m_endif */ diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index f463e6bc615..e5fce3d0d5d 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -2,22 +2,45 @@ @section version_292 Upgrading to Version 2.9.2 <dl> -<dt>WiredTiger Utility now supports truncate</dt> + +<dt>WiredTiger utility now supports truncate</dt> <dd> -The WiredTiger Utility can now \c truncate an object. Removing all contents -from the specified object. +The WiredTiger utility \c wt can now \c truncate objects, removing all +contents from the specified object. </dd> + <dt>Handle list lock statistics</dt> <dd> In the 2.9.1 release we added statistics tracking handle list lock timing, we have switched that lock from a spin lock to a read-write lock, and consequently changed the statistics tracking lock related wait time. </dd> -</dl> -@section version_291 Upgrading to Version 2.9.1 +<dt>Forced and named checkpoint error conditions changed</dt> +<dd> +There are new cases where checkpoints created with an explicit name or the +"force" configuration option can return an EBUSY error. This can happen if +the checkpoint overlaps with other schema operations, for example table create. +</dd> + +<dt>WT_CURSOR::remove may not return a positioned cursor</dt> +<dd> +The WT_CURSOR::remove method was previously documented to always return a +positioned cursor on success, which is not possible when \c overwrite=true +and the record does not exist. +The documentation has been updated, and the method has been changed to +never return a cursor position unless called with an existing cursor +position. 
In other words, if the cursor is positioned and the +WT_CURSOR::remove is called, the cursor will remain positioned; if the +cursor is not positioned and the WT_CURSOR::remove method is called, the +cursor will not be positioned on return. +</dd> + +</dl><hr> +@section version_291 Upgrading to Version 2.9.1 <dl> + <dt>Changes to hazard pointer configuration</dt> <dd> The \c hazard_max parameter to ::wiredtiger_open is now ignored. Memory is @@ -33,10 +56,11 @@ have added a new \c access_pattern_hint configuration option available for WT_SESSION::create that can be used to restore the old default by setting the value to "random". </dd> -</dl> +</dl><hr> @section version_290 Upgrading to Version 2.9.0 <dl> + <dt>Changes to cursor behavior after WT_CURSOR::insert</dt> <dd> After a successful call to WT_CURSOR::insert, unless a cursor has record diff --git a/src/docs/wtperf.dox b/src/docs/wtperf.dox index 2eac0fef3f4..6bdcf5f4f8d 100644 --- a/src/docs/wtperf.dox +++ b/src/docs/wtperf.dox @@ -167,6 +167,8 @@ do population phase; false to use existing database number of WiredTiger databases to use. Each database will execute the workload using a separate home directory and complete set of worker threads @par drop_tables (boolean, default=false) Whether to drop all tables at the end of the run, and report time taken to do the drop. +@par in_memory (boolean, default=false) +Whether to create the database in-memory. @par icount (unsigned int, default=5000) number of records to initially populate. If multiple tables are configured the count is spread evenly across all tables. 
@par idle_table_cycle (unsigned int, default=0) diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c index 17b038fb003..3d8f4a61ca7 100644 --- a/src/evict/evict_file.c +++ b/src/evict/evict_file.c @@ -15,15 +15,27 @@ int __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) { + WT_BTREE *btree; WT_DECL_RET; WT_PAGE *page; WT_REF *next_ref, *ref; + btree = S2BT(session); + /* - * We need exclusive access to the file -- disable ordinary eviction - * and drain any blocks already queued. + * We need exclusive access to the file, we're about to discard the root + * page. Assert eviction has been locked out. */ - WT_RET(__wt_evict_file_exclusive_on(session)); + WT_ASSERT(session, + btree->evict_disabled > 0 || + !F_ISSET(session->dhandle, WT_DHANDLE_OPEN)); + + /* + * We do discard objects without pages in memory. If that's the case, + * we're done. + */ + if (btree->root.page == NULL) + return (0); /* Make sure the oldest transaction ID is up-to-date. */ WT_RET(__wt_txn_update_oldest( @@ -102,7 +114,5 @@ err: /* On error, clear any left-over tree walk. */ session, next_ref, WT_READ_NO_EVICT)); } - __wt_evict_file_exclusive_off(session); - return (ret); } diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index f1949a7c320..84c9990832d 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -824,31 +824,19 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) btree = S2BT(session); cache = S2C(session)->cache; - /* - * Hold the walk lock to set the no-eviction flag. - * - * The no-eviction flag can be set permanently, in which case we never - * increment the no-eviction count. - */ + /* Hold the walk lock to turn off eviction. 
*/ __wt_spin_lock(session, &cache->evict_walk_lock); - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) { - if (btree->evict_disabled != 0) - ++btree->evict_disabled; + if (++btree->evict_disabled > 1) { __wt_spin_unlock(session, &cache->evict_walk_lock); return (0); } - ++btree->evict_disabled; /* * Ensure no new pages from the file will be queued for eviction after - * this point. + * this point, then clear any existing LRU eviction walk for the file. */ - F_SET(btree, WT_BTREE_NO_EVICTION); (void)__wt_atomic_addv32(&cache->pass_intr, 1); - - /* Clear any existing LRU eviction walk for the file. */ - WT_WITH_PASS_LOCK(session, - ret = __evict_clear_walk(session)); + WT_WITH_PASS_LOCK(session, ret = __evict_clear_walk(session)); (void)__wt_atomic_subv32(&cache->pass_intr, 1); WT_ERR(ret); @@ -879,7 +867,6 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) if (0) { err: --btree->evict_disabled; - F_CLR(btree, WT_BTREE_NO_EVICTION); } __wt_spin_unlock(session, &cache->evict_walk_lock); return (ret); @@ -904,16 +891,11 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) */ WT_DIAGNOSTIC_YIELD; - WT_ASSERT(session, - btree->evict_ref == NULL && F_ISSET(btree, WT_BTREE_NO_EVICTION)); - - /* - * The no-eviction flag can be set permanently, in which case we never - * increment the no-eviction count. - */ + /* Hold the walk lock to turn on eviction. */ __wt_spin_lock(session, &cache->evict_walk_lock); - if (btree->evict_disabled > 0 && --btree->evict_disabled == 0) - F_CLR(btree, WT_BTREE_NO_EVICTION); + WT_ASSERT(session, + btree->evict_ref == NULL && btree->evict_disabled > 0); + --btree->evict_disabled; __wt_spin_unlock(session, &cache->evict_walk_lock); } @@ -921,7 +903,7 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) #define EVICT_TUNE_DATAPT_MIN 3 /* Data points needed before deciding if we should keep adding workers or settle on an earlier value. 
*/ -#define EVICT_TUNE_PERIOD 2 /* Tune period in seconds */ +#define EVICT_TUNE_PERIOD 1 /* Tune period in seconds */ /* * __evict_tune_workers -- @@ -952,7 +934,6 @@ __evict_tune_workers(WT_SESSION_IMPL *session) cache = conn->cache; WT_ASSERT(session, conn->evict_threads.threads[0]->session == session); - pgs_evicted_persec_cur = 0; if (conn->evict_tune_stable) return (0); @@ -984,7 +965,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session) pgs_evicted_persec_cur = (delta_pages * WT_THOUSAND) / delta_msec; conn->evict_tune_num_points++; - /* Keep track of the maximum eviction throughput seen and the number + /* + * Keep track of the maximum eviction throughput seen and the number * of workers corresponding to that throughput. */ if (pgs_evicted_persec_cur > conn->evict_tune_pg_sec_max) { @@ -1372,7 +1354,7 @@ retry: while (slot < max_entries) { /* Skip files that don't allow eviction. */ btree = dhandle->handle; - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) + if (btree->evict_disabled > 0) continue; /* @@ -1428,13 +1410,23 @@ retry: while (slot < max_entries) { * the tree's current eviction point, and part of the process is * waiting on this thread to acknowledge that action. */ - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) && + if (btree->evict_disabled == 0 && !__wt_spin_trylock(session, &cache->evict_walk_lock)) { - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { + if (btree->evict_disabled == 0) { + /* + * Assert the handle has a root page: eviction + * should have been locked out if the tree is + * being discarded or the root page is changing. + * As this has not always been the case, assert + * to debug that change. 
+ */ + WT_ASSERT(session, btree->root.page != NULL); + cache->evict_file_next = dhandle; - WT_WITH_DHANDLE(session, dhandle, ret = - __evict_walk_file(session, queue, - max_entries, &slot)); + WT_WITH_DHANDLE(session, dhandle, + ret = __evict_walk_file( + session, queue, max_entries, &slot)); + WT_ASSERT(session, session->split_gen == 0); } __wt_spin_unlock(session, &cache->evict_walk_lock); @@ -1663,7 +1655,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, * eviction fairly visits all pages in trees with a lot of in-cache * content. */ - switch (btree->evict_walk_state) { + switch ((WT_EVICT_WALK_START)btree->evict_start_type) { case WT_EVICT_WALK_NEXT: break; case WT_EVICT_WALK_PREV: @@ -1720,9 +1712,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, * Try a different walk start point next time if a * walk gave up. */ - btree->evict_walk_state = - (btree->evict_walk_state + 1) % - WT_EVICT_WALK_MAX_LEGAL_VALUE; + btree->evict_start_type = + (btree->evict_start_type + 1) % + WT_EVICT_WALK_START_NUM; break; } @@ -2124,6 +2116,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *txn_state; uint64_t init_evict_count, max_pages_evicted; + bool timer; conn = S2C(session); cache = conn->cache; @@ -2144,7 +2137,9 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) __wt_evict_server_wake(session); /* Track how long application threads spend doing eviction. 
*/ - if (WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL)) + timer = + WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL); + if (timer) __wt_epoch(session, &enter); for (init_evict_count = cache->pages_evict;; ret = 0) { @@ -2210,8 +2205,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) } } -err: if (WT_STAT_ENABLED(session) && - !F_ISSET(session, WT_SESSION_INTERNAL)) { +err: if (timer) { __wt_epoch(session, &leave); WT_STAT_CONN_INCRV(session, application_cache_time, WT_TIMEDIFF_US(leave, enter)); @@ -2239,7 +2233,7 @@ __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) page = ref->page; if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || - F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + S2BT(session)->evict_disabled > 0) return (false); /* Append to the urgent queue if we can. */ @@ -2249,7 +2243,7 @@ __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) __wt_spin_lock(session, &cache->evict_queue_lock); if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || - F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + S2BT(session)->evict_disabled > 0) goto done; __wt_spin_lock(session, &urgent_queue->evict_lock); diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 5b17a78a4dd..85689efd0b1 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -480,8 +480,8 @@ __evict_review( if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) return (__wt_split_insert(session, ref)); - /* We are done if reconciliation is disabled. */ - if (F_ISSET(S2BT(session), WT_BTREE_NO_RECONCILE)) + /* If splits are the only permitted operation, we're done. */ + if (F_ISSET(S2BT(session), WT_BTREE_ALLOW_SPLITS)) return (EBUSY); } diff --git a/src/include/api.h b/src/include/api.h index 2783d17f825..a3636eb8040 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -7,22 +7,21 @@ */ /* Standard entry points to the API: declares/initializes local variables. 
*/ -#define API_SESSION_INIT(s, h, n, cur, dh) \ +#define API_SESSION_INIT(s, h, n, dh) \ WT_DATA_HANDLE *__olddh = (s)->dhandle; \ const char *__oldname = (s)->name; \ - (s)->cursor = (cur); \ (s)->dhandle = (dh); \ (s)->name = (s)->lastop = #h "." #n; \ -#define API_CALL_NOCONF(s, h, n, cur, dh) do { \ - API_SESSION_INIT(s, h, n, cur, dh); \ +#define API_CALL_NOCONF(s, h, n, dh) do { \ + API_SESSION_INIT(s, h, n, dh); \ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ __wt_verbose((s), WT_VERB_API, "CALL: " #h ":" #n) -#define API_CALL(s, h, n, cur, dh, config, cfg) do { \ - const char *cfg[] = \ +#define API_CALL(s, h, n, dh, config, cfg) do { \ + const char *(cfg)[] = \ { WT_CONFIG_BASE(s, h##_##n), config, NULL }; \ - API_SESSION_INIT(s, h, n, cur, dh); \ + API_SESSION_INIT(s, h, n, dh); \ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ if ((config) != NULL) \ WT_ERR(__wt_config_check((s), \ @@ -42,17 +41,17 @@ } while (0) /* An API call wrapped in a transaction if necessary. */ -#define TXN_API_CALL(s, h, n, cur, bt, config, cfg) do { \ +#define TXN_API_CALL(s, h, n, bt, config, cfg) do { \ bool __autotxn = false; \ - API_CALL(s, h, n, bt, cur, config, cfg); \ + API_CALL(s, h, n, bt, config, cfg); \ __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ if (__autotxn) \ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) /* An API call wrapped in a transaction if necessary. 
*/ -#define TXN_API_CALL_NOCONF(s, h, n, cur, bt) do { \ +#define TXN_API_CALL_NOCONF(s, h, n, bt) do { \ bool __autotxn = false; \ - API_CALL_NOCONF(s, h, n, cur, bt); \ + API_CALL_NOCONF(s, h, n, bt); \ __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ if (__autotxn) \ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) @@ -63,15 +62,16 @@ if (__autotxn) { \ if (F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT)) \ F_CLR(&(s)->txn, WT_TXN_AUTOCOMMIT); \ - else if (ret == 0 && !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \ - ret = __wt_txn_commit((s), NULL); \ + else if ((ret) == 0 && \ + !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \ + (ret) = __wt_txn_commit((s), NULL); \ else { \ if (retry) \ WT_TRET(__wt_session_copy_values(s)); \ WT_TRET(__wt_txn_rollback((s), NULL)); \ - if ((ret == 0 || ret == WT_ROLLBACK) && \ + if (((ret) == 0 || (ret) == WT_ROLLBACK) && \ (retry)) { \ - ret = 0; \ + (ret) = 0; \ continue; \ } \ WT_TRET(__wt_session_reset_cursors(s, false)); \ @@ -98,24 +98,24 @@ #define CONNECTION_API_CALL(conn, s, n, config, cfg) \ s = (conn)->default_session; \ - API_CALL(s, WT_CONNECTION, n, NULL, NULL, config, cfg) + API_CALL(s, WT_CONNECTION, n, NULL, config, cfg) #define CONNECTION_API_CALL_NOCONF(conn, s, n) \ s = (conn)->default_session; \ - API_CALL_NOCONF(s, WT_CONNECTION, n, NULL, NULL) + API_CALL_NOCONF(s, WT_CONNECTION, n, NULL) #define SESSION_API_CALL(s, n, config, cfg) \ - API_CALL(s, WT_SESSION, n, NULL, NULL, config, cfg) + API_CALL(s, WT_SESSION, n, NULL, config, cfg) #define SESSION_API_CALL_NOCONF(s, n) \ - API_CALL_NOCONF(s, WT_SESSION, n, NULL, NULL) + API_CALL_NOCONF(s, WT_SESSION, n, NULL) #define SESSION_TXN_API_CALL(s, n, config, cfg) \ - TXN_API_CALL(s, WT_SESSION, n, NULL, NULL, config, cfg) + TXN_API_CALL(s, WT_SESSION, n, NULL, config, cfg) #define CURSOR_API_CALL(cur, s, n, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - API_CALL_NOCONF(s, WT_CURSOR, n, cur, \ + API_CALL_NOCONF(s, WT_CURSOR, n, \ ((bt) == NULL) ? 
NULL : ((WT_BTREE *)(bt))->dhandle) #define JOINABLE_CURSOR_CALL_CHECK(cur) \ @@ -128,7 +128,7 @@ #define CURSOR_REMOVE_API_CALL(cur, s, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - TXN_API_CALL_NOCONF(s, WT_CURSOR, remove, cur, \ + TXN_API_CALL_NOCONF(s, WT_CURSOR, remove, \ ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); #define JOINABLE_CURSOR_REMOVE_API_CALL(cur, s, bt) \ @@ -137,7 +137,7 @@ #define CURSOR_UPDATE_API_CALL(cur, s, n, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - TXN_API_CALL_NOCONF(s, WT_CURSOR, n, cur, \ + TXN_API_CALL_NOCONF(s, WT_CURSOR, n, \ ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \ if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && \ !F_ISSET((WT_BTREE *)(bt), WT_BTREE_IGNORE_CACHE) && \ @@ -153,4 +153,4 @@ #define ASYNCOP_API_CALL(conn, s, n) \ s = (conn)->default_session; \ - API_CALL_NOCONF(s, asyncop, n, NULL, NULL) + API_CALL_NOCONF(s, asyncop, n, NULL) diff --git a/src/include/bitstring.i b/src/include/bitstring.i index 08746beb9b9..118dc0bba01 100644 --- a/src/include/bitstring.i +++ b/src/include/bitstring.i @@ -230,7 +230,7 @@ __bit_getv(uint8_t *bitf, uint64_t entry, uint8_t width) #define __BIT_GET(len, mask) \ case len: \ if (__bit_test(bitf, bit)) \ - value |= mask; \ + value |= (mask); \ ++bit \ /* FALLTHROUGH */ diff --git a/src/include/btmem.h b/src/include/btmem.h index 39ca223aebf..f1bb08d2699 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -507,7 +507,7 @@ struct __wt_page { #define WT_INTL_INDEX_GET_SAFE(page) \ ((page)->u.intl.__index) #define WT_INTL_INDEX_GET(session, page, pindex) do { \ - WT_ASSERT(session, session->split_gen != 0); \ + WT_ASSERT(session, (session)->split_gen != 0); \ (pindex) = WT_INTL_INDEX_GET_SAFE(page); \ } while (0) #define WT_INTL_INDEX_SET(page, v) do { \ @@ -868,7 +868,7 @@ struct __wt_col { * Return the 0-based array offset based on a WT_COL reference. 
*/ #define WT_COL_SLOT(page, cip) \ - ((uint32_t)(((WT_COL *)cip) - (page)->pg_var)) + ((uint32_t)(((WT_COL *)(cip)) - (page)->pg_var)) /* * WT_IKEY -- @@ -977,10 +977,10 @@ struct __wt_insert { } key; } u; -#define WT_INSERT_KEY_SIZE(ins) (((WT_INSERT *)ins)->u.key.size) +#define WT_INSERT_KEY_SIZE(ins) (((WT_INSERT *)(ins))->u.key.size) #define WT_INSERT_KEY(ins) \ - ((void *)((uint8_t *)(ins) + ((WT_INSERT *)ins)->u.key.offset)) -#define WT_INSERT_RECNO(ins) (((WT_INSERT *)ins)->u.recno) + ((void *)((uint8_t *)(ins) + ((WT_INSERT *)(ins))->u.key.offset)) +#define WT_INSERT_RECNO(ins) (((WT_INSERT *)(ins))->u.recno) WT_INSERT *next[0]; /* forward-linked skip list */ }; @@ -989,9 +989,9 @@ struct __wt_insert { * Skiplist helper macros. */ #define WT_SKIP_FIRST(ins_head) \ - (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)ins_head)->head[0]) + (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)(ins_head))->head[0]) #define WT_SKIP_LAST(ins_head) \ - (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)ins_head)->tail[0]) + (((ins_head) == NULL) ? 
NULL : ((WT_INSERT_HEAD *)(ins_head))->tail[0]) #define WT_SKIP_NEXT(ins) ((ins)->next[0]) #define WT_SKIP_FOREACH(ins, ins_head) \ for ((ins) = WT_SKIP_FIRST(ins_head); \ @@ -1004,7 +1004,7 @@ struct __wt_insert { #define WT_PAGE_ALLOC_AND_SWAP(s, page, dest, v, count) do { \ if (((v) = (dest)) == NULL) { \ WT_ERR(__wt_calloc_def(s, count, &(v))); \ - if (__wt_atomic_cas_ptr(&dest, NULL, v)) \ + if (__wt_atomic_cas_ptr(&(dest), NULL, v)) \ __wt_cache_page_inmem_incr( \ s, page, (count) * sizeof(*(v))); \ else \ diff --git a/src/include/btree.h b/src/include/btree.h index 976c1d2110c..88312f408cc 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -118,15 +118,17 @@ struct __wt_btree { uint64_t last_recno; /* Column-store last record number */ - WT_REF root; /* Root page reference */ - bool modified; /* If the tree ever modified */ - bool bulk_load_ok; /* Bulk-load is a possibility */ + WT_REF root; /* Root page reference */ + bool modified; /* If the tree ever modified */ + uint8_t original; /* Newly created: bulk-load possible + (want a bool but needs atomic cas) */ + + bool lsm_primary; /* Handle is/was the LSM primary */ WT_BM *bm; /* Block manager reference */ u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ uint64_t checkpoint_gen; /* Checkpoint generation */ - bool include_checkpoint_txn;/* ID checks include checkpoint */ uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ @@ -139,14 +141,10 @@ struct __wt_btree { u_int evict_walk_period; /* Skip this many LRU walks */ u_int evict_walk_saved; /* Saved walk skips for checkpoints */ u_int evict_walk_skips; /* Number of walks skipped */ - u_int evict_disabled; /* Eviction disabled count */ + int evict_disabled; /* Eviction disabled count */ volatile uint32_t evict_busy; /* Count of threads in eviction */ - enum { - WT_EVICT_WALK_NEXT, WT_EVICT_WALK_PREV, - WT_EVICT_WALK_RAND_NEXT, WT_EVICT_WALK_RAND_PREV - } evict_walk_state; /* Eviction walk state 
*/ -#define WT_EVICT_WALK_MAX_LEGAL_VALUE WT_EVICT_WALK_RAND_PREV + 1 - + int evict_start_type; /* Start position for eviction walk + (see WT_EVICT_WALK_START). */ enum { WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING } checkpointing; /* Checkpoint in progress */ @@ -159,15 +157,14 @@ struct __wt_btree { WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */ /* Flags values up to 0xff are reserved for WT_DHANDLE_* */ -#define WT_BTREE_BULK 0x000100 /* Bulk-load handle */ -#define WT_BTREE_IGNORE_CACHE 0x000200 /* Cache-resident object */ -#define WT_BTREE_IN_MEMORY 0x000400 /* Cache-resident object */ -#define WT_BTREE_LOOKASIDE 0x000800 /* Look-aside table */ -#define WT_BTREE_LSM_PRIMARY 0x001000 /* Handle is current LSM primary */ -#define WT_BTREE_NO_CHECKPOINT 0x002000 /* Disable checkpoints */ -#define WT_BTREE_NO_EVICTION 0x004000 /* Disable eviction */ +#define WT_BTREE_ALLOW_SPLITS 0x000100 /* Allow splits, even with no evict */ +#define WT_BTREE_BULK 0x000200 /* Bulk-load handle */ +#define WT_BTREE_CLOSED 0x000400 /* Handle closed */ +#define WT_BTREE_IGNORE_CACHE 0x000800 /* Cache-resident object */ +#define WT_BTREE_IN_MEMORY 0x001000 /* Cache-resident object */ +#define WT_BTREE_LOOKASIDE 0x002000 /* Look-aside table */ +#define WT_BTREE_NO_CHECKPOINT 0x004000 /* Disable checkpoints */ #define WT_BTREE_NO_LOGGING 0x008000 /* Disable logging */ -#define WT_BTREE_NO_RECONCILE 0x010000 /* Allow splits, even with no evict */ #define WT_BTREE_REBALANCE 0x020000 /* Handle is for rebalance */ #define WT_BTREE_SALVAGE 0x040000 /* Handle is for salvage */ #define WT_BTREE_SKIP_CKPT 0x080000 /* Handle skipped checkpoint */ diff --git a/src/include/btree.i b/src/include/btree.i index 315efa86fa6..eefc2db075d 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -149,7 +149,7 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) if (WT_PAGE_IS_INTERNAL(page)) { (void)__wt_atomic_add64(&btree->bytes_dirty_intl, size); 
(void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); - } else if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + } else if (!btree->lsm_primary) { (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); } @@ -285,7 +285,7 @@ __wt_cache_page_byte_dirty_decr( decr, "WT_BTREE.bytes_dirty_intl"); __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_intl, decr, "WT_CACHE.bytes_dirty_intl"); - } else if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + } else if (!btree->lsm_primary) { __wt_cache_decr_check_uint64(session, &btree->bytes_dirty_leaf, decr, "WT_BTREE.bytes_dirty_leaf"); __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_leaf, @@ -345,7 +345,7 @@ __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page) (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); (void)__wt_atomic_add64(&cache->pages_dirty_intl, 1); } else { - if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + if (!btree->lsm_primary) { (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); } @@ -444,7 +444,7 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) __wt_cache_decr_zero_uint64(session, &cache->bytes_dirty_intl, modify->bytes_dirty, "WT_CACHE.bytes_dirty_intl"); - } else if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + } else if (!btree->lsm_primary) { __wt_cache_decr_zero_uint64(session, &btree->bytes_dirty_leaf, modify->bytes_dirty, "WT_BTREE.bytes_dirty_leaf"); @@ -1229,7 +1229,6 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) * data in the last skiplist on the page. Split if there are enough * items and the skiplist does not fit within a single disk page. */ - ins_head = page->type == WT_PAGE_ROW_LEAF ? (page->entries == 0 ? 
WT_ROW_INSERT_SMALLEST(page) : @@ -1401,7 +1400,7 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) if (page->read_gen != WT_READGEN_OLDEST || LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || - F_ISSET(btree, WT_BTREE_NO_EVICTION) || + btree->evict_disabled > 0 || !__wt_page_can_evict(session, ref, NULL)) return (__wt_hazard_clear(session, ref)); @@ -1521,7 +1520,7 @@ __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize) return (false); /* A tree that can be evicted always requires a switch. */ - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) + if (btree->evict_disabled == 0) return (true); /* Check for a tree with a single leaf page. */ @@ -1546,55 +1545,6 @@ __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize) } /* - * __wt_btree_lsm_switch_primary -- - * Switch a btree handle to/from the current primary chunk of an LSM tree. - */ -static inline void -__wt_btree_lsm_switch_primary(WT_SESSION_IMPL *session, bool on) -{ - WT_BTREE *btree; - WT_CACHE *cache; - WT_PAGE *child, *root; - WT_PAGE_INDEX *pindex; - WT_REF *first; - size_t size; - - btree = S2BT(session); - cache = S2C(session)->cache; - root = btree->root.page; - - if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) - F_SET(btree, WT_BTREE_LSM_PRIMARY | WT_BTREE_NO_EVICTION); - if (!on && F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { - pindex = WT_INTL_INDEX_GET_SAFE(root); - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) || - pindex->entries != 1) - return; - first = pindex->index[0]; - - /* - * We're reaching down into the page without a hazard pointer, - * but that's OK because we know that no-eviction is set so the - * page can't disappear. - * - * While this tree was the primary, its dirty bytes were not - * included in the cache accounting. Fix that now before we - * open it up for eviction. 
- */ - child = first->page; - if (first->state == WT_REF_MEM && - child->type == WT_PAGE_ROW_LEAF && - __wt_page_is_modified(child)) { - size = child->modify->bytes_dirty; - (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); - (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); - } - - F_CLR(btree, WT_BTREE_LSM_PRIMARY | WT_BTREE_NO_EVICTION); - } -} - -/* * __wt_split_descent_race -- * Return if we raced with an internal page split when descending the tree. */ diff --git a/src/include/buf.i b/src/include/buf.i index ebbee6b4633..d192e292dcf 100644 --- a/src/include/buf.i +++ b/src/include/buf.i @@ -37,28 +37,30 @@ __wt_buf_extend(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) /* * __wt_buf_init -- - * Initialize a buffer at a specific size. + * Create an empty buffer at a specific size. */ static inline int __wt_buf_init(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) { + /* + * The buffer grow function does what we need, but anticipates data + * referenced by the buffer. Avoid any data copy by setting data to + * reference the buffer's allocated memory, and clearing it. + */ buf->data = buf->mem; - buf->size = 0; /* Clear existing data length */ - WT_RET(__wt_buf_grow(session, buf, size)); - - return (0); + buf->size = 0; + return (__wt_buf_grow(session, buf, size)); } /* * __wt_buf_initsize -- - * Initialize a buffer at a specific size, and set the data length. + * Create an empty buffer at a specific size, and set the data length. */ static inline int __wt_buf_initsize(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) { - buf->data = buf->mem; - buf->size = 0; /* Clear existing data length */ - WT_RET(__wt_buf_grow(session, buf, size)); + WT_RET(__wt_buf_init(session, buf, size)); + buf->size = size; /* Set the data length. */ return (0); @@ -72,14 +74,15 @@ static inline int __wt_buf_set( WT_SESSION_IMPL *session, WT_ITEM *buf, const void *data, size_t size) { - /* Ensure the buffer is large enough. 
*/ - WT_RET(__wt_buf_initsize(session, buf, size)); - - /* Copy the data, allowing for overlapping strings. */ - if (size != 0) - memmove(buf->mem, data, size); - - return (0); + /* + * The buffer grow function does what we need, but expects the data to + * be referenced by the buffer. If we're copying data from outside the + * buffer, set it up so it makes sense to the buffer grow function. (No + * test needed, this works if WT_ITEM.data is already set to "data".) + */ + buf->data = data; + buf->size = size; + return (__wt_buf_grow(session, buf, size)); } /* diff --git a/src/include/cache.h b/src/include/cache.h index abd5a1901f7..04920c3585a 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -18,6 +18,15 @@ #define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */ +/* Ways to position when starting an eviction walk. */ +typedef enum { + WT_EVICT_WALK_NEXT, + WT_EVICT_WALK_PREV, + WT_EVICT_WALK_RAND_NEXT, + WT_EVICT_WALK_RAND_PREV +} WT_EVICT_WALK_START; +#define WT_EVICT_WALK_START_NUM (WT_EVICT_WALK_RAND_PREV + 1) + /* * WT_EVICT_ENTRY -- * Encapsulation of an eviction candidate. 
diff --git a/src/include/cell.i b/src/include/cell.i index c130768e595..71c2515daf0 100644 --- a/src/include/cell.i +++ b/src/include/cell.i @@ -361,14 +361,12 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) cell->__chunk[0] = (uint8_t) ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT); return (1); - } else { - byte = (uint8_t)size; /* Type + length */ - cell->__chunk[0] = (uint8_t) - ((byte << WT_CELL_SHORT_SHIFT) | - WT_CELL_KEY_SHORT_PFX); - cell->__chunk[1] = prefix; /* Prefix */ - return (2); } + byte = (uint8_t)size; /* Type + length */ + cell->__chunk[0] = (uint8_t) + ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT_PFX); + cell->__chunk[1] = prefix; /* Prefix */ + return (2); } if (prefix == 0) { @@ -569,8 +567,8 @@ __wt_cell_unpack_safe( */ #define WT_CELL_LEN_CHK(t, len) do { \ if (start != NULL && \ - ((uint8_t *)t < (uint8_t *)start || \ - (((uint8_t *)t) + (len)) > (uint8_t *)end)) \ + ((uint8_t *)(t) < (uint8_t *)start || \ + (((uint8_t *)(t)) + (len)) > (uint8_t *)end)) \ return (WT_ERROR); \ } while (0) diff --git a/src/include/column.i b/src/include/column.i index c1b45a1f4e0..07b627315e6 100644 --- a/src/include/column.i +++ b/src/include/column.i @@ -108,7 +108,7 @@ __col_insert_search_match(WT_INSERT_HEAD *ins_head, uint64_t recno) /* Fast path the check for values at the end of the skiplist. 
*/ if (recno > WT_INSERT_RECNO(ret_ins)) return (NULL); - else if (recno == WT_INSERT_RECNO(ret_ins)) + if (recno == WT_INSERT_RECNO(ret_ins)) return (ret_ins); /* @@ -127,7 +127,7 @@ __col_insert_search_match(WT_INSERT_HEAD *ins_head, uint64_t recno) if (cmp == 0) /* Exact match: return */ return (*insp); - else if (cmp > 0) /* Keep going at this level */ + if (cmp > 0) /* Keep going at this level */ insp = &(*insp)->next[i]; else { /* Drop down a level */ --i; diff --git a/src/include/connection.h b/src/include/connection.h index ce483d3291a..6c23492e926 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -127,7 +127,7 @@ struct __wt_named_extractor { F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \ TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \ - ++conn->dhandle_count; \ + ++(conn)->dhandle_count; \ } while (0) #define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) do { \ @@ -135,7 +135,7 @@ struct __wt_named_extractor { F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \ TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \ - --conn->dhandle_count; \ + --(conn)->dhandle_count; \ } while (0) /* diff --git a/src/include/cursor.h b/src/include/cursor.h index 31c8963a486..f32b4250d30 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -73,7 +73,7 @@ struct __wt_cursor_backup { #define WT_CURBACKUP_LOCKER 0x01 /* Hot-backup started */ uint8_t flags; }; -#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)cursor)->maxid) +#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)(cursor))->maxid) struct __wt_cursor_btree { WT_CURSOR iface; @@ -474,7 +474,7 @@ struct __wt_cursor_stat { * Return a reference to a statistic cursor's stats structures. 
*/ #define WT_CURSOR_STATS(cursor) \ - (((WT_CURSOR_STAT *)cursor)->stats) + (((WT_CURSOR_STAT *)(cursor))->stats) struct __wt_cursor_table { WT_CURSOR iface; @@ -493,7 +493,7 @@ struct __wt_cursor_table { }; #define WT_CURSOR_PRIMARY(cursor) \ - (((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]) + (((WT_CURSOR_TABLE *)(cursor))->cg_cursors[0]) #define WT_CURSOR_RECNO(cursor) WT_STREQ((cursor)->key_format, "r") @@ -550,4 +550,4 @@ struct __wt_cursor_table { } while (0) #define WT_CURSOR_RAW_OK \ - WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW + (WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW) diff --git a/src/include/cursor.i b/src/include/cursor.i index c3fcef9a13d..9cb9f5e7189 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -93,17 +93,19 @@ __curfile_enter(WT_CURSOR_BTREE *cbt) } /* - * __curfile_leave -- - * Clear a file cursor's position. + * __cursor_reset -- + * Reset the cursor, it no longer holds any position. */ static inline int -__curfile_leave(WT_CURSOR_BTREE *cbt) +__cursor_reset(WT_CURSOR_BTREE *cbt) { WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)cbt->iface.session; + __cursor_pos_clear(cbt); + /* If the cursor was active, deactivate it. */ if (F_ISSET(cbt, WT_CBT_ACTIVE)) { if (!F_ISSET(cbt, WT_CBT_NO_TXN)) @@ -111,12 +113,15 @@ __curfile_leave(WT_CURSOR_BTREE *cbt) F_CLR(cbt, WT_CBT_ACTIVE); } + /* If we're not holding a cursor reference, we're done. */ + if (cbt->ref == NULL) + return (0); + /* * If we were scanning and saw a lot of deleted records on this page, * try to evict the page when we release it. 
*/ - if (cbt->ref != NULL && - cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) + if (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) __wt_page_evict_soon(session, cbt->ref); cbt->page_deleted_count = 0; @@ -247,7 +252,7 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter) #ifdef HAVE_DIAGNOSTIC __wt_cursor_key_order_reset(cbt); #endif - WT_RET(__curfile_leave(cbt)); + WT_RET(__cursor_reset(cbt)); } /* @@ -272,24 +277,6 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter) } /* - * __cursor_reset -- - * Reset the cursor. - */ -static inline int -__cursor_reset(WT_CURSOR_BTREE *cbt) -{ - WT_DECL_RET; - - /* - * The cursor is leaving the API, and no longer holds any position, - * generally called to clean up the cursor after an error. - */ - ret = __curfile_leave(cbt); - __cursor_pos_clear(cbt); - return (ret); -} - -/* * __cursor_row_slot_return -- * Return a row-store leaf page slot's K/V pair. */ diff --git a/src/include/dhandle.h b/src/include/dhandle.h index 4f318e7bccf..8861e96112b 100644 --- a/src/include/dhandle.h +++ b/src/include/dhandle.h @@ -38,20 +38,20 @@ (((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle) #define WT_DHANDLE_ACQUIRE(dhandle) \ - (void)__wt_atomic_add32(&dhandle->session_ref, 1) + (void)__wt_atomic_add32(&(dhandle)->session_ref, 1) #define WT_DHANDLE_RELEASE(dhandle) \ - (void)__wt_atomic_sub32(&dhandle->session_ref, 1) + (void)__wt_atomic_sub32(&(dhandle)->session_ref, 1) #define WT_DHANDLE_NEXT(session, dhandle, head, field) do { \ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));\ - if (dhandle == NULL) \ - dhandle = TAILQ_FIRST(head); \ + if ((dhandle) == NULL) \ + (dhandle) = TAILQ_FIRST(head); \ else { \ WT_DHANDLE_RELEASE(dhandle); \ - dhandle = TAILQ_NEXT(dhandle, field); \ + (dhandle) = TAILQ_NEXT(dhandle, field); \ } \ - if (dhandle != NULL) \ + if ((dhandle) != NULL) \ WT_DHANDLE_ACQUIRE(dhandle); \ } while (0) diff --git a/src/include/error.h b/src/include/error.h index 
bbb7f989332..c338acb370f 100644 --- a/src/include/error.h +++ b/src/include/error.h @@ -67,14 +67,16 @@ int __ret; \ if ((__ret = (a)) != 0 && \ (__ret == WT_PANIC || \ - ret == 0 || ret == WT_DUPLICATE_KEY || ret == WT_NOTFOUND)) \ + ret == 0 || ret == WT_DUPLICATE_KEY || \ + ret == WT_NOTFOUND || ret == WT_RESTART)) \ ret = __ret; \ } while (0) #define WT_TRET_ERROR_OK(a, e) do { \ int __ret; \ if ((__ret = (a)) != 0 && __ret != (e) && \ (__ret == WT_PANIC || \ - ret == 0 || ret == WT_DUPLICATE_KEY || ret == WT_NOTFOUND)) \ + ret == 0 || ret == WT_DUPLICATE_KEY || \ + ret == WT_NOTFOUND || ret == WT_RESTART)) \ ret = __ret; \ } while (0) #define WT_TRET_NOTFOUND_OK(a) WT_TRET_ERROR_OK(a, WT_NOTFOUND) diff --git a/src/include/extern.h b/src/include/extern.h index 19ad9a880df..c0aa21b7f4c 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -133,10 +133,10 @@ extern void __wt_free_ref_index(WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE extern void __wt_free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_btree_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_btree_evictable(WT_SESSION_IMPL *session, bool on) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -161,6 +161,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags #endif ) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern bool __wt_split_obsolete(WT_SESSION_IMPL *session, uint64_t split_gen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -462,7 +463,7 @@ extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_ckptlist_get( WT_SESSION_IMPL *session, const char *fname, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT *ckptbase) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_ext_metadata_insert(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_ext_metadata_remove( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/intpack.i b/src/include/intpack.i index e8bea58cede..a534de9d9a8 100644 --- a/src/include/intpack.i +++ b/src/include/intpack.i @@ -59,21 +59,21 @@ /* Count the leading zero bytes. */ #if defined(__GNUC__) #define WT_LEADING_ZEROS(x, i) \ - (i = (x == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3) + ((i) = ((x) == 0) ? 
(int)sizeof(x) : __builtin_clzll(x) >> 3) #elif defined(_MSC_VER) #define WT_LEADING_ZEROS(x, i) do { \ - if (x == 0) i = (int)sizeof(x); \ + if ((x) == 0) (i) = (int)sizeof(x); \ else { \ unsigned long __index; \ _BitScanReverse64(&__index, x); \ __index = 63 ^ __index; \ - i = (int)(__index >> 3); } \ + (i) = (int)(__index >> 3); } \ } while (0) #else #define WT_LEADING_ZEROS(x, i) do { \ uint64_t __x = (x); \ uint64_t __m = (uint64_t)0xff << 56; \ - for (i = 0; !(__x & __m) && i != 8; i++) \ + for ((i) = 0; !(__x & __m) && (i) != 8; (i)++) \ __m >>= 8; \ } while (0) #endif @@ -231,7 +231,8 @@ __wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x) if (x < NEG_2BYTE_MIN) { *p = NEG_MULTI_MARKER; return (__wt_vpack_negint(pp, maxlen, (uint64_t)x)); - } else if (x < NEG_1BYTE_MIN) { + } + if (x < NEG_1BYTE_MIN) { WT_SIZE_CHECK_PACK(2, maxlen); x -= NEG_2BYTE_MIN; *p++ = NEG_2BYTE_MARKER | GET_BITS(x, 13, 8); @@ -358,12 +359,10 @@ __wt_vsize_uint(uint64_t x) { if (x <= POS_1BYTE_MAX) return (1); - else if (x <= POS_2BYTE_MAX + 1) { + if (x <= POS_2BYTE_MAX + 1) return (2); - } else { - x -= POS_2BYTE_MAX + 1; - return (__wt_vsize_posint(x)); - } + x -= POS_2BYTE_MAX + 1; + return (__wt_vsize_posint(x)); } /* @@ -373,13 +372,12 @@ __wt_vsize_uint(uint64_t x) static inline size_t __wt_vsize_int(int64_t x) { - if (x < NEG_2BYTE_MIN) { + if (x < NEG_2BYTE_MIN) return (__wt_vsize_negint((uint64_t)x)); - } else if (x < NEG_1BYTE_MIN) { + if (x < NEG_1BYTE_MIN) return (2); - } else if (x < 0) { + if (x < 0) return (1); - } else - /* For non-negative values, use the unsigned code above. */ - return (__wt_vsize_uint((uint64_t)x)); + /* For non-negative values, use the unsigned code above. 
*/ + return (__wt_vsize_uint((uint64_t)x)); } diff --git a/src/include/lint.h b/src/include/lint.h index e20a83144ee..2d0f47988b7 100644 --- a/src/include/lint.h +++ b/src/include/lint.h @@ -29,9 +29,9 @@ __wt_atomic_fetch_add##name(type *vp, type v) \ { \ type orig; \ \ - old = *vp; \ + orig = *vp; \ *vp += v; \ - return (old); \ + return (orig); \ } \ static inline ret \ __wt_atomic_store##name(type *vp, type v) \ @@ -40,7 +40,7 @@ __wt_atomic_store##name(type *vp, type v) \ \ orig = *vp; \ *vp = v; \ - return (old); \ + return (orig); \ } \ static inline ret \ __wt_atomic_sub##name(type *vp, type v) \ @@ -49,9 +49,9 @@ __wt_atomic_sub##name(type *vp, type v) \ return (*vp); \ } \ static inline bool \ -__wt_atomic_cas##name(type *vp, type old, type new) \ +__wt_atomic_cas##name(type *vp, type orig, type new) \ { \ - if (*vp == old) { \ + if (*vp == orig) { \ *vp = new; \ return (true); \ } \ @@ -75,8 +75,8 @@ WT_ATOMIC_FUNC(size, size_t, size_t) * Pointer compare and swap. */ static inline bool -__wt_atomic_cas_ptr(void *vp, void *old, void *new) { - if (*(void **)vp == old) { +__wt_atomic_cas_ptr(void *vp, void *orig, void *new) { + if (*(void **)vp == orig) { *(void **)vp = new; return (true); } diff --git a/src/include/log.h b/src/include/log.h index a6be3582b4d..f0999ba316b 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -86,8 +86,8 @@ union __wt_lsn { * The high bit is reserved for the special states. If the high bit is * set (WT_LOG_SLOT_RESERVED) then we are guaranteed to be in a special state. 
*/ -#define WT_LOG_SLOT_FREE -1 /* Not in use */ -#define WT_LOG_SLOT_WRITTEN -2 /* Slot data written, not processed */ +#define WT_LOG_SLOT_FREE (-1) /* Not in use */ +#define WT_LOG_SLOT_WRITTEN (-2) /* Slot data written, not processed */ /* * We allocate the buffer size, but trigger a slot switch when we cross @@ -144,8 +144,8 @@ union __wt_lsn { /* Slot is in use, but closed to new joins */ #define WT_LOG_SLOT_CLOSED(state) \ (WT_LOG_SLOT_ACTIVE(state) && \ - (FLD64_ISSET((uint64_t)state, WT_LOG_SLOT_CLOSE) && \ - !FLD64_ISSET((uint64_t)state, WT_LOG_SLOT_RESERVED))) + (FLD64_ISSET((uint64_t)(state), WT_LOG_SLOT_CLOSE) && \ + !FLD64_ISSET((uint64_t)(state), WT_LOG_SLOT_RESERVED))) /* Slot is in use, all data copied into buffer */ #define WT_LOG_SLOT_INPROGRESS(state) \ (WT_LOG_SLOT_RELEASED(state) != WT_LOG_SLOT_JOINED(state)) @@ -185,7 +185,7 @@ struct __wt_logslot { #define WT_WITH_SLOT_LOCK(session, log, op) do { \ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); \ WT_WITH_LOCK_WAIT(session, \ - &log->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \ + &(log)->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \ } while (0) struct __wt_myslot { diff --git a/src/include/misc.h b/src/include/misc.h index 66d43496e93..9161a215fdc 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -63,7 +63,7 @@ #define WT_MAX(a, b) ((a) < (b) ? (b) : (a)) /* Elements in an array. */ -#define WT_ELEMENTS(a) (sizeof(a) / sizeof(a[0])) +#define WT_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) /* 10 level skip lists, 1/4 have a link to the next element. 
*/ #define WT_SKIP_MAXDEPTH 10 @@ -140,6 +140,7 @@ #define F_CLR(p, mask) FLD_CLR((p)->flags, mask) #define F_ISSET(p, mask) FLD_ISSET((p)->flags, mask) +#define F_ISSET_ALL(p, mask) (FLD_MASK((p)->flags, mask) == (mask)) #define F_MASK(p, mask) FLD_MASK((p)->flags, mask) #define F_SET(p, mask) FLD_SET((p)->flags, mask) @@ -180,14 +181,14 @@ */ #define WT_BINARY_SEARCH(key, arrayp, n, found) do { \ uint32_t __base, __indx, __limit; \ - found = false; \ + (found) = false; \ for (__base = 0, __limit = (n); __limit != 0; __limit >>= 1) { \ __indx = __base + (__limit >> 1); \ - if ((arrayp)[__indx] < key) { \ + if ((arrayp)[__indx] < (key)) { \ __base = __indx + 1; \ --__limit; \ - } else if ((arrayp)[__indx] == key) { \ - found = true; \ + } else if ((arrayp)[__indx] == (key)) { \ + (found) = true; \ break; \ } \ } \ @@ -206,8 +207,8 @@ /* Check if a string matches a prefix. */ #define WT_PREFIX_MATCH(str, pfx) \ - (((const char *)(str))[0] == ((const char *)pfx)[0] && \ - strncmp((str), (pfx), strlen(pfx)) == 0) + (((const char *)(str))[0] == ((const char *)(pfx))[0] && \ + strncmp(str, pfx, strlen(pfx)) == 0) /* Check if a string matches a prefix, and move past it. */ #define WT_PREFIX_SKIP(str, pfx) \ @@ -224,8 +225,8 @@ /* Check if a string matches a byte string of len bytes. 
*/ #define WT_STRING_MATCH(str, bytes, len) \ - (((const char *)str)[0] == ((const char *)bytes)[0] && \ - strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0') + (((const char *)(str))[0] == ((const char *)(bytes))[0] && \ + strncmp(str, bytes, len) == 0 && (str)[len] == '\0') /* * Macro that produces a string literal that isn't wrapped in quotes, to avoid diff --git a/src/include/mutex.i b/src/include/mutex.i index 6b83cb280d3..640706284c3 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -316,6 +316,6 @@ __wt_spin_trylock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) stats = (int64_t **)S2C(session)->stats; stats[session->stat_bucket][t->stat_count_off]++; return (0); - } else - return (__wt_spin_trylock(session, t)); + } + return (__wt_spin_trylock(session, t)); } diff --git a/src/include/os.h b/src/include/os.h index 7a8e47ed81f..73d89268392 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -11,8 +11,14 @@ * A call returning 0 indicates success; any call where \ * 0 is not the only successful return must provide an \ * expression evaluating to 0 in all successful cases. \ + * \ + * XXX \ + * Casting the call's return to int is because CentOS 7.3.1611 \ + * complains about syscall returning a long and the loss of \ + * integer precision in the assignment to ret. The cast should \ + * be a no-op everywhere. \ */ \ - if (((ret) = (call)) == 0) \ + if (((ret) = (int)(call)) == 0) \ break; \ /* \ * The call's error was either returned by the call or \ @@ -61,7 +67,7 @@ #define WT_TIMECMP(t1, t2) \ ((t1).tv_sec < (t2).tv_sec ? -1 : \ - (t1).tv_sec == (t2.tv_sec) ? \ + (t1).tv_sec == (t2).tv_sec ? \ (t1).tv_nsec < (t2).tv_nsec ? -1 : \ (t1).tv_nsec == (t2).tv_nsec ? 
0 : 1 : 1) diff --git a/src/include/packing.i b/src/include/packing.i index 8ba3dd536ac..d79afe6d4a2 100644 --- a/src/include/packing.i +++ b/src/include/packing.i @@ -206,43 +206,43 @@ next: if (pack->cur == pack->end) #define WT_PACK_GET(session, pv, ap) do { \ WT_ITEM *__item; \ - switch (pv.type) { \ + switch ((pv).type) { \ case 'x': \ break; \ case 's': \ case 'S': \ - pv.u.s = va_arg(ap, const char *); \ + (pv).u.s = va_arg(ap, const char *); \ break; \ case 'U': \ case 'u': \ __item = va_arg(ap, WT_ITEM *); \ - pv.u.item.data = __item->data; \ - pv.u.item.size = __item->size; \ + (pv).u.item.data = __item->data; \ + (pv).u.item.size = __item->size; \ break; \ case 'b': \ case 'h': \ case 'i': \ - pv.u.i = va_arg(ap, int); \ + (pv).u.i = va_arg(ap, int); \ break; \ case 'B': \ case 'H': \ case 'I': \ case 't': \ - pv.u.u = va_arg(ap, unsigned int); \ + (pv).u.u = va_arg(ap, unsigned int); \ break; \ case 'l': \ - pv.u.i = va_arg(ap, long); \ + (pv).u.i = va_arg(ap, long); \ break; \ case 'L': \ - pv.u.u = va_arg(ap, unsigned long); \ + (pv).u.u = va_arg(ap, unsigned long); \ break; \ case 'q': \ - pv.u.i = va_arg(ap, int64_t); \ + (pv).u.i = va_arg(ap, int64_t); \ break; \ case 'Q': \ case 'r': \ case 'R': \ - pv.u.u = va_arg(ap, uint64_t); \ + (pv).u.u = va_arg(ap, uint64_t); \ break; \ /* User format strings have already been validated. 
*/ \ WT_ILLEGAL_VALUE(session); \ @@ -556,47 +556,47 @@ __unpack_read(WT_SESSION_IMPL *session, #define WT_UNPACK_PUT(session, pv, ap) do { \ WT_ITEM *__item; \ - switch (pv.type) { \ + switch ((pv).type) { \ case 'x': \ break; \ case 's': \ case 'S': \ - *va_arg(ap, const char **) = pv.u.s; \ + *va_arg(ap, const char **) = (pv).u.s; \ break; \ case 'U': \ case 'u': \ __item = va_arg(ap, WT_ITEM *); \ - __item->data = pv.u.item.data; \ - __item->size = pv.u.item.size; \ + __item->data = (pv).u.item.data; \ + __item->size = (pv).u.item.size; \ break; \ case 'b': \ - *va_arg(ap, int8_t *) = (int8_t)pv.u.i; \ + *va_arg(ap, int8_t *) = (int8_t)(pv).u.i; \ break; \ case 'h': \ - *va_arg(ap, int16_t *) = (short)pv.u.i; \ + *va_arg(ap, int16_t *) = (short)(pv).u.i; \ break; \ case 'i': \ case 'l': \ - *va_arg(ap, int32_t *) = (int32_t)pv.u.i; \ + *va_arg(ap, int32_t *) = (int32_t)(pv).u.i; \ break; \ case 'q': \ - *va_arg(ap, int64_t *) = pv.u.i; \ + *va_arg(ap, int64_t *) = (pv).u.i; \ break; \ case 'B': \ case 't': \ - *va_arg(ap, uint8_t *) = (uint8_t)pv.u.u; \ + *va_arg(ap, uint8_t *) = (uint8_t)(pv).u.u; \ break; \ case 'H': \ - *va_arg(ap, uint16_t *) = (uint16_t)pv.u.u; \ + *va_arg(ap, uint16_t *) = (uint16_t)(pv).u.u; \ break; \ case 'I': \ case 'L': \ - *va_arg(ap, uint32_t *) = (uint32_t)pv.u.u; \ + *va_arg(ap, uint32_t *) = (uint32_t)(pv).u.u; \ break; \ case 'Q': \ case 'r': \ case 'R': \ - *va_arg(ap, uint64_t *) = pv.u.u; \ + *va_arg(ap, uint64_t *) = (pv).u.u; \ break; \ /* User format strings have already been validated. */ \ WT_ILLEGAL_VALUE(session); \ diff --git a/src/include/schema.h b/src/include/schema.h index 9a6e1e54e80..50e141d9921 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -107,10 +107,11 @@ struct __wt_table { * Acquire a lock if available, perform an operation, drop the lock. 
*/ #define WT_WITH_LOCK_NOWAIT(session, ret, lock, flag, op) do { \ - ret = 0; \ + (ret) = 0; \ if (F_ISSET(session, (flag))) { \ op; \ - } else if ((ret = __wt_spin_trylock_track(session, lock)) == 0) {\ + } else if (((ret) = \ + __wt_spin_trylock_track(session, lock)) == 0) { \ F_SET(session, (flag)); \ op; \ F_CLR(session, (flag)); \ @@ -248,7 +249,7 @@ struct __wt_table { WT_SESSION_LOCKED_HANDLE_LIST)); \ if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \ op; \ - } else if ((ret = __wt_try_writelock(session, \ + } else if (((ret) = __wt_try_writelock(session, \ &S2C(session)->table_lock)) == 0) { \ F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ op; \ diff --git a/src/include/session.h b/src/include/session.h index 085f871a34f..674e92671b1 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -67,7 +67,6 @@ struct __wt_session_impl { TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles; time_t last_sweep; /* Last sweep for dead handles */ - WT_CURSOR *cursor; /* Current cursor */ /* Cursors closed with the session */ TAILQ_HEAD(__cursors, __wt_cursor) cursors; @@ -88,7 +87,7 @@ struct __wt_session_impl { void *meta_track_sub; /* Child transaction / save point */ size_t meta_track_alloc; /* Currently allocated */ int meta_track_nest; /* Nesting level of meta transaction */ -#define WT_META_TRACKING(session) (session->meta_track_next != NULL) +#define WT_META_TRACKING(session) ((session)->meta_track_next != NULL) /* * Each session keeps a cache of table handles. The set of handles @@ -151,20 +150,16 @@ struct __wt_session_impl { uint32_t flags; /* - * The split stash memory and hazard information persist past session - * close because they are accessed by threads of control other than the - * thread owning the session. - * + * All of the following fields live at the end of the structure so it's + * easier to clear everything but the fields that persist. 
+ */ +#define WT_SESSION_CLEAR_SIZE (offsetof(WT_SESSION_IMPL, rnd)) + + /* * The random number state persists past session close because we don't - * want to repeatedly allocate repeated values for skiplist depth if the + * want to repeatedly use the same values for skiplist depth when the * application isn't caching sessions. - * - * All of these fields live at the end of the structure so it's easier - * to clear everything but the fields that persist. */ -#define WT_SESSION_CLEAR_SIZE(s) \ - (WT_PTRDIFF(&(s)->rnd, s)) - WT_RAND_STATE rnd; /* Random number generation state */ /* Hashed handle reference list array */ @@ -173,6 +168,9 @@ struct __wt_session_impl { TAILQ_HEAD(__tables_hash, __wt_table) *tablehash; /* + * Split stash memory persists past session close because it's accessed + * by threads of control other than the thread owning the session. + * * Splits can "free" memory that may still be in use, and we use a * split generation number to track it, that is, the session stores a * reference to the memory and allocates a split generation; when no @@ -192,6 +190,9 @@ struct __wt_session_impl { /* * Hazard pointers. * + * Hazard information persists past session close because it's accessed + * by threads of control other than the thread owning the session. + * * Use the non-NULL state of the hazard field to know if the session has * previously been initialized. */ diff --git a/src/include/stat.h b/src/include/stat.h index 8b2e78a4ed5..ed3d588b7d3 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -72,7 +72,7 @@ * and the session ID is a small, monotonically increasing number. */ #define WT_STATS_SLOT_ID(session) \ - ((session)->id) % WT_COUNTER_SLOTS + (((session)->id) % WT_COUNTER_SLOTS) /* * Statistic structures are arrays of int64_t's. 
We have functions to read/write diff --git a/src/include/txn.i b/src/include/txn.i index 0cc4a6f8439..314c948e4d1 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -125,7 +125,8 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) * minimum of it with the oldest ID, which is what we want. */ oldest_id = txn_global->oldest_id; - include_checkpoint_txn = btree == NULL || btree->include_checkpoint_txn; + include_checkpoint_txn = btree == NULL || + btree->checkpoint_gen != txn_global->checkpoint_gen; WT_READ_BARRIER(); checkpoint_pinned = txn_global->checkpoint_pinned; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index c148e759299..7223aeae0f6 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -36,7 +36,7 @@ extern "C" { #if defined(DOXYGEN) || defined(SWIG) #define __F(func) func #else -#define __F(func) (*func) +#define __F(func) (*(func)) #endif #ifdef SWIG @@ -427,7 +427,7 @@ struct __wt_cursor { * * @param cursor the cursor handle * @errors - * In particular, if \c overwrite is not configured and a record with + * In particular, if \c overwrite=false is configured and a record with * the specified key already exists, ::WT_DUPLICATE_KEY is returned. * Also, if \c in_memory is configured for the database and the insert * requires more than the configured cache size to complete, @@ -452,7 +452,9 @@ struct __wt_cursor { * * On success, the cursor ends positioned at the modified record; to * minimize cursor resources, the WT_CURSOR::reset method should be - * called as soon as the cursor no longer needs that position. + * called as soon as the cursor no longer needs that position. (The + * WT_CURSOR::insert method never keeps a cursor position and may be + * more efficient for that reason.) 
* * The maximum length of a single column stored in a table is not fixed * (as it partially depends on the underlying file configuration), but @@ -460,7 +462,7 @@ struct __wt_cursor { * * @param cursor the cursor handle * @errors - * In particular, if \c overwrite is not configured and no record with + * In particular, if \c overwrite=false is configured and no record with * the specified key exists, ::WT_NOTFOUND is returned. * Also, if \c in_memory is configured for the database and the insert * requires more than the configured cache size to complete, @@ -477,8 +479,18 @@ struct __wt_cursor { * * @snippet ex_all.c Remove a record * - * If the cursor was not configured with "overwrite=true", the key must - * be set and the key's record must exist; the record will be removed. + * If the cursor was configured with "overwrite=false" (not the + * default), the key must be set and the key's record must exist; the + * record will be removed. + * + * Any cursor position does not change: if the cursor was positioned + * before the WT_CURSOR::remove call, the cursor remains positioned + * at the removed record; to minimize cursor resources, the + * WT_CURSOR::reset method should be called as soon as the cursor no + * longer needs that position. If the cursor was not positioned before + * the WT_CURSOR::remove call, the cursor ends with no position, and a + * subsequent call to the WT_CURSOR::next (WT_CURSOR::prev) method will + * iterate from the beginning (end) of the table. * * @snippet ex_all.c Remove a record and fail if DNE * @@ -486,14 +498,10 @@ struct __wt_cursor { * (that is, a store with an 'r' type key and 't' type value) is * identical to setting the record's value to 0. * - * On success, the cursor ends positioned at the removed record; to - * minimize cursor resources, the WT_CURSOR::reset method should be - * called as soon as the cursor no longer needs that position. 
- * * @param cursor the cursor handle * @errors - * In particular, if \c overwrite is not configured and no record with - * the specified key exists, ::WT_NOTFOUND is returned. + * In particular, if \c overwrite=false is configured and no record + * with the specified key exists, ::WT_NOTFOUND is returned. */ int __F(remove)(WT_CURSOR *cursor); /*! @} */ @@ -3065,27 +3073,27 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); * transaction is in progress, it should be rolled back and the operation * retried in a new transaction. */ -#define WT_ROLLBACK -31800 +#define WT_ROLLBACK (-31800) /*! * Attempt to insert an existing key. * This error is generated when the application attempts to insert a record with * the same key as an existing record without the 'overwrite' configuration to * WT_SESSION::open_cursor. */ -#define WT_DUPLICATE_KEY -31801 +#define WT_DUPLICATE_KEY (-31801) /*! * Non-specific WiredTiger error. * This error is returned when an error is not covered by a specific error * return. */ -#define WT_ERROR -31802 +#define WT_ERROR (-31802) /*! * Item not found. * This error indicates an operation did not find a value to return. This * includes cursor search and other operations where no record matched the * cursor's search key such as WT_CURSOR::update or WT_CURSOR::remove. */ -#define WT_NOTFOUND -31803 +#define WT_NOTFOUND (-31803) /*! * WiredTiger library panic. * This error indicates an underlying problem that requires the application exit @@ -3093,17 +3101,17 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); * returned from a WiredTiger interface, no further WiredTiger calls are * required. */ -#define WT_PANIC -31804 +#define WT_PANIC (-31804) /*! @cond internal */ /*! Restart the operation (internal). */ -#define WT_RESTART -31805 +#define WT_RESTART (-31805) /*! @endcond */ /*! * Recovery must be run to continue. 
* This error is generated when wiredtiger_open is configured to return an error * if recovery is required to use the database. */ -#define WT_RUN_RECOVERY -31806 +#define WT_RUN_RECOVERY (-31806) /*! * Operation would overflow cache. * This error is only generated when wiredtiger_open is configured to run in- @@ -3112,7 +3120,7 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); * progress, it should be rolled back and the operation retried in a new * transaction. */ -#define WT_CACHE_FULL -31807 +#define WT_CACHE_FULL (-31807) /* * Error return section: END * DO NOT EDIT: automatically built by dist/api_err.py. diff --git a/src/log/log.c b/src/log/log.c index 3477ca52502..05234619d32 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -309,14 +309,11 @@ void __wt_log_written_reset(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; - WT_LOG *log; conn = S2C(session); - if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) - return; - log = conn->log; - log->log_written = 0; - return; + + if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) + conn->log->log_written = 0; } /* @@ -1775,9 +1772,8 @@ advance: if (eol) /* Found a hole. This LSN is the end. */ break; - else - /* Last record in log. Look for more. */ - goto advance; + /* Last record in log. Look for more. 
*/ + goto advance; } rdup_len = __wt_rduppo2(reclen, allocsize); if (reclen > allocsize) { diff --git a/src/log/log_slot.c b/src/log/log_slot.c index b4655ff6c1a..c685aec3ffc 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -574,7 +574,6 @@ __wt_log_slot_release(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, int64_t size) wt_off_t cur_offset, my_start; int64_t my_size, rel_size; - WT_UNUSED(session); slot = myslot->slot; my_start = slot->slot_start_offset + myslot->offset; /* diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 60afbc99ade..3f0b6df8eb0 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -10,10 +10,10 @@ #define WT_FORALL_CURSORS(clsm, c, i) \ for ((i) = (clsm)->nchunks; (i) > 0;) \ - if (((c) = (clsm)->chunks[--i]->cursor) != NULL) + if (((c) = (clsm)->chunks[--(i)]->cursor) != NULL) #define WT_LSM_CURCMP(s, lsm_tree, c1, c2, cmp) \ - __wt_compare(s, (lsm_tree)->collator, &(c1)->key, &(c2)->key, &cmp) + __wt_compare(s, (lsm_tree)->collator, &(c1)->key, &(c2)->key, &(cmp)) static int __clsm_lookup(WT_CURSOR_LSM *, WT_ITEM *); static int __clsm_open_cursors(WT_CURSOR_LSM *, bool, u_int, uint32_t); @@ -688,19 +688,29 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) { if (chunk != NULL && !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && chunk->switch_txn == WT_TXN_NONE) { - clsm->primary_chunk = chunk; primary = clsm->chunks[clsm->nchunks - 1]->cursor; + btree = ((WT_CURSOR_BTREE *)primary)->btree; + /* - * Disable eviction for the in-memory chunk. Also clear the - * bulk load flag here, otherwise eviction will be enabled by - * the first update. + * If the primary is not yet set as the primary, do that now. + * Note that eviction was configured off when the underlying + * object was created, which is what we want, leave it alone. + * + * We don't have to worry about races here: every thread that + * modifies the tree will have to come through here, at worst + * we set the flag repeatedly.
We don't use a WT_BTREE handle + * flag, however, we could race doing the read-modify-write of + * the flags field. + * + * If something caused the chunk to be closed and reopened + * since it was created, we can no longer use it as a primary + * chunk and we need to force a switch. We detect the tree was + * created when it was opened by checking the "original" flag. */ - btree = ((WT_CURSOR_BTREE *)(primary))->btree; - if (btree->bulk_load_ok) { - btree->bulk_load_ok = false; - WT_WITH_BTREE(session, btree, - __wt_btree_lsm_switch_primary(session, true)); - } + if (!btree->lsm_primary && btree->original) + btree->lsm_primary = true; + if (btree->lsm_primary) + clsm->primary_chunk = chunk; } clsm->dsk_gen = lsm_tree->dsk_gen; @@ -1213,7 +1223,8 @@ __clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value) WT_LSM_TREE_STAT_INCR( session, clsm->lsm_tree->bloom_miss); continue; - } else if (ret == 0) + } + if (ret == 0) WT_LSM_TREE_STAT_INCR( session, clsm->lsm_tree->bloom_hit); WT_ERR(ret); @@ -1239,10 +1250,10 @@ __clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value) WT_ERR(WT_NOTFOUND); done: -err: F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if (ret == 0) { - clsm->current = c; +err: if (ret == 0) { + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); F_SET(cursor, WT_CURSTD_KEY_INT); + clsm->current = c; if (value == &cursor->value) F_SET(cursor, WT_CURSTD_VALUE_INT); } else if (c != NULL) @@ -1318,7 +1329,8 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) if ((ret = c->search_near(c, &cmp)) == WT_NOTFOUND) { ret = 0; continue; - } else if (ret != 0) + } + if (ret != 0) goto err; /* Do we have an exact match? 
*/ @@ -1338,7 +1350,8 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) if ((ret = c->next(c)) == WT_NOTFOUND) { ret = 0; continue; - } else if (ret != 0) + } + if (ret != 0) goto err; } @@ -1564,12 +1577,23 @@ __clsm_update(WT_CURSOR *cursor) WT_CURSOR_NEEDVALUE(cursor); WT_ERR(__clsm_enter(clsm, false, true)); - if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) || - (ret = __clsm_lookup(clsm, &value)) == 0) { - WT_ERR(__clsm_deleted_encode( - session, &cursor->value, &value, &buf)); - ret = __clsm_put(session, clsm, &cursor->key, &value, true); - } + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) + WT_ERR(__clsm_lookup(clsm, &value)); + WT_ERR(__clsm_deleted_encode(session, &cursor->value, &value, &buf)); + WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, true)); + + /* + * Set the cursor to reference the internal key/value of the positioned + * cursor. + */ + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + WT_ITEM_SET(cursor->key, clsm->current->key); + WT_ITEM_SET(cursor->value, clsm->current->value); + WT_ASSERT(session, + F_MASK(clsm->current, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + WT_ASSERT(session, + F_MASK(clsm->current, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); err: __wt_scr_free(session, &buf); __clsm_leave(clsm); @@ -1588,9 +1612,13 @@ __clsm_remove(WT_CURSOR *cursor) WT_DECL_RET; WT_ITEM value; WT_SESSION_IMPL *session; + bool positioned; clsm = (WT_CURSOR_LSM *)cursor; + /* Check if the cursor is positioned. 
*/ positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); + CURSOR_REMOVE_API_CALL(cursor, session, NULL); WT_CURSOR_NEEDKEY(cursor); WT_CURSOR_NOVALUE(cursor); @@ -1599,9 +1627,22 @@ __clsm_remove(WT_CURSOR *cursor) if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) || (ret = __clsm_lookup(clsm, &value)) == 0) ret = __clsm_put( - session, clsm, &cursor->key, &__tombstone, true); + session, clsm, &cursor->key, &__tombstone, positioned); err: __clsm_leave(clsm); + + /* + * If the cursor was positioned, it stays positioned with a key but + * no value, otherwise, there's no position, key or value. This isn't + * just cosmetic, without a reset, iteration on this cursor won't start + * at the beginning/end of the table. + */ + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (positioned) + F_SET(cursor, WT_CURSTD_KEY_INT); + else + WT_TRET(cursor->reset(cursor)); + CURSOR_UPDATE_API_END(session, ret); return (ret); } diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index ceb5f03a2f5..a06b736bf0a 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -187,7 +187,7 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, continue; if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) || chunk->generation > 0) break; - else if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && + if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) break; } diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 4349acf7b55..e6a29666094 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -230,7 +230,7 @@ __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (__wt_atomic_cas32(&chunk->bloom_busy, 0, 1)) { if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) { ret = __lsm_bloom_create( - session, lsm_tree, chunk, (u_int)i); + session, lsm_tree, chunk, i); /* * Record if we were successful so that we can * later push a merge work unit.
@@ -265,9 +265,9 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, { WT_DECL_RET; WT_TXN_ISOLATION saved_isolation; - bool flush_set; + bool flush_set, release_btree; - flush_set = false; + flush_set = release_btree = false; /* * If the chunk is already checkpointed, make sure it is also evicted. @@ -318,20 +318,18 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, * We can wait here for checkpoints and fsyncs to complete, which can * take a long time. */ - if ((ret = __wt_session_get_btree( - session, chunk->uri, NULL, NULL, 0)) == 0) { - /* - * Set read-uncommitted: we have already checked that all of the - * updates in this chunk are globally visible, use the cheapest - * possible check in reconciliation. - */ - saved_isolation = session->txn.isolation; - session->txn.isolation = WT_ISO_READ_UNCOMMITTED; - ret = __wt_cache_op(session, WT_SYNC_WRITE_LEAVES); - session->txn.isolation = saved_isolation; - WT_TRET(__wt_session_release_btree(session)); - } - WT_ERR(ret); + WT_ERR(__wt_session_get_btree(session, chunk->uri, NULL, NULL, 0)); + release_btree = true; + + /* + * Set read-uncommitted: we have already checked that all of the updates + * in this chunk are globally visible, use the cheapest possible check + * in reconciliation. 
+ */ + saved_isolation = session->txn.isolation; + session->txn.isolation = WT_ISO_READ_UNCOMMITTED; + WT_ERR(__wt_cache_op(session, WT_SYNC_WRITE_LEAVES)); + session->txn.isolation = saved_isolation; __wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s", chunk->uri); @@ -348,12 +346,14 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_ERR(__wt_meta_track_on(session)); WT_WITH_CHECKPOINT_LOCK(session, WT_WITH_SCHEMA_LOCK(session, - ret = __wt_schema_worker( - session, chunk->uri, __wt_checkpoint, NULL, NULL, 0))); + ret = __wt_checkpoint(session, NULL))); WT_TRET(__wt_meta_track_off(session, false, ret != 0)); if (ret != 0) WT_ERR_MSG(session, ret, "LSM checkpoint"); + release_btree = false; + WT_ERR(__wt_session_release_btree(session)); + /* Now the file is written, get the chunk size. */ WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk)); @@ -376,16 +376,6 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_PUBLISH(chunk->flushing, 0); flush_set = false; - /* - * Clear the no-eviction flag so the primary can be evicted and - * eventually closed. Only do this once the checkpoint has succeeded: - * otherwise, accessing the leaf page during the checkpoint can trigger - * forced eviction. - */ - WT_ERR(__wt_session_get_btree(session, chunk->uri, NULL, NULL, 0)); - __wt_btree_lsm_switch_primary(session, false); - WT_ERR(__wt_session_release_btree(session)); - /* Make sure we aren't pinning a transaction ID. */ __wt_txn_release_snapshot(session); @@ -402,6 +392,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, err: if (flush_set) WT_PUBLISH(chunk->flushing, 0); + if (release_btree) + WT_TRET(__wt_session_release_btree(session)); return (ret); } @@ -518,7 +510,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri) * This will fail with EBUSY if the file is still in use. 
*/ WT_WITH_HANDLE_LIST_WRITE_LOCK(session, - ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT)); + ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT)); WT_RET(ret); /* @@ -610,7 +602,8 @@ __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (drop_ret == EBUSY) { ++skipped; continue; - } else if (drop_ret != ENOENT) + } + if (drop_ret != ENOENT) WT_ERR(drop_ret); flush_metadata = true; @@ -621,7 +614,8 @@ __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (drop_ret == EBUSY) { ++skipped; continue; - } else if (drop_ret != ENOENT) + } + if (drop_ret != ENOENT) WT_ERR(drop_ret); flush_metadata = true; } diff --git a/src/meta/meta_apply.c b/src/meta/meta_apply.c index fb483c21dd9..dc93180a5e5 100644 --- a/src/meta/meta_apply.c +++ b/src/meta/meta_apply.c @@ -45,11 +45,7 @@ __meta_btree_apply(WT_SESSION_IMPL *session, WT_CURSOR *cursor, session, uri, NULL, NULL, 0)) != 0) return (ret == EBUSY ? 0 : ret); WT_SAVE_DHANDLE(session, ret = file_func(session, cfg)); - if (WT_META_TRACKING(session)) - WT_TRET(__wt_meta_track_handle_lock( - session, false)); - else - WT_TRET(__wt_session_release_btree(session)); + WT_TRET(__wt_session_release_btree(session)); WT_RET(ret); } WT_RET_NOTFOUND_OK(ret); diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c index b985104c2eb..151bbe0e081 100644 --- a/src/meta/meta_ckpt.c +++ b/src/meta/meta_ckpt.c @@ -297,7 +297,7 @@ __wt_meta_ckptlist_get( *ckptbasep = ckptbase; if (0) { -err: __wt_meta_ckptlist_free(session, ckptbase); +err: __wt_meta_ckptlist_free(session, &ckptbase); } __wt_free(session, config); __wt_scr_free(session, &buf); @@ -463,16 +463,16 @@ err: __wt_scr_free(session, &buf); * Discard the checkpoint array. 
*/ void -__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT *ckptbase) +__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep) { - WT_CKPT *ckpt; + WT_CKPT *ckpt, *ckptbase; - if (ckptbase == NULL) + if ((ckptbase = *ckptbasep) == NULL) return; WT_CKPT_FOREACH(ckptbase, ckpt) __wt_meta_checkpoint_free(session, ckpt); - __wt_free(session, ckptbase); + __wt_free(session, *ckptbasep); } /* diff --git a/src/meta/meta_ext.c b/src/meta/meta_ext.c index 50e7568fe77..aa1ea8b974d 100644 --- a/src/meta/meta_ext.c +++ b/src/meta/meta_ext.c @@ -102,5 +102,5 @@ void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase) WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { - __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, ckptbase); + __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, &ckptbase); } diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c index 4f60728b2d2..aca69d0e6a2 100644 --- a/src/meta/meta_table.c +++ b/src/meta/meta_table.c @@ -68,9 +68,6 @@ __wt_metadata_cursor_open( if (F_ISSET(btree, WT_BTREE_NO_LOGGING)) F_CLR(btree, WT_BTREE_NO_LOGGING); - /* The metadata file always uses checkpoint IDs in visibility checks. */ - btree->include_checkpoint_txn = true; - return (0); } diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index 2f76fff04a5..5cf47ea5763 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -87,22 +87,19 @@ __win_fs_rename(WT_FILE_SYSTEM *file_system, WT_ERR(__wt_to_utf16_string(session, to, &to_wide)); /* - * Check if file exists since Windows does not override the file if - * it exists. + * We want an atomic rename, but that's not guaranteed by MoveFileExW + * (or by any MSDN API). Don't set the MOVEFILE_COPY_ALLOWED flag to + * prevent the system from falling back to a copy and delete process. + * Do set the MOVEFILE_WRITE_THROUGH flag so the window is as small + * as possible, just in case. 
WiredTiger renames are done in a single + * directory and we expect that to be an atomic metadata update on any + * modern filesystem. */ - if (GetFileAttributesW(to_wide->data) != INVALID_FILE_ATTRIBUTES) - if (DeleteFileW(to_wide->data) == FALSE) { - windows_error = __wt_getlasterror(); - __wt_errx(session, - "%s: file-rename: DeleteFileW: %s", - to, __wt_formatmessage(session, windows_error)); - WT_ERR(__wt_map_windows_error(windows_error)); - } - - if (MoveFileW(from_wide->data, to_wide->data) == FALSE) { + if (MoveFileExW(from_wide->data, to_wide->data, + MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH) == FALSE) { windows_error = __wt_getlasterror(); __wt_errx(session, - "%s to %s: file-rename: MoveFileW: %s", + "%s to %s: file-rename: MoveFileExW: %s", from, to, __wt_formatmessage(session, windows_error)); WT_ERR(__wt_map_windows_error(windows_error)); } diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index a667a288187..23f654caa70 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -1395,7 +1395,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, */ #define WT_CHILD_RELEASE(session, hazard, ref) do { \ if (hazard) { \ - hazard = false; \ + (hazard) = false; \ WT_TRET( \ __wt_page_release(session, ref, WT_READ_NO_EVICT)); \ } \ @@ -1737,7 +1737,7 @@ __rec_copy_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *kv) * WT_CELLs are typically small, 1 or 2 bytes -- don't call memcpy, do * the copy in-line. 
*/ - for (p = (uint8_t *)r->first_free, + for (p = r->first_free, t = (uint8_t *)&kv->cell, len = kv->cell_len; len > 0; --len) *p++ = *t++; @@ -2889,7 +2889,7 @@ no_slots: len = WT_PTRDIFF( r->first_free, (uint8_t *)dsk + dsk_dst->mem_size); dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - (void)memmove(dsk_start, (uint8_t *)r->first_free - len, len); + (void)memmove(dsk_start, r->first_free - len, len); r->entries -= r->raw_entries[result_slots - 1]; r->first_free = dsk_start + len; @@ -3583,11 +3583,12 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) uint64_t recno; btree = S2BT(session); + /* * Bulk-load is only permitted on newly created files, not any empty * file -- see the checkpoint code for a discussion. */ - if (!btree->bulk_load_ok) + if (!btree->original) WT_RET_MSG(session, EINVAL, "bulk-load is only possible for newly created trees"); @@ -3604,16 +3605,7 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) r = cbulk->reconcile; r->is_bulk_load = true; - recno = WT_RECNO_OOB; /* -Werror=maybe-uninitialized */ - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: - recno = 1; - break; - case BTREE_ROW: - recno = WT_RECNO_OOB; - break; - } + recno = btree->type == BTREE_ROW ? WT_RECNO_OOB : 1; return (__rec_split_init( session, r, cbulk->leaf, recno, btree->maxleafpage)); diff --git a/src/session/session_api.c b/src/session/session_api.c index d282c5d0c32..3d13287fbe6 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -128,7 +128,7 @@ __session_clear(WT_SESSION_IMPL *session) * * For these reasons, be careful when clearing the session structure. 
*/ - memset(session, 0, WT_SESSION_CLEAR_SIZE(session)); + memset(session, 0, WT_SESSION_CLEAR_SIZE); WT_INIT_LSN(&session->bg_sync_lsn); diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index ee9bddbfc19..95fb6a6f90e 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -270,6 +270,16 @@ __wt_session_release_btree(WT_SESSION_IMPL *session) if (F_ISSET(dhandle, WT_DHANDLE_DISCARD_FORCE)) { ret = __wt_conn_btree_sync_and_close(session, false, true); F_CLR(dhandle, WT_DHANDLE_DISCARD_FORCE); + } else if (F_ISSET(btree, WT_BTREE_BULK)) { + WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) && + !F_ISSET(dhandle, WT_DHANDLE_DISCARD)); + /* + * Acquire the schema lock while completing a bulk load. This + * avoids racing with a checkpoint while it gathers a set + * of handles. + */ + WT_WITH_SCHEMA_LOCK(session, ret = + __wt_conn_btree_sync_and_close(session, false, false)); } else if (F_ISSET(dhandle, WT_DHANDLE_DISCARD) || F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)) { WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE)); @@ -560,7 +570,7 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) { - WT_DATA_HANDLE *dhandle, *saved_dhandle; + WT_DATA_HANDLE *saved_dhandle; WT_DECL_RET; WT_ASSERT(session, WT_META_TRACKING(session)); @@ -568,31 +578,33 @@ __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) /* * Get the checkpoint handle exclusive, so no one else can access it - * while we are creating the new checkpoint. + * while we are creating the new checkpoint. Hold the lock until the + * checkpoint completes. 
*/ WT_ERR(__wt_session_get_btree(session, saved_dhandle->name, checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY)); + if ((ret = __wt_meta_track_handle_lock(session, false)) != 0) { + WT_TRET(__wt_session_release_btree(session)); + goto err; + } /* - * Flush any pages in this checkpoint from the cache (we are about to - * re-write the checkpoint which will mean cached pages no longer have - * valid contents). This is especially noticeable with memory mapped - * files, since changes to the underlying file are visible to the in - * memory pages. + * Get exclusive access to the handle and then flush any pages in this + * checkpoint from the cache (we are about to re-write the checkpoint + * which will mean cached pages no longer have valid contents). This + * is especially noticeable with memory mapped files, since changes to + * the underlying file are visible to the in-memory pages. */ + WT_ERR(__wt_evict_file_exclusive_on(session)); WT_ERR(__wt_cache_op(session, WT_SYNC_DISCARD)); /* * We lock checkpoint handles that we are overwriting, so the handle * must be closed when we release it. */ - dhandle = session->dhandle; - F_SET(dhandle, WT_DHANDLE_DISCARD); - - WT_ERR(__wt_meta_track_handle_lock(session, false)); + F_SET(session->dhandle, WT_DHANDLE_DISCARD); - /* Restore the original btree in the session. */ + /* Restore the original data handle in the session. 
*/ err: session->dhandle = saved_dhandle; - return (ret); } diff --git a/src/session/session_salvage.c b/src/session/session_salvage.c index 983b28dd8ea..12ce71cdbb0 100644 --- a/src/session/session_salvage.c +++ b/src/session/session_salvage.c @@ -54,6 +54,6 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_meta_ckptlist_set( session, dhandle->name, ckptbase, NULL)); -err: __wt_meta_ckptlist_free(session, ckptbase); +err: __wt_meta_ckptlist_free(session, &ckptbase); return (ret); } diff --git a/src/support/crypto.c b/src/support/crypto.c index ab94ec2c829..cce0d228832 100644 --- a/src/support/crypto.c +++ b/src/support/crypto.c @@ -133,5 +133,4 @@ __wt_encrypt_size(WT_SESSION_IMPL *session, return; *sizep = incoming_size + kencryptor->size_const + WT_ENCRYPT_LEN_SIZE; - return; } diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 3261c8089f4..6c97922f7e1 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -8,9 +8,9 @@ #include "wt_internal.h" -static int __checkpoint_lock_tree( - WT_SESSION_IMPL *, bool, bool, const char *[]); -static int __checkpoint_mark_deletes(WT_SESSION_IMPL *, const char *[]); +static int __checkpoint_lock_dirty_tree( + WT_SESSION_IMPL *, bool, bool, bool, const char *[]); +static int __checkpoint_mark_skip(WT_SESSION_IMPL *, WT_CKPT *, bool); static int __checkpoint_presync(WT_SESSION_IMPL *, const char *[]); static int __checkpoint_tree_helper(WT_SESSION_IMPL *, const char *[]); @@ -90,6 +90,33 @@ err: WT_TRET(__wt_metadata_cursor_release(session, &cursor)); } /* + * __checkpoint_update_generation -- + * Update the checkpoint generation of the current tree. + * + * This indicates that the tree will not be visited again by the current + * checkpoint. 
+ */ +static void +__checkpoint_update_generation(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + + btree = S2BT(session); + + /* + * Updates to the metadata are made by the checkpoint transaction, so + * the metadata tree's checkpoint generation should never be updated. + */ + if (WT_IS_METADATA(session->dhandle)) + return; + + WT_PUBLISH(btree->checkpoint_gen, + S2C(session)->txn_global.checkpoint_gen); + WT_STAT_DATA_SET(session, + btree_checkpoint_generation, btree->checkpoint_gen); +} + +/* * __checkpoint_apply_all -- * Apply an operation to all files involved in a checkpoint. */ @@ -239,22 +266,82 @@ int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) { WT_BTREE *btree; + WT_CONFIG_ITEM cval; WT_DECL_RET; const char *name; + bool force; + + btree = S2BT(session); + + /* Find out if we have to force a checkpoint. */ + WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval)); + force = cval.val != 0; + if (!force) { + WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval)); + force = cval.len != 0; + } /* Should not be called with anything other than a file object. */ WT_ASSERT(session, session->dhandle->checkpoint == NULL); WT_ASSERT(session, WT_PREFIX_MATCH(session->dhandle->name, "file:")); /* Skip files that are never involved in a checkpoint. */ - if (F_ISSET(S2BT(session), WT_BTREE_NO_CHECKPOINT)) + if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) + return (0); + +#ifdef HAVE_DIAGNOSTIC + /* + * We may have raced between starting the checkpoint transaction and + * some operation completing on the handle that updated the metadata + * (e.g., closing a bulk load cursor). All such operations either have + * exclusive access to the handle or hold the schema lock. We are now + * holding the schema lock and have an open btree handle, so if we + * can't update the metadata, then there has been some state change + * invisible to the checkpoint transaction. 
+ */ + if (!WT_IS_METADATA(session->dhandle)) { + WT_CURSOR *meta_cursor; + bool metadata_race; + + WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_ERROR)); + WT_RET(__wt_metadata_cursor(session, &meta_cursor)); + meta_cursor->set_key(meta_cursor, session->dhandle->name); + ret = __wt_curfile_update_check(meta_cursor); + if (ret == WT_ROLLBACK) { + metadata_race = true; + ret = 0; + } else + metadata_race = false; + WT_TRET(__wt_metadata_cursor_release(session, &meta_cursor)); + WT_RET(ret); + WT_ASSERT(session, !metadata_race); + } +#endif + + /* + * Decide whether the tree needs to be included in the checkpoint and + * if so, acquire the necessary locks. + */ + WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree( + session, true, force, true, cfg)); + WT_RET(ret); + if (F_ISSET(btree, WT_BTREE_SKIP_CKPT)) { + WT_ASSERT(session, btree->ckpt == NULL); + __checkpoint_update_generation(session); return (0); + } - /* Make sure there is space for the next entry. */ + /* + * Make sure there is space for the new entry: do this before getting + * the handle to avoid cleanup if we can't allocate the memory. + */ WT_RET(__wt_realloc_def(session, &session->ckpt_handle_allocated, session->ckpt_handle_next + 1, &session->ckpt_handle)); - /* Not strictly necessary, but cleaner to clear the current handle. */ + /* + * The current tree will be included: get it again because the handle + * we have is only valid for the duration of this function. + */ name = session->dhandle->name; session->dhandle = NULL; @@ -266,49 +353,13 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) * with eviction and we don't want to unfairly penalize (or promote) * eviction in trees due to checkpoints. 
*/ - btree = S2BT(session); btree->evict_walk_saved = btree->evict_walk_period; - WT_SAVE_DHANDLE(session, - ret = __checkpoint_lock_tree(session, true, true, cfg)); - if (ret != 0) { - WT_TRET(__wt_session_release_btree(session)); - return (ret); - } - - /* - * Flag that the handle is part of a checkpoint for the purposes - * of transaction visibility checks. - */ - WT_PUBLISH(btree->include_checkpoint_txn, true); - session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle; return (0); } /* - * __checkpoint_update_generation -- - * Update the checkpoint generation of the current tree. - * - * This indicates that the tree will not be visited again by the current - * checkpoint. - */ -static void -__checkpoint_update_generation(WT_SESSION_IMPL *session) -{ - WT_BTREE *btree; - - btree = S2BT(session); - if (!WT_IS_METADATA(session->dhandle)) - WT_PUBLISH(btree->include_checkpoint_txn, false); - - WT_PUBLISH(btree->checkpoint_gen, - S2C(session)->txn_global.checkpoint_gen); - WT_STAT_DATA_SET(session, - btree_checkpoint_generation, btree->checkpoint_gen); -} - -/* * __checkpoint_reduce_dirty_cache -- * Release clean trees from the list cached for checkpoints. */ @@ -371,7 +422,6 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session) __wt_sleep(0, stepdown_us / 10); __wt_epoch(session, &stop); current_us = WT_TIMEDIFF_US(stop, last); - total_ms = WT_TIMEDIFF_MS(stop, start); bytes_written_total = cache->bytes_written - bytes_written_start; @@ -434,36 +484,6 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session) } /* - * __checkpoint_release_clean_trees -- - * Release clean trees from the list cached for checkpoints. 
- */ -static int -__checkpoint_release_clean_trees(WT_SESSION_IMPL *session) -{ - WT_BTREE *btree; - WT_DATA_HANDLE *dhandle; - WT_DECL_RET; - u_int i; - - for (i = 0; i < session->ckpt_handle_next; i++) { - dhandle = session->ckpt_handle[i]; - btree = dhandle->handle; - if (!F_ISSET(btree, WT_BTREE_SKIP_CKPT)) - continue; - __wt_meta_ckptlist_free(session, btree->ckpt); - btree->ckpt = NULL; - WT_WITH_DHANDLE(session, dhandle, - __checkpoint_update_generation(session)); - session->ckpt_handle[i] = NULL; - WT_WITH_DHANDLE(session, dhandle, - ret = __wt_session_release_btree(session)); - WT_RET(ret); - } - - return (0); -} - -/* * __checkpoint_stats -- * Update checkpoint timer stats. */ @@ -531,8 +551,103 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session, static void __checkpoint_fail_reset(WT_SESSION_IMPL *session) { - S2BT(session)->modified = true; - S2BT(session)->ckpt = NULL; + WT_BTREE *btree; + + btree = S2BT(session); + btree->modified = true; + __wt_meta_ckptlist_free(session, &btree->ckpt); +} + +/* + * __checkpoint_prepare -- + * Start the transaction for a checkpoint and gather handles. + */ +static int +__checkpoint_prepare(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + WT_TXN_STATE *txn_state; + const char *txn_cfg[] = { WT_CONFIG_BASE(session, + WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; + + conn = S2C(session); + txn = &session->txn; + txn_global = &conn->txn_global; + txn_state = WT_SESSION_TXN_STATE(session); + + /* + * Start a snapshot transaction for the checkpoint. + * + * Note: we don't go through the public API calls because they have + * side effects on cursors, which applications can hold open across + * calls to checkpoint. 
+ */ + WT_RET(__wt_txn_begin(session, txn_cfg)); + + WT_DIAGNOSTIC_YIELD; + + /* Ensure a transaction ID is allocated prior to sharing it globally */ + WT_RET(__wt_txn_id_check(session)); + + /* + * Mark the connection as clean. If some data gets modified after + * generating checkpoint transaction id, connection will be reset to + * dirty when reconciliation marks the btree dirty on encountering the + * dirty page. + */ + conn->modified = false; + + /* + * Save the checkpoint session ID. + * + * We never do checkpoints in the default session (with id zero). + */ + WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0); + txn_global->checkpoint_id = session->id; + + /* + * Remove the checkpoint transaction from the global table. + * + * This allows ordinary visibility checks to move forward because + * checkpoints often take a long time and only write to the metadata. + */ + __wt_writelock(session, &txn_global->scan_rwlock); + txn_global->checkpoint_txnid = txn->id; + txn_global->checkpoint_pinned = WT_MIN(txn->id, txn->snap_min); + + /* + * Sanity check that the oldest ID hasn't moved on before we have + * cleared our entry. + */ + WT_ASSERT(session, + WT_TXNID_LE(txn_global->oldest_id, txn_state->id) && + WT_TXNID_LE(txn_global->oldest_id, txn_state->pinned_id)); + + /* + * Clear our entry from the global transaction session table. Any + * operation that needs to know about the ID for this checkpoint will + * consider the checkpoint ID in the global structure. Most operations + * can safely ignore the checkpoint ID (see the visible all check for + * details). + */ + txn_state->id = txn_state->pinned_id = + txn_state->metadata_pinned = WT_TXN_NONE; + __wt_writeunlock(session, &txn_global->scan_rwlock); + + /* + * Get a list of handles we want to flush; for named checkpoints this + * may pull closed objects into the session cache. + * + * First, gather all handles, then start the checkpoint transaction, + * then release any clean handles. 
+ */ + WT_ASSERT(session, session->ckpt_handle_next == 0); + WT_WITH_TABLE_READ_LOCK(session, ret = __checkpoint_apply_all( + session, cfg, __wt_checkpoint_get_handles, NULL)); + return (ret); } /* @@ -550,19 +665,15 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN *txn; WT_TXN_GLOBAL *txn_global; WT_TXN_ISOLATION saved_isolation; - WT_TXN_STATE *txn_state; void *saved_meta_next; u_int i; uint64_t fsync_duration_usecs; bool failed, full, idle, logging, tracking; - const char *txn_cfg[] = { WT_CONFIG_BASE(session, - WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; conn = S2C(session); cache = conn->cache; txn = &session->txn; txn_global = &conn->txn_global; - txn_state = WT_SESSION_TXN_STATE(session); saved_isolation = session->isolation; full = idle = logging = tracking = false; @@ -631,86 +742,24 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) tracking = true; /* - * Get a list of handles we want to flush; for named checkpoints this - * may pull closed objects into the session cache. - * * We want to skip checkpointing clean handles whenever possible. That * is, when the checkpoint is not named or forced. However, we need to * take care about ordering with respect to the checkpoint transaction. * - * If we skip clean handles before starting the transaction, the + * We can't skip clean handles before starting the transaction or the * checkpoint can miss updates in trees that become dirty as the * checkpoint is starting. If we wait until the transaction has * started before locking a handle, there could be a metadata-changing * operation in between (e.g., salvage) that will cause a write * conflict when the checkpoint goes to write the metadata. * - * First, gather all handles, then start the checkpoint transaction, - * then release any clean handles. + * Hold the schema lock while starting the transaction and gathering + * handles so the set we get is complete and correct. 
*/ - WT_ASSERT(session, session->ckpt_handle_next == 0); - WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_READ_LOCK(session, - ret = __checkpoint_apply_all( - session, cfg, __wt_checkpoint_get_handles, NULL))); + WT_WITH_SCHEMA_LOCK(session, ret = __checkpoint_prepare(session, cfg)); WT_ERR(ret); - /* - * Start a snapshot transaction for the checkpoint. - * - * Note: we don't go through the public API calls because they have - * side effects on cursors, which applications can hold open across - * calls to checkpoint. - */ - WT_ERR(__wt_txn_begin(session, txn_cfg)); - - /* Ensure a transaction ID is allocated prior to sharing it globally */ - WT_ERR(__wt_txn_id_check(session)); - - /* - * Mark the connection as clean. If some data gets modified after - * generating checkpoint transaction id, connection will be reset to - * dirty when reconciliation marks the btree dirty on encountering the - * dirty page. - */ - conn->modified = false; - - /* - * Save the checkpoint session ID. - * - * We never do checkpoints in the default session (with id zero). - */ - WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0); - txn_global->checkpoint_id = session->id; - - /* - * Remove the checkpoint transaction from the global table. - * - * This allows ordinary visibility checks to move forward because - * checkpoints often take a long time and only write to the metadata. - */ - __wt_writelock(session, &txn_global->scan_rwlock); - txn_global->checkpoint_txnid = txn->id; - txn_global->checkpoint_pinned = WT_MIN(txn->id, txn->snap_min); - - /* - * Sanity check that the oldest ID hasn't moved on before we have - * cleared our entry. - */ - WT_ASSERT(session, - WT_TXNID_LE(txn_global->oldest_id, txn_state->id) && - WT_TXNID_LE(txn_global->oldest_id, txn_state->pinned_id)); - - /* - * Clear our entry from the global transaction session table. Any - * operation that needs to know about the ID for this checkpoint will - * consider the checkpoint ID in the global structure. 
Most operations - * can safely ignore the checkpoint ID (see the visible all check for - * details). - */ - txn_state->id = txn_state->pinned_id = - txn_state->metadata_pinned = WT_TXN_NONE; - __wt_writeunlock(session, &txn_global->scan_rwlock); + WT_ASSERT(session, txn->isolation == WT_ISO_SNAPSHOT); /* * Unblock updates -- we can figure out that any updates to clean pages @@ -719,16 +768,6 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) cache->eviction_scrub_limit = 0.0; WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0); - /* - * Mark old checkpoints that are being deleted and figure out which - * trees we can skip in this checkpoint. - * - * Release clean trees. Any updates made after this point will not - * visible to the checkpoint transaction. - */ - WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_mark_deletes)); - WT_ERR(__checkpoint_release_clean_trees(session)); - /* Tell logging that we have started a database checkpoint. */ if (full && logging) WT_ERR(__wt_txn_checkpoint_log( @@ -1065,12 +1104,13 @@ __drop_to(WT_CKPT *ckptbase, const char *name, size_t len) } /* - * __checkpoint_lock_tree -- - * Acquire the locks required to checkpoint a tree. + * __checkpoint_lock_dirty_tree -- + * Decide whether the tree needs to be included in the checkpoint and if + * so, acquire the necessary locks. */ static int -__checkpoint_lock_tree(WT_SESSION_IMPL *session, - bool is_checkpoint, bool need_tracking, const char *cfg[]) +__checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session, + bool is_checkpoint, bool force, bool need_tracking, const char *cfg[]) { WT_BTREE *btree; WT_CKPT *ckpt, *ckptbase; @@ -1195,6 +1235,14 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, } /* + * Mark old checkpoints that are being deleted and figure out which + * trees we can skip in this checkpoint. 
+ */ + WT_ERR(__checkpoint_mark_skip(session, ckptbase, force)); + if (F_ISSET(btree, WT_BTREE_SKIP_CKPT)) + goto err; + + /* * Lock the checkpoints that will be deleted. * * Checkpoints are only locked when tracking is enabled, which covers @@ -1227,33 +1275,20 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, } /* - * There are special files: those being bulk-loaded, salvaged, upgraded - * or verified during the checkpoint. We have to do something for those - * objects because a checkpoint is an external name the application can - * reference and the name must exist no matter what's happening during - * the checkpoint. For bulk-loaded files, we could block until the load - * completes, checkpoint the partial load, or magic up an empty-file - * checkpoint. The first is too slow, the second is insane, so do the - * third. - * Salvage, upgrade and verify don't currently require any work, all - * three hold the schema lock, blocking checkpoints. If we ever want to - * fix that (and I bet we eventually will, at least for verify), we can - * copy the last checkpoint the file has. That works if we guarantee - * salvage, upgrade and verify act on objects with previous checkpoints - * (true if handles are closed/re-opened between object creation and a - * subsequent salvage, upgrade or verify operation). Presumably, - * salvage and upgrade will discard all previous checkpoints when they - * complete, which is fine with us. This change will require reference - * counting checkpoints, and once that's done, we should use checkpoint - * copy instead of forcing checkpoints on clean objects to associate - * names with checkpoints. + * There are special tree: those being bulk-loaded, salvaged, upgraded + * or verified during the checkpoint. They should never be part of a + * checkpoint: we will fail to lock them because the operations have + * exclusive access to the handles. 
Named checkpoints will fail in that + * case, ordinary checkpoints will skip files that cannot be opened + * normally. */ WT_ASSERT(session, !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)); __wt_readunlock(session, &conn->hot_backup_lock); - WT_ASSERT(session, btree->ckpt == NULL); + WT_ASSERT(session, btree->ckpt == NULL && + !F_ISSET(btree, WT_BTREE_SKIP_CKPT)); btree->ckpt = ckptbase; return (0); @@ -1261,30 +1296,26 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, err: if (hot_backup_locked) __wt_readunlock(session, &conn->hot_backup_lock); - __wt_meta_ckptlist_free(session, ckptbase); + __wt_meta_ckptlist_free(session, &ckptbase); __wt_free(session, name_alloc); return (ret); } /* - * __checkpoint_mark_deletes -- - * Figure out what old checkpoints will be deleted, and whether the - * checkpoint can be skipped entirely. + * __checkpoint_mark_skip -- + * Figure out whether the checkpoint can be skipped for a tree. */ static int -__checkpoint_mark_deletes( - WT_SESSION_IMPL *session, const char *cfg[]) +__checkpoint_mark_skip( + WT_SESSION_IMPL *session, WT_CKPT *ckptbase, bool force) { WT_BTREE *btree; - WT_CKPT *ckpt, *ckptbase; - WT_CONFIG_ITEM cval; + WT_CKPT *ckpt; const char *name; int deleted; - bool force; btree = S2BT(session); - ckptbase = btree->ckpt; /* * Check for clean objects not requiring a checkpoint. @@ -1310,12 +1341,7 @@ __checkpoint_mark_deletes( * to open the checkpoint in a cursor after taking any checkpoint, which * means it must exist. */ - force = false; F_CLR(btree, WT_BTREE_SKIP_CKPT); - if (!btree->modified && cfg != NULL) { - WT_RET(__wt_config_gets(session, cfg, "force", &cval)); - force = cval.val != 0; - } if (!btree->modified && !force) { deleted = 0; WT_CKPT_FOREACH(ckptbase, ckpt) @@ -1393,7 +1419,7 @@ __checkpoint_tree( * delete a physical checkpoint, and that will end in tears. 
*/ if (is_checkpoint) - if (btree->bulk_load_ok) { + if (btree->original) { fake_ckpt = true; goto fake; } @@ -1504,8 +1530,7 @@ err: /* S2C(session)->modified = true; } - __wt_meta_ckptlist_free(session, ckptbase); - btree->ckpt = NULL; + __wt_meta_ckptlist_free(session, &btree->ckpt); return (ret); } @@ -1524,7 +1549,8 @@ __checkpoint_presync(WT_SESSION_IMPL *session, const char *cfg[]) WT_UNUSED(cfg); btree = S2BT(session); - WT_ASSERT(session, !btree->include_checkpoint_txn); + WT_ASSERT(session, btree->checkpoint_gen == + S2C(session)->txn_global.checkpoint_gen); btree->evict_walk_period = btree->evict_walk_saved; return (0); } @@ -1582,12 +1608,11 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) || F_ISSET(session, WT_SESSION_LOCKED_METADATA)); - WT_SAVE_DHANDLE(session, - ret = __checkpoint_lock_tree(session, true, true, cfg)); - WT_RET(ret); - WT_SAVE_DHANDLE(session, - ret = __checkpoint_mark_deletes(session, cfg)); + WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree( + session, true, false, true, cfg)); WT_RET(ret); + if (F_ISSET(S2BT(session), WT_BTREE_SKIP_CKPT)) + return (0); return (__checkpoint_tree(session, true, cfg)); } @@ -1662,15 +1687,10 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) if (need_tracking) WT_RET(__wt_meta_track_on(session)); - WT_SAVE_DHANDLE(session, - ret = __checkpoint_lock_tree(session, false, need_tracking, NULL)); + WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree( + session, false, false, need_tracking, NULL)); WT_ASSERT(session, ret == 0); - if (ret == 0) { - WT_SAVE_DHANDLE(session, - ret = __checkpoint_mark_deletes(session, NULL)); - WT_ASSERT(session, ret == 0); - } - if (ret == 0) + if (ret == 0 && !F_ISSET(btree, WT_BTREE_SKIP_CKPT)) ret = __checkpoint_tree(session, false, NULL); if (need_tracking) diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index 2d8a77a69e6..30932195b1e 100644 --- 
a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -93,7 +93,7 @@ __recovery_cursor(WT_SESSION_IMPL *session, WT_RECOVERY *r, "%s op %" PRIu32 " to file %" PRIu32 " at LSN %" PRIu32 \ "/%" PRIu32, \ cursor == NULL ? "Skipping" : "Applying", \ - optype, fileid, lsnp->l.file, lsnp->l.offset); \ + optype, fileid, (lsnp)->l.file, (lsnp)->l.offset); \ if (cursor == NULL) \ break diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c index cded40a8b45..947fa7bf9ef 100644 --- a/src/utilities/util_dump.c +++ b/src/utilities/util_dump.c @@ -504,17 +504,18 @@ dump_prefix(WT_SESSION *session, bool hex, bool json) (void)wiredtiger_version(&vmajor, &vminor, &vpatch); + if (json && printf( + " \"%s\" : \"%d (%d.%d.%d)\",\n", + DUMP_JSON_VERSION_MARKER, DUMP_JSON_CURRENT_VERSION, + vmajor, vminor, vpatch) < 0) + return (util_err(session, EIO, NULL)); + if (!json && (printf( "WiredTiger Dump (WiredTiger Version %d.%d.%d)\n", vmajor, vminor, vpatch) < 0 || printf("Format=%s\n", hex ? "hex" : "print") < 0 || printf("Header\n") < 0)) return (util_err(session, EIO, NULL)); - else if (json && printf( - " \"%s\" : \"%d (%d.%d.%d)\",\n", - DUMP_JSON_VERSION_MARKER, DUMP_JSON_CURRENT_VERSION, - vmajor, vminor, vpatch) < 0) - return (util_err(session, EIO, NULL)); return (0); } diff --git a/src/utilities/util_load.c b/src/utilities/util_load.c index ca77643eb49..d31fa4c9d08 100644 --- a/src/utilities/util_load.c +++ b/src/utilities/util_load.c @@ -80,8 +80,8 @@ util_load(WT_SESSION *session, int argc, char *argv[]) if (no_overwrite) flags |= LOAD_JSON_NO_OVERWRITE; return (util_load_json(session, filename, flags)); - } else - return (load_dump(session)); + } + return (load_dump(session)); } /* diff --git a/src/utilities/util_main.c b/src/utilities/util_main.c index 7157f0d90fe..68e3b0f1bc5 100644 --- a/src/utilities/util_main.c +++ b/src/utilities/util_main.c @@ -20,7 +20,43 @@ static const char *command; /* Command name */ #define REC_LOGOFF "log=(enabled=false)" #define 
REC_RECOVER "log=(recover=on)" -static int usage(void); +static void +usage(void) +{ + fprintf(stderr, + "WiredTiger Data Engine (version %d.%d)\n", + WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR); + fprintf(stderr, + "global options:\n" + "\t" "-C\t" "wiredtiger_open configuration\n" + "\t" "-h\t" "database directory\n" + "\t" "-L\t" "turn logging off for debug-mode\n" + "\t" "-R\t" "run recovery if configured\n" + "\t" "-V\t" "display library version and exit\n" + "\t" "-v\t" "verbose\n"); + fprintf(stderr, + "commands:\n" + "\t" "alter\t alter an object\n" + "\t" "backup\t database backup\n" + "\t" "compact\t compact an object\n" + "\t" "copyright copyright information\n" + "\t" "create\t create an object\n" + "\t" "drop\t drop an object\n" + "\t" "dump\t dump an object\n" + "\t" "list\t list database objects\n" + "\t" "load\t load an object\n" + "\t" "loadtext load an object from a text file\n" + "\t" "printlog display the database log\n" + "\t" "read\t read values from an object\n" + "\t" "rebalance rebalance an object\n" + "\t" "rename\t rename an object\n" + "\t" "salvage\t salvage a file\n" + "\t" "stat\t display statistics for an object\n" + "\t" "truncate truncate an object, removing all content\n" + "\t" "upgrade\t upgrade an object\n" + "\t" "verify\t verify an object\n" + "\t" "write\t write values to an object\n"); +} int main(int argc, char *argv[]) @@ -73,8 +109,9 @@ main(int argc, char *argv[]) cmd_config = __wt_optarg; break; case 'E': /* secret key */ + free(secretkey); /* lint: set more than once */ if ((secretkey = strdup(__wt_optarg)) == NULL) { - ret = util_err(NULL, errno, NULL); + (void)util_err(NULL, errno, NULL); goto err; } memset(__wt_optarg, 0, strlen(__wt_optarg)); @@ -92,24 +129,27 @@ main(int argc, char *argv[]) break; case 'V': /* version */ printf("%s\n", wiredtiger_version(NULL, NULL, NULL)); - return (EXIT_SUCCESS); + goto done; case 'v': /* verbose */ verbose = true; break; case '?': default: - return (usage()); + 
usage(); + goto err; } if (logoff && recover) { fprintf(stderr, "Only one of -L and -R is allowed.\n"); - return (EXIT_FAILURE); + goto err; } argc -= __wt_optind; argv += __wt_optind; /* The next argument is the command name. */ - if (argc < 1) - return (usage()); + if (argc < 1) { + usage(); + goto err; + } command = argv[0]; /* Reset getopt. */ @@ -130,7 +170,7 @@ main(int argc, char *argv[]) func = util_compact; else if (strcmp(command, "copyright") == 0) { util_copyright(); - return (EXIT_SUCCESS); + goto done; } else if (strcmp(command, "create") == 0) { func = util_create; config = "create"; @@ -194,8 +234,10 @@ main(int argc, char *argv[]) default: break; } - if (func == NULL) - return (usage()); + if (func == NULL) { + usage(); + goto err; + } /* Build the configuration string. */ len = 10; /* some slop */ @@ -212,7 +254,7 @@ main(int argc, char *argv[]) } len += strlen(rec_config); if ((p = malloc(len)) == NULL) { - ret = util_err(NULL, errno, NULL); + (void)util_err(NULL, errno, NULL); goto err; } (void)snprintf(p, len, "%s,%s,%s%s%s%s", @@ -223,19 +265,24 @@ main(int argc, char *argv[]) /* Open the database and a session. */ if ((ret = wiredtiger_open(home, verbose ? verbose_handler : NULL, config, &conn)) != 0) { - ret = util_err(NULL, ret, NULL); + (void)util_err(NULL, ret, NULL); goto err; } if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { - ret = util_err(NULL, ret, NULL); + (void)util_err(NULL, ret, NULL); goto err; } /* Call the function. */ ret = func(session, argc, argv); + if (0) { +err: ret = 1; + } +done: + /* Close the database. */ -err: if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0) + if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0) ret = tret; free(p); @@ -244,46 +291,6 @@ err: if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0) return (ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); } -static int -usage(void) -{ - fprintf(stderr, - "WiredTiger Data Engine (version %d.%d)\n", - WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR); - fprintf(stderr, - "global options:\n" - "\t" "-C\t" "wiredtiger_open configuration\n" - "\t" "-h\t" "database directory\n" - "\t" "-L\t" "turn logging off for debug-mode\n" - "\t" "-R\t" "run recovery if configured\n" - "\t" "-V\t" "display library version and exit\n" - "\t" "-v\t" "verbose\n"); - fprintf(stderr, - "commands:\n" - "\t" "alter\t alter an object\n" - "\t" "backup\t database backup\n" - "\t" "compact\t compact an object\n" - "\t" "copyright copyright information\n" - "\t" "create\t create an object\n" - "\t" "drop\t drop an object\n" - "\t" "dump\t dump an object\n" - "\t" "list\t list database objects\n" - "\t" "load\t load an object\n" - "\t" "loadtext load an object from a text file\n" - "\t" "printlog display the database log\n" - "\t" "read\t read values from an object\n" - "\t" "rebalance rebalance an object\n" - "\t" "rename\t rename an object\n" - "\t" "salvage\t salvage a file\n" - "\t" "stat\t display statistics for an object\n" - "\t" "truncate truncate an object, removing all content\n" - "\t" "upgrade\t upgrade an object\n" - "\t" "verify\t verify an object\n" - "\t" "write\t write values to an object\n"); - - return (EXIT_FAILURE); -} - /* * util_uri -- * Build a name. @@ -314,7 +321,7 @@ util_uri(WT_SESSION *session, const char *s, const char *type) * the default type for the operation. 
*/ if (strchr(s, ':') != NULL) - strcpy(name, s); + snprintf(name, len, "%s", s); else snprintf(name, len, "%s:%s", type, s); return (name); diff --git a/test/bloom/test_bloom.c b/test/bloom/test_bloom.c index 67249ff887e..bef509e01d8 100644 --- a/test/bloom/test_bloom.c +++ b/test/bloom/test_bloom.c @@ -29,8 +29,6 @@ #include "test_util.h" static struct { - char *progname; /* Program name */ - WT_CONNECTION *wt_conn; /* WT_CONNECTION handle */ WT_SESSION *wt_session; /* WT_SESSION handle */ @@ -61,10 +59,7 @@ main(int argc, char *argv[]) { int ch; - if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL) - g.progname = argv[0]; - else - ++g.progname; + (void)testutil_set_progname(argv); /* Set default configuration values. */ g.c_cache = 10; @@ -75,7 +70,7 @@ main(int argc, char *argv[]) g.c_srand = 3233456; /* Set values from the command line. */ - while ((ch = __wt_getopt(g.progname, argc, argv, "c:f:k:o:s:")) != EOF) + while ((ch = __wt_getopt(progname, argc, argv, "c:f:k:o:s:")) != EOF) switch (ch) { case 'c': /* Cache size */ g.c_cache = (u_int)atoi(__wt_optarg); @@ -128,7 +123,7 @@ setup(void) */ snprintf(config, sizeof(config), "create,error_prefix=\"%s\",cache_size=%" PRIu32 "MB,%s", - g.progname, g.c_cache, g.config_open == NULL ? "" : g.config_open); + progname, g.c_cache, g.config_open == NULL ? 
"" : g.config_open); testutil_check(wiredtiger_open(NULL, NULL, config, &conn)); @@ -246,7 +241,7 @@ populate_entries(void) void usage(void) { - fprintf(stderr, "usage: %s [-cfkos]\n", g.progname); + fprintf(stderr, "usage: %s [-cfkos]\n", progname); fprintf(stderr, "%s", "\t-c cache size\n" "\t-f number of bits per item\n" diff --git a/test/checkpoint/smoke.sh b/test/checkpoint/smoke.sh index 123d4e00df5..39b1f428c2c 100755 --- a/test/checkpoint/smoke.sh +++ b/test/checkpoint/smoke.sh @@ -6,8 +6,8 @@ set -e echo "checkpoint: 3 mixed tables" $TEST_WRAPPER ./t -T 3 -t m -# We are done if short tests are requested -test -z "$TESTUTIL_DISABLE_LONG_TESTS" || exit 0 +# We are done unless long tests are enabled. +test "$TESTUTIL_ENABLE_LONG_TESTS" = "1" || exit 0 echo "checkpoint: 6 column-store tables" $TEST_WRAPPER ./t -T 6 -t c diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c index 4998019ad8e..c7132b433d2 100644 --- a/test/checkpoint/test_checkpoint.c +++ b/test/checkpoint/test_checkpoint.c @@ -50,10 +50,7 @@ main(int argc, char *argv[]) char *working_dir; const char *config_open; - if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL) - g.progname = argv[0]; - else - ++g.progname; + (void)testutil_set_progname(argv); config_open = NULL; ret = 0; @@ -68,7 +65,7 @@ main(int argc, char *argv[]) runs = 1; while ((ch = __wt_getopt( - g.progname, argc, argv, "c:C:h:k:l:n:r:t:T:W:")) != EOF) + progname, argc, argv, "c:C:h:k:l:n:r:t:T:W:")) != EOF) switch (ch) { case 'c': g.checkpoint_name = __wt_optarg; @@ -132,7 +129,7 @@ main(int argc, char *argv[]) testutil_work_dir_from_path(g.home, 512, working_dir); - printf("%s: process %" PRIu64 "\n", g.progname, (uint64_t)getpid()); + printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid()); for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) { printf(" %d: %d workers, %d tables\n", cnt, g.nworkers, g.ntables); @@ -204,7 +201,7 @@ wt_connect(const char *config_open) 
snprintf(config, sizeof(config), "create,statistics=(fast),error_prefix=\"%s\",cache_size=1GB%s%s", - g.progname, + progname, config_open == NULL ? "" : ",", config_open == NULL ? "" : config_open); @@ -297,10 +294,10 @@ log_print_err(const char *m, int e, int fatal) g.running = 0; g.status = e; } - fprintf(stderr, "%s: %s: %s\n", g.progname, m, wiredtiger_strerror(e)); + fprintf(stderr, "%s: %s: %s\n", progname, m, wiredtiger_strerror(e)); if (g.logfp != NULL) fprintf(g.logfp, "%s: %s: %s\n", - g.progname, m, wiredtiger_strerror(e)); + progname, m, wiredtiger_strerror(e)); return (e); } @@ -333,7 +330,7 @@ usage(void) "usage: %s " "[-S] [-C wiredtiger-config] [-k keys] [-l log]\n\t" "[-n ops] [-c checkpoint] [-r runs] [-t f|r|v] [-W workers]\n", - g.progname); + progname); fprintf(stderr, "%s", "\t-C specify wiredtiger_open configuration arguments\n" "\t-c checkpoint name to used named checkpoints\n" diff --git a/test/checkpoint/test_checkpoint.h b/test/checkpoint/test_checkpoint.h index 0d0d02447d5..347bd2c6e89 100644 --- a/test/checkpoint/test_checkpoint.h +++ b/test/checkpoint/test_checkpoint.h @@ -58,7 +58,6 @@ typedef struct { u_int nkeys; /* Keys to load */ u_int nops; /* Operations per thread */ FILE *logfp; /* Message log file. 
*/ - char *progname; /* Program name */ int nworkers; /* Number workers configured */ int ntables; /* Number tables configured */ int ntables_created; /* Number tables opened */ diff --git a/test/csuite/Makefile.am b/test/csuite/Makefile.am index e2b72532703..10ab890f2f5 100644 --- a/test/csuite/Makefile.am +++ b/test/csuite/Makefile.am @@ -4,8 +4,13 @@ LDADD = $(top_builddir)/test/utility/libtest_util.la \ $(top_builddir)/libwiredtiger.la AM_LDFLAGS = -static +noinst_PROGRAMS= + +test_scope_SOURCES = scope/main.c +noinst_PROGRAMS += test_scope + test_wt1965_col_efficiency_SOURCES = wt1965_col_efficiency/main.c -noinst_PROGRAMS = test_wt1965_col_efficiency +noinst_PROGRAMS += test_wt1965_col_efficiency test_wt2403_lsm_workload_SOURCES = wt2403_lsm_workload/main.c noinst_PROGRAMS += test_wt2403_lsm_workload diff --git a/test/csuite/scope/main.c b/test/csuite/scope/main.c new file mode 100644 index 00000000000..15dabd97c40 --- /dev/null +++ b/test/csuite/scope/main.c @@ -0,0 +1,288 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +#define KEY "key" +#define VALUE "value" + +static int ignore_errors; + +static int +handle_error(WT_EVENT_HANDLER *handler, + WT_SESSION *session, int error, const char *message) +{ + (void)(handler); + + /* Skip the error messages we're expecting to see. */ + if (ignore_errors > 0 && + (strstr(message, "requires key be set") != NULL || + strstr(message, "requires value be set") != NULL)) { + --ignore_errors; + return (0); + } + + (void)fprintf(stderr, "%s: %s\n", + message, session->strerror(session, error)); + return (0); +} + +static WT_EVENT_HANDLER event_handler = { + handle_error, + NULL, + NULL, + NULL +}; + +static void +cursor_scope_ops(WT_SESSION *session, const char *uri) +{ + struct { + const char *op; + enum { INSERT, SEARCH, SEARCH_NEAR, + REMOVE, REMOVE_POS, RESERVE, UPDATE } func; + const char *config; + } *op, ops[] = { + /* + * The ops order is fixed and shouldn't change, that is, insert + * has to happen first so search, update and remove operations + * are possible, and remove has to be last. + */ + { "insert", INSERT, NULL, }, + { "search", SEARCH, NULL, }, + { "search", SEARCH_NEAR, NULL, }, +#if 0 + { "reserve", RESERVE, NULL, }, +#endif + { "update", UPDATE, NULL, }, + { "remove", REMOVE, NULL, }, + { "remove", REMOVE_POS, NULL, }, + { NULL, INSERT, NULL } + }; + WT_CURSOR *cursor; + uint64_t keyr; + const char *key, *value; + char keybuf[100], valuebuf[100]; + int exact; + bool recno; + + /* Reserve requires a running transaction. 
*/ + testutil_check(session->begin_transaction(session, NULL)); + + cursor = NULL; + for (op = ops; op->op != NULL; op++) { + key = value = NULL; + + /* Open a cursor. */ + if (cursor != NULL) + testutil_check(cursor->close(cursor)); + testutil_check(session->open_cursor( + session, uri, NULL, op->config, &cursor)); + recno = strcmp(cursor->key_format, "r") == 0; + + /* + * Set up application buffers so we can detect overwrites + * or failure to copy application information into library + * memory. + */ + if (recno) + cursor->set_key(cursor, (uint64_t)1); + else { + strcpy(keybuf, KEY); + cursor->set_key(cursor, keybuf); + } + strcpy(valuebuf, VALUE); + cursor->set_value(cursor, valuebuf); + + /* + * The application must keep key and value memory valid until + * the next operation that positions the cursor, modifies the + * data, or resets or closes the cursor. + * + * Modifying either the key or value buffers is not permitted. + */ + switch (op->func) { + case INSERT: + testutil_check(cursor->insert(cursor)); + break; + case SEARCH: + testutil_check(cursor->search(cursor)); + break; + case SEARCH_NEAR: + testutil_check(cursor->search_near(cursor, &exact)); + break; + case REMOVE_POS: + /* + * Remove has two modes, one where the remove is based + * on a cursor position, the other where it's based on + * a set key. The results are different, so test them + * separately. + */ + testutil_check(cursor->search(cursor)); + /* FALLTHROUGH */ + case REMOVE: + testutil_check(cursor->remove(cursor)); + break; + case RESERVE: +#if 0 + testutil_check(cursor->reserve(cursor)); +#endif + break; + case UPDATE: + testutil_check(cursor->update(cursor)); + break; + } + + /* + * The cursor should no longer reference application memory, + * and application buffers can be safely overwritten. + */ + memset(keybuf, 'K', sizeof(keybuf)); + memset(valuebuf, 'V', sizeof(valuebuf)); + + /* + * Check that get_key/get_value behave as expected after the + * operation. 
+ */ + switch (op->func) { + case INSERT: + case REMOVE: + /* + * Insert and remove configured with a search key do + * not position the cursor and have no key or value. + * + * There should be two error messages, ignore them. + */ + ignore_errors = 2; + if (recno) + testutil_assert( + cursor->get_key(cursor, &keyr) != 0); + else + testutil_assert( + cursor->get_key(cursor, &key) != 0); + testutil_assert(cursor->get_value(cursor, &value) != 0); + testutil_assert(ignore_errors == 0); + break; + case REMOVE_POS: + /* + * Remove configured with a cursor position has a key, + * but no value. + * + * There should be one error message, ignore it. + */ + if (recno) { + testutil_assert( + cursor->get_key(cursor, &keyr) == 0); + testutil_assert(keyr == 1); + } else { + testutil_assert( + cursor->get_key(cursor, &key) == 0); + testutil_assert(key != keybuf); + testutil_assert(strcmp(key, KEY) == 0); + } + ignore_errors = 1; + testutil_assert(cursor->get_value(cursor, &value) != 0); + testutil_assert(ignore_errors == 0); + break; + case RESERVE: + case SEARCH: + case SEARCH_NEAR: + case UPDATE: + /* + * Reserve, search, search-near and update position the + * cursor and have both a key and value. + * + * Any key/value should not reference application + * memory. + */ + if (recno) { + testutil_assert( + cursor->get_key(cursor, &keyr) == 0); + testutil_assert(keyr == 1); + } else { + testutil_assert( + cursor->get_key(cursor, &key) == 0); + testutil_assert(key != keybuf); + testutil_assert(strcmp(key, KEY) == 0); + } + testutil_assert(cursor->get_value(cursor, &value) == 0); + testutil_assert(value != valuebuf); + testutil_assert(strcmp(value, VALUE) == 0); + break; + } + + /* + * We have more than one remove operation, add the key back + * in. 
+ */ + if (op->func == REMOVE || op->func == REMOVE_POS) { + if (recno) + cursor->set_key(cursor, (uint64_t)1); + else { + cursor->set_key(cursor, KEY); + } + cursor->set_value(cursor, VALUE); + testutil_check(cursor->insert(cursor)); + } + } +} + +static void +run(WT_CONNECTION *conn, const char *uri, const char *config) +{ + WT_SESSION *session; + + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check(session->create(session, uri, config)); + cursor_scope_ops(session, uri); + testutil_check(session->close(session, NULL)); +} + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + testutil_check( + wiredtiger_open(opts->home, &event_handler, "create", &opts->conn)); + + run(opts->conn, "file:file.SS", "key_format=S,value_format=S"); + run(opts->conn, "file:file.rS", "key_format=r,value_format=S"); + run(opts->conn, "lsm:lsm.SS", "key_format=S,value_format=S"); + run(opts->conn, "lsm:lsm.rS", "key_format=r,value_format=S"); + run(opts->conn, "table:table.SS", "key_format=S,value_format=S"); + run(opts->conn, "table:table.rS", "key_format=r,value_format=S"); + + testutil_cleanup(opts); + + return (EXIT_SUCCESS); +} diff --git a/test/csuite/wt2246_col_append/main.c b/test/csuite/wt2246_col_append/main.c index 4b352b26051..976e2269da6 100644 --- a/test/csuite/wt2246_col_append/main.c +++ b/test/csuite/wt2246_col_append/main.c @@ -101,9 +101,10 @@ main(int argc, char *argv[]) uint64_t i, id; char buf[100]; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); opts->table_type = TABLE_ROW; opts->n_append_threads = N_APPEND_THREADS; diff --git a/test/csuite/wt2323_join_visibility/main.c b/test/csuite/wt2323_join_visibility/main.c index 
239a3f300d0..a61f707e008 100644 --- a/test/csuite/wt2323_join_visibility/main.c +++ b/test/csuite/wt2323_join_visibility/main.c @@ -92,10 +92,11 @@ main(int argc, char *argv[]) TEST_OPTS *opts, _opts; const char *tablename; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; sharedopts = &_sharedopts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); memset(sharedopts, 0, sizeof(*sharedopts)); diff --git a/test/csuite/wt2535_insert_race/main.c b/test/csuite/wt2535_insert_race/main.c index ae18760a829..ba17d485e07 100644 --- a/test/csuite/wt2535_insert_race/main.c +++ b/test/csuite/wt2535_insert_race/main.c @@ -49,9 +49,10 @@ main(int argc, char *argv[]) uint64_t current_value; int i; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); opts->nthreads = 10; opts->nrecords = 1000; diff --git a/test/csuite/wt2834_join_bloom_fix/main.c b/test/csuite/wt2834_join_bloom_fix/main.c index 7c80496f1b6..f2c54b942be 100644 --- a/test/csuite/wt2834_join_bloom_fix/main.c +++ b/test/csuite/wt2834_join_bloom_fix/main.c @@ -59,11 +59,11 @@ main(int argc, char *argv[]) char flaguri[256]; char joinuri[256]; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); - testutil_check(testutil_parse_opts(argc, argv, opts)); testutil_make_work_dir(opts->home); diff --git a/test/csuite/wt2853_perf/main.c b/test/csuite/wt2853_perf/main.c index 6cec9634cd1..b365b03493a 100644 --- a/test/csuite/wt2853_perf/main.c +++ b/test/csuite/wt2853_perf/main.c @@ -82,11 +82,11 @@ main(int argc, char *argv[]) int i, nfail; const char *tablename; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = 
&_opts; sharedopts = &_sharedopts; - - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); memset(sharedopts, 0, sizeof(*sharedopts)); memset(insert_args, 0, sizeof(insert_args)); diff --git a/test/csuite/wt2909_checkpoint_integrity/main.c b/test/csuite/wt2909_checkpoint_integrity/main.c index ddf249fb406..0ae81543050 100644 --- a/test/csuite/wt2909_checkpoint_integrity/main.c +++ b/test/csuite/wt2909_checkpoint_integrity/main.c @@ -96,9 +96,8 @@ static void run_check_subtest(TEST_OPTS *, const char *, uint64_t, bool, uint64_t *); static void run_check_subtest_range(TEST_OPTS *, const char *, bool); static int run_process(TEST_OPTS *, const char *, char *[], int *); -static int subtest_main(int, char *[], bool); +static void subtest_main(int, char *[], bool); static void subtest_populate(TEST_OPTS *, bool); -int main(int, char *[]); extern int __wt_optind; @@ -446,7 +445,7 @@ run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status) * subtest_main -- * The main program for the subtest */ -static int +static void subtest_main(int argc, char *argv[], bool close_test) { TEST_OPTS *opts, _opts; @@ -454,8 +453,6 @@ subtest_main(int argc, char *argv[], bool close_test) char config[1024], filename[1024]; struct rlimit rlim; - if (testutil_disable_long_tests()) - return (0); opts = &_opts; memset(opts, 0, sizeof(*opts)); memset(&rlim, 0, sizeof(rlim)); @@ -499,8 +496,6 @@ subtest_main(int argc, char *argv[], bool close_test) subtest_populate(opts, close_test); testutil_cleanup(opts); - - return (0); } /* @@ -622,8 +617,9 @@ main(int argc, char *argv[]) uint64_t nresults; const char *debugger; - if (testutil_disable_long_tests()) - return (0); + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; memset(opts, 0, sizeof(*opts)); debugger = NULL; @@ -635,11 +631,13 @@ main(int argc, char *argv[]) opts->nrecords = 50000; while (argc > 0) { - if (strcmp(argv[0], "subtest") == 0) 
- return (subtest_main(argc, argv, false)); - else if (strcmp(argv[0], "subtest_close") == 0) - return (subtest_main(argc, argv, true)); - else if (strcmp(argv[0], "gdb") == 0) + if (strcmp(argv[0], "subtest") == 0) { + subtest_main(argc, argv, false); + return (0); + } else if (strcmp(argv[0], "subtest_close") == 0) { + subtest_main(argc, argv, true); + return (0); + } else if (strcmp(argv[0], "gdb") == 0) debugger = "/usr/bin/gdb"; else testutil_assert(false); diff --git a/test/csuite/wt3184_dup_index_collator/main.c b/test/csuite/wt3184_dup_index_collator/main.c index bcefd2f1a3b..c969e7a1d7e 100644 --- a/test/csuite/wt3184_dup_index_collator/main.c +++ b/test/csuite/wt3184_dup_index_collator/main.c @@ -157,9 +157,9 @@ main(int argc, char *argv[]) printf("duplicating cursor\n"); testutil_check(session->open_cursor(session, NULL, cursor, NULL, &cursor1)); - cursor->get_value(cursor, &got); + testutil_check(cursor->get_value(cursor, &got)); testutil_assert(item_to_int(&got) == 17); - cursor1->get_value(cursor1, &got); + testutil_check(cursor1->get_value(cursor1, &got)); testutil_assert(item_to_int(&got) == 17); testutil_check(session->close(session, NULL)); diff --git a/test/cursor_order/cursor_order.c b/test/cursor_order/cursor_order.c index 85b8c68e545..62777f552bf 100644 --- a/test/cursor_order/cursor_order.c +++ b/test/cursor_order/cursor_order.c @@ -29,7 +29,6 @@ #include "cursor_order.h" static char home[512]; /* Program working dir */ -static char *progname; /* Program name */ static FILE *logfp; /* Log file */ static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *); @@ -51,10 +50,7 @@ main(int argc, char *argv[]) int ch, cnt, runs; char *config_open, *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); cfg = &_cfg; config_open = NULL; diff --git a/test/fops/file.c b/test/fops/file.c index ea15f1ee80d..66c23dfed3c 100644 --- 
a/test/fops/file.c +++ b/test/fops/file.c @@ -51,7 +51,7 @@ obj_bulk(void) if ((ret = c->close(c)) != 0) testutil_die(ret, "cursor.close"); } else if (ret != ENOENT && ret != EBUSY && ret != EINVAL) - testutil_die(ret, "session.open_cursor"); + testutil_die(ret, "session.open_cursor bulk"); } if ((ret = session->close(session, NULL)) != 0) testutil_die(ret, "session.close"); @@ -79,12 +79,17 @@ obj_bulk_unique(int force) testutil_die(ret, "session.create: %s", new_uri); __wt_yield(); - if ((ret = - session->open_cursor(session, new_uri, NULL, "bulk", &c)) != 0) - testutil_die(ret, "session.open_cursor: %s", new_uri); - - if ((ret = c->close(c)) != 0) - testutil_die(ret, "cursor.close"); + /* + * Opening a bulk cursor may have raced with a forced checkpoint + * which created a checkpoint of the empty file, and triggers an EINVAL + */ + if ((ret = session->open_cursor( + session, new_uri, NULL, "bulk", &c)) == 0) { + if ((ret = c->close(c)) != 0) + testutil_die(ret, "cursor.close"); + } else if (ret != EINVAL) + testutil_die(ret, + "session.open_cursor bulk unique: %s, new_uri"); while ((ret = session->drop( session, new_uri, force ? "force" : NULL)) != 0) @@ -190,9 +195,13 @@ obj_checkpoint(void) if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "conn.session"); - /* Force the checkpoint so it has to be taken. */ + /* + * Force the checkpoint so it has to be taken. Forced checkpoints can + * race with other metadata operations and return EBUSY - we'd expect + * applications using forced checkpoints to retry on EBUSY. 
+ */ if ((ret = session->checkpoint(session, "force")) != 0) - if (ret != ENOENT) + if (ret != EBUSY && ret != ENOENT) testutil_die(ret, "session.checkpoint"); if ((ret = session->close(session, NULL)) != 0) diff --git a/test/fops/t.c b/test/fops/t.c index 7b4a7cf8fca..469d5acd33a 100644 --- a/test/fops/t.c +++ b/test/fops/t.c @@ -34,7 +34,6 @@ u_int nops; /* Operations */ const char *uri; /* Object */ const char *config; /* Object config */ -static char *progname; /* Program name */ static FILE *logfp; /* Log file */ static char home[512]; @@ -71,22 +70,15 @@ main(int argc, char *argv[]) int ch, cnt, ret, runs; char *config_open, *working_dir; - working_dir = NULL; - - /* Remove directories */ - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); if ((ret = pthread_rwlock_init(&single, NULL)) != 0) testutil_die(ret, "pthread_rwlock_init: single"); - config_open = NULL; nops = 1000; nthreads = 10; runs = 1; - + config_open = working_dir = NULL; while ((ch = __wt_getopt(progname, argc, argv, "C:h:l:n:r:t:")) != EOF) switch (ch) { case 'C': /* wiredtiger_open config */ @@ -225,6 +217,11 @@ handle_message(WT_EVENT_HANDLER *handler, (void)(handler); (void)(session); + /* Ignore messages about failing to create forced checkpoints. */ + if (strstr( + message, "forced or named checkpoint") != NULL) + return (0); + if (logfp != NULL) return (fprintf(logfp, "%s\n", message) < 0 ? -1 : 0); diff --git a/test/format/config.c b/test/format/config.c index 50430fe073e..cd9856d641e 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -104,7 +104,7 @@ config_setup(void) if (DATASOURCE("lsm") && g.type != ROW) { fprintf(stderr, "%s: lsm data_source is only compatible with row file_type\n", - g.progname); + progname); exit(EXIT_FAILURE); } @@ -462,7 +462,7 @@ config_lrt(void) * stores. 
*/ if (g.type == FIX) { - if (config_is_perm("long_running_txn")) + if (config_is_perm("long_running_txn") && g.c_long_running_txn) testutil_die(EINVAL, "long_running_txn not supported with fixed-length " "column store"); @@ -681,7 +681,7 @@ config_single(const char *s, int perm) if ((ep = strchr(s, '=')) == NULL) { fprintf(stderr, - "%s: %s: illegal configuration value\n", g.progname, s); + "%s: %s: illegal configuration value\n", progname, s); exit(EXIT_FAILURE); } @@ -751,20 +751,20 @@ config_single(const char *s, int perm) v = strtol(ep, &p, 10); if (*p != '\0') { fprintf(stderr, "%s: %s: illegal numeric value\n", - g.progname, s); + progname, s); exit(EXIT_FAILURE); } } if (F_ISSET(cp, C_BOOL)) { if (v != 0 && v != 1) { fprintf(stderr, "%s: %s: value of boolean not 0 or 1\n", - g.progname, s); + progname, s); exit(EXIT_FAILURE); } } else if (v < cp->min || v > cp->maxset) { fprintf(stderr, "%s: %s: value outside min/max values of %" PRIu32 "-%" PRIu32 "\n", - g.progname, s, cp->min, cp->maxset); + progname, s, cp->min, cp->maxset); exit(EXIT_FAILURE); } *cp->v = (uint32_t)v; @@ -883,7 +883,7 @@ config_find(const char *s, size_t len) return (cp); fprintf(stderr, - "%s: %s: unknown configuration keyword\n", g.progname, s); + "%s: %s: unknown configuration keyword\n", progname, s); config_error(); exit(EXIT_FAILURE); } diff --git a/test/format/format.h b/test/format/format.h index 6bb44410acc..41cc48c4278 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -79,8 +79,6 @@ #define FORMAT_OPERATION_REPS 3 /* 3 thread operations sets */ typedef struct { - char *progname; /* Program name */ - char *home; /* Home directory */ char *home_backup; /* Hot-backup directory */ char *home_backup_init; /* Initialize backup command */ diff --git a/test/format/ops.c b/test/format/ops.c index 940318c87a9..1013d1da30b 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -1448,7 +1448,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) 
return (1); if (bdb_notfound) { - fprintf(stderr, "%s: %s:", g.progname, f); + fprintf(stderr, "%s: %s:", progname, f); if (keyno != 0) fprintf(stderr, " row %" PRIu64 ":", keyno); fprintf(stderr, @@ -1456,7 +1456,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) testutil_die(0, NULL); } if (wt_ret == WT_NOTFOUND) { - fprintf(stderr, "%s: %s:", g.progname, f); + fprintf(stderr, "%s: %s:", progname, f); if (keyno != 0) fprintf(stderr, " row %" PRIu64 ":", keyno); fprintf(stderr, diff --git a/test/format/t.c b/test/format/t.c index 7701595776c..c6686ae8b91 100644 --- a/test/format/t.c +++ b/test/format/t.c @@ -49,14 +49,7 @@ main(int argc, char *argv[]) config = NULL; -#ifdef _WIN32 - g.progname = "t_format.exe"; -#else - if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL) - g.progname = argv[0]; - else - ++g.progname; -#endif + (void)testutil_set_progname(argv); #if 0 /* Configure the GNU malloc for debugging. */ @@ -74,7 +67,7 @@ main(int argc, char *argv[]) home = NULL; onerun = 0; while ((ch = __wt_getopt( - g.progname, argc, argv, "1C:c:H:h:Llqrt:")) != EOF) + progname, argc, argv, "1C:c:H:h:Llqrt:")) != EOF) switch (ch) { case '1': /* One run */ onerun = 1; @@ -179,7 +172,7 @@ main(int argc, char *argv[]) testutil_check(pthread_rwlock_init(&g.checkpoint_lock, NULL)); testutil_check(pthread_rwlock_init(&g.death_lock, NULL)); - printf("%s: process %" PRIdMAX "\n", g.progname, (intmax_t)getpid()); + printf("%s: process %" PRIdMAX "\n", progname, (intmax_t)getpid()); while (++g.run_cnt <= g.c_runs || g.c_runs == 0 ) { startup(); /* Start a run */ @@ -344,7 +337,7 @@ usage(void) "usage: %s [-1Llqr] [-C wiredtiger-config]\n " "[-c config-file] [-H mount] [-h home] " "[name=value ...]\n", - g.progname); + progname); fprintf(stderr, "%s", "\t-1 run once\n" "\t-C specify wiredtiger_open configuration arguments\n" diff --git a/test/format/wts.c b/test/format/wts.c index da234ce53c7..a87aa5b9f88 100644 --- a/test/format/wts.c +++ 
b/test/format/wts.c @@ -144,7 +144,7 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) "cache_size=%" PRIu32 "MB," "checkpoint_sync=false," "error_prefix=\"%s\"", - g.c_cache, g.progname); + g.c_cache, progname); /* In-memory configuration. */ if (g.c_in_memory != 0) diff --git a/test/huge/huge.c b/test/huge/huge.c index 17e2db353d5..2b0d5f498e3 100644 --- a/test/huge/huge.c +++ b/test/huge/huge.c @@ -29,7 +29,6 @@ #include "test_util.h" static char home[512]; /* Program working dir */ -static const char *progname; /* Program name */ static uint8_t *big; /* Big key/value buffer */ #define GIGABYTE (1073741824) @@ -167,14 +166,10 @@ main(int argc, char *argv[]) int ch, small; char *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); small = 0; working_dir = NULL; - while ((ch = __wt_getopt(progname, argc, argv, "h:s")) != EOF) switch (ch) { case 'h': diff --git a/test/manydbs/manydbs.c b/test/manydbs/manydbs.c index 7e986d47af3..345c470ba90 100644 --- a/test/manydbs/manydbs.c +++ b/test/manydbs/manydbs.c @@ -32,7 +32,6 @@ #define HOME_BASE "WT_TEST" static char home[HOME_SIZE]; /* Base home directory */ static char hometmp[HOME_SIZE]; /* Each conn home directory */ -static const char *progname; /* Program name */ static const char * const uri = "table:main"; #define WTOPEN_CFG_COMMON \ @@ -129,10 +128,8 @@ main(int argc, char *argv[]) const char *working_dir, *wt_cfg; char cmd[128]; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); + dbs = MAX_DBS; working_dir = HOME_BASE; idle = false; diff --git a/test/mciproject.yml b/test/mciproject.yml index eb74914eb46..6456475aa00 100644 --- a/test/mciproject.yml +++ b/test/mciproject.yml @@ -65,7 +65,7 @@ tasks: ./build_posix/reconf ${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib 
--enable-strict --enable-verbose ${make_command|make} ${smp_command|} 2>&1 - ${make_command|make} VERBOSE=1 check 2>&1 + TESTUTIL_ENABLE_LONG_TESTS=1 ${make_command|make} VERBOSE=1 check 2>&1 fi - command: archive.targz_pack params: diff --git a/test/readonly/readonly.c b/test/readonly/readonly.c index a4b79f5859f..746aecbf6c5 100644 --- a/test/readonly/readonly.c +++ b/test/readonly/readonly.c @@ -39,7 +39,6 @@ static char home_rd[HOME_SIZE + sizeof(HOME_RD_SUFFIX)]; #define HOME_RD2_SUFFIX ".RDNOLOCK" /* Read-only dir no lock file */ static char home_rd2[HOME_SIZE + sizeof(HOME_RD2_SUFFIX)]; -static const char *progname; /* Program name */ static const char *saved_argv0; /* Program command */ static const char * const uri = "table:main"; @@ -172,10 +171,8 @@ main(int argc, char *argv[]) char cmd[512]; uint8_t buf[MAX_VAL]; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); + /* * Needed unaltered for system command later. */ diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c index 660ef0cca67..1d6599ce1b3 100644 --- a/test/recovery/random-abort.c +++ b/test/recovery/random-abort.c @@ -32,7 +32,7 @@ #include <signal.h> static char home[1024]; /* Program working dir */ -static const char *progname; /* Program name */ + /* * These two names for the URI and file system must be maintained in tandem. 
*/ @@ -229,10 +229,7 @@ main(int argc, char *argv[]) const char *working_dir; char fname[64], kname[64], statname[1024]; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); inmem = false; nth = MIN_TH; diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c index 6a142b8e710..1f0a0f7a7bd 100644 --- a/test/recovery/truncated-log.c +++ b/test/recovery/truncated-log.c @@ -36,7 +36,6 @@ #endif static char home[1024]; /* Program working dir */ -static const char *progname; /* Program name */ static const char * const uri = "table:main"; #define RECORDS_FILE "records" @@ -271,10 +270,7 @@ main(int argc, char *argv[]) pid_t pid; const char *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); working_dir = "WT_TEST.truncated-log"; while ((ch = __wt_getopt(progname, argc, argv, "h:")) != EOF) diff --git a/test/salvage/salvage.c b/test/salvage/salvage.c index b8553bbd72d..942f7faba03 100644 --- a/test/salvage/salvage.c +++ b/test/salvage/salvage.c @@ -54,8 +54,6 @@ void run(int); void t(int, u_int, int); int usage(void); -static const char *progname; /* Program name */ - static FILE *res_fp; /* Results file */ static u_int page_type; /* File types */ static int value_unique; /* Values are unique */ @@ -70,10 +68,7 @@ main(int argc, char *argv[]) u_int ptype; int ch, r; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); r = 0; ptype = 0; diff --git a/test/suite/test_compact02.py b/test/suite/test_compact02.py index 7af76b5fd58..803600eea14 100644 --- a/test/suite/test_compact02.py +++ b/test/suite/test_compact02.py @@ -99,7 +99,7 @@ class test_compact02(wttest.WiredTigerTestCase): def ConnectionOpen(self, cacheSize): self.home = '.' 
conn_params = 'create,' + \ - cacheSize + ',error_prefix="%s: ",' % self.shortid() + \ + cacheSize + ',error_prefix="%s",' % self.shortid() + \ 'statistics=(all),' + \ 'eviction_dirty_target=99,eviction_dirty_trigger=99' try: diff --git a/test/suite/test_cursor10.py b/test/suite/test_cursor10.py index b3cffeab4e9..6cabfde9f1f 100644 --- a/test/suite/test_cursor10.py +++ b/test/suite/test_cursor10.py @@ -31,11 +31,11 @@ from wtscenario import make_scenarios # test_cursor10.py # Cursors with projections. -class test_cursor04(wttest.WiredTigerTestCase): +class test_cursor10(wttest.WiredTigerTestCase): """ Test cursor search and search_near """ - table_name1 = 'test_cursor04' + table_name1 = 'test_cursor10' nentries = 20 scenarios = make_scenarios([ diff --git a/test/suite/test_cursor11.py b/test/suite/test_cursor11.py new file mode 100644 index 00000000000..e159ec499e6 --- /dev/null +++ b/test/suite/test_cursor11.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from wtdataset import SimpleDataSet, SimpleIndexDataSet +from wtdataset import SimpleLSMDataSet, ComplexDataSet, ComplexLSMDataSet +from wtscenario import make_scenarios + +# test_cursor11.py +# WT_CURSOR position tests: remove (if not already positioned), and insert +# leave the cursor without position or information. +class test_cursor11(wttest.WiredTigerTestCase): + + keyfmt = [ + ('integer', dict(keyfmt='i')), + ('recno', dict(keyfmt='r')), + ('string', dict(keyfmt='S')), + ] + types = [ + ('file', dict(uri='file', ds=SimpleDataSet)), + ('lsm', dict(uri='lsm', ds=SimpleDataSet)), + ('table-complex', dict(uri='table', ds=ComplexDataSet)), + ('table-complex-lsm', dict(uri='table', ds=ComplexLSMDataSet)), + ('table-index', dict(uri='table', ds=SimpleIndexDataSet)), + ('table-simple', dict(uri='table', ds=SimpleDataSet)), + ('table-simple-lsm', dict(uri='table', ds=SimpleLSMDataSet)), + ] + scenarios = make_scenarios(types, keyfmt) + + def skip(self): + return self.keyfmt == 'r' and \ + (self.ds.is_lsm() or self.uri == 'lsm') + + # Do a remove using the cursor after setting a position, and confirm + # the key and position remain set but no value. + def test_cursor_remove_with_position(self): + if self.skip(): + return + + # Build an object. 
+ uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + self.assertEquals(c.search(), 0) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(26)) + c.remove() + self.assertEquals(c.get_key(), ds.key(26)) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(27)) + + # Do a remove using the cursor without setting a position, and confirm + # no key, value or position remains. + def test_cursor_remove_without_position(self): + if self.skip(): + return + + # Build an object. + uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + c.remove() + msg = '/requires key be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_key, msg) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(1)) + + # Do a remove using the key after also setting a position, and confirm + # no key, value or position remains. + def test_cursor_remove_with_key_and_position(self): + if self.skip(): + return + + # Build an object. 
+ uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + self.assertEquals(c.search(), 0) + c.set_key(ds.key(25)) + c.remove() + msg = '/requires key be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_key, msg) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(1)) + + # Do an insert and confirm no key, value or position remains. + def test_cursor_insert(self): + if self.skip(): + return + + # Build an object. + uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + c.set_value(ds.value(300)) + c.insert() + msg = '/requires key be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_key, msg) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(1)) + +if __name__ == '__main__': + wttest.run() diff --git a/test/suite/test_encrypt04.py b/test/suite/test_encrypt04.py index 17777fc9564..19c0b85d427 100644 --- a/test/suite/test_encrypt04.py +++ b/test/suite/test_encrypt04.py @@ -113,7 +113,7 @@ class test_encrypt04(wttest.WiredTigerTestCase, suite_subprocess): completed = False try: conn = self.wiredtiger_open(dir, - 'create,error_prefix="{0}: ",{1}{2}'.format( + 'create,error_prefix="{0}",{1}{2}'.format( self.shortid(), encarg, extarg)) except (BaseException) as err: # Capture the recognizable error created by rotn diff --git a/test/suite/test_overwrite.py b/test/suite/test_overwrite.py index 4739abaa578..c894de99bd0 100644 --- a/test/suite/test_overwrite.py +++ 
b/test/suite/test_overwrite.py @@ -27,32 +27,47 @@ # OTHER DEALINGS IN THE SOFTWARE. import wiredtiger, wttest -from wtdataset import SimpleDataSet +from wtdataset import SimpleDataSet, SimpleIndexDataSet +from wtdataset import SimpleLSMDataSet, ComplexDataSet, ComplexLSMDataSet from wtscenario import make_scenarios # test_overwrite.py # cursor overwrite configuration method class test_overwrite(wttest.WiredTigerTestCase): name = 'overwrite' - scenarios = make_scenarios([ - ('file-r', dict(type='file:', keyfmt='r', dataset=SimpleDataSet)), - ('file-S', dict(type='file:', keyfmt='S', dataset=SimpleDataSet)), - ('lsm-S', dict(type='lsm:', keyfmt='S', dataset=SimpleDataSet)), - ('table-r', dict(type='table:', keyfmt='r', dataset=SimpleDataSet)), - ('table-S', dict(type='table:', keyfmt='S', dataset=SimpleDataSet)), - ]) + keyfmt = [ + ('integer', dict(keyfmt='i')), + ('recno', dict(keyfmt='r')), + ('string', dict(keyfmt='S')), + ] + types = [ + ('file', dict(uri='file:', ds=SimpleDataSet)), + ('lsm', dict(uri='lsm:', ds=SimpleDataSet)), + ('table-complex', dict(uri='table:', ds=ComplexDataSet)), + ('table-complex-lsm', dict(uri='table:', ds=ComplexLSMDataSet)), + ('table-index', dict(uri='table:', ds=SimpleIndexDataSet)), + ('table-simple', dict(uri='table:', ds=SimpleDataSet)), + ('table-simple-lsm', dict(uri='table:', ds=SimpleLSMDataSet)), + ] + scenarios = make_scenarios(types, keyfmt) + def skip(self): + return self.keyfmt == 'r' and \ + (self.ds.is_lsm() or self.uri == 'lsm') # Confirm a cursor configured with/without overwrite correctly handles # non-existent records during insert, remove and update operations. def test_overwrite_insert(self): - uri = self.type + self.name - ds = self.dataset(self, uri, 100, key_format=self.keyfmt) + if self.skip(): + return + + uri = self.uri + self.name + ds = self.ds(self, uri, 100, key_format=self.keyfmt) ds.populate() # Insert of an existing record with overwrite off fails. 
cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(5)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1000)) self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.insert()) # One additional test for the insert method: duplicate the cursor with @@ -63,30 +78,33 @@ class test_overwrite(wttest.WiredTigerTestCase): cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(5)) dupc = self.session.open_cursor(None, cursor, "overwrite=true") - dupc.set_value('XXXXXXXXXX') + dupc.set_value(ds.value(1001)) self.assertEquals(dupc.insert(), 0) # Insert of an existing record with overwrite on succeeds. cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(6)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1002)) self.assertEquals(cursor.insert(), 0) # Insert of a non-existent record with overwrite off succeeds. cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(200)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1003)) self.assertEquals(cursor.insert(), 0) # Insert of a non-existent record with overwrite on succeeds. cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(201)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1004)) self.assertEquals(cursor.insert(), 0) def test_overwrite_remove(self): - uri = self.type + self.name - ds = self.dataset(self, uri, 100, key_format=self.keyfmt) + if self.skip(): + return + + uri = self.uri + self.name + ds = self.ds(self, uri, 100, key_format=self.keyfmt) ds.populate() # Remove of an existing record with overwrite off succeeds. 
@@ -110,32 +128,35 @@ class test_overwrite(wttest.WiredTigerTestCase): self.assertEquals(cursor.remove(), 0) def test_overwrite_update(self): - uri = self.type + self.name - ds = self.dataset(self, uri, 100, key_format=self.keyfmt) + if self.skip(): + return + + uri = self.uri + self.name + ds = self.ds(self, uri, 100, key_format=self.keyfmt) ds.populate() # Update of an existing record with overwrite off succeeds. cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(5)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1005)) self.assertEquals(cursor.update(), 0) # Update of an existing record with overwrite on succeeds. cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(6)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1006)) self.assertEquals(cursor.update(), 0) # Update of a non-existent record with overwrite off fails. cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(200)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1007)) self.assertEquals(cursor.update(), wiredtiger.WT_NOTFOUND) # Update of a non-existent record with overwrite on succeeds. 
cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(201)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1008)) self.assertEquals(cursor.update(), 0) if __name__ == '__main__': diff --git a/test/suite/test_shared_cache01.py b/test/suite/test_shared_cache01.py index 70560a625ee..c3bd946cc4b 100644 --- a/test/suite/test_shared_cache01.py +++ b/test/suite/test_shared_cache01.py @@ -73,7 +73,7 @@ class test_shared_cache01(wttest.WiredTigerTestCase): os.mkdir(name) next_conn = self.wiredtiger_open( name, - 'create,error_prefix="' + self.shortid() + ': "' + + 'create,error_prefix="%s",' % self.shortid() + pool_opts + extra_opts) self.conns.append(next_conn) self.sessions.append(next_conn.open_session(None)) diff --git a/test/suite/test_shared_cache02.py b/test/suite/test_shared_cache02.py index 7cde6c86695..67f9bf7c6b7 100644 --- a/test/suite/test_shared_cache02.py +++ b/test/suite/test_shared_cache02.py @@ -73,7 +73,7 @@ class test_shared_cache02(wttest.WiredTigerTestCase): os.mkdir(name) next_conn = self.wiredtiger_open( name, - 'create,error_prefix="' + self.shortid() + ': "' + + 'create,error_prefix="%s",' % self.shortid() + pool_opts + extra_opts) self.conns.append(next_conn) self.sessions.append(next_conn.open_session(None)) diff --git a/test/suite/test_txn07.py b/test/suite/test_txn07.py index e2986fb999a..e26cf5aaaea 100644 --- a/test/suite/test_txn07.py +++ b/test/suite/test_txn07.py @@ -76,7 +76,7 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess): def conn_config(self): return 'log=(archive=false,enabled,file_max=%s,' % self.logmax + \ 'compressor=%s)' % self.compress + \ - ',create,error_prefix="%s: ",' % self.shortid() + \ + ',create,error_prefix="%s",' % self.shortid() + \ "statistics=(fast)," + \ 'transaction_sync="%s",' % \ self.sync_list[self.scenario_number % len(self.sync_list)] diff --git a/test/suite/wttest.py b/test/suite/wttest.py index 0dce51f07d5..e91838544b9 100644 --- a/test/suite/wttest.py +++ 
b/test/suite/wttest.py @@ -302,7 +302,7 @@ class WiredTigerTestCase(unittest.TestCase): # In case the open starts additional threads, flush first to # avoid confusion. sys.stdout.flush() - conn_param = 'create,error_prefix="%s: ",%s' % (self.shortid(), config) + conn_param = 'create,error_prefix="%s",%s' % (self.shortid(), config) try: conn = self.wiredtiger_open(home, conn_param) except wiredtiger.WiredTigerError as e: diff --git a/test/thread/t.c b/test/thread/t.c index baadbf2adb9..9dfd02bdad2 100644 --- a/test/thread/t.c +++ b/test/thread/t.c @@ -37,7 +37,6 @@ int multiple_files; /* File per thread */ int session_per_op; /* New session per operation */ static char home[512]; /* Program working dir */ -static char *progname; /* Program name */ static FILE *logfp; /* Log file */ static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *); @@ -59,10 +58,7 @@ main(int argc, char *argv[]) int ch, cnt, runs; char *config_open, *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); config_open = NULL; working_dir = NULL; diff --git a/test/utility/misc.c b/test/utility/misc.c index 1ba08ddd77f..61dad3d76c2 100644 --- a/test/utility/misc.c +++ b/test/utility/misc.c @@ -28,6 +28,7 @@ #include "test_util.h" void (*custom_die)(void) = NULL; +const char *progname = "program name not set"; /* * die -- @@ -42,7 +43,9 @@ testutil_die(int e, const char *fmt, ...) if (custom_die != NULL) (*custom_die)(); + fprintf(stderr, "%s: FAILED", progname); if (fmt != NULL) { + fprintf(stderr, ": "); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); @@ -55,6 +58,20 @@ testutil_die(int e, const char *fmt, ...) } /* + * testutil_set_progname -- + * Set the global program name for error handling. 
+ */ +const char * +testutil_set_progname(char * const *argv) +{ + if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) + progname = argv[0]; + else + ++progname; + return (progname); +} + +/* * testutil_work_dir_from_path -- * Takes a buffer, its size and the intended work directory. * Creates the full intended work directory in buffer. @@ -149,20 +166,25 @@ testutil_cleanup(TEST_OPTS *opts) } /* - * testutil_disable_long_tests -- - * Return if TESTUTIL_DISABLE_LONG_TESTS is set. + * testutil_enable_long_tests -- + * Return if TESTUTIL_ENABLE_LONG_TESTS is set. */ bool -testutil_disable_long_tests(void) +testutil_enable_long_tests(void) { const char *res; + bool enable_long_tests; if (__wt_getenv(NULL, - "TESTUTIL_DISABLE_LONG_TESTS", &res) == WT_NOTFOUND) + "TESTUTIL_ENABLE_LONG_TESTS", &res) == WT_NOTFOUND) return (false); + /* Accept anything other than "TESTUTIL_ENABLE_LONG_TESTS=0". */ + enable_long_tests = res[0] != '0'; + free((void *)res); - return (true); + + return (enable_long_tests); } /* diff --git a/test/utility/parse_opts.c b/test/utility/parse_opts.c index 74a1c021d5d..af9256b199a 100644 --- a/test/utility/parse_opts.c +++ b/test/utility/parse_opts.c @@ -43,10 +43,7 @@ testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts) opts->running = true; opts->verbose = false; - if ((opts->progname = strrchr(argv[0], DIR_DELIM)) == NULL) - opts->progname = argv[0]; - else - ++opts->progname; + opts->progname = testutil_set_progname(argv); while ((ch = __wt_getopt(opts->progname, argc, argv, "A:h:n:o:pR:T:t:vW:")) != EOF) diff --git a/test/utility/test_util.h b/test/utility/test_util.h index 489bbe18d87..406ed2c4961 100644 --- a/test/utility/test_util.h +++ b/test/utility/test_util.h @@ -48,7 +48,7 @@ /* Generic option parsing structure shared by all test cases. 
*/ typedef struct { char *home; - char *progname; + const char *progname; enum { TABLE_COL=1, /* Fixed-length column store */ TABLE_FIX=2, /* Variable-length column store */ TABLE_ROW=3 /* Row-store */ @@ -185,10 +185,13 @@ void *dstrdup(const void *); void *dstrndup(const char *, size_t); void testutil_clean_work_dir(const char *); void testutil_cleanup(TEST_OPTS *); -bool testutil_disable_long_tests(void); +bool testutil_enable_long_tests(void); void testutil_make_work_dir(char *); int testutil_parse_opts(int, char * const *, TEST_OPTS *); void testutil_work_dir_from_path(char *, size_t, const char *); void *thread_append(void *); void *thread_insert_append(void *); void *thread_prev(void *); + +extern const char *progname; +const char *testutil_set_progname(char * const *); |