diff options
319 files changed, 17278 insertions, 6439 deletions
diff --git a/src/third_party/wiredtiger/SConstruct b/src/third_party/wiredtiger/SConstruct index a7306262f82..a5dd8761d6c 100644 --- a/src/third_party/wiredtiger/SConstruct +++ b/src/third_party/wiredtiger/SConstruct @@ -214,6 +214,7 @@ if (VERSION_MAJOR == None or wiredtiger_includes = """ #include <sys/types.h> #include <stdarg.h> + #include <stdbool.h> #include <stdint.h> #include <stdio.h> """ @@ -239,12 +240,26 @@ wtheader = env.Substfile( # # WiredTiger library # -filelistfile = r'build_win\filelist.win' -filelist = open(filelistfile) -wtsources = [line.strip() - for line in filelist - if not line.startswith("#") and len(line) > 1] -filelist.close() +# Map WiredTiger build conditions: any conditions that appear in WiredTiger's +# dist/filelist must appear here, and if the value is true, those files will be +# included. +# +condition_map = { + 'POSIX_HOST' : env['PLATFORM'] == 'posix', + 'POWERPC_HOST' : False, + 'WINDOWS_HOST' : env['PLATFORM'] == 'win32', +} + +def filtered_filelist(f): + for line in f: + file_cond = line.split() + if line.startswith("#") or len(file_cond) == 0: + continue + if len(file_cond) == 1 or condition_map[file_cond[1]]: + yield file_cond[0] + +filelistfile = r'dist/filelist' +wtsources = list(filtered_filelist(open(filelistfile))) if useZlib: wtsources.append("ext/compressors/zlib/zlib_compress.c") @@ -345,12 +360,12 @@ examples = [ "ex_all", "ex_async", "ex_call_center", - "ex_config", "ex_config_parse", "ex_cursor", "ex_data_source", "ex_encrypt", "ex_extending", + "ex_file_system", "ex_hello", "ex_log", "ex_pack", @@ -392,10 +407,16 @@ env.Append(BUILDERS={'SmokeTest' : Builder(action = builder_smoke_test)}) #Build the tests and setup the "scons test" target +testutil = env.Library('testutil', + [ + 'test/utility/misc.c', + 'test/utility/parse_opts.c' + ]) + #Don't test bloom on Windows, its broken t = env.Program("t_bloom", "test/bloom/test_bloom.c", - LIBS=[wtlib] + wtlibs) + LIBS=[wtlib, testutil] + wtlibs) #env.Alias("check", 
env.SmokeTest(t)) Default(t) @@ -418,7 +439,7 @@ t = env.Program("t_fops", ["test/fops/file.c", "test/fops/fops.c", "test/fops/t.c"], - LIBS=[wtlib, shim] + wtlibs) + LIBS=[wtlib, shim, testutil] + wtlibs) env.Append(CPPPATH=["test/utility"]) env.Alias("check", env.SmokeTest(t)) Default(t) @@ -468,7 +489,7 @@ Default(t) #Build the Examples for ex in examples: - if(ex in ['ex_all', 'ex_async', 'ex_thread', 'ex_encrypt']): + if(ex in ['ex_all', 'ex_async', 'ex_encrypt', 'ex_file_system' , 'ex_thread']): exp = env.Program(ex, "examples/c/" + ex + ".c", LIBS=[wtlib, shim] + wtlibs) Default(exp) env.Alias("check", env.SmokeTest(exp)) diff --git a/src/third_party/wiredtiger/bench/wtperf/config.c b/src/third_party/wiredtiger/bench/wtperf/config.c index e83d6fcceed..0dc38287155 100644 --- a/src/third_party/wiredtiger/bench/wtperf/config.c +++ b/src/third_party/wiredtiger/bench/wtperf/config.c @@ -47,6 +47,53 @@ static void config_opt_usage(void); (strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0') /* + * config_unescape -- + * Modify a string in place, replacing any backslash escape sequences. + * The modified string is always shorter. + */ +static int +config_unescape(char *orig) +{ + char ch, *dst, *s; + + for (dst = s = orig; *s != '\0';) { + if ((ch = *s++) == '\\') { + ch = *s++; + switch (ch) { + case 'b': + *dst++ = '\b'; + break; + case 'f': + *dst++ = '\f'; + break; + case 'n': + *dst++ = '\n'; + break; + case 'r': + *dst++ = '\r'; + break; + case 't': + *dst++ = '\t'; + break; + case '\\': + case '/': + case '\"': /* Backslash needed for spell check. */ + *dst++ = ch; + break; + default: + /* Note: Unicode (\u) not implemented. */ + fprintf(stderr, + "invalid escape in string: %s\n", orig); + return (EINVAL); + } + } else + *dst++ = ch; + } + *dst = '\0'; + return (0); +} + +/* * config_assign -- * Assign the src config to the dest, any storage allocated in dest is * freed as a result. 
@@ -123,7 +170,7 @@ config_free(CONFIG *cfg) if (config_opts[i].type == STRING_TYPE || config_opts[i].type == CONFIG_STRING_TYPE) { pstr = (char **) - ((unsigned char *)cfg + config_opts[i].offset); + ((u_char *)cfg + config_opts[i].offset); free(*pstr); *pstr = NULL; } @@ -363,7 +410,8 @@ static int config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v) { CONFIG_OPT *popt; - char *newstr, **strp; + char *begin, *newstr, **strp; + int ret; size_t i, newlen, nopt; void *valueloc; @@ -383,7 +431,7 @@ config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v) fprintf(stderr, "\t%s\n", config_opts[i].name); return (EINVAL); } - valueloc = ((unsigned char *)cfg + popt->offset); + valueloc = ((u_char *)cfg + popt->offset); switch (popt->type) { case BOOL_TYPE: if (v->type != WT_CONFIG_ITEM_BOOL) { @@ -438,15 +486,20 @@ config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v) } strp = (char **)valueloc; newlen = v->len + 1; - if (*strp == NULL) { - newstr = dstrdup(v->str); - } else { - newlen += (strlen(*strp) + 1); + if (*strp == NULL) + begin = newstr = dstrdup(v->str); + else { + newlen += strlen(*strp) + 1; newstr = dcalloc(newlen, sizeof(char)); snprintf(newstr, newlen, "%s,%*s", *strp, (int)v->len, v->str); /* Free the old value now we've copied it. 
*/ free(*strp); + begin = &newstr[(newlen - 1) - v->len]; + } + if ((ret = config_unescape(begin)) != 0) { + free(newstr); + return (ret); } *strp = newstr; break; @@ -487,84 +540,100 @@ config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v) int config_opt_file(CONFIG *cfg, const char *filename) { - struct stat sb; - ssize_t read_size; - size_t buf_size, linelen, optionpos; - int contline, fd, linenum, ret; - char option[1024]; - char *comment, *file_buf, *line, *ltrim, *rtrim; + FILE *fp; + size_t linelen, optionpos; + int linenum, ret; + bool contline; + char line[4 * 1024], option[4 * 1024]; + char *comment, *ltrim, *rtrim; - file_buf = NULL; + ret = 0; - if ((fd = open(filename, O_RDONLY)) == -1) { + if ((fp = fopen(filename, "r")) == NULL) { fprintf(stderr, "wtperf: %s: %s\n", filename, strerror(errno)); return (errno); } - if ((ret = fstat(fd, &sb)) != 0) { - fprintf(stderr, "wtperf: stat of %s: %s\n", - filename, strerror(errno)); - ret = errno; - goto err; - } - buf_size = (size_t)sb.st_size; - file_buf = dcalloc(buf_size + 2, 1); - read_size = read(fd, file_buf, buf_size); - if (read_size == -1 -#ifndef _WIN32 - /* Windows automatically translates \r\n -> \n so counts will be off */ - || (size_t)read_size != buf_size -#endif - ) { - fprintf(stderr, - "wtperf: read unexpected amount from config file\n"); - ret = EINVAL; - goto err; - } - /* Make sure the buffer is terminated correctly. */ - file_buf[read_size] = '\0'; - ret = 0; optionpos = 0; linenum = 0; - /* - * We should switch this from using strtok to generating a single - * WiredTiger configuration string compatible string, and using - * the WiredTiger configuration parser to parse it at once. 
- */ -#define WTPERF_CONFIG_DELIMS "\n\\" - for (line = strtok(file_buf, WTPERF_CONFIG_DELIMS); - line != NULL; - line = strtok(NULL, WTPERF_CONFIG_DELIMS)) { + while (fgets(line, sizeof(line), fp) != NULL) { linenum++; - /* trim the line */ - for (ltrim = line; *ltrim && isspace(*ltrim); ltrim++) + + /* Skip leading space. */ + for (ltrim = line; *ltrim && isspace((u_char)*ltrim); + ltrim++) + ; + + /* + * Find the end of the line; if there's no trailing newline, + * the line is too long for the buffer or the file was corrupted + * (there's no terminating newline in the file). + */ + for (rtrim = line; *rtrim && *rtrim != '\n'; rtrim++) ; - rtrim = &ltrim[strlen(ltrim)]; - if (rtrim > ltrim && rtrim[-1] == '\n') + if (*rtrim != '\n') { + fprintf(stderr, + "wtperf: %s: %d: configuration line too long\n", + filename, linenum); + ret = EINVAL; + break; + } + + /* Skip trailing space. */ + while (rtrim > ltrim && isspace((u_char)rtrim[-1])) rtrim--; - contline = (rtrim > ltrim && rtrim[-1] == '\\'); + /* + * If the last non-space character in the line is an escape, the + * line will be continued. Checked early because the line might + * otherwise be empty. + */ + contline = rtrim > ltrim && rtrim[-1] == '\\'; if (contline) rtrim--; - comment = strchr(ltrim, '#'); - if (comment != NULL && comment < rtrim) + /* + * Discard anything after the first hash character. Check after + * the escape character, the escape can appear after a comment. + */ + if ((comment = strchr(ltrim, '#')) != NULL) rtrim = comment; - while (rtrim > ltrim && isspace(rtrim[-1])) + + /* Skip trailing space again. */ + while (rtrim > ltrim && isspace((u_char)rtrim[-1])) rtrim--; - linelen = (size_t)(rtrim - ltrim); - if (linelen == 0) - continue; + /* + * Check for empty lines: note that the right-hand boundary can + * cross over the left-hand boundary, less-than or equal to is + * the correct test. 
+ */ + if (rtrim <= ltrim) { + /* + * If we're continuing from this line, or we haven't + * started building an option, ignore this line. + */ + if (contline || optionpos == 0) + continue; + + /* + * An empty line terminating an option we're building; + * clean things up so we can proceed. + */ + linelen = 0; + } else + linelen = (size_t)(rtrim - ltrim); + ltrim[linelen] = '\0'; if (linelen + optionpos + 1 > sizeof(option)) { - fprintf(stderr, "wtperf: %s: %d: line overflow\n", + fprintf(stderr, + "wtperf: %s: %d: option value overflow\n", filename, linenum); ret = EINVAL; break; } - *rtrim = '\0'; - strncpy(&option[optionpos], ltrim, linelen); + + memcpy(&option[optionpos], ltrim, linelen); option[optionpos + linelen] = '\0'; if (contline) optionpos += linelen; @@ -577,16 +646,19 @@ config_opt_file(CONFIG *cfg, const char *filename) optionpos = 0; } } - if (ret == 0 && optionpos > 0) { - fprintf(stderr, "wtperf: %s: %d: last line continues\n", - filename, linenum); - ret = EINVAL; - goto err; + if (ret == 0) { + if (ferror(fp)) { + fprintf(stderr, "wtperf: %s: read error\n", filename); + ret = errno; + } + if (optionpos > 0) { + fprintf(stderr, "wtperf: %s: %d: last line continues\n", + filename, linenum); + ret = EINVAL; + } } -err: if (fd != -1) - (void)close(fd); - free(file_buf); + (void)fclose(fp); return (ret); } @@ -754,7 +826,7 @@ config_consolidate(CONFIG *cfg) * as being the same key. 
*/ if (strncmp(conf_line->string, test_line->string, - (size_t)(string_key - conf_line->string + 1)) + (size_t)((string_key - conf_line->string) + 1)) == 0) { TAILQ_REMOVE(&cfg->config_head, conf_line, c); free(conf_line->string); diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-1.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-1.wtperf new file mode 100644 index 00000000000..24da4dd7902 --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-1.wtperf @@ -0,0 +1,11 @@ +# wtperf options file: evict btree configuration +conn_config="cache_size=50M" +table_config="type=file" +icount=10000000 +report_interval=5 +run_time=120 +populate_threads=1 +threads=((count=16,reads=1)) +# Add throughput/latency monitoring +max_latency=2000 +sample_interval=5 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress.wtperf new file mode 100644 index 00000000000..740fb88c050 --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress.wtperf @@ -0,0 +1,12 @@ +# wtperf options file: evict btree configuration +conn_config="cache_size=50M,eviction=(threads_max=4)" +table_config="type=file" +icount=10000000 +report_interval=5 +run_time=120 +populate_threads=1 +threads=((count=16,reads=1)) +# Add throughput/latency monitoring +max_latency=2000 +sample_interval=5 +session_count_idle=100 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/evict-lsm-1.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/evict-lsm-1.wtperf new file mode 100644 index 00000000000..ad885d98eb7 --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/evict-lsm-1.wtperf @@ -0,0 +1,12 @@ +# wtperf options file: evict lsm configuration +conn_config="cache_size=50M,lsm_manager=(worker_thread_max=6)" +table_config="type=lsm,lsm=(chunk_size=2M),os_cache_dirty_max=16MB" +compact=true +icount=10000000 
+report_interval=5 +run_time=120 +populate_threads=1 +threads=((count=16,reads=1)) +# Add throughput/latency monitoring +max_latency=2000 +sample_interval=5 diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c index 9d57bdcf6b0..9d35f6fa640 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c @@ -1631,6 +1631,8 @@ execute_workload(CONFIG *cfg) { CONFIG_THREAD *threads; WORKLOAD *workp; + WT_CONNECTION *conn; + WT_SESSION **sessions; pthread_t idle_table_cycle_thread; uint64_t last_ckpts, last_inserts, last_reads, last_truncates; uint64_t last_updates; @@ -1647,6 +1649,8 @@ execute_workload(CONFIG *cfg) last_updates = 0; ret = 0; + sessions = NULL; + /* Start cycling idle tables. */ if ((ret = start_idle_table_cycle(cfg, &idle_table_cycle_thread)) != 0) return (ret); @@ -1664,6 +1668,18 @@ execute_workload(CONFIG *cfg) } else pfunc = worker; + if (cfg->session_count_idle != 0) { + sessions = dcalloc((size_t)cfg->session_count_idle, + sizeof(WT_SESSION *)); + conn = cfg->conn; + for (i = 0; i < cfg->session_count_idle; ++i) + if ((ret = conn->open_session( + conn, NULL, cfg->sess_config, &sessions[i])) != 0) { + lprintf(cfg, ret, 0, + "execute_workload: idle open_session"); + goto err; + } + } /* Start each workload. */ for (threads = cfg->workers, i = 0, workp = cfg->workload; i < cfg->workload_cnt; ++i, ++workp) { @@ -1758,6 +1774,7 @@ err: cfg->stop = 1; if (ret == 0 && cfg->drop_tables && (ret = drop_all_tables(cfg)) != 0) lprintf(cfg, ret, 0, "Drop tables failed."); + free(sessions); /* Report if any worker threads didn't finish. 
*/ if (cfg->error != 0) { lprintf(cfg, WT_ERROR, 0, @@ -2170,15 +2187,15 @@ int main(int argc, char *argv[]) { CONFIG *cfg, _cfg; - size_t req_len; + size_t req_len, sreq_len; int ch, monitor_set, ret; const char *opts = "C:H:h:m:O:o:T:"; const char *config_opts; - char *cc_buf, *tc_buf, *user_cconfig, *user_tconfig; + char *cc_buf, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig; monitor_set = ret = 0; config_opts = NULL; - cc_buf = tc_buf = user_cconfig = user_tconfig = NULL; + cc_buf = sess_cfg = tc_buf = user_cconfig = user_tconfig = NULL; /* Setup the default configuration values. */ cfg = &_cfg; @@ -2317,7 +2334,8 @@ main(int argc, char *argv[]) /* Concatenate non-default configuration strings. */ if (cfg->verbose > 1 || user_cconfig != NULL || - cfg->compress_ext != NULL || cfg->async_config != NULL) { + cfg->session_count_idle > 0 || cfg->compress_ext != NULL || + cfg->async_config != NULL) { req_len = strlen(cfg->conn_config) + strlen(debug_cconfig) + 3; if (user_cconfig != NULL) req_len += strlen(user_cconfig); @@ -2325,16 +2343,26 @@ main(int argc, char *argv[]) req_len += strlen(cfg->async_config); if (cfg->compress_ext != NULL) req_len += strlen(cfg->compress_ext); + if (cfg->session_count_idle > 0) { + sreq_len = strlen(",session_max=") + 6; + req_len += sreq_len; + sess_cfg = dcalloc(sreq_len, 1); + snprintf(sess_cfg, sreq_len, + ",session_max=%" PRIu32, + cfg->session_count_idle + cfg->workers_cnt + + cfg->populate_threads + 10); + } cc_buf = dcalloc(req_len, 1); /* * This is getting hard to parse. */ - snprintf(cc_buf, req_len, "%s%s%s%s%s%s%s", + snprintf(cc_buf, req_len, "%s%s%s%s%s%s%s%s", cfg->conn_config, cfg->async_config ? cfg->async_config : "", cfg->compress_ext ? cfg->compress_ext : "", cfg->verbose > 1 ? ",": "", cfg->verbose > 1 ? debug_cconfig : "", + sess_cfg ? sess_cfg : "", user_cconfig ? ",": "", user_cconfig ? 
user_cconfig : ""); if ((ret = config_opt_str(cfg, "conn_config", cc_buf)) != 0) @@ -2410,6 +2438,7 @@ einval: ret = EINVAL; err: config_free(cfg); free(cc_buf); + free(sess_cfg); free(tc_buf); free(user_cconfig); free(user_tconfig); @@ -2579,7 +2608,7 @@ wtperf_rand(CONFIG_THREAD *thread) S2 = wtperf_value_range(cfg) * (cfg->pareto / 100.0) * (PARETO_SHAPE - 1); U = 1 - (double)rval / (double)UINT32_MAX; - rval = (pow(U, S1) - 1) * S2; + rval = (uint64_t)((pow(U, S1) - 1) * S2); /* * This Pareto calculation chooses out of range values about * 2% of the time, from my testing. That will lead to the diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.h b/src/third_party/wiredtiger/bench/wtperf/wtperf.h index a2b497b3142..d874fa4eefe 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.h +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.h @@ -30,33 +30,8 @@ #define HAVE_WTPERF_H #include <wt_internal.h> - -#ifndef _WIN32 -#include <sys/time.h> -#endif -#include <sys/types.h> -#include <sys/stat.h> - #include <assert.h> -#include <ctype.h> -#ifndef _WIN32 -#include <dirent.h> -#endif -#include <errno.h> -#include <fcntl.h> -#include <inttypes.h> -#include <limits.h> #include <math.h> -#ifndef _WIN32 -#include <pthread.h> -#endif -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef _WIN32 -#include <unistd.h> -#endif #ifdef _WIN32 #include "windows_shim.h" @@ -345,6 +320,9 @@ extract_key(char *key_buf, uint64_t *keynop) * Print message and exit on failure. 
*/ static inline void +die(int, const char *) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); +static inline void die(int e, const char *str) { fprintf(stderr, "Call to %s failed: %s", str, wiredtiger_strerror(e)); diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i index b5e274a17c2..2afd20f777f 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i @@ -163,6 +163,8 @@ DEF_OPT_AS_UINT32(sample_rate, 50, "how often the latency of operations is measured. One for every operation," "two for every second operation, three for every third operation etc.") DEF_OPT_AS_CONFIG_STRING(sess_config, "", "session configuration string") +DEF_OPT_AS_UINT32(session_count_idle, 0, + "number of idle sessions to create. Default 0.") DEF_OPT_AS_CONFIG_STRING(table_config, "key_format=S,value_format=S,type=lsm,exclusive=true," "allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb," diff --git a/src/third_party/wiredtiger/build_posix/Make.subdirs b/src/third_party/wiredtiger/build_posix/Make.subdirs index 4e1f829c0c5..64749378ed1 100644 --- a/src/third_party/wiredtiger/build_posix/Make.subdirs +++ b/src/third_party/wiredtiger/build_posix/Make.subdirs @@ -25,8 +25,10 @@ examples/java JAVA lang/python PYTHON # Make the tests +test/utility test/bloom test/checkpoint +test/csuite test/cursor_order test/fops test/format diff --git a/src/third_party/wiredtiger/build_posix/aclocal/options.m4 b/src/third_party/wiredtiger/build_posix/aclocal/options.m4 index 0fb49dbf1df..5f9b8748df2 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/options.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/options.m4 @@ -215,6 +215,16 @@ pthread_adaptive|pthreads_adaptive) esac AC_MSG_RESULT($with_spinlock) +AC_MSG_CHECKING(if --enable-strict option specified) +AC_ARG_ENABLE(strict, + [AS_HELP_STRING([--enable-strict], + [Enable strict compiler checking.])], 
r=$enableval, r=no) +case "$r" in +no) wt_cv_enable_strict=no;; +*) wt_cv_enable_strict=yes;; +esac +AC_MSG_RESULT($wt_cv_enable_strict) + AH_TEMPLATE(HAVE_VERBOSE, [Enable verbose message configuration.]) AC_MSG_CHECKING(if --enable-verbose option specified) AC_ARG_ENABLE(verbose, diff --git a/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 b/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 new file mode 100644 index 00000000000..b59f09fe584 --- /dev/null +++ b/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 @@ -0,0 +1,74 @@ +# AM_STRICT +# Per compiler-version flags used when compiling in strict mode. + +# GCC warnings. +AC_DEFUN([AM_GCC_WARNINGS], [ + w="$w -Wall -Wextra -Werror" + + w="$w -Waggregate-return" + w="$w -Wbad-function-cast" + w="$w -Wcast-align" + w="$w -Wdeclaration-after-statement" + w="$w -Wdouble-promotion" + w="$w -Wfloat-equal" + w="$w -Wformat-nonliteral" + w="$w -Wformat-security" + w="$w -Wformat=2" + w="$w -Winit-self" + w="$w -Wjump-misses-init" + w="$w -Wmissing-declarations" + w="$w -Wmissing-field-initializers" + w="$w -Wmissing-parameter-type" + w="$w -Wmissing-prototypes" + w="$w -Wnested-externs" + w="$w -Wold-style-definition" + w="$w -Wpacked" + w="$w -Wpointer-arith" + w="$w -Wpointer-sign" + w="$w -Wredundant-decls" + w="$w -Wshadow" + w="$w -Wsign-conversion" + w="$w -Wstrict-prototypes" + w="$w -Wswitch-enum" + w="$w -Wundef" + w="$w -Wunreachable-code" + w="$w -Wunsafe-loop-optimizations" + w="$w -Wunused" + w="$w -Wwrite-strings" + + # Non-fatal informational warnings. + w="$w -Wno-error=inline" + w="$w -Wno-error=unsafe-loop-optimizations" + + wt_cv_strict_warnings="$w" +]) + +# Clang warnings. 
+AC_DEFUN([AM_CLANG_WARNINGS], [ + w="-Weverything -Werror" + + w="$w -Wno-cast-align" + w="$w -Wno-documentation-unknown-command" + w="$w -Wno-format-nonliteral" + w="$w -Wno-packed" + w="$w -Wno-padded" + w="$w -Wno-reserved-id-macro" + w="$w -Wno-zero-length-array" + + # We should turn on cast-qual, but not as a fatal error: see WT-2690. + # For now, turn it off. + # w="$w -Wno-error=cast-qual" + w="$w -Wno-cast-qual" + + # Older OS X releases need some special love; these flags should be + # removed in the not-too-distant future. + # Apple clang version 4.1 + # (tags/Apple/clang-421.11.66) (based on LLVM 3.1svn) + w="$w -Wno-pedantic" + w="$w -Wno-unused-command-line-argument" + + # Ignore unrecognized options. + w="$w -Wno-unknown-warning-option" + + wt_cv_strict_warnings="$w" +]) diff --git a/src/third_party/wiredtiger/build_posix/aclocal/types.m4 b/src/third_party/wiredtiger/build_posix/aclocal/types.m4 index 439034c89d2..089058f5611 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/types.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/types.m4 @@ -7,6 +7,7 @@ AC_DEFUN([AM_TYPES], [ #include <sys/types.h> #include <inttypes.h> #include <stdarg.h> +#include <stdbool.h> #include <stdint.h> #include <stdio.h>" AC_SUBST(wiredtiger_includes_decl) diff --git a/src/third_party/wiredtiger/build_posix/configure.ac.in b/src/third_party/wiredtiger/build_posix/configure.ac.in index 9251873be73..bbc6cf89d91 100644 --- a/src/third_party/wiredtiger/build_posix/configure.ac.in +++ b/src/third_party/wiredtiger/build_posix/configure.ac.in @@ -9,19 +9,20 @@ AC_CONFIG_AUX_DIR([build_posix/gnu-support]) AC_CONFIG_MACRO_DIR([build_posix/aclocal]) AC_CONFIG_SRCDIR([RELEASE_INFO]) -# If CFLAGS/CXXFLAGS were not set on entry, default to "-O3 -g" -: ${CFLAGS=-O3 -g} -: ${CXXFLAGS=-O3 -g} - # We rely on some automake features for testing (like AM_TEST_ENVIRONMENT) # that didn't work before 1.11.6. 
AM_INIT_AUTOMAKE([1.11.6 foreign parallel-tests subdir-objects]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([no])]) -# Configure options. The AM_OPTIONS and the libtool configuration -# need to stay here. Moving them below the compiler and other -# configurations causes -Wcast_align warnings and other warnings -# on OS X. +# If CFLAGS/CXXFLAGS were not set on entry, default to "-O3 -g" +: ${CFLAGS=-O3 -g} +: ${CXXFLAGS=-O3 -g} + +AC_PROG_CC(cc gcc) +AC_PROG_CXX(c++ g++) +AM_PROG_AS(as gas) + +# Configure options. AM_OPTIONS define([AC_LIBTOOL_LANG_CXX_CONFIG], [:])dnl @@ -30,9 +31,33 @@ LT_PREREQ(2.2.6) LT_INIT([pic-only]) AC_SUBST([LIBTOOL_DEPS]) -AC_PROG_CC(cc gcc) -AC_PROG_CXX(c++ g++) -AM_PROG_AS(as gas) +# If enable-strict is configured, turn on as much error checking as we can for +# this compiler. Intended for developers, and only works for gcc/clang, but it +# fills a need. +if test "$wt_cv_enable_strict" = "yes"; then + wt_cv_cc_version="`$CC --version | sed -eq`" + case "$wt_cv_cc_version" in + *clang*) + AM_CLANG_WARNINGS;; + *gcc*|*GCC*) + AM_GCC_WARNINGS;; + *) + AC_MSG_ERROR( + [--enable-strict does not support "$wt_cv_cc_version".]);; + esac + + AM_CFLAGS="$AM_CFLAGS $wt_cv_strict_warnings" +fi + +AM_CONDITIONAL([POSIX_HOST], [true]) +AM_CONDITIONAL([WINDOWS_HOST], [false]) + +AS_CASE([$host_cpu], + [ppc64*], [wt_cv_powerpc="yes"], + [elf64lppc], [wt_cv_powerpc="yes"], + [powerpc*], [wt_cv_powerpc="yes"], + [wt_cv_powerpc="no"]) +AM_CONDITIONAL([POWERPC_HOST], [test "$wt_cv_powerpc" = "yes"]) # This is a workaround as part of WT-2459. Currently, clang (v3.7) does not # support compiling the ASM code we have to perform the CRC checks on PowerPC. @@ -41,12 +66,8 @@ AM_PROG_AS(as gas) # determine what tag to use for that one .S file. If we catch that we are using # two different compilers for CC and CCAS and we are on a PowerPC system we # overload the libtool flags to provide CC by default. 
-if test "$CC" != "$CCAS"; then - AS_CASE([$host_cpu], - [ppc64*], [AM_LIBTOOLFLAGS+="--tag=CC"], - [elf64lppc], [AM_LIBTOOLFLAGS+="--tag=CC"], - [powerpc*], [AM_LIBTOOLFLAGS+="--tag=CC"], - []) +if test "$wt_cv_powerpc" = "yes" -a "$CC" != "$CCAS"; then + [AM_LIBTOOLFLAGS+="--tag=CC"] fi AC_SUBST(AM_LIBTOOLFLAGS) diff --git a/src/third_party/wiredtiger/build_posix/makemake b/src/third_party/wiredtiger/build_posix/makemake index 9ed9d252911..506420b4aaf 100755 --- a/src/third_party/wiredtiger/build_posix/makemake +++ b/src/third_party/wiredtiger/build_posix/makemake @@ -7,7 +7,7 @@ (sed -n '1,/BEGIN SUBDIRS/p' Make.base echo "SUBDIRS =" -sed -e 's/#.*$//' -e '/^$/d' Make.subdirs | (while read dir cond ; do +sed -e 's/#.*$//' -e '/^$/d' Make.subdirs | while read dir cond ; do test -d ../$dir || continue if test -n "$cond" ; then cat <<END_CONDITIONAL @@ -18,17 +18,27 @@ END_CONDITIONAL else echo "SUBDIRS += $dir" fi -done) +done # Write the rest of Make.base, up to SOURCES sed -n '/END SUBDIRS/,/BEGIN SOURCES/p' Make.base +# Write the list of sources. echo echo "libwiredtiger_la_LDFLAGS = -release @VERSION@" -echo "libwiredtiger_la_SOURCES=\\" -sed -e '/^[a-z]/!d' \ - -e 's/.*/ & \\/' \ - -e '$s/ \\$//' < ../dist/filelist +echo "libwiredtiger_la_SOURCES =" +sed -e '/^[a-z]/!d' < ../dist/filelist | while read file cond; do + if test -n "$cond"; then + cat <<END_CONDITIONAL +# DO NOT indent the "libwiredtiger_la_SOURCES" lines, it breaks the build. 
+if ${cond} +libwiredtiger_la_SOURCES += $file +endif +END_CONDITIONAL + else + echo "libwiredtiger_la_SOURCES += $file" + fi +done # Write the rest of Make.base sed -n '/END SOURCES/,$p' Make.base diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 8cfa83dadc4..90b1c8378a2 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -439,7 +439,7 @@ connection_runtime_config = [ Config('file_max', '100MB', r''' the maximum size of log files''', min='100KB', max='2GB'), - Config('path', '', r''' + Config('path', '"."', r''' the path to a directory into which the log files are written. If the value is not an absolute path name, the files are created relative to the database home'''), @@ -722,8 +722,8 @@ wiredtiger_open = wiredtiger_open_common + [ \c create option''', type='boolean'), Config('in_memory', 'false', r''' - keep data in-memory only, minimize disk I/O''', - type='boolean', undoc=True), + keep data in-memory only. See @ref in_memory for more information''', + type='boolean'), Config('use_environment', 'true', r''' use the \c WIREDTIGER_CONFIG and \c WIREDTIGER_HOME environment variables if the process is not running with special privileges. @@ -822,6 +822,13 @@ methods = { Config('bloom_hash_count', '8', r''' the number of hash values per item for the bloom filter''', min='2', max='100'), + Config('operation', '"and"', r''' + the operation applied between this and other joined cursors. + When "operation=and" is specified, all the conditions implied by + joins must be satisfied for an entry to be returned by the join cursor; + when "operation=or" is specified, only one must be satisfied. + All cursors joined to a join cursor must have matching operations''', + choices=['and', 'or']), Config('strategy', '', r''' when set to bloom, a bloom filter is created and populated for this index. 
This has an up front cost but may reduce the number @@ -952,6 +959,11 @@ methods = { Display the contents of on-disk blocks as they are verified, using the application's message handler, intended for debugging''', type='boolean'), + Config('dump_layout', 'false', r''' + Display the layout of the files as they are verified, using the + application's message handler, intended for debugging; requires + optional support from the block manager''', + type='boolean'), Config('dump_offsets', '', r''' Display the contents of specific on-disk blocks, using the application's message handler, intended for debugging''', @@ -960,10 +972,6 @@ methods = { Display the contents of in-memory pages as they are verified, using the application's message handler, intended for debugging''', type='boolean'), - Config('dump_shape', 'false', r''' - Display the shape of the tree after verification, - using the application's message handler, intended for debugging''', - type='boolean'), Config('strict', 'false', r''' Treat any verification problem as an error; by default, verify will warn, but not fail, in the case of errors that won't affect future @@ -1077,11 +1085,17 @@ methods = { type='boolean'), ]), 'WT_CONNECTION.reconfigure' : Method(connection_runtime_config), +'WT_CONNECTION.set_file_system' : Method([]), 'WT_CONNECTION.load_extension' : Method([ Config('config', '', r''' configuration string passed to the entry point of the extension as its WT_CONFIG_ARG argument'''), + Config('early_load', 'false', r''' + whether this extension should be loaded at the beginning of + ::wiredtiger_open. Only applicable to extensions loaded via the + wiredtiger_open configurations string''', + type='boolean'), Config('entry', 'wiredtiger_extension_init', r''' the entry point of the extension, called to initialize the extension when it is loaded. 
The signature of the function diff --git a/src/third_party/wiredtiger/dist/api_err.py b/src/third_party/wiredtiger/dist/api_err.py index a17c68ee196..82f961a4ac9 100644 --- a/src/third_party/wiredtiger/dist/api_err.py +++ b/src/third_party/wiredtiger/dist/api_err.py @@ -53,11 +53,11 @@ errors = [ to return an error if recovery is required to use the database.'''), Error('WT_CACHE_FULL', -31807, 'operation would overflow cache', ''' - This error is generated when wiredtiger_open is configured - to run in-memory, and an insert or update operation requires more - than the configured cache size to complete.''', undoc=True), - Error('WT_PERM_DENIED', -31808, - 'permission denied (internal)', undoc=True), + This error is only generated when wiredtiger_open is configured + to run in-memory, and an insert or update operation requires + more than the configured cache size to complete. The operation + may be retried; if a transaction is in progress, it should be + rolled back and the operation retried in a new transaction.'''), ] # Update the #defines in the wiredtiger.in file. diff --git a/src/third_party/wiredtiger/dist/dist.py b/src/third_party/wiredtiger/dist/dist.py index 1b3ad828dfb..555cc03989b 100644 --- a/src/third_party/wiredtiger/dist/dist.py +++ b/src/third_party/wiredtiger/dist/dist.py @@ -2,21 +2,16 @@ import filecmp, glob, os, re, shutil # source_files -- # Return a list of the WiredTiger source file names. 
-def source_files(skip_includes=False): - if not skip_includes: - for line in glob.iglob('../src/include/*.[hi]'): - yield line +def source_files(): file_re = re.compile(r'^\w') + for line in glob.iglob('../src/include/*.[hi]'): + yield line for line in open('filelist', 'r'): if file_re.match(line): - yield os.path.join('..', line.rstrip()) - # Return only the Windows-specific files in the Windows filelist - for line in open('../build_win/filelist.win', 'r'): - if 'os_win' in line and file_re.match(line): - yield os.path.join('..', line.rstrip()) + yield os.path.join('..', line.split()[0]) for line in open('extlist', 'r'): if file_re.match(line): - yield os.path.join('..', line.rstrip()) + yield os.path.join('..', line.split()[0]) # source_dirs -- # Return a list of the WiredTiger source directory names. diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist index 1d7ffa76922..59624508cf0 100644 --- a/src/third_party/wiredtiger/dist/filelist +++ b/src/third_party/wiredtiger/dist/filelist @@ -47,6 +47,9 @@ src/btree/row_key.c src/btree/row_modify.c src/btree/row_srch.c src/cache/cache_las.c +src/checksum/checksum.c +src/checksum/power8/crc32.S POWERPC_HOST +src/checksum/power8/crc32_wrapper.c POWERPC_HOST src/config/config.c src/config/config_api.c src/config/config_check.c @@ -104,30 +107,47 @@ src/meta/meta_turtle.c src/os_common/filename.c src/os_common/os_abort.c src/os_common/os_alloc.c +src/os_common/os_errno.c src/os_common/os_fhandle.c src/os_common/os_fs_inmemory.c -src/os_common/os_fs_stdio.c -src/os_common/os_getline.c +src/os_common/os_fstream.c +src/os_common/os_fstream_stdio.c src/os_common/os_getopt.c -src/os_common/os_init.c src/os_common/os_strtouq.c -src/os_posix/os_dir.c -src/os_posix/os_dlopen.c -src/os_posix/os_errno.c -src/os_posix/os_fallocate.c -src/os_posix/os_fs.c -src/os_posix/os_getenv.c -src/os_posix/os_map.c -src/os_posix/os_mtx_cond.c -src/os_posix/os_once.c -src/os_posix/os_pagesize.c 
-src/os_posix/os_path.c -src/os_posix/os_priv.c -src/os_posix/os_setvbuf.c -src/os_posix/os_sleep.c -src/os_posix/os_thread.c -src/os_posix/os_time.c -src/os_posix/os_yield.c +src/os_posix/os_dir.c POSIX_HOST +src/os_posix/os_dlopen.c POSIX_HOST +src/os_posix/os_fallocate.c POSIX_HOST +src/os_posix/os_fs.c POSIX_HOST +src/os_posix/os_getenv.c POSIX_HOST +src/os_posix/os_map.c POSIX_HOST +src/os_posix/os_mtx_cond.c POSIX_HOST +src/os_posix/os_once.c POSIX_HOST +src/os_posix/os_pagesize.c POSIX_HOST +src/os_posix/os_path.c POSIX_HOST +src/os_posix/os_priv.c POSIX_HOST +src/os_posix/os_setvbuf.c POSIX_HOST +src/os_posix/os_sleep.c POSIX_HOST +src/os_posix/os_thread.c POSIX_HOST +src/os_posix/os_time.c POSIX_HOST +src/os_posix/os_yield.c POSIX_HOST +src/os_win/os_dir.c WINDOWS_HOST +src/os_win/os_dlopen.c WINDOWS_HOST +src/os_win/os_fs.c WINDOWS_HOST +src/os_win/os_getenv.c WINDOWS_HOST +src/os_win/os_map.c WINDOWS_HOST +src/os_win/os_mtx_cond.c WINDOWS_HOST +src/os_win/os_once.c WINDOWS_HOST +src/os_win/os_pagesize.c WINDOWS_HOST +src/os_win/os_path.c WINDOWS_HOST +src/os_win/os_priv.c WINDOWS_HOST +src/os_win/os_setvbuf.c WINDOWS_HOST +src/os_win/os_sleep.c WINDOWS_HOST +src/os_win/os_snprintf.c WINDOWS_HOST +src/os_win/os_thread.c WINDOWS_HOST +src/os_win/os_time.c WINDOWS_HOST +src/os_win/os_vsnprintf.c WINDOWS_HOST +src/os_win/os_winerr.c WINDOWS_HOST +src/os_win/os_yield.c WINDOWS_HOST src/packing/pack_api.c src/packing/pack_impl.c src/packing/pack_stream.c @@ -148,7 +168,6 @@ src/session/session_api.c src/session/session_compact.c src/session/session_dhandle.c src/session/session_salvage.c -src/support/cksum.c src/support/cond_auto.c src/support/crypto.c src/support/err.c @@ -160,8 +179,6 @@ src/support/hex.c src/support/huffman.c src/support/mtx_rw.c src/support/pow.c -src/support/power8/crc32.S -src/support/power8/crc32_wrapper.c src/support/rand.c src/support/scratch.c src/support/stat.c diff --git a/src/third_party/wiredtiger/dist/flags.py 
b/src/third_party/wiredtiger/dist/flags.py index 806fac2137d..b5f36fb707a 100644 --- a/src/third_party/wiredtiger/dist/flags.py +++ b/src/third_party/wiredtiger/dist/flags.py @@ -8,13 +8,6 @@ flags = { ################################################### # Internal routine flag declarations ################################################### - 'file_types' : [ - 'FILE_TYPE_CHECKPOINT', - 'FILE_TYPE_DATA', - 'FILE_TYPE_DIRECTORY', - 'FILE_TYPE_LOG', - 'FILE_TYPE_REGULAR', - ], 'log_scan' : [ 'LOGSCAN_FIRST', 'LOGSCAN_FROM_CKP', @@ -105,6 +98,7 @@ flags = { 'CONN_LSM_MERGE', 'CONN_PANIC', 'CONN_READONLY', + 'CONN_RECOVERING', 'CONN_SERVER_ASYNC', 'CONN_SERVER_CHECKPOINT', 'CONN_SERVER_LSM', @@ -115,12 +109,12 @@ flags = { ], 'session' : [ 'SESSION_CAN_WAIT', - 'SESSION_CLEAR_EVICT_WALK', 'SESSION_INTERNAL', 'SESSION_LOCK_NO_WAIT', 'SESSION_LOCKED_CHECKPOINT', 'SESSION_LOCKED_HANDLE_LIST', 'SESSION_LOCKED_METADATA', + 'SESSION_LOCKED_PASS', 'SESSION_LOCKED_SCHEMA', 'SESSION_LOCKED_SLOT', 'SESSION_LOCKED_TABLE', diff --git a/src/third_party/wiredtiger/dist/log.py b/src/third_party/wiredtiger/dist/log.py index 9201b20054b..8743dd3a71c 100644 --- a/src/third_party/wiredtiger/dist/log.py +++ b/src/third_party/wiredtiger/dist/log.py @@ -178,7 +178,7 @@ __wt_logop_read(WT_SESSION_IMPL *session, } static size_t -__logrec_json_unpack_str(char *dest, size_t destlen, const char *src, +__logrec_json_unpack_str(char *dest, size_t destlen, const u_char *src, size_t srclen) { \tsize_t total; diff --git a/src/third_party/wiredtiger/dist/s_c_test_create b/src/third_party/wiredtiger/dist/s_c_test_create new file mode 100755 index 00000000000..fd0fa809d99 --- /dev/null +++ b/src/third_party/wiredtiger/dist/s_c_test_create @@ -0,0 +1,105 @@ +#! /bin/sh + +# +# Usage: s_c_test_create test_name +# +# Create a new test case in the C test suite. +# This will create the infrastructure for a new C test case. 
The given +# test name is a new directory in the C suite directory and the Makefile +# components and C program template are created. +# +# Any 'make check' variations of this test should be added to the smoke.sh +# script in the main C suite directory. +# +tmp=__a +trap 'rm -f $tmp; exit 0' 0 1 2 3 13 15 + +if [ "x$1" = "x" ]; then + echo "Usage: $0 test_name" + exit 1 +fi +CSUITE_DIRECTORY=../test/csuite +MAKEFILE_NAME=$CSUITE_DIRECTORY/Makefile.am + +TEST_NAME=$1 + +exists=`grep $TEST_NAME $MAKEFILE_NAME` + +if [ "x$exists" != "x" ]; then + echo "Test with requested name already exists. Try another name." + exit 1 +fi + +# Create a subdirectory and stub for the new test +mkdir $CSUITE_DIRECTORY/$TEST_NAME + +(cat <<EOF +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: + * Test case description: + * Failure mode: + */ + +void (*custom_die)(void) = NULL; + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + /* + * Insert test implementation here. + */ + + testutil_cleanup(opts); + + return (0); +} +EOF +) > $CSUITE_DIRECTORY/$TEST_NAME/main.c + + +# Now update the C test suite makefile to include the new test case + +NEW_MAKE_SECT="test_${TEST_NAME}_SOURCES = ${TEST_NAME}\/main.c\\nnoinst_PROGRAMS = test_${TEST_NAME}\\n\\n" + +cat $CSUITE_DIRECTORY/Makefile.am | awk \ + "/^# Script add new line here/ && !modif { printf(\"$NEW_MAKE_SECT\"); modif=1 } {print}" > $tmp + +mv $tmp $CSUITE_DIRECTORY/Makefile.am + +exit 0 diff --git a/src/third_party/wiredtiger/dist/s_copyright.list b/src/third_party/wiredtiger/dist/s_copyright.list index c6a5910087b..4999d2a37a2 100644 --- a/src/third_party/wiredtiger/dist/s_copyright.list +++ b/src/third_party/wiredtiger/dist/s_copyright.list @@ -29,6 +29,8 @@ skip src/config/config_def.c skip src/conn/api_strerror.c skip src/docs/tools/doxypy.py skip src/include/extern.h +skip src/include/extern_posix.h +skip src/include/extern_win.h skip src/include/flags.h skip src/include/queue.h skip src/log/log_auto.c diff --git a/src/third_party/wiredtiger/dist/s_define b/src/third_party/wiredtiger/dist/s_define index 77673bdcdf9..050101e8510 100755 --- a/src/third_party/wiredtiger/dist/s_define +++ b/src/third_party/wiredtiger/dist/s_define @@ -5,7 +5,7 @@ t=__wt.$$ trap 'rm -f $t; exit 0' 0 1 2 3 13 15 # List of source files to search. 
-l=`sed -e 's,#.*,,' -e '/^$/d' -e 's,^,../,' filelist` +l=`sed -e '/^[a-z]/!d' -e 's/[ ].*$//' -e 's,^,../,' filelist` l="$l `echo ../src/include/*.i ../src/utilities/*.c ../test/*/*.c`" # List of include files for source #defines. diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list index c9777c86675..2cdda74e7d4 100644 --- a/src/third_party/wiredtiger/dist/s_define.list +++ b/src/third_party/wiredtiger/dist/s_define.list @@ -23,6 +23,7 @@ WT_CONN_CHECK_PANIC WT_DEADLOCK WT_DEBUG_BYTE WT_ERR_ERROR_OK +WT_EXT_FOREACH_OFF WT_HANDLE_CLOSED WT_HANDLE_NULLABLE WT_LOG_SLOT_ACTIVE @@ -42,13 +43,17 @@ WT_STATS_FIELD_TO_SLOT WT_STATS_SLOT_ID WT_STAT_DECR WT_STAT_DECRV +WT_STAT_DECRV_ATOMIC WT_STAT_FAST_CONN_DECRV WT_STAT_FAST_DATA_DECRV WT_STAT_FAST_DECR WT_STAT_FAST_DECRV +WT_STAT_FAST_DECRV_ATOMIC WT_STAT_FAST_INCR WT_STAT_FAST_INCRV +WT_STAT_FAST_INCRV_ATOMIC WT_STAT_FAST_SET +WT_STAT_INCRV_ATOMIC WT_STAT_WRITE WT_TIMEDIFF_US WT_TRET_ERROR_OK diff --git a/src/third_party/wiredtiger/dist/s_docs b/src/third_party/wiredtiger/dist/s_docs index c66bcb0bd06..08602989fe8 100755 --- a/src/third_party/wiredtiger/dist/s_docs +++ b/src/third_party/wiredtiger/dist/s_docs @@ -114,7 +114,8 @@ valid_build() } classf=`ls ../docs/struct___* 2>/dev/null` for c in $classf; do - echo "$c: Need to add class to PREDEFINED in src/docs/Doxyfile" + echo "$c: Add class to PREDEFINED in src/docs/Doxyfile, then remove docs/*.{html,js} and rebuild" + done } diff --git a/src/third_party/wiredtiger/dist/s_funcs b/src/third_party/wiredtiger/dist/s_funcs index 5fee03b5615..8695c8d4fa7 100755 --- a/src/third_party/wiredtiger/dist/s_funcs +++ b/src/third_party/wiredtiger/dist/s_funcs @@ -5,7 +5,7 @@ t=__wt.$$ trap 'rm -f $t; exit 0' 0 1 2 3 13 15 # List of files to search. 
-l=`sed -e 's,#.*,,' -e '/^$/d' -e 's,^,../,' filelist` +l=`sed -e '/^[a-z]/!d' -e 's/[ ].*$//' -e 's,^,../,' filelist` l="$l `echo ../src/*/*.i ../src/utilities/*.c ../bench/wtperf/*.c`" ( diff --git a/src/third_party/wiredtiger/dist/s_label b/src/third_party/wiredtiger/dist/s_label index b7c5795234a..b56ecc6fc78 100755 --- a/src/third_party/wiredtiger/dist/s_label +++ b/src/third_party/wiredtiger/dist/s_label @@ -23,7 +23,7 @@ file_parse() # where there's a jump to the error label after the error label. for f in `find bench examples ext src test -name '*.[ci]'`; do file_parse $f | - egrep '(WT_ERR|WT_ILLEGAL_VALUE_ERR)\(.*(WT_ILLEGAL_VALUE|WT_RET)\(.*err:|[^a-z_]err:.*(WT_ERR|WT_ILLEGAL_VALUE_ERR)\(' | + egrep '(WT_ERR[_A-Z]*|WT_ILLEGAL_VALUE_ERR)\(.*(WT_ILLEGAL_VALUE|WT_RET[_A-Z]*)\(.*err:|[^a-z_]err:.*(WT_ERR|WT_ILLEGAL_VALUE_ERR)\(' | sed 's/:.*//' > $t test -s $t && { @@ -32,6 +32,14 @@ for f in `find bench examples ext src test -name '*.[ci]'`; do } done +# Returns before jumps to an error label within the same loop. +# Jumps before returns have already been detected above. +for f in `find bench examples ext src test -name '*.[ci]'`; do + file_parse $f | sed "s=^=$f:=" +done | python dist/s_label_loop.py | + egrep '\{@[^@]*(WT_ILLEGAL_VALUE|WT_RET[_A-Z]*)\([^@]*(WT_ERR[_A-Z]*|WT_ILLEGAL_VALUE_ERR)\(.*err:' | + sed -e 's/^\([^:]*\): *\([^:]*\):.*/\1:\2: mix of returns and jump to the error label within a loop/' + # Return of 0 in functions after a jump to the error label. for f in `find bench examples ext src test -name '*.[ci]'`; do file_parse $f | diff --git a/src/third_party/wiredtiger/dist/s_label_loop.py b/src/third_party/wiredtiger/dist/s_label_loop.py new file mode 100644 index 00000000000..5cc222a4250 --- /dev/null +++ b/src/third_party/wiredtiger/dist/s_label_loop.py @@ -0,0 +1,28 @@ +# Mark outer loop boundaries with {@ and }@ . Nested loops are not marked. +# Each input line is the content of a C function. 
+import re, sys + +p = re.compile('((for |while |_FOREACH|FOREACH_BEGIN)\([^{)]*\)|do) {') +for line in sys.stdin: + matched = 0 + m = p.search(line) + while m != None: + matched = 1 + pos = m.end() + out = line[:pos] + "@" + level = 1 + length = len(line) + while level > 0 and pos < length: + c = line[pos:pos+1] + pos += 1 + out += c + if c == "}": + level -= 1 + elif c == "{": + level += 1 + out += "@" + sys.stdout.write(out) + line = line[pos:] + m = p.search(line) + if matched != 0: + sys.stdout.write(line) diff --git a/src/third_party/wiredtiger/dist/s_longlines b/src/third_party/wiredtiger/dist/s_longlines index 000f33d51d5..91dada361f4 100755 --- a/src/third_party/wiredtiger/dist/s_longlines +++ b/src/third_party/wiredtiger/dist/s_longlines @@ -8,9 +8,11 @@ l=`(cd .. && find bench/wtperf examples ext src test -name '*.[chisy]' && find dist -name '*.py' && find src -name '*.in') | - sed -e '/dist\/stat_data\.py/d' \ + sed -e '/checksum\/power8/d' \ + -e '/dist\/stat_data\.py/d' \ -e '/include\/extern\.h/d' \ - -e '/support\/power8/d' \ + -e '/include\/extern_posix\.h/d' \ + -e '/include\/extern_win\.h/d' \ -e '/support\/stat\.c/d'` for f in $l ; do diff --git a/src/third_party/wiredtiger/dist/s_prototypes b/src/third_party/wiredtiger/dist/s_prototypes index 4ceb69f4c77..73f7be371ea 100755 --- a/src/third_party/wiredtiger/dist/s_prototypes +++ b/src/third_party/wiredtiger/dist/s_prototypes @@ -28,36 +28,52 @@ proto() -e 's/\* /\*/g' \ -e 's/ */ /g' \ -e 's/^/extern /' \ - -e 's/WT_GCC_FUNC_/WT_GCC_FUNC_DECL_/' \ - -e 's/$/;/p' < $1 + -e 's/WT_GCC_FUNC_/WT_GCC_FUNC_DECL_/g' \ + -e '# If a line ends in #endif, appending a semicolon will result' \ + -e '# in an illegal expression, force an appended newline using' \ + -e '# the H command because substitute may not allow newline in' \ + -e '# the RHS of the expression.' 
\ + -e '/#endif$/{' \ + -e x \ + -e 's/.*//' \ + -e H \ + -e x \ + -e '}' \ + -e 's/$/;/' \ + -e p < $1 } -( -cat <<EOF +# proto -- +# generate the list of prototypes given a file list +externs() +{ +(cat <<EOF /* DO NOT EDIT: automatically built by dist/s_prototypes. */ EOF + for i in $l; do + proto ../$i + done) > $t + cmp $t $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $t $f) +} -# First, get prototypes for everything but the OS directories. -# Second, get prototypes for the OS directories. -# The reason for this is because the OS directories repeat names (that is, there -# are common names in both os_posix and os_win), and so we sort the prototypes -# to avoid repeating them in the output (which some compilers won't tolerate). -# We'd sort everything and discard duplicates, but we can't sort when function -# signatures are on multiple lines, that is, #ifdef'd function signatures. Since -# the OS directories are the only places with repeated names, and they have no -# #ifdef'd signatures, we do it this way. -l=`sed -e '/^[a-z]/!d' -e '/src\/os/d' filelist` -for i in $l; do - proto ../$i -done -l=`echo ../src\/os*/*.c` +f=../src/include/extern_win.h +l=`sed \ + -e '/os_win/!d' \ + -e 's/[ ].*$//' filelist` +externs -for i in $l; do - proto $i -done | tee xxx | env LC_ALL=C sort -u -) > $t +f=../src/include/extern_posix.h +l=`sed \ + -e '/os_posix/!d' \ + -e 's/[ ].*$//' filelist` +externs f=../src/include/extern.h -cmp $t $f > /dev/null 2>&1 || - (echo "Building $f" && rm -f $f && cp $t $f) +l=`sed \ + -e '/^[a-z]/!d' \ + -e '/os_posix/d' \ + -e '/os_win/d' \ + -e 's/[ ].*$//' filelist` +externs diff --git a/src/third_party/wiredtiger/dist/s_stat b/src/third_party/wiredtiger/dist/s_stat index 3938b8e65eb..0638a7f3337 100755 --- a/src/third_party/wiredtiger/dist/s_stat +++ b/src/third_party/wiredtiger/dist/s_stat @@ -8,8 +8,8 @@ trap 'rm -f $t; exit 0' 0 1 2 3 13 15 # definition. 
l=`sed \ -e '/src\/support\/stat.c/d' \ - -e 's,#.*,,' \ - -e '/^$/d' \ + -e '/^[a-z]/!d' \ + -e 's/[ ].*$//' \ -e 's,^,../,' filelist` l="$l `echo ../src/include/*.i ../src/include/os.h`" diff --git a/src/third_party/wiredtiger/dist/s_string b/src/third_party/wiredtiger/dist/s_string index 3a4f9e190d3..32aa7528979 100755 --- a/src/third_party/wiredtiger/dist/s_string +++ b/src/third_party/wiredtiger/dist/s_string @@ -31,7 +31,9 @@ replace() { # Check the spelling of an individual file. check() { # Strip out git hashes, which are seven character hex strings. - sed 's/ [0-9a-f]\{7\} / /g' ../$2 | aspell --lang=en $1 list | + # Strip out double quote char literals ('"'), they confuse aspell. + sed -e 's/ [0-9a-f]\{7\} / /g' -e "s/'\"'//g" ../$2 | + aspell --lang=en $1 list | sort -u | comm -23 /dev/stdin s_string.ok > $t test -s $t && { diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 631f2a5c909..7966ff2cf2e 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -76,6 +76,7 @@ DECL DECR DESC DHANDLE +DIRECTIO DNE DOI DONTNEED @@ -86,6 +87,7 @@ Decrement Decrypt DeleteFileA EAGAIN +EB EBUSY EEXIST EINTR @@ -116,6 +118,7 @@ FNV FORALL FOREACH FULLFSYNC +FindClose FindFirstFile Fixup Fk @@ -130,6 +133,7 @@ GIDs Gcc Geoff GetEnvironmentVariableA +GetFileAttributesA GetFileAttributesEx GetFileSizeEx GetLastError @@ -145,6 +149,7 @@ IEC IEEE IKEY IMPL +IMPL's INCR INIT INITIALIZER @@ -211,6 +216,7 @@ Mewhort Mitzenmacher MongoDB MoveFile +MoveFileA Multi Multithreaded Mutex @@ -255,6 +261,7 @@ Qsort RCS RDNOLOCK RDONLY +READONLY RECNO REF's REFs @@ -468,6 +475,7 @@ ckptfrag ckptlist cksum cloexec +closedir clsm cmd cmp @@ -488,9 +496,11 @@ conn connectionp const constantp +cookiep copydoc copyin copyout +countp cp cpuid crc @@ -602,6 +612,7 @@ evictserver exactp exe execop +existp extern extlist fadvise @@ -618,6 +629,7 @@ ffs fgetc fgetln fh +fhandle 
filefrag filehandle fileid @@ -637,11 +649,14 @@ fmterr fnv foc fopen +formatmessage fp fprintf free'd +fs fscanf fstat +fstream fsync fsyncLock fsyncs @@ -669,11 +684,13 @@ gostruct goutf gt handleops +handlep hashval havesize hdr highjack hotbackup +hselasky html huffman hval @@ -684,11 +701,13 @@ ibackup icount idx ifdef's +iiu ikey im impl incase incr +incrementals incrementing indices indirects @@ -697,6 +716,7 @@ infeasible inflateInit infmt init +initializers initn initsize initval @@ -715,8 +735,20 @@ intrin inuse io ip +isalnum +isalpha +iscntrl +isdigit +isgraph islocked +islower ispo +isprint +ispunct +isrc +isspace +isupper +isxdigit iter iteratively jnr @@ -739,6 +771,7 @@ lbracket ld le len +lengthp lenp level's leveldb @@ -844,6 +877,7 @@ noraw notfound notsup notused +nowait nset nsnap nul @@ -866,6 +900,7 @@ os osfhandle ovfl ownp +pR packv pagesize parens @@ -911,6 +946,7 @@ pushms putK putV pv +pvA pwrite py qdown @@ -939,6 +975,7 @@ recsize rectype recurse refp +regionp reinitialization relocked resize @@ -1038,7 +1075,9 @@ toklen tokname tokstart toktype +tolower totalsec +toupper transactional transactionally trecno @@ -1064,6 +1103,7 @@ uncompresssed undef unencrypted unesc +unescape unescaped unicode uninstantiated diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style index a163eb83b25..a222c004cc3 100755 --- a/src/third_party/wiredtiger/dist/s_style +++ b/src/third_party/wiredtiger/dist/s_style @@ -20,7 +20,7 @@ if [ $# -ne 1 ]; then -name '*.[chisy]' -o -name '*.in' -o -name '*.dox' | sed -e '/Makefile.in/d' \ -e '/build_win\/wiredtiger_config.h/d' \ - -e '/support\/power8/d' | + -e '/checksum\/power8/d' | xargs $xp -n 1 -I{} sh ./dist/s_style {} else # General style correction and cleanup for a single file @@ -60,11 +60,13 @@ else echo "$f: use TAILQ for all lists" fi - if ! expr "$f" : 'src/os_common/.*' > /dev/null && + if ! expr "$f" : 'src/include/extern.h' > /dev/null && + ! 
expr "$f" : 'src/include/extern_posix.h' > /dev/null && + ! expr "$f" : 'src/include/extern_win.h' > /dev/null && + ! expr "$f" : 'src/include/os.h' > /dev/null && + ! expr "$f" : 'src/os_common/.*' > /dev/null && ! expr "$f" : 'src/os_posix/.*' > /dev/null && ! expr "$f" : 'src/os_win/.*' > /dev/null && - ! expr "$f" : 'src/include/extern.h' > /dev/null && - ! expr "$f" : 'src/include/os.h' > /dev/null && grep '__wt_errno' $f > $t; then echo "$f: upper-level code should not call __wt_errno" cat $t @@ -73,7 +75,7 @@ else if ! expr "$f" : 'examples/c/.*' > /dev/null && ! expr "$f" : 'ext/datasources/helium/helium.c' > /dev/null && ! expr "$f" : 'src/include/os.h' > /dev/null && - grep "%zu" $f | grep -v 'SIZET_FMT' > $t; then + egrep "%[0-9]*zu" $f | grep -v 'SIZET_FMT' > $t; then echo "$f: %zu needs to be fixed for Windows" cat $t fi @@ -138,6 +140,20 @@ else } fi + # Use of ctype functions that sign extend their arguments. + if ! expr "$f" : 'bench/.*' > /dev/null && + ! expr "$f" : 'test/csuite/.*' > /dev/null && + ! expr "$f" : 'examples/.*' > /dev/null && + ! expr "$f" : 'ext/.*' > /dev/null && + ! expr "$f" : 'src/include/ctype.i' > /dev/null; then + if egrep '(#include.*["</]ctype.h[">]|\b(is(alnum|alpha|cntrl|digit|graph|lower|print|punct|space|upper|xdigit)|to(lower|toupper))\()' $f > $t; then + test -s $t && { + echo "$f: direct use of ctype.h functions, instead of ctype.i equivalents" + cat $t + } + fi + fi + tr -cd '[:alnum:][:space:][:punct:]' < $f | unexpand | sed -e 's/){/) {/' \ diff --git a/src/third_party/wiredtiger/dist/s_typedef b/src/third_party/wiredtiger/dist/s_typedef index 233f432f0e5..b044a0e6b4b 100755 --- a/src/third_party/wiredtiger/dist/s_typedef +++ b/src/third_party/wiredtiger/dist/s_typedef @@ -44,7 +44,7 @@ build() { check() { # Complain about unused #typedefs. # List of files to search. 
- l=`sed -e 's,#.*,,' -e '/^$/d' -e 's,^,../,' filelist` + l=`sed -e '/^[a-z]/!d' -e 's/[ ].*$//' -e 's,^,../,' filelist` l="$l `echo ../src/utilities/*.c`" ( diff --git a/src/third_party/wiredtiger/dist/s_whitespace b/src/third_party/wiredtiger/dist/s_whitespace index 74820a4f0e9..8cf3f7dfe6f 100755 --- a/src/third_party/wiredtiger/dist/s_whitespace +++ b/src/third_party/wiredtiger/dist/s_whitespace @@ -38,7 +38,7 @@ for f in `find bench examples ext src test \ -name '*.in' -o \ -name 'Makefile.am' | sed -e '/Makefile.in/d' \ - -e '/support\/power8/d'`; do + -e '/checksum\/power8/d'`; do whitespace_and_empty_line $f done diff --git a/src/third_party/wiredtiger/dist/s_win b/src/third_party/wiredtiger/dist/s_win index 562e89f94c6..49deb348bc3 100755 --- a/src/third_party/wiredtiger/dist/s_win +++ b/src/third_party/wiredtiger/dist/s_win @@ -39,42 +39,7 @@ win_export() (echo "Building $f" && rm -f $f && cp $t $f) } -win_filelist() -{ - f='../build_win/filelist.win' - - # Discard POSIX-only and PPC-only files, add in Windows-only files. 
- ( - sed \ - -e '/\/os_posix\//d' \ - -e '/src\/support\/power8\/crc32.S/d' \ - -e '/src\/support\/power8\/crc32_wrapper.c/d' - - echo 'src/os_win/os_dir.c' - echo 'src/os_win/os_dlopen.c' - echo 'src/os_win/os_errno.c' - echo 'src/os_win/os_fs.c' - echo 'src/os_win/os_getenv.c' - echo 'src/os_win/os_map.c' - echo 'src/os_win/os_mtx_cond.c' - echo 'src/os_win/os_once.c' - echo 'src/os_win/os_pagesize.c' - echo 'src/os_win/os_path.c' - echo 'src/os_win/os_priv.c' - echo 'src/os_win/os_setvbuf.c' - echo 'src/os_win/os_sleep.c' - echo 'src/os_win/os_snprintf.c' - echo 'src/os_win/os_thread.c' - echo 'src/os_win/os_time.c' - echo 'src/os_win/os_vsnprintf.c' - echo 'src/os_win/os_yield.c') < filelist | sort > $t - - cmp $t $f > /dev/null 2>&1 || - (echo "Building $f" && rm -f $f && cp $t $f) -} - win_config win_export -win_filelist exit 0 diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 483e0bd3ef2..694ffc86ee4 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -81,6 +81,10 @@ class SessionStat(Stat): prefix = 'session' def __init__(self, name, desc, flags=''): Stat.__init__(self, name, SessionStat.prefix, desc, flags) +class ThreadState(Stat): + prefix = 'thread-state' + def __init__(self, name, desc, flags=''): + Stat.__init__(self, name, ThreadState.prefix, desc, flags) class TxnStat(Stat): prefix = 'transaction' def __init__(self, name, desc, flags=''): @@ -97,10 +101,20 @@ class YieldStat(Stat): ########################################## groups = {} groups['cursor'] = [CursorStat.prefix, SessionStat.prefix] -groups['evict'] = [CacheStat.prefix, ConnStat.prefix, BlockStat.prefix] +groups['evict'] = [ + BlockStat.prefix, + CacheStat.prefix, + ConnStat.prefix, + ThreadState.prefix +] groups['lsm'] = [LSMStat.prefix, TxnStat.prefix] groups['memory'] = [CacheStat.prefix, ConnStat.prefix, RecStat.prefix] -groups['system'] = [ConnStat.prefix, 
DhandleStat.prefix, SessionStat.prefix] +groups['system'] = [ + ConnStat.prefix, + DhandleStat.prefix, + SessionStat.prefix, + ThreadState.prefix +] ########################################## # CONNECTION statistics @@ -113,6 +127,7 @@ connection_stats = [ ConnStat('cond_auto_wait_reset', 'auto adjusting condition resets'), ConnStat('cond_wait', 'pthread mutex condition wait calls'), ConnStat('file_open', 'files currently open', 'no_clear,no_scale'), + ConnStat('fsync_io', 'total fsync I/Os'), ConnStat('memory_allocation', 'memory allocations'), ConnStat('memory_free', 'memory frees'), ConnStat('memory_grow', 'memory re-allocations'), @@ -171,6 +186,9 @@ connection_stats = [ CacheStat('cache_eviction_force', 'pages evicted because they exceeded the in-memory maximum'), CacheStat('cache_eviction_force_delete', 'pages evicted because they had chains of deleted items'), CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum'), + CacheStat('cache_eviction_get_ref', 'eviction calls to get a page'), + CacheStat('cache_eviction_get_ref_empty', 'eviction calls to get a page found queue empty'), + CacheStat('cache_eviction_get_ref_empty2', 'eviction calls to get a page found queue empty after locking'), CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'), CacheStat('cache_eviction_internal', 'internal pages evicted'), CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale,size'), @@ -181,6 +199,8 @@ connection_stats = [ CacheStat('cache_eviction_queue_not_empty', 'eviction server candidate queue not empty when topping up'), CacheStat('cache_eviction_server_evicting', 'eviction server evicting pages'), CacheStat('cache_eviction_server_not_evicting', 'eviction server populating queue, but not evicting pages'), + CacheStat('cache_eviction_server_slept', 'eviction server slept, because we did not make progress with eviction'), + 
CacheStat('cache_eviction_server_toobig', 'eviction server skipped very large page'), CacheStat('cache_eviction_slow', 'eviction server unable to reach eviction goal'), CacheStat('cache_eviction_split_internal', 'internal pages split during eviction'), CacheStat('cache_eviction_split_leaf', 'leaf pages split during eviction'), @@ -188,6 +208,9 @@ connection_stats = [ CacheStat('cache_eviction_walks_active', 'files with active eviction walks', 'no_clear,no_scale,size'), CacheStat('cache_eviction_walks_started', 'files with new eviction walks started'), CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'), + CacheStat('cache_hazard_checks', 'hazard pointer check calls'), + CacheStat('cache_hazard_max', 'hazard pointer maximum array length', 'max_aggregate,no_scale'), + CacheStat('cache_hazard_walks', 'hazard pointer check entries walked'), CacheStat('cache_inmem_split', 'in-memory page splits'), CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'), CacheStat('cache_lookaside_insert', 'lookaside table insert calls'), @@ -195,6 +218,7 @@ connection_stats = [ CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'), CacheStat('cache_pages_dirty', 'tracked dirty pages in the cache', 'no_clear,no_scale'), CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'), + CacheStat('cache_pages_requested', 'pages requested from the cache'), CacheStat('cache_read', 'pages read into cache'), CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'), CacheStat('cache_write', 'pages written from cache'), @@ -247,6 +271,8 @@ connection_stats = [ LogStat('log_slot_unbuffered', 'consolidated slot unbuffered writes'), LogStat('log_sync', 'log sync operations'), LogStat('log_sync_dir', 'log sync_dir operations'), + LogStat('log_sync_dir_duration', 'log sync_dir time duration (usecs)'), + LogStat('log_sync_duration', 'log sync time duration (usecs)'), 
LogStat('log_write_lsn', 'log server thread advances write LSN'), LogStat('log_write_lsn_skip', 'log server thread write LSN walk skipped'), LogStat('log_writes', 'log write operations'), @@ -267,6 +293,10 @@ connection_stats = [ ########################################## TxnStat('txn_begin', 'transaction begins'), TxnStat('txn_checkpoint', 'transaction checkpoints'), + TxnStat('txn_checkpoint_fsync_post', 'transaction fsync calls for checkpoint after allocating the transaction ID'), + TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)'), + TxnStat('txn_checkpoint_fsync_pre', 'transaction fsync calls for checkpoint before allocating the transaction ID'), + TxnStat('txn_checkpoint_fsync_pre_duration', 'transaction fsync duration for checkpoint before allocating the transaction ID (usecs)'), TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'), TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'), TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'), @@ -319,6 +349,13 @@ connection_stats = [ CursorStat('cursor_update', 'cursor update calls'), ########################################## + # Thread State statistics + ########################################## + ThreadState('fsync_active', 'active filesystem fsync calls','no_clear,no_scale'), + ThreadState('read_active', 'active filesystem read calls','no_clear,no_scale'), + ThreadState('write_active', 'active filesystem write calls','no_clear,no_scale'), + + ########################################## # Yield statistics ########################################## YieldStat('page_busy_blocked', 'page acquire busy blocked'), @@ -414,7 +451,6 @@ dsrc_stats = [ ########################################## # Cache and eviction statistics ########################################## - CacheStat('cache_bytes_inuse', 
'bytes currently in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_read', 'bytes read into cache', 'size'), CacheStat('cache_bytes_write', 'bytes written from cache', 'size'), CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'), @@ -429,6 +465,7 @@ dsrc_stats = [ CacheStat('cache_inmem_split', 'in-memory page splits'), CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'), CacheStat('cache_overflow_value', 'overflow values cached in memory', 'no_scale'), + CacheStat('cache_pages_requested', 'pages requested from the cache'), CacheStat('cache_read', 'pages read into cache'), CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'), CacheStat('cache_read_overflow', 'overflow pages read into cache'), @@ -477,9 +514,11 @@ dsrc_stats = sorted(dsrc_stats, key=attrgetter('desc')) # Cursor Join statistics ########################################## join_stats = [ - JoinStat('accesses', 'accesses'), - JoinStat('actual_count', 'actual count of items'), JoinStat('bloom_false_positive', 'bloom filter false positives'), + JoinStat('bloom_insert', 'items inserted into a bloom filter'), + JoinStat('iterated', 'items iterated'), + JoinStat('main_access', 'accesses to the main table'), + JoinStat('membership_check', 'checks that conditions of membership are satisfied'), ] join_stats = sorted(join_stats, key=attrgetter('desc')) diff --git a/src/third_party/wiredtiger/examples/c/Makefile.am b/src/third_party/wiredtiger/examples/c/Makefile.am index 72fd98aff7b..d5305eec5c8 100644 --- a/src/third_party/wiredtiger/examples/c/Makefile.am +++ b/src/third_party/wiredtiger/examples/c/Makefile.am @@ -7,7 +7,6 @@ noinst_PROGRAMS = \ ex_async \ ex_backup \ ex_call_center \ - ex_config \ ex_config_parse \ ex_cursor \ ex_data_source \ @@ -15,6 +14,7 @@ noinst_PROGRAMS = \ ex_event_handler \ ex_extending \ ex_extractor \ + ex_file_system \ ex_hello \ ex_log \ ex_pack \ @@ -26,6 +26,7 @@ noinst_PROGRAMS 
= \ ex_thread ex_encrypt_LDFLAGS = -rdynamic +ex_file_system_LDFLAGS = -rdynamic # The examples can be run with no arguments as simple smoke tests TESTS = $(noinst_PROGRAMS) diff --git a/src/third_party/wiredtiger/examples/c/ex_access.c b/src/third_party/wiredtiger/examples/c/ex_access.c index cc42982617b..d7f3cc557ad 100644 --- a/src/third_party/wiredtiger/examples/c/ex_access.c +++ b/src/third_party/wiredtiger/examples/c/ex_access.c @@ -60,8 +60,8 @@ main(void) if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0 || (ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", - home, wiredtiger_strerror(ret)); - return (ret); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); } /*! [access example connection] */ @@ -95,5 +95,5 @@ main(void) ret = conn->close(conn, NULL); /*! [access example close] */ - return (ret); + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c index 1c036b75461..dd807922c10 100644 --- a/src/third_party/wiredtiger/examples/c/ex_all.c +++ b/src/third_party/wiredtiger/examples/c/ex_all.c @@ -1037,6 +1037,13 @@ backup(WT_SESSION *session) ret = cursor->close(cursor); /*! [backup]*/ + /*! [incremental backup]*/ + /* Open the backup data source for incremental backup. */ + ret = session->open_cursor( + session, "backup:", NULL, "target=(\"log:\")", &cursor); + /*! [incremental backup]*/ + ret = cursor->close(cursor); + /*! [backup of a checkpoint]*/ ret = session->checkpoint(session, "drop=(from=June01),name=June01"); /*! [backup of a checkpoint]*/ @@ -1207,5 +1214,5 @@ main(void) /*! [Get the WiredTiger library version #2] */ } - return (ret); + return (ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_async.c b/src/third_party/wiredtiger/examples/c/ex_async.c index ecdbd2f4fea..f7531a5c3d8 100644 --- a/src/third_party/wiredtiger/examples/c/ex_async.c +++ b/src/third_party/wiredtiger/examples/c/ex_async.c @@ -31,7 +31,9 @@ #include <errno.h> #include <inttypes.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> + #ifndef _WIN32 #include <unistd.h> #else @@ -48,7 +50,6 @@ #define ATOMIC_ADD(v, val) __sync_add_and_fetch(&(v), val) #endif -static const char * const home = NULL; static int global_error = 0; /*! [async example callback implementation] */ @@ -120,8 +121,19 @@ main(void) WT_CONNECTION *conn; WT_SESSION *session; int i, ret; + const char *home; char k[MAX_KEYS][16], v[MAX_KEYS][16]; + /* + * Create a clean test directory for this run of the test program if the + * environment variable isn't already set (as is done by make check). + */ + if (getenv("WIREDTIGER_HOME") == NULL) { + home = "WT_HOME"; + ret = system("rm -rf WT_HOME && mkdir WT_HOME"); + } else + home = NULL; + /*! [async example connection] */ ret = wiredtiger_open(home, NULL, "create,cache_size=100MB," @@ -148,7 +160,7 @@ main(void) if (ret == EBUSY) sleep(1); else - return (ret); + return (EXIT_FAILURE); } /*! [async handle allocation] */ @@ -198,7 +210,7 @@ main(void) if (ret == EBUSY) sleep(1); else - return (ret); + return (EXIT_FAILURE); } /*! [async search] */ @@ -220,5 +232,5 @@ main(void) printf("Searched for %" PRIu32 " keys\n", ex_asynckeys.num_keys); - return (ret); + return (ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_backup.c b/src/third_party/wiredtiger/examples/c/ex_backup.c index 12eeaa4b7c3..0697cbb3458 100644 --- a/src/third_party/wiredtiger/examples/c/ex_backup.c +++ b/src/third_party/wiredtiger/examples/c/ex_backup.c @@ -273,12 +273,12 @@ main(void) snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s && mkdir %s", home, home); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); - return (ret); + return (EXIT_FAILURE); } if ((ret = wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", home, wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } ret = setup_directories(); @@ -320,7 +320,9 @@ main(void) * comparison between the incremental and original. */ ret = wt_conn->close(wt_conn, NULL); + printf("Final comparison: dumping and comparing data\n"); ret = compare_backups(0); - return (ret); + + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_call_center.c b/src/third_party/wiredtiger/examples/c/ex_call_center.c index d401507d165..cd53a1cdaf9 100644 --- a/src/third_party/wiredtiger/examples/c/ex_call_center.c +++ b/src/third_party/wiredtiger/examples/c/ex_call_center.c @@ -107,8 +107,8 @@ main(void) if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", - home, wiredtiger_strerror(ret)); - return (1); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); } /* Note: further error checking omitted for clarity. */ @@ -245,5 +245,5 @@ main(void) ret = conn->close(conn, NULL); - return (ret); + return (ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_config_parse.c b/src/third_party/wiredtiger/examples/c/ex_config_parse.c index be3c78bedd4..40508b38204 100644 --- a/src/third_party/wiredtiger/examples/c/ex_config_parse.c +++ b/src/third_party/wiredtiger/examples/c/ex_config_parse.c @@ -32,6 +32,7 @@ #include <inttypes.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include <wiredtiger.h> @@ -51,12 +52,12 @@ main(void) NULL, config_string, strlen(config_string), &parser)) != 0) { fprintf(stderr, "Error creating configuration parser: %s\n", wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } if ((ret = parser->close(parser)) != 0) { fprintf(stderr, "Error closing configuration parser: %s\n", wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } /*! [Create a configuration parser] */ @@ -64,7 +65,7 @@ main(void) NULL, config_string, strlen(config_string), &parser)) != 0) { fprintf(stderr, "Error creating configuration parser: %s\n", wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } { @@ -76,7 +77,7 @@ main(void) if ((ret = parser->get(parser, "page_size", &v)) != 0) { fprintf(stderr, "page_size configuration: %s", wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } my_page_size = v.val; /*! [get] */ @@ -91,7 +92,7 @@ main(void) NULL, config_string, strlen(config_string), &parser)) != 0) { fprintf(stderr, "Error creating configuration parser: %s\n", wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } /*! [next] */ /* @@ -112,7 +113,7 @@ main(void) NULL, config_string, strlen(config_string), &parser)) != 0) { fprintf(stderr, "Error creating configuration parser: %s\n", wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } /*! 
[nested get] */ @@ -125,7 +126,7 @@ main(void) if ((ret = parser->get(parser, "log.file_max", &v)) != 0) { fprintf(stderr, "log.file_max configuration: %s", wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } printf("log file max: %" PRId64 "\n", v.val); /*! [nested get] */ @@ -135,7 +136,7 @@ main(void) NULL, config_string, strlen(config_string), &parser)) != 0) { fprintf(stderr, "Error creating configuration parser: %s\n", wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } /*! [nested traverse] */ { @@ -150,11 +151,10 @@ main(void) "Error creating nested configuration " "parser: %s\n", wiredtiger_strerror(ret)); - ret = parser->close(parser); - return (ret); + break; } - while ((ret = sub_parser->next( - sub_parser, &k, &v)) == 0) + while ((ret = + sub_parser->next(sub_parser, &k, &v)) == 0) printf("\t%.*s\n", (int)k.len, k.str); ret = sub_parser->close(sub_parser); } @@ -163,5 +163,5 @@ main(void) ret = parser->close(parser); } - return (ret); + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_cursor.c b/src/third_party/wiredtiger/examples/c/ex_cursor.c index 67c945ebc0b..b8ed6ab169d 100644 --- a/src/third_party/wiredtiger/examples/c/ex_cursor.c +++ b/src/third_party/wiredtiger/examples/c/ex_cursor.c @@ -181,12 +181,12 @@ main(void) if ((ret = wiredtiger_open( home, NULL, "create,statistics=(fast)", &conn)) != 0) fprintf(stderr, "Error connecting to %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); /* Open a session for the current thread's work. */ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) fprintf(stderr, "Error opening a session on %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." 
: home, wiredtiger_strerror(ret)); ret = session->create(session, "table:world", "key_format=r,value_format=5sii," @@ -220,9 +220,11 @@ main(void) ret = cursor->close(cursor); /* Note: closing the connection implicitly closes open session(s). */ - if ((ret = conn->close(conn, NULL)) != 0) + if ((ret = conn->close(conn, NULL)) != 0) { fprintf(stderr, "Error closing %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); + } - return (ret); + return (EXIT_SUCCESS); } diff --git a/src/third_party/wiredtiger/examples/c/ex_data_source.c b/src/third_party/wiredtiger/examples/c/ex_data_source.c index dd2b835e6ae..6ed80dfcf19 100644 --- a/src/third_party/wiredtiger/examples/c/ex_data_source.c +++ b/src/third_party/wiredtiger/examples/c/ex_data_source.c @@ -58,6 +58,17 @@ my_create(WT_DATA_SOURCE *dsrc, WT_SESSION *session, (void)config; { +#if !defined(ERROR_BAD_COMMAND) +#define ERROR_BAD_COMMAND 37 +#endif + /*! [WT_EXTENSION_API map_windows_error] */ + int posix_error = + wt_api->map_windows_error(wt_api, session, ERROR_BAD_COMMAND); + /*! [WT_EXTENSION_API map_windows_error] */ + (void)posix_error; + } + + { const char *msg = "string"; /*! [WT_EXTENSION_API err_printf] */ (void)wt_api->err_printf( @@ -667,7 +678,7 @@ main(void) (void)wt_api->msg_printf(wt_api, NULL, "configuration complete"); /*! [WT_EXTENSION_API default_session] */ - (void)conn->close(conn, NULL); + ret = conn->close(conn, NULL); - return (ret); + return (ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_encrypt.c b/src/third_party/wiredtiger/examples/c/ex_encrypt.c index c53a61c92ea..3b3323bc091 100644 --- a/src/third_party/wiredtiger/examples/c/ex_encrypt.c +++ b/src/third_party/wiredtiger/examples/c/ex_encrypt.c @@ -51,7 +51,7 @@ __declspec(dllexport) #endif int add_my_encryptors(WT_CONNECTION *connection); -static const char *home = NULL; +static const char *home; #define SYS_KEYID "system" #define SYS_PW "system_password" @@ -122,8 +122,8 @@ do_rotate(char *buf, size_t len, int rotn) * Now rotate */ for (i = 0; i < len; i++) - if (isalpha(buf[i])) { - if (islower(buf[i])) + if (isalpha((unsigned char)buf[i])) { + if (islower((unsigned char)buf[i])) buf[i] = ((buf[i] - 'a') + rotn) % 26 + 'a'; else buf[i] = ((buf[i] - 'A') + rotn) % 26 + 'A'; @@ -587,6 +587,8 @@ main(void) printf("Verified key %s; value %s\n", key1, val1); } + ret = conn->close(conn, NULL); - return (ret); + + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_event_handler.c b/src/third_party/wiredtiger/examples/c/ex_event_handler.c index d1e08edb04d..7122e71882e 100644 --- a/src/third_party/wiredtiger/examples/c/ex_event_handler.c +++ b/src/third_party/wiredtiger/examples/c/ex_event_handler.c @@ -68,7 +68,7 @@ handle_wiredtiger_error(WT_EVENT_HANDLER *handler, /* Report the error on the console. */ fprintf(stderr, "app_id %s, thread context %p, error %d, message %s\n", - custom_handler->app_id, session, error, message); + custom_handler->app_id, (void *)session, error, message); return (0); } @@ -83,7 +83,8 @@ handle_wiredtiger_message( { /* Cast the handler back to our custom handler. */ printf("app id %s, thread context %p, message %s\n", - ((CUSTOM_EVENT_HANDLER *)handler)->app_id, session, message); + ((CUSTOM_EVENT_HANDLER *)handler)->app_id, + (void *)session, message); return (0); } @@ -111,10 +112,10 @@ config_event_handler(void) /*! 
[Configure event_handler] */ /* Make an invalid API call, to ensure the event handler works. */ - (void)conn->open_session(conn, NULL, "isolation=invalid", &session); + printf("ex_event_handler: expect an error message to follow\n"); + ret = conn->open_session(conn, NULL, "isolation=invalid", &session); - if (ret == 0) - ret = conn->close(conn, NULL); + ret = conn->close(conn, NULL); return (ret); } @@ -122,6 +123,8 @@ config_event_handler(void) int main(void) { + int ret; + /* * Create a clean test directory for this run of the test program if the * environment variable isn't already set (as is done by make check). @@ -132,5 +135,7 @@ main(void) } else home = NULL; - return (config_event_handler()); + ret = config_event_handler(); + + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_extending.c b/src/third_party/wiredtiger/examples/c/ex_extending.c index 4d265ae1d2b..f276cdd3e1e 100644 --- a/src/third_party/wiredtiger/examples/c/ex_extending.c +++ b/src/third_party/wiredtiger/examples/c/ex_extending.c @@ -108,7 +108,7 @@ main(void) /* Open a connection to the database, creating it if necessary. */ if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) fprintf(stderr, "Error connecting to %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); /*! [add collator nocase] */ ret = conn->add_collator(conn, "nocase", &nocasecoll, NULL); @@ -119,15 +119,12 @@ main(void) /* Open a session for the current thread's work. */ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) fprintf(stderr, "Error opening a session on %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); - /* XXX Do some work... */ + /* Do some work... */ - /* Note: closing the connection implicitly closes open session(s). */ - if ((ret = conn->close(conn, NULL)) != 0) + ret = conn->close(conn, NULL); /*! 
[add collator prefix10] */ - fprintf(stderr, "Error closing %s: %s\n", - home, wiredtiger_strerror(ret)); - return (ret); + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_extractor.c b/src/third_party/wiredtiger/examples/c/ex_extractor.c index 8623f4759fc..f9d7af4af0f 100644 --- a/src/third_party/wiredtiger/examples/c/ex_extractor.c +++ b/src/third_party/wiredtiger/examples/c/ex_extractor.c @@ -283,5 +283,5 @@ main(void) ret = conn->close(conn, NULL); - return (ret); + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_file_system.c b/src/third_party/wiredtiger/examples/c/ex_file_system.c new file mode 100644 index 00000000000..77e8f40480b --- /dev/null +++ b/src/third_party/wiredtiger/examples/c/ex_file_system.c @@ -0,0 +1,975 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * ex_file_system.c + * demonstrates how to use the custom file system interface + */ + +/* + * Include WiredTiger internal functions: we need architecture portable locking + * in this example, and we use the TAILQ_XXX functions to keep the code simple. + * + * Application-writers SHOULD NOT INCLUDE "wt_internal.h", the public WiredTiger + * include files should be used instead: + * + * #include <wiredtiger.h> + * #include <wiredtiger_ext.h> + */ +#include "wt_internal.h" + +/* + * This example code uses internal WiredTiger functions for portable locking. + * We use #defines to clarify the meaning and ignore errors to simplify the + * code. + * + * Application writers SHOULD NOT COPY THIS LOCKING CODE, it's special-case code + * to make this example portable across platforms. + */ +#define ALLOCATE_FILE_SYSTEM_LOCK(demo_fs) \ + (void)__wt_spin_init(NULL, &(demo_fs)->lock, "demo file handle lock") +#define DESTROY_FILE_SYSTEM_LOCK(wt_session, demo_fs) \ + __wt_spin_destroy((WT_SESSION_IMPL *)(wt_session), &(demo_fs)->lock) +#define LOCK_FILE_SYSTEM(wt_session, demo_fs) \ + __wt_spin_lock((WT_SESSION_IMPL *)(wt_session), &(demo_fs)->lock) +#define UNLOCK_FILE_SYSTEM(wt_session, demo_fs) \ + __wt_spin_unlock( \ + (WT_SESSION_IMPL *)(wt_session), &(demo_fs)->lock) + +/* + * Example file system implementation, using memory buffers to represent files. + */ +typedef struct { + WT_FILE_SYSTEM iface; + + /* + * WiredTiger performs schema and I/O operations in parallel, all file + * system and file handle access must be thread-safe. 
This example uses + * a single, global file system lock for simplicity; real applications + * might require finer granularity, for example, a single lock for the + * file system handle list and per-handle locks serializing I/O. + */ + WT_SPINLOCK lock; /* Lock */ + + int opened_file_count; + int opened_unique_file_count; + int closed_file_count; + int read_ops; + int write_ops; + + /* Queue of file handles */ + TAILQ_HEAD(demo_file_handle_qh, demo_file_handle) fileq; + + WT_EXTENSION_API *wtext; /* Extension functions */ + +} DEMO_FILE_SYSTEM; + +typedef struct demo_file_handle { + WT_FILE_HANDLE iface; + + /* + * Add custom file handle fields after the interface. + */ + DEMO_FILE_SYSTEM *demo_fs; /* Enclosing file system */ + + TAILQ_ENTRY(demo_file_handle) q; /* Queue of handles */ + uint32_t ref; /* Reference count */ + + char *buf; /* In-memory contents */ + size_t bufsize; /* In-memory buffer size */ + + size_t size; /* Read/write data size */ +} DEMO_FILE_HANDLE; + +/* + * Extension initialization function. + */ +#ifdef _WIN32 +/* + * Explicitly export this function so it is visible when loading extensions. 
+ */ +__declspec(dllexport) +#endif +int demo_file_system_create(WT_CONNECTION *, WT_CONFIG_ARG *); + +/* + * Forward function declarations for file system API implementation + */ +static int demo_fs_open(WT_FILE_SYSTEM *, + WT_SESSION *, const char *, WT_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **); +static int demo_fs_directory_list(WT_FILE_SYSTEM *, WT_SESSION *, + const char *, const char *, char ***, uint32_t *); +static int demo_fs_directory_list_free( + WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t); +static int demo_fs_directory_sync(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *directory); +static int demo_fs_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *); +static int demo_fs_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *); +static int demo_fs_rename( + WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *); +static int demo_fs_size( + WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *); +static int demo_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *); + +/* + * Forward function declarations for file handle API implementation + */ +static int demo_file_close(WT_FILE_HANDLE *, WT_SESSION *); +static int demo_file_lock(WT_FILE_HANDLE *, WT_SESSION *, bool); +static int demo_file_read( + WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *); +static int demo_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *); +static int demo_file_sync(WT_FILE_HANDLE *, WT_SESSION *); +static int demo_file_truncate(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t); +static int demo_file_write( + WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, const void *); + +/* + * Forward function declarations for internal functions + */ +static int demo_handle_remove(WT_SESSION *, DEMO_FILE_HANDLE *); +static DEMO_FILE_HANDLE *demo_handle_search(WT_FILE_SYSTEM *, const char *); + +#define DEMO_FILE_SIZE_INCREMENT 32768 + +/* + * byte_string_match -- + * Return if a string matches a byte string of len bytes.
+ */ +static bool +byte_string_match(const char *str, const char *bytes, size_t len) +{ + return (strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0'); +} + +/* + * demo_file_system_create -- + * Initialization point for demo file system + */ +int +demo_file_system_create(WT_CONNECTION *conn, WT_CONFIG_ARG *config) +{ + DEMO_FILE_SYSTEM *demo_fs; + WT_CONFIG_ITEM k, v; + WT_CONFIG_PARSER *config_parser; + WT_EXTENSION_API *wtext; + WT_FILE_SYSTEM *file_system; + int ret = 0; + + wtext = conn->get_extension_api(conn); + + if ((demo_fs = calloc(1, sizeof(DEMO_FILE_SYSTEM))) == NULL) { + (void)wtext->err_printf(wtext, NULL, + "demo_file_system_create: %s", + wtext->strerror(wtext, NULL, ENOMEM)); + return (ENOMEM); + } + demo_fs->wtext = wtext; + file_system = (WT_FILE_SYSTEM *)demo_fs; + + /* + * Applications may have their own configuration information to pass to + * the underlying filesystem implementation. See the main function for + * the setup of those configuration strings; here we parse configuration + * information as passed in by main, through WiredTiger. + * + * Retrieve our configuration information, the "config" value. + */ + if ((ret = wtext->config_get(wtext, NULL, config, "config", &v)) != 0) { + (void)wtext->err_printf(wtext, NULL, + "WT_EXTENSION_API.config_get: config: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + + /* Open a WiredTiger parser on the "config" value. */ + if ((ret = wtext->config_parser_open( + wtext, NULL, v.str, v.len, &config_parser)) != 0) { + (void)wtext->err_printf(wtext, NULL, + "WT_EXTENSION_API.config_parser_open: config: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + + /* Step through our configuration values. 
*/ + printf("Custom file system configuration\n"); + while ((ret = config_parser->next(config_parser, &k, &v)) == 0) { + if (byte_string_match("config_string", k.str, k.len)) { + printf("\t" "key %.*s=\"%.*s\"\n", + (int)k.len, k.str, (int)v.len, v.str); + continue; + } + if (byte_string_match("config_value", k.str, k.len)) { + printf("\t" "key %.*s=%" PRId64 "\n", + (int)k.len, k.str, v.val); + continue; + } + ret = EINVAL; + (void)wtext->err_printf(wtext, NULL, + "WT_CONFIG_PARSER.next: unexpected configuration " + "information: %.*s=%.*s: %s", + (int)k.len, k.str, (int)v.len, v.str, + wtext->strerror(wtext, NULL, ret)); + goto err; + } + + /* Check for expected parser termination and close the parser. */ + if (ret != WT_NOTFOUND) { + (void)wtext->err_printf(wtext, NULL, + "WT_CONFIG_PARSER.next: config: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + if ((ret = config_parser->close(config_parser)) != 0) { + (void)wtext->err_printf(wtext, NULL, + "WT_CONFIG_PARSER.close: config: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + + ALLOCATE_FILE_SYSTEM_LOCK(demo_fs); + + /* Initialize the in-memory jump table. */ + file_system->fs_directory_list = demo_fs_directory_list; + file_system->fs_directory_list_free = demo_fs_directory_list_free; + file_system->fs_directory_sync = demo_fs_directory_sync; + file_system->fs_exist = demo_fs_exist; + file_system->fs_open_file = demo_fs_open; + file_system->fs_remove = demo_fs_remove; + file_system->fs_rename = demo_fs_rename; + file_system->fs_size = demo_fs_size; + file_system->terminate = demo_fs_terminate; + + if ((ret = conn->set_file_system(conn, file_system, NULL)) != 0) { + (void)wtext->err_printf(wtext, NULL, + "WT_CONNECTION.set_file_system: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + return (0); + +err: free(demo_fs); + /* An error installing the file system is fatal. 
*/ + exit(1); +} + +/* + * demo_fs_open -- + * fopen for our demo file system + */ +static int +demo_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, + const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + WT_FILE_HANDLE **file_handlep) +{ + DEMO_FILE_HANDLE *demo_fh; + DEMO_FILE_SYSTEM *demo_fs; + WT_EXTENSION_API *wtext; + WT_FILE_HANDLE *file_handle; + int ret = 0; + + (void)file_type; /* Unused */ + (void)flags; /* Unused */ + + *file_handlep = NULL; + + demo_fs = (DEMO_FILE_SYSTEM *)file_system; + demo_fh = NULL; + wtext = demo_fs->wtext; + + LOCK_FILE_SYSTEM(session, demo_fs); + ++demo_fs->opened_file_count; + + /* + * First search the file queue, if we find it, assert there's only a + * single reference, we only support a single handle on any file. + */ + demo_fh = demo_handle_search(file_system, name); + if (demo_fh != NULL) { + if (demo_fh->ref != 0) { + (void)wtext->err_printf(wtext, session, + "demo_fs_open: %s: file already open", name); + ret = EBUSY; + goto err; + } + + demo_fh->ref = 1; + + *file_handlep = (WT_FILE_HANDLE *)demo_fh; + + UNLOCK_FILE_SYSTEM(session, demo_fs); + return (0); + } + + /* The file hasn't been opened before, create a new one. */ + if ((demo_fh = calloc(1, sizeof(DEMO_FILE_HANDLE))) == NULL) { + ret = ENOMEM; + goto err; + } + + /* Initialize private information. */ + demo_fh->demo_fs = demo_fs; + demo_fh->ref = 1; + if ((demo_fh->buf = calloc(1, DEMO_FILE_SIZE_INCREMENT)) == NULL) { + ret = ENOMEM; + goto err; + } + demo_fh->bufsize = DEMO_FILE_SIZE_INCREMENT; + demo_fh->size = 0; + + /* Initialize public information. */ + file_handle = (WT_FILE_HANDLE *)demo_fh; + if ((file_handle->name = strdup(name)) == NULL) { + ret = ENOMEM; + goto err; + } + + /* + * Setup the function call table for our custom file system. Set the + * function pointer to NULL where our implementation doesn't support + * the functionality. 
+ */ + file_handle->close = demo_file_close; + file_handle->fh_advise = NULL; + file_handle->fh_allocate = NULL; + file_handle->fh_allocate_nolock = NULL; + file_handle->fh_lock = demo_file_lock; + file_handle->fh_map = NULL; + file_handle->fh_map_discard = NULL; + file_handle->fh_map_preload = NULL; + file_handle->fh_unmap = NULL; + file_handle->fh_read = demo_file_read; + file_handle->fh_size = demo_file_size; + file_handle->fh_sync = demo_file_sync; + file_handle->fh_sync_nowait = NULL; + file_handle->fh_truncate = demo_file_truncate; + file_handle->fh_write = demo_file_write; + + TAILQ_INSERT_HEAD(&demo_fs->fileq, demo_fh, q); + ++demo_fs->opened_unique_file_count; + + *file_handlep = file_handle; + + if (0) { +err: free(demo_fh->buf); + free(demo_fh); + } + + UNLOCK_FILE_SYSTEM(session, demo_fs); + return (ret); +} + +/* + * demo_fs_directory_list -- + * Return a list of files in a given sub-directory. + */ +static int +demo_fs_directory_list(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *directory, + const char *prefix, char ***dirlistp, uint32_t *countp) +{ + DEMO_FILE_HANDLE *demo_fh; + DEMO_FILE_SYSTEM *demo_fs; + size_t len, prefix_len; + uint32_t allocated, count; + int ret = 0; + char *name, **entries; + + demo_fs = (DEMO_FILE_SYSTEM *)file_system; + + *dirlistp = NULL; + *countp = 0; + + entries = NULL; + allocated = count = 0; + len = strlen(directory); + prefix_len = prefix == NULL ? 0 : strlen(prefix); + + LOCK_FILE_SYSTEM(session, demo_fs); + TAILQ_FOREACH(demo_fh, &demo_fs->fileq, q) { + name = demo_fh->iface.name; + if (strncmp(name, directory, len) != 0 || + (prefix != NULL && strncmp(name, prefix, prefix_len) != 0)) + continue; + + /* + * Increase the list size in groups of 10, it doesn't + * matter if the list is a bit longer than necessary. 
+ */ + if (count >= allocated) { + entries = realloc( + entries, (allocated + 10) * sizeof(char *)); + if (entries == NULL) { + ret = ENOMEM; + goto err; + } + memset(entries + allocated * sizeof(char *), + 0, 10 * sizeof(char *)); + allocated += 10; + } + entries[count++] = strdup(name); + } + + *dirlistp = entries; + *countp = count; + +err: UNLOCK_FILE_SYSTEM(session, demo_fs); + if (ret == 0) + return (0); + + if (entries != NULL) { + while (count > 0) + free(entries[--count]); + free(entries); + } + + return (ret); +} + +/* + * demo_fs_directory_list_free -- + * Free memory allocated by demo_fs_directory_list. + */ +static int +demo_fs_directory_list_free(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, char **dirlist, uint32_t count) +{ + (void)file_system; + (void)session; + + if (dirlist != NULL) { + while (count > 0) + free(dirlist[--count]); + free(dirlist); + } + return (0); +} + +/* + * demo_fs_directory_sync -- + * Directory sync for our demo file system, which is a no-op. + */ +static int +demo_fs_directory_sync(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *directory) +{ + (void)file_system; /* Unused */ + (void)session; /* Unused */ + (void)directory; /* Unused */ + + return (0); +} + +/* + * demo_fs_exist -- + * Return if the file exists. + */ +static int +demo_fs_exist(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *name, bool *existp) +{ + DEMO_FILE_SYSTEM *demo_fs; + + demo_fs = (DEMO_FILE_SYSTEM *)file_system; + + LOCK_FILE_SYSTEM(session, demo_fs); + *existp = demo_handle_search(file_system, name) != NULL; + UNLOCK_FILE_SYSTEM(session, demo_fs); + + return (0); +} + +/* + * demo_fs_remove -- + * POSIX remove. 
+ */ +static int +demo_fs_remove( + WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name) +{ + DEMO_FILE_SYSTEM *demo_fs; + DEMO_FILE_HANDLE *demo_fh; + int ret = 0; + + demo_fs = (DEMO_FILE_SYSTEM *)file_system; + + ret = ENOENT; + LOCK_FILE_SYSTEM(session, demo_fs); + if ((demo_fh = demo_handle_search(file_system, name)) != NULL) + ret = demo_handle_remove(session, demo_fh); + UNLOCK_FILE_SYSTEM(session, demo_fs); + + return (ret); +} + +/* + * demo_fs_rename -- + * POSIX rename. + */ +static int +demo_fs_rename(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *from, const char *to) +{ + DEMO_FILE_HANDLE *demo_fh; + DEMO_FILE_SYSTEM *demo_fs; + char *copy; + int ret = 0; + + demo_fs = (DEMO_FILE_SYSTEM *)file_system; + + LOCK_FILE_SYSTEM(session, demo_fs); + if ((demo_fh = demo_handle_search(file_system, from)) == NULL) + ret = ENOENT; + else if ((copy = strdup(to)) == NULL) + ret = ENOMEM; + else { + free(demo_fh->iface.name); + demo_fh->iface.name = copy; + } + UNLOCK_FILE_SYSTEM(session, demo_fs); + return (ret); +} + +/* + * demo_fs_size -- + * Get the size of a file in bytes, by file name. 
+ */
+static int
+demo_fs_size(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, const char *name, wt_off_t *sizep)
+{
+	DEMO_FILE_SYSTEM *demo_fs;
+	DEMO_FILE_HANDLE *demo_fh;
+	int ret = 0;
+
+	demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+	/*
+	 * Read the size directly rather than calling demo_file_size: that
+	 * function acquires the file system lock itself, and we already hold
+	 * it here, so calling it would acquire the same lock twice.
+	 */
+	ret = ENOENT;
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	if ((demo_fh = demo_handle_search(file_system, name)) != NULL) {
+		*sizep = (wt_off_t)demo_fh->size;
+		ret = 0;
+	}
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+
+	return (ret);
+}
+
+/*
+ * demo_fs_terminate --
+ *	Discard any resources on termination. Returns the first error seen
+ *	while discarding file handles, or 0.
+ */
+static int
+demo_fs_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *session)
+{
+	DEMO_FILE_HANDLE *demo_fh, *demo_fh_next;
+	DEMO_FILE_SYSTEM *demo_fs;
+	int ret = 0, tret;
+
+	demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+	/*
+	 * Walk the list exactly once, saving the next element before each
+	 * removal. A handle that is still referenced is not unlinked by
+	 * demo_handle_remove (it returns EBUSY), so repeatedly removing the
+	 * first element of the list would never terminate.
+	 */
+	for (demo_fh = TAILQ_FIRST(&demo_fs->fileq);
+	    demo_fh != NULL; demo_fh = demo_fh_next) {
+		demo_fh_next = TAILQ_NEXT(demo_fh, q);
+		if ((tret =
+		    demo_handle_remove(session, demo_fh)) != 0 && ret == 0)
+			ret = tret;
+	}
+
+	printf("Custom file system\n");
+	printf("\t%d unique file opens\n", demo_fs->opened_unique_file_count);
+	printf("\t%d files opened\n", demo_fs->opened_file_count);
+	printf("\t%d files closed\n", demo_fs->closed_file_count);
+	printf("\t%d reads, %d writes\n",
+	    demo_fs->read_ops, demo_fs->write_ops);
+
+	DESTROY_FILE_SYSTEM_LOCK(session, demo_fs);
+	free(demo_fs);
+
+	return (ret);
+}
+
+/*
+ * demo_file_close --
+ *	ANSI C close. Drops one reference on the handle; the handle and its
+ *	data are not freed here.
+ */
+static int
+demo_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
+{
+	DEMO_FILE_HANDLE *demo_fh;
+	DEMO_FILE_SYSTEM *demo_fs;
+
+	demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+	demo_fs = demo_fh->demo_fs;
+
+	/* Only the close that drops the last reference is counted. */
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	if (--demo_fh->ref == 0)
+		++demo_fs->closed_file_count;
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+
+	return (0);
+}
+
+/*
+ * demo_file_lock --
+ *	Lock/unlock a file.
+ */
+static int
+demo_file_lock(WT_FILE_HANDLE *file_handle, WT_SESSION *session, bool lock)
+{
+	/* Locks are always granted. */
+	(void)file_handle;				/* Unused */
+	(void)session;					/* Unused */
+	(void)lock;					/* Unused */
+	return (0);
+}
+
+/*
+ * demo_file_read --
+ *	POSIX pread. Copies up to len bytes starting at offset into buf; a
+ *	read starting at or past the end of the file fails with EIO.
+ */
+static int
+demo_file_read(WT_FILE_HANDLE *file_handle,
+    WT_SESSION *session, wt_off_t offset, size_t len, void *buf)
+{
+	DEMO_FILE_HANDLE *demo_fh;
+	DEMO_FILE_SYSTEM *demo_fs;
+	WT_EXTENSION_API *wtext;
+	size_t off;
+	int ret = 0;
+
+	demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+	demo_fs = demo_fh->demo_fs;
+	wtext = demo_fs->wtext;
+	off = (size_t)offset;
+
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	++demo_fs->read_ops;
+	if (off < demo_fh->size) {
+		/*
+		 * A request running past the end of the file is clipped to the
+		 * bytes that exist and still reported as success.
+		 */
+		if (len > demo_fh->size - off)
+			len = demo_fh->size - off;
+		memcpy(buf, (uint8_t *)demo_fh->buf + off, len);
+	} else
+		ret = EIO;		/* EOF */
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+	if (ret == 0)
+		return (0);
+
+	(void)wtext->err_printf(wtext, session,
+	    "%s: handle-read: failed to read %zu bytes at offset %zu: %s",
+	    demo_fh->iface.name, len, off, wtext->strerror(wtext, NULL, ret));
+	return (ret);
+}
+
+/*
+ * demo_file_size --
+ *	Get the size of a file in bytes, by file handle.
+ */
+static int
+demo_file_size(
+    WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep)
+{
+	DEMO_FILE_HANDLE *demo_fh;
+	DEMO_FILE_SYSTEM *demo_fs;
+
+	demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+	demo_fs = demo_fh->demo_fs;
+
+	/* The size is read under the file system lock. */
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	*sizep = (wt_off_t)demo_fh->size;
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+	return (0);
+}
+
+/*
+ * demo_file_sync --
+ *	Ensure the content of the file is stable. This is a no-op in our
+ *	memory backed file system.
+ */
+static int
+demo_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
+{
+	(void)file_handle;				/* Unused */
+	(void)session;					/* Unused */
+
+	return (0);
+}
+
+/*
+ * demo_buffer_resize --
+ *	Resize the write buffer.
+ *	Only ever grows the buffer: a request no larger than the current
+ *	buffer size returns success without changing anything. Returns
+ *	ENOMEM, leaving the existing buffer intact, if it cannot be grown.
+ */
+static int
+demo_buffer_resize(
+    WT_SESSION *session, DEMO_FILE_HANDLE *demo_fh, wt_off_t offset)
+{
+	DEMO_FILE_SYSTEM *demo_fs;
+	WT_EXTENSION_API *wtext;
+	size_t off;
+	void *p;
+
+	demo_fs = demo_fh->demo_fs;
+	wtext = demo_fs->wtext;
+	off = (size_t)offset;
+
+	/* Grow the buffer as necessary and clear any new space in the file. */
+	if (demo_fh->bufsize >= off)
+		return (0);
+
+	/*
+	 * Keep the realloc result in a temporary so the old buffer is still
+	 * owned by the handle if the allocation fails.
+	 */
+	if ((p = realloc(demo_fh->buf, off)) == NULL) {
+		/* The format needs a conversion for the error string. */
+		(void)wtext->err_printf(wtext, session,
+		    "%s: failed to resize buffer: %s",
+		    demo_fh->iface.name, wtext->strerror(wtext, NULL, ENOMEM));
+		return (ENOMEM);
+	}
+	memset((uint8_t *)p + demo_fh->bufsize, 0, off - demo_fh->bufsize);
+	demo_fh->buf = p;
+	demo_fh->bufsize = off;
+
+	return (0);
+}
+
+/*
+ * demo_file_truncate --
+ *	POSIX ftruncate. Sets the file size to offset, growing the buffer
+ *	first if needed; shrinking the file does not shrink the buffer.
+ */
+static int
+demo_file_truncate(
+    WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset)
+{
+	DEMO_FILE_HANDLE *demo_fh;
+	DEMO_FILE_SYSTEM *demo_fs;
+	int ret = 0;
+
+	demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+	demo_fs = demo_fh->demo_fs;
+
+	/* The size is only updated if the buffer can hold it. */
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	if ((ret = demo_buffer_resize(session, demo_fh, offset)) == 0)
+		demo_fh->size = (size_t)offset;
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+	return (ret);
+}
+
+/*
+ * demo_file_write --
+ *	POSIX pwrite.
+ *	Copies len bytes from buf into the file at offset, growing the
+ *	buffer and extending the file size as needed.
+ */
+static int
+demo_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *session,
+    wt_off_t offset, size_t len, const void *buf)
+{
+	DEMO_FILE_HANDLE *demo_fh;
+	DEMO_FILE_SYSTEM *demo_fs;
+	WT_EXTENSION_API *wtext;
+	size_t off;
+	int ret = 0;
+
+	demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+	demo_fs = demo_fh->demo_fs;
+	wtext = demo_fs->wtext;
+	off = (size_t)offset;
+
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	++demo_fs->write_ops;
+	/*
+	 * Ask for DEMO_FILE_SIZE_INCREMENT bytes beyond the end of this
+	 * write, so the buffer is grown in larger steps than the write size.
+	 */
+	if ((ret = demo_buffer_resize(session, demo_fh,
+	    offset + (wt_off_t)(len + DEMO_FILE_SIZE_INCREMENT))) == 0) {
+		memcpy((uint8_t *)demo_fh->buf + off, buf, len);
+		/* Writing past the current end extends the file. */
+		if (off + len > demo_fh->size)
+			demo_fh->size = off + len;
+	}
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+	if (ret == 0)
+		return (0);
+
+	(void)wtext->err_printf(wtext, session,
+	    "%s: handle-write: failed to write %zu bytes at offset %zu: %s",
+	    demo_fh->iface.name, len, off, wtext->strerror(wtext, NULL, ret));
+	return (ret);
+}
+
+/*
+ * demo_handle_remove --
+ *	Destroy an in-memory file handle. Should only happen on remove or
+ *	shutdown. Returns EBUSY, leaving the handle on the list, if the
+ *	handle is still referenced.
+ */
+static int
+demo_handle_remove(WT_SESSION *session, DEMO_FILE_HANDLE *demo_fh)
+{
+	DEMO_FILE_SYSTEM *demo_fs;
+	WT_EXTENSION_API *wtext;
+
+	demo_fs = demo_fh->demo_fs;
+	wtext = demo_fs->wtext;
+
+	if (demo_fh->ref != 0) {
+		/* The format needs a conversion for the error string. */
+		(void)wtext->err_printf(wtext, session,
+		    "demo_handle_remove: %s: file is currently open: %s",
+		    demo_fh->iface.name, wtext->strerror(wtext, NULL, EBUSY));
+		return (EBUSY);
+	}
+
+	/* Unlink the handle before releasing anything it owns. */
+	TAILQ_REMOVE(&demo_fs->fileq, demo_fh, q);
+
+	/* Clean up private information. */
+	free(demo_fh->buf);
+
+	/* Clean up public information. */
+	free(demo_fh->iface.name);
+
+	free(demo_fh);
+
+	return (0);
+}
+
+/*
+ * demo_handle_search --
+ *	Return a matching handle, if one exists.
+ *	Handles are matched by exact name; NULL is returned if none match.
+ */
+static DEMO_FILE_HANDLE *
+demo_handle_search(WT_FILE_SYSTEM *file_system, const char *name)
+{
+	DEMO_FILE_HANDLE *demo_fh;
+	DEMO_FILE_SYSTEM *demo_fs;
+
+	demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+	/* The loop variable is returned: the match, or the end-of-list value. */
+	TAILQ_FOREACH(demo_fh, &demo_fs->fileq, q)
+		if (strcmp(demo_fh->iface.name, name) == 0)
+			break;
+	return (demo_fh);
+}
+
+static const char *home;
+
+/*
+ * main --
+ *	Open a database using the custom file system, insert 1000 records,
+ *	read them back in order and verify the keys. Returns EXIT_SUCCESS or
+ *	EXIT_FAILURE.
+ */
+int
+main(void)
+{
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor;
+	WT_SESSION *session;
+	const char *key, *open_config, *uri;
+	int i;
+	int ret = 0;
+	char kbuf[64];
+
+	/*
+	 * Create a clean test directory for this run of the test program if the
+	 * environment variable isn't already set (as is done by make check).
+	 */
+	if (getenv("WIREDTIGER_HOME") == NULL) {
+		home = "WT_HOME";
+		/* Don't continue into a directory we failed to set up. */
+		if ((ret = system("rm -rf WT_HOME && mkdir WT_HOME")) != 0) {
+			fprintf(stderr,
+			    "Error creating directory %s: ret %d\n", home, ret);
+			return (EXIT_FAILURE);
+		}
+	} else
+		home = NULL;
+
+	/*! [WT_FILE_SYSTEM register] */
+	/*
+	 * Set up a configuration string that will load our custom file system.
+	 * Use the special local extension to indicate that the entry point is
+	 * in the same executable. Also enable early load for this extension,
+	 * since WiredTiger needs to be able to find it before doing any file
+	 * operations. Finally, pass in two pieces of configuration information
+	 * to our initialization function as the "config" value.
+	 */
+	open_config = "create,log=(enabled=true),extensions=(local={"
+	    "entry=demo_file_system_create,early_load=true,"
+	    "config={config_string=\"demo-file-system\",config_value=37}"
+	    "})";
+	/* Open a connection to the database, creating it if necessary. */
+	if ((ret = wiredtiger_open(home, NULL, open_config, &conn)) != 0) {
+		fprintf(stderr, "Error connecting to %s: %s\n",
+		    home == NULL ? "." : home, wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+	/*! [WT_FILE_SYSTEM register] */
+
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
+		fprintf(stderr, "WT_CONNECTION.open_session: %s\n",
+		    wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+	uri = "table:fs";
+	if ((ret = session->create(
+	    session, uri, "key_format=S,value_format=S")) != 0) {
+		fprintf(stderr, "WT_SESSION.create: %s: %s\n",
+		    uri, wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+	if ((ret = session->open_cursor(
+	    session, uri, NULL, NULL, &cursor)) != 0) {
+		fprintf(stderr, "WT_SESSION.open_cursor: %s: %s\n",
+		    uri, wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+	/* Insert 1000 records whose keys sort in insertion order. */
+	for (i = 0; i < 1000; ++i) {
+		(void)snprintf(kbuf, sizeof(kbuf), "%010d KEY -----", i);
+		cursor->set_key(cursor, kbuf);
+		cursor->set_value(cursor, "--- VALUE ---");
+		if ((ret = cursor->insert(cursor)) != 0) {
+			fprintf(stderr, "WT_CURSOR.insert: %s: %s\n",
+			    kbuf, wiredtiger_strerror(ret));
+			return (EXIT_FAILURE);
+		}
+	}
+	if ((ret = cursor->close(cursor)) != 0) {
+		fprintf(stderr, "WT_CURSOR.close: %s\n",
+		    wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+	if ((ret = session->open_cursor(
+	    session, uri, NULL, NULL, &cursor)) != 0) {
+		fprintf(stderr, "WT_SESSION.open_cursor: %s: %s\n",
+		    uri, wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+	/* Read the records back and check each key against the expected one. */
+	for (i = 0; i < 1000; ++i) {
+		/*
+		 * Build the expected key first so a failed cursor step is
+		 * reported against the key being looked for.
+		 */
+		(void)snprintf(kbuf, sizeof(kbuf), "%010d KEY -----", i);
+		if ((ret = cursor->next(cursor)) != 0) {
+			fprintf(stderr, "WT_CURSOR.next: %s: %s\n",
+			    kbuf, wiredtiger_strerror(ret));
+			return (EXIT_FAILURE);
+		}
+		if ((ret = cursor->get_key(cursor, &key)) != 0) {
+			fprintf(stderr, "WT_CURSOR.get_key: %s\n",
+			    wiredtiger_strerror(ret));
+			return (EXIT_FAILURE);
+		}
+		if (strcmp(kbuf, key) != 0) {
+			fprintf(stderr, "Key mismatch: %s, %s\n", kbuf, key);
+			return (EXIT_FAILURE);
+		}
+	}
+	/* There must be nothing after the last inserted record. */
+	if ((ret = cursor->next(cursor)) != WT_NOTFOUND) {
+		fprintf(stderr,
+		    "WT_CURSOR.next: expected WT_NOTFOUND, got %s\n",
+		    wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+
+	if ((ret = conn->close(conn, NULL)) != 0) {
+		fprintf(stderr, "Error closing connection to %s: %s\n",
+		    home == NULL ? "." : home, wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+
+	return (EXIT_SUCCESS);
+}
diff --git a/src/third_party/wiredtiger/examples/c/ex_hello.c b/src/third_party/wiredtiger/examples/c/ex_hello.c index 345e434741f..99534ee8868 100644 --- a/src/third_party/wiredtiger/examples/c/ex_hello.c +++ b/src/third_party/wiredtiger/examples/c/ex_hello.c @@ -56,21 +56,27 @@ main(void) home = NULL; /* Open a connection to the database, creating it if necessary. */ - if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) + if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); + } /* Open a session for the current thread's work. */ - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { fprintf(stderr, "Error opening a session on %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); + } /* Do some work... */ /* Note: closing the connection implicitly closes open session(s). */ - if ((ret = conn->close(conn, NULL)) != 0) + if ((ret = conn->close(conn, NULL)) != 0) { fprintf(stderr, "Error closing %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." 
: home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); + } - return (ret); + return (EXIT_SUCCESS); } diff --git a/src/third_party/wiredtiger/examples/c/ex_log.c b/src/third_party/wiredtiger/examples/c/ex_log.c index 78bd7e683cf..fdbc39412ae 100644 --- a/src/third_party/wiredtiger/examples/c/ex_log.c +++ b/src/third_party/wiredtiger/examples/c/ex_log.c @@ -295,12 +295,12 @@ main(void) home1, home2, home1, home2); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); - return (ret); + return (EXIT_FAILURE); } if ((ret = wiredtiger_open(home1, NULL, CONN_CONFIG, &wt_conn)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", home1, wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } ret = wt_conn->open_session(wt_conn, NULL, NULL, &session); @@ -348,12 +348,13 @@ main(void) if ((ret = wiredtiger_open(home1, NULL, CONN_CONFIG, &wt_conn)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", home1, wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } ret = wt_conn->open_session(wt_conn, NULL, NULL, &session); ret = simple_walk_log(session, count_min); ret = walk_log(session); ret = wt_conn->close(wt_conn, NULL); - return (ret); + + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_pack.c b/src/third_party/wiredtiger/examples/c/ex_pack.c index 43b57880674..86725123f55 100644 --- a/src/third_party/wiredtiger/examples/c/ex_pack.c +++ b/src/third_party/wiredtiger/examples/c/ex_pack.c @@ -55,14 +55,18 @@ main(void) home = NULL; /* Open a connection to the database, creating it if necessary. */ - if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) + if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); + } /* Open a session for the current thread's work. 
*/ - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { fprintf(stderr, "Error opening a session on %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); + } { /*! [packing] */ @@ -81,9 +85,11 @@ main(void) } /* Note: closing the connection implicitly closes open session(s). */ - if ((ret = conn->close(conn, NULL)) != 0) + if ((ret = conn->close(conn, NULL)) != 0) { fprintf(stderr, "Error closing %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); + } - return (ret); + return (EXIT_SUCCESS); } diff --git a/src/third_party/wiredtiger/examples/c/ex_process.c b/src/third_party/wiredtiger/examples/c/ex_process.c index 19f395dddaf..217730c4288 100644 --- a/src/third_party/wiredtiger/examples/c/ex_process.c +++ b/src/third_party/wiredtiger/examples/c/ex_process.c @@ -58,22 +58,28 @@ main(void) /*! [processes] */ /* Open a connection to the database, creating it if necessary. */ if ((ret = - wiredtiger_open(home, NULL, "create,multiprocess", &conn)) != 0) + wiredtiger_open(home, NULL, "create,multiprocess", &conn)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); + } /* Open a session for the current thread's work. */ - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { fprintf(stderr, "Error opening a session on %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); + } /* XXX Do some work... */ /* Note: closing the connection implicitly closes open session(s). 
*/ - if ((ret = conn->close(conn, NULL)) != 0) + if ((ret = conn->close(conn, NULL)) != 0) { fprintf(stderr, "Error closing %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); + } /*! [processes] */ - return (ret); + return (EXIT_SUCCESS); } diff --git a/src/third_party/wiredtiger/examples/c/ex_schema.c b/src/third_party/wiredtiger/examples/c/ex_schema.c index 70fc7eb2e62..a59d9480780 100644 --- a/src/third_party/wiredtiger/examples/c/ex_schema.c +++ b/src/third_party/wiredtiger/examples/c/ex_schema.c @@ -69,7 +69,8 @@ main(void) { POP_RECORD *p; WT_CONNECTION *conn; - WT_CURSOR *cursor, *cursor2, *join_cursor, *stat_cursor; + WT_CURSOR *country_cursor, *country_cursor2, *cursor, *join_cursor, + *stat_cursor, *subjoin_cursor, *year_cursor; WT_SESSION *session; const char *country; uint64_t recno, population; @@ -89,8 +90,8 @@ main(void) if ((ret = wiredtiger_open( home, NULL, "create,statistics=(fast)", &conn)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", - home, wiredtiger_strerror(ret)); - return (ret); + home == NULL ? "." : home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); } /* Note: error checking omitted for clarity. 
*/ @@ -336,18 +337,18 @@ main(void) ret = session->open_cursor(session, "join:table:poptable", NULL, NULL, &join_cursor); ret = session->open_cursor(session, - "index:poptable:country", NULL, NULL, &cursor); + "index:poptable:country", NULL, NULL, &country_cursor); ret = session->open_cursor(session, - "index:poptable:immutable_year", NULL, NULL, &cursor2); + "index:poptable:immutable_year", NULL, NULL, &year_cursor); /* select values WHERE country == "AU" AND year > 1900 */ - cursor->set_key(cursor, "AU\0\0\0"); - ret = cursor->search(cursor); - ret = session->join(session, join_cursor, cursor, + country_cursor->set_key(country_cursor, "AU\0\0\0"); + ret = country_cursor->search(country_cursor); + ret = session->join(session, join_cursor, country_cursor, "compare=eq,count=10"); - cursor2->set_key(cursor2, (uint16_t)1900); - ret = cursor2->search(cursor2); - ret = session->join(session, join_cursor, cursor2, + year_cursor->set_key(year_cursor, (uint16_t)1900); + ret = year_cursor->search(year_cursor); + ret = session->join(session, join_cursor, year_cursor, "compare=gt,count=10,strategy=bloom"); /* List the values that are joined */ @@ -370,10 +371,63 @@ main(void) ret = stat_cursor->close(stat_cursor); ret = join_cursor->close(join_cursor); - ret = cursor2->close(cursor2); - ret = cursor->close(cursor); + ret = year_cursor->close(year_cursor); + ret = country_cursor->close(country_cursor); + + /*! [Complex join cursors] */ + /* Open cursors needed by the join. 
*/ + ret = session->open_cursor(session, + "join:table:poptable", NULL, NULL, &join_cursor); + ret = session->open_cursor(session, + "join:table:poptable", NULL, NULL, &subjoin_cursor); + ret = session->open_cursor(session, + "index:poptable:country", NULL, NULL, &country_cursor); + ret = session->open_cursor(session, + "index:poptable:country", NULL, NULL, &country_cursor2); + ret = session->open_cursor(session, + "index:poptable:immutable_year", NULL, NULL, &year_cursor); + + /* + * select values WHERE (country == "AU" OR country == "UK") + * AND year > 1900 + * + * First, set up the join representing the country clause. + */ + country_cursor->set_key(country_cursor, "AU\0\0\0"); + ret = country_cursor->search(country_cursor); + ret = session->join(session, subjoin_cursor, country_cursor, + "operation=or,compare=eq,count=10"); + country_cursor2->set_key(country_cursor2, "UK\0\0\0"); + ret = country_cursor2->search(country_cursor2); + ret = session->join(session, subjoin_cursor, country_cursor2, + "operation=or,compare=eq,count=10"); + + /* Join that to the top join, and add the year clause */ + ret = session->join(session, join_cursor, subjoin_cursor, NULL); + year_cursor->set_key(year_cursor, (uint16_t)1900); + ret = year_cursor->search(year_cursor); + ret = session->join(session, join_cursor, year_cursor, + "compare=gt,count=10,strategy=bloom"); + + /* List the values that are joined */ + while ((ret = join_cursor->next(join_cursor)) == 0) { + ret = join_cursor->get_key(join_cursor, &recno); + ret = join_cursor->get_value(join_cursor, &country, &year, + &population); + printf("ID %" PRIu64, recno); + printf( + ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", + country, year, population); + } + /*! 
[Complex join cursors] */ + + ret = join_cursor->close(join_cursor); + ret = subjoin_cursor->close(subjoin_cursor); + ret = country_cursor->close(country_cursor); + ret = country_cursor2->close(country_cursor2); + ret = year_cursor->close(year_cursor); ret = conn->close(conn, NULL); - return (ret); + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_scope.c b/src/third_party/wiredtiger/examples/c/ex_scope.c index 93878ec7e3d..795ad85d57b 100644 --- a/src/third_party/wiredtiger/examples/c/ex_scope.c +++ b/src/third_party/wiredtiger/examples/c/ex_scope.c @@ -106,10 +106,12 @@ cursor_scope_ops(WT_CURSOR *cursor) * memory, but as it does not position the cursor, it * doesn't reference memory owned by the cursor, either. */ + printf("ex_scope: " + "expect two WiredTiger error messages:\n"); if ((ret = cursor->get_key(cursor, &key)) == 0 || (ret = cursor->get_value(cursor, &value)) == 0) { fprintf(stderr, - "%s: error in s get_key/value: %s\n", + "%s: error in get_key/value: %s\n", op->op, session->strerror(session, ret)); return (ret); } @@ -122,6 +124,8 @@ cursor_scope_ops(WT_CURSOR *cursor) * reference key memory owned by the cursor, but has no * value. */ + printf("ex_scope: " + "expect one WiredTiger error message:\n"); if ((ret = cursor->get_key(cursor, &key)) != 0 || (ret = cursor->get_value(cursor, &value)) == 0) { fprintf(stderr, @@ -178,7 +182,7 @@ main(void) WT_CONNECTION *conn; WT_CURSOR *cursor; WT_SESSION *session; - int ret, tret; + int ret; /* * Create a clean test directory for this run of the test program if the @@ -194,8 +198,8 @@ main(void) if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0 || (ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", - home, wiredtiger_strerror(ret)); - return (ret); + home == NULL ? "." 
: home, wiredtiger_strerror(ret)); + return (EXIT_FAILURE); } ret = session->create(session, @@ -207,8 +211,7 @@ main(void) ret = cursor_scope_ops(cursor); /* Close the connection and clean up. */ - if ((tret = conn->close(conn, NULL)) != 0 && ret == 0) - ret = tret; + ret = conn->close(conn, NULL); - return (ret); + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_stat.c b/src/third_party/wiredtiger/examples/c/ex_stat.c index 6c5c15aacc6..ba473d6be04 100644 --- a/src/third_party/wiredtiger/examples/c/ex_stat.c +++ b/src/third_party/wiredtiger/examples/c/ex_stat.c @@ -235,9 +235,8 @@ main(void) ret = wiredtiger_open(home, NULL, "create,statistics=(all)", &conn); ret = conn->open_session(conn, NULL, NULL, &session); - ret = session->create( - session, "table:access", - "key_format=S,value_format=S,columns=(k,v)"); + ret = session->create(session, + "table:access", "key_format=S,value_format=S,columns=(k,v)"); ret = session->open_cursor( session, "table:access", NULL, NULL, &cursor); @@ -258,5 +257,7 @@ main(void) ret = print_derived_stats(session); - return (conn->close(conn, NULL) == 0 ? ret : EXIT_FAILURE); + ret = conn->close(conn, NULL); + + return (ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_sync.c b/src/third_party/wiredtiger/examples/c/ex_sync.c index 8c3a6463a82..2c610b1e570 100644 --- a/src/third_party/wiredtiger/examples/c/ex_sync.c +++ b/src/third_party/wiredtiger/examples/c/ex_sync.c @@ -63,12 +63,12 @@ main(void) home, home); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); - return (ret); + return (EXIT_FAILURE); } if ((ret = wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", home, wiredtiger_strerror(ret)); - return (ret); + return (EXIT_FAILURE); } ret = wt_conn->open_session(wt_conn, NULL, NULL, &session); @@ -149,5 +149,6 @@ main(void) ret = session->log_flush(session, "sync=on"); ret = wt_conn->close(wt_conn, NULL); - return (ret); + + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/examples/c/ex_thread.c b/src/third_party/wiredtiger/examples/c/ex_thread.c index a72211b6243..7c52d3b8189 100644 --- a/src/third_party/wiredtiger/examples/c/ex_thread.c +++ b/src/third_party/wiredtiger/examples/c/ex_thread.c @@ -101,7 +101,7 @@ main(void) if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) fprintf(stderr, "Error connecting to %s: %s\n", - home, wiredtiger_strerror(ret)); + home == NULL ? "." : home, wiredtiger_strerror(ret)); /* Note: further error checking omitted for clarity. */ ret = conn->open_session(conn, NULL, NULL, &session); @@ -122,6 +122,6 @@ main(void) ret = conn->close(conn, NULL); - return (ret); + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } /*! 
[thread main] */ diff --git a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java index 5fe767d49bf..48e85c9fade 100644 --- a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java +++ b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java @@ -878,6 +878,18 @@ backup(Session session) ": backup failed: " + ex.toString()); } /*! [backup]*/ + try { + /*! [incremental backup]*/ + /* Open the backup data source for incremental backup. */ + cursor = session.open_cursor("backup:", null, "target=(\"log:\")"); + /*! [incremental backup]*/ + + ret = cursor.close(); + } + catch (Exception ex) { + System.err.println(progname + + ": incremental backup failed: " + ex.toString()); + } /*! [backup of a checkpoint]*/ ret = session.checkpoint("drop=(from=June01),name=June01"); diff --git a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_schema.java b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_schema.java index 7cc26acb479..76bff66a688 100644 --- a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_schema.java +++ b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_schema.java @@ -76,7 +76,8 @@ public class ex_schema { throws WiredTigerException { Connection conn; - Cursor cursor, cursor2, join_cursor, stat_cursor; + Cursor country_cursor, country_cursor2, cursor, join_cursor, + stat_cursor, subjoin_cursor, year_cursor; Session session; String country; long recno, population; @@ -343,18 +344,18 @@ public class ex_schema { /* Open cursors needed by the join. 
*/ join_cursor = session.open_cursor( "join:table:poptable", null, null); - cursor = session.open_cursor( + country_cursor = session.open_cursor( "index:poptable:country", null, null); - cursor2 = session.open_cursor( + year_cursor = session.open_cursor( "index:poptable:immutable_year", null, null); /* select values WHERE country == "AU" AND year > 1900 */ - cursor.putKeyString("AU"); - ret = cursor.search(); - session.join(join_cursor, cursor, "compare=eq,count=10"); - cursor2.putKeyShort((short)1900); - ret = cursor2.search(); - session.join(join_cursor, cursor2, + country_cursor.putKeyString("AU"); + ret = country_cursor.search(); + session.join(join_cursor, country_cursor, "compare=eq,count=10"); + year_cursor.putKeyShort((short)1900); + ret = year_cursor.search(); + session.join(join_cursor, year_cursor, "compare=gt,count=10,strategy=bloom"); /* List the values that are joined */ @@ -376,8 +377,61 @@ public class ex_schema { ret = stat_cursor.close(); ret = join_cursor.close(); - ret = cursor2.close(); - ret = cursor.close(); + ret = year_cursor.close(); + ret = country_cursor.close(); + + /*! [Complex join cursors] */ + /* Open cursors needed by the join. */ + join_cursor = session.open_cursor( + "join:table:poptable", null, null); + subjoin_cursor = session.open_cursor( + "join:table:poptable", null, null); + country_cursor = session.open_cursor( + "index:poptable:country", null, null); + country_cursor2 = session.open_cursor( + "index:poptable:country", null, null); + year_cursor = session.open_cursor( + "index:poptable:immutable_year", null, null); + + /* + * select values WHERE (country == "AU" OR country == "UK") + * AND year > 1900 + * + * First, set up the join representing the country clause. 
+ */ + country_cursor.putKeyString("AU"); + ret = country_cursor.search(); + ret = session.join(subjoin_cursor, country_cursor, + "operation=or,compare=eq,count=10"); + country_cursor2.putKeyString("UK"); + ret = country_cursor2.search(); + ret = session.join(subjoin_cursor, country_cursor2, + "operation=or,compare=eq,count=10"); + + /* Join that to the top join, and add the year clause */ + ret = session.join(join_cursor, subjoin_cursor, null); + year_cursor.putKeyShort((short)1900); + ret = year_cursor.search(); + ret = session.join(join_cursor, year_cursor, + "compare=gt,count=10,strategy=bloom"); + + /* List the values that are joined */ + while ((ret = join_cursor.next()) == 0) { + recno = join_cursor.getKeyRecord(); + country = join_cursor.getValueString(); + year = join_cursor.getValueShort(); + population = join_cursor.getValueLong(); + System.out.print("ID " + recno); + System.out.println( ": country " + country + ", year " + year + + ", population " + population); + } + /*! [Complex join cursors] */ + + ret = join_cursor.close(); + ret = subjoin_cursor.close(); + ret = year_cursor.close(); + ret = country_cursor.close(); + ret = country_cursor2.close(); ret = conn.close(null); diff --git a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c index 4ff0d8576eb..9aede2ed907 100644 --- a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c +++ b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c @@ -307,17 +307,9 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session, /* * If there's more compression to do, save a snapshot and keep * going, otherwise, use the current compression. - * - * Don't let the compression ratio become insanely good (which - * can happen with synthetic workloads). 
Once we hit a limit, - * stop so the in-memory size of pages isn't hugely larger than - * the on-disk size, otherwise we can get into trouble where - * every update to a page results in forced eviction based on - * the in-memory size, even though the data fits into a single - * on-disk block. */ last_slot = curr_slot; - if (zs.avail_out > 0 && zs.total_in <= zs.total_out * 20) { + if (zs.avail_out > 0) { if ((ret = deflateCopy(&last_zs, &zs)) != Z_OK) return (zlib_error( compressor, session, "deflateCopy", ret)); diff --git a/src/third_party/wiredtiger/ext/datasources/helium/helium.c b/src/third_party/wiredtiger/ext/datasources/helium/helium.c index 0350f9a2752..2a66c9a0ca7 100644 --- a/src/third_party/wiredtiger/ext/datasources/helium/helium.c +++ b/src/third_party/wiredtiger/ext/datasources/helium/helium.c @@ -2196,8 +2196,8 @@ helium_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session, EMSG_ERR(wtext, session, ret, "value_format configuration: %s", wtext->strerror(wtext, session, ret)); - ws->config_bitfield = - v.len == 2 && isdigit(v.str[0]) && v.str[1] == 't'; + ws->config_bitfield = v.len == 2 && + isdigit((u_char)v.str[0]) && v.str[1] == 't'; if ((ret = config_parser->get( config_parser, "helium_o_compress", &v)) != 0) diff --git a/src/third_party/wiredtiger/ext/test/kvs_bdb/kvs_bdb.c b/src/third_party/wiredtiger/ext/test/kvs_bdb/kvs_bdb.c index 05c522ff41f..3d78bca1d1b 100644 --- a/src/third_party/wiredtiger/ext/test/kvs_bdb/kvs_bdb.c +++ b/src/third_party/wiredtiger/ext/test/kvs_bdb/kvs_bdb.c @@ -831,7 +831,7 @@ kvs_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session, goto err; } cursor->config_bitfield = - v.len == 2 && isdigit(v.str[0]) && v.str[1] == 't'; + v.len == 2 && isdigit((u_char)v.str[0]) && v.str[1] == 't'; if ((ret = writelock(wtext, session, &ds->rwlock)) != 0) goto err; diff --git a/src/third_party/wiredtiger/lang/java/java_doc.i b/src/third_party/wiredtiger/lang/java/java_doc.i index 450cb1d5ab2..2264cb31ef1 100644 --- 
a/src/third_party/wiredtiger/lang/java/java_doc.i +++ b/src/third_party/wiredtiger/lang/java/java_doc.i @@ -63,6 +63,7 @@ COPYDOC(__wt_connection, WT_CONNECTION, add_collator) COPYDOC(__wt_connection, WT_CONNECTION, add_compressor) COPYDOC(__wt_connection, WT_CONNECTION, add_encryptor) COPYDOC(__wt_connection, WT_CONNECTION, add_extractor) +COPYDOC(__wt_connection, WT_CONNECTION, set_file_system) COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, close) COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, next) COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, get) diff --git a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java index 02639bfe77a..4f05e153607 100644 --- a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java +++ b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java @@ -40,6 +40,7 @@ import com.wiredtiger.db.WiredTigerPackingException; public class PackFormatInputStream { protected String format; + protected boolean isRaw; protected int formatOff; protected int formatRepeatCount; @@ -48,8 +49,9 @@ public class PackFormatInputStream { * * \param format the encoded format backing string. */ - protected PackFormatInputStream(String format) { + protected PackFormatInputStream(String format, boolean isRaw) { this.format = format; + this.isRaw = isRaw; formatOff = 0; formatRepeatCount = 0; } @@ -114,6 +116,9 @@ public class PackFormatInputStream { throws WiredTigerPackingException { char expected = getType(); + if (isRaw) + throw new WiredTigerPackingException( + "Format mismatch for raw mode"); if (Character.toLowerCase(expected) != Character.toLowerCase(asking)) throw new WiredTigerPackingException( "Format mismatch. 
Wanted: " + asking + ", got: " + expected); diff --git a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java index f265d041d94..732bf450acd 100644 --- a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java +++ b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java @@ -43,6 +43,7 @@ public class PackInputStream { protected byte[] value; protected int valueOff; protected int valueLen; + protected boolean isRaw; /** * Constructor. @@ -52,7 +53,7 @@ public class PackInputStream { * \param value The raw bytes that back the stream. */ public PackInputStream(String format, byte[] value) { - this(format, value, 0, value.length); + this(format, value, false, 0, value.length); } /** @@ -61,14 +62,29 @@ public class PackInputStream { * \param format A String that contains the WiredTiger format that * defines the layout of this packed value. * \param value The raw bytes that back the stream. + * \param isRaw The stream is opened raw. + */ + public PackInputStream(String format, byte[] value, boolean isRaw) { + this(format, value, isRaw, 0, value.length); + } + + /** + * Constructor. + * + * \param format A String that contains the WiredTiger format that + * defines the layout of this packed value. + * \param value The raw bytes that back the stream. + * \param isRaw The stream is opened raw. * \param off Offset into the value array at which the stream begins. * \param len Length of the value array that forms the stream. 
*/ - public PackInputStream(String format, byte[] value, int off, int len) { - this.format = new PackFormatInputStream(format); + public PackInputStream( + String format, byte[] value, boolean isRaw, int off, int len) { + this.format = new PackFormatInputStream(format, isRaw); this.value = value; this.valueOff = off; this.valueLen = len; + this.isRaw = isRaw; } /** @@ -117,7 +133,9 @@ public class PackInputStream { */ public void getByteArray(byte[] dest, int off, int len) throws WiredTigerPackingException { - format.checkType('U', false); + if (!isRaw) { + format.checkType('U', false); + } getByteArrayInternal(getByteArrayLength(), dest, off, len); } @@ -127,7 +145,9 @@ public class PackInputStream { */ public byte[] getByteArray() throws WiredTigerPackingException { - format.checkType('U', false); + if (!isRaw) { + format.checkType('U', false); + } int itemLen = getByteArrayLength(); byte[] unpacked = new byte[itemLen]; getByteArrayInternal(itemLen, unpacked, 0, itemLen); @@ -142,7 +162,10 @@ public class PackInputStream { throws WiredTigerPackingException { int itemLen = 0; - if (format.hasLength()) { + if (isRaw) { + // The rest of the buffer is a byte array. + itemLen = valueLen - valueOff; + } else if (format.hasLength()) { // If the format has a length, it's always used. itemLen = format.getLengthFromFormat(true); } else if (format.getType() == 'U') { diff --git a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java index 805e34f6ca8..46b3aef0974 100644 --- a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java +++ b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java @@ -50,7 +50,7 @@ public class PackOutputStream { * defines the layout of this packed value. 
*/ public PackOutputStream(String format) { - this.format = new PackFormatInputStream(format); + this.format = new PackFormatInputStream(format, false); intBuf = new byte[MAX_INT_BYTES]; packed = new ByteArrayOutputStream(100); } diff --git a/src/third_party/wiredtiger/lang/java/wiredtiger.i b/src/third_party/wiredtiger/lang/java/wiredtiger.i index ce013a1939c..c04bae63cbc 100644 --- a/src/third_party/wiredtiger/lang/java/wiredtiger.i +++ b/src/third_party/wiredtiger/lang/java/wiredtiger.i @@ -80,6 +80,7 @@ typedef struct { JavaVM *javavm; /* Used in async threads to craft a jnienv */ JNIEnv *jnienv; /* jni env that created the Session/Cursor */ WT_SESSION_IMPL *session; /* session used for alloc/free */ + bool cursor_raw; /* is the cursor opened raw? */ jobject jobj; /* the java Session/Cursor/AsyncOp object */ jobject jcallback; /* callback object for async ops */ jfieldID cptr_fid; /* cached Cursor.swigCPtr field id in session */ @@ -576,8 +577,15 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; return $self->update($self); } - %javamethodmodifiers java_init "protected"; - int java_init(jobject jasyncop) { + %javamethodmodifiers _java_raw "protected"; + bool _java_raw(JNIEnv *jenv) { + (void)jenv; + JAVA_CALLBACK *jcb = (JAVA_CALLBACK *)$self->c.lang_private; + return jcb->cursor_raw; + } + + %javamethodmodifiers _java_init "protected"; + int _java_init(jobject jasyncop) { JAVA_CALLBACK *jcb = (JAVA_CALLBACK *)$self->c.lang_private; jcb->jobj = JCALL1(NewGlobalRef, jcb->jnienv, jasyncop); @@ -604,7 +612,7 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; valueFormat = getValue_format(); keyPacker = new PackOutputStream(keyFormat); valuePacker = new PackOutputStream(valueFormat); - wiredtigerJNI.AsyncOp_java_init(swigCPtr, this, this); + wiredtigerJNI.AsyncOp__java_init(swigCPtr, this, this); } protected static long getCPtr($javaclassname obj) { @@ -1090,7 +1098,8 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; throws 
WiredTigerPackingException { if (keyUnpacker == null) keyUnpacker = - new PackInputStream(keyFormat, get_key_wrap()); + new PackInputStream(keyFormat, get_key_wrap(), + _java_raw()); return keyUnpacker; } @@ -1103,7 +1112,8 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; throws WiredTigerPackingException { if (valueUnpacker == null) valueUnpacker = - new PackInputStream(valueFormat, get_value_wrap()); + new PackInputStream(valueFormat, get_value_wrap(), + _java_raw()); return valueUnpacker; } @@ -1175,6 +1185,7 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; return $self->update($self); } + %javamethodmodifiers compare_wrap "protected"; int compare_wrap(JNIEnv *jenv, WT_CURSOR *other) { int cmp, ret = $self->compare($self, other, &cmp); if (ret != 0) @@ -1182,6 +1193,7 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; return cmp; } + %javamethodmodifiers equals_wrap "protected"; int equals_wrap(JNIEnv *jenv, WT_CURSOR *other) { int cmp, ret = $self->equals($self, other, &cmp); if (ret != 0) @@ -1189,8 +1201,15 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; return cmp; } - %javamethodmodifiers java_init "protected"; - int java_init(jobject jcursor) { + %javamethodmodifiers _java_raw "protected"; + bool _java_raw(JNIEnv *jenv) { + (void)jenv; + JAVA_CALLBACK *jcb = (JAVA_CALLBACK *)$self->lang_private; + return jcb->cursor_raw; + } + + %javamethodmodifiers _java_init "protected"; + int _java_init(jobject jcursor) { JAVA_CALLBACK *jcb = (JAVA_CALLBACK *)$self->lang_private; jcb->jobj = JCALL1(NewGlobalRef, jcb->jnienv, jcursor); JCALL1(DeleteLocalRef, jcb->jnienv, jcursor); @@ -1216,7 +1235,7 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; valueFormat = getValue_format(); keyPacker = new PackOutputStream(keyFormat); valuePacker = new PackOutputStream(valueFormat); - wiredtigerJNI.Cursor_java_init(swigCPtr, this, this); + wiredtigerJNI.Cursor__java_init(swigCPtr, this, this); } protected static 
long getCPtr($javaclassname obj) { @@ -1773,7 +1792,8 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; if (!success || keyFormat.equals("")) return null; else - return new PackInputStream(keyFormat, get_key_wrap()); + return new PackInputStream(keyFormat, + get_key_wrap(), _java_raw()); } /** @@ -1789,7 +1809,7 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; return null; else return new PackInputStream(valueFormat, - get_value_wrap()); + get_value_wrap(), _java_raw()); } %} @@ -1799,20 +1819,22 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; */ %javaexception("com.wiredtiger.db.WiredTigerException") { $action; } %javaexception("") wiredtiger_strerror { $action; } +%javaexception("") __wt_async_op::_java_raw { $action; } %javaexception("") __wt_async_op::connection { $action; } %javaexception("") __wt_async_op::get_type { $action; } %javaexception("") __wt_async_op::get_id { $action; } %javaexception("") __wt_async_op::key_format { $action; } %javaexception("") __wt_async_op::value_format { $action; } +%javaexception("") __wt_connection::_java_init { $action; } %javaexception("") __wt_connection::get_home { $action; } %javaexception("") __wt_connection::is_new { $action; } -%javaexception("") __wt_connection::java_init { $action; } +%javaexception("") __wt_cursor::_java_raw { $action; } %javaexception("") __wt_cursor::key_format { $action; } %javaexception("") __wt_cursor::session { $action; } %javaexception("") __wt_cursor::uri { $action; } %javaexception("") __wt_cursor::value_format { $action; } +%javaexception("") __wt_session::_java_init { $action; } %javaexception("") __wt_session::connection { $action; } -%javaexception("") __wt_session::java_init { $action; } /* Remove / rename parts of the C API that we don't want in Java. 
*/ %immutable __wt_cursor::session; @@ -1832,6 +1854,9 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; %ignore __wt_event_handler; %ignore __wt_extractor; %ignore __wt_connection::add_extractor; +%ignore __wt_file_system; +%ignore __wt_file_handle; +%ignore __wt_connection::set_file_system; %ignore __wt_item; %ignore __wt_lsn; %ignore __wt_session::msg_printf; @@ -1890,8 +1915,8 @@ REQUIRE_WRAP(WT_ASYNC_OP::get_id, __wt_async_op::get_id,getId) %} %extend ctypename { - %javamethodmodifiers java_init "protected"; - int java_init(jobject jsess) { + %javamethodmodifiers _java_init "protected"; + int _java_init(jobject jsess) { implclass *session = (implclass *)$self; JAVA_CALLBACK *jcb = (JAVA_CALLBACK *)session->lang_private; jcb->jobj = JCALL1(NewGlobalRef, jcb->jnienv, jsess); @@ -1901,8 +1926,8 @@ REQUIRE_WRAP(WT_ASYNC_OP::get_id, __wt_async_op::get_id,getId) } %enddef -TRACKED_CLASS(Session, __wt_session, wiredtigerJNI.Session_java_init, WT_SESSION_IMPL) -TRACKED_CLASS(Connection, __wt_connection, wiredtigerJNI.Connection_java_init, WT_CONNECTION_IMPL) +TRACKED_CLASS(Session, __wt_session, wiredtigerJNI.Session__java_init, WT_SESSION_IMPL) +TRACKED_CLASS(Connection, __wt_connection, wiredtigerJNI.Connection__java_init, WT_CONNECTION_IMPL) /* Note: Cursor incorporates the elements of TRACKED_CLASS into its * custom constructor and %extend clause. 
*/ @@ -1996,13 +2021,15 @@ err: if (ret != 0) if ((ret = $self->open_cursor($self, uri, to_dup, config, &cursor)) != 0) goto err; - if ((cursor->flags & WT_CURSTD_DUMP_JSON) == 0) - cursor->flags |= WT_CURSTD_RAW; - if ((ret = __wt_calloc_def((WT_SESSION_IMPL *)cursor->session, 1, &jcb)) != 0) goto err; + if ((cursor->flags & WT_CURSTD_RAW) != 0) + jcb->cursor_raw = true; + if ((cursor->flags & WT_CURSTD_DUMP_JSON) == 0) + cursor->flags |= WT_CURSTD_RAW; + jcb->jnienv = jenv; jcb->session = (WT_SESSION_IMPL *)cursor->session; cursor->lang_private = jcb; diff --git a/src/third_party/wiredtiger/src/async/async_worker.c b/src/third_party/wiredtiger/src/async/async_worker.c index e692bc619a9..90dac557e36 100644 --- a/src/third_party/wiredtiger/src/async/async_worker.c +++ b/src/third_party/wiredtiger/src/async/async_worker.c @@ -216,9 +216,8 @@ __async_worker_execop(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op, __wt_cursor_set_raw_value(&asyncop->c, &val); break; case WT_AOP_NONE: - default: - WT_RET_MSG(session, EINVAL, "Unknown async optype %d\n", - op->optype); + WT_RET_MSG(session, EINVAL, + "Unknown async optype %d\n", op->optype); } return (0); } diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c index a861a21876b..b9f0ec25d53 100644 --- a/src/third_party/wiredtiger/src/block/block_ckpt.c +++ b/src/third_party/wiredtiger/src/block/block_ckpt.c @@ -63,6 +63,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, */ *root_addr_sizep = 0; +#ifdef HAVE_VERBOSE if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) { if (addr != NULL) { WT_ERR(__wt_scr_alloc(session, 0, &tmp)); @@ -72,6 +73,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, "%s: load-checkpoint: %s", block->name, addr == NULL ? 
"[Empty]" : (const char *)tmp->data)); } +#endif /* * There's a single checkpoint in the file that can be written, all of @@ -140,12 +142,10 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, * will unnecessarily allocate buffer space. */ if (!checkpoint && !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) { - /* - * The truncate might fail if there's a file mapping (if there's - * an open checkpoint on the file), that's OK. - */ WT_ERR(__wt_verbose(session, WT_VERB_CHECKPOINT, "truncate file to %" PRIuMAX, (uintmax_t)ci->file_size)); + + /* The truncate might fail, and that's OK. */ WT_ERR_BUSY_OK( __wt_block_truncate(session, block, ci->file_size)); } @@ -190,10 +190,7 @@ __wt_block_checkpoint_unload( * checkpoints. */ if (!checkpoint) { - /* - * The truncate might fail if there's a file mapping (if there's - * an open checkpoint on the file), that's OK. - */ + /* The truncate might fail, and that's OK. */ WT_TRET_BUSY_OK( __wt_block_truncate(session, block, block->size)); @@ -512,6 +509,7 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase) !F_ISSET(ckpt, WT_CKPT_DELETE)) continue; +#ifdef HAVE_VERBOSE if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) { if (tmp == NULL) WT_ERR(__wt_scr_alloc(session, 0, &tmp)); @@ -521,7 +519,7 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase) "%s: delete-checkpoint: %s: %s", block->name, ckpt->name, (const char *)tmp->data)); } - +#endif /* * Find the checkpoint into which we'll roll this checkpoint's * blocks: it's the next real checkpoint in the list, and it diff --git a/src/third_party/wiredtiger/src/block/block_compact.c b/src/third_party/wiredtiger/src/block/block_compact.c index 24ca6632311..02862ea842f 100644 --- a/src/third_party/wiredtiger/src/block/block_compact.c +++ b/src/third_party/wiredtiger/src/block/block_compact.c @@ -39,12 +39,14 @@ __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block) /* Restore the original allocation plan. 
*/ __wt_block_configure_first_fit(block, false); +#ifdef HAVE_VERBOSE /* Dump the results of the compaction pass. */ if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) { __wt_spin_lock(session, &block->live_lock); ret = __block_dump_avail(session, block, false); __wt_spin_unlock(session, &block->live_lock); } +#endif return (ret); } @@ -188,6 +190,7 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, } __wt_spin_unlock(session, &block->live_lock); +#ifdef HAVE_VERBOSE if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) { ++block->compact_pages_reviewed; if (*skipp) @@ -195,6 +198,7 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, else ++block->compact_pages_written; } +#endif return (ret); } diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c index caafcc77c48..0d3e7b54f17 100644 --- a/src/third_party/wiredtiger/src/block/block_ext.c +++ b/src/third_party/wiredtiger/src/block/block_ext.c @@ -24,7 +24,7 @@ static int __block_append(WT_SESSION_IMPL *, static int __block_ext_overlap(WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, WT_EXT **, WT_EXTLIST *, WT_EXT **); static int __block_extlist_dump( - WT_SESSION_IMPL *, const char *, WT_EXTLIST *, bool); + WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, const char *); static int __block_merge(WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t); @@ -1227,8 +1227,7 @@ corrupted: __wt_scr_free(session, &tmp); WT_ERR(func(session, block, el, off, size)); } - if (WT_VERBOSE_ISSET(session, WT_VERB_BLOCK)) - WT_ERR(__block_extlist_dump(session, "read extlist", el, 0)); + WT_ERR(__block_extlist_dump(session, block, el, "read")); err: __wt_scr_free(session, &tmp); return (ret); @@ -1250,8 +1249,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session, uint32_t entries; uint8_t *p; - if (WT_VERBOSE_ISSET(session, WT_VERB_BLOCK)) - WT_RET(__block_extlist_dump(session, "write extlist", el, 0)); + WT_RET(__block_extlist_dump(session, block, el, "write")); /* 
* Figure out how many entries we're writing -- if there aren't any @@ -1362,9 +1360,8 @@ __wt_block_extlist_truncate( block->size = size; /* - * Truncate the file. The truncate might fail if there's a file mapping - * (if there's an open checkpoint on the file), that's OK, we'll ignore - * those blocks. + * Truncate the file. The truncate might fail, and that's OK, we simply + * ignore those blocks. */ WT_RET(__wt_verbose(session, WT_VERB_BLOCK, "truncate file from %" PRIdMAX " to %" PRIdMAX, @@ -1428,38 +1425,62 @@ __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el) */ static int __block_extlist_dump( - WT_SESSION_IMPL *session, const char *tag, WT_EXTLIST *el, bool show_size) + WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, const char *tag) { + WT_DECL_ITEM(t1); + WT_DECL_ITEM(t2); + WT_DECL_RET; WT_EXT *ext; - WT_SIZE *szp; + uint64_t pow, sizes[64]; + u_int i; + const char *sep; - WT_RET(__wt_verbose(session, WT_VERB_BLOCK, - "%s: %s: %" PRIu64 " bytes, by offset:%s", - tag, el->name, el->bytes, el->entries == 0 ? " [Empty]" : "")); - if (el->entries == 0) + if (!block->verify_layout && !WT_VERBOSE_ISSET(session, WT_VERB_BLOCK)) return (0); - WT_EXT_FOREACH(ext, el->off) - WT_RET(__wt_verbose(session, WT_VERB_BLOCK, - "\t{%" PRIuMAX "/%" PRIuMAX "}", - (uintmax_t)ext->off, (uintmax_t)ext->size)); + WT_ERR(__wt_scr_alloc(session, 0, &t1)); + if (block->verify_layout) + WT_ERR(__wt_msg(session, + "%s extent list %s, %" PRIu32 " entries, %s bytes", + tag, el->name, el->entries, + __wt_buf_set_size(session, el->bytes, true, t1))); + else + WT_ERR(__wt_verbose(session, WT_VERB_BLOCK, + "%s extent list %s, %" PRIu32 " entries, %s bytes", + tag, el->name, el->entries, + __wt_buf_set_size(session, el->bytes, true, t1))); - if (!show_size) - return (0); + if (ret != 0 || el->entries == 0) + goto done; - WT_RET(__wt_verbose(session, WT_VERB_BLOCK, - "%s: %s: by size:%s", - tag, el->name, el->entries == 0 ? 
" [Empty]" : "")); - if (el->entries == 0) - return (0); + memset(sizes, 0, sizeof(sizes)); + WT_EXT_FOREACH(ext, el->off) + for (i = 9, pow = 512;; ++i, pow *= 2) + if (ext->size <= (wt_off_t)pow) { + ++sizes[i]; + break; + } + sep = "extents by bucket:"; + t1->size = 0; + WT_ERR(__wt_scr_alloc(session, 0, &t2)); + for (i = 9, pow = 512; i < WT_ELEMENTS(sizes); ++i, pow *= 2) + if (sizes[i] != 0) { + WT_ERR(__wt_buf_catfmt(session, t1, + "%s {%s: %" PRIu64 "}", + sep, + __wt_buf_set_size(session, pow, false, t2), + sizes[i])); + sep = ","; + } - WT_EXT_FOREACH(szp, el->sz) { - WT_RET(__wt_verbose(session, WT_VERB_BLOCK, - "\t{%" PRIuMAX "}", (uintmax_t)szp->size)); - WT_EXT_FOREACH_OFF(ext, szp->off) - WT_RET(__wt_verbose(session, WT_VERB_BLOCK, - "\t\t{%" PRIuMAX "/%" PRIuMAX "}", - (uintmax_t)ext->off, (uintmax_t)ext->size)); - } - return (0); + if (block->verify_layout) + WT_ERR(__wt_msg(session, "%s", (char *)t1->data)); + else + WT_ERR(__wt_verbose( + session, WT_VERB_BLOCK, "%s", (char *)t1->data)); + +done: err: + __wt_scr_free(session, &t1); + __wt_scr_free(session, &t2); + return (ret); } diff --git a/src/third_party/wiredtiger/src/block/block_map.c b/src/third_party/wiredtiger/src/block/block_map.c index b16fe7f8423..d2c70fb4c49 100644 --- a/src/third_party/wiredtiger/src/block/block_map.c +++ b/src/third_party/wiredtiger/src/block/block_map.c @@ -13,24 +13,16 @@ * Map a segment of the file in, if possible. */ int -__wt_block_map( - WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp, - void **mappingcookie) +__wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, + void *mapped_regionp, size_t *lengthp, void *mapped_cookiep) { WT_DECL_RET; + WT_FILE_HANDLE *handle; - *(void **)mapp = NULL; - *maplenp = 0; + *(void **)mapped_regionp = NULL; + *lengthp = 0; + *(void **)mapped_cookiep = NULL; -#ifdef WORDS_BIGENDIAN - /* - * The underlying objects are little-endian, mapping objects isn't - * currently supported on big-endian systems. 
- */ - WT_UNUSED(session); - WT_UNUSED(block); - WT_UNUSED(mappingcookie); -#else /* Map support is configurable. */ if (!S2C(session)->mmap) return (0); @@ -51,15 +43,23 @@ __wt_block_map( return (0); /* + * There may be no underlying functionality. + */ + handle = block->fh->handle; + if (handle->fh_map == NULL) + return (0); + + /* * Map the file into memory. * Ignore not-supported errors, we'll read the file through the cache * if map fails. */ - ret = block->fh->fh_map( - session, block->fh, mapp, maplenp, mappingcookie); - if (ret == ENOTSUP) + ret = handle->fh_map(handle, + (WT_SESSION *)session, mapped_regionp, lengthp, mapped_cookiep); + if (ret == ENOTSUP) { + *(void **)mapped_regionp = NULL; ret = 0; -#endif + } return (ret); } @@ -69,11 +69,13 @@ __wt_block_map( * Unmap any mapped-in segment of the file. */ int -__wt_block_unmap( - WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen, - void **mappingcookie) +__wt_block_unmap(WT_SESSION_IMPL *session, + WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie) { + WT_FILE_HANDLE *handle; + /* Unmap the file from memory. */ - return (block->fh->fh_map_unmap( - session, block->fh, map, maplen, mappingcookie)); + handle = block->fh->handle; + return (handle->fh_unmap(handle, + (WT_SESSION *)session, mapped_region, length, mapped_cookie)); } diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c index 06150a0f062..971fe713f83 100644 --- a/src/third_party/wiredtiger/src/block/block_mgr.c +++ b/src/third_party/wiredtiger/src/block/block_mgr.c @@ -103,7 +103,7 @@ __bm_checkpoint_load(WT_BM *bm, WT_SESSION_IMPL *session, * of being read into cache buffers. 
*/ WT_RET(__wt_block_map(session, - bm->block, &bm->map, &bm->maplen, &bm->mappingcookie)); + bm->block, &bm->map, &bm->maplen, &bm->mapped_cookie)); /* * If this handle is for a checkpoint, that is, read-only, there @@ -149,7 +149,7 @@ __bm_checkpoint_unload(WT_BM *bm, WT_SESSION_IMPL *session) /* Unmap any mapped segment. */ if (bm->map != NULL) WT_TRET(__wt_block_unmap(session, - bm->block, bm->map, bm->maplen, &bm->mappingcookie)); + bm->block, bm->map, bm->maplen, &bm->mapped_cookie)); /* Unload the checkpoint. */ WT_TRET(__wt_block_checkpoint_unload(session, bm->block, !bm->is_live)); @@ -302,6 +302,20 @@ __bm_is_mapped(WT_BM *bm, WT_SESSION_IMPL *session) } /* + * __bm_map_discard -- + * Discard a mapped segment. + */ +static int +__bm_map_discard(WT_BM *bm, WT_SESSION_IMPL *session, void *map, size_t len) +{ + WT_FILE_HANDLE *handle; + + handle = bm->block->fh->handle; + return (handle->fh_map_discard( + handle, (WT_SESSION *)session, map, len, bm->mapped_cookie)); +} + +/* * __bm_salvage_end -- * End a block manager salvage. */ @@ -413,19 +427,7 @@ __bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats) static int __bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool block) { - WT_DECL_RET; - - if (!block && !bm->block->nowait_sync_available) - return (0); - - if ((ret = __wt_fsync(session, bm->block->fh, block)) == 0) - return (0); - - /* Ignore ENOTSUP, but don't try again. 
*/ - if (ret != ENOTSUP) - return (ret); - bm->block->nowait_sync_available = false; - return (0); + return (__wt_fsync(session, bm->block->fh, block)); } /* @@ -544,6 +546,7 @@ __bm_method_set(WT_BM *bm, bool readonly) bm->compact_start = __bm_compact_start; bm->free = __bm_free; bm->is_mapped = __bm_is_mapped; + bm->map_discard = __bm_map_discard; bm->preload = __wt_bm_preload; bm->read = __wt_bm_read; bm->salvage_end = __bm_salvage_end; diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index f4da5ca7c05..1603b1574e7 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -33,7 +33,6 @@ __wt_block_manager_create( WT_FH *fh; int suffix; bool exists; - char *path; /* * Create the underlying file and open a handle. @@ -44,7 +43,7 @@ __wt_block_manager_create( * in our space. Move any existing files out of the way and complain. */ for (;;) { - if ((ret = __wt_open(session, filename, WT_FILE_TYPE_DATA, + if ((ret = __wt_open(session, filename, WT_OPEN_FILE_TYPE_DATA, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)) == 0) break; WT_ERR_TEST(ret != EEXIST, ret); @@ -54,13 +53,13 @@ __wt_block_manager_create( for (suffix = 1;; ++suffix) { WT_ERR(__wt_buf_fmt( session, tmp, "%s.%d", filename, suffix)); - WT_ERR(__wt_exist(session, tmp->data, &exists)); + WT_ERR(__wt_fs_exist(session, tmp->data, &exists)); if (!exists) { - WT_ERR( - __wt_rename(session, filename, tmp->data)); + WT_ERR(__wt_fs_rename( + session, filename, tmp->data)); WT_ERR(__wt_msg(session, "unexpected file %s found, renamed to %s", - filename, (char *)tmp->data)); + filename, (const char *)tmp->data)); break; } } @@ -82,14 +81,12 @@ __wt_block_manager_create( * Some filesystems require that we sync the directory to be confident * that the file will appear. 
*/ - if (ret == 0 && (ret = __wt_filename(session, filename, &path)) == 0) { - ret = __wt_directory_sync(session, path); - __wt_free(session, path); - } + if (ret == 0) + WT_TRET(__wt_fs_directory_sync(session, filename)); /* Undo any create on error. */ if (ret != 0) - WT_TRET(__wt_remove(session, filename)); + WT_TRET(__wt_fs_remove(session, filename)); err: __wt_scr_free(session, &tmp); @@ -156,8 +153,7 @@ __wt_block_open(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *conn; WT_DECL_RET; uint64_t bucket, hash; - - WT_UNUSED(readonly); + uint32_t flags; WT_RET(__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename)); @@ -204,12 +200,18 @@ __wt_block_open(WT_SESSION_IMPL *session, /* Set the file extension information. */ block->extend_len = conn->data_extend_len; - /* Set the asynchronous flush, preload availability. */ - block->nowait_sync_available = true; - block->preload_available = true; - - /* Open the underlying file handle. */ - WT_ERR(__wt_open(session, filename, WT_FILE_TYPE_DATA, 0, &block->fh)); + /* + * Open the underlying file handle. + * + * "direct_io=checkpoint" configures direct I/O for readonly data files. + */ + flags = 0; + if (readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_CHECKPOINT)) + LF_SET(WT_OPEN_DIRECTIO); + if (!readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_DATA)) + LF_SET(WT_OPEN_DIRECTIO); + WT_ERR(__wt_open( + session, filename, WT_OPEN_FILE_TYPE_DATA, flags, &block->fh)); /* Set the file's size. 
*/ WT_ERR(__wt_filesize(session, block->fh, &block->size)); @@ -422,5 +424,5 @@ int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) { - return (__wt_filesize_name(session, name, false, sizep)); + return (__wt_fs_size(session, name, sizep)); } diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c index 6f0c41c1b5c..97157e4a0f1 100644 --- a/src/third_party/wiredtiger/src/block/block_read.c +++ b/src/third_party/wiredtiger/src/block/block_read.c @@ -19,44 +19,32 @@ __wt_bm_preload( WT_BLOCK *block; WT_DECL_ITEM(tmp); WT_DECL_RET; + WT_FILE_HANDLE *handle; wt_off_t offset; uint32_t cksum, size; bool mapped; WT_UNUSED(addr_size); + block = bm->block; WT_STAT_FAST_CONN_INCR(session, block_preload); - /* Preload the block. */ - if (block->preload_available) { - /* Crack the cookie. */ - WT_RET(__wt_block_buffer_to_addr( - block, addr, &offset, &size, &cksum)); - - mapped = bm->map != NULL && - offset + size <= (wt_off_t)bm->maplen; - if (mapped) - ret = block->fh->fh_map_preload(session, - block->fh, (uint8_t *)bm->map + offset, size); - else - ret = block->fh->fh_advise(session, - block->fh, (wt_off_t)offset, - (wt_off_t)size, POSIX_FADV_WILLNEED); - if (ret == 0) - return (0); - - /* Ignore ENOTSUP, but don't try again. */ - if (ret != ENOTSUP) - return (ret); - block->preload_available = false; - } + /* Crack the cookie. */ + WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum)); - /* - * If preload isn't supported, do it the slow way; don't call the - * underlying read routine directly, we don't know for certain if - * this is a mapped range. 
- */ + handle = block->fh->handle; + mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen; + if (mapped && handle->fh_map_preload != NULL) + ret = handle->fh_map_preload(handle, (WT_SESSION *)session, + (uint8_t *)bm->map + offset, size, bm->mapped_cookie); + if (!mapped && handle->fh_advise != NULL) + ret = handle->fh_advise(handle, (WT_SESSION *)session, + (wt_off_t)offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED); + if (ret != EBUSY && ret != ENOTSUP) + return (ret); + + /* If preload isn't supported, do it the slow way. */ WT_RET(__wt_scr_alloc(session, 0, &tmp)); ret = __wt_bm_read(bm, session, tmp, addr, addr_size); __wt_scr_free(session, &tmp); @@ -74,6 +62,7 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, { WT_BLOCK *block; WT_DECL_RET; + WT_FILE_HANDLE *handle; wt_off_t offset; uint32_t cksum, size; bool mapped; @@ -87,23 +76,17 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, /* * Map the block if it's possible. */ + handle = block->fh->handle; mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen; - if (mapped) { + if (mapped && handle->fh_map_preload != NULL) { buf->data = (uint8_t *)bm->map + offset; buf->size = size; - if (block->preload_available) { - ret = block->fh->fh_map_preload( - session, block->fh, buf->data, buf->size); - - /* Ignore ENOTSUP, but don't try again. 
*/ - if (ret != ENOTSUP) - return (ret); - block->preload_available = false; - } + ret = handle->fh_map_preload(handle, (WT_SESSION *)session, + buf->data, buf->size,bm->mapped_cookie); WT_STAT_FAST_CONN_INCR(session, block_map_read); WT_STAT_FAST_CONN_INCRV(session, block_byte_map_read, size); - return (0); + return (ret); } #ifdef HAVE_DIAGNOSTIC diff --git a/src/third_party/wiredtiger/src/block/block_vrfy.c b/src/third_party/wiredtiger/src/block/block_vrfy.c index 6570184ca10..af58864b9dc 100644 --- a/src/third_party/wiredtiger/src/block/block_vrfy.c +++ b/src/third_party/wiredtiger/src/block/block_vrfy.c @@ -15,13 +15,15 @@ static int __verify_filefrag_add( WT_SESSION_IMPL *, WT_BLOCK *, const char *, wt_off_t, wt_off_t, bool); static int __verify_filefrag_chk(WT_SESSION_IMPL *, WT_BLOCK *); static int __verify_last_avail(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *); -static int __verify_last_truncate(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *); +static int __verify_set_file_size(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *); /* The bit list ignores the first block: convert to/from a frag/offset. */ #define WT_wt_off_TO_FRAG(block, off) \ ((off) / (block)->allocsize - 1) +#ifdef HAVE_VERBOSE #define WT_FRAG_TO_OFF(block, frag) \ (((wt_off_t)(frag + 1)) * (block)->allocsize) +#endif /* * __wt_block_verify_start -- @@ -35,6 +37,14 @@ __wt_block_verify_start(WT_SESSION_IMPL *session, WT_CONFIG_ITEM cval; wt_off_t size; + /* Configuration: strict behavior on any error. */ + WT_RET(__wt_config_gets(session, cfg, "strict", &cval)); + block->verify_strict = cval.val != 0; + + /* Configuration: dump the file's layout. */ + WT_RET(__wt_config_gets(session, cfg, "dump_layout", &cval)); + block->verify_layout = cval.val != 0; + /* * Find the last checkpoint in the list: if there are none, or the only * checkpoint we have is fake, there's no work to do. 
Don't complain, @@ -49,8 +59,8 @@ __wt_block_verify_start(WT_SESSION_IMPL *session, return (0); } - /* Truncate the file to the size of the last checkpoint. */ - WT_RET(__verify_last_truncate(session, block, ckpt)); + /* Set the size of the file to the size of the last checkpoint. */ + WT_RET(__verify_set_file_size(session, block, ckpt)); /* * We're done if the file has no data pages (this happens if we verify @@ -105,9 +115,6 @@ __wt_block_verify_start(WT_SESSION_IMPL *session, */ WT_RET(__verify_last_avail(session, block, ckpt)); - /* Configuration: strict behavior on any error. */ - WT_RET(__wt_config_gets(session, cfg, "strict", &cval)); - block->verify_strict = cval.val != 0; return (0); } @@ -144,21 +151,40 @@ err: __wt_block_ckpt_destroy(session, ci); } /* - * __verify_last_truncate -- - * Truncate the file to the last checkpoint's size. + * __verify_set_file_size -- + * Set the file size to the last checkpoint's size. */ static int -__verify_last_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt) +__verify_set_file_size(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt) { WT_BLOCK_CKPT *ci, _ci; WT_DECL_RET; + WT_DECL_ITEM(tmp); ci = &_ci; WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name)); WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci)); - WT_ERR_BUSY_OK(__wt_block_truncate(session, block, ci->file_size)); + + if (block->verify_layout) { + WT_ERR(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(__wt_msg(session, "%s: physical size %s", block->name, + __wt_buf_set_size( + session, (uint64_t)block->size, true, tmp))); + WT_ERR( + __wt_msg(session, "%s: correcting to %s checkpoint size %s", + block->name, ckpt->name, __wt_buf_set_size( + session, (uint64_t)ci->file_size, true, tmp))); + } + + /* + * Verify is read-only. Set the block's file size information as if we + * truncated the file during checkpoint load, so references to blocks + * after last checkpoint's file size fail. 
+ */ + block->size = block->extend_size = ci->file_size; err: __wt_block_ckpt_destroy(session, ci); + __wt_scr_free(session, &tmp); return (ret); } @@ -247,9 +273,9 @@ __wt_verify_ckpt_load( } /* - * We don't need to list of blocks on a checkpoint's avail list, but we - * read it to ensure it wasn't corrupted. We could confirm correctness - * of intermediate avail lists (that is, if they're logically the result + * We don't need the blocks on a checkpoint's avail list, but we read it + * to ensure it wasn't corrupted. We could confirm correctness of the + * intermediate avail lists (that is, if they're logically the result * of the allocations and discards to this point). We don't because the * only avail list ever used is the one for the last checkpoint, which * is separately verified by checking it against all of the blocks found @@ -437,6 +463,7 @@ __verify_filefrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block) __bit_set(block->fragfile, last); } +#ifdef HAVE_VERBOSE if (!WT_VERBOSE_ISSET(session, WT_VERB_VERIFY)) continue; @@ -444,6 +471,7 @@ __verify_filefrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block) "file range %" PRIuMAX "-%" PRIuMAX " never verified", (uintmax_t)WT_FRAG_TO_OFF(block, first), (uintmax_t)WT_FRAG_TO_OFF(block, last)); +#endif } if (count == 0) return (0); @@ -528,6 +556,7 @@ __verify_ckptfrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block) __bit_clear(block->fragckpt, last); } +#ifdef HAVE_VERBOSE if (!WT_VERBOSE_ISSET(session, WT_VERB_VERIFY)) continue; @@ -535,6 +564,7 @@ __verify_ckptfrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block) "checkpoint range %" PRIuMAX "-%" PRIuMAX " never verified", (uintmax_t)WT_FRAG_TO_OFF(block, first), (uintmax_t)WT_FRAG_TO_OFF(block, last)); +#endif } if (count == 0) diff --git a/src/third_party/wiredtiger/src/block/block_write.c b/src/third_party/wiredtiger/src/block/block_write.c index 134272b52f9..1fefeee09da 100644 --- a/src/third_party/wiredtiger/src/block/block_write.c +++ 
b/src/third_party/wiredtiger/src/block/block_write.c @@ -15,6 +15,24 @@ int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) { + /* + * Backups are done by copying files outside of WiredTiger, potentially + * by system utilities. We cannot truncate the file during the backup + * window, we might surprise an application. + * + * Stop block truncation. This affects files that aren't involved in the + * backup (for example, doing incremental backups, which only copies log + * files, or targeted backups, stops all truncation). We may want a more + * targeted solution at some point. + */ + if (S2C(session)->hot_backup) + return (EBUSY); + + /* + * Additionally, the truncate might fail if there's a file mapping (if + * there's an open checkpoint on the file), in which case the underlying + * function returns EBUSY. + */ WT_RET(__wt_ftruncate(session, block->fh, len)); block->size = block->extend_size = len; @@ -30,27 +48,28 @@ int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size) { WT_DECL_RET; + WT_FILE_HANDLE *handle; + /* The file may not support this call. */ + handle = block->fh->handle; + if (handle->fh_advise == NULL) + return (0); + + /* The call may not be configured. */ if (block->os_cache_max == 0) return (0); /* * We're racing on the addition, but I'm not willing to serialize on it - * in the standard read path with more evidence it's needed. + * in the standard read path without evidence it's needed. */ if ((block->os_cache += added_size) <= block->os_cache_max) return (0); block->os_cache = 0; - WT_ERR(block->fh->fh_advise(session, - block->fh, (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)); - return (0); - -err: /* Ignore ENOTSUP, but don't try again. */ - if (ret != ENOTSUP) - return (ret); - block->os_cache_max = 0; - return (0); + ret = handle->fh_advise(handle, (WT_SESSION *)session, + (wt_off_t)0, (wt_off_t)0, WT_FILE_HANDLE_DONTNEED); + return (ret == EBUSY || ret == ENOTSUP ? 
0 : ret); } /* @@ -62,6 +81,7 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_FH *fh, wt_off_t offset, size_t align_size, bool *release_lockp) { WT_DECL_RET; + WT_FILE_HANDLE *handle; bool locked; /* @@ -107,7 +127,9 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, * based on the filesystem type, fall back to ftruncate in that case, * and remember that ftruncate requires locking. */ - if (fh->fallocate_available != WT_FALLOCATE_NOT_AVAILABLE) { + handle = fh->handle; + if (handle->fh_allocate != NULL || + handle->fh_allocate_nolock != NULL) { /* * Release any locally acquired lock if not needed to extend the * file, extending the file may require updating on-disk file's @@ -115,7 +137,7 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, * configure for file extension on systems that require locking * over the extend call.) */ - if (!fh->fallocate_requires_locking && *release_lockp) { + if (handle->fh_allocate_nolock != NULL && *release_lockp) { *release_lockp = locked = false; __wt_spin_unlock(session, &block->live_lock); } @@ -131,8 +153,7 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, if ((ret = __wt_fallocate( session, fh, block->size, block->extend_len * 2)) == 0) return (0); - if (ret != ENOTSUP) - return (ret); + WT_RET_ERROR_OK(ret, ENOTSUP); } /* @@ -155,9 +176,8 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, * The truncate might fail if there's a mapped file (in other words, if * there's an open checkpoint on the file), that's OK. 
*/ - if ((ret = __wt_ftruncate(session, fh, block->extend_size)) == EBUSY) - ret = 0; - return (ret); + WT_RET_BUSY_OK(__wt_ftruncate(session, fh, block->extend_size)); + return (0); } /* diff --git a/src/third_party/wiredtiger/src/bloom/bloom.c b/src/third_party/wiredtiger/src/bloom/bloom.c index 505630f12cf..e32544d5521 100644 --- a/src/third_party/wiredtiger/src/bloom/bloom.c +++ b/src/third_party/wiredtiger/src/bloom/bloom.c @@ -295,7 +295,7 @@ __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash) err: /* Don't return WT_NOTFOUND from a failed search. */ if (ret == WT_NOTFOUND) ret = WT_ERROR; - __wt_err(bloom->session, ret, "Failed lookup in bloom filter."); + __wt_err(bloom->session, ret, "Failed lookup in bloom filter"); return (ret); } diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c index 63b2e2abebc..70b3ba56e31 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curnext.c +++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c @@ -86,10 +86,10 @@ __cursor_fix_next(WT_CURSOR_BTREE *cbt, bool newpage) /* Initialize for each new page. */ if (newpage) { - cbt->last_standard_recno = __col_fix_last_recno(page); + cbt->last_standard_recno = __col_fix_last_recno(cbt->ref); if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); - __cursor_set_recno(cbt, page->pg_fix_recno); + __cursor_set_recno(cbt, cbt->ref->ref_recno); goto new_page; } @@ -107,7 +107,7 @@ new_page: cbt->ins = NULL; upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd); if (upd == NULL) { - cbt->v = __bit_getv_recno(page, cbt->recno, btree->bitcnt); + cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt); val->data = &cbt->v; } else val->data = WT_UPDATE_DATA(upd); @@ -179,10 +179,10 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage) /* Initialize for each new page. 
*/ if (newpage) { - cbt->last_standard_recno = __col_var_last_recno(page); + cbt->last_standard_recno = __col_var_last_recno(cbt->ref); if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); - __cursor_set_recno(cbt, page->pg_var_recno); + __cursor_set_recno(cbt, cbt->ref->ref_recno); goto new_page; } @@ -194,7 +194,7 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage) new_page: /* Find the matching WT_COL slot. */ if ((cip = - __col_var_search(page, cbt->recno, &rle_start)) == NULL) + __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL) return (WT_NOTFOUND); cbt->slot = WT_COL_SLOT(page, cip); @@ -558,7 +558,8 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) * page. */ cbt->last_standard_recno = page->type == WT_PAGE_COL_VAR ? - __col_var_last_recno(page) : __col_fix_last_recno(page); + __col_var_last_recno(cbt->ref) : + __col_fix_last_recno(cbt->ref); /* If we're traversing the append list, set the reference. */ if (cbt->ins_head != NULL && diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c index 7475c0f1312..872f648446c 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curprev.c +++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c @@ -128,12 +128,10 @@ static inline int __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage) { WT_ITEM *val; - WT_PAGE *page; WT_SESSION_IMPL *session; WT_UPDATE *upd; session = (WT_SESSION_IMPL *)cbt->iface.session; - page = cbt->ref->page; val = &cbt->iface.value; if (newpage) { @@ -176,8 +174,8 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage) * to a record number matching the first record on the page. */ if (cbt->ins == NULL && - (cbt->recno == page->pg_fix_recno || - __col_fix_last_recno(page) != 0)) + (cbt->recno == cbt->ref->ref_recno || + __col_fix_last_recno(cbt->ref) != 0)) return (WT_NOTFOUND); } @@ -234,7 +232,7 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage) /* Initialize for each new page. 
*/ if (newpage) { - cbt->last_standard_recno = __col_fix_last_recno(page); + cbt->last_standard_recno = __col_fix_last_recno(cbt->ref); if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->last_standard_recno); @@ -242,7 +240,7 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage) } /* Move to the previous entry and return the item. */ - if (cbt->recno == page->pg_fix_recno) + if (cbt->recno == cbt->ref->ref_recno) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->recno - 1); @@ -255,7 +253,7 @@ new_page: cbt->ins = NULL; upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd); if (upd == NULL) { - cbt->v = __bit_getv_recno(page, cbt->recno, btree->bitcnt); + cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt); val->data = &cbt->v; } else val->data = WT_UPDATE_DATA(upd); @@ -327,7 +325,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage) /* Initialize for each new page. */ if (newpage) { - cbt->last_standard_recno = __col_var_last_recno(page); + cbt->last_standard_recno = __col_var_last_recno(cbt->ref); if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->last_standard_recno); @@ -338,12 +336,12 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage) for (;;) { __cursor_set_recno(cbt, cbt->recno - 1); -new_page: if (cbt->recno < page->pg_var_recno) +new_page: if (cbt->recno < cbt->ref->ref_recno) return (WT_NOTFOUND); /* Find the matching WT_COL slot. 
*/ if ((cip = - __col_var_search(page, cbt->recno, &rle_start)) == NULL) + __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL) return (WT_NOTFOUND); cbt->slot = WT_COL_SLOT(page, cip); diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index 018eb6ed73a..9a57759570a 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -164,12 +164,12 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) * column-store pages don't have slots, but map one-to-one to * keys, check for retrieval past the end of the page. */ - if (cbt->recno >= page->pg_fix_recno + page->pg_fix_entries) + if (cbt->recno >= cbt->ref->ref_recno + page->pg_fix_entries) return (false); /* - * Updates aren't stored on the page, an update would have - * appeared as an "insert" object; no further checks to do. + * An update would have appeared as an "insert" object; no + * further checks to do. */ break; case BTREE_COL_VAR: @@ -179,19 +179,18 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) WT_ASSERT(session, cbt->slot < page->pg_var_entries); /* - * Column-store updates aren't stored on the page, instead they - * are stored as "insert" objects. If search returned an insert - * object we can't return, the returned on-page object must be - * checked for a match. + * Column-store updates are stored as "insert" objects. If + * search returned an insert object we can't return, the + * returned on-page object must be checked for a match. */ if (cbt->ins != NULL && !F_ISSET(cbt, WT_CBT_VAR_ONPAGE_MATCH)) return (false); /* - * Updates aren't stored on the page, an update would have - * appeared as an "insert" object; however, variable-length - * column store deletes are written into the backing store, - * check the cell for a record already deleted when read. 
+ * Although updates would have appeared as an "insert" objects, + * variable-length column store deletes are written into the + * backing store; check the cell for a record already deleted + * when read. */ cip = &page->pg_var_d[cbt->slot]; if ((cell = WT_COL_PTR(page, cip)) == NULL || @@ -211,9 +210,11 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) if (cbt->ins != NULL) return (false); - /* Updates are stored on the page, check for a delete. */ - if (page->pg_row_upd != NULL && (upd = __wt_txn_read( - session, page->pg_row_upd[cbt->slot])) != NULL) { + /* Check for an update. */ + if (page->modify != NULL && + page->modify->mod_row_update != NULL && + (upd = __wt_txn_read(session, + page->modify->mod_row_update[cbt->slot])) != NULL) { if (WT_UPDATE_DELETED_ISSET(upd)) return (false); if (updp != NULL) @@ -558,7 +559,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); ret = __cursor_row_modify(session, cbt, false); break; - WT_ILLEGAL_VALUE_ERR(session); } err: if (ret == WT_RESTART) { @@ -596,9 +596,12 @@ __curfile_update_check(WT_CURSOR_BTREE *cbt) return (0); if (cbt->ins != NULL) return (__wt_txn_update_check(session, cbt->ins->upd)); - if (btree->type == BTREE_ROW && cbt->ref->page->pg_row_upd != NULL) - return (__wt_txn_update_check( - session, cbt->ref->page->pg_row_upd[cbt->slot])); + + if (btree->type == BTREE_ROW && + cbt->ref->page->modify != NULL && + cbt->ref->page->modify->mod_row_update != NULL) + return (__wt_txn_update_check(session, + cbt->ref->page->modify->mod_row_update[cbt->slot])); return (0); } @@ -636,7 +639,8 @@ retry: WT_RET(__cursor_func_init(cbt, true)); break; case BTREE_COL_FIX: case BTREE_COL_VAR: - WT_ILLEGAL_VALUE_ERR(session); + WT_ERR(__wt_illegal_value(session, NULL)); + break; } err: if (ret == WT_RESTART) { @@ -714,7 +718,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); ret = __cursor_row_modify(session, cbt, true); break; - WT_ILLEGAL_VALUE_ERR(session); } err: if (ret == WT_RESTART) { @@ -805,7 +808,6 @@ retry: 
WT_RET(__cursor_func_init(cbt, true)); } ret = __cursor_row_modify(session, cbt, false); break; - WT_ILLEGAL_VALUE_ERR(session); } err: if (ret == WT_RESTART) { @@ -972,7 +974,6 @@ __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_RET(__wt_compare( session, a_arg->btree->collator, &a->key, &b->key, cmpp)); break; - WT_ILLEGAL_VALUE(session); } return (0); } @@ -1023,6 +1024,7 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) a = (WT_CURSOR *)a_arg; b = (WT_CURSOR *)b_arg; + cmp = 0; session = (WT_SESSION_IMPL *)a->session; /* Confirm both cursors reference the same object. */ @@ -1110,7 +1112,7 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session, int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool)) { WT_DECL_RET; - uint8_t *value; + const uint8_t *value; /* * Handle fixed-length column-store objects separately: for row-store @@ -1139,7 +1141,7 @@ retry: WT_RET(__wt_btcur_remove(start)); if ((ret = __wt_btcur_next(start, true)) != 0) break; start->compare = 0; /* Exact match */ - value = (uint8_t *)start->iface.value.data; + value = (const uint8_t *)start->iface.value.data; if (*value != 0 && (ret = rmfunc(session, start, 1)) != 0) break; diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index 8ce1463a0db..b1579d25dc6 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -19,7 +19,7 @@ typedef struct { * When using the standard event handlers, the debugging output has to * do its own message handling because its output isn't line-oriented. 
*/ - WT_FH *fh; /* Output file stream */ + FILE *fp; WT_ITEM *msg; /* Buffered message */ WT_ITEM *tmp; /* Temporary space */ @@ -36,17 +36,17 @@ static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *); static int __debug_dsk_cell(WT_DBG *, const WT_PAGE_HEADER *); static void __debug_dsk_col_fix(WT_DBG *, const WT_PAGE_HEADER *); static void __debug_item(WT_DBG *, const char *, const void *, size_t); -static int __debug_page(WT_DBG *, WT_PAGE *, uint32_t); -static void __debug_page_col_fix(WT_DBG *, WT_PAGE *); +static int __debug_page(WT_DBG *, WT_REF *, uint32_t); +static void __debug_page_col_fix(WT_DBG *, WT_REF *); static int __debug_page_col_int(WT_DBG *, WT_PAGE *, uint32_t); -static int __debug_page_col_var(WT_DBG *, WT_PAGE *); -static int __debug_page_metadata(WT_DBG *, WT_PAGE *); +static int __debug_page_col_var(WT_DBG *, WT_REF *); +static int __debug_page_metadata(WT_DBG *, WT_REF *); static int __debug_page_row_int(WT_DBG *, WT_PAGE *, uint32_t); static int __debug_page_row_leaf(WT_DBG *, WT_PAGE *); static void __debug_ref(WT_DBG *, WT_REF *); static void __debug_row_skip(WT_DBG *, WT_INSERT_HEAD *); static int __debug_tree( - WT_SESSION_IMPL *, WT_BTREE *, WT_PAGE *, const char *, uint32_t); + WT_SESSION_IMPL *, WT_BTREE *, WT_REF *, const char *, uint32_t); static void __debug_update(WT_DBG *, WT_UPDATE *, bool); static void __dmsg(WT_DBG *, const char *, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))); @@ -97,8 +97,11 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile) if (ofile == NULL) return (__wt_scr_alloc(session, 512, &ds->msg)); - return (__wt_open(session, ofile, WT_FILE_TYPE_REGULAR, - WT_OPEN_CREATE | WT_STREAM_LINE_BUFFER | WT_STREAM_WRITE, &ds->fh)); + if ((ds->fp = fopen(ofile, "w")) == NULL) + return (EIO); + __wt_stream_set_line_buffer(ds->fp); + + return (0); } /* @@ -127,7 +130,8 @@ __dmsg_wrapup(WT_DBG *ds) } /* Close any file we opened. 
*/ - (void)__wt_close(session, &ds->fh); + if (ds->fp != NULL) + (void)fclose(ds->fp); } /* @@ -152,7 +156,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...) * the output chunk, and pass it to the event handler once we see a * terminating newline. */ - if (ds->fh == NULL) { + if (ds->fp == NULL) { msg = ds->msg; for (;;) { p = (char *)msg->mem + msg->size; @@ -184,7 +188,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...) } } else { va_start(ap, fmt); - (void)__wt_vfprintf(session, ds->fh, fmt, ap); + (void)vfprintf(ds->fp, fmt, ap); va_end(ap); } } @@ -427,12 +431,12 @@ __debug_tree_shape_info(WT_PAGE *page) v = page->memory_footprint; if (v >= WT_GIGABYTE) snprintf(buf, sizeof(buf), - "(%p %" PRIu64 "G)", page, v / WT_GIGABYTE); + "(%p %" PRIu64 "G)", (void *)page, v / WT_GIGABYTE); else if (v >= WT_MEGABYTE) snprintf(buf, sizeof(buf), - "(%p %" PRIu64 "M)", page, v / WT_MEGABYTE); + "(%p %" PRIu64 "M)", (void *)page, v / WT_MEGABYTE); else - snprintf(buf, sizeof(buf), "(%p %" PRIu64 ")", page, v); + snprintf(buf, sizeof(buf), "(%p %" PRIu64 ")", (void *)page, v); return (buf); } @@ -498,10 +502,10 @@ __wt_debug_tree_shape( */ int __wt_debug_tree_all( - WT_SESSION_IMPL *session, WT_BTREE *btree, WT_PAGE *page, const char *ofile) + WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile) { return (__debug_tree(session, - btree, page, ofile, WT_DEBUG_TREE_LEAF | WT_DEBUG_TREE_WALK)); + btree, ref, ofile, WT_DEBUG_TREE_LEAF | WT_DEBUG_TREE_WALK)); } /* @@ -513,9 +517,9 @@ __wt_debug_tree_all( */ int __wt_debug_tree( - WT_SESSION_IMPL *session, WT_BTREE *btree, WT_PAGE *page, const char *ofile) + WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile) { - return (__debug_tree(session, btree, page, ofile, WT_DEBUG_TREE_WALK)); + return (__debug_tree(session, btree, ref, ofile, WT_DEBUG_TREE_WALK)); } /* @@ -523,7 +527,7 @@ __wt_debug_tree( * Dump the in-memory information for a page. 
*/ int -__wt_debug_page(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile) +__wt_debug_page(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile) { WT_DBG *ds, _ds; WT_DECL_RET; @@ -533,7 +537,7 @@ __wt_debug_page(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile) ds = &_ds; WT_RET(__debug_config(session, ds, ofile)); - ret = __debug_page(ds, page, WT_DEBUG_TREE_LEAF); + ret = __debug_page(ds, ref, WT_DEBUG_TREE_LEAF); __dmsg_wrapup(ds); @@ -549,9 +553,8 @@ __wt_debug_page(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile) * in this function */ static int -__debug_tree( - WT_SESSION_IMPL *session, WT_BTREE *btree, - WT_PAGE *page, const char *ofile, uint32_t flags) +__debug_tree(WT_SESSION_IMPL *session, + WT_BTREE *btree, WT_REF *ref, const char *ofile, uint32_t flags) { WT_DBG *ds, _ds; WT_DECL_RET; @@ -560,10 +563,10 @@ __debug_tree( WT_RET(__debug_config(session, ds, ofile)); /* A NULL page starts at the top of the tree -- it's a convenience. */ - if (page == NULL) - page = btree->root.page; + if (ref == NULL) + ref = &btree->root; - WT_WITH_BTREE(session, btree, ret = __debug_page(ds, page, flags)); + WT_WITH_BTREE(session, btree, ret = __debug_page(ds, ref, flags)); __dmsg_wrapup(ds); @@ -575,7 +578,7 @@ __debug_tree( * Dump the in-memory information for an in-memory page. */ static int -__debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags) +__debug_page(WT_DBG *ds, WT_REF *ref, uint32_t flags) { WT_DECL_RET; WT_SESSION_IMPL *session; @@ -583,32 +586,32 @@ __debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags) session = ds->session; /* Dump the page metadata. */ - WT_WITH_PAGE_INDEX(session, ret = __debug_page_metadata(ds, page)); + WT_WITH_PAGE_INDEX(session, ret = __debug_page_metadata(ds, ref)); WT_RET(ret); /* Dump the page. 
*/ - switch (page->type) { + switch (ref->page->type) { case WT_PAGE_COL_FIX: if (LF_ISSET(WT_DEBUG_TREE_LEAF)) - __debug_page_col_fix(ds, page); + __debug_page_col_fix(ds, ref); break; case WT_PAGE_COL_INT: WT_WITH_PAGE_INDEX(session, - ret = __debug_page_col_int(ds, page, flags)); + ret = __debug_page_col_int(ds, ref->page, flags)); WT_RET(ret); break; case WT_PAGE_COL_VAR: if (LF_ISSET(WT_DEBUG_TREE_LEAF)) - WT_RET(__debug_page_col_var(ds, page)); + WT_RET(__debug_page_col_var(ds, ref)); break; case WT_PAGE_ROW_INT: WT_WITH_PAGE_INDEX(session, - ret = __debug_page_row_int(ds, page, flags)); + ret = __debug_page_row_int(ds, ref->page, flags)); WT_RET(ret); break; case WT_PAGE_ROW_LEAF: if (LF_ISSET(WT_DEBUG_TREE_LEAF)) - WT_RET(__debug_page_row_leaf(ds, page)); + WT_RET(__debug_page_row_leaf(ds, ref->page)); break; WT_ILLEGAL_VALUE(session); } @@ -621,30 +624,32 @@ __debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags) * Dump an in-memory page's metadata. */ static int -__debug_page_metadata(WT_DBG *ds, WT_PAGE *page) +__debug_page_metadata(WT_DBG *ds, WT_REF *ref) { + WT_PAGE *page; WT_PAGE_INDEX *pindex; WT_PAGE_MODIFY *mod; WT_SESSION_IMPL *session; uint32_t entries; session = ds->session; + page = ref->page; mod = page->modify; - __dmsg(ds, "%p", page); + __dmsg(ds, "%p", (void *)page); switch (page->type) { case WT_PAGE_COL_INT: - __dmsg(ds, " recno %" PRIu64, page->pg_intl_recno); + __dmsg(ds, " recno %" PRIu64, ref->ref_recno); WT_INTL_INDEX_GET(session, page, pindex); entries = pindex->entries; break; case WT_PAGE_COL_FIX: - __dmsg(ds, " recno %" PRIu64, page->pg_fix_recno); + __dmsg(ds, " recno %" PRIu64, ref->ref_recno); entries = page->pg_fix_entries; break; case WT_PAGE_COL_VAR: - __dmsg(ds, " recno %" PRIu64, page->pg_var_recno); + __dmsg(ds, " recno %" PRIu64, ref->ref_recno); entries = page->pg_var_entries; break; case WT_PAGE_ROW_INT: @@ -658,7 +663,8 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page) } __dmsg(ds, ": %s\n", 
__wt_page_type_string(page->type)); - __dmsg(ds, "\t" "disk %p, entries %" PRIu32, page->dsk, entries); + __dmsg(ds, + "\t" "disk %p, entries %" PRIu32, (void *)page->dsk, entries); __dmsg(ds, ", %s", __wt_page_is_modified(page) ? "dirty" : "clean"); __dmsg(ds, ", %s", __wt_fair_islocked( session, &page->page_lock) ? "locked" : "unlocked"); @@ -707,10 +713,11 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page) * Dump an in-memory WT_PAGE_COL_FIX page. */ static void -__debug_page_col_fix(WT_DBG *ds, WT_PAGE *page) +__debug_page_col_fix(WT_DBG *ds, WT_REF *ref) { WT_BTREE *btree; WT_INSERT *ins; + WT_PAGE *page; const WT_PAGE_HEADER *dsk; WT_SESSION_IMPL *session; uint64_t recno; @@ -721,8 +728,9 @@ __debug_page_col_fix(WT_DBG *ds, WT_PAGE *page) session = ds->session; btree = S2BT(session); + page = ref->page; dsk = page->dsk; - recno = page->pg_fix_recno; + recno = ref->ref_recno; if (dsk != NULL) { ins = WT_SKIP_FIRST(WT_COL_UPDATE_SINGLE(page)); @@ -767,7 +775,7 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags) session = ds->session; WT_INTL_FOREACH_BEGIN(session, page, ref) { - __dmsg(ds, "\trecno %" PRIu64 "\n", ref->key.recno); + __dmsg(ds, "\trecno %" PRIu64 "\n", ref->ref_recno); __debug_ref(ds, ref); } WT_INTL_FOREACH_END; @@ -775,7 +783,7 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags) WT_INTL_FOREACH_BEGIN(session, page, ref) { if (ref->state == WT_REF_MEM) { __dmsg(ds, "\n"); - WT_RET(__debug_page(ds, ref->page, flags)); + WT_RET(__debug_page(ds, ref, flags)); } } WT_INTL_FOREACH_END; @@ -787,18 +795,20 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags) * Dump an in-memory WT_PAGE_COL_VAR page. 
*/ static int -__debug_page_col_var(WT_DBG *ds, WT_PAGE *page) +__debug_page_col_var(WT_DBG *ds, WT_REF *ref) { WT_CELL *cell; WT_CELL_UNPACK *unpack, _unpack; WT_COL *cip; WT_INSERT_HEAD *update; + WT_PAGE *page; uint64_t recno, rle; uint32_t i; char tag[64]; unpack = &_unpack; - recno = page->pg_var_recno; + page = ref->page; + recno = ref->ref_recno; WT_COL_FOREACH(page, cip, i) { if ((cell = WT_COL_PTR(page, cip)) == NULL) { @@ -849,7 +859,7 @@ __debug_page_row_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags) WT_INTL_FOREACH_BEGIN(session, page, ref) { if (ref->state == WT_REF_MEM) { __dmsg(ds, "\n"); - WT_RET(__debug_page(ds, ref->page, flags)); + WT_RET(__debug_page(ds, ref, flags)); } } WT_INTL_FOREACH_END; return (0); @@ -885,7 +895,7 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page) /* Dump the page's K/V pairs. */ WT_ROW_FOREACH(page, rip, i) { - WT_RET(__wt_row_leaf_key(session, page, rip, key, false)); + WT_ERR(__wt_row_leaf_key(session, page, rip, key, false)); __debug_item(ds, "K", key->data, key->size); if ((cell = __wt_row_leaf_value_cell(page, rip, NULL)) == NULL) @@ -952,8 +962,7 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) __dmsg(ds, "\tvalue {deleted}\n"); else if (hexbyte) { __dmsg(ds, "\t{"); - __debug_hex_byte(ds, - ((uint8_t *)WT_UPDATE_DATA(upd))[0]); + __debug_hex_byte(ds, *(uint8_t *)WT_UPDATE_DATA(upd)); __dmsg(ds, "}\n"); } else __debug_item(ds, @@ -982,10 +991,10 @@ __debug_ref(WT_DBG *ds, WT_REF *ref) __dmsg(ds, "deleted"); break; case WT_REF_LOCKED: - __dmsg(ds, "locked %p", ref->page); + __dmsg(ds, "locked %p", (void *)ref->page); break; case WT_REF_MEM: - __dmsg(ds, "memory %p", ref->page); + __dmsg(ds, "memory %p", (void *)ref->page); break; case WT_REF_READING: __dmsg(ds, "reading"); @@ -1147,14 +1156,14 @@ static void __debug_item(WT_DBG *ds, const char *tag, const void *data_arg, size_t size) { size_t i; - int ch; + u_char ch; const uint8_t *data; __dmsg(ds, "\t%s%s{", tag == NULL ? "" : tag, tag == NULL ? 
"" : " "); for (data = data_arg, i = 0; i < size; ++i, ++data) { ch = data[0]; - if (isprint(ch)) - __dmsg(ds, "%c", ch); + if (__wt_isprint(ch)) + __dmsg(ds, "%c", (int)ch); else __debug_hex_byte(ds, data[0]); } diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c index ba16dd204e8..54b7fedb31d 100644 --- a/src/third_party/wiredtiger/src/btree/bt_delete.c +++ b/src/third_party/wiredtiger/src/btree/bt_delete.c @@ -288,10 +288,9 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) * read-only or if the application never modifies the tree, we're not * able to do so.) */ - if (btree->modified) { - WT_RET(__wt_page_modify_init(session, page)); + WT_RET(__wt_page_modify_init(session, page)); + if (btree->modified) __wt_page_modify_set(session, page); - } /* * An operation is accessing a "deleted" page, and we're building an @@ -326,7 +325,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) /* Allocate the per-page update array. 
*/ WT_ERR(__wt_calloc_def(session, page->pg_row_entries, &upd_array)); - page->pg_row_upd = upd_array; + page->modify->mod_row_update = upd_array; /* * Fill in the per-reference update array with references to update diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c index 1181d92609f..a00bb7dc2b5 100644 --- a/src/third_party/wiredtiger/src/btree/bt_discard.c +++ b/src/third_party/wiredtiger/src/btree/bt_discard.c @@ -40,7 +40,6 @@ __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) { - WT_FH *fh; WT_PAGE *page; WT_PAGE_HEADER *dsk; WT_PAGE_MODIFY *mod; @@ -83,7 +82,7 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) if (hp != NULL) __wt_errx(session, "discarded page has hazard pointer: (%p: %s, line %d)", - hp->page, hp->file, hp->line); + (void *)hp->page, hp->file, hp->line); WT_ASSERT(session, hp == NULL); } #endif @@ -134,10 +133,11 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) dsk = (WT_PAGE_HEADER *)page->dsk; if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) __wt_overwrite_and_free_len(session, dsk, dsk->mem_size); - if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) { - fh = S2BT(session)->bm->block->fh; - (void)fh->fh_map_discard(session, fh, dsk, dsk->mem_size); - } + + /* Discard any mapped image. */ + if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) + (void)S2BT(session)->bm->map_discard( + S2BT(session)->bm, session, dsk, (size_t)dsk->mem_size); __wt_overwrite_and_free(session, page); } @@ -194,16 +194,33 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page) __free_skip_list( session, WT_SKIP_FIRST(append), update_ignore); __wt_free(session, append); - __wt_free(session, mod->mod_append); + __wt_free(session, mod->mod_col_append); } /* Free the insert/update array. 
*/ - if (mod->mod_update != NULL) - __free_skip_array(session, mod->mod_update, + if (mod->mod_col_update != NULL) + __free_skip_array(session, mod->mod_col_update, page->type == WT_PAGE_COL_FIX ? 1 : page->pg_var_entries, update_ignore); break; + case WT_PAGE_ROW_LEAF: + /* + * Free the insert array. + * + * Row-store tables have one additional slot in the insert array + * (the insert array has an extra slot to hold keys that sort + * before keys found on the original page). + */ + if (mod->mod_row_insert != NULL) + __free_skip_array(session, mod->mod_row_insert, + page->pg_row_entries + 1, update_ignore); + + /* Free the update array. */ + if (mod->mod_row_update != NULL) + __free_update(session, mod->mod_row_update, + page->pg_row_entries, update_ignore); + break; } /* Free the overflow on-page, reuse and transaction-cache skiplists. */ @@ -324,10 +341,6 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) WT_ROW *rip; uint32_t i; void *copy; - bool update_ignore; - - /* In some failed-split cases, we can't discard updates. */ - update_ignore = F_ISSET_ATOMIC(page, WT_PAGE_UPDATE_IGNORE); /* * Free the in-memory index array. @@ -342,22 +355,6 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) page, copy, &ikey, NULL, NULL, NULL); __wt_free(session, ikey); } - - /* - * Free the insert array. - * - * Row-store tables have one additional slot in the insert array (the - * insert array has an extra slot to hold keys that sort before keys - * found on the original page). - */ - if (page->pg_row_ins != NULL) - __free_skip_array(session, - page->pg_row_ins, page->pg_row_entries + 1, update_ignore); - - /* Free the update array. 
*/ - if (page->pg_row_upd != NULL) - __free_update(session, - page->pg_row_upd, page->pg_row_entries, update_ignore); } /* diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 02eea9c2f0c..c97e05d74a7 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -371,7 +371,7 @@ __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno) root_ref->page = root; root_ref->state = WT_REF_MEM; - root_ref->key.recno = is_recno ? 1 : WT_RECNO_OOB; + root_ref->ref_recno = is_recno ? 1 : WT_RECNO_OOB; root->pg_intl_parent_ref = root_ref; } @@ -495,7 +495,7 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation) case BTREE_COL_FIX: case BTREE_COL_VAR: WT_ERR(__wt_page_alloc( - session, WT_PAGE_COL_INT, 1, 1, true, &root)); + session, WT_PAGE_COL_INT, 1, true, &root)); root->pg_intl_parent_ref = &btree->root; pindex = WT_INTL_INDEX_GET_SAFE(root); @@ -504,11 +504,11 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation) ref->page = NULL; ref->addr = NULL; ref->state = WT_REF_DELETED; - ref->key.recno = 1; + ref->ref_recno = 1; break; case BTREE_ROW: WT_ERR(__wt_page_alloc( - session, WT_PAGE_ROW_INT, 0, 1, true, &root)); + session, WT_PAGE_ROW_INT, 1, true, &root)); root->pg_intl_parent_ref = &btree->root; pindex = WT_INTL_INDEX_GET_SAFE(root); @@ -519,12 +519,11 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation) ref->state = WT_REF_DELETED; WT_ERR(__wt_row_ikey_incr(session, root, 0, "", 1, ref)); break; - WT_ILLEGAL_VALUE_ERR(session); } /* Bulk loads require a leaf page for reconciliation: create it now. 
*/ if (F_ISSET(btree, WT_BTREE_BULK)) { - WT_ERR(__wt_btree_new_leaf_page(session, 1, &leaf)); + WT_ERR(__wt_btree_new_leaf_page(session, &leaf)); ref->page = leaf; ref->state = WT_REF_MEM; WT_ERR(__wt_page_modify_init(session, leaf)); @@ -548,8 +547,7 @@ err: if (leaf != NULL) * Create an empty leaf page. */ int -__wt_btree_new_leaf_page( - WT_SESSION_IMPL *session, uint64_t recno, WT_PAGE **pagep) +__wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) { WT_BTREE *btree; @@ -558,17 +556,16 @@ __wt_btree_new_leaf_page( switch (btree->type) { case BTREE_COL_FIX: WT_RET(__wt_page_alloc( - session, WT_PAGE_COL_FIX, recno, 0, false, pagep)); + session, WT_PAGE_COL_FIX, 0, false, pagep)); break; case BTREE_COL_VAR: WT_RET(__wt_page_alloc( - session, WT_PAGE_COL_VAR, recno, 0, false, pagep)); + session, WT_PAGE_COL_VAR, 0, false, pagep)); break; case BTREE_ROW: WT_RET(__wt_page_alloc( - session, WT_PAGE_ROW_LEAF, WT_RECNO_OOB, 0, false, pagep)); + session, WT_PAGE_ROW_LEAF, 0, false, pagep)); break; - WT_ILLEGAL_VALUE(session); } return (0); } @@ -639,7 +636,7 @@ __btree_get_last_recno(WT_SESSION_IMPL *session) page = next_walk->page; btree->last_recno = page->type == WT_PAGE_COL_VAR ? - __col_var_last_recno(page) : __col_fix_last_recno(page); + __col_var_last_recno(next_walk) : __col_fix_last_recno(next_walk); return (__wt_page_release(session, next_walk, 0)); } @@ -690,22 +687,19 @@ __btree_page_sizes(WT_SESSION_IMPL *session) "size (%" PRIu32 "B)", btree->allocsize); /* - * When a page is forced to split, we want at least 50 entries on its - * parent. - * - * Don't let pages grow larger than a quarter of the cache, with too- - * small caches, we can end up in a situation where nothing can be - * evicted. Take care getting the cache size: with a shared cache, - * it may not have been set. + * Don't let pages grow large compared to the cache size or we can end + * up in a situation where nothing can be evicted. 
Take care getting + * the cache size: with a shared cache, it may not have been set. */ WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval)); - btree->maxmempage = - WT_MAX((uint64_t)cval.val, 50 * (uint64_t)btree->maxleafpage); + btree->maxmempage = (uint64_t)cval.val; if (!F_ISSET(conn, WT_CONN_CACHE_POOL)) { if ((cache_size = conn->cache_size) > 0) btree->maxmempage = - WT_MIN(btree->maxmempage, cache_size / 4); + WT_MIN(btree->maxmempage, cache_size / 10); } + /* Enforce a lower bound of a single disk leaf page */ + btree->maxmempage = WT_MAX(btree->maxmempage, btree->maxleafpage); /* * Try in-memory splits once we hit 80% of the maximum in-memory page diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c index a1aaf2c7ea0..9e9d69c342e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_huffman.c +++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c @@ -133,10 +133,10 @@ static int __wt_huffman_read(WT_SESSION_IMPL *, * Check for a Huffman configuration file and return the file name. */ static int -__huffman_confchk_file( - WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v, bool *is_utf8p, WT_FH **fhp) +__huffman_confchk_file(WT_SESSION_IMPL *session, + WT_CONFIG_ITEM *v, bool *is_utf8p, WT_FSTREAM **fsp) { - WT_FH *fh; + WT_FSTREAM *fs; WT_DECL_RET; size_t len; char *fname; @@ -157,14 +157,13 @@ __huffman_confchk_file( /* Check the file exists. */ WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname)); - WT_ERR(__wt_open(session, fname, WT_FILE_TYPE_REGULAR, - WT_OPEN_FIXED | WT_OPEN_READONLY | WT_STREAM_READ, &fh)); + WT_ERR(__wt_fopen(session, fname, WT_OPEN_FIXED, WT_STREAM_READ, &fs)); /* Optionally return the file handle. 
*/ - if (fhp == NULL) - (void)__wt_close(session, &fh); + if (fsp == NULL) + (void)__wt_fclose(session, &fs); else - *fhp = fh; + *fsp = fs; err: __wt_free(session, fname); @@ -300,7 +299,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, struct __wt_huffman_table *table, *tp; WT_DECL_ITEM(tmp); WT_DECL_RET; - WT_FH *fh; + WT_FSTREAM *fs; int64_t symbol, frequency; u_int entries, lineno; int n; @@ -309,13 +308,13 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, *tablep = NULL; *entriesp = *numbytesp = 0; - fh = NULL; + fs = NULL; table = NULL; /* * Try and open the backing file. */ - WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fh)); + WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fs)); /* * UTF-8 table is 256 bytes, with a range of 0-255. @@ -333,7 +332,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, WT_ERR(__wt_scr_alloc(session, 0, &tmp)); for (tp = table, lineno = 1;; ++tp, ++lineno) { - WT_ERR(__wt_getline(session, tmp, fh)); + WT_ERR(__wt_getline(session, fs, tmp)); if (tmp->size == 0) break; n = sscanf( @@ -378,7 +377,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, if (0) { err: __wt_free(session, table); } - (void)__wt_close(session, &fh); + (void)__wt_fclose(session, &fs); __wt_scr_free(session, &tmp); return (ret); diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c index aaf906ca785..4339de6f25c 100644 --- a/src/third_party/wiredtiger/src/btree/bt_io.c +++ b/src/third_party/wiredtiger/src/btree/bt_io.c @@ -343,6 +343,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, * Checksum the data if the buffer isn't compressed or checksums are * configured. 
*/ + data_cksum = true; /* -Werror=maybe-uninitialized */ switch (btree->checksum) { case CKSUM_ON: data_cksum = true; @@ -351,7 +352,6 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, data_cksum = false; break; case CKSUM_UNCOMPRESSED: - default: data_cksum = !compressed; break; } diff --git a/src/third_party/wiredtiger/src/btree/bt_misc.c b/src/third_party/wiredtiger/src/btree/bt_misc.c index 7f188502a0a..b6e2cc07f5a 100644 --- a/src/third_party/wiredtiger/src/btree/bt_misc.c +++ b/src/third_party/wiredtiger/src/btree/bt_misc.c @@ -129,19 +129,3 @@ __wt_addr_string(WT_SESSION_IMPL *session, } return (buf->data); } - -/* - * __wt_buf_set_printable -- - * Set the contents of the buffer to a printable representation of a - * byte string. - */ -const char * -__wt_buf_set_printable( - WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf) -{ - if (__wt_raw_to_esc_hex(session, p, size, buf)) { - buf->data = "[Error]"; - buf->size = strlen("[Error]"); - } - return (buf->data); -} diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index 9fa0145bbdd..00ec8aa4494 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -10,7 +10,7 @@ static void __inmem_col_fix(WT_SESSION_IMPL *, WT_PAGE *); static void __inmem_col_int(WT_SESSION_IMPL *, WT_PAGE *); -static int __inmem_col_var(WT_SESSION_IMPL *, WT_PAGE *, size_t *); +static int __inmem_col_var(WT_SESSION_IMPL *, WT_PAGE *, uint64_t, size_t *); static int __inmem_row_int(WT_SESSION_IMPL *, WT_PAGE *, size_t *); static int __inmem_row_leaf(WT_SESSION_IMPL *, WT_PAGE *); static int __inmem_row_leaf_entries( @@ -21,8 +21,8 @@ static int __inmem_row_leaf_entries( * Create or read a page into the cache. 
*/ int -__wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, - uint64_t recno, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) +__wt_page_alloc(WT_SESSION_IMPL *session, + uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) { WT_CACHE *cache; WT_DECL_RET; @@ -67,13 +67,10 @@ __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, switch (type) { case WT_PAGE_COL_FIX: - page->pg_fix_recno = recno; page->pg_fix_entries = alloc_entries; break; case WT_PAGE_COL_INT: case WT_PAGE_ROW_INT: - page->pg_intl_recno = recno; - /* * Internal pages have an array of references to objects so they * can split. Allocate the array of references and optionally, @@ -105,7 +102,6 @@ err: if ((pindex = WT_INTL_INDEX_GET_SAFE(page)) != NULL) { } break; case WT_PAGE_COL_VAR: - page->pg_var_recno = recno; page->pg_var_d = (WT_COL *)((uint8_t *)page + sizeof(WT_PAGE)); page->pg_var_entries = alloc_entries; break; @@ -191,8 +187,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, } /* Allocate and initialize a new WT_PAGE. */ - WT_RET(__wt_page_alloc( - session, dsk->type, dsk->recno, alloc_entries, true, &page)); + WT_RET(__wt_page_alloc(session, dsk->type, alloc_entries, true, &page)); page->dsk = dsk; F_SET_ATOMIC(page, flags); @@ -211,7 +206,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, __inmem_col_int(session, page); break; case WT_PAGE_COL_VAR: - WT_ERR(__inmem_col_var(session, page, &size)); + WT_ERR(__inmem_col_var(session, page, dsk->recno, &size)); break; case WT_PAGE_ROW_INT: WT_ERR(__inmem_row_int(session, page, &size)); @@ -292,7 +287,7 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page) __wt_cell_unpack(cell, unpack); ref->addr = cell; - ref->key.recno = unpack->v; + ref->ref_recno = unpack->v; } } @@ -329,7 +324,8 @@ __inmem_col_var_repeats(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t *np) * column-store trees. 
*/ static int -__inmem_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) +__inmem_col_var( + WT_SESSION_IMPL *session, WT_PAGE *page, uint64_t recno, size_t *sizep) { WT_BTREE *btree; WT_COL *cip; @@ -337,13 +333,12 @@ __inmem_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) WT_CELL *cell; WT_CELL_UNPACK *unpack, _unpack; const WT_PAGE_HEADER *dsk; - uint64_t recno, rle; + uint64_t rle; size_t bytes_allocated; uint32_t i, indx, n, repeat_off; btree = S2BT(session); dsk = page->dsk; - recno = page->pg_var_recno; repeats = NULL; repeat_off = 0; diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index df5f5cc2df8..086500c8b2f 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -377,9 +377,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref) if (addr == NULL) { WT_ASSERT(session, previous_state == WT_REF_DELETED); - WT_ERR(__wt_btree_new_leaf_page(session, - btree->type == BTREE_ROW ? 
WT_RECNO_OOB : ref->key.recno, - &page)); + WT_ERR(__wt_btree_new_leaf_page(session, &page)); ref->page = page; goto done; } @@ -463,6 +461,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags btree = S2BT(session); + WT_STAT_FAST_CONN_INCR(session, cache_pages_requested); + WT_STAT_FAST_DATA_INCR(session, cache_pages_requested); for (evict_soon = stalled = false, force_attempts = 0, sleep_cnt = wait_cnt = 0;;) { switch (ref->state) { diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c index d94eb2ddd80..de54e8433a8 100644 --- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c +++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c @@ -90,7 +90,7 @@ __rebalance_leaf_append(WT_SESSION_IMPL *session, if (recno == WT_RECNO_OOB) WT_RET(__wt_row_ikey(session, 0, key, key_len, copy)); else - copy->key.recno = recno; + copy->ref_recno = recno; copy->page_del = NULL; return (0); @@ -147,8 +147,7 @@ __rebalance_internal(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs) leaf_next = (uint32_t)rs->leaf_next; /* Allocate a row-store root (internal) page and fill it in. */ - WT_RET(__wt_page_alloc(session, rs->type, - rs->type == WT_PAGE_COL_INT ? 1 : 0, leaf_next, false, &page)); + WT_RET(__wt_page_alloc(session, rs->type, leaf_next, false, &page)); page->pg_intl_parent_ref = &btree->root; WT_ERR(__wt_page_modify_init(session, page)); __wt_page_modify_set(session, page); diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c index ebc0499f6a2..8ef2db67e7b 100644 --- a/src/third_party/wiredtiger/src/btree/bt_ret.c +++ b/src/third_party/wiredtiger/src/btree/bt_ret.c @@ -46,7 +46,7 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) } /* Take the value from the original page. 
*/ - v = __bit_getv_recno(page, cbt->iface.recno, btree->bitcnt); + v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt); return (__wt_buf_set(session, &cursor->value, &v, 1)); case WT_PAGE_COL_VAR: /* diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c index 0e064d306b6..9b5e4daf74a 100644 --- a/src/third_party/wiredtiger/src/btree/bt_slvg.c +++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c @@ -116,8 +116,8 @@ struct __wt_track { static int __slvg_cleanup(WT_SESSION_IMPL *, WT_STUFF *); static int __slvg_col_build_internal(WT_SESSION_IMPL *, uint32_t, WT_STUFF *); static int __slvg_col_build_leaf(WT_SESSION_IMPL *, WT_TRACK *, WT_REF *); -static int __slvg_col_ovfl( - WT_SESSION_IMPL *, WT_TRACK *, WT_PAGE *, uint64_t, uint64_t); +static int __slvg_col_ovfl(WT_SESSION_IMPL *, + WT_TRACK *, WT_PAGE *, uint64_t, uint64_t, uint64_t); static int __slvg_col_range(WT_SESSION_IMPL *, WT_STUFF *); static int __slvg_col_range_missing(WT_SESSION_IMPL *, WT_STUFF *); static int __slvg_col_range_overlap( @@ -166,11 +166,13 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_DECL_RET; WT_STUFF *ss, stuff; uint32_t i, leaf_cnt; + bool evict_reset; WT_UNUSED(cfg); btree = S2BT(session); bm = btree->bm; + evict_reset = false; WT_CLEAR(stuff); ss = &stuff; @@ -182,6 +184,13 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp2)); /* + * Salvage handles its own page eviction; get exclusive access to the + * file, have eviction ignore the tree entirely. + */ + WT_ERR(__wt_evict_file_exclusive_on(session)); + evict_reset = true; + + /* * Step 1: * Inform the underlying block manager that we're salvaging the file. 
*/ @@ -295,13 +304,13 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) case WT_PAGE_COL_VAR: WT_WITH_PAGE_INDEX(session, ret = __slvg_col_build_internal( - session, leaf_cnt, ss)); + session, leaf_cnt, ss)); WT_ERR(ret); break; case WT_PAGE_ROW_LEAF: WT_WITH_PAGE_INDEX(session, ret = __slvg_row_build_internal( - session, leaf_cnt, ss)); + session, leaf_cnt, ss)); WT_ERR(ret); break; } @@ -341,6 +350,9 @@ err: WT_TRET(bm->salvage_end(bm, session)); if (ss->root_ref.page != NULL) __wt_ref_out(session, &ss->root_ref); + if (evict_reset) + __wt_evict_file_exclusive_off(session); + /* Discard the leaf and overflow page memory. */ WT_TRET(__slvg_cleanup(session, ss)); @@ -1159,7 +1171,7 @@ __slvg_col_build_internal( /* Allocate a column-store root (internal) page and fill it in. */ WT_RET(__wt_page_alloc( - session, WT_PAGE_COL_INT, 1, leaf_cnt, true, &page)); + session, WT_PAGE_COL_INT, leaf_cnt, true, &page)); WT_ERR(__slvg_modify_init(session, page)); pindex = WT_INTL_INDEX_GET_SAFE(page); @@ -1180,7 +1192,7 @@ __slvg_col_build_internal( ref->addr = addr; addr = NULL; - ref->key.recno = trk->col_start; + ref->ref_recno = trk->col_start; ref->state = WT_REF_DISK; /* @@ -1223,7 +1235,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) WT_DECL_RET; WT_PAGE *page; WT_SALVAGE_COOKIE *cookie, _cookie; - uint64_t skip, take; + uint64_t recno, skip, take; uint32_t *entriesp, save_entries; cookie = &_cookie; @@ -1243,7 +1255,8 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) * Calculate the number of K/V entries we are going to skip, and * the total number of K/V entries we'll take from this page. 
*/ - cookie->skip = skip = trk->col_start - page->pg_var_recno; + recno = page->dsk->recno; + cookie->skip = skip = trk->col_start - recno; cookie->take = take = (trk->col_stop - trk->col_start) + 1; WT_ERR(__wt_verbose(session, WT_VERB_SALVAGE, @@ -1255,7 +1268,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) /* Set the referenced flag on overflow pages we're using. */ if (page->type == WT_PAGE_COL_VAR && trk->trk_ovfl_cnt != 0) - WT_ERR(__slvg_col_ovfl(session, trk, page, skip, take)); + WT_ERR(__slvg_col_ovfl(session, trk, page, recno, skip, take)); /* * If we're missing some part of the range, the real start range is in @@ -1263,9 +1276,9 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) * reference as well as the page itself. */ if (trk->col_missing == 0) - page->pg_var_recno = trk->col_start; + ref->ref_recno = trk->col_start; else { - page->pg_var_recno = trk->col_missing; + ref->ref_recno = trk->col_missing; cookie->missing = trk->col_start - trk->col_missing; WT_ERR(__wt_verbose(session, WT_VERB_SALVAGE, @@ -1274,7 +1287,6 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) session, trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1), cookie->missing)); } - ref->key.recno = page->pg_var_recno; /* * We can't discard the original blocks associated with this page now. @@ -1338,21 +1350,20 @@ __slvg_col_ovfl_single( * Mark overflow items referenced by the merged page. */ static int -__slvg_col_ovfl(WT_SESSION_IMPL *session, - WT_TRACK *trk, WT_PAGE *page, uint64_t skip, uint64_t take) +__slvg_col_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk, + WT_PAGE *page, uint64_t recno, uint64_t skip, uint64_t take) { WT_CELL_UNPACK unpack; WT_CELL *cell; WT_COL *cip; WT_DECL_RET; - uint64_t recno, start, stop; + uint64_t start, stop; uint32_t i; /* * Merging a variable-length column-store page, and we took some number * of records, figure out which (if any) overflow records we used. 
*/ - recno = page->pg_var_recno; start = recno + skip; stop = (recno + skip + take) - 1; @@ -1816,7 +1827,7 @@ __slvg_row_build_internal( /* Allocate a row-store root (internal) page and fill it in. */ WT_RET(__wt_page_alloc( - session, WT_PAGE_ROW_INT, WT_RECNO_OOB, leaf_cnt, true, &page)); + session, WT_PAGE_ROW_INT, leaf_cnt, true, &page)); WT_ERR(__slvg_modify_init(session, page)); pindex = WT_INTL_INDEX_GET_SAFE(page); diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 00bea5a6773..7a05a883f83 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -207,8 +207,8 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) WT_INTL_FOREACH_BEGIN(session, page, ref) { WT_ASSERT(session, ref->home == page); - WT_ASSERT(session, ref->key.recno > recno); - recno = ref->key.recno; + WT_ASSERT(session, ref->ref_recno > recno); + recno = ref->ref_recno; } WT_INTL_FOREACH_END; break; case WT_PAGE_ROW_INT: @@ -298,7 +298,7 @@ static int __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, WT_REF **from_refp, size_t *decrp, WT_REF **to_refp, size_t *incrp) { - WT_ADDR *addr, *ref_addr; + WT_ADDR *addr; WT_CELL_UNPACK unpack; WT_DECL_RET; WT_IKEY *ikey; @@ -335,7 +335,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, if ((ikey = __wt_ref_key_instantiated(ref)) == NULL) { __wt_ref_key(from_home, ref, &key, &size); WT_RET(__wt_row_ikey(session, 0, key, size, ref)); - ikey = ref->key.ikey; + ikey = ref->ref_ikey; } else { WT_RET( __split_ovfl_key_cleanup(session, from_home, ref)); @@ -345,18 +345,13 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, } /* - * If there's no address at all (the page has never been written), or - * the address has already been instantiated, there's no work to do. - * Otherwise, the address still references a split page on-page cell, - * instantiate it. 
We can race with reconciliation and/or eviction of - * the child pages, be cautious: read the address and verify it, and - * only update it if the value is unchanged from the original. In the - * case of a race, the address must no longer reference the split page, - * we're done. + * If there's no address (the page has never been written), or the + * address has been instantiated, there's no work to do. Otherwise, + * instantiate the address in-memory, from the on-page cell. */ - WT_ORDERED_READ(ref_addr, ref->addr); - if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) { - __wt_cell_unpack((WT_CELL *)ref_addr, &unpack); + addr = ref->addr; + if (addr != NULL && !__wt_off_page(from_home, addr)) { + __wt_cell_unpack((WT_CELL *)ref->addr, &unpack); WT_RET(__wt_calloc_one(session, &addr)); if ((ret = __wt_strndup( session, unpack.data, unpack.size, &addr->addr)) != 0) { @@ -376,10 +371,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, break; WT_ILLEGAL_VALUE(session); } - if (!__wt_atomic_cas_ptr(&ref->addr, ref_addr, addr)) { - __wt_free(session, addr->addr); - __wt_free(session, addr); - } + ref->addr = addr; } /* And finally, copy the WT_REF pointer itself. */ @@ -537,7 +529,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) WT_REF **child_refp, *ref, **root_refp; WT_SPLIT_ERROR_PHASE complete; size_t child_incr, root_decr, root_incr, size; - uint64_t recno, split_gen; + uint64_t split_gen; uint32_t children, chunk, i, j, remain; uint32_t slots; void *p; @@ -601,10 +593,8 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) alloc_refp = alloc_index->index, i = 0; i < children; ++i) { slots = i == children - 1 ? remain : chunk; - recno = root->type == WT_PAGE_COL_INT ? 
- (*root_refp)->key.recno : WT_RECNO_OOB; WT_ERR(__wt_page_alloc( - session, root->type, recno, slots, false, &child)); + session, root->type, slots, false, &child)); /* * Initialize the page's child reference; we need a copy of the @@ -619,7 +609,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) WT_ERR(__wt_row_ikey(session, 0, p, size, ref)); root_incr += sizeof(WT_IKEY) + size; } else - ref->key.recno = recno; + ref->ref_recno = (*root_refp)->ref_recno; ref->state = WT_REF_MEM; /* Initialize the child page. */ @@ -745,7 +735,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, alloc_index = pindex = NULL; parent_decr = 0; - parent_entries = 0; empty_parent = false; complete = WT_ERR_RETURN; @@ -1022,7 +1011,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) WT_REF **child_refp, *page_ref, **page_refp, *ref; WT_SPLIT_ERROR_PHASE complete; size_t child_incr, page_decr, page_incr, parent_incr, size; - uint64_t recno, split_gen; + uint64_t split_gen; uint32_t children, chunk, i, j, remain; uint32_t slots; void *p; @@ -1107,10 +1096,8 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) for (alloc_refp = alloc_index->index + 1, i = 1; i < children; ++i) { slots = i == children - 1 ? remain : chunk; - recno = page->type == WT_PAGE_COL_INT ? - (*page_refp)->key.recno : WT_RECNO_OOB; WT_ERR(__wt_page_alloc( - session, page->type, recno, slots, false, &child)); + session, page->type, slots, false, &child)); /* * Initialize the page's child reference; we need a copy of the @@ -1125,7 +1112,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) WT_ERR(__wt_row_ikey(session, 0, p, size, ref)); parent_incr += sizeof(WT_IKEY) + size; } else - ref->key.recno = recno; + ref->ref_recno = (*page_refp)->ref_recno; ref->state = WT_REF_MEM; /* Initialize the child page. 
*/ @@ -1491,6 +1478,15 @@ __split_multi_inmem( uint32_t i, slot; /* + * In 04/2016, we removed column-store record numbers from the WT_PAGE + * structure, leading to hard-to-debug problems because we corrupt the + * page if we search it using the wrong initial record number. For now, + * assert the record number is set. + */ + WT_ASSERT(session, + orig->type != WT_PAGE_COL_VAR || ref->ref_recno != 0); + + /* * This code re-creates an in-memory page that is part of a set created * while evicting a large page, and adds references to any unresolved * update chains to the new page. We get here due to choosing to keep @@ -1533,7 +1529,7 @@ __split_multi_inmem( /* Build a key. */ if (supd->ins == NULL) { slot = WT_ROW_SLOT(orig, supd->rip); - upd = orig->pg_row_upd[slot]; + upd = orig->modify->mod_row_update[slot]; WT_ERR(__wt_row_leaf_key( session, orig, supd->rip, key, false)); @@ -1596,7 +1592,7 @@ __split_multi_inmem_final(WT_PAGE *orig, WT_MULTI *multi) case WT_PAGE_ROW_LEAF: if (supd->ins == NULL) { slot = WT_ROW_SLOT(orig, supd->rip); - orig->pg_row_upd[slot] = NULL; + orig->modify->mod_row_update[slot] = NULL; } else supd->ins->upd = NULL; break; @@ -1613,11 +1609,16 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref) /* * We failed creating new in-memory pages. For error-handling reasons, * we've left the update chains referenced by both the original and - * new pages. Discard the new pages, setting a flag so the discard code - * doesn't discard the updates on the page. + * new pages. Discard the new allocated WT_REF structures and their + * pages (setting a flag so the discard code doesn't discard the updates + * on the page). + * + * Our callers allocate WT_REF arrays, then individual WT_REFs, check + * for uninitialized information. 
*/ - if (ref->page != NULL) { - F_SET_ATOMIC(ref->page, WT_PAGE_UPDATE_IGNORE); + if (ref != NULL) { + if (ref->page != NULL) + F_SET_ATOMIC(ref->page, WT_PAGE_UPDATE_IGNORE); __wt_free_ref(session, ref, orig->type, true); } } @@ -1635,7 +1636,6 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_REF *ref; size_t incr; - addr = NULL; incr = 0; /* Allocate an underlying WT_REF. */ @@ -1643,9 +1643,24 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, ref = *refp; incr += sizeof(WT_REF); - /* Any parent reference is filled in by our caller. */ - ref->home = NULL; + /* + * Set the WT_REF key before (optionally) building the page, underlying + * column-store functions need the page's key space to search it. + */ + switch (page->type) { + case WT_PAGE_ROW_INT: + case WT_PAGE_ROW_LEAF: + ikey = multi->key.ikey; + WT_RET(__wt_row_ikey( + session, 0, WT_IKEY_DATA(ikey), ikey->size, ref)); + incr += sizeof(WT_IKEY) + ikey->size; + break; + default: + ref->ref_recno = multi->key.recno; + break; + } + /* If there's a disk image, build a page, otherwise set the address. */ if (multi->disk_image == NULL) { /* * Copy the address: we could simply take the buffer, but that @@ -1659,28 +1674,13 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, addr->type = multi->addr.type; WT_RET(__wt_strndup(session, multi->addr.addr, addr->size, &addr->addr)); - } else + ref->state = WT_REF_DISK; + } else { WT_RET(__split_multi_inmem(session, page, ref, multi)); - - switch (page->type) { - case WT_PAGE_ROW_INT: - case WT_PAGE_ROW_LEAF: - ikey = multi->key.ikey; - WT_RET(__wt_row_ikey( - session, 0, WT_IKEY_DATA(ikey), ikey->size, ref)); - incr += sizeof(WT_IKEY) + ikey->size; - break; - default: - ref->key.recno = multi->key.recno; - break; + ref->state = WT_REF_MEM; } - ref->state = addr != NULL ? WT_REF_DISK : WT_REF_MEM; - - /* - * If our caller wants to track the memory allocations, we have a return - * reference. - */ + /* Optionally return changes in the memory footprint. 
*/ if (incrp != NULL) *incrp += incr; return (0); @@ -1781,17 +1781,12 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) parent_incr += sizeof(WT_IKEY) + key->size; __wt_scr_free(session, &key); } else - child->key.recno = ref->key.recno; + child->ref_recno = ref->ref_recno; /* * The second page in the split is a new WT_REF/page pair. */ - if (type == WT_PAGE_ROW_LEAF) - WT_ERR(__wt_page_alloc(session, - type, WT_RECNO_OOB, 0, false, &right)); - else - WT_ERR(__wt_page_alloc(session, - type, WT_INSERT_RECNO(moved_ins), 0, false, &right)); + WT_ERR(__wt_page_alloc(session, type, 0, false, &right)); /* * The new page is dirty by definition, plus column-store splits update @@ -1801,11 +1796,15 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) __wt_page_modify_set(session, right); if (type == WT_PAGE_ROW_LEAF) { - WT_ERR(__wt_calloc_one(session, &right->pg_row_ins)); - WT_ERR(__wt_calloc_one(session, &right->pg_row_ins[0])); + WT_ERR(__wt_calloc_one( + session, &right->modify->mod_row_insert)); + WT_ERR(__wt_calloc_one( + session, &right->modify->mod_row_insert[0])); } else { - WT_ERR(__wt_calloc_one(session, &right->modify->mod_append)); - WT_ERR(__wt_calloc_one(session, &right->modify->mod_append[0])); + WT_ERR(__wt_calloc_one( + session, &right->modify->mod_col_append)); + WT_ERR(__wt_calloc_one( + session, &right->modify->mod_col_append[0])); } right_incr += sizeof(WT_INSERT_HEAD); right_incr += sizeof(WT_INSERT_HEAD *); @@ -1822,7 +1821,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) child)); parent_incr += sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins); } else - child->key.recno = WT_INSERT_RECNO(moved_ins); + child->ref_recno = WT_INSERT_RECNO(moved_ins); /* * Allocation operations completed, we're going to split. 
@@ -1831,8 +1830,8 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) */ if (type != WT_PAGE_ROW_LEAF) { WT_ASSERT(session, - page->modify->mod_split_recno == WT_RECNO_OOB); - page->modify->mod_split_recno = child->key.recno; + page->modify->mod_col_split_recno == WT_RECNO_OOB); + page->modify->mod_col_split_recno = child->ref_recno; } /* @@ -1842,8 +1841,11 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) */ for (i = 0; i < WT_SKIP_MAXDEPTH && ins_head->tail[i] == moved_ins; ++i) ; - WT_MEM_TRANSFER(page_decr, right_incr, sizeof(WT_INSERT) + - (size_t)i * sizeof(WT_INSERT *) + WT_INSERT_KEY_SIZE(moved_ins)); + WT_MEM_TRANSFER(page_decr, right_incr, + sizeof(WT_INSERT) + (size_t)i * sizeof(WT_INSERT *)); + if (type == WT_PAGE_ROW_LEAF) + WT_MEM_TRANSFER( + page_decr, right_incr, WT_INSERT_KEY_SIZE(moved_ins)); WT_MEM_TRANSFER( page_decr, right_incr, __wt_update_list_memsize(moved_ins->upd)); @@ -1856,7 +1858,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) * can be ignored.) */ tmp_ins_head = type == WT_PAGE_ROW_LEAF ? - right->pg_row_ins[0] : right->modify->mod_append[0]; + right->modify->mod_row_insert[0] : right->modify->mod_col_append[0]; tmp_ins_head->head[0] = tmp_ins_head->tail[0] = moved_ins; /* @@ -1952,9 +1954,6 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) /* * Update the page accounting. - * - * XXX - * If we fail to split the parent, the page's accounting will be wrong. */ __wt_cache_page_inmem_decr(session, page, page_decr); __wt_cache_page_inmem_incr(session, right, right_incr); @@ -1978,7 +1977,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) * Reset the split column-store page record. */ if (type != WT_PAGE_ROW_LEAF) - page->modify->mod_split_recno = WT_RECNO_OOB; + page->modify->mod_col_split_recno = WT_RECNO_OOB; /* * Clear the allocated page's reference to the moved insert list element @@ -1991,15 +1990,18 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) * lists have. 
*/ if (type == WT_PAGE_ROW_LEAF) - right->pg_row_ins[0]->head[0] = - right->pg_row_ins[0]->tail[0] = NULL; + right->modify->mod_row_insert[0]->head[0] = + right->modify->mod_row_insert[0]->tail[0] = NULL; else - right->modify->mod_append[0]->head[0] = - right->modify->mod_append[0]->tail[0] = NULL; + right->modify->mod_col_append[0]->head[0] = + right->modify->mod_col_append[0]->tail[0] = NULL; ins_head->tail[0]->next[0] = moved_ins; ins_head->tail[0] = moved_ins; + /* Fix up accounting for the page size. */ + __wt_cache_page_inmem_incr(session, page, page_decr); + err: if (split_ref[0] != NULL) { /* * The address was moved to the replacement WT_REF, restore it. @@ -2007,12 +2009,12 @@ err: if (split_ref[0] != NULL) { ref->addr = split_ref[0]->addr; if (type == WT_PAGE_ROW_LEAF) - __wt_free(session, split_ref[0]->key.ikey); + __wt_free(session, split_ref[0]->ref_ikey); __wt_free(session, split_ref[0]); } if (split_ref[1] != NULL) { if (type == WT_PAGE_ROW_LEAF) - __wt_free(session, split_ref[1]->key.ikey); + __wt_free(session, split_ref[1]->ref_ikey); __wt_free(session, split_ref[1]); } if (right != NULL) { @@ -2178,7 +2180,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref) WT_DECL_RET; WT_PAGE *page; WT_PAGE_MODIFY *mod; - WT_REF new; + WT_REF *new; page = ref->page; mod = page->modify; @@ -2195,9 +2197,15 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref) * exactly what we want to do. * * Build the new page. + * + * Allocate a WT_REF because the error path uses routines that will + * free memory. The only field we need to set is the record number, as + * it's used by the search routines. */ - memset(&new, 0, sizeof(new)); - WT_ERR(__split_multi_inmem(session, page, &new, &mod->mod_multi[0])); + WT_RET(__wt_calloc_one(session, &new)); + new->ref_recno = ref->ref_recno; + + WT_ERR(__split_multi_inmem(session, page, new, &mod->mod_multi[0])); /* * The rewrite succeeded, we can no longer fail. 
@@ -2217,11 +2225,12 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref) __wt_ref_out(session, ref); /* Swap the new page into place. */ - ref->page = new.page; + ref->page = new->page; WT_PUBLISH(ref->state, WT_REF_MEM); + __wt_free(session, new); return (0); -err: __split_multi_inmem_fail(session, page, &new); +err: __split_multi_inmem_fail(session, page, new); return (ret); } diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c index f1e3c0b40d5..3d5abf34147 100644 --- a/src/third_party/wiredtiger/src/btree/bt_stat.c +++ b/src/third_party/wiredtiger/src/btree/bt_stat.c @@ -41,9 +41,6 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage); WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue); - WT_STAT_SET(session, stats, cache_bytes_inuse, - __wt_btree_bytes_inuse(session)); - /* Everything else is really, really expensive. */ if (!F_ISSET(cst, WT_CONN_STAT_ALL)) return (0); diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index df4ceea8ffa..da6c53aa316 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -26,14 +26,12 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages; uint64_t oldest_id, saved_snap_min; uint32_t flags; - u_int saved_evict_walk_period; conn = S2C(session); btree = S2BT(session); walk = NULL; txn = &session->txn; saved_snap_min = WT_SESSION_TXN_STATE(session)->snap_min; - saved_evict_walk_period = btree->evict_walk_period; flags = WT_READ_CACHE | WT_READ_NO_GEN; internal_bytes = leaf_bytes = 0; @@ -98,8 +96,10 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * snapshot now. * * All changes committed up to this point should be included. 
- * We don't update the snapshot in between pages because (a) - * the metadata shouldn't be that big, and (b) if we do ever + * We don't update the snapshot in between pages because the + * metadata shouldn't have many pages. Instead, read-committed + * isolation ensures that all metadata updates completed before + * the checkpoint are included. */ if (txn->isolation == WT_ISO_READ_COMMITTED) WT_ERR(__wt_txn_get_snapshot(session)); @@ -188,7 +188,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) break; case WT_SYNC_CLOSE: case WT_SYNC_DISCARD: - WT_ILLEGAL_VALUE_ERR(session); + WT_ERR(__wt_illegal_value(session, NULL)); + break; } if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) { @@ -238,10 +239,10 @@ err: /* On error, clear any left-over tree walk. */ WT_FULL_BARRIER(); /* - * In case this tree was being skipped by the eviction server - * during the checkpoint, restore the previous state. + * If this tree was being skipped by the eviction server during + * the checkpoint, clear the wait. */ - btree->evict_walk_period = saved_evict_walk_period; + btree->evict_walk_period = 0; /* * Wake the eviction server, in case application threads have @@ -273,6 +274,8 @@ err: /* On error, clear any left-over tree walk. 
*/ int __wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op) { + WT_DECL_RET; + switch (op) { case WT_SYNC_CHECKPOINT: case WT_SYNC_CLOSE: @@ -292,10 +295,12 @@ __wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op) switch (op) { case WT_SYNC_CHECKPOINT: case WT_SYNC_WRITE_LEAVES: - return (__sync_file(session, op)); + ret = __sync_file(session, op); + break; case WT_SYNC_CLOSE: case WT_SYNC_DISCARD: - return (__wt_evict_file(session, op)); - WT_ILLEGAL_VALUE(session); + ret = __wt_evict_file(session, op); + break; } + return (ret); } diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c index 83dc7924312..0a04c387a0f 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c @@ -22,13 +22,13 @@ typedef struct { #define WT_VRFY_DUMP(vs) \ ((vs)->dump_address || \ - (vs)->dump_blocks || (vs)->dump_pages || (vs)->dump_shape) + (vs)->dump_blocks || (vs)->dump_layout || (vs)->dump_pages) bool dump_address; /* Configure: dump special */ bool dump_blocks; + bool dump_layout; bool dump_pages; - bool dump_shape; - - u_int depth, depth_internal[100], depth_leaf[100]; + /* Page layout information */ + uint64_t depth, depth_internal[100], depth_leaf[100]; WT_ITEM *tmp1, *tmp2, *tmp3, *tmp4; /* Temporary buffers */ } WT_VSTUFF; @@ -59,12 +59,12 @@ __verify_config(WT_SESSION_IMPL *session, const char *cfg[], WT_VSTUFF *vs) WT_RET(__wt_config_gets(session, cfg, "dump_blocks", &cval)); vs->dump_blocks = cval.val != 0; + WT_RET(__wt_config_gets(session, cfg, "dump_layout", &cval)); + vs->dump_layout = cval.val != 0; + WT_RET(__wt_config_gets(session, cfg, "dump_pages", &cval)); vs->dump_pages = cval.val != 0; - WT_RET(__wt_config_gets(session, cfg, "dump_shape", &cval)); - vs->dump_shape = cval.val != 0; - #if !defined(HAVE_DIAGNOSTIC) if (vs->dump_blocks || vs->dump_pages) WT_RET_MSG(session, ENOTSUP, @@ -112,33 +112,38 @@ __verify_config_offsets( } /* - * 
__verify_tree_shape -- + * __verify_layout -- * Dump the tree shape. */ static int -__verify_tree_shape(WT_SESSION_IMPL *session, WT_VSTUFF *vs) +__verify_layout(WT_SESSION_IMPL *session, WT_VSTUFF *vs) { - uint32_t total; + uint64_t total; size_t i; for (i = 0, total = 0; i < WT_ELEMENTS(vs->depth_internal); ++i) total += vs->depth_internal[i]; WT_RET(__wt_msg( - session, "Internal page tree-depth (total %" PRIu32 "):", total)); + session, "Internal page tree-depth (total %" PRIu64 "):", total)); for (i = 0; i < WT_ELEMENTS(vs->depth_internal); ++i) - if (vs->depth_internal[i] != 0) + if (vs->depth_internal[i] != 0) { WT_RET(__wt_msg(session, - "\t%03zu: %u", i, vs->depth_internal[i])); + "\t%03" WT_SIZET_FMT ": %" PRIu64, + i, vs->depth_internal[i])); + vs->depth_internal[i] = 0; + } for (i = 0, total = 0; i < WT_ELEMENTS(vs->depth_leaf); ++i) total += vs->depth_leaf[i]; WT_RET(__wt_msg( - session, "Leaf page tree-depth (total %" PRIu32 "):", total)); + session, "Leaf page tree-depth (total %" PRIu64 "):", total)); for (i = 0; i < WT_ELEMENTS(vs->depth_leaf); ++i) - if (vs->depth_leaf[i] != 0) + if (vs->depth_leaf[i] != 0) { WT_RET(__wt_msg(session, - "\t%03zu: %u", i, vs->depth_leaf[i])); - + "\t%03" WT_SIZET_FMT ": %" PRIu64, + i, vs->depth_leaf[i])); + vs->depth_leaf[i] = 0; + } return (0); } @@ -200,9 +205,11 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) /* House-keeping between checkpoints. */ __verify_checkpoint_reset(vs); - if (WT_VRFY_DUMP(vs)) + if (WT_VRFY_DUMP(vs)) { + WT_ERR(__wt_msg(session, "%s", WT_DIVIDER)); WT_ERR(__wt_msg(session, "%s: checkpoint %s", btree->dhandle->name, ckpt->name)); + } /* Load the checkpoint. */ WT_ERR(bm->checkpoint_load(bm, session, @@ -234,8 +241,8 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(ret); /* Display the tree shape. 
*/ - if (vs->dump_shape) - WT_ERR(__verify_tree_shape(session, vs)); + if (vs->dump_layout) + WT_ERR(__verify_layout(session, vs)); } done: @@ -355,7 +362,7 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs) if (vs->dump_blocks) WT_RET(__wt_debug_disk(session, page->dsk, NULL)); if (vs->dump_pages) - WT_RET(__wt_debug_page(session, page, NULL)); + WT_RET(__wt_debug_page(session, ref, NULL)); #endif /* @@ -364,13 +371,11 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs) */ switch (page->type) { case WT_PAGE_COL_FIX: - recno = page->pg_fix_recno; - goto recno_chk; case WT_PAGE_COL_INT: - recno = page->pg_intl_recno; + recno = ref->ref_recno; goto recno_chk; case WT_PAGE_COL_VAR: - recno = page->pg_var_recno; + recno = ref->ref_recno; recno_chk: if (recno != vs->record_total + 1) WT_RET_MSG(session, WT_ERROR, "page at %s has a starting record of %" PRIu64 @@ -485,7 +490,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR, * reviewed to this point. */ ++entry; - if (child_ref->key.recno != vs->record_total + 1) { + if (child_ref->ref_recno != vs->record_total + 1) { WT_RET_MSG(session, WT_ERROR, "the starting record number in entry %" PRIu32 " of the column internal page at " @@ -494,7 +499,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR, entry, __wt_page_addr_string( session, child_ref, vs->tmp1), - child_ref->key.recno, + child_ref->ref_recno, vs->record_total + 1); } diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c index 5480a25b5ec..3a6fd8261ba 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c @@ -298,14 +298,11 @@ __verify_dsk_row( case WT_CELL_ADDR_LEAF_NO: case WT_CELL_KEY_OVFL: case WT_CELL_VALUE_OVFL: - ret = bm->addr_invalid( - bm, session, unpack->data, unpack->size); - WT_RET_ERROR_OK(ret, EINVAL); - if (ret == EINVAL) { + if ((ret = bm->addr_invalid( + bm, session, unpack->data, 
unpack->size)) == EINVAL) ret = __err_cell_corrupt_or_eof( session, cell_num, tag); - goto err; - } + WT_ERR(ret); break; } diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c index fd60b12538a..a7920da5267 100644 --- a/src/third_party/wiredtiger/src/btree/col_modify.c +++ b/src/third_party/wiredtiger/src/btree/col_modify.c @@ -55,7 +55,8 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, */ if (recno == WT_RECNO_OOB || recno > (btree->type == BTREE_COL_VAR ? - __col_var_last_recno(page) : __col_fix_last_recno(page))) + __col_var_last_recno(cbt->ref) : + __col_fix_last_recno(cbt->ref))) append = true; } @@ -107,17 +108,17 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, /* Allocate the append/update list reference as necessary. */ if (append) { WT_PAGE_ALLOC_AND_SWAP(session, - page, mod->mod_append, ins_headp, 1); - ins_headp = &mod->mod_append[0]; + page, mod->mod_col_append, ins_headp, 1); + ins_headp = &mod->mod_col_append[0]; } else if (page->type == WT_PAGE_COL_FIX) { WT_PAGE_ALLOC_AND_SWAP(session, - page, mod->mod_update, ins_headp, 1); - ins_headp = &mod->mod_update[0]; + page, mod->mod_col_update, ins_headp, 1); + ins_headp = &mod->mod_col_update[0]; } else { WT_PAGE_ALLOC_AND_SWAP(session, - page, mod->mod_update, ins_headp, + page, mod->mod_col_update, ins_headp, page->pg_var_entries); - ins_headp = &mod->mod_update[cbt->slot]; + ins_headp = &mod->mod_col_update[cbt->slot]; } /* Allocate the WT_INSERT_HEAD structure as necessary. */ @@ -142,8 +143,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, * it's easy (as opposed to in row-store) and a difficult bug to * otherwise diagnose. 
*/ - WT_ASSERT(session, mod->mod_split_recno == WT_RECNO_OOB || - (recno != WT_RECNO_OOB && mod->mod_split_recno > recno)); + WT_ASSERT(session, mod->mod_col_split_recno == WT_RECNO_OOB || + (recno != WT_RECNO_OOB && + mod->mod_col_split_recno > recno)); if (upd_arg == NULL) { WT_ERR( diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c index 4730267a545..6c96181d3bf 100644 --- a/src/third_party/wiredtiger/src/btree/col_srch.c +++ b/src/third_party/wiredtiger/src/btree/col_srch.c @@ -30,7 +30,7 @@ __check_leaf_key_range(WT_SESSION_IMPL *session, * Check if the search key is smaller than the parent's starting key for * this page. */ - if (recno < leaf->key.recno) { + if (recno < leaf->ref_recno) { cbt->compare = 1; /* page keys > search key */ return (0); } @@ -48,7 +48,7 @@ __check_leaf_key_range(WT_SESSION_IMPL *session, WT_INTL_INDEX_GET(session, leaf->home, pindex); indx = leaf->pindex_hint; if (indx + 1 < pindex->entries && pindex->index[indx] == leaf) - if (recno >= pindex->index[indx + 1]->key.recno) { + if (recno >= pindex->index[indx + 1]->ref_recno) { cbt->compare = -1; /* page keys < search key */ return (0); } @@ -133,14 +133,12 @@ restart: /* if (page->type != WT_PAGE_COL_INT) break; - WT_ASSERT(session, current->key.recno == page->pg_intl_recno); - WT_INTL_INDEX_GET(session, page, pindex); base = pindex->entries; descent = pindex->index[base - 1]; /* Fast path appends. */ - if (recno >= descent->key.recno) { + if (recno >= descent->ref_recno) { /* * If on the last slot (the key is larger than any key * on the page), check for an internal page split race. @@ -158,9 +156,9 @@ restart: /* indx = base + (limit >> 1); descent = pindex->index[indx]; - if (recno == descent->key.recno) + if (recno == descent->ref_recno) break; - if (recno < descent->key.recno) + if (recno < descent->ref_recno) continue; base = indx + 1; --limit; @@ -172,7 +170,7 @@ descend: /* * (last + 1) index. 
The slot for descent is the one before * base. */ - if (recno != descent->key.recno) { + if (recno != descent->ref_recno) { /* * We don't have to correct for base == 0 because the * only way for base to be 0 is if recno is the page's @@ -237,13 +235,13 @@ leaf_only: * do in that case, the record may be appended to the page. */ if (page->type == WT_PAGE_COL_FIX) { - if (recno < page->pg_fix_recno) { - cbt->recno = page->pg_fix_recno; + if (recno < current->ref_recno) { + cbt->recno = current->ref_recno; cbt->compare = 1; return (0); } - if (recno >= page->pg_fix_recno + page->pg_fix_entries) { - cbt->recno = page->pg_fix_recno + page->pg_fix_entries; + if (recno >= current->ref_recno + page->pg_fix_entries) { + cbt->recno = current->ref_recno + page->pg_fix_entries; goto past_end; } else { cbt->recno = recno; @@ -251,14 +249,14 @@ leaf_only: ins_head = WT_COL_UPDATE_SINGLE(page); } } else { - if (recno < page->pg_var_recno) { - cbt->recno = page->pg_var_recno; + if (recno < current->ref_recno) { + cbt->recno = current->ref_recno; cbt->slot = 0; cbt->compare = 1; return (0); } - if ((cip = __col_var_search(page, recno, NULL)) == NULL) { - cbt->recno = __col_var_last_recno(page); + if ((cip = __col_var_search(current, recno, NULL)) == NULL) { + cbt->recno = __col_var_last_recno(current); cbt->slot = page->pg_var_entries == 0 ? 
0 : page->pg_var_entries - 1; goto past_end; diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c index 9fff092d079..83fd2dad9e4 100644 --- a/src/third_party/wiredtiger/src/btree/row_key.c +++ b/src/third_party/wiredtiger/src/btree/row_key.c @@ -517,7 +517,7 @@ __wt_row_ikey(WT_SESSION_IMPL *session, { uintptr_t oldv; - oldv = (uintptr_t)ref->key.ikey; + oldv = (uintptr_t)ref->ref_ikey; WT_DIAGNOSTIC_YIELD; /* @@ -527,10 +527,10 @@ __wt_row_ikey(WT_SESSION_IMPL *session, WT_ASSERT(session, oldv == 0 || (oldv & WT_IK_FLAG) != 0); WT_ASSERT(session, ref->state != WT_REF_SPLIT); WT_ASSERT(session, - __wt_atomic_cas_ptr(&ref->key.ikey, (WT_IKEY *)oldv, ikey)); + __wt_atomic_cas_ptr(&ref->ref_ikey, (WT_IKEY *)oldv, ikey)); } #else - ref->key.ikey = ikey; + ref->ref_ikey = ikey; #endif return (0); } diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c index 176016bb340..f0424ff93b4 100644 --- a/src/third_party/wiredtiger/src/btree/row_modify.c +++ b/src/third_party/wiredtiger/src/btree/row_modify.c @@ -53,6 +53,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT *ins; WT_INSERT_HEAD *ins_head, **ins_headp; WT_PAGE *page; + WT_PAGE_MODIFY *mod; WT_UPDATE *old_upd, *upd, **upd_entry; size_t ins_size, upd_size; uint32_t ins_slot; @@ -70,6 +71,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, /* If we don't yet have a modify structure, we'll need one. */ WT_RET(__wt_page_modify_init(session, page)); + mod = page->modify; /* * Modify: allocate an update array as necessary, build a WT_UPDATE @@ -83,11 +85,12 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (cbt->compare == 0) { if (cbt->ins == NULL) { /* Allocate an update array as necessary. 
*/ - WT_PAGE_ALLOC_AND_SWAP(session, page, - page->pg_row_upd, upd_entry, page->pg_row_entries); + WT_PAGE_ALLOC_AND_SWAP(session, + page, mod->mod_row_update, + upd_entry, page->pg_row_entries); /* Set the WT_UPDATE array reference. */ - upd_entry = &page->pg_row_upd[cbt->slot]; + upd_entry = &mod->mod_row_update[cbt->slot]; } else upd_entry = &cbt->ins->upd; @@ -144,11 +147,11 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, * slot. That's hard, so we set a flag. */ WT_PAGE_ALLOC_AND_SWAP(session, page, - page->pg_row_ins, ins_headp, page->pg_row_entries + 1); + mod->mod_row_insert, ins_headp, page->pg_row_entries + 1); ins_slot = F_ISSET(cbt, WT_CBT_SEARCH_SMALLEST) ? page->pg_row_entries: cbt->slot; - ins_headp = &page->pg_row_ins[ins_slot]; + ins_headp = &mod->mod_row_insert[ins_slot]; /* Allocate the WT_INSERT_HEAD structure as necessary. */ WT_PAGE_ALLOC_AND_SWAP(session, page, *ins_headp, ins_head, 1); diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c index 6169a0a810a..4afcd74520f 100644 --- a/src/third_party/wiredtiger/src/btree/row_srch.c +++ b/src/third_party/wiredtiger/src/btree/row_srch.c @@ -634,14 +634,16 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_INSERT *ins, **start, **stop; WT_INSERT_HEAD *ins_head; WT_PAGE *page; + uint64_t samples; uint32_t choice, entries, i; int level; page = cbt->ref->page; - start = stop = NULL; /* [-Wconditional-uninitialized] */ entries = 0; /* [-Wconditional-uninitialized] */ + __cursor_pos_clear(cbt); + /* If the page has disk-based entries, select from them. */ if (page->pg_row_entries != 0) { cbt->compare = 0; @@ -688,7 +690,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) * Step down the skip list levels, selecting a random chunk of the name * space at each level. 
*/ - while (level > 0) { + for (samples = entries; level > 0; samples += entries) { /* * There are (entries) or (entries + 1) chunks of the name space * considered at each level. They are: between start and the 1st @@ -765,6 +767,16 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) cbt->ins_head = ins_head; cbt->compare = 0; + /* + * Random lookups in newly created collections can be slow if a page + * consists of a large skiplist. Schedule the page for eviction if we + * encounter a large skiplist. This is worthwhile because applications + * that take a sample often take many samples, so the overhead of + * traversing the skip list each time accumulates to real time. + */ + if (samples > 5000) + __wt_page_evict_soon(page); + return (0); } @@ -784,8 +796,6 @@ __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) btree = S2BT(session); current = NULL; - __cursor_pos_clear(cbt); - if (0) { restart: /* * Discard the currently held page and restart the search from diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c index fd541458fa8..27c2900fa98 100644 --- a/src/third_party/wiredtiger/src/cache/cache_las.c +++ b/src/third_party/wiredtiger/src/cache/cache_las.c @@ -42,6 +42,17 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) WT_STAT_SET(session, cstats, cache_lookaside_insert, v); v = WT_STAT_READ(dstats, cursor_remove); WT_STAT_SET(session, cstats, cache_lookaside_remove, v); + /* + * If we're clearing stats we need to clear the cursor values we just + * read. This does not clear the rest of the statistics in the + * lookaside data source stat cursor, but we own that namespace so we + * don't have to worry about users seeing inconsistent data source + * information. 
+ */ + if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_CLEAR)) { + WT_STAT_SET(session, dstats, cursor_insert, 0); + WT_STAT_SET(session, dstats, cursor_remove, 0); + } } /* diff --git a/src/third_party/wiredtiger/src/checksum/checksum.c b/src/third_party/wiredtiger/src/checksum/checksum.c new file mode 100644 index 00000000000..b6a76dacfd8 --- /dev/null +++ b/src/third_party/wiredtiger/src/checksum/checksum.c @@ -0,0 +1,1329 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +/* + * Slicing-by-8 algorithm by Michael E. Kounavis and Frank L. 
Berry, described + * in "Novel Table Lookup-Based Algorithms for High-Performance CRC Generation", + * IEEE Transactions on Computers, Volume 57 Issue 11, November 2008. + * + * See also Peter Kankowski's posting: + * http://www.strchr.com/crc32_popcnt + * + * The big endian version calculates the same result at each step, except the + * value of the crc is byte reversed from what it would be at that step for + * little endian. + */ + +#include "wt_internal.h" + +/* + * This file contains two implementations for computing CRC: one that uses + * hardware CRC instructions, available on newer x86_64/amd64, and one that uses + * a fast software algorithm. __wt_cksum() provides a common entry point that + * indirects to one of these two methods. + */ +static uint32_t (*__wt_cksum_func)(const void *chunk, size_t len); + +/* + * The CRC slicing tables are used by __wt_cksum_sw. + */ +static const uint32_t g_crc_slicing[8][256] = { +#ifdef WORDS_BIGENDIAN + /* + * Big endian tables have entries that are byte reversed from little + * endian tables. 
+ */ + { + 0x00000000, 0x03836bf2, 0xf7703be1, 0xf4f35013, + 0x1f979ac7, 0x1c14f135, 0xe8e7a126, 0xeb64cad4, + 0xcf58d98a, 0xccdbb278, 0x3828e26b, 0x3bab8999, + 0xd0cf434d, 0xd34c28bf, 0x27bf78ac, 0x243c135e, + 0x6fc75e10, 0x6c4435e2, 0x98b765f1, 0x9b340e03, + 0x7050c4d7, 0x73d3af25, 0x8720ff36, 0x84a394c4, + 0xa09f879a, 0xa31cec68, 0x57efbc7b, 0x546cd789, + 0xbf081d5d, 0xbc8b76af, 0x487826bc, 0x4bfb4d4e, + 0xde8ebd20, 0xdd0dd6d2, 0x29fe86c1, 0x2a7ded33, + 0xc11927e7, 0xc29a4c15, 0x36691c06, 0x35ea77f4, + 0x11d664aa, 0x12550f58, 0xe6a65f4b, 0xe52534b9, + 0x0e41fe6d, 0x0dc2959f, 0xf931c58c, 0xfab2ae7e, + 0xb149e330, 0xb2ca88c2, 0x4639d8d1, 0x45bab323, + 0xaede79f7, 0xad5d1205, 0x59ae4216, 0x5a2d29e4, + 0x7e113aba, 0x7d925148, 0x8961015b, 0x8ae26aa9, + 0x6186a07d, 0x6205cb8f, 0x96f69b9c, 0x9575f06e, + 0xbc1d7b41, 0xbf9e10b3, 0x4b6d40a0, 0x48ee2b52, + 0xa38ae186, 0xa0098a74, 0x54fada67, 0x5779b195, + 0x7345a2cb, 0x70c6c939, 0x8435992a, 0x87b6f2d8, + 0x6cd2380c, 0x6f5153fe, 0x9ba203ed, 0x9821681f, + 0xd3da2551, 0xd0594ea3, 0x24aa1eb0, 0x27297542, + 0xcc4dbf96, 0xcfced464, 0x3b3d8477, 0x38beef85, + 0x1c82fcdb, 0x1f019729, 0xebf2c73a, 0xe871acc8, + 0x0315661c, 0x00960dee, 0xf4655dfd, 0xf7e6360f, + 0x6293c661, 0x6110ad93, 0x95e3fd80, 0x96609672, + 0x7d045ca6, 0x7e873754, 0x8a746747, 0x89f70cb5, + 0xadcb1feb, 0xae487419, 0x5abb240a, 0x59384ff8, + 0xb25c852c, 0xb1dfeede, 0x452cbecd, 0x46afd53f, + 0x0d549871, 0x0ed7f383, 0xfa24a390, 0xf9a7c862, + 0x12c302b6, 0x11406944, 0xe5b33957, 0xe63052a5, + 0xc20c41fb, 0xc18f2a09, 0x357c7a1a, 0x36ff11e8, + 0xdd9bdb3c, 0xde18b0ce, 0x2aebe0dd, 0x29688b2f, + 0x783bf682, 0x7bb89d70, 0x8f4bcd63, 0x8cc8a691, + 0x67ac6c45, 0x642f07b7, 0x90dc57a4, 0x935f3c56, + 0xb7632f08, 0xb4e044fa, 0x401314e9, 0x43907f1b, + 0xa8f4b5cf, 0xab77de3d, 0x5f848e2e, 0x5c07e5dc, + 0x17fca892, 0x147fc360, 0xe08c9373, 0xe30ff881, + 0x086b3255, 0x0be859a7, 0xff1b09b4, 0xfc986246, + 0xd8a47118, 0xdb271aea, 0x2fd44af9, 0x2c57210b, + 0xc733ebdf, 0xc4b0802d, 0x3043d03e, 
0x33c0bbcc, + 0xa6b54ba2, 0xa5362050, 0x51c57043, 0x52461bb1, + 0xb922d165, 0xbaa1ba97, 0x4e52ea84, 0x4dd18176, + 0x69ed9228, 0x6a6ef9da, 0x9e9da9c9, 0x9d1ec23b, + 0x767a08ef, 0x75f9631d, 0x810a330e, 0x828958fc, + 0xc97215b2, 0xcaf17e40, 0x3e022e53, 0x3d8145a1, + 0xd6e58f75, 0xd566e487, 0x2195b494, 0x2216df66, + 0x062acc38, 0x05a9a7ca, 0xf15af7d9, 0xf2d99c2b, + 0x19bd56ff, 0x1a3e3d0d, 0xeecd6d1e, 0xed4e06ec, + 0xc4268dc3, 0xc7a5e631, 0x3356b622, 0x30d5ddd0, + 0xdbb11704, 0xd8327cf6, 0x2cc12ce5, 0x2f424717, + 0x0b7e5449, 0x08fd3fbb, 0xfc0e6fa8, 0xff8d045a, + 0x14e9ce8e, 0x176aa57c, 0xe399f56f, 0xe01a9e9d, + 0xabe1d3d3, 0xa862b821, 0x5c91e832, 0x5f1283c0, + 0xb4764914, 0xb7f522e6, 0x430672f5, 0x40851907, + 0x64b90a59, 0x673a61ab, 0x93c931b8, 0x904a5a4a, + 0x7b2e909e, 0x78adfb6c, 0x8c5eab7f, 0x8fddc08d, + 0x1aa830e3, 0x192b5b11, 0xedd80b02, 0xee5b60f0, + 0x053faa24, 0x06bcc1d6, 0xf24f91c5, 0xf1ccfa37, + 0xd5f0e969, 0xd673829b, 0x2280d288, 0x2103b97a, + 0xca6773ae, 0xc9e4185c, 0x3d17484f, 0x3e9423bd, + 0x756f6ef3, 0x76ec0501, 0x821f5512, 0x819c3ee0, + 0x6af8f434, 0x697b9fc6, 0x9d88cfd5, 0x9e0ba427, + 0xba37b779, 0xb9b4dc8b, 0x4d478c98, 0x4ec4e76a, + 0xa5a02dbe, 0xa623464c, 0x52d0165f, 0x51537dad + },{ + 0x00000000, 0x7798a213, 0xee304527, 0x99a8e734, + 0xdc618a4e, 0xabf9285d, 0x3251cf69, 0x45c96d7a, + 0xb8c3149d, 0xcf5bb68e, 0x56f351ba, 0x216bf3a9, + 0x64a29ed3, 0x133a3cc0, 0x8a92dbf4, 0xfd0a79e7, + 0x81f1c53f, 0xf669672c, 0x6fc18018, 0x1859220b, + 0x5d904f71, 0x2a08ed62, 0xb3a00a56, 0xc438a845, + 0x3932d1a2, 0x4eaa73b1, 0xd7029485, 0xa09a3696, + 0xe5535bec, 0x92cbf9ff, 0x0b631ecb, 0x7cfbbcd8, + 0x02e38b7f, 0x757b296c, 0xecd3ce58, 0x9b4b6c4b, + 0xde820131, 0xa91aa322, 0x30b24416, 0x472ae605, + 0xba209fe2, 0xcdb83df1, 0x5410dac5, 0x238878d6, + 0x664115ac, 0x11d9b7bf, 0x8871508b, 0xffe9f298, + 0x83124e40, 0xf48aec53, 0x6d220b67, 0x1abaa974, + 0x5f73c40e, 0x28eb661d, 0xb1438129, 0xc6db233a, + 0x3bd15add, 0x4c49f8ce, 0xd5e11ffa, 0xa279bde9, + 0xe7b0d093, 0x90287280, 
0x098095b4, 0x7e1837a7, + 0x04c617ff, 0x735eb5ec, 0xeaf652d8, 0x9d6ef0cb, + 0xd8a79db1, 0xaf3f3fa2, 0x3697d896, 0x410f7a85, + 0xbc050362, 0xcb9da171, 0x52354645, 0x25ade456, + 0x6064892c, 0x17fc2b3f, 0x8e54cc0b, 0xf9cc6e18, + 0x8537d2c0, 0xf2af70d3, 0x6b0797e7, 0x1c9f35f4, + 0x5956588e, 0x2ecefa9d, 0xb7661da9, 0xc0febfba, + 0x3df4c65d, 0x4a6c644e, 0xd3c4837a, 0xa45c2169, + 0xe1954c13, 0x960dee00, 0x0fa50934, 0x783dab27, + 0x06259c80, 0x71bd3e93, 0xe815d9a7, 0x9f8d7bb4, + 0xda4416ce, 0xaddcb4dd, 0x347453e9, 0x43ecf1fa, + 0xbee6881d, 0xc97e2a0e, 0x50d6cd3a, 0x274e6f29, + 0x62870253, 0x151fa040, 0x8cb74774, 0xfb2fe567, + 0x87d459bf, 0xf04cfbac, 0x69e41c98, 0x1e7cbe8b, + 0x5bb5d3f1, 0x2c2d71e2, 0xb58596d6, 0xc21d34c5, + 0x3f174d22, 0x488fef31, 0xd1270805, 0xa6bfaa16, + 0xe376c76c, 0x94ee657f, 0x0d46824b, 0x7ade2058, + 0xf9fac3fb, 0x8e6261e8, 0x17ca86dc, 0x605224cf, + 0x259b49b5, 0x5203eba6, 0xcbab0c92, 0xbc33ae81, + 0x4139d766, 0x36a17575, 0xaf099241, 0xd8913052, + 0x9d585d28, 0xeac0ff3b, 0x7368180f, 0x04f0ba1c, + 0x780b06c4, 0x0f93a4d7, 0x963b43e3, 0xe1a3e1f0, + 0xa46a8c8a, 0xd3f22e99, 0x4a5ac9ad, 0x3dc26bbe, + 0xc0c81259, 0xb750b04a, 0x2ef8577e, 0x5960f56d, + 0x1ca99817, 0x6b313a04, 0xf299dd30, 0x85017f23, + 0xfb194884, 0x8c81ea97, 0x15290da3, 0x62b1afb0, + 0x2778c2ca, 0x50e060d9, 0xc94887ed, 0xbed025fe, + 0x43da5c19, 0x3442fe0a, 0xadea193e, 0xda72bb2d, + 0x9fbbd657, 0xe8237444, 0x718b9370, 0x06133163, + 0x7ae88dbb, 0x0d702fa8, 0x94d8c89c, 0xe3406a8f, + 0xa68907f5, 0xd111a5e6, 0x48b942d2, 0x3f21e0c1, + 0xc22b9926, 0xb5b33b35, 0x2c1bdc01, 0x5b837e12, + 0x1e4a1368, 0x69d2b17b, 0xf07a564f, 0x87e2f45c, + 0xfd3cd404, 0x8aa47617, 0x130c9123, 0x64943330, + 0x215d5e4a, 0x56c5fc59, 0xcf6d1b6d, 0xb8f5b97e, + 0x45ffc099, 0x3267628a, 0xabcf85be, 0xdc5727ad, + 0x999e4ad7, 0xee06e8c4, 0x77ae0ff0, 0x0036ade3, + 0x7ccd113b, 0x0b55b328, 0x92fd541c, 0xe565f60f, + 0xa0ac9b75, 0xd7343966, 0x4e9cde52, 0x39047c41, + 0xc40e05a6, 0xb396a7b5, 0x2a3e4081, 0x5da6e292, + 0x186f8fe8, 0x6ff72dfb, 
0xf65fcacf, 0x81c768dc, + 0xffdf5f7b, 0x8847fd68, 0x11ef1a5c, 0x6677b84f, + 0x23bed535, 0x54267726, 0xcd8e9012, 0xba163201, + 0x471c4be6, 0x3084e9f5, 0xa92c0ec1, 0xdeb4acd2, + 0x9b7dc1a8, 0xece563bb, 0x754d848f, 0x02d5269c, + 0x7e2e9a44, 0x09b63857, 0x901edf63, 0xe7867d70, + 0xa24f100a, 0xd5d7b219, 0x4c7f552d, 0x3be7f73e, + 0xc6ed8ed9, 0xb1752cca, 0x28ddcbfe, 0x5f4569ed, + 0x1a8c0497, 0x6d14a684, 0xf4bc41b0, 0x8324e3a3 + },{ + 0x00000000, 0x7e9241a5, 0x0d526f4f, 0x73c02eea, + 0x1aa4de9e, 0x64369f3b, 0x17f6b1d1, 0x6964f074, + 0xc53e5138, 0xbbac109d, 0xc86c3e77, 0xb6fe7fd2, + 0xdf9a8fa6, 0xa108ce03, 0xd2c8e0e9, 0xac5aa14c, + 0x8a7da270, 0xf4efe3d5, 0x872fcd3f, 0xf9bd8c9a, + 0x90d97cee, 0xee4b3d4b, 0x9d8b13a1, 0xe3195204, + 0x4f43f348, 0x31d1b2ed, 0x42119c07, 0x3c83dda2, + 0x55e72dd6, 0x2b756c73, 0x58b54299, 0x2627033c, + 0x14fb44e1, 0x6a690544, 0x19a92bae, 0x673b6a0b, + 0x0e5f9a7f, 0x70cddbda, 0x030df530, 0x7d9fb495, + 0xd1c515d9, 0xaf57547c, 0xdc977a96, 0xa2053b33, + 0xcb61cb47, 0xb5f38ae2, 0xc633a408, 0xb8a1e5ad, + 0x9e86e691, 0xe014a734, 0x93d489de, 0xed46c87b, + 0x8422380f, 0xfab079aa, 0x89705740, 0xf7e216e5, + 0x5bb8b7a9, 0x252af60c, 0x56ead8e6, 0x28789943, + 0x411c6937, 0x3f8e2892, 0x4c4e0678, 0x32dc47dd, + 0xd98065c7, 0xa7122462, 0xd4d20a88, 0xaa404b2d, + 0xc324bb59, 0xbdb6fafc, 0xce76d416, 0xb0e495b3, + 0x1cbe34ff, 0x622c755a, 0x11ec5bb0, 0x6f7e1a15, + 0x061aea61, 0x7888abc4, 0x0b48852e, 0x75dac48b, + 0x53fdc7b7, 0x2d6f8612, 0x5eafa8f8, 0x203de95d, + 0x49591929, 0x37cb588c, 0x440b7666, 0x3a9937c3, + 0x96c3968f, 0xe851d72a, 0x9b91f9c0, 0xe503b865, + 0x8c674811, 0xf2f509b4, 0x8135275e, 0xffa766fb, + 0xcd7b2126, 0xb3e96083, 0xc0294e69, 0xbebb0fcc, + 0xd7dfffb8, 0xa94dbe1d, 0xda8d90f7, 0xa41fd152, + 0x0845701e, 0x76d731bb, 0x05171f51, 0x7b855ef4, + 0x12e1ae80, 0x6c73ef25, 0x1fb3c1cf, 0x6121806a, + 0x47068356, 0x3994c2f3, 0x4a54ec19, 0x34c6adbc, + 0x5da25dc8, 0x23301c6d, 0x50f03287, 0x2e627322, + 0x8238d26e, 0xfcaa93cb, 0x8f6abd21, 0xf1f8fc84, + 0x989c0cf0, 
0xe60e4d55, 0x95ce63bf, 0xeb5c221a, + 0x4377278b, 0x3de5662e, 0x4e2548c4, 0x30b70961, + 0x59d3f915, 0x2741b8b0, 0x5481965a, 0x2a13d7ff, + 0x864976b3, 0xf8db3716, 0x8b1b19fc, 0xf5895859, + 0x9ceda82d, 0xe27fe988, 0x91bfc762, 0xef2d86c7, + 0xc90a85fb, 0xb798c45e, 0xc458eab4, 0xbacaab11, + 0xd3ae5b65, 0xad3c1ac0, 0xdefc342a, 0xa06e758f, + 0x0c34d4c3, 0x72a69566, 0x0166bb8c, 0x7ff4fa29, + 0x16900a5d, 0x68024bf8, 0x1bc26512, 0x655024b7, + 0x578c636a, 0x291e22cf, 0x5ade0c25, 0x244c4d80, + 0x4d28bdf4, 0x33bafc51, 0x407ad2bb, 0x3ee8931e, + 0x92b23252, 0xec2073f7, 0x9fe05d1d, 0xe1721cb8, + 0x8816eccc, 0xf684ad69, 0x85448383, 0xfbd6c226, + 0xddf1c11a, 0xa36380bf, 0xd0a3ae55, 0xae31eff0, + 0xc7551f84, 0xb9c75e21, 0xca0770cb, 0xb495316e, + 0x18cf9022, 0x665dd187, 0x159dff6d, 0x6b0fbec8, + 0x026b4ebc, 0x7cf90f19, 0x0f3921f3, 0x71ab6056, + 0x9af7424c, 0xe46503e9, 0x97a52d03, 0xe9376ca6, + 0x80539cd2, 0xfec1dd77, 0x8d01f39d, 0xf393b238, + 0x5fc91374, 0x215b52d1, 0x529b7c3b, 0x2c093d9e, + 0x456dcdea, 0x3bff8c4f, 0x483fa2a5, 0x36ade300, + 0x108ae03c, 0x6e18a199, 0x1dd88f73, 0x634aced6, + 0x0a2e3ea2, 0x74bc7f07, 0x077c51ed, 0x79ee1048, + 0xd5b4b104, 0xab26f0a1, 0xd8e6de4b, 0xa6749fee, + 0xcf106f9a, 0xb1822e3f, 0xc24200d5, 0xbcd04170, + 0x8e0c06ad, 0xf09e4708, 0x835e69e2, 0xfdcc2847, + 0x94a8d833, 0xea3a9996, 0x99fab77c, 0xe768f6d9, + 0x4b325795, 0x35a01630, 0x466038da, 0x38f2797f, + 0x5196890b, 0x2f04c8ae, 0x5cc4e644, 0x2256a7e1, + 0x0471a4dd, 0x7ae3e578, 0x0923cb92, 0x77b18a37, + 0x1ed57a43, 0x60473be6, 0x1387150c, 0x6d1554a9, + 0xc14ff5e5, 0xbfddb440, 0xcc1d9aaa, 0xb28fdb0f, + 0xdbeb2b7b, 0xa5796ade, 0xd6b94434, 0xa82b0591 + },{ + 0x00000000, 0xb8aa45dd, 0x812367bf, 0x39892262, + 0xf331227b, 0x4b9b67a6, 0x721245c4, 0xcab80019, + 0xe66344f6, 0x5ec9012b, 0x67402349, 0xdfea6694, + 0x1552668d, 0xadf82350, 0x94710132, 0x2cdb44ef, + 0x3db164e9, 0x851b2134, 0xbc920356, 0x0438468b, + 0xce804692, 0x762a034f, 0x4fa3212d, 0xf70964f0, + 0xdbd2201f, 0x637865c2, 0x5af147a0, 0xe25b027d, + 
0x28e30264, 0x904947b9, 0xa9c065db, 0x116a2006, + 0x8b1425d7, 0x33be600a, 0x0a374268, 0xb29d07b5, + 0x782507ac, 0xc08f4271, 0xf9066013, 0x41ac25ce, + 0x6d776121, 0xd5dd24fc, 0xec54069e, 0x54fe4343, + 0x9e46435a, 0x26ec0687, 0x1f6524e5, 0xa7cf6138, + 0xb6a5413e, 0x0e0f04e3, 0x37862681, 0x8f2c635c, + 0x45946345, 0xfd3e2698, 0xc4b704fa, 0x7c1d4127, + 0x50c605c8, 0xe86c4015, 0xd1e56277, 0x694f27aa, + 0xa3f727b3, 0x1b5d626e, 0x22d4400c, 0x9a7e05d1, + 0xe75fa6ab, 0x5ff5e376, 0x667cc114, 0xded684c9, + 0x146e84d0, 0xacc4c10d, 0x954de36f, 0x2de7a6b2, + 0x013ce25d, 0xb996a780, 0x801f85e2, 0x38b5c03f, + 0xf20dc026, 0x4aa785fb, 0x732ea799, 0xcb84e244, + 0xdaeec242, 0x6244879f, 0x5bcda5fd, 0xe367e020, + 0x29dfe039, 0x9175a5e4, 0xa8fc8786, 0x1056c25b, + 0x3c8d86b4, 0x8427c369, 0xbdaee10b, 0x0504a4d6, + 0xcfbca4cf, 0x7716e112, 0x4e9fc370, 0xf63586ad, + 0x6c4b837c, 0xd4e1c6a1, 0xed68e4c3, 0x55c2a11e, + 0x9f7aa107, 0x27d0e4da, 0x1e59c6b8, 0xa6f38365, + 0x8a28c78a, 0x32828257, 0x0b0ba035, 0xb3a1e5e8, + 0x7919e5f1, 0xc1b3a02c, 0xf83a824e, 0x4090c793, + 0x51fae795, 0xe950a248, 0xd0d9802a, 0x6873c5f7, + 0xa2cbc5ee, 0x1a618033, 0x23e8a251, 0x9b42e78c, + 0xb799a363, 0x0f33e6be, 0x36bac4dc, 0x8e108101, + 0x44a88118, 0xfc02c4c5, 0xc58be6a7, 0x7d21a37a, + 0x3fc9a052, 0x8763e58f, 0xbeeac7ed, 0x06408230, + 0xccf88229, 0x7452c7f4, 0x4ddbe596, 0xf571a04b, + 0xd9aae4a4, 0x6100a179, 0x5889831b, 0xe023c6c6, + 0x2a9bc6df, 0x92318302, 0xabb8a160, 0x1312e4bd, + 0x0278c4bb, 0xbad28166, 0x835ba304, 0x3bf1e6d9, + 0xf149e6c0, 0x49e3a31d, 0x706a817f, 0xc8c0c4a2, + 0xe41b804d, 0x5cb1c590, 0x6538e7f2, 0xdd92a22f, + 0x172aa236, 0xaf80e7eb, 0x9609c589, 0x2ea38054, + 0xb4dd8585, 0x0c77c058, 0x35fee23a, 0x8d54a7e7, + 0x47eca7fe, 0xff46e223, 0xc6cfc041, 0x7e65859c, + 0x52bec173, 0xea1484ae, 0xd39da6cc, 0x6b37e311, + 0xa18fe308, 0x1925a6d5, 0x20ac84b7, 0x9806c16a, + 0x896ce16c, 0x31c6a4b1, 0x084f86d3, 0xb0e5c30e, + 0x7a5dc317, 0xc2f786ca, 0xfb7ea4a8, 0x43d4e175, + 0x6f0fa59a, 0xd7a5e047, 0xee2cc225, 0x568687f8, + 
0x9c3e87e1, 0x2494c23c, 0x1d1de05e, 0xa5b7a583, + 0xd89606f9, 0x603c4324, 0x59b56146, 0xe11f249b, + 0x2ba72482, 0x930d615f, 0xaa84433d, 0x122e06e0, + 0x3ef5420f, 0x865f07d2, 0xbfd625b0, 0x077c606d, + 0xcdc46074, 0x756e25a9, 0x4ce707cb, 0xf44d4216, + 0xe5276210, 0x5d8d27cd, 0x640405af, 0xdcae4072, + 0x1616406b, 0xaebc05b6, 0x973527d4, 0x2f9f6209, + 0x034426e6, 0xbbee633b, 0x82674159, 0x3acd0484, + 0xf075049d, 0x48df4140, 0x71566322, 0xc9fc26ff, + 0x5382232e, 0xeb2866f3, 0xd2a14491, 0x6a0b014c, + 0xa0b30155, 0x18194488, 0x219066ea, 0x993a2337, + 0xb5e167d8, 0x0d4b2205, 0x34c20067, 0x8c6845ba, + 0x46d045a3, 0xfe7a007e, 0xc7f3221c, 0x7f5967c1, + 0x6e3347c7, 0xd699021a, 0xef102078, 0x57ba65a5, + 0x9d0265bc, 0x25a82061, 0x1c210203, 0xa48b47de, + 0x88500331, 0x30fa46ec, 0x0973648e, 0xb1d92153, + 0x7b61214a, 0xc3cb6497, 0xfa4246f5, 0x42e80328 + },{ + 0x00000000, 0xac6f1138, 0x58df2270, 0xf4b03348, + 0xb0be45e0, 0x1cd154d8, 0xe8616790, 0x440e76a8, + 0x910b67c5, 0x3d6476fd, 0xc9d445b5, 0x65bb548d, + 0x21b52225, 0x8dda331d, 0x796a0055, 0xd505116d, + 0xd361228f, 0x7f0e33b7, 0x8bbe00ff, 0x27d111c7, + 0x63df676f, 0xcfb07657, 0x3b00451f, 0x976f5427, + 0x426a454a, 0xee055472, 0x1ab5673a, 0xb6da7602, + 0xf2d400aa, 0x5ebb1192, 0xaa0b22da, 0x066433e2, + 0x57b5a81b, 0xfbdab923, 0x0f6a8a6b, 0xa3059b53, + 0xe70bedfb, 0x4b64fcc3, 0xbfd4cf8b, 0x13bbdeb3, + 0xc6becfde, 0x6ad1dee6, 0x9e61edae, 0x320efc96, + 0x76008a3e, 0xda6f9b06, 0x2edfa84e, 0x82b0b976, + 0x84d48a94, 0x28bb9bac, 0xdc0ba8e4, 0x7064b9dc, + 0x346acf74, 0x9805de4c, 0x6cb5ed04, 0xc0dafc3c, + 0x15dfed51, 0xb9b0fc69, 0x4d00cf21, 0xe16fde19, + 0xa561a8b1, 0x090eb989, 0xfdbe8ac1, 0x51d19bf9, + 0xae6a5137, 0x0205400f, 0xf6b57347, 0x5ada627f, + 0x1ed414d7, 0xb2bb05ef, 0x460b36a7, 0xea64279f, + 0x3f6136f2, 0x930e27ca, 0x67be1482, 0xcbd105ba, + 0x8fdf7312, 0x23b0622a, 0xd7005162, 0x7b6f405a, + 0x7d0b73b8, 0xd1646280, 0x25d451c8, 0x89bb40f0, + 0xcdb53658, 0x61da2760, 0x956a1428, 0x39050510, + 0xec00147d, 0x406f0545, 0xb4df360d, 
0x18b02735, + 0x5cbe519d, 0xf0d140a5, 0x046173ed, 0xa80e62d5, + 0xf9dff92c, 0x55b0e814, 0xa100db5c, 0x0d6fca64, + 0x4961bccc, 0xe50eadf4, 0x11be9ebc, 0xbdd18f84, + 0x68d49ee9, 0xc4bb8fd1, 0x300bbc99, 0x9c64ada1, + 0xd86adb09, 0x7405ca31, 0x80b5f979, 0x2cdae841, + 0x2abedba3, 0x86d1ca9b, 0x7261f9d3, 0xde0ee8eb, + 0x9a009e43, 0x366f8f7b, 0xc2dfbc33, 0x6eb0ad0b, + 0xbbb5bc66, 0x17daad5e, 0xe36a9e16, 0x4f058f2e, + 0x0b0bf986, 0xa764e8be, 0x53d4dbf6, 0xffbbcace, + 0x5cd5a26e, 0xf0bab356, 0x040a801e, 0xa8659126, + 0xec6be78e, 0x4004f6b6, 0xb4b4c5fe, 0x18dbd4c6, + 0xcddec5ab, 0x61b1d493, 0x9501e7db, 0x396ef6e3, + 0x7d60804b, 0xd10f9173, 0x25bfa23b, 0x89d0b303, + 0x8fb480e1, 0x23db91d9, 0xd76ba291, 0x7b04b3a9, + 0x3f0ac501, 0x9365d439, 0x67d5e771, 0xcbbaf649, + 0x1ebfe724, 0xb2d0f61c, 0x4660c554, 0xea0fd46c, + 0xae01a2c4, 0x026eb3fc, 0xf6de80b4, 0x5ab1918c, + 0x0b600a75, 0xa70f1b4d, 0x53bf2805, 0xffd0393d, + 0xbbde4f95, 0x17b15ead, 0xe3016de5, 0x4f6e7cdd, + 0x9a6b6db0, 0x36047c88, 0xc2b44fc0, 0x6edb5ef8, + 0x2ad52850, 0x86ba3968, 0x720a0a20, 0xde651b18, + 0xd80128fa, 0x746e39c2, 0x80de0a8a, 0x2cb11bb2, + 0x68bf6d1a, 0xc4d07c22, 0x30604f6a, 0x9c0f5e52, + 0x490a4f3f, 0xe5655e07, 0x11d56d4f, 0xbdba7c77, + 0xf9b40adf, 0x55db1be7, 0xa16b28af, 0x0d043997, + 0xf2bff359, 0x5ed0e261, 0xaa60d129, 0x060fc011, + 0x4201b6b9, 0xee6ea781, 0x1ade94c9, 0xb6b185f1, + 0x63b4949c, 0xcfdb85a4, 0x3b6bb6ec, 0x9704a7d4, + 0xd30ad17c, 0x7f65c044, 0x8bd5f30c, 0x27bae234, + 0x21ded1d6, 0x8db1c0ee, 0x7901f3a6, 0xd56ee29e, + 0x91609436, 0x3d0f850e, 0xc9bfb646, 0x65d0a77e, + 0xb0d5b613, 0x1cbaa72b, 0xe80a9463, 0x4465855b, + 0x006bf3f3, 0xac04e2cb, 0x58b4d183, 0xf4dbc0bb, + 0xa50a5b42, 0x09654a7a, 0xfdd57932, 0x51ba680a, + 0x15b41ea2, 0xb9db0f9a, 0x4d6b3cd2, 0xe1042dea, + 0x34013c87, 0x986e2dbf, 0x6cde1ef7, 0xc0b10fcf, + 0x84bf7967, 0x28d0685f, 0xdc605b17, 0x700f4a2f, + 0x766b79cd, 0xda0468f5, 0x2eb45bbd, 0x82db4a85, + 0xc6d53c2d, 0x6aba2d15, 0x9e0a1e5d, 0x32650f65, + 0xe7601e08, 0x4b0f0f30, 0xbfbf3c78, 
0x13d02d40, + 0x57de5be8, 0xfbb14ad0, 0x0f017998, 0xa36e68a0 + },{ + 0x00000000, 0x196b30ef, 0xc3a08cdb, 0xdacbbc34, + 0x7737f5b2, 0x6e5cc55d, 0xb4977969, 0xadfc4986, + 0x1f180660, 0x0673368f, 0xdcb88abb, 0xc5d3ba54, + 0x682ff3d2, 0x7144c33d, 0xab8f7f09, 0xb2e44fe6, + 0x3e300cc0, 0x275b3c2f, 0xfd90801b, 0xe4fbb0f4, + 0x4907f972, 0x506cc99d, 0x8aa775a9, 0x93cc4546, + 0x21280aa0, 0x38433a4f, 0xe288867b, 0xfbe3b694, + 0x561fff12, 0x4f74cffd, 0x95bf73c9, 0x8cd44326, + 0x8d16f485, 0x947dc46a, 0x4eb6785e, 0x57dd48b1, + 0xfa210137, 0xe34a31d8, 0x39818dec, 0x20eabd03, + 0x920ef2e5, 0x8b65c20a, 0x51ae7e3e, 0x48c54ed1, + 0xe5390757, 0xfc5237b8, 0x26998b8c, 0x3ff2bb63, + 0xb326f845, 0xaa4dc8aa, 0x7086749e, 0x69ed4471, + 0xc4110df7, 0xdd7a3d18, 0x07b1812c, 0x1edab1c3, + 0xac3efe25, 0xb555ceca, 0x6f9e72fe, 0x76f54211, + 0xdb090b97, 0xc2623b78, 0x18a9874c, 0x01c2b7a3, + 0xeb5b040e, 0xf23034e1, 0x28fb88d5, 0x3190b83a, + 0x9c6cf1bc, 0x8507c153, 0x5fcc7d67, 0x46a74d88, + 0xf443026e, 0xed283281, 0x37e38eb5, 0x2e88be5a, + 0x8374f7dc, 0x9a1fc733, 0x40d47b07, 0x59bf4be8, + 0xd56b08ce, 0xcc003821, 0x16cb8415, 0x0fa0b4fa, + 0xa25cfd7c, 0xbb37cd93, 0x61fc71a7, 0x78974148, + 0xca730eae, 0xd3183e41, 0x09d38275, 0x10b8b29a, + 0xbd44fb1c, 0xa42fcbf3, 0x7ee477c7, 0x678f4728, + 0x664df08b, 0x7f26c064, 0xa5ed7c50, 0xbc864cbf, + 0x117a0539, 0x081135d6, 0xd2da89e2, 0xcbb1b90d, + 0x7955f6eb, 0x603ec604, 0xbaf57a30, 0xa39e4adf, + 0x0e620359, 0x170933b6, 0xcdc28f82, 0xd4a9bf6d, + 0x587dfc4b, 0x4116cca4, 0x9bdd7090, 0x82b6407f, + 0x2f4a09f9, 0x36213916, 0xecea8522, 0xf581b5cd, + 0x4765fa2b, 0x5e0ecac4, 0x84c576f0, 0x9dae461f, + 0x30520f99, 0x29393f76, 0xf3f28342, 0xea99b3ad, + 0xd6b7081c, 0xcfdc38f3, 0x151784c7, 0x0c7cb428, + 0xa180fdae, 0xb8ebcd41, 0x62207175, 0x7b4b419a, + 0xc9af0e7c, 0xd0c43e93, 0x0a0f82a7, 0x1364b248, + 0xbe98fbce, 0xa7f3cb21, 0x7d387715, 0x645347fa, + 0xe88704dc, 0xf1ec3433, 0x2b278807, 0x324cb8e8, + 0x9fb0f16e, 0x86dbc181, 0x5c107db5, 0x457b4d5a, + 0xf79f02bc, 0xeef43253, 
0x343f8e67, 0x2d54be88, + 0x80a8f70e, 0x99c3c7e1, 0x43087bd5, 0x5a634b3a, + 0x5ba1fc99, 0x42cacc76, 0x98017042, 0x816a40ad, + 0x2c96092b, 0x35fd39c4, 0xef3685f0, 0xf65db51f, + 0x44b9faf9, 0x5dd2ca16, 0x87197622, 0x9e7246cd, + 0x338e0f4b, 0x2ae53fa4, 0xf02e8390, 0xe945b37f, + 0x6591f059, 0x7cfac0b6, 0xa6317c82, 0xbf5a4c6d, + 0x12a605eb, 0x0bcd3504, 0xd1068930, 0xc86db9df, + 0x7a89f639, 0x63e2c6d6, 0xb9297ae2, 0xa0424a0d, + 0x0dbe038b, 0x14d53364, 0xce1e8f50, 0xd775bfbf, + 0x3dec0c12, 0x24873cfd, 0xfe4c80c9, 0xe727b026, + 0x4adbf9a0, 0x53b0c94f, 0x897b757b, 0x90104594, + 0x22f40a72, 0x3b9f3a9d, 0xe15486a9, 0xf83fb646, + 0x55c3ffc0, 0x4ca8cf2f, 0x9663731b, 0x8f0843f4, + 0x03dc00d2, 0x1ab7303d, 0xc07c8c09, 0xd917bce6, + 0x74ebf560, 0x6d80c58f, 0xb74b79bb, 0xae204954, + 0x1cc406b2, 0x05af365d, 0xdf648a69, 0xc60fba86, + 0x6bf3f300, 0x7298c3ef, 0xa8537fdb, 0xb1384f34, + 0xb0faf897, 0xa991c878, 0x735a744c, 0x6a3144a3, + 0xc7cd0d25, 0xdea63dca, 0x046d81fe, 0x1d06b111, + 0xafe2fef7, 0xb689ce18, 0x6c42722c, 0x752942c3, + 0xd8d50b45, 0xc1be3baa, 0x1b75879e, 0x021eb771, + 0x8ecaf457, 0x97a1c4b8, 0x4d6a788c, 0x54014863, + 0xf9fd01e5, 0xe096310a, 0x3a5d8d3e, 0x2336bdd1, + 0x91d2f237, 0x88b9c2d8, 0x52727eec, 0x4b194e03, + 0xe6e50785, 0xff8e376a, 0x25458b5e, 0x3c2ebbb1 + },{ + 0x00000000, 0xc82c0368, 0x905906d0, 0x587505b8, + 0xd1c5e0a5, 0x19e9e3cd, 0x419ce675, 0x89b0e51d, + 0x53fd2d4e, 0x9bd12e26, 0xc3a42b9e, 0x0b8828f6, + 0x8238cdeb, 0x4a14ce83, 0x1261cb3b, 0xda4dc853, + 0xa6fa5b9c, 0x6ed658f4, 0x36a35d4c, 0xfe8f5e24, + 0x773fbb39, 0xbf13b851, 0xe766bde9, 0x2f4abe81, + 0xf50776d2, 0x3d2b75ba, 0x655e7002, 0xad72736a, + 0x24c29677, 0xecee951f, 0xb49b90a7, 0x7cb793cf, + 0xbd835b3d, 0x75af5855, 0x2dda5ded, 0xe5f65e85, + 0x6c46bb98, 0xa46ab8f0, 0xfc1fbd48, 0x3433be20, + 0xee7e7673, 0x2652751b, 0x7e2770a3, 0xb60b73cb, + 0x3fbb96d6, 0xf79795be, 0xafe29006, 0x67ce936e, + 0x1b7900a1, 0xd35503c9, 0x8b200671, 0x430c0519, + 0xcabce004, 0x0290e36c, 0x5ae5e6d4, 0x92c9e5bc, + 0x48842def, 
0x80a82e87, 0xd8dd2b3f, 0x10f12857, + 0x9941cd4a, 0x516dce22, 0x0918cb9a, 0xc134c8f2, + 0x7a07b77a, 0xb22bb412, 0xea5eb1aa, 0x2272b2c2, + 0xabc257df, 0x63ee54b7, 0x3b9b510f, 0xf3b75267, + 0x29fa9a34, 0xe1d6995c, 0xb9a39ce4, 0x718f9f8c, + 0xf83f7a91, 0x301379f9, 0x68667c41, 0xa04a7f29, + 0xdcfdece6, 0x14d1ef8e, 0x4ca4ea36, 0x8488e95e, + 0x0d380c43, 0xc5140f2b, 0x9d610a93, 0x554d09fb, + 0x8f00c1a8, 0x472cc2c0, 0x1f59c778, 0xd775c410, + 0x5ec5210d, 0x96e92265, 0xce9c27dd, 0x06b024b5, + 0xc784ec47, 0x0fa8ef2f, 0x57ddea97, 0x9ff1e9ff, + 0x16410ce2, 0xde6d0f8a, 0x86180a32, 0x4e34095a, + 0x9479c109, 0x5c55c261, 0x0420c7d9, 0xcc0cc4b1, + 0x45bc21ac, 0x8d9022c4, 0xd5e5277c, 0x1dc92414, + 0x617eb7db, 0xa952b4b3, 0xf127b10b, 0x390bb263, + 0xb0bb577e, 0x78975416, 0x20e251ae, 0xe8ce52c6, + 0x32839a95, 0xfaaf99fd, 0xa2da9c45, 0x6af69f2d, + 0xe3467a30, 0x2b6a7958, 0x731f7ce0, 0xbb337f88, + 0xf40e6ef5, 0x3c226d9d, 0x64576825, 0xac7b6b4d, + 0x25cb8e50, 0xede78d38, 0xb5928880, 0x7dbe8be8, + 0xa7f343bb, 0x6fdf40d3, 0x37aa456b, 0xff864603, + 0x7636a31e, 0xbe1aa076, 0xe66fa5ce, 0x2e43a6a6, + 0x52f43569, 0x9ad83601, 0xc2ad33b9, 0x0a8130d1, + 0x8331d5cc, 0x4b1dd6a4, 0x1368d31c, 0xdb44d074, + 0x01091827, 0xc9251b4f, 0x91501ef7, 0x597c1d9f, + 0xd0ccf882, 0x18e0fbea, 0x4095fe52, 0x88b9fd3a, + 0x498d35c8, 0x81a136a0, 0xd9d43318, 0x11f83070, + 0x9848d56d, 0x5064d605, 0x0811d3bd, 0xc03dd0d5, + 0x1a701886, 0xd25c1bee, 0x8a291e56, 0x42051d3e, + 0xcbb5f823, 0x0399fb4b, 0x5becfef3, 0x93c0fd9b, + 0xef776e54, 0x275b6d3c, 0x7f2e6884, 0xb7026bec, + 0x3eb28ef1, 0xf69e8d99, 0xaeeb8821, 0x66c78b49, + 0xbc8a431a, 0x74a64072, 0x2cd345ca, 0xe4ff46a2, + 0x6d4fa3bf, 0xa563a0d7, 0xfd16a56f, 0x353aa607, + 0x8e09d98f, 0x4625dae7, 0x1e50df5f, 0xd67cdc37, + 0x5fcc392a, 0x97e03a42, 0xcf953ffa, 0x07b93c92, + 0xddf4f4c1, 0x15d8f7a9, 0x4dadf211, 0x8581f179, + 0x0c311464, 0xc41d170c, 0x9c6812b4, 0x544411dc, + 0x28f38213, 0xe0df817b, 0xb8aa84c3, 0x708687ab, + 0xf93662b6, 0x311a61de, 0x696f6466, 0xa143670e, + 0x7b0eaf5d, 
0xb322ac35, 0xeb57a98d, 0x237baae5, + 0xaacb4ff8, 0x62e74c90, 0x3a924928, 0xf2be4a40, + 0x338a82b2, 0xfba681da, 0xa3d38462, 0x6bff870a, + 0xe24f6217, 0x2a63617f, 0x721664c7, 0xba3a67af, + 0x6077affc, 0xa85bac94, 0xf02ea92c, 0x3802aa44, + 0xb1b24f59, 0x799e4c31, 0x21eb4989, 0xe9c74ae1, + 0x9570d92e, 0x5d5cda46, 0x0529dffe, 0xcd05dc96, + 0x44b5398b, 0x8c993ae3, 0xd4ec3f5b, 0x1cc03c33, + 0xc68df460, 0x0ea1f708, 0x56d4f2b0, 0x9ef8f1d8, + 0x174814c5, 0xdf6417ad, 0x87111215, 0x4f3d117d + },{ + 0x00000000, 0x277d3c49, 0x4efa7892, 0x698744db, + 0x6d821d21, 0x4aff2168, 0x237865b3, 0x040559fa, + 0xda043b42, 0xfd79070b, 0x94fe43d0, 0xb3837f99, + 0xb7862663, 0x90fb1a2a, 0xf97c5ef1, 0xde0162b8, + 0xb4097684, 0x93744acd, 0xfaf30e16, 0xdd8e325f, + 0xd98b6ba5, 0xfef657ec, 0x97711337, 0xb00c2f7e, + 0x6e0d4dc6, 0x4970718f, 0x20f73554, 0x078a091d, + 0x038f50e7, 0x24f26cae, 0x4d752875, 0x6a08143c, + 0x9965000d, 0xbe183c44, 0xd79f789f, 0xf0e244d6, + 0xf4e71d2c, 0xd39a2165, 0xba1d65be, 0x9d6059f7, + 0x43613b4f, 0x641c0706, 0x0d9b43dd, 0x2ae67f94, + 0x2ee3266e, 0x099e1a27, 0x60195efc, 0x476462b5, + 0x2d6c7689, 0x0a114ac0, 0x63960e1b, 0x44eb3252, + 0x40ee6ba8, 0x679357e1, 0x0e14133a, 0x29692f73, + 0xf7684dcb, 0xd0157182, 0xb9923559, 0x9eef0910, + 0x9aea50ea, 0xbd976ca3, 0xd4102878, 0xf36d1431, + 0x32cb001a, 0x15b63c53, 0x7c317888, 0x5b4c44c1, + 0x5f491d3b, 0x78342172, 0x11b365a9, 0x36ce59e0, + 0xe8cf3b58, 0xcfb20711, 0xa63543ca, 0x81487f83, + 0x854d2679, 0xa2301a30, 0xcbb75eeb, 0xecca62a2, + 0x86c2769e, 0xa1bf4ad7, 0xc8380e0c, 0xef453245, + 0xeb406bbf, 0xcc3d57f6, 0xa5ba132d, 0x82c72f64, + 0x5cc64ddc, 0x7bbb7195, 0x123c354e, 0x35410907, + 0x314450fd, 0x16396cb4, 0x7fbe286f, 0x58c31426, + 0xabae0017, 0x8cd33c5e, 0xe5547885, 0xc22944cc, + 0xc62c1d36, 0xe151217f, 0x88d665a4, 0xafab59ed, + 0x71aa3b55, 0x56d7071c, 0x3f5043c7, 0x182d7f8e, + 0x1c282674, 0x3b551a3d, 0x52d25ee6, 0x75af62af, + 0x1fa77693, 0x38da4ada, 0x515d0e01, 0x76203248, + 0x72256bb2, 0x555857fb, 0x3cdf1320, 0x1ba22f69, + 
0xc5a34dd1, 0xe2de7198, 0x8b593543, 0xac24090a, + 0xa82150f0, 0x8f5c6cb9, 0xe6db2862, 0xc1a6142b, + 0x64960134, 0x43eb3d7d, 0x2a6c79a6, 0x0d1145ef, + 0x09141c15, 0x2e69205c, 0x47ee6487, 0x609358ce, + 0xbe923a76, 0x99ef063f, 0xf06842e4, 0xd7157ead, + 0xd3102757, 0xf46d1b1e, 0x9dea5fc5, 0xba97638c, + 0xd09f77b0, 0xf7e24bf9, 0x9e650f22, 0xb918336b, + 0xbd1d6a91, 0x9a6056d8, 0xf3e71203, 0xd49a2e4a, + 0x0a9b4cf2, 0x2de670bb, 0x44613460, 0x631c0829, + 0x671951d3, 0x40646d9a, 0x29e32941, 0x0e9e1508, + 0xfdf30139, 0xda8e3d70, 0xb30979ab, 0x947445e2, + 0x90711c18, 0xb70c2051, 0xde8b648a, 0xf9f658c3, + 0x27f73a7b, 0x008a0632, 0x690d42e9, 0x4e707ea0, + 0x4a75275a, 0x6d081b13, 0x048f5fc8, 0x23f26381, + 0x49fa77bd, 0x6e874bf4, 0x07000f2f, 0x207d3366, + 0x24786a9c, 0x030556d5, 0x6a82120e, 0x4dff2e47, + 0x93fe4cff, 0xb48370b6, 0xdd04346d, 0xfa790824, + 0xfe7c51de, 0xd9016d97, 0xb086294c, 0x97fb1505, + 0x565d012e, 0x71203d67, 0x18a779bc, 0x3fda45f5, + 0x3bdf1c0f, 0x1ca22046, 0x7525649d, 0x525858d4, + 0x8c593a6c, 0xab240625, 0xc2a342fe, 0xe5de7eb7, + 0xe1db274d, 0xc6a61b04, 0xaf215fdf, 0x885c6396, + 0xe25477aa, 0xc5294be3, 0xacae0f38, 0x8bd33371, + 0x8fd66a8b, 0xa8ab56c2, 0xc12c1219, 0xe6512e50, + 0x38504ce8, 0x1f2d70a1, 0x76aa347a, 0x51d70833, + 0x55d251c9, 0x72af6d80, 0x1b28295b, 0x3c551512, + 0xcf380123, 0xe8453d6a, 0x81c279b1, 0xa6bf45f8, + 0xa2ba1c02, 0x85c7204b, 0xec406490, 0xcb3d58d9, + 0x153c3a61, 0x32410628, 0x5bc642f3, 0x7cbb7eba, + 0x78be2740, 0x5fc31b09, 0x36445fd2, 0x1139639b, + 0x7b3177a7, 0x5c4c4bee, 0x35cb0f35, 0x12b6337c, + 0x16b36a86, 0x31ce56cf, 0x58491214, 0x7f342e5d, + 0xa1354ce5, 0x864870ac, 0xefcf3477, 0xc8b2083e, + 0xccb751c4, 0xebca6d8d, 0x824d2956, 0xa530151f + } +#else + { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, + 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, + 0xd7c45070, 0x25afd373, 0x36ff2087, 
0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, + 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, + 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, + 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, + 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, + 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, + 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, + 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, + 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, + 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, + 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, + 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 
0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, + 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, + 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, + 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, + 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, + 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351 + },{ + 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, + 0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, 0x7a6dc945, + 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, + 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd, + 0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918, + 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4, + 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0, + 0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c, + 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b, + 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47, + 0xe29f20ba, 0xf13db8cd, 0xc5da1054, 0xd6788823, + 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff, + 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a, + 0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6, + 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, + 0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e, + 0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d, + 0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41, + 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, + 0x2c896460, 0x3f2bfc17, 0x0bcc548e, 0x186eccf9, + 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, + 0x8e585659, 0x9dface2e, 
0xa91d66b7, 0xbabffec0, + 0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, 0x69215ca4, + 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78, + 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f, + 0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43, + 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, + 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb, + 0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e, + 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2, + 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, + 0x6cc776e3, 0x7f65ee94, 0x4b82460d, 0x5820de7a, + 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, + 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc, + 0x66d73941, 0x7575a136, 0x419209af, 0x523091d8, + 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004, + 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1, + 0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d, + 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059, + 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185, + 0x844819fb, 0x97ea818c, 0xa30d2915, 0xb0afb162, + 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be, + 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, + 0x57d6bb9f, 0x447423e8, 0x70938b71, 0x63311306, + 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, + 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f, + 0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b, + 0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287, + 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, + 0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8, + 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc, + 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600, + 0x3b11cd7c, 0x28b3550b, 0x1c54fd92, 0x0ff665e5, + 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439, + 0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d, + 0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781, + 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766, + 0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba, + 0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de, + 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502, + 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, + 0x0a104fa2, 0x19b2d7d5, 
0x2d557f4c, 0x3ef7e73b, + 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, + 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483 + },{ + 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, + 0x9edea41a, 0x3b9f3664, 0xd1b1f617, 0x74f06469, + 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, + 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac, + 0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9, + 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3, + 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, + 0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726, + 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67, + 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d, + 0xd915c5d1, 0x7c5457af, 0x967a97dc, 0x333b05a2, + 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8, + 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed, + 0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7, + 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, + 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32, + 0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa, + 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0, + 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, + 0x61ea1a06, 0xc4ab8878, 0x2e85480b, 0x8bc4da75, + 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, + 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a, + 0x8f96c396, 0x2ad751e8, 0xc0f9919b, 0x65b803e5, + 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff, + 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe, + 0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4, + 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b, + 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161, + 0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634, + 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e, + 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, + 0xf00c9c98, 0x554d0ee6, 0xbf63ce95, 0x1a225ceb, + 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, + 0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a, + 0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5, + 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def, + 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba, + 0x655baed3, 
0xc01a3cad, 0x2a34fcde, 0x8f756ea0, + 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f, + 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065, + 0x6a638c57, 0xcf221e29, 0x250cde5a, 0x804d4c24, + 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e, + 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, + 0xccec1688, 0x69ad84f6, 0x83834485, 0x26c2d6fb, + 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, + 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4, + 0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b, + 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71, + 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, + 0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3, + 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c, + 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36, + 0x3ce08a10, 0x99a1186e, 0x738fd81d, 0xd6ce4a63, + 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79, + 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, + 0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc, + 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, + 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7, + 0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238, + 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622, + 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, + 0x437ad51e, 0xe63b4760, 0x0c158713, 0xa954156d, + 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, + 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8 + },{ + 0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939, + 0x7b2231f3, 0xa6679b4b, 0xc4451272, 0x1900b8ca, + 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, + 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c, + 0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804, + 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7, + 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, + 0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11, + 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2, + 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41, + 0x2161776d, 0xfc24ddd5, 0x9e0654ec, 0x4343fe54, + 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7, + 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f, + 
0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c, + 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, + 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a, + 0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de, + 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d, + 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, + 0x26c00df2, 0xfb85a74a, 0x99a72e73, 0x44e284cb, + 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, + 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610, + 0xb4868d3c, 0x69c32784, 0x0be1aebd, 0xd6a40405, + 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6, + 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255, + 0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6, + 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3, + 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040, + 0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368, + 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b, + 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, + 0x1881a844, 0xc5c402fc, 0xa7e68bc5, 0x7aa3217d, + 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, + 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5, + 0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0, + 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213, + 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b, + 0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8, + 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd, + 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e, + 0x8585ddb4, 0x58c0770c, 0x3ae2fe35, 0xe7a7548d, + 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e, + 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, + 0x08e38fa1, 0xd5a62519, 0xb784ac20, 0x6ac10698, + 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, + 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443, + 0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656, + 0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5, + 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, + 0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12, + 0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07, + 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4, + 0x106227e5, 0xcd278d5d, 0xaf050464, 0x7240aedc, + 
0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f, + 0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a, + 0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9, + 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a, + 0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99, + 0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c, + 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f, + 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, + 0xbc65029d, 0x6120a825, 0x0302211c, 0xde478ba4, + 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1, + 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842 + },{ + 0x00000000, 0x38116fac, 0x7022df58, 0x4833b0f4, + 0xe045beb0, 0xd854d11c, 0x906761e8, 0xa8760e44, + 0xc5670b91, 0xfd76643d, 0xb545d4c9, 0x8d54bb65, + 0x2522b521, 0x1d33da8d, 0x55006a79, 0x6d1105d5, + 0x8f2261d3, 0xb7330e7f, 0xff00be8b, 0xc711d127, + 0x6f67df63, 0x5776b0cf, 0x1f45003b, 0x27546f97, + 0x4a456a42, 0x725405ee, 0x3a67b51a, 0x0276dab6, + 0xaa00d4f2, 0x9211bb5e, 0xda220baa, 0xe2336406, + 0x1ba8b557, 0x23b9dafb, 0x6b8a6a0f, 0x539b05a3, + 0xfbed0be7, 0xc3fc644b, 0x8bcfd4bf, 0xb3debb13, + 0xdecfbec6, 0xe6ded16a, 0xaeed619e, 0x96fc0e32, + 0x3e8a0076, 0x069b6fda, 0x4ea8df2e, 0x76b9b082, + 0x948ad484, 0xac9bbb28, 0xe4a80bdc, 0xdcb96470, + 0x74cf6a34, 0x4cde0598, 0x04edb56c, 0x3cfcdac0, + 0x51eddf15, 0x69fcb0b9, 0x21cf004d, 0x19de6fe1, + 0xb1a861a5, 0x89b90e09, 0xc18abefd, 0xf99bd151, + 0x37516aae, 0x0f400502, 0x4773b5f6, 0x7f62da5a, + 0xd714d41e, 0xef05bbb2, 0xa7360b46, 0x9f2764ea, + 0xf236613f, 0xca270e93, 0x8214be67, 0xba05d1cb, + 0x1273df8f, 0x2a62b023, 0x625100d7, 0x5a406f7b, + 0xb8730b7d, 0x806264d1, 0xc851d425, 0xf040bb89, + 0x5836b5cd, 0x6027da61, 0x28146a95, 0x10050539, + 0x7d1400ec, 0x45056f40, 0x0d36dfb4, 0x3527b018, + 0x9d51be5c, 0xa540d1f0, 0xed736104, 0xd5620ea8, + 0x2cf9dff9, 0x14e8b055, 0x5cdb00a1, 0x64ca6f0d, + 0xccbc6149, 0xf4ad0ee5, 0xbc9ebe11, 0x848fd1bd, + 0xe99ed468, 0xd18fbbc4, 0x99bc0b30, 0xa1ad649c, + 0x09db6ad8, 0x31ca0574, 0x79f9b580, 0x41e8da2c, + 0xa3dbbe2a, 0x9bcad186, 0xd3f96172, 
0xebe80ede, + 0x439e009a, 0x7b8f6f36, 0x33bcdfc2, 0x0badb06e, + 0x66bcb5bb, 0x5eadda17, 0x169e6ae3, 0x2e8f054f, + 0x86f90b0b, 0xbee864a7, 0xf6dbd453, 0xcecabbff, + 0x6ea2d55c, 0x56b3baf0, 0x1e800a04, 0x269165a8, + 0x8ee76bec, 0xb6f60440, 0xfec5b4b4, 0xc6d4db18, + 0xabc5decd, 0x93d4b161, 0xdbe70195, 0xe3f66e39, + 0x4b80607d, 0x73910fd1, 0x3ba2bf25, 0x03b3d089, + 0xe180b48f, 0xd991db23, 0x91a26bd7, 0xa9b3047b, + 0x01c50a3f, 0x39d46593, 0x71e7d567, 0x49f6bacb, + 0x24e7bf1e, 0x1cf6d0b2, 0x54c56046, 0x6cd40fea, + 0xc4a201ae, 0xfcb36e02, 0xb480def6, 0x8c91b15a, + 0x750a600b, 0x4d1b0fa7, 0x0528bf53, 0x3d39d0ff, + 0x954fdebb, 0xad5eb117, 0xe56d01e3, 0xdd7c6e4f, + 0xb06d6b9a, 0x887c0436, 0xc04fb4c2, 0xf85edb6e, + 0x5028d52a, 0x6839ba86, 0x200a0a72, 0x181b65de, + 0xfa2801d8, 0xc2396e74, 0x8a0ade80, 0xb21bb12c, + 0x1a6dbf68, 0x227cd0c4, 0x6a4f6030, 0x525e0f9c, + 0x3f4f0a49, 0x075e65e5, 0x4f6dd511, 0x777cbabd, + 0xdf0ab4f9, 0xe71bdb55, 0xaf286ba1, 0x9739040d, + 0x59f3bff2, 0x61e2d05e, 0x29d160aa, 0x11c00f06, + 0xb9b60142, 0x81a76eee, 0xc994de1a, 0xf185b1b6, + 0x9c94b463, 0xa485dbcf, 0xecb66b3b, 0xd4a70497, + 0x7cd10ad3, 0x44c0657f, 0x0cf3d58b, 0x34e2ba27, + 0xd6d1de21, 0xeec0b18d, 0xa6f30179, 0x9ee26ed5, + 0x36946091, 0x0e850f3d, 0x46b6bfc9, 0x7ea7d065, + 0x13b6d5b0, 0x2ba7ba1c, 0x63940ae8, 0x5b856544, + 0xf3f36b00, 0xcbe204ac, 0x83d1b458, 0xbbc0dbf4, + 0x425b0aa5, 0x7a4a6509, 0x3279d5fd, 0x0a68ba51, + 0xa21eb415, 0x9a0fdbb9, 0xd23c6b4d, 0xea2d04e1, + 0x873c0134, 0xbf2d6e98, 0xf71ede6c, 0xcf0fb1c0, + 0x6779bf84, 0x5f68d028, 0x175b60dc, 0x2f4a0f70, + 0xcd796b76, 0xf56804da, 0xbd5bb42e, 0x854adb82, + 0x2d3cd5c6, 0x152dba6a, 0x5d1e0a9e, 0x650f6532, + 0x081e60e7, 0x300f0f4b, 0x783cbfbf, 0x402dd013, + 0xe85bde57, 0xd04ab1fb, 0x9879010f, 0xa0686ea3 + },{ + 0x00000000, 0xef306b19, 0xdb8ca0c3, 0x34bccbda, + 0xb2f53777, 0x5dc55c6e, 0x697997b4, 0x8649fcad, + 0x6006181f, 0x8f367306, 0xbb8ab8dc, 0x54bad3c5, + 0xd2f32f68, 0x3dc34471, 0x097f8fab, 0xe64fe4b2, + 0xc00c303e, 0x2f3c5b27, 
0x1b8090fd, 0xf4b0fbe4, + 0x72f90749, 0x9dc96c50, 0xa975a78a, 0x4645cc93, + 0xa00a2821, 0x4f3a4338, 0x7b8688e2, 0x94b6e3fb, + 0x12ff1f56, 0xfdcf744f, 0xc973bf95, 0x2643d48c, + 0x85f4168d, 0x6ac47d94, 0x5e78b64e, 0xb148dd57, + 0x370121fa, 0xd8314ae3, 0xec8d8139, 0x03bdea20, + 0xe5f20e92, 0x0ac2658b, 0x3e7eae51, 0xd14ec548, + 0x570739e5, 0xb83752fc, 0x8c8b9926, 0x63bbf23f, + 0x45f826b3, 0xaac84daa, 0x9e748670, 0x7144ed69, + 0xf70d11c4, 0x183d7add, 0x2c81b107, 0xc3b1da1e, + 0x25fe3eac, 0xcace55b5, 0xfe729e6f, 0x1142f576, + 0x970b09db, 0x783b62c2, 0x4c87a918, 0xa3b7c201, + 0x0e045beb, 0xe13430f2, 0xd588fb28, 0x3ab89031, + 0xbcf16c9c, 0x53c10785, 0x677dcc5f, 0x884da746, + 0x6e0243f4, 0x813228ed, 0xb58ee337, 0x5abe882e, + 0xdcf77483, 0x33c71f9a, 0x077bd440, 0xe84bbf59, + 0xce086bd5, 0x213800cc, 0x1584cb16, 0xfab4a00f, + 0x7cfd5ca2, 0x93cd37bb, 0xa771fc61, 0x48419778, + 0xae0e73ca, 0x413e18d3, 0x7582d309, 0x9ab2b810, + 0x1cfb44bd, 0xf3cb2fa4, 0xc777e47e, 0x28478f67, + 0x8bf04d66, 0x64c0267f, 0x507ceda5, 0xbf4c86bc, + 0x39057a11, 0xd6351108, 0xe289dad2, 0x0db9b1cb, + 0xebf65579, 0x04c63e60, 0x307af5ba, 0xdf4a9ea3, + 0x5903620e, 0xb6330917, 0x828fc2cd, 0x6dbfa9d4, + 0x4bfc7d58, 0xa4cc1641, 0x9070dd9b, 0x7f40b682, + 0xf9094a2f, 0x16392136, 0x2285eaec, 0xcdb581f5, + 0x2bfa6547, 0xc4ca0e5e, 0xf076c584, 0x1f46ae9d, + 0x990f5230, 0x763f3929, 0x4283f2f3, 0xadb399ea, + 0x1c08b7d6, 0xf338dccf, 0xc7841715, 0x28b47c0c, + 0xaefd80a1, 0x41cdebb8, 0x75712062, 0x9a414b7b, + 0x7c0eafc9, 0x933ec4d0, 0xa7820f0a, 0x48b26413, + 0xcefb98be, 0x21cbf3a7, 0x1577387d, 0xfa475364, + 0xdc0487e8, 0x3334ecf1, 0x0788272b, 0xe8b84c32, + 0x6ef1b09f, 0x81c1db86, 0xb57d105c, 0x5a4d7b45, + 0xbc029ff7, 0x5332f4ee, 0x678e3f34, 0x88be542d, + 0x0ef7a880, 0xe1c7c399, 0xd57b0843, 0x3a4b635a, + 0x99fca15b, 0x76ccca42, 0x42700198, 0xad406a81, + 0x2b09962c, 0xc439fd35, 0xf08536ef, 0x1fb55df6, + 0xf9fab944, 0x16cad25d, 0x22761987, 0xcd46729e, + 0x4b0f8e33, 0xa43fe52a, 0x90832ef0, 0x7fb345e9, + 0x59f09165, 0xb6c0fa7c, 
0x827c31a6, 0x6d4c5abf, + 0xeb05a612, 0x0435cd0b, 0x308906d1, 0xdfb96dc8, + 0x39f6897a, 0xd6c6e263, 0xe27a29b9, 0x0d4a42a0, + 0x8b03be0d, 0x6433d514, 0x508f1ece, 0xbfbf75d7, + 0x120cec3d, 0xfd3c8724, 0xc9804cfe, 0x26b027e7, + 0xa0f9db4a, 0x4fc9b053, 0x7b757b89, 0x94451090, + 0x720af422, 0x9d3a9f3b, 0xa98654e1, 0x46b63ff8, + 0xc0ffc355, 0x2fcfa84c, 0x1b736396, 0xf443088f, + 0xd200dc03, 0x3d30b71a, 0x098c7cc0, 0xe6bc17d9, + 0x60f5eb74, 0x8fc5806d, 0xbb794bb7, 0x544920ae, + 0xb206c41c, 0x5d36af05, 0x698a64df, 0x86ba0fc6, + 0x00f3f36b, 0xefc39872, 0xdb7f53a8, 0x344f38b1, + 0x97f8fab0, 0x78c891a9, 0x4c745a73, 0xa344316a, + 0x250dcdc7, 0xca3da6de, 0xfe816d04, 0x11b1061d, + 0xf7fee2af, 0x18ce89b6, 0x2c72426c, 0xc3422975, + 0x450bd5d8, 0xaa3bbec1, 0x9e87751b, 0x71b71e02, + 0x57f4ca8e, 0xb8c4a197, 0x8c786a4d, 0x63480154, + 0xe501fdf9, 0x0a3196e0, 0x3e8d5d3a, 0xd1bd3623, + 0x37f2d291, 0xd8c2b988, 0xec7e7252, 0x034e194b, + 0x8507e5e6, 0x6a378eff, 0x5e8b4525, 0xb1bb2e3c + },{ + 0x00000000, 0x68032cc8, 0xd0065990, 0xb8057558, + 0xa5e0c5d1, 0xcde3e919, 0x75e69c41, 0x1de5b089, + 0x4e2dfd53, 0x262ed19b, 0x9e2ba4c3, 0xf628880b, + 0xebcd3882, 0x83ce144a, 0x3bcb6112, 0x53c84dda, + 0x9c5bfaa6, 0xf458d66e, 0x4c5da336, 0x245e8ffe, + 0x39bb3f77, 0x51b813bf, 0xe9bd66e7, 0x81be4a2f, + 0xd27607f5, 0xba752b3d, 0x02705e65, 0x6a7372ad, + 0x7796c224, 0x1f95eeec, 0xa7909bb4, 0xcf93b77c, + 0x3d5b83bd, 0x5558af75, 0xed5dda2d, 0x855ef6e5, + 0x98bb466c, 0xf0b86aa4, 0x48bd1ffc, 0x20be3334, + 0x73767eee, 0x1b755226, 0xa370277e, 0xcb730bb6, + 0xd696bb3f, 0xbe9597f7, 0x0690e2af, 0x6e93ce67, + 0xa100791b, 0xc90355d3, 0x7106208b, 0x19050c43, + 0x04e0bcca, 0x6ce39002, 0xd4e6e55a, 0xbce5c992, + 0xef2d8448, 0x872ea880, 0x3f2bddd8, 0x5728f110, + 0x4acd4199, 0x22ce6d51, 0x9acb1809, 0xf2c834c1, + 0x7ab7077a, 0x12b42bb2, 0xaab15eea, 0xc2b27222, + 0xdf57c2ab, 0xb754ee63, 0x0f519b3b, 0x6752b7f3, + 0x349afa29, 0x5c99d6e1, 0xe49ca3b9, 0x8c9f8f71, + 0x917a3ff8, 0xf9791330, 0x417c6668, 0x297f4aa0, + 0xe6ecfddc, 
0x8eefd114, 0x36eaa44c, 0x5ee98884, + 0x430c380d, 0x2b0f14c5, 0x930a619d, 0xfb094d55, + 0xa8c1008f, 0xc0c22c47, 0x78c7591f, 0x10c475d7, + 0x0d21c55e, 0x6522e996, 0xdd279cce, 0xb524b006, + 0x47ec84c7, 0x2fefa80f, 0x97eadd57, 0xffe9f19f, + 0xe20c4116, 0x8a0f6dde, 0x320a1886, 0x5a09344e, + 0x09c17994, 0x61c2555c, 0xd9c72004, 0xb1c40ccc, + 0xac21bc45, 0xc422908d, 0x7c27e5d5, 0x1424c91d, + 0xdbb77e61, 0xb3b452a9, 0x0bb127f1, 0x63b20b39, + 0x7e57bbb0, 0x16549778, 0xae51e220, 0xc652cee8, + 0x959a8332, 0xfd99affa, 0x459cdaa2, 0x2d9ff66a, + 0x307a46e3, 0x58796a2b, 0xe07c1f73, 0x887f33bb, + 0xf56e0ef4, 0x9d6d223c, 0x25685764, 0x4d6b7bac, + 0x508ecb25, 0x388de7ed, 0x808892b5, 0xe88bbe7d, + 0xbb43f3a7, 0xd340df6f, 0x6b45aa37, 0x034686ff, + 0x1ea33676, 0x76a01abe, 0xcea56fe6, 0xa6a6432e, + 0x6935f452, 0x0136d89a, 0xb933adc2, 0xd130810a, + 0xccd53183, 0xa4d61d4b, 0x1cd36813, 0x74d044db, + 0x27180901, 0x4f1b25c9, 0xf71e5091, 0x9f1d7c59, + 0x82f8ccd0, 0xeafbe018, 0x52fe9540, 0x3afdb988, + 0xc8358d49, 0xa036a181, 0x1833d4d9, 0x7030f811, + 0x6dd54898, 0x05d66450, 0xbdd31108, 0xd5d03dc0, + 0x8618701a, 0xee1b5cd2, 0x561e298a, 0x3e1d0542, + 0x23f8b5cb, 0x4bfb9903, 0xf3feec5b, 0x9bfdc093, + 0x546e77ef, 0x3c6d5b27, 0x84682e7f, 0xec6b02b7, + 0xf18eb23e, 0x998d9ef6, 0x2188ebae, 0x498bc766, + 0x1a438abc, 0x7240a674, 0xca45d32c, 0xa246ffe4, + 0xbfa34f6d, 0xd7a063a5, 0x6fa516fd, 0x07a63a35, + 0x8fd9098e, 0xe7da2546, 0x5fdf501e, 0x37dc7cd6, + 0x2a39cc5f, 0x423ae097, 0xfa3f95cf, 0x923cb907, + 0xc1f4f4dd, 0xa9f7d815, 0x11f2ad4d, 0x79f18185, + 0x6414310c, 0x0c171dc4, 0xb412689c, 0xdc114454, + 0x1382f328, 0x7b81dfe0, 0xc384aab8, 0xab878670, + 0xb66236f9, 0xde611a31, 0x66646f69, 0x0e6743a1, + 0x5daf0e7b, 0x35ac22b3, 0x8da957eb, 0xe5aa7b23, + 0xf84fcbaa, 0x904ce762, 0x2849923a, 0x404abef2, + 0xb2828a33, 0xda81a6fb, 0x6284d3a3, 0x0a87ff6b, + 0x17624fe2, 0x7f61632a, 0xc7641672, 0xaf673aba, + 0xfcaf7760, 0x94ac5ba8, 0x2ca92ef0, 0x44aa0238, + 0x594fb2b1, 0x314c9e79, 0x8949eb21, 0xe14ac7e9, + 0x2ed97095, 
0x46da5c5d, 0xfedf2905, 0x96dc05cd, + 0x8b39b544, 0xe33a998c, 0x5b3fecd4, 0x333cc01c, + 0x60f48dc6, 0x08f7a10e, 0xb0f2d456, 0xd8f1f89e, + 0xc5144817, 0xad1764df, 0x15121187, 0x7d113d4f + },{ + 0x00000000, 0x493c7d27, 0x9278fa4e, 0xdb448769, + 0x211d826d, 0x6821ff4a, 0xb3657823, 0xfa590504, + 0x423b04da, 0x0b0779fd, 0xd043fe94, 0x997f83b3, + 0x632686b7, 0x2a1afb90, 0xf15e7cf9, 0xb86201de, + 0x847609b4, 0xcd4a7493, 0x160ef3fa, 0x5f328edd, + 0xa56b8bd9, 0xec57f6fe, 0x37137197, 0x7e2f0cb0, + 0xc64d0d6e, 0x8f717049, 0x5435f720, 0x1d098a07, + 0xe7508f03, 0xae6cf224, 0x7528754d, 0x3c14086a, + 0x0d006599, 0x443c18be, 0x9f789fd7, 0xd644e2f0, + 0x2c1de7f4, 0x65219ad3, 0xbe651dba, 0xf759609d, + 0x4f3b6143, 0x06071c64, 0xdd439b0d, 0x947fe62a, + 0x6e26e32e, 0x271a9e09, 0xfc5e1960, 0xb5626447, + 0x89766c2d, 0xc04a110a, 0x1b0e9663, 0x5232eb44, + 0xa86bee40, 0xe1579367, 0x3a13140e, 0x732f6929, + 0xcb4d68f7, 0x827115d0, 0x593592b9, 0x1009ef9e, + 0xea50ea9a, 0xa36c97bd, 0x782810d4, 0x31146df3, + 0x1a00cb32, 0x533cb615, 0x8878317c, 0xc1444c5b, + 0x3b1d495f, 0x72213478, 0xa965b311, 0xe059ce36, + 0x583bcfe8, 0x1107b2cf, 0xca4335a6, 0x837f4881, + 0x79264d85, 0x301a30a2, 0xeb5eb7cb, 0xa262caec, + 0x9e76c286, 0xd74abfa1, 0x0c0e38c8, 0x453245ef, + 0xbf6b40eb, 0xf6573dcc, 0x2d13baa5, 0x642fc782, + 0xdc4dc65c, 0x9571bb7b, 0x4e353c12, 0x07094135, + 0xfd504431, 0xb46c3916, 0x6f28be7f, 0x2614c358, + 0x1700aeab, 0x5e3cd38c, 0x857854e5, 0xcc4429c2, + 0x361d2cc6, 0x7f2151e1, 0xa465d688, 0xed59abaf, + 0x553baa71, 0x1c07d756, 0xc743503f, 0x8e7f2d18, + 0x7426281c, 0x3d1a553b, 0xe65ed252, 0xaf62af75, + 0x9376a71f, 0xda4ada38, 0x010e5d51, 0x48322076, + 0xb26b2572, 0xfb575855, 0x2013df3c, 0x692fa21b, + 0xd14da3c5, 0x9871dee2, 0x4335598b, 0x0a0924ac, + 0xf05021a8, 0xb96c5c8f, 0x6228dbe6, 0x2b14a6c1, + 0x34019664, 0x7d3deb43, 0xa6796c2a, 0xef45110d, + 0x151c1409, 0x5c20692e, 0x8764ee47, 0xce589360, + 0x763a92be, 0x3f06ef99, 0xe44268f0, 0xad7e15d7, + 0x572710d3, 0x1e1b6df4, 0xc55fea9d, 0x8c6397ba, + 
0xb0779fd0, 0xf94be2f7, 0x220f659e, 0x6b3318b9, + 0x916a1dbd, 0xd856609a, 0x0312e7f3, 0x4a2e9ad4, + 0xf24c9b0a, 0xbb70e62d, 0x60346144, 0x29081c63, + 0xd3511967, 0x9a6d6440, 0x4129e329, 0x08159e0e, + 0x3901f3fd, 0x703d8eda, 0xab7909b3, 0xe2457494, + 0x181c7190, 0x51200cb7, 0x8a648bde, 0xc358f6f9, + 0x7b3af727, 0x32068a00, 0xe9420d69, 0xa07e704e, + 0x5a27754a, 0x131b086d, 0xc85f8f04, 0x8163f223, + 0xbd77fa49, 0xf44b876e, 0x2f0f0007, 0x66337d20, + 0x9c6a7824, 0xd5560503, 0x0e12826a, 0x472eff4d, + 0xff4cfe93, 0xb67083b4, 0x6d3404dd, 0x240879fa, + 0xde517cfe, 0x976d01d9, 0x4c2986b0, 0x0515fb97, + 0x2e015d56, 0x673d2071, 0xbc79a718, 0xf545da3f, + 0x0f1cdf3b, 0x4620a21c, 0x9d642575, 0xd4585852, + 0x6c3a598c, 0x250624ab, 0xfe42a3c2, 0xb77edee5, + 0x4d27dbe1, 0x041ba6c6, 0xdf5f21af, 0x96635c88, + 0xaa7754e2, 0xe34b29c5, 0x380faeac, 0x7133d38b, + 0x8b6ad68f, 0xc256aba8, 0x19122cc1, 0x502e51e6, + 0xe84c5038, 0xa1702d1f, 0x7a34aa76, 0x3308d751, + 0xc951d255, 0x806daf72, 0x5b29281b, 0x1215553c, + 0x230138cf, 0x6a3d45e8, 0xb179c281, 0xf845bfa6, + 0x021cbaa2, 0x4b20c785, 0x906440ec, 0xd9583dcb, + 0x613a3c15, 0x28064132, 0xf342c65b, 0xba7ebb7c, + 0x4027be78, 0x091bc35f, 0xd25f4436, 0x9b633911, + 0xa777317b, 0xee4b4c5c, 0x350fcb35, 0x7c33b612, + 0x866ab316, 0xcf56ce31, 0x14124958, 0x5d2e347f, + 0xe54c35a1, 0xac704886, 0x7734cfef, 0x3e08b2c8, + 0xc451b7cc, 0x8d6dcaeb, 0x56294d82, 0x1f1530a5 + } +#endif +}; + +#if !defined(__powerpc64__) +/* + * __wt_cksum_sw -- + * Return a checksum for a chunk of memory, computed in software. + * The running value starts at 0xffffffff and its complement is returned. + * Leading bytes up to the first 4B boundary and any trailing bytes go + * through g_crc_slicing[0] one at a time; the middle is folded 8 bytes + * per pass through all eight g_crc_slicing tables. WORDS_BIGENDIAN builds + * use their own shift directions and table order, then byte-swap the + * result before it is complemented. + */ +static uint32_t +__wt_cksum_sw(const void *chunk, size_t len) +{ + uint32_t crc, next; + size_t nqwords; + const uint8_t *p; + + crc = 0xffffffff; + + /* Checksum one byte at a time to the first 4B boundary. 
*/ + for (p = chunk; + ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 && + len > 0; ++p, --len) +#ifdef WORDS_BIGENDIAN + crc = g_crc_slicing[0][((crc >> 24) ^ *p) & 0xFF] ^ (crc << 8); +#else + crc = g_crc_slicing[0][(crc ^ *p) & 0xFF] ^ (crc >> 8); +#endif + + /* Checksum in 8B chunks: the first 4B are XORed into crc, the second 4B are loaded into next. */ + for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) { + crc ^= *(uint32_t *)p; + p += sizeof(uint32_t); + next = *(uint32_t *)p; + p += sizeof(uint32_t); + crc = +#ifdef WORDS_BIGENDIAN + g_crc_slicing[4][(crc ) & 0xFF] ^ + g_crc_slicing[5][(crc >> 8) & 0xFF] ^ + g_crc_slicing[6][(crc >> 16) & 0xFF] ^ + g_crc_slicing[7][(crc >> 24)] ^ + g_crc_slicing[0][(next ) & 0xFF] ^ + g_crc_slicing[1][(next >> 8) & 0xFF] ^ + g_crc_slicing[2][(next >> 16) & 0xFF] ^ + g_crc_slicing[3][(next >> 24)]; +#else + g_crc_slicing[7][(crc ) & 0xFF] ^ + g_crc_slicing[6][(crc >> 8) & 0xFF] ^ + g_crc_slicing[5][(crc >> 16) & 0xFF] ^ + g_crc_slicing[4][(crc >> 24)] ^ + g_crc_slicing[3][(next ) & 0xFF] ^ + g_crc_slicing[2][(next >> 8) & 0xFF] ^ + g_crc_slicing[1][(next >> 16) & 0xFF] ^ + g_crc_slicing[0][(next >> 24)]; +#endif + } + + /* Checksum trailing bytes one byte at a time. */ +#ifdef WORDS_BIGENDIAN + for (len &= 0x7; len > 0; ++p, len--) + crc = g_crc_slicing[0][((crc >> 24) ^ *p) & 0xFF] ^ (crc << 8); + + /* Do final byte swap to produce a result identical to little endian */ + crc = + ((crc << 24) & 0xFF000000) | + ((crc << 8) & 0x00FF0000) | + ((crc >> 8) & 0x0000FF00) | + ((crc >> 24) & 0x000000FF); +#else + for (len &= 0x7; len > 0; ++p, len--) + crc = g_crc_slicing[0][(crc ^ *p) & 0xFF] ^ (crc >> 8); +#endif + return (~crc); +} +#endif + +#if (defined(__amd64) || defined(__x86_64)) +/* + * __wt_cksum_hw -- + * Return a checksum for a chunk of memory, computed in hardware + * using 8 byte steps. 
+ * The .byte sequences below appear to be hand-encoded SSE4.2 CRC32 + * instructions (a 1-byte and an 8-byte form, mirroring the + * _mm_crc32_u8/_mm_crc32_u64 calls in the _M_AMD64 variant), with crc + * pinned to the S register and the data to the c register. + * NOTE(review): confirm the encodings against the instruction reference. + */ +static uint32_t +__wt_cksum_hw(const void *chunk, size_t len) +{ + uint32_t crc; + size_t nqwords; + const uint8_t *p; + const uint64_t *p64; + + crc = 0xffffffff; + + /* Checksum one byte at a time to the first 4B boundary. */ + for (p = chunk; + ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 && + len > 0; ++p, --len) { + __asm__ __volatile__( + ".byte 0xF2, 0x0F, 0x38, 0xF0, 0xF1" + : "=S" (crc) + : "0" (crc), "c" (*p)); + } + + p64 = (const uint64_t *)p; + /* Checksum in 8B chunks. NOTE(review): p was only aligned to 4B above, so these 8B loads may be unaligned. */ + for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) { + __asm__ __volatile__ ( + ".byte 0xF2, 0x48, 0x0F, 0x38, 0xF1, 0xF1" + : "=S"(crc) + : "0"(crc), "c" (*p64)); + p64++; + } + + /* Checksum trailing bytes one byte at a time. */ + p = (const uint8_t *)p64; + for (len &= 0x7; len > 0; ++p, len--) { + __asm__ __volatile__( + ".byte 0xF2, 0x0F, 0x38, 0xF0, 0xF1" + : "=S" (crc) + : "0" (crc), "c" (*p)); + } + return (~crc); +} +#endif + +#if defined(_M_AMD64) +/* + * __wt_cksum_hw -- + * Return a checksum for a chunk of memory, computed in hardware + * using 8 byte steps. + * This is the _M_AMD64 variant, built on the _mm_crc32_u8 and + * _mm_crc32_u64 intrinsics. + */ +static uint32_t +__wt_cksum_hw(const void *chunk, size_t len) +{ + uint32_t crc; + size_t nqwords; + const uint8_t *p; + const uint64_t *p64; + + crc = 0xffffffff; + + /* Checksum one byte at a time to the first 4B boundary. */ + for (p = chunk; + ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 && + len > 0; ++p, --len) { + crc = _mm_crc32_u8(crc, *p); + } + + p64 = (const uint64_t *)p; + /* Checksum in 8B chunks. 
NOTE(review): p was only aligned to 4B above, so these 8B loads may be unaligned. */ + for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) { + crc = (uint32_t)_mm_crc32_u64(crc, *p64); + p64++; + } + + /* Checksum trailing bytes one byte at a time. 
*/ + p = (const uint8_t *)p64; + for (len &= 0x7; len > 0; ++p, len--) { + crc = _mm_crc32_u8(crc, *p); + } + + return (~crc); +} +#endif + +#if defined(__powerpc64__) + +unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, + unsigned long len); + +/* + * __wt_cksum_hw -- + * Return a checksum for a chunk of memory by calling the externally + * defined crc32_vpmsum routine with an initial crc of 0. + */ +static uint32_t +__wt_cksum_hw(const void *chunk, size_t len) +{ + return crc32_vpmsum(0, chunk, len); +} +#endif + +/* + * __wt_cksum -- + * Return a checksum for a chunk of memory using the fastest method + * available. + * Dispatches through __wt_cksum_func, which __wt_cksum_init assigns. + * NOTE(review): presumably __wt_cksum_init must run before the first + * call; confirm how __wt_cksum_func is initialized otherwise. + */ +uint32_t +__wt_cksum(const void *chunk, size_t len) +{ + return (*__wt_cksum_func)(chunk, len); +} + +/* + * __wt_cksum_init -- + * Detect CRC hardware and set the checksum function. + * On x86-64 builds, CPUID leaf 1 is queried and __wt_cksum_hw is chosen + * when ECX bit 20 (CPUID_ECX_HAS_SSE42) is set, __wt_cksum_sw otherwise. + * __powerpc64__ builds always use __wt_cksum_hw; every other build uses + * __wt_cksum_sw. + */ +void +__wt_cksum_init(void) +{ +#define CPUID_ECX_HAS_SSE42 (1 << 20) + +#if (defined(__amd64) || defined(__x86_64)) + unsigned int eax, ebx, ecx, edx; + + __asm__ __volatile__ ( + "cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "a" (1)); + + if (ecx & CPUID_ECX_HAS_SSE42) + __wt_cksum_func = __wt_cksum_hw; + else + __wt_cksum_func = __wt_cksum_sw; + +#elif defined(_M_AMD64) + int cpuInfo[4]; + + __cpuid(cpuInfo, 1); + + if (cpuInfo[2] & CPUID_ECX_HAS_SSE42) + __wt_cksum_func = __wt_cksum_hw; + else + __wt_cksum_func = __wt_cksum_sw; +#elif defined(__powerpc64__) + __wt_cksum_func = __wt_cksum_hw; +#else + __wt_cksum_func = __wt_cksum_sw; +#endif +} diff --git a/src/third_party/wiredtiger/src/checksum/power8/LICENSE.TXT b/src/third_party/wiredtiger/src/checksum/power8/LICENSE.TXT new file mode 100644 index 00000000000..2f4bb91f574 --- /dev/null +++ b/src/third_party/wiredtiger/src/checksum/power8/LICENSE.TXT @@ -0,0 +1,476 @@ +Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM + +crc32-vpmsum is free software; you can redistribute it and/or +modify it under the terms of either: + + a) the GNU General Public License as published by the Free Software 
Foundation; either + version 2 of the License, or (at your option) any later version., or + b) the Apache License, Version 2.0 + + + + + + + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + + + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/src/third_party/wiredtiger/src/checksum/power8/README.md b/src/third_party/wiredtiger/src/checksum/power8/README.md new file mode 100644 index 00000000000..3e2976650cd --- /dev/null +++ b/src/third_party/wiredtiger/src/checksum/power8/README.md @@ -0,0 +1,208 @@ +crc32-vpmsum +============ + +A set of examples for accelerating CRC32 calculations using the vector +polynomial multiply sum (vpmsum) instructions introduced in POWER8. These +instructions implement byte, halfword, word and doubleword carryless +multiply/add. 
+ +Performance +----------- + +An implementation of slice-by-8, one of the fastest lookup table methods +is included so we can compare performance against it. Testing 5000000 +iterations of a CRC of 32 kB of data (to keep it L1 cache contained): + +``` +# time slice_by_8_bench 32768 5000000 +122.220 seconds + +# time crc32_bench 32768 5000000 +2.937 seconds +``` + +The vpmsum accelerated CRC is just over 41x faster. + +This test was run on a 4.1 GHz POWER8, so the algorithm sustains about +52 GiB/sec or 13.6 bytes/cycle. The theoretical limit is 16 bytes/cycle +since we can execute a maximum of one vpmsum instruction per cycle. + +In another test, a version was added to the kernel and btrfs write +performance was shown to be 3.8x faster. The test was done to a ramdisk +to mitigate any I/O induced variability. + +Quick start +----------- + +- Modify CRC and OPTIONS in the Makefile. There are examples for the two most + common crc32s. + +- Type make to create the constants (crc32_constants.h) + +- Import the code into your application (crc32.S crc32_wrapper.c + crc32_constants.h ppc-opcode.h) and call the CRC: + +``` +unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p, unsigned long len); +``` + +CRC background +-------------- + +For a good background on CRCs, check out: + +http://www.ross.net/crc/download/crc_v3.txt + +A few key points: + +- A CRC is the remainder after dividing a message by the CRC polynomial, + ie M mod CRC_POLY +- multiply/divide is carryless +- add/subtract is an xor +- n (where n is the order of the CRC) bits of zeroes are appended to the + end of the message. + +One more important piece of information - a CRC is a linear function, so: + +``` + CRC(A xor B) = CRC(A) xor CRC(B) + + CRC(A . B) = CRC(A) . 
CRC(B) (remember this is carryless multiply) +``` + +If we take 64 bits of data, represented by two 32 bit chunks (AAAAAAAA +and BBBBBBBB): + +``` +CRC(AAAAAAAABBBBBBBB) + = CRC(AAAAAAAA00000000 xor BBBBBBBB) + = CRC(AAAAAAAA00000000) xor CRC(BBBBBBBB) +``` + +If we operate on AAAAAAAA: + +``` +CRC(AAAAAAAA00000000) + = CRC(AAAAAAAA . 100000000) + = CRC(AAAAAAAA) . CRC(100000000) +``` + +And CRC(100000000) is a constant which we can pre-calculate: + +``` +CRC(100000000) + = 100000000 mod CRC_POLY + = 2^32 mod CRC_POLY +``` + +Finally we can add our modified AAAAAAAA to BBBBBBBB: + +``` +CRC(AAAAAAAABBBBBBBB) + = ((2^32 mod CRC_POLY) . CRC(AAAAAAAA)) xor CRC(BBBBBBBB) +``` + +In other words, with the right constants pre-calculated we can shift the +input data around and we can also calculate the CRC in as many parallel +chunks as we want. + +No matter how much shifting we do, the final result will be 64 bits of +data (63 actually, because there is no carry into the top bit). To reduce +it further we need another trick, and that is Barrett reduction: + +http://en.wikipedia.org/wiki/Barrett_reduction + +Barrett reduction is a method of calculating a mod n. The idea is to +calculate q, the multiple of our polynomial that we need to subtract. By +doing the computation 2x bits higher (ie 64 bits) and shifting the +result back down 2x bits, we round down to the nearest multiple. + +``` + k = 32 + m = floor((4^k)/n) = floor((4^32)/n) + n = CRC polynomial + a = 64 bits of data + + q = floor(ma/(2^64)) + result = a - qn +``` + +An example in the floating point domain makes it clearer how this works: + +``` +a mod n = a - floor(am) * n +``` + +Let's use it to calculate 22 mod 10: + +``` + a = 22 + n = 10 + m = 1/n = 1/10 = 0.1 + +22 mod 10 + = 22 - floor(22*0.1) * 10 + = 22 - 2 * 10 + = 22 - 20 + = 2 +``` + +There is one more issue left - bit reflection. Some CRCs are defined to +operate on the least significant bit first (eg CRC32c). 
Let's look at +how this would get laid out in a register, and let's simplify it to just +two bytes (vs a 16 byte VMX register): + + [ 8..15 ] [ 0..7 ] + +Notice how the bits and bytes are out of order. Since we are doing +multi word multiplication on these values we need them to both be +in order. + +The simplest way to fix this is to reflect the bits in each byte: + + [ 15..8 ] [ 7..0 ] + +However shuffling bits in a byte is expensive on most CPUs. It is +however relatively cheap to shuffle bytes around. What if we load +the bytes reversed: + + [ 0..7 ] [ 8..15 ] + +Now the bits and bytes are in order, except the least significant bit +of the register is now on the left and the most significant bit is on the +right. We operate as if the register is reflected, which normally we +cannot do. The reason we get away with this is our multiplies are carryless +and our addition and subtraction are xor, so our operations never create +carries. + +The only trick is we have to shift the result of multiplies left one bit +because the high bit of the multiply is always 0, and we want that high bit +on the right not the left. + +Implementation +-------------- + +The vpmsum instructions on POWER8 have a 6 cycle latency and we can +execute one every cycle. In light of this the main loop has 8 parallel +streams which consume 8 x 16 B each iteration. At the completion of this +loop we have taken 32 kB of data and reduced it to 8 x 16 B (128 B). + +The next step is to take this 128 B and reduce it to 8 B. At this stage +we also add 32 bits of 0 to the end. + +We then apply Barrett reduction to get our CRC. 
+ +Examples +-------- +- barrett_reduction: An example of Barrett reduction + +- final_fold: Starting with 128 bits, add 32 bits of zeros and reduce it to + 64 bits, then apply Barrett reduction + +- final_fold2: A second method of reduction + +Acknowledgements +---------------- + +Thanks to Michael Gschwind, Jeff Derby, Lorena Pesantez and Stewart Smith +for their ideas and assistance. diff --git a/src/third_party/wiredtiger/src/checksum/power8/crc32.S b/src/third_party/wiredtiger/src/checksum/power8/crc32.S new file mode 100644 index 00000000000..0b7870668b5 --- /dev/null +++ b/src/third_party/wiredtiger/src/checksum/power8/crc32.S @@ -0,0 +1,778 @@ +#if defined(__powerpc64__) +/* + * Calculate the checksum of data that is 16 byte aligned and a multiple of + * 16 bytes. + * + * The first step is to reduce it to 1024 bits. We do this in 8 parallel + * chunks in order to mask the latency of the vpmsum instructions. If we + * have more than 32 kB of data to checksum we repeat this step multiple + * times, passing in the previous 1024 bits. + * + * The next step is to reduce the 1024 bits to 64 bits. This step adds + * 32 bits of 0s to the end - this matches what a CRC does. We just + * calculate constants that land the data in this 32 bits. + * + * We then use fixed point Barrett reduction to compute a mod n over GF(2) + * for n = CRC using POWER8 instructions. We use x = 32. + * + * http://en.wikipedia.org/wiki/Barrett_reduction + * + * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <ppc-asm.h> +#include "ppc-opcode.h" + +#undef toc + +#ifndef r1 +#define r1 1 +#endif + +#ifndef r2 +#define r2 2 +#endif + + .section .rodata +.balign 16 + +.byteswap_constant: + /* byte reverse permute constant */ + .octa 0x0F0E0D0C0B0A09080706050403020100 + +#define __ASSEMBLY__ +#include "crc32_constants.h" + + .text + +#if defined(__BIG_ENDIAN__) && defined(REFLECT) +#define BYTESWAP_DATA +#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) +#define BYTESWAP_DATA +#else +#undef BYTESWAP_DATA +#endif + +#define off16 r25 +#define off32 r26 +#define off48 r27 +#define off64 r28 +#define off80 r29 +#define off96 r30 +#define off112 r31 + +#define const1 v24 +#define const2 v25 + +#define byteswap v26 +#define mask_32bit v27 +#define mask_64bit v28 +#define zeroes v29 + +#ifdef BYTESWAP_DATA +#define VPERM(A, B, C, D) vperm A, B, C, D +#else +#define VPERM(A, B, C, D) +#endif + +/* unsigned int __crc32_vpmsum(unsigned int crc, void *p, unsigned long len) */ +FUNC_START(__crc32_vpmsum) + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + li off16,16 + li off32,32 + li off48,48 + li off64,64 + li off80,80 + li off96,96 + li off112,112 + li r0,0 + + /* Enough room for saving 10 non volatile VMX registers */ + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + stvx v20,0,r6 + stvx v21,off16,r6 + stvx v22,off32,r6 + stvx v23,off48,r6 + stvx v24,off64,r6 + stvx v25,off80,r6 + stvx v26,off96,r6 + stvx v27,off112,r6 + stvx v28,0,r7 + stvx v29,off16,r7 + + mr r10,r3 + + vxor zeroes,zeroes,zeroes + vspltisw v0,-1 + + vsldoi mask_32bit,zeroes,v0,4 + vsldoi mask_64bit,zeroes,v0,8 + + /* Get the initial value into v8 */ + vxor v8,v8,v8 + MTVRD(v8, r3) +#ifdef REFLECT + vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ +#else + vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */ +#endif + +#ifdef BYTESWAP_DATA + addis r3,r2,.byteswap_constant@toc@ha + addi r3,r3,.byteswap_constant@toc@l + 
+ lvx byteswap,0,r3 + addi r3,r3,16 +#endif + + cmpdi r5,256 + blt .Lshort + + rldicr r6,r5,0,56 + + /* Checksum in blocks of MAX_SIZE */ +1: lis r7,MAX_SIZE@h + ori r7,r7,MAX_SIZE@l + mr r9,r7 + cmpd r6,r7 + bgt 2f + mr r7,r6 +2: subf r6,r7,r6 + + /* our main loop does 128 bytes at a time */ + srdi r7,r7,7 + + /* + * Work out the offset into the constants table to start at. Each + * constant is 16 bytes, and it is used against 128 bytes of input + * data - 128 / 16 = 8 + */ + sldi r8,r7,4 + srdi r9,r9,3 + subf r8,r8,r9 + + /* We reduce our final 128 bytes in a separate step */ + addi r7,r7,-1 + mtctr r7 + + addis r3,r2,.constants@toc@ha + addi r3,r3,.constants@toc@l + + /* Find the start of our constants */ + add r3,r3,r8 + + /* zero v0-v7 which will contain our checksums */ + vxor v0,v0,v0 + vxor v1,v1,v1 + vxor v2,v2,v2 + vxor v3,v3,v3 + vxor v4,v4,v4 + vxor v5,v5,v5 + vxor v6,v6,v6 + vxor v7,v7,v7 + + lvx const1,0,r3 + + /* + * If we are looping back to consume more data we use the values + * already in v16-v23. 
+ */ + cmpdi r0,1 + beq 2f + + /* First warm up pass */ + lvx v16,0,r4 + lvx v17,off16,r4 + VPERM(v16,v16,v16,byteswap) + VPERM(v17,v17,v17,byteswap) + lvx v18,off32,r4 + lvx v19,off48,r4 + VPERM(v18,v18,v18,byteswap) + VPERM(v19,v19,v19,byteswap) + lvx v20,off64,r4 + lvx v21,off80,r4 + VPERM(v20,v20,v20,byteswap) + VPERM(v21,v21,v21,byteswap) + lvx v22,off96,r4 + lvx v23,off112,r4 + VPERM(v22,v22,v22,byteswap) + VPERM(v23,v23,v23,byteswap) + addi r4,r4,8*16 + + /* xor in initial value */ + vxor v16,v16,v8 + +2: bdz .Lfirst_warm_up_done + + addi r3,r3,16 + lvx const2,0,r3 + + /* Second warm up pass */ + VPMSUMD(v8,v16,const1) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + VPMSUMD(v9,v17,const1) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + VPMSUMD(v10,v18,const1) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + VPMSUMD(v11,v19,const1) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + ori r2,r2,0 + + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdz .Lfirst_cool_down + + /* + * main loop. We modulo schedule it such that it takes three iterations + * to complete - first iteration load, second iteration vpmsum, third + * iteration xor. 
+ */ + .balign 16 +4: lvx const1,0,r3 + addi r3,r3,16 + ori r2,r2,0 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const2) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const2) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const2) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const2) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + lvx const2,0,r3 + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdnz 4b + +.Lfirst_cool_down: + /* First cool down pass */ + lvx const1,0,r3 + addi r3,r3,16 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const1) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const1) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const1) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const1) + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + ori r2,r2,0 + +.Lsecond_cool_down: + /* Second cool down pass */ + vxor v0,v0,v8 + vxor v1,v1,v9 + vxor v2,v2,v10 + vxor v3,v3,v11 + vxor v4,v4,v12 + vxor v5,v5,v13 + vxor v6,v6,v14 + vxor v7,v7,v15 + +#ifdef REFLECT + /* + * vpmsumd produces a 96 bit result in the least significant bits + * of the register. Since we are bit reflected we have to shift it + * left 32 bits so it occupies the least significant bits in the + * bit reflected domain. 
+ */ + vsldoi v0,v0,zeroes,4 + vsldoi v1,v1,zeroes,4 + vsldoi v2,v2,zeroes,4 + vsldoi v3,v3,zeroes,4 + vsldoi v4,v4,zeroes,4 + vsldoi v5,v5,zeroes,4 + vsldoi v6,v6,zeroes,4 + vsldoi v7,v7,zeroes,4 +#endif + + /* xor with last 1024 bits */ + lvx v8,0,r4 + lvx v9,off16,r4 + VPERM(v8,v8,v8,byteswap) + VPERM(v9,v9,v9,byteswap) + lvx v10,off32,r4 + lvx v11,off48,r4 + VPERM(v10,v10,v10,byteswap) + VPERM(v11,v11,v11,byteswap) + lvx v12,off64,r4 + lvx v13,off80,r4 + VPERM(v12,v12,v12,byteswap) + VPERM(v13,v13,v13,byteswap) + lvx v14,off96,r4 + lvx v15,off112,r4 + VPERM(v14,v14,v14,byteswap) + VPERM(v15,v15,v15,byteswap) + + addi r4,r4,8*16 + + vxor v16,v0,v8 + vxor v17,v1,v9 + vxor v18,v2,v10 + vxor v19,v3,v11 + vxor v20,v4,v12 + vxor v21,v5,v13 + vxor v22,v6,v14 + vxor v23,v7,v15 + + li r0,1 + cmpdi r6,0 + addi r6,r6,128 + bne 1b + + /* Work out how many bytes we have left */ + andi. r5,r5,127 + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,128 + add r3,r3,r6 + + /* How many 16 byte chunks are in the tail */ + srdi r7,r5,4 + mtctr r7 + + /* + * Reduce the previously calculated 1024 bits to 64 bits, shifting + * 32 bits to include the trailing 32 bits of zeros + */ + lvx v0,0,r3 + lvx v1,off16,r3 + lvx v2,off32,r3 + lvx v3,off48,r3 + lvx v4,off64,r3 + lvx v5,off80,r3 + lvx v6,off96,r3 + lvx v7,off112,r3 + addi r3,r3,8*16 + + VPMSUMW(v0,v16,v0) + VPMSUMW(v1,v17,v1) + VPMSUMW(v2,v18,v2) + VPMSUMW(v3,v19,v3) + VPMSUMW(v4,v20,v4) + VPMSUMW(v5,v21,v5) + VPMSUMW(v6,v22,v6) + VPMSUMW(v7,v23,v7) + + /* Now reduce the tail (0 - 112 bytes) */ + cmpdi r7,0 + beq 1f + + lvx v16,0,r4 + lvx v17,0,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off16,r4 + lvx v17,off16,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off32,r4 + lvx v17,off32,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off48,r4 + lvx v17,off48,r3 + 
VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off64,r4 + lvx v17,off64,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off80,r4 + lvx v17,off80,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off96,r4 + lvx v17,off96,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + + /* Now xor all the parallel chunks together */ +1: vxor v0,v0,v1 + vxor v2,v2,v3 + vxor v4,v4,v5 + vxor v6,v6,v7 + + vxor v0,v0,v2 + vxor v4,v4,v6 + + vxor v0,v0,v4 + +.Lbarrett_reduction: + /* Barrett constants */ + addis r3,r2,.barrett_constants@toc@ha + addi r3,r3,.barrett_constants@toc@l + + lvx const1,0,r3 + lvx const2,off16,r3 + + vsldoi v1,v0,v0,8 + vxor v0,v0,v1 /* xor two 64 bit results together */ + +#ifdef REFLECT + /* shift left one bit */ + vspltisb v1,1 + vsl v0,v0,v1 +#endif + + vand v0,v0,mask_64bit + +#ifndef REFLECT + /* + * Now for the Barrett reduction algorithm. The idea is to calculate q, + * the multiple of our polynomial that we need to subtract. By + * doing the computation 2x bits higher (ie 64 bits) and shifting the + * result back down 2x bits, we round down to the nearest multiple. + */ + VPMSUMD(v1,v0,const1) /* ma */ + vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Get the result into r3. We need to shift it left 8 bytes: + * V0 [ 0 1 2 X ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */ +#else + /* + * The reflected version of Barrett reduction. Instead of bit + * reflecting our data (which is expensive to do), we bit reflect our + * constants and our algorithm, which means the intermediate data in + * our vector registers goes from 0-63 instead of 63-0. We can reflect + * the algorithm because we don't carry in mod 2 arithmetic. 
+ */ + vand v1,v0,mask_32bit /* bottom 32 bits of a */ + VPMSUMD(v1,v1,const1) /* ma */ + vand v1,v1,mask_32bit /* bottom 32bits of ma */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Since we are bit reflected, the result (ie the low 32 bits) is in + * the high 32 bits. We just need to shift it left 4 bytes + * V0 [ 0 1 X 3 ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ +#endif + + /* Get it into r3 */ + MFVRD(r3, v0) + +.Lout: + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + lvx v20,0,r6 + lvx v21,off16,r6 + lvx v22,off32,r6 + lvx v23,off48,r6 + lvx v24,off64,r6 + lvx v25,off80,r6 + lvx v26,off96,r6 + lvx v27,off112,r6 + lvx v28,0,r7 + lvx v29,off16,r7 + + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + ld r26,-48(r1) + ld r25,-56(r1) + + blr + +.Lfirst_warm_up_done: + lvx const1,0,r3 + addi r3,r3,16 + + VPMSUMD(v8,v16,const1) + VPMSUMD(v9,v17,const1) + VPMSUMD(v10,v18,const1) + VPMSUMD(v11,v19,const1) + VPMSUMD(v12,v20,const1) + VPMSUMD(v13,v21,const1) + VPMSUMD(v14,v22,const1) + VPMSUMD(v15,v23,const1) + + b .Lsecond_cool_down + +.Lshort: + cmpdi r5,0 + beq .Lzero + + addis r3,r2,.short_constants@toc@ha + addi r3,r3,.short_constants@toc@l + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,256 + add r3,r3,r6 + + /* How many 16 byte chunks? 
*/ + srdi r7,r5,4 + mtctr r7 + + vxor v19,v19,v19 + vxor v20,v20,v20 + + lvx v0,0,r4 + lvx v16,0,r3 + VPERM(v0,v0,v16,byteswap) + vxor v0,v0,v8 /* xor in initial value */ + VPMSUMW(v0,v0,v16) + bdz .Lv0 + + lvx v1,off16,r4 + lvx v17,off16,r3 + VPERM(v1,v1,v17,byteswap) + VPMSUMW(v1,v1,v17) + bdz .Lv1 + + lvx v2,off32,r4 + lvx v16,off32,r3 + VPERM(v2,v2,v16,byteswap) + VPMSUMW(v2,v2,v16) + bdz .Lv2 + + lvx v3,off48,r4 + lvx v17,off48,r3 + VPERM(v3,v3,v17,byteswap) + VPMSUMW(v3,v3,v17) + bdz .Lv3 + + lvx v4,off64,r4 + lvx v16,off64,r3 + VPERM(v4,v4,v16,byteswap) + VPMSUMW(v4,v4,v16) + bdz .Lv4 + + lvx v5,off80,r4 + lvx v17,off80,r3 + VPERM(v5,v5,v17,byteswap) + VPMSUMW(v5,v5,v17) + bdz .Lv5 + + lvx v6,off96,r4 + lvx v16,off96,r3 + VPERM(v6,v6,v16,byteswap) + VPMSUMW(v6,v6,v16) + bdz .Lv6 + + lvx v7,off112,r4 + lvx v17,off112,r3 + VPERM(v7,v7,v17,byteswap) + VPMSUMW(v7,v7,v17) + bdz .Lv7 + + addi r3,r3,128 + addi r4,r4,128 + + lvx v8,0,r4 + lvx v16,0,r3 + VPERM(v8,v8,v16,byteswap) + VPMSUMW(v8,v8,v16) + bdz .Lv8 + + lvx v9,off16,r4 + lvx v17,off16,r3 + VPERM(v9,v9,v17,byteswap) + VPMSUMW(v9,v9,v17) + bdz .Lv9 + + lvx v10,off32,r4 + lvx v16,off32,r3 + VPERM(v10,v10,v16,byteswap) + VPMSUMW(v10,v10,v16) + bdz .Lv10 + + lvx v11,off48,r4 + lvx v17,off48,r3 + VPERM(v11,v11,v17,byteswap) + VPMSUMW(v11,v11,v17) + bdz .Lv11 + + lvx v12,off64,r4 + lvx v16,off64,r3 + VPERM(v12,v12,v16,byteswap) + VPMSUMW(v12,v12,v16) + bdz .Lv12 + + lvx v13,off80,r4 + lvx v17,off80,r3 + VPERM(v13,v13,v17,byteswap) + VPMSUMW(v13,v13,v17) + bdz .Lv13 + + lvx v14,off96,r4 + lvx v16,off96,r3 + VPERM(v14,v14,v16,byteswap) + VPMSUMW(v14,v14,v16) + bdz .Lv14 + + lvx v15,off112,r4 + lvx v17,off112,r3 + VPERM(v15,v15,v17,byteswap) + VPMSUMW(v15,v15,v17) + +.Lv15: vxor v19,v19,v15 +.Lv14: vxor v20,v20,v14 +.Lv13: vxor v19,v19,v13 +.Lv12: vxor v20,v20,v12 +.Lv11: vxor v19,v19,v11 +.Lv10: vxor v20,v20,v10 +.Lv9: vxor v19,v19,v9 +.Lv8: vxor v20,v20,v8 +.Lv7: vxor v19,v19,v7 +.Lv6: vxor v20,v20,v6 +.Lv5: vxor 
v19,v19,v5 +.Lv4: vxor v20,v20,v4 +.Lv3: vxor v19,v19,v3 +.Lv2: vxor v20,v20,v2 +.Lv1: vxor v19,v19,v1 +.Lv0: vxor v20,v20,v0 + + vxor v0,v19,v20 + + b .Lbarrett_reduction + +.Lzero: + mr r3,r10 + b .Lout + +FUNC_END(__crc32_vpmsum) +#endif + +/* + * Make sure the stack isn't executable with GCC (regardless of platform). + */ +#ifdef __ELF__ +.section .note.GNU-stack,"",@progbits +#endif diff --git a/src/third_party/wiredtiger/src/checksum/power8/crc32_constants.h b/src/third_party/wiredtiger/src/checksum/power8/crc32_constants.h new file mode 100644 index 00000000000..02c471d1c56 --- /dev/null +++ b/src/third_party/wiredtiger/src/checksum/power8/crc32_constants.h @@ -0,0 +1,901 @@ +#define CRC 0x1edc6f41 +#define CRC_XOR +#define REFLECT + +#ifndef __ASSEMBLY__ +#ifdef CRC_TABLE +static const unsigned int crc_table[] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, + 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, + 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, + 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, + 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, + 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, + 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, + 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, + 0x1c661503, 
0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, + 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, + 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, + 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, + 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, + 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, + 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, + 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, + 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, + 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, + 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 
0x4c4623a6, 0x5f16d052, 0xad7d5351,}; + +#endif +#else +#define MAX_SIZE 32768 +.constants: + + /* Reduce 262144 kbits to 1024 bits */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + .octa 0x00000000b6ca9e20000000009c37c408 + + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + .octa 0x00000000350249a800000001b51df26c + + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + .octa 0x00000001862dac54000000000724b9d0 + + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + .octa 0x00000001d87fb48c00000001c00532fe + + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + .octa 0x00000001f39b699e00000000f05a9362 + + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + .octa 0x0000000101da11b400000001e1007970 + + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + .octa 0x00000001cab571e000000000a57366ee + + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + .octa 0x00000000c7020cfe0000000192011284 + + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + .octa 0x00000000cdaed1ae0000000162716d9a + + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + .octa 0x00000001e804effc00000000cd97ecde + + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + .octa 0x0000000077c3ea3a0000000058812bc0 + + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + .octa 0x0000000068df31b40000000088b8c12e + + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + .octa 0x00000000b059b6c200000001230b234c + + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + .octa 0x0000000145fb8ed800000001120b416e + + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + .octa 0x00000000cbc0916800000001974aecb0 + + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + .octa 0x000000005ceeedc2000000008ee3f226 + + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + .octa 0x0000000047d74e8600000001089aba9a + + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + .octa 0x00000001407e9e220000000065113872 + + /* x^242688 mod p(x)` << 
1, x^242752 mod p(x)` << 1 */ + .octa 0x00000001da967bda000000005c07ec10 + + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + .octa 0x000000006c8983680000000187590924 + + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + .octa 0x00000000f2d14c9800000000e35da7c6 + + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + .octa 0x00000001993c6ad4000000000415855a + + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + .octa 0x000000014683d1ac0000000073617758 + + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + .octa 0x00000001a7c93e6c0000000176021d28 + + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + .octa 0x000000010211e90a00000001c358fd0a + + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + .octa 0x000000001119403e00000001ff7a2c18 + + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + .octa 0x000000001c3261aa00000000f2d9f7e4 + + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + .octa 0x000000014e37a634000000016cf1f9c8 + + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + .octa 0x0000000073786c0c000000010af9279a + + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + .octa 0x000000011dc037f80000000004f101e8 + + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + .octa 0x0000000031433dfc0000000070bcf184 + + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + .octa 0x000000009cde8348000000000a8de642 + + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + .octa 0x0000000038d3c2a60000000062ea130c + + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + .octa 0x000000011b25f26000000001eb31cbb2 + + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + .octa 0x000000001629e6f00000000170783448 + + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + .octa 0x0000000160838b4c00000001a684b4c6 + + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + .octa 0x000000007a44011c00000000253ca5b4 + + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + .octa 
0x00000000226f417a0000000057b4b1e2 + + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + .octa 0x0000000045eb2eb400000000b6bd084c + + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + .octa 0x000000014459d70c0000000123c2d592 + + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + .octa 0x00000001d406ed8200000000159dafce + + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + .octa 0x0000000160c8e1a80000000127e1a64e + + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + .octa 0x0000000027ba80980000000056860754 + + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + .octa 0x000000006d92d01800000001e661aae8 + + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + .octa 0x000000012ed7e3f200000000f82c6166 + + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + .octa 0x000000002dc8778800000000c4f9c7ae + + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + .octa 0x0000000018240bb80000000074203d20 + + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + .octa 0x000000001ad381580000000198173052 + + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + .octa 0x00000001396b78f200000001ce8aba54 + + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + .octa 0x000000011a68133400000001850d5d94 + + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + .octa 0x000000012104732e00000001d609239c + + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + .octa 0x00000000a140d90c000000001595f048 + + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + .octa 0x00000001b7215eda0000000042ccee08 + + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + .octa 0x00000001aaf1df3c000000010a389d74 + + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + .octa 0x0000000029d15b8a000000012a840da6 + + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + .octa 0x00000000f1a96922000000001d181c0c + + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + .octa 0x00000001ac80d03c0000000068b7d1f6 + + /* x^202752 mod p(x)` 
<< 1, x^202816 mod p(x)` << 1 */ + .octa 0x000000000f11d56a000000005b0f14fc + + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + .octa 0x00000001f1c022a20000000179e9e730 + + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + .octa 0x0000000173d00ae200000001ce1368d6 + + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + .octa 0x00000001d4ffe4ac0000000112c3a84c + + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + .octa 0x000000016edc5ae400000000de940fee + + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + .octa 0x00000001f1a0214000000000fe896b7e + + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + .octa 0x00000000ca0b28a000000001f797431c + + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + .octa 0x00000001928e30a20000000053e989ba + + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + .octa 0x0000000097b1b002000000003920cd16 + + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + .octa 0x00000000b15bf90600000001e6f579b8 + + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + .octa 0x00000000411c5d52000000007493cb0a + + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + .octa 0x00000001c36f330000000001bdd376d8 + + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + .octa 0x00000001119227e0000000016badfee6 + + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + .octa 0x00000000114d47020000000071de5c58 + + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + .octa 0x00000000458b5b9800000000453f317c + + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + .octa 0x000000012e31fb8e0000000121675cce + + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + .octa 0x000000005cf619d800000001f409ee92 + + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + .octa 0x0000000063f4d8b200000000f36b9c88 + + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + .octa 0x000000004138dc8a0000000036b398f4 + + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + .octa 
0x00000001d29ee8e000000001748f9adc + + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + .octa 0x000000006a08ace800000001be94ec00 + + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + .octa 0x0000000127d4201000000000b74370d6 + + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + .octa 0x0000000019d76b6200000001174d0b98 + + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + .octa 0x00000001b1471f6e00000000befc06a4 + + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + .octa 0x00000001f64c19cc00000001ae125288 + + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + .octa 0x00000000003c0ea00000000095c19b34 + + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + .octa 0x000000014d73abf600000001a78496f2 + + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + .octa 0x00000001620eb84400000001ac5390a0 + + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + .octa 0x0000000147655048000000002a80ed6e + + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + .octa 0x0000000067b5077e00000001fa9b0128 + + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + .octa 0x0000000010ffe20600000001ea94929e + + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + .octa 0x000000000fee8f1e0000000125f4305c + + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + .octa 0x00000001da26fbae00000001471e2002 + + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + .octa 0x00000001b3a8bd880000000132d2253a + + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + .octa 0x00000000e8f3898e00000000f26b3592 + + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + .octa 0x00000000b0d0d28c00000000bc8b67b0 + + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + .octa 0x0000000030f2a798000000013a826ef2 + + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + .octa 0x000000000fba10020000000081482c84 + + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + .octa 0x00000000bdb9bd7200000000e77307c2 + + /* x^162816 mod p(x)` 
<< 1, x^162880 mod p(x)` << 1 */ + .octa 0x0000000075d3bf5a00000000d4a07ec8 + + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + .octa 0x00000000ef1f98a00000000017102100 + + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + .octa 0x00000000689c760200000000db406486 + + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + .octa 0x000000016d5fa5fe0000000192db7f88 + + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + .octa 0x00000001d0d2b9ca000000018bf67b1e + + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + .octa 0x0000000041e7b470000000007c09163e + + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + .octa 0x00000001cbb6495e000000000adac060 + + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + .octa 0x000000010052a0b000000000bd8316ae + + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + .octa 0x00000001d8effb5c000000019f09ab54 + + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + .octa 0x00000001d969853c0000000125155542 + + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + .octa 0x00000000523ccce2000000018fdb5882 + + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + .octa 0x000000001e2436bc00000000e794b3f4 + + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + .octa 0x00000000ddd1c3a2000000016f9bb022 + + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + .octa 0x0000000019fcfe3800000000290c9978 + + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + .octa 0x00000001ce95db640000000083c0f350 + + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + .octa 0x00000000af5828060000000173ea6628 + + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + .octa 0x00000001006388f600000001c8b4e00a + + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + .octa 0x0000000179eca00a00000000de95d6aa + + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + .octa 0x0000000122410a6a000000010b7f7248 + + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + .octa 
0x000000004288e87c00000001326e3a06 + + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + .octa 0x000000016c5490da00000000bb62c2e6 + + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + .octa 0x00000000d1c71f6e0000000156a4b2c2 + + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + .octa 0x00000001b4ce08a6000000011dfe763a + + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + .octa 0x00000001466ba60c000000007bcca8e2 + + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + .octa 0x00000001f6c488a40000000186118faa + + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + .octa 0x000000013bfb06820000000111a65a88 + + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + .octa 0x00000000690e9e54000000003565e1c4 + + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + .octa 0x00000000281346b6000000012ed02a82 + + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + .octa 0x000000015646402400000000c486ecfc + + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + .octa 0x000000016063a8dc0000000001b951b2 + + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + .octa 0x0000000116a663620000000048143916 + + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + .octa 0x000000017e8aa4d200000001dc2ae124 + + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + .octa 0x00000001728eb10c00000001416c58d6 + + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + .octa 0x00000001b08fd7fa00000000a479744a + + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + .octa 0x00000001092a16e80000000096ca3a26 + + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + .octa 0x00000000a505637c00000000ff223d4e + + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + .octa 0x00000000d94869b2000000010e84da42 + + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + .octa 0x00000001c8b203ae00000001b61ba3d0 + + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + .octa 0x000000005704aea000000000680f2de8 + + /* x^122880 mod p(x)` 
<< 1, x^122944 mod p(x)` << 1 */ + .octa 0x000000012e295fa2000000008772a9a8 + + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + .octa 0x000000011d0908bc0000000155f295bc + + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + .octa 0x0000000193ed97ea00000000595f9282 + + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + .octa 0x000000013a0f1c520000000164b1c25a + + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + .octa 0x000000010c2c40c000000000fbd67c50 + + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + .octa 0x00000000ff6fac3e0000000096076268 + + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + .octa 0x000000017b3609c000000001d288e4cc + + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + .octa 0x0000000088c8c92200000001eaac1bdc + + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + .octa 0x00000001751baae600000001f1ea39e2 + + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + .octa 0x000000010795297200000001eb6506fc + + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + .octa 0x0000000162b00abe000000010f806ffe + + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + .octa 0x000000000d7b404c000000010408481e + + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + .octa 0x00000000763b13d40000000188260534 + + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + .octa 0x00000000f6dc22d80000000058fc73e0 + + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + .octa 0x000000007daae06000000000391c59b8 + + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + .octa 0x000000013359ab7c000000018b638400 + + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + .octa 0x000000008add438a000000011738f5c4 + + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + .octa 0x00000001edbefdea000000008cf7c6da + + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + .octa 0x000000004104e0f800000001ef97fb16 + + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + .octa 
0x00000000b48a82220000000102130e20 + + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + .octa 0x00000001bcb4684400000000db968898 + + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + .octa 0x000000013293ce0a00000000b5047b5e + + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + .octa 0x00000001710d0844000000010b90fdb2 + + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + .octa 0x0000000117907f6e000000004834a32e + + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + .octa 0x0000000087ddf93e0000000059c8f2b0 + + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + .octa 0x000000005970e9b00000000122cec508 + + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + .octa 0x0000000185b2b7d0000000000a330cda + + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + .octa 0x00000001dcee0efc000000014a47148c + + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + .octa 0x0000000030da27220000000042c61cb8 + + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + .octa 0x000000012f925a180000000012fe6960 + + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + .octa 0x00000000dd2e357c00000000dbda2c20 + + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + .octa 0x00000000071c80de000000011122410c + + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + .octa 0x000000011513140a00000000977b2070 + + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + .octa 0x00000001df876e8e000000014050438e + + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + .octa 0x000000015f81d6ce0000000147c840e8 + + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + .octa 0x000000019dd94dbe00000001cc7c88ce + + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + .octa 0x00000001373d206e00000001476b35a4 + + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + .octa 0x00000000668ccade000000013d52d508 + + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + .octa 0x00000001b192d268000000008e4be32e + + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + 
.octa 0x00000000e30f3a7800000000024120fe + + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + .octa 0x000000010ef1f7bc00000000ddecddb4 + + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + .octa 0x00000001f5ac738000000000d4d403bc + + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + .octa 0x000000011822ea7000000001734b89aa + + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + .octa 0x00000000c3a33848000000010e7a58d6 + + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + .octa 0x00000001bd151c2400000001f9f04e9c + + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + .octa 0x0000000056002d7600000000b692225e + + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + .octa 0x000000014657c4f4000000019b8d3f3e + + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + .octa 0x0000000113742d7c00000001a874f11e + + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + .octa 0x000000019c5920ba000000010d5a4254 + + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + .octa 0x000000005216d2d600000000bbb2f5d6 + + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + .octa 0x0000000136f5ad8a0000000179cc0e36 + + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + .octa 0x000000018b07beb600000001dca1da4a + + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + .octa 0x00000000db1e93b000000000feb1a192 + + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + .octa 0x000000000b96fa3a00000000d1eeedd6 + + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + .octa 0x00000001d9968af0000000008fad9bb4 + + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + .octa 0x000000000e4a77a200000001884938e4 + + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + .octa 0x00000000508c2ac800000001bc2e9bc0 + + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + .octa 0x0000000021572a8000000001f9658a68 + + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + .octa 0x00000001b859daf2000000001b9224fc + + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + 
.octa 0x000000016f7884740000000055b2fb84 + + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + .octa 0x00000001b438810e000000018b090348 + + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + .octa 0x0000000095ddc6f2000000011ccbd5ea + + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + .octa 0x00000001d977c20c0000000007ae47f8 + + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + .octa 0x00000000ebedb99a0000000172acbec0 + + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + .octa 0x00000001df9e9e9200000001c6e3ff20 + + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + .octa 0x00000001a4a3f95200000000e1b38744 + + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + .octa 0x00000000e2f5122000000000791585b2 + + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + .octa 0x000000004aa01f3e00000000ac53b894 + + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + .octa 0x00000000b3e90a5800000001ed5f2cf4 + + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + .octa 0x000000000c9ca2aa00000001df48b2e0 + + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + .octa 0x000000015168231600000000049c1c62 + + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + .octa 0x0000000036fce78c000000017c460c12 + + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + .octa 0x000000009037dc10000000015be4da7e + + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + .octa 0x00000000d3298582000000010f38f668 + + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + .octa 0x00000001b42e8ad60000000039f40a00 + + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + .octa 0x00000000142a983800000000bd4c10c4 + + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + .octa 0x0000000109c7f1900000000042db1d98 + + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + .octa 0x0000000056ff931000000001c905bae6 + + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + .octa 0x00000001594513aa00000000069d40ea + + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + 
.octa 0x00000001e3b5b1e8000000008e4fbad0 + + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + .octa 0x000000011dd5fc080000000047bedd46 + + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + .octa 0x00000001675f0cc20000000026396bf8 + + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + .octa 0x00000000d1c8dd4400000000379beb92 + + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + .octa 0x0000000115ebd3d8000000000abae54a + + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + .octa 0x00000001ecbd0dac0000000007e6a128 + + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + .octa 0x00000000cdf67af2000000000ade29d2 + + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + .octa 0x000000004c01ff4c00000000f974c45c + + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + .octa 0x00000000f2d8657e00000000e77ac60a + + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + .octa 0x000000006bae74c40000000145895816 + + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + .octa 0x0000000152af8aa00000000038e362be + + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + .octa 0x0000000004663802000000007f991a64 + + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + .octa 0x00000001ab2f5afc00000000fa366d3a + + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + .octa 0x0000000074a4ebd400000001a2bb34f0 + + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + .octa 0x00000001d7ab3a4c0000000028a9981e + + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + .octa 0x00000001a8da60c600000001dbc672be + + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + .octa 0x000000013cf6382000000000b04d77f6 + + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + .octa 0x00000000bec12e1e0000000124400d96 + + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + .octa 0x00000001c6368010000000014ca4b414 + + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + .octa 0x00000001e6e78758000000012fe2c938 + + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + 
.octa 0x000000008d7f2b3c00000001faed01e6 + + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + .octa 0x000000016b4a156e000000007e80ecfe + + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + .octa 0x00000001c63cfeb60000000098daee94 + + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + .octa 0x000000015f902670000000010a04edea + + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + .octa 0x00000001cd5de11e00000001c00b4524 + + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + .octa 0x000000001acaec540000000170296550 + + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + .octa 0x000000002bd0ca780000000181afaa48 + + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + .octa 0x0000000032d63d5c0000000185a31ffa + + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + .octa 0x000000001c6d4e4c000000002469f608 + + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + .octa 0x0000000106a60b92000000006980102a + + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + .octa 0x00000000d3855e120000000111ea9ca8 + + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + .octa 0x00000000e312563600000001bd1d29ce + + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + .octa 0x000000009e8f7ea400000001b34b9580 + + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + .octa 0x00000001c82e562c000000003076054e + + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + .octa 0x00000000ca9f09ce000000012a608ea4 + + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + .octa 0x00000000c63764e600000000784d05fe + + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + .octa 0x0000000168d2e49e000000016ef0d82a + + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + .octa 0x00000000e986c1480000000075bda454 + + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + .octa 0x00000000cfb65894000000003dc0a1c4 + + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + .octa 0x0000000111cadee400000000e9a5d8be + + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + .octa 
0x0000000171fb63ce00000001609bc4b4 + +.short_constants: + + /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */ + .octa 0x7fec2963e5bf80485cf015c388e56f72 + + /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */ + .octa 0x38e888d4844752a9963a18920246e2e6 + + /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */ + .octa 0x42316c00730206ad419a441956993a31 + + /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */ + .octa 0x543d5c543e65ddf9924752ba2b830011 + + /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */ + .octa 0x78e87aaf56767c9255bd7f9518e4a304 + + /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */ + .octa 0x8f68fcec1903da7f6d76739fe0553f1e + + /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */ + .octa 0x3f4840246791d588c133722b1fe0b5c3 + + /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */ + .octa 0x34c96751b04de25a64b67ee0e55ef1f3 + + /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */ + .octa 0x156c8e180b4a395b069db049b8fdb1e7 + + /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */ + .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e + + /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */ + .octa 0x041d37768cd75659817cdc5119b29a35 + + /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */ + .octa 0x3a0777818cfaa9651ce9d94b36c41f1c + + /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */ + .octa 0x0e148e8252377a554f256efcb82be955 + + /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */ + .octa 0x9c25531d19e65ddeec1631edb2dea967 + + /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */ + .octa 0x790606ff9957c0a65d27e147510ac59a + + 
/* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */ + .octa 0x82f63b786ea2d55ca66805eb18b8ea18 + + +.barrett_constants: + /* 33 bit reflected Barrett constant m - (4^32)/n */ + .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */ + /* 33 bit reflected Barrett constant n */ + .octa 0x00000000000000000000000105ec76f1 +#endif diff --git a/src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c b/src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c new file mode 100644 index 00000000000..62bd3e64f5c --- /dev/null +++ b/src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c @@ -0,0 +1,69 @@ +#if defined(__powerpc64__) +#define CRC_TABLE +#include "crc32_constants.h" + +#define VMX_ALIGN 16U +#define VMX_ALIGN_MASK (VMX_ALIGN-1) +/* crc32_align -- fold len bytes at p into crc one byte at a time through crc_table and return the updated crc. The REFLECT variant consumes the low byte of crc and shifts right; the other variant consumes the high byte and shifts left. */ +#ifdef REFLECT +static unsigned int crc32_align(unsigned int crc, unsigned char *p, + unsigned long len) +{ + while (len--) + crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8); + return crc; +} +#else +static unsigned int crc32_align(unsigned int crc, unsigned char *p, + unsigned long len) +{ + while (len--) + crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8); + return crc; +} +#endif +/* Bulk routine, declared here and defined outside this file; crc32_vpmsum only hands it a 16-byte-aligned pointer and a length that is a multiple of 16. */ +unsigned int __crc32_vpmsum(unsigned int crc, unsigned char *p, + unsigned long len); +/* crc32_vpmsum -- checksum len bytes at p, continuing from crc, and return the result. When CRC_XOR is defined the value is inverted on entry and again on exit. Buffers shorter than 31 bytes are handled entirely by crc32_align; otherwise crc32_align covers the unaligned head and the sub-16-byte tail, and __crc32_vpmsum covers the aligned middle. */ +/* -Werror=missing-prototypes */ +unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p, + unsigned long len); +unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p, + unsigned long len) +{ + unsigned int prealign; + unsigned int tail; + +#ifdef CRC_XOR + crc ^= 0xffffffff; +#endif + /* Below VMX_ALIGN + VMX_ALIGN_MASK (31) bytes, aligning the head could leave less than one full 16-byte chunk, so stay on the table-driven path. */ + if (len < VMX_ALIGN + VMX_ALIGN_MASK) { + crc = crc32_align(crc, p, len); + goto out; + } + /* Consume bytes up to the next 16-byte boundary. */ + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc = crc32_align(crc, p, prealign); + len -= prealign; + p += prealign; + } + /* Widen the mask before complementing it: VMX_ALIGN_MASK is an unsigned int, so a bare ~VMX_ALIGN_MASK zero-extends to unsigned long and would clear the length bits above bit 31. */ + crc = __crc32_vpmsum(crc, p, len & ~(unsigned long)VMX_ALIGN_MASK); + /* Finish the last len % 16 bytes. */ + tail = len & VMX_ALIGN_MASK; + if (tail) { + p += len & ~(unsigned long)VMX_ALIGN_MASK; + crc = 
crc32_align(crc, p, tail); + } + +out: +#ifdef CRC_XOR + crc ^= 0xffffffff; +#endif + + return crc; +} +#endif diff --git a/src/third_party/wiredtiger/src/checksum/power8/ppc-opcode.h b/src/third_party/wiredtiger/src/checksum/power8/ppc-opcode.h new file mode 100644 index 00000000000..b63feea60a0 --- /dev/null +++ b/src/third_party/wiredtiger/src/checksum/power8/ppc-opcode.h @@ -0,0 +1,23 @@ +#ifndef __OPCODES_H +#define __OPCODES_H + +#define __PPC_RA(a) (((a) & 0x1f) << 16) +#define __PPC_RB(b) (((b) & 0x1f) << 11) +#define __PPC_XA(a) ((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3)) +#define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4)) +#define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5)) +#define __PPC_XT(s) __PPC_XS(s) +#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b)) +#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b)) + +#define PPC_INST_VPMSUMW 0x10000488 +#define PPC_INST_VPMSUMD 0x100004c8 +#define PPC_INST_MFVSRD 0x7c000066 +#define PPC_INST_MTVSRD 0x7c000166 + +#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b) +#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b) +#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t)+32, a, 0) +#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t)+32, a, 0) + +#endif diff --git a/src/third_party/wiredtiger/src/config/config_collapse.c b/src/third_party/wiredtiger/src/config/config_collapse.c index 27bd6255a0a..591d22284f5 100644 --- a/src/third_party/wiredtiger/src/config/config_collapse.c +++ b/src/third_party/wiredtiger/src/config/config_collapse.c @@ -38,6 +38,8 @@ __wt_config_collapse( WT_DECL_ITEM(tmp); WT_DECL_RET; + *config_ret = NULL; + WT_RET(__wt_scr_alloc(session, 0, &tmp)); WT_ERR(__wt_config_init(session, &cparser, cfg[0])); @@ -59,6 +61,8 @@ __wt_config_collapse( WT_ERR(__wt_buf_catfmt(session, tmp, "%.*s=%.*s,", (int)k.len, k.str, (int)v.len, v.str)); } + + /* We loop until error, and the expected error is 
WT_NOTFOUND. */ if (ret != WT_NOTFOUND) goto err; diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 4b601fbc53a..1b656c5a0aa 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -17,6 +17,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_close[] = { static const WT_CONFIG_CHECK confchk_WT_CONNECTION_load_extension[] = { { "config", "string", NULL, NULL, NULL, 0 }, + { "early_load", "boolean", NULL, NULL, NULL, 0 }, { "entry", "string", NULL, NULL, NULL, 0 }, { "terminate", "string", NULL, NULL, NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } @@ -305,6 +306,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_join[] = { NULL, "choices=[\"eq\",\"ge\",\"gt\",\"le\",\"lt\"]", NULL, 0 }, { "count", "int", NULL, NULL, NULL, 0 }, + { "operation", "string", + NULL, "choices=[\"and\",\"or\"]", + NULL, 0 }, { "strategy", "string", NULL, "choices=[\"bloom\",\"default\"]", NULL, 0 }, @@ -376,9 +380,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_transaction_sync[] = { static const WT_CONFIG_CHECK confchk_WT_SESSION_verify[] = { { "dump_address", "boolean", NULL, NULL, NULL, 0 }, { "dump_blocks", "boolean", NULL, NULL, NULL, 0 }, + { "dump_layout", "boolean", NULL, NULL, NULL, 0 }, { "dump_offsets", "list", NULL, NULL, NULL, 0 }, { "dump_pages", "boolean", NULL, NULL, NULL, 0 }, - { "dump_shape", "boolean", NULL, NULL, NULL, 0 }, { "strict", "boolean", NULL, NULL, NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -956,9 +960,9 @@ static const WT_CONFIG_ENTRY config_entries[] = { confchk_WT_CONNECTION_close, 1 }, { "WT_CONNECTION.load_extension", - "config=,entry=wiredtiger_extension_init," + "config=,early_load=0,entry=wiredtiger_extension_init," "terminate=wiredtiger_extension_terminate", - confchk_WT_CONNECTION_load_extension, 3 + confchk_WT_CONNECTION_load_extension, 4 }, { "WT_CONNECTION.open_session", 
"isolation=read-committed", @@ -972,7 +976,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" ",file_manager=(close_handle_minimum=250,close_idle_time=30," "close_scan_interval=10),log=(archive=,compressor=,enabled=0," - "file_max=100MB,path=,prealloc=,recover=on,zero_fill=0)," + "file_max=100MB,path=\".\",prealloc=,recover=on,zero_fill=0)," "lsm_manager=(merge=,worker_thread_max=4),lsm_merge=," "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," "statistics=none,statistics_log=(json=0,on_close=0," @@ -980,6 +984,10 @@ static const WT_CONFIG_ENTRY config_entries[] = { "timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=", confchk_WT_CONNECTION_reconfigure, 18 }, + { "WT_CONNECTION.set_file_system", + "", + NULL, 0 + }, { "WT_CURSOR.close", "", NULL, 0 @@ -1032,8 +1040,8 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "WT_SESSION.join", "bloom_bit_count=16,bloom_hash_count=8,compare=\"eq\",count=," - "strategy=", - confchk_WT_SESSION_join, 5 + "operation=\"and\",strategy=", + confchk_WT_SESSION_join, 6 }, { "WT_SESSION.log_flush", "sync=on", @@ -1094,8 +1102,8 @@ static const WT_CONFIG_ENTRY config_entries[] = { NULL, 0 }, { "WT_SESSION.verify", - "dump_address=0,dump_blocks=0,dump_offsets=,dump_pages=0," - "dump_shape=0,strict=0", + "dump_address=0,dump_blocks=0,dump_layout=0,dump_offsets=," + "dump_pages=0,strict=0", confchk_WT_SESSION_verify, 6 }, { "colgroup.meta", @@ -1169,14 +1177,15 @@ static const WT_CONFIG_ENTRY config_entries[] = { "file_extend=,file_manager=(close_handle_minimum=250," "close_idle_time=30,close_scan_interval=10),hazard_max=1000," "in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB," - "path=,prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," - "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," - "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" - ",name=,quota=0,reserve=0,size=500MB),statistics=none," - 
"statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\"," - "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "transaction_sync=(enabled=0,method=fsync),use_environment=," - "use_environment_priv=0,verbose=,write_through=", + "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=" + ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0" + ",session_max=100,session_scratch_max=2MB," + "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," + "statistics=none,statistics_log=(json=0,on_close=0," + "path=\"WiredTigerStat.%d.%H\",sources=," + "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" + ",method=fsync),use_environment=,use_environment_priv=0,verbose=," + "write_through=", confchk_wiredtiger_open, 38 }, { "wiredtiger_open_all", @@ -1190,15 +1199,15 @@ static const WT_CONFIG_ENTRY config_entries[] = { "file_extend=,file_manager=(close_handle_minimum=250," "close_idle_time=30,close_scan_interval=10),hazard_max=1000," "in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB," - "path=,prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," - "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," - "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" - ",name=,quota=0,reserve=0,size=500MB),statistics=none," - "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\"," - "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "transaction_sync=(enabled=0,method=fsync),use_environment=," - "use_environment_priv=0,verbose=,version=(major=0,minor=0)," - "write_through=", + "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=" + ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0" + ",session_max=100,session_scratch_max=2MB," + "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," + "statistics=none,statistics_log=(json=0,on_close=0," + "path=\"WiredTigerStat.%d.%H\",sources=," + "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" 
+ ",method=fsync),use_environment=,use_environment_priv=0,verbose=," + "version=(major=0,minor=0),write_through=", confchk_wiredtiger_open_all, 39 }, { "wiredtiger_open_basecfg", @@ -1210,7 +1219,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "log=(archive=,compressor=,enabled=0,file_max=100MB,path=," + "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\"," "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" @@ -1230,7 +1239,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "log=(archive=,compressor=,enabled=0,file_max=100MB,path=," + "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\"," "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" diff --git a/src/third_party/wiredtiger/src/conn/api_strerror.c b/src/third_party/wiredtiger/src/conn/api_strerror.c index 87864f7f4b0..edb11957556 100644 --- a/src/third_party/wiredtiger/src/conn/api_strerror.c +++ b/src/third_party/wiredtiger/src/conn/api_strerror.c @@ -40,8 +40,6 @@ __wt_wiredtiger_error(int error) return ("WT_RUN_RECOVERY: recovery must be run to continue"); case WT_CACHE_FULL: return ("WT_CACHE_FULL: operation would overflow cache"); - case WT_PERM_DENIED: - return ("WT_PERM_DENIED: permission denied (internal)"); } /* diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c 
b/src/third_party/wiredtiger/src/conn/conn_api.c index 9e2f03da21f..98267eeeb2c 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -751,6 +751,7 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn) conn->extension_api.err_printf = __wt_ext_err_printf; conn->extension_api.msg_printf = __wt_ext_msg_printf; conn->extension_api.strerror = __wt_ext_strerror; + conn->extension_api.map_windows_error = __wt_ext_map_windows_error; conn->extension_api.scr_alloc = __wt_ext_scr_alloc; conn->extension_api.scr_free = __wt_ext_scr_free; conn->extension_api.collator_config = ext_collator_config; @@ -806,6 +807,7 @@ static int __conn_load_default_extensions(WT_CONNECTION_IMPL *conn) { WT_UNUSED(conn); + #ifdef HAVE_BUILTIN_EXTENSION_SNAPPY WT_RET(snappy_extension_init(&conn->iface, NULL)); #endif @@ -819,18 +821,16 @@ __conn_load_default_extensions(WT_CONNECTION_IMPL *conn) } /* - * __conn_load_extension -- - * WT_CONNECTION->load_extension method. + * __conn_load_extension_int -- + * Internal extension load interface */ static int -__conn_load_extension( - WT_CONNECTION *wt_conn, const char *path, const char *config) +__conn_load_extension_int(WT_SESSION_IMPL *session, + const char *path, const char *cfg[], bool early_load) { WT_CONFIG_ITEM cval; - WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_DLH *dlh; - WT_SESSION_IMPL *session; int (*load)(WT_CONNECTION *, WT_CONFIG_ARG *); bool is_local; const char *init_name, *terminate_name; @@ -839,8 +839,10 @@ __conn_load_extension( init_name = terminate_name = NULL; is_local = strcmp(path, "local") == 0; - conn = (WT_CONNECTION_IMPL *)wt_conn; - CONNECTION_API_CALL(conn, session, load_extension, config, cfg); + /* Ensure that the load matches the phase of startup we are in. 
*/ + WT_ERR(__wt_config_gets(session, cfg, "early_load", &cval)); + if ((cval.val == 0 && early_load) || (cval.val != 0 && !early_load)) + return (0); /* * This assumes the underlying shared libraries are reference counted, @@ -865,20 +867,39 @@ __conn_load_extension( __wt_dlsym(session, dlh, terminate_name, false, &dlh->terminate)); /* Call the load function last, it simplifies error handling. */ - WT_ERR(load(wt_conn, (WT_CONFIG_ARG *)cfg)); + WT_ERR(load(&S2C(session)->iface, (WT_CONFIG_ARG *)cfg)); /* Link onto the environment's list of open libraries. */ - __wt_spin_lock(session, &conn->api_lock); - TAILQ_INSERT_TAIL(&conn->dlhqh, dlh, q); - __wt_spin_unlock(session, &conn->api_lock); + __wt_spin_lock(session, &S2C(session)->api_lock); + TAILQ_INSERT_TAIL(&S2C(session)->dlhqh, dlh, q); + __wt_spin_unlock(session, &S2C(session)->api_lock); dlh = NULL; err: if (dlh != NULL) WT_TRET(__wt_dlclose(session, dlh)); __wt_free(session, init_name); __wt_free(session, terminate_name); + return (ret); +} - API_END_RET_NOTFOUND_MAP(session, ret); +/* + * __conn_load_extension -- + * WT_CONNECTION->load_extension method. + */ +static int +__conn_load_extension( + WT_CONNECTION *wt_conn, const char *path, const char *config) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + conn = (WT_CONNECTION_IMPL *)wt_conn; + CONNECTION_API_CALL(conn, session, load_extension, config, cfg); + + ret = __conn_load_extension_int(session, path, cfg, false); + +err: API_END_RET_NOTFOUND_MAP(session, ret); } /* @@ -886,18 +907,16 @@ err: if (dlh != NULL) * Load the list of application-configured extensions. 
*/ static int -__conn_load_extensions(WT_SESSION_IMPL *session, const char *cfg[]) +__conn_load_extensions( + WT_SESSION_IMPL *session, const char *cfg[], bool early_load) { WT_CONFIG subconfig; WT_CONFIG_ITEM cval, skey, sval; - WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(exconfig); WT_DECL_ITEM(expath); WT_DECL_RET; - - conn = S2C(session); - - WT_ERR(__conn_load_default_extensions(conn)); + const char *sub_cfg[] = { + WT_CONFIG_BASE(session, WT_CONNECTION_load_extension), NULL, NULL }; WT_ERR(__wt_config_gets(session, cfg, "extensions", &cval)); WT_ERR(__wt_config_subinit(session, &subconfig, &cval)); @@ -912,8 +931,9 @@ __conn_load_extensions(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_buf_fmt(session, exconfig, "%.*s", (int)sval.len, sval.str)); } - WT_ERR(conn->iface.load_extension(&conn->iface, - expath->data, (sval.len > 0) ? exconfig->data : NULL)); + sub_cfg[1] = sval.len > 0 ? exconfig->data : NULL; + WT_ERR(__conn_load_extension_int( + session, expath->data, sub_cfg, early_load)); } WT_ERR_NOTFOUND_OK(ret); @@ -1192,13 +1212,12 @@ __conn_config_file(WT_SESSION_IMPL *session, fh = NULL; /* Configuration files are always optional. */ - WT_RET(__wt_exist(session, filename, &exist)); + WT_RET(__wt_fs_exist(session, filename, &exist)); if (!exist) return (0); /* Open the configuration file. */ - WT_RET(__wt_open( - session, filename, WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &fh)); + WT_RET(__wt_open(session, filename, WT_OPEN_FILE_TYPE_REGULAR, 0, &fh)); WT_ERR(__wt_filesize(session, fh, &size)); if (size == 0) goto err; @@ -1280,7 +1299,8 @@ __conn_config_file(WT_SESSION_IMPL *session, * the next character is a hash mark, skip to the next newline. 
*/ for (;;) { - for (*t++ = ','; --len > 0 && isspace(*++p);) + for (*t++ = ','; + --len > 0 && __wt_isspace((u_char)*++p);) ; if (len == 0) break; @@ -1489,8 +1509,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) */ exist = false; if (!is_create) - WT_ERR(__wt_exist(session, WT_WIREDTIGER, &exist)); - ret = __wt_open(session, WT_SINGLETHREAD, WT_FILE_TYPE_REGULAR, + WT_ERR(__wt_fs_exist(session, WT_WIREDTIGER, &exist)); + ret = __wt_open(session, WT_SINGLETHREAD, WT_OPEN_FILE_TYPE_REGULAR, is_create || exist ? WT_OPEN_CREATE : 0, &conn->lock_fh); /* @@ -1499,17 +1519,14 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) * if the file does not exist. If so, then ignore the error. * XXX Ignoring the error does allow multiple read-only * connections to exist at the same time on a read-only directory. + * + * If we got an expected permission or non-existence error then skip + * the byte lock. */ - if (F_ISSET(conn, WT_CONN_READONLY)) { - /* - * If we got an expected permission or non-existence error - * then skip the byte lock. - */ - ret = __wt_map_error_rdonly(ret); - if (ret == WT_NOTFOUND || ret == WT_PERM_DENIED) { - bytelock = false; - ret = 0; - } + if (F_ISSET(conn, WT_CONN_READONLY) && + (ret == EACCES || ret == ENOENT)) { + bytelock = false; + ret = 0; } WT_ERR(ret); if (bytelock) { @@ -1546,22 +1563,19 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) /* We own the lock file, optionally create the WiredTiger file. */ ret = __wt_open(session, WT_WIREDTIGER, - WT_FILE_TYPE_REGULAR, is_create ? WT_OPEN_CREATE : 0, &fh); + WT_OPEN_FILE_TYPE_REGULAR, is_create ? WT_OPEN_CREATE : 0, &fh); /* - * If we're read-only, check for success as well as handled errors. - * Even if we're able to open the WiredTiger file successfully, we - * do not try to lock it. The lock file test above is the only - * one we do for read-only. + * If we're read-only, check for handled errors. 
Even if able to open + * the WiredTiger file successfully, we do not try to lock it. The + * lock file test above is the only one we do for read-only. */ if (F_ISSET(conn, WT_CONN_READONLY)) { - ret = __wt_map_error_rdonly(ret); - if (ret == 0 || ret == WT_NOTFOUND || ret == WT_PERM_DENIED) + if (ret == EACCES || ret == ENOENT) ret = 0; WT_ERR(ret); } else { WT_ERR(ret); - /* * Lock the WiredTiger file (for backward compatibility reasons * as described above). Immediately release the lock, it's @@ -1583,13 +1597,14 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) * and there's never a database home after that point without a turtle * file. If the turtle file doesn't exist, it's a create. */ - WT_ERR(__wt_exist(session, WT_METADATA_TURTLE, &exist)); + WT_ERR(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist)); conn->is_new = exist ? 0 : 1; if (conn->is_new) { if (F_ISSET(conn, WT_CONN_READONLY)) - WT_ERR_MSG(session, EINVAL, "Creating a new database is" - " incompatible with read-only configuration."); + WT_ERR_MSG(session, EINVAL, + "Creating a new database is incompatible with " + "read-only configuration"); len = (size_t)snprintf(buf, sizeof(buf), "%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING); WT_ERR(__wt_write(session, fh, (wt_off_t)0, len, buf)); @@ -1754,14 +1769,14 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) static int __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) { - WT_FH *fh; + WT_FSTREAM *fs; WT_CONFIG parser; WT_CONFIG_ITEM cval, k, v; WT_DECL_RET; bool exist; const char *base_config; - fh = NULL; + fs = NULL; base_config = NULL; /* @@ -1789,15 +1804,14 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) * only NOT exist if we crashed before it was created; in other words, * if the base configuration file exists, we're done. 
*/ - WT_RET(__wt_exist(session, WT_BASECONFIG, &exist)); + WT_RET(__wt_fs_exist(session, WT_BASECONFIG, &exist)); if (exist) return (0); - WT_RET(__wt_open(session, - WT_BASECONFIG_SET, WT_FILE_TYPE_REGULAR, - WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE | WT_STREAM_WRITE, &fh)); + WT_RET(__wt_fopen(session, WT_BASECONFIG_SET, + WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs)); - WT_ERR(__wt_fprintf(session, fh, "%s\n\n", + WT_ERR(__wt_fprintf(session, fs, "%s\n\n", "# Do not modify this file.\n" "#\n" "# WiredTiger created this file when the database was created,\n" @@ -1844,18 +1858,18 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) --v.str; v.len += 2; } - WT_ERR(__wt_fprintf(session, fh, + WT_ERR(__wt_fprintf(session, fs, "%.*s=%.*s\n", (int)k.len, k.str, (int)v.len, v.str)); } WT_ERR_NOTFOUND_OK(ret); - /* Flush the handle and rename the file into place. */ - ret = __wt_sync_handle_and_rename( - session, &fh, WT_BASECONFIG_SET, WT_BASECONFIG); + /* Flush the stream and rename the file into place. */ + ret = __wt_sync_and_rename( + session, &fs, WT_BASECONFIG_SET, WT_BASECONFIG); if (0) { /* Close open file handle, remove any temporary file. */ -err: WT_TRET(__wt_close(session, &fh)); +err: WT_TRET(__wt_fclose(session, &fs)); WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET)); } @@ -1865,6 +1879,57 @@ err: WT_TRET(__wt_close(session, &fh)); } /* + * __conn_set_file_system -- + * Configure a custom file system implementation on database open. + */ +static int +__conn_set_file_system( + WT_CONNECTION *wt_conn, WT_FILE_SYSTEM *file_system, const char *config) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + conn = (WT_CONNECTION_IMPL *)wt_conn; + CONNECTION_API_CALL(conn, session, set_file_system, config, cfg); + WT_UNUSED(cfg); + + conn->file_system = file_system; + +err: API_END_RET(session, ret); +} + +/* + * __conn_chk_file_system -- + * Check the configured file system. 
+ */ +static int +__conn_chk_file_system(WT_SESSION_IMPL *session, bool readonly) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + +#define WT_CONN_SET_FILE_SYSTEM_REQ(name) \ + if (conn->file_system->name == NULL) \ + WT_RET_MSG(session, EINVAL, \ + "a WT_FILE_SYSTEM.%s method must be configured", #name) + + WT_CONN_SET_FILE_SYSTEM_REQ(fs_directory_list); + WT_CONN_SET_FILE_SYSTEM_REQ(fs_directory_list_free); + /* not required: directory_sync */ + WT_CONN_SET_FILE_SYSTEM_REQ(fs_exist); + WT_CONN_SET_FILE_SYSTEM_REQ(fs_open_file); + if (!readonly) { + WT_CONN_SET_FILE_SYSTEM_REQ(fs_remove); + WT_CONN_SET_FILE_SYSTEM_REQ(fs_rename); + } + WT_CONN_SET_FILE_SYSTEM_REQ(fs_size); + + return (0); +} + +/* * wiredtiger_open -- * Main library entry point: open a new connection to a WiredTiger * database. @@ -1888,12 +1953,13 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, __conn_add_compressor, __conn_add_encryptor, __conn_add_extractor, + __conn_set_file_system, __conn_get_extension_api }; static const WT_NAME_FLAG file_types[] = { - { "checkpoint", WT_FILE_TYPE_CHECKPOINT }, - { "data", WT_FILE_TYPE_DATA }, - { "log", WT_FILE_TYPE_LOG }, + { "checkpoint", WT_DIRECT_IO_CHECKPOINT }, + { "data", WT_DIRECT_IO_DATA }, + { "log", WT_DIRECT_IO_LOG }, { NULL, 0 } }; @@ -1942,6 +2008,14 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_os_stdio(session)); __wt_event_handler_set(session, event_handler); + /* + * Set the default session's strerror method. If one of the extensions + * being loaded reports an error via the WT_EXTENSION_API strerror + * method, but doesn't supply that method a WT_SESSION handle, we'll + * use the WT_CONNECTION_IMPL's default session and its strerror method. + */ + conn->default_session->iface.strerror = __wt_session_strerror; + /* Basic initialization of the connection structure. 
*/ WT_ERR(__wt_connection_init(conn)); @@ -1983,10 +2057,27 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, F_SET(conn, WT_CONN_READONLY); /* - * After checking readonly and in-memory, but before we do anything that - * touches the filesystem, configure the OS layer. + * Load early extensions before doing further initialization (one early + * extension is to configure a file system). + */ + WT_ERR(__conn_load_extensions(session, cfg, true)); + + /* + * If the application didn't configure its own file system, configure + * one of ours. Check to ensure we have a valid file system. */ - WT_ERR(__wt_os_init(session)); + if (conn->file_system == NULL) { + if (F_ISSET(conn, WT_CONN_IN_MEMORY)) + WT_ERR(__wt_os_inmemory(session)); + else +#if defined(_MSC_VER) + WT_ERR(__wt_os_win(session)); +#else + WT_ERR(__wt_os_posix(session)); +#endif + } + WT_ERR( + __conn_chk_file_system(session, F_ISSET(conn, WT_CONN_READONLY))); /* * Capture the config_base setting file for later use. Again, if the @@ -2036,7 +2127,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, __conn_config_append(cfg, version); /* Ignore the base_config file if config_base_set is false. 
*/ - if (config_base_set || F_ISSET(conn, WT_CONN_READONLY)) + if (config_base_set) WT_ERR( __conn_config_file(session, WT_BASECONFIG, false, cfg, i1)); __conn_config_append(cfg, config); @@ -2119,8 +2210,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, if (ret == 0) { if (sval.val) FLD_SET(conn->direct_io, ft->flag); - } else if (ret != WT_NOTFOUND) - goto err; + } else + WT_ERR_NOTFOUND_OK(ret); } WT_ERR(__wt_config_gets(session, cfg, "write_through", &cval)); @@ -2129,8 +2220,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, if (ret == 0) { if (sval.val) FLD_SET(conn->write_through, ft->flag); - } else if (ret != WT_NOTFOUND) - goto err; + } else + WT_ERR_NOTFOUND_OK(ret); } /* @@ -2154,15 +2245,15 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, ret = __wt_config_subgets(session, &cval, ft->name, &sval); if (ret == 0) { switch (ft->flag) { - case WT_FILE_TYPE_DATA: + case WT_DIRECT_IO_DATA: conn->data_extend_len = sval.val; break; - case WT_FILE_TYPE_LOG: + case WT_DIRECT_IO_LOG: conn->log_extend_len = sval.val; break; } - } else if (ret != WT_NOTFOUND) - goto err; + } else + WT_ERR_NOTFOUND_OK(ret); } WT_ERR(__wt_config_gets(session, cfg, "mmap", &cval)); @@ -2191,7 +2282,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, * everything else to be in place, and the extensions call back into the * library. */ - WT_ERR(__conn_load_extensions(session, cfg)); + WT_ERR(__conn_load_default_extensions(conn)); + WT_ERR(__conn_load_extensions(session, cfg, false)); /* * The metadata/log encryptor is configured after extensions, since @@ -2234,7 +2326,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, */ WT_ERR(__wt_turtle_init(session)); - __wt_metadata_init(session); WT_ERR(__wt_metadata_cursor(session, NULL)); /* Start the worker threads and run recovery. 
*/ diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c index 9dfd1cdcbfa..9f15db5382b 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache.c @@ -127,6 +127,7 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; + int i; conn = S2C(session); @@ -157,13 +158,23 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) false, 10000, WT_MILLION, &cache->evict_cond)); WT_ERR(__wt_cond_alloc(session, "eviction waiters", false, &cache->evict_waiter_cond)); - WT_ERR(__wt_spin_init(session, &cache->evict_lock, "cache eviction")); + WT_ERR(__wt_spin_init(session, &cache->evict_pass_lock, "evict pass")); + WT_ERR(__wt_spin_init(session, + &cache->evict_queue_lock, "cache eviction queue")); WT_ERR(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk")); + if ((ret = __wt_open_internal_session(conn, "evict pass", + false, WT_SESSION_NO_DATA_HANDLES, &cache->walk_session)) != 0) + WT_ERR_MSG(NULL, ret, + "Failed to create session for eviction walks"); /* Allocate the LRU eviction queue. 
*/ cache->evict_slots = WT_EVICT_WALK_BASE + WT_EVICT_WALK_INCR; - WT_ERR(__wt_calloc_def(session, - cache->evict_slots, &cache->evict_queue)); + for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) { + WT_ERR(__wt_calloc_def(session, + cache->evict_slots, &cache->evict_queues[i].evict_queue)); + WT_ERR(__wt_spin_init(session, + &cache->evict_queues[i].evict_lock, "cache eviction")); + } /* * We get/set some values in the cache statistics (rather than have @@ -237,6 +248,8 @@ __wt_cache_destroy(WT_SESSION_IMPL *session) WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; + WT_SESSION *wt_session; + int i; conn = S2C(session); cache = conn->cache; @@ -262,10 +275,17 @@ __wt_cache_destroy(WT_SESSION_IMPL *session) WT_TRET(__wt_cond_auto_destroy(session, &cache->evict_cond)); WT_TRET(__wt_cond_destroy(session, &cache->evict_waiter_cond)); - __wt_spin_destroy(session, &cache->evict_lock); + __wt_spin_destroy(session, &cache->evict_pass_lock); + __wt_spin_destroy(session, &cache->evict_queue_lock); __wt_spin_destroy(session, &cache->evict_walk_lock); + wt_session = &cache->walk_session->iface; + if (wt_session != NULL) + WT_TRET(wt_session->close(wt_session, NULL)); - __wt_free(session, cache->evict_queue); + for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) { + __wt_spin_destroy(session, &cache->evict_queues[i].evict_lock); + __wt_free(session, cache->evict_queues[i].evict_queue); + } __wt_free(session, conn->cache); return (ret); } diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c index 72f23b015b7..75ecb6b3b4a 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c @@ -58,7 +58,6 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) created = updating = false; pool_name = NULL; cp = NULL; - size = 0; if (F_ISSET(conn, WT_CONN_CACHE_POOL)) updating = true; @@ -310,6 +309,7 @@ __wt_conn_cache_pool_destroy(WT_SESSION_IMPL 
*session) if (!F_ISSET(conn, WT_CONN_CACHE_POOL)) return (0); + F_CLR(conn, WT_CONN_CACHE_POOL); __wt_spin_lock(session, &cp->cache_pool_lock); cp_locked = true; @@ -572,6 +572,7 @@ __cache_pool_adjust(WT_SESSION_IMPL *session, cp = __wt_process.cache_pool; grow = false; pool_full = cp->currently_used >= cp->size; + pct_full = 0; /* Highest as a percentage, avoid 0 */ highest_percentile = (highest / 100) + 1; diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index 5019ab59fe3..08fb2b24468 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -39,6 +39,9 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session, WT_BTREE *btree; WT_DATA_HANDLE *dhandle; WT_DECL_RET; + uint64_t bucket; + + *dhandlep = NULL; WT_RET(__wt_calloc_one(session, &dhandle)); @@ -57,6 +60,16 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session, __wt_stat_dsrc_init(dhandle); + if (strcmp(uri, WT_METAFILE_URI) == 0) + F_SET(dhandle, WT_DHANDLE_IS_METADATA); + + /* + * Prepend the handle to the connection list, assuming we're likely to + * need new files again soon, until they are cached by all sessions. + */ + bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE; + WT_CONN_DHANDLE_INSERT(S2C(session), dhandle, bucket); + *dhandlep = dhandle; return (0); @@ -106,14 +119,6 @@ __wt_conn_dhandle_find( WT_RET(__conn_dhandle_alloc(session, uri, checkpoint, &dhandle)); - /* - * Prepend the handle to the connection list, assuming we're likely to - * need new files again soon, until they are cached by all sessions. - * Find the right hash bucket to insert into as well. 
- */ - bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE; - WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket); - session->dhandle = dhandle; return (0); } @@ -158,7 +163,8 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) /* * We may not be holding the schema lock, and threads may be walking * the list of open handles (for example, checkpoint). Acquire the - * handle's close lock. + * handle's close lock. We don't have the sweep server acquire the + * handle's rwlock so we have to prevent races through the close code. */ __wt_spin_lock(session, &dhandle->close_lock); @@ -538,6 +544,7 @@ __wt_conn_dhandle_discard_single( WT_DATA_HANDLE *dhandle; WT_DECL_RET; int tret; + bool set_pass_intr; dhandle = session->dhandle; @@ -556,12 +563,17 @@ __wt_conn_dhandle_discard_single( * Kludge: interrupt the eviction server in case it is holding the * handle list lock. */ - if (!F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) - F_SET(S2C(session)->cache, WT_CACHE_CLEAR_WALKS); + set_pass_intr = false; + if (!F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { + set_pass_intr = true; + (void)__wt_atomic_add32(&S2C(session)->cache->pass_intr, 1); + } /* Try to remove the handle, protected by the data handle lock. */ WT_WITH_HANDLE_LIST_LOCK(session, tret = __conn_dhandle_remove(session, final)); + if (set_pass_intr) + (void)__wt_atomic_sub32(&S2C(session)->cache->pass_intr, 1); WT_TRET(tret); /* diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c index 5f4c38e7361..509966793e5 100644 --- a/src/third_party/wiredtiger/src/conn/conn_handle.c +++ b/src/third_party/wiredtiger/src/conn/conn_handle.c @@ -149,15 +149,17 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &conn->page_lock[i]); __wt_free(session, conn->page_lock); + /* Destroy the file-system configuration. 
*/ + if (conn->file_system != NULL && conn->file_system->terminate != NULL) + WT_TRET(conn->file_system->terminate( + conn->file_system, (WT_SESSION *)session)); + /* Free allocated memory. */ __wt_free(session, conn->cfg); __wt_free(session, conn->home); __wt_free(session, conn->error_prefix); __wt_free(session, conn->sessions); - /* Destroy the OS configuration. */ - WT_TRET(__wt_os_cleanup(session)); - __wt_free(NULL, conn); return (ret); } diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index f83430735ef..1ae370ef2fa 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -178,6 +178,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file) conn = S2C(session); log = conn->log; logcount = 0; + locked = false; logfiles = NULL; /* @@ -198,14 +199,14 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file) * Main archive code. Get the list of all log files and * remove any earlier than the minimum log number. */ - WT_RET(__wt_dirlist(session, conn->log_path, - WT_LOG_FILENAME, WT_DIRLIST_INCLUDE, &logfiles, &logcount)); + WT_ERR(__wt_fs_directory_list( + session, conn->log_path, WT_LOG_FILENAME, &logfiles, &logcount)); /* * We can only archive files if a hot backup is not in progress or * if we are the backup. */ - WT_RET(__wt_readlock(session, conn->hot_backup_lock)); + WT_ERR(__wt_readlock(session, conn->hot_backup_lock)); locked = true; if (!conn->hot_backup || backup_file != 0) { for (i = 0; i < logcount; i++) { @@ -218,9 +219,6 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file) } WT_ERR(__wt_readunlock(session, conn->hot_backup_lock)); locked = false; - __wt_log_files_free(session, logfiles, logcount); - logfiles = NULL; - logcount = 0; /* * Indicate what is our new earliest LSN. 
It is the start @@ -232,8 +230,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file) err: __wt_err(session, ret, "log archive server error"); if (locked) WT_TRET(__wt_readunlock(session, conn->hot_backup_lock)); - if (logfiles != NULL) - __wt_log_files_free(session, logfiles, logcount); + WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount)); return (ret); } @@ -259,10 +256,9 @@ __log_prealloc_once(WT_SESSION_IMPL *session) * Allocate up to the maximum number, accounting for any existing * files that may not have been used yet. */ - WT_ERR(__wt_dirlist(session, conn->log_path, - WT_LOG_PREPNAME, WT_DIRLIST_INCLUDE, &recfiles, &reccount)); - __wt_log_files_free(session, recfiles, reccount); - recfiles = NULL; + WT_ERR(__wt_fs_directory_list( + session, conn->log_path, WT_LOG_PREPNAME, &recfiles, &reccount)); + /* * Adjust the number of files to pre-allocate if we find that * the critical path had to allocate them since we last ran. @@ -292,8 +288,7 @@ __log_prealloc_once(WT_SESSION_IMPL *session) if (0) err: __wt_err(session, ret, "log pre-alloc server error"); - if (recfiles != NULL) - __wt_log_files_free(session, recfiles, reccount); + WT_TRET(__wt_fs_directory_list_free(session, &recfiles, reccount)); return (ret); } @@ -314,12 +309,15 @@ __wt_log_truncate_files( WT_UNUSED(cfg); conn = S2C(session); - log = conn->log; + if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) + return (0); if (F_ISSET(conn, WT_CONN_SERVER_RUN) && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) WT_RET_MSG(session, EINVAL, "Attempt to archive manually while a server is running"); + log = conn->log; + backup_file = 0; if (cursor != NULL) backup_file = WT_CURSOR_BACKUP_ID(cursor); @@ -327,6 +325,7 @@ __wt_log_truncate_files( WT_RET(__wt_verbose(session, WT_VERB_LOG, "log_truncate_files: Archive once up to %" PRIu32, backup_file)); + WT_RET(__wt_writelock(session, log->log_archive_lock)); locked = true; WT_ERR(__log_archive_once(session, backup_file)); @@ 
-677,7 +676,6 @@ __log_wrlsn_server(void *arg) log = conn->log; yield = 0; WT_INIT_LSN(&prev); - did_work = false; while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { /* * Write out any log record buffers if anything was done @@ -692,10 +690,8 @@ __log_wrlsn_server(void *arg) else WT_STAT_FAST_CONN_INCR(session, log_write_lsn_skip); prev = log->alloc_lsn; - if (yield == 0) - did_work = true; - else - did_work = false; + did_work = yield == 0; + /* * If __wt_log_wrlsn did work we want to yield instead of sleep. */ @@ -865,9 +861,9 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) "log write LSN")); WT_RET(__wt_rwlock_alloc(session, &log->log_archive_lock, "log archive lock")); - if (FLD_ISSET(conn->direct_io, WT_FILE_TYPE_LOG)) - log->allocsize = - WT_MAX((uint32_t)conn->buffer_alignment, WT_LOG_ALIGN); + if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG)) + log->allocsize = (uint32_t) + WT_MAX(conn->buffer_alignment, WT_LOG_ALIGN); else log->allocsize = WT_LOG_ALIGN; WT_INIT_LSN(&log->alloc_lsn); diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c index fccc4786402..855ff57808e 100644 --- a/src/third_party/wiredtiger/src/conn/conn_stat.c +++ b/src/third_party/wiredtiger/src/conn/conn_stat.c @@ -209,11 +209,11 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats) } if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) { - WT_ERR(__wt_fprintf(session, conn->stat_fh, + WT_ERR(__wt_fprintf(session, conn->stat_fs, "{\"version\":\"%s\",\"localTime\":\"%s\"", WIREDTIGER_VERSION_STRING, conn->stat_stamp)); WT_ERR(__wt_fprintf( - session, conn->stat_fh, ",\"wiredTiger\":{")); + session, conn->stat_fs, ",\"wiredTiger\":{")); while ((ret = cursor->next(cursor)) == 0) { WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val)); /* Check if we are starting a new section. 
*/ @@ -225,23 +225,23 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats) strncmp(desc, tmp->data, tmp->size) != 0) { WT_ERR(__wt_buf_set( session, tmp, desc, prefixlen)); - WT_ERR(__wt_fprintf(session, conn->stat_fh, + WT_ERR(__wt_fprintf(session, conn->stat_fs, "%s\"%.*s\":{", first ? "" : "},", (int)prefixlen, desc)); first = false; groupfirst = true; } - WT_ERR(__wt_fprintf(session, conn->stat_fh, + WT_ERR(__wt_fprintf(session, conn->stat_fs, "%s\"%s\":%" PRId64, groupfirst ? "" : ",", endprefix + 2, val)); groupfirst = false; } WT_ERR_NOTFOUND_OK(ret); - WT_ERR(__wt_fprintf(session, conn->stat_fh, "}}}\n")); + WT_ERR(__wt_fprintf(session, conn->stat_fs, "}}}\n")); } else { while ((ret = cursor->next(cursor)) == 0) { WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val)); - WT_ERR(__wt_fprintf(session, conn->stat_fh, + WT_ERR(__wt_fprintf(session, conn->stat_fs, "%s %" PRId64 " %s %s\n", conn->stat_stamp, val, name, desc)); } @@ -354,7 +354,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) struct tm *tm, _tm; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_FH *log_file; + WT_FSTREAM *log_stream; conn = S2C(session); @@ -367,18 +367,16 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) WT_RET_MSG(session, ENOMEM, "strftime path conversion"); /* If the path has changed, cycle the log file. 
*/ - if ((log_file = conn->stat_fh) == NULL || + if ((log_stream = conn->stat_fs) == NULL || path == NULL || strcmp(tmp->mem, path->mem) != 0) { - conn->stat_fh = NULL; - WT_RET(__wt_close(session, &log_file)); + WT_RET(__wt_fclose(session, &conn->stat_fs)); if (path != NULL) (void)strcpy(path->mem, tmp->mem); - WT_RET(__wt_open(session, tmp->mem, - WT_FILE_TYPE_REGULAR, - WT_OPEN_CREATE | WT_OPEN_FIXED | WT_STREAM_APPEND, - &log_file)); + WT_RET(__wt_fopen(session, tmp->mem, + WT_OPEN_CREATE | WT_OPEN_FIXED, WT_STREAM_APPEND, + &log_stream)); } - conn->stat_fh = log_file; + conn->stat_fs = log_stream; /* Create the entry prefix for this time of day. */ if (strftime(tmp->mem, tmp->memsize, conn->stat_format, tm) == 0) @@ -411,7 +409,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) WT_RET(__statlog_lsm_apply(session)); /* Flush. */ - return (__wt_fsync(session, conn->stat_fh, true)); + return (__wt_fflush(session, conn->stat_fs)); } /* @@ -597,7 +595,7 @@ __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) conn->stat_session = NULL; conn->stat_tid_set = false; conn->stat_format = NULL; - WT_TRET(__wt_close(session, &conn->stat_fh)); + WT_TRET(__wt_fclose(session, &conn->stat_fs)); conn->stat_path = NULL; conn->stat_sources = NULL; conn->stat_stamp = NULL; diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index 5be9b311a79..4ee23008687 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -10,7 +10,6 @@ static int __backup_all(WT_SESSION_IMPL *); static int __backup_cleanup_handles(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *); -static int __backup_file_create(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, bool); static int __backup_list_append( WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *); static int __backup_list_uri_append(WT_SESSION_IMPL *, const char *, bool *); @@ -178,8 +177,7 @@ 
__backup_log_append(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, bool active) for (i = 0; i < logcount; i++) WT_ERR(__backup_list_append(session, cb, logfiles[i])); } -err: if (logfiles != NULL) - __wt_log_files_free(session, logfiles, logcount); +err: WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount)); return (ret); } @@ -193,9 +191,13 @@ __backup_start( { WT_CONNECTION_IMPL *conn; WT_DECL_RET; + WT_FSTREAM *srcfs; + const char *dest; bool exist, log_only, target_list; conn = S2C(session); + srcfs = NULL; + dest = NULL; cb->next = 0; cb->list = NULL; @@ -224,11 +226,16 @@ __backup_start( conn->hot_backup = true; WT_ERR(__wt_writeunlock(session, conn->hot_backup_lock)); - /* Create the hot backup file. */ - WT_ERR(__backup_file_create(session, cb, false)); - - /* Add log files if logging is enabled. */ - + /* + * Create a temporary backup file. This must be opened before + * generating the list of targets in backup_uri. This file will + * later be renamed to the correct name depending on whether or not + * we're doing an incremental backup. We need a temp file so that if + * we fail or crash while filling it, the existence of a partial file + * doesn't confuse restarting in the source database. + */ + WT_ERR(__wt_fopen(session, WT_BACKUP_TMP, + WT_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs)); /* * If a list of targets was specified, work our way through them. * Else, generate a list of all database objects. @@ -248,20 +255,23 @@ __backup_start( /* Add the hot backup and standard WiredTiger files to the list. */ if (log_only) { /* - * Close any hot backup file. - * We're about to open the incremental backup file. + * We also open an incremental backup source file so that we + * can detect a crash with an incremental backup existing in + * the source directory versus an improper destination. 
*/ - WT_TRET(__wt_close(session, &cb->bfh)); - WT_ERR(__backup_file_create(session, cb, log_only)); + dest = WT_INCREMENTAL_BACKUP; + WT_ERR(__wt_fopen(session, WT_INCREMENTAL_SRC, + WT_OPEN_CREATE, WT_STREAM_WRITE, &srcfs)); WT_ERR(__backup_list_append( session, cb, WT_INCREMENTAL_BACKUP)); } else { + dest = WT_METADATA_BACKUP; WT_ERR(__backup_list_append(session, cb, WT_METADATA_BACKUP)); - WT_ERR(__wt_exist(session, WT_BASECONFIG, &exist)); + WT_ERR(__wt_fs_exist(session, WT_BASECONFIG, &exist)); if (exist) WT_ERR(__backup_list_append( session, cb, WT_BASECONFIG)); - WT_ERR(__wt_exist(session, WT_USERCONFIG, &exist)); + WT_ERR(__wt_fs_exist(session, WT_USERCONFIG, &exist)); if (exist) WT_ERR(__backup_list_append( session, cb, WT_USERCONFIG)); @@ -269,10 +279,15 @@ __backup_start( } err: /* Close the hot backup file. */ - WT_TRET(__wt_close(session, &cb->bfh)); + WT_TRET(__wt_fclose(session, &cb->bfs)); + if (srcfs != NULL) + WT_TRET(__wt_fclose(session, &srcfs)); if (ret != 0) { WT_TRET(__backup_cleanup_handles(session, cb)); WT_TRET(__backup_stop(session)); + } else { + WT_ASSERT(session, dest != NULL); + WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest)); } return (ret); @@ -384,13 +399,23 @@ __backup_uri(WT_SESSION_IMPL *session, uri); /* - * Handle log targets. We do not need to go through the - * schema worker, just call the function to append them. - * Set log_only only if it is our only URI target. + * Handle log targets. We do not need to go through the schema + * worker, just call the function to append them. Set log_only + * only if it is our only URI target. */ if (WT_PREFIX_MATCH(uri, "log:")) { + /* + * Log archive cannot mix with incremental backup, don't + * let that happen. 
+ */ + if (FLD_ISSET( + S2C(session)->log_flags, WT_CONN_LOG_ARCHIVE)) + WT_ERR_MSG(session, EINVAL, + "incremental backup not possible when " + "automatic log archival configured"); *log_only = !target_list; - WT_ERR(__backup_list_uri_append(session, uri, NULL)); + WT_ERR(__backup_log_append( + session, session->bkp_cursor, false)); } else { *log_only = false; WT_ERR(__wt_schema_worker(session, @@ -404,19 +429,6 @@ err: __wt_scr_free(session, &tmp); } /* - * __backup_file_create -- - * Create the meta-data backup file. - */ -static int -__backup_file_create( - WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, bool incremental) -{ - return (__wt_open(session, - incremental ? WT_INCREMENTAL_BACKUP : WT_METADATA_BACKUP, - WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_STREAM_WRITE, &cb->bfh)); -} - -/* * __wt_backup_file_remove -- * Remove the incremental and meta-data backup files. */ @@ -425,7 +437,15 @@ __wt_backup_file_remove(WT_SESSION_IMPL *session) { WT_DECL_RET; + /* + * Note that order matters for removing the incremental files. We must + * remove the backup file before removing the source file so that we + * always know we were a source directory while there's any chance of + * an incremental backup file existing. + */ + WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP)); WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP)); + WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC)); WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP)); return (ret); } @@ -453,11 +473,6 @@ __backup_list_uri_append( * if there's an entry backed by anything other than a file or lsm * entry, we're confused. */ - if (WT_PREFIX_MATCH(name, "log:")) { - WT_RET(__backup_log_append(session, cb, false)); - return (0); - } - if (!WT_PREFIX_MATCH(name, "file:") && !WT_PREFIX_MATCH(name, "colgroup:") && !WT_PREFIX_MATCH(name, "index:") && @@ -473,7 +488,7 @@ __backup_list_uri_append( /* Add the metadata entry to the backup file. 
*/ WT_RET(__wt_metadata_search(session, name, &value)); - ret = __wt_fprintf(session, cb->bfh, "%s\n%s\n", name, value); + ret = __wt_fprintf(session, cb->bfs, "%s\n%s\n", name, value); __wt_free(session, value); WT_RET(ret); diff --git a/src/third_party/wiredtiger/src/cursor/cur_bulk.c b/src/third_party/wiredtiger/src/cursor/cur_bulk.c index c013383fa61..d1a53057650 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_bulk.c +++ b/src/third_party/wiredtiger/src/cursor/cur_bulk.c @@ -328,7 +328,6 @@ __wt_curbulk_init(WT_SESSION_IMPL *session, c->insert = skip_sort_check ? __curbulk_insert_row_skip_check : __curbulk_insert_row; break; - WT_ILLEGAL_VALUE(session); } cbulk->first_insert = true; diff --git a/src/third_party/wiredtiger/src/cursor/cur_dump.c b/src/third_party/wiredtiger/src/cursor/cur_dump.c index a7b1c98871a..595915df7b7 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_dump.c +++ b/src/third_party/wiredtiger/src/cursor/cur_dump.c @@ -128,7 +128,7 @@ str2recno(WT_SESSION_IMPL *session, const char *p, uint64_t *recnop) * forth -- none of them are OK with us. Check the string starts with * digit, that turns off the special processing. */ - if (!isdigit(p[0])) + if (!__wt_isdigit((u_char)p[0])) goto format; errno = 0; @@ -155,7 +155,9 @@ __curdump_set_key(WT_CURSOR *cursor, ...) WT_SESSION_IMPL *session; uint64_t recno; va_list ap; + const uint8_t *up; const char *p; + bool json; cdump = (WT_CURSOR_DUMP *)cursor; child = cdump->child; @@ -168,16 +170,23 @@ __curdump_set_key(WT_CURSOR *cursor, ...) 
p = va_arg(ap, const char *); va_end(ap); + json = F_ISSET(cursor, WT_CURSTD_DUMP_JSON); + if (json) + WT_ERR(__wt_json_to_item(session, p, cursor->key_format, + (WT_CURSOR_JSON *)cursor->json_private, true, + &cursor->key)); + if (WT_CURSOR_RECNO(cursor) && !F_ISSET(cursor, WT_CURSTD_RAW)) { - WT_ERR(str2recno(session, p, &recno)); + if (json) { + up = (const uint8_t *)cursor->key.data; + WT_ERR(__wt_vunpack_uint(&up, cursor->key.size, + &recno)); + } else + WT_ERR(str2recno(session, p, &recno)); child->set_key(child, recno); } else { - if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON)) - WT_ERR(__wt_json_to_item(session, p, cursor->key_format, - (WT_CURSOR_JSON *)cursor->json_private, true, - &cursor->key)); - else + if (!json) WT_ERR(__dump_to_raw(session, p, &cursor->key, F_ISSET(cursor, WT_CURSTD_DUMP_HEX))); diff --git a/src/third_party/wiredtiger/src/cursor/cur_index.c b/src/third_party/wiredtiger/src/cursor/cur_index.c index dbe8046ca21..6de68d86778 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_index.c +++ b/src/third_party/wiredtiger/src/cursor/cur_index.c @@ -8,20 +8,6 @@ #include "wt_internal.h" - /* - * __wt_curindex_joined -- - * Produce an error that this cursor is being used in a join call. - */ -int -__wt_curindex_joined(WT_CURSOR *cursor) -{ - WT_SESSION_IMPL *session; - - session = (WT_SESSION_IMPL *)cursor->session; - __wt_errx(session, "index cursor is being used in a join"); - return (ENOTSUP); -} - /* * __curindex_get_value -- * WT_CURSOR->get_value implementation for index cursors. @@ -462,7 +448,7 @@ __wt_curindex_open(WT_SESSION_IMPL *session, if (WT_CURSOR_RECNO(cursor)) WT_ERR_MSG(session, WT_ERROR, "Column store indexes based on a record number primary " - "key are not supported."); + "key are not supported"); /* Handle projections. 
*/ if (columns != NULL) { diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c index 38a83217933..0760a07a3aa 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_join.c +++ b/src/third_party/wiredtiger/src/cursor/cur_join.c @@ -8,159 +8,293 @@ #include "wt_internal.h" +static int __curjoin_entries_in_range(WT_SESSION_IMPL *, WT_CURSOR_JOIN *, + WT_ITEM *, WT_CURSOR_JOIN_ITER *); +static int __curjoin_entry_in_range(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, + WT_ITEM *, WT_CURSOR_JOIN_ITER *); +static int __curjoin_entry_member(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, + WT_ITEM *, WT_CURSOR_JOIN_ITER *); static int __curjoin_insert_endpoint(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, u_int, WT_CURSOR_JOIN_ENDPOINT **); +static int __curjoin_iter_close(WT_CURSOR_JOIN_ITER *); +static int __curjoin_iter_close_all(WT_CURSOR_JOIN_ITER *); +static bool __curjoin_iter_ready(WT_CURSOR_JOIN_ITER *); +static int __curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *, u_int); +static int __curjoin_pack_recno(WT_SESSION_IMPL *, uint64_t, uint8_t *, + size_t, WT_ITEM *); +static int __curjoin_split_key(WT_SESSION_IMPL *, WT_CURSOR_JOIN *, WT_ITEM *, + WT_CURSOR *, WT_CURSOR *, const char *, bool); + +#define WT_CURJOIN_ITER_CONSUMED(iter) \ + ((iter)->entry_pos >= (iter)->entry_count) /* - * __curjoin_entry_iter_init -- + * __wt_curjoin_joined -- + * Produce an error that this cursor is being used in a join call. + */ +int +__wt_curjoin_joined(WT_CURSOR *cursor) +{ + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)cursor->session; + __wt_errx(session, "cursor is being used in a join"); + return (ENOTSUP); +} + +/* + * __curjoin_iter_init -- * Initialize an iteration for the index managed by a join entry. 
- * */ static int -__curjoin_entry_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, - WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ITER **iterp) +__curjoin_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + WT_CURSOR_JOIN_ITER **iterp) { - WT_CURSOR *to_dup; - WT_DECL_RET; - const char *raw_cfg[] = { WT_CONFIG_BASE( - session, WT_SESSION_open_cursor), "raw", NULL }; - const char *def_cfg[] = { WT_CONFIG_BASE( - session, WT_SESSION_open_cursor), NULL }; - const char *urimain, **config; - char *mainbuf, *uri; WT_CURSOR_JOIN_ITER *iter; - size_t size; - - iter = NULL; - mainbuf = uri = NULL; - to_dup = entry->ends[0].cursor; - - if (F_ISSET((WT_CURSOR *)cjoin, WT_CURSTD_RAW)) - config = &raw_cfg[0]; - else - config = &def_cfg[0]; - - size = strlen(to_dup->internal_uri) + 3; - WT_ERR(__wt_calloc(session, size, 1, &uri)); - snprintf(uri, size, "%s()", to_dup->internal_uri); - urimain = cjoin->table->name; - if (cjoin->projection != NULL) { - size = strlen(urimain) + strlen(cjoin->projection) + 1; - WT_ERR(__wt_calloc(session, size, 1, &mainbuf)); - snprintf(mainbuf, size, "%s%s", urimain, cjoin->projection); - urimain = mainbuf; - } - WT_ERR(__wt_calloc_one(session, &iter)); - WT_ERR(__wt_open_cursor(session, uri, (WT_CURSOR *)cjoin, config, - &iter->cursor)); - WT_ERR(__wt_cursor_dup_position(to_dup, iter->cursor)); - WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config, - &iter->main)); + *iterp = NULL; + WT_RET(__wt_calloc_one(session, iterp)); + iter = *iterp; iter->cjoin = cjoin; iter->session = session; - iter->entry = entry; - iter->positioned = false; - iter->isequal = (entry->ends_next == 1 && - WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ); - *iterp = iter; + cjoin->iter = iter; + WT_RET(__curjoin_iter_set_entry(iter, 0)); + return (0); +} - if (0) { -err: __wt_free(session, iter); - } - __wt_free(session, mainbuf); - __wt_free(session, uri); +/* + * __curjoin_iter_close -- + * Close the iteration, release 
resources. + */ +static int +__curjoin_iter_close(WT_CURSOR_JOIN_ITER *iter) +{ + WT_DECL_RET; + + if (iter->cursor != NULL) + WT_TRET(iter->cursor->close(iter->cursor)); + __wt_free(iter->session, iter); return (ret); } /* - * __curjoin_pack_recno -- - * Pack the given recno into a buffer; prepare an item referencing it. - * + * __curjoin_iter_close_all -- + * Free the iterator and all of its children recursively. */ static int -__curjoin_pack_recno(WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf, - size_t bufsize, WT_ITEM *item) +__curjoin_iter_close_all(WT_CURSOR_JOIN_ITER *iter) { - WT_SESSION *wtsession; - size_t sz; + WT_CURSOR_JOIN *parent; + WT_DECL_RET; - wtsession = (WT_SESSION *)session; - WT_RET(wiredtiger_struct_size(wtsession, &sz, "r", r)); - WT_ASSERT(session, sz < bufsize); - WT_RET(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r)); - item->size = sz; - item->data = buf; + if (iter->child) + WT_TRET(__curjoin_iter_close_all(iter->child)); + iter->child = NULL; + WT_ASSERT(iter->session, iter->cjoin->parent == NULL || + iter->cjoin->parent->iter->child == iter); + if ((parent = iter->cjoin->parent) != NULL) + parent->iter->child = NULL; + iter->cjoin->iter = NULL; + WT_TRET(__curjoin_iter_close(iter)); + return (ret); +} + +/* + * __curjoin_iter_reset -- + * Reset an iteration to the starting point. + */ +static int +__curjoin_iter_reset(WT_CURSOR_JOIN_ITER *iter) +{ + if (iter->child != NULL) + WT_RET(__curjoin_iter_close_all(iter->child)); + WT_RET(__curjoin_iter_set_entry(iter, 0)); + iter->positioned = false; return (0); } /* - * __curjoin_split_key -- - * Copy the primary key from a cursor (either main table or index) - * to another cursor. When copying from an index file, the index - * key is also returned. - * + * __curjoin_iter_ready -- + * Check the positioned flag for all nested iterators. 
+ */ +static bool +__curjoin_iter_ready(WT_CURSOR_JOIN_ITER *iter) +{ + while (iter != NULL) { + if (!iter->positioned) + return (false); + iter = iter->child; + } + return (true); +} + +/* + * __curjoin_iter_set_entry -- + * Set the current entry for an iterator. */ static int -__curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, - WT_ITEM *idxkey, WT_CURSOR *tocur, WT_CURSOR *fromcur, - const char *repack_fmt, bool isindex) +__curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *iter, u_int entry_pos) { - WT_CURSOR *firstcg_cur; - WT_CURSOR_INDEX *cindex; - WT_ITEM *keyp; - const uint8_t *p; + WT_CURSOR *c, *to_dup; + WT_CURSOR_JOIN *cjoin, *topjoin; + WT_CURSOR_JOIN_ENTRY *entry; + WT_DECL_RET; + WT_SESSION_IMPL *session; + size_t size; + const char *raw_cfg[] = { WT_CONFIG_BASE( + iter->session, WT_SESSION_open_cursor), "raw", NULL }; + const char *def_cfg[] = { WT_CONFIG_BASE( + iter->session, WT_SESSION_open_cursor), NULL }; + const char **config; + char *uri; + + session = iter->session; + cjoin = iter->cjoin; + uri = NULL; + entry = iter->entry = &cjoin->entries[entry_pos]; + iter->positioned = false; + iter->entry_pos = entry_pos; + iter->end_pos = 0; - if (isindex) { - cindex = ((WT_CURSOR_INDEX *)fromcur); - /* - * Repack tells us where the index key ends; advance past - * that to get where the raw primary key starts. - */ - WT_RET(__wt_struct_repack(session, cindex->child->key_format, - repack_fmt != NULL ? 
repack_fmt : cindex->iface.key_format, - &cindex->child->key, idxkey)); - WT_ASSERT(session, cindex->child->key.size > idxkey->size); - tocur->key.data = (uint8_t *)idxkey->data + idxkey->size; - tocur->key.size = cindex->child->key.size - idxkey->size; - if (WT_CURSOR_RECNO(tocur)) { - p = (const uint8_t *)tocur->key.data; - WT_RET(__wt_vunpack_uint(&p, tocur->key.size, - &tocur->recno)); - } else - tocur->recno = 0; - } else { - firstcg_cur = ((WT_CURSOR_TABLE *)fromcur)->cg_cursors[0]; - keyp = &firstcg_cur->key; - if (WT_CURSOR_RECNO(tocur)) { - WT_ASSERT(session, keyp->size == sizeof(uint64_t)); - tocur->recno = *(uint64_t *)keyp->data; - WT_RET(__curjoin_pack_recno(session, tocur->recno, - cjoin->recno_buf, sizeof(cjoin->recno_buf), - &tocur->key)); - } else { - WT_ITEM_SET(tocur->key, *keyp); - tocur->recno = 0; + iter->is_equal = (entry->ends_next == 1 && + WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ); + iter->end_skip = (entry->ends_next > 0 && + WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_GE) ? 
1 : 0; + + iter->end_count = WT_MIN(1, entry->ends_next); + if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) { + iter->entry_count = cjoin->entries_next; + if (iter->is_equal) + iter->end_count = entry->ends_next; + } else + iter->entry_count = 1; + WT_ASSERT(iter->session, iter->entry_pos < iter->entry_count); + + entry->stats.iterated = 0; + + if (entry->subjoin == NULL) { + for (topjoin = iter->cjoin; topjoin->parent != NULL; + topjoin = topjoin->parent) + ; + to_dup = entry->ends[0].cursor; + + if (F_ISSET((WT_CURSOR *)topjoin, WT_CURSTD_RAW)) + config = &raw_cfg[0]; + else + config = &def_cfg[0]; + + size = strlen(to_dup->internal_uri) + 3; + WT_ERR(__wt_calloc(session, size, 1, &uri)); + snprintf(uri, size, "%s()", to_dup->internal_uri); + if ((c = iter->cursor) == NULL || !WT_STREQ(c->uri, uri)) { + iter->cursor = NULL; + if (c != NULL) + WT_ERR(c->close(c)); + WT_ERR(__wt_open_cursor(session, uri, + (WT_CURSOR *)topjoin, config, &iter->cursor)); } - idxkey->data = NULL; - idxkey->size = 0; + WT_ERR(__wt_cursor_dup_position(to_dup, iter->cursor)); + } else if (iter->cursor != NULL) { + WT_ERR(iter->cursor->close(iter->cursor)); + iter->cursor = NULL; } + +err: __wt_free(session, uri); + return (ret); +} + +/* + * __curjoin_iter_bump -- + * Called to advance the iterator to the next endpoint, which may in turn + * advance to the next entry. 
+ */ +static int +__curjoin_iter_bump(WT_CURSOR_JOIN_ITER *iter) +{ + WT_CURSOR_JOIN_ENTRY *entry; + WT_SESSION_IMPL *session; + + session = iter->session; + iter->positioned = false; + entry = iter->entry; + if (entry->subjoin == NULL && iter->is_equal && + ++iter->end_pos < iter->end_count) { + WT_RET(__wt_cursor_dup_position( + entry->ends[iter->end_pos].cursor, iter->cursor)); + return (0); + } + iter->end_pos = iter->end_count = iter->end_skip = 0; + if (entry->subjoin != NULL && entry->subjoin->iter != NULL) + WT_RET(__curjoin_iter_close_all(entry->subjoin->iter)); + + if (++iter->entry_pos >= iter->entry_count) { + iter->entry = NULL; + return (0); + } + iter->entry = ++entry; + if (entry->subjoin != NULL) { + WT_RET(__curjoin_iter_init(session, entry->subjoin, + &iter->child)); + return (0); + } + WT_RET(__curjoin_iter_set_entry(iter, iter->entry_pos)); return (0); } /* - * __curjoin_entry_iter_next -- + * __curjoin_iter_next -- * Get the next item in an iteration. * */ static int -__curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor) +__curjoin_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor) { - if (iter->positioned) - WT_RET(iter->cursor->next(iter->cursor)); - else + WT_CURSOR_JOIN_ENTRY *entry; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = iter->session; + + if (WT_CURJOIN_ITER_CONSUMED(iter)) + return (WT_NOTFOUND); +again: + entry = iter->entry; + if (entry->subjoin != NULL) { + if (iter->child == NULL) + WT_RET(__curjoin_iter_init(session, + entry->subjoin, &iter->child)); + ret = __curjoin_iter_next(iter->child, cursor); + if (ret == 0) { + /* The child did the work, we're done. 
*/ + iter->curkey = &cursor->key; + iter->positioned = true; + return (ret); + } + else if (ret == WT_NOTFOUND) { + WT_RET(__curjoin_iter_close_all(iter->child)); + entry->subjoin->iter = NULL; + iter->child = NULL; + WT_RET(__curjoin_iter_bump(iter)); + ret = 0; + } + } else if (iter->positioned) { + ret = iter->cursor->next(iter->cursor); + if (ret == WT_NOTFOUND) { + WT_RET(__curjoin_iter_bump(iter)); + ret = 0; + } else + WT_RET(ret); + } else iter->positioned = true; + if (WT_CURJOIN_ITER_CONSUMED(iter)) + return (WT_NOTFOUND); + + if (!__curjoin_iter_ready(iter)) + goto again; + + WT_RET(ret); + /* * Set our key to the primary key, we'll also need this * to check membership. @@ -169,57 +303,385 @@ __curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor) cursor, iter->cursor, iter->entry->repack_format, iter->entry->index != NULL)); iter->curkey = &cursor->key; - iter->entry->stats.actual_count++; - iter->entry->stats.accesses++; + iter->entry->stats.iterated++; return (0); } /* - * __curjoin_entry_iter_reset -- - * Reset an iteration to the starting point. - * + * __curjoin_close -- + * WT_CURSOR::close for join cursors. 
*/ static int -__curjoin_entry_iter_reset(WT_CURSOR_JOIN_ITER *iter) +__curjoin_close(WT_CURSOR *cursor) { - if (iter->positioned) { - WT_RET(iter->cursor->reset(iter->cursor)); - WT_RET(iter->main->reset(iter->main)); - WT_RET(__wt_cursor_dup_position( - iter->cjoin->entries[0].ends[0].cursor, iter->cursor)); - iter->positioned = false; - iter->entry->stats.actual_count = 0; + WT_CURSOR_JOIN *cjoin; + WT_CURSOR_JOIN_ENDPOINT *end; + WT_CURSOR_JOIN_ENTRY *entry; + WT_DECL_RET; + WT_SESSION_IMPL *session; + u_int i; + + cjoin = (WT_CURSOR_JOIN *)cursor; + + JOINABLE_CURSOR_API_CALL(cursor, session, close, NULL); + + __wt_schema_release_table(session, cjoin->table); + /* These are owned by the table */ + cursor->internal_uri = NULL; + cursor->key_format = NULL; + if (cjoin->projection != NULL) { + __wt_free(session, cjoin->projection); + __wt_free(session, cursor->value_format); + } + + for (entry = cjoin->entries, i = 0; i < cjoin->entries_next; + entry++, i++) { + if (entry->subjoin != NULL) { + F_CLR(&entry->subjoin->iface, WT_CURSTD_JOINED); + entry->subjoin->parent = NULL; + } + if (entry->main != NULL) + WT_TRET(entry->main->close(entry->main)); + if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM)) + WT_TRET(__wt_bloom_close(entry->bloom)); + for (end = &entry->ends[0]; + end < &entry->ends[entry->ends_next]; end++) { + F_CLR(end->cursor, WT_CURSTD_JOINED); + if (F_ISSET(end, WT_CURJOIN_END_OWN_CURSOR)) + WT_TRET(end->cursor->close(end->cursor)); + } + __wt_free(session, entry->ends); + __wt_free(session, entry->repack_format); + } + + if (cjoin->iter != NULL) + WT_TRET(__curjoin_iter_close_all(cjoin->iter)); + if (cjoin->main != NULL) + WT_TRET(cjoin->main->close(cjoin->main)); + + __wt_free(session, cjoin->entries); + WT_TRET(__wt_cursor_close(cursor)); + +err: API_END_RET(session, ret); +} + +/* + * __curjoin_endpoint_init_key -- + * Set the key in the reference endpoint. 
+ */ +static int +__curjoin_endpoint_init_key(WT_SESSION_IMPL *session, + WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ENDPOINT *endpoint) +{ + WT_CURSOR *cursor; + WT_CURSOR_INDEX *cindex; + WT_ITEM *k; + uint64_t r; + + if ((cursor = endpoint->cursor) != NULL) { + if (entry->index != NULL) { + /* Extract and save the index's logical key. */ + cindex = (WT_CURSOR_INDEX *)endpoint->cursor; + WT_RET(__wt_struct_repack(session, + cindex->child->key_format, + (entry->repack_format != NULL ? + entry->repack_format : cindex->iface.key_format), + &cindex->child->key, &endpoint->key)); + } else { + k = &((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]->key; + if (WT_CURSOR_RECNO(cursor)) { + r = *(uint64_t *)k->data; + WT_RET(__curjoin_pack_recno(session, r, + endpoint->recno_buf, + sizeof(endpoint->recno_buf), + &endpoint->key)); + } else + endpoint->key = *k; + } } return (0); } /* - * __curjoin_entry_iter_ready -- - * The iterator is positioned. - * + * __curjoin_entries_in_range -- + * Check if a key is in the range specified by the remaining entries, + * returning WT_NOTFOUND if not. */ -static bool -__curjoin_entry_iter_ready(WT_CURSOR_JOIN_ITER *iter) +static int +__curjoin_entries_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iterarg) { - return (iter->positioned); + WT_CURSOR_JOIN_ENTRY *entry; + WT_CURSOR_JOIN_ITER *iter; + WT_DECL_RET; + u_int pos; + int fastret, slowret; + + iter = iterarg; + if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) { + fastret = 0; + slowret = WT_NOTFOUND; + } else { + fastret = WT_NOTFOUND; + slowret = 0; + } + pos = iter == NULL ? 0 : iter->entry_pos; + for (entry = &cjoin->entries[pos]; pos < cjoin->entries_next; + entry++, pos++) { + ret = __curjoin_entry_member(session, entry, curkey, iter); + if (ret == fastret) + return (fastret); + if (ret != slowret) + break; + iter = NULL; + } + + return (ret == 0 ? 
slowret : ret); } /* - * __curjoin_entry_iter_close -- - * Close the iteration, release resources. - * + * __curjoin_entry_in_range -- + * Check if a key is in the range specified by the entry, returning + * WT_NOTFOUND if not. */ static int -__curjoin_entry_iter_close(WT_CURSOR_JOIN_ITER *iter) +__curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, + WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iter) { + WT_COLLATOR *collator; + WT_CURSOR_JOIN_ENDPOINT *end, *endmax; + bool disjunction, passed; + u_int pos; + int cmp; + + collator = (entry->index != NULL) ? entry->index->collator : NULL; + endmax = &entry->ends[entry->ends_next]; + disjunction = F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION); + passed = false; + + /* + * The iterator may have already satisfied some endpoint conditions. + * If so and we're a disjunction, we're done. If so and we're a + * conjunction, we can start past the satisfied conditions. + */ + if (iter == NULL) + pos = 0; + else { + if (disjunction && iter->end_skip) + return (0); + pos = iter->end_pos + iter->end_skip; + } + + for (end = &entry->ends[pos]; end < endmax; end++) { + WT_RET(__wt_compare(session, collator, curkey, &end->key, + &cmp)); + switch (WT_CURJOIN_END_RANGE(end)) { + case WT_CURJOIN_END_EQ: + passed = (cmp == 0); + break; + + case WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ: + passed = (cmp >= 0); + WT_ASSERT(session, iter == NULL); + break; + + case WT_CURJOIN_END_GT: + passed = (cmp > 0); + if (passed && iter != NULL && pos == 0) + iter->end_skip = 1; + break; + + case WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ: + passed = (cmp <= 0); + break; + + case WT_CURJOIN_END_LT: + passed = (cmp < 0); + break; + + default: + WT_RET(__wt_illegal_value(session, NULL)); + break; + } + + if (!passed) { + if (iter != NULL && + (iter->is_equal || + F_ISSET(end, WT_CURJOIN_END_LT))) { + WT_RET(__curjoin_iter_bump(iter)); + return (WT_NOTFOUND); + } + if (!disjunction) + return (WT_NOTFOUND); + iter = NULL; + } else if 
(disjunction) + break; + } + if (disjunction && end == endmax) + return (WT_NOTFOUND); + else + return (0); +} + +typedef struct { + WT_CURSOR iface; + WT_CURSOR_JOIN_ENTRY *entry; + bool ismember; +} WT_CURJOIN_EXTRACTOR; + +/* + * __curjoin_extract_insert -- + * Handle a key produced by a custom extractor. + */ +static int +__curjoin_extract_insert(WT_CURSOR *cursor) { + WT_CURJOIN_EXTRACTOR *cextract; WT_DECL_RET; + WT_ITEM ikey; + WT_SESSION_IMPL *session; - if (iter->cursor != NULL) - WT_TRET(iter->cursor->close(iter->cursor)); - if (iter->main != NULL) - WT_TRET(iter->main->close(iter->main)); - __wt_free(iter->session, iter); + cextract = (WT_CURJOIN_EXTRACTOR *)cursor; + /* + * This insert method may be called multiple times during a single + * extraction. If we already have a definitive answer to the + * membership question, exit early. + */ + if (cextract->ismember) + return (0); + + session = (WT_SESSION_IMPL *)cursor->session; + WT_ITEM_SET(ikey, cursor->key); + /* + * We appended a padding byte to the key to avoid rewriting the last + * column. Strip that away here. + */ + WT_ASSERT(session, ikey.size > 0); + --ikey.size; + + ret = __curjoin_entry_in_range(session, cextract->entry, &ikey, false); + if (ret == WT_NOTFOUND) + ret = 0; + else if (ret == 0) + cextract->ismember = true; + + return (ret); +} + +/* + * __curjoin_entry_member -- + * Do a membership check for a particular index that was joined, + * if not a member, returns WT_NOTFOUND. 
+ */ +static int +__curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, + WT_ITEM *key, WT_CURSOR_JOIN_ITER *iter) +{ + WT_CURJOIN_EXTRACTOR extract_cursor; + WT_CURSOR *c; + WT_CURSOR_STATIC_INIT(iface, + __wt_cursor_get_key, /* get-key */ + __wt_cursor_get_value, /* get-value */ + __wt_cursor_set_key, /* set-key */ + __wt_cursor_set_value, /* set-value */ + __wt_cursor_compare_notsup, /* compare */ + __wt_cursor_equals_notsup, /* equals */ + __wt_cursor_notsup, /* next */ + __wt_cursor_notsup, /* prev */ + __wt_cursor_notsup, /* reset */ + __wt_cursor_notsup, /* search */ + __wt_cursor_search_near_notsup, /* search-near */ + __curjoin_extract_insert, /* insert */ + __wt_cursor_notsup, /* update */ + __wt_cursor_notsup, /* remove */ + __wt_cursor_reconfigure_notsup, /* reconfigure */ + __wt_cursor_notsup); /* close */ + WT_DECL_RET; + WT_INDEX *idx; + WT_ITEM v; + bool bloom_found; + + if (entry->subjoin == NULL && iter != NULL && + (iter->end_pos + iter->end_skip >= entry->ends_next || + (iter->end_skip > 0 && + F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)))) + return (0); /* no checks to make */ + + entry->stats.membership_check++; + bloom_found = false; + + if (entry->bloom != NULL) { + /* + * If we don't own the Bloom filter, we must be sharing one + * in a previous entry. So the shared filter has already + * been checked and passed. + */ + if (!F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM)) + return (0); + + /* + * If the item is not in the Bloom filter, we return + * immediately, otherwise, we still need to check the + * long way. + */ + WT_ERR(__wt_bloom_inmem_get(entry->bloom, key)); + bloom_found = true; + } + if (entry->subjoin != NULL) { + WT_ASSERT(session, + iter == NULL || entry->subjoin == iter->child->cjoin); + ret = __curjoin_entries_in_range(session, entry->subjoin, + key, iter == NULL ? 
NULL : iter->child); + if (iter != NULL && + WT_CURJOIN_ITER_CONSUMED(iter->child)) { + WT_ERR(__curjoin_iter_bump(iter)); + ret = WT_NOTFOUND; + } + return (ret); + } + if (entry->index != NULL) { + /* + * If this entry is used by the iterator, then we already + * have the index key, and we won't have to do any + * extraction either. + */ + if (iter != NULL && entry == iter->entry) + WT_ITEM_SET(v, iter->idxkey); + else { + memset(&v, 0, sizeof(v)); /* Keep lint quiet. */ + c = entry->main; + c->set_key(c, key); + entry->stats.main_access++; + if ((ret = c->search(c)) == 0) + ret = c->get_value(c, &v); + else if (ret == WT_NOTFOUND) + WT_ERR_MSG(session, WT_ERROR, + "main table for join is missing entry"); + WT_TRET(c->reset(c)); + WT_ERR(ret); + } + } else + WT_ITEM_SET(v, *key); + + if ((idx = entry->index) != NULL && idx->extractor != NULL && + (iter == NULL || entry != iter->entry)) { + WT_CLEAR(extract_cursor); + extract_cursor.iface = iface; + extract_cursor.iface.session = &session->iface; + extract_cursor.iface.key_format = idx->exkey_format; + extract_cursor.ismember = false; + extract_cursor.entry = entry; + WT_ERR(idx->extractor->extract(idx->extractor, + &session->iface, key, &v, &extract_cursor.iface)); + if (!extract_cursor.ismember) + WT_ERR(WT_NOTFOUND); + } else + WT_ERR(__curjoin_entry_in_range(session, entry, &v, iter)); + + if (0) { +err: if (ret == WT_NOTFOUND && bloom_found) + entry->stats.bloom_false_positive++; + } return (ret); } @@ -238,10 +700,10 @@ __curjoin_get_key(WT_CURSOR *cursor, ...) 
cjoin = (WT_CURSOR_JOIN *)cursor; va_start(ap, cursor); - CURSOR_API_CALL(cursor, session, get_key, NULL); + JOINABLE_CURSOR_API_CALL(cursor, session, get_key, NULL); if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) || - !__curjoin_entry_iter_ready(cjoin->iter)) + !cjoin->iter->positioned) WT_ERR_MSG(session, EINVAL, "join cursor must be advanced with next()"); WT_ERR(__wt_cursor_get_keyv(cursor, cursor->flags, ap)); @@ -258,23 +720,21 @@ static int __curjoin_get_value(WT_CURSOR *cursor, ...) { WT_CURSOR_JOIN *cjoin; - WT_CURSOR_JOIN_ITER *iter; WT_DECL_RET; WT_SESSION_IMPL *session; va_list ap; cjoin = (WT_CURSOR_JOIN *)cursor; - iter = cjoin->iter; va_start(ap, cursor); - CURSOR_API_CALL(cursor, session, get_value, NULL); + JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL); if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) || - !__curjoin_entry_iter_ready(iter)) + !cjoin->iter->positioned) WT_ERR_MSG(session, EINVAL, "join cursor must be advanced with next()"); - WT_ERR(__wt_curtable_get_valuev(iter->main, ap)); + WT_ERR(__wt_curtable_get_valuev(cjoin->main, ap)); err: va_end(ap); API_END_RET(session, ret); @@ -291,14 +751,15 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_COLLATOR *collator; WT_CURSOR *c; WT_CURSOR_JOIN_ENDPOINT *end, *endmax; - WT_DECL_RET; WT_DECL_ITEM(uribuf); + WT_DECL_RET; WT_ITEM curkey, curvalue; + size_t size; + u_int skip; + int cmp; + const char *uri; const char *raw_cfg[] = { WT_CONFIG_BASE( session, WT_SESSION_open_cursor), "raw", NULL }; - const char *uri; - size_t size; - int cmp, skip; c = NULL; skip = 0; @@ -340,6 +801,7 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, collator = (entry->index == NULL) ? 
NULL : entry->index->collator; while (ret == 0) { WT_ERR(c->get_key(c, &curkey)); + entry->stats.iterated++; if (entry->index != NULL) { /* * Repack so it's comparable to the @@ -354,7 +816,34 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, for (end = &entry->ends[skip]; end < endmax; end++) { WT_ERR(__wt_compare(session, collator, &curkey, &end->key, &cmp)); - if (!F_ISSET(end, WT_CURJOIN_END_LT)) { + if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) { + /* if condition satisfied, insert immediately */ + switch (WT_CURJOIN_END_RANGE(end)) { + case WT_CURJOIN_END_EQ: + if (cmp == 0) + goto insert; + break; + case WT_CURJOIN_END_GT: + if (cmp > 0) { + /* skip this check next time */ + skip = entry->ends_next; + goto insert; + } + break; + case WT_CURJOIN_END_GE: + if (cmp >= 0) + goto insert; + break; + case WT_CURJOIN_END_LT: + if (cmp < 0) + goto insert; + break; + case WT_CURJOIN_END_LE: + if (cmp <= 0) + goto insert; + break; + } + } else if (!F_ISSET(end, WT_CURJOIN_END_LT)) { if (cmp < 0 || (cmp == 0 && !F_ISSET(end, WT_CURJOIN_END_EQ))) goto advance; @@ -370,6 +859,14 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, goto done; } } + /* + * Either it's a disjunction that hasn't satisfied any + * condition, or it's a conjunction that has satisfied all + * conditions. + */ + if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) + goto advance; +insert: if (entry->index != NULL) { curvalue.data = (unsigned char *)curkey.data + curkey.size; @@ -379,7 +876,7 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, else WT_ERR(c->get_key(c, &curvalue)); WT_ERR(__wt_bloom_insert(bloom, &curvalue)); - entry->stats.actual_count++; + entry->stats.bloom_insert++; advance: if ((ret = c->next(c)) == WT_NOTFOUND) break; @@ -394,107 +891,86 @@ err: if (c != NULL) } /* - * __curjoin_endpoint_init_key -- - * Set the key in the reference endpoint. 
+ * __curjoin_init_next -- + * Initialize the cursor join when the next function is first called. */ static int -__curjoin_endpoint_init_key(WT_SESSION_IMPL *session, - WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ENDPOINT *endpoint) -{ - WT_CURSOR *cursor; - WT_CURSOR_INDEX *cindex; - WT_ITEM *k; - uint64_t r; - - if ((cursor = endpoint->cursor) != NULL) { - if (entry->index != NULL) { - /* Extract and save the index's logical key. */ - cindex = (WT_CURSOR_INDEX *)endpoint->cursor; - WT_RET(__wt_struct_repack(session, - cindex->child->key_format, - (entry->repack_format != NULL ? - entry->repack_format : cindex->iface.key_format), - &cindex->child->key, &endpoint->key)); - } else { - k = &((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]->key; - if (WT_CURSOR_RECNO(cursor)) { - r = *(uint64_t *)k->data; - WT_RET(__curjoin_pack_recno(session, r, - endpoint->recno_buf, - sizeof(endpoint->recno_buf), - &endpoint->key)); - } - else - endpoint->key = *k; - } - } - return (0); -} - -/* - * __curjoin_init_iter -- - * Initialize before any iteration. 
- */ -static int -__curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin) +__curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + bool iterable) { WT_BLOOM *bloom; - WT_DECL_RET; WT_CURSOR *origcur; - WT_CURSOR_JOIN_ENTRY *je, *jeend, *je2; WT_CURSOR_JOIN_ENDPOINT *end; + WT_CURSOR_JOIN_ENTRY *je, *jeend, *je2; + WT_DECL_RET; + size_t size; + uint32_t f, k; + char *mainbuf; const char *def_cfg[] = { WT_CONFIG_BASE( session, WT_SESSION_open_cursor), NULL }; const char *raw_cfg[] = { WT_CONFIG_BASE( session, WT_SESSION_open_cursor), "raw", NULL }; - uint32_t f, k; + const char **config, *proj, *urimain; + mainbuf = NULL; if (cjoin->entries_next == 0) WT_RET_MSG(session, EINVAL, "join cursor has not yet been joined with any other " "cursors"); - je = &cjoin->entries[0]; - jeend = &cjoin->entries[cjoin->entries_next]; - - /* - * For a single compare=le endpoint in the first iterated entry, - * construct a companion compare=ge endpoint that will actually - * be iterated. - */ - if (((je = cjoin->entries) != jeend) && - je->ends_next == 1 && F_ISSET(&je->ends[0], WT_CURJOIN_END_LT)) { - origcur = je->ends[0].cursor; - WT_RET(__curjoin_insert_endpoint(session, je, 0, &end)); - WT_RET(__wt_open_cursor(session, origcur->uri, - (WT_CURSOR *)cjoin, - F_ISSET(origcur, WT_CURSTD_RAW) ? 
raw_cfg : def_cfg, - &end->cursor)); - WT_RET(end->cursor->next(end->cursor)); - end->flags = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | - WT_CURJOIN_END_OWN_CURSOR; + if (F_ISSET((WT_CURSOR *)cjoin, WT_CURSTD_RAW)) + config = &raw_cfg[0]; + else + config = &def_cfg[0]; + urimain = cjoin->table->name; + if ((proj = cjoin->projection) != NULL) { + size = strlen(urimain) + strlen(proj) + 1; + WT_ERR(__wt_calloc(session, size, 1, &mainbuf)); + snprintf(mainbuf, size, "%s%s", urimain, proj); + urimain = mainbuf; } - WT_RET(__curjoin_entry_iter_init(session, cjoin, je, &cjoin->iter)); + WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config, + &cjoin->main)); + jeend = &cjoin->entries[cjoin->entries_next]; for (je = cjoin->entries; je < jeend; je++) { + if (je->subjoin != NULL) { + WT_ERR(__curjoin_init_next(session, je->subjoin, + iterable)); + continue; + } __wt_stat_join_init_single(&je->stats); + /* + * For a single compare=le/lt endpoint in any entry that may + * be iterated, construct a companion compare=ge endpoint + * that will actually be iterated. + */ + if (iterable && je->ends_next == 1 && + F_ISSET(&je->ends[0], WT_CURJOIN_END_LT)) { + origcur = je->ends[0].cursor; + WT_ERR(__curjoin_insert_endpoint(session, je, 0, &end)); + WT_ERR(__wt_open_cursor(session, origcur->uri, + (WT_CURSOR *)cjoin, + F_ISSET(origcur, WT_CURSTD_RAW) ? raw_cfg : def_cfg, + &end->cursor)); + end->flags = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | + WT_CURJOIN_END_OWN_CURSOR; + WT_ERR(end->cursor->next(end->cursor)); + F_CLR(je, WT_CURJOIN_ENTRY_DISJUNCTION); + } for (end = &je->ends[0]; end < &je->ends[je->ends_next]; end++) - WT_RET(__curjoin_endpoint_init_key(session, je, end)); + WT_ERR(__curjoin_endpoint_init_key(session, je, end)); /* - * The first entry is iterated as the 'outermost' cursor. - * For the common GE case, we don't have to test against - * the left reference key, we know it will be true since - * the btree is ordered. 
+ * Do any needed Bloom filter initialization. Ignore Bloom + * filters for entries that will be iterated. They won't + * help since these entries either don't need an inclusion + * check or are doing any needed check during the iteration. */ - if (je == cjoin->entries && je->ends[0].flags == - (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ)) - F_SET(cjoin, WT_CURJOIN_SKIP_FIRST_LEFT); - - if (F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) { + if (!iterable && F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) { if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED) - WT_RET_MSG(session, EINVAL, + WT_ERR_MSG(session, EINVAL, "join cursors with Bloom filters cannot be " "used with read-uncommitted isolation"); if (je->bloom == NULL) { @@ -516,10 +992,10 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin) } je->bloom_bit_count = f; je->bloom_hash_count = k; - WT_RET(__wt_bloom_create(session, NULL, + WT_ERR(__wt_bloom_create(session, NULL, NULL, je->count, f, k, &je->bloom)); F_SET(je, WT_CURJOIN_ENTRY_OWN_BLOOM); - WT_RET(__curjoin_init_bloom(session, cjoin, + WT_ERR(__curjoin_init_bloom(session, cjoin, je, je->bloom)); /* * Share the Bloom filter, making all @@ -541,201 +1017,45 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin) * merge into the shared one. The Bloom * parameters of the two filters must match. 
*/ - WT_RET(__wt_bloom_create(session, NULL, + WT_ERR(__wt_bloom_create(session, NULL, NULL, je->count, je->bloom_bit_count, je->bloom_hash_count, &bloom)); - WT_RET(__curjoin_init_bloom(session, cjoin, + WT_ERR(__curjoin_init_bloom(session, cjoin, je, bloom)); - WT_RET(__wt_bloom_intersection(je->bloom, + WT_ERR(__wt_bloom_intersection(je->bloom, bloom)); - WT_RET(__wt_bloom_close(bloom)); + WT_ERR(__wt_bloom_close(bloom)); } } + if (!F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) + iterable = false; } - F_SET(cjoin, WT_CURJOIN_INITIALIZED); - return (ret); -} - -/* - * __curjoin_entry_in_range -- - * Check if a key is in the range specified by the entry, returning - * WT_NOTFOUND if not. - */ -static int -__curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, - WT_ITEM *curkey, bool skip_left) -{ - WT_COLLATOR *collator; - WT_CURSOR_JOIN_ENDPOINT *end, *endmax; - int cmp; - - collator = (entry->index != NULL) ? entry->index->collator : NULL; - endmax = &entry->ends[entry->ends_next]; - for (end = &entry->ends[skip_left ? 1 : 0]; end < endmax; end++) { - WT_RET(__wt_compare(session, collator, curkey, &end->key, - &cmp)); - if (!F_ISSET(end, WT_CURJOIN_END_LT)) { - if (cmp < 0 || - (cmp == 0 && - !F_ISSET(end, WT_CURJOIN_END_EQ)) || - (cmp > 0 && !F_ISSET(end, WT_CURJOIN_END_GT))) - WT_RET(WT_NOTFOUND); - } else { - if (cmp > 0 || - (cmp == 0 && - !F_ISSET(end, WT_CURJOIN_END_EQ)) || - (cmp < 0 && !F_ISSET(end, WT_CURJOIN_END_LT))) - WT_RET(WT_NOTFOUND); - } - } - return (0); -} - -typedef struct { - WT_CURSOR iface; - WT_CURSOR_JOIN_ENTRY *entry; - bool ismember; -} WT_CURJOIN_EXTRACTOR; - -/* - * __curjoin_extract_insert -- - * Handle a key produced by a custom extractor. 
- */ -static int -__curjoin_extract_insert(WT_CURSOR *cursor) { - WT_CURJOIN_EXTRACTOR *cextract; - WT_DECL_RET; - WT_ITEM ikey; - WT_SESSION_IMPL *session; - - cextract = (WT_CURJOIN_EXTRACTOR *)cursor; - /* - * This insert method may be called multiple times during a single - * extraction. If we already have a definitive answer to the - * membership question, exit early. - */ - if (cextract->ismember) - return (0); - - session = (WT_SESSION_IMPL *)cursor->session; - - WT_ITEM_SET(ikey, cursor->key); - /* - * We appended a padding byte to the key to avoid rewriting the last - * column. Strip that away here. - */ - WT_ASSERT(session, ikey.size > 0); - --ikey.size; - - ret = __curjoin_entry_in_range(session, cextract->entry, &ikey, false); - if (ret == WT_NOTFOUND) - ret = 0; - else if (ret == 0) - cextract->ismember = true; +err: __wt_free(session, mainbuf); return (ret); } /* - * __curjoin_entry_member -- - * Do a membership check for a particular index that was joined, - * if not a member, returns WT_NOTFOUND. + * __curjoin_insert_endpoint -- + * Insert a new entry into the endpoint array for the join entry. 
*/ static int -__curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, - WT_CURSOR_JOIN_ENTRY *entry, bool skip_left) +__curjoin_insert_endpoint(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, + u_int pos, WT_CURSOR_JOIN_ENDPOINT **newendp) { - WT_CURJOIN_EXTRACTOR extract_cursor; - WT_CURSOR *c; - WT_CURSOR_STATIC_INIT(iface, - __wt_cursor_get_key, /* get-key */ - __wt_cursor_get_value, /* get-value */ - __wt_cursor_set_key, /* set-key */ - __wt_cursor_set_value, /* set-value */ - __wt_cursor_compare_notsup, /* compare */ - __wt_cursor_equals_notsup, /* equals */ - __wt_cursor_notsup, /* next */ - __wt_cursor_notsup, /* prev */ - __wt_cursor_notsup, /* reset */ - __wt_cursor_notsup, /* search */ - __wt_cursor_search_near_notsup, /* search-near */ - __curjoin_extract_insert, /* insert */ - __wt_cursor_notsup, /* update */ - __wt_cursor_notsup, /* remove */ - __wt_cursor_reconfigure_notsup, /* reconfigure */ - __wt_cursor_notsup); /* close */ - WT_DECL_RET; - WT_INDEX *idx; - WT_ITEM *key, v; - bool bloom_found; - - if (skip_left && entry->ends_next == 1) - return (0); /* no checks to make */ - key = cjoin->iter->curkey; - entry->stats.accesses++; - bloom_found = false; - - if (entry->bloom != NULL) { - /* - * If we don't own the Bloom filter, we must be sharing one - * in a previous entry. So the shared filter has already - * been checked and passed. - */ - if (!F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM)) - return (0); - - /* - * If the item is not in the Bloom filter, we return - * immediately, otherwise, we still need to check the - * long way. - */ - WT_ERR(__wt_bloom_inmem_get(entry->bloom, key)); - bloom_found = true; - } - if (entry->index != NULL) { - /* - * If this entry is used by the iterator, then we already - * have the index key, and we won't have to do any extraction - * either. - */ - if (entry == cjoin->iter->entry) - WT_ITEM_SET(v, cjoin->iter->idxkey); - else { - memset(&v, 0, sizeof(v)); /* Keep lint quiet. 
*/ - c = entry->main; - c->set_key(c, key); - if ((ret = c->search(c)) == 0) - ret = c->get_value(c, &v); - else if (ret == WT_NOTFOUND) - WT_ERR_MSG(session, WT_ERROR, - "main table for join is missing entry"); - WT_TRET(c->reset(c)); - WT_ERR(ret); - } - } else - WT_ITEM_SET(v, *key); + WT_CURSOR_JOIN_ENDPOINT *newend; - if ((idx = entry->index) != NULL && idx->extractor != NULL && - entry != cjoin->iter->entry) { - WT_CLEAR(extract_cursor); - extract_cursor.iface = iface; - extract_cursor.iface.session = &session->iface; - extract_cursor.iface.key_format = idx->exkey_format; - extract_cursor.ismember = false; - extract_cursor.entry = entry; - WT_ERR(idx->extractor->extract(idx->extractor, - &session->iface, key, &v, &extract_cursor.iface)); - if (!extract_cursor.ismember) - WT_ERR(WT_NOTFOUND); - } else - WT_ERR(__curjoin_entry_in_range(session, entry, &v, skip_left)); + WT_RET(__wt_realloc_def(session, &entry->ends_allocated, + entry->ends_next + 1, &entry->ends)); + newend = &entry->ends[pos]; + memmove(newend + 1, newend, + (entry->ends_next - pos) * sizeof(WT_CURSOR_JOIN_ENDPOINT)); + memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT)); + entry->ends_next++; + *newendp = newend; - if (0) { -err: if (ret == WT_NOTFOUND && bloom_found) - entry->stats.bloom_false_positive++; - } - return (ret); + return (0); } /* @@ -750,61 +1070,53 @@ __curjoin_next(WT_CURSOR *cursor) WT_CURSOR_JOIN_ITER *iter; WT_DECL_RET; WT_SESSION_IMPL *session; - bool skip_left; - u_int i; + int tret; cjoin = (WT_CURSOR_JOIN *)cursor; - CURSOR_API_CALL(cursor, session, next, NULL); + JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL); if (F_ISSET(cjoin, WT_CURJOIN_ERROR)) WT_ERR_MSG(session, WT_ERROR, "join cursor encountered previous error"); if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED)) - WT_ERR(__curjoin_init_iter(session, cjoin)); - - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + WT_ERR(__curjoin_init_next(session, cjoin, true)); + if (cjoin->iter == NULL) + 
WT_ERR(__curjoin_iter_init(session, cjoin, &cjoin->iter)); iter = cjoin->iter; + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); -nextkey: - if ((ret = __curjoin_entry_iter_next(iter, cursor)) == 0) { - F_SET(cursor, WT_CURSTD_KEY_EXT); + while ((ret = __curjoin_iter_next(iter, cursor)) == 0) { + if ((ret = __curjoin_entries_in_range(session, cjoin, + iter->curkey, iter)) != WT_NOTFOUND) + break; + } + iter->positioned = (ret == 0); + if (ret != 0 && ret != WT_NOTFOUND) + WT_ERR(ret); + if (ret == 0) { /* - * We may have already established membership for the - * 'left' case for the first entry, since we're - * using that in our iteration. + * Position the 'main' cursor, this will be used to retrieve + * values from the cursor join. The key we have is raw, but + * the main cursor may not be raw. */ - skip_left = F_ISSET(cjoin, WT_CURJOIN_SKIP_FIRST_LEFT); - for (i = 0; i < cjoin->entries_next; i++) { - ret = __curjoin_entry_member(session, cjoin, - &cjoin->entries[i], skip_left); - if (ret == WT_NOTFOUND) { - /* - * If this is compare=eq on our outer iterator, - * and we've moved past it, we're done. - */ - if (iter->isequal && i == 0) - break; - goto nextkey; - } - skip_left = false; - WT_ERR(ret); - } - } else if (ret != WT_NOTFOUND) - WT_ERR(ret); + c = cjoin->main; + __wt_cursor_set_raw_key(c, iter->curkey); - if (ret == 0) { /* - * Position the 'main' cursor, this will be used to - * retrieve values from the cursor join. + * A failed search is not expected, convert WT_NOTFOUND into a + * generic error. 
*/ - c = iter->main; - c->set_key(c, iter->curkey); - if ((ret = c->search(c)) != 0) - WT_ERR(c->search(c)); + iter->entry->stats.main_access++; + if ((ret = c->search(c)) == WT_NOTFOUND) + ret = WT_ERROR; + WT_ERR(ret); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); - } + } else if (ret == WT_NOTFOUND && + (tret = __curjoin_iter_close_all(iter)) != 0) + WT_ERR(tret); if (0) { err: F_SET(cjoin, WT_CURJOIN_ERROR); @@ -813,78 +1125,146 @@ err: F_SET(cjoin, WT_CURJOIN_ERROR); } /* - * __curjoin_reset -- - * WT_CURSOR::reset for join cursors. + * __curjoin_open_main -- + * For the given index, open the main file with a projection + * that is the index keys. */ static int -__curjoin_reset(WT_CURSOR *cursor) +__curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + WT_CURSOR_JOIN_ENTRY *entry) { - WT_CURSOR_JOIN *cjoin; WT_DECL_RET; - WT_SESSION_IMPL *session; + WT_INDEX *idx; + size_t len, newsize; + char *main_uri, *newformat; + const char *raw_cfg[] = { WT_CONFIG_BASE( + session, WT_SESSION_open_cursor), "raw", NULL }; - cjoin = (WT_CURSOR_JOIN *)cursor; + main_uri = NULL; + idx = entry->index; + + newsize = strlen(cjoin->table->name) + idx->colconf.len + 1; + WT_ERR(__wt_calloc(session, 1, newsize, &main_uri)); + snprintf(main_uri, newsize, "%s%.*s", + cjoin->table->name, (int)idx->colconf.len, idx->colconf.str); + WT_ERR(__wt_open_cursor(session, main_uri, + (WT_CURSOR *)cjoin, raw_cfg, &entry->main)); + if (idx->extractor == NULL) { + /* + * Add no-op padding so trailing 'u' formats are not + * transformed to 'U'. This matches what happens in + * the index. We don't do this when we have an + * extractor, extractors already use the padding + * byte trick. 
+ */ + len = strlen(entry->main->value_format) + 3; + WT_ERR(__wt_calloc(session, len, 1, &newformat)); + snprintf(newformat, len, "%s0x", entry->main->value_format); + __wt_free(session, entry->main->value_format); + entry->main->value_format = newformat; + } - CURSOR_API_CALL(cursor, session, reset, NULL); +err: __wt_free(session, main_uri); + return (ret); +} - if (F_ISSET(cjoin, WT_CURJOIN_INITIALIZED)) - WT_ERR(__curjoin_entry_iter_reset(cjoin->iter)); +/* + * __curjoin_pack_recno -- + * Pack the given recno into a buffer; prepare an item referencing it. + * + */ +static int +__curjoin_pack_recno(WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf, + size_t bufsize, WT_ITEM *item) +{ + WT_SESSION *wtsession; + size_t sz; -err: API_END_RET(session, ret); + wtsession = (WT_SESSION *)session; + WT_RET(wiredtiger_struct_size(wtsession, &sz, "r", r)); + WT_ASSERT(session, sz < bufsize); + WT_RET(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r)); + item->size = sz; + item->data = buf; + return (0); } /* - * __curjoin_close -- - * WT_CURSOR::close for join cursors. + * __curjoin_reset -- + * WT_CURSOR::reset for join cursors. 
*/ static int -__curjoin_close(WT_CURSOR *cursor) +__curjoin_reset(WT_CURSOR *cursor) { WT_CURSOR_JOIN *cjoin; - WT_CURSOR_JOIN_ENDPOINT *end; - WT_CURSOR_JOIN_ENTRY *entry; WT_DECL_RET; WT_SESSION_IMPL *session; - u_int i; cjoin = (WT_CURSOR_JOIN *)cursor; - CURSOR_API_CALL(cursor, session, close, NULL); - - __wt_schema_release_table(session, cjoin->table); - /* These are owned by the table */ - cursor->internal_uri = NULL; - cursor->key_format = NULL; - if (cjoin->projection != NULL) { - __wt_free(session, cjoin->projection); - __wt_free(session, cursor->value_format); - } - - for (entry = cjoin->entries, i = 0; i < cjoin->entries_next; - entry++, i++) { - if (entry->main != NULL) - WT_TRET(entry->main->close(entry->main)); - if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM)) - WT_TRET(__wt_bloom_close(entry->bloom)); - for (end = &entry->ends[0]; - end < &entry->ends[entry->ends_next]; end++) { - F_CLR(end->cursor, WT_CURSTD_JOINED); - if (F_ISSET(end, WT_CURJOIN_END_OWN_CURSOR)) - WT_TRET(end->cursor->close(end->cursor)); - } - __wt_free(session, entry->ends); - __wt_free(session, entry->repack_format); - } + JOINABLE_CURSOR_API_CALL(cursor, session, reset, NULL); if (cjoin->iter != NULL) - WT_TRET(__curjoin_entry_iter_close(cjoin->iter)); - __wt_free(session, cjoin->entries); - WT_TRET(__wt_cursor_close(cursor)); + WT_ERR(__curjoin_iter_reset(cjoin->iter)); err: API_END_RET(session, ret); } /* + * __curjoin_split_key -- + * Copy the primary key from a cursor (either main table or index) + * to another cursor. When copying from an index file, the index + * key is also returned. 
+ * + */ +static int +__curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + WT_ITEM *idxkey, WT_CURSOR *tocur, WT_CURSOR *fromcur, + const char *repack_fmt, bool isindex) +{ + WT_CURSOR *firstcg_cur; + WT_CURSOR_INDEX *cindex; + WT_ITEM *keyp; + const uint8_t *p; + + if (isindex) { + cindex = ((WT_CURSOR_INDEX *)fromcur); + /* + * Repack tells us where the index key ends; advance past + * that to get where the raw primary key starts. + */ + WT_RET(__wt_struct_repack(session, cindex->child->key_format, + repack_fmt != NULL ? repack_fmt : cindex->iface.key_format, + &cindex->child->key, idxkey)); + WT_ASSERT(session, cindex->child->key.size > idxkey->size); + tocur->key.data = (uint8_t *)idxkey->data + idxkey->size; + tocur->key.size = cindex->child->key.size - idxkey->size; + if (WT_CURSOR_RECNO(tocur)) { + p = (const uint8_t *)tocur->key.data; + WT_RET(__wt_vunpack_uint(&p, tocur->key.size, + &tocur->recno)); + } else + tocur->recno = 0; + } else { + firstcg_cur = ((WT_CURSOR_TABLE *)fromcur)->cg_cursors[0]; + keyp = &firstcg_cur->key; + if (WT_CURSOR_RECNO(tocur)) { + WT_ASSERT(session, keyp->size == sizeof(uint64_t)); + tocur->recno = *(uint64_t *)keyp->data; + WT_RET(__curjoin_pack_recno(session, tocur->recno, + cjoin->recno_buf, sizeof(cjoin->recno_buf), + &tocur->key)); + } else { + WT_ITEM_SET(tocur->key, *keyp); + tocur->recno = 0; + } + idxkey->data = NULL; + idxkey->size = 0; + } + return (0); +} + +/* * __wt_curjoin_open -- * Initialize a join cursor. 
* @@ -977,35 +1357,53 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count) { WT_CURSOR_INDEX *cindex; + WT_CURSOR_JOIN *child; WT_CURSOR_JOIN_ENDPOINT *end; WT_CURSOR_JOIN_ENTRY *entry; - WT_DECL_RET; - bool hasins, needbloom, range_eq; - char *main_uri, *newformat; - const char *raw_cfg[] = { WT_CONFIG_BASE( - session, WT_SESSION_open_cursor), "raw", NULL }; - size_t len, newsize; + bool hasins, needbloom, nested, range_eq; + size_t len; u_int i, ins, nonbloom; + uint8_t endrange; entry = NULL; hasins = needbloom = false; - ins = 0; /* -Wuninitialized */ - main_uri = NULL; - nonbloom = 0; /* -Wuninitialized */ + ins = nonbloom = 0; /* -Wuninitialized */ - for (i = 0; i < cjoin->entries_next; i++) { - if (cjoin->entries[i].index == idx) { - entry = &cjoin->entries[i]; - break; - } - if (!needbloom && i > 0 && - !F_ISSET(&cjoin->entries[i], WT_CURJOIN_ENTRY_BLOOM)) { - needbloom = true; - nonbloom = i; + if (cjoin->entries_next == 0) { + if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION)) + F_SET(cjoin, WT_CURJOIN_DISJUNCTION); + } else if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) && + !F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) + WT_RET_MSG(session, EINVAL, + "operation=or does not match previous operation=and"); + else if (!LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) && + F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) + WT_RET_MSG(session, EINVAL, + "operation=and does not match previous operation=or"); + + nested = WT_PREFIX_MATCH(ref_cursor->uri, "join:"); + if (!nested) + for (i = 0; i < cjoin->entries_next; i++) { + if (cjoin->entries[i].index == idx && + cjoin->entries[i].subjoin == NULL) { + entry = &cjoin->entries[i]; + break; + } + if (!needbloom && i > 0 && + !F_ISSET(&cjoin->entries[i], + WT_CURJOIN_ENTRY_BLOOM)) { + needbloom = true; + nonbloom = i; + } } + else { + if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM)) + WT_RET_MSG(session, EINVAL, + "Bloom filters cannot be used with subjoins"); } + if 
(entry == NULL) { - WT_ERR(__wt_realloc_def(session, &cjoin->entries_allocated, + WT_RET(__wt_realloc_def(session, &cjoin->entries_allocated, cjoin->entries_next + 1, &cjoin->entries)); if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM) && needbloom) { /* @@ -1034,13 +1432,13 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, } else { /* Merge the join into an existing entry for this index */ if (count != 0 && entry->count != 0 && entry->count != count) - WT_ERR_MSG(session, EINVAL, + WT_RET_MSG(session, EINVAL, "count=%" PRIu64 " does not match " "previous count=%" PRIu64 " for this index", count, entry->count); if (LF_MASK(WT_CURJOIN_ENTRY_BLOOM) != F_MASK(entry, WT_CURJOIN_ENTRY_BLOOM)) - WT_ERR_MSG(session, EINVAL, + WT_RET_MSG(session, EINVAL, "join has incompatible strategy " "values for the same index"); @@ -1063,19 +1461,20 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, for (i = 0; i < entry->ends_next; i++) { end = &entry->ends[i]; range_eq = (range == WT_CURJOIN_END_EQ); + endrange = WT_CURJOIN_END_RANGE(end); if ((F_ISSET(end, WT_CURJOIN_END_GT) && ((range & WT_CURJOIN_END_GT) != 0 || range_eq)) || (F_ISSET(end, WT_CURJOIN_END_LT) && ((range & WT_CURJOIN_END_LT) != 0 || range_eq)) || - (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ && + (endrange == WT_CURJOIN_END_EQ && (range & (WT_CURJOIN_END_LT | WT_CURJOIN_END_GT)) != 0)) - WT_ERR_MSG(session, EINVAL, + WT_RET_MSG(session, EINVAL, "join has overlapping ranges"); if (range == WT_CURJOIN_END_EQ && - WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ && + endrange == WT_CURJOIN_END_EQ && !F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) - WT_ERR_MSG(session, EINVAL, + WT_RET_MSG(session, EINVAL, "compare=eq can only be combined " "using operation=or"); @@ -1086,6 +1485,7 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, if (!hasins && ((range & WT_CURJOIN_END_GT) != 0 || (range == WT_CURJOIN_END_EQ && + endrange != WT_CURJOIN_END_EQ && !F_ISSET(end, 
WT_CURJOIN_END_GT)))) { ins = i; hasins = true; @@ -1098,70 +1498,35 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, entry->bloom_hash_count = WT_MAX(entry->bloom_hash_count, bloom_hash_count); } - WT_ERR(__curjoin_insert_endpoint(session, entry, - hasins ? ins : entry->ends_next, &end)); - end->cursor = ref_cursor; - F_SET(end, range); - - /* Open the main file with a projection of the indexed columns. */ - if (entry->main == NULL && idx != NULL) { - newsize = strlen(cjoin->table->name) + idx->colconf.len + 1; - WT_ERR(__wt_calloc(session, 1, newsize, &main_uri)); - snprintf(main_uri, newsize, "%s%.*s", - cjoin->table->name, (int)idx->colconf.len, - idx->colconf.str); - WT_ERR(__wt_open_cursor(session, main_uri, - (WT_CURSOR *)cjoin, raw_cfg, &entry->main)); - if (idx->extractor == NULL) { + if (nested) { + child = (WT_CURSOR_JOIN *)ref_cursor; + entry->subjoin = child; + child->parent = cjoin; + } else { + WT_RET(__curjoin_insert_endpoint(session, entry, + hasins ? ins : entry->ends_next, &end)); + end->cursor = ref_cursor; + F_SET(end, range); + + if (entry->main == NULL && idx != NULL) { /* - * Add no-op padding so trailing 'u' formats are not - * transformed to 'U'. This matches what happens in - * the index. We don't do this when we have an - * extractor, extractors already use the padding - * byte trick. + * Open the main file with a projection of the + * indexed columns. */ - len = strlen(entry->main->value_format) + 3; - WT_ERR(__wt_calloc(session, len, 1, &newformat)); - snprintf(newformat, len, "%s0x", - entry->main->value_format); - __wt_free(session, entry->main->value_format); - entry->main->value_format = newformat; - } + WT_RET(__curjoin_open_main(session, cjoin, entry)); - /* - * When we are repacking index keys to remove the primary - * key, we never want to transform trailing 'u'. Use no-op - * padding to force this. 
- */ - cindex = (WT_CURSOR_INDEX *)ref_cursor; - len = strlen(cindex->iface.key_format) + 3; - WT_ERR(__wt_calloc(session, len, 1, &entry->repack_format)); - snprintf(entry->repack_format, len, "%s0x", - cindex->iface.key_format); + /* + * When we are repacking index keys to remove the + * primary key, we never want to transform trailing + * 'u'. Use no-op padding to force this. + */ + cindex = (WT_CURSOR_INDEX *)ref_cursor; + len = strlen(cindex->iface.key_format) + 3; + WT_RET(__wt_calloc(session, len, 1, + &entry->repack_format)); + snprintf(entry->repack_format, len, "%s0x", + cindex->iface.key_format); + } } - -err: __wt_free(session, main_uri); - return (ret); -} - -/* - * __curjoin_insert_endpoint -- - * Insert a new entry into the endpoint array for the join entry. - */ -static int -__curjoin_insert_endpoint(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, - u_int pos, WT_CURSOR_JOIN_ENDPOINT **newendp) -{ - WT_CURSOR_JOIN_ENDPOINT *newend; - - WT_RET(__wt_realloc_def(session, &entry->ends_allocated, - entry->ends_next + 1, &entry->ends)); - newend = &entry->ends[pos]; - memmove(newend + 1, newend, - (entry->ends_next - pos) * sizeof(WT_CURSOR_JOIN_ENDPOINT)); - memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT)); - entry->ends_next++; - *newendp = newend; - return (0); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_json.c b/src/third_party/wiredtiger/src/cursor/cur_json.c index fcb66d3e8b3..f0fa0d8aec2 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_json.c +++ b/src/third_party/wiredtiger/src/cursor/cur_json.c @@ -48,6 +48,10 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, case 't': \ WT_RET(json_uint_arg(session, &jstr, &pv.u.u)); \ break; \ + case 'u': \ + WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \ + pv.type = 'K'; \ + break; \ /* User format strings have already been validated. 
*/ \ WT_ILLEGAL_VALUE(session); \ } \ @@ -62,7 +66,7 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, u_char *buf, size_t bufsz, WT_CONFIG_ITEM *name) { WT_PACK_VALUE *pv; - const char *p, *end; + const u_char *p, *end; size_t s, n; pv = (WT_PACK_VALUE *)voidpv; @@ -82,7 +86,7 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, case 'S': /* Account for '"' quote in front and back. */ s += 2; - p = (const char *)pv->u.s; + p = (const u_char *)pv->u.s; if (bufsz > 0) { *buf++ = '"'; bufsz--; @@ -118,7 +122,7 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, case 'U': case 'u': s += 2; - p = (const char *)pv->u.item.data; + p = (const u_char *)pv->u.item.data; end = p + pv->u.item.size; if (bufsz > 0) { *buf++ = '"'; @@ -310,14 +314,14 @@ __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor) * Can be called with null buf for sizing. */ size_t -__wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, bool force_unicode) +__wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode) { - char abbrev; + u_char abbrev; if (!force_unicode) { - if (isprint(ch) && ch != '\\' && ch != '"') { + if (__wt_isprint(ch) && ch != '\\' && ch != '"') { if (bufsz >= 1) - *buf = (u_char)ch; + *buf = ch; return (1); } else { abbrev = '\0'; @@ -342,7 +346,7 @@ __wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, bool force_unicode) if (abbrev != '\0') { if (bufsz >= 2) { *buf++ = '\\'; - *buf = (u_char)abbrev; + *buf = abbrev; } return (2); } @@ -386,7 +390,7 @@ __wt_json_column_init(WT_CURSOR *cursor, const char *keyformat, } for (nkeys = 0; *keyformat; keyformat++) - if (!isdigit(*keyformat)) + if (!__wt_isdigit((u_char)*keyformat)) nkeys++; p = beginkey; @@ -409,12 +413,13 @@ __wt_json_column_init(WT_CURSOR *cursor, const char *keyformat, #define MATCH_KEYWORD(session, in, result, keyword, matchval) do { \ size_t _kwlen = strlen(keyword); \ - if (strncmp(in, keyword, _kwlen) == 0 && !isalnum(in[_kwlen])) { \ + if 
(strncmp(in, keyword, _kwlen) == 0 && \ + !__wt_isalnum((u_char)in[_kwlen])) { \ in += _kwlen; \ result = matchval; \ } else { \ const char *_bad = in; \ - while (isalnum(*in)) \ + while (__wt_isalnum((u_char)*in)) \ in++; \ __wt_errx(session, "unknown keyword \"%.*s\" in JSON", \ (int)(in - _bad), _bad); \ @@ -456,7 +461,7 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, result = -1; session = (WT_SESSION_IMPL *)wt_session; - while (isspace(*src)) + while (__wt_isspace((u_char)*src)) src++; *tokstart = src; @@ -493,7 +498,7 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, "invalid Unicode within JSON string"); return (-1); } - src += 5; + src += 4; } backslash = false; } @@ -516,13 +521,12 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, isfloat = false; if (*src == '-') src++; - while ((ch = *src) != '\0' && isdigit(ch)) + while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch)) src++; if (*src == '.') { isfloat = true; src++; - while ((ch = *src) != '\0' && - isdigit(ch)) + while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch)) src++; } if (*src == 'e' || *src == 'E') { @@ -530,8 +534,7 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, src++; if (*src == '+' || *src == '-') src++; - while ((ch = *src) != '\0' && - isdigit(ch)) + while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch)) src++; } result = isfloat ? 
'f' : 'i'; @@ -556,10 +559,10 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, default: /* An illegal token, move past it anyway */ bad = src; - isalph = isalnum(*src); + isalph = __wt_isalnum((u_char)*src); src++; if (isalph) - while (*src != '\0' && isalnum(*src)) + while (*src != '\0' && __wt_isalnum((u_char)*src)) src++; __wt_errx(session, "unknown token \"%.*s\" in JSON", (int)(src - bad), bad); @@ -840,20 +843,17 @@ __wt_json_strlen(const char *src, size_t srclen) if (__wt_hex2byte((const u_char *)src, &lo)) return (-1); src += 2; - /* RFC 3629 */ - if (hi >= 0x8) { - /* 3 bytes total */ - dstlen += 2; - } - else if (hi != 0 || lo >= 0x80) { - /* 2 bytes total */ - dstlen++; - } - /* else 1 byte total */ + if (hi != 0) + /* + * For our dump representation, + * every Unicode character on input + * represents a single byte. + */ + return (-1); } - } + } else + src++; dstlen++; - src++; } if (src != srcend) return (-1); /* invalid input, e.g. final char is '\\' */ @@ -867,55 +867,58 @@ __wt_json_strlen(const char *src, size_t srclen) * the result if zero padded. */ int -__wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen) +__wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, + const char *src, size_t srclen) { - char *dst; + WT_SESSION_IMPL *session; + char ch, *dst; const char *dstend, *srcend; u_char hi, lo; + session = (WT_SESSION_IMPL *)wt_session; + dst = *pdst; dstend = dst + dstlen; srcend = src + srclen; while (src < srcend && dst < dstend) { /* JSON can include any UTF-8 expressed in 4 hex chars. 
*/ - if (*src == '\\') { - if (*++src == 'u') { - if (__wt_hex2byte((const u_char *)++src, &hi)) + if ((ch = *src++) == '\\') + switch (ch = *src++) { + case 'u': + if (__wt_hex2byte((const u_char *)src, &hi)) return (EINVAL); src += 2; if (__wt_hex2byte((const u_char *)src, &lo)) return (EINVAL); src += 2; - /* RFC 3629 */ - if (hi >= 0x8) { - /* 3 bytes total */ - /* byte 0: 1110HHHH */ - /* byte 1: 10HHHHLL */ - /* byte 2: 10LLLLLL */ - *dst++ = (char)(0xe0 | - ((hi >> 4) & 0x0f)); - *dst++ = (char)(0x80 | - ((hi << 2) & 0x3c) | - ((lo >> 6) & 0x03)); - *dst++ = (char)(0x80 | (lo & 0x3f)); - } else if (hi != 0 || lo >= 0x80) { - /* 2 bytes total */ - /* byte 0: 110HHHLL */ - /* byte 1: 10LLLLLL */ - *dst++ = (char)(0xc0 | - (hi << 2) | - ((lo >> 6) & 0x03)); - *dst++ = (char)(0x80 | (lo & 0x3f)); - } else - /* else 1 byte total */ - /* byte 0: 0LLLLLLL */ - *dst++ = (char)lo; + if (hi != 0) { + __wt_errx(NULL, "Unicode \"%6.6s\"" + " byte out of range in JSON", + src - 6); + return (EINVAL); + } + *dst++ = (char)lo; + break; + case 'f': + *dst++ = '\f'; + break; + case 'n': + *dst++ = '\n'; + break; + case 'r': + *dst++ = '\r'; + break; + case 't': + *dst++ = '\t'; + break; + case '"': + case '\\': + *dst++ = ch; + break; + WT_ILLEGAL_VALUE(session); } - else - *dst++ = *src; - } else - *dst++ = *src; - src++; + else + *dst++ = ch; } if (src != srcend) return (ENOMEM); diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c index 7839971f975..8bb8931f36f 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_std.c +++ b/src/third_party/wiredtiger/src/cursor/cur_std.c @@ -435,7 +435,7 @@ __wt_cursor_get_valuev(WT_CURSOR *cursor, va_list ap) } else if (WT_STREQ(fmt, "S")) *va_arg(ap, const char **) = cursor->value.data; else if (WT_STREQ(fmt, "t") || - (isdigit(fmt[0]) && WT_STREQ(fmt + 1, "t"))) + (__wt_isdigit((u_char)fmt[0]) && WT_STREQ(fmt + 1, "t"))) *va_arg(ap, uint8_t *) = *(uint8_t 
*)cursor->value.data; else ret = __wt_struct_unpackv(session, @@ -496,7 +496,7 @@ __wt_cursor_set_valuev(WT_CURSOR *cursor, va_list ap) sz = strlen(str) + 1; buf->data = str; } else if (WT_STREQ(fmt, "t") || - (isdigit(fmt[0]) && WT_STREQ(fmt + 1, "t"))) { + (__wt_isdigit((u_char)fmt[0]) && WT_STREQ(fmt + 1, "t"))) { sz = 1; WT_ERR(__wt_buf_initsize(session, buf, sz)); *(uint8_t *)buf->mem = (uint8_t)va_arg(ap, int); @@ -571,7 +571,6 @@ __wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_SESSION_IMPL *session; int cmp; - session = (WT_SESSION_IMPL *)cursor->session; CURSOR_API_CALL(cursor, session, equals, NULL); WT_ERR(cursor->compare(cursor, other, &cmp)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c index 9eb88ec6fcd..6d50523043a 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_table.c +++ b/src/third_party/wiredtiger/src/cursor/cur_table.c @@ -972,7 +972,8 @@ __wt_curtable_open(WT_SESSION_IMPL *session, if (0) { err: if (*cursorp != NULL) { - WT_TRET(__wt_cursor_close(*cursorp)); + if (*cursorp != cursor) + WT_TRET(__wt_cursor_close(*cursorp)); *cursorp = NULL; } WT_TRET(__curtable_close(cursor)); diff --git a/src/third_party/wiredtiger/src/docs/Doxyfile b/src/third_party/wiredtiger/src/docs/Doxyfile index 4c1682de6eb..69e9716b425 100644 --- a/src/third_party/wiredtiger/src/docs/Doxyfile +++ b/src/third_party/wiredtiger/src/docs/Doxyfile @@ -1570,6 +1570,8 @@ PREDEFINED = DOXYGEN \ __wt_event_handler:=WT_EVENT_HANDLER \ __wt_extension_api:=WT_EXTENSION_API \ __wt_extractor:=WT_EXTRACTOR \ + __wt_file_handle:=WT_FILE_HANDLE \ + __wt_file_system:=WT_FILE_SYSTEM \ __wt_item:=WT_ITEM \ __wt_lsn:=WT_LSN \ __wt_session:=WT_SESSION \ diff --git a/src/third_party/wiredtiger/src/docs/backup.dox b/src/third_party/wiredtiger/src/docs/backup.dox index 7742e698ac8..45edc85d6a5 100644 --- a/src/third_party/wiredtiger/src/docs/backup.dox +++ 
b/src/third_party/wiredtiger/src/docs/backup.dox @@ -42,6 +42,12 @@ Copying the database files for a backup does not require any special alignment or block size (specifically, Linux or Windows filesystems that do not support read/write isolation can be safely read for backups). +The database file may grow in size during the copy, and the file copy +should not consider that an error. Blocks appended to the file after the +copy starts can be safely ignored, that is, it is correct for the copy +to determine an initial size of the file and then copy that many bytes, +ignoring any bytes appended after the backup cursor was opened. + The cursor must not be closed until all of the files have been copied, however, there is no requirement the files be copied in any order or in any relationship to the WT_CURSOR::next calls, only that all files have @@ -98,29 +104,35 @@ and removing log files from the original database home: 1. Perform a full backup of the database (as described above). -2. Perform a full database checkpoint. - -3. Open a cursor on the \c "backup:" data source, with the - \c "target=(\"log:\\")" target specified, which begins the - process of an incremental backup. +2. Open a cursor on the \c "backup:" data source, configured with the + \c "target=(\"log:\\")" target specified, which begins the process + of an incremental backup. -4. Copy each log file returned by the WT_CURSOR::next method to the backup +3. Copy each log file returned by the WT_CURSOR::next method to the backup directory. It is not an error to copy a log file which has been copied before, but care should be taken to ensure each log file is completely copied - as the most recent log file may change in size while being copied. + as the most recent log file may grow in size while being copied. -5. If all log files have been successfully copied, archive the log +4. 
If all log files have been successfully copied, archive the log files by calling the WT_SESSION::truncate method with the URI - <code>log:</code> and specifying the backup cursor as the - start cursor to that method. + <code>log:</code> and specifying the backup cursor as the start + cursor to that method. (Note there is no requirement backups be + coordinated with database checkpoints, however, an incremental backup + will repeatedly copy the same files, and will not make additional log + files available for archival, unless there was a checkpoint after the + previous incremental backup.) + +5. Close the backup cursor. + +Steps 2-5 can be repeated any number of times before step 1 is repeated. +Full and incremental backups may be repeated as long as the backup +database directory has not been opened and recovery run. Once recovery +has run in a backup directory, you can no longer back up to that +database directory. -6. Close the backup cursor. +An example of opening the backup data source for an incremental backup: -Steps 2-6 can be repeated any number of times before step 1 is -repeated. These steps can be repeated as long as the backup database -directory has not been opened, recovery run and become live. Once -the database becomes live, you must repeat all steps 1-6 to another, -different backup database directory. +@snippet ex_all.c incremental backup @section backup_o_direct Backup and O_DIRECT diff --git a/src/third_party/wiredtiger/src/docs/cursor-join.dox b/src/third_party/wiredtiger/src/docs/cursor-join.dox index 51da6b174bf..5ea064a250b 100644 --- a/src/third_party/wiredtiger/src/docs/cursor-join.dox +++ b/src/third_party/wiredtiger/src/docs/cursor-join.dox @@ -14,6 +14,31 @@ Here is an example using join cursors: Joins support various comparison operators: \c "eq", \c "gt", \c "ge", \c "lt", \c "le". 
Ranges with lower and upper bounds can also be specified, by joining two cursors on the same index, for example, one with \c "compare=ge" and another \c "compare=lt". In addition to joining indices, the main table can be joined so that a range of primary keys can be specified. +By default, a join cursor returns a conjunction, that is, all keys that +satisfy all the joined comparisons. By specifying a configuration with \c +"operation=or", a join cursor will return a disjunction, or all keys that +satisfy at least one of the joined comparisons. More complex joins can be +composed by specifying another join cursor as the reference cursor in a join +call. + +Here is an example using these concepts to show a conjunction of a disjunction: + +@snippet ex_schema.c Complex join cursors + All the joins should be done on the join cursor before WT_CURSOR::next is called. Calling WT_CURSOR::next on a join cursor for the first time populates any bloom filters and performs other initialization. The join cursor's key is the primary key (the key for the main table), and its value is the entire set of values of the main table. A join cursor can be created with a projection by appending \c "(col1,col2,...)" to the URI if a different set of values is needed. +Keys returned from the join cursor are ordered according to the +first reference cursor joined. For example, if an index cursor was joined +first, that index determines the order of results. If the join cursor +uses disjunctions, then the ordering of all joins determines the order. +The first join in a conjunctive join, or all joins in a disjunctive join, +are distinctive in that they are iterated internally as the cursor join +returns values in order. Any bloom filters specified on the +joins that are used for iteration are not useful, and are silently ignored. + +When disjunctions are used where the sets of keys overlap on these 'iteration +joins', a join cursor will return duplicates. 
A join cursor never returns +duplicates unless \c "operation=or" is used in a join configuration, or unless +the first joined cursor is itself a join cursor that would return duplicates. + */ diff --git a/src/third_party/wiredtiger/src/docs/custom-file-systems.dox b/src/third_party/wiredtiger/src/docs/custom-file-systems.dox new file mode 100644 index 00000000000..d496002b0fb --- /dev/null +++ b/src/third_party/wiredtiger/src/docs/custom-file-systems.dox @@ -0,0 +1,47 @@ +/*! @page custom_file_systems Custom File Systems + +Applications can provide a custom file system implementation that will be +used by WiredTiger to interact with the I/O subsystem using the +WT_FILE_SYSTEM and WT_FILE_HANDLE interfaces. + +It is not necessary for all file system providers to implement all methods +in the WT_FILE_SYSTEM and WT_FILE_HANDLE structures, and documentation for +those structures indicate which methods are optional. Methods which are not +provided should be set to NULL. + +Generally, function pointers should not be changed once a handle is +created. An exception to this are the WT_FILE_HANDLE::fallocate and +WT_FILE_HANDLE::fallocate_nolock methods, because a file system +implementation may not know what support the system provides until file +allocation is attempted. See the WiredTiger POSIX file system +implementation for an example of how the fallocate method might be +changed after initialization. + +WT_FILE_SYSTEM and WT_FILE_HANDLE methods are expected to return POSIX +1003.1 or ANSI C standard error codes on failure. Custom file systems +on Windows systems can use the WT_EXTENSION_API::map_windows_error +method to translate Windows system errors into POSIX system errors for +return to WiredTiger. + +WT_FILE_SYSTEM and WT_FILE_HANDLE methods which fail but not fatally +(for example, a WT_FILE_HANDLE::truncate method call which fails because +the file is currently mapped into memory), should return EBUSY. 
+ +WT_FILE_SYSTEM and WT_FILE_HANDLE methods which fail fatally, but not +in all cases (for example, a WT_FILE_HANDLE::fadvise method call which +only supports ::WT_FILE_HANDLE_WILLNEED), should return ENOTSUP. + +Additionally, custom file system functions may return ::WT_PANIC to +shut down the system. + +Unless explicitly stated otherwise, WiredTiger may invoke methods on the +WT_FILE_SYSTEM and WT_FILE_HANDLE interfaces from multiple threads +concurrently. It is the responsibility of the implementation to protect +any shared data. + +See @ex_ref{ex_file_system.c} for an example implementation of a custom +file system; the WiredTiger code for a POSIX standard file system is in +the public domain and may also be useful as a starting point for a custom +file system implementation. + +*/ diff --git a/src/third_party/wiredtiger/src/docs/error-handling.dox b/src/third_party/wiredtiger/src/docs/error-handling.dox index d91a126ee21..62be498fc15 100644 --- a/src/third_party/wiredtiger/src/docs/error-handling.dox +++ b/src/third_party/wiredtiger/src/docs/error-handling.dox @@ -52,6 +52,9 @@ This error indicates an underlying problem that requires the application exit an @par <code>WT_RUN_RECOVERY</code> This error is generated when wiredtiger_open is configured to return an error if recovery is required to use the database. +@par <code>WT_CACHE_FULL</code> +This error is only generated when wiredtiger_open is configured to run in-memory, and an insert or update operation requires more than the configured cache size to complete. The operation may be retried; if a transaction is in progress, it should be rolled back and the operation retried in a new transaction. 
+ @if IGNORE_BUILT_BY_API_ERR_END @endif diff --git a/src/third_party/wiredtiger/src/docs/examples.dox b/src/third_party/wiredtiger/src/docs/examples.dox index 3ed7357b52c..c5a106a00c9 100644 --- a/src/third_party/wiredtiger/src/docs/examples.dox +++ b/src/third_party/wiredtiger/src/docs/examples.dox @@ -9,9 +9,6 @@ Show how to configure and use asynchronous operations. A more complex schema based on a call center example, showing how to map some SQL constructs onto the WiredTiger API. -@example ex_config.c -Shows how to configure some properties of the database and tables. - @example ex_cursor.c Shows some common cursor types and operations. @@ -55,4 +52,7 @@ Shows how to access the database log files. @example ex_thread.c Shows how to access a database with multiple threads. +@example ex_file_system.c +Shows how to extend WiredTiger with a custom file-system implementation. + */ diff --git a/src/third_party/wiredtiger/src/docs/in-memory.dox b/src/third_party/wiredtiger/src/docs/in-memory.dox new file mode 100644 index 00000000000..df221dc34d6 --- /dev/null +++ b/src/third_party/wiredtiger/src/docs/in-memory.dox @@ -0,0 +1,12 @@ +/*! @m_page{{c,java},in_memory,In-memory databases} + +The ::wiredtiger_open \c in_memory configuration changes WiredTiger to +run in cache without writing to a backing disk. Data is limited to the +configured cache size. + +If \c in_memory is configured, WT_CURSOR::insert and WT_CURSOR::update +methods may return an additional error, ::WT_CACHE_FULL, indicating the +insert or update operation requires more than the configured cache size +to complete. If a transaction is in progress, it should be rolled back. 
+ + */ diff --git a/src/third_party/wiredtiger/src/docs/programming.dox b/src/third_party/wiredtiger/src/docs/programming.dox index f717f4ed1fe..81e612e8ee8 100644 --- a/src/third_party/wiredtiger/src/docs/programming.dox +++ b/src/third_party/wiredtiger/src/docs/programming.dox @@ -41,6 +41,7 @@ each of which is ordered by one or more columns. - @subpage compact - @subpage checkpoint - @subpage durability +- @subpage in_memory - @subpage cursor_join - @subpage cursor_log - @ref transaction_named_snapshots @@ -55,6 +56,7 @@ each of which is ordered by one or more columns. - @subpage custom_collators - @subpage custom_extractors - @subpage custom_data_sources +- @subpage custom_file_systems - @subpage helium @m_endif diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok index efc306568cd..96fe04d7426 100644 --- a/src/third_party/wiredtiger/src/docs/spell.ok +++ b/src/third_party/wiredtiger/src/docs/spell.ok @@ -25,6 +25,7 @@ EBUSY ECMA EINVAL ENCRYPTOR +ENOTSUP EmpId Encryptors Facebook @@ -80,6 +81,7 @@ Seward's SiH TXT URIs +WILLNEED WiredTiger WiredTiger's WiredTigerCheckpoint @@ -178,6 +180,8 @@ desc destructor destructors dev +disjunction +disjunctions distclean dl dll @@ -208,6 +212,7 @@ errno exe fadvise failchk +fallocate fd's fdatasync fieldname @@ -331,6 +336,7 @@ nocase nocasecoll nodup noflush +nolock nolocking nommap nop diff --git a/src/third_party/wiredtiger/src/docs/tune-cache.dox b/src/third_party/wiredtiger/src/docs/tune-cache.dox index c9603085905..505da436277 100644 --- a/src/third_party/wiredtiger/src/docs/tune-cache.dox +++ b/src/third_party/wiredtiger/src/docs/tune-cache.dox @@ -11,9 +11,9 @@ The cache size for the database is normally configured by setting the function. The cache size can be adjusted after the open call with WT_CONNECTION::reconfigure. 
-An example of setting a cache size to 500MB: +An example of setting a cache size to 5GB: -@snippet ex_config.c configure cache size +@snippet ex_all.c Open a connection The effectiveness of the chosen cache size can be measured by reviewing the page eviction statistics for the database. diff --git a/src/third_party/wiredtiger/src/docs/wtperf.dox b/src/third_party/wiredtiger/src/docs/wtperf.dox index 6d8dcab8f65..e06272d117c 100644 --- a/src/third_party/wiredtiger/src/docs/wtperf.dox +++ b/src/third_party/wiredtiger/src/docs/wtperf.dox @@ -232,6 +232,8 @@ operation,two for every second operation, three for every third operation etc. @par sess_config (string, default=) session configuration string +@par session_count_idle (unsigned int, default=0) +number of idle sessions to create. Default 0. @par table_config (string, default=key_format=S,value_format=S,type=lsm,exclusive=true,allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb,split_pct=100) table configuration string @par table_count (unsigned int, default=1) diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c index 4b9e2442f32..b0cd50cc655 100644 --- a/src/third_party/wiredtiger/src/evict/evict_file.c +++ b/src/third_party/wiredtiger/src/evict/evict_file.c @@ -87,7 +87,10 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) __wt_page_can_evict(session, ref, NULL)); __wt_ref_out(session, ref); break; - WT_ILLEGAL_VALUE_ERR(session); + case WT_SYNC_CHECKPOINT: + case WT_SYNC_WRITE_LEAVES: + WT_ERR(__wt_illegal_value(session, NULL)); + break; } } diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 360a3f69cd2..8ea487bbf83 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -9,16 +9,15 @@ #include "wt_internal.h" static int __evict_clear_all_walks(WT_SESSION_IMPL *); -static int 
__evict_clear_walks(WT_SESSION_IMPL *); +static int __evict_helper(WT_SESSION_IMPL *); static int WT_CDECL __evict_lru_cmp(const void *, const void *); static int __evict_lru_pages(WT_SESSION_IMPL *, bool); static int __evict_lru_walk(WT_SESSION_IMPL *); static int __evict_page(WT_SESSION_IMPL *, bool); static int __evict_pass(WT_SESSION_IMPL *); -static int __evict_walk(WT_SESSION_IMPL *); -static int __evict_walk_file(WT_SESSION_IMPL *, u_int, u_int *); -static WT_THREAD_RET __evict_worker(void *); -static int __evict_server_work(WT_SESSION_IMPL *); +static int __evict_server(WT_SESSION_IMPL *, bool *); +static int __evict_walk(WT_SESSION_IMPL *, uint32_t); +static int __evict_walk_file(WT_SESSION_IMPL *, uint32_t, u_int *); /* * __evict_read_gen -- @@ -32,6 +31,11 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry) uint64_t read_gen; btree = entry->btree; + + /* Never prioritize empty slots. */ + if (entry->ref == NULL) + return (UINT64_MAX); + page = entry->ref->page; /* Any page set to the oldest generation should be discarded. */ @@ -66,15 +70,14 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry) * Qsort function: sort the eviction array. */ static int WT_CDECL -__evict_lru_cmp(const void *a_arg, const void *b_arg) +__evict_lru_cmp(const void *a, const void *b) { - const WT_EVICT_ENTRY *a = a_arg, *b = b_arg; - uint64_t a_score, b_score; + uint64_t a_lru, b_lru; - a_score = (a->ref == NULL ? UINT64_MAX : a->score); - b_score = (b->ref == NULL ? UINT64_MAX : b->score); + a_lru = __evict_read_gen(a); + b_lru = __evict_read_gen(b); - return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1); + return ((a_lru < b_lru) ? -1 : (a_lru == b_lru) ? 
0 : 1); } /* @@ -104,7 +107,8 @@ __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref) { WT_CACHE *cache; WT_EVICT_ENTRY *evict; - uint32_t i, elem; + uint32_t i, elem, q; + bool found; WT_ASSERT(session, __wt_ref_is_root(ref) || ref->state == WT_REF_LOCKED); @@ -114,18 +118,25 @@ __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref) return; cache = S2C(session)->cache; - __wt_spin_lock(session, &cache->evict_lock); - - elem = cache->evict_max; - for (i = 0, evict = cache->evict_queue; i < elem; i++, evict++) - if (evict->ref == ref) { - __evict_list_clear(session, evict); - break; - } - - WT_ASSERT(session, !F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU)); + __wt_spin_lock(session, &cache->evict_queue_lock); + + found = false; + for (q = 0; q < WT_EVICT_QUEUE_MAX && !found; q++) { + __wt_spin_lock(session, &cache->evict_queues[q].evict_lock); + elem = cache->evict_queues[q].evict_max; + for (i = 0, evict = cache->evict_queues[q].evict_queue; + i < elem; i++, evict++) + if (evict->ref == ref) { + found = true; + __evict_list_clear(session, evict); + break; + } + __wt_spin_unlock(session, &cache->evict_queues[q].evict_lock); + } + WT_ASSERT(session, + !F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU)); - __wt_spin_unlock(session, &cache->evict_lock); + __wt_spin_unlock(session, &cache->evict_queue_lock); } /* @@ -141,6 +152,7 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session) conn = S2C(session); cache = conn->cache; +#ifdef HAVE_VERBOSE if (WT_VERBOSE_ISSET(session, WT_VERB_EVICTSERVER)) { uint64_t bytes_inuse, bytes_max; @@ -154,104 +166,159 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session) bytes_inuse <= bytes_max ? "<=" : ">", bytes_max / WT_MEGABYTE)); } +#endif return (__wt_cond_auto_signal(session, cache->evict_cond)); } /* - * __evict_server -- - * Thread to evict pages from the cache. + * __evict_thread_run -- + * General wrapper for any eviction thread. 
*/ static WT_THREAD_RET -__evict_server(void *arg) +__evict_thread_run(void *arg) { WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION_IMPL *session; -#ifdef HAVE_DIAGNOSTIC - struct timespec now, stuck_ts; -#endif - uint64_t pages_evicted = 0; - u_int spins; + bool did_work; session = arg; conn = S2C(session); cache = conn->cache; +#ifdef HAVE_DIAGNOSTIC + if (session == conn->evict_session) + WT_ERR(__wt_epoch( + session, &cache->stuck_ts)); /* -Wuninitialized */ +#endif while (F_ISSET(conn, WT_CONN_EVICTION_RUN)) { - /* Evict pages from the cache as needed. */ - WT_ERR(__evict_pass(session)); - - if (!F_ISSET(conn, WT_CONN_EVICTION_RUN)) - break; - - /* - * Clear the walks so we don't pin pages while asleep, - * otherwise we can block applications evicting large pages. - */ - if (!F_ISSET(cache, WT_CACHE_STUCK)) { - for (spins = 0; (ret = __wt_spin_trylock( - session, &conn->dhandle_lock)) == EBUSY && - !F_ISSET(cache, WT_CACHE_CLEAR_WALKS); - spins++) { - if (spins < WT_THOUSAND) - __wt_yield(); - else - __wt_sleep(0, WT_THOUSAND); - } + if (conn->evict_tid_set && + __wt_spin_trylock(session, &cache->evict_pass_lock) == 0) { /* - * If we gave up acquiring the lock, that indicates a - * session is waiting for us to clear walks. Do that - * as part of a normal pass (without the handle list - * lock) to avoid deadlock. + * Cannot use WT_WITH_PASS_LOCK because this is a try + * lock. Fix when that is supported. We set the flag + * on both sessions because we may call clear_walk when + * we are walking with the walk session, locked. 
*/ - if (ret == EBUSY) - continue; - WT_ERR(ret); - ret = __evict_clear_all_walks(session); - __wt_spin_unlock(session, &conn->dhandle_lock); + F_SET(session, WT_SESSION_LOCKED_PASS); + F_SET(cache->walk_session, WT_SESSION_LOCKED_PASS); + ret = __evict_server(session, &did_work); + F_CLR(cache->walk_session, WT_SESSION_LOCKED_PASS); + F_CLR(session, WT_SESSION_LOCKED_PASS); + __wt_spin_unlock(session, &cache->evict_pass_lock); WT_ERR(ret); + WT_ERR(__wt_verbose( + session, WT_VERB_EVICTSERVER, "sleeping")); + /* Don't rely on signals: check periodically. */ + WT_ERR(__wt_cond_auto_wait( + session, cache->evict_cond, did_work)); + WT_ERR(__wt_verbose( + session, WT_VERB_EVICTSERVER, "waking")); + } else + WT_ERR(__evict_helper(session)); + } - /* Next time we wake up, reverse the sweep direction. */ - cache->flags ^= WT_CACHE_WALK_REVERSE; - pages_evicted = 0; - } else if (pages_evicted != cache->pages_evict) { - pages_evicted = cache->pages_evict; + if (session == conn->evict_session) { + /* + * The eviction server is shutting down: in case any trees are + * still open, clear all walks now so that they can be closed. + */ + WT_WITH_PASS_LOCK(session, ret, + ret = __evict_clear_all_walks(session)); + WT_ERR(ret); + } + WT_ERR(__wt_verbose( + session, WT_VERB_EVICTSERVER, "cache eviction thread exiting")); + + /* + * The only two cases when eviction workers are expected to stop are + * when recovery is finished or when the connection is closing. Check + * otherwise fewer eviction worker threads may be running than + * expected. + */ + WT_ASSERT(session, F_ISSET(conn, WT_CONN_CLOSING | WT_CONN_RECOVERING)); + if (0) { +err: WT_PANIC_MSG(session, ret, "cache eviction thread error"); + } + return (WT_THREAD_RET_VALUE); +} + +/* + * __evict_server -- + * Thread to evict pages from the cache. 
+ */ +static int +__evict_server(WT_SESSION_IMPL *session, bool *did_work) +{ + WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; #ifdef HAVE_DIAGNOSTIC - WT_ERR(__wt_epoch(session, &stuck_ts)); - } else { - /* After being stuck for 5 minutes, give up. */ - WT_ERR(__wt_epoch(session, &now)); - if (WT_TIMEDIFF_SEC(now, stuck_ts) > 300) { - __wt_err(session, ETIMEDOUT, - "Cache stuck for too long, giving up"); - (void)__wt_cache_dump(session, NULL); - WT_ERR(ETIMEDOUT); - } + struct timespec now; #endif - } + uint64_t orig_pages_evicted; + u_int spins; - WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping")); - /* Don't rely on signals: check periodically. */ - WT_ERR(__wt_cond_auto_wait( - session, cache->evict_cond, pages_evicted != 0)); - WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "waking")); - } + conn = S2C(session); + cache = conn->cache; + WT_ASSERT(session, did_work != NULL); + *did_work = false; + orig_pages_evicted = cache->pages_evicted; + + /* Evict pages from the cache as needed. */ + WT_RET(__evict_pass(session)); + + if (!F_ISSET(conn, WT_CONN_EVICTION_RUN)) + return (0); /* - * The eviction server is shutting down: in case any trees are still - * open, clear all walks now so that they can be closed. + * Clear the walks so we don't pin pages while asleep, + * otherwise we can block applications evicting large pages. */ - WT_ERR(__evict_clear_all_walks(session)); - - WT_ERR(__wt_verbose( - session, WT_VERB_EVICTSERVER, "cache eviction server exiting")); + if (!F_ISSET(cache, WT_CACHE_STUCK)) { + for (spins = 0; (ret = __wt_spin_trylock( + session, &conn->dhandle_lock)) == EBUSY && + cache->pass_intr == 0; spins++) { + if (spins < WT_THOUSAND) + __wt_yield(); + else + __wt_sleep(0, WT_THOUSAND); + } + /* + * If we gave up acquiring the lock, that indicates a + * session is waiting for us to clear walks. Do that + * as part of a normal pass (without the handle list + * lock) to avoid deadlock. 
+ */ + if (ret == EBUSY) + return (0); + WT_RET(ret); + ret = __evict_clear_all_walks(session); + __wt_spin_unlock(session, &conn->dhandle_lock); + WT_RET(ret); - if (0) { -err: WT_PANIC_MSG(session, ret, "cache eviction server error"); + /* Next time we wake up, reverse the sweep direction. */ + cache->flags ^= WT_CACHE_WALK_REVERSE; + cache->pages_evicted = 0; + } else if (cache->pages_evicted != cache->pages_evict) { + cache->pages_evicted = cache->pages_evict; +#ifdef HAVE_DIAGNOSTIC + WT_RET(__wt_epoch(session, &cache->stuck_ts)); + } else { + /* After being stuck for 5 minutes, give up. */ + WT_RET(__wt_epoch(session, &now)); + if (WT_TIMEDIFF_SEC(now, cache->stuck_ts) > 300) { + __wt_err(session, ETIMEDOUT, + "Cache stuck for too long, giving up"); + (void)__wt_cache_dump(session, NULL); + WT_RET(ETIMEDOUT); + } +#endif } - return (WT_THREAD_RET_VALUE); + *did_work = cache->pages_evicted != orig_pages_evicted; + return (0); } /* @@ -270,6 +337,7 @@ __evict_workers_resize(WT_SESSION_IMPL *session) uint32_t i, session_flags; conn = S2C(session); + workers = NULL; /* -Wconditional-uninitialized */ if (conn->evict_workers_alloc < conn->evict_workers_max) { alloc = conn->evict_workers_alloc * sizeof(*workers); @@ -301,7 +369,8 @@ __evict_workers_resize(WT_SESSION_IMPL *session) ++conn->evict_workers; F_SET(&workers[i], WT_EVICT_WORKER_RUN); WT_ERR(__wt_thread_create(workers[i].session, - &workers[i].tid, __evict_worker, &workers[i])); + &workers[i].tid, __evict_thread_run, + workers[i].session)); } } @@ -355,7 +424,7 @@ __wt_evict_create(WT_SESSION_IMPL *session) * the worker's sessions are created. */ WT_RET(__wt_thread_create( - session, &conn->evict_tid, __evict_server, session)); + session, &conn->evict_tid, __evict_thread_run, session)); conn->evict_tid_set = true; return (0); @@ -425,39 +494,22 @@ __wt_evict_destroy(WT_SESSION_IMPL *session) } /* - * __evict_worker -- + * __evict_helper -- * Thread to help evict pages from the cache. 
*/ -static WT_THREAD_RET -__evict_worker(void *arg) +static int +__evict_helper(WT_SESSION_IMPL *session) { WT_CACHE *cache; - WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_EVICT_WORKER *worker; - WT_SESSION_IMPL *session; - worker = arg; - session = worker->session; - conn = S2C(session); - cache = conn->cache; - - while (F_ISSET(conn, WT_CONN_EVICTION_RUN) && - F_ISSET(worker, WT_EVICT_WORKER_RUN)) { - /* Don't spin in a busy loop if there is no work to do */ - if ((ret = __evict_lru_pages(session, false)) == WT_NOTFOUND) - WT_ERR(__wt_cond_wait( - session, cache->evict_waiter_cond, 10000)); - else - WT_ERR(ret); - } - WT_ERR(__wt_verbose( - session, WT_VERB_EVICTSERVER, "cache eviction worker exiting")); - - if (0) { -err: WT_PANIC_MSG(session, ret, "cache eviction worker error"); - } - return (WT_THREAD_RET_VALUE); + cache = S2C(session)->cache; + if ((ret = __evict_lru_pages(session, false)) == WT_NOTFOUND) + WT_RET(__wt_cond_wait( + session, cache->evict_waiter_cond, 10000)); + else + WT_RET(ret); + return (0); } /* @@ -565,12 +617,8 @@ __evict_pass(WT_SESSION_IMPL *session) * If there is a request to clear eviction walks, do that now, * before checking if the cache is full. */ - if (F_ISSET(cache, WT_CACHE_CLEAR_WALKS)) { - F_CLR(cache, WT_CACHE_CLEAR_WALKS); - WT_RET(__evict_clear_walks(session)); - WT_RET(__wt_cond_signal( - session, cache->evict_waiter_cond)); - } + if (cache->pass_intr != 0) + break; /* * Increment the shared read generation. 
Do this occasionally @@ -617,7 +665,7 @@ __evict_pass(WT_SESSION_IMPL *session) worker = &conn->evict_workctx[conn->evict_workers++]; F_SET(worker, WT_EVICT_WORKER_RUN); WT_RET(__wt_thread_create(session, - &worker->tid, __evict_worker, worker)); + &worker->tid, __evict_thread_run, worker->session)); } WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER, @@ -626,7 +674,7 @@ __evict_pass(WT_SESSION_IMPL *session) conn->cache_size, cache->bytes_inmem, cache->bytes_dirty)); WT_RET(__evict_lru_walk(session)); - WT_RET(__evict_server_work(session)); + WT_RET_NOTFOUND_OK(__evict_lru_pages(session, true)); /* * If we're making progress, keep going; if we're not making @@ -634,6 +682,8 @@ __evict_pass(WT_SESSION_IMPL *session) * sleep, it's not something we can fix. */ if (pages_evicted == cache->pages_evict) { + WT_STAT_FAST_CONN_INCR(session, + cache_eviction_server_slept); /* * Back off if we aren't making progress: walks hold * the handle list lock, which blocks other operations @@ -674,11 +724,13 @@ __evict_clear_walk(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_CACHE *cache; + WT_DECL_RET; WT_REF *ref; btree = S2BT(session); cache = S2C(session)->cache; + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_PASS)); if (session->dhandle == cache->evict_file_next) cache->evict_file_next = NULL; @@ -690,30 +742,9 @@ __evict_clear_walk(WT_SESSION_IMPL *session) * assert we never try to evict the current eviction walk point). */ btree->evict_ref = NULL; - return (__wt_page_release(session, ref, WT_READ_NO_EVICT)); -} - -/* - * __evict_clear_walks -- - * Clear the eviction walk points for any file a session is waiting on. 
- */ -static int -__evict_clear_walks(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_SESSION_IMPL *s; - u_int i, session_cnt; - - conn = S2C(session); - - WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) { - if (!s->active || !F_ISSET(s, WT_SESSION_CLEAR_EVICT_WALK)) - continue; - WT_WITH_DHANDLE( - session, s->dhandle, WT_TRET(__evict_clear_walk(session))); - } + WT_WITH_DHANDLE(cache->walk_session, session->dhandle, + (ret = __wt_page_release(cache->walk_session, + ref, WT_READ_NO_EVICT))); return (ret); } @@ -738,39 +769,6 @@ __evict_clear_all_walks(WT_SESSION_IMPL *session) } /* - * __evict_request_clear_walk -- - * Request that the eviction server clear the tree's current eviction - * point. - */ -static int -__evict_request_clear_walk(WT_SESSION_IMPL *session) -{ - WT_BTREE *btree; - WT_CACHE *cache; - WT_DECL_RET; - - btree = S2BT(session); - cache = S2C(session)->cache; - - F_SET(session, WT_SESSION_CLEAR_EVICT_WALK); - - while (ret == 0 && (btree->evict_ref != NULL || - cache->evict_file_next == session->dhandle)) { - F_SET(cache, WT_CACHE_CLEAR_WALKS); - ret = __wt_cond_wait( - session, cache->evict_waiter_cond, 100000); - } - - F_CLR(session, WT_SESSION_CLEAR_EVICT_WALK); - - /* An error is unexpected - flag the failure. */ - if (ret != 0) - __wt_err(session, ret, "Failed to clear eviction walk point"); - - return (ret); -} - -/* * __wt_evict_file_exclusive_on -- * Get exclusive eviction access to a file and discard any of the file's * blocks queued for eviction. @@ -782,7 +780,7 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) WT_CACHE *cache; WT_DECL_RET; WT_EVICT_ENTRY *evict; - u_int i, elem; + u_int i, elem, q; btree = S2BT(session); cache = S2C(session)->cache; @@ -807,21 +805,32 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) * this point. 
*/ F_SET(btree, WT_BTREE_NO_EVICTION); + (void)__wt_atomic_add32(&cache->pass_intr, 1); WT_FULL_BARRIER(); /* Clear any existing LRU eviction walk for the file. */ - WT_ERR(__evict_request_clear_walk(session)); + WT_WITH_PASS_LOCK(session, ret, + ret = __evict_clear_walk(session)); + (void)__wt_atomic_sub32(&cache->pass_intr, 1); + WT_ERR(ret); /* * The eviction candidate list might reference pages from the file, * clear it. Hold the evict lock to remove queued pages from a file. */ - __wt_spin_lock(session, &cache->evict_lock); - elem = cache->evict_max; - for (i = 0, evict = cache->evict_queue; i < elem; i++, evict++) - if (evict->btree == btree) - __evict_list_clear(session, evict); - __wt_spin_unlock(session, &cache->evict_lock); + __wt_spin_lock(session, &cache->evict_queue_lock); + + for (q = 0; q < WT_EVICT_QUEUE_MAX; q++) { + __wt_spin_lock(session, &cache->evict_queues[q].evict_lock); + elem = cache->evict_queues[q].evict_max; + for (i = 0, evict = cache->evict_queues[q].evict_queue; + i < elem; i++, evict++) + if (evict->btree == btree) + __evict_list_clear(session, evict); + __wt_spin_unlock(session, &cache->evict_queues[q].evict_lock); + } + + __wt_spin_unlock(session, &cache->evict_queue_lock); /* * We have disabled further eviction: wait for concurrent LRU eviction @@ -870,6 +879,7 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) __wt_spin_unlock(session, &cache->evict_walk_lock); } +#define APP_EVICT_THRESHOLD 3 /* Threshold to help evict */ /* * __evict_lru_pages -- * Get pages from the LRU queue to evict. @@ -877,7 +887,27 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) static int __evict_lru_pages(WT_SESSION_IMPL *session, bool is_server) { + WT_CACHE *cache; WT_DECL_RET; + uint64_t app_evict_percent, total_evict; + + /* + * The server will not help evict if the workers are coping with + * eviction workload, that is, if fewer than the threshold of the + * pages are evicted by application threads. 
+ */ + if (is_server && S2C(session)->evict_workers > 1) { + cache = S2C(session)->cache; + total_evict = cache->app_evicts + + cache->server_evicts + cache->worker_evicts; + app_evict_percent = (100 * cache->app_evicts) / + (total_evict + 1); + if (app_evict_percent < APP_EVICT_THRESHOLD) { + WT_STAT_FAST_CONN_INCR(session, + cache_eviction_server_not_evicting); + return (0); + } + } /* * Reconcile and discard some pages: EBUSY is returned if a page fails @@ -897,23 +927,26 @@ __evict_lru_walk(WT_SESSION_IMPL *session) { WT_CACHE *cache; WT_DECL_RET; - uint64_t read_gen_oldest; - uint32_t candidates, entries; + WT_EVICT_QUEUE *evict_queue; + uint64_t cutoff, read_gen_oldest; + uint32_t candidates, entries, queue_index; cache = S2C(session)->cache; + queue_index = cache->evict_queue_fill++ % WT_EVICT_QUEUE_MAX; + evict_queue = &cache->evict_queues[queue_index]; /* Get some more pages to consider for eviction. */ - if ((ret = __evict_walk(session)) != 0) + if ((ret = __evict_walk(cache->walk_session, queue_index)) != 0) return (ret == EBUSY ? 0 : ret); /* Sort the list into LRU order and restart. */ - __wt_spin_lock(session, &cache->evict_lock); + __wt_spin_lock(session, &evict_queue->evict_lock); - entries = cache->evict_entries; - qsort(cache->evict_queue, + entries = evict_queue->evict_entries; + qsort(evict_queue->evict_queue, entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp); - while (entries > 0 && cache->evict_queue[entries - 1].ref == NULL) + while (entries > 0 && evict_queue->evict_queue[entries - 1].ref == NULL) --entries; /* @@ -922,9 +955,10 @@ __evict_lru_walk(WT_SESSION_IMPL *session) * candidates so we never end up with more candidates than entries. 
*/ while (entries > WT_EVICT_WALK_BASE) - __evict_list_clear(session, &cache->evict_queue[--entries]); + __evict_list_clear(session, + &evict_queue->evict_queue[--entries]); - cache->evict_entries = entries; + evict_queue->evict_entries = entries; if (entries == 0) { /* @@ -932,9 +966,12 @@ __evict_lru_walk(WT_SESSION_IMPL *session) * Make sure application threads don't read past the end of the * candidate list, or they may race with the next walk. */ - cache->evict_candidates = 0; + evict_queue->evict_candidates = 0; + __wt_spin_unlock(session, &evict_queue->evict_lock); + __wt_spin_lock(session, &cache->evict_queue_lock); cache->evict_current = NULL; - __wt_spin_unlock(session, &cache->evict_lock); + cache->evict_current_queue = NULL; + __wt_spin_unlock(session, &cache->evict_queue_lock); return (0); } @@ -945,7 +982,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session) * Take all candidates if we only gathered pages with an oldest * read generation set. */ - cache->evict_candidates = entries; + evict_queue->evict_candidates = entries; } else { /* * Find the oldest read generation we have in the queue, used @@ -955,7 +992,8 @@ __evict_lru_walk(WT_SESSION_IMPL *session) read_gen_oldest = WT_READGEN_OLDEST; for (candidates = 0; candidates < entries; ++candidates) { read_gen_oldest = - cache->evict_queue[candidates].score; + __evict_read_gen( + &evict_queue->evict_queue[candidates]); if (read_gen_oldest != WT_READGEN_OLDEST) break; } @@ -964,68 +1002,58 @@ __evict_lru_walk(WT_SESSION_IMPL *session) * Take all candidates if we only gathered pages with an oldest * read generation set. * - * We normally never take more than 50% of the entries but if - * 50% of the entries were at the oldest read generation, take - * all of them. + * We normally never take more than 50% of the entries; if 50% + * of the entries were at the oldest read generation, take them. 
*/ if (read_gen_oldest == WT_READGEN_OLDEST) - cache->evict_candidates = entries; + evict_queue->evict_candidates = entries; else if (candidates >= entries / 2) - cache->evict_candidates = candidates; + evict_queue->evict_candidates = candidates; else { + /* Save the calculated oldest generation. */ + cache->read_gen_oldest = read_gen_oldest; + + /* Find the bottom 25% of read generations. */ + cutoff = + (3 * read_gen_oldest + __evict_read_gen( + &evict_queue->evict_queue[entries - 1])) / 4; + /* - * Take all of the urgent pages plus a third of - * ordinary candidates (which could be expressed as - * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the - * steady state, we want to get as many candidates as - * the eviction walk adds to the queue. - * - * That said, if there is only one entry, which is - * normal when populating an empty file, don't exclude - * it. + * Don't take less than 10% or more than 50% of entries, + * regardless. That said, if there is only one entry, + * which is normal when populating an empty file, don't + * exclude it. */ - cache->evict_candidates = - 1 + candidates + ((entries - candidates) - 1) / 3; - cache->read_gen_oldest = read_gen_oldest; + for (candidates = 1 + entries / 10; + candidates < entries / 2; + candidates++) + if (__evict_read_gen( + &evict_queue->evict_queue[candidates]) > + cutoff) + break; + evict_queue->evict_candidates = candidates; } } - cache->evict_current = cache->evict_queue; - __wt_spin_unlock(session, &cache->evict_lock); - + __wt_spin_unlock(session, &evict_queue->evict_lock); /* - * The eviction server thread doesn't do any actual eviction if there - * are multiple eviction workers running. + * Now we can set the next queue. */ - WT_RET(__wt_cond_signal(session, cache->evict_waiter_cond)); - - return (0); -} - -/* - * __evict_server_work -- - * Evict pages from the cache based on their read generation. 
- */ -static int -__evict_server_work(WT_SESSION_IMPL *session) -{ - WT_CACHE *cache; - - cache = S2C(session)->cache; + __wt_spin_lock(session, &cache->evict_queue_lock); + if (cache->evict_current == NULL) + WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_empty); + else + WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_not_empty); - if (S2C(session)->evict_workers > 1) { - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_server_not_evicting); + cache->evict_current = evict_queue->evict_queue; + cache->evict_current_queue = evict_queue; + __wt_spin_unlock(session, &cache->evict_queue_lock); - /* - * If there are candidates queued, give other threads a chance - * to access them before gathering more. - */ - if (cache->evict_candidates > 10 && - cache->evict_current != NULL) - __wt_yield(); - } else - WT_RET_NOTFOUND_OK(__evict_lru_pages(session, true)); + /* + * Signal any application or helper threads that may be waiting + * to help with eviction. + */ + WT_RET(__wt_cond_signal(session, cache->evict_waiter_cond)); return (0); } @@ -1035,14 +1063,16 @@ __evict_server_work(WT_SESSION_IMPL *session) * Fill in the array by walking the next set of pages. */ static int -__evict_walk(WT_SESSION_IMPL *session) +__evict_walk(WT_SESSION_IMPL *session, uint32_t queue_index) { WT_BTREE *btree; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - u_int max_entries, prev_slot, retries, slot, start_slot, spins; + WT_EVICT_QUEUE *evict_queue; + u_int max_entries, prev_slot, retries; + u_int slot, start_slot, spins; bool dhandle_locked, incr; conn = S2C(session); @@ -1052,24 +1082,20 @@ __evict_walk(WT_SESSION_IMPL *session) dhandle_locked = incr = false; retries = 0; - if (cache->evict_current == NULL) - WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_empty); - else - WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_not_empty); - /* * Set the starting slot in the queue and the maximum pages added * per walk. 
*/ - start_slot = slot = cache->evict_entries; - max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots); + evict_queue = &cache->evict_queues[queue_index]; + start_slot = slot = evict_queue->evict_entries; + max_entries = slot + WT_EVICT_WALK_INCR; retry: while (slot < max_entries && ret == 0) { /* * If another thread is waiting on the eviction server to clear * the walk point in a tree, give up. */ - if (F_ISSET(cache, WT_CACHE_CLEAR_WALKS)) + if (cache->pass_intr != 0) break; /* @@ -1079,7 +1105,7 @@ retry: while (slot < max_entries && ret == 0) { if (!dhandle_locked) { for (spins = 0; (ret = __wt_spin_trylock( session, &conn->dhandle_lock)) == EBUSY && - !F_ISSET(cache, WT_CACHE_CLEAR_WALKS); + cache->pass_intr == 0; spins++) { if (spins < WT_THOUSAND) __wt_yield(); @@ -1145,6 +1171,7 @@ retry: while (slot < max_entries && ret == 0) { * useful in the past. */ if (btree->evict_walk_period != 0 && + evict_queue->evict_entries >= WT_EVICT_WALK_INCR && btree->evict_walk_skips++ < btree->evict_walk_period) continue; btree->evict_walk_skips = 0; @@ -1171,7 +1198,7 @@ retry: while (slot < max_entries && ret == 0) { cache->evict_file_next = dhandle; WT_WITH_DHANDLE(session, dhandle, ret = __evict_walk_file( - session, max_entries, &slot)); + session, queue_index, &slot)); WT_ASSERT(session, session->split_gen == 0); } __wt_spin_unlock(session, &cache->evict_walk_lock); @@ -1204,17 +1231,17 @@ retry: while (slot < max_entries && ret == 0) { * Try two passes through all the files, give up when we have some * candidates and we aren't finding more. 
*/ - if (!F_ISSET(cache, WT_CACHE_CLEAR_WALKS) && ret == 0 && + if (cache->pass_intr == 0 && ret == 0 && slot < max_entries && (retries < 2 || (retries < 10 && !FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) && - (slot == cache->evict_entries || slot > start_slot)))) { + (slot == evict_queue->evict_entries || slot > start_slot)))) { start_slot = slot; ++retries; goto retry; } - cache->evict_entries = slot; + evict_queue->evict_entries = slot; return (ret); } @@ -1223,24 +1250,20 @@ retry: while (slot < max_entries && ret == 0) { * Initialize a WT_EVICT_ENTRY structure with a given page. */ static void -__evict_init_candidate( - WT_SESSION_IMPL *session, WT_EVICT_ENTRY *evict, WT_REF *ref) +__evict_init_candidate(WT_SESSION_IMPL *session, + WT_EVICT_QUEUE *evict_queue, WT_EVICT_ENTRY *evict, WT_REF *ref) { - WT_CACHE *cache; u_int slot; - cache = S2C(session)->cache; - /* Keep track of the maximum slot we are using. */ - slot = (u_int)(evict - cache->evict_queue); - if (slot >= cache->evict_max) - cache->evict_max = slot + 1; + slot = (u_int)(evict - evict_queue->evict_queue); + if (slot >= evict_queue->evict_max) + evict_queue->evict_max = slot + 1; if (evict->ref != NULL) __evict_list_clear(session, evict); - evict->btree = S2BT(session); evict->ref = ref; - evict->score = __evict_read_gen(evict); + evict->btree = S2BT(session); /* Mark the page on the list; set last to flush the other updates. */ F_SET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU); @@ -1251,65 +1274,34 @@ __evict_init_candidate( * Get a few page eviction candidates from a single underlying file. 
*/ static int -__evict_walk_file(WT_SESSION_IMPL *session, u_int max_entries, u_int *slotp) +__evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp) { WT_BTREE *btree; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_EVICT_ENTRY *end, *evict, *start; + WT_EVICT_QUEUE *evict_queue; WT_PAGE *page; WT_PAGE_MODIFY *mod; WT_REF *ref; - uint64_t btree_inuse, bytes_per_slot, cache_inuse; uint64_t pages_seen, refs_walked; - uint32_t remaining_slots, target_pages, total_slots, walk_flags; + uint32_t walk_flags; int internal_pages, restarts; bool enough, modified; conn = S2C(session); btree = S2BT(session); cache = conn->cache; + evict_queue = &cache->evict_queues[queue_index]; internal_pages = restarts = 0; enough = false; - /* - * Figure out how many slots to fill from this tree. - * Note that some care is taken in the calculation to avoid overflow. - */ - start = cache->evict_queue + *slotp; - btree_inuse = __wt_btree_bytes_inuse(session); - cache_inuse = __wt_cache_bytes_inuse(cache); - remaining_slots = max_entries - *slotp; - total_slots = max_entries - cache->evict_entries; - target_pages = (uint32_t)(btree_inuse / - (cache_inuse / total_slots)); - - /* - * The target number of pages for this tree is proportional to the - * space it is taking up in cache. Round to the nearest number of - * slots so we assign all of the slots to a tree filling 99+% of the - * cache (and only have to walk it once). - */ - bytes_per_slot = cache_inuse / total_slots; - target_pages = (uint32_t)( - (btree_inuse + bytes_per_slot / 2) / bytes_per_slot); - if (target_pages == 0) { - /* - * Randomly walk trees with a tiny fraction of the cache in - * case there are so many trees that none of them use enough of - * the cache to be allocated slots. 
- */ - if (__wt_random(&session->rnd) / (double)UINT32_MAX > - btree_inuse / (double)cache_inuse) - return (0); - target_pages = 10; - } - + start = evict_queue->evict_queue + *slotp; + end = start + WT_EVICT_WALK_PER_FILE; if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || - target_pages > remaining_slots) - target_pages = remaining_slots; - end = start + target_pages; + end > evict_queue->evict_queue + cache->evict_slots) + end = evict_queue->evict_queue + cache->evict_slots; walk_flags = WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT; @@ -1430,7 +1422,7 @@ fast: /* If the page can't be evicted, give up. */ } WT_ASSERT(session, evict->ref == NULL); - __evict_init_candidate(session, evict, ref); + __evict_init_candidate(session, evict_queue, evict, ref); ++evict; if (WT_PAGE_IS_INTERNAL(page)) @@ -1472,6 +1464,43 @@ fast: /* If the page can't be evicted, give up. */ } /* + * __evict_check_entry_size -- + * Check if the size of an entry is too large for this thread to evict. + * We use this so that the server thread doesn't get stalled evicting + * a very large page. + */ +static bool +__evict_check_entry_size(WT_SESSION_IMPL *session, WT_EVICT_ENTRY *entry) +{ + WT_CACHE *cache; + WT_PAGE *page; + WT_REF *ref; + uint64_t max; + + cache = S2C(session)->cache; + + if (cache->pages_evict == 0) + return (true); + + max = (cache->bytes_evict / cache->pages_evict) * 4; + if ((ref = entry->ref) != NULL) { + if ((page = ref->page) == NULL) + return (true); + /* + * If this page is more than four times the average evicted page + * size then return false. Return true in all other cases. + * XXX Should we care here if the page is dirty? Probably... + */ + if (page->memory_footprint > max) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_server_toobig); + return (false); + } + } + return (true); +} + +/* * __evict_get_ref -- * Get a page for eviction. 
*/ @@ -1481,6 +1510,7 @@ __evict_get_ref( { WT_CACHE *cache; WT_EVICT_ENTRY *evict; + WT_EVICT_QUEUE *evict_queue; uint32_t candidates; cache = S2C(session)->cache; @@ -1488,39 +1518,63 @@ __evict_get_ref( *refp = NULL; /* - * Avoid the LRU lock if no pages are available. If there are pages - * available, spin until we get the lock. If this function returns - * without getting a page to evict, application threads assume there - * are no more pages available and will attempt to wake the eviction - * server. + * Avoid the LRU lock if no pages are available. */ + WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref); + if (cache->evict_current == NULL) { + WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty); + return (WT_NOTFOUND); + } + __wt_spin_lock(session, &cache->evict_queue_lock); + /* + * Verify there are still pages available. + */ + if (cache->evict_current == NULL) { + __wt_spin_unlock(session, &cache->evict_queue_lock); + WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty2); + return (WT_NOTFOUND); + } + /* + * We got the queue lock, which should be fast, and now we want to + * get the lock on the individual queue. We know that the shared + * queue fields cannot change now. + */ + evict_queue = cache->evict_current_queue; for (;;) { - if (cache->evict_current == NULL) - return (WT_NOTFOUND); - if (__wt_spin_trylock(session, &cache->evict_lock) == 0) + if (__wt_spin_trylock(session, &evict_queue->evict_lock) == 0) break; - if (!F_ISSET(session, WT_SESSION_INTERNAL)) + if (!F_ISSET(session, WT_SESSION_INTERNAL)) { + __wt_spin_unlock(session, &cache->evict_queue_lock); return (WT_NOTFOUND); + } __wt_yield(); } - /* * Only evict half of the pages before looking for more. The remainder * are left to eviction workers (if configured), or application threads * if necessary. 
*/ - candidates = cache->evict_candidates; + candidates = evict_queue->evict_candidates; if (is_server && candidates > 1) candidates /= 2; /* Get the next page queued for eviction. */ - for (evict = cache->evict_current; - evict >= cache->evict_queue && - evict < cache->evict_queue + candidates; - ++evict) { - if (evict->ref == NULL) - continue; + while ((evict = cache->evict_current) != NULL && + evict < evict_queue->evict_queue + candidates && + evict->ref != NULL) { WT_ASSERT(session, evict->btree != NULL); + /* + * If the server is helping out and encounters an entry that + * is too large, it stops helping. Evicting a very large + * page in the server thread could stall eviction from finding + * new work. + */ + if (is_server && S2C(session)->evict_workers > 1 && + !__evict_check_entry_size(session, evict)) + break; + + /* Move to the next item. */ + ++cache->evict_current; /* * Lock the page while holding the eviction mutex to prevent @@ -1551,12 +1605,10 @@ __evict_get_ref( } /* Clear the current pointer if there are no more candidates. */ - if (evict == NULL || evict + 1 >= - cache->evict_queue + cache->evict_candidates) + if (evict >= evict_queue->evict_queue + evict_queue->evict_candidates) cache->evict_current = NULL; - else - cache->evict_current = evict + 1; - __wt_spin_unlock(session, &cache->evict_lock); + __wt_spin_unlock(session, &evict_queue->evict_lock); + __wt_spin_unlock(session, &cache->evict_queue_lock); return ((*refp == NULL) ? WT_NOTFOUND : 0); } @@ -1569,27 +1621,34 @@ static int __evict_page(WT_SESSION_IMPL *session, bool is_server) { WT_BTREE *btree; + WT_CACHE *cache; WT_DECL_RET; WT_REF *ref; WT_RET(__evict_get_ref(session, is_server, &btree, &ref)); WT_ASSERT(session, ref->state == WT_REF_LOCKED); + cache = S2C(session)->cache; /* * An internal session flags either the server itself or an eviction * worker thread. 
*/ - if (is_server) - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_server_evicting); - else if (F_ISSET(session, WT_SESSION_INTERNAL)) - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_worker_evicting); - else { + if (F_ISSET(session, WT_SESSION_INTERNAL)) { + if (is_server) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_server_evicting); + cache->server_evicts++; + } else { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_worker_evicting); + cache->worker_evicts++; + } + } else { if (__wt_page_is_modified(ref->page)) WT_STAT_FAST_CONN_INCR( session, cache_eviction_app_dirty); WT_STAT_FAST_CONN_INCR(session, cache_eviction_app); + cache->app_evicts++; } /* @@ -1685,7 +1744,6 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) /* Evict a page. */ switch (ret = __evict_page(session, false)) { case 0: - cache->app_evicts++; if (txn_busy) return (0); /* FALLTHROUGH */ @@ -1738,9 +1796,9 @@ __wt_evict_priority_clear(WT_SESSION_IMPL *session) int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) { + FILE *fp; WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle, *saved_dhandle; - WT_FH *fh; WT_PAGE *page; WT_REF *next_walk; uint64_t dirty_bytes, dirty_pages, intl_bytes, intl_pages; @@ -1752,13 +1810,12 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) total_bytes = 0; if (ofile == NULL) - fh = WT_STDERR(session); - else - WT_RET(__wt_open(session, ofile, WT_FILE_TYPE_REGULAR, - WT_OPEN_CREATE | WT_STREAM_WRITE, &fh)); + fp = stderr; + else if ((fp = fopen(ofile, "w")) == NULL) + return (EIO); /* Note: odd string concatenation avoids spelling errors. 
*/ - (void)__wt_fprintf(session, fh, "==========\n" "cache dump\n"); + (void)fprintf(fp, "==========\n" "cache dump\n"); saved_dhandle = session->dhandle; TAILQ_FOREACH(dhandle, &conn->dhqh, q) { @@ -1797,24 +1854,22 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) session->dhandle = NULL; if (dhandle->checkpoint == NULL) - (void)__wt_fprintf(session, fh, - "%s(<live>): \n", dhandle->name); + (void)fprintf(fp, "%s(<live>): \n", dhandle->name); else - (void)__wt_fprintf(session, fh, - "%s(checkpoint=%s): \n", + (void)fprintf(fp, "%s(checkpoint=%s): \n", dhandle->name, dhandle->checkpoint); if (intl_pages != 0) - (void)__wt_fprintf(session, fh, + (void)fprintf(fp, "\t" "internal pages: %" PRIu64 " pages, %" PRIu64 " max, %" PRIu64 "MB total\n", intl_pages, max_intl_bytes, intl_bytes >> 20); if (leaf_pages != 0) - (void)__wt_fprintf(session, fh, + (void)fprintf(fp, "\t" "leaf pages: %" PRIu64 " pages, %" PRIu64 " max, %" PRIu64 "MB total\n", leaf_pages, max_leaf_bytes, leaf_bytes >> 20); if (dirty_pages != 0) - (void)__wt_fprintf(session, fh, + (void)fprintf(fp, "\t" "dirty pages: %" PRIu64 " pages, %" PRIu64 " max, %" PRIu64 "MB total\n", dirty_pages, max_dirty_bytes, dirty_bytes >> 20); @@ -1830,13 +1885,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) if (conn->cache->overhead_pct != 0) total_bytes += (total_bytes * (uint64_t)conn->cache->overhead_pct) / 100; - (void)__wt_fprintf(session, fh, + (void)fprintf(fp, "cache dump: total found = %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB\n", total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20); - (void)__wt_fprintf(session, fh, "==========\n"); - if (ofile != NULL) - WT_RET(__wt_close(session, &fh)); + (void)fprintf(fp, "==========\n"); + if (ofile != NULL && fclose(fp) != 0) + return (EIO); return (0); } #endif diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h index c6a5af40698..50b2eab83b8 100644 --- 
a/src/third_party/wiredtiger/src/include/api.h +++ b/src/third_party/wiredtiger/src/include/api.h @@ -118,7 +118,7 @@ #define JOINABLE_CURSOR_CALL_CHECK(cur) \ if (F_ISSET(cur, WT_CURSTD_JOINED)) \ - WT_ERR(__wt_curindex_joined(cur)) + WT_ERR(__wt_curjoin_joined(cur)) #define JOINABLE_CURSOR_API_CALL(cur, s, n, bt) \ CURSOR_API_CALL(cur, s, n, bt); \ diff --git a/src/third_party/wiredtiger/src/include/bitstring.i b/src/third_party/wiredtiger/src/include/bitstring.i index 0d30e55d1ef..08746beb9b9 100644 --- a/src/third_party/wiredtiger/src/include/bitstring.i +++ b/src/third_party/wiredtiger/src/include/bitstring.i @@ -261,10 +261,10 @@ __bit_getv(uint8_t *bitf, uint64_t entry, uint8_t width) * Return a record number's bit-field value. */ static inline uint8_t -__bit_getv_recno(WT_PAGE *page, uint64_t recno, uint8_t width) +__bit_getv_recno(WT_REF *ref, uint64_t recno, uint8_t width) { return (__bit_getv( - page->pg_fix_bitf, recno - page->pg_fix_recno, width)); + ref->page->pg_fix_bitf, recno - ref->ref_recno, width)); } /* @@ -305,13 +305,3 @@ __bit_setv(uint8_t *bitf, uint64_t entry, uint8_t width, uint8_t value) __BIT_SET(1, 0x01); } } - -/* - * __bit_setv_recno -- - * Set a record number's bit-field value. 
- */ -static inline void -__bit_setv_recno(WT_PAGE *page, uint64_t recno, uint8_t width, uint8_t value) -{ - __bit_setv(page->pg_fix_bitf, recno - page->pg_fix_recno, width, value); -} diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h index e964fb4e8c2..a8080c1651c 100644 --- a/src/third_party/wiredtiger/src/include/block.h +++ b/src/third_party/wiredtiger/src/include/block.h @@ -174,6 +174,7 @@ struct __wt_bm { int (*compact_start)(WT_BM *, WT_SESSION_IMPL *); int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *); + int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t); int (*preload)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); int (*read) (WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t); @@ -196,9 +197,9 @@ struct __wt_bm { WT_BLOCK *block; /* Underlying file */ - void *map; /* Mapped region */ - size_t maplen; - void *mappingcookie; + void *map; /* Mapped region */ + size_t maplen; + void *mapped_cookie; /* * There's only a single block manager handle that can be written, all @@ -224,8 +225,6 @@ struct __wt_block { wt_off_t size; /* File size */ wt_off_t extend_size; /* File extended size */ wt_off_t extend_len; /* File extend chunk size */ - bool nowait_sync_available; /* File can flush asynchronously */ - bool preload_available; /* File pages can be preloaded */ /* Configuration information, set when the file is opened. 
*/ uint32_t allocfirst; /* Allocation is first-fit */ @@ -262,6 +261,7 @@ struct __wt_block { /* Verification support */ bool verify; /* If performing verification */ + bool verify_layout; /* Print out file layout information */ bool verify_strict; /* Fail hard on any error */ wt_off_t verify_size; /* Checkpoint's file size */ WT_EXTLIST verify_alloc; /* Verification allocation list */ diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 7cdf2bef43a..9700b6f4761 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -251,6 +251,7 @@ struct __wt_page_modify { */ union { WT_ADDR replace; /* Single, written replacement block */ +#undef mod_replace #define mod_replace u1.replace struct { /* Multiple replacement blocks */ @@ -295,7 +296,9 @@ struct __wt_page_modify { } *multi; uint32_t multi_entries; /* Multiple blocks element count */ } m; +#undef mod_multi #define mod_multi u1.m.multi +#undef mod_multi_entries #define mod_multi_entries u1.m.multi_entries } u1; @@ -318,6 +321,7 @@ struct __wt_page_modify { */ WT_PAGE *root_split; /* Linked list of root split pages */ } intl; +#undef mod_root_split #define mod_root_split u2.intl.root_split struct { /* @@ -344,10 +348,24 @@ struct __wt_page_modify { * write any implicitly created deleted records for the page. */ uint64_t split_recno; - } leaf; -#define mod_append u2.leaf.append -#define mod_update u2.leaf.update -#define mod_split_recno u2.leaf.split_recno + } column_leaf; +#undef mod_col_append +#define mod_col_append u2.column_leaf.append +#undef mod_col_update +#define mod_col_update u2.column_leaf.update +#undef mod_col_split_recno +#define mod_col_split_recno u2.column_leaf.split_recno + struct { + /* Inserted items for row-store. */ + WT_INSERT_HEAD **insert; + + /* Updated items for row-stores. 
*/ + WT_UPDATE **update; + } row_leaf; +#undef mod_row_insert +#define mod_row_insert u2.row_leaf.insert +#undef mod_row_update +#define mod_row_update u2.row_leaf.update } u2; /* @@ -433,7 +451,6 @@ struct __wt_page { * doesn't read it multiple times). */ struct { - uint64_t recno; /* Starting recno */ WT_REF *parent_ref; /* Parent reference */ struct __wt_page_index { @@ -442,8 +459,7 @@ struct __wt_page { WT_REF **index; } * volatile __index; /* Collated children */ } intl; -#undef pg_intl_recno -#define pg_intl_recno u.intl.recno +#undef pg_intl_parent_ref #define pg_intl_parent_ref u.intl.parent_ref /* @@ -482,40 +498,19 @@ struct __wt_page { /* Row-store leaf page. */ struct { - /* - * The column-store leaf page modification structures - * live in the WT_PAGE_MODIFY structure to keep the - * WT_PAGE structure as small as possible for read-only - * pages. For consistency, we could move the row-store - * modification structures into WT_PAGE_MODIFY too, but - * that doesn't shrink WT_PAGE any further and it would - * require really ugly naming inside of WT_PAGE_MODIFY - * to avoid growing that structure. - */ - WT_INSERT_HEAD **ins; /* Inserts */ - WT_UPDATE **upd; /* Updates */ - WT_ROW *d; /* Key/value pairs */ uint32_t entries; /* Entries */ } row; #undef pg_row_d #define pg_row_d u.row.d -#undef pg_row_ins -#define pg_row_ins u.row.ins -#undef pg_row_upd -#define pg_row_upd u.row.upd #undef pg_row_entries #define pg_row_entries u.row.entries /* Fixed-length column-store leaf page. */ struct { - uint64_t recno; /* Starting recno */ - uint8_t *bitf; /* Values */ uint32_t entries; /* Entries */ } col_fix; -#undef pg_fix_recno -#define pg_fix_recno u.col_fix.recno #undef pg_fix_bitf #define pg_fix_bitf u.col_fix.bitf #undef pg_fix_entries @@ -523,8 +518,6 @@ struct __wt_page { /* Variable-length column-store leaf page. 
*/ struct { - uint64_t recno; /* Starting recno */ - WT_COL *d; /* Values */ /* @@ -537,8 +530,6 @@ struct __wt_page { uint32_t entries; /* Entries */ } col_var; -#undef pg_var_recno -#define pg_var_recno u.col_var.recno #undef pg_var_d #define pg_var_d u.col_var.d #undef pg_var_repeats @@ -732,6 +723,10 @@ struct __wt_ref { uint64_t recno; /* Column-store: starting recno */ void *ikey; /* Row-store: key */ } key; +#undef ref_recno +#define ref_recno key.recno +#undef ref_ikey +#define ref_ikey key.ikey WT_PAGE_DELETED *page_del; /* Deleted on-disk page information */ }; @@ -1007,12 +1002,15 @@ struct __wt_insert_head { * of pointers and the specific structure exist, else NULL. */ #define WT_ROW_INSERT_SLOT(page, slot) \ - ((page)->pg_row_ins == NULL ? NULL : (page)->pg_row_ins[slot]) + ((page)->modify == NULL || \ + (page)->modify->mod_row_insert == NULL ? \ + NULL : (page)->modify->mod_row_insert[slot]) #define WT_ROW_INSERT(page, ip) \ WT_ROW_INSERT_SLOT(page, WT_ROW_SLOT(page, ip)) #define WT_ROW_UPDATE(page, ip) \ - ((page)->pg_row_upd == NULL ? \ - NULL : (page)->pg_row_upd[WT_ROW_SLOT(page, ip)]) + ((page)->modify == NULL || \ + (page)->modify->mod_row_update == NULL ? \ + NULL : (page)->modify->mod_row_update[WT_ROW_SLOT(page, ip)]) /* * WT_ROW_INSERT_SMALLEST references an additional slot past the end of the * the "one per WT_ROW slot" insert array. That's because the insert array @@ -1020,8 +1018,9 @@ struct __wt_insert_head { * original page. */ #define WT_ROW_INSERT_SMALLEST(page) \ - ((page)->pg_row_ins == NULL ? \ - NULL : (page)->pg_row_ins[(page)->pg_row_entries]) + ((page)->modify == NULL || \ + (page)->modify->mod_row_insert == NULL ? \ + NULL : (page)->modify->mod_row_insert[(page)->pg_row_entries]) /* * The column-store leaf page update lists are arrays of pointers to structures, @@ -1029,8 +1028,9 @@ struct __wt_insert_head { * of pointers and the specific structure exist, else NULL. 
*/ #define WT_COL_UPDATE_SLOT(page, slot) \ - ((page)->modify == NULL || (page)->modify->mod_update == NULL ? \ - NULL : (page)->modify->mod_update[slot]) + ((page)->modify == NULL || \ + (page)->modify->mod_col_update == NULL ? \ + NULL : (page)->modify->mod_col_update[slot]) #define WT_COL_UPDATE(page, ip) \ WT_COL_UPDATE_SLOT(page, WT_COL_SLOT(page, ip)) @@ -1046,8 +1046,9 @@ struct __wt_insert_head { * appends. */ #define WT_COL_APPEND(page) \ - ((page)->modify != NULL && (page)->modify->mod_append != NULL ? \ - (page)->modify->mod_append[0] : NULL) + ((page)->modify == NULL || \ + (page)->modify->mod_col_append == NULL ? \ + NULL : (page)->modify->mod_col_append[0]) /* WT_FIX_FOREACH walks fixed-length bit-fields on a disk page. */ #define WT_FIX_FOREACH(btree, dsk, v, i) \ diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index 96097115afd..fd921677751 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -129,8 +129,6 @@ struct __wt_btree { uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ - uint64_t bytes_inmem; /* Cache bytes in memory. */ - WT_REF *evict_ref; /* Eviction thread's location */ uint64_t evict_priority; /* Relative priority of cached pages */ u_int evict_walk_period; /* Skip this many LRU walks */ diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 03f27861e75..e0102a11511 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -55,27 +55,6 @@ __wt_btree_block_free( } /* - * __wt_btree_bytes_inuse -- - * Return the number of bytes in use. 
- */ -static inline uint64_t -__wt_btree_bytes_inuse(WT_SESSION_IMPL *session) -{ - WT_CACHE *cache; - uint64_t bytes_inuse; - - cache = S2C(session)->cache; - - /* Adjust the cache size to take allocation overhead into account. */ - bytes_inuse = S2BT(session)->bytes_inmem; - if (cache->overhead_pct != 0) - bytes_inuse += - (bytes_inuse * (uint64_t)cache->overhead_pct) / 100; - - return (bytes_inuse); -} - -/* * __wt_cache_page_inmem_incr -- * Increment a page's memory footprint in the cache. */ @@ -87,7 +66,6 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) WT_ASSERT(session, size < WT_EXABYTE); cache = S2C(session)->cache; - (void)__wt_atomic_add64(&S2BT(session)->bytes_inmem, size); (void)__wt_atomic_add64(&cache->bytes_inmem, size); (void)__wt_atomic_addsize(&page->memory_footprint, size); if (__wt_page_is_modified(page)) { @@ -218,8 +196,6 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) WT_ASSERT(session, size < WT_EXABYTE); __wt_cache_decr_check_uint64( - session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem"); - __wt_cache_decr_check_uint64( session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem"); __wt_cache_decr_check_size( session, &page->memory_footprint, size, "WT_PAGE.memory_footprint"); @@ -298,9 +274,8 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) modify = page->modify; /* Update the bytes in-memory to reflect the eviction. 
*/ - __wt_cache_decr_check_uint64(session, &S2BT(session)->bytes_inmem, - page->memory_footprint, "WT_BTREE.bytes_inmem"); - __wt_cache_decr_check_uint64(session, &cache->bytes_inmem, + __wt_cache_decr_check_uint64(session, + &cache->bytes_inmem, page->memory_footprint, "WT_CACHE.bytes_inmem"); /* Update the bytes_internal value to reflect the eviction */ @@ -536,8 +511,8 @@ __wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep) /* * An internal page key is in one of two places: if we instantiated the - * key (for example, when reading the page), WT_REF.key.ikey references - * a WT_IKEY structure, otherwise WT_REF.key.ikey references an on-page + * key (for example, when reading the page), WT_REF.ref_ikey references + * a WT_IKEY structure, otherwise WT_REF.ref_ikey references an on-page * key offset/length pair. * * Now the magic: allocated memory must be aligned to store any standard @@ -561,14 +536,14 @@ __wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep) #define WT_IK_DECODE_KEY_LEN(v) ((v) >> 32) #define WT_IK_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 1) #define WT_IK_DECODE_KEY_OFFSET(v) (((v) & 0xFFFFFFFF) >> 1) - v = (uintptr_t)ref->key.ikey; + v = (uintptr_t)ref->ref_ikey; if (v & WT_IK_FLAG) { *(void **)keyp = WT_PAGE_REF_OFFSET(page, WT_IK_DECODE_KEY_OFFSET(v)); *sizep = WT_IK_DECODE_KEY_LEN(v); } else { - *(void **)keyp = WT_IKEY_DATA(ref->key.ikey); - *sizep = ((WT_IKEY *)ref->key.ikey)->size; + *(void **)keyp = WT_IKEY_DATA(ref->ref_ikey); + *sizep = ((WT_IKEY *)ref->ref_ikey)->size; } } @@ -587,7 +562,7 @@ __wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK *unpack) v = WT_IK_ENCODE_KEY_LEN(unpack->size) | WT_IK_ENCODE_KEY_OFFSET(WT_PAGE_DISK_OFFSET(page, unpack->data)) | WT_IK_FLAG; - ref->key.ikey = (void *)v; + ref->ref_ikey = (void *)v; } /* @@ -602,8 +577,8 @@ __wt_ref_key_instantiated(WT_REF *ref) /* * See the comment in __wt_ref_key for an explanation of the magic. 
*/ - v = (uintptr_t)ref->key.ikey; - return (v & WT_IK_FLAG ? NULL : ref->key.ikey); + v = (uintptr_t)ref->ref_ikey; + return (v & WT_IK_FLAG ? NULL : ref->ref_ikey); } /* @@ -616,10 +591,10 @@ __wt_ref_key_clear(WT_REF *ref) /* * The key union has 2 8B fields; this is equivalent to: * - * ref->key.recno = WT_RECNO_OOB; - * ref->key.ikey = NULL; + * ref->ref_recno = WT_RECNO_OOB; + * ref->ref_ikey = NULL; */ - ref->key.recno = 0; + ref->ref_recno = 0; } /* @@ -1385,7 +1360,7 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) WT_CONNECTION_IMPL *conn; WT_HAZARD *hp; WT_SESSION_IMPL *s; - uint32_t i, hazard_size, session_cnt; + uint32_t i, j, hazard_size, max, session_cnt; conn = S2C(session); @@ -1397,15 +1372,28 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) * come or go, we'll check the slots for all of the sessions that could * have been active when we started our check. */ + WT_STAT_FAST_CONN_INCR(session, cache_hazard_checks); WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) { + for (s = conn->sessions, i = 0, j = 0, max = 0; + i < session_cnt; ++s, ++i) { if (!s->active) continue; WT_ORDERED_READ(hazard_size, s->hazard_size); - for (hp = s->hazard; hp < s->hazard + hazard_size; ++hp) - if (hp->page == page) + if (s->hazard_size > max) { + max = s->hazard_size; + WT_STAT_FAST_CONN_SET(session, + cache_hazard_max, max); + } + for (hp = s->hazard; hp < s->hazard + hazard_size; ++hp) { + ++j; + if (hp->page == page) { + WT_STAT_FAST_CONN_INCRV(session, + cache_hazard_walks, j); return (hp); + } + } } + WT_STAT_FAST_CONN_INCRV(session, cache_hazard_walks, j); return (NULL); } diff --git a/src/third_party/wiredtiger/src/include/btree_cmp.i b/src/third_party/wiredtiger/src/include/btree_cmp.i index 1993c1be293..23a462e4e50 100644 --- a/src/third_party/wiredtiger/src/include/btree_cmp.i +++ b/src/third_party/wiredtiger/src/include/btree_cmp.i @@ -52,8 +52,8 @@ 
__wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item) for (; len > 0; len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { - u = _mm_load_si128((__m128i *)userp); - t = _mm_load_si128((__m128i *)treep); + u = _mm_load_si128((const __m128i *)userp); + t = _mm_load_si128((const __m128i *)treep); res_eq = _mm_cmpeq_epi8(u, t); if (_mm_movemask_epi8(res_eq) != 65535) break; @@ -62,8 +62,8 @@ __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item) for (; len > 0; len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { - u = _mm_loadu_si128((__m128i *)userp); - t = _mm_loadu_si128((__m128i *)treep); + u = _mm_loadu_si128((const __m128i *)userp); + t = _mm_loadu_si128((const __m128i *)treep); res_eq = _mm_cmpeq_epi8(u, t); if (_mm_movemask_epi8(res_eq) != 65535) break; @@ -123,8 +123,8 @@ __wt_lex_compare_skip( tsz = tree_item->size; len = WT_MIN(usz, tsz) - *matchp; - userp = (uint8_t *)user_item->data + *matchp; - treep = (uint8_t *)tree_item->data + *matchp; + userp = (const uint8_t *)user_item->data + *matchp; + treep = (const uint8_t *)tree_item->data + *matchp; #ifdef HAVE_X86INTRIN_H /* Use vector instructions if we'll execute at least 2 of them. 
*/ @@ -139,8 +139,8 @@ __wt_lex_compare_skip( len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, *matchp += WT_VECTOR_SIZE) { - u = _mm_load_si128((__m128i *)userp); - t = _mm_load_si128((__m128i *)treep); + u = _mm_load_si128((const __m128i *)userp); + t = _mm_load_si128((const __m128i *)treep); res_eq = _mm_cmpeq_epi8(u, t); if (_mm_movemask_epi8(res_eq) != 65535) break; @@ -150,8 +150,8 @@ __wt_lex_compare_skip( len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, *matchp += WT_VECTOR_SIZE) { - u = _mm_loadu_si128((__m128i *)userp); - t = _mm_loadu_si128((__m128i *)treep); + u = _mm_loadu_si128((const __m128i *)userp); + t = _mm_loadu_si128((const __m128i *)treep); res_eq = _mm_cmpeq_epi8(u, t); if (_mm_movemask_epi8(res_eq) != 65535) break; diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index f683ed6b0f8..f4a35de7201 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -13,6 +13,7 @@ #define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal pages by this many increments of the read generation. */ +#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */ #define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ #define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ @@ -23,7 +24,19 @@ struct __wt_evict_entry { WT_BTREE *btree; /* Enclosing btree object */ WT_REF *ref; /* Page to flush/evict */ - uint64_t score; /* Relative eviction priority */ +}; + +#define WT_EVICT_QUEUE_MAX 2 +/* + * WT_EVICT_QUEUE -- + * Encapsulation of an eviction candidate queue. 
+ */ +struct __wt_evict_queue { + WT_SPINLOCK evict_lock; /* Eviction LRU queue */ + WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */ + uint32_t evict_candidates; /* LRU list pages to evict */ + uint32_t evict_entries; /* LRU entries in the queue */ + volatile uint32_t evict_max; /* LRU maximum eviction slot used */ }; /* @@ -63,14 +76,20 @@ struct __wt_cache { uint64_t bytes_overflow; /* Bytes of overflow pages */ uint64_t bytes_evict; /* Bytes/pages discarded by eviction */ uint64_t pages_evict; + uint64_t pages_evicted; /* Pages evicted during a pass */ uint64_t bytes_dirty; /* Bytes/pages currently dirty */ uint64_t pages_dirty; uint64_t bytes_read; /* Bytes read into memory */ - uint64_t app_evicts; /* Pages evicted by user threads */ uint64_t app_waits; /* User threads waited for cache */ + uint64_t app_evicts; /* Pages evicted by user threads */ + uint64_t server_evicts; /* Pages evicted by server thread */ + uint64_t worker_evicts; /* Pages evicted by worker threads */ uint64_t evict_max_page_size; /* Largest page seen at eviction */ +#ifdef HAVE_DIAGNOSTIC + struct timespec stuck_ts; /* Stuck timestamp */ +#endif /* * Read information. @@ -83,7 +102,6 @@ struct __wt_cache { * Eviction thread information. */ WT_CONDVAR *evict_cond; /* Eviction server condition */ - WT_SPINLOCK evict_lock; /* Eviction LRU queue */ WT_SPINLOCK evict_walk_lock; /* Eviction walk location */ /* Condition signalled when the eviction server populates the queue */ WT_CONDVAR *evict_waiter_cond; @@ -98,11 +116,13 @@ struct __wt_cache { /* * LRU eviction list information. 
*/ - WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */ + WT_SPINLOCK evict_pass_lock; /* Eviction pass lock */ + WT_SESSION_IMPL *walk_session; /* Eviction pass session */ + WT_SPINLOCK evict_queue_lock; /* Eviction current queue lock */ + WT_EVICT_QUEUE evict_queues[WT_EVICT_QUEUE_MAX]; + WT_EVICT_QUEUE *evict_current_queue;/* LRU current queue in use */ WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */ - uint32_t evict_candidates; /* LRU list pages to evict */ - uint32_t evict_entries; /* LRU entries in the queue */ - volatile uint32_t evict_max; /* LRU maximum eviction slot used */ + uint32_t evict_queue_fill; /* LRU eviction queue index to fill */ uint32_t evict_slots; /* LRU list eviction slots */ WT_DATA_HANDLE *evict_file_next; /* LRU next file to search */ @@ -130,19 +150,28 @@ struct __wt_cache { #define WT_EVICT_PASS_DIRTY 0x04 #define WT_EVICT_PASS_WOULD_BLOCK 0x08 uint32_t state; + /* + * Pass interrupt counter. + */ + uint32_t pass_intr; /* Interrupt eviction pass. */ /* * Flags. */ #define WT_CACHE_POOL_MANAGER 0x01 /* The active cache pool manager */ #define WT_CACHE_POOL_RUN 0x02 /* Cache pool thread running */ -#define WT_CACHE_CLEAR_WALKS 0x04 /* Clear eviction walks */ -#define WT_CACHE_STUCK 0x08 /* Eviction server is stuck */ -#define WT_CACHE_WALK_REVERSE 0x10 /* Scan backwards for candidates */ -#define WT_CACHE_WOULD_BLOCK 0x20 /* Pages that would block apps */ +#define WT_CACHE_STUCK 0x04 /* Eviction server is stuck */ +#define WT_CACHE_WALK_REVERSE 0x08 /* Scan backwards for candidates */ +#define WT_CACHE_WOULD_BLOCK 0x10 /* Pages that would block apps */ uint32_t flags; }; +#define WT_WITH_PASS_LOCK(session, ret, op) do { \ + WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_PASS)); \ + WT_WITH_LOCK(session, ret, \ + &cache->evict_pass_lock, WT_SESSION_LOCKED_PASS, op); \ +} while (0) + /* * WT_CACHE_POOL -- * A structure that represents a shared cache. 
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i index 8cf7555e716..72c8307756d 100644 --- a/src/third_party/wiredtiger/src/include/cache.i +++ b/src/third_party/wiredtiger/src/include/cache.i @@ -166,6 +166,13 @@ __wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp) cache = conn->cache; /* + * If the connection is closing we do not need eviction from an + * application thread. The eviction subsystem is already closed. + */ + if (F_ISSET(conn, WT_CONN_CLOSING)) + return (false); + + /* * Avoid division by zero if the cache size has not yet been set in a * shared cache. */ @@ -179,6 +186,15 @@ __wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp) pct_full = (u_int)((100 * bytes_inuse) / bytes_max); if (pct_fullp != NULL) *pct_fullp = pct_full; + /* + * If the connection is closing we do not need eviction from an + * application thread. The eviction subsystem is already closed. + * We return here because some callers depend on the percent full + * having been filled in. 
+ */ + if (F_ISSET(conn, WT_CONN_CLOSING)) + return (false); + if (pct_full > cache->eviction_trigger) return (true); diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i index 481d2a29764..c130768e595 100644 --- a/src/third_party/wiredtiger/src/include/cell.i +++ b/src/third_party/wiredtiger/src/include/cell.i @@ -183,9 +183,9 @@ __wt_cell_pack_addr(WT_CELL *cell, u_int cell_type, uint64_t recno, size_t size) p = cell->__chunk + 1; if (recno == WT_RECNO_OOB) - cell->__chunk[0] = cell_type; /* Type */ + cell->__chunk[0] = (uint8_t)cell_type; /* Type */ else { - cell->__chunk[0] = cell_type | WT_CELL_64V; + cell->__chunk[0] = (uint8_t)(cell_type | WT_CELL_64V); (void)__wt_vpack_uint(&p, 0, recno); /* Record number */ } (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */ @@ -207,8 +207,8 @@ __wt_cell_pack_data(WT_CELL *cell, uint64_t rle, size_t size) */ if (rle < 2 && size <= WT_CELL_SHORT_MAX) { byte = (uint8_t)size; /* Type + length */ - cell->__chunk[0] = - (byte << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT; + cell->__chunk[0] = (uint8_t) + ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT); return (1); } @@ -331,8 +331,8 @@ __wt_cell_pack_int_key(WT_CELL *cell, size_t size) /* Short keys have 6 bits of data length in the descriptor byte. 
*/ if (size <= WT_CELL_SHORT_MAX) { byte = (uint8_t)size; - cell->__chunk[0] = - (byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT; + cell->__chunk[0] = (uint8_t) + ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT); return (1); } @@ -358,14 +358,14 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) if (size <= WT_CELL_SHORT_MAX) { if (prefix == 0) { byte = (uint8_t)size; /* Type + length */ - cell->__chunk[0] = - (byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT; + cell->__chunk[0] = (uint8_t) + ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT); return (1); } else { byte = (uint8_t)size; /* Type + length */ - cell->__chunk[0] = - (byte << WT_CELL_SHORT_SHIFT) | - WT_CELL_KEY_SHORT_PFX; + cell->__chunk[0] = (uint8_t) + ((byte << WT_CELL_SHORT_SHIFT) | + WT_CELL_KEY_SHORT_PFX); cell->__chunk[1] = prefix; /* Prefix */ return (2); } @@ -585,8 +585,8 @@ restart: WT_CELL_LEN_CHK(cell, 0); unpack->cell = cell; unpack->v = 0; - unpack->raw = __wt_cell_type_raw(cell); - unpack->type = __wt_cell_type(cell); + unpack->raw = (uint8_t)__wt_cell_type_raw(cell); + unpack->type = (uint8_t)__wt_cell_type(cell); unpack->ovfl = 0; /* diff --git a/src/third_party/wiredtiger/src/include/column.i b/src/third_party/wiredtiger/src/include/column.i index d64e68420a5..d15f874b281 100644 --- a/src/third_party/wiredtiger/src/include/column.i +++ b/src/third_party/wiredtiger/src/include/column.i @@ -209,9 +209,12 @@ __col_insert_search(WT_INSERT_HEAD *ins_head, * Return the last record number for a variable-length column-store page. */ static inline uint64_t -__col_var_last_recno(WT_PAGE *page) +__col_var_last_recno(WT_REF *ref) { WT_COL_RLE *repeat; + WT_PAGE *page; + + page = ref->page; /* * If there's an append list, there may be more records on the page. @@ -220,7 +223,7 @@ __col_var_last_recno(WT_PAGE *page) */ if (page->pg_var_nrepeats == 0) return (page->pg_var_entries == 0 ? 
0 : - page->pg_var_recno + (page->pg_var_entries - 1)); + ref->ref_recno + (page->pg_var_entries - 1)); repeat = &page->pg_var_repeats[page->pg_var_nrepeats - 1]; return ((repeat->recno + repeat->rle) - 1 + @@ -232,15 +235,19 @@ __col_var_last_recno(WT_PAGE *page) * Return the last record number for a fixed-length column-store page. */ static inline uint64_t -__col_fix_last_recno(WT_PAGE *page) +__col_fix_last_recno(WT_REF *ref) { + WT_PAGE *page; + + page = ref->page; + /* * If there's an append list, there may be more records on the page. * This function ignores those records, our callers must handle that * explicitly, if they care. */ - return (page->pg_fix_entries == 0 ? 0 : - page->pg_fix_recno + (page->pg_fix_entries - 1)); + return (page->pg_fix_entries == 0 ? + 0 : ref->ref_recno + (page->pg_fix_entries - 1)); } /* @@ -248,12 +255,15 @@ __col_fix_last_recno(WT_PAGE *page) * Search a variable-length column-store page for a record. */ static inline WT_COL * -__col_var_search(WT_PAGE *page, uint64_t recno, uint64_t *start_recnop) +__col_var_search(WT_REF *ref, uint64_t recno, uint64_t *start_recnop) { WT_COL_RLE *repeat; + WT_PAGE *page; uint64_t start_recno; uint32_t base, indx, limit, start_indx; + page = ref->page; + /* * Find the matching slot. 
* @@ -285,7 +295,7 @@ __col_var_search(WT_PAGE *page, uint64_t recno, uint64_t *start_recnop) */ if (base == 0) { start_indx = 0; - start_recno = page->pg_var_recno; + start_recno = ref->ref_recno; } else { repeat = page->pg_var_repeats + (base - 1); start_indx = repeat->indx + 1; diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h index 48a255134af..486aa50e86c 100644 --- a/src/third_party/wiredtiger/src/include/config.h +++ b/src/third_party/wiredtiger/src/include/config.h @@ -59,41 +59,42 @@ struct __wt_config_parser_impl { #define WT_CONFIG_ENTRY_WT_CONNECTION_load_extension 7 #define WT_CONFIG_ENTRY_WT_CONNECTION_open_session 8 #define WT_CONFIG_ENTRY_WT_CONNECTION_reconfigure 9 -#define WT_CONFIG_ENTRY_WT_CURSOR_close 10 -#define WT_CONFIG_ENTRY_WT_CURSOR_reconfigure 11 -#define WT_CONFIG_ENTRY_WT_SESSION_begin_transaction 12 -#define WT_CONFIG_ENTRY_WT_SESSION_checkpoint 13 -#define WT_CONFIG_ENTRY_WT_SESSION_close 14 -#define WT_CONFIG_ENTRY_WT_SESSION_commit_transaction 15 -#define WT_CONFIG_ENTRY_WT_SESSION_compact 16 -#define WT_CONFIG_ENTRY_WT_SESSION_create 17 -#define WT_CONFIG_ENTRY_WT_SESSION_drop 18 -#define WT_CONFIG_ENTRY_WT_SESSION_join 19 -#define WT_CONFIG_ENTRY_WT_SESSION_log_flush 20 -#define WT_CONFIG_ENTRY_WT_SESSION_log_printf 21 -#define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 22 -#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 23 -#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 24 -#define WT_CONFIG_ENTRY_WT_SESSION_rename 25 -#define WT_CONFIG_ENTRY_WT_SESSION_reset 26 -#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 27 -#define WT_CONFIG_ENTRY_WT_SESSION_salvage 28 -#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 29 -#define WT_CONFIG_ENTRY_WT_SESSION_strerror 30 -#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 31 -#define WT_CONFIG_ENTRY_WT_SESSION_truncate 32 -#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 33 -#define WT_CONFIG_ENTRY_WT_SESSION_verify 34 -#define 
WT_CONFIG_ENTRY_colgroup_meta 35 -#define WT_CONFIG_ENTRY_file_config 36 -#define WT_CONFIG_ENTRY_file_meta 37 -#define WT_CONFIG_ENTRY_index_meta 38 -#define WT_CONFIG_ENTRY_lsm_meta 39 -#define WT_CONFIG_ENTRY_table_meta 40 -#define WT_CONFIG_ENTRY_wiredtiger_open 41 -#define WT_CONFIG_ENTRY_wiredtiger_open_all 42 -#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 43 -#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 44 +#define WT_CONFIG_ENTRY_WT_CONNECTION_set_file_system 10 +#define WT_CONFIG_ENTRY_WT_CURSOR_close 11 +#define WT_CONFIG_ENTRY_WT_CURSOR_reconfigure 12 +#define WT_CONFIG_ENTRY_WT_SESSION_begin_transaction 13 +#define WT_CONFIG_ENTRY_WT_SESSION_checkpoint 14 +#define WT_CONFIG_ENTRY_WT_SESSION_close 15 +#define WT_CONFIG_ENTRY_WT_SESSION_commit_transaction 16 +#define WT_CONFIG_ENTRY_WT_SESSION_compact 17 +#define WT_CONFIG_ENTRY_WT_SESSION_create 18 +#define WT_CONFIG_ENTRY_WT_SESSION_drop 19 +#define WT_CONFIG_ENTRY_WT_SESSION_join 20 +#define WT_CONFIG_ENTRY_WT_SESSION_log_flush 21 +#define WT_CONFIG_ENTRY_WT_SESSION_log_printf 22 +#define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 23 +#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 24 +#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 25 +#define WT_CONFIG_ENTRY_WT_SESSION_rename 26 +#define WT_CONFIG_ENTRY_WT_SESSION_reset 27 +#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 28 +#define WT_CONFIG_ENTRY_WT_SESSION_salvage 29 +#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 30 +#define WT_CONFIG_ENTRY_WT_SESSION_strerror 31 +#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 32 +#define WT_CONFIG_ENTRY_WT_SESSION_truncate 33 +#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 34 +#define WT_CONFIG_ENTRY_WT_SESSION_verify 35 +#define WT_CONFIG_ENTRY_colgroup_meta 36 +#define WT_CONFIG_ENTRY_file_config 37 +#define WT_CONFIG_ENTRY_file_meta 38 +#define WT_CONFIG_ENTRY_index_meta 39 +#define WT_CONFIG_ENTRY_lsm_meta 40 +#define WT_CONFIG_ENTRY_table_meta 41 +#define WT_CONFIG_ENTRY_wiredtiger_open 42 +#define 
WT_CONFIG_ENTRY_wiredtiger_open_all 43 +#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 44 +#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 45 /* * configuration section: END * DO NOT EDIT: automatically built by dist/flags.py. diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index c2b1dd68c18..0e0c357279a 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -145,20 +145,6 @@ struct __wt_named_extractor { } while (0) /* - * Macros to ensure the file handle is inserted or removed from both the - * main queue and the hashed queue. - */ -#define WT_CONN_FILE_INSERT(conn, fh, bucket) do { \ - TAILQ_INSERT_HEAD(&(conn)->fhqh, fh, q); \ - TAILQ_INSERT_HEAD(&(conn)->fhhash[bucket], fh, hashq); \ -} while (0) - -#define WT_CONN_FILE_REMOVE(conn, fh, bucket) do { \ - TAILQ_REMOVE(&(conn)->fhqh, fh, q); \ - TAILQ_REMOVE(&(conn)->fhhash[bucket], fh, hashq); \ -} while (0) - -/* * WT_CONNECTION_IMPL -- * Implementation of WT_CONNECTION */ @@ -333,7 +319,7 @@ struct __wt_connection_impl { bool stat_tid_set; /* Statistics log thread set */ WT_CONDVAR *stat_cond; /* Statistics log wait mutex */ const char *stat_format; /* Statistics log timestamp format */ - WT_FH *stat_fh; /* Statistics log file handle */ + WT_FSTREAM *stat_fs; /* Statistics log stream */ char *stat_path; /* Statistics log path format */ char **stat_sources; /* Statistics log list of objects */ const char *stat_stamp; /* Statistics log entry timestamp */ @@ -366,7 +352,6 @@ struct __wt_connection_impl { uint32_t txn_logsync; /* Log sync configuration */ WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */ - uint64_t meta_uri_hash; /* Metadata file name hash */ WT_SESSION_IMPL *sweep_session; /* Handle sweep session */ wt_thread_t sweep_tid; /* Handle sweep thread */ @@ -414,32 +399,26 @@ struct __wt_connection_impl { wt_off_t data_extend_len; /* 
file_extend data length */ wt_off_t log_extend_len; /* file_extend log length */ - /* O_DIRECT/FILE_FLAG_NO_BUFFERING file type flags */ - uint32_t direct_io; - uint32_t write_through; /* FILE_FLAG_WRITE_THROUGH type flags */ +#define WT_DIRECT_IO_CHECKPOINT 0x01 /* Checkpoints */ +#define WT_DIRECT_IO_DATA 0x02 /* Data files */ +#define WT_DIRECT_IO_LOG 0x04 /* Log files */ + uint32_t direct_io; /* O_DIRECT, FILE_FLAG_NO_BUFFERING */ + + uint32_t write_through; /* FILE_FLAG_WRITE_THROUGH */ + bool mmap; /* mmap configuration */ int page_size; /* OS page size for mmap alignment */ uint32_t verbose; - void *inmemory; /* In-memory configuration cookie */ - #define WT_STDERR(s) (&S2C(s)->wt_stderr) #define WT_STDOUT(s) (&S2C(s)->wt_stdout) - WT_FH wt_stderr, wt_stdout; + WT_FSTREAM wt_stderr, wt_stdout; /* - * OS library/system call jump table, to support in-memory and readonly - * configurations as well as special devices with other non-POSIX APIs. + * File system interface abstracted to support alternative file system + * implementations. */ - int (*file_directory_list)(WT_SESSION_IMPL *, - const char *, const char *, uint32_t, char ***, u_int *); - int (*file_directory_sync)(WT_SESSION_IMPL *, const char *); - int (*file_exist)(WT_SESSION_IMPL *, const char *, bool *); - int (*file_remove)(WT_SESSION_IMPL *, const char *); - int (*file_rename)(WT_SESSION_IMPL *, const char *, const char *); - int (*file_size)(WT_SESSION_IMPL *, const char *, bool, wt_off_t *); - int (*handle_open)(WT_SESSION_IMPL *, - WT_FH *, const char *, uint32_t, uint32_t); + WT_FILE_SYSTEM *file_system; uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/ctype.i b/src/third_party/wiredtiger/src/include/ctype.i new file mode 100644 index 00000000000..b4a1ad9f318 --- /dev/null +++ b/src/third_party/wiredtiger/src/include/ctype.i @@ -0,0 +1,69 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. 
+ * + * See the file LICENSE for redistribution information. + */ + +#include <ctype.h> + +/* + * __wt_isalnum -- + * Wrap the ctype function without sign extension. + */ +static inline bool +__wt_isalnum(u_char c) +{ + return (isalnum(c) != 0); +} + +/* + * __wt_isalpha -- + * Wrap the ctype function without sign extension. + */ +static inline bool +__wt_isalpha(u_char c) +{ + return (isalpha(c) != 0); +} + +/* + * __wt_isdigit -- + * Wrap the ctype function without sign extension. + */ +static inline bool +__wt_isdigit(u_char c) +{ + return (isdigit(c) != 0); +} + +/* + * __wt_isprint -- + * Wrap the ctype function without sign extension. + */ +static inline bool +__wt_isprint(u_char c) +{ + return (isprint(c) != 0); +} + +/* + * __wt_isspace -- + * Wrap the ctype function without sign extension. + */ +static inline bool +__wt_isspace(u_char c) +{ + return (isspace(c) != 0); +} + +/* + * __wt_tolower -- + * Wrap the ctype function without sign extension. + */ +static inline u_char +__wt_tolower(u_char c) +{ + return ((u_char)tolower(c)); +} diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 1d2ce1bfd82..6357523a03f 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -67,7 +67,7 @@ struct __wt_cursor_backup { WT_CURSOR iface; size_t next; /* Cursor position */ - WT_FH *bfh; /* Backup file */ + WT_FSTREAM *bfs; /* Backup file stream */ uint32_t maxid; /* Maximum log file ID seen */ WT_CURSOR_BACKUP_ENTRY *list; /* List of files to be copied. */ @@ -284,18 +284,50 @@ struct __wt_cursor_index { uint8_t *cg_needvalue; }; +/* + * A join iterator structure is used to generate candidate primary keys. It + * is the responsibility of the caller of the iterator to filter these + * primary key against the other conditions of the join before returning + * them the caller of WT_CURSOR::next. 
+ * + * For a conjunction join (the default), entry_count will be 1, meaning that + * the iterator only consumes the first entry (WT_CURSOR_JOIN_ENTRY). That + * is, it successively returns primary keys from a cursor for the first + * index that was joined. When the values returned by that cursor are + * exhausted, the iterator has completed. For a disjunction join, + * exhausting a cursor just means that the iterator advances to the next + * entry. If the next entry represents an index, a new cursor is opened and + * primary keys from that index are then successively returned. + * + * When positioned on an entry that represents a nested join, a new child + * iterator is created that will be bound to the nested WT_CURSOR_JOIN. + * That iterator is then used to generate candidate primary keys. When its + * iteration is completed, that iterator is destroyed and the parent + * iterator advances to the next entry. Thus, depending on how deeply joins + * are nested, a similarly deep stack of iterators is created. + */ struct __wt_cursor_join_iter { WT_SESSION_IMPL *session; WT_CURSOR_JOIN *cjoin; WT_CURSOR_JOIN_ENTRY *entry; + WT_CURSOR_JOIN_ITER *child; WT_CURSOR *cursor; /* has null projection */ - WT_CURSOR *main; /* main table with projection */ WT_ITEM *curkey; /* primary key */ WT_ITEM idxkey; + u_int entry_pos; /* the current entry */ + u_int entry_count; /* entries to walk */ + u_int end_pos; /* the current endpoint */ + u_int end_count; /* endpoints to walk */ + u_int end_skip; /* when testing for inclusion */ + /* can we skip current end? */ bool positioned; - bool isequal; /* advancing means we're done */ + bool is_equal; }; +/* + * A join endpoint represents a positioned cursor that is 'captured' by a + * WT_SESSION::join call. 
+ */ struct __wt_cursor_join_endpoint { WT_ITEM key; uint8_t recno_buf[10]; /* holds packed recno */ @@ -313,9 +345,17 @@ struct __wt_cursor_join_endpoint { ((endp)->flags & \ (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | WT_CURJOIN_END_LT)) +/* + * Each join entry typically represents an index's participation in a join. + * For example, if 'k' is an index, then "t.k > 10 && t.k < 20" would be + * represented by a single entry, with two endpoints. When the index and + * subjoin fields are NULL, the join is on the main table. When subjoin is + * non-NULL, there is a nested join clause. + */ struct __wt_cursor_join_entry { WT_INDEX *index; WT_CURSOR *main; /* raw main table cursor */ + WT_CURSOR_JOIN *subjoin; /* a nested join clause */ WT_BLOOM *bloom; /* Bloom filter handle */ char *repack_format; /* target format for repack */ uint32_t bloom_bit_count; /* bits per item in bloom */ @@ -339,15 +379,17 @@ struct __wt_cursor_join { WT_TABLE *table; const char *projection; - WT_CURSOR_JOIN_ITER *iter; + WT_CURSOR *main; /* main table with projection */ + WT_CURSOR_JOIN *parent; /* parent of nested group */ + WT_CURSOR_JOIN_ITER *iter; /* chain of iterators */ WT_CURSOR_JOIN_ENTRY *entries; size_t entries_allocated; u_int entries_next; uint8_t recno_buf[10]; /* holds packed recno */ -#define WT_CURJOIN_ERROR 0x01 /* Error in initialization */ -#define WT_CURJOIN_INITIALIZED 0x02 /* Successful initialization */ -#define WT_CURJOIN_SKIP_FIRST_LEFT 0x04 /* First check not needed */ +#define WT_CURJOIN_DISJUNCTION 0x01 /* Entries are or-ed */ +#define WT_CURJOIN_ERROR 0x02 /* Error in initialization */ +#define WT_CURJOIN_INITIALIZED 0x04 /* Successful initialization */ uint8_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h index 8b313428d06..9a11594c893 100644 --- a/src/third_party/wiredtiger/src/include/dhandle.h +++ b/src/third_party/wiredtiger/src/include/dhandle.h @@ -82,7 +82,8 @@ struct 
__wt_data_handle { #define WT_DHANDLE_DISCARD 0x02 /* Discard on release */ #define WT_DHANDLE_DISCARD_FORCE 0x04 /* Force discard on release */ #define WT_DHANDLE_EXCLUSIVE 0x08 /* Need exclusive access */ -#define WT_DHANDLE_LOCK_ONLY 0x10 /* Handle only used as a lock */ -#define WT_DHANDLE_OPEN 0x20 /* Handle is open */ +#define WT_DHANDLE_IS_METADATA 0x10 /* Metadata handle */ +#define WT_DHANDLE_LOCK_ONLY 0x20 /* Handle only used as a lock */ +#define WT_DHANDLE_OPEN 0x40 /* Handle is open */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index f2b13023386..b0c0f6eccad 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -41,8 +41,8 @@ extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, W extern int __wt_block_extlist_truncate( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el); extern int __wt_block_extlist_init(WT_SESSION_IMPL *session, WT_EXTLIST *el, const char *name, const char *extname, bool track_size); extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el); -extern int __wt_block_map( WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp, void **mappingcookie); -extern int __wt_block_unmap( WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen, void **mappingcookie); +extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep); +extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie); extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp); extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename); extern int __wt_block_manager_create( 
WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize); @@ -118,9 +118,9 @@ extern int __wt_debug_offset_blind( WT_SESSION_IMPL *session, wt_off_t offset, c extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size, uint32_t cksum, const char *ofile); extern int __wt_debug_disk( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile); extern int __wt_debug_tree_shape( WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile); -extern int __wt_debug_tree_all( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_PAGE *page, const char *ofile); -extern int __wt_debug_tree( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_PAGE *page, const char *ofile); -extern int __wt_debug_page(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile); +extern int __wt_debug_tree_all( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile); +extern int __wt_debug_tree( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile); +extern int __wt_debug_page(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile); extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp); extern void __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref); extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all); @@ -134,7 +134,7 @@ extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]); extern int __wt_btree_close(WT_SESSION_IMPL *session); extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno); extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size); -extern int __wt_btree_new_leaf_page( WT_SESSION_IMPL *session, uint64_t recno, WT_PAGE **pagep); +extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep); extern void __wt_btree_evictable(WT_SESSION_IMPL *session, bool on); extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session); extern void 
__wt_btree_huffman_close(WT_SESSION_IMPL *session); @@ -144,11 +144,10 @@ extern const char *__wt_page_type_string(u_int type); extern const char *__wt_cell_type_string(uint8_t type); extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf); extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf); -extern const char *__wt_buf_set_printable( WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf); extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store); extern int __wt_ovfl_cache(WT_SESSION_IMPL *session, WT_PAGE *page, void *cookie, WT_CELL_UNPACK *vpack); extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell); -extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint64_t recno, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep); +extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep); extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, size_t memsize, uint32_t flags, WT_PAGE **pagep); extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, const uint8_t *addr, size_t addr_size); extern int @@ -203,6 +202,8 @@ extern int __wt_las_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp); extern int __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags); extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags); extern int __wt_las_sweep(WT_SESSION_IMPL *session); +extern uint32_t __wt_cksum(const void *chunk, size_t len); +extern void __wt_cksum_init(void); extern int __wt_config_initn( WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len); extern int __wt_config_init(WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str); 
extern int __wt_config_subinit( WT_SESSION_IMPL *session, WT_CONFIG *conf, WT_CONFIG_ITEM *item); @@ -283,19 +284,19 @@ extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **c extern int __wt_curfile_update_check(WT_CURSOR *cursor); extern int __wt_curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap, WT_CURSOR **cursorp); extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); -extern int __wt_curindex_joined(WT_CURSOR *cursor); extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); +extern int __wt_curjoin_joined(WT_CURSOR *cursor); extern int __wt_curjoin_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx, WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range, uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count); extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, WT_CURSOR_JSON *json, bool iskey, va_list ap); extern void __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor); -extern size_t __wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, bool force_unicode); +extern size_t __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode); extern int __wt_json_column_init(WT_CURSOR *cursor, const char *keyformat, const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf); extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, const char **tokstart, size_t *toklen); extern const char *__wt_json_tokname(int toktype); extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item); extern 
ssize_t __wt_json_strlen(const char *src, size_t srclen); -extern int __wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen); +extern int __wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, const char *src, size_t srclen); extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp); extern int __wt_schema_create_final( WT_SESSION_IMPL *session, char *cfg_arg[], char **value_ret); extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); @@ -356,7 +357,6 @@ extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn); extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bool *recp); extern void __wt_log_written_reset(WT_SESSION_IMPL *session); extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, bool active_only); -extern void __wt_log_files_free(WT_SESSION_IMPL *session, char **files, u_int count); extern int __wt_log_extract_lognum( WT_SESSION_IMPL *session, const char *name, uint32_t *id); extern int __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot); extern int __wt_log_allocfile( WT_SESSION_IMPL *session, uint32_t lognum, const char *dest); @@ -460,7 +460,6 @@ extern int __wt_ext_metadata_search(WT_EXTENSION_API *wt_api, WT_SESSION *wt_ses extern int __wt_ext_metadata_update(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value); extern int __wt_metadata_get_ckptlist( WT_SESSION *session, const char *name, WT_CKPT **ckptbasep); extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase); -extern void __wt_metadata_init(WT_SESSION_IMPL *session); extern int __wt_metadata_cursor_open( WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp); extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR 
**cursorp); extern int __wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp); @@ -484,6 +483,31 @@ extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session); extern int __wt_turtle_init(WT_SESSION_IMPL *session); extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep); extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value); +extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path); +extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path); +extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name); +extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to); +extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to); +extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); +extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp); +extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); +extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); +extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); +extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); +extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp); +extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); +extern int __wt_errno(void); +extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); +extern int __wt_ext_map_windows_error( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error); +extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const 
char *name); +extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp); +extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp); +extern int __wt_close_connection_close(WT_SESSION_IMPL *session); +extern int __wt_os_inmemory(WT_SESSION_IMPL *session); +extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp); +extern int __wt_os_stdio(WT_SESSION_IMPL *session); +extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); +extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); extern int __wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer, size_t size, const char *fmt, ...); extern int __wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *sizep, const char *fmt, ...); extern int __wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const void *buffer, size_t size, const char *fmt, ...); @@ -569,6 +593,7 @@ extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR extern int __wt_session_create( WT_SESSION_IMPL *session, const char *uri, const char *config); extern int __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]); extern int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop); +extern const char *__wt_session_strerror(WT_SESSION *wt_session, int error); extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp); extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp); extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config); @@ -580,8 +605,6 @@ extern void 
__wt_session_close_cache(WT_SESSION_IMPL *session); extern int __wt_session_get_btree(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, const char *cfg[], uint32_t flags); extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint); extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]); -extern uint32_t __wt_cksum(const void *chunk, size_t len); -extern void __wt_cksum_init(void); extern int __wt_cond_auto_alloc( WT_SESSION_IMPL *session, const char *name, bool is_signalled, uint64_t min, uint64_t max, WT_CONDVAR **condp); extern int __wt_cond_auto_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); extern int __wt_cond_auto_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool *signalled); @@ -599,7 +622,14 @@ extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_ extern int __wt_ext_msg_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))); extern const char *__wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, int error); extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v); -extern void __wt_assert(WT_SESSION_IMPL *session, int error, const char *file_name, int line_number, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 5, 6))); +extern void +__wt_assert(WT_SESSION_IMPL *session, + int error, const char *file_name, int line_number, const char *fmt, ...) 
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 5, 6))) +#ifdef HAVE_DIAGNOSTIC + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)) +#endif +; extern int __wt_panic(WT_SESSION_IMPL *session); extern int __wt_illegal_value(WT_SESSION_IMPL *session, const char *name); extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri); @@ -648,6 +678,8 @@ extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state); extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size); extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))); extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))); +extern const char *__wt_buf_set_printable( WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf); +extern const char *__wt_buf_set_size( WT_SESSION_IMPL *session, uint64_t size, bool exact, WT_ITEM *buf); extern int __wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp #ifdef HAVE_DIAGNOSTIC @@ -712,70 +744,3 @@ extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops); extern int __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_recover(WT_SESSION_IMPL *session); -extern bool __wt_absolute_path(const char *path); -extern bool __wt_handle_search(WT_SESSION_IMPL *session, const char *name, bool increment_ref, WT_FH *newfh, WT_FH **fhp); -extern bool __wt_has_priv(void); -extern const char *__wt_path_separator(void); -extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); -extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp); -extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp); -extern int 
__wt_close_connection_close(WT_SESSION_IMPL *session); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp); -extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); -extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); -extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled); -extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to); -extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); -extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp); -extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret); -extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); -extern int __wt_errno(void); -extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path); -extern int __wt_get_vm_pagesize(void); -extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); -extern int __wt_getlasterror(void); -extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); -extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); -extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); -extern int __wt_map_error_rdonly(int error); -extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path); -extern int __wt_once(void (*init_routine)(void)); -extern int __wt_open(WT_SESSION_IMPL *session, const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp); -extern int __wt_os_cleanup(WT_SESSION_IMPL *session); -extern int __wt_os_init(WT_SESSION_IMPL *session); -extern int __wt_os_inmemory(WT_SESSION_IMPL *session); -extern int __wt_os_inmemory_cleanup(WT_SESSION_IMPL *session); -extern int __wt_os_posix(WT_SESSION_IMPL *session); -extern 
int __wt_os_posix_cleanup(WT_SESSION_IMPL *session); -extern int __wt_os_stdio(WT_SESSION_IMPL *session); -extern int __wt_os_win(WT_SESSION_IMPL *session); -extern int __wt_os_win_cleanup(WT_SESSION_IMPL *session); -extern int __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); -extern int __wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); -extern int __wt_posix_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); -extern int __wt_posix_map_discard( WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); -extern int __wt_posix_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); -extern int __wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); -extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); -extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); -extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); -extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name); -extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to); -extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp); -extern int __wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to); -extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); -extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); -extern int __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, 
u_int *countp); -extern int __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); -extern int __wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); -extern int __wt_win_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); -extern int __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); -extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); -extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); -extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); -extern void __wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh); -extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); -extern void __wt_stream_set_line_buffer(FILE *fp); -extern void __wt_stream_set_no_buffer(FILE *fp); -extern void __wt_thread_id(char *buf, size_t buflen); -extern void __wt_yield(void); diff --git a/src/third_party/wiredtiger/src/include/extern_posix.h b/src/third_party/wiredtiger/src/include/extern_posix.h new file mode 100644 index 00000000000..6fde537f36b --- /dev/null +++ b/src/third_party/wiredtiger/src/include/extern_posix.h @@ -0,0 +1,31 @@ +/* DO NOT EDIT: automatically built by dist/s_prototypes. 
*/ + +extern int __wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp); +extern int __wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count); +extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp); +extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret); +extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); +extern int __wt_posix_file_fallocate(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, wt_off_t len); +extern int __wt_os_posix(WT_SESSION_IMPL *session); +extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); +extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep); +extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie); +extern int __wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie); +extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp); +extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled); +extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); +extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); +extern int __wt_once(void (*init_routine)(void)); +extern int __wt_get_vm_pagesize(void); +extern bool __wt_absolute_path(const char *path); +extern const char *__wt_path_separator(void); +extern bool __wt_has_priv(void); +extern void __wt_stream_set_line_buffer(FILE *fp); 
+extern void __wt_stream_set_no_buffer(FILE *fp); +extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); +extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); +extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); +extern void __wt_thread_id(char *buf, size_t buflen); +extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); +extern void __wt_yield(void); diff --git a/src/third_party/wiredtiger/src/include/extern_win.h b/src/third_party/wiredtiger/src/include/extern_win.h new file mode 100644 index 00000000000..c5c2624db2c --- /dev/null +++ b/src/third_party/wiredtiger/src/include/extern_win.h @@ -0,0 +1,32 @@ +/* DO NOT EDIT: automatically built by dist/s_prototypes. */ + +extern int __wt_win_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp); +extern int __wt_win_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count); +extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp); +extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret); +extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); +extern int __wt_win_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep); +extern int __wt_os_win(WT_SESSION_IMPL *session); +extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); +extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep); +extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region, size_t length, void *mapped_cookie); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp); 
+extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled); +extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); +extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); +extern int __wt_once(void (*init_routine)(void)); +extern int __wt_get_vm_pagesize(void); +extern bool __wt_absolute_path(const char *path); +extern const char *__wt_path_separator(void); +extern bool __wt_has_priv(void); +extern void __wt_stream_set_line_buffer(FILE *fp); +extern void __wt_stream_set_no_buffer(FILE *fp); +extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); +extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); +extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); +extern void __wt_thread_id(char *buf, size_t buflen); +extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); +extern DWORD __wt_getlasterror(void); +extern int __wt_map_windows_error(DWORD windows_error); +extern const char *__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error); +extern void __wt_yield(void); diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h index 7682af5a4b8..f134af69d29 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ b/src/third_party/wiredtiger/src/include/flags.h @@ -13,22 +13,18 @@ #define WT_CONN_LSM_MERGE 0x00000100 #define WT_CONN_PANIC 0x00000200 #define WT_CONN_READONLY 0x00000400 -#define WT_CONN_SERVER_ASYNC 0x00000800 -#define WT_CONN_SERVER_CHECKPOINT 0x00001000 -#define WT_CONN_SERVER_LSM 0x00002000 -#define WT_CONN_SERVER_RUN 0x00004000 -#define WT_CONN_SERVER_STATISTICS 0x00008000 -#define WT_CONN_SERVER_SWEEP 0x00010000 -#define WT_CONN_WAS_BACKUP 0x00020000 +#define WT_CONN_RECOVERING 0x00000800 +#define WT_CONN_SERVER_ASYNC 0x00001000 +#define WT_CONN_SERVER_CHECKPOINT 0x00002000 +#define 
WT_CONN_SERVER_LSM 0x00004000 +#define WT_CONN_SERVER_RUN 0x00008000 +#define WT_CONN_SERVER_STATISTICS 0x00010000 +#define WT_CONN_SERVER_SWEEP 0x00020000 +#define WT_CONN_WAS_BACKUP 0x00040000 #define WT_EVICTING 0x00000001 #define WT_EVICT_IN_MEMORY 0x00000002 #define WT_EVICT_LOOKASIDE 0x00000004 #define WT_EVICT_UPDATE_RESTORE 0x00000008 -#define WT_FILE_TYPE_CHECKPOINT 0x00000001 -#define WT_FILE_TYPE_DATA 0x00000002 -#define WT_FILE_TYPE_DIRECTORY 0x00000004 -#define WT_FILE_TYPE_LOG 0x00000008 -#define WT_FILE_TYPE_REGULAR 0x00000010 #define WT_LOGSCAN_FIRST 0x00000001 #define WT_LOGSCAN_FROM_CKP 0x00000002 #define WT_LOGSCAN_ONE 0x00000004 @@ -52,11 +48,11 @@ #define WT_READ_TRUNCATE 0x00000800 #define WT_READ_WONT_NEED 0x00001000 #define WT_SESSION_CAN_WAIT 0x00000001 -#define WT_SESSION_CLEAR_EVICT_WALK 0x00000002 -#define WT_SESSION_INTERNAL 0x00000004 -#define WT_SESSION_LOCKED_CHECKPOINT 0x00000008 -#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000010 -#define WT_SESSION_LOCKED_METADATA 0x00000020 +#define WT_SESSION_INTERNAL 0x00000002 +#define WT_SESSION_LOCKED_CHECKPOINT 0x00000004 +#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000008 +#define WT_SESSION_LOCKED_METADATA 0x00000010 +#define WT_SESSION_LOCKED_PASS 0x00000020 #define WT_SESSION_LOCKED_SCHEMA 0x00000040 #define WT_SESSION_LOCKED_SLOT 0x00000080 #define WT_SESSION_LOCKED_TABLE 0x00000100 diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h index f84b147cb70..870c046252c 100644 --- a/src/third_party/wiredtiger/src/include/log.h +++ b/src/third_party/wiredtiger/src/include/log.h @@ -46,10 +46,12 @@ union __wt_lsn { */ #define WT_IS_INIT_LSN(l) ((l)->file_offset == ((uint64_t)1 << 32)) /* - * XXX Original tested INT32_MAX. + * Original tested INT32_MAX. But if we read one from an older + * release we may see UINT32_MAX. 
*/ #define WT_IS_MAX_LSN(lsn) \ - ((lsn)->l.file == UINT32_MAX && (lsn)->l.offset == INT32_MAX) + ((lsn)->l.file == UINT32_MAX && \ + ((lsn)->l.offset == INT32_MAX || (lsn)->l.offset == UINT32_MAX)) /* * Both of the macros below need to change if the content of __wt_lsn @@ -254,7 +256,6 @@ struct __wt_log { #ifdef HAVE_DIAGNOSTIC uint64_t write_calls; /* Calls to log_write */ #endif - #define WT_LOG_OPENED 0x01 /* Log subsystem successfully open */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h index ac0f5fedac4..63c79dbc72e 100644 --- a/src/third_party/wiredtiger/src/include/meta.h +++ b/src/third_party/wiredtiger/src/include/meta.h @@ -14,8 +14,10 @@ #define WT_USERCONFIG "WiredTiger.config" /* User configuration */ +#define WT_BACKUP_TMP "WiredTiger.backup.tmp" /* Backup tmp file */ #define WT_METADATA_BACKUP "WiredTiger.backup" /* Hot backup file */ #define WT_INCREMENTAL_BACKUP "WiredTiger.ibackup" /* Incremental backup */ +#define WT_INCREMENTAL_SRC "WiredTiger.isrc" /* Incremental source */ #define WT_METADATA_TURTLE "WiredTiger.turtle" /* Metadata metadata */ #define WT_METADATA_TURTLE_SET "WiredTiger.turtle.set" /* Turtle temp file */ @@ -32,8 +34,7 @@ * when diagnostic is enabled. */ #define WT_IS_METADATA(session, dh) \ - ((dh)->name_hash == S2C(session)->meta_uri_hash && \ - strcmp((dh)->name, WT_METAFILE_URI) == 0) + F_ISSET((dh), WT_DHANDLE_IS_METADATA) #define WT_METAFILE_ID 0 /* Metadata file ID */ #define WT_METADATA_VERSION "WiredTiger version" /* Version keys */ diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h index 07d52c61eac..1121b7dfa75 100644 --- a/src/third_party/wiredtiger/src/include/misc.h +++ b/src/third_party/wiredtiger/src/include/misc.h @@ -12,6 +12,8 @@ */ #define WT_UNUSED(var) (void)(var) +#define WT_DIVIDER "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" + /* Basic constants. 
*/ #define WT_THOUSAND (1000) #define WT_MILLION (1000000) @@ -31,12 +33,12 @@ */ #define WT_STORE_SIZE(s) ((uint32_t)(s)) #define WT_PTRDIFF(end, begin) \ - ((size_t)((uint8_t *)(end) - (uint8_t *)(begin))) + ((size_t)((const uint8_t *)(end) - (const uint8_t *)(begin))) #define WT_PTRDIFF32(end, begin) \ WT_STORE_SIZE(WT_PTRDIFF((end), (begin))) #define WT_BLOCK_FITS(p, len, begin, maxlen) \ - ((uint8_t *)(p) >= (uint8_t *)(begin) && \ - ((uint8_t *)(p) + (len) <= (uint8_t *)(begin) + (maxlen))) + ((const uint8_t *)(p) >= (const uint8_t *)(begin) && \ + ((const uint8_t *)(p) + (len) <= (const uint8_t *)(begin) + (maxlen))) #define WT_PTR_IN_RANGE(p, begin, maxlen) \ WT_BLOCK_FITS((p), 1, (begin), (maxlen)) @@ -96,8 +98,9 @@ * the caller remember to put the & operator on the pointer. */ #define __wt_free(session, p) do { \ - if ((p) != NULL) \ - __wt_free_int(session, (void *)&(p)); \ + void *__p = &(p); \ + if (*(void **)__p != NULL) \ + __wt_free_int(session, __p); \ } while (0) #ifdef HAVE_DIAGNOSTIC #define __wt_overwrite_and_free(session, p) do { \ diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i index 114b711ac88..eaa7a328ff1 100644 --- a/src/third_party/wiredtiger/src/include/misc.i +++ b/src/third_party/wiredtiger/src/include/misc.i @@ -70,248 +70,3 @@ __wt_verbose(WT_SESSION_IMPL *session, int flag, const char *fmt, ...) return (0); #endif } - -/* - * __wt_dirlist -- - * Get a list of files from a directory. - */ -static inline int -__wt_dirlist(WT_SESSION_IMPL *session, const char *dir, - const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: directory-list: %s prefix %s", - dir, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude", - prefix == NULL ? 
"all" : prefix)); - - return (S2C(session)->file_directory_list( - session, dir, prefix, flags, dirlist, countp)); -} - -/* - * __wt_directory_sync -- - * Flush a directory to ensure file creation is durable. - */ -static inline int -__wt_directory_sync(WT_SESSION_IMPL *session, const char *name) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: directory-sync", name)); - - return (S2C(session)->file_directory_sync(session, name)); -} - -/* - * __wt_exist -- - * Return if the file exists. - */ -static inline int -__wt_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) -{ - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-exist", name)); - - return (S2C(session)->file_exist(session, name, existp)); -} - -/* - * __wt_remove -- - * POSIX remove. - */ -static inline int -__wt_remove(WT_SESSION_IMPL *session, const char *name) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-remove", name)); - - return (S2C(session)->file_remove(session, name)); -} - -/* - * __wt_rename -- - * POSIX rename. - */ -static inline int -__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s to %s: file-rename", from, to)); - - return (S2C(session)->file_rename(session, from, to)); -} - -/* - * __wt_filesize_name -- - * Get the size of a file in bytes, by file name. - */ -static inline int -__wt_filesize_name( - WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) -{ - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-size", name)); - - return (S2C(session)->file_size(session, name, silent, sizep)); -} - -/* - * __wt_directory_sync_fh -- - * Flush a directory file handle to ensure file creation is durable. 
- * - * We don't use the normal sync path because many file systems don't require - * this step and we don't want to penalize them. - */ -static inline int -__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - return (fh->fh_sync(session, fh, true)); -} - -/* - * __wt_fallocate -- - * Extend a file. - */ -static inline int -__wt_fallocate( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - - WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: handle-allocate: %" PRIuMAX " at %" PRIuMAX, - fh->name, (uintmax_t)len, (uintmax_t)offset)); - - return (fh->fh_allocate(session, fh, offset, len)); -} - -/* - * __wt_file_lock -- - * Lock/unlock a file. - */ -static inline int -__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock) -{ - WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: handle-lock: %s", fh->name, lock ? "lock" : "unlock")); - - return (fh->fh_lock(session, fh, lock)); -} - -/* - * __wt_vfprintf -- - * ANSI C vfprintf. - */ -static inline int -__wt_vfprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - WT_RET(__wt_verbose( - session, WT_VERB_HANDLEOPS, "%s: handle-printf", fh->name)); - - return (fh->fh_printf(session, fh, fmt, ap)); -} - -/* - * __wt_fprintf -- - * ANSI C fprintf. - */ -static inline int -__wt_fprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, ...) - WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) -{ - WT_DECL_RET; - va_list ap; - - va_start(ap, fmt); - ret = __wt_vfprintf(session, fh, fmt, ap); - va_end(ap); - - return (ret); -} - -/* - * __wt_read -- - * POSIX pread. 
- */ -static inline int -__wt_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX, - fh->name, len, (uintmax_t)offset)); - - WT_STAT_FAST_CONN_INCR(session, read_io); - - return (fh->fh_read(session, fh, offset, len, buf)); -} - -/* - * __wt_filesize -- - * Get the size of a file in bytes, by file handle. - */ -static inline int -__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - WT_RET(__wt_verbose( - session, WT_VERB_HANDLEOPS, "%s: handle-size", fh->name)); - - return (fh->fh_size(session, fh, sizep)); -} - -/* - * __wt_fsync -- - * POSIX fflush/fsync. - */ -static inline int -__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) -{ - WT_RET(__wt_verbose( - session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->name)); - - return (fh->fh_sync(session, fh, block)); -} - -/* - * __wt_ftruncate -- - * POSIX ftruncate. - */ -static inline int -__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: handle-truncate: %" PRIuMAX, - fh->name, (uintmax_t)len)); - - return (fh->fh_truncate(session, fh, len)); -} - -/* - * __wt_write -- - * POSIX pwrite. 
- */ -static inline int -__wt_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || - WT_STRING_MATCH(fh->name, - WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); - - WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX, - fh->name, len, (uintmax_t)offset)); - - WT_STAT_FAST_CONN_INCR(session, write_io); - - return (fh->fh_write(session, fh, offset, len, buf)); -} diff --git a/src/third_party/wiredtiger/src/include/os.h b/src/third_party/wiredtiger/src/include/os.h index 44cceee6c40..7a8e47ed81f 100644 --- a/src/third_party/wiredtiger/src/include/os.h +++ b/src/third_party/wiredtiger/src/include/os.h @@ -6,36 +6,32 @@ * See the file LICENSE for redistribution information. */ -/* - * Number of directory entries can grow dynamically. - */ -#define WT_DIR_ENTRY 32 - -#define WT_DIRLIST_EXCLUDE 0x1 /* Exclude files matching prefix */ -#define WT_DIRLIST_INCLUDE 0x2 /* Include files matching prefix */ +#define WT_SYSCALL(call, ret) do { \ + /* \ + * A call returning 0 indicates success; any call where \ + * 0 is not the only successful return must provide an \ + * expression evaluating to 0 in all successful cases. \ + */ \ + if (((ret) = (call)) == 0) \ + break; \ + /* \ + * The call's error was either returned by the call or \ + * is in errno, and there are cases where it depends on \ + * the software release as to which it is (for example, \ + * posix_fadvise on FreeBSD and OS X). Failing calls \ + * must either return a non-zero error value, or -1 if \ + * the error value is in errno. (The WiredTiger errno \ + * function returns WT_ERROR if errno is 0, which isn't \ + * ideal but won't discard the failure.) 
\ + */ \ + if ((ret) == -1) \ + (ret) = __wt_errno(); \ +} while (0) #define WT_SYSCALL_RETRY(call, ret) do { \ int __retry; \ for (__retry = 0; __retry < 10; ++__retry) { \ - /* \ - * A call returning 0 indicates success; any call where \ - * 0 is not the only successful return must provide an \ - * expression evaluating to 0 in all successful cases. \ - */ \ - if (((ret) = (call)) == 0) \ - break; \ - /* \ - * The call's error was either returned by the call or \ - * is in errno, and there are cases where it depends on \ - * the software release as to which it is (for example, \ - * posix_fadvise on FreeBSD and OS X). Failing calls \ - * must either return a non-zero error value, or -1 if \ - * the error value is in errno. (The WiredTiger errno \ - * function returns WT_ERROR if errno is 0, which isn't \ - * ideal but won't discard the failure.) \ - */ \ - if ((ret) == -1) \ - (ret) = __wt_errno(); \ + WT_SYSCALL(call, ret); \ switch (ret) { \ case EAGAIN: \ case EBUSY: \ @@ -70,81 +66,97 @@ (t1).tv_nsec == (t2).tv_nsec ? 0 : 1 : 1) /* - * The underlying OS calls return ENOTSUP if posix_fadvise functionality isn't - * available, but WiredTiger uses the POSIX flag names in the API. Use distinct - * values so the underlying code can distinguish. + * Macros to ensure a file handle is inserted or removed from both the main and + * the hashed queue, used by connection-level and in-memory data structures. 
*/ -#ifndef POSIX_FADV_DONTNEED -#define POSIX_FADV_DONTNEED 0x01 -#endif -#ifndef POSIX_FADV_WILLNEED -#define POSIX_FADV_WILLNEED 0x02 -#endif +#define WT_FILE_HANDLE_INSERT(h, fh, bucket) do { \ + TAILQ_INSERT_HEAD(&(h)->fhqh, fh, q); \ + TAILQ_INSERT_HEAD(&(h)->fhhash[bucket], fh, hashq); \ +} while (0) -#define WT_OPEN_CREATE 0x001 /* Create is OK */ -#define WT_OPEN_EXCLUSIVE 0x002 /* Exclusive open */ -#define WT_OPEN_FIXED 0x004 /* Path isn't relative to home */ -#define WT_OPEN_READONLY 0x008 /* Readonly open */ -#define WT_STREAM_APPEND 0x010 /* Open a stream: append */ -#define WT_STREAM_LINE_BUFFER 0x020 /* Line buffer the stream */ -#define WT_STREAM_READ 0x040 /* Open a stream: read */ -#define WT_STREAM_WRITE 0x080 /* Open a stream: write */ +#define WT_FILE_HANDLE_REMOVE(h, fh, bucket) do { \ + TAILQ_REMOVE(&(h)->fhqh, fh, q); \ + TAILQ_REMOVE(&(h)->fhhash[bucket], fh, hashq); \ +} while (0) struct __wt_fh { + /* + * There is a file name field in both the WT_FH and WT_FILE_HANDLE + * structures, which isn't ideal. There would be compromises to keeping + * a single copy: If it were in WT_FH, file systems could not access + * the name field, if it were just in the WT_FILE_HANDLE internal + * WiredTiger code would need to maintain a string inside a structure + * that is owned by the user (since we care about the content of the + * file name). Keeping two copies seems most reasonable. + */ const char *name; /* File name */ - uint64_t name_hash; /* Hash of name */ - TAILQ_ENTRY(__wt_fh) q; /* List of open handles */ - TAILQ_ENTRY(__wt_fh) hashq; /* Hashed list of handles */ - u_int ref; /* Reference count */ + uint64_t name_hash; /* hash of name */ + TAILQ_ENTRY(__wt_fh) q; /* internal queue */ + TAILQ_ENTRY(__wt_fh) hashq; /* internal hash queue */ + u_int ref; /* reference count */ + + WT_FILE_HANDLE *handle; +}; + +#ifdef _WIN32 +struct __wt_file_handle_win { + WT_FILE_HANDLE iface; /* - * Underlying file system handle support. 
+ * Windows specific file handle fields */ -#ifdef _WIN32 HANDLE filehandle; /* Windows file handle */ HANDLE filehandle_secondary; /* Windows file handle for file size changes */ + bool direct_io; /* O_DIRECT configured */ +}; + #else + +struct __wt_file_handle_posix { + WT_FILE_HANDLE iface; + + /* + * POSIX specific file handle fields + */ int fd; /* POSIX file handle */ + + bool direct_io; /* O_DIRECT configured */ +}; #endif - FILE *fp; /* ANSI C stdio handle */ + +struct __wt_file_handle_inmem { + WT_FILE_HANDLE iface; /* - * Underlying in-memory handle support. + * In memory specific file handle fields */ - size_t off; /* Read/write offset */ + uint64_t name_hash; /* hash of name */ + TAILQ_ENTRY(__wt_file_handle_inmem) q; /* internal queue, hash queue */ + TAILQ_ENTRY(__wt_file_handle_inmem) hashq; + WT_ITEM buf; /* Data */ + u_int ref; /* Reference count */ +}; - bool direct_io; /* O_DIRECT configured */ +struct __wt_fstream { + const char *name; /* Stream name */ - enum { /* file extend configuration */ - WT_FALLOCATE_AVAILABLE, - WT_FALLOCATE_NOT_AVAILABLE, - WT_FALLOCATE_POSIX, - WT_FALLOCATE_STD, - WT_FALLOCATE_SYS } fallocate_available; - bool fallocate_requires_locking; + FILE *fp; /* stdio FILE stream */ + WT_FH *fh; /* WT file handle */ + wt_off_t off; /* Read/write offset */ + wt_off_t size; /* File size */ + WT_ITEM buf; /* Data */ -#define WT_FH_FLUSH_ON_CLOSE 0x01 /* Flush when closing */ -#define WT_FH_IN_MEMORY 0x02 /* In-memory, don't remove */ +#define WT_STREAM_APPEND 0x01 /* Open a stream for append */ +#define WT_STREAM_READ 0x02 /* Open a stream for read */ +#define WT_STREAM_WRITE 0x04 /* Open a stream for write */ uint32_t flags; - int (*fh_advise)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t, int); - int (*fh_allocate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t); - int (*fh_close)(WT_SESSION_IMPL *, WT_FH *); - int (*fh_getc)(WT_SESSION_IMPL *, WT_FH *, int *); - int (*fh_lock)(WT_SESSION_IMPL *, WT_FH *, bool); - int 
(*fh_map)(WT_SESSION_IMPL *, WT_FH *, void *, size_t *, void **); - int (*fh_map_discard)(WT_SESSION_IMPL *, WT_FH *, void *, size_t); - int (*fh_map_preload)(WT_SESSION_IMPL *, WT_FH *, const void *, size_t); - int (*fh_map_unmap)( - WT_SESSION_IMPL *, WT_FH *, void *, size_t, void **); - int (*fh_printf)(WT_SESSION_IMPL *, WT_FH *, const char *, va_list); - int (*fh_read)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, void *); - int (*fh_size)(WT_SESSION_IMPL *, WT_FH *, wt_off_t *); - int (*fh_sync)(WT_SESSION_IMPL *, WT_FH *, bool); - int (*fh_truncate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t); - int (*fh_write)( - WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, const void *); + int (*close)(WT_SESSION_IMPL *, WT_FSTREAM *); + int (*fstr_flush)(WT_SESSION_IMPL *, WT_FSTREAM *); + int (*fstr_getline)(WT_SESSION_IMPL *, WT_FSTREAM *, WT_ITEM *); + int (*fstr_printf)( + WT_SESSION_IMPL *, WT_FSTREAM *, const char *, va_list); }; diff --git a/src/third_party/wiredtiger/src/include/os_fhandle.i b/src/third_party/wiredtiger/src/include/os_fhandle.i new file mode 100644 index 00000000000..313bf8eca3f --- /dev/null +++ b/src/third_party/wiredtiger/src/include/os_fhandle.i @@ -0,0 +1,176 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +/* + * __wt_fsync -- + * POSIX fsync. + */ +static inline int +__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + WT_DECL_RET; + WT_FILE_HANDLE *handle; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + WT_RET(__wt_verbose( + session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->handle->name)); + + handle = fh->handle; + /* + * There is no way to check when the non-blocking sync-file-range is + * complete, but we track the time taken in the call for completeness. 
+ */ + WT_STAT_FAST_CONN_INCR_ATOMIC(session, fsync_active); + WT_STAT_FAST_CONN_INCR(session, fsync_io); + if (block) + ret = (handle->fh_sync == NULL ? 0 : + handle->fh_sync(handle, (WT_SESSION *)session)); + else + ret = (handle->fh_sync_nowait == NULL ? 0 : + handle->fh_sync_nowait(handle, (WT_SESSION *)session)); + WT_STAT_FAST_CONN_DECR_ATOMIC(session, fsync_active); + return (ret); +} + +/* + * __wt_fallocate -- + * Extend a file. + */ +static inline int +__wt_fallocate( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) +{ + WT_DECL_RET; + WT_FILE_HANDLE *handle; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-allocate: %" PRIuMAX " at %" PRIuMAX, + fh->handle->name, (uintmax_t)len, (uintmax_t)offset)); + + /* + * Our caller is responsible for handling any locking issues, all we + * have to do is find a function to call. + * + * Be cautious, the underlying system might have configured the nolock + * flavor, that failed, and we have to fallback to the locking flavor. + */ + handle = fh->handle; + if (handle->fh_allocate_nolock != NULL) { + if ((ret = handle->fh_allocate_nolock( + handle, (WT_SESSION *)session, offset, len)) == 0) + return (0); + WT_RET_ERROR_OK(ret, ENOTSUP); + } + if (handle->fh_allocate != NULL) + return (handle->fh_allocate( + handle, (WT_SESSION *)session, offset, len)); + return (ENOTSUP); +} + +/* + * __wt_file_lock -- + * Lock/unlock a file. + */ +static inline int +__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock) +{ + WT_FILE_HANDLE *handle; + + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-lock: %s", fh->handle->name, lock ? "lock" : "unlock")); + + handle = fh->handle; + return (handle->fh_lock == NULL ? 0 : + handle->fh_lock(handle, (WT_SESSION*)session, lock)); +} + +/* + * __wt_read -- + * POSIX pread. 
+ */ +static inline int +__wt_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + WT_DECL_RET; + + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX, + fh->handle->name, len, (uintmax_t)offset)); + + WT_STAT_FAST_CONN_INCR_ATOMIC(session, read_active); + WT_STAT_FAST_CONN_INCR(session, read_io); + + ret = fh->handle->fh_read( + fh->handle, (WT_SESSION *)session, offset, len, buf); + + WT_STAT_FAST_CONN_DECR_ATOMIC(session, read_active); + return (ret); +} + +/* + * __wt_filesize -- + * Get the size of a file in bytes, by file handle. + */ +static inline int +__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + WT_RET(__wt_verbose( + session, WT_VERB_HANDLEOPS, "%s: handle-size", fh->handle->name)); + + return (fh->handle->fh_size(fh->handle, (WT_SESSION *)session, sizep)); +} + +/* + * __wt_ftruncate -- + * POSIX ftruncate. + */ +static inline int +__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-truncate: %" PRIuMAX, + fh->handle->name, (uintmax_t)len)); + + return (fh->handle->fh_truncate( + fh->handle, (WT_SESSION *)session, len)); +} + +/* + * __wt_write -- + * POSIX pwrite. 
+ */ +static inline int +__wt_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + WT_DECL_RET; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || + WT_STRING_MATCH(fh->name, + WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); + + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX, + fh->handle->name, len, (uintmax_t)offset)); + + WT_STAT_FAST_CONN_INCR_ATOMIC(session, write_active); + WT_STAT_FAST_CONN_INCR(session, write_io); + + ret = fh->handle->fh_write( + fh->handle, (WT_SESSION *)session, offset, len, buf); + + WT_STAT_FAST_CONN_DECR_ATOMIC(session, write_active); + return (ret); +} diff --git a/src/third_party/wiredtiger/src/include/os_fs.i b/src/third_party/wiredtiger/src/include/os_fs.i new file mode 100644 index 00000000000..88ee71d953a --- /dev/null +++ b/src/third_party/wiredtiger/src/include/os_fs.i @@ -0,0 +1,244 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +/* + * __wt_fs_directory_list -- + * Get a list of files from a directory. + */ +static inline int +__wt_fs_directory_list(WT_SESSION_IMPL *session, + const char *dir, const char *prefix, char ***dirlistp, u_int *countp) +{ + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *path; + + *dirlistp = NULL; + *countp = 0; + + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, + "%s: directory-list: %s prefix %s", + dir, prefix == NULL ? "all" : prefix)); + + WT_RET(__wt_filename(session, dir, &path)); + + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_directory_list( + file_system, wt_session, path, prefix, dirlistp, countp); + + __wt_free(session, path); + return (ret); +} + +/* + * __wt_fs_directory_list_free -- + * Free memory allocated by __wt_fs_directory_list. 
+ */ +static inline int +__wt_fs_directory_list_free( + WT_SESSION_IMPL *session, char ***dirlistp, u_int count) +{ + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + + if (*dirlistp != NULL) { + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_directory_list_free( + file_system, wt_session, *dirlistp, count); + } + + *dirlistp = NULL; + return (ret); +} + +/* + * __wt_fs_directory_sync -- + * Flush a directory to ensure file creation is durable. + */ +static inline int +__wt_fs_directory_sync(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *copy, *dir; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + WT_RET(__wt_verbose( + session, WT_VERB_FILEOPS, "%s: directory-sync", name)); + + /* + * POSIX 1003.1 does not require that fsync of a file handle ensures the + * entry in the directory containing the file has also reached disk (and + * there are historic Linux filesystems requiring it). If the underlying + * filesystem method is set, do an explicit fsync on a file descriptor + * for the directory to be sure. + * + * directory-sync is not a required call, no method means the call isn't + * needed. + */ + file_system = S2C(session)->file_system; + if (file_system->fs_directory_sync == NULL) + return (0); + + copy = NULL; + if (name == NULL || strchr(name, '/') == NULL) + name = S2C(session)->home; + else { + /* + * File name construction should not return a path without any + * slash separator, but caution isn't unreasonable. 
+ */ + WT_RET(__wt_filename(session, name, &copy)); + if ((dir = strrchr(copy, '/')) == NULL) + name = S2C(session)->home; + else { + dir[1] = '\0'; + name = copy; + } + } + + wt_session = (WT_SESSION *)session; + ret = file_system->fs_directory_sync(file_system, wt_session, name); + + __wt_free(session, copy); + return (ret); +} + +/* + * __wt_fs_exist -- + * Return if the file exists. + */ +static inline int +__wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *path; + + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-exist", name)); + + WT_RET(__wt_filename(session, name, &path)); + + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_exist(file_system, wt_session, path, existp); + + __wt_free(session, path); + return (ret); +} + +/* + * __wt_fs_remove -- + * POSIX remove. + */ +static inline int +__wt_fs_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *path; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-remove", name)); + +#ifdef HAVE_DIAGNOSTIC + /* + * It is a layering violation to retrieve a WT_FH here, but it is a + * useful diagnostic to ensure WiredTiger doesn't have the handle open. + */ + if (__wt_handle_is_open(session, name)) + WT_RET_MSG(session, EINVAL, + "%s: file-remove: file has open handles", name); +#endif + + WT_RET(__wt_filename(session, name, &path)); + + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_remove(file_system, wt_session, path); + + __wt_free(session, path); + return (ret); +} + +/* + * __wt_fs_rename -- + * POSIX rename. 
+ */ +static inline int +__wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *from_path, *to_path; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + WT_RET(__wt_verbose( + session, WT_VERB_FILEOPS, "%s to %s: file-rename", from, to)); + +#ifdef HAVE_DIAGNOSTIC + /* + * It is a layering violation to retrieve a WT_FH here, but it is a + * useful diagnostic to ensure WiredTiger doesn't have the handle open. + */ + if (__wt_handle_is_open(session, from)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", from); + if (__wt_handle_is_open(session, to)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", to); +#endif + + from_path = to_path = NULL; + WT_ERR(__wt_filename(session, from, &from_path)); + WT_ERR(__wt_filename(session, to, &to_path)); + + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_rename( + file_system, wt_session, from_path, to_path); + +err: __wt_free(session, from_path); + __wt_free(session, to_path); + return (ret); +} + +/* + * __wt_fs_size -- + * Get the size of a file in bytes, by file name. 
+ */ +static inline int +__wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) +{ + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *path; + + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-size", name)); + + WT_RET(__wt_filename(session, name, &path)); + + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_size(file_system, wt_session, path, sizep); + + __wt_free(session, path); + return (ret); +} diff --git a/src/third_party/wiredtiger/src/include/os_fstream.i b/src/third_party/wiredtiger/src/include/os_fstream.i new file mode 100644 index 00000000000..8c0fdadbdb0 --- /dev/null +++ b/src/third_party/wiredtiger/src/include/os_fstream.i @@ -0,0 +1,97 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +/* + * __wt_getline -- + * Get a line from a stream. + */ +static inline int +__wt_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf) +{ + return (fstr->fstr_getline(session, fstr, buf)); +} + +/* + * __wt_fclose -- + * Close a stream. + */ +static inline int +__wt_fclose(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp) +{ + WT_FSTREAM *fstr; + + if ((fstr = *fstrp) == NULL) + return (0); + *fstrp = NULL; + return (fstr->close(session, fstr)); +} + +/* + * __wt_fflush -- + * Flush a stream. + */ +static inline int +__wt_fflush(WT_SESSION_IMPL *session, WT_FSTREAM *fstr) +{ + return (fstr->fstr_flush(session, fstr)); +} + +/* + * __wt_vfprintf -- + * ANSI C vfprintf. + */ +static inline int +__wt_vfprintf( + WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap) +{ + WT_RET(__wt_verbose( + session, WT_VERB_HANDLEOPS, "%s: handle-printf", fstr->name)); + + return (fstr->fstr_printf(session, fstr, fmt, ap)); +} + +/* + * __wt_fprintf -- + * ANSI C fprintf. 
+ */ +static inline int +__wt_fprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) +{ + WT_DECL_RET; + va_list ap; + + va_start(ap, fmt); + ret = __wt_vfprintf(session, fstr, fmt, ap); + va_end(ap); + + return (ret); +} + +/* + * __wt_sync_and_rename -- + * Flush and close a stream, then swap it into place. + */ +static inline int +__wt_sync_and_rename(WT_SESSION_IMPL *session, + WT_FSTREAM **fstrp, const char *from, const char *to) +{ + WT_DECL_RET; + WT_FSTREAM *fstr; + + fstr = *fstrp; + *fstrp = NULL; + + /* Flush to disk and close the handle. */ + WT_TRET(__wt_fflush(session, fstr)); + WT_TRET(__wt_fsync(session, fstr->fh, true)); + WT_TRET(__wt_fclose(session, &fstr)); + WT_RET(ret); + + return (__wt_rename_and_sync_directory(session, from, to)); +} diff --git a/src/third_party/wiredtiger/src/include/packing.i b/src/third_party/wiredtiger/src/include/packing.i index 35b2ddc43db..d662c60d221 100644 --- a/src/third_party/wiredtiger/src/include/packing.i +++ b/src/third_party/wiredtiger/src/include/packing.i @@ -138,7 +138,7 @@ __pack_next(WT_PACK *pack, WT_PACK_VALUE *pv) next: if (pack->cur == pack->end) return (WT_NOTFOUND); - if (isdigit(*pack->cur)) { + if (__wt_isdigit((u_char)*pack->cur)) { pv->havesize = 1; pv->size = WT_STORE_SIZE(strtoul(pack->cur, &endsize, 10)); pack->cur = endsize; @@ -260,6 +260,8 @@ __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv) return (pv->size); case 'j': case 'J': + case 'K': + /* These formats are only used internally. */ if (pv->type == 'j' || pv->havesize) s = pv->size; else { @@ -269,7 +271,7 @@ __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv) len = __wt_json_strlen(pv->u.item.data, pv->u.item.size); WT_ASSERT(session, len >= 0); - s = (size_t)len + 1; + s = (size_t)len + (pv->type == 'K' ? 
0 : 1); } return (s); case 's': @@ -357,18 +359,22 @@ __pack_write( break; case 'j': case 'J': + case 'K': + /* These formats are only used internally. */ s = pv->u.item.size; if ((pv->type == 'j' || pv->havesize) && pv->size < s) { s = pv->size; pad = 0; } else if (pv->havesize) pad = pv->size - s; + else if (pv->type == 'K') + pad = 0; else pad = 1; if (s > 0) { oldp = *pp; - WT_RET(__wt_json_strncpy((char **)pp, maxlen, - pv->u.item.data, s)); + WT_RET(__wt_json_strncpy((WT_SESSION *)session, + (char **)pp, maxlen, pv->u.item.data, s)); maxlen -= (size_t)(*pp - oldp); } if (pad > 0) { @@ -534,7 +540,7 @@ __unpack_read(WT_SESSION_IMPL *session, break; case 'R': WT_SIZE_CHECK_UNPACK(sizeof(uint64_t), maxlen); - pv->u.u = *(uint64_t *)*pp; + pv->u.u = *(const uint64_t *)*pp; *pp += sizeof(uint64_t); break; default: diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index 7fdb7fc2548..aa51dae58c4 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -198,7 +198,7 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { ((s)->hazard == NULL) /* The number of hazard pointers grows dynamically. */ -#define WT_HAZARD_INCR 10 +#define WT_HAZARD_INCR 1 uint32_t hazard_size; /* Allocated slots in hazard array. 
*/ uint32_t nhazard; /* Count of active hazard pointers */ WT_HAZARD *hazard; /* Hazard pointer array */ diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index e728b634c6e..57126af8aa4 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -144,10 +144,16 @@ __wt_stats_clear(void *stats_arg, int slot) #define WT_STAT_DECRV(session, stats, fld, value) \ (stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value) +#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) \ + __wt_atomic_subi64( \ + &(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value)) #define WT_STAT_DECR(session, stats, fld) \ WT_STAT_DECRV(session, stats, fld, 1) #define WT_STAT_INCRV(session, stats, fld, value) \ (stats)[WT_STATS_SLOT_ID(session)]->fld += (int64_t)(value) +#define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) \ + __wt_atomic_addi64( \ + &(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value)) #define WT_STAT_INCR(session, stats, fld) \ WT_STAT_INCRV(session, stats, fld, 1) #define WT_STAT_SET(session, stats, fld, value) do { \ @@ -164,12 +170,20 @@ __wt_stats_clear(void *stats_arg, int slot) } while (0) #define WT_STAT_FAST_DECR(session, stats, fld) \ WT_STAT_FAST_DECRV(session, stats, fld, 1) +#define WT_STAT_FAST_DECRV_ATOMIC(session, stats, fld, value) do { \ + if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \ + WT_STAT_DECRV_ATOMIC(session, stats, fld, value); \ +} while (0) #define WT_STAT_FAST_INCRV(session, stats, fld, value) do { \ if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \ WT_STAT_INCRV(session, stats, fld, value); \ } while (0) #define WT_STAT_FAST_INCR(session, stats, fld) \ WT_STAT_FAST_INCRV(session, stats, fld, 1) +#define WT_STAT_FAST_INCRV_ATOMIC(session, stats, fld, value) do { \ + if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \ + WT_STAT_INCRV_ATOMIC(session, stats, fld, value); \ +} while (0) 
#define WT_STAT_FAST_SET(session, stats, fld, value) do { \ if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \ WT_STAT_SET(session, stats, fld, value); \ @@ -180,10 +194,14 @@ __wt_stats_clear(void *stats_arg, int slot) */ #define WT_STAT_FAST_CONN_DECR(session, fld) \ WT_STAT_FAST_DECR(session, S2C(session)->stats, fld) +#define WT_STAT_FAST_CONN_DECR_ATOMIC(session, fld) \ + WT_STAT_FAST_DECRV_ATOMIC(session, S2C(session)->stats, fld, 1) #define WT_STAT_FAST_CONN_DECRV(session, fld, value) \ WT_STAT_FAST_DECRV(session, S2C(session)->stats, fld, value) #define WT_STAT_FAST_CONN_INCR(session, fld) \ WT_STAT_FAST_INCR(session, S2C(session)->stats, fld) +#define WT_STAT_FAST_CONN_INCR_ATOMIC(session, fld) \ + WT_STAT_FAST_INCRV_ATOMIC(session, S2C(session)->stats, fld, 1) #define WT_STAT_FAST_CONN_INCRV(session, fld, value) \ WT_STAT_FAST_INCRV(session, S2C(session)->stats, fld, value) #define WT_STAT_FAST_CONN_SET(session, fld, value) \ @@ -261,17 +279,25 @@ struct __wt_connection_stats { int64_t cache_bytes_read; int64_t cache_bytes_write; int64_t cache_eviction_checkpoint; + int64_t cache_eviction_get_ref; + int64_t cache_eviction_get_ref_empty; + int64_t cache_eviction_get_ref_empty2; int64_t cache_eviction_aggressive_set; int64_t cache_eviction_queue_empty; int64_t cache_eviction_queue_not_empty; int64_t cache_eviction_server_evicting; int64_t cache_eviction_server_not_evicting; + int64_t cache_eviction_server_toobig; + int64_t cache_eviction_server_slept; int64_t cache_eviction_slow; int64_t cache_eviction_worker_evicting; int64_t cache_eviction_force_fail; int64_t cache_eviction_walks_active; int64_t cache_eviction_walks_started; int64_t cache_eviction_hazard; + int64_t cache_hazard_checks; + int64_t cache_hazard_walks; + int64_t cache_hazard_max; int64_t cache_inmem_splittable; int64_t cache_inmem_split; int64_t cache_eviction_internal; @@ -293,6 +319,7 @@ struct __wt_connection_stats { int64_t cache_eviction_pages_queued_oldest; int64_t 
cache_read; int64_t cache_read_lookaside; + int64_t cache_pages_requested; int64_t cache_eviction_pages_seen; int64_t cache_eviction_fail; int64_t cache_eviction_walk; @@ -314,6 +341,7 @@ struct __wt_connection_stats { int64_t cond_wait; int64_t rwlock_read; int64_t rwlock_write; + int64_t fsync_io; int64_t read_io; int64_t write_io; int64_t cursor_create; @@ -356,7 +384,9 @@ struct __wt_connection_stats { int64_t log_write_lsn; int64_t log_write_lsn_skip; int64_t log_sync; + int64_t log_sync_duration; int64_t log_sync_dir; + int64_t log_sync_dir_duration; int64_t log_writes; int64_t log_slot_consolidated; int64_t log_max_filesize; @@ -378,6 +408,9 @@ struct __wt_connection_stats { int64_t rec_split_stashed_objects; int64_t session_cursor_open; int64_t session_open; + int64_t fsync_active; + int64_t read_active; + int64_t write_active; int64_t page_busy_blocked; int64_t page_forcible_evict_blocked; int64_t page_locked_blocked; @@ -394,6 +427,10 @@ struct __wt_connection_stats { int64_t txn_checkpoint_time_total; int64_t txn_checkpoint; int64_t txn_fail_cache; + int64_t txn_checkpoint_fsync_post; + int64_t txn_checkpoint_fsync_pre; + int64_t txn_checkpoint_fsync_post_duration; + int64_t txn_checkpoint_fsync_pre_duration; int64_t txn_pinned_range; int64_t txn_pinned_checkpoint_range; int64_t txn_pinned_snapshot_range; @@ -447,7 +484,6 @@ struct __wt_dsrc_stats { int64_t btree_compact_rewrite; int64_t btree_row_internal; int64_t btree_row_leaf; - int64_t cache_bytes_inuse; int64_t cache_bytes_read; int64_t cache_bytes_write; int64_t cache_eviction_checkpoint; @@ -465,6 +501,7 @@ struct __wt_dsrc_stats { int64_t cache_write_lookaside; int64_t cache_read; int64_t cache_read_lookaside; + int64_t cache_pages_requested; int64_t cache_write; int64_t cache_write_restore; int64_t cache_eviction_clean; @@ -514,9 +551,11 @@ struct __wt_dsrc_stats { */ #define WT_JOIN_STATS_BASE 3000 struct __wt_join_stats { - int64_t accesses; - int64_t actual_count; + int64_t main_access; 
int64_t bloom_false_positive; + int64_t membership_check; + int64_t bloom_insert; + int64_t iterated; }; /* Statistics section: END */ diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 4f422af32d4..f578f4e6c08 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -71,6 +71,8 @@ struct __wt_encryptor; typedef struct __wt_encryptor WT_ENCRYPTOR; struct __wt_event_handler; typedef struct __wt_event_handler WT_EVENT_HANDLER; struct __wt_extension_api; typedef struct __wt_extension_api WT_EXTENSION_API; struct __wt_extractor; typedef struct __wt_extractor WT_EXTRACTOR; +struct __wt_file_handle; typedef struct __wt_file_handle WT_FILE_HANDLE; +struct __wt_file_system; typedef struct __wt_file_system WT_FILE_SYSTEM; struct __wt_item; typedef struct __wt_item WT_ITEM; struct __wt_session; typedef struct __wt_session WT_SESSION; @@ -421,6 +423,9 @@ struct __wt_cursor { * @errors * In particular, if \c overwrite is not configured and a record with * the specified key already exists, ::WT_DUPLICATE_KEY is returned. + * Also, if \c in_memory is configured for the database and the insert + * requires more than the configured cache size to complete, + * ::WT_CACHE_FULL is returned. */ int __F(insert)(WT_CURSOR *cursor); @@ -451,6 +456,9 @@ struct __wt_cursor { * @errors * In particular, if \c overwrite is not configured and no record with * the specified key exists, ::WT_NOTFOUND is returned. + * Also, if \c in_memory is configured for the database and the insert + * requires more than the configured cache size to complete, + * ::WT_CACHE_FULL is returned. */ int __F(update)(WT_CURSOR *cursor); @@ -1238,18 +1246,21 @@ struct __wt_session { * @param join_cursor a cursor that was opened using a * \c "join:" URI. It may not have been used for any operations * other than other join calls. 
- * @param ref_cursor either an index cursor having the same base table - * as the join_cursor, or a table cursor open on the same base table. - * The ref_cursor must be positioned. + * @param ref_cursor an index cursor having the same base table + * as the join_cursor, or a table cursor open on the same base table, + * or another join cursor. Unless the ref_cursor is another join + * cursor, it must be positioned. * * The ref_cursor limits the results seen by iterating the * join_cursor to table items referred to by the key in this * index. The set of keys referred to is modified by the compare * config option. * - * Multiple join calls builds up a set of ref_cursors, and the - * results seen by iteration are the intersection of the cursor - * ranges participating in the join. + * Multiple join calls build up a set of ref_cursors, and + * by default, the results seen by iteration are the intersection + * of the cursor ranges participating in the join. When configured + * with \c "operation=or", the results seen are the union of + * the participating cursor ranges. * * After the join call completes, the ref_cursor cursor may not be * used for any purpose other than get_key and get_value. Any other @@ -1272,6 +1283,13 @@ struct __wt_session { * also influences evaluation order for cursors in the join. When the * count is equal for multiple bloom filters in a composition of joins\, * the bloom filter may be shared., an integer; default \c .} + * @config{operation, the operation applied between this and other + * joined cursors. When "operation=and" is specified\, all the + * conditions implied by joins must be satisfied for an entry to be + * returned by the join cursor; when "operation=or" is specified\, only + * one must be satisfied. 
All cursors joined to a join cursor must have + * matching operations., a string\, chosen from the following options: + * \c "and"\, \c "or"; default \c "and".} * @config{strategy, when set to bloom\, a bloom filter is created and * populated for this index. This has an up front cost but may reduce * the number of accesses to the main table when iterating the joined @@ -1448,15 +1466,16 @@ struct __wt_session { * @config{dump_blocks, Display the contents of on-disk blocks as they * are verified\, using the application's message handler\, intended for * debugging., a boolean flag; default \c false.} + * @config{dump_layout, Display the layout of the files as they are + * verified\, using the application's message handler\, intended for + * debugging; requires optional support from the block manager., a + * boolean flag; default \c false.} * @config{dump_offsets, Display the contents of specific on-disk * blocks\, using the application's message handler\, intended for * debugging., a list of strings; default empty.} * @config{dump_pages, Display the contents of in-memory pages as they * are verified\, using the application's message handler\, intended for * debugging., a boolean flag; default \c false.} - * @config{dump_shape, Display the shape of the tree after - * verification\, using the application's message handler\, intended for - * debugging., a boolean flag; default \c false.} * @config{strict, Treat any verification problem as an error; by * default\, verify will warn\, but not fail\, in the case of errors * that won't affect future behavior (for example\, a leaked block)., a @@ -1830,7 +1849,7 @@ struct __wt_connection { * @config{ path, the path to a directory into * which the log files are written. 
If the value is not an absolute * path name\, the files are created relative to the database home., a - * string; default empty.} + * string; default \c ".".} * @config{ prealloc, * pre-allocate log files., a boolean flag; default \c true.} * @config{ recover, run recovery or error if @@ -2015,6 +2034,10 @@ struct __wt_connection { * @configstart{WT_CONNECTION.load_extension, see dist/api_data.py} * @config{config, configuration string passed to the entry point of the * extension as its WT_CONFIG_ARG argument., a string; default empty.} + * @config{early_load, whether this extension should be loaded at the + * beginning of ::wiredtiger_open. Only applicable to extensions loaded + * via the wiredtiger_open configurations string., a boolean flag; + * default \c false.} * @config{entry, the entry point of the extension\, called to * initialize the extension when it is loaded. The signature of the * function must match ::wiredtiger_extension_init., a string; default @@ -2126,6 +2149,23 @@ struct __wt_connection { WT_EXTRACTOR *extractor, const char *config); /*! + * Configure a custom file system. + * + * This method can only be called from an early loaded extension + * module. The application must first implement the WT_FILE_SYSTEM + * interface and then register the implementation with WiredTiger: + * + * @snippet ex_file_system.c WT_FILE_SYSTEM register + * + * @param connection the connection handle + * @param fs the populated file system structure + * @configempty{WT_CONNECTION.set_file_system, see dist/api_data.py} + * @errors + */ + int __F(set_file_system)( + WT_CONNECTION *connection, WT_FILE_SYSTEM *fs, const char *config); + + /*! * Return a reference to the WiredTiger extension functions. 
* * @snippet ex_data_source.c WT_EXTENSION_API declaration @@ -2286,6 +2326,8 @@ struct __wt_connection { * @config{ ),,} * @config{hazard_max, maximum number of simultaneous hazard pointers per * session handle., an integer greater than or equal to 15; default \c 1000.} + * @config{in_memory, keep data in-memory only. See @ref in_memory for more + * information., a boolean flag; default \c false.} * @config{log = (, enable logging. Enabling logging uses three sessions from * the configured session_max., a set of related configuration options defined * below.} @@ -2303,7 +2345,7 @@ struct __wt_connection { * integer between 100KB and 2GB; default \c 100MB.} * @config{ path, the path to a directory into which the * log files are written. If the value is not an absolute path name\, the files - * are created relative to the database home., a string; default empty.} + * are created relative to the database home., a string; default \c ".".} * @config{ prealloc, pre-allocate log files., a boolean * flag; default \c true.} * @config{ recover, run recovery @@ -3000,19 +3042,15 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); * if recovery is required to use the database. */ #define WT_RUN_RECOVERY -31806 -/*! @cond internal */ /*! * Operation would overflow cache. - * This error is generated when wiredtiger_open is configured to run in-memory, - * and an insert or update operation requires more than the configured cache - * size to complete. + * This error is only generated when wiredtiger_open is configured to run in- + * memory, and an insert or update operation requires more than the configured + * cache size to complete. The operation may be retried; if a transaction is in + * progress, it should be rolled back and the operation retried in a new + * transaction. */ #define WT_CACHE_FULL -31807 -/*! @endcond */ -/*! @cond internal */ -/*! Permission denied (internal). */ -#define WT_PERM_DENIED -31808 -/*! 
@endcond */ /* * Error return section: END * DO NOT EDIT: automatically built by dist/api_err.py. @@ -3035,7 +3073,7 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); /******************************************* * Forward structure declarations for the extension API *******************************************/ -struct __wt_config_arg; typedef struct __wt_config_arg WT_CONFIG_ARG; +struct __wt_config_arg; typedef struct __wt_config_arg WT_CONFIG_ARG; /*! * The interface implemented by applications to provide custom ordering of @@ -3566,7 +3604,7 @@ struct __wt_encryptor { * number of bytes needed. * * @param[out] expansion_constantp the additional number of bytes needed - * when encrypting. + * when encrypting. * @returns zero for success, non-zero to indicate an error. * * @snippet nop_encrypt.c WT_ENCRYPTOR sizing @@ -3585,8 +3623,7 @@ struct __wt_encryptor { * is used instead of this one for any callbacks. * * @param[in] encrypt_config the "encryption" portion of the - * configuration from the wiredtiger_open or - * WT_SESSION::create call + * configuration from the wiredtiger_open or WT_SESSION::create call * @param[out] customp the new modified encryptor, or NULL. * @returns zero for success, non-zero to indicate an error. */ @@ -3661,6 +3698,466 @@ struct __wt_extractor { int (*terminate)(WT_EXTRACTOR *extractor, WT_SESSION *session); }; +#if !defined(SWIG) +/*! WT_FILE_SYSTEM::open_file file types */ +typedef enum { + WT_OPEN_FILE_TYPE_CHECKPOINT, /*!< open a data file checkpoint */ + WT_OPEN_FILE_TYPE_DATA, /*!< open a data file */ + WT_OPEN_FILE_TYPE_DIRECTORY, /*!< open a directory */ + WT_OPEN_FILE_TYPE_LOG, /*!< open a log file */ + WT_OPEN_FILE_TYPE_REGULAR /*!< open a regular file */ +} WT_OPEN_FILE_TYPE; + +/*! WT_FILE_SYSTEM::open_file flags: create if does not exist */ +#define WT_OPEN_CREATE 0x001 +/*! WT_FILE_SYSTEM::open_file flags: direct I/O requested */ +#define WT_OPEN_DIRECTIO 0x002 +/*! 
WT_FILE_SYSTEM::open_file flags: error if exclusive use not available */ +#define WT_OPEN_EXCLUSIVE 0x004 +#ifndef DOXYGEN +#define WT_OPEN_FIXED 0x008 /* Path not home relative (internal) */ +#endif +/*! WT_FILE_SYSTEM::open_file flags: open is read-only */ +#define WT_OPEN_READONLY 0x010 + +/*! + * The interface implemented by applications to provide a custom file system + * implementation. + * + * <b>Thread safety:</b> WiredTiger may invoke methods on the WT_FILE_SYSTEM + * interface from multiple threads concurrently. It is the responsibility of + * the implementation to protect any shared data. + * + * Applications register implementations with WiredTiger by calling + * WT_CONNECTION::set_file_system. See @ref custom_file_systems for more + * information. + * + * @snippet ex_file_system.c WT_FILE_SYSTEM register + */ +struct __wt_file_system { + /*! + * Return a list of file names for the named directory. + * + * @errors + * + * @param file_system the WT_FILE_SYSTEM + * @param session the current WiredTiger session + * @param directory the name of the directory + * @param prefix if not NULL, only files with names matching the prefix + * are returned + * @param[out] dirlist the method returns an allocated array of + * individually allocated strings, one for each entry in the + * directory. + * @param[out] countp the number of entries returned + */ + int (*fs_directory_list)(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *directory, const char *prefix, + char ***dirlist, uint32_t *countp); + + /*! + * Free memory allocated by WT_FILE_SYSTEM::directory_list. + * + * @errors + * + * @param file_system the WT_FILE_SYSTEM + * @param session the current WiredTiger session + * @param dirlist array returned by WT_FILE_SYSTEM::directory_list + * @param count count returned by WT_FILE_SYSTEM::directory_list + */ + int (*fs_directory_list_free)(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, char **dirlist, uint32_t count); + + /*!
+ * Flush the named directory. + * + * This method is not required for readonly file systems or file systems + * where it is not necessary to flush a file's directory to ensure the + * durability of file system operations, and should be set to NULL when + * not required by the file system. + * + * @errors + * + * @param file_system the WT_FILE_SYSTEM + * @param session the current WiredTiger session + * @param directory the name of the directory + */ + int (*fs_directory_sync)(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *directory); + + /*! + * Return if the named file system object exists. + * + * @errors + * + * @param file_system the WT_FILE_SYSTEM + * @param session the current WiredTiger session + * @param name the name of the file + * @param[out] existp If the named file system object exists + */ + int (*fs_exist)(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *name, bool *existp); + + /*! + * Open a handle for a named file system object + * + * @errors + * + * @param file_system the WT_FILE_SYSTEM + * @param session the current WiredTiger session + * @param name the name of the file system object + * @param file_type the type of the file + * The file type is provided to allow optimization for different file + * access patterns. + * @param flags flags indicating how to open the file, one or more of + * ::WT_OPEN_CREATE, ::WT_OPEN_DIRECTIO, ::WT_OPEN_EXCLUSIVE or + * ::WT_OPEN_READONLY. + * @param[out] file_handlep the handle to the newly opened file. File + * system implementations must allocate memory for the handle and + * the WT_FILE_HANDLE::name field, and fill in the WT_FILE_HANDLE:: + * fields. Applications wanting to associate private information + * with the WT_FILE_HANDLE:: structure should declare and allocate + * their own structure as a superset of a WT_FILE_HANDLE:: structure. 
+ */ + int (*fs_open_file)(WT_FILE_SYSTEM *file_system, WT_SESSION *session, + const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + WT_FILE_HANDLE **file_handlep); + + /*! + * Remove a named file system object + * + * This method is not required for readonly file systems and should be + * set to NULL when not required by the file system. + * + * @errors + * + * @param file_system the WT_FILE_SYSTEM + * @param session the current WiredTiger session + * @param name the name of the file system object + */ + int (*fs_remove)( + WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name); + + /*! + * Rename a named file system object + * + * This method is not required for readonly file systems and should be + * set to NULL when not required by the file system. + * + * @errors + * + * @param file_system the WT_FILE_SYSTEM + * @param session the current WiredTiger session + * @param from the original name of the object + * @param to the new name for the object + */ + int (*fs_rename)(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *from, const char *to); + + /*! + * Return the size of a named file system object + * + * @errors + * + * @param file_system the WT_FILE_SYSTEM + * @param session the current WiredTiger session + * @param name the name of the file system object + * @param[out] sizep the size of the file system entry + */ + int (*fs_size)(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *name, wt_off_t *sizep); + + /*! + * A callback performed when the file system is closed and will no + * longer be accessed by the WiredTiger database. + * + * This method is not required and should be set to NULL when not + * required by the file system. + * + * The WT_FILE_SYSTEM::terminate callback is intended to allow cleanup, + * the handle will not be subsequently accessed by WiredTiger. + */ + int (*terminate)(WT_FILE_SYSTEM *file_system, WT_SESSION *session); +}; + +/*! 
WT_FILE_HANDLE::fadvise flags: no longer need */ +#define WT_FILE_HANDLE_DONTNEED 1 +/*! WT_FILE_HANDLE::fadvise flags: will need */ +#define WT_FILE_HANDLE_WILLNEED 2 + +/*! + * A file handle implementation returned by WT_FILE_SYSTEM::open_file. + * + * <b>Thread safety:</b> Unless explicitly stated otherwise, WiredTiger may + * invoke methods on the WT_FILE_HANDLE interface from multiple threads + * concurrently. It is the responsibility of the implementation to protect + * any shared data. + * + * See @ref custom_file_systems for more information. + */ +struct __wt_file_handle { + /*! + * The enclosing file system, set by WT_FILE_SYSTEM::open_file. + */ + WT_FILE_SYSTEM *file_system; + + /*! + * The name of the file, set by WT_FILE_SYSTEM::open_file. + */ + char *name; + + /*! + * Close a file handle, the handle will not be further accessed by + * WiredTiger. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + */ + int (*close)(WT_FILE_HANDLE *file_handle, WT_SESSION *session); + + /*! + * Indicate expected future use of file ranges, based on the POSIX + * 1003.1 standard fadvise. + * + * This method is not required, and should be set to NULL when not + * supported by the file. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param offset the file offset + * @param len the size of the advisory + * @param advice one of ::WT_FILE_HANDLE_WILLNEED or + * ::WT_FILE_HANDLE_DONTNEED. + */ + int (*fh_advise)(WT_FILE_HANDLE *file_handle, + WT_SESSION *session, wt_off_t offset, wt_off_t len, int advice); + + /*! + * Ensure disk space is allocated for the file, based on the POSIX + * 1003.1 standard fallocate. + * + * This method is not required, and should be set to NULL when not + * supported by the file. + * + * This method is not called by multiple threads concurrently (on the + * same file handle). 
If the file handle's fallocate method supports + * concurrent calls, set the WT_FILE_HANDLE::fallocate_nolock method + * instead. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param offset the file offset + * @param len the length of the range to allocate + */ + int (*fh_allocate)(WT_FILE_HANDLE *file_handle, + WT_SESSION *session, wt_off_t, wt_off_t); + + /*! + * Ensure disk space is allocated for the file, based on the POSIX + * 1003.1 standard fallocate. + * + * This method is not required, and should be set to NULL when not + * supported by the file. + * + * This method may be called by multiple threads concurrently (on the + * same file handle). If the file handle's fallocate method does not + * support concurrent calls, set the WT_FILE_HANDLE::fallocate method + * instead. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param offset the file offset + * @param len the length of the range to allocate + */ + int (*fh_allocate_nolock)(WT_FILE_HANDLE *file_handle, + WT_SESSION *session, wt_off_t, wt_off_t); + + /*! + * Lock/unlock a file from the perspective of other processes running + * in the system. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param lock whether to lock or unlock + */ + int (*fh_lock)( + WT_FILE_HANDLE *file_handle, WT_SESSION *session, bool lock); + + /*! + * Map a file into memory, based on the POSIX 1003.1 standard mmap. + * + * This method is not required, and should be set to NULL when not + * supported by the file.
+ * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param[out] mapped_regionp a reference to a memory location into + * which should be stored a pointer to the start of the mapped region + * @param[out] lengthp a reference to a memory location into which + * should be stored the length of the region + * @param[out] mapped_cookiep a reference to a memory location into + * which can be optionally stored a pointer to an opaque cookie + * which is subsequently passed to WT_FILE_HANDLE::unmap. + */ + int (*fh_map)(WT_FILE_HANDLE *file_handle, WT_SESSION *session, + void *mapped_regionp, size_t *lengthp, void *mapped_cookiep); + + /*! + * Unmap part of a memory mapped file, based on the POSIX 1003.1 + * standard madvise. + * + * This method is not required, and should be set to NULL when not + * supported by the file. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param map a location in the mapped region unlikely to be used in the + * near future + * @param length the length of the mapped region to discard + * @param mapped_cookie any cookie set by the WT_FILE_HANDLE::map method + */ + int (*fh_map_discard)(WT_FILE_HANDLE *file_handle, + WT_SESSION *session, void *map, size_t length, void *mapped_cookie); + + /*! + * Preload part of a memory mapped file, based on the POSIX 1003.1 + * standard madvise. + * + * This method is not required, and should be set to NULL when not + * supported by the file. 
+ * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param map a location in the mapped region likely to be used in the + * near future + * @param length the size of the mapped region to preload + * @param mapped_cookie any cookie set by the WT_FILE_HANDLE::map method + */ + int (*fh_map_preload)(WT_FILE_HANDLE *file_handle, WT_SESSION *session, + const void *map, size_t length, void *mapped_cookie); + + /*! + * Unmap a memory mapped file, based on the POSIX 1003.1 standard + * munmap. + * + * This method is only required if a valid implementation of map is + * provided by the file, and should be set to NULL otherwise. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param mapped_region a pointer to the start of the mapped region + * @param length the length of the mapped region + * @param mapped_cookie any cookie set by the WT_FILE_HANDLE::map method + */ + int (*fh_unmap)(WT_FILE_HANDLE *file_handle, WT_SESSION *session, + void *mapped_region, size_t length, void *mapped_cookie); + + /*! + * Read from a file, based on the POSIX 1003.1 standard pread. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param offset the offset in the file to start reading from + * @param len the amount to read + * @param[out] buf buffer to hold the content read from file + */ + int (*fh_read)(WT_FILE_HANDLE *file_handle, + WT_SESSION *session, wt_off_t offset, size_t len, void *buf); + + /*! + * Return the size of a file. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param sizep the size of the file + */ + int (*fh_size)( + WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep); + + /*! + * Make outstanding file writes durable and do not return until writes + * are complete. 
+ * + * This method is not required for read-only files, and should be set + * to NULL when not supported by the file. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + */ + int (*fh_sync)(WT_FILE_HANDLE *file_handle, WT_SESSION *session); + + /*! + * Schedule the outstanding file writes required for durability and + * return immediately. + * + * This method is not required, and should be set to NULL when not + * supported by the file. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + */ + int (*fh_sync_nowait)(WT_FILE_HANDLE *file_handle, WT_SESSION *session); + + /*! + * Lengthen or shorten a file to the specified length, based on the + * POSIX 1003.1 standard ftruncate. + * + * This method is not required for read-only files, and should be set + * to NULL when not supported by the file. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param length desired file size after truncate + */ + int (*fh_truncate)( + WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t length); + + /*! + * Write to a file, based on the POSIX 1003.1 standard pwrite. + * + * This method is not required for read-only files, and should be set + * to NULL when not supported by the file. + * + * @errors + * + * @param file_handle the WT_FILE_HANDLE + * @param session the current WiredTiger session + * @param offset offset at which to start writing + * @param length amount of data to write + * @param buf content to be written to the file + */ + int (*fh_write)(WT_FILE_HANDLE *file_handle, WT_SESSION *session, + wt_off_t offset, size_t length, const void *buf); +}; +#endif /* !defined(SWIG) */ + /*! * Entry point to an extension, called when the extension is loaded. 
* @@ -3771,285 +4268,328 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_BYTES_WRITE 1032 /*! cache: checkpoint blocked page eviction */ #define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1033 +/*! cache: eviction calls to get a page */ +#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1034 +/*! cache: eviction calls to get a page found queue empty */ +#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1035 +/*! cache: eviction calls to get a page found queue empty after locking */ +#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1036 /*! cache: eviction currently operating in aggressive mode */ -#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1034 +#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1037 /*! cache: eviction server candidate queue empty when topping up */ -#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1035 +#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1038 /*! cache: eviction server candidate queue not empty when topping up */ -#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1036 +#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1039 /*! cache: eviction server evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1037 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1040 /*! cache: eviction server populating queue, but not evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1038 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1041 +/*! cache: eviction server skipped very large page */ +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_TOOBIG 1042 +/*! cache: eviction server slept, because we did not make progress with + * eviction */ +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1043 /*! cache: eviction server unable to reach eviction goal */ -#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1039 +#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1044 /*! 
cache: eviction worker thread evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1040 +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1045 /*! cache: failed eviction of pages that exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1041 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1046 /*! cache: files with active eviction walks */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1042 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1047 /*! cache: files with new eviction walks started */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1043 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1048 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1044 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1049 +/*! cache: hazard pointer check calls */ +#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1050 +/*! cache: hazard pointer check entries walked */ +#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1051 +/*! cache: hazard pointer maximum array length */ +#define WT_STAT_CONN_CACHE_HAZARD_MAX 1052 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1045 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1053 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1046 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1054 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1047 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1055 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1048 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1056 /*! cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1049 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1057 /*! cache: lookaside table insert calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1050 +#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1058 /*! 
cache: lookaside table remove calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1051 +#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1059 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1052 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1060 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1053 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1061 /*! cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1054 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1062 /*! cache: modified pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1055 +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1063 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1056 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1064 /*! cache: page written requiring lookaside records */ -#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1057 +#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1065 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1058 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1066 /*! cache: pages evicted because they exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1059 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1067 /*! cache: pages evicted because they had chains of deleted items */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1060 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1068 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1061 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1069 /*! cache: pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1062 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1070 /*! cache: pages queued for urgent eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1063 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1071 /*! 
cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1064 +#define WT_STAT_CONN_CACHE_READ 1072 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1065 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1073 +/*! cache: pages requested from the cache */ +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1074 /*! cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1066 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1075 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1067 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1076 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1068 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1077 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1069 +#define WT_STAT_CONN_CACHE_WRITE 1078 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1070 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1079 /*! cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1071 +#define WT_STAT_CONN_CACHE_OVERHEAD 1080 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1072 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1081 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1073 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1082 /*! cache: tracked bytes belonging to overflow pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1074 +#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1083 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1075 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1084 /*! cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1076 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1085 /*! 
cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1077 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1086 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1078 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1087 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1079 +#define WT_STAT_CONN_COND_AUTO_WAIT 1088 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1080 +#define WT_STAT_CONN_FILE_OPEN 1089 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1081 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1090 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1082 +#define WT_STAT_CONN_MEMORY_FREE 1091 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1083 +#define WT_STAT_CONN_MEMORY_GROW 1092 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1084 +#define WT_STAT_CONN_COND_WAIT 1093 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1085 +#define WT_STAT_CONN_RWLOCK_READ 1094 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1086 +#define WT_STAT_CONN_RWLOCK_WRITE 1095 +/*! connection: total fsync I/Os */ +#define WT_STAT_CONN_FSYNC_IO 1096 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1087 +#define WT_STAT_CONN_READ_IO 1097 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1088 +#define WT_STAT_CONN_WRITE_IO 1098 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1089 +#define WT_STAT_CONN_CURSOR_CREATE 1099 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1090 +#define WT_STAT_CONN_CURSOR_INSERT 1100 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1091 +#define WT_STAT_CONN_CURSOR_NEXT 1101 /*! 
cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1092 +#define WT_STAT_CONN_CURSOR_PREV 1102 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1093 +#define WT_STAT_CONN_CURSOR_REMOVE 1103 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1094 +#define WT_STAT_CONN_CURSOR_RESET 1104 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1095 +#define WT_STAT_CONN_CURSOR_RESTART 1105 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1096 +#define WT_STAT_CONN_CURSOR_SEARCH 1106 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1097 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1107 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1098 +#define WT_STAT_CONN_CURSOR_UPDATE 1108 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1099 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1109 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1100 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1110 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1101 +#define WT_STAT_CONN_DH_SWEEP_REF 1111 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1102 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1112 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1103 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1113 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1104 +#define WT_STAT_CONN_DH_SWEEP_TOD 1114 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1105 +#define WT_STAT_CONN_DH_SWEEPS 1115 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1106 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1116 /*! 
data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1107 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1117 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1108 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1118 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1109 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1119 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1110 +#define WT_STAT_CONN_LOG_SLOT_RACES 1120 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1111 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1121 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1112 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1122 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1113 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1123 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1114 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1124 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1115 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1125 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1116 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1126 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1117 +#define WT_STAT_CONN_LOG_FLUSH 1127 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1118 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1128 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1119 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1129 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1120 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1130 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1121 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1131 /*! 
log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1122 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1132 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1123 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1133 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1124 +#define WT_STAT_CONN_LOG_SCANS 1134 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1125 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1135 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1126 +#define WT_STAT_CONN_LOG_WRITE_LSN 1136 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1127 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1137 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1128 +#define WT_STAT_CONN_LOG_SYNC 1138 +/*! log: log sync time duration (usecs) */ +#define WT_STAT_CONN_LOG_SYNC_DURATION 1139 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1129 +#define WT_STAT_CONN_LOG_SYNC_DIR 1140 +/*! log: log sync_dir time duration (usecs) */ +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1141 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1130 +#define WT_STAT_CONN_LOG_WRITES 1142 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1131 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1143 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1132 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1144 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1133 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1145 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1134 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1146 /*! 
log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1135 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1147 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1136 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1148 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1137 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1149 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1138 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1150 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1139 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1151 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1140 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1152 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1141 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1153 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1142 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1154 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1143 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1155 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1144 +#define WT_STAT_CONN_REC_PAGES 1156 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1145 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1157 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1146 +#define WT_STAT_CONN_REC_PAGE_DELETE 1158 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1147 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1159 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1148 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1160 /*! 
session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1149 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1161 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1150 +#define WT_STAT_CONN_SESSION_OPEN 1162 +/*! thread-state: active filesystem fsync calls */ +#define WT_STAT_CONN_FSYNC_ACTIVE 1163 +/*! thread-state: active filesystem read calls */ +#define WT_STAT_CONN_READ_ACTIVE 1164 +/*! thread-state: active filesystem write calls */ +#define WT_STAT_CONN_WRITE_ACTIVE 1165 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1151 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1166 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1152 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1167 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1153 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1168 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1154 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1169 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1155 +#define WT_STAT_CONN_PAGE_SLEEP 1170 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1156 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1171 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1157 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1172 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1158 +#define WT_STAT_CONN_TXN_BEGIN 1173 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1159 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1174 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1160 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1175 /*! 
transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1161 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1176 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1162 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1177 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1163 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1178 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1164 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1179 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1165 +#define WT_STAT_CONN_TXN_CHECKPOINT 1180 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1166 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1181 +/*! transaction: transaction fsync calls for checkpoint after allocating + * the transaction ID */ +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1182 +/*! transaction: transaction fsync calls for checkpoint before allocating + * the transaction ID */ +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE 1183 +/*! transaction: transaction fsync duration for checkpoint after + * allocating the transaction ID (usecs) */ +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1184 +/*! transaction: transaction fsync duration for checkpoint before + * allocating the transaction ID (usecs) */ +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE_DURATION 1185 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1167 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1186 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1168 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1187 /*! 
transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1169 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1188 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1170 +#define WT_STAT_CONN_TXN_SYNC 1189 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1171 +#define WT_STAT_CONN_TXN_COMMIT 1190 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1172 +#define WT_STAT_CONN_TXN_ROLLBACK 1191 /*! * @} @@ -4138,42 +4678,42 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038 /*! btree: row-store leaf pages */ #define WT_STAT_DSRC_BTREE_ROW_LEAF 2039 -/*! cache: bytes currently in the cache */ -#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040 /*! cache: bytes read into cache */ -#define WT_STAT_DSRC_CACHE_BYTES_READ 2041 +#define WT_STAT_DSRC_CACHE_BYTES_READ 2040 /*! cache: bytes written from cache */ -#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2042 +#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2041 /*! cache: checkpoint blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2043 +#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2042 /*! cache: data source pages selected for eviction unable to be evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2044 +#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2043 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2045 +#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2044 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2046 +#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2045 /*! cache: in-memory page splits */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2047 +#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2046 /*! 
cache: internal pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2048 +#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2047 /*! cache: internal pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2049 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2048 /*! cache: leaf pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2050 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2049 /*! cache: modified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2051 +#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2050 /*! cache: overflow pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2052 +#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2051 /*! cache: overflow values cached in memory */ -#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2053 +#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2052 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2054 +#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2053 /*! cache: page written requiring lookaside records */ -#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2055 +#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2054 /*! cache: pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ 2056 +#define WT_STAT_DSRC_CACHE_READ 2055 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2057 +#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2056 +/*! cache: pages requested from the cache */ +#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2057 /*! cache: pages written from cache */ #define WT_STAT_DSRC_CACHE_WRITE 2058 /*! cache: pages written requiring in-memory restoration */ @@ -4266,12 +4806,16 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); * @anchor statistics_join * @{ */ -/*! : accesses */ -#define WT_STAT_JOIN_ACCESSES 3000 -/*! : actual count of items */ -#define WT_STAT_JOIN_ACTUAL_COUNT 3001 +/*! 
: accesses to the main table */ +#define WT_STAT_JOIN_MAIN_ACCESS 3000 /*! : bloom filter false positives */ -#define WT_STAT_JOIN_BLOOM_FALSE_POSITIVE 3002 +#define WT_STAT_JOIN_BLOOM_FALSE_POSITIVE 3001 +/*! : checks that conditions of membership are satisfied */ +#define WT_STAT_JOIN_MEMBERSHIP_CHECK 3002 +/*! : items inserted into a bloom filter */ +#define WT_STAT_JOIN_BLOOM_INSERT 3003 +/*! : items iterated */ +#define WT_STAT_JOIN_ITERATED 3004 /*! @} */ /* * Statistics section: END diff --git a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h index 7d97d97dcf5..3d65cd1fc24 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h +++ b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h @@ -131,6 +131,19 @@ struct __wt_extension_api { WT_EXTENSION_API *, WT_SESSION *session, int error); /*! + * Map a Windows system error code to a POSIX 1003.1/ANSI C error. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param windows_error a Windows system error code + * @returns a string representation of the error + * + * @snippet ex_data_source.c WT_EXTENSION_API map_windows_error + */ + int (*map_windows_error)(WT_EXTENSION_API *wt_api, + WT_SESSION *session, uint32_t windows_error); + + /*! * Allocate short-term use scratch memory. 
* * @param wt_api the extension handle diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index 9e5007b38ed..c5337967f22 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -28,7 +28,6 @@ extern "C" { #include <sys/time.h> #include <sys/uio.h> #endif -#include <ctype.h> #ifndef _WIN32 #include <dlfcn.h> #endif @@ -169,6 +168,8 @@ struct __wt_dsrc_stats; typedef struct __wt_dsrc_stats WT_DSRC_STATS; struct __wt_evict_entry; typedef struct __wt_evict_entry WT_EVICT_ENTRY; +struct __wt_evict_queue; + typedef struct __wt_evict_queue WT_EVICT_QUEUE; struct __wt_evict_worker; typedef struct __wt_evict_worker WT_EVICT_WORKER; struct __wt_ext; @@ -179,6 +180,14 @@ struct __wt_fair_lock; typedef struct __wt_fair_lock WT_FAIR_LOCK; struct __wt_fh; typedef struct __wt_fh WT_FH; +struct __wt_file_handle_inmem; + typedef struct __wt_file_handle_inmem WT_FILE_HANDLE_INMEM; +struct __wt_file_handle_posix; + typedef struct __wt_file_handle_posix WT_FILE_HANDLE_POSIX; +struct __wt_file_handle_win; + typedef struct __wt_file_handle_win WT_FILE_HANDLE_WIN; +struct __wt_fstream; + typedef struct __wt_fstream WT_FSTREAM; struct __wt_hazard; typedef struct __wt_hazard WT_HAZARD; struct __wt_ikey; @@ -347,25 +356,33 @@ union __wt_rand_state; #include "connection.h" #include "extern.h" +#ifdef _WIN32 +#include "extern_win.h" +#else +#include "extern_posix.h" +#endif #include "verify_build.h" +#include "ctype.i" /* required by packing.i */ #include "intpack.i" /* required by cell.i, packing.i */ -#include "buf.i" +#include "buf.i" /* required by cell.i */ #include "cache.i" /* required by txn.i */ #include "cell.i" /* required by btree.i */ -#include "log.i" -#include "misc.i" #include "mutex.i" /* required by btree.i */ -#include "packing.i" #include "txn.i" /* required by btree.i */ +#include "bitstring.i" #include "btree.i" /* required by 
cursor.i */ #include "btree_cmp.i" -#include "cursor.i" - -#include "bitstring.i" #include "column.i" +#include "cursor.i" +#include "log.i" +#include "misc.i" +#include "os_fhandle.i" +#include "os_fs.i" +#include "os_fstream.i" +#include "packing.i" #include "serial.i" #if defined(__cplusplus) diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 8591818b5a3..bf83c280d8d 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -95,9 +95,11 @@ __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn) int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) { + struct timespec fsync_start, fsync_stop; WT_DECL_RET; WT_FH *log_fh; WT_LOG *log; + uint64_t fsync_duration_usecs; log = S2C(session)->log; @@ -124,9 +126,14 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) "log_force_sync: sync directory %s to LSN %" PRIu32 "/%" PRIu32, log->log_dir_fh->name, min_lsn->l.file, min_lsn->l.offset)); - WT_ERR(__wt_directory_sync_fh(session, log->log_dir_fh)); + WT_ERR(__wt_epoch(session, &fsync_start)); + WT_ERR(__wt_fsync(session, log->log_dir_fh, true)); + WT_ERR(__wt_epoch(session, &fsync_stop)); + fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start); log->sync_dir_lsn = *min_lsn; WT_STAT_FAST_CONN_INCR(session, log_sync_dir); + WT_STAT_FAST_CONN_INCRV(session, + log_sync_dir_duration, fsync_duration_usecs); } /* * Sync the log file if needed. 
@@ -143,9 +150,14 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) WT_ERR(__wt_verbose(session, WT_VERB_LOG, "log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32, log_fh->name, min_lsn->l.file, min_lsn->l.offset)); + WT_ERR(__wt_epoch(session, &fsync_start)); WT_ERR(__wt_fsync(session, log_fh, true)); + WT_ERR(__wt_epoch(session, &fsync_stop)); + fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start); log->sync_lsn = *min_lsn; WT_STAT_FAST_CONN_INCR(session, log_sync); + WT_STAT_FAST_CONN_INCRV(session, + log_sync_duration, fsync_duration_usecs); WT_ERR(__wt_close(session, &log_fh)); WT_ERR(__wt_cond_signal(session, log->log_sync_cond)); } @@ -258,8 +270,8 @@ __log_get_files(WT_SESSION_IMPL *session, log_path = conn->log_path; if (log_path == NULL) log_path = ""; - return (__wt_dirlist(session, log_path, file_prefix, - WT_DIRLIST_INCLUDE, filesp, countp)); + return (__wt_fs_directory_list( + session, log_path, file_prefix, filesp, countp)); } /* @@ -277,6 +289,9 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session, uint32_t id, max; u_int count, i; + *filesp = NULL; + *countp = 0; + id = 0; log = S2C(session)->log; @@ -307,26 +322,12 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session, *countp = count; if (0) { -err: __wt_log_files_free(session, files, count); +err: WT_TRET(__wt_fs_directory_list_free(session, &files, count)); } return (ret); } /* - * __wt_log_files_free -- - * Free memory associated with a log file list. - */ -void -__wt_log_files_free(WT_SESSION_IMPL *session, char **files, u_int count) -{ - u_int i; - - for (i = 0; i < count; i++) - __wt_free(session, files[i]); - __wt_free(session, files); -} - -/* * __log_filename -- * Given a log number, return a WT_ITEM of a generated log file name * of the given prefix type. @@ -443,21 +444,27 @@ __log_prealloc(WT_SESSION_IMPL *session, WT_FH *fh) conn = S2C(session); log = conn->log; - ret = 0; + /* * If the user configured zero filling, pre-allocate the log file * manually. 
Otherwise use either fallocate or ftruncate to create * and zero the log file based on what is available. */ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ZERO_FILL)) - ret = __log_zero(session, fh, - WT_LOG_FIRST_RECORD, conn->log_file_max); - else if (fh->fallocate_available == WT_FALLOCATE_NOT_AVAILABLE || - (ret = __wt_fallocate(session, fh, - WT_LOG_FIRST_RECORD, conn->log_file_max)) == ENOTSUP) - ret = __wt_ftruncate(session, fh, - WT_LOG_FIRST_RECORD + conn->log_file_max); - return (ret); + return (__log_zero(session, fh, + WT_LOG_FIRST_RECORD, conn->log_file_max)); + + /* + * We have exclusive access to the log file and there are no other + * writes happening concurrently, so there are no locking issues. + */ + if ((ret = __wt_fallocate( + session, fh, WT_LOG_FIRST_RECORD, + conn->log_file_max - WT_LOG_FIRST_RECORD)) == 0) + return (0); + WT_RET_ERROR_OK(ret, ENOTSUP); + + return (__wt_ftruncate(session, fh, conn->log_file_max)); } /* @@ -669,14 +676,17 @@ static int __log_openfile(WT_SESSION_IMPL *session, bool ok_create, WT_FH **fhp, const char *file_prefix, uint32_t id) { + WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(buf); WT_DECL_RET; WT_LOG *log; WT_LOG_DESC *desc; WT_LOG_RECORD *logrec; uint32_t allocsize; + u_int flags; - log = S2C(session)->log; + conn = S2C(session); + log = conn->log; if (log == NULL) allocsize = WT_LOG_ALIGN; else @@ -685,8 +695,14 @@ __log_openfile(WT_SESSION_IMPL *session, WT_ERR(__log_filename(session, id, file_prefix, buf)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "opening log %s", (const char *)buf->data)); - WT_ERR(__wt_open(session, buf->data, - WT_FILE_TYPE_LOG, ok_create ? 
WT_OPEN_CREATE : 0, fhp)); + flags = 0; + if (ok_create) + LF_SET(WT_OPEN_CREATE); + if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG)) + LF_SET(WT_OPEN_DIRECTIO); + WT_ERR(__wt_open( + session, buf->data, WT_OPEN_FILE_TYPE_LOG, flags, fhp)); + /* * If we are not creating the log file but opening it for reading, * check that the magic number and versions are correct. @@ -757,12 +773,11 @@ __log_alloc_prealloc(WT_SESSION_IMPL *session, uint32_t to_num) * All file setup, writing the header and pre-allocation was done * before. We only need to rename it. */ - WT_ERR(__wt_rename(session, from_path->data, to_path->data)); + WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data)); err: __wt_scr_free(session, &from_path); __wt_scr_free(session, &to_path); - if (logfiles != NULL) - __wt_log_files_free(session, logfiles, logcount); + WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount)); return (ret); } @@ -992,8 +1007,7 @@ __log_truncate(WT_SESSION_IMPL *session, } } err: WT_TRET(__wt_close(session, &log_fh)); - if (logfiles != NULL) - __wt_log_files_free(session, logfiles, logcount); + WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount)); return (ret); } @@ -1035,7 +1049,6 @@ __wt_log_allocfile( */ WT_ERR(__log_openfile(session, true, &log_fh, WT_LOG_TMPNAME, tmp_id)); WT_ERR(__log_file_header(session, log_fh, NULL, true)); - WT_ERR(__wt_ftruncate(session, log_fh, WT_LOG_FIRST_RECORD)); WT_ERR(__log_prealloc(session, log_fh)); WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); @@ -1045,7 +1058,7 @@ __wt_log_allocfile( /* * Rename it into place and make it available. 
*/ - WT_ERR(__wt_rename(session, from_path->data, to_path->data)); + WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data)); err: __wt_scr_free(session, &from_path); __wt_scr_free(session, &to_path); @@ -1068,7 +1081,7 @@ __wt_log_remove(WT_SESSION_IMPL *session, WT_ERR(__log_filename(session, lognum, file_prefix, path)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "log_remove: remove log %s", (char *)path->data)); - WT_ERR(__wt_remove(session, path->data)); + WT_ERR(__wt_fs_remove(session, path->data)); err: __wt_scr_free(session, &path); return (ret); } @@ -1104,7 +1117,7 @@ __wt_log_open(WT_SESSION_IMPL *session) WT_RET(__wt_verbose(session, WT_VERB_LOG, "log_open: open fh to directory %s", conn->log_path)); WT_RET(__wt_open(session, conn->log_path, - WT_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh)); + WT_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh)); } if (!F_ISSET(conn, WT_CONN_READONLY)) { @@ -1121,9 +1134,8 @@ __wt_log_open(WT_SESSION_IMPL *session) WT_ERR(__wt_log_remove( session, WT_LOG_TMPNAME, lognum)); } - __wt_log_files_free(session, logfiles, logcount); - logfiles = NULL; - logcount = 0; + WT_ERR( + __wt_fs_directory_list_free(session, &logfiles, logcount)); WT_ERR(__log_get_files(session, WT_LOG_PREPNAME, &logfiles, &logcount)); for (i = 0; i < logcount; i++) { @@ -1132,8 +1144,8 @@ __wt_log_open(WT_SESSION_IMPL *session) WT_ERR(__wt_log_remove( session, WT_LOG_PREPNAME, lognum)); } - __wt_log_files_free(session, logfiles, logcount); - logfiles = NULL; + WT_ERR( + __wt_fs_directory_list_free(session, &logfiles, logcount)); } /* @@ -1171,8 +1183,7 @@ __wt_log_open(WT_SESSION_IMPL *session) FLD_SET(conn->log_flags, WT_CONN_LOG_EXISTED); } -err: if (logfiles != NULL) - __wt_log_files_free(session, logfiles, logcount); +err: WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount)); if (ret == 0) F_SET(log, WT_LOG_OPENED); return (ret); @@ -1210,8 +1221,7 @@ __wt_log_close(WT_SESSION_IMPL *session) WT_RET(__wt_verbose(session, WT_VERB_LOG, 
"closing log directory %s", log->log_dir_fh->name)); if (!F_ISSET(conn, WT_CONN_READONLY)) - WT_RET( - __wt_directory_sync_fh(session, log->log_dir_fh)); + WT_RET(__wt_fsync(session, log->log_dir_fh, true)); WT_RET(__wt_close(session, &log->log_dir_fh)); log->log_dir_fh = NULL; } @@ -1285,11 +1295,13 @@ err: __wt_free(session, buf); int __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) { + struct timespec fsync_start, fsync_stop; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LOG *log; WT_LSN sync_lsn; int64_t release_buffered, release_bytes; + uint64_t fsync_duration_usecs; int yield_count; bool locked; @@ -1419,10 +1431,15 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) "/%" PRIu32, log->log_dir_fh->name, sync_lsn.l.file, sync_lsn.l.offset)); - WT_ERR(__wt_directory_sync_fh( - session, log->log_dir_fh)); + WT_ERR(__wt_epoch(session, &fsync_start)); + WT_ERR(__wt_fsync(session, log->log_dir_fh, true)); + WT_ERR(__wt_epoch(session, &fsync_stop)); + fsync_duration_usecs = + WT_TIMEDIFF_US(fsync_stop, fsync_start); log->sync_dir_lsn = sync_lsn; WT_STAT_FAST_CONN_INCR(session, log_sync_dir); + WT_STAT_FAST_CONN_INCRV(session, + log_sync_dir_duration, fsync_duration_usecs); } /* @@ -1436,7 +1453,13 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) log->log_fh->name, sync_lsn.l.file, sync_lsn.l.offset)); WT_STAT_FAST_CONN_INCR(session, log_sync); + WT_ERR(__wt_epoch(session, &fsync_start)); WT_ERR(__wt_fsync(session, log->log_fh, true)); + WT_ERR(__wt_epoch(session, &fsync_stop)); + fsync_duration_usecs = + WT_TIMEDIFF_US(fsync_stop, fsync_start); + WT_STAT_FAST_CONN_INCRV(session, + log_sync_duration, fsync_duration_usecs); log->sync_lsn = sync_lsn; WT_ERR(__wt_cond_signal(session, log->log_sync_cond)); } @@ -1561,8 +1584,8 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, } WT_SET_LSN(&start_lsn, firstlog, 0); WT_SET_LSN(&end_lsn, lastlog, 0); - 
__wt_log_files_free(session, logfiles, logcount); - logfiles = NULL; + WT_ERR( + __wt_fs_directory_list_free(session, &logfiles, logcount)); } WT_ERR(__log_openfile( session, false, &log_fh, WT_LOG_FILENAME, start_lsn.l.file)); @@ -1757,9 +1780,23 @@ advance: &rd_lsn, WT_LOG_FILENAME, 0)); err: WT_STAT_FAST_CONN_INCR(session, log_scans); + /* + * If the first attempt to read a log record results in + * an error recovery is likely going to fail. Try to provide + * a helpful failure message. + */ + if (ret != 0 && firstrecord) { + __wt_errx(session, + "WiredTiger is unable to read the recovery log."); + __wt_errx(session, "This may be due to the log" + " files being encrypted, being from an older" + " version or due to corruption on disk"); + __wt_errx(session, "You should confirm that you have" + " opened the database with the correct options including" + " all encryption and compression options"); + } - if (logfiles != NULL) - __wt_log_files_free(session, logfiles, logcount); + WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount)); __wt_scr_free(session, &buf); __wt_scr_free(session, &decryptitem); diff --git a/src/third_party/wiredtiger/src/log/log_auto.c b/src/third_party/wiredtiger/src/log/log_auto.c index d4dab4e1a33..34bccd0ede4 100644 --- a/src/third_party/wiredtiger/src/log/log_auto.c +++ b/src/third_party/wiredtiger/src/log/log_auto.c @@ -44,7 +44,7 @@ __wt_logop_read(WT_SESSION_IMPL *session, } static size_t -__logrec_json_unpack_str(char *dest, size_t destlen, const char *src, +__logrec_json_unpack_str(char *dest, size_t destlen, const u_char *src, size_t srclen) { size_t total; diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c index 9ca850da9f1..78235fb6a92 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c @@ -103,7 +103,6 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm) bool hard_limit, have_primary, ovfl; lsm_tree = 
clsm->lsm_tree; - ovfl = false; session = (WT_SESSION_IMPL *)clsm->iface.session; if (clsm->nchunks == 0) { @@ -1155,7 +1154,6 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) closest = NULL; clsm = (WT_CURSOR_LSM *)cursor; exact = 0; - deleted = false; CURSOR_API_CALL(cursor, session, search_near, NULL); WT_CURSOR_NEEDKEY(cursor); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_merge.c b/src/third_party/wiredtiger/src/lsm/lsm_merge.c index 6d907284546..1ff0a216c02 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_merge.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_merge.c @@ -152,16 +152,13 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int end_chunk, i, merge_max, merge_min, nchunks, start_chunk; u_int oldest_gen, youngest_gen; - chunk_size = 0; - nchunks = 0; - record_count = 0; - chunk = youngest = NULL; - /* Clear the return parameters */ - *start = 0; - *end = 0; + *start = *end = 0; *records = 0; + chunk_size = 0; + chunk = youngest = NULL; + aggressive = lsm_tree->merge_aggressiveness; merge_max = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ? 100 : lsm_tree->merge_max; @@ -218,8 +215,8 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, */ retry_find: oldest_gen = youngest_gen = lsm_tree->chunk[end_chunk]->generation; - for (start_chunk = end_chunk + 1, record_count = 0; - start_chunk > 0; ) { + for (record_count = 0, + start_chunk = end_chunk + 1; start_chunk > 0;) { chunk = lsm_tree->chunk[start_chunk - 1]; youngest = lsm_tree->chunk[end_chunk]; nchunks = (end_chunk + 1) - start_chunk; @@ -306,14 +303,12 @@ retry_find: } #endif - WT_ASSERT(session, - nchunks == 0 || (chunk != NULL && youngest != NULL)); + WT_ASSERT(session, nchunks == 0 || (chunk != NULL && youngest != NULL)); + /* - * Don't do merges that are too small or across too many - * generations. + * Don't do merges that are too small or across too many generations. 
*/ - if (nchunks < merge_min || - oldest_gen - youngest_gen > max_gap) { + if (nchunks < merge_min || oldest_gen - youngest_gen > max_gap) { for (i = 0; i < nchunks; i++) { chunk = lsm_tree->chunk[start_chunk + i]; WT_ASSERT(session, @@ -365,7 +360,6 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) bloom = NULL; chunk = NULL; dest = src = NULL; - start_id = 0; created_chunk = create_bloom = locked = in_sync = false; /* Fast path if it's obvious no merges could be done. */ @@ -485,7 +479,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) lsm_rows_merged, insert_count % LSM_MERGE_CHECK_INTERVAL); ++lsm_tree->merge_progressing; WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Bloom size for %" PRIu64 " has %" PRIu64 " items inserted.", + "Bloom size for %" PRIu64 " has %" PRIu64 " items inserted", record_count, insert_count)); /* diff --git a/src/third_party/wiredtiger/src/lsm/lsm_meta.c b/src/third_party/wiredtiger/src/lsm/lsm_meta.c index e19e2cd0126..7e100cb855c 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_meta.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_meta.c @@ -331,7 +331,7 @@ __lsm_meta_read_v1( WT_ERR(__wt_scr_alloc(session, 0, &buf)); WT_ERR(__wt_buf_fmt(session, buf, "key_format=u,value_format=u,memory_page_max=%" PRIu64, - 2 * lsm_tree->chunk_max)); + 2 * lsm_tree->chunk_size)); file_cfg[2] = buf->data; WT_ERR(__wt_config_collapse(session, file_cfg, &fileconf)); lsm_tree->file_config = fileconf; diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c index cb1ddf22f84..da106ae2089 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c @@ -235,7 +235,7 @@ __wt_lsm_tree_set_chunk_size( if (!WT_PREFIX_SKIP(filename, "file:")) WT_RET_MSG(session, EINVAL, "Expected a 'file:' URI: %s", chunk->uri); - WT_RET(__wt_filesize_name(session, filename, false, &size)); + WT_RET(__wt_fs_size(session, 
filename, &size)); chunk->size = (uint64_t)size; @@ -256,7 +256,7 @@ __lsm_tree_cleanup_old(WT_SESSION_IMPL *session, const char *uri) { WT_CONFIG_BASE(session, WT_SESSION_drop), "force", NULL }; bool exists; - WT_RET(__wt_exist(session, uri + strlen("file:"), &exists)); + WT_RET(__wt_fs_exist(session, uri + strlen("file:"), &exists)); if (exists) WT_WITH_SCHEMA_LOCK(session, ret, ret = __wt_schema_drop(session, uri, cfg)); @@ -1344,8 +1344,14 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session, locked = true; for (i = 0; i < lsm_tree->nchunks; i++) { chunk = lsm_tree->chunk[i]; - if (file_func == __wt_checkpoint && - F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) + /* + * If the chunk is on disk, don't include underlying handles in + * the checkpoint. Checking the "get handles" function is all + * we need to do, no further checkpoint calls are done if the + * handle is not gathered. + */ + if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && + file_func == __wt_checkpoint_get_handles) continue; WT_ERR(__wt_schema_worker(session, chunk->uri, file_func, name_func, cfg, open_flags)); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c index f5bb4cfd337..c19f42327be 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c @@ -526,7 +526,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri) ret = __wt_schema_drop(session, uri, drop_cfg)); if (ret == 0) - ret = __wt_remove(session, uri + strlen("file:")); + ret = __wt_fs_remove(session, uri + strlen("file:")); WT_RET(__wt_verbose(session, WT_VERB_LSM, "Dropped %s", uri)); if (ret == EBUSY || ret == ENOENT) diff --git a/src/third_party/wiredtiger/src/meta/meta_table.c b/src/third_party/wiredtiger/src/meta/meta_table.c index dd65f1a7ef9..38a2edd7219 100644 --- a/src/third_party/wiredtiger/src/meta/meta_table.c +++ b/src/third_party/wiredtiger/src/meta/meta_table.c @@ -9,18 +9,6 @@ #include "wt_internal.h" /* 
- * __wt_metadata_init -- - * Metadata initialization. - */ -void -__wt_metadata_init(WT_SESSION_IMPL *session) -{ - /* We cache the metadata file's URI hash for fast detection. */ - S2C(session)->meta_uri_hash = - __wt_hash_city64(WT_METAFILE_URI, strlen(WT_METAFILE_URI)); -} - -/* * __metadata_turtle -- * Return if a key's value should be taken from the turtle file. */ diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c index a73b7e09d37..eb06b2bed66 100644 --- a/src/third_party/wiredtiger/src/meta/meta_track.c +++ b/src/third_party/wiredtiger/src/meta/meta_track.c @@ -153,7 +153,6 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk) case WT_ST_REMOVE: case WT_ST_SET: break; - WT_ILLEGAL_VALUE(session); } __meta_track_clear(session, trk); @@ -194,8 +193,8 @@ __meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk) __wt_err(session, ret, "metadata unroll rename %s to %s", trk->b, trk->a); - if (trk->a == NULL && - (ret = __wt_remove(session, trk->b + strlen("file:"))) != 0) + if (trk->a == NULL && (ret = + __wt_fs_remove(session, trk->b + strlen("file:"))) != 0) __wt_err(session, ret, "metadata unroll create %s", trk->b); @@ -215,7 +214,6 @@ __meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk) __wt_err(session, ret, "metadata unroll update %s to %s", trk->a, trk->b); break; - WT_ILLEGAL_VALUE(session); } __meta_track_clear(session, trk); diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c index 0b287c228e5..4d2b359bbed 100644 --- a/src/third_party/wiredtiger/src/meta/meta_turtle.c +++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c @@ -18,12 +18,9 @@ __metadata_config(WT_SESSION_IMPL *session, char **metaconfp) WT_DECL_ITEM(buf); WT_DECL_RET; const char *cfg[] = { WT_CONFIG_BASE(session, file_meta), NULL, NULL }; - char *metaconf; *metaconfp = NULL; - metaconf = NULL; - /* Create a turtle file with 
default values. */ WT_RET(__wt_scr_alloc(session, 0, &buf)); WT_ERR(__wt_buf_fmt(session, buf, @@ -31,14 +28,9 @@ __metadata_config(WT_SESSION_IMPL *session, char **metaconfp) WT_METAFILE_ID, WT_BTREE_MAJOR_VERSION_MAX, WT_BTREE_MINOR_VERSION_MAX)); cfg[1] = buf->data; - WT_ERR(__wt_config_collapse(session, cfg, &metaconf)); + ret = __wt_config_collapse(session, cfg, metaconfp); - *metaconfp = metaconf; - - if (0) { -err: __wt_free(session, metaconf); - } - __wt_scr_free(session, &buf); +err: __wt_scr_free(session, &buf); return (ret); } @@ -71,24 +63,24 @@ __metadata_load_hot_backup(WT_SESSION_IMPL *session) WT_DECL_ITEM(key); WT_DECL_ITEM(value); WT_DECL_RET; - WT_FH *fh; + WT_FSTREAM *fs; bool exist; /* Look for a hot backup file: if we find it, load it. */ - WT_RET(__wt_exist(session, WT_METADATA_BACKUP, &exist)); + WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist)); if (!exist) return (0); - WT_RET(__wt_open(session, WT_METADATA_BACKUP, - WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY | WT_STREAM_READ, &fh)); + WT_RET(__wt_fopen(session, + WT_METADATA_BACKUP, 0, WT_STREAM_READ, &fs)); /* Read line pairs and load them into the metadata file. */ WT_ERR(__wt_scr_alloc(session, 512, &key)); WT_ERR(__wt_scr_alloc(session, 512, &value)); for (;;) { - WT_ERR(__wt_getline(session, key, fh)); + WT_ERR(__wt_getline(session, fs, key)); if (key->size == 0) break; - WT_ERR(__wt_getline(session, value, fh)); + WT_ERR(__wt_getline(session, fs, value)); if (value->size == 0) WT_ERR(__wt_illegal_value(session, WT_METADATA_BACKUP)); WT_ERR(__wt_metadata_update(session, key->data, value->data)); @@ -96,7 +88,7 @@ __metadata_load_hot_backup(WT_SESSION_IMPL *session) F_SET(S2C(session), WT_CONN_WAS_BACKUP); -err: WT_TRET(__wt_close(session, &fh)); +err: WT_TRET(__wt_fclose(session, &fs)); __wt_scr_free(session, &key); __wt_scr_free(session, &value); return (ret); @@ -128,7 +120,7 @@ __metadata_load_bulk(WT_SESSION_IMPL *session) continue; /* If the file exists, it's all good. 
*/ - WT_ERR(__wt_exist(session, key, &exist)); + WT_ERR(__wt_fs_exist(session, key, &exist)); if (exist) continue; @@ -156,7 +148,7 @@ int __wt_turtle_init(WT_SESSION_IMPL *session) { WT_DECL_RET; - bool exist_backup, exist_incr, exist_turtle, load; + bool exist_backup, exist_incr, exist_isrc, exist_turtle, load; char *metaconf; metaconf = NULL; @@ -182,21 +174,28 @@ __wt_turtle_init(WT_SESSION_IMPL *session) * that is an error. Otherwise, if there's already a turtle file, we're * done. */ - WT_RET(__wt_exist(session, WT_INCREMENTAL_BACKUP, &exist_incr)); - WT_RET(__wt_exist(session, WT_METADATA_BACKUP, &exist_backup)); - WT_RET(__wt_exist(session, WT_METADATA_TURTLE, &exist_turtle)); + WT_RET(__wt_fs_exist(session, WT_INCREMENTAL_BACKUP, &exist_incr)); + WT_RET(__wt_fs_exist(session, WT_INCREMENTAL_SRC, &exist_isrc)); + WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist_backup)); + WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist_turtle)); if (exist_turtle) { - if (exist_incr) + /* + * We need to detect the difference between a source database + * that may have crashed with an incremental backup file + * and a destination database that incorrectly ran recovery. + */ + if (exist_incr && !exist_isrc) WT_RET_MSG(session, EINVAL, "Incremental backup after running recovery " - "is not allowed."); + "is not allowed"); /* * If we have a backup file and metadata and turtle files, * we want to recreate the metadata from the backup. */ if (exist_backup) { - WT_RET(__wt_msg(session, "Both %s and %s exist. 
" - "Recreating metadata from backup.", + WT_RET(__wt_msg(session, + "Both %s and %s exist; recreating metadata from " + "backup", WT_METADATA_TURTLE, WT_METADATA_BACKUP)); WT_RET(__wt_remove_if_exists(session, WT_METAFILE)); WT_RET(__wt_remove_if_exists( @@ -242,7 +241,7 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) { WT_DECL_ITEM(buf); WT_DECL_RET; - WT_FH *fh; + WT_FSTREAM *fs; bool exist, match; *valuep = NULL; @@ -253,24 +252,23 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) * the turtle file, and that means returning the default configuration * string for the metadata file. */ - WT_RET(__wt_exist(session, WT_METADATA_TURTLE, &exist)); + WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist)); if (!exist) return (strcmp(key, WT_METAFILE_URI) == 0 ? __metadata_config(session, valuep) : WT_NOTFOUND); - WT_RET(__wt_open(session, WT_METADATA_TURTLE, - WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY | WT_STREAM_READ, &fh)); + WT_RET(__wt_fopen(session, WT_METADATA_TURTLE, 0, WT_STREAM_READ, &fs)); /* Search for the key. */ WT_ERR(__wt_scr_alloc(session, 512, &buf)); for (match = false;;) { - WT_ERR(__wt_getline(session, buf, fh)); + WT_ERR(__wt_getline(session, fs, buf)); if (buf->size == 0) WT_ERR(WT_NOTFOUND); if (strcmp(key, buf->data) == 0) match = true; /* Key matched: read the subsequent line for the value. */ - WT_ERR(__wt_getline(session, buf, fh)); + WT_ERR(__wt_getline(session, fs, buf)); if (buf->size == 0) WT_ERR(__wt_illegal_value(session, WT_METADATA_TURTLE)); if (match) @@ -280,7 +278,7 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) /* Copy the value for the caller. 
*/ WT_ERR(__wt_strdup(session, buf->data, valuep)); -err: WT_TRET(__wt_close(session, &fh)); +err: WT_TRET(__wt_fclose(session, &fs)); __wt_scr_free(session, &buf); if (ret != 0) @@ -295,38 +293,34 @@ err: WT_TRET(__wt_close(session, &fh)); int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) { - WT_FH *fh; - WT_DECL_ITEM(buf); + WT_FSTREAM *fs; WT_DECL_RET; int vmajor, vminor, vpatch; const char *version; - fh = NULL; + fs = NULL; /* * Create the turtle setup file: we currently re-write it from scratch * every time. */ - WT_RET(__wt_open(session, WT_METADATA_TURTLE_SET, - WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)); + WT_RET(__wt_fopen(session, WT_METADATA_TURTLE_SET, + WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs)); version = wiredtiger_version(&vmajor, &vminor, &vpatch); - WT_ERR(__wt_scr_alloc(session, 2 * 1024, &buf)); - WT_ERR(__wt_buf_fmt(session, buf, + WT_ERR(__wt_fprintf(session, fs, "%s\n%s\n%s\n" "major=%d,minor=%d,patch=%d\n%s\n%s\n", WT_METADATA_VERSION_STR, version, WT_METADATA_VERSION, vmajor, vminor, vpatch, key, value)); - WT_ERR(__wt_write(session, fh, 0, buf->size, buf->data)); - /* Flush the handle and rename the file into place. */ - ret = __wt_sync_handle_and_rename( - session, &fh, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE); + /* Flush the stream and rename the file into place. */ + ret = __wt_sync_and_rename( + session, &fs, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE); /* Close any file handle left open, remove any temporary file. 
*/ -err: WT_TRET(__wt_close(session, &fh)); +err: WT_TRET(__wt_fclose(session, &fs)); WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET)); - __wt_scr_free(session, &buf); return (ret); } diff --git a/src/third_party/wiredtiger/src/os_common/filename.c b/src/third_party/wiredtiger/src/os_common/filename.c index dfd67284948..5f174288350 100644 --- a/src/third_party/wiredtiger/src/os_common/filename.c +++ b/src/third_party/wiredtiger/src/os_common/filename.c @@ -60,9 +60,9 @@ __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name) { bool exist; - WT_RET(__wt_exist(session, name, &exist)); + WT_RET(__wt_fs_exist(session, name, &exist)); if (exist) - WT_RET(__wt_remove(session, name)); + WT_RET(__wt_fs_remove(session, name)); return (0); } @@ -78,7 +78,7 @@ __wt_rename_and_sync_directory( bool same_directory; /* Rename the source file to the target. */ - WT_RET(__wt_rename(session, from, to)); + WT_RET(__wt_fs_rename(session, from, to)); /* * Flush the backing directory to guarantee the rename. My reading of @@ -89,7 +89,7 @@ __wt_rename_and_sync_directory( * with specific mount options. Flush both of the from/to directories * until it's a performance problem. */ - WT_RET(__wt_directory_sync(session, from)); + WT_RET(__wt_fs_directory_sync(session, from)); /* * In almost all cases, we're going to be renaming files in the same @@ -101,29 +101,7 @@ __wt_rename_and_sync_directory( (fp != NULL && tp != NULL && fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0); - return (same_directory ? 0 : __wt_directory_sync(session, to)); -} - -/* - * __wt_sync_handle_and_rename -- - * Sync and close a handle, and swap it into place. - */ -int -__wt_sync_handle_and_rename( - WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to) -{ - WT_DECL_RET; - WT_FH *fh; - - fh = *fhp; - *fhp = NULL; - - /* Flush to disk and close the handle. 
*/ - ret = __wt_fsync(session, fh, true); - WT_TRET(__wt_close(session, &fh)); - WT_RET(ret); - - return (__wt_rename_and_sync_directory(session, from, to)); + return (same_directory ? 0 : __wt_fs_directory_sync(session, to)); } /* @@ -160,10 +138,9 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) WT_ERR(__wt_remove_if_exists(session, tmp->data)); /* Open the from and temporary file handles. */ - WT_ERR(__wt_open(session, from, - WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &ffh)); - WT_ERR(__wt_open(session, tmp->data, - WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh)); + WT_ERR(__wt_open(session, from, WT_OPEN_FILE_TYPE_REGULAR, 0, &ffh)); + WT_ERR(__wt_open(session, tmp->data, WT_OPEN_FILE_TYPE_REGULAR, + WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh)); /* * Allocate a copy buffer. Don't use a scratch buffer, this thing is @@ -182,7 +159,10 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) /* Close the from handle, then swap the temporary file into place. */ WT_ERR(__wt_close(session, &ffh)); - ret = __wt_sync_handle_and_rename(session, &tfh, tmp->data, to); + WT_ERR(__wt_fsync(session, tfh, true)); + WT_ERR(__wt_close(session, &tfh)); + + ret = __wt_rename_and_sync_directory(session, tmp->data, to); err: WT_TRET(__wt_close(session, &ffh)); WT_TRET(__wt_close(session, &tfh)); diff --git a/src/third_party/wiredtiger/src/os_common/os_errno.c b/src/third_party/wiredtiger/src/os_common/os_errno.c new file mode 100644 index 00000000000..a8e56b7f1aa --- /dev/null +++ b/src/third_party/wiredtiger/src/os_common/os_errno.c @@ -0,0 +1,80 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_errno -- + * Return errno, or WT_ERROR if errno not set. 
+ */ +int +__wt_errno(void) +{ + /* + * Called when we know an error occurred, and we want the system + * error code, but there's some chance it's not set. + */ + return (errno == 0 ? WT_ERROR : errno); +} + +/* + * __wt_strerror -- + * WT_SESSION.strerror and wiredtiger_strerror. + */ +const char * +__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen) +{ + const char *p; + + /* + * Check for a WiredTiger or POSIX constant string, no buffer needed. + */ + if ((p = __wt_wiredtiger_error(error)) != NULL) + return (p); + + /* + * When called from wiredtiger_strerror, write a passed-in buffer. + * When called from WT_SESSION.strerror, write the session's buffer. + * + * Fallback to a generic message. + */ + if (session == NULL && + snprintf(errbuf, errlen, "error return: %d", error) > 0) + return (errbuf); + if (session != NULL && __wt_buf_fmt( + session, &session->err, "error return: %d", error) == 0) + return (session->err.data); + + /* Defeated. */ + return ("Unable to return error string"); +} + +/* + * __wt_ext_map_windows_error -- + * Extension API call to map a Windows system error to a POSIX/ANSI error. + */ +int +__wt_ext_map_windows_error( + WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error) +{ + WT_UNUSED(wt_api); + WT_UNUSED(wt_session); + + /* + * This extension API only makes sense in Windows builds, but it's hard + * to exclude it otherwise (there's no way to return an error, anyway). + * Call an underlying function on Windows, else panic so callers figure + * out what they're doing wrong. 
+ */ +#ifdef _WIN32 + return (__wt_map_windows_error(windows_error)); +#else + WT_UNUSED(windows_error); + return (WT_PANIC); +#endif +} diff --git a/src/third_party/wiredtiger/src/os_common/os_fhandle.c b/src/third_party/wiredtiger/src/os_common/os_fhandle.c index b16b2e24bfa..81e4cc14ccb 100644 --- a/src/third_party/wiredtiger/src/os_common/os_fhandle.c +++ b/src/third_party/wiredtiger/src/os_common/os_fhandle.c @@ -9,20 +9,89 @@ #include "wt_internal.h" /* - * __wt_handle_search -- - * Search for a matching handle. + * __fhandle_method_finalize -- + * Initialize any NULL WT_FH structure methods to not-supported. Doing + * this means that custom file systems with incomplete implementations + * won't dereference NULL pointers. + */ +static int +__fhandle_method_finalize( + WT_SESSION_IMPL *session, WT_FILE_HANDLE *handle, bool readonly) +{ +#define WT_HANDLE_METHOD_REQ(name) \ + if (handle->name == NULL) \ + WT_RET_MSG(session, EINVAL, \ + "a WT_FILE_HANDLE.%s method must be configured", #name) + + WT_HANDLE_METHOD_REQ(close); + /* not required: fadvise */ + /* not required: fallocate */ + /* not required: fallocate_nolock */ + WT_HANDLE_METHOD_REQ(fh_lock); + /* not required: map */ + /* not required: map_discard */ + /* not required: map_preload */ + /* not required: map_unmap */ + WT_HANDLE_METHOD_REQ(fh_read); + WT_HANDLE_METHOD_REQ(fh_size); + if (!readonly) + WT_HANDLE_METHOD_REQ(fh_sync); + /* not required: sync_nowait */ + if (!readonly) { + WT_HANDLE_METHOD_REQ(fh_truncate); + WT_HANDLE_METHOD_REQ(fh_write); + } + + return (0); +} + +#ifdef HAVE_DIAGNOSTIC +/* + * __wt_handle_is_open -- + * Return if there's an open handle matching a name. 
*/ bool -__wt_handle_search(WT_SESSION_IMPL *session, - const char *name, bool increment_ref, WT_FH *newfh, WT_FH **fhp) +__wt_handle_is_open(WT_SESSION_IMPL *session, const char *name) +{ + WT_CONNECTION_IMPL *conn; + WT_FH *fh; + uint64_t bucket, hash; + bool found; + + conn = S2C(session); + found = false; + + hash = __wt_hash_city64(name, strlen(name)); + bucket = hash % WT_HASH_ARRAY_SIZE; + + __wt_spin_lock(session, &conn->fh_lock); + + TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) + if (strcmp(name, fh->name) == 0) { + found = true; + break; + } + + __wt_spin_unlock(session, &conn->fh_lock); + + return (found); +} +#endif + +/* + * __handle_search -- + * Search for a matching handle. + */ +static bool +__handle_search( + WT_SESSION_IMPL *session, const char *name, WT_FH *newfh, WT_FH **fhp) { WT_CONNECTION_IMPL *conn; WT_FH *fh; uint64_t bucket, hash; bool found; - if (fhp != NULL) - *fhp = NULL; + *fhp = NULL; conn = S2C(session); found = false; @@ -33,15 +102,13 @@ __wt_handle_search(WT_SESSION_IMPL *session, __wt_spin_lock(session, &conn->fh_lock); /* - * If we already have the file open, optionally increment the reference - * count and return a pointer. + * If we already have the file open, increment the reference count and + * return a pointer. */ TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) if (strcmp(name, fh->name) == 0) { - if (increment_ref) - ++fh->ref; - if (fhp != NULL) - *fhp = fh; + ++fh->ref; + *fhp = fh; found = true; break; } @@ -49,13 +116,11 @@ __wt_handle_search(WT_SESSION_IMPL *session, /* If we don't find a match, optionally add a new entry. 
*/ if (!found && newfh != NULL) { newfh->name_hash = hash; - WT_CONN_FILE_INSERT(conn, newfh, bucket); + WT_FILE_HANDLE_INSERT(conn, newfh, bucket); (void)__wt_atomic_add32(&conn->open_file_count, 1); - if (increment_ref) - ++newfh->ref; - if (fhp != NULL) - *fhp = newfh; + ++newfh->ref; + *fhp = newfh; } __wt_spin_unlock(session, &conn->fh_lock); @@ -68,8 +133,8 @@ __wt_handle_search(WT_SESSION_IMPL *session, * Optionally output a verbose message on handle open. */ static inline int -__open_verbose(WT_SESSION_IMPL *session, - const char *name, uint32_t file_type, uint32_t flags) +__open_verbose( + WT_SESSION_IMPL *session, const char *name, int file_type, u_int flags) { #ifdef HAVE_VERBOSE WT_DECL_RET; @@ -85,19 +150,19 @@ __open_verbose(WT_SESSION_IMPL *session, */ switch (file_type) { - case WT_FILE_TYPE_CHECKPOINT: + case WT_OPEN_FILE_TYPE_CHECKPOINT: file_type_tag = "checkpoint"; break; - case WT_FILE_TYPE_DATA: + case WT_OPEN_FILE_TYPE_DATA: file_type_tag = "data"; break; - case WT_FILE_TYPE_DIRECTORY: + case WT_OPEN_FILE_TYPE_DIRECTORY: file_type_tag = "directory"; break; - case WT_FILE_TYPE_LOG: + case WT_OPEN_FILE_TYPE_LOG: file_type_tag = "log"; break; - case WT_FILE_TYPE_REGULAR: + case WT_OPEN_FILE_TYPE_REGULAR: file_type_tag = "regular"; break; default: @@ -115,18 +180,16 @@ __open_verbose(WT_SESSION_IMPL *session, } WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create"); + WT_OPEN_VERBOSE_FLAG(WT_OPEN_DIRECTIO, "direct-IO"); WT_OPEN_VERBOSE_FLAG(WT_OPEN_EXCLUSIVE, "exclusive"); WT_OPEN_VERBOSE_FLAG(WT_OPEN_FIXED, "fixed"); WT_OPEN_VERBOSE_FLAG(WT_OPEN_READONLY, "readonly"); - WT_OPEN_VERBOSE_FLAG(WT_STREAM_APPEND, "stream-append"); - WT_OPEN_VERBOSE_FLAG(WT_STREAM_READ, "stream-read"); - WT_OPEN_VERBOSE_FLAG(WT_STREAM_WRITE, "stream-write"); if (tmp->size != 0) WT_ERR(__wt_buf_catfmt(session, tmp, ")")); ret = __wt_verbose(session, WT_VERB_FILEOPS, - "%s: handle-open: type %s%s", + "%s: file-open: type %s%s", name, file_type_tag, tmp->size == 0 ? 
"" : (char *)tmp->data); err: __wt_scr_free(session, &tmp); @@ -146,17 +209,19 @@ err: __wt_scr_free(session, &tmp); */ int __wt_open(WT_SESSION_IMPL *session, - const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp) + const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_FH *fh; + WT_FILE_SYSTEM *file_system; bool lock_file, open_called; char *path; WT_ASSERT(session, file_type != 0); /* A file type is required. */ conn = S2C(session); + file_system = conn->file_system; fh = NULL; open_called = false; path = NULL; @@ -164,21 +229,12 @@ __wt_open(WT_SESSION_IMPL *session, WT_RET(__open_verbose(session, name, file_type, flags)); /* Check if the handle is already open. */ - if (__wt_handle_search(session, name, true, NULL, &fh)) { - /* - * XXX - * The in-memory implementation has to reset the file offset - * when a file is re-opened (which obviously also depends on - * in-memory configurations never opening a file in more than - * one thread at a time). This needs to be fixed. - */ - if (F_ISSET(fh, WT_FH_IN_MEMORY) && fh->ref == 1) - fh->off = 0; + if (__handle_search(session, name, NULL, &fh)) { *fhp = fh; return (0); } - /* Allocate a structure and set the name. */ + /* Allocate and initialize the handle. */ WT_ERR(__wt_calloc_one(session, &fh)); WT_ERR(__wt_strdup(session, name, &fh->name)); @@ -200,17 +256,21 @@ __wt_open(WT_SESSION_IMPL *session, WT_ERR(__wt_filename(session, name, &path)); /* Call the underlying open function. */ - WT_ERR(conn->handle_open( - session, fh, path == NULL ? name : path, file_type, flags)); + WT_ERR(file_system->fs_open_file(file_system, &session->iface, + path == NULL ? name : path, file_type, flags, &fh->handle)); open_called = true; + WT_ERR(__fhandle_method_finalize( + session, fh->handle, LF_ISSET(WT_OPEN_READONLY))); + /* * Repeat the check for a match: if there's no match, link our newly * created handle onto the database's list of files. 
*/ - if (__wt_handle_search(session, name, true, fh, fhp)) { + if (__handle_search(session, name, fh, fhp)) { err: if (open_called) - WT_TRET(fh->fh_close(session, fh)); + WT_TRET(fh->handle->close( + fh->handle, (WT_SESSION *)session)); if (fh != NULL) { __wt_free(session, fh->name); __wt_free(session, fh); @@ -242,7 +302,7 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) /* Track handle-close as a file operation, so open and close match. */ WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: handle-close", fh->name)); + session, WT_VERB_FILEOPS, "%s: file-close", fh->name)); /* * If the reference count hasn't gone to 0, or if it's an in-memory @@ -252,20 +312,20 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) */ __wt_spin_lock(session, &conn->fh_lock); WT_ASSERT(session, fh->ref > 0); - if ((fh->ref > 0 && --fh->ref > 0) || F_ISSET(fh, WT_FH_IN_MEMORY)) { + if ((fh->ref > 0 && --fh->ref > 0)) { __wt_spin_unlock(session, &conn->fh_lock); return (0); } /* Remove from the list. */ bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; - WT_CONN_FILE_REMOVE(conn, fh, bucket); + WT_FILE_HANDLE_REMOVE(conn, fh, bucket); (void)__wt_atomic_sub32(&conn->open_file_count, 1); __wt_spin_unlock(session, &conn->fh_lock); /* Discard underlying resources. */ - ret = fh->fh_close(session, fh); + ret = fh->handle->close(fh->handle, (WT_SESSION *)session); __wt_free(session, fh->name); __wt_free(session, fh); @@ -287,18 +347,13 @@ __wt_close_connection_close(WT_SESSION_IMPL *session) conn = S2C(session); while ((fh = TAILQ_FIRST(&conn->fhqh)) != NULL) { - /* - * In-memory configurations will have open files, but the ref - * counts should be zero. 
- */ - if (!F_ISSET(conn, WT_CONN_IN_MEMORY) || fh->ref != 0) { + if (fh->ref != 0) { ret = EBUSY; __wt_errx(session, "Connection has open file handles: %s", fh->name); } fh->ref = 1; - F_CLR(fh, WT_FH_IN_MEMORY); WT_TRET(__wt_close(session, &fh)); } diff --git a/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c index 260514eac66..09c2e08db83 100644 --- a/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c +++ b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c @@ -8,475 +8,596 @@ #include "wt_internal.h" -static int __im_handle_size(WT_SESSION_IMPL *, WT_FH *, wt_off_t *); - /* - * In-memory information. + * File system interface for in-memory implementation. */ typedef struct { + WT_FILE_SYSTEM iface; + + TAILQ_HEAD(__wt_fhhash_inmem, + __wt_file_handle_inmem) fhhash[WT_HASH_ARRAY_SIZE]; + TAILQ_HEAD(__wt_fh_inmem_qh, __wt_file_handle_inmem) fhqh; + WT_SPINLOCK lock; -} WT_IM; +} WT_FILE_SYSTEM_INMEM; + +static int __im_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *); /* - * __im_directory_list -- - * Get a list of files from a directory, in-memory version. + * __im_handle_search -- + * Return a matching handle, if one exists. */ -static int -__im_directory_list(WT_SESSION_IMPL *session, const char *dir, - const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) +static WT_FILE_HANDLE_INMEM * +__im_handle_search(WT_FILE_SYSTEM *file_system, const char *name) { - WT_UNUSED(session); - WT_UNUSED(dir); - WT_UNUSED(prefix); - WT_UNUSED(flags); - WT_UNUSED(dirlist); - WT_UNUSED(countp); + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + uint64_t bucket, hash; - WT_RET_MSG(session, ENOTSUP, "directory-list"); -} + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; -/* - * __im_directory_sync -- - * Flush a directory to ensure file creation is durable. 
- */ -static int -__im_directory_sync(WT_SESSION_IMPL *session, const char *path) -{ - WT_UNUSED(session); - WT_UNUSED(path); - return (0); + hash = __wt_hash_city64(name, strlen(name)); + bucket = hash % WT_HASH_ARRAY_SIZE; + TAILQ_FOREACH(im_fh, &im_fs->fhhash[bucket], hashq) + if (strcmp(im_fh->iface.name, name) == 0) + break; + + return (im_fh); } /* - * __im_file_exist -- - * Return if the file exists. + * __im_handle_remove -- + * Destroy an in-memory file handle. Should only happen on remove or + * shutdown. */ static int -__im_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +__im_handle_remove(WT_SESSION_IMPL *session, + WT_FILE_SYSTEM *file_system, WT_FILE_HANDLE_INMEM *im_fh) { - *existp = __wt_handle_search(session, name, false, NULL, NULL); + WT_FILE_HANDLE *fhp; + WT_FILE_SYSTEM_INMEM *im_fs; + uint64_t bucket; + + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; + + if (im_fh->ref != 0) + WT_RET_MSG(session, EBUSY, + "%s: file-remove", im_fh->iface.name); + + bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE; + WT_FILE_HANDLE_REMOVE(im_fs, im_fh, bucket); + + /* Clean up private information. */ + __wt_buf_free(session, &im_fh->buf); + + /* Clean up public information. */ + fhp = (WT_FILE_HANDLE *)im_fh; + __wt_free(session, fhp->name); + + __wt_free(session, im_fh); + return (0); } /* - * __im_file_remove -- - * POSIX remove. + * __im_fs_directory_list -- + * Return the directory contents. 
*/ static int -__im_file_remove(WT_SESSION_IMPL *session, const char *name) +__im_fs_directory_list(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *directory, + const char *prefix, char ***dirlistp, uint32_t *countp) { WT_DECL_RET; - WT_FH *fh; + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; + size_t dirallocsz, len; + uint32_t count; + char *name, **entries; + + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; + session = (WT_SESSION_IMPL *)wt_session; + + *dirlistp = NULL; + *countp = 0; + + dirallocsz = 0; + len = strlen(directory); + entries = NULL; + + __wt_spin_lock(session, &im_fs->lock); + + count = 0; + TAILQ_FOREACH(im_fh, &im_fs->fhqh, q) { + name = im_fh->iface.name; + if (strncmp(name, directory, len) != 0 || + (prefix != NULL && !WT_PREFIX_MATCH(name + len, prefix))) + continue; + + WT_ERR(__wt_realloc_def( + session, &dirallocsz, count + 1, &entries)); + WT_ERR(__wt_strdup(session, name, &entries[count])); + ++count; + } - if (__wt_handle_search(session, name, true, NULL, &fh)) { - WT_ASSERT(session, fh->ref == 1); + *dirlistp = entries; + *countp = count; + +err: __wt_spin_unlock(session, &im_fs->lock); + if (ret == 0) + return (0); - /* Force a discard of the handle. */ - F_CLR(fh, WT_FH_IN_MEMORY); - ret = __wt_close(session, &fh); + if (entries != NULL) { + while (count > 0) + __wt_free(session, entries[--count]); + __wt_free(session, entries); } - return (ret); + + WT_RET_MSG(session, ret, + "%s: directory-list, prefix \"%s\"", + directory, prefix == NULL ? "" : prefix); } /* - * __im_file_rename -- - * POSIX rename. + * __im_fs_directory_list_free -- + * Free memory returned by __im_fs_directory_list. 
*/ static int -__im_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +__im_fs_directory_list_free(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, char **dirlist, uint32_t count) { - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_FH *fh; - uint64_t bucket, hash; - char *to_name; - - conn = S2C(session); - - /* We'll need a copy of the target name. */ - WT_RET(__wt_strdup(session, to, &to_name)); + WT_SESSION_IMPL *session; - __wt_spin_lock(session, &conn->fh_lock); + WT_UNUSED(file_system); - /* Make sure the target name isn't active. */ - hash = __wt_hash_city64(to, strlen(to)); - bucket = hash % WT_HASH_ARRAY_SIZE; - TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) - if (strcmp(to, fh->name) == 0) - WT_ERR(EPERM); + session = (WT_SESSION_IMPL *)wt_session; - /* Find the source name. */ - hash = __wt_hash_city64(from, strlen(from)); - bucket = hash % WT_HASH_ARRAY_SIZE; - TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) - if (strcmp(from, fh->name) == 0) - break; - if (fh == NULL) - WT_ERR(ENOENT); + if (dirlist != NULL) { + while (count > 0) + __wt_free(session, dirlist[--count]); + __wt_free(session, dirlist); + } + return (0); +} - /* Remove source from the list. */ - WT_CONN_FILE_REMOVE(conn, fh, bucket); +/* + * __im_fs_exist -- + * Return if the file exists. + */ +static int +__im_fs_exist(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, bool *existp) +{ + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; - /* Swap the names. */ - __wt_free(session, fh->name); - fh->name = to_name; - to_name = NULL; + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; + session = (WT_SESSION_IMPL *)wt_session; - /* Put source back on the list. 
*/ - hash = __wt_hash_city64(to, strlen(to)); - bucket = hash % WT_HASH_ARRAY_SIZE; - WT_CONN_FILE_INSERT(conn, fh, bucket); + __wt_spin_lock(session, &im_fs->lock); - if (0) { -err: __wt_free(session, to_name); - } - __wt_spin_unlock(session, &conn->fh_lock); + *existp = __im_handle_search(file_system, name) != NULL; - return (ret); + __wt_spin_unlock(session, &im_fs->lock); + return (0); } /* - * __im_file_size -- - * Get the size of a file in bytes, by file name. + * __im_fs_remove -- + * POSIX remove. */ static int -__im_file_size( - WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +__im_fs_remove( + WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name) { WT_DECL_RET; - WT_FH *fh; - WT_IM *im; + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; - WT_UNUSED(silent); + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; + session = (WT_SESSION_IMPL *)wt_session; - im = S2C(session)->inmemory; - __wt_spin_lock(session, &im->lock); + __wt_spin_lock(session, &im_fs->lock); - if (__wt_handle_search(session, name, true, NULL, &fh)) { - WT_ERR(__im_handle_size(session, fh, sizep)); - WT_ERR(__wt_close(session, &fh)); - } else - ret = ENOENT; + ret = ENOENT; + if ((im_fh = __im_handle_search(file_system, name)) != NULL) + ret = __im_handle_remove(session, file_system, im_fh); -err: __wt_spin_unlock(session, &im->lock); + __wt_spin_unlock(session, &im_fs->lock); return (ret); } /* - * __im_handle_advise -- - * POSIX fadvise. + * __im_fs_rename -- + * POSIX rename. 
*/ static int -__im_handle_advise(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +__im_fs_rename(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *from, const char *to) { - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(advice); - return (ENOTSUP); + WT_DECL_RET; + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; + uint64_t bucket; + char *copy; + + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; + session = (WT_SESSION_IMPL *)wt_session; + + __wt_spin_lock(session, &im_fs->lock); + + ret = ENOENT; + if ((im_fh = __im_handle_search(file_system, from)) != NULL) { + WT_ERR(__wt_strdup(session, to, ©)); + __wt_free(session, im_fh->iface.name); + im_fh->iface.name = copy; + + bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE; + WT_FILE_HANDLE_REMOVE(im_fs, im_fh, bucket); + im_fh->name_hash = __wt_hash_city64(to, strlen(to)); + bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE; + WT_FILE_HANDLE_INSERT(im_fs, im_fh, bucket); + } + +err: __wt_spin_unlock(session, &im_fs->lock); + return (ret); } /* - * __im_handle_close -- - * ANSI C close/fclose. + * __im_fs_size -- + * Get the size of a file in bytes, by file name. */ static int -__im_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +__im_fs_size(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, wt_off_t *sizep) { - __wt_buf_free(session, &fh->buf); + WT_DECL_RET; + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; - return (0); + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; + session = (WT_SESSION_IMPL *)wt_session; + + __wt_spin_lock(session, &im_fs->lock); + + /* Search for the handle, then get its size. 
*/ + if ((im_fh = __im_handle_search(file_system, name)) == NULL) + ret = ENOENT; + else + *sizep = (wt_off_t)im_fh->buf.size; + + __wt_spin_unlock(session, &im_fs->lock); + + return (ret); } /* - * __im_handle_getc -- - * ANSI C fgetc. + * __im_file_close -- + * ANSI C close. */ static int -__im_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) +__im_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session) { - WT_IM *im; + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; - im = S2C(session)->inmemory; - __wt_spin_lock(session, &im->lock); + im_fh = (WT_FILE_HANDLE_INMEM *)file_handle; + im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system; + session = (WT_SESSION_IMPL *)wt_session; - if (fh->off >= fh->buf.size) - *chp = EOF; - else - *chp = ((char *)fh->buf.data)[fh->off++]; + __wt_spin_lock(session, &im_fs->lock); + + --im_fh->ref; + + __wt_spin_unlock(session, &im_fs->lock); - __wt_spin_unlock(session, &im->lock); return (0); } /* - * __im_handle_lock -- + * __im_file_lock -- * Lock/unlock a file. */ static int -__im_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +__im_file_lock( + WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock) { - WT_UNUSED(session); - WT_UNUSED(fh); + WT_UNUSED(file_handle); + WT_UNUSED(wt_session); WT_UNUSED(lock); return (0); } /* - * __im_handle_printf -- - * ANSI C vfprintf. - */ -static int -__im_handle_printf( - WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - va_list ap_copy; - WT_DECL_ITEM(tmp); - WT_DECL_RET; - WT_IM *im; - size_t len; - - im = S2C(session)->inmemory; - - /* Build the string we're writing. 
*/ - WT_RET(__wt_scr_alloc(session, strlen(fmt) * 2 + 128, &tmp)); - for (;;) { - va_copy(ap_copy, ap); - len = (size_t)vsnprintf(tmp->mem, tmp->memsize, fmt, ap_copy); - va_end(ap_copy); - if (len < tmp->memsize) { - tmp->data = tmp->mem; - tmp->size = len; - break; - } - WT_ERR(__wt_buf_extend(session, tmp, len + 1)); - } - - __wt_spin_lock(session, &im->lock); - - /* Grow the handle's buffer as necessary. */ - WT_ERR(__wt_buf_grow(session, &fh->buf, fh->off + len)); - - /* Copy the data into place and update the offset. */ - memcpy((uint8_t *)fh->buf.mem + fh->off, tmp->data, len); - fh->off += len; - -err: __wt_spin_unlock(session, &im->lock); - - __wt_scr_free(session, &tmp); - return (ret); -} - -/* - * __im_handle_read -- + * __im_file_read -- * POSIX pread. */ static int -__im_handle_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +__im_file_read(WT_FILE_HANDLE *file_handle, + WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf) { WT_DECL_RET; - WT_IM *im; + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; size_t off; - im = S2C(session)->inmemory; - __wt_spin_lock(session, &im->lock); + im_fh = (WT_FILE_HANDLE_INMEM *)file_handle; + im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system; + session = (WT_SESSION_IMPL *)wt_session; + + __wt_spin_lock(session, &im_fs->lock); off = (size_t)offset; - if (off < fh->buf.size) { - len = WT_MIN(len, fh->buf.size - off); - memcpy(buf, (uint8_t *)fh->buf.mem + off, len); - fh->off = off + len; + if (off < im_fh->buf.size) { + len = WT_MIN(len, im_fh->buf.size - off); + memcpy(buf, (uint8_t *)im_fh->buf.mem + off, len); } else ret = WT_ERROR; - __wt_spin_unlock(session, &im->lock); + __wt_spin_unlock(session, &im_fs->lock); if (ret == 0) return (0); WT_RET_MSG(session, WT_ERROR, "%s: handle-read: failed to read %" WT_SIZET_FMT " bytes at " "offset %" WT_SIZET_FMT, - fh->name, len, off); + file_handle->name, len, off); } /* - * 
__im_handle_size -- + * __im_file_size -- * Get the size of a file in bytes, by file handle. */ static int -__im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +__im_file_size( + WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep) { - WT_UNUSED(session); + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; + + im_fh = (WT_FILE_HANDLE_INMEM *)file_handle; + im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system; + session = (WT_SESSION_IMPL *)wt_session; + + __wt_spin_lock(session, &im_fs->lock); + + *sizep = (wt_off_t)im_fh->buf.size; + + __wt_spin_unlock(session, &im_fs->lock); - /* - * XXX hack - MongoDB assumes that any file with content will have a - * non-zero size. In memory tables generally are zero-sized, make - * MongoDB happy. - */ - *sizep = fh->buf.size == 0 ? 1024 : (wt_off_t)fh->buf.size; return (0); } /* - * __im_handle_sync -- - * POSIX fflush/fsync. + * __im_file_sync -- + * In-memory sync. */ static int -__im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +__im_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session) { - WT_UNUSED(session); - WT_UNUSED(fh); - - /* - * Callers attempting asynchronous flush handle ENOTSUP returns, and - * won't make further attempts. - */ - return (block ? 0 : ENOTSUP); + WT_UNUSED(file_handle); + WT_UNUSED(wt_session); + return (0); } /* - * __im_handle_truncate -- + * __im_file_truncate -- * POSIX ftruncate. 
*/ static int -__im_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset) +__im_file_truncate( + WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset) { WT_DECL_RET; - WT_IM *im; + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; size_t off; - im = S2C(session)->inmemory; - __wt_spin_lock(session, &im->lock); + im_fh = (WT_FILE_HANDLE_INMEM *)file_handle; + im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system; + session = (WT_SESSION_IMPL *)wt_session; + + __wt_spin_lock(session, &im_fs->lock); /* - * Grow the buffer as necessary, clear any new space in the file, - * and reset the file's data length. + * Grow the buffer as necessary, clear any new space in the file, and + * reset the file's data length. */ off = (size_t)offset; - WT_ERR(__wt_buf_grow(session, &fh->buf, off)); - if (fh->buf.size < off) - memset((uint8_t *) - fh->buf.data + fh->buf.size, 0, off - fh->buf.size); - fh->buf.size = off; + WT_ERR(__wt_buf_grow(session, &im_fh->buf, off)); + if (im_fh->buf.size < off) + memset((uint8_t *)im_fh->buf.data + im_fh->buf.size, + 0, off - im_fh->buf.size); + im_fh->buf.size = off; -err: __wt_spin_unlock(session, &im->lock); +err: __wt_spin_unlock(session, &im_fs->lock); return (ret); } /* - * __im_handle_write -- + * __im_file_write -- * POSIX pwrite. 
*/ static int -__im_handle_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +__im_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, + wt_off_t offset, size_t len, const void *buf) { WT_DECL_RET; - WT_IM *im; + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; size_t off; - im = S2C(session)->inmemory; - __wt_spin_lock(session, &im->lock); + im_fh = (WT_FILE_HANDLE_INMEM *)file_handle; + im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system; + session = (WT_SESSION_IMPL *)wt_session; + + __wt_spin_lock(session, &im_fs->lock); off = (size_t)offset; - WT_ERR(__wt_buf_grow(session, &fh->buf, off + len + 1024)); + WT_ERR(__wt_buf_grow(session, &im_fh->buf, off + len + 1024)); - memcpy((uint8_t *)fh->buf.data + off, buf, len); - if (off + len > fh->buf.size) - fh->buf.size = off + len; - fh->off = off + len; + memcpy((uint8_t *)im_fh->buf.data + off, buf, len); + if (off + len > im_fh->buf.size) + im_fh->buf.size = off + len; -err: __wt_spin_unlock(session, &im->lock); +err: __wt_spin_unlock(session, &im_fs->lock); if (ret == 0) return (0); WT_RET_MSG(session, ret, "%s: handle-write: failed to write %" WT_SIZET_FMT " bytes at " "offset %" WT_SIZET_FMT, - fh->name, len, off); + file_handle->name, len, off); } /* - * __im_handle_open -- + * __im_file_open -- * POSIX fopen/open. 
*/ static int -__im_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *path, uint32_t file_type, uint32_t flags) +__im_file_open(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, + const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + WT_FILE_HANDLE **file_handlep) { - WT_UNUSED(session); - WT_UNUSED(path); + WT_DECL_RET; + WT_FILE_HANDLE *file_handle; + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; + uint64_t bucket, hash; + WT_UNUSED(file_type); WT_UNUSED(flags); - fh->off = 0; - F_SET(fh, WT_FH_IN_MEMORY); + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; + session = (WT_SESSION_IMPL *)wt_session; - fh->fh_advise = __im_handle_advise; - fh->fh_close = __im_handle_close; - fh->fh_getc = __im_handle_getc; - fh->fh_lock = __im_handle_lock; - fh->fh_printf = __im_handle_printf; - fh->fh_read = __im_handle_read; - fh->fh_size = __im_handle_size; - fh->fh_sync = __im_handle_sync; - fh->fh_truncate = __im_handle_truncate; - fh->fh_write = __im_handle_write; + __wt_spin_lock(session, &im_fs->lock); - return (0); + /* + * First search the file queue, if we find it, assert there's only a + * single reference, in-memory only supports a single handle on any + * file, for now. + */ + im_fh = __im_handle_search(file_system, name); + if (im_fh != NULL) { + + if (im_fh->ref != 0) + WT_ERR_MSG(session, EBUSY, + "%s: file-open: already open", name); + + im_fh->ref = 1; + + *file_handlep = (WT_FILE_HANDLE *)im_fh; + + __wt_spin_unlock(session, &im_fs->lock); + return (0); + } + + /* The file hasn't been opened before, create a new one. */ + WT_ERR(__wt_calloc_one(session, &im_fh)); + + /* Initialize public information. */ + file_handle = (WT_FILE_HANDLE *)im_fh; + file_handle->file_system = file_system; + WT_ERR(__wt_strdup(session, name, &file_handle->name)); + + /* Initialize private information. 
*/ + im_fh->ref = 1; + + hash = __wt_hash_city64(name, strlen(name)); + bucket = hash % WT_HASH_ARRAY_SIZE; + im_fh->name_hash = hash; + WT_FILE_HANDLE_INSERT(im_fs, im_fh, bucket); + + file_handle->close = __im_file_close; + file_handle->fh_lock = __im_file_lock; + file_handle->fh_read = __im_file_read; + file_handle->fh_size = __im_file_size; + file_handle->fh_sync = __im_file_sync; + file_handle->fh_truncate = __im_file_truncate; + file_handle->fh_write = __im_file_write; + + *file_handlep = file_handle; + + if (0) { +err: __wt_free(session, im_fh); + } + + __wt_spin_unlock(session, &im_fs->lock); + return (ret); } /* - * __wt_os_inmemory -- - * Initialize an in-memory configuration. + * __im_terminate -- + * Terminate an in-memory configuration. */ -int -__wt_os_inmemory(WT_SESSION_IMPL *session) +static int +__im_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session) { - WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_IM *im; + WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_SYSTEM_INMEM *im_fs; + WT_SESSION_IMPL *session; - conn = S2C(session); - im = NULL; + WT_UNUSED(file_system); - /* Initialize the in-memory jump table. */ - conn->file_directory_list = __im_directory_list; - conn->file_directory_sync = __im_directory_sync; - conn->file_exist = __im_file_exist; - conn->file_remove = __im_file_remove; - conn->file_rename = __im_file_rename; - conn->file_size = __im_file_size; - conn->handle_open = __im_handle_open; - - /* Allocate an in-memory structure. 
*/ - WT_RET(__wt_calloc_one(session, &im)); - WT_ERR(__wt_spin_init(session, &im->lock, "in-memory I/O")); - conn->inmemory = im; + session = (WT_SESSION_IMPL *)wt_session; + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; - return (0); + while ((im_fh = TAILQ_FIRST(&im_fs->fhqh)) != NULL) + WT_TRET(__im_handle_remove(session, file_system, im_fh)); + + __wt_spin_destroy(session, &im_fs->lock); + __wt_free(session, im_fs); -err: __wt_free(session, im); return (ret); } /* - * __wt_os_inmemory_cleanup -- - * Discard an in-memory configuration. + * __wt_os_inmemory -- + * Initialize an in-memory configuration. */ int -__wt_os_inmemory_cleanup(WT_SESSION_IMPL *session) +__wt_os_inmemory(WT_SESSION_IMPL *session) { WT_DECL_RET; - WT_IM *im; + WT_FILE_SYSTEM *file_system; + WT_FILE_SYSTEM_INMEM *im_fs; + u_int i; - if ((im = S2C(session)->inmemory) == NULL) - return (0); - S2C(session)->inmemory = NULL; + WT_RET(__wt_calloc_one(session, &im_fs)); + + /* Initialize private information. */ + TAILQ_INIT(&im_fs->fhqh); + for (i = 0; i < WT_HASH_ARRAY_SIZE; i++) + TAILQ_INIT(&im_fs->fhhash[i]); - __wt_spin_destroy(session, &im->lock); - __wt_free(session, im); + WT_ERR(__wt_spin_init(session, &im_fs->lock, "in-memory I/O")); + + /* Initialize the in-memory jump table. */ + file_system = (WT_FILE_SYSTEM *)im_fs; + file_system->fs_directory_list = __im_fs_directory_list; + file_system->fs_directory_list_free = __im_fs_directory_list_free; + file_system->fs_exist = __im_fs_exist; + file_system->fs_open_file = __im_file_open; + file_system->fs_remove = __im_fs_remove; + file_system->fs_rename = __im_fs_rename; + file_system->fs_size = __im_fs_size; + file_system->terminate = __im_terminate; + + /* Switch the file system into place. 
*/ + S2C(session)->file_system = (WT_FILE_SYSTEM *)im_fs; + + return (0); +err: __wt_free(session, im_fs); return (ret); } diff --git a/src/third_party/wiredtiger/src/os_common/os_fstream.c b/src/third_party/wiredtiger/src/os_common/os_fstream.c new file mode 100644 index 00000000000..0b199529e19 --- /dev/null +++ b/src/third_party/wiredtiger/src/os_common/os_fstream.c @@ -0,0 +1,217 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* Buffer size for streamed reads/writes. */ +#define WT_STREAM_BUFSIZE 8192 + +/* + * __fstream_close -- + * Close a stream handle. + */ +static int +__fstream_close(WT_SESSION_IMPL *session, WT_FSTREAM *fstr) +{ + WT_DECL_RET; + + if (!F_ISSET(fstr, WT_STREAM_READ)) + WT_TRET(fstr->fstr_flush(session, fstr)); + + WT_TRET(__wt_close(session, &fstr->fh)); + __wt_buf_free(session, &fstr->buf); + __wt_free(session, fstr); + return (ret); +} + +/* + * __fstream_flush -- + * Flush the data from a stream. + */ +static int +__fstream_flush(WT_SESSION_IMPL *session, WT_FSTREAM *fstr) +{ + if (fstr->buf.size > 0) { + WT_RET(__wt_write(session, + fstr->fh, fstr->off, fstr->buf.size, fstr->buf.data)); + fstr->off += (wt_off_t)fstr->buf.size; + fstr->buf.size = 0; + } + + return (0); +} + +/* + * __fstream_flush_notsup -- + * Stream flush unsupported. + */ +static int +__fstream_flush_notsup(WT_SESSION_IMPL *session, WT_FSTREAM *fstr) +{ + WT_RET_MSG(session, ENOTSUP, "%s: flush", fstr->name); +} + +/* + * __fstream_getline -- + * Get a line from a stream. + * + * Implementation of the POSIX getline or BSD fgetln functions (finding the + * function in a portable way is hard, it's simple enough to write it instead). 
+ * + * Note: Unlike the standard getline calls, this function doesn't include the + * trailing newline character in the returned buffer and discards empty lines + * (so the caller's EOF marker is a returned line length of 0). + */ +static int +__fstream_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf) +{ + const char *p; + size_t len; + char c; + + /* + * We always NUL-terminate the returned string (even if it's empty), + * make sure there's buffer space for a trailing NUL in all cases. + */ + WT_RET(__wt_buf_init(session, buf, 100)); + + for (;;) { + /* Check if we need to refill the buffer. */ + if (WT_PTRDIFF(fstr->buf.data, fstr->buf.mem) >= + fstr->buf.size) { + len = WT_MIN(WT_STREAM_BUFSIZE, + (size_t)(fstr->size - fstr->off)); + if (len == 0) + break; /* EOF */ + WT_RET(__wt_buf_initsize(session, &fstr->buf, len)); + WT_RET(__wt_read( + session, fstr->fh, fstr->off, len, fstr->buf.mem)); + fstr->off += (wt_off_t)len; + } + + c = *(p = fstr->buf.data); + fstr->buf.data = ++p; + + /* Leave space for a trailing NUL. */ + WT_RET(__wt_buf_extend(session, buf, buf->size + 2)); + if (c == '\n') { + if (buf->size == 0) + continue; + break; + } + ((char *)buf->mem)[buf->size++] = c; + } + + ((char *)buf->mem)[buf->size] = '\0'; + + return (0); +} + +/* + * __fstream_getline_notsup -- + * Stream getline unsupported. + */ +static int +__fstream_getline_notsup( + WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf) +{ + WT_UNUSED(buf); + WT_RET_MSG(session, ENOTSUP, "%s: getline", fstr->name); +} + +/* + * __fstream_printf -- + * ANSI C vfprintf. 
+ */ +static int +__fstream_printf( + WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap) +{ + WT_ITEM *buf; + va_list ap_copy; + size_t len, space; + char *p; + + buf = &fstr->buf; + + for (;;) { + va_copy(ap_copy, ap); + p = (char *)((uint8_t *)buf->mem + buf->size); + WT_ASSERT(session, buf->memsize >= buf->size); + space = buf->memsize - buf->size; + len = (size_t)vsnprintf(p, space, fmt, ap_copy); + va_end(ap_copy); + + if (len < space) { + buf->size += len; + + return (buf->size >= WT_STREAM_BUFSIZE ? + __wt_fflush(session, fstr) : 0); + } + WT_RET(__wt_buf_extend(session, buf, buf->size + len + 1)); + } +} + +/* + * __fstream_printf_notsup -- + * ANSI C vfprintf unsupported. + */ +static int +__fstream_printf_notsup( + WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap) +{ + WT_UNUSED(fmt); + WT_UNUSED(ap); + WT_RET_MSG(session, ENOTSUP, "%s: printf", fstr->name); +} + +/* + * __wt_fopen -- + * Open a stream handle. + */ +int +__wt_fopen(WT_SESSION_IMPL *session, + const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp) +{ + WT_DECL_RET; + WT_FH *fh; + WT_FSTREAM *fstr; + + *fstrp = NULL; + + fstr = NULL; + + WT_RET(__wt_open( + session, name, WT_OPEN_FILE_TYPE_REGULAR, open_flags, &fh)); + + WT_ERR(__wt_calloc_one(session, &fstr)); + fstr->fh = fh; + fstr->name = fh->name; + fstr->flags = flags; + + fstr->close = __fstream_close; + WT_ERR(__wt_filesize(session, fh, &fstr->size)); + if (LF_ISSET(WT_STREAM_APPEND)) + fstr->off = fstr->size; + if (LF_ISSET(WT_STREAM_APPEND | WT_STREAM_WRITE)) { + fstr->fstr_flush = __fstream_flush; + fstr->fstr_getline = __fstream_getline_notsup; + fstr->fstr_printf = __fstream_printf; + } else { + WT_ASSERT(session, LF_ISSET(WT_STREAM_READ)); + fstr->fstr_flush = __fstream_flush_notsup; + fstr->fstr_getline = __fstream_getline; + fstr->fstr_printf = __fstream_printf_notsup; + } + *fstrp = fstr; + return (0); + +err: WT_TRET(__wt_close(session, &fh)); + 
__wt_free(session, fstr); + return (ret); +} diff --git a/src/third_party/wiredtiger/src/os_common/os_fstream_stdio.c b/src/third_party/wiredtiger/src/os_common/os_fstream_stdio.c new file mode 100644 index 00000000000..eea2c80ff0e --- /dev/null +++ b/src/third_party/wiredtiger/src/os_common/os_fstream_stdio.c @@ -0,0 +1,84 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __stdio_close -- + * ANSI C close/fclose. + */ +static int +__stdio_close(WT_SESSION_IMPL *session, WT_FSTREAM *fs) +{ + WT_RET_MSG(session, ENOTSUP, "%s: close", fs->name); +} + +/* + * __stdio_flush -- + * POSIX fflush. + */ +static int +__stdio_flush(WT_SESSION_IMPL *session, WT_FSTREAM *fs) +{ + if (fflush(fs->fp) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: flush", fs->name); +} + +/* + * __stdio_getline -- + * ANSI C getline. + */ +static int +__stdio_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fs, WT_ITEM *buf) +{ + WT_UNUSED(buf); + WT_RET_MSG(session, ENOTSUP, "%s: getline", fs->name); +} + +/* + * __stdio_printf -- + * ANSI C vfprintf. + */ +static int +__stdio_printf( + WT_SESSION_IMPL *session, WT_FSTREAM *fs, const char *fmt, va_list ap) +{ + if (vfprintf(fs->fp, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, "%s: printf", fs->name); +} + +/* + * __stdio_init -- + * Initialize stdio functions. + */ +static void +__stdio_init(WT_FSTREAM *fs, const char *name, FILE *fp) +{ + fs->name = name; + fs->fp = fp; + + fs->close = __stdio_close; + fs->fstr_flush = __stdio_flush; + fs->fstr_getline = __stdio_getline; + fs->fstr_printf = __stdio_printf; +} + +/* + * __wt_os_stdio -- + * Initialize the stdio configuration. 
+ */ +int +__wt_os_stdio(WT_SESSION_IMPL *session) +{ + __stdio_init(WT_STDERR(session), "stderr", stderr); + __stdio_init(WT_STDOUT(session), "stdout", stdout); + + return (0); +} diff --git a/src/third_party/wiredtiger/src/os_posix/os_dir.c b/src/third_party/wiredtiger/src/os_posix/os_dir.c index 02f12ec7311..768a1324cd8 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_dir.c +++ b/src/third_party/wiredtiger/src/os_posix/os_dir.c @@ -15,30 +15,34 @@ * Get a list of files from a directory, POSIX version. */ int -__wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, - const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) +__wt_posix_directory_list(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *directory, + const char *prefix, char ***dirlistp, uint32_t *countp) { struct dirent *dp; DIR *dirp; WT_DECL_RET; + WT_SESSION_IMPL *session; size_t dirallocsz; - u_int count, dirsz; - bool match; - char **entries, *path; + uint32_t count; + int tret; + char **entries; - *dirlist = NULL; - *countp = 0; + WT_UNUSED(file_system); + + session = (WT_SESSION_IMPL *)wt_session; - WT_RET(__wt_filename(session, dir, &path)); + *dirlistp = NULL; + *countp = 0; dirp = NULL; dirallocsz = 0; - dirsz = 0; entries = NULL; - WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? -1 : 0), ret); + WT_SYSCALL_RETRY(((dirp = opendir(directory)) == NULL ? -1 : 0), ret); if (ret != 0) - WT_ERR_MSG(session, ret, "%s: directory-list: opendir", path); + WT_RET_MSG(session, ret, + "%s: directory-list: opendir", directory); for (count = 0; (dp = readdir(dirp)) != NULL;) { /* @@ -49,44 +53,57 @@ __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, continue; /* The list of files is optionally filtered by a prefix. 
*/ - match = false; - if (prefix != NULL && - ((LF_ISSET(WT_DIRLIST_INCLUDE) && - WT_PREFIX_MATCH(dp->d_name, prefix)) || - (LF_ISSET(WT_DIRLIST_EXCLUDE) && - !WT_PREFIX_MATCH(dp->d_name, prefix)))) - match = true; - if (prefix == NULL || match) { - /* - * We have a file name we want to return. - */ - count++; - if (count > dirsz) { - dirsz += WT_DIR_ENTRY; - WT_ERR(__wt_realloc_def( - session, &dirallocsz, dirsz, &entries)); - } - WT_ERR(__wt_strdup( - session, dp->d_name, &entries[count-1])); - } + if (prefix != NULL && !WT_PREFIX_MATCH(dp->d_name, prefix)) + continue; + + WT_ERR(__wt_realloc_def( + session, &dirallocsz, count + 1, &entries)); + WT_ERR(__wt_strdup(session, dp->d_name, &entries[count])); + ++count; } - if (count > 0) - *dirlist = entries; + + *dirlistp = entries; *countp = count; -err: if (dirp != NULL) - (void)closedir(dirp); - __wt_free(session, path); +err: if (dirp != NULL) { + WT_SYSCALL(closedir(dirp), tret); + if (tret != 0) { + __wt_err(session, tret, + "%s: directory-list: closedir", directory); + if (ret == 0) + ret = tret; + } + } if (ret == 0) return (0); - if (*dirlist != NULL) { - for (count = dirsz; count > 0; count--) - __wt_free(session, entries[count]); - __wt_free(session, entries); - } + WT_TRET(__wt_posix_directory_list_free( + file_system, wt_session, entries, count)); + WT_RET_MSG(session, ret, "%s: directory-list, prefix \"%s\"", - dir, prefix == NULL ? "" : prefix); + directory, prefix == NULL ? "" : prefix); +} + +/* + * __wt_posix_directory_list_free -- + * Free memory returned by __wt_posix_directory_list. 
+ */ +int +__wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, char **dirlist, uint32_t count) +{ + WT_SESSION_IMPL *session; + + WT_UNUSED(file_system); + + session = (WT_SESSION_IMPL *)wt_session; + + if (dirlist != NULL) { + while (count > 0) + __wt_free(session, dirlist[--count]); + __wt_free(session, dirlist); + } + return (0); } diff --git a/src/third_party/wiredtiger/src/os_posix/os_dlopen.c b/src/third_party/wiredtiger/src/os_posix/os_dlopen.c index 9a74eb4813d..ad1fcc90150 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_dlopen.c +++ b/src/third_party/wiredtiger/src/os_posix/os_dlopen.c @@ -19,7 +19,7 @@ __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_DLH *dlh; WT_RET(__wt_calloc_one(session, &dlh)); - WT_ERR(__wt_strdup(session, path, &dlh->name)); + WT_ERR(__wt_strdup(session, path == NULL ? "local" : path, &dlh->name)); if ((dlh->handle = dlopen(path, RTLD_LAZY)) == NULL) WT_ERR_MSG( diff --git a/src/third_party/wiredtiger/src/os_posix/os_fallocate.c b/src/third_party/wiredtiger/src/os_posix/os_fallocate.c index 22879d36182..9e5d9519900 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fallocate.c +++ b/src/third_party/wiredtiger/src/os_posix/os_fallocate.c @@ -12,47 +12,28 @@ #include <linux/falloc.h> #include <sys/syscall.h> #endif -/* - * __wt_posix_handle_allocate_configure -- - * Configure POSIX file-extension behavior for a file handle. - */ -void -__wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_UNUSED(session); - - fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE; - fh->fallocate_requires_locking = false; - - /* - * Check for the availability of some form of fallocate; in all cases, - * start off requiring locking, we'll relax that requirement once we - * know which system calls work with the handle's underlying filesystem. 
- */ -#if defined(HAVE_FALLOCATE) || defined(HAVE_POSIX_FALLOCATE) - fh->fallocate_available = WT_FALLOCATE_AVAILABLE; - fh->fallocate_requires_locking = true; -#endif -#if defined(__linux__) && defined(SYS_fallocate) - fh->fallocate_available = WT_FALLOCATE_AVAILABLE; - fh->fallocate_requires_locking = true; -#endif -} /* * __posix_std_fallocate -- * Linux fallocate call. */ static int -__posix_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) +__posix_std_fallocate(WT_FILE_HANDLE *file_handle, + WT_SESSION *wt_session, wt_off_t offset, wt_off_t len) { #if defined(HAVE_FALLOCATE) WT_DECL_RET; + WT_FILE_HANDLE_POSIX *pfh; - WT_SYSCALL_RETRY(fallocate(fh->fd, 0, offset, len), ret); + WT_UNUSED(wt_session); + + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; + + WT_SYSCALL_RETRY(fallocate(pfh->fd, 0, offset, len), ret); return (ret); #else - WT_UNUSED(fh); + WT_UNUSED(file_handle); + WT_UNUSED(wt_session); WT_UNUSED(offset); WT_UNUSED(len); return (ENOTSUP); @@ -64,10 +45,16 @@ __posix_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) * Linux fallocate call (system call version). */ static int -__posix_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) +__posix_sys_fallocate(WT_FILE_HANDLE *file_handle, + WT_SESSION *wt_session, wt_off_t offset, wt_off_t len) { #if defined(__linux__) && defined(SYS_fallocate) WT_DECL_RET; + WT_FILE_HANDLE_POSIX *pfh; + + WT_UNUSED(wt_session); + + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; /* * Try the system call for fallocate even if the C library wrapper was @@ -75,10 +62,11 @@ __posix_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) * Linux versions (RHEL 5.5), but not in the version of the C library. * This allows it to work everywhere the kernel supports it. 
*/ - WT_SYSCALL_RETRY(syscall(SYS_fallocate, fh->fd, 0, offset, len), ret); + WT_SYSCALL_RETRY(syscall(SYS_fallocate, pfh->fd, 0, offset, len), ret); return (ret); #else - WT_UNUSED(fh); + WT_UNUSED(file_handle); + WT_UNUSED(wt_session); WT_UNUSED(offset); WT_UNUSED(len); return (ENOTSUP); @@ -90,15 +78,22 @@ __posix_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) * POSIX fallocate call. */ static int -__posix_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) +__posix_posix_fallocate(WT_FILE_HANDLE *file_handle, + WT_SESSION *wt_session, wt_off_t offset, wt_off_t len) { #if defined(HAVE_POSIX_FALLOCATE) WT_DECL_RET; + WT_FILE_HANDLE_POSIX *pfh; + + WT_UNUSED(wt_session); - WT_SYSCALL_RETRY(posix_fallocate(fh->fd, offset, len), ret); + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; + + WT_SYSCALL_RETRY(posix_fallocate(pfh->fd, offset, len), ret); return (ret); #else - WT_UNUSED(fh); + WT_UNUSED(file_handle); + WT_UNUSED(wt_session); WT_UNUSED(offset); WT_UNUSED(len); return (ENOTSUP); @@ -106,67 +101,52 @@ __posix_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) } /* - * __wt_posix_handle_allocate -- + * __wt_posix_file_fallocate -- * POSIX fallocate. */ int -__wt_posix_handle_allocate( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) +__wt_posix_file_fallocate(WT_FILE_HANDLE *file_handle, + WT_SESSION *wt_session, wt_off_t offset, wt_off_t len) { - WT_DECL_RET; - - switch (fh->fallocate_available) { - /* - * Check for already configured handles and make the configured call. 
- */ - case WT_FALLOCATE_POSIX: - if ((ret = __posix_posix_fallocate(fh, offset, len)) == 0) - return (0); - WT_RET_MSG(session, ret, "%s: posix_fallocate", fh->name); - case WT_FALLOCATE_STD: - if ((ret = __posix_std_fallocate(fh, offset, len)) == 0) - return (0); - WT_RET_MSG(session, ret, "%s: fallocate", fh->name); - case WT_FALLOCATE_SYS: - if ((ret = __posix_sys_fallocate(fh, offset, len)) == 0) - return (0); - WT_RET_MSG(session, ret, "%s: sys_fallocate", fh->name); - /* - * Figure out what allocation call this system/filesystem supports, if - * any. + * The first fallocate call: figure out what fallocate call this system + * supports, if any. + * + * The function is configured as a locking fallocate call, so we know + * we're single-threaded through here. Set the nolock function first, + * then publish the NULL replacement to ensure the handle functions are + * always correct. + * + * We've seen Linux systems where posix_fallocate has corrupted + * existing file data (even though that is explicitly disallowed + * by POSIX). FreeBSD and Solaris support posix_fallocate, and + * so far we've seen no problems leaving it unlocked. Check for + * fallocate (and the system call version of fallocate) first to + * avoid locking on Linux if at all possible. */ - case WT_FALLOCATE_AVAILABLE: - /* - * We've seen Linux systems where posix_fallocate has corrupted - * existing file data (even though that is explicitly disallowed - * by POSIX). FreeBSD and Solaris support posix_fallocate, and - * so far we've seen no problems leaving it unlocked. Check for - * fallocate (and the system call version of fallocate) first to - * avoid locking on Linux if at all possible. 
- */ - if ((ret = __posix_std_fallocate(fh, offset, len)) == 0) { - fh->fallocate_available = WT_FALLOCATE_STD; - fh->fallocate_requires_locking = false; - return (0); - } - if ((ret = __posix_sys_fallocate(fh, offset, len)) == 0) { - fh->fallocate_available = WT_FALLOCATE_SYS; - fh->fallocate_requires_locking = false; - return (0); - } - if ((ret = __posix_posix_fallocate(fh, offset, len)) == 0) { - fh->fallocate_available = WT_FALLOCATE_POSIX; -#if !defined(__linux__) - fh->fallocate_requires_locking = false; + if (__posix_std_fallocate(file_handle, wt_session, offset, len) == 0) { + file_handle->fh_allocate_nolock = __posix_std_fallocate; + WT_PUBLISH(file_handle->fh_allocate, NULL); + return (0); + } + if (__posix_sys_fallocate(file_handle, wt_session, offset, len) == 0) { + file_handle->fh_allocate_nolock = __posix_sys_fallocate; + WT_PUBLISH(file_handle->fh_allocate, NULL); + return (0); + } + if (__posix_posix_fallocate( + file_handle, wt_session, offset, len) == 0) { +#if defined(__linux__) + file_handle->fh_allocate = __posix_posix_fallocate; + WT_WRITE_BARRIER(); +#else + file_handle->fh_allocate_nolock = __posix_posix_fallocate; + WT_PUBLISH(file_handle->fh_allocate, NULL); #endif - return (0); - } - /* FALLTHROUGH */ - case WT_FALLOCATE_NOT_AVAILABLE: - default: - fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE; - return (ENOTSUP); + return (0); } - /* NOTREACHED */ + + file_handle->fh_allocate = NULL; + WT_WRITE_BARRIER(); + return (ENOTSUP); } diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c index 7d8f3b937b6..86fa2e8f117 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fs.c +++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c @@ -1,9 +1,29 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. 
* - * See the file LICENSE for redistribution information. + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ #include "wt_internal.h" @@ -13,30 +33,11 @@ * Underlying support function to flush a file handle. 
*/ static int -__posix_sync(WT_SESSION_IMPL *session, - int fd, const char *name, const char *func, bool block) +__posix_sync( + WT_SESSION_IMPL *session, int fd, const char *name, const char *func) { WT_DECL_RET; - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - -#ifdef HAVE_SYNC_FILE_RANGE - if (!block) { - WT_SYSCALL_RETRY(sync_file_range(fd, - (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: %s: sync_file_range", name, func); - } -#else - /* - * Callers attempting asynchronous flush handle ENOTSUP returns, and - * won't make further attempts. - */ - if (!block) - return (ENOTSUP); -#endif - #if defined(F_FULLFSYNC) /* * OS X fsync documentation: @@ -73,105 +74,82 @@ __posix_sync(WT_SESSION_IMPL *session, #endif } +#ifdef __linux__ /* * __posix_directory_sync -- * Flush a directory to ensure file creation is durable. */ static int -__posix_directory_sync(WT_SESSION_IMPL *session, const char *path) +__posix_directory_sync( + WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *path) { -#ifdef __linux__ WT_DECL_RET; + WT_SESSION_IMPL *session; int fd, tret; - const char *dir; - char *copy; - tret = 0; - /* - * POSIX 1003.1 does not require that fsync of a file handle ensures the - * entry in the directory containing the file has also reached disk (and - * there are historic Linux filesystems requiring this), do an explicit - * fsync on a file descriptor for the directory to be sure. - */ - copy = NULL; - if (path == NULL || (dir = strrchr(path, '/')) == NULL) - path = S2C(session)->home; - else { - /* - * Copy the directory name, leaving the trailing slash in place, - * so a path of "/foo" doesn't result in an empty string. - */ - WT_RET(__wt_strndup( - session, path, (size_t)(dir - path) + 1, ©)); - path = copy; - } + WT_UNUSED(file_system); + + session = (WT_SESSION_IMPL *)wt_session; WT_SYSCALL_RETRY(( (fd = open(path, O_RDONLY, 0444)) == -1 ? 
-1 : 0), ret); if (ret != 0) - WT_ERR_MSG(session, ret, "%s: directory-sync: open", path); + WT_RET_MSG(session, ret, "%s: directory-sync: open", path); - ret = __posix_sync(session, fd, path, "directory-sync", true); + ret = __posix_sync(session, fd, path, "directory-sync"); - WT_SYSCALL_RETRY(close(fd), tret); + WT_SYSCALL(close(fd), tret); if (tret != 0) { __wt_err(session, tret, "%s: directory-sync: close", path); if (ret == 0) ret = tret; } -err: __wt_free(session, copy); return (ret); -#else - WT_UNUSED(session); - WT_UNUSED(path); - return (0); -#endif } +#endif /* - * __posix_file_exist -- + * __posix_fs_exist -- * Return if the file exists. */ static int -__posix_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +__posix_fs_exist(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, bool *existp) { struct stat sb; WT_DECL_RET; - char *path; + WT_SESSION_IMPL *session; - WT_RET(__wt_filename(session, name, &path)); - name = path; + WT_UNUSED(file_system); - WT_SYSCALL_RETRY(stat(name, &sb), ret); - if (ret == 0) + session = (WT_SESSION_IMPL *)wt_session; + + WT_SYSCALL(stat(name, &sb), ret); + if (ret == 0) { *existp = true; - else if (ret == ENOENT) { + return (0); + } + if (ret == ENOENT) { *existp = false; - ret = 0; - } else - __wt_err(session, ret, "%s: file-exist: stat", name); - - __wt_free(session, path); - return (ret); + return (0); + } + WT_RET_MSG(session, ret, "%s: file-exist: stat", name); } /* - * __posix_file_remove -- + * __posix_fs_remove -- * Remove a file. 
*/ static int -__posix_file_remove(WT_SESSION_IMPL *session, const char *name) +__posix_fs_remove( + WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name) { WT_DECL_RET; - char *path; + WT_SESSION_IMPL *session; -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, name, false, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-remove: file has open handles", name); -#endif + WT_UNUSED(file_system); - WT_RET(__wt_filename(session, name, &path)); + session = (WT_SESSION_IMPL *)wt_session; /* * ISO C doesn't require remove return -1 on failure or set errno (note @@ -180,35 +158,26 @@ __posix_file_remove(WT_SESSION_IMPL *session, const char *name) * where we're not doing any special checking for standards compliance, * using unlink may be marginally safer. */ - WT_SYSCALL_RETRY(unlink(path), ret); - __wt_free(session, path); + WT_SYSCALL(unlink(name), ret); if (ret == 0) return (0); WT_RET_MSG(session, ret, "%s: file-remove: unlink", name); } /* - * __posix_file_rename -- + * __posix_fs_rename -- * Rename a file. 
*/ static int -__posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +__posix_fs_rename(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *from, const char *to) { WT_DECL_RET; - char *from_path, *to_path; - -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, from, false, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", from); - if (__wt_handle_search(session, to, false, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", to); -#endif + WT_SESSION_IMPL *session; - from_path = to_path = NULL; - WT_ERR(__wt_filename(session, from, &from_path)); - WT_ERR(__wt_filename(session, to, &to_path)); + WT_UNUSED(file_system); + + session = (WT_SESSION_IMPL *)wt_session; /* * ISO C doesn't require rename return -1 on failure or set errno (note @@ -217,143 +186,114 @@ __posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) * with the wrong errno (if errno is garbage), or the generic WT_ERROR * return (if errno is 0), but we've done the best we can. */ - WT_SYSCALL_RETRY(rename(from_path, to_path) != 0 ? -1 : 0, ret); - -err: __wt_free(session, from_path); - __wt_free(session, to_path); + WT_SYSCALL(rename(from, to) != 0 ? -1 : 0, ret); if (ret == 0) return (0); WT_RET_MSG(session, ret, "%s to %s: file-rename: rename", from, to); } /* - * __posix_file_size -- + * __posix_fs_size -- * Get the size of a file in bytes, by file name. */ static int -__posix_file_size( - WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +__posix_fs_size(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, wt_off_t *sizep) { struct stat sb; WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, name, &path)); - name = path; + WT_SESSION_IMPL *session; - /* - * Optionally don't log errors on ENOENT; some callers of this function - * expect failure in that case and don't want an error message logged. 
- */ - WT_SYSCALL_RETRY(stat(name, &sb), ret); - if (ret == 0) - *sizep = sb.st_size; - else if (ret != ENOENT || !silent) - __wt_err(session, ret, "%s: file-size: stat", name); + WT_UNUSED(file_system); - __wt_free(session, path); + session = (WT_SESSION_IMPL *)wt_session; - return (ret); + WT_SYSCALL(stat(name, &sb), ret); + if (ret == 0) { + *sizep = sb.st_size; + return (0); + } + WT_RET_MSG(session, ret, "%s: file-size: stat", name); } +#if defined(HAVE_POSIX_FADVISE) /* - * __posix_handle_advise -- + * __posix_file_advise -- * POSIX fadvise. */ static int -__posix_handle_advise(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +__posix_file_advise(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, + wt_off_t offset, wt_off_t len, int advice) { -#if defined(HAVE_POSIX_FADVISE) WT_DECL_RET; + WT_FILE_HANDLE_POSIX *pfh; + WT_SESSION_IMPL *session; - /* - * Refuse pre-load when direct I/O is configured for the file, the - * kernel cache isn't interesting. - */ - if (advice == POSIX_MADV_WILLNEED && fh->direct_io) - return (ENOTSUP); + session = (WT_SESSION_IMPL *)wt_session; + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; - WT_SYSCALL_RETRY(posix_fadvise(fh->fd, offset, len, advice), ret); + WT_SYSCALL(posix_fadvise(pfh->fd, offset, len, advice), ret); if (ret == 0) return (0); /* * Treat EINVAL as not-supported, some systems don't support some flags. - * Quietly fail, callers expect not-supported failures. + * Quietly fail, callers expect not-supported failures, and reset the + * handle method to prevent future calls. 
*/ - if (ret == EINVAL) + if (ret == EINVAL) { + file_handle->fh_advise = NULL; return (ENOTSUP); + } - WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", fh->name); -#else - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(advice); + WT_RET_MSG(session, ret, + "%s: handle-advise: posix_fadvise", file_handle->name); - /* Quietly fail, callers expect not-supported failures. */ - return (ENOTSUP); -#endif } +#endif /* - * __posix_handle_close -- - * ANSI C close/fclose. + * __posix_file_close -- + * ANSI C close. */ static int -__posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +__posix_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session) { WT_DECL_RET; + WT_FILE_HANDLE_POSIX *pfh; + WT_SESSION_IMPL *session; - if (fh->fp == NULL) { - WT_SYSCALL_RETRY(close(fh->fd), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: handle-close: close", fh->name); - } + session = (WT_SESSION_IMPL *)wt_session; + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; - /* If the stream was opened for writing, flush the file. */ - if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { - ret = __wt_errno(); - __wt_err(session, ret, "%s: handle-close: fflush", fh->name); + /* Close the file handle. */ + if (pfh->fd != -1) { + WT_SYSCALL(close(pfh->fd), ret); + if (ret != 0) + __wt_err(session, ret, + "%s: handle-close: close", file_handle->name); } - /* Close the file. */ - if (fclose(fh->fp) != 0) { - ret = __wt_errno(); - __wt_err(session, ret, "%s: handle-close: fclose", fh->name); - } + __wt_free(session, file_handle->name); + __wt_free(session, pfh); return (ret); } /* - * __posix_handle_getc -- - * ANSI C fgetc. 
- */ -static int -__posix_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, - ENOTSUP, "%s: handle-getc: no stream configured", fh->name); - - *chp = fgetc(fh->fp); - if (*chp != EOF || !ferror(fh->fp)) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); -} - -/* - * __posix_handle_lock -- + * __posix_file_lock -- * Lock/unlock a file. */ static int -__posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +__posix_file_lock( + WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock) { struct flock fl; WT_DECL_RET; + WT_FILE_HANDLE_POSIX *pfh; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; /* * WiredTiger requires this function be able to acquire locks past @@ -369,44 +309,32 @@ __posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) fl.l_type = lock ? F_WRLCK : F_UNLCK; fl.l_whence = SEEK_SET; - WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl) == -1 ? -1 : 0, ret); + WT_SYSCALL(fcntl(pfh->fd, F_SETLK, &fl) == -1 ? -1 : 0, ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", fh->name); -} - -/* - * __posix_handle_printf -- - * ANSI C vfprintf. - */ -static int -__posix_handle_printf( - WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, ENOTSUP, - "%s: vfprintf: no stream configured", fh->name); - - if (vfprintf(fh->fp, fmt, ap) >= 0) - return (0); - WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); + WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", file_handle->name); } /* - * __posix_handle_read -- + * __posix_file_read -- * POSIX pread. 
*/ static int -__posix_handle_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +__posix_file_read(WT_FILE_HANDLE *file_handle, + WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf) { + WT_FILE_HANDLE_POSIX *pfh; + WT_SESSION_IMPL *session; size_t chunk; ssize_t nr; uint8_t *addr; + session = (WT_SESSION_IMPL *)wt_session; + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; + /* Assert direct I/O is aligned and a multiple of the alignment. */ WT_ASSERT(session, - !fh->direct_io || + !pfh->direct_io || S2C(session)->buffer_alignment == 0 || (!((uintptr_t)buf & (uintptr_t)(S2C(session)->buffer_alignment - 1)) && @@ -416,79 +344,122 @@ __posix_handle_read( /* Break reads larger than 1GB into 1GB chunks. */ for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { chunk = WT_MIN(len, WT_GIGABYTE); - if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) + if ((nr = pread(pfh->fd, addr, chunk, offset)) <= 0) WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(), "%s: handle-read: pread: failed to read %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); + file_handle->name, chunk, (uintmax_t)offset); } return (0); } /* - * __posix_handle_size -- + * __posix_file_size -- * Get the size of a file in bytes, by file handle. 
*/ static int -__posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +__posix_file_size( + WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep) { struct stat sb; WT_DECL_RET; + WT_FILE_HANDLE_POSIX *pfh; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; - WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret); + WT_SYSCALL(fstat(pfh->fd, &sb), ret); if (ret == 0) { *sizep = sb.st_size; return (0); } - WT_RET_MSG(session, ret, "%s: handle-size: fstat", fh->name); + WT_RET_MSG(session, ret, "%s: handle-size: fstat", file_handle->name); } /* - * __posix_handle_sync -- - * POSIX fflush/fsync. + * __posix_file_sync -- + * POSIX fsync. */ static int -__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +__posix_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session) { - if (fh->fp == NULL) - return (__posix_sync( - session, fh->fd, fh->name, "handle-sync", block)); + WT_FILE_HANDLE_POSIX *pfh; + WT_SESSION_IMPL *session; - if (fflush(fh->fp) == 0) + session = (WT_SESSION_IMPL *)wt_session; + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; + + return ( + __posix_sync(session, pfh->fd, file_handle->name, "handle-sync")); +} + +#ifdef HAVE_SYNC_FILE_RANGE +/* + * __posix_file_sync_nowait -- + * POSIX fsync. + */ +static int +__posix_file_sync_nowait(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session) +{ + WT_DECL_RET; + WT_FILE_HANDLE_POSIX *pfh; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; + + WT_SYSCALL_RETRY(sync_file_range(pfh->fd, + (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); + if (ret == 0) return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); + WT_RET_MSG(session, ret, + "%s: handle-sync-nowait: sync_file_range", file_handle->name); } +#endif /* - * __posix_handle_truncate -- + * __posix_file_truncate -- * POSIX ftruncate. 
*/ static int -__posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +__posix_file_truncate( + WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t len) { WT_DECL_RET; + WT_FILE_HANDLE_POSIX *pfh; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; - WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); + WT_SYSCALL_RETRY(ftruncate(pfh->fd, len), ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, "%s: handle-truncate: ftruncate", fh->name); + WT_RET_MSG(session, ret, + "%s: handle-truncate: ftruncate", file_handle->name); } /* - * __posix_handle_write -- + * __posix_file_write -- * POSIX pwrite. */ static int -__posix_handle_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +__posix_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, + wt_off_t offset, size_t len, const void *buf) { + WT_FILE_HANDLE_POSIX *pfh; + WT_SESSION_IMPL *session; size_t chunk; ssize_t nw; const uint8_t *addr; + session = (WT_SESSION_IMPL *)wt_session; + pfh = (WT_FILE_HANDLE_POSIX *)file_handle; + /* Assert direct I/O is aligned and a multiple of the alignment. */ WT_ASSERT(session, - !fh->direct_io || + !pfh->direct_io || S2C(session)->buffer_alignment == 0 || (!((uintptr_t)buf & (uintptr_t)(S2C(session)->buffer_alignment - 1)) && @@ -498,21 +469,21 @@ __posix_handle_write(WT_SESSION_IMPL *session, /* Break writes larger than 1GB into 1GB chunks. 
*/ for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { chunk = WT_MIN(len, WT_GIGABYTE); - if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) + if ((nw = pwrite(pfh->fd, addr, chunk, offset)) < 0) WT_RET_MSG(session, __wt_errno(), "%s: handle-write: pwrite: failed to write %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); + file_handle->name, chunk, (uintmax_t)offset); } return (0); } /* - * __posix_handle_open_cloexec -- + * __posix_open_file_cloexec -- * Prevent child access to file handles. */ static inline int -__posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) +__posix_open_file_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) { #if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) int f; @@ -537,28 +508,35 @@ __posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) } /* - * __posix_handle_open -- + * __posix_open_file -- * Open a file handle. */ static int -__posix_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) +__posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, + const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + WT_FILE_HANDLE **file_handlep) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; + WT_FILE_HANDLE *file_handle; + WT_FILE_HANDLE_POSIX *pfh; + WT_SESSION_IMPL *session; mode_t mode; - int f, fd, tret; - bool direct_io; - const char *stream_mode; + int f; + WT_UNUSED(file_system); + + *file_handlep = NULL; + + session = (WT_SESSION_IMPL *)wt_session; conn = S2C(session); - direct_io = false; + + WT_RET(__wt_calloc_one(session, &pfh)); /* Set up error handling. 
*/ - fh->fd = fd = -1; - fh->fp = NULL; + pfh->fd = -1; - if (file_type == WT_FILE_TYPE_DIRECTORY) { + if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY) { f = O_RDONLY; #ifdef O_CLOEXEC /* @@ -569,10 +547,10 @@ __posix_handle_open(WT_SESSION_IMPL *session, f |= O_CLOEXEC; #endif WT_SYSCALL_RETRY(( - (fd = open(name, f, 0444)) == -1 ? -1 : 0), ret); + (pfh->fd = open(name, f, 0444)) == -1 ? -1 : 0), ret); if (ret != 0) WT_ERR_MSG(session, ret, "%s: handle-open: open", name); - WT_ERR(__posix_handle_open_cloexec(session, fd, name)); + WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name)); goto directory_open; } @@ -598,28 +576,20 @@ __posix_handle_open(WT_SESSION_IMPL *session, f |= O_CLOEXEC; #endif #ifdef O_DIRECT - /* - * Direct I/O: file-type is a flag from the set of possible flags stored - * in the connection handle during configuration, check for a match. - * Also, "direct_io=checkpoint" configures direct I/O for readonly data - * files. - */ - if (FLD_ISSET(conn->direct_io, file_type) || - (LF_ISSET(WT_OPEN_READONLY) && - file_type == WT_FILE_TYPE_DATA && - FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { + /* Direct I/O. */ + if (LF_ISSET(WT_OPEN_DIRECTIO)) { f |= O_DIRECT; - direct_io = true; - } + pfh->direct_io = true; + } else + pfh->direct_io = false; #endif - fh->direct_io = direct_io; #ifdef O_NOATIME /* Avoid updating metadata for read-only workloads. */ - if (file_type == WT_FILE_TYPE_DATA) + if (file_type == WT_OPEN_FILE_TYPE_DATA) f |= O_NOATIME; #endif - if (file_type == WT_FILE_TYPE_LOG && + if (file_type == WT_OPEN_FILE_TYPE_LOG && FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { #ifdef O_DSYNC f |= O_DSYNC; @@ -631,115 +601,122 @@ __posix_handle_open(WT_SESSION_IMPL *session, #endif } - WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? -1 : 0), ret); + WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret); if (ret != 0) WT_ERR_MSG(session, ret, - direct_io ? + pfh->direct_io ? 
"%s: handle-open: open: failed with direct I/O configured, " "some filesystem types do not support direct I/O" : "%s: handle-open: open", name); - WT_ERR(__posix_handle_open_cloexec(session, fd, name)); + WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name)); - /* Disable read-ahead on trees: it slows down random read workloads. */ #if defined(HAVE_POSIX_FADVISE) - if (file_type == WT_FILE_TYPE_DATA) { - WT_SYSCALL_RETRY( - posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM), ret); + /* + * Disable read-ahead on trees: it slows down random read workloads. + * Ignore fadvise when doing direct I/O, the kernel cache isn't + * interesting. + */ + if (!pfh->direct_io && file_type == WT_OPEN_FILE_TYPE_DATA) { + WT_SYSCALL( + posix_fadvise(pfh->fd, 0, 0, POSIX_FADV_RANDOM), ret); if (ret != 0) WT_ERR_MSG(session, ret, "%s: handle-open: posix_fadvise", name); } #endif - /* Optionally configure a stdio stream API. */ - switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { - case WT_STREAM_APPEND: - stream_mode = "a"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case WT_STREAM_READ: - stream_mode = "r"; - break; - case WT_STREAM_WRITE: - stream_mode = "w"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case 0: - default: - stream_mode = NULL; - break; - } - if (stream_mode != NULL) { - if ((fh->fp = fdopen(fd, stream_mode)) == NULL) - WT_ERR_MSG(session, __wt_errno(), - "%s: handle-open: fdopen", name); - if (LF_ISSET(WT_STREAM_LINE_BUFFER)) - __wt_stream_set_line_buffer(fh->fp); - } - directory_open: - fh->fd = fd; - - /* Configure fallocate calls. 
*/ - __wt_posix_handle_allocate_configure(session, fh); - - fh->fh_advise = __posix_handle_advise; - fh->fh_allocate = __wt_posix_handle_allocate; - fh->fh_close = __posix_handle_close; - fh->fh_getc = __posix_handle_getc; - fh->fh_lock = __posix_handle_lock; - fh->fh_map = __wt_posix_map; - fh->fh_map_discard = __wt_posix_map_discard; - fh->fh_map_preload = __wt_posix_map_preload; - fh->fh_map_unmap = __wt_posix_map_unmap; - fh->fh_printf = __posix_handle_printf; - fh->fh_read = __posix_handle_read; - fh->fh_size = __posix_handle_size; - fh->fh_sync = __posix_handle_sync; - fh->fh_truncate = __posix_handle_truncate; - fh->fh_write = __posix_handle_write; + /* Initialize public information. */ + file_handle = (WT_FILE_HANDLE *)pfh; + WT_ERR(__wt_strdup(session, name, &file_handle->name)); + + file_handle->close = __posix_file_close; +#if defined(HAVE_POSIX_FADVISE) + /* + * Ignore fadvise when doing direct I/O, the kernel cache isn't + * interesting. + */ + if (!pfh->direct_io) + file_handle->fh_advise = __posix_file_advise; +#endif + file_handle->fh_allocate = __wt_posix_file_fallocate; + file_handle->fh_lock = __posix_file_lock; +#ifdef WORDS_BIGENDIAN + /* + * The underlying objects are little-endian, mapping objects isn't + * currently supported on big-endian systems. 
+ */ +#else + file_handle->fh_map = __wt_posix_map; +#ifdef HAVE_POSIX_MADVISE + file_handle->fh_map_discard = __wt_posix_map_discard; + file_handle->fh_map_preload = __wt_posix_map_preload; +#endif + file_handle->fh_unmap = __wt_posix_unmap; +#endif + file_handle->fh_read = __posix_file_read; + file_handle->fh_size = __posix_file_size; + file_handle->fh_sync = __posix_file_sync; +#ifdef HAVE_SYNC_FILE_RANGE + file_handle->fh_sync_nowait = __posix_file_sync_nowait; +#endif + file_handle->fh_truncate = __posix_file_truncate; + file_handle->fh_write = __posix_file_write; + + *file_handlep = file_handle; return (0); -err: if (fd != -1) { - WT_SYSCALL_RETRY(close(fd), tret); - if (tret != 0) - __wt_err(session, tret, "%s: handle-open: close", name); - } +err: WT_TRET(__posix_file_close((WT_FILE_HANDLE *)pfh, wt_session)); return (ret); } /* - * __wt_os_posix -- - * Initialize a POSIX configuration. + * __posix_terminate -- + * Terminate a POSIX configuration. */ -int -__wt_os_posix(WT_SESSION_IMPL *session) +static int +__posix_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session) { - WT_CONNECTION_IMPL *conn; + WT_SESSION_IMPL *session; - conn = S2C(session); + WT_UNUSED(file_system); - /* Initialize the POSIX jump table. */ - conn->file_directory_list = __wt_posix_directory_list; - conn->file_directory_sync = __posix_directory_sync; - conn->file_exist = __posix_file_exist; - conn->file_remove = __posix_file_remove; - conn->file_rename = __posix_file_rename; - conn->file_size = __posix_file_size; - conn->handle_open = __posix_handle_open; + session = (WT_SESSION_IMPL *)wt_session; + __wt_free(session, file_system); return (0); } /* - * __wt_os_posix_cleanup -- - * Discard a POSIX configuration. + * __wt_os_posix -- + * Initialize a POSIX configuration. 
*/ int -__wt_os_posix_cleanup(WT_SESSION_IMPL *session) +__wt_os_posix(WT_SESSION_IMPL *session) { - WT_UNUSED(session); + WT_CONNECTION_IMPL *conn; + WT_FILE_SYSTEM *file_system; + + conn = S2C(session); + + WT_RET(__wt_calloc_one(session, &file_system)); + + /* Initialize the POSIX jump table. */ + file_system->fs_directory_list = __wt_posix_directory_list; + file_system->fs_directory_list_free = __wt_posix_directory_list_free; +#ifdef __linux__ + file_system->fs_directory_sync = __posix_directory_sync; +#endif + file_system->fs_exist = __posix_fs_exist; + file_system->fs_open_file = __posix_open_file; + file_system->fs_remove = __posix_fs_remove; + file_system->fs_rename = __posix_fs_rename; + file_system->fs_size = __posix_fs_size; + file_system->terminate = __posix_terminate; + + /* Switch it into place. */ + conn->file_system = file_system; return (0); } diff --git a/src/third_party/wiredtiger/src/os_posix/os_map.c b/src/third_party/wiredtiger/src/os_posix/os_map.c index e161e268f6d..b33f6d82e34 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_map.c +++ b/src/third_party/wiredtiger/src/os_posix/os_map.c @@ -13,23 +13,26 @@ * Map a file into memory. */ int -__wt_posix_map(WT_SESSION_IMPL *session, - WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie) +__wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, + void *mapped_regionp, size_t *lenp, void *mapped_cookiep) { + WT_FILE_HANDLE_POSIX *pfh; + WT_SESSION_IMPL *session; size_t len; wt_off_t file_size; void *map; - WT_UNUSED(mappingcookie); + WT_UNUSED(mapped_cookiep); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + session = (WT_SESSION_IMPL *)wt_session; + pfh = (WT_FILE_HANDLE_POSIX *)fh; /* * Mapping isn't possible if direct I/O configured for the file, the * Linux open(2) documentation says applications should avoid mixing * mmap(2) of files with direct I/O to the same files. 
*/ - if (fh->direct_io) + if (pfh->direct_io) return (ENOTSUP); /* @@ -37,7 +40,7 @@ __wt_posix_map(WT_SESSION_IMPL *session, * underneath us, our caller needs to ensure consistency of the mapped * region vs. any other file activity. */ - WT_RET(__wt_filesize(session, fh, &file_size)); + WT_RET(fh->fh_size(fh, wt_session, &file_size)); len = (size_t)file_size; (void)__wt_verbose(session, WT_VERB_HANDLEOPS, @@ -49,43 +52,48 @@ __wt_posix_map(WT_SESSION_IMPL *session, MAP_NOCORE | #endif MAP_PRIVATE, - fh->fd, (wt_off_t)0)) == MAP_FAILED) + pfh->fd, (wt_off_t)0)) == MAP_FAILED) WT_RET_MSG(session, __wt_errno(), "%s: memory-map: mmap", fh->name); - *(void **)mapp = map; + *(void **)mapped_regionp = map; *lenp = len; return (0); } #ifdef HAVE_POSIX_MADVISE /* - * __posix_map_preload_madvise -- + * __wt_posix_map_preload -- * Cause a section of a memory map to be faulted in. */ -static int -__posix_map_preload_madvise( - WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) +int +__wt_posix_map_preload(WT_FILE_HANDLE *fh, + WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie) { WT_BM *bm; WT_CONNECTION_IMPL *conn; WT_DECL_RET; + WT_SESSION_IMPL *session; void *blk; + WT_UNUSED(mapped_cookie); + + session = (WT_SESSION_IMPL *)wt_session; + conn = S2C(session); bm = S2BT(session)->bm; /* Linux requires the address be aligned to a 4KB boundary. */ - blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); - size += WT_PTRDIFF(p, blk); + blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1)); + length += WT_PTRDIFF(map, blk); /* XXX proxy for "am I doing a scan?" -- manual read-ahead */ if (F_ISSET(session, WT_SESSION_NO_CACHE)) { /* Read in 2MB blocks every 1MB of data. 
*/ - if (((uintptr_t)((uint8_t *)blk + size) & + if (((uintptr_t)((uint8_t *)blk + length) & (uintptr_t)((1<<20) - 1)) < (uintptr_t)blk) return (0); - size = WT_MIN(WT_MAX(20 * size, 2 << 20), + length = WT_MIN(WT_MAX(20 * length, 2 << 20), WT_PTRDIFF((uint8_t *)bm->map + bm->maplen, blk)); } @@ -93,10 +101,12 @@ __posix_map_preload_madvise( * Manual pages aren't clear on whether alignment is required for the * size, so we will be conservative. */ - size &= ~(size_t)(conn->page_size - 1); + length &= ~(size_t)(conn->page_size - 1); + if (length <= (size_t)conn->page_size) + return (0); - if (size <= (size_t)conn->page_size || - (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) == 0) + WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_WILLNEED), ret); + if (ret == 0) return (0); WT_RET_MSG(session, ret, @@ -105,46 +115,31 @@ __posix_map_preload_madvise( } #endif -/* - * __wt_posix_map_preload -- - * Cause a section of a memory map to be faulted in. - */ -int -__wt_posix_map_preload( - WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - -#ifdef HAVE_POSIX_MADVISE - return (__posix_map_preload_madvise(session, fh, p, size)); -#else - WT_UNUSED(fh); - WT_UNUSED(p); - WT_UNUSED(size); - return (ENOTSUP); -#endif -} - #ifdef HAVE_POSIX_MADVISE /* - * __posix_map_discard_madvise -- + * __wt_posix_map_discard -- * Discard a chunk of the memory map. */ -static int -__posix_map_discard_madvise( - WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) +int +__wt_posix_map_discard(WT_FILE_HANDLE *fh, + WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; + WT_SESSION_IMPL *session; void *blk; + WT_UNUSED(mapped_cookie); + + session = (WT_SESSION_IMPL *)wt_session; conn = S2C(session); /* Linux requires the address be aligned to a 4KB boundary. 
*/ - blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); - size += WT_PTRDIFF(p, blk); + blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1)); + length += WT_PTRDIFF(map, blk); - if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) == 0) + WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_DONTNEED), ret); + if (ret == 0) return (0); WT_RET_MSG(session, ret, @@ -154,41 +149,23 @@ __posix_map_discard_madvise( #endif /* - * __wt_posix_map_discard -- - * Discard a chunk of the memory map. - */ -int -__wt_posix_map_discard( - WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - -#ifdef HAVE_POSIX_MADVISE - return (__posix_map_discard_madvise(session, fh, p, size)); -#else - WT_UNUSED(fh); - WT_UNUSED(p); - WT_UNUSED(size); - return (ENOTSUP); -#endif -} - -/* - * __wt_posix_map_unmap -- + * __wt_posix_unmap -- * Remove a memory mapping. */ int -__wt_posix_map_unmap(WT_SESSION_IMPL *session, - WT_FH *fh, void *map, size_t len, void **mappingcookie) +__wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, + void *mapped_region, size_t len, void *mapped_cookie) { - WT_UNUSED(mappingcookie); + WT_SESSION_IMPL *session; + + WT_UNUSED(mapped_cookie); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + session = (WT_SESSION_IMPL *)wt_session; (void)__wt_verbose(session, WT_VERB_HANDLEOPS, "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len); - if (munmap(map, len) == 0) + if (munmap(mapped_region, len) == 0) return (0); WT_RET_MSG(session, __wt_errno(), "%s: memory-unmap: munmap", fh->name); diff --git a/src/third_party/wiredtiger/src/os_posix/os_thread.c b/src/third_party/wiredtiger/src/os_posix/os_thread.c index 35a23622ddc..e57a308c9b0 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_thread.c +++ b/src/third_party/wiredtiger/src/os_posix/os_thread.c @@ -34,7 +34,7 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) { 
WT_DECL_RET; - WT_SYSCALL_RETRY(pthread_join(tid, NULL), ret); + WT_SYSCALL(pthread_join(tid, NULL), ret); if (ret == 0) return (0); diff --git a/src/third_party/wiredtiger/src/os_posix/os_time.c b/src/third_party/wiredtiger/src/os_posix/os_time.c index 0e5a1cdadfb..b1b22a8e684 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_time.c +++ b/src/third_party/wiredtiger/src/os_posix/os_time.c @@ -18,14 +18,14 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_DECL_RET; #if defined(HAVE_CLOCK_GETTIME) - WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret); + WT_SYSCALL(clock_gettime(CLOCK_REALTIME, tsp), ret); if (ret == 0) return (0); WT_RET_MSG(session, ret, "clock_gettime"); #elif defined(HAVE_GETTIMEOFDAY) struct timeval v; - WT_SYSCALL_RETRY(gettimeofday(&v, NULL), ret); + WT_SYSCALL(gettimeofday(&v, NULL), ret); if (ret == 0) { tsp->tv_sec = v.tv_sec; tsp->tv_nsec = v.tv_usec * WT_THOUSAND; diff --git a/src/third_party/wiredtiger/src/os_win/os_dir.c b/src/third_party/wiredtiger/src/os_win/os_dir.c index 64eae60983c..dccacc1e446 100644 --- a/src/third_party/wiredtiger/src/os_win/os_dir.c +++ b/src/third_party/wiredtiger/src/os_win/os_dir.c @@ -13,38 +13,46 @@ * Get a list of files from a directory, MSVC version. 
*/ int -__wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, - const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) +__wt_win_directory_list(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *directory, + const char *prefix, char ***dirlistp, uint32_t *countp) { + DWORD windows_error; HANDLE findhandle; WIN32_FIND_DATA finddata; WT_DECL_ITEM(pathbuf); WT_DECL_RET; + WT_SESSION_IMPL *session; size_t dirallocsz, pathlen; - u_int count, dirsz; - bool match; - char **entries, *path; + uint32_t count; + char *dir_copy, **entries; - *dirlist = NULL; - *countp = 0; + WT_UNUSED(file_system); - WT_RET(__wt_filename(session, dir, &path)); + session = (WT_SESSION_IMPL *)wt_session; - pathlen = strlen(path); - if (path[pathlen - 1] == '\\') - path[pathlen - 1] = '\0'; - WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf)); - WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", path)); + *dirlistp = NULL; + *countp = 0; findhandle = INVALID_HANDLE_VALUE; dirallocsz = 0; - dirsz = 0; entries = NULL; + WT_ERR(__wt_strdup(session, directory, &dir_copy)); + pathlen = strlen(dir_copy); + if (dir_copy[pathlen - 1] == '\\') + dir_copy[pathlen - 1] = '\0'; + WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf)); + WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", dir_copy)); + findhandle = FindFirstFileA(pathbuf->data, &finddata); - if (findhandle == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_getlasterror(), - "%s: directory-list: FindFirstFile", pathbuf->data); + if (findhandle == INVALID_HANDLE_VALUE) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: directory-list: FindFirstFile: %s", + pathbuf->data, __wt_formatmessage(session, windows_error)); + WT_ERR(__wt_map_windows_error(windows_error)); + } count = 0; do { @@ -56,46 +64,63 @@ __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, continue; /* The list of files is optionally filtered by a prefix. 
*/ - match = false; if (prefix != NULL && - ((LF_ISSET(WT_DIRLIST_INCLUDE) && - WT_PREFIX_MATCH(finddata.cFileName, prefix)) || - (LF_ISSET(WT_DIRLIST_EXCLUDE) && - !WT_PREFIX_MATCH(finddata.cFileName, prefix)))) - match = true; - if (prefix == NULL || match) { - /* - * We have a file name we want to return. - */ - count++; - if (count > dirsz) { - dirsz += WT_DIR_ENTRY; - WT_ERR(__wt_realloc_def(session, - &dirallocsz, dirsz, &entries)); - } - WT_ERR(__wt_strdup(session, - finddata.cFileName, &entries[count - 1])); - } + !WT_PREFIX_MATCH(finddata.cFileName, prefix)) + continue; + + WT_ERR(__wt_realloc_def( + session, &dirallocsz, count + 1, &entries)); + WT_ERR(__wt_strdup( + session, finddata.cFileName, &entries[count])); + ++count; } while (FindNextFileA(findhandle, &finddata) != 0); - if (count > 0) - *dirlist = entries; + + *dirlistp = entries; *countp = count; err: if (findhandle != INVALID_HANDLE_VALUE) - (void)FindClose(findhandle); - __wt_free(session, path); + if (FindClose(findhandle) == 0) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: directory-list: FindClose: %s", + pathbuf->data, + __wt_formatmessage(session, windows_error)); + if (ret == 0) + ret = __wt_map_windows_error(windows_error); + } + + __wt_free(session, dir_copy); __wt_scr_free(session, &pathbuf); if (ret == 0) return (0); - if (*dirlist != NULL) { - for (count = dirsz; count > 0; count--) - __wt_free(session, entries[count]); - __wt_free(session, entries); - } + WT_TRET(__wt_win_directory_list_free( + file_system, wt_session, entries, count)); WT_RET_MSG(session, ret, "%s: directory-list, prefix \"%s\"", - dir, prefix == NULL ? "" : prefix); + directory, prefix == NULL ? "" : prefix); +} + +/* + * __wt_win_directory_list_free -- + * Free memory returned by __wt_win_directory_list, Windows version. 
+ */ +int +__wt_win_directory_list_free(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, char **dirlist, uint32_t count) +{ + WT_SESSION_IMPL *session; + + WT_UNUSED(file_system); + + session = (WT_SESSION_IMPL *)wt_session; + + if (dirlist != NULL) { + while (count > 0) + __wt_free(session, dirlist[--count]); + __wt_free(session, dirlist); + } + return (0); } diff --git a/src/third_party/wiredtiger/src/os_win/os_dlopen.c b/src/third_party/wiredtiger/src/os_win/os_dlopen.c index ce949e4ea5f..3da47bf23a3 100644 --- a/src/third_party/wiredtiger/src/os_win/os_dlopen.c +++ b/src/third_party/wiredtiger/src/os_win/os_dlopen.c @@ -15,19 +15,23 @@ int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) { + DWORD windows_error; WT_DECL_RET; WT_DLH *dlh; WT_RET(__wt_calloc_one(session, &dlh)); WT_ERR(__wt_strdup(session, path, &dlh->name)); + WT_ERR(__wt_strdup(session, path == NULL ? "local" : path, &dlh->name)); /* NULL means load from the current binary */ if (path == NULL) { if (GetModuleHandleExA( 0, NULL, (HMODULE *)&dlh->handle) == FALSE) { - ret = __wt_getlasterror(); - WT_ERR_MSG(session, ret, - "GetModuleHandleEx(%s): %s", path, 0); + windows_error = __wt_getlasterror(); + __wt_errx(session, + "GetModuleHandleEx: %s: %s", + path, __wt_formatmessage(session, windows_error)); + WT_ERR(__wt_map_windows_error(windows_error)); } } else { // TODO: load dll here @@ -50,14 +54,20 @@ int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) { + DWORD windows_error; void *sym; *(void **)sym_ret = NULL; sym = GetProcAddress(dlh->handle, name); - if (sym == NULL && fail) - WT_RET_MSG(session, __wt_getlasterror(), - "GetProcAddress(%s in %s)", name, dlh->name); + if (sym == NULL && fail) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "GetProcAddress: %s in %s: %s", + name, dlh->name, + __wt_formatmessage(session, windows_error)); + WT_RET(__wt_map_windows_error(windows_error)); + } *(void 
**)sym_ret = sym; return (0); @@ -70,11 +80,14 @@ __wt_dlsym(WT_SESSION_IMPL *session, int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) { + DWORD windows_error; WT_DECL_RET; if (FreeLibrary(dlh->handle) == FALSE) { - ret = __wt_getlasterror(); - __wt_err(session, ret, "FreeLibrary: %s", dlh->name); + windows_error = __wt_getlasterror(); + __wt_errx(session, "FreeLibrary: %s: %s", + dlh->name, __wt_formatmessage(session, windows_error)); + ret = __wt_map_windows_error(windows_error); } __wt_free(session, dlh->name); diff --git a/src/third_party/wiredtiger/src/os_win/os_fs.c b/src/third_party/wiredtiger/src/os_win/os_fs.c index 4ac613fc9f9..5daba124e90 100644 --- a/src/third_party/wiredtiger/src/os_win/os_fs.c +++ b/src/third_party/wiredtiger/src/os_win/os_fs.c @@ -9,34 +9,21 @@ #include "wt_internal.h" /* - * __win_directory_sync -- - * Flush a directory to ensure a file creation is durable. - */ -static int -__win_directory_sync(WT_SESSION_IMPL *session, const char *path) -{ - WT_UNUSED(session); - WT_UNUSED(path); - return (0); -} - -/* - * __win_file_exist -- + * __win_fs_exist -- * Return if the file exists. */ static int -__win_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +__win_fs_exist(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, bool *existp) { WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, name, &path)); + WT_SESSION_IMPL *session; - ret = GetFileAttributesA(path); + WT_UNUSED(file_system); - __wt_free(session, path); + session = (WT_SESSION_IMPL *)wt_session; - if (ret != INVALID_FILE_ATTRIBUTES) + if (GetFileAttributesA(name) != INVALID_FILE_ATTRIBUTES) *existp = true; else *existp = false; @@ -45,57 +32,44 @@ __win_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) } /* - * __win_file_remove -- + * __win_fs_remove -- * Remove a file. 
*/ static int -__win_file_remove(WT_SESSION_IMPL *session, const char *name) +__win_fs_remove( + WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name) { - WT_DECL_RET; - char *path; + DWORD windows_error; + WT_SESSION_IMPL *session; -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, name, false, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-remove: file has open handles", name); -#endif + WT_UNUSED(file_system); - WT_RET(__wt_filename(session, name, &path)); - name = path; + session = (WT_SESSION_IMPL *)wt_session; if (DeleteFileA(name) == FALSE) { - ret = __wt_getlasterror(); - __wt_err(session, ret, "%s: file-remove: DeleteFileA", name); + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: file-remove: DeleteFileA: %s", + name, __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } - - __wt_free(session, path); - return (ret); + return (0); } /* - * __win_file_rename -- + * __win_fs_rename -- * Rename a file. 
*/ static int -__win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +__win_fs_rename(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *from, const char *to) { - WT_DECL_RET; - char *from_path, *to_path; + DWORD windows_error; + WT_SESSION_IMPL *session; -#ifdef HAVE_DIAGNOSTIC - if (__wt_handle_search(session, from, false, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", from); - if (__wt_handle_search(session, to, false, NULL, NULL)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", to); -#endif + WT_UNUSED(file_system); - from_path = to_path = NULL; - WT_ERR(__wt_filename(session, from, &from_path)); - from = from_path; - WT_ERR(__wt_filename(session, to, &to_path)); - to = to_path; + session = (WT_SESSION_IMPL *)wt_session; /* * Check if file exists since Windows does not override the file if @@ -103,184 +77,114 @@ __win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) */ if (GetFileAttributesA(to) != INVALID_FILE_ATTRIBUTES) if (DeleteFileA(to) == FALSE) { - ret = __wt_getlasterror(); - __wt_err(session, ret, - "%s to %s: file-rename: rename", from, to); + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: file-rename: DeleteFileA: %s", + to, __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } - if (ret == 0 && MoveFileA(from, to) == FALSE) { - ret = __wt_getlasterror(); - __wt_err(session, ret, - "%s to %s: file-rename: rename", from, to); + if (MoveFileA(from, to) == FALSE) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s to %s: file-rename: MoveFileA: %s", + from, to, __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } -err: __wt_free(session, from_path); - __wt_free(session, to_path); - return (ret); + return (0); } /* - * __win_file_size -- + * __wt_win_fs_size -- * Get the size of a file in bytes, by file 
name. */ -static int -__win_file_size( - WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +int +__wt_win_fs_size(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, wt_off_t *sizep) { + DWORD windows_error; WIN32_FILE_ATTRIBUTE_DATA data; - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, name, &path)); + WT_SESSION_IMPL *session; - ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data); + WT_UNUSED(file_system); - __wt_free(session, path); + session = (WT_SESSION_IMPL *)wt_session; - if (ret != 0) { + if (GetFileAttributesExA(name, GetFileExInfoStandard, &data) != 0) { *sizep = ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow; return (0); } - /* - * Some callers of this function expect failure if the file doesn't - * exist, and don't want an error message logged. - */ - ret = __wt_getlasterror(); - if (!silent) - WT_RET_MSG(session, ret, - "%s: file-size: GetFileAttributesEx", name); - return (ret); + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: file-size: GetFileAttributesEx: %s", + name, __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } /* - * __win_handle_advise -- - * MSVC fadvise. + * __win_file_close -- + * ANSI C close. */ static int -__win_handle_advise(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +__win_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session) { - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - WT_UNUSED(advice); - - /* Quietly fail, callers expect not-supported failures. */ - return (ENOTSUP); -} + DWORD windows_error; + WT_DECL_RET; + WT_FILE_HANDLE_WIN *win_fh; + WT_SESSION_IMPL *session; -/* - * __win_handle_allocate_configure -- - * Configure fallocate behavior for a file handle. 
- */ -static void -__win_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_UNUSED(session); + win_fh = (WT_FILE_HANDLE_WIN *)file_handle; + session = (WT_SESSION_IMPL *)wt_session; /* - * fallocate on Windows would be implemented using SetEndOfFile, which - * can also truncate the file. WiredTiger expects fallocate to ignore - * requests to truncate the file which Windows does not do, so we don't - * support the call. + * Close the primary and secondary handles. + * + * We don't open Windows system handles when opening directories for + * flushing, as it's not necessary (or possible) to flush a directory + * on Windows. Confirm the file handle is open before closing it. */ - fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE; - fh->fallocate_requires_locking = false; -} - -/* - * __win_handle_allocate -- - * Allocate space for a file handle. - */ -static int -__win_handle_allocate( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) -{ - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - - WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name); - return (ENOTSUP); -} - -/* - * __win_handle_close -- - * Close a file handle. - */ -static int -__win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_DECL_RET; - - if (fh->filehandle != INVALID_HANDLE_VALUE) { - /* - * We don't open Windows system handles when opening directories - * for flushing, as it is not necessary (or possible) to flush - * a directory on Windows. Confirm the file handle is set before - * attempting to close it. 
- */ - if (CloseHandle(fh->filehandle) == 0) { - ret = __wt_getlasterror(); - __wt_err(session, ret, - "%s: handle-close: CloseHandle", fh->name); - } + if (win_fh->filehandle != INVALID_HANDLE_VALUE && + CloseHandle(win_fh->filehandle) == 0) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: handle-close: CloseHandle: %s", + file_handle->name, + __wt_formatmessage(session, windows_error)); + ret = __wt_map_windows_error(windows_error); } - if (fh->fp != NULL) { - /* If the stream was opened for writing, flush the file. */ - if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { - ret = __wt_errno(); - __wt_err(session, - ret, "%s: handle-close: fflush", fh->name); - } - /* Close the file, closing all the underlying handles. */ - if (fclose(fh->fp) != 0) { - ret = __wt_errno(); - __wt_err(session, - ret, "%s: handle-close: fclose", fh->name); - } + if (win_fh->filehandle_secondary != INVALID_HANDLE_VALUE && + CloseHandle(win_fh->filehandle_secondary) == 0) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: handle-close: secondary: CloseHandle: %s", + file_handle->name, + __wt_formatmessage(session, windows_error)); + ret = __wt_map_windows_error(windows_error); } - /* Close the secondary handle. */ - if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && - CloseHandle(fh->filehandle_secondary) == 0) { - ret = __wt_getlasterror(); - __wt_err(session, ret, - "%s: handle-close: secondary: CloseHandle", fh->name); - } + __wt_free(session, file_handle->name); + __wt_free(session, win_fh); return (ret); } /* - * __win_handle_getc -- - * ANSI C fgetc. 
- */ -static int -__win_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, - ENOTSUP, "%s: handle-getc: no stream configured", fh->name); - - *chp = fgetc(fh->fp); - if (*chp != EOF || !ferror(fh->fp)) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); -} - -/* - * __win_handle_lock -- + * __win_file_lock -- * Lock/unlock a file. */ static int -__win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +__win_file_lock( + WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock) { - WT_DECL_RET; + DWORD windows_error; + WT_FILE_HANDLE_WIN *win_fh; + WT_SESSION_IMPL *session; + + win_fh = (WT_FILE_HANDLE_WIN *)file_handle; + session = (WT_SESSION_IMPL *)wt_session; /* * WiredTiger requires this function be able to acquire locks past @@ -293,54 +197,48 @@ __win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) * This is useful to coordinate adding records to the end of a file. */ if (lock) { - if (LockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { - ret = __wt_getlasterror(); - __wt_err(session, ret, - "%s: handle-lock: LockFile", fh->name); + if (LockFile(win_fh->filehandle, 0, 0, 1, 0) == FALSE) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: handle-lock: LockFile: %s", + file_handle->name, + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } } else - if (UnlockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) { - ret = __wt_getlasterror(); - __wt_err(session, ret, - "%s: handle-lock: UnlockFile", fh->name); + if (UnlockFile(win_fh->filehandle, 0, 0, 1, 0) == FALSE) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: handle-lock: UnlockFile: %s", + file_handle->name, + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } - return (ret); -} - -/* - * __win_handle_printf -- - * ANSI C vfprintf. 
- */ -static int -__win_handle_printf( - WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) -{ - if (fh->fp == NULL) - WT_RET_MSG(session, ENOTSUP, - "%s: vfprintf: no stream configured", fh->name); - - if (vfprintf(fh->fp, fmt, ap) >= 0) - return (0); - WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); + return (0); } /* - * __win_handle_read -- + * __win_file_read -- * Read a chunk. */ static int -__win_handle_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +__win_file_read(WT_FILE_HANDLE *file_handle, + WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf) { - DWORD chunk, nr; + DWORD chunk, nr, windows_error; uint8_t *addr; OVERLAPPED overlapped = { 0 }; + WT_FILE_HANDLE_WIN *win_fh; + WT_SESSION_IMPL *session; + + win_fh = (WT_FILE_HANDLE_WIN *)file_handle; + session = (WT_SESSION_IMPL *)wt_session; nr = 0; /* Assert direct I/O is aligned and a multiple of the alignment. */ WT_ASSERT(session, - !fh->direct_io || + !win_fh->direct_io || S2C(session)->buffer_alignment == 0 || (!((uintptr_t)buf & (uintptr_t)(S2C(session)->buffer_alignment - 1)) && @@ -353,44 +251,61 @@ __win_handle_read( overlapped.Offset = UINT32_MAX & offset; overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); - if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped)) - WT_RET_MSG(session, - nr == 0 ? WT_ERROR : __wt_getlasterror(), + if (!ReadFile( + win_fh->filehandle, addr, chunk, &nr, &overlapped)) { + windows_error = __wt_getlasterror(); + __wt_errx(session, "%s: handle-read: ReadFile: failed to read %lu " - "bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); + "bytes at offset %" PRIuMAX ": %s", + file_handle->name, chunk, (uintmax_t)offset, + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); + } } return (0); } /* - * __win_handle_size -- + * __win_file_size -- * Get the size of a file in bytes, by file handle. 
*/ static int -__win_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +__win_file_size( + WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep) { + DWORD windows_error; + WT_FILE_HANDLE_WIN *win_fh; + WT_SESSION_IMPL *session; LARGE_INTEGER size; - if (GetFileSizeEx(fh->filehandle, &size) != 0) { + win_fh = (WT_FILE_HANDLE_WIN *)file_handle; + session = (WT_SESSION_IMPL *)wt_session; + + if (GetFileSizeEx(win_fh->filehandle, &size) != 0) { *sizep = size.QuadPart; return (0); } - WT_RET_MSG(session, - __wt_getlasterror(), "%s: handle-size: GetFileSizeEx", fh->name); + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: handle-size: GetFileSizeEx: %s", + file_handle->name, __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } /* - * __win_handle_sync -- - * MSVC fflush/fsync. + * __win_file_sync -- + * MSVC fsync. */ static int -__win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +__win_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session) { - WT_DECL_RET; + DWORD windows_error; + WT_FILE_HANDLE_WIN *win_fh; + WT_SESSION_IMPL *session; - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + win_fh = (WT_FILE_HANDLE_WIN *)file_handle; + session = (WT_SESSION_IMPL *)wt_session; /* * We don't open Windows system handles when opening directories @@ -398,76 +313,87 @@ __win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) * a directory on Windows. Confirm the file handle is set before * attempting to sync it. */ - if (fh->fp == NULL && fh->filehandle == INVALID_HANDLE_VALUE) + if (win_fh->filehandle == INVALID_HANDLE_VALUE) return (0); - if (fh->fp == NULL) { - /* - * Callers attempting asynchronous flush handle ENOTSUP returns, - * and won't make further attempts. 
- */ - if (!block) - return (ENOTSUP); - - if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE) - WT_RET_MSG(session, __wt_getlasterror(), - "%s handle-sync: FlushFileBuffers error", fh->name); - return (0); + if (FlushFileBuffers(win_fh->filehandle) == FALSE) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s handle-sync: FlushFileBuffers: %s", + file_handle->name, + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } - - if (fflush(fh->fp) == 0) - return (0); - WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); + return (0); } /* - * __win_handle_truncate -- + * __win_file_truncate -- * Truncate a file. */ static int -__win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +__win_file_truncate( + WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t len) { - WT_DECL_RET; + DWORD windows_error; + WT_FILE_HANDLE_WIN *win_fh; + WT_SESSION_IMPL *session; LARGE_INTEGER largeint; + win_fh = (WT_FILE_HANDLE_WIN *)file_handle; + session = (WT_SESSION_IMPL *)wt_session; + largeint.QuadPart = len; - if (fh->filehandle_secondary == INVALID_HANDLE_VALUE) + if (win_fh->filehandle_secondary == INVALID_HANDLE_VALUE) WT_RET_MSG(session, EINVAL, - "%s: handle-truncate: read-only", fh->name); - - if (SetFilePointerEx( - fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE) - WT_RET_MSG(session, __wt_getlasterror(), - "%s: handle-truncate: SetFilePointerEx", fh->name); + "%s: handle-truncate: read-only", file_handle->name); + + if (SetFilePointerEx(win_fh->filehandle_secondary, + largeint, NULL, FILE_BEGIN) == FALSE) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: handle-truncate: SetFilePointerEx: %s", + file_handle->name, + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); + } - if (SetEndOfFile(fh->filehandle_secondary) == FALSE) { + if (SetEndOfFile(win_fh->filehandle_secondary) == FALSE) { if 
(GetLastError() == ERROR_USER_MAPPED_FILE) return (EBUSY); - WT_RET_MSG(session, __wt_getlasterror(), - "%s: handle-truncate: SetEndOfFile error", fh->name); + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: handle-truncate: SetEndOfFile: %s", + file_handle->name, + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } return (0); } /* - * __win_handle_write -- + * __win_file_write -- * Write a chunk. */ static int -__win_handle_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +__win_file_write(WT_FILE_HANDLE *file_handle, + WT_SESSION *wt_session, wt_off_t offset, size_t len, const void *buf) { - DWORD chunk; - DWORD nw; + DWORD chunk, nw, windows_error; const uint8_t *addr; OVERLAPPED overlapped = { 0 }; + WT_FILE_HANDLE_WIN *win_fh; + WT_SESSION_IMPL *session; + + win_fh = (WT_FILE_HANDLE_WIN *)file_handle; + session = (WT_SESSION_IMPL *)wt_session; nw = 0; /* Assert direct I/O is aligned and a multiple of the alignment. 
*/ WT_ASSERT(session, - !fh->direct_io || + !win_fh->direct_io || S2C(session)->buffer_alignment == 0 || (!((uintptr_t)buf & (uintptr_t)(S2C(session)->buffer_alignment - 1)) && @@ -480,38 +406,51 @@ __win_handle_write(WT_SESSION_IMPL *session, overlapped.Offset = UINT32_MAX & offset; overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); - if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped)) - WT_RET_MSG(session, __wt_getlasterror(), + if (!WriteFile( + win_fh->filehandle, addr, chunk, &nw, &overlapped)) { + windows_error = __wt_getlasterror(); + __wt_errx(session, "%s: handle-write: WriteFile: failed to write %lu " - "bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); + "bytes at offset %" PRIuMAX ": %s", + file_handle->name, chunk, (uintmax_t)offset, + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); + } } return (0); } /* - * __win_handle_open -- + * __win_open_file -- * Open a file handle. */ static int -__win_handle_open(WT_SESSION_IMPL *session, - WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) +__win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, + const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + WT_FILE_HANDLE **file_handlep) { - DWORD dwCreationDisposition; - HANDLE filehandle, filehandle_secondary; + DWORD dwCreationDisposition, windows_error; WT_CONNECTION_IMPL *conn; WT_DECL_RET; + WT_FILE_HANDLE *file_handle; + WT_FILE_HANDLE_WIN *win_fh; + WT_SESSION_IMPL *session; int desired_access, f; - bool direct_io; - const char *stream_mode; + WT_UNUSED(file_system); + + *file_handlep = NULL; + + session = (WT_SESSION_IMPL *)wt_session; conn = S2C(session); - direct_io = false; + + WT_RET(__wt_calloc_one(session, &win_fh)); + + win_fh->direct_io = false; /* Set up error handling. 
*/ - fh->filehandle = fh->filehandle_secondary = - filehandle = filehandle_secondary = INVALID_HANDLE_VALUE; - fh->fp = NULL; + win_fh->filehandle = + win_fh->filehandle_secondary = INVALID_HANDLE_VALUE; /* * Opening a file handle on a directory is only to support filesystems @@ -519,7 +458,7 @@ __win_handle_open(WT_SESSION_IMPL *session, * require that functionality: create an empty WT_FH structure with * invalid handles. */ - if (file_type == WT_FILE_TYPE_DIRECTORY) + if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY) goto directory_open; desired_access = GENERIC_READ; @@ -544,47 +483,44 @@ __win_handle_open(WT_SESSION_IMPL *session, } else dwCreationDisposition = OPEN_EXISTING; - /* - * direct_io means no OS file caching. This requires aligned buffer - * allocations like O_DIRECT. - */ - if (FLD_ISSET(conn->direct_io, file_type) || - (LF_ISSET(WT_OPEN_READONLY) && - file_type == WT_FILE_TYPE_DATA && - FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { + /* Direct I/O. */ + if (LF_ISSET(WT_OPEN_DIRECTIO)) { f |= FILE_FLAG_NO_BUFFERING; - direct_io = true; + win_fh->direct_io = true; } - fh->direct_io = direct_io; /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */ if (FLD_ISSET(conn->write_through, file_type)) f |= FILE_FLAG_WRITE_THROUGH; - if (file_type == WT_FILE_TYPE_LOG && + if (file_type == WT_OPEN_FILE_TYPE_LOG && FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) f |= FILE_FLAG_WRITE_THROUGH; /* Disable read-ahead on trees: it slows down random read workloads. 
*/ - if (file_type == WT_FILE_TYPE_DATA) + if (file_type == WT_OPEN_FILE_TYPE_DATA) f |= FILE_FLAG_RANDOM_ACCESS; - filehandle = CreateFileA(name, desired_access, + win_fh->filehandle = CreateFileA(name, desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, dwCreationDisposition, f, NULL); - if (filehandle == INVALID_HANDLE_VALUE) { + if (win_fh->filehandle == INVALID_HANDLE_VALUE) { if (LF_ISSET(WT_OPEN_CREATE) && GetLastError() == ERROR_FILE_EXISTS) - filehandle = CreateFileA(name, desired_access, + win_fh->filehandle = CreateFileA(name, desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, f, NULL); - if (filehandle == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_getlasterror(), - direct_io ? + if (win_fh->filehandle == INVALID_HANDLE_VALUE) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + win_fh->direct_io ? "%s: handle-open: CreateFileA: failed with direct " "I/O configured, some filesystem types do not " - "support direct I/O" : - "%s: handle-open: CreateFileA", name); + "support direct I/O: %s" : + "%s: handle-open: CreateFileA: %s", + name, __wt_formatmessage(session, windows_error)); + WT_ERR(__wt_map_windows_error(windows_error)); + } } /* @@ -593,78 +529,64 @@ __win_handle_open(WT_SESSION_IMPL *session, * pointer. */ if (!LF_ISSET(WT_OPEN_READONLY)) { - filehandle_secondary = CreateFileA(name, desired_access, + win_fh->filehandle_secondary = CreateFileA(name, desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, f, NULL); - if (filehandle_secondary == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_getlasterror(), - "%s: handle-open: CreateFileA: secondary", name); - } - - /* Optionally configure a stdio stream API. 
*/ - switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { - case WT_STREAM_APPEND: - f = _O_APPEND | _O_TEXT; - stream_mode = "a"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case WT_STREAM_READ: - f = _O_RDONLY | _O_TEXT; - stream_mode = "r"; - break; - case WT_STREAM_WRITE: - f = _O_TEXT; - stream_mode = "w"; - F_SET(fh, WT_FH_FLUSH_ON_CLOSE); - break; - case 0: - default: - stream_mode = NULL; - break; + if (win_fh->filehandle_secondary == INVALID_HANDLE_VALUE) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: handle-open: CreateFileA: secondary: %s", + name, __wt_formatmessage(session, windows_error)); + WT_ERR(__wt_map_windows_error(windows_error)); + } } - if (stream_mode != NULL) { - if ((fh->fp = fopen(name, stream_mode)) == NULL) - WT_ERR_MSG(session, __wt_errno(), - "%s: handle-open: fopen", name); - if (LF_ISSET(WT_STREAM_LINE_BUFFER)) - __wt_stream_set_line_buffer(fh->fp); - } +directory_open: + /* Initialize public information. */ + file_handle = (WT_FILE_HANDLE *)win_fh; + WT_ERR(__wt_strdup(session, name, &file_handle->name)); - /* Configure fallocate/posix_fallocate calls. */ - __win_handle_allocate_configure(session, fh); + file_handle->close = __win_file_close; + file_handle->fh_lock = __win_file_lock; +#ifdef WORDS_BIGENDIAN + /* + * The underlying objects are little-endian, mapping objects isn't + * currently supported on big-endian systems. 
+ */ +#else + file_handle->fh_map = __wt_win_map; + file_handle->fh_unmap = __wt_win_unmap; +#endif + file_handle->fh_read = __win_file_read; + file_handle->fh_size = __win_file_size; + file_handle->fh_sync = __win_file_sync; + file_handle->fh_truncate = __win_file_truncate; + file_handle->fh_write = __win_file_write; -directory_open: - fh->filehandle = filehandle; - fh->filehandle_secondary = filehandle_secondary; - - fh->fh_advise = __win_handle_advise; - fh->fh_allocate = __win_handle_allocate; - fh->fh_close = __win_handle_close; - fh->fh_getc = __win_handle_getc; - fh->fh_lock = __win_handle_lock; - fh->fh_map = __wt_win_map; - fh->fh_map_discard = __wt_win_map_discard; - fh->fh_map_preload = __wt_win_map_preload; - fh->fh_map_unmap = __wt_win_map_unmap; - fh->fh_printf = __win_handle_printf; - fh->fh_read = __win_handle_read; - fh->fh_size = __win_handle_size; - fh->fh_sync = __win_handle_sync; - fh->fh_truncate = __win_handle_truncate; - fh->fh_write = __win_handle_write; + *file_handlep = file_handle; return (0); -err: if (filehandle != INVALID_HANDLE_VALUE) - (void)CloseHandle(filehandle); - if (filehandle_secondary != INVALID_HANDLE_VALUE) - (void)CloseHandle(filehandle_secondary); - +err: WT_TRET(__win_file_close((WT_FILE_HANDLE *)win_fh, wt_session)); return (ret); } /* + * __win_terminate -- + * Discard a Windows configuration. + */ +static int +__win_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session) +{ + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + + __wt_free(session, file_system); + return (0); +} + +/* * __wt_os_win -- * Initialize a MSVC configuration. */ @@ -672,29 +594,24 @@ int __wt_os_win(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; + WT_FILE_SYSTEM *file_system; conn = S2C(session); - /* Initialize the POSIX jump table. 
*/ - conn->file_directory_list = __wt_win_directory_list; - conn->file_directory_sync = __win_directory_sync; - conn->file_exist = __win_file_exist; - conn->file_remove = __win_file_remove; - conn->file_rename = __win_file_rename; - conn->file_size = __win_file_size; - conn->handle_open = __win_handle_open; + WT_RET(__wt_calloc_one(session, &file_system)); - return (0); -} + /* Initialize the Windows jump table. */ + file_system->fs_directory_list = __wt_win_directory_list; + file_system->fs_directory_list_free = __wt_win_directory_list_free; + file_system->fs_exist = __win_fs_exist; + file_system->fs_open_file = __win_open_file; + file_system->fs_remove = __win_fs_remove; + file_system->fs_rename = __win_fs_rename; + file_system->fs_size = __wt_win_fs_size; + file_system->terminate = __win_terminate; -/* - * __wt_os_win_cleanup -- - * Discard a POSIX configuration. - */ -int -__wt_os_win_cleanup(WT_SESSION_IMPL *session) -{ - WT_UNUSED(session); + /* Switch it into place. */ + conn->file_system = file_system; return (0); } diff --git a/src/third_party/wiredtiger/src/os_win/os_getenv.c b/src/third_party/wiredtiger/src/os_win/os_getenv.c index 9b297ac3a74..fe228328ee6 100644 --- a/src/third_party/wiredtiger/src/os_win/os_getenv.c +++ b/src/third_party/wiredtiger/src/os_win/os_getenv.c @@ -15,22 +15,22 @@ int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) { - WT_DECL_RET; - DWORD size; + DWORD size, windows_error; *envp = NULL; - size = GetEnvironmentVariableA(variable, NULL, 0); - if (size <= 1) + if ((size = GetEnvironmentVariableA(variable, NULL, 0)) <= 1) return (WT_NOTFOUND); - WT_RET(__wt_calloc(session, 1, size, envp)); + WT_RET(__wt_malloc(session, (size_t)size, envp)); - ret = GetEnvironmentVariableA(variable, *envp, size); /* We expect the number of bytes not including nul terminator. 
*/ - if ((ret + 1) != size) - WT_RET_MSG(session, __wt_getlasterror(), - "GetEnvironmentVariableA failed: %s", variable); + if (GetEnvironmentVariableA(variable, *envp, size) == size - 1) + return (0); - return (0); + windows_error = __wt_getlasterror(); + __wt_errx(session, + "GetEnvironmentVariableA: %s: %s", + variable, __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } diff --git a/src/third_party/wiredtiger/src/os_win/os_map.c b/src/third_party/wiredtiger/src/os_win/os_map.c index b043f9c9923..8f5b289062d 100644 --- a/src/third_party/wiredtiger/src/os_win/os_map.c +++ b/src/third_party/wiredtiger/src/os_win/os_map.c @@ -13,106 +13,99 @@ * Map a file into memory. */ int -__wt_win_map(WT_SESSION_IMPL *session, - WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie) +__wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, + void *mapped_regionp, size_t *lenp, void *mapped_cookiep) { - WT_DECL_RET; + DWORD windows_error; + WT_FILE_HANDLE_WIN *win_fh; + WT_SESSION_IMPL *session; size_t len; wt_off_t file_size; - void *map; + void *map, *mapped_cookie; + + win_fh = (WT_FILE_HANDLE_WIN *)file_handle; + session = (WT_SESSION_IMPL *)wt_session; /* * There's no locking here to prevent the underlying file from changing * underneath us, our caller needs to ensure consistency of the mapped * region vs. any other file activity. 
*/ - WT_RET(__wt_filesize(session, fh, &file_size)); + WT_RET(__wt_win_fs_size(file_handle->file_system, + wt_session, file_handle->name, &file_size)); len = (size_t)file_size; (void)__wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len); - - *mappingcookie = - CreateFileMappingA(fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL); - if (*mappingcookie == NULL) - WT_RET_MSG(session, __wt_getlasterror(), - "%s: memory-map: CreateFileMappingA", fh->name); + "%s: memory-map: %" WT_SIZET_FMT " bytes", file_handle->name, len); + + mapped_cookie = CreateFileMappingA( + win_fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL); + if (mapped_cookie == NULL) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: memory-map: CreateFileMappingA: %s", + file_handle->name, + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); + } if ((map = - MapViewOfFile(*mappingcookie, FILE_MAP_READ, 0, 0, len)) == NULL) { + MapViewOfFile(mapped_cookie, FILE_MAP_READ, 0, 0, len)) == NULL) { /* Retrieve the error before cleaning up. */ - ret = __wt_getlasterror(); - CloseHandle(*mappingcookie); - *mappingcookie = NULL; + windows_error = __wt_getlasterror(); + + (void)CloseHandle(mapped_cookie); - WT_RET_MSG(session, ret, - "%s: memory-map: MapViewOfFile", fh->name); + __wt_errx(session, + "%s: memory-map: MapViewOfFile: %s", + file_handle->name, + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } - *(void **)mapp = map; + *(void **)mapped_cookiep = mapped_cookie; + *(void **)mapped_regionp = map; *lenp = len; return (0); } /* - * __wt_win_map_preload -- - * Cause a section of a memory map to be faulted in. 
- */ -int -__wt_win_map_preload( - WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) -{ - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(p); - WT_UNUSED(size); - - return (ENOTSUP); -} - -/* - * __wt_win_map_discard -- - * Discard a chunk of the memory map. - */ -int -__wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) -{ - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(p); - WT_UNUSED(size); - - return (ENOTSUP); -} - -/* - * __wt_win_map_unmap -- + * __wt_win_unmap -- * Remove a memory mapping. */ int -__wt_win_map_unmap(WT_SESSION_IMPL *session, - WT_FH *fh, void *map, size_t len, void **mappingcookie) +__wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, + void *mapped_region, size_t length, void *mapped_cookie) { + DWORD windows_error; WT_DECL_RET; + WT_FILE_HANDLE_WIN *win_fh; + WT_SESSION_IMPL *session; - (void)__wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len); - - WT_ASSERT(session, *mappingcookie != NULL); + win_fh = (WT_FILE_HANDLE_WIN *)file_handle; + session = (WT_SESSION_IMPL *)wt_session; - if (UnmapViewOfFile(map) == 0) { - ret = __wt_getlasterror(); - __wt_err(session, ret, - "%s: memory-unmap: UnmapViewOfFile", fh->name); + (void)__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: memory-unmap: %" WT_SIZET_FMT " bytes", + file_handle->name, length); + + if (UnmapViewOfFile(mapped_region) == 0) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: memory-unmap: UnmapViewOfFile: %s", + file_handle->name, + __wt_formatmessage(session, windows_error)); + ret = __wt_map_windows_error(windows_error); } - if (CloseHandle(*mappingcookie) == 0) { - ret = __wt_getlasterror(); - __wt_err(session, ret, - "%s: memory-unmap: CloseHandle", fh->name); + if (CloseHandle(*(void **)mapped_cookie) == 0) { + windows_error = __wt_getlasterror(); + __wt_errx(session, + "%s: memory-unmap: CloseHandle: %s", + file_handle->name, + 
__wt_formatmessage(session, windows_error)); + ret = __wt_map_windows_error(windows_error); } - *mappingcookie = NULL; - return (ret); } diff --git a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c index af4a5035076..8645fdaccb3 100644 --- a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c +++ b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c @@ -45,10 +45,10 @@ int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled) { - DWORD err, milliseconds; - WT_DECL_RET; - uint64_t milliseconds64; + BOOL sleepret; + DWORD milliseconds, windows_error; bool locked; + uint64_t milliseconds64; locked = false; @@ -88,33 +88,35 @@ __wt_cond_wait_signal( if (milliseconds == 0) milliseconds = 1; - ret = SleepConditionVariableCS( + sleepret = SleepConditionVariableCS( &cond->cond, &cond->mtx, milliseconds); } else - ret = SleepConditionVariableCS( + sleepret = SleepConditionVariableCS( &cond->cond, &cond->mtx, INFINITE); /* * SleepConditionVariableCS returns non-zero on success, 0 on timeout - * or failure. Check for timeout, else convert to a WiredTiger error - * value and fail. + * or failure. 
*/ - if (ret == 0) { - if ((err = GetLastError()) == ERROR_TIMEOUT) + if (sleepret == 0) { + windows_error = __wt_getlasterror(); + if (windows_error == ERROR_TIMEOUT) { *signalled = false; - else - ret = __wt_getlasterror(); - } else - ret = 0; + sleepret = 1; + } + } (void)__wt_atomic_subi32(&cond->waiters, 1); if (locked) LeaveCriticalSection(&cond->mtx); - if (ret == 0) + if (sleepret != 0) return (0); - WT_RET_MSG(session, ret, "SleepConditionVariableCS"); + + __wt_errx(session, "SleepConditionVariableCS: %s", + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } /* diff --git a/src/third_party/wiredtiger/src/os_win/os_path.c b/src/third_party/wiredtiger/src/os_win/os_path.c index e9532de2b38..220752ce7a1 100644 --- a/src/third_party/wiredtiger/src/os_win/os_path.c +++ b/src/third_party/wiredtiger/src/os_win/os_path.c @@ -19,7 +19,7 @@ __wt_absolute_path(const char *path) * Check for a drive name (for example, "D:"), allow both forward and * backward slashes. 
*/ - if (strlen(path) >= 3 && isalpha(path[0]) && path[1] == ':') + if (strlen(path) >= 3 && __wt_isalpha(path[0]) && path[1] == ':') path += 2; return (path[0] == '/' || path[0] == '\\'); } diff --git a/src/third_party/wiredtiger/src/os_win/os_thread.c b/src/third_party/wiredtiger/src/os_win/os_thread.c index 94c5a8b0ab2..a34dff776b6 100644 --- a/src/third_party/wiredtiger/src/os_win/os_thread.c +++ b/src/third_party/wiredtiger/src/os_win/os_thread.c @@ -21,7 +21,7 @@ __wt_thread_create(WT_SESSION_IMPL *session, if (*tidret != 0) return (0); - WT_RET_MSG(session, __wt_errno, "thread create: _beginthreadex"); + WT_RET_MSG(session, __wt_errno(), "thread create: _beginthreadex"); } /* @@ -31,19 +31,24 @@ __wt_thread_create(WT_SESSION_IMPL *session, int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) { - WT_DECL_RET; + DWORD windows_error; - if ((ret = WaitForSingleObject(tid, INFINITE)) != WAIT_OBJECT_0) - /* - * If we fail to wait, we will leak handles so do not continue - */ - WT_PANIC_RET(session, - ret == WAIT_FAILED ? __wt_getlasterror() : ret, - "thread join: WaitForSingleObject"); + if ((windows_error = + WaitForSingleObject(tid, INFINITE)) != WAIT_OBJECT_0) { + if (windows_error == WAIT_FAILED) + windows_error = __wt_getlasterror(); + __wt_errx(session, "thread join: WaitForSingleObject: %s", + __wt_formatmessage(session, windows_error)); + + /* If we fail to wait, we will leak handles, do not continue. 
*/ + return (WT_PANIC); + } if (CloseHandle(tid) == 0) { - WT_RET_MSG(session, - __wt_getlasterror(), "thread join: CloseHandle"); + windows_error = __wt_getlasterror(); + __wt_errx(session, "thread join: CloseHandle: %s", + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); } return (0); diff --git a/src/third_party/wiredtiger/src/os_win/os_winerr.c b/src/third_party/wiredtiger/src/os_win/os_winerr.c new file mode 100644 index 00000000000..70499580c48 --- /dev/null +++ b/src/third_party/wiredtiger/src/os_win/os_winerr.c @@ -0,0 +1,130 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_getlasterror -- + * Return GetLastError, or a relatively generic Windows error if the system + * error code isn't set. + */ +DWORD +__wt_getlasterror(void) +{ + DWORD windows_error; + + /* + * Check for ERROR_SUCCESS: + * It's easy to introduce a problem by calling the wrong error function, + * for example, this function when the MSVC function set the C runtime + * error value. Handle gracefully and always return an error. + */ + windows_error = GetLastError(); + return (windows_error == ERROR_SUCCESS ? + ERROR_INVALID_PARAMETER : windows_error); +} + +/* + * __wt_map_windows_error -- + * Map Windows errors to POSIX/ANSI errors. 
+ */ +int +__wt_map_windows_error(DWORD windows_error) +{ + static const struct { + int windows_error; + int posix_error; + } list[] = { + { ERROR_ACCESS_DENIED, EACCES }, + { ERROR_ALREADY_EXISTS, EEXIST }, + { ERROR_ARENA_TRASHED, EFAULT }, + { ERROR_BAD_COMMAND, EFAULT }, + { ERROR_BAD_ENVIRONMENT, EFAULT }, + { ERROR_BAD_FORMAT, EFAULT }, + { ERROR_BAD_NETPATH, ENOENT }, + { ERROR_BAD_NET_NAME, ENOENT }, + { ERROR_BAD_PATHNAME, ENOENT }, + { ERROR_BROKEN_PIPE, EPIPE }, + { ERROR_CANNOT_MAKE, EACCES }, + { ERROR_CHILD_NOT_COMPLETE, ECHILD }, + { ERROR_CURRENT_DIRECTORY, EACCES }, + { ERROR_DIRECT_ACCESS_HANDLE, EBADF }, + { ERROR_DIR_NOT_EMPTY, ENOTEMPTY }, + { ERROR_DISK_FULL, ENOSPC }, + { ERROR_DRIVE_LOCKED, EACCES }, + { ERROR_FAIL_I24, EACCES }, + { ERROR_FILENAME_EXCED_RANGE, ENOENT }, + { ERROR_FILE_EXISTS, EEXIST }, + { ERROR_FILE_NOT_FOUND, ENOENT }, + { ERROR_GEN_FAILURE, EFAULT }, + { ERROR_INVALID_ACCESS, EACCES }, + { ERROR_INVALID_BLOCK, EFAULT }, + { ERROR_INVALID_DATA, EFAULT }, + { ERROR_INVALID_DRIVE, ENOENT }, + { ERROR_INVALID_FUNCTION, EINVAL }, + { ERROR_INVALID_HANDLE, EBADF }, + { ERROR_INVALID_PARAMETER, EINVAL }, + { ERROR_INVALID_TARGET_HANDLE, EBADF }, + { ERROR_LOCK_FAILED, EBUSY }, + { ERROR_LOCK_VIOLATION, EBUSY }, + { ERROR_MAX_THRDS_REACHED, EAGAIN }, + { ERROR_NEGATIVE_SEEK, EINVAL }, + { ERROR_NESTING_NOT_ALLOWED, EAGAIN }, + { ERROR_NETWORK_ACCESS_DENIED, EACCES }, + { ERROR_NOT_ENOUGH_MEMORY, ENOMEM }, + { ERROR_NOT_ENOUGH_QUOTA, ENOMEM }, + { ERROR_NOT_LOCKED, EACCES }, + { ERROR_NOT_READY, EBUSY }, + { ERROR_NOT_SAME_DEVICE, EXDEV }, + { ERROR_NO_DATA, EPIPE }, + { ERROR_NO_MORE_FILES, EMFILE }, + { ERROR_NO_PROC_SLOTS, EAGAIN }, + { ERROR_PATH_NOT_FOUND, ENOENT }, + { ERROR_READ_FAULT, EFAULT }, + { ERROR_RETRY, EINTR }, + { ERROR_SEEK_ON_DEVICE, EACCES }, + { ERROR_SHARING_VIOLATION, EBUSY }, + { ERROR_TOO_MANY_OPEN_FILES, EMFILE }, + { ERROR_WAIT_NO_CHILDREN, ECHILD }, + { ERROR_WRITE_FAULT, EFAULT }, + { 
ERROR_WRITE_PROTECT, EACCES }, + }; + int i; + + for (i = 0; i < WT_ELEMENTS(list); ++i) + if (windows_error == list[i].windows_error) + return (list[i].posix_error); + + /* Untranslatable error, go generic. */ + return (WT_ERROR); +} + +/* + * __wt_formatmessage -- + * Windows error formatting. + */ +const char * +__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error) +{ + /* + * !!! + * This function MUST handle a NULL session handle. + * + * Grow the session error buffer as necessary. + */ + if (session != NULL && + __wt_buf_initsize(session, &session->err, 512) == 0 && + FormatMessageA( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, windows_error, + 0, /* Let system choose the correct LANGID. */ + session->err.mem, (DWORD)512, NULL) != 0) + return (session->err.data); + + return ("Unable to format Windows error string"); +} diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 26123f6b66d..b49946bb10e 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -299,13 +299,13 @@ static int __rec_cell_build_ovfl(WT_SESSION_IMPL *, WT_RECONCILE *, WT_KV *, uint8_t, uint64_t); static int __rec_cell_build_val(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, uint64_t); -static int __rec_col_fix(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); +static int __rec_col_fix(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *); static int __rec_col_fix_slvg(WT_SESSION_IMPL *, - WT_RECONCILE *, WT_PAGE *, WT_SALVAGE_COOKIE *); -static int __rec_col_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); + WT_RECONCILE *, WT_REF *, WT_SALVAGE_COOKIE *); +static int __rec_col_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *); static int __rec_col_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_col_var(WT_SESSION_IMPL *, - WT_RECONCILE *, WT_PAGE *, WT_SALVAGE_COOKIE *); + WT_RECONCILE *, 
WT_REF *, WT_SALVAGE_COOKIE *); static int __rec_col_var_helper(WT_SESSION_IMPL *, WT_RECONCILE *, WT_SALVAGE_COOKIE *, WT_ITEM *, bool, uint8_t, uint64_t); static int __rec_destroy_session(WT_SESSION_IMPL *); @@ -383,24 +383,27 @@ __wt_reconcile(WT_SESSION_IMPL *session, mod->last_oldest_id = oldest_id; /* Initialize the reconciliation structure for each new run. */ - WT_RET(__rec_write_init( - session, ref, flags, salvage, &session->reconcile)); + if ((ret = __rec_write_init( + session, ref, flags, salvage, &session->reconcile)) != 0) { + WT_TRET(__wt_fair_unlock(session, &page->page_lock)); + return (ret); + } r = session->reconcile; /* Reconcile the page. */ switch (page->type) { case WT_PAGE_COL_FIX: if (salvage != NULL) - ret = __rec_col_fix_slvg(session, r, page, salvage); + ret = __rec_col_fix_slvg(session, r, ref, salvage); else - ret = __rec_col_fix(session, r, page); + ret = __rec_col_fix(session, r, ref); break; case WT_PAGE_COL_INT: WT_WITH_PAGE_INDEX(session, - ret = __rec_col_int(session, r, page)); + ret = __rec_col_int(session, r, ref)); break; case WT_PAGE_COL_VAR: - ret = __rec_col_var(session, r, page, salvage); + ret = __rec_col_var(session, r, ref, salvage); break; case WT_PAGE_ROW_INT: WT_WITH_PAGE_INDEX(session, @@ -630,12 +633,12 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) */ switch (page->type) { case WT_PAGE_COL_INT: - WT_RET(__wt_page_alloc(session, WT_PAGE_COL_INT, - 1, mod->mod_multi_entries, false, &next)); + WT_RET(__wt_page_alloc(session, + WT_PAGE_COL_INT, mod->mod_multi_entries, false, &next)); break; case WT_PAGE_ROW_INT: - WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_INT, - WT_RECNO_OOB, mod->mod_multi_entries, false, &next)); + WT_RET(__wt_page_alloc(session, + WT_PAGE_ROW_INT, mod->mod_multi_entries, false, &next)); break; WT_ILLEGAL_VALUE(session); } @@ -1038,6 +1041,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, bool append_origv, skipped; *updp = NULL; + append = NULL; /* 
-Wconditional-uninitialized */ btree = S2BT(session); page = r->page; @@ -2425,7 +2429,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); break; case SPLIT_TRACKING_RAW: - WT_ILLEGAL_VALUE(session); + return (__wt_illegal_value(session, NULL)); } /* @@ -2465,7 +2469,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, WT_SESSION *wt_session; size_t corrected_page_size, extra_skip, len, result_len; uint64_t recno; - uint32_t entry, i, result_slots, slots; + uint32_t entry, i, max_image_slot, result_slots, slots; bool last_block; uint8_t *dsk_start; @@ -2525,7 +2529,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, if (dsk->type == WT_PAGE_COL_VAR) recno = last->recno; - entry = slots = 0; + entry = max_image_slot = slots = 0; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { ++entry; @@ -2575,6 +2579,15 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, dsk->type == WT_PAGE_COL_VAR) r->raw_recnos[slots] = recno; r->raw_entries[slots] = entry; + + /* + * Don't create an image so large that any future update will + * cause a split in memory. Use half of the maximum size so + * we split very compressible pages that have reached the + * maximum size in memory into two equal blocks. + */ + if (len > (size_t)btree->maxmempage / 2) + max_image_slot = slots; } /* @@ -2634,21 +2647,32 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, ret = compressor->compress_raw(compressor, wt_session, r->page_size_orig, btree->split_pct, WT_BLOCK_COMPRESS_SKIP + extra_skip, - (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP, - r->raw_offsets, slots, + (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP, r->raw_offsets, + no_more_rows || max_image_slot == 0 ? 
slots : max_image_slot, (uint8_t *)dst->mem + WT_BLOCK_COMPRESS_SKIP, - result_len, no_more_rows, &result_len, &result_slots); + result_len, + no_more_rows || max_image_slot != 0, + &result_len, &result_slots); switch (ret) { case EAGAIN: /* - * The compression function wants more rows; accumulate and - * retry. + * The compression function wants more rows, accumulate and + * retry if possible. * - * Reset the resulting slots count, just in case the compression - * function modified it before giving up. + * First, reset the resulting slots count, just in case the + * compression function modified it before giving up. */ result_slots = 0; - break; + + /* + * If the image is too large and there are more rows to gather, + * act as if the compression engine gave up on this chunk of + * data. That doesn't make sense (we flagged the engine that we + * wouldn't give it any more rows, but it's a possible return). + */ + if (no_more_rows || max_image_slot == 0) + break; + /* FALLTHROUGH */ case 0: /* * If the compression function returned zero result slots, it's @@ -2936,7 +2960,6 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) * wrote anything, or there's a remaindered block of data. */ break; - WT_ILLEGAL_VALUE(session); } /* @@ -3307,6 +3330,8 @@ supd_check_complete: } bnd->entries = r->entries; + +#ifdef HAVE_VERBOSE /* Output a verbose message if we create a page without many entries */ if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6) WT_ERR(__wt_verbose(session, WT_VERB_SPLIT, @@ -3316,6 +3341,7 @@ supd_check_complete: r->entries, r->page->memory_footprint, r->bnd_next, F_ISSET(r, WT_EVICTING) ? 
"evict" : "checkpoint", r->bnd_state)); +#endif WT_ERR(__wt_bt_write(session, buf, addr, &addr_size, false, bnd->already_compressed)); @@ -3431,7 +3457,7 @@ __rec_update_las(WT_SESSION_IMPL *session, case WT_PAGE_ROW_LEAF: if (list->ins == NULL) { slot = WT_ROW_SLOT(page, list->rip); - upd = page->pg_row_upd[slot]; + upd = page->modify->mod_row_update[slot]; } else upd = list->ins->upd; break; @@ -3504,6 +3530,7 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) r = cbulk->reconcile; r->is_bulk_load = true; + recno = WT_RECNO_OOB; /* -Werror=maybe-uninitialized */ switch (btree->type) { case BTREE_COL_FIX: case BTREE_COL_VAR: @@ -3512,7 +3539,6 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) case BTREE_ROW: recno = WT_RECNO_OOB; break; - WT_ILLEGAL_VALUE(session); } return (__rec_split_init( @@ -3546,7 +3572,6 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) break; case BTREE_ROW: break; - WT_ILLEGAL_VALUE(session); } WT_RET(__rec_split_finish(session, r)); @@ -3787,7 +3812,7 @@ __rec_vtype(WT_ADDR *addr) * Reconcile a column-store internal page. */ static int -__rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) +__rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) { WT_ADDR *addr; WT_BTREE *btree; @@ -3795,11 +3820,12 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_CHILD_STATE state; WT_DECL_RET; WT_KV *val; - WT_PAGE *child; + WT_PAGE *child, *page; WT_REF *ref; bool hazard; btree = S2BT(session); + page = pageref->page; child = NULL; hazard = false; @@ -3807,12 +3833,12 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) vpack = &_vpack; WT_RET(__rec_split_init( - session, r, page, page->pg_intl_recno, btree->maxintlpage)); + session, r, page, pageref->ref_recno, btree->maxintlpage)); /* For each entry in the in-memory page... 
*/ WT_INTL_FOREACH_BEGIN(session, page, ref) { /* Update the starting record number in case we split. */ - r->recno = ref->key.recno; + r->recno = ref->ref_recno; /* * Modified child. @@ -3886,7 +3912,7 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) } else __rec_cell_build_addr(session, r, addr->addr, addr->size, - __rec_vtype(addr), ref->key.recno); + __rec_vtype(addr), ref->ref_recno); WT_CHILD_RELEASE_ERR(session, hazard, ref); /* Boundary: split or write the page. */ @@ -3951,31 +3977,34 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * Reconcile a fixed-width, column-store leaf page. */ static int -__rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) +__rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) { WT_BTREE *btree; WT_INSERT *ins; + WT_PAGE *page; WT_UPDATE *upd; uint64_t recno; uint32_t entry, nrecs; btree = S2BT(session); + page = pageref->page; WT_RET(__rec_split_init( - session, r, page, page->pg_fix_recno, btree->maxleafpage)); + session, r, page, pageref->ref_recno, btree->maxleafpage)); + + /* Copy the original, disk-image bytes into place. */ + memcpy(r->first_free, page->pg_fix_bitf, + __bitstr_size((size_t)page->pg_fix_entries * btree->bitcnt)); /* Update any changes to the original on-page data items. */ WT_SKIP_FOREACH(ins, WT_COL_UPDATE_SINGLE(page)) { WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, &upd)); if (upd != NULL) - __bit_setv_recno(page, WT_INSERT_RECNO(ins), - btree->bitcnt, ((uint8_t *)WT_UPDATE_DATA(upd))[0]); + __bit_setv(r->first_free, + WT_INSERT_RECNO(ins) - pageref->ref_recno, + btree->bitcnt, *(uint8_t *)WT_UPDATE_DATA(upd)); } - /* Copy the updated, disk-image bytes into place. */ - memcpy(r->first_free, page->pg_fix_bitf, - __bitstr_size((size_t)page->pg_fix_entries * btree->bitcnt)); - /* Calculate the number of entries per page remainder. 
*/ entry = page->pg_fix_entries; nrecs = WT_FIX_BYTES_TO_ENTRIES( @@ -4002,7 +4031,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * the last key on this page, we have to decrement it. */ if ((recno = - page->modify->mod_split_recno) == WT_RECNO_OOB) + page->modify->mod_col_split_recno) == WT_RECNO_OOB) break; recno -= 1; @@ -4032,7 +4061,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) if (nrecs > 0) { __bit_setv(r->first_free, entry, btree->bitcnt, upd == NULL ? 0 : - ((uint8_t *)WT_UPDATE_DATA(upd))[0]); + *(uint8_t *)WT_UPDATE_DATA(upd)); --nrecs; ++entry; ++r->recno; @@ -4076,13 +4105,15 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) */ static int __rec_col_fix_slvg(WT_SESSION_IMPL *session, - WT_RECONCILE *r, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage) + WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage) { WT_BTREE *btree; + WT_PAGE *page; uint64_t page_start, page_take; uint32_t entry, nrecs; btree = S2BT(session); + page = pageref->page; /* * !!! @@ -4097,7 +4128,7 @@ __rec_col_fix_slvg(WT_SESSION_IMPL *session, * don't want to have to retrofit the code later. */ WT_RET(__rec_split_init( - session, r, page, page->pg_fix_recno, btree->maxleafpage)); + session, r, page, pageref->ref_recno, btree->maxleafpage)); /* We may not be taking all of the entries on the original page. */ page_take = salvage->take == 0 ? 
page->pg_fix_entries : salvage->take; @@ -4220,7 +4251,7 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, */ static int __rec_col_var(WT_SESSION_IMPL *session, - WT_RECONCILE *r, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage) + WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage) { enum { OVFL_IGNORE, OVFL_UNUSED, OVFL_USED } ovfl_state; WT_BTREE *btree; @@ -4231,6 +4262,7 @@ __rec_col_var(WT_SESSION_IMPL *session, WT_DECL_RET; WT_INSERT *ins; WT_ITEM *last; + WT_PAGE *page; WT_UPDATE *upd; uint64_t n, nrepeat, repeat_count, rle, skip, src_recno; uint32_t i, size; @@ -4238,17 +4270,18 @@ __rec_col_var(WT_SESSION_IMPL *session, const void *data; btree = S2BT(session); + page = pageref->page; last = r->last; vpack = &_vpack; + WT_RET(__rec_split_init( + session, r, page, pageref->ref_recno, btree->maxleafpage)); + WT_RET(__wt_scr_alloc(session, 0, &orig)); data = NULL; size = 0; upd = NULL; - WT_RET(__rec_split_init( - session, r, page, page->pg_var_recno, btree->maxleafpage)); - /* * The salvage code may be calling us to reconcile a page where there * were missing records in the column-store name space. If taking the @@ -4561,7 +4594,8 @@ compare: /* * first key on the split page, that is, one larger than * the last key on this page, we have to decrement it. */ - if ((n = page->modify->mod_split_recno) == WT_RECNO_OOB) + if ((n = page-> + modify->mod_col_split_recno) == WT_RECNO_OOB) break; WT_ASSERT(session, n >= src_recno); n -= 1; @@ -4990,8 +5024,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session, * Temporary buffers in which to instantiate any uninstantiated keys * or value items we need. */ - WT_RET(__wt_scr_alloc(session, 0, &tmpkey)); - WT_RET(__wt_scr_alloc(session, 0, &tmpval)); + WT_ERR(__wt_scr_alloc(session, 0, &tmpkey)); + WT_ERR(__wt_scr_alloc(session, 0, &tmpval)); /* For each entry in the page... 
*/ WT_ROW_FOREACH(page, rip, i) { @@ -5151,7 +5185,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, * can't remove them from the in-memory * tree; if an overflow key was deleted * without being instantiated (for - * example, cursor-based truncation, do + * example, cursor-based truncation), do * it now. */ if (ikey == NULL) @@ -5430,18 +5464,24 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page) __wt_free(session, multi->key.ikey); break; } - if (multi->disk_image == NULL) { - if (multi->addr.reuse) - multi->addr.addr = NULL; - else { - WT_RET(__wt_btree_block_free(session, - multi->addr.addr, multi->addr.size)); - __wt_free(session, multi->addr.addr); - } - } else { - __wt_free(session, multi->supd); - __wt_free(session, multi->disk_image); + + /* + * If the page was re-written free the backing disk blocks used + * in the previous write (unless the blocks were reused in this + * write). The page may instead have been a disk image with + * associated saved updates: ownership of the disk image is + * transferred when rewriting the page in-memory and there may + * not have been saved updates. We've gotten this wrong a few + * times, so use the existence of an address to confirm backing + * blocks we care about, and free any disk image/saved updates. + */ + if (multi->addr.addr != NULL && !multi->addr.reuse) { + WT_RET(__wt_btree_block_free( + session, multi->addr.addr, multi->addr.size)); + __wt_free(session, multi->addr.addr); } + __wt_free(session, multi->supd); + __wt_free(session, multi->disk_image); } __wt_free(session, mod->mod_multi); mod->mod_multi_entries = 0; diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c index 756f1fdcc6c..f250612d0ae 100644 --- a/src/third_party/wiredtiger/src/schema/schema_create.c +++ b/src/third_party/wiredtiger/src/schema/schema_create.c @@ -35,7 +35,7 @@ __wt_direct_io_size_check(WT_SESSION_IMPL *session, * units of its happy place. 
*/ if (FLD_ISSET(conn->direct_io, - WT_FILE_TYPE_CHECKPOINT | WT_FILE_TYPE_DATA)) { + WT_DIRECT_IO_CHECKPOINT | WT_DIRECT_IO_DATA)) { align = (int64_t)conn->buffer_alignment; if (align != 0 && (cval.val < align || cval.val % align != 0)) WT_RET_MSG(session, EINVAL, @@ -578,7 +578,7 @@ __create_table(WT_SESSION_IMPL *session, WT_ERR(EEXIST); exists = true; } - WT_RET_NOTFOUND_OK(ret); + WT_ERR_NOTFOUND_OK(ret); WT_ERR(__wt_config_gets(session, cfg, "colgroups", &cval)); WT_ERR(__wt_config_subinit(session, &conf, &cval)); diff --git a/src/third_party/wiredtiger/src/schema/schema_list.c b/src/third_party/wiredtiger/src/schema/schema_list.c index 5e9caf94b7a..79e3ef1da7c 100644 --- a/src/third_party/wiredtiger/src/schema/schema_list.c +++ b/src/third_party/wiredtiger/src/schema/schema_list.c @@ -20,6 +20,8 @@ __schema_add_table(WT_SESSION_IMPL *session, WT_TABLE *table; uint64_t bucket; + table = NULL; /* -Wconditional-uninitialized */ + /* Make sure the metadata is open before getting other locks. 
*/ WT_RET(__wt_metadata_cursor(session, NULL)); diff --git a/src/third_party/wiredtiger/src/schema/schema_open.c b/src/third_party/wiredtiger/src/schema/schema_open.c index e7ce4e42498..1554d021953 100644 --- a/src/third_party/wiredtiger/src/schema/schema_open.c +++ b/src/third_party/wiredtiger/src/schema/schema_open.c @@ -427,6 +427,8 @@ __schema_open_table(WT_SESSION_IMPL *session, const char *tconfig; char *tablename; + *tablep = NULL; + cursor = NULL; table = NULL; tablename = NULL; @@ -527,6 +529,8 @@ __wt_schema_get_colgroup(WT_SESSION_IMPL *session, const char *tablename, *tend; u_int i; + if (tablep != NULL) + *tablep = NULL; *colgroupp = NULL; tablename = uri; @@ -571,6 +575,8 @@ __wt_schema_get_index(WT_SESSION_IMPL *session, const char *tablename, *tend; u_int i; + if (tablep != NULL) + *tablep = NULL; *indexp = NULL; tablename = uri; diff --git a/src/third_party/wiredtiger/src/schema/schema_project.c b/src/third_party/wiredtiger/src/schema/schema_project.c index 4d29b2baa13..fd59539ae89 100644 --- a/src/third_party/wiredtiger/src/schema/schema_project.c +++ b/src/third_party/wiredtiger/src/schema/schema_project.c @@ -353,7 +353,8 @@ __wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp, /* Make sure the types are compatible. */ WT_ASSERT(session, - tolower(pv.type) == tolower(vpv.type)); + __wt_tolower((u_char)pv.type) == + __wt_tolower((u_char)vpv.type)); pv.u = vpv.u; len = __pack_size(session, &pv); @@ -459,7 +460,8 @@ __wt_schema_project_merge(WT_SESSION_IMPL *session, WT_RET(__pack_next(&vpack, &vpv)); /* Make sure the types are compatible. 
*/ WT_ASSERT(session, - tolower(pv.type) == tolower(vpv.type)); + __wt_tolower((u_char)pv.type) == + __wt_tolower((u_char)vpv.type)); vpv.u = pv.u; len = __pack_size(session, &vpv); WT_RET(__wt_buf_grow(session, diff --git a/src/third_party/wiredtiger/src/schema/schema_rename.c b/src/third_party/wiredtiger/src/schema/schema_rename.c index 21402ed9332..8f4d374fd22 100644 --- a/src/third_party/wiredtiger/src/schema/schema_rename.c +++ b/src/third_party/wiredtiger/src/schema/schema_rename.c @@ -55,7 +55,7 @@ __rename_file( default: WT_ERR(ret); } - WT_ERR(__wt_exist(session, newfile, &exist)); + WT_ERR(__wt_fs_exist(session, newfile, &exist)); if (exist) WT_ERR_MSG(session, EEXIST, "%s", newfile); @@ -64,7 +64,7 @@ __rename_file( WT_ERR(__wt_metadata_insert(session, newuri, oldvalue)); /* Rename the underlying file. */ - WT_ERR(__wt_rename(session, filename, newfile)); + WT_ERR(__wt_fs_rename(session, filename, newfile)); if (WT_META_TRACKING(session)) WT_ERR(__wt_meta_track_fileop(session, uri, newuri)); diff --git a/src/third_party/wiredtiger/src/schema/schema_stat.c b/src/third_party/wiredtiger/src/schema/schema_stat.c index d3d0605c60a..c204d6b1a24 100644 --- a/src/third_party/wiredtiger/src/schema/schema_stat.c +++ b/src/third_party/wiredtiger/src/schema/schema_stat.c @@ -69,6 +69,7 @@ __curstat_size_only(WT_SESSION_IMPL *session, WT_ITEM namebuf; wt_off_t filesize; char *tableconf; + bool exist; WT_CLEAR(namebuf); *was_fast = false; @@ -96,10 +97,11 @@ __curstat_size_only(WT_SESSION_IMPL *session, * are concurrent schema level operations (for example drop). That is * fine - failing here results in falling back to the slow path of * opening the handle. - * !!! Deliberately discard the return code from a failed call - the - * error is flagged by not setting fast to true. 
*/ - if (__wt_filesize_name(session, namebuf.data, true, &filesize) == 0) { + WT_ERR(__wt_fs_exist(session, namebuf.data, &exist)); + if (exist) { + WT_ERR(__wt_fs_size(session, namebuf.data, &filesize)); + /* Setup and populate the statistics structure */ __wt_stat_dsrc_init_single(&cst->u.dsrc_stats); cst->u.dsrc_stats.block_size = filesize; diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index eaa3781169b..77d1dc74c84 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -796,8 +796,8 @@ static int __session_join(WT_SESSION *wt_session, WT_CURSOR *join_cursor, WT_CURSOR *ref_cursor, const char *config) { - WT_CURSOR *firstcg; WT_CONFIG_ITEM cval; + WT_CURSOR *firstcg; WT_CURSOR_INDEX *cindex; WT_CURSOR_JOIN *cjoin; WT_CURSOR_TABLE *ctable; @@ -805,15 +805,18 @@ __session_join(WT_SESSION *wt_session, WT_CURSOR *join_cursor, WT_INDEX *idx; WT_SESSION_IMPL *session; WT_TABLE *table; + bool nested; uint64_t count; uint32_t bloom_bit_count, bloom_hash_count; uint8_t flags, range; - count = 0; - firstcg = NULL; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, join, config, cfg); + + firstcg = NULL; table = NULL; + nested = false; + count = 0; if (!WT_PREFIX_MATCH(join_cursor->uri, "join:")) WT_ERR_MSG(session, EINVAL, "not a join cursor"); @@ -828,19 +831,25 @@ __session_join(WT_SESSION *wt_session, WT_CURSOR *join_cursor, ctable = (WT_CURSOR_TABLE *)ref_cursor; table = ctable->table; firstcg = ctable->cg_cursors[0]; + } else if (WT_PREFIX_MATCH(ref_cursor->uri, "join:")) { + idx = NULL; + table = ((WT_CURSOR_JOIN *)ref_cursor)->table; + nested = true; } else - WT_ERR_MSG(session, EINVAL, "not an index or table cursor"); + WT_ERR_MSG(session, EINVAL, + "ref_cursor must be an index, table or join cursor"); - if (!F_ISSET(firstcg, WT_CURSTD_KEY_SET)) + if (firstcg != NULL && !F_ISSET(firstcg, 
WT_CURSTD_KEY_SET)) WT_ERR_MSG(session, EINVAL, "requires reference cursor be positioned"); cjoin = (WT_CURSOR_JOIN *)join_cursor; if (cjoin->table != table) WT_ERR_MSG(session, EINVAL, - "table for join cursor does not match table for index"); + "table for join cursor does not match table for " + "ref_cursor"); if (F_ISSET(ref_cursor, WT_CURSTD_JOINED)) WT_ERR_MSG(session, EINVAL, - "index cursor already used in a join"); + "cursor already used in a join"); /* "ge" is the default */ range = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ; @@ -879,15 +888,20 @@ __session_join(WT_SESSION *wt_session, WT_CURSOR *join_cursor, WT_ERR_MSG(session, EINVAL, "bloom_hash_count: value too large"); bloom_hash_count = (uint32_t)cval.val; - if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM)) { - if (count == 0) - WT_ERR_MSG(session, EINVAL, - "count must be nonzero when strategy=bloom"); - if (cjoin->entries_next == 0) - WT_ERR_MSG(session, EINVAL, - "the first joined cursor cannot specify " - "strategy=bloom"); - } + if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM) && count == 0) + WT_ERR_MSG(session, EINVAL, + "count must be nonzero when strategy=bloom"); + + WT_ERR(__wt_config_gets(session, cfg, "operation", &cval)); + if (cval.len != 0 && WT_STRING_MATCH("or", cval.str, cval.len)) + LF_SET(WT_CURJOIN_ENTRY_DISJUNCTION); + + if (nested && (count != 0 || range != WT_CURJOIN_END_EQ || + LF_ISSET(WT_CURJOIN_ENTRY_BLOOM))) + WT_ERR_MSG(session, EINVAL, + "joining a nested join cursor is incompatible with " + "setting \"strategy\", \"compare\" or \"count\""); + WT_ERR(__wt_curjoin_join(session, cjoin, idx, ref_cursor, flags, range, count, bloom_bit_count, bloom_hash_count)); /* @@ -1106,7 +1120,7 @@ __session_truncate(WT_SESSION *wt_session, if (!WT_STREQ(uri, "log:")) WT_ERR_MSG(session, EINVAL, "the truncate method should not specify any" - "target after the log: URI prefix."); + "target after the log: URI prefix"); WT_ERR(__wt_log_truncate_files(session, start, cfg)); } else if (WT_PREFIX_MATCH(uri, "file:")) 
WT_ERR(__wt_session_range_truncate( @@ -1509,11 +1523,11 @@ err: WT_TRET(__wt_writeunlock(session, txn_global->nsnap_rwlock)); } /* - * __session_strerror -- + * __wt_session_strerror -- * WT_SESSION->strerror method. */ -static const char * -__session_strerror(WT_SESSION *wt_session, int error) +const char * +__wt_session_strerror(WT_SESSION *wt_session, int error) { WT_SESSION_IMPL *session; @@ -1536,7 +1550,7 @@ __open_session(WT_CONNECTION_IMPL *conn, NULL, __session_close, __session_reconfigure, - __session_strerror, + __wt_session_strerror, __session_open_cursor, __session_create, __wt_session_compact, @@ -1563,7 +1577,7 @@ __open_session(WT_CONNECTION_IMPL *conn, NULL, __session_close, __session_reconfigure, - __session_strerror, + __wt_session_strerror, __session_open_cursor, __session_create_readonly, __wt_session_compact_readonly, @@ -1672,7 +1686,7 @@ __open_session(WT_CONNECTION_IMPL *conn, * __wt_hazard_close ensures the array is cleared - so it is safe to * reset the starting size on each open. 
*/ - session_ret->hazard_size = WT_HAZARD_INCR; + session_ret->hazard_size = 0; /* * Configuration: currently, the configuration for open_session is the diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c index f64492f1561..93c0af37328 100644 --- a/src/third_party/wiredtiger/src/support/err.c +++ b/src/third_party/wiredtiger/src/support/err.c @@ -24,7 +24,7 @@ __handle_error_default(WT_EVENT_HANDLER *handler, session = (WT_SESSION_IMPL *)wt_session; WT_RET(__wt_fprintf(session, WT_STDERR(session), "%s\n", errmsg)); - WT_RET(__wt_fsync(session, WT_STDERR(session), true)); + WT_RET(__wt_fflush(session, WT_STDERR(session))); return (0); } @@ -42,7 +42,7 @@ __handle_message_default(WT_EVENT_HANDLER *handler, session = (WT_SESSION_IMPL *)wt_session; WT_RET(__wt_fprintf(session, WT_STDOUT(session), "%s\n", message)); - WT_RET(__wt_fsync(session, WT_STDOUT(session), true)); + WT_RET(__wt_fflush(session, WT_STDOUT(session))); return (0); } @@ -469,6 +469,9 @@ void __wt_assert(WT_SESSION_IMPL *session, int error, const char *file_name, int line_number, const char *fmt, ...) WT_GCC_FUNC_ATTRIBUTE((format (printf, 5, 6))) +#ifdef HAVE_DIAGNOSTIC + WT_GCC_FUNC_ATTRIBUTE((noreturn)) +#endif { va_list ap; @@ -493,7 +496,10 @@ __wt_panic(WT_SESSION_IMPL *session) F_SET(S2C(session), WT_CONN_PANIC); __wt_err(session, WT_PANIC, "the process must exit and restart"); -#if !defined(HAVE_DIAGNOSTIC) +#if defined(HAVE_DIAGNOSTIC) + __wt_abort(session); /* Drop core if testing. */ + /* NOTREACHED */ +#else /* * Chaos reigns within. * Reflect, repent, and reboot. @@ -501,9 +507,6 @@ __wt_panic(WT_SESSION_IMPL *session) */ return (WT_PANIC); #endif - - __wt_abort(session); /* Drop core if testing. */ - /* NOTREACHED */ } /* @@ -517,12 +520,12 @@ __wt_illegal_value(WT_SESSION_IMPL *session, const char *name) name == NULL ? "" : name, name == NULL ? 
"" : ": ", "encountered an illegal file format or internal value"); -#if !defined(HAVE_DIAGNOSTIC) - return (__wt_panic(session)); -#endif - +#if defined(HAVE_DIAGNOSTIC) __wt_abort(session); /* Drop core if testing. */ /* NOTREACHED */ +#else + return (__wt_panic(session)); +#endif } /* diff --git a/src/third_party/wiredtiger/src/support/global.c b/src/third_party/wiredtiger/src/support/global.c index e0d5bafeaa8..eba88bf2b20 100644 --- a/src/third_party/wiredtiger/src/support/global.c +++ b/src/third_party/wiredtiger/src/support/global.c @@ -111,11 +111,13 @@ void __wt_attach(WT_SESSION_IMPL *session) { #ifdef HAVE_ATTACH + u_int i; + __wt_errx(session, "process ID %" PRIdMAX ": waiting for debugger...", (intmax_t)getpid()); /* Sleep forever, the debugger will interrupt us when it attaches. */ - for (;;) + for (i = 0; i < WT_MILLION; ++i) __wt_sleep(10, 0); #else WT_UNUSED(session); diff --git a/src/third_party/wiredtiger/src/support/hash_city.c b/src/third_party/wiredtiger/src/support/hash_city.c index 7a700aa809c..8354532e820 100644 --- a/src/third_party/wiredtiger/src/support/hash_city.c +++ b/src/third_party/wiredtiger/src/support/hash_city.c @@ -85,6 +85,7 @@ static uint32_t UNALIGNED_LOAD32(const char *p) { return (result); } +#ifdef WORDS_BIGENDIAN #ifdef _MSC_VER #include <stdlib.h> @@ -132,7 +133,6 @@ static uint32_t UNALIGNED_LOAD32(const char *p) { #endif -#ifdef WORDS_BIGENDIAN #define uint32_in_expected_order(x) (bswap_32(x)) #define uint64_in_expected_order(x) (bswap_64(x)) #else diff --git a/src/third_party/wiredtiger/src/support/hazard.c b/src/third_party/wiredtiger/src/support/hazard.c index 13e0eb3b9ac..dee85586a4d 100644 --- a/src/third_party/wiredtiger/src/support/hazard.c +++ b/src/third_party/wiredtiger/src/support/hazard.c @@ -121,7 +121,8 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp return (0); } - __wt_errx(session, "session %p: hazard pointer table full", session); + __wt_errx(session, + "session %p: hazard 
pointer table full", (void *)session); #ifdef HAVE_DIAGNOSTIC __hazard_dump(session); #endif @@ -176,7 +177,8 @@ __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page) * because using a page we didn't have pinned down implies corruption. */ WT_PANIC_RET(session, EINVAL, - "session %p: clear hazard pointer: %p: not found", session, page); + "session %p: clear hazard pointer: %p: not found", + (void *)session, (void *)page); } /* @@ -204,7 +206,8 @@ __wt_hazard_close(WT_SESSION_IMPL *session) return; __wt_errx(session, - "session %p: close hazard pointer table: table not empty", session); + "session %p: close hazard pointer table: table not empty", + (void *)session); #ifdef HAVE_DIAGNOSTIC __hazard_dump(session); @@ -232,7 +235,7 @@ __wt_hazard_close(WT_SESSION_IMPL *session) __wt_errx(session, "session %p: close hazard pointer table: count didn't " "match entries", - session); + (void *)session); } #ifdef HAVE_DIAGNOSTIC @@ -250,6 +253,7 @@ __hazard_dump(WT_SESSION_IMPL *session) if (hp->page != NULL) __wt_errx(session, "session %p: hazard pointer %p: %s, line %d", - session, hp->page, hp->file, hp->line); + (void *)session, + (void *)hp->page, hp->file, hp->line); } #endif diff --git a/src/third_party/wiredtiger/src/support/hex.c b/src/third_party/wiredtiger/src/support/hex.c index d42a84154ca..5c48ce8b74a 100644 --- a/src/third_party/wiredtiger/src/support/hex.c +++ b/src/third_party/wiredtiger/src/support/hex.c @@ -84,7 +84,7 @@ __wt_raw_to_esc_hex( WT_RET(__wt_buf_init(session, to, size * 3 + 1)); for (p = from, t = to->mem, i = size; i > 0; --i, ++p) - if (isprint((int)*p)) { + if (__wt_isprint((u_char)*p)) { if (*p == '\\') *t++ = '\\'; *t++ = *p; diff --git a/src/third_party/wiredtiger/src/support/huffman.c b/src/third_party/wiredtiger/src/support/huffman.c index 1e1aaeab5b5..05612cdbe80 100644 --- a/src/third_party/wiredtiger/src/support/huffman.c +++ b/src/third_party/wiredtiger/src/support/huffman.c @@ -230,19 +230,19 @@ set_codes(WT_FREQTREE_NODE 
*node, * lower-order bits for consecutive numbering. */ if (len < MAX_CODE_LENGTH && - ((half = 1 << (remaining - 1)) < node->left->weight || - half < node->right->weight)) { - pattern = pattern << remaining; + ((half = (uint16_t)(1 << (remaining - 1))) < + node->left->weight || half < node->right->weight)) { + pattern = (uint16_t)(pattern << remaining); len = MAX_CODE_LENGTH; } if (len < MAX_CODE_LENGTH) { - patternleft = (pattern << 1) | 0; - patternright = (pattern << 1) | 1; + patternleft = (uint16_t)((pattern << 1) | 0); + patternright = (uint16_t)((pattern << 1) | 1); len++; } else { /* "low bit mode" */ patternleft = pattern; - patternright = pattern + node->left->weight; + patternright = (uint16_t)(pattern + node->left->weight); /* len unchanged */ } @@ -284,12 +284,12 @@ make_table(WT_SESSION_IMPL *session, uint8_t *code2symbol, * than necessary, we allocate (2 ^ max-code-length) of them. */ c = codes[i].pattern; - shift = max_depth - len; + shift = (uint8_t)(max_depth - len); c1 = (uint32_t)c << shift; c2 = (uint32_t)(c + 1) << shift; for (j = c1; j < c2; j++) { WT_ASSERT(session, code2symbol[j] == 0); - code2symbol[j] = i; + code2symbol[j] = (uint8_t)i; } } } @@ -694,7 +694,7 @@ __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, * used in the last byte, unless they're 0, in which case there are 8 * bits used in the last byte. */ - padding_info = (bitpos % 8) << (8 - WT_HUFFMAN_HEADER); + padding_info = (uint8_t)((bitpos % 8) << (8 - WT_HUFFMAN_HEADER)); ((uint8_t *)tmp->mem)[0] |= padding_info; /* Copy result of exact known size into caller's buffer. */ @@ -808,11 +808,12 @@ __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, valid += 8; from_bytes--; } - pattern = valid >= max ? /* short patterns near end */ - (bits >> (valid - max)) : (bits << (max - valid)); + pattern = (uint16_t) + (valid >= max ? 
/* short patterns near end */ + (bits >> (valid - max)) : (bits << (max - valid))); symbol = huffman->code2symbol[pattern & mask]; len = huffman->codes[symbol].length; - valid -= len; + valid -= (uint8_t)len; /* * from_len_bits is the total number of input bits, reduced by diff --git a/src/third_party/wiredtiger/src/support/scratch.c b/src/third_party/wiredtiger/src/support/scratch.c index aea98dc49ef..69987ebc852 100644 --- a/src/third_party/wiredtiger/src/support/scratch.c +++ b/src/third_party/wiredtiger/src/support/scratch.c @@ -117,7 +117,7 @@ __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) p = (char *)((uint8_t *)buf->mem + buf->size); WT_ASSERT(session, buf->memsize >= buf->size); space = buf->memsize - buf->size; - len = (size_t)vsnprintf(p, (size_t)space, fmt, ap); + len = (size_t)vsnprintf(p, space, fmt, ap); va_end(ap); /* Check if there was enough space. */ @@ -135,6 +135,64 @@ __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) } /* + * __wt_buf_set_printable -- + * Set the contents of the buffer to a printable representation of a + * byte string. + */ +const char * +__wt_buf_set_printable( + WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf) +{ + if (__wt_raw_to_esc_hex(session, p, size, buf)) { + buf->data = "[Error]"; + buf->size = strlen("[Error]"); + } + return (buf->data); +} + +/* + * __wt_buf_set_size -- + * Set the contents of the buffer to a printable representation of a + * byte size. 
+ */ +const char * +__wt_buf_set_size( + WT_SESSION_IMPL *session, uint64_t size, bool exact, WT_ITEM *buf) +{ + WT_DECL_RET; + + if (size >= WT_EXABYTE) + ret = __wt_buf_fmt(session, buf, + "%" PRIu64 "EB", size / WT_EXABYTE); + else if (size >= WT_PETABYTE) + ret = __wt_buf_fmt(session, buf, + "%" PRIu64 "PB", size / WT_PETABYTE); + else if (size >= WT_TERABYTE) + ret = __wt_buf_fmt(session, buf, + "%" PRIu64 "TB", size / WT_TERABYTE); + else if (size >= WT_GIGABYTE) + ret = __wt_buf_fmt(session, buf, + "%" PRIu64 "GB", size / WT_GIGABYTE); + else if (size >= WT_MEGABYTE) + ret = __wt_buf_fmt(session, buf, + "%" PRIu64 "MB", size / WT_MEGABYTE); + else if (size >= WT_KILOBYTE) + ret = __wt_buf_fmt(session, buf, + "%" PRIu64 "KB", size / WT_KILOBYTE); + else + ret = __wt_buf_fmt(session, buf, "%" PRIu64 "B", size); + + if (ret == 0 && exact && size >= WT_KILOBYTE) + ret = __wt_buf_catfmt(session, buf, " (%" PRIu64 ")", size); + + if (ret != 0) { + buf->data = "[Error]"; + buf->size = strlen("[Error]"); + } + return (buf->data); +} + +/* * __wt_scr_alloc_func -- * Scratch buffer allocation function. 
*/ diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 2f5609567da..d972f0c140f 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -43,7 +43,6 @@ static const char * const __stats_dsrc_desc[] = { "btree: pages rewritten by compaction", "btree: row-store internal pages", "btree: row-store leaf pages", - "cache: bytes currently in the cache", "cache: bytes read into cache", "cache: bytes written from cache", "cache: checkpoint blocked page eviction", @@ -61,6 +60,7 @@ static const char * const __stats_dsrc_desc[] = { "cache: page written requiring lookaside records", "cache: pages read into cache", "cache: pages read into cache requiring lookaside entries", + "cache: pages requested from the cache", "cache: pages written from cache", "cache: pages written requiring in-memory restoration", "cache: unmodified pages evicted", @@ -173,7 +173,6 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->btree_compact_rewrite = 0; stats->btree_row_internal = 0; stats->btree_row_leaf = 0; - /* not clearing cache_bytes_inuse */ stats->cache_bytes_read = 0; stats->cache_bytes_write = 0; stats->cache_eviction_checkpoint = 0; @@ -191,6 +190,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->cache_write_lookaside = 0; stats->cache_read = 0; stats->cache_read_lookaside = 0; + stats->cache_pages_requested = 0; stats->cache_write = 0; stats->cache_write_restore = 0; stats->cache_eviction_clean = 0; @@ -300,7 +300,6 @@ __wt_stat_dsrc_aggregate_single( to->btree_compact_rewrite += from->btree_compact_rewrite; to->btree_row_internal += from->btree_row_internal; to->btree_row_leaf += from->btree_row_leaf; - to->cache_bytes_inuse += from->cache_bytes_inuse; to->cache_bytes_read += from->cache_bytes_read; to->cache_bytes_write += from->cache_bytes_write; to->cache_eviction_checkpoint += from->cache_eviction_checkpoint; @@ -319,6 +318,7 @@ 
__wt_stat_dsrc_aggregate_single( to->cache_write_lookaside += from->cache_write_lookaside; to->cache_read += from->cache_read; to->cache_read_lookaside += from->cache_read_lookaside; + to->cache_pages_requested += from->cache_pages_requested; to->cache_write += from->cache_write; to->cache_write_restore += from->cache_write_restore; to->cache_eviction_clean += from->cache_eviction_clean; @@ -433,7 +433,6 @@ __wt_stat_dsrc_aggregate( WT_STAT_READ(from, btree_compact_rewrite); to->btree_row_internal += WT_STAT_READ(from, btree_row_internal); to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf); - to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse); to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read); to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write); to->cache_eviction_checkpoint += @@ -459,6 +458,8 @@ __wt_stat_dsrc_aggregate( WT_STAT_READ(from, cache_write_lookaside); to->cache_read += WT_STAT_READ(from, cache_read); to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside); + to->cache_pages_requested += + WT_STAT_READ(from, cache_pages_requested); to->cache_write += WT_STAT_READ(from, cache_write); to->cache_write_restore += WT_STAT_READ(from, cache_write_restore); to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean); @@ -547,17 +548,25 @@ static const char * const __stats_connection_desc[] = { "cache: bytes read into cache", "cache: bytes written from cache", "cache: checkpoint blocked page eviction", + "cache: eviction calls to get a page", + "cache: eviction calls to get a page found queue empty", + "cache: eviction calls to get a page found queue empty after locking", "cache: eviction currently operating in aggressive mode", "cache: eviction server candidate queue empty when topping up", "cache: eviction server candidate queue not empty when topping up", "cache: eviction server evicting pages", "cache: eviction server populating queue, but not evicting pages", + "cache: eviction server skipped 
very large page", + "cache: eviction server slept, because we did not make progress with eviction", "cache: eviction server unable to reach eviction goal", "cache: eviction worker thread evicting pages", "cache: failed eviction of pages that exceeded the in-memory maximum", "cache: files with active eviction walks", "cache: files with new eviction walks started", "cache: hazard pointer blocked page eviction", + "cache: hazard pointer check calls", + "cache: hazard pointer check entries walked", + "cache: hazard pointer maximum array length", "cache: in-memory page passed criteria to be split", "cache: in-memory page splits", "cache: internal pages evicted", @@ -579,6 +588,7 @@ static const char * const __stats_connection_desc[] = { "cache: pages queued for urgent eviction", "cache: pages read into cache", "cache: pages read into cache requiring lookaside entries", + "cache: pages requested from the cache", "cache: pages seen by eviction walk", "cache: pages selected for eviction unable to be evicted", "cache: pages walked for eviction", @@ -600,6 +610,7 @@ static const char * const __stats_connection_desc[] = { "connection: pthread mutex condition wait calls", "connection: pthread mutex shared lock read-lock calls", "connection: pthread mutex shared lock write-lock calls", + "connection: total fsync I/Os", "connection: total read I/Os", "connection: total write I/Os", "cursor: cursor create calls", @@ -642,7 +653,9 @@ static const char * const __stats_connection_desc[] = { "log: log server thread advances write LSN", "log: log server thread write LSN walk skipped", "log: log sync operations", + "log: log sync time duration (usecs)", "log: log sync_dir operations", + "log: log sync_dir time duration (usecs)", "log: log write operations", "log: logging bytes consolidated", "log: maximum log file size", @@ -664,6 +677,9 @@ static const char * const __stats_connection_desc[] = { "reconciliation: split objects currently awaiting free", "session: open cursor count", 
"session: open session count", + "thread-state: active filesystem fsync calls", + "thread-state: active filesystem read calls", + "thread-state: active filesystem write calls", "thread-yield: page acquire busy blocked", "thread-yield: page acquire eviction blocked", "thread-yield: page acquire locked blocked", @@ -680,6 +696,10 @@ static const char * const __stats_connection_desc[] = { "transaction: transaction checkpoint total time (msecs)", "transaction: transaction checkpoints", "transaction: transaction failures due to cache overflow", + "transaction: transaction fsync calls for checkpoint after allocating the transaction ID", + "transaction: transaction fsync calls for checkpoint before allocating the transaction ID", + "transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)", + "transaction: transaction fsync duration for checkpoint before allocating the transaction ID (usecs)", "transaction: transaction range of IDs currently pinned", "transaction: transaction range of IDs currently pinned by a checkpoint", "transaction: transaction range of IDs currently pinned by named snapshots", @@ -750,17 +770,25 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_bytes_read = 0; stats->cache_bytes_write = 0; stats->cache_eviction_checkpoint = 0; + stats->cache_eviction_get_ref = 0; + stats->cache_eviction_get_ref_empty = 0; + stats->cache_eviction_get_ref_empty2 = 0; /* not clearing cache_eviction_aggressive_set */ stats->cache_eviction_queue_empty = 0; stats->cache_eviction_queue_not_empty = 0; stats->cache_eviction_server_evicting = 0; stats->cache_eviction_server_not_evicting = 0; + stats->cache_eviction_server_toobig = 0; + stats->cache_eviction_server_slept = 0; stats->cache_eviction_slow = 0; stats->cache_eviction_worker_evicting = 0; stats->cache_eviction_force_fail = 0; /* not clearing cache_eviction_walks_active */ stats->cache_eviction_walks_started = 0; stats->cache_eviction_hazard = 0; + 
stats->cache_hazard_checks = 0; + stats->cache_hazard_walks = 0; + stats->cache_hazard_max = 0; stats->cache_inmem_splittable = 0; stats->cache_inmem_split = 0; stats->cache_eviction_internal = 0; @@ -782,6 +810,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_pages_queued_oldest = 0; stats->cache_read = 0; stats->cache_read_lookaside = 0; + stats->cache_pages_requested = 0; stats->cache_eviction_pages_seen = 0; stats->cache_eviction_fail = 0; stats->cache_eviction_walk = 0; @@ -803,6 +832,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cond_wait = 0; stats->rwlock_read = 0; stats->rwlock_write = 0; + stats->fsync_io = 0; stats->read_io = 0; stats->write_io = 0; stats->cursor_create = 0; @@ -845,7 +875,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->log_write_lsn = 0; stats->log_write_lsn_skip = 0; stats->log_sync = 0; + stats->log_sync_duration = 0; stats->log_sync_dir = 0; + stats->log_sync_dir_duration = 0; stats->log_writes = 0; stats->log_slot_consolidated = 0; /* not clearing log_max_filesize */ @@ -867,6 +899,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing rec_split_stashed_objects */ /* not clearing session_cursor_open */ /* not clearing session_open */ + /* not clearing fsync_active */ + /* not clearing read_active */ + /* not clearing write_active */ stats->page_busy_blocked = 0; stats->page_forcible_evict_blocked = 0; stats->page_locked_blocked = 0; @@ -883,6 +918,10 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing txn_checkpoint_time_total */ stats->txn_checkpoint = 0; stats->txn_fail_cache = 0; + stats->txn_checkpoint_fsync_post = 0; + stats->txn_checkpoint_fsync_pre = 0; + stats->txn_checkpoint_fsync_post_duration = 0; + stats->txn_checkpoint_fsync_pre_duration = 0; /* not clearing txn_pinned_range */ /* not clearing txn_pinned_checkpoint_range */ /* not clearing txn_pinned_snapshot_range */ @@ 
-904,6 +943,8 @@ void __wt_stat_connection_aggregate( WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to) { + int64_t v; + to->lsm_work_queue_app += WT_STAT_READ(from, lsm_work_queue_app); to->lsm_work_queue_manager += WT_STAT_READ(from, lsm_work_queue_manager); @@ -944,6 +985,12 @@ __wt_stat_connection_aggregate( to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write); to->cache_eviction_checkpoint += WT_STAT_READ(from, cache_eviction_checkpoint); + to->cache_eviction_get_ref += + WT_STAT_READ(from, cache_eviction_get_ref); + to->cache_eviction_get_ref_empty += + WT_STAT_READ(from, cache_eviction_get_ref_empty); + to->cache_eviction_get_ref_empty2 += + WT_STAT_READ(from, cache_eviction_get_ref_empty2); to->cache_eviction_aggressive_set += WT_STAT_READ(from, cache_eviction_aggressive_set); to->cache_eviction_queue_empty += @@ -954,6 +1001,10 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, cache_eviction_server_evicting); to->cache_eviction_server_not_evicting += WT_STAT_READ(from, cache_eviction_server_not_evicting); + to->cache_eviction_server_toobig += + WT_STAT_READ(from, cache_eviction_server_toobig); + to->cache_eviction_server_slept += + WT_STAT_READ(from, cache_eviction_server_slept); to->cache_eviction_slow += WT_STAT_READ(from, cache_eviction_slow); to->cache_eviction_worker_evicting += WT_STAT_READ(from, cache_eviction_worker_evicting); @@ -965,6 +1016,10 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, cache_eviction_walks_started); to->cache_eviction_hazard += WT_STAT_READ(from, cache_eviction_hazard); + to->cache_hazard_checks += WT_STAT_READ(from, cache_hazard_checks); + to->cache_hazard_walks += WT_STAT_READ(from, cache_hazard_walks); + if ((v = WT_STAT_READ(from, cache_hazard_max)) > to->cache_hazard_max) + to->cache_hazard_max = v; to->cache_inmem_splittable += WT_STAT_READ(from, cache_inmem_splittable); to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split); @@ -999,6 +1054,8 @@ __wt_stat_connection_aggregate( 
WT_STAT_READ(from, cache_eviction_pages_queued_oldest); to->cache_read += WT_STAT_READ(from, cache_read); to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside); + to->cache_pages_requested += + WT_STAT_READ(from, cache_pages_requested); to->cache_eviction_pages_seen += WT_STAT_READ(from, cache_eviction_pages_seen); to->cache_eviction_fail += WT_STAT_READ(from, cache_eviction_fail); @@ -1021,6 +1078,7 @@ __wt_stat_connection_aggregate( to->cond_wait += WT_STAT_READ(from, cond_wait); to->rwlock_read += WT_STAT_READ(from, rwlock_read); to->rwlock_write += WT_STAT_READ(from, rwlock_write); + to->fsync_io += WT_STAT_READ(from, fsync_io); to->read_io += WT_STAT_READ(from, read_io); to->write_io += WT_STAT_READ(from, write_io); to->cursor_create += WT_STAT_READ(from, cursor_create); @@ -1065,7 +1123,10 @@ __wt_stat_connection_aggregate( to->log_write_lsn += WT_STAT_READ(from, log_write_lsn); to->log_write_lsn_skip += WT_STAT_READ(from, log_write_lsn_skip); to->log_sync += WT_STAT_READ(from, log_sync); + to->log_sync_duration += WT_STAT_READ(from, log_sync_duration); to->log_sync_dir += WT_STAT_READ(from, log_sync_dir); + to->log_sync_dir_duration += + WT_STAT_READ(from, log_sync_dir_duration); to->log_writes += WT_STAT_READ(from, log_writes); to->log_slot_consolidated += WT_STAT_READ(from, log_slot_consolidated); @@ -1090,6 +1151,9 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, rec_split_stashed_objects); to->session_cursor_open += WT_STAT_READ(from, session_cursor_open); to->session_open += WT_STAT_READ(from, session_open); + to->fsync_active += WT_STAT_READ(from, fsync_active); + to->read_active += WT_STAT_READ(from, read_active); + to->write_active += WT_STAT_READ(from, write_active); to->page_busy_blocked += WT_STAT_READ(from, page_busy_blocked); to->page_forcible_evict_blocked += WT_STAT_READ(from, page_forcible_evict_blocked); @@ -1115,6 +1179,14 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, txn_checkpoint_time_total); 
to->txn_checkpoint += WT_STAT_READ(from, txn_checkpoint); to->txn_fail_cache += WT_STAT_READ(from, txn_fail_cache); + to->txn_checkpoint_fsync_post += + WT_STAT_READ(from, txn_checkpoint_fsync_post); + to->txn_checkpoint_fsync_pre += + WT_STAT_READ(from, txn_checkpoint_fsync_pre); + to->txn_checkpoint_fsync_post_duration += + WT_STAT_READ(from, txn_checkpoint_fsync_post_duration); + to->txn_checkpoint_fsync_pre_duration += + WT_STAT_READ(from, txn_checkpoint_fsync_pre_duration); to->txn_pinned_range += WT_STAT_READ(from, txn_pinned_range); to->txn_pinned_checkpoint_range += WT_STAT_READ(from, txn_pinned_checkpoint_range); @@ -1126,9 +1198,11 @@ __wt_stat_connection_aggregate( } static const char * const __stats_join_desc[] = { - ": accesses", - ": actual count of items", + ": accesses to the main table", ": bloom filter false positives", + ": checks that conditions of membership are satisfied", + ": items inserted into a bloom filter", + ": items iterated", }; int @@ -1148,9 +1222,11 @@ __wt_stat_join_init_single(WT_JOIN_STATS *stats) void __wt_stat_join_clear_single(WT_JOIN_STATS *stats) { - stats->accesses = 0; - stats->actual_count = 0; + stats->main_access = 0; stats->bloom_false_positive = 0; + stats->membership_check = 0; + stats->bloom_insert = 0; + stats->iterated = 0; } void @@ -1166,7 +1242,9 @@ void __wt_stat_join_aggregate( WT_JOIN_STATS **from, WT_JOIN_STATS *to) { - to->accesses += WT_STAT_READ(from, accesses); - to->actual_count += WT_STAT_READ(from, actual_count); + to->main_access += WT_STAT_READ(from, main_access); to->bloom_false_positive += WT_STAT_READ(from, bloom_false_positive); + to->membership_check += WT_STAT_READ(from, membership_check); + to->bloom_insert += WT_STAT_READ(from, bloom_insert); + to->iterated += WT_STAT_READ(from, iterated); } diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 9d5975b2bc5..dd4384d9a9a 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ 
b/src/third_party/wiredtiger/src/txn/txn.c @@ -346,6 +346,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) if (WT_TXNID_LT(txn_global->last_running, last_running)) { txn_global->last_running = last_running; +#ifdef HAVE_VERBOSE /* Output a verbose message about long-running transactions, * but only when some progress is being made. */ if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) && @@ -358,6 +359,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) oldest_session->lastop, oldest_session->txn.snap_min)); } +#endif } done: WT_TRET(__wt_writeunlock(session, txn_global->scan_rwlock)); @@ -522,7 +524,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) */ if (F_ISSET(txn, WT_TXN_SYNC_SET)) WT_RET_MSG(session, EINVAL, - "Sync already set during begin_transaction."); + "Sync already set during begin_transaction"); if (WT_STRING_MATCH("background", cval.str, cval.len)) txn->txn_logsync = WT_LOG_BACKGROUND; else if (WT_STRING_MATCH("off", cval.str, cval.len)) diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 5c0c55963a3..51d26b9aed6 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -350,6 +350,7 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session, static int __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { + struct timespec fsync_start, fsync_stop; struct timespec start, stop, verb_timer; WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -359,6 +360,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN_STATE *txn_state; void *saved_meta_next; u_int i; + uint64_t fsync_duration_usecs; bool full, idle, logging, tracking; const char *txn_cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; @@ -425,7 +427,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * completion. 
Do it after flushing the pages to give the * asynchronous flush as much time as possible before we wait. */ + WT_ERR(__wt_epoch(session, &fsync_start)); WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync)); + WT_ERR(__wt_epoch(session, &fsync_stop)); + fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start); + WT_STAT_FAST_CONN_INCR(session, txn_checkpoint_fsync_pre); + WT_STAT_FAST_CONN_INCRV(session, + txn_checkpoint_fsync_pre_duration, fsync_duration_usecs); /* Tell logging that we are about to start a database checkpoint. */ if (full && logging) @@ -524,7 +532,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * Checkpoints have to hit disk (it would be reasonable to configure for * lazy checkpoints, but we don't support them yet). */ + WT_ERR(__wt_epoch(session, &fsync_start)); WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync)); + WT_ERR(__wt_epoch(session, &fsync_stop)); + fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start); + WT_STAT_FAST_CONN_INCR(session, txn_checkpoint_fsync_post); + WT_STAT_FAST_CONN_INCRV(session, + txn_checkpoint_fsync_post_duration, fsync_duration_usecs); WT_ERR(__checkpoint_verbose_track(session, "sync completed", &verb_timer)); diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c index da2670fb344..470515244f3 100644 --- a/src/third_party/wiredtiger/src/txn/txn_log.c +++ b/src/third_party/wiredtiger/src/txn/txn_log.c @@ -156,6 +156,7 @@ err: __wt_logrec_free(session, &logrec); int __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) { + WT_DECL_RET; WT_ITEM *logrec; WT_TXN *txn; WT_TXN_OP *op; @@ -179,24 +180,25 @@ __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) switch (op->type) { case WT_TXN_OP_BASIC: - return (__txn_op_log(session, logrec, op, cbt)); + ret = __txn_op_log(session, logrec, op, cbt); + break; case WT_TXN_OP_INMEM: case WT_TXN_OP_REF: /* Nothing to log, we're done. 
*/ - return (0); + break; case WT_TXN_OP_TRUNCATE_COL: - return (__wt_logop_col_truncate_pack(session, logrec, + ret = __wt_logop_col_truncate_pack(session, logrec, op->fileid, - op->u.truncate_col.start, op->u.truncate_col.stop)); + op->u.truncate_col.start, op->u.truncate_col.stop); + break; case WT_TXN_OP_TRUNCATE_ROW: - return (__wt_logop_row_truncate_pack(session, txn->logrec, + ret = __wt_logop_row_truncate_pack(session, txn->logrec, op->fileid, &op->u.truncate_row.start, &op->u.truncate_row.stop, - (uint32_t)op->u.truncate_row.mode)); - WT_ILLEGAL_VALUE(session); + (uint32_t)op->u.truncate_row.mode); + break; } - - /* NOTREACHED */ + return (ret); } /* diff --git a/src/third_party/wiredtiger/src/txn/txn_nsnap.c b/src/third_party/wiredtiger/src/txn/txn_nsnap.c index eddcca9248f..5b8fed23a9f 100644 --- a/src/third_party/wiredtiger/src/txn/txn_nsnap.c +++ b/src/third_party/wiredtiger/src/txn/txn_nsnap.c @@ -343,7 +343,7 @@ __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, if (!*has_create && !*has_drops) WT_RET_MSG(session, EINVAL, "WT_SESSION::snapshot API called without any drop or " - "name option."); + "name option"); return (0); } diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c index 1ea4dba1152..bd004e0f837 100644 --- a/src/third_party/wiredtiger/src/txn/txn_recover.c +++ b/src/third_party/wiredtiger/src/txn/txn_recover.c @@ -424,6 +424,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session) false, WT_SESSION_NO_LOGGING, &session)); r.session = session; + F_SET(conn, WT_CONN_RECOVERING); WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config)); WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config)); WT_ERR(__wt_metadata_cursor_open(session, NULL, &metac)); @@ -566,6 +567,7 @@ err: WT_TRET(__recovery_free(&r)); WT_TRET(__wt_evict_destroy(session)); WT_TRET(session->iface.close(&session->iface, NULL)); + F_CLR(conn, WT_CONN_RECOVERING); return (ret); } diff --git 
a/src/third_party/wiredtiger/src/utilities/util_backup.c b/src/third_party/wiredtiger/src/utilities/util_backup.c index 190c0878f38..5dc9671fb45 100644 --- a/src/third_party/wiredtiger/src/utilities/util_backup.c +++ b/src/third_party/wiredtiger/src/utilities/util_backup.c @@ -23,7 +23,7 @@ append_target(WT_SESSION *session, const char *target, char **bufp) static char *buf = NULL; /* 20 bytes of slop */ - if (remain < strlen(target) + 20) { + if (buf == NULL || remain < strlen(target) + 20) { len += strlen(target) + 512; remain += strlen(target) + 512; if ((buf = realloc(buf, len)) == NULL) diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.c b/src/third_party/wiredtiger/src/utilities/util_dump.c index aedd9168fbd..da70aea35be 100644 --- a/src/third_party/wiredtiger/src/utilities/util_dump.c +++ b/src/third_party/wiredtiger/src/utilities/util_dump.c @@ -7,25 +7,21 @@ */ #include "util.h" +#include "util_dump.h" -static int dump_config(WT_SESSION *, const char *, bool); +static int dump_config(WT_SESSION *, const char *, bool, bool); static int dump_json_begin(WT_SESSION *); static int dump_json_end(WT_SESSION *); static int dump_json_separator(WT_SESSION *); -static int dump_json_table_begin( - WT_SESSION *, WT_CURSOR *, const char *, const char *); -static int dump_json_table_cg( - WT_SESSION *, WT_CURSOR *, const char *, const char *, const char *); -static int dump_json_table_config(WT_SESSION *, const char *); static int dump_json_table_end(WT_SESSION *); -static int dump_prefix(WT_SESSION *, bool); +static int dump_prefix(WT_SESSION *, bool, bool); static int dump_record(WT_CURSOR *, bool, bool); -static int dump_suffix(WT_SESSION *); -static int dump_table_config(WT_SESSION *, WT_CURSOR *, const char *); +static int dump_suffix(WT_SESSION *, bool); +static int dump_table_config(WT_SESSION *, WT_CURSOR *, const char *, bool); static int dump_table_config_complex( - WT_SESSION *, WT_CURSOR *, WT_CURSOR *, const char *, const char *); + WT_SESSION 
*, WT_CURSOR *, WT_CURSOR *, const char *, const char *, bool); static int dup_json_string(const char *, char **); -static int print_config(WT_SESSION *, const char *, char *[]); +static int print_config(WT_SESSION *, const char *, char *[], bool, bool); static int usage(void); int @@ -78,7 +74,9 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) if (argc < 1 || (argc != 1 && !json)) return (usage()); - if (json && (ret = dump_json_begin(session)) != 0) + if (json && + ((ret = dump_json_begin(session)) != 0 || + (ret = dump_prefix(session, hex, json)) != 0)) goto err; for (i = 0; i < argc; i++) { @@ -91,9 +89,7 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) if ((name = util_name(session, argv[i], "table")) == NULL) goto err; - if (json && dump_json_table_config(session, name) != 0) - goto err; - if (!json && dump_config(session, name, hex) != 0) + if (dump_config(session, name, hex, json) != 0) goto err; len = @@ -142,7 +138,7 @@ err: ret = 1; * Dump the config for the uri. */ static int -dump_config(WT_SESSION *session, const char *uri, bool hex) +dump_config(WT_SESSION *session, const char *uri, bool hex, bool json) { WT_CURSOR *cursor; WT_DECL_RET; @@ -162,9 +158,9 @@ dump_config(WT_SESSION *session, const char *uri, bool hex) */ cursor->set_key(cursor, uri); if ((ret = cursor->search(cursor)) == 0) { - if (dump_prefix(session, hex) != 0 || - dump_table_config(session, cursor, uri) != 0 || - dump_suffix(session) != 0) + if ((!json && dump_prefix(session, hex, json) != 0) || + dump_table_config(session, cursor, uri, json) != 0 || + dump_suffix(session, json) != 0) ret = 1; } else if (ret == WT_NOTFOUND) ret = util_err(session, 0, "%s: No such object exists", uri); @@ -217,225 +213,6 @@ dump_json_separator(WT_SESSION *session) } /* - * dump_json_table_begin -- - * Output the JSON syntax that starts a table, along with its config. 
- */ -static int -dump_json_table_begin( - WT_SESSION *session, WT_CURSOR *cursor, const char *uri, const char *config) -{ - WT_DECL_RET; - const char *name; - char *jsonconfig; - - jsonconfig = NULL; - - /* Get the table name. */ - if ((name = strchr(uri, ':')) == NULL) { - fprintf(stderr, "%s: %s: corrupted uri\n", progname, uri); - return (1); - } - ++name; - - if ((ret = dup_json_string(config, &jsonconfig)) != 0) - return (util_cerr(cursor, "config dup", ret)); - if (printf(" \"%s\" : [\n {\n", uri) < 0) - goto eio; - if (printf(" \"config\" : \"%s\",\n", jsonconfig) < 0) - goto eio; - - if ((ret = dump_json_table_cg( - session, cursor, name, "colgroup:", "colgroups")) == 0) { - if (printf(",\n") < 0) - goto eio; - ret = dump_json_table_cg( - session, cursor, name, "index:", "indices"); - } - - if (printf("\n },\n {\n \"data\" : [") < 0) - goto eio; - - if (0) { -eio: ret = util_err(session, EIO, NULL); - } - - free(jsonconfig); - return (ret); -} - -/* - * dump_json_table_cg -- - * Dump the column groups or indices for a table. - */ -static int -dump_json_table_cg(WT_SESSION *session, WT_CURSOR *cursor, - const char *name, const char *entry, const char *header) -{ - static const char * const indent = " "; - WT_DECL_RET; - int exact; - bool once; - const char *key, *skip, *value; - char *jsonconfig; - - once = false; - if (printf(" \"%s\" : [", header) < 0) - return (util_err(session, EIO, NULL)); - - /* - * For table dumps, we're done. - */ - if (cursor == NULL) { - if (printf("]") < 0) - return (util_err(session, EIO, NULL)); - else - return (0); - } - - /* - * Search the file looking for column group and index key/value pairs: - * for each one, look up the related source information and append it - * to the base record. 
- */ - cursor->set_key(cursor, entry); - if ((ret = cursor->search_near(cursor, &exact)) != 0) { - if (ret == WT_NOTFOUND) - return (0); - return (util_cerr(cursor, "search_near", ret)); - } - if (exact >= 0) - goto match; - while ((ret = cursor->next(cursor)) == 0) { -match: if ((ret = cursor->get_key(cursor, &key)) != 0) - return (util_cerr(cursor, "get_key", ret)); - - /* Check if we've finished the list of entries. */ - if (!WT_PREFIX_MATCH(key, entry)) - break; - - /* Check for a table name match. */ - skip = key + strlen(entry); - if (strncmp( - skip, name, strlen(name)) != 0 || skip[strlen(name)] != ':') - continue; - - /* Get the value. */ - if ((ret = cursor->get_value(cursor, &value)) != 0) - return (util_cerr(cursor, "get_value", ret)); - - if ((ret = dup_json_string(value, &jsonconfig)) != 0) - return (util_cerr(cursor, "config dup", ret)); - ret = printf("%s\n" - "%s{\n" - "%s \"uri\" : \"%s\",\n" - "%s \"config\" : \"%s\"\n" - "%s}", - once ? "," : "", - indent, indent, key, indent, jsonconfig, indent); - free(jsonconfig); - if (ret < 0) - return (util_err(session, EIO, NULL)); - - once = true; - } - if (printf("%s]", once ? "\n " : "") < 0) - return (util_err(session, EIO, NULL)); - if (ret == 0 || ret == WT_NOTFOUND) - return (0); - return (util_cerr(cursor, "next", ret)); -} - -/* - * dump_json_table_config -- - * Dump the config for the uri. - */ -static int -dump_json_table_config(WT_SESSION *session, const char *uri) -{ - WT_CONFIG_ITEM cval; - WT_CURSOR *cursor; - WT_DECL_RET; - size_t len; - int tret; - const char *name, *value; - char *p; - - p = NULL; - - /* Get the table name. */ - if ((name = strchr(uri, ':')) == NULL) { - fprintf(stderr, "%s: %s: corrupted uri\n", progname, uri); - return (1); - } - ++name; - - /* Open a metadata cursor. 
*/ - if ((ret = session->open_cursor( - session, "metadata:create", NULL, NULL, &cursor)) != 0) { - fprintf(stderr, "%s: %s: session.open_cursor: %s\n", - progname, "metadata:create", - session->strerror(session, ret)); - return (1); - } - - /* - * Search for the object itself, just to make sure it exists, we don't - * want to output a header if the user entered the wrong name. This is - * where we find out a table doesn't exist, use a simple error message. - * - * Workaround for WiredTiger "simple" table handling. Simple tables - * have column-group entries, but they aren't listed in the metadata's - * table entry. Figure out if it's a simple table and in that case, - * retrieve the column-group entry and use the value from its "source" - * file. - */ - if (WT_PREFIX_MATCH(uri, "table:")) { - len = strlen("colgroup:") + strlen(name) + 1; - if ((p = malloc(len)) == NULL) - return (util_err(session, errno, NULL)); - (void)snprintf(p, len, "colgroup:%s", name); - cursor->set_key(cursor, p); - if ((ret = cursor->search(cursor)) == 0) { - if ((ret = cursor->get_value(cursor, &value)) != 0) - return (util_cerr(cursor, "get_value", ret)); - if ((ret = __wt_config_getones( - (WT_SESSION_IMPL *)session, - value, "source", &cval)) != 0) - return (util_err( - session, ret, "%s: source entry", p)); - free(p); - len = cval.len + 10; - if ((p = malloc(len)) == NULL) - return (util_err(session, errno, NULL)); - (void)snprintf(p, len, "%.*s", (int)cval.len, cval.str); - cursor->set_key(cursor, p); - } else - cursor->set_key(cursor, uri); - } else - cursor->set_key(cursor, uri); - - if ((ret = cursor->search(cursor)) == 0) { - if ((ret = cursor->get_value(cursor, &value)) != 0) - ret = util_cerr(cursor, "get_value", ret); - else if (dump_json_table_begin( - session, cursor, uri, value) != 0) - ret = 1; - } else if (ret == WT_NOTFOUND) - ret = util_err(session, 0, "%s: No such object exists", uri); - else - ret = util_err(session, ret, "%s", uri); - - if ((tret = 
cursor->close(cursor)) != 0) { - tret = util_cerr(cursor, "close", tret); - if (ret == 0) - ret = tret; - } - - free(p); - return (ret); -} - -/* * dump_json_table_end -- * Output the JSON syntax that ends a table. */ @@ -452,7 +229,8 @@ dump_json_table_end(WT_SESSION *session) * Dump the config for a table. */ static int -dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri) +dump_table_config( + WT_SESSION *session, WT_CURSOR *cursor, const char *uri, bool json) { WT_CONFIG_ITEM cval; WT_CURSOR *srch; @@ -479,11 +257,11 @@ dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri) */ cursor->set_key(cursor, uri); if ((ret = cursor->search(cursor)) != 0) - return (util_cerr(cursor, "search", ret)); + WT_ERR(util_cerr(cursor, "search", ret)); if ((ret = cursor->get_value(cursor, &v)) != 0) - return (util_cerr(cursor, "get_value", ret)); + WT_ERR(util_cerr(cursor, "get_value", ret)); if ((*--cfg = strdup(v)) == NULL) - return (util_err(session, errno, NULL)); + WT_ERR(util_err(session, errno, NULL)); /* * Workaround for WiredTiger "simple" table handling. 
Simple tables @@ -497,37 +275,36 @@ dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri) if (WT_PREFIX_MATCH(uri, "table:")) { len = strlen("colgroup:") + strlen(name) + 1; if ((p = malloc(len)) == NULL) - return (util_err(session, errno, NULL)); + WT_ERR(util_err(session, errno, NULL)); (void)snprintf(p, len, "colgroup:%s", name); cursor->set_key(cursor, p); if ((ret = cursor->search(cursor)) == 0) { if ((ret = cursor->get_value(cursor, &v)) != 0) - return (util_cerr(cursor, "get_value", ret)); + WT_ERR(util_cerr(cursor, "get_value", ret)); if ((*--cfg = strdup(v)) == NULL) - return (util_err(session, errno, NULL)); + WT_ERR(util_err(session, errno, NULL)); if ((ret =__wt_config_getones( (WT_SESSION_IMPL *)session, *cfg, "source", &cval)) != 0) - return (util_err( + WT_ERR(util_err( session, ret, "%s: source entry", p)); free(p); len = cval.len + 10; if ((p = malloc(len)) == NULL) - return (util_err(session, errno, NULL)); + WT_ERR(util_err(session, errno, NULL)); (void)snprintf(p, len, "%.*s", (int)cval.len, cval.str); cursor->set_key(cursor, p); if ((ret = cursor->search(cursor)) != 0) - return (util_cerr(cursor, "search", ret)); + WT_ERR(util_cerr(cursor, "search", ret)); if ((ret = cursor->get_value(cursor, &v)) != 0) - return (util_cerr(cursor, "get_value", ret)); + WT_ERR(util_cerr(cursor, "get_value", ret)); if ((*--cfg = strdup(v)) == NULL) - return (util_err(session, errno, NULL)); + WT_ERR(util_err(session, errno, NULL)); } else complex_table = true; } - if (print_config(session, uri, cfg) != 0) - return (1); + WT_ERR(print_config(session, uri, cfg, json, true)); if (complex_table) { /* @@ -537,21 +314,24 @@ dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri) */ if ((ret = session->open_cursor( session, "metadata:", NULL, NULL, &srch)) != 0) - return (util_cerr(cursor, "open_cursor", ret)); + WT_ERR(util_cerr(cursor, "open_cursor", ret)); if ((ret = dump_table_config_complex( - session, cursor, srch, name, 
"colgroup:")) == 0) + session, cursor, srch, name, "colgroup:", json)) == 0) ret = dump_table_config_complex( - session, cursor, srch, name, "index:"); + session, cursor, srch, name, "index:", json); if ((tret = srch->close(srch)) != 0) { tret = util_cerr(cursor, "close", tret); if (ret == 0) ret = tret; } - } + } else if (json && printf( + " \"colgroups\" : [],\n" + " \"indices\" : []\n") < 0) + WT_ERR(util_cerr(cursor, NULL, EIO)); - free(p); +err: free(p); free(_cfg[0]); free(_cfg[1]); free(_cfg[2]); @@ -563,17 +343,31 @@ dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri) * Dump the column groups or indices for a table. */ static int -dump_table_config_complex(WT_SESSION *session, - WT_CURSOR *cursor, WT_CURSOR *srch, const char *name, const char *entry) +dump_table_config_complex(WT_SESSION *session, WT_CURSOR *cursor, + WT_CURSOR *srch, const char *name, const char *entry, bool json) { WT_CONFIG_ITEM cval; WT_DECL_RET; - const char *key; + bool multiple; + const char *groupname, *key, *sep; size_t len; int exact; const char *v; char *p, *cfg[3] = {NULL, NULL, NULL}; + multiple = false; + sep = ""; + + if (json) { + if (strcmp(entry, "colgroup:") == 0) { + groupname = "colgroups"; + sep = ","; + } else { + groupname = "indices"; + } + if (printf(" \"%s\" : [", groupname) < 0) + return (util_err(session, EIO, NULL)); + } /* * Search the file looking for column group and index key/value pairs: * for each one, look up the related source information and append it @@ -594,7 +388,7 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0) /* Check if we've finished the list of entries. */ if (!WT_PREFIX_MATCH(key, entry)) - return (0); + break; /* * Check for a table name match. This test will match "simple" @@ -635,14 +429,19 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0) if ((cfg[0] = strdup(v)) == NULL) return (util_err(session, errno, NULL)); + if (json && printf("%s\n", multiple ? 
"," : "") < 0) + return (util_err(session, EIO, NULL)); /* * The dumped configuration string is the original key plus the * source's configuration, where the values of the original key * override any source configurations of the same name. */ - if (print_config(session, key, cfg) != 0) + if (print_config(session, key, cfg, json, false) != 0) return (util_err(session, EIO, NULL)); + multiple = true; } + if (json && printf("\n ]%s\n", sep) < 0) + return (util_err(session, EIO, NULL)); free(cfg[0]); free(cfg[1]); @@ -656,18 +455,24 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0) * Output the dump file header prefix. */ static int -dump_prefix(WT_SESSION *session, bool hex) +dump_prefix(WT_SESSION *session, bool hex, bool json) { int vmajor, vminor, vpatch; (void)wiredtiger_version(&vmajor, &vminor, &vpatch); - if (printf( + if (!json && (printf( "WiredTiger Dump (WiredTiger Version %d.%d.%d)\n", vmajor, vminor, vpatch) < 0 || printf("Format=%s\n", hex ? "hex" : "print") < 0 || - printf("Header\n") < 0) + printf("Header\n") < 0)) + return (util_err(session, EIO, NULL)); + else if (json && printf( + " \"%s\" : \"%d (%d.%d.%d)\",\n", + DUMP_JSON_VERSION_MARKER, DUMP_JSON_CURRENT_VERSION, + vmajor, vminor, vpatch) < 0) return (util_err(session, EIO, NULL)); + return (0); } @@ -718,10 +523,18 @@ dump_record(WT_CURSOR *cursor, bool reverse, bool json) * Output the dump file header suffix. 
*/ static int -dump_suffix(WT_SESSION *session) +dump_suffix(WT_SESSION *session, bool json) { - if (printf("Data\n") < 0) - return (util_err(session, EIO, NULL)); + if (json) { + if (printf( + " },\n" + " {\n" + " \"data\" : [") < 0) + return (util_err(session, EIO, NULL)); + } else { + if (printf("Data\n") < 0) + return (util_err(session, EIO, NULL)); + } return (0); } @@ -739,14 +552,15 @@ dup_json_string(const char *str, char **result) nchars = 0; for (p = str; *p; p++, nchars++) - nchars += __wt_json_unpack_char(*p, NULL, 0, false); + nchars += __wt_json_unpack_char((u_char)*p, NULL, 0, false); q = malloc(nchars + 1); if (q == NULL) return (1); *result = q; left = nchars; for (p = str; *p; p++, nchars++) { - nchars = __wt_json_unpack_char(*p, (u_char *)q, left, false); + nchars = __wt_json_unpack_char((u_char)*p, (u_char *)q, left, + false); left -= nchars; q += nchars; } @@ -759,21 +573,40 @@ dup_json_string(const char *str, char **result) * Output a key/value URI pair by combining v1 and v2. */ static int -print_config(WT_SESSION *session, const char *key, char *cfg[]) +print_config( + WT_SESSION *session, const char *key, char *cfg[], bool json, bool toplevel) { WT_DECL_RET; - char *value_ret; + char *jsonconfig, *value_ret; /* * We have all of the object configuration, but don't have the default * session.create configuration. Have the underlying library add in the * defaults and collapse it all into one load configuration string. 
*/ + jsonconfig = NULL; if ((ret = __wt_schema_create_final( (WT_SESSION_IMPL *)session, cfg, &value_ret)) != 0) return (util_err(session, ret, NULL)); - ret = printf("%s\n%s\n", key, value_ret); + if (json && (ret = dup_json_string(value_ret, &jsonconfig)) != 0) { + free(value_ret); + return (util_err(session, ret, NULL)); + } + if (json) { + if (toplevel) + ret = printf( + " \"%s\" : [\n {\n " + "\"config\" : \"%s\",\n", key, jsonconfig); + else + ret = printf( + " {\n" + " \"uri\" : \"%s\",\n" + " \"config\" : \"%s\"\n" + " }", key, jsonconfig); + } else + ret = printf("%s\n%s\n", key, value_ret); free(value_ret); + free(jsonconfig); if (ret < 0) return (util_err(session, EIO, NULL)); return (0); diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.h b/src/third_party/wiredtiger/src/utilities/util_dump.h new file mode 100644 index 00000000000..e3fd8e6a501 --- /dev/null +++ b/src/third_party/wiredtiger/src/utilities/util_dump.h @@ -0,0 +1,11 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. 
+ */ + +#define DUMP_JSON_VERSION_MARKER "WiredTiger Dump Version" +#define DUMP_JSON_CURRENT_VERSION 1 +#define DUMP_JSON_SUPPORTED_VERSION 1 diff --git a/src/third_party/wiredtiger/src/utilities/util_load.c b/src/third_party/wiredtiger/src/utilities/util_load.c index 696dc68630a..ac18df80851 100644 --- a/src/third_party/wiredtiger/src/utilities/util_load.c +++ b/src/third_party/wiredtiger/src/utilities/util_load.c @@ -211,6 +211,8 @@ config_list_free(CONFIG_LIST *clp) free(*entry); free(clp->list); clp->list = NULL; + clp->entry = 0; + clp->max_entry = 0; } /* @@ -366,6 +368,7 @@ config_update(WT_SESSION *session, char **list) if (WT_PREFIX_MATCH(*listp, "colgroup:") || WT_PREFIX_MATCH(*listp, "file:") || WT_PREFIX_MATCH(*listp, "index:") || + WT_PREFIX_MATCH(*listp, "lsm:") || WT_PREFIX_MATCH(*listp, "table:")) if (config_rename(session, listp, cmdname)) return (1); diff --git a/src/third_party/wiredtiger/src/utilities/util_load_json.c b/src/third_party/wiredtiger/src/utilities/util_load_json.c index 3a1f847a95f..020a4ed9ba9 100644 --- a/src/third_party/wiredtiger/src/utilities/util_load_json.c +++ b/src/third_party/wiredtiger/src/utilities/util_load_json.c @@ -7,6 +7,7 @@ */ #include "util.h" +#include "util_dump.h" #include "util_load.h" /* @@ -186,9 +187,8 @@ json_strdup(WT_SESSION *session, JSON_INPUT_STATE *ins, char **resultp) } *resultp = result; resultcpy = result; - if ((ret = __wt_json_strncpy(&resultcpy, (size_t)resultlen, src, - srclen)) - != 0) { + if ((ret = __wt_json_strncpy( + session, &resultcpy, (size_t)resultlen, src, srclen)) != 0) { ret = util_err(session, ret, NULL); goto err; } @@ -248,7 +248,7 @@ json_data(WT_SESSION *session, keyformat = cursor->key_format; isrec = strcmp(keyformat, "r") == 0; for (nkeys = 0; *keyformat; keyformat++) - if (!isdigit(*keyformat)) + if (!__wt_isdigit((u_char)*keyformat)) nkeys++; recno = 0; @@ -344,13 +344,16 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags) { CONFIG_LIST cl; 
WT_DECL_RET; - int toktype; static const char *json_markers[] = { "\"config\"", "\"colgroups\"", "\"indices\"", "\"data\"", NULL }; char *config, *tableuri; + int curversion, toktype; + bool hasversion; memset(&cl, 0, sizeof(cl)); tableuri = NULL; + hasversion = false; + JSON_EXPECT(session, ins, '{'); while (json_peek(session, ins) == 's') { JSON_EXPECT(session, ins, 's'); @@ -358,6 +361,24 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags) snprintf(tableuri, ins->toklen, "%.*s", (int)(ins->toklen - 2), ins->tokstart + 1); JSON_EXPECT(session, ins, ':'); + if (!hasversion) { + if (strcmp(tableuri, DUMP_JSON_VERSION_MARKER) != 0) { + ret = util_err(session, ENOTSUP, + "missing \"%s\"", DUMP_JSON_VERSION_MARKER); + goto err; + } + hasversion = true; + JSON_EXPECT(session, ins, 's'); + if ((curversion = atoi(ins->tokstart + 1)) <= 0 || + curversion > DUMP_JSON_SUPPORTED_VERSION) { + ret = util_err(session, ENOTSUP, + "unsupported JSON dump version \"%.*s\"", + (int)(ins->toklen - 1), ins->tokstart + 1); + goto err; + } + JSON_EXPECT(session, ins, ','); + continue; + } /* * Allow any ordering of 'config', 'colgroups', @@ -406,6 +427,9 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags) flags)) != 0) goto err; config_list_free(&cl); + free(ins->kvraw); + ins->kvraw = NULL; + config_list_free(&cl); break; } else @@ -447,7 +471,7 @@ json_peek(WT_SESSION *session, JSON_INPUT_STATE *ins) if (!ins->peeking) { while (!ins->ateof) { - while (isspace(*ins->p)) + while (__wt_isspace((u_char)*ins->p)) ins->p++; if (*ins->p) break; @@ -523,15 +547,14 @@ json_skip(WT_SESSION *session, JSON_INPUT_STATE *ins, const char **matches) const char *hit; const char **match; - if (ins->kvraw != NULL) - return (1); - + WT_ASSERT((WT_SESSION_IMPL *)session, ins->kvraw == NULL); hit = NULL; while (!ins->ateof) { for (match = matches; *match != NULL; match++) if ((hit = strstr(ins->p, *match)) != NULL) goto out; - if (util_read_line(session, 
&ins->line, true, &ins->ateof)) { + if (util_read_line(session, &ins->line, true, &ins->ateof) + != 0) { ins->toktype = -1; return (1); } diff --git a/src/third_party/wiredtiger/src/utilities/util_main.c b/src/third_party/wiredtiger/src/utilities/util_main.c index e18d8d7d1f5..2054b94e3ce 100644 --- a/src/third_party/wiredtiger/src/utilities/util_main.c +++ b/src/third_party/wiredtiger/src/utilities/util_main.c @@ -36,7 +36,6 @@ main(int argc, char *argv[]) conn = NULL; p = NULL; - secretkey = NULL; /* Get the program name. */ if ((progname = strrchr(argv[0], '/')) == NULL) diff --git a/src/third_party/wiredtiger/src/utilities/util_misc.c b/src/third_party/wiredtiger/src/utilities/util_misc.c index f45f6b339f2..3c4e8d2dfa1 100644 --- a/src/third_party/wiredtiger/src/utilities/util_misc.c +++ b/src/third_party/wiredtiger/src/utilities/util_misc.c @@ -108,7 +108,7 @@ util_str2recno(WT_SESSION *session, const char *p, uint64_t *recnop) * forth -- none of them are OK with us. Check the string starts with * digit, that turns off the special processing. 
*/ - if (!isdigit(p[0])) + if (!__wt_isdigit((u_char)p[0])) goto format; errno = 0; diff --git a/src/third_party/wiredtiger/src/utilities/util_verify.c b/src/third_party/wiredtiger/src/utilities/util_verify.c index 2df4fa65f43..82bdd780cd3 100644 --- a/src/third_party/wiredtiger/src/utilities/util_verify.c +++ b/src/third_party/wiredtiger/src/utilities/util_verify.c @@ -16,10 +16,10 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) WT_DECL_RET; size_t size; int ch; - bool dump_address, dump_blocks, dump_pages, dump_shape; + bool dump_address, dump_blocks, dump_layout, dump_pages; char *config, *dump_offsets, *name; - dump_address = dump_blocks = dump_pages = dump_shape = false; + dump_address = dump_blocks = dump_layout = dump_pages = false; config = dump_offsets = name = NULL; while ((ch = __wt_getopt(progname, argc, argv, "d:")) != EOF) switch (ch) { @@ -28,6 +28,8 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) dump_address = true; else if (strcmp(__wt_optarg, "dump_blocks") == 0) dump_blocks = true; + else if (strcmp(__wt_optarg, "dump_layout") == 0) + dump_layout = true; else if ( WT_PREFIX_MATCH(__wt_optarg, "dump_offsets=")) { if (dump_offsets != NULL) { @@ -40,8 +42,6 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) __wt_optarg + strlen("dump_offsets="); } else if (strcmp(__wt_optarg, "dump_pages") == 0) dump_pages = true; - else if (strcmp(__wt_optarg, "dump_shape") == 0) - dump_shape = true; else return (usage()); break; @@ -60,12 +60,12 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) /* Build the configuration string as necessary. */ if (dump_address || - dump_blocks || dump_offsets != NULL || dump_pages || dump_shape) { + dump_blocks || dump_layout || dump_offsets != NULL || dump_pages) { size = strlen("dump_address,") + strlen("dump_blocks,") + + strlen("dump_layout,") + strlen("dump_pages,") + - strlen("dump_shape,") + strlen("dump_offsets[],") + (dump_offsets == NULL ? 
0 : strlen(dump_offsets)) + 20; if ((config = malloc(size)) == NULL) { @@ -76,11 +76,11 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) "%s%s%s%s%s%s%s", dump_address ? "dump_address," : "", dump_blocks ? "dump_blocks," : "", + dump_layout ? "dump_layout," : "", dump_offsets != NULL ? "dump_offsets=[" : "", dump_offsets != NULL ? dump_offsets : "", dump_offsets != NULL ? "]," : "", - dump_pages ? "dump_pages," : "", - dump_shape ? "dump_shape," : ""); + dump_pages ? "dump_pages," : ""); } if ((ret = session->verify(session, name, config)) != 0) { fprintf(stderr, "%s: verify(%s): %s\n", @@ -109,7 +109,7 @@ usage(void) "usage: %s %s " "verify %s\n", progname, usage_prefix, - "[-d dump_address | dump_blocks | " - "dump_offsets=#,# | dump_pages | dump_shape] uri"); + "[-d dump_address | dump_blocks | dump_layout | " + "dump_offsets=#,# | dump_pages] uri"); return (1); } diff --git a/src/third_party/wiredtiger/test/bloom/Makefile.am b/src/third_party/wiredtiger/test/bloom/Makefile.am index 86d87c70071..81a21f59882 100644 --- a/src/third_party/wiredtiger/test/bloom/Makefile.am +++ b/src/third_party/wiredtiger/test/bloom/Makefile.am @@ -1,9 +1,12 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/test/utility +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = t t_SOURCES = test_bloom.c -t_LDADD = $(top_builddir)/libwiredtiger.la + +t_LDADD = $(top_builddir)/test/utility/libtest_util.la +t_LDADD +=$(top_builddir)/libwiredtiger.la t_LDFLAGS = -static # Run this during a "make check" smoke test. 
@@ -11,4 +14,4 @@ TESTS = $(noinst_PROGRAMS) LOG_COMPILER = $(TEST_WRAPPER) clean-local: - rm -rf WiredTiger* *.core __* + rm -rf WiredTiger* *.core diff --git a/src/third_party/wiredtiger/test/bloom/test_bloom.c b/src/third_party/wiredtiger/test/bloom/test_bloom.c index f95bc7faaf9..9a7584f951f 100644 --- a/src/third_party/wiredtiger/test/bloom/test_bloom.c +++ b/src/third_party/wiredtiger/test/bloom/test_bloom.c @@ -26,7 +26,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "test_util.i" +#include "test_util.h" static struct { char *progname; /* Program name */ @@ -50,7 +50,8 @@ void cleanup(void); void populate_entries(void); void run(void); void setup(void); -void usage(void); +void usage(void) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern char *__wt_optarg; extern int __wt_optind; @@ -189,9 +190,7 @@ run(void) * ensure the value doesn't overlap with existing values. */ item.size = g.c_key_max + 10; - item.data = calloc(item.size, 1); - if (item.data == NULL) - testutil_die(ENOMEM, "value buffer malloc"); + item.data = dcalloc(item.size, 1); memset((void *)item.data, 'a', item.size); for (i = 0, fp = 0; i < g.c_ops; i++) { ((uint8_t *)item.data)[i % item.size] = @@ -232,14 +231,10 @@ populate_entries(void) srand(g.c_srand); - entries = calloc(g.c_ops, sizeof(uint8_t *)); - if (entries == NULL) - testutil_die(ENOMEM, "key buffer malloc"); + entries = dcalloc(g.c_ops, sizeof(uint8_t *)); for (i = 0; i < g.c_ops; i++) { - entries[i] = calloc(g.c_key_max, sizeof(uint8_t)); - if (entries[i] == NULL) - testutil_die(ENOMEM, "key buffer malloc 2"); + entries[i] = dcalloc(g.c_key_max, sizeof(uint8_t)); for (j = 0; j < g.c_key_max; j++) entries[i][j] = 'a' + ((uint8_t)rand() % 26); } diff --git a/src/third_party/wiredtiger/test/checkpoint/Makefile.am b/src/third_party/wiredtiger/test/checkpoint/Makefile.am index cf879d046bf..2b5ba800c9c 100644 --- a/src/third_party/wiredtiger/test/checkpoint/Makefile.am +++ b/src/third_party/wiredtiger/test/checkpoint/Makefile.am @@ 
-1,9 +1,12 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/test/utility +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = t -t_LDADD = $(top_builddir)/libwiredtiger.la t_SOURCES = checkpointer.c workers.c test_checkpoint.c + +t_LDADD = $(top_builddir)/test/utility/libtest_util.la +t_LDADD +=$(top_builddir)/libwiredtiger.la t_LDFLAGS = -static TESTS = smoke.sh diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c index c5524b3c63e..6293d36f916 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c @@ -32,7 +32,8 @@ GLOBAL g; static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *); static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *); -static void onint(int); +static void onint(int) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static int cleanup(void); static int usage(void); static int wt_connect(const char *); @@ -61,8 +62,7 @@ main(int argc, char *argv[]) working_dir = NULL; ttype = MIX; g.checkpoint_name = "WiredTigerCheckpoint"; - if ((g.home = malloc(512)) == NULL) - testutil_die(ENOMEM, "Unable to allocate memory"); + g.home = dmalloc(512); g.nkeys = 10000; g.nops = 100000; g.ntables = 3; diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h index 09edaeb84bc..0d0d02447d5 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h @@ -26,19 +26,9 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include <sys/types.h> -#include <sys/time.h> +#include "test_util.h" -#include <errno.h> -#include <inttypes.h> -#include <pthread.h> #include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include "test_util.i" #define URI_BASE "table:__wt" /* File name */ diff --git a/src/third_party/wiredtiger/test/csuite/Makefile.am b/src/third_party/wiredtiger/test/csuite/Makefile.am new file mode 100644 index 00000000000..f842bc1316f --- /dev/null +++ b/src/third_party/wiredtiger/test/csuite/Makefile.am @@ -0,0 +1,27 @@ +AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/test/utility +LDADD = $(top_builddir)/test/utility/libtest_util.la \ + $(top_builddir)/libwiredtiger.la +AM_LDFLAGS = -static + +test_wt1965_col_efficiency_SOURCES = wt1965_col_efficiency/main.c +noinst_PROGRAMS = test_wt1965_col_efficiency + +test_wt2246_col_append_SOURCES = wt2246_col_append/main.c +noinst_PROGRAMS += test_wt2246_col_append + +test_wt2535_insert_race_SOURCES = wt2535_insert_race/main.c +noinst_PROGRAMS += test_wt2535_insert_race + +test_wt2447_join_main_table_SOURCES = wt2447_join_main_table/main.c +noinst_PROGRAMS += test_wt2447_join_main_table + +test_wt2592_join_schema_SOURCES = wt2592_join_schema/main.c +noinst_PROGRAMS += test_wt2592_join_schema + +# Run this during a "make check" smoke test. +TESTS = $(noinst_PROGRAMS) +LOG_COMPILER = $(TEST_WRAPPER) + +clean-local: + rm -rf WT_TEST.* *.core diff --git a/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c b/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c new file mode 100644 index 00000000000..2882ce9cdf5 --- /dev/null +++ b/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c @@ -0,0 +1,186 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. 
+ * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: WT-1965 + * Test case description: The reported issue was that column store tables + * exhibit high CPU usage when populated with sparse record IDs. + * Failure mode: It isn't simple to make this test case failure explicit since + * it is demonstrating an inefficiency rather than a correctness bug. + */ + +void (*custom_die)(void) = NULL; + +/* If changing field count also need to change set_value and get_value calls */ +#define NR_FIELDS 8 +#define NR_OBJECTS 100 +#define NR_THREADS 4 + +static uint64_t g_ts = 0; + +/* + * Each thread inserts a set of keys into the record store database. The keys + * are generated in such a way that there are large gaps in the key range. 
+ */ +static void * +thread_func(void *arg) +{ + TEST_OPTS *opts; + WT_CURSOR *cursor, *idx_cursor; + WT_SESSION *session; + uint64_t i, ins_rotor, ins_thr_idx, thr_idx, ts; + uint64_t *obj_data; + + opts = (TEST_OPTS *)arg; + thr_idx = __wt_atomic_fetch_addv64(&opts->next_threadid, 1); + ts = g_ts; + obj_data = dcalloc( + (NR_OBJECTS/NR_THREADS + 1) * NR_FIELDS, sizeof(*obj_data)); + + testutil_check(opts->conn->open_session( + opts->conn, NULL, NULL, &session)); + + testutil_check(session->open_cursor( + session, opts->uri, NULL, NULL, &cursor)); + testutil_check(session->open_cursor( + session, "table:index", NULL, NULL, &idx_cursor)); + + for (ins_rotor = 1; ins_rotor < 10; ++ins_rotor) { + for (ins_thr_idx = thr_idx, i = 0; ins_thr_idx < NR_OBJECTS; + ins_thr_idx += NR_THREADS, i += NR_FIELDS) { + + testutil_check( + session->begin_transaction(session, "sync=false")); + + cursor->set_key(cursor, ins_thr_idx << 40 | ins_rotor); + cursor->set_value(cursor, ts, + obj_data[i+0], obj_data[i+1], obj_data[i+2], + obj_data[i+3], obj_data[i+4], obj_data[i+5], + obj_data[i+6], obj_data[i+7]); + testutil_check(cursor->insert(cursor)); + + idx_cursor->set_key( + idx_cursor, ins_thr_idx << 40 | ts); + idx_cursor->set_value(idx_cursor, ins_rotor); + testutil_check(idx_cursor->insert(idx_cursor)); + + testutil_check( + session->commit_transaction(session, NULL)); + + /* change object fields */ + ++obj_data[i + ((ins_thr_idx + ins_rotor) % NR_FIELDS)]; + ++obj_data[i + + ((ins_thr_idx + ins_rotor + 1) % NR_FIELDS)]; + + ++g_ts; + /* 5K updates/sec */ + (void)usleep(1000000ULL * NR_THREADS / 5000); + } + } + + testutil_check(session->close(session, NULL)); + free(obj_data); + return (NULL); +} + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + WT_CURSOR *cursor; + WT_SESSION *session; + pthread_t thr[NR_THREADS]; + size_t t; + uint64_t f[NR_FIELDS], r, ts; + int i, ret; + char table_format[256]; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + 
testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + testutil_check(wiredtiger_open(opts->home, NULL, + "create,cache_size=1G,checkpoint=(wait=30)," + "eviction_trigger=80,eviction_target=64,eviction_dirty_target=65," + "log=(enabled,file_max=10M)," + "transaction_sync=(enabled=true,method=none)", &opts->conn)); + testutil_check(opts->conn->open_session( + opts->conn, NULL, NULL, &session)); + + sprintf(table_format, "key_format=r,value_format="); + for (i = 0; i < NR_FIELDS; i++) + strcat(table_format, "Q"); + + /* recno -> timestamp + NR_FIELDS * Q */ + testutil_check(session->create( + session, opts->uri, table_format)); + /* timestamp -> recno */ + testutil_check(session->create(session, + "table:index", "key_format=Q,value_format=Q")); + + testutil_check(session->close(session, NULL)); + + for (t = 0; t < NR_THREADS; ++t) + testutil_check(pthread_create( + &thr[t], NULL, thread_func, (void *)opts)); + + for (t = 0; t < NR_THREADS; ++t) + (void)pthread_join(thr[t], NULL); + + testutil_check(opts->conn->open_session( + opts->conn, NULL, NULL, &session)); + + /* recno -> timestamp + NR_FIELDS * Q */ + testutil_check(session->create(session, opts->uri, table_format)); + + testutil_check(session->open_cursor( + session, opts->uri, NULL, NULL, &cursor)); + + while ((ret = cursor->next(cursor)) == 0) { + testutil_check(cursor->get_key(cursor, &r)); + testutil_check(cursor->get_value(cursor, &ts, + &f[0], &f[1], &f[2], &f[3], &f[4], &f[5], &f[6], &f[7])); + + if (!opts->verbose) + continue; + + printf("(%" PRIu64 ",%llu)\t\t%" PRIu64, + (r >> 40), r & ((1ULL << 40) - 1), ts); + + for (i = 0; i < NR_FIELDS; i++) + printf("\t%" PRIu64, f[i]); + printf("\n"); + } + testutil_assert(ret == WT_NOTFOUND); + + testutil_cleanup(opts); + + return (0); +} diff --git a/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c new file mode 100644 index 
00000000000..798970cbb6d --- /dev/null +++ b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c @@ -0,0 +1,158 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: WT-2246 + * Test case description: The column-store search routine used to search the + * target leaf page even when the cursor is configured with append and we're + * allocating a record number. That was inefficient, this test case + * demonstrates the inefficiency. + * Failure mode: It isn't simple to make this test case failure explicit since + * it is demonstrating an inefficiency rather than a correctness bug. 
+ */ + +/* Don't move into shared function there is a cross platform solution */ +#include <signal.h> + +#define MILLION 1000000 + +void (*custom_die)(void) = NULL; + +/* Needs to be global for signal handling. */ +static TEST_OPTS *opts, _opts; + +static void +page_init(uint64_t n) +{ + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_SESSION *session; + uint64_t recno, vrecno; + char buf[64]; + + conn = opts->conn; + + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check( + session->open_cursor(session, opts->uri, NULL, "append", &cursor)); + + vrecno = 0; + buf[0] = '\2'; + for (recno = 1;; ++recno) { + if (opts->table_type == TABLE_FIX) + cursor->set_value(cursor, buf[0]); + else { + if (recno % 3 == 0) + ++vrecno; + snprintf(buf, + sizeof(buf), "%" PRIu64 " VALUE ------", vrecno); + cursor->set_value(cursor, buf); + } + testutil_check(cursor->insert(cursor)); + testutil_check(cursor->get_key(cursor, &opts->max_inserted_id)); + if (opts->max_inserted_id >= n) + break; + } +} + +/* + * TODO: Platform specific? + */ +static void +onsig(int signo) +{ + WT_UNUSED(signo); + opts->running = false; +} + +#define N_APPEND_THREADS 6 +#define N_RECORDS (20 * WT_MILLION) + +int +main(int argc, char *argv[]) +{ + WT_SESSION *session; + clock_t ce, cs; + pthread_t idlist[100]; + uint64_t i, id; + char buf[100]; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + opts->table_type = TABLE_ROW; + opts->n_append_threads = N_APPEND_THREADS; + opts->nrecords = N_RECORDS; + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + snprintf(buf, sizeof(buf), + "create," + "cache_size=%s," + "eviction=(threads_max=5)," + "statistics=(fast)", + opts->table_type == TABLE_FIX ? 
"500MB" : "2GB"); + testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + snprintf(buf, sizeof(buf), + "key_format=r,value_format=%s," + "allocation_size=4K,leaf_page_max=64K", + opts->table_type == TABLE_FIX ? "8t" : "S"); + testutil_check(session->create(session, opts->uri, buf)); + testutil_check(session->close(session, NULL)); + + page_init(5000); + + /* Force to disk and re-open. */ + testutil_check(opts->conn->close(opts->conn, NULL)); + testutil_check(wiredtiger_open(opts->home, NULL, NULL, &opts->conn)); + + (void)signal(SIGINT, onsig); + + cs = clock(); + id = 0; + for (i = 0; i < opts->n_append_threads; ++i, ++id) { + printf("append: %" PRIu64 "\n", id); + testutil_check(pthread_create( + &idlist[id], NULL, thread_append, (void *)opts)); + } + + for (i = 0; i < id; ++i) + testutil_check(pthread_join(idlist[i], NULL)); + + ce = clock(); + printf("%" PRIu64 "M records: %.2lf processor seconds\n", + opts->max_inserted_id / MILLION, + (ce - cs) / (double)CLOCKS_PER_SEC); + + testutil_cleanup(opts); + /* NOTREACHED */ + + return (0); +} diff --git a/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c new file mode 100644 index 00000000000..a6f19cb0858 --- /dev/null +++ b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c @@ -0,0 +1,189 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. 
+ * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: WT-2447 + * + * Test case description: This test case is adapted from the submitted test + * program in the JIRA ticket. We create a database of 10,000 entries, with + * every key i having pair of values (i, i). Create indices on both values, + * and establish a join: table.v1 >= 5000 AND table.v2 < 5001. There's a + * Bloom filter on v2. We expect that although we iterate from 5000 to + * 10000, we'll only have accesses to the main table for key 5000, as + * 5001-10000 will generally not be in the Bloom filter. For key 5000, + * we technically have two accesses to the main table - one occurs when we + * see key 5000 is in the Bloom filter, and we need to do a full test, we + * make an access to the projection table:tablename(v2), that's just to get + * the value of v2, which we'll check by comparison to the cursor at 5001. 
+ * That counts as a main table access, and when we see it is satisfied and + * return the complete set of values, we'll access the main table with the + * full projection (that's the second main table access). + * + * Failure mode: Before fixes of WT-2447, we saw lots of accesses to the main + * table. + */ + +void (*custom_die)(void) = NULL; + +#define N_RECORDS 10000 + +static void +get_stat_total(WT_SESSION *session, WT_CURSOR *jcursor, const char *descmatch, + uint64_t *pval) +{ + WT_CURSOR *statcursor; + uint64_t val; + int ret; + bool match; + char *desc, *valstr; + + match = false; + *pval = 0; + testutil_check(session->open_cursor(session, "statistics:join", jcursor, + NULL, &statcursor)); + + while ((ret = statcursor->next(statcursor)) == 0) { + testutil_assert(statcursor->get_value( + statcursor, &desc, &valstr, &val) == 0); + + printf("statistics: %s: %s: %" PRIu64 "\n", desc, valstr, val); + + if (strstr(desc, descmatch) != NULL) { + *pval += val; + match = true; + } + } + testutil_assert(ret == WT_NOTFOUND); + testutil_check(statcursor->close(statcursor)); + testutil_assert(match); +} + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + WT_CURSOR *cursor1, *cursor2, *jcursor; + WT_ITEM d; + WT_SESSION *session; + uint64_t maincount; + int half, i, j; + const char *tablename; + char bloom_cfg[128], index1uri[256], index2uri[256], joinuri[256]; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + tablename = strchr(opts->uri, ':'); + testutil_assert(tablename != NULL); + tablename++; + snprintf(index1uri, sizeof(index1uri), "index:%s:index1", tablename); + snprintf(index2uri, sizeof(index2uri), "index:%s:index2", tablename); + snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri); + + testutil_check(wiredtiger_open(opts->home, NULL, + "statistics=(all),create", &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, 
NULL, &session)); + + testutil_check(session->create(session, opts->uri, + "key_format=i,value_format=iiu,columns=(k,v1,v2,d)")); + testutil_check(session->create(session, index1uri, "columns=(v1)")); + testutil_check(session->create(session, index2uri, "columns=(v2)")); + + testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, + &cursor1)); + + d.size = 4100; + d.data = dmalloc(d.size); + memset((char *)d.data, 7, d.size); + + for (i = 0; i < N_RECORDS; ++i) + { + cursor1->set_key(cursor1, i); + cursor1->set_value(cursor1, i, i, &d); + testutil_check(cursor1->insert(cursor1)); + } + + free((void*)d.data); + + testutil_check(opts->conn->close(opts->conn, NULL)); + testutil_check(wiredtiger_open(opts->home, NULL, + "statistics=(all),create,cache_size=1GB", &opts->conn)); + testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, + &session)); + + testutil_check(session->open_cursor(session, index1uri, NULL, NULL, + &cursor1)); + testutil_check(session->open_cursor(session, index2uri, NULL, NULL, + &cursor2)); + + half = N_RECORDS / 2; + cursor1->set_key(cursor1, half); + testutil_check(cursor1->search(cursor1)); + + cursor2->set_key(cursor2, half + 1); + testutil_check(cursor2->search(cursor2)); + + sprintf(bloom_cfg, "compare=lt,strategy=bloom,count=%d", half); + + testutil_check(session->open_cursor(session, joinuri, NULL, NULL, + &jcursor)); + testutil_check(session->join(session, jcursor, cursor1, "compare=ge")); + testutil_check(session->join(session, jcursor, cursor2, bloom_cfg)); + + /* Expect one value returned */ + testutil_assert(jcursor->next(jcursor) == 0); + i = 0; + testutil_assert(jcursor->get_key(jcursor, &i) == 0); + testutil_assert(i == (int)half); + i = j = 0; + memset(&d, 0, sizeof(d)); + testutil_assert(jcursor->get_value(jcursor, &i, &j, &d) == 0); + testutil_assert(i == (int)half); + testutil_assert(j == (int)half); + testutil_assert(d.size == 4100); + for (i = 0; i < 4100; i++) + testutil_assert(((char *)d.data)[i] == 7); 
+ + testutil_assert(jcursor->next(jcursor) == WT_NOTFOUND); + + /* + * Make sure there have been 2 accesses to the main table, + * explained in the discussion above. + */ + get_stat_total(session, jcursor, "accesses to the main table", + &maincount); + testutil_assert(maincount == 2); + + testutil_cleanup(opts); + + return (0); +} diff --git a/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c b/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c new file mode 100644 index 00000000000..5eaca3279b6 --- /dev/null +++ b/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c @@ -0,0 +1,159 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "test_util.h" + +/* + * JIRA ticket reference: WT-2535 + * Test case description: This is a test case that looks for lost updates to + * a single record. That is multiple threads each do the same number of read + * modify write operations on a single record. At the end verify that the + * data contains the expected value. + * Failure mode: Check that the data is correct at the end of the run. + */ + +void (*custom_die)(void) = NULL; + +void *thread_insert_race(void *); + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + WT_CURSOR *c; + WT_SESSION *session; + clock_t ce, cs; + pthread_t id[100]; + uint64_t current_value; + int i; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + opts->nthreads = 10; + opts->nrecords = 1000; + opts->table_type = TABLE_ROW; + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + testutil_check(wiredtiger_open(opts->home, NULL, + "create," + "cache_size=2G," + "eviction=(threads_max=5)," + "statistics=(fast)", &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + testutil_check(session->create(session, opts->uri, + "key_format=Q,value_format=Q," + "leaf_page_max=32k,")); + + /* Create the single record. 
*/ + testutil_check( + session->open_cursor(session, opts->uri, NULL, NULL, &c)); + c->set_key(c, 1); + c->set_value(c, 0); + testutil_check(c->insert(c)); + testutil_check(c->close(c)); + cs = clock(); + for (i = 0; i < (int)opts->nthreads; ++i) { + testutil_check(pthread_create( + &id[i], NULL, thread_insert_race, (void *)opts)); + } + while (--i >= 0) + testutil_check(pthread_join(id[i], NULL)); + testutil_check( + session->open_cursor(session, opts->uri, NULL, NULL, &c)); + c->set_key(c, 1); + testutil_check(c->search(c)); + testutil_check(c->get_value(c, &current_value)); + if (current_value != opts->nthreads * opts->nrecords) { + fprintf(stderr, + "ERROR: didn't get expected number of changes\n"); + fprintf(stderr, "got: %" PRIu64 ", expected: %" PRIu64 "\n", + current_value, opts->nthreads * opts->nrecords); + return (EXIT_FAILURE); + } + testutil_check(session->close(session, NULL)); + ce = clock(); + printf("%" PRIu64 ": %.2lf\n", + opts->nrecords, (ce - cs) / (double)CLOCKS_PER_SEC); + + testutil_cleanup(opts); + return (EXIT_SUCCESS); +} + +/* + * Append to a table in a "racy" fashion - that is attempt to insert the + * same record another thread is likely to also be inserting. 
+ */ +void * +thread_insert_race(void *arg) +{ + TEST_OPTS *opts; + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_SESSION *session; + uint64_t i, value; + int ret; + + opts = (TEST_OPTS *)arg; + conn = opts->conn; + + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check(session->open_cursor( + session, opts->uri, NULL, NULL, &cursor)); + + printf("Running insert thread\n"); + for (i = 0; i < opts->nrecords; ++i) { + testutil_check( + session->begin_transaction(session, "isolation=snapshot")); + cursor->set_key(cursor, 1); + testutil_check(cursor->search(cursor)); + testutil_check(cursor->get_value(cursor, &value)); + cursor->set_key(cursor, 1); + cursor->set_value(cursor, value + 1); + if ((ret = cursor->update(cursor)) != 0) { + if (ret == WT_ROLLBACK) { + testutil_check(session->rollback_transaction( + session, NULL)); + i--; + continue; + } + printf("Error in update: %d\n", ret); + } + testutil_check(session->commit_transaction(session, NULL)); + if (i % 10000 == 0) { + printf("insert: %" PRIu64 "\r", i); + fflush(stdout); + } + } + if (i > 10000) + printf("\n"); + + opts->running = false; + + return (NULL); +} diff --git a/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c b/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c new file mode 100644 index 00000000000..4ffc9194646 --- /dev/null +++ b/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c @@ -0,0 +1,222 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. 
+ * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: WT-2592 + * Test case description: This is an adaptation of the join parts of + * ex_schema.c, but written as a test. Though we have join tests in the + * Python test suite, the Python API uses raw mode for cursors, so errors + * that are specific to non-raw mode are undetected in Python. + * Failure mode: The failure seen in WT-2592 was that no items were returned + * by a join. + */ +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <wiredtiger.h> + +/* The C struct for the data we are storing in a WiredTiger table. 
*/ +typedef struct { + char country[5]; + uint16_t year; + uint64_t population; +} POP_RECORD; + +static POP_RECORD pop_data[] = { + { "AU", 1900, 4000000 }, + { "AU", 1950, 8267337 }, + { "AU", 2000, 19053186 }, + { "CAN", 1900, 5500000 }, + { "CAN", 1950, 14011422 }, + { "CAN", 2000, 31099561 }, + { "UK", 1900, 369000000 }, + { "UK", 1950, 50127000 }, + { "UK", 2000, 59522468 }, + { "USA", 1900, 76212168 }, + { "USA", 1950, 150697361 }, + { "USA", 2000, 301279593 }, + { "", 0, 0 } +}; + +void (*custom_die)(void) = NULL; + +int +main(int argc, char *argv[]) +{ + POP_RECORD *p; + TEST_OPTS *opts, _opts; + WT_CURSOR *country_cursor, *country_cursor2, *cursor, *join_cursor, + *subjoin_cursor, *year_cursor; + WT_SESSION *session; + const char *country, *tablename; + char countryuri[256], joinuri[256], yearuri[256]; + uint64_t recno, population; + uint16_t year; + int count, ret; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + tablename = strchr(opts->uri, ':'); + testutil_assert(tablename != NULL); + tablename++; + snprintf(countryuri, sizeof(countryuri), "index:%s:country", tablename); + snprintf(yearuri, sizeof(yearuri), "index:%s:year", tablename); + snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri); + + testutil_check(wiredtiger_open(opts->home, NULL, + "create,cache_size=200M", &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + testutil_check(session->create(session, opts->uri, + "key_format=r," + "value_format=5sHQ," + "columns=(id,country,year,population)")); + + /* Create an index with a simple key. */ + testutil_check(session->create(session, + countryuri, "columns=(country)")); + + /* Create an immutable index. */ + testutil_check(session->create(session, + yearuri, "columns=(year),immutable")); + + /* Insert the records into the table. 
*/ + testutil_check(session->open_cursor( + session, opts->uri, NULL, "append", &cursor)); + count = 1; + for (p = pop_data; p->year != 0; p++) { + cursor->set_key(cursor, count); + cursor->set_value(cursor, p->country, p->year, p->population); + testutil_check(cursor->insert(cursor)); + count++; + } + testutil_check(cursor->close(cursor)); + + /* Open cursors needed by the join. */ + testutil_check(session->open_cursor(session, + joinuri, NULL, NULL, &join_cursor)); + testutil_check(session->open_cursor(session, + countryuri, NULL, NULL, &country_cursor)); + testutil_check(session->open_cursor(session, + yearuri, NULL, NULL, &year_cursor)); + + /* select values WHERE country == "AU" AND year > 1900 */ + country_cursor->set_key(country_cursor, "AU\0\0\0"); + testutil_check(country_cursor->search(country_cursor)); + testutil_check(session->join(session, join_cursor, country_cursor, + "compare=eq,count=10")); + year_cursor->set_key(year_cursor, (uint16_t)1900); + testutil_check(year_cursor->search(year_cursor)); + testutil_check(session->join(session, join_cursor, year_cursor, + "compare=gt,count=10,strategy=bloom")); + + count = 0; + /* List the values that are joined */ + while ((ret = join_cursor->next(join_cursor)) == 0) { + testutil_check(join_cursor->get_key(join_cursor, &recno)); + testutil_check(join_cursor->get_value(join_cursor, &country, + &year, &population)); + printf("ID %" PRIu64, recno); + printf( + ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", + country, year, population); + count++; + } + testutil_assert(ret == WT_NOTFOUND); + testutil_assert(count == 2); + + testutil_check(join_cursor->close(join_cursor)); + testutil_check(year_cursor->close(year_cursor)); + testutil_check(country_cursor->close(country_cursor)); + + /* Open cursors needed by the join. 
*/ + testutil_check(session->open_cursor(session, + joinuri, NULL, NULL, &join_cursor)); + testutil_check(session->open_cursor(session, + joinuri, NULL, NULL, &subjoin_cursor)); + testutil_check(session->open_cursor(session, + countryuri, NULL, NULL, &country_cursor)); + testutil_check(session->open_cursor(session, + countryuri, NULL, NULL, &country_cursor2)); + testutil_check(session->open_cursor(session, + yearuri, NULL, NULL, &year_cursor)); + + /* + * select values WHERE (country == "AU" OR country == "UK") + * AND year > 1900 + * + * First, set up the join representing the country clause. + */ + country_cursor->set_key(country_cursor, "AU\0\0\0"); + testutil_check(country_cursor->search(country_cursor)); + testutil_check(session->join(session, subjoin_cursor, country_cursor, + "operation=or,compare=eq,count=10")); + country_cursor2->set_key(country_cursor2, "UK\0\0\0"); + testutil_check(country_cursor2->search(country_cursor2)); + testutil_check(session->join(session, subjoin_cursor, country_cursor2, + "operation=or,compare=eq,count=10")); + + /* Join that to the top join, and add the year clause */ + testutil_check(session->join(session, join_cursor, subjoin_cursor, + NULL)); + year_cursor->set_key(year_cursor, (uint16_t)1900); + testutil_check(year_cursor->search(year_cursor)); + testutil_check(session->join(session, join_cursor, year_cursor, + "compare=gt,count=10,strategy=bloom")); + + count = 0; + /* List the values that are joined */ + while ((ret = join_cursor->next(join_cursor)) == 0) { + testutil_check(join_cursor->get_key(join_cursor, &recno)); + testutil_check(join_cursor->get_value(join_cursor, &country, + &year, &population)); + printf("ID %" PRIu64, recno); + printf( + ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", + country, year, population); + count++; + } + testutil_assert(ret == WT_NOTFOUND); + testutil_assert(count == 4); + + testutil_check(join_cursor->close(join_cursor)); + 
testutil_check(subjoin_cursor->close(subjoin_cursor)); + testutil_check(country_cursor->close(country_cursor)); + testutil_check(country_cursor2->close(country_cursor2)); + testutil_check(year_cursor->close(year_cursor)); + testutil_check(session->close(session, NULL)); + + testutil_cleanup(opts); + return (EXIT_SUCCESS); +} diff --git a/src/third_party/wiredtiger/test/cursor_order/Makefile.am b/src/third_party/wiredtiger/test/cursor_order/Makefile.am index c0c0ed639bf..c98cf1fa047 100644 --- a/src/third_party/wiredtiger/test/cursor_order/Makefile.am +++ b/src/third_party/wiredtiger/test/cursor_order/Makefile.am @@ -1,13 +1,15 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/test/utility +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = cursor_order -cursor_order_LDADD = $(top_builddir)/libwiredtiger.la - cursor_order_SOURCES = cursor_order_file.c cursor_order_ops.c cursor_order.c + +cursor_order_LDADD = $(top_builddir)/test/utility/libtest_util.la +cursor_order_LDADD +=$(top_builddir)/libwiredtiger.la cursor_order_LDFLAGS = -static TESTS = $(noinst_PROGRAMS) clean-local: - rm -rf WiredTiger* wt.* *.core __stats + rm -rf WT_TEST *.core diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c index d8cfc0c1421..aa351e6fea8 100644 --- a/src/third_party/wiredtiger/test/cursor_order/cursor_order.c +++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c @@ -34,7 +34,8 @@ static FILE *logfp; /* Log file */ static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *); static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *); -static void onint(int); +static void onint(int) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void shutdown(void); static int usage(void); static void wt_connect(SHARED_CONFIG *, char *); diff --git 
a/src/third_party/wiredtiger/test/cursor_order/cursor_order.h b/src/third_party/wiredtiger/test/cursor_order/cursor_order.h index dd49fce124b..98a7d03c6f3 100644 --- a/src/third_party/wiredtiger/test/cursor_order/cursor_order.h +++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order.h @@ -28,7 +28,7 @@ #include <signal.h> -#include "test_util.i" +#include "test_util.h" #define FNAME "file:cursor_order.%03d" /* File name */ diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c index d44505ab2f3..a2185dd123f 100644 --- a/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c +++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c @@ -59,22 +59,16 @@ ops_start(SHARED_CONFIG *cfg) total_nops = 0; /* Create per-thread structures. */ - if ((run_info = calloc( - (size_t)(cfg->reverse_scanners + cfg->append_inserters), - sizeof(*run_info))) == NULL) - testutil_die(errno, "calloc"); - - if ((tids = calloc( - (size_t)(cfg->reverse_scanners + cfg->append_inserters), - sizeof(*tids))) == NULL) - testutil_die(errno, "calloc"); + run_info = dcalloc((size_t) + (cfg->reverse_scanners + cfg->append_inserters), sizeof(*run_info)); + tids = dcalloc((size_t) + (cfg->reverse_scanners + cfg->append_inserters), sizeof(*tids)); /* Create the files and load the initial records. 
*/ for (i = 0; i < cfg->append_inserters; ++i) { run_info[i].cfg = cfg; if (i == 0 || cfg->multiple_files) { - if ((run_info[i].name = malloc(64)) == NULL) - testutil_die(errno, "malloc"); + run_info[i].name = dmalloc(64); snprintf(run_info[i].name, 64, FNAME, (int)i); /* Vary by orders of magnitude */ @@ -96,8 +90,7 @@ ops_start(SHARED_CONFIG *cfg) offset = i + cfg->append_inserters; run_info[offset].cfg = cfg; if (cfg->multiple_files) { - if ((run_info[offset].name = malloc(64)) == NULL) - testutil_die(errno, "malloc"); + run_info[offset].name = dmalloc(64); /* Have reverse scans read from tables with writes. */ name_index = i % cfg->append_inserters; snprintf( diff --git a/src/third_party/wiredtiger/test/fops/Makefile.am b/src/third_party/wiredtiger/test/fops/Makefile.am index a4fa7175f1b..f8a76de82bc 100644 --- a/src/third_party/wiredtiger/test/fops/Makefile.am +++ b/src/third_party/wiredtiger/test/fops/Makefile.am @@ -1,10 +1,13 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/test/utility +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = t -t_LDADD = $(top_builddir)/libwiredtiger.la t_SOURCES = thread.h file.c fops.c t.c + +t_LDADD = $(top_builddir)/test/utility/libtest_util.la +t_LDADD +=$(top_builddir)/libwiredtiger.la t_LDFLAGS = -static # Run this during a "make check" smoke test. diff --git a/src/third_party/wiredtiger/test/fops/fops.c b/src/third_party/wiredtiger/test/fops/fops.c index 3333ff16858..3c4de161423 100644 --- a/src/third_party/wiredtiger/test/fops/fops.c +++ b/src/third_party/wiredtiger/test/fops/fops.c @@ -59,10 +59,8 @@ fop_start(u_int nthreads) tids = NULL; /* Silence GCC 4.1 warning. */ /* Create statistics and thread structures. 
*/ - if ((run_stats = calloc( - (size_t)(nthreads), sizeof(*run_stats))) == NULL || - (tids = calloc((size_t)(nthreads), sizeof(*tids))) == NULL) - testutil_die(errno, "calloc"); + run_stats = dcalloc((size_t)(nthreads), sizeof(*run_stats)); + tids = dcalloc((size_t)(nthreads), sizeof(*tids)); (void)gettimeofday(&start, NULL); diff --git a/src/third_party/wiredtiger/test/fops/t.c b/src/third_party/wiredtiger/test/fops/t.c index 24994404c7c..bf0588d5a53 100644 --- a/src/third_party/wiredtiger/test/fops/t.c +++ b/src/third_party/wiredtiger/test/fops/t.c @@ -41,7 +41,8 @@ static char home[512]; static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *); static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *); -static void onint(int); +static void onint(int) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void shutdown(void); static int usage(void); static void wt_startup(char *); diff --git a/src/third_party/wiredtiger/test/fops/thread.h b/src/third_party/wiredtiger/test/fops/thread.h index f9707c14590..89b7984a166 100644 --- a/src/third_party/wiredtiger/test/fops/thread.h +++ b/src/third_party/wiredtiger/test/fops/thread.h @@ -26,25 +26,9 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include <sys/types.h> -#ifndef _WIN32 -#include <sys/time.h> -#endif +#include "test_util.h" -#include <errno.h> -#include <inttypes.h> -#ifndef _WIN32 -#include <pthread.h> -#endif #include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef _WIN32 -#include <unistd.h> -#endif - -#include "test_util.i" extern WT_CONNECTION *conn; /* WiredTiger connection */ diff --git a/src/third_party/wiredtiger/test/format/Makefile.am b/src/third_party/wiredtiger/test/format/Makefile.am index 8a2e2b49e4b..5d946e5b63d 100644 --- a/src/third_party/wiredtiger/test/format/Makefile.am +++ b/src/third_party/wiredtiger/test/format/Makefile.am @@ -1,21 +1,24 @@ -AM_CPPFLAGS = -I$(top_builddir) \ - -I$(top_srcdir)/src/include -I$(top_srcdir)/test/utility +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility if HAVE_BERKELEY_DB -AM_CPPFLAGS += -DHAVE_BERKELEY_DB \ - -DBERKELEY_DB_PATH=\"$(BERKELEY_DB_PATH)\" -I$(BERKELEY_DB_PATH)/include +AM_CPPFLAGS +=-DHAVE_BERKELEY_DB +AM_CPPFLAGS +=-DBERKELEY_DB_PATH=\"$(BERKELEY_DB_PATH)\" +AM_CPPFLAGS +=-I$(BERKELEY_DB_PATH)/include endif noinst_PROGRAMS = t noinst_SCRIPTS = s_dumpcmp t_SOURCES =\ - config.h format.h backup.c bulk.c compact.c config.c lrt.c ops.c \ - rebalance.c salvage.c t.c util.c wts.c + backup.c bulk.c compact.c config.c lrt.c ops.c rebalance.c \ + salvage.c t.c util.c wts.c if HAVE_BERKELEY_DB t_SOURCES += bdb.c endif -t_LDADD = $(top_builddir)/libwiredtiger.la +t_LDADD = $(top_builddir)/test/utility/libtest_util.la +t_LDADD +=$(top_builddir)/libwiredtiger.la if HAVE_BERKELEY_DB t_LDADD += -L$(BERKELEY_DB_PATH)/lib -ldb endif diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c index 2b1463bd0e3..69fdf771de9 100644 --- a/src/third_party/wiredtiger/test/format/backup.c +++ b/src/third_party/wiredtiger/test/format/backup.c @@ -38,7 +38,7 @@ check_copy(void) WT_CONNECTION 
*conn; WT_SESSION *session; - wts_open(g.home_backup, 0, &conn); + wts_open(g.home_backup, false, &conn); testutil_checkfmt( conn->open_session(conn, NULL, NULL, &session), @@ -53,27 +53,30 @@ check_copy(void) /* * copy_file -- - * Copy a single file into the backup directory. + * Copy a single file into the backup directories. */ static void -copy_file(const char *name) +copy_file(WT_SESSION *session, const char *name) { size_t len; - char *cmd; - - len = strlen(g.home) + strlen(g.home_backup) + strlen(name) * 2 + 20; - cmd = dmalloc(len); - (void)snprintf(cmd, len, - "cp %s/%s %s/%s", g.home, name, g.home_backup, name); - testutil_checkfmt(system(cmd), "backup copy: %s", cmd); - free(cmd); - - len = strlen(g.home) + strlen(g.home_backup2) + strlen(name) * 2 + 20; - cmd = dmalloc(len); - (void)snprintf(cmd, len, - "cp %s/%s %s/%s", g.home, name, g.home_backup2, name); - testutil_checkfmt(system(cmd), "backup copy: %s", cmd); - free(cmd); + char *first, *second; + + len = strlen("BACKUP") + strlen(name) + 10; + first = dmalloc(len); + (void)snprintf(first, len, "BACKUP/%s", name); + testutil_check(__wt_copy_and_sync(session, name, first)); + + /* + * Save another copy of the original file to make debugging recovery + * errors easier. + */ + len = strlen("BACKUP_COPY") + strlen(name) + 10; + second = dmalloc(len); + (void)snprintf(second, len, "BACKUP_COPY/%s", name); + testutil_check(__wt_copy_and_sync(session, first, second)); + + free(first); + free(second); } /* @@ -85,10 +88,11 @@ backup(void *arg) { WT_CONNECTION *conn; WT_CURSOR *backup_cursor; + WT_DECL_RET; WT_SESSION *session; - u_int period; - int ret; - const char *key; + u_int incremental, period; + bool full; + const char *config, *key; (void)(arg); @@ -102,48 +106,86 @@ backup(void *arg) testutil_check(conn->open_session(conn, NULL, NULL, &session)); /* - * Perform a backup at somewhere under 10 seconds (so we get at - * least one done), and then at 45 second intervals. 
+ * Perform a full backup at somewhere under 10 seconds (that way there's + * at least one), then at larger intervals, optionally do incremental + * backups between full backups. */ - for (period = mmrand(NULL, 1, 10);; period = 45) { + incremental = 0; + for (period = mmrand(NULL, 1, 10);; period = mmrand(NULL, 20, 45)) { /* Sleep for short periods so we don't make the run wait. */ while (period > 0 && !g.workers_finished) { --period; sleep(1); } - if (g.workers_finished) - break; - /* Lock out named checkpoints */ + /* + * We can't drop named checkpoints while there's a backup in + * progress, serialize backups with named checkpoints. Wait + * for the checkpoint to complete, otherwise backups might be + * starved out. + */ testutil_check(pthread_rwlock_wrlock(&g.backup_lock)); + if (g.workers_finished) { + testutil_check(pthread_rwlock_unlock(&g.backup_lock)); + break; + } - /* Re-create the backup directory. */ - testutil_checkfmt( - system(g.home_backup_init), - "%s", "backup directory creation failed"); + if (incremental) { + config = "target=(\"log:\")"; + full = false; + } else { + /* Re-create the backup directory. */ + testutil_checkfmt( + system(g.home_backup_init), + "%s", "backup directory creation failed"); + + config = NULL; + full = true; + } /* - * open_cursor can return EBUSY if a metadata operation is - * currently happening - retry in that case. + * open_cursor can return EBUSY if concurrent with a metadata + * operation, retry in that case. 
*/ - while ((ret = session->open_cursor(session, - "backup:", NULL, NULL, &backup_cursor)) == EBUSY) - sleep(1); + while ((ret = session->open_cursor( + session, "backup:", NULL, config, &backup_cursor)) == EBUSY) + __wt_yield(); if (ret != 0) testutil_die(ret, "session.open_cursor: backup"); while ((ret = backup_cursor->next(backup_cursor)) == 0) { testutil_check( backup_cursor->get_key(backup_cursor, &key)); - copy_file(key); + copy_file(session, key); } + if (ret != WT_NOTFOUND) + testutil_die(ret, "backup-cursor"); + + /* After an incremental backup, truncate the log files. */ + if (incremental) + testutil_check(session->truncate( + session, "log:", backup_cursor, NULL, NULL)); testutil_check(backup_cursor->close(backup_cursor)); testutil_check(pthread_rwlock_unlock(&g.backup_lock)); - check_copy(); + /* + * If automatic log archival isn't configured, optionally do + * incremental backups after each full backup. If we're not + * doing any more incrementals, verify the backup (we can't + * verify intermediate states, once we perform recovery on the + * backup database, we can't do any more incremental backups). + */ + if (full) + incremental = + g.c_logging_archive ? 
1 : mmrand(NULL, 1, 5); + if (--incremental == 0) + check_copy(); } + if (incremental != 0) + check_copy(); + testutil_check(session->close(session, NULL)); return (NULL); diff --git a/src/third_party/wiredtiger/test/format/bdb.c b/src/third_party/wiredtiger/test/format/bdb.c index 823fc8ff888..e56281f2c3e 100644 --- a/src/third_party/wiredtiger/test/format/bdb.c +++ b/src/third_party/wiredtiger/test/format/bdb.c @@ -30,7 +30,7 @@ #include "format.h" static DBT key, value; -static uint8_t *keybuf; +static WT_ITEM keyitem; static int bdb_compare_reverse(DB *dbp, const DBT *k1, const DBT *k2 @@ -78,7 +78,7 @@ bdb_open(void) assert(db->cursor(db, NULL, &dbc, 0) == 0); g.dbc = dbc; - key_gen_setup(&keybuf); + key_gen_setup(&keyitem); } void @@ -95,8 +95,7 @@ bdb_close(void) assert(db->close(db, 0) == 0); assert(dbenv->close(dbenv, 0) == 0); - free(keybuf); - keybuf = NULL; + free(keyitem.mem); } void @@ -107,9 +106,9 @@ bdb_insert( DBC *dbc; key.data = (void *)key_data; - key.size = (uint32_t)key_size; + key.size = (u_int32_t)key_size; value.data = (void *)value_data; - value.size = (uint32_t)value_size; + value.size = (u_int32_t)value_size; dbc = g.dbc; @@ -144,12 +143,11 @@ void bdb_read(uint64_t keyno, void *valuep, size_t *valuesizep, int *notfoundp) { DBC *dbc = g.dbc; - size_t size; int ret; - key_gen(keybuf, &size, keyno); - key.data = keybuf; - key.size = (uint32_t)size; + key_gen(&keyitem, keyno); + key.data = (void *)keyitem.data; + key.size = (u_int32_t)keyitem.size; *notfoundp = 0; if ((ret = dbc->get(dbc, &key, &value, DB_SET)) != 0) { @@ -165,25 +163,20 @@ bdb_read(uint64_t keyno, void *valuep, size_t *valuesizep, int *notfoundp) void bdb_update(const void *arg_key, size_t arg_key_size, - const void *arg_value, size_t arg_value_size, int *notfoundp) + const void *arg_value, size_t arg_value_size) { DBC *dbc = g.dbc; int ret; key.data = (void *)arg_key; - key.size = (uint32_t)arg_key_size; + key.size = (u_int32_t)arg_key_size; value.data = (void 
*)arg_value; - value.size = (uint32_t)arg_value_size; + value.size = (u_int32_t)arg_value_size; - *notfoundp = 0; - if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0) { - if (ret != DB_NOTFOUND) { - testutil_die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}", - (int)key.size, (char *)key.data, - (int)value.size, (char *)value.data); - } - *notfoundp = 1; - } + if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0) + testutil_die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}", + (int)key.size, (char *)key.data, + (int)value.size, (char *)value.data); } void @@ -193,12 +186,12 @@ bdb_remove(uint64_t keyno, int *notfoundp) size_t size; int ret; - key_gen(keybuf, &size, keyno); - key.data = keybuf; - key.size = (uint32_t)size; + key_gen(&keyitem, keyno); + key.data = (void *)keyitem.data; + key.size = (u_int32_t)keyitem.size; bdb_read(keyno, &value.data, &size, notfoundp); - value.size = (uint32_t)size; + value.size = (u_int32_t)size; if (*notfoundp) return; diff --git a/src/third_party/wiredtiger/test/format/bulk.c b/src/third_party/wiredtiger/test/format/bulk.c index 64b005d294f..dab23bed404 100644 --- a/src/third_party/wiredtiger/test/format/bulk.c +++ b/src/third_party/wiredtiger/test/format/bulk.c @@ -33,13 +33,12 @@ wts_load(void) { WT_CONNECTION *conn; WT_CURSOR *cursor; + WT_DECL_RET; WT_ITEM key, value; WT_SESSION *session; - uint8_t *keybuf, *valbuf; bool is_bulk; conn = g.wts_conn; - keybuf = valbuf = NULL; testutil_check(conn->open_session(conn, NULL, NULL, &session)); @@ -63,8 +62,8 @@ wts_load(void) is_bulk ? "bulk,append" : NULL, &cursor)); /* Set up the key/value buffers. */ - key_gen_setup(&keybuf); - val_gen_setup(NULL, &valbuf); + key_gen_setup(&key); + val_gen_setup(NULL, &value); for (;;) { if (++g.key_cnt > g.c_rows) { @@ -73,13 +72,11 @@ wts_load(void) } /* Report on progress every 100 inserts. 
*/ - if (g.key_cnt % 100 == 0) + if (g.key_cnt % 1000 == 0) track("bulk load", g.key_cnt, NULL); - key_gen(keybuf, &key.size, (uint64_t)g.key_cnt); - key.data = keybuf; - val_gen(NULL, valbuf, &value.size, (uint64_t)g.key_cnt); - value.data = valbuf; + key_gen(&key, g.key_cnt); + val_gen(NULL, &value, g.key_cnt); switch (g.type) { case FIX: @@ -88,7 +85,7 @@ wts_load(void) cursor->set_value(cursor, *(uint8_t *)value.data); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu32 " {0x%02" PRIx8 "}", + "%-10s %" PRIu64 " {0x%02" PRIx8 "}", "bulk V", g.key_cnt, ((uint8_t *)value.data)[0]); break; @@ -98,7 +95,7 @@ wts_load(void) cursor->set_value(cursor, &value); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu32 " {%.*s}", "bulk V", + "%-10s %" PRIu64 " {%.*s}", "bulk V", g.key_cnt, (int)value.size, (char *)value.data); break; @@ -106,18 +103,40 @@ wts_load(void) cursor->set_key(cursor, &key); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu32 " {%.*s}", "bulk K", + "%-10s %" PRIu64 " {%.*s}", "bulk K", g.key_cnt, (int)key.size, (char *)key.data); cursor->set_value(cursor, &value); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu32 " {%.*s}", "bulk V", + "%-10s %" PRIu64 " {%.*s}", "bulk V", g.key_cnt, (int)value.size, (char *)value.data); break; } - testutil_check(cursor->insert(cursor)); + /* + * We don't want to size the cache to ensure the initial data + * set can load in the in-memory case, guaranteeing the load + * succeeds probably means future updates are also guaranteed + * to succeed, which isn't what we want. If we run out of space + * in the initial load, reset the row counter and continue. + * + * Decrease inserts, they can't be successful if we're at the + * cache limit, and increase the delete percentage to get some + * extra space once the run starts. 
+ */ + if ((ret = cursor->insert(cursor)) != 0) { + if (ret != WT_CACHE_FULL) + testutil_die(ret, "cursor.insert"); + g.rows = --g.key_cnt; + g.c_rows = (uint32_t)g.key_cnt; + + if (g.c_insert_pct > 5) + g.c_insert_pct = 5; + if (g.c_delete_pct < 20) + g.c_delete_pct += 20; + break; + } #ifdef HAVE_BERKELEY_DB if (SINGLETHREADED) @@ -133,6 +152,6 @@ wts_load(void) testutil_check(session->close(session, NULL)); - free(keybuf); - free(valbuf); + free(key.mem); + free(value.mem); } diff --git a/src/third_party/wiredtiger/test/format/compact.c b/src/third_party/wiredtiger/test/format/compact.c index a75ee4f2adf..240e5553697 100644 --- a/src/third_party/wiredtiger/test/format/compact.c +++ b/src/third_party/wiredtiger/test/format/compact.c @@ -36,9 +36,9 @@ void * compact(void *arg) { WT_CONNECTION *conn; + WT_DECL_RET; WT_SESSION *session; u_int period; - int ret; (void)(arg); diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index 042316d8344..1b09916bd88 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -35,6 +35,7 @@ static void config_encryption(void); static const char *config_file_type(u_int); static CONFIG *config_find(const char *, size_t); static void config_in_memory(void); +static void config_in_memory_check(void); static int config_is_perm(const char *); static void config_isolation(void); static void config_lrt(void); @@ -43,6 +44,7 @@ static void config_map_compression(const char *, u_int *); static void config_map_encryption(const char *, u_int *); static void config_map_file_type(const char *, u_int *); static void config_map_isolation(const char *, u_int *); +static void config_reset(void); /* * config_setup -- @@ -54,14 +56,10 @@ config_setup(void) CONFIG *cp; /* Clear any temporary values. 
*/ - config_clear(); + config_reset(); - /* - * Periodically, run in-memory; don't do it on the first run, all our - * smoke tests would hit it. - */ - if (!config_is_perm("in_memory") && g.run_cnt % 20 == 19) - g.c_in_memory = 1; + /* Periodically run in-memory. */ + config_in_memory(); /* * Choose a data source type and a file type: they're interrelated (LSM @@ -145,7 +143,7 @@ config_setup(void) /* Some data-sources don't support user-specified collations. */ if (DATASOURCE("helium") || DATASOURCE("kvsbdb")) - g.c_reverse = 0; + config_single("reverse=off", 0); /* * Periodically, run single-threaded so we can compare the results to @@ -159,7 +157,6 @@ config_setup(void) config_compression("compression"); config_compression("logging_compression"); config_encryption(); - config_in_memory(); config_isolation(); config_lrt(); @@ -169,7 +166,7 @@ config_setup(void) * Don't do it on the first run, all our smoke tests would hit it. */ if (!g.replay && g.run_cnt % 10 == 9 && !config_is_perm("delete_pct")) - g.c_delete_pct = 0; + config_single("delete_pct=0", 0); /* * If this is an LSM run, set the cache size and crank up the insert @@ -187,9 +184,12 @@ config_setup(void) if (!config_is_perm("cache") && g.c_cache < g.c_threads) g.c_cache = g.c_threads; + /* Give in-memory configuration a final review. */ + config_in_memory_check(); + /* Make the default maximum-run length 20 minutes. */ if (!config_is_perm("timer")) - g.c_timer = 20; + config_single("timer=20", 0); /* * Key/value minimum/maximum are related, correct unless specified by @@ -329,43 +329,89 @@ config_encryption(void) /* * config_in_memory -- - * In-memory configuration. + * Periodically set up an in-memory configuration. */ static void config_in_memory(void) { + /* + * Configure in-memory before configuring anything else, in-memory has + * many related requirements. 
Don't configure in-memory if there's any + * incompatible configurations, so we don't have to configure in-memory + * every time we configure something like LSM, that's too painful. + */ + if (config_is_perm("backups")) + return; + if (config_is_perm("checkpoints")) + return; + if (config_is_perm("compression")) + return; + if (config_is_perm("data_source") && DATASOURCE("lsm")) + return; + if (config_is_perm("logging")) + return; + if (config_is_perm("rebalance")) + return; + if (config_is_perm("salvage")) + return; + if (config_is_perm("verify")) + return; + + if (!config_is_perm("in_memory") && mmrand(NULL, 1, 20) == 1) + g.c_in_memory = 1; +} + +/* + * config_in_memory_check -- + * In-memory configuration review. + */ +static void +config_in_memory_check(void) +{ + uint32_t cache; + if (g.c_in_memory == 0) return; /* Turn off a lot of stuff. */ if (!config_is_perm("backups")) - g.c_backups = 0; + config_single("backups=off", 0); if (!config_is_perm("checkpoints")) - g.c_checkpoints = 0; - if (!config_is_perm("compression")) { - g.c_compression = dstrdup("none"); - g.c_compression_flag = COMPRESS_NONE; - } + config_single("checkpoints=off", 0); + if (!config_is_perm("compression")) + config_single("compression=none", 0); if (!config_is_perm("logging")) - g.c_logging = 0; + config_single("logging=off", 0); if (!config_is_perm("rebalance")) - g.c_rebalance = 0; + config_single("rebalance=off", 0); if (!config_is_perm("salvage")) - g.c_salvage = 0; + config_single("salvage=off", 0); if (!config_is_perm("verify")) - g.c_verify = 0; + config_single("verify=off", 0); /* - * Ensure there is 250MB of cache per thread; keep keys/values small, - * overflow items aren't an issue for in-memory configurations and it - * keeps us from overflowing the cache. + * Keep keys/values small, overflow items aren't an issue for in-memory + * configurations and it keeps us from overflowing the cache. 
*/ - if (!config_is_perm("cache")) - g.c_cache = g.c_threads * 250; if (!config_is_perm("key_max")) - g.c_value_max = 64; + config_single("key_max=32", 0); if (!config_is_perm("value_max")) - g.c_value_max = 128; + config_single("value_max=80", 0); + + /* + * Size the cache relative to the initial data set, use 2x the base + * size as a minimum. + */ + if (!config_is_perm("cache")) { + cache = g.c_value_max; + if (g.type == ROW) + cache += g.c_key_max; + cache *= g.c_rows; + cache *= 2; + cache /= WT_MEGABYTE; + if (g.c_cache < cache) + g.c_cache = cache; + } } /* @@ -413,11 +459,11 @@ config_lrt(void) * stores. */ if (g.type == FIX) { - if (g.c_long_running_txn && config_is_perm("long_running_txn")) + if (config_is_perm("long_running_txn")) testutil_die(EINVAL, "long_running_txn not supported with fixed-length " "column store"); - g.c_long_running_txn = 0; + config_single("long_running_txn=off", 0); } } @@ -503,18 +549,36 @@ config_file(const char *name) /* * config_clear -- - * Clear per-run values. + * Clear all configuration values. */ void config_clear(void) { CONFIG *cp; - /* Clear configuration data. */ + /* Clear all allocated configuration data. */ + for (cp = c; cp->name != NULL; ++cp) + if (cp->vstr != NULL) { + free((void *)*cp->vstr); + *cp->vstr = NULL; + } + free(g.uri); + g.uri = NULL; +} + +/* + * config_reset -- + * Clear per-run configuration values. + */ +static void +config_reset(void) +{ + CONFIG *cp; + + /* Clear temporary allocated configuration data. 
*/ for (cp = c; cp->name != NULL; ++cp) { F_CLR(cp, C_TEMP); - if (!F_ISSET(cp, C_PERM) && - F_ISSET(cp, C_STRING) && cp->vstr != NULL) { + if (!F_ISSET(cp, C_PERM) && cp->vstr != NULL) { free((void *)*cp->vstr); *cp->vstr = NULL; } @@ -531,7 +595,7 @@ void config_single(const char *s, int perm) { CONFIG *cp; - uint32_t v; + long v; char *p; const char *ep; @@ -557,43 +621,59 @@ config_single(const char *s, int perm) exit(EXIT_FAILURE); } + /* + * Free the previous setting if a configuration has been + * passed in twice. + */ + if (*cp->vstr != NULL) { + free(*cp->vstr); + *cp->vstr = NULL; + } + if (strncmp(s, "checksum", strlen("checksum")) == 0) { config_map_checksum(ep, &g.c_checksum_flag); - *cp->vstr = strdup(ep); + *cp->vstr = dstrdup(ep); } else if (strncmp( s, "compression", strlen("compression")) == 0) { config_map_compression(ep, &g.c_compression_flag); - *cp->vstr = strdup(ep); + *cp->vstr = dstrdup(ep); } else if (strncmp( s, "encryption", strlen("encryption")) == 0) { config_map_encryption(ep, &g.c_encryption_flag); - *cp->vstr = strdup(ep); + *cp->vstr = dstrdup(ep); } else if (strncmp(s, "isolation", strlen("isolation")) == 0) { config_map_isolation(ep, &g.c_isolation_flag); - *cp->vstr = strdup(ep); + *cp->vstr = dstrdup(ep); } else if (strncmp(s, "file_type", strlen("file_type")) == 0) { config_map_file_type(ep, &g.type); - *cp->vstr = strdup(config_file_type(g.type)); + *cp->vstr = dstrdup(config_file_type(g.type)); } else if (strncmp(s, "logging_compression", strlen("logging_compression")) == 0) { config_map_compression(ep, &g.c_logging_compression_flag); - *cp->vstr = strdup(ep); + *cp->vstr = dstrdup(ep); } else { free((void *)*cp->vstr); - *cp->vstr = strdup(ep); + *cp->vstr = dstrdup(ep); } - if (*cp->vstr == NULL) - testutil_die(errno, "malloc"); return; } - v = (uint32_t)strtoul(ep, &p, 10); - if (*p != '\0') { - fprintf(stderr, "%s: %s: illegal numeric value\n", - g.progname, s); - exit(EXIT_FAILURE); + v = -1; + if (F_ISSET(cp, C_BOOL)) 
{ + if (strncmp(ep, "off", strlen("off")) == 0) + v = 0; + else if (strncmp(ep, "on", strlen("on")) == 0) + v = 1; + } + if (v == -1) { + v = strtol(ep, &p, 10); + if (*p != '\0') { + fprintf(stderr, "%s: %s: illegal numeric value\n", + g.progname, s); + exit(EXIT_FAILURE); + } } if (F_ISSET(cp, C_BOOL)) { if (v != 0 && v != 1) { @@ -607,7 +687,7 @@ config_single(const char *s, int perm) g.progname, s, cp->min, cp->maxset); exit(EXIT_FAILURE); } - *cp->v = v; + *cp->v = (uint32_t)v; } /* diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h index a17614bc044..16fffb6fafe 100644 --- a/src/third_party/wiredtiger/test/format/config.h +++ b/src/third_party/wiredtiger/test/format/config.h @@ -294,6 +294,10 @@ static CONFIG c[] = { "maximum time to run in minutes (default 20 minutes)", C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_timer, NULL }, + { "transaction-frequency", + "percent operations done inside an explicit transaction", + 0x0, 1, 100, 100, &g.c_txn_freq, NULL }, + { "value_max", "maximum size of values", 0x0, 32, 4096, MEGABYTE(10), &g.c_value_max, NULL }, diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index a129c5395fd..ad5f408ac30 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -26,42 +26,13 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include <sys/stat.h> -#ifndef _WIN32 -#include <sys/time.h> -#endif -#include <sys/types.h> - -#include <assert.h> -#include <ctype.h> -#include <errno.h> -#include <fcntl.h> -#include <inttypes.h> -#include <limits.h> -#ifndef _WIN32 -#include <pthread.h> -#endif -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef _WIN32 -#include <unistd.h> -#endif -#include <time.h> - -#include "test_util.i" +#include "test_util.h" #ifdef BDB +#include <assert.h> #include <db.h> #endif -#if defined(__GNUC__) -#define WT_GCC_ATTRIBUTE(x) __attribute__(x) -#else -#define WT_GCC_ATTRIBUTE(x) -#endif - #define EXTPATH "../../ext/" /* Extensions path */ #define LZ4_PATH \ @@ -109,7 +80,6 @@ typedef struct { char *home; /* Home directory */ char *home_backup; /* Hot-backup directory */ - char *home_backup2; /* Saved Hot-backup directory */ char *home_backup_init; /* Initialize backup command */ char *home_bdb; /* BDB directory */ char *home_config; /* Run CONFIG file path */ @@ -145,7 +115,8 @@ typedef struct { int replay; /* Replaying a run. 
*/ int workers_finished; /* Operations completed */ - pthread_rwlock_t backup_lock; /* Hot backup running */ + pthread_rwlock_t backup_lock; /* Backup running */ + pthread_rwlock_t checkpoint_lock; /* Checkpoint running */ WT_RAND_STATE rnd; /* Global RNG state */ @@ -224,6 +195,7 @@ typedef struct { uint32_t c_statistics_server; uint32_t c_threads; uint32_t c_timer; + uint32_t c_txn_freq; uint32_t c_value_max; uint32_t c_value_min; uint32_t c_verify; @@ -288,7 +260,7 @@ typedef struct { #define TINFO_COMPLETE 2 /* Finished */ #define TINFO_JOINED 3 /* Resolved */ volatile int state; /* state */ -} TINFO WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT))); +} TINFO WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT); #ifdef HAVE_BERKELEY_DB void bdb_close(void); @@ -297,7 +269,7 @@ void bdb_np(int, void *, size_t *, void *, size_t *, int *); void bdb_open(void); void bdb_read(uint64_t, void *, size_t *, int *); void bdb_remove(uint64_t, int *); -void bdb_update(const void *, size_t, const void *, size_t, int *); +void bdb_update(const void *, size_t, const void *, size_t); #endif void *backup(void *); @@ -308,25 +280,23 @@ void config_file(const char *); void config_print(int); void config_setup(void); void config_single(const char *, int); -void *dmalloc(size_t); -char *dstrdup(const char *); void fclose_and_clear(FILE **); -void key_gen(uint8_t *, size_t *, uint64_t); -void key_gen_insert(WT_RAND_STATE *, uint8_t *, size_t *, uint64_t); -void key_gen_setup(uint8_t **); +void key_gen(WT_ITEM *, uint64_t); +void key_gen_insert(WT_RAND_STATE *, WT_ITEM *, uint64_t); +void key_gen_setup(WT_ITEM *); void key_len_setup(void); void *lrt(void *); void path_setup(const char *); -int read_row(WT_CURSOR *, WT_ITEM *, uint64_t, int); +int read_row(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); uint32_t rng(WT_RAND_STATE *); void track(const char *, uint64_t, TINFO *); -void val_gen(WT_RAND_STATE *, uint8_t *, size_t *, uint64_t); -void val_gen_setup(WT_RAND_STATE *, uint8_t **); 
+void val_gen(WT_RAND_STATE *, WT_ITEM *, uint64_t); +void val_gen_setup(WT_RAND_STATE *, WT_ITEM *); void wts_close(void); -void wts_create(void); void wts_dump(const char *, int); +void wts_init(void); void wts_load(void); -void wts_open(const char *, int, WT_CONNECTION **); +void wts_open(const char *, bool, WT_CONNECTION **); void wts_ops(int); void wts_read_scan(void); void wts_rebalance(void); diff --git a/src/third_party/wiredtiger/test/format/lrt.c b/src/third_party/wiredtiger/test/format/lrt.c index 451d2f4fa3c..937525522fa 100644 --- a/src/third_party/wiredtiger/test/format/lrt.c +++ b/src/third_party/wiredtiger/test/format/lrt.c @@ -43,17 +43,15 @@ lrt(void *arg) uint64_t keyno, saved_keyno; u_int period; int pinned, ret; - uint8_t bitfield, *keybuf; + uint8_t bitfield; void *buf; (void)(arg); /* Unused parameter */ saved_keyno = 0; /* [-Werror=maybe-uninitialized] */ - key_gen_setup(&keybuf); - memset(&key, 0, sizeof(key)); - key.data = keybuf; - memset(&value, 0, sizeof(value)); + key_gen_setup(&key); + val_gen_setup(NULL, &value); buf = NULL; buf_len = buf_size = 0; @@ -67,8 +65,8 @@ lrt(void *arg) for (pinned = 0;;) { if (pinned) { /* Re-read the record at the end of the table. 
*/ - while ((ret = read_row(cursor, - &key, saved_keyno, 1)) == WT_ROLLBACK) + while ((ret = read_row( + cursor, &key, &value, saved_keyno)) == WT_ROLLBACK) ; if (ret != 0) testutil_die(ret, @@ -112,7 +110,7 @@ lrt(void *arg) (u_int)(g.key_cnt - g.key_cnt / 10), (u_int)g.key_cnt); while ((ret = read_row(cursor, - &key, saved_keyno, 1)) == WT_ROLLBACK) + &key, &value, saved_keyno)) == WT_ROLLBACK) ; } while (ret == WT_NOTFOUND); if (ret != 0) @@ -129,9 +127,8 @@ lrt(void *arg) if (ret != 0) testutil_die(ret, "cursor.get_value: %" PRIu64, saved_keyno); - if (buf_len < value.size && - (buf = realloc(buf, buf_len = value.size)) == NULL) - testutil_die(errno, "malloc"); + if (buf_len < value.size) + buf = drealloc(buf, buf_len = value.size); memcpy(buf, value.data, buf_size = value.size); /* @@ -142,7 +139,7 @@ lrt(void *arg) do { keyno = mmrand(NULL, 1, (u_int)g.key_cnt / 5); while ((ret = read_row(cursor, - &key, keyno, 1)) == WT_ROLLBACK) + &key, &value, keyno)) == WT_ROLLBACK) ; } while (ret == WT_NOTFOUND); if (ret != 0) @@ -165,7 +162,8 @@ lrt(void *arg) testutil_check(session->close(session, NULL)); - free(keybuf); + free(key.mem); + free(value.mem); free(buf); return (NULL); diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c index 5d66f4d5391..c97d82809a1 100644 --- a/src/third_party/wiredtiger/test/format/ops.c +++ b/src/third_party/wiredtiger/test/format/ops.c @@ -28,14 +28,14 @@ #include "format.h" -static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); -static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, int *); -static int col_update(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); -static int nextprev(WT_CURSOR *, int, int *); +static int col_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); +static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t); +static int col_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int nextprev(WT_CURSOR *, int); static 
void *ops(void *); -static int row_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); -static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, int *); -static int row_update(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int row_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t); +static int row_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); static void table_append_init(void); #ifdef HAVE_BERKELEY_DB @@ -103,8 +103,7 @@ wts_ops(int lastrun) } /* Create thread structure; start the worker threads. */ - if ((tinfo = calloc((size_t)g.c_threads, sizeof(*tinfo))) == NULL) - testutil_die(errno, "calloc"); + tinfo = dcalloc((size_t)g.c_threads, sizeof(*tinfo)); for (i = 0; i < g.c_threads; ++i) { tinfo[i].id = (int)i + 1; tinfo[i].state = TINFO_RUNNING; @@ -184,6 +183,7 @@ wts_ops(int lastrun) (void)pthread_join(compact_tid, NULL); if (!SINGLETHREADED && g.c_long_running_txn) (void)pthread_join(lrt_tid, NULL); + g.workers_finished = 0; if (g.logging != 0) { (void)g.wt_api->msg_printf(g.wt_api, session, @@ -193,57 +193,229 @@ wts_ops(int lastrun) } /* - * ops_session_config -- - * Return the current session configuration. + * isolation_config -- + * Return an isolation configuration. */ -static const char * -ops_session_config(WT_RAND_STATE *rnd) +static inline const char * +isolation_config(WT_RAND_STATE *rnd, bool *iso_snapshotp) { u_int v; - /* - * The only current session configuration is the isolation level. 
- */ if ((v = g.c_isolation_flag) == ISOLATION_RANDOM) v = mmrand(rnd, 2, 4); switch (v) { case ISOLATION_READ_UNCOMMITTED: + *iso_snapshotp = false; return ("isolation=read-uncommitted"); case ISOLATION_READ_COMMITTED: + *iso_snapshotp = false; return ("isolation=read-committed"); case ISOLATION_SNAPSHOT: default: + *iso_snapshotp = true; return ("isolation=snapshot"); } } +typedef struct { + uint64_t keyno; /* Row number */ + + void *kdata; /* If an insert, the generated key */ + size_t ksize; + size_t kmemsize; + + void *vdata; /* If not a delete, the value */ + size_t vsize; + size_t vmemsize; + + bool deleted; /* Delete operation */ + bool insert; /* Insert operation */ +} SNAP_OPS; + +/* + * snap_track -- + * Add a single snapshot isolation returned value to the list. + */ +static void +snap_track(SNAP_OPS *snap, uint64_t keyno, WT_ITEM *key, WT_ITEM *value) +{ + snap->keyno = keyno; + if (key == NULL) + snap->insert = false; + else { + snap->insert = true; + + if (snap->kmemsize < key->size) { + snap->kdata = drealloc(snap->kdata, key->size); + snap->kmemsize = key->size; + } + memcpy(snap->kdata, key->data, snap->ksize = key->size); + } + if (value == NULL) + snap->deleted = true; + else { + snap->deleted = false; + if (snap->vmemsize < value->size) { + snap->vdata = drealloc(snap->vdata, value->size); + snap->vmemsize = value->size; + } + memcpy(snap->vdata, value->data, snap->vsize = value->size); + } +} + +/* + * snap_check -- + * Check snapshot isolation operations are repeatable. + */ +static int +snap_check(WT_CURSOR *cursor, + SNAP_OPS *start, SNAP_OPS *stop, WT_ITEM *key, WT_ITEM *value) +{ + WT_DECL_RET; + SNAP_OPS *p; + uint8_t bitfield; + + for (; start < stop; ++start) { + /* Check for subsequent changes to this record. */ + for (p = start + 1; p < stop && p->keyno != start->keyno; ++p) + ; + if (p != stop) + continue; + + /* + * Retrieve the key/value pair by key. 
Row-store inserts have a + * unique generated key we saved, else generate the key from the + * key number. + */ + if (start->insert == 0) { + switch (g.type) { + case FIX: + case VAR: + cursor->set_key(cursor, start->keyno); + break; + case ROW: + key_gen(key, start->keyno); + cursor->set_key(cursor, key); + break; + } + } else { + key->data = start->kdata; + key->size = start->ksize; + cursor->set_key(cursor, key); + } + if ((ret = cursor->search(cursor)) == 0) { + if (g.type == FIX) { + testutil_check( + cursor->get_value(cursor, &bitfield)); + *(uint8_t *)(value->data) = bitfield; + value->size = 1; + } else + testutil_check( + cursor->get_value(cursor, value)); + } else + if (ret != WT_NOTFOUND) + return (ret); + + /* Check for simple matches. */ + if (ret == 0 && !start->deleted && + value->size == start->vsize && + memcmp(value->data, start->vdata, value->size) == 0) + continue; + if (ret == WT_NOTFOUND && start->deleted) + continue; + + /* + * In fixed length stores, zero values at the end of the key + * space are returned as not-found, and not-found row reads + * are saved as zero values. Map back-and-forth for simplicity. + */ + if (g.type == FIX) { + if (ret == WT_NOTFOUND && + start->vsize == 1 && *(uint8_t *)start->vdata == 0) + continue; + if (start->deleted && + value->size == 1 && *(uint8_t *)value->data == 0) + continue; + } + + /* Things went pear-shaped. */ + switch (g.type) { + case FIX: + testutil_die(ret, + "snap_check: %" PRIu64 " search: " + "expected {0x%02x}, found {0x%02x}", + start->keyno, + start->deleted ? 0 : *(uint8_t *)start->vdata, + ret == WT_NOTFOUND ? 0 : *(uint8_t *)value->data); + /* NOTREACHED */ + case ROW: + testutil_die(ret, + "snap_check: %.*s search: " + "expected {%.*s}, found {%.*s}", + (int)key->size, key->data, + start->deleted ? + (int)strlen("deleted") : (int)start->vsize, + start->deleted ? "deleted" : start->vdata, + ret == WT_NOTFOUND ? + (int)strlen("deleted") : (int)value->size, + ret == WT_NOTFOUND ? 
"deleted" : value->data); + /* NOTREACHED */ + case VAR: + testutil_die(ret, + "snap_check: %" PRIu64 " search: " + "expected {%.*s}, found {%.*s}", + start->keyno, + start->deleted ? + (int)strlen("deleted") : (int)start->vsize, + start->deleted ? "deleted" : start->vdata, + ret == WT_NOTFOUND ? + (int)strlen("deleted") : (int)value->size, + ret == WT_NOTFOUND ? "deleted" : value->data); + /* NOTREACHED */ + } + } + return (0); +} + +/* + * ops -- + * Per-thread operations. + */ static void * ops(void *arg) { + SNAP_OPS *snap, snap_list[64]; TINFO *tinfo; WT_CONNECTION *conn; WT_CURSOR *cursor, *cursor_insert; + WT_DECL_RET; + WT_ITEM *key, _key, *value, _value; WT_SESSION *session; - WT_ITEM key, value; uint64_t keyno, ckpt_op, reset_op, session_op; - uint32_t op; - uint8_t *keybuf, *valbuf; - u_int np; - int ckpt_available, dir, insert, intxn, notfound, readonly; + uint32_t op, rnd; + u_int i; + int dir; char *ckpt_config, ckpt_name[64]; + bool ckpt_available, intxn, iso_snapshot, positioned, readonly; tinfo = arg; conn = g.wts_conn; - keybuf = valbuf = NULL; - readonly = 0; /* -Wconditional-uninitialized */ + readonly = false; /* -Wconditional-uninitialized */ + + /* Initialize tracking of snapshot isolation transaction returns. */ + snap = NULL; + iso_snapshot = false; + memset(snap_list, 0, sizeof(snap_list)); /* Initialize the per-thread random number generator. */ __wt_random_init(&tinfo->rnd); /* Set up the default key and value buffers. */ - key_gen_setup(&keybuf); - val_gen_setup(&tinfo->rnd, &valbuf); + key = &_key; + key_gen_setup(key); + value = &_value; + val_gen_setup(&tinfo->rnd, value); /* Set the first operation where we'll create sessions and cursors. */ session_op = 0; @@ -252,12 +424,12 @@ ops(void *arg) /* Set the first operation where we'll perform checkpoint operations. */ ckpt_op = g.c_checkpoints ? mmrand(&tinfo->rnd, 100, 10000) : 0; - ckpt_available = 0; + ckpt_available = false; /* Set the first operation where we'll reset the session. 
*/ reset_op = mmrand(&tinfo->rnd, 100, 10000); - for (intxn = 0; !tinfo->quit; ++tinfo->ops) { + for (intxn = false; !tinfo->quit; ++tinfo->ops) { /* * We can't checkpoint or swap sessions/cursors while in a * transaction, resolve any running transaction. @@ -267,7 +439,7 @@ ops(void *arg) testutil_check( session->commit_transaction(session, NULL)); ++tinfo->commit; - intxn = 0; + intxn = false; } /* Open up a new session and cursors. */ @@ -276,8 +448,8 @@ ops(void *arg) if (session != NULL) testutil_check(session->close(session, NULL)); - testutil_check(conn->open_session(conn, NULL, - ops_session_config(&tinfo->rnd), &session)); + testutil_check( + conn->open_session(conn, NULL, NULL, &session)); /* * 10% of the time, perform some read-only operations @@ -299,7 +471,7 @@ ops(void *arg) session_op += 250; /* Checkpoints are read-only. */ - readonly = 1; + readonly = true; } else { /* * Open two cursors: one for overwriting and one @@ -325,21 +497,32 @@ ops(void *arg) session_op += mmrand(&tinfo->rnd, 100, 5000); /* Updates supported. */ - readonly = 0; + readonly = false; } } /* Checkpoint the database. */ if (tinfo->ops == ckpt_op && g.c_checkpoints) { /* - * LSM and data-sources don't support named checkpoints, + * Checkpoints are single-threaded inside WiredTiger, + * skip our checkpoint if another thread is already + * doing one. + */ + ret = pthread_rwlock_trywrlock(&g.checkpoint_lock); + if (ret == EBUSY) + goto skip_checkpoint; + testutil_check(ret); + + /* + * LSM and data-sources don't support named checkpoints * and we can't drop a named checkpoint while there's a - * cursor open on it, otherwise 20% of the time name the - * checkpoint. + * backup in progress, otherwise name the checkpoint 5% + * of the time. 
*/ - if (DATASOURCE("helium") || DATASOURCE("kvsbdb") || - DATASOURCE("lsm") || - readonly || mmrand(&tinfo->rnd, 1, 5) == 1) + if (mmrand(&tinfo->rnd, 1, 20) != 1 || + DATASOURCE("helium") || + DATASOURCE("kvsbdb") || DATASOURCE("lsm") || + pthread_rwlock_trywrlock(&g.backup_lock) == EBUSY) ckpt_config = NULL; else { (void)snprintf(ckpt_name, sizeof(ckpt_name), @@ -347,18 +530,22 @@ ops(void *arg) ckpt_config = ckpt_name; } - /* Named checkpoints lock out backups */ - if (ckpt_config != NULL) - testutil_check( - pthread_rwlock_wrlock(&g.backup_lock)); - - testutil_checkfmt( - session->checkpoint(session, ckpt_config), - "%s", ckpt_config == NULL ? "" : ckpt_config); + ret = session->checkpoint(session, ckpt_config); + /* + * We may be trying to create a named checkpoint while + * we hold a cursor open to the previous checkpoint. + * Tolerate EBUSY. + */ + if (ret != 0 && ret != EBUSY) + testutil_die(ret, "%s", + ckpt_config == NULL ? "" : ckpt_config); + ret = 0; if (ckpt_config != NULL) testutil_check( pthread_rwlock_unlock(&g.backup_lock)); + testutil_check( + pthread_rwlock_unlock(&g.checkpoint_lock)); /* Rephrase the checkpoint name for cursor open. */ if (ckpt_config == NULL) @@ -367,9 +554,9 @@ ops(void *arg) else (void)snprintf(ckpt_name, sizeof(ckpt_name), "checkpoint=thread-%d", tinfo->id); - ckpt_available = 1; + ckpt_available = true; - /* Pick the next checkpoint operation. */ +skip_checkpoint: /* Pick the next checkpoint operation. */ ckpt_op += mmrand(&tinfo->rnd, 5000, 20000); } @@ -386,21 +573,24 @@ ops(void *arg) } /* - * If we're not single-threaded and we're not in a transaction, - * start a transaction 20% of the time. + * If we're not single-threaded and not in a transaction, choose + * an isolation level and start a transaction some percentage of + * the time. 
*/ if (!SINGLETHREADED && - !intxn && mmrand(&tinfo->rnd, 1, 10) >= 8) { + !intxn && mmrand(&tinfo->rnd, 1, 100) >= g.c_txn_freq) { + testutil_check( + session->reconfigure(session, + isolation_config(&tinfo->rnd, &iso_snapshot))); testutil_check( session->begin_transaction(session, NULL)); - intxn = 1; - } - insert = notfound = 0; + snap = iso_snapshot ? snap_list : NULL; + intxn = true; + } keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows); - key.data = keybuf; - value.data = valbuf; + positioned = false; /* * Perform some number of operations: the percentage of deletes, @@ -414,27 +604,30 @@ ops(void *arg) ++tinfo->remove; switch (g.type) { case ROW: - /* - * If deleting a non-existent record, the cursor - * won't be positioned, and so can't do a next. - */ - if (row_remove(cursor, &key, keyno, ¬found)) - goto deadlock; + ret = row_remove(cursor, key, keyno); break; case FIX: case VAR: - if (col_remove(cursor, &key, keyno, ¬found)) - goto deadlock; + ret = col_remove(cursor, key, keyno); break; } + if (ret == 0) { + positioned = true; + if (snap != NULL && (size_t) + (snap - snap_list) < WT_ELEMENTS(snap_list)) + snap_track(snap++, keyno, NULL, NULL); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + } } else if (op < g.c_delete_pct + g.c_insert_pct) { ++tinfo->insert; switch (g.type) { case ROW: - if (row_insert( - tinfo, cursor, &key, &value, keyno)) - goto deadlock; - insert = 1; + key_gen_insert(&tinfo->rnd, key, keyno); + val_gen(&tinfo->rnd, value, keyno); + ret = row_insert(cursor, key, value, keyno); break; case FIX: case VAR: @@ -447,37 +640,60 @@ ops(void *arg) goto skip_insert; /* Insert, then reset the insert cursor. 
*/ - if (col_insert(tinfo, - cursor_insert, &key, &value, &keyno)) - goto deadlock; + val_gen(&tinfo->rnd, value, g.rows + 1); + ret = col_insert( + cursor_insert, key, value, &keyno); testutil_check( cursor_insert->reset(cursor_insert)); - - insert = 1; break; } + positioned = false; + if (ret == 0) { + if (snap != NULL && (size_t) + (snap - snap_list) < WT_ELEMENTS(snap_list)) + snap_track(snap++, keyno, + g.type == ROW ? key : NULL, value); + } else + if (ret == WT_ROLLBACK && intxn) + goto deadlock; } else if ( op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) { ++tinfo->update; switch (g.type) { case ROW: - if (row_update( - tinfo, cursor, &key, &value, keyno)) - goto deadlock; + key_gen(key, keyno); + val_gen(&tinfo->rnd, value, keyno); + ret = row_update(cursor, key, value, keyno); break; case FIX: case VAR: -skip_insert: if (col_update(tinfo, - cursor, &key, &value, keyno)) - goto deadlock; +skip_insert: val_gen(&tinfo->rnd, value, keyno); + ret = col_update(cursor, key, value, keyno); break; } + if (ret == 0) { + positioned = true; + if (snap != NULL && (size_t) + (snap - snap_list) < WT_ELEMENTS(snap_list)) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + } } else { ++tinfo->search; - if (read_row(cursor, &key, keyno, 0)) - if (intxn) + ret = read_row(cursor, key, value, keyno); + if (ret == 0) { + positioned = true; + if (snap != NULL && (size_t) + (snap - snap_list) < WT_ELEMENTS(snap_list)) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) goto deadlock; - continue; + } } /* @@ -485,55 +701,64 @@ skip_insert: if (col_update(tinfo, * insert, do a small number of next/prev cursor operations in * a random direction. 
*/ - if (!insert) { + if (positioned) { dir = (int)mmrand(&tinfo->rnd, 0, 1); - for (np = 0; np < mmrand(&tinfo->rnd, 1, 100); ++np) { - if (notfound) - break; - if (nextprev(cursor, dir, ¬found)) + for (i = 0; i < mmrand(&tinfo->rnd, 1, 100); ++i) { + if ((ret = nextprev(cursor, dir)) == 0) + continue; + if (ret == WT_ROLLBACK && intxn) goto deadlock; + break; } } - /* Read to confirm the operation. */ - ++tinfo->search; - if (read_row(cursor, &key, keyno, 0)) - goto deadlock; - /* Reset the cursor: there is no reason to keep pages pinned. */ testutil_check(cursor->reset(cursor)); /* - * If we're in the transaction, commit 40% of the time and + * If we're in a transaction, commit 40% of the time and * rollback 10% of the time. */ - if (intxn) - switch (mmrand(&tinfo->rnd, 1, 10)) { - case 1: case 2: case 3: case 4: /* 40% */ - testutil_check(session->commit_transaction( - session, NULL)); - ++tinfo->commit; - intxn = 0; - break; - case 5: /* 10% */ - if (0) { -deadlock: ++tinfo->deadlock; - } - testutil_check(session->rollback_transaction( - session, NULL)); - ++tinfo->rollback; - intxn = 0; - break; - default: - break; + if (!intxn || (rnd = mmrand(&tinfo->rnd, 1, 10)) > 5) + continue; + + /* + * Ending the transaction. If in snapshot isolation, repeat the + * operations and confirm they're unchanged. 
+ */ + if (snap != NULL && (ret = snap_check( + cursor, snap_list, snap, key, value)) == WT_ROLLBACK) + goto deadlock; + + switch (rnd) { + case 1: case 2: case 3: case 4: /* 40% */ + testutil_check( + session->commit_transaction(session, NULL)); + ++tinfo->commit; + break; + case 5: /* 10% */ + if (0) { +deadlock: ++tinfo->deadlock; } + testutil_check( + session->rollback_transaction(session, NULL)); + ++tinfo->rollback; + break; + } + + intxn = false; + snap = NULL; } if (session != NULL) testutil_check(session->close(session, NULL)); - free(keybuf); - free(valbuf); + for (i = 0; i < WT_ELEMENTS(snap_list); ++i) { + free(snap_list[i].kdata); + free(snap_list[i].vdata); + } + free(key->mem); + free(value->mem); tinfo->state = TINFO_COMPLETE; return (NULL); @@ -548,40 +773,47 @@ wts_read_scan(void) { WT_CONNECTION *conn; WT_CURSOR *cursor; - WT_ITEM key; + WT_DECL_RET; + WT_ITEM key, value; WT_SESSION *session; - uint64_t cnt, last_cnt; - uint8_t *keybuf; + uint64_t keyno, last_keyno; conn = g.wts_conn; - /* Set up the default key buffer. */ - key_gen_setup(&keybuf); + /* Set up the default key/value buffers. */ + key_gen_setup(&key); + val_gen_setup(NULL, &value); /* Open a session and cursor pair. */ - testutil_check(conn->open_session( - conn, NULL, ops_session_config(NULL), &session)); - testutil_check(session->open_cursor( - session, g.uri, NULL, NULL, &cursor)); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check( + session->open_cursor(session, g.uri, NULL, NULL, &cursor)); /* Check a random subset of the records using the key. 
*/ - for (last_cnt = cnt = 0; cnt < g.key_cnt;) { - cnt += mmrand(NULL, 1, 17); - if (cnt > g.rows) - cnt = g.rows; - if (cnt - last_cnt > 1000) { - track("read row scan", cnt, NULL); - last_cnt = cnt; + for (last_keyno = keyno = 0; keyno < g.key_cnt;) { + keyno += mmrand(NULL, 1, 17); + if (keyno > g.rows) + keyno = g.rows; + if (keyno - last_keyno > 1000) { + track("read row scan", keyno, NULL); + last_keyno = keyno; } - key.data = keybuf; - testutil_checkfmt( - read_row(cursor, &key, cnt, 0), "%s", "read_scan"); + switch (ret = read_row(cursor, &key, &value, keyno)) { + case 0: + case WT_NOTFOUND: + case WT_ROLLBACK: + break; + default: + testutil_die( + ret, "wts_read_scan: read row %" PRIu64, keyno); + } } testutil_check(session->close(session, NULL)); - free(keybuf); + free(key.mem); + free(value.mem); } /* @@ -589,10 +821,9 @@ wts_read_scan(void) * Read and verify a single element in a row- or column-store file. */ int -read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err) +read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { static int sn = 0; - WT_ITEM value; WT_SESSION *session; int exact, ret; uint8_t bitfield; @@ -611,7 +842,7 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err) cursor->set_key(cursor, keyno); break; case ROW: - key_gen((uint8_t *)key->data, &key->size, keyno); + key_gen(key, keyno); cursor->set_key(cursor, key); break; } @@ -628,37 +859,33 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err) switch (ret) { case 0: if (g.type == FIX) { - ret = cursor->get_value(cursor, &bitfield); - value.data = &bitfield; - value.size = 1; + testutil_check(cursor->get_value(cursor, &bitfield)); + *(uint8_t *)(value->data) = bitfield; + value->size = 1; } else - ret = cursor->get_value(cursor, &value); + testutil_check(cursor->get_value(cursor, value)); break; - case WT_ROLLBACK: - return (WT_ROLLBACK); case WT_NOTFOUND: - if (notfound_err) - return 
(WT_NOTFOUND); + /* + * In fixed length stores, zero values at the end of the key + * space are returned as not found. Treat this the same as + * a zero value in the key space, to match BDB's behavior. + */ + if (g.type == FIX) { + *(uint8_t *)(value->data) = 0; + value->size = 1; + ret = 0; + } break; + case WT_ROLLBACK: + return (WT_ROLLBACK); default: testutil_die(ret, "read_row: read row %" PRIu64, keyno); } #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) - return (0); - - /* - * In fixed length stores, zero values at the end of the key space are - * returned as not found. Treat this the same as a zero value in the - * key space, to match BDB's behavior. - */ - if (ret == WT_NOTFOUND && g.type == FIX) { - bitfield = 0; - value.data = &bitfield; - value.size = 1; - ret = 0; - } + return (ret); /* Retrieve the BDB value. */ { @@ -669,20 +896,20 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err) /* Check for not-found status. */ if (notfound_chk("read_row", ret, notfound, keyno)) - return (0); + return (ret); /* Compare the two. */ - if (value.size != bdb_value.size || - memcmp(value.data, bdb_value.data, value.size) != 0) { + if (value->size != bdb_value.size || + memcmp(value->data, bdb_value.data, value->size) != 0) { fprintf(stderr, "read_row: value mismatch %" PRIu64 ":\n", keyno); print_item("bdb", &bdb_value); - print_item(" wt", &value); + print_item(" wt", value); testutil_die(0, NULL); } } #endif - return (0); + return (ret); } /* @@ -690,21 +917,19 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err) * Read and verify the next/prev element in a row- or column-store file. */ static int -nextprev(WT_CURSOR *cursor, int next, int *notfoundp) +nextprev(WT_CURSOR *cursor, int next) { + WT_DECL_RET; WT_ITEM key, value; uint64_t keyno; - int ret; uint8_t bitfield; const char *which; + keyno = 0; which = next ? "next" : "prev"; - keyno = 0; - ret = next ? 
cursor->next(cursor) : cursor->prev(cursor); - if (ret == WT_ROLLBACK) - return (WT_ROLLBACK); - if (ret == 0) + switch (ret = (next ? cursor->next(cursor) : cursor->prev(cursor))) { + case 0: switch (g.type) { case FIX: if ((ret = cursor->get_key(cursor, &keyno)) == 0 && @@ -722,13 +947,20 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp) ret = cursor->get_value(cursor, &value); break; } - if (ret != 0 && ret != WT_NOTFOUND) + if (ret != 0) + testutil_die(ret, "nextprev: get_key/get_value"); + break; + case WT_NOTFOUND: + break; + case WT_ROLLBACK: + return (WT_ROLLBACK); + default: testutil_die(ret, "%s", which); - *notfoundp = (ret == WT_NOTFOUND); + } #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) - return (0); + return (ret); { WT_ITEM bdb_key, bdb_value; @@ -743,7 +975,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp) &bdb_value.data, &bdb_value.size, ¬found); if (notfound_chk( next ? "nextprev(next)" : "nextprev(prev)", ret, notfound, keyno)) - return (0); + return (ret); /* Compare the two. */ if (g.type == ROW) { @@ -794,7 +1026,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp) } } #endif - return (0); + return (ret); } /* @@ -802,43 +1034,38 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp) * Update a row in a row-store file. 
*/ static int -row_update(TINFO *tinfo, - WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +row_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { + WT_DECL_RET; WT_SESSION *session; - int ret; session = cursor->session; - key_gen((uint8_t *)key->data, &key->size, keyno); - val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno); - /* Log the operation */ if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s{%.*s}\n%-10s{%.*s}", - "putK", (int)key->size, (char *)key->data, - "putV", (int)value->size, (char *)value->data); + "%-10s{%.*s}, {%.*s}", + "put", + (int)key->size, key->data, (int)value->size, value->data); cursor->set_key(cursor, key); cursor->set_value(cursor, value); - ret = cursor->update(cursor); - if (ret == WT_ROLLBACK) + switch (ret = cursor->update(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: return (WT_ROLLBACK); - if (ret != 0 && ret != WT_NOTFOUND) + default: testutil_die(ret, "row_update: update row %" PRIu64 " by key", keyno); + } #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - { - int notfound; - - bdb_update(key->data, key->size, value->data, value->size, ¬found); - (void)notfound_chk("row_update", ret, notfound, keyno); - } + bdb_update(key->data, key->size, value->data, value->size); #endif return (0); } @@ -848,16 +1075,13 @@ row_update(TINFO *tinfo, * Update a row in a column-store file. 
*/ static int -col_update(TINFO *tinfo, - WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +col_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { + WT_DECL_RET; WT_SESSION *session; - int ret; session = cursor->session; - val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno); - /* Log the operation */ if (g.logging == LOG_OPS) { if (g.type == FIX) @@ -877,23 +1101,22 @@ col_update(TINFO *tinfo, cursor->set_value(cursor, *(uint8_t *)value->data); else cursor->set_value(cursor, value); - ret = cursor->update(cursor); - if (ret == WT_ROLLBACK) + switch (ret = cursor->update(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: return (WT_ROLLBACK); - if (ret != 0 && ret != WT_NOTFOUND) + default: testutil_die(ret, "col_update: %" PRIu64, keyno); + } #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - { - int notfound; - - key_gen((uint8_t *)key->data, &key->size, keyno); - bdb_update(key->data, key->size, value->data, value->size, ¬found); - (void)notfound_chk("col_update", ret, notfound, keyno); - } + key_gen(key, keyno); + bdb_update(key->data, key->size, value->data, value->size); #else (void)key; /* [-Wunused-variable] */ #endif @@ -912,8 +1135,7 @@ table_append_init(void) g.append_cnt = 0; free(g.append); - if ((g.append = calloc(g.append_max, sizeof(uint64_t))) == NULL) - testutil_die(errno, "calloc"); + g.append = dcalloc(g.append_max, sizeof(uint64_t)); } /* @@ -1005,43 +1227,38 @@ table_append(uint64_t keyno) * Insert a row in a row-store file. 
*/ static int -row_insert(TINFO *tinfo, - WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +row_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { + WT_DECL_RET; WT_SESSION *session; - int ret; session = cursor->session; - key_gen_insert(&tinfo->rnd, (uint8_t *)key->data, &key->size, keyno); - val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno); - /* Log the operation */ if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s{%.*s}\n%-10s{%.*s}", - "insertK", (int)key->size, (char *)key->data, - "insertV", (int)value->size, (char *)value->data); + "%-10s{%.*s}, {%.*s}", + "insert", + (int)key->size, key->data, (int)value->size, value->data); cursor->set_key(cursor, key); cursor->set_value(cursor, value); - ret = cursor->insert(cursor); - if (ret == WT_ROLLBACK) + switch (ret = cursor->insert(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: return (WT_ROLLBACK); - if (ret != 0 && ret != WT_NOTFOUND) + default: testutil_die(ret, "row_insert: insert row %" PRIu64 " by key", keyno); + } #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - { - int notfound; - - bdb_update(key->data, key->size, value->data, value->size, ¬found); - (void)notfound_chk("row_insert", ret, notfound, keyno); - } + bdb_update(key->data, key->size, value->data, value->size); #endif return (0); } @@ -1051,24 +1268,25 @@ row_insert(TINFO *tinfo, * Insert an element in a column-store file. 
*/ static int -col_insert(TINFO *tinfo, - WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) +col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) { + WT_DECL_RET; WT_SESSION *session; uint64_t keyno; - int ret; session = cursor->session; - val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, g.rows + 1); - if (g.type == FIX) cursor->set_value(cursor, *(uint8_t *)value->data); else cursor->set_value(cursor, value); - if ((ret = cursor->insert(cursor)) != 0) { - if (ret == WT_ROLLBACK) - return (WT_ROLLBACK); + switch (ret = cursor->insert(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + default: testutil_die(ret, "cursor.insert"); } testutil_check(cursor->get_key(cursor, &keyno)); @@ -1093,12 +1311,8 @@ col_insert(TINFO *tinfo, if (!SINGLETHREADED) return (0); - { - int notfound; - - key_gen((uint8_t *)key->data, &key->size, keyno); - bdb_update(key->data, key->size, value->data, value->size, ¬found); - } + key_gen(key, keyno); + bdb_update(key->data, key->size, value->data, value->size); #else (void)key; /* [-Wunused-variable] */ #endif @@ -1110,14 +1324,14 @@ col_insert(TINFO *tinfo, * Remove an row from a row-store file. */ static int -row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) +row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) { + WT_DECL_RET; WT_SESSION *session; - int ret; session = cursor->session; - key_gen((uint8_t *)key->data, &key->size, keyno); + key_gen(key, keyno); /* Log the operation */ if (g.logging == LOG_OPS) @@ -1128,16 +1342,20 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) /* We use the cursor in overwrite mode, check for existence. 
*/ if ((ret = cursor->search(cursor)) == 0) ret = cursor->remove(cursor); - if (ret == WT_ROLLBACK) + switch (ret) { + case 0: + case WT_NOTFOUND: + break; + case WT_ROLLBACK: return (WT_ROLLBACK); - if (ret != 0 && ret != WT_NOTFOUND) + default: testutil_die(ret, "row_remove: remove %" PRIu64 " by key", keyno); - *notfoundp = (ret == WT_NOTFOUND); + } #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) - return (0); + return (ret); { int notfound; @@ -1148,7 +1366,7 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) #else (void)key; /* [-Wunused-variable] */ #endif - return (0); + return (ret); } /* @@ -1156,10 +1374,10 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) * Remove a row from a column-store file. */ static int -col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) +col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) { + WT_DECL_RET; WT_SESSION *session; - int ret; session = cursor->session; @@ -1172,35 +1390,38 @@ col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) /* We use the cursor in overwrite mode, check for existence. */ if ((ret = cursor->search(cursor)) == 0) ret = cursor->remove(cursor); - if (ret == WT_ROLLBACK) + switch (ret) { + case 0: + case WT_NOTFOUND: + break; + case WT_ROLLBACK: return (WT_ROLLBACK); - if (ret != 0 && ret != WT_NOTFOUND) + default: testutil_die(ret, "col_remove: remove %" PRIu64 " by key", keyno); - *notfoundp = (ret == WT_NOTFOUND); + } #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) - return (0); - - { - int notfound; + return (ret); /* * Deleting a fixed-length item is the same as setting the bits to 0; * do the same thing for the BDB store. 
*/ if (g.type == FIX) { - key_gen((uint8_t *)key->data, &key->size, keyno); - bdb_update(key->data, key->size, "\0", 1, ¬found); - } else + key_gen(key, keyno); + bdb_update(key->data, key->size, "\0", 1); + } else { + int notfound; + bdb_remove(keyno, ¬found); - (void)notfound_chk("col_remove", ret, notfound, keyno); + (void)notfound_chk("col_remove", ret, notfound, keyno); } #else (void)key; /* [-Wunused-variable] */ #endif - return (0); + return (ret); } #ifdef HAVE_BERKELEY_DB @@ -1244,7 +1465,7 @@ print_item(const char *tag, WT_ITEM *item) static const char hex[] = "0123456789abcdef"; const uint8_t *data; size_t size; - int ch; + u_char ch; data = item->data; size = item->size; @@ -1255,8 +1476,8 @@ print_item(const char *tag, WT_ITEM *item) else for (; size > 0; --size, ++data) { ch = data[0]; - if (isprint(ch)) - fprintf(stderr, "%c", ch); + if (__wt_isprint(ch)) + fprintf(stderr, "%c", (int)ch); else fprintf(stderr, "%x%x", hex[(data[0] & 0xf0) >> 4], diff --git a/src/third_party/wiredtiger/test/format/salvage.c b/src/third_party/wiredtiger/test/format/salvage.c index 526e1563390..8274c556364 100644 --- a/src/third_party/wiredtiger/test/format/salvage.c +++ b/src/third_party/wiredtiger/test/format/salvage.c @@ -36,8 +36,8 @@ static void salvage(void) { WT_CONNECTION *conn; + WT_DECL_RET; WT_SESSION *session; - int ret; conn = g.wts_conn; track("salvage", 0ULL, NULL); @@ -141,7 +141,7 @@ found: if (fstat(fd, &sb) == -1) void wts_salvage(void) { - int ret; + WT_DECL_RET; /* Some data-sources don't support salvage. */ if (DATASOURCE("helium") || DATASOURCE("kvsbdb")) @@ -158,7 +158,7 @@ wts_salvage(void) testutil_die(ret, "salvage copy step failed"); /* Salvage, then verify. */ - wts_open(g.home, 1, &g.wts_conn); + wts_open(g.home, true, &g.wts_conn); salvage(); wts_verify("post-salvage verify"); wts_close(); @@ -174,7 +174,7 @@ wts_salvage(void) /* Corrupt the file randomly, salvage, then verify. 
*/ if (corrupt()) { - wts_open(g.home, 1, &g.wts_conn); + wts_open(g.home, true, &g.wts_conn); salvage(); wts_verify("post-corrupt-salvage verify"); wts_close(); diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c index 28c22e23cb8..2eb2b078804 100644 --- a/src/third_party/wiredtiger/test/format/t.c +++ b/src/third_party/wiredtiger/test/format/t.c @@ -32,7 +32,8 @@ GLOBAL g; static void format_die(void); static void startup(void); -static void usage(void); +static void usage(void) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern int __wt_optind; extern char *__wt_optarg; @@ -181,6 +182,7 @@ main(int argc, char *argv[]) */ testutil_check(pthread_rwlock_init(&g.append_lock, NULL)); testutil_check(pthread_rwlock_init(&g.backup_lock, NULL)); + testutil_check(pthread_rwlock_init(&g.checkpoint_lock, NULL)); testutil_check(pthread_rwlock_init(&g.death_lock, NULL)); printf("%s: process %" PRIdMAX "\n", g.progname, (intmax_t)getpid()); @@ -198,8 +200,8 @@ main(int argc, char *argv[]) if (SINGLETHREADED) bdb_open(); /* Initial file config */ #endif - wts_open(g.home, 1, &g.wts_conn); - wts_create(); + wts_open(g.home, true, &g.wts_conn); + wts_init(); wts_load(); /* Load initial records */ wts_verify("post-bulk verify"); /* Verify */ @@ -275,6 +277,8 @@ main(int argc, char *argv[]) testutil_check(pthread_rwlock_destroy(&g.append_lock)); testutil_check(pthread_rwlock_destroy(&g.backup_lock)); + testutil_check(pthread_rwlock_destroy(&g.checkpoint_lock)); + testutil_check(pthread_rwlock_destroy(&g.death_lock)); config_clear(); @@ -288,7 +292,7 @@ main(int argc, char *argv[]) static void startup(void) { - int ret; + WT_DECL_RET; /* Flush/close any logging information. 
*/ fclose_and_clear(&g.logfp); diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c index 2e4c869366c..cebe2153b3e 100644 --- a/src/third_party/wiredtiger/test/format/util.c +++ b/src/third_party/wiredtiger/test/format/util.c @@ -32,56 +32,11 @@ #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #endif -/* - * dmalloc -- - * Call malloc, dying on failure. - */ -void * -dmalloc(size_t len) -{ - void *p; - - if ((p = malloc(len)) == NULL) - testutil_die(errno, "malloc"); - return (p); -} - -/* - * dstrdup -- - * Call strdup, dying on failure. - */ -char * -dstrdup(const char *str) -{ - char *p; - - if ((p = strdup(str)) == NULL) - testutil_die(errno, "strdup"); - return (p); -} - -static inline uint32_t -kv_len(WT_RAND_STATE *rnd, uint64_t keyno, uint32_t min, uint32_t max) -{ - /* - * Focus on relatively small key/value items, admitting the possibility - * of larger items. Pick a size close to the minimum most of the time, - * only create a larger item 1 in 20 times, and a really big item 1 in - * 1000 times. (Configuration can force large key/value minimum sizes, - * where every key/value item is an overflow.) - */ - if (keyno % 1000 == 0 && max < KILOBYTE(80)) { - min = KILOBYTE(80); - max = KILOBYTE(100); - } else if (keyno % 20 != 0 && max > min + 20) - max = min + 20; - return (mmrand(rnd, min, max)); -} - void key_len_setup(void) { size_t i; + uint32_t max; /* * The key is a variable length item with a leading 10-digit value. @@ -91,72 +46,113 @@ key_len_setup(void) * the pre-loaded lengths. * * Fill in the random key lengths. + * + * Focus on relatively small items, admitting the possibility of larger + * items. Pick a size close to the minimum most of the time, only create + * a larger item 1 in 20 times. 
*/ - for (i = 0; i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i) - g.key_rand_len[i] = - kv_len(NULL, (uint64_t)i, g.c_key_min, g.c_key_max); + for (i = 0; + i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i) { + max = g.c_key_max; + if (i % 20 != 0 && max > g.c_key_min + 20) + max = g.c_key_min + 20; + g.key_rand_len[i] = mmrand(NULL, g.c_key_min, max); + } } void -key_gen_setup(uint8_t **keyp) +key_gen_setup(WT_ITEM *key) { - uint8_t *key; size_t i, len; - - *keyp = NULL; + char *p; len = MAX(KILOBYTE(100), g.c_key_max); - key = dmalloc(len); + p = dmalloc(len); for (i = 0; i < len; ++i) - key[i] = (uint8_t)("abcdefghijklmnopqrstuvwxyz"[i % 26]); - *keyp = key; + p[i] = "abcdefghijklmnopqrstuvwxyz"[i % 26]; + + key->mem = p; + key->memsize = len; + key->data = key->mem; + key->size = 0; } static void -key_gen_common(uint8_t *key, size_t *sizep, uint64_t keyno, int suffix) +key_gen_common(WT_ITEM *key, uint64_t keyno, int suffix) { int len; + char *p; + + p = key->mem; /* * The key always starts with a 10-digit string (the specified cnt) * followed by two digits, a random number between 1 and 15 if it's * an insert, otherwise 00. */ - len = sprintf((char *)key, "%010" PRIu64 ".%02d", keyno, suffix); + len = sprintf(p, "%010" PRIu64 ".%02d", keyno, suffix); /* - * In a column-store, the key is only used for BDB, and so it doesn't - * need a random length. + * In a column-store, the key is only used for Berkeley DB inserts, + * and so it doesn't need a random length. */ if (g.type == ROW) { - key[len] = '/'; - len = (int)g.key_rand_len[keyno % - (sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]))]; + p[len] = '/'; + + /* + * Because we're doing table lookup for key sizes, we weren't + * able to set really big keys sizes in the table, the table + * isn't big enough to keep our hash from selecting too many + * big keys and blowing out the cache. Handle that here, use a + * really big key 1 in 2500 times. 
+ */ + len = keyno % 2500 == 0 && g.c_key_max < KILOBYTE(80) ? + KILOBYTE(80) : + (int)g.key_rand_len[keyno % WT_ELEMENTS(g.key_rand_len)]; } - *sizep = (size_t)len; + + key->data = key->mem; + key->size = (size_t)len; } void -key_gen(uint8_t *key, size_t *sizep, uint64_t keyno) +key_gen(WT_ITEM *key, uint64_t keyno) { - key_gen_common(key, sizep, keyno, 0); + key_gen_common(key, keyno, 0); } void -key_gen_insert(WT_RAND_STATE *rnd, uint8_t *key, size_t *sizep, uint64_t keyno) +key_gen_insert(WT_RAND_STATE *rnd, WT_ITEM *key, uint64_t keyno) { - key_gen_common(key, sizep, keyno, (int)mmrand(rnd, 1, 15)); + key_gen_common(key, keyno, (int)mmrand(rnd, 1, 15)); } static uint32_t val_dup_data_len; /* Length of duplicate data items */ +static inline uint32_t +value_len(WT_RAND_STATE *rnd, uint64_t keyno, uint32_t min, uint32_t max) +{ + /* + * Focus on relatively small items, admitting the possibility of larger + * items. Pick a size close to the minimum most of the time, only create + * a larger item 1 in 20 times, and a really big item 1 in somewhere + * around 2500 items. + */ + if (keyno % 2500 == 0 && max < KILOBYTE(80)) { + min = KILOBYTE(80); + max = KILOBYTE(100); + } else if (keyno % 20 != 0 && max > min + 20) + max = min + 20; + return (mmrand(rnd, min, max)); +} + void -val_gen_setup(WT_RAND_STATE *rnd, uint8_t **valp) +val_gen_setup(WT_RAND_STATE *rnd, WT_ITEM *value) { - uint8_t *val; size_t i, len; + char *p; - *valp = NULL; + memset(value, 0, sizeof(WT_ITEM)); /* * Set initial buffer contents to recognizable text. @@ -166,35 +162,43 @@ val_gen_setup(WT_RAND_STATE *rnd, uint8_t **valp) * data for column-store run-length encoded files. 
*/ len = MAX(KILOBYTE(100), g.c_value_max) + 20; - val = dmalloc(len); + p = dmalloc(len); for (i = 0; i < len; ++i) - val[i] = (uint8_t)("ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26]); + p[i] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26]; - *valp = val; + value->mem = p; + value->memsize = len; + value->data = value->mem; + value->size = 0; - val_dup_data_len = kv_len(rnd, + val_dup_data_len = value_len(rnd, (uint64_t)mmrand(rnd, 1, 20), g.c_value_min, g.c_value_max); } void -val_gen(WT_RAND_STATE *rnd, uint8_t *val, size_t *sizep, uint64_t keyno) +val_gen(WT_RAND_STATE *rnd, WT_ITEM *value, uint64_t keyno) { + char *p; + + p = value->mem; + value->data = value->mem; + /* * Fixed-length records: take the low N bits from the last digit of * the record number. */ if (g.type == FIX) { switch (g.c_bitcnt) { - case 8: val[0] = (uint8_t)mmrand(rnd, 1, 0xff); break; - case 7: val[0] = (uint8_t)mmrand(rnd, 1, 0x7f); break; - case 6: val[0] = (uint8_t)mmrand(rnd, 1, 0x3f); break; - case 5: val[0] = (uint8_t)mmrand(rnd, 1, 0x1f); break; - case 4: val[0] = (uint8_t)mmrand(rnd, 1, 0x0f); break; - case 3: val[0] = (uint8_t)mmrand(rnd, 1, 0x07); break; - case 2: val[0] = (uint8_t)mmrand(rnd, 1, 0x03); break; - case 1: val[0] = 1; break; + case 8: p[0] = (char)mmrand(rnd, 1, 0xff); break; + case 7: p[0] = (char)mmrand(rnd, 1, 0x7f); break; + case 6: p[0] = (char)mmrand(rnd, 1, 0x3f); break; + case 5: p[0] = (char)mmrand(rnd, 1, 0x1f); break; + case 4: p[0] = (char)mmrand(rnd, 1, 0x0f); break; + case 3: p[0] = (char)mmrand(rnd, 1, 0x07); break; + case 2: p[0] = (char)mmrand(rnd, 1, 0x03); break; + case 1: p[0] = 1; break; } - *sizep = 1; + value->size = 1; return; } @@ -203,29 +207,24 @@ val_gen(WT_RAND_STATE *rnd, uint8_t *val, size_t *sizep, uint64_t keyno) * test that by inserting a zero-length data item every so often. */ if (keyno % 63 == 0) { - val[0] = '\0'; - *sizep = 0; + p[0] = '\0'; + value->size = 0; return; } /* - * Start the data with a 10-digit number. 
- * - * For row and non-repeated variable-length column-stores, change the - * leading number to ensure every data item is unique. For repeated - * variable-length column-stores (that is, to test run-length encoding), - * use the same data value all the time. + * Data items have unique leading numbers by default and random lengths; + * variable-length column-stores use a duplicate data value to test RLE. */ - if ((g.type == ROW || g.type == VAR) && - g.c_repeat_data_pct != 0 && - mmrand(rnd, 1, 100) < g.c_repeat_data_pct) { - (void)strcpy((char *)val, "DUPLICATEV"); - val[10] = '/'; - *sizep = val_dup_data_len; + if (g.type == VAR && mmrand(rnd, 1, 100) < g.c_repeat_data_pct) { + (void)strcpy(p, "DUPLICATEV"); + p[10] = '/'; + value->size = val_dup_data_len; } else { - (void)sprintf((char *)val, "%010" PRIu64, keyno); - val[10] = '/'; - *sizep = kv_len(rnd, keyno, g.c_value_min, g.c_value_max); + (void)sprintf(p, "%010" PRIu64, keyno); + p[10] = '/'; + value->size = + value_len(rnd, keyno, g.c_value_min, g.c_value_max); } } @@ -305,15 +304,6 @@ path_setup(const char *home) g.home_stats = dmalloc(len); snprintf(g.home_stats, len, "%s/%s", g.home, "stats"); - /* Backup directory. */ - len = strlen(g.home) + strlen("BACKUP") + 2; - g.home_backup = dmalloc(len); - snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP"); - - len = strlen(g.home) + strlen("BACKUP2") + 2; - g.home_backup2 = dmalloc(len); - snprintf(g.home_backup2, len, "%s/%s", g.home, "BACKUP2"); - /* BDB directory. */ len = strlen(g.home) + strlen("bdb") + 2; g.home_bdb = dmalloc(len); @@ -341,18 +331,27 @@ path_setup(const char *home) g.home_init = dmalloc(len); snprintf(g.home_init, len, CMD, g.home, g.home, g.home); - /* Backup directory initialize command, remove and re-create it. */ + /* Primary backup directory. 
*/ + len = strlen(g.home) + strlen("BACKUP") + 2; + g.home_backup = dmalloc(len); + snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP"); + + /* + * Backup directory initialize command, remove and re-create the primary + * backup directory, plus a copy we maintain for recovery testing. + */ #undef CMD #ifdef _WIN32 -#define CMD "del /s /q >:nul && mkdir %s %s" +#define CMD "del %s/%s %s/%s /s /q >:nul && mkdir %s/%s %s/%s" #else -#define CMD "rm -rf %s %s && mkdir %s %s" +#define CMD "rm -rf %s/%s %s/%s && mkdir %s/%s %s/%s" #endif - len = strlen(g.home_backup) * 2 + - strlen(g.home_backup2) * 2 + strlen(CMD) + 1; + len = strlen(g.home) * 4 + + strlen("BACKUP") * 2 + strlen("BACKUP_COPY") * 2 + strlen(CMD) + 1; g.home_backup_init = dmalloc(len); - snprintf(g.home_backup_init, len, CMD, g.home_backup, g.home_backup2, - g.home_backup, g.home_backup2); + snprintf(g.home_backup_init, len, CMD, + g.home, "BACKUP", g.home, "BACKUP_COPY", + g.home, "BACKUP", g.home, "BACKUP_COPY"); /* * Salvage command, save the interesting files so we can replay the diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c index 81e484296e2..69195abc3d4 100644 --- a/src/third_party/wiredtiger/test/format/wts.c +++ b/src/third_party/wiredtiger/test/format/wts.c @@ -87,10 +87,10 @@ handle_message(WT_EVENT_HANDLER *handler, /* Write and flush the message so we're up-to-date on error. */ if (g.logfp == NULL) { - out = printf("%p:%s\n", session, message); + out = printf("%p:%s\n", (void *)session, message); (void)fflush(stdout); } else { - out = fprintf(g.logfp, "%p:%s\n", session, message); + out = fprintf(g.logfp, "%p:%s\n", (void *)session, message); (void)fflush(g.logfp); } return (out < 0 ? EIO : 0); @@ -126,10 +126,10 @@ static WT_EVENT_HANDLER event_handler = { * Open a connection to a WiredTiger database. 
*/ void -wts_open(const char *home, int set_api, WT_CONNECTION **connp) +wts_open(const char *home, bool set_api, WT_CONNECTION **connp) { WT_CONNECTION *conn; - int ret; + WT_DECL_RET; char *config, *end, *p, helium_config[1024]; *connp = NULL; @@ -138,10 +138,11 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp) end = config + sizeof(g.wiredtiger_open_config); p += snprintf(p, REMAIN(p, end), - "create,checkpoint_sync=false,cache_size=%" PRIu32 "MB", - g.c_cache); - - p += snprintf(p, REMAIN(p, end), ",error_prefix=\"%s\"", g.progname); + "create=true," + "cache_size=%" PRIu32 "MB," + "checkpoint_sync=false," + "error_prefix=\"%s\"", + g.c_cache, g.progname); /* In-memory configuration. */ if (g.c_in_memory != 0) @@ -273,8 +274,13 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp) void wts_reopen(void) { + WT_CONNECTION *conn; + testutil_checkfmt(wiredtiger_open(g.home, &event_handler, - g.wiredtiger_open_config, &g.wts_conn), "%s", g.home); + g.wiredtiger_open_config, &conn), "%s", g.home); + + g.wt_api = conn->get_extension_api(conn); + g.wts_conn = conn; } /* @@ -282,7 +288,7 @@ wts_reopen(void) * Create the underlying store. */ void -wts_create(void) +wts_init(void) { WT_CONNECTION *conn; WT_SESSION *session; @@ -497,8 +503,8 @@ void wts_verify(const char *tag) { WT_CONNECTION *conn; + WT_DECL_RET; WT_SESSION *session; - int ret; if (g.c_verify == 0) return; @@ -531,12 +537,12 @@ wts_stats(void) { WT_CONNECTION *conn; WT_CURSOR *cursor; + WT_DECL_RET; WT_SESSION *session; FILE *fp; char *stat_name; const char *pval, *desc; uint64_t v; - int ret; /* Ignore statistics if they're not configured. 
*/ if (g.c_statistics == 0) diff --git a/src/third_party/wiredtiger/test/huge/Makefile.am b/src/third_party/wiredtiger/test/huge/Makefile.am index bc76bdc0f3c..894bff5eace 100644 --- a/src/third_party/wiredtiger/test/huge/Makefile.am +++ b/src/third_party/wiredtiger/test/huge/Makefile.am @@ -1,13 +1,16 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/test/utility +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = t t_SOURCES = huge.c -t_LDADD = $(top_builddir)/libwiredtiger.la + +t_LDADD = $(top_builddir)/test/utility/libtest_util.la +t_LDADD +=$(top_builddir)/libwiredtiger.la t_LDFLAGS = -static # Run this during a "make check" smoke test. TESTS = smoke.sh clean-local: - rm -rf WiredTiger* *.core __* + rm -rf WT_TEST *.core diff --git a/src/third_party/wiredtiger/test/huge/huge.c b/src/third_party/wiredtiger/test/huge/huge.c index ad19035ff99..3aa61a9048e 100644 --- a/src/third_party/wiredtiger/test/huge/huge.c +++ b/src/third_party/wiredtiger/test/huge/huge.c @@ -26,15 +26,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <errno.h> -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#ifndef _WIN32 -#include <unistd.h> -#endif - -#include "test_util.i" +#include "test_util.h" static char home[512]; /* Program working dir */ static const char *progname; /* Program name */ @@ -73,13 +65,13 @@ static size_t lengths[] = { 0 }; +static void usage(void) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void usage(void) { fprintf(stderr, "usage: %s [-s]\n", progname); - fprintf(stderr, "%s", - "\t-s small run, only test up to 1GB\n"); - + fprintf(stderr, "%s", "\t-s small run, only test up to 1GB\n"); exit(EXIT_FAILURE); } @@ -205,8 +197,7 @@ main(int argc, char *argv[]) /* Allocate a buffer to use. */ len = small ? 
((size_t)SMALL_MAX) : ((size_t)4 * GIGABYTE); - if ((big = malloc(len)) == NULL) - testutil_die(errno, ""); + big = dmalloc(len); memset(big, 'a', len); /* Make sure the configurations all work. */ diff --git a/src/third_party/wiredtiger/test/manydbs/Makefile.am b/src/third_party/wiredtiger/test/manydbs/Makefile.am index 53559b25243..2bc47ad7f2e 100644 --- a/src/third_party/wiredtiger/test/manydbs/Makefile.am +++ b/src/third_party/wiredtiger/test/manydbs/Makefile.am @@ -1,13 +1,16 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/test/utility +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = t t_SOURCES = manydbs.c -t_LDADD = $(top_builddir)/libwiredtiger.la + +t_LDADD = $(top_builddir)/test/utility/libtest_util.la +t_LDADD +=$(top_builddir)/libwiredtiger.la t_LDFLAGS = -static # Run this during a "make check" smoke test. TESTS = smoke.sh clean-local: - rm -rf WiredTiger* *.core __* + rm -rf WT_TEST *.core diff --git a/src/third_party/wiredtiger/test/manydbs/manydbs.c b/src/third_party/wiredtiger/test/manydbs/manydbs.c index 1d3412a7b06..e485e73067f 100644 --- a/src/third_party/wiredtiger/test/manydbs/manydbs.c +++ b/src/third_party/wiredtiger/test/manydbs/manydbs.c @@ -26,22 +26,10 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include <sys/wait.h> -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef _WIN32 -#include <unistd.h> -#endif - -#include <wiredtiger.h> - -#include "test_util.i" +#include "test_util.h" #define HOME_SIZE 512 -#define HOME_BASE "WT_HOME" +#define HOME_BASE "WT_TEST" static char home[HOME_SIZE]; /* Base home directory */ static char hometmp[HOME_SIZE]; /* Each conn home directory */ static const char *progname; /* Program name */ @@ -67,6 +55,8 @@ static const char * const uri = "table:main"; #define MAX_KV 100 #define MAX_VAL 128 +static void usage(void) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void usage(void) { @@ -80,10 +70,10 @@ extern char *__wt_optarg; void (*custom_die)(void) = NULL; -WT_CONNECTION **connections = NULL; -WT_CURSOR **cursors = NULL; -WT_RAND_STATE rnd; -WT_SESSION **sessions = NULL; +static WT_CONNECTION **connections = NULL; +static WT_CURSOR **cursors = NULL; +static WT_RAND_STATE rnd; +static WT_SESSION **sessions = NULL; static int get_stat(WT_SESSION *stat_session, int stat_field, uint64_t *valuep) @@ -172,17 +162,10 @@ main(int argc, char *argv[]) * Allocate arrays for connection handles, sessions, statistics * cursors and, if needed, data cursors. */ - if ((connections = calloc( - (size_t)dbs, sizeof(WT_CONNECTION *))) == NULL) - testutil_die(ENOMEM, "connection array malloc"); - if ((sessions = calloc( - (size_t)dbs, sizeof(WT_SESSION *))) == NULL) - testutil_die(ENOMEM, "session array malloc"); - if ((cond_reset_orig = calloc((size_t)dbs, sizeof(uint64_t))) == NULL) - testutil_die(ENOMEM, "orig stat malloc"); - if (!idle && ((cursors = calloc( - (size_t)dbs, sizeof(WT_CURSOR *))) == NULL)) - testutil_die(ENOMEM, "cursor array malloc"); + connections = dcalloc((size_t)dbs, sizeof(WT_CONNECTION *)); + sessions = dcalloc((size_t)dbs, sizeof(WT_SESSION *)); + cond_reset_orig = dcalloc((size_t)dbs, sizeof(uint64_t)); + cursors = idle ? 
NULL : dcalloc((size_t)dbs, sizeof(WT_CURSOR *)); memset(cmd, 0, sizeof(cmd)); /* * Set up all the directory names. @@ -257,8 +240,7 @@ main(int argc, char *argv[]) free(connections); free(sessions); free(cond_reset_orig); - if (!idle) - free(cursors); + free(cursors); return (EXIT_SUCCESS); } diff --git a/src/third_party/wiredtiger/test/mciproject.yml b/src/third_party/wiredtiger/test/mciproject.yml index 9abdf23ec3b..3df1ce5805e 100644 --- a/src/third_party/wiredtiger/test/mciproject.yml +++ b/src/third_party/wiredtiger/test/mciproject.yml @@ -8,6 +8,14 @@ functions: command: git.get_project params: directory: wiredtiger + "fetch artifacts" : &fetch_artifacts + - command: s3.get + params: + aws_key: ${aws_key} + aws_secret: ${aws_secret} + remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz + bucket: build_external + extract_to: wiredtiger pre: - command: shell.exec @@ -21,7 +29,9 @@ post: rm -rf "wiredtiger" tasks: - - name: compile-posix +## Base compile task on posix flavours + - name: compile + depends_on: [] commands: - func: "fetch source" - command: git.apply_patch @@ -33,36 +43,49 @@ tasks: script: | set -o errexit set -o verbose - - ./build_posix/reconf - ${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib - ${make_command|make} ${smp_command|} - ${make_command|make} check - - ${test_env_vars|} python ./test/suite/run.py -v 2 - - name: compile-windows - commands: - - func: "fetch source" - - command: git.apply_patch + if [ "Windows_NT" = "$OS" ]; then + scons.bat --enable-python=c:\\swigwin-3.0.2\\swig.exe --enable-diagnostic --enable-verbose ${smp_command|} + else + ./build_posix/reconf + ${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib --enable-strict --enable-verbose + ${make_command|make} ${smp_command|} 2>&1 + ${make_command|make} check 2>&1 + fi + - command: archive.targz_pack params: - directory: wiredtiger + target: "wiredtiger.tgz" + source_dir: 
"wiredtiger" + include: + - "./**" + - command: s3.put + params: + aws_secret: ${aws_secret} + aws_key: ${aws_key} + local_file: wiredtiger.tgz + bucket: build_external + permissions: public-read + content_type: application/tar + display_name: Artifacts + remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz + + - name: unit-test + depends_on: + - name: compile + commands: + - func: "fetch artifacts" - command: shell.exec params: working_dir: "wiredtiger" script: | set -o errexit set -o verbose - - scons.bat --enable-python=c:\\swigwin-3.0.2\\swig.exe ${smp_command|} - - ${test_env_vars|} python ./test/suite/run.py -v 2 + ${test_env_vars|} python ./test/suite/run.py -v 2 ${smp_command|} 2>&1 - name: compile-windows-alt + depends_on: + - name: compile commands: - - func: "fetch source" - - command: git.apply_patch - params: - directory: wiredtiger + - func: "fetch artifacts" - command: shell.exec params: working_dir: "wiredtiger" @@ -72,22 +95,22 @@ tasks: scons.bat ${smp_command|} "CFLAGS=/Gv /wd4090 /wd4996 /we4047 /we4024 /TC /we4100" wiredtiger.dll libwiredtiger.lib - - name: fops-windows + - name: fops + depends_on: + - name: compile commands: - - func: "fetch source" - - command: git.apply_patch - params: - directory: wiredtiger + - func: "fetch artifacts" - command: shell.exec params: working_dir: "wiredtiger" script: | set -o errexit set -o verbose - - scons.bat --enable-python=c:\\swigwin-3.0.2\\swig.exe ${smp_command|} - - cmd.exe /c t_fops.exe + if [ "Windows_NT" = "$OS" ]; then + cmd.exe /c t_fops.exe + else + ./test/fops/t + fi buildvariants: - name: ubuntu1404 @@ -95,11 +118,14 @@ buildvariants: run_on: - ubuntu1404-test expansions: - test_env_vars: LD_LIBRARY_PATH=.libs - smp_command: -j$(grep -c ^processor /proc/cpuinfo) + # It's ugly, but we need the absolute path here, not the relative + test_env_vars: LD_LIBRARY_PATH=`pwd`/.libs + smp_command: -j $(grep -c ^processor /proc/cpuinfo) configure_env_vars: 
CC=/opt/mongodbtoolchain/bin/gcc CXX=/opt/mongodbtoolchain/bin/g++ tasks: - - name: compile-posix + - name: compile + - name: unit-test + - name: fops - name: solaris display_name: Solaris @@ -107,31 +133,34 @@ buildvariants: - solaris expansions: make_command: PATH=/opt/mongodbtoolchain/bin:$PATH gmake - test_env_vars: LD_LIBRARY_PATH=.libs - smp_command: -j$(kstat cpu | sort -u | grep -c "^module") + test_env_vars: LD_LIBRARY_PATH=`pwd`/.libs + smp_command: -j $(kstat cpu | sort -u | grep -c "^module") configure_env_vars: PATH=/opt/mongodbtoolchain/bin:$PATH CFLAGS="-m64" tasks: - - name: compile-posix + - name: compile + - name: unit-test + - name: fops - name: windows-64 display_name: Windows 64-bit run_on: - windows-64-vs2013-test - expansions: - smp_command: -j$(grep -c ^processor /proc/cpuinfo) tasks: - - name: compile-windows + - name: compile - name: compile-windows-alt - - name: fops-windows + - name: unit-test + - name: fops - name: osx-1010 display_name: OS X 10.10 run_on: - osx-1010 expansions: - smp_command: -j$(sysctl -n hw.logicalcpu) + smp_command: -j $(sysctl -n hw.logicalcpu) configure_env_vars: PATH=/opt/local/bin:$PATH make_command: PATH=/opt/local/bin:$PATH ARCHFLAGS=-Wno-error=unused-command-line-argument-hard-error-in-future make - test_env_vars: DYLD_LIBRARY_PATH=.libs + test_env_vars: DYLD_LIBRARY_PATH=`pwd`/.libs tasks: - - name: compile-posix + - name: compile + - name: unit-test + - name: fops diff --git a/src/third_party/wiredtiger/test/packing/Makefile.am b/src/third_party/wiredtiger/test/packing/Makefile.am index a9e7e16e5c2..c9128100cc3 100644 --- a/src/third_party/wiredtiger/test/packing/Makefile.am +++ b/src/third_party/wiredtiger/test/packing/Makefile.am @@ -1,7 +1,11 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = intpack-test intpack-test2 intpack-test3 packing-test -LDADD = 
$(top_builddir)/libwiredtiger.la + +LDADD = $(top_builddir)/test/utility/libtest_util.la +LDADD +=$(top_builddir)/libwiredtiger.la LDFLAGS = -static TESTS = smoke.sh diff --git a/src/third_party/wiredtiger/test/packing/intpack-test.c b/src/third_party/wiredtiger/test/packing/intpack-test.c index 08cc3807725..76851b38e35 100644 --- a/src/third_party/wiredtiger/test/packing/intpack-test.c +++ b/src/third_party/wiredtiger/test/packing/intpack-test.c @@ -26,9 +26,9 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "wt_internal.h" /* For __wt_XXX */ +#include "test_util.h" -#include <assert.h> +void (*custom_die)(void) = NULL; int main(void) @@ -47,9 +47,10 @@ main(void) #if 1 p = buf; - assert(__wt_vpack_uint(&p, sizeof(buf), r) == 0); + testutil_check(__wt_vpack_uint(&p, sizeof(buf), r)); cp = buf; - assert(__wt_vunpack_uint(&cp, sizeof(buf), &r2) == 0); + testutil_check( + __wt_vunpack_uint(&cp, sizeof(buf), &r2)); #else /* * Note: use memmove for comparison because GCC does diff --git a/src/third_party/wiredtiger/test/packing/intpack-test2.c b/src/third_party/wiredtiger/test/packing/intpack-test2.c index 7555d2724e7..a7d31329069 100644 --- a/src/third_party/wiredtiger/test/packing/intpack-test2.c +++ b/src/third_party/wiredtiger/test/packing/intpack-test2.c @@ -26,9 +26,9 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include "wt_internal.h" /* For __wt_XXX */ +#include "test_util.h" -#include <assert.h> +void (*custom_die)(void) = NULL; int main(void) @@ -38,14 +38,15 @@ main(void) for (i = 1; i < 1LL << 60; i <<= 1) { end = buf; - assert(__wt_vpack_uint(&end, sizeof(buf), (uint64_t)i) == 0); + testutil_check( + __wt_vpack_uint(&end, sizeof(buf), (uint64_t)i)); printf("%" PRId64 " ", i); for (p = buf; p < end; p++) printf("%02x", *p); printf("\n"); end = buf; - assert(__wt_vpack_int(&end, sizeof(buf), -i) == 0); + testutil_check(__wt_vpack_int(&end, sizeof(buf), -i)); printf("%" PRId64 " ", -i); for (p = buf; p < end; p++) printf("%02x", *p); diff --git a/src/third_party/wiredtiger/test/packing/intpack-test3.c b/src/third_party/wiredtiger/test/packing/intpack-test3.c index 2ebc01f9e2e..aac0178578f 100644 --- a/src/third_party/wiredtiger/test/packing/intpack-test3.c +++ b/src/third_party/wiredtiger/test/packing/intpack-test3.c @@ -26,9 +26,9 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "wt_internal.h" /* For __wt_XXX */ +#include "test_util.h" -#include <assert.h> +void (*custom_die)(void) = NULL; void test_value(int64_t); void test_spread(int64_t, int64_t, int64_t); @@ -42,12 +42,14 @@ test_value(int64_t val) uint64_t uinput, uoutput; size_t used_len; + soutput = 0; /* -Werror=maybe-uninitialized */ sinput = val; + soutput = 0; /* Make GCC happy. 
*/ p = buf; - assert(__wt_vpack_int(&p, sizeof(buf), sinput) == 0); + testutil_check(__wt_vpack_int(&p, sizeof(buf), sinput)); used_len = (size_t)(p - buf); cp = buf; - assert(__wt_vunpack_int(&cp, used_len, &soutput) == 0); + testutil_check(__wt_vunpack_int(&cp, used_len, &soutput)); /* Ensure we got the correct value back */ if (sinput != soutput) { fprintf(stderr, "mismatch %" PRIu64 ", %" PRIu64 "\n", @@ -69,10 +71,9 @@ test_value(int64_t val) uinput = (uint64_t)val; p = buf; - assert(__wt_vpack_uint(&p, sizeof(buf), uinput) == 0); + testutil_check(__wt_vpack_uint(&p, sizeof(buf), uinput)); cp = buf; - assert(__wt_vunpack_uint( - &cp, sizeof(buf), &uoutput) == 0); + testutil_check(__wt_vunpack_uint(&cp, sizeof(buf), &uoutput)); /* Ensure we got the correct value back */ if (sinput != soutput) { fprintf(stderr, "mismatch %" PRIu64 ", %" PRIu64 "\n", diff --git a/src/third_party/wiredtiger/test/packing/packing-test.c b/src/third_party/wiredtiger/test/packing/packing-test.c index 9b7105d7d4a..f251c17eb67 100644 --- a/src/third_party/wiredtiger/test/packing/packing-test.c +++ b/src/third_party/wiredtiger/test/packing/packing-test.c @@ -26,9 +26,9 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "wt_internal.h" /* For __wt_XXX */ +#include "test_util.h" -#include <assert.h> +void (*custom_die)(void) = NULL; static void check(const char *fmt, ...) @@ -40,13 +40,15 @@ check(const char *fmt, ...) 
len = 0; /* -Werror=maybe-uninitialized */ va_start(ap, fmt); - assert(__wt_struct_sizev(NULL, &len, fmt, ap) == 0); + testutil_check(__wt_struct_sizev(NULL, &len, fmt, ap)); va_end(ap); - assert(len > 0 && len < sizeof(buf)); + if (len < 1 || len >= sizeof(buf)) + testutil_die(EINVAL, + "Unexpected length from __wt_struct_sizev"); va_start(ap, fmt); - assert(__wt_struct_packv(NULL, buf, sizeof(buf), fmt, ap) == 0); + testutil_check(__wt_struct_packv(NULL, buf, sizeof(buf), fmt, ap)); va_end(ap); printf("%s ", fmt); diff --git a/src/third_party/wiredtiger/test/readonly/Makefile.am b/src/third_party/wiredtiger/test/readonly/Makefile.am index 3abcd2386a1..84092e76f02 100644 --- a/src/third_party/wiredtiger/test/readonly/Makefile.am +++ b/src/third_party/wiredtiger/test/readonly/Makefile.am @@ -1,13 +1,16 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/test/utility +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = t t_SOURCES = readonly.c -t_LDADD = $(top_builddir)/libwiredtiger.la + +t_LDADD = $(top_builddir)/test/utility/libtest_util.la +t_LDADD +=$(top_builddir)/libwiredtiger.la t_LDFLAGS = -static # Run this during a "make check" smoke test. TESTS = smoke.sh clean-local: - rm -rf WT_RD* WiredTiger* *.core __* + rm -rf WT_RD* *.core diff --git a/src/third_party/wiredtiger/test/readonly/readonly.c b/src/third_party/wiredtiger/test/readonly/readonly.c index 41400da2605..31edc0d2a24 100644 --- a/src/third_party/wiredtiger/test/readonly/readonly.c +++ b/src/third_party/wiredtiger/test/readonly/readonly.c @@ -26,19 +26,9 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include <sys/wait.h> -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef _WIN32 -#include <unistd.h> -#endif - -#include <wiredtiger.h> +#include "test_util.h" -#include "test_util.i" +#include <sys/wait.h> #define HOME_SIZE 512 static char home[HOME_SIZE]; /* Program working dir lock file */ @@ -67,6 +57,8 @@ static const char * const uri = "table:main"; #define OP_READ 0 #define OP_WRITE 1 +static void usage(void) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void usage(void) { @@ -129,6 +121,9 @@ run_child(const char *homedir, int op, int expect) * Child process opens both databases readonly. */ static void +open_dbs(int, const char *, const char *, + const char *, const char *) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); +static void open_dbs(int op, const char *dir, const char *dir_wr, const char *dir_rd, const char *dir_rd2) { diff --git a/src/third_party/wiredtiger/test/recovery/Makefile.am b/src/third_party/wiredtiger/test/recovery/Makefile.am index 35f8dd15823..19fc48dce47 100644 --- a/src/third_party/wiredtiger/test/recovery/Makefile.am +++ b/src/third_party/wiredtiger/test/recovery/Makefile.am @@ -1,13 +1,16 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/test/utility +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = random-abort truncated-log random_abort_SOURCES = random-abort.c -random_abort_LDADD = $(top_builddir)/libwiredtiger.la +random_abort_LDADD = $(top_builddir)/test/utility/libtest_util.la +random_abort_LDADD +=$(top_builddir)/libwiredtiger.la random_abort_LDFLAGS = -static truncated_log_SOURCES = truncated-log.c -truncated_log_LDADD = $(top_builddir)/libwiredtiger.la +truncated_log_LDADD = $(top_builddir)/test/utility/libtest_util.la +truncated_log_LDADD +=$(top_builddir)/libwiredtiger.la truncated_log_LDFLAGS = -static # Run this during a "make check" smoke 
test. @@ -15,4 +18,4 @@ TESTS = $(noinst_PROGRAMS) LOG_COMPILER = $(TEST_WRAPPER) clean-local: - rm -rf WT_TEST* *.core __* + rm -rf WT_TEST.* *.core diff --git a/src/third_party/wiredtiger/test/recovery/random-abort.c b/src/third_party/wiredtiger/test/recovery/random-abort.c index 92f65c540cf..85629eddec4 100644 --- a/src/third_party/wiredtiger/test/recovery/random-abort.c +++ b/src/third_party/wiredtiger/test/recovery/random-abort.c @@ -26,19 +26,10 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#include "test_util.h" + #include <sys/wait.h> -#include <errno.h> #include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef _WIN32 -#include <unistd.h> -#endif - -#include <wt_internal.h> - -#include "test_util.i" static char home[512]; /* Program working dir */ static const char *progname; /* Program name */ @@ -48,7 +39,7 @@ static const char * const uri = "table:main"; #define MIN_TH 5 #define MAX_TIME 40 #define MIN_TIME 10 -#define RECORDS_FILE "records-%u" +#define RECORDS_FILE "records-%" PRIu32 #define ENV_CONFIG \ "create,log=(file_max=10M,archive=false,enabled)," \ @@ -56,6 +47,8 @@ static const char * const uri = "table:main"; #define ENV_CONFIG_REC "log=(recover=on)" #define MAX_VAL 4096 +static void usage(void) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void usage(void) { @@ -69,10 +62,6 @@ typedef struct { uint32_t id; } WT_THREAD_DATA; -/* - * Child process creates the database and table, and then writes data into - * the table until it is killed by the parent. - */ static void * thread_run(void *arg) { @@ -104,7 +93,7 @@ thread_run(void *arg) /* * Set to no buffering. 
*/ - __wt_stream_set_no_buffer(fp); + __wt_stream_set_line_buffer(fp); if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0) testutil_die(ret, "WT_CONNECTION:open_session"); if ((ret = @@ -128,7 +117,7 @@ thread_run(void *arg) if (fprintf(fp, "%" PRIu64 "\n", i) == -1) testutil_die(errno, "fprintf"); } - return (NULL); + /* NOTREACHED */ } /* @@ -147,8 +136,8 @@ fill_db(uint32_t nth) uint32_t i; int ret; - thr = calloc(nth, sizeof(pthread_t)); - td = calloc(nth, sizeof(WT_THREAD_DATA)); + thr = dcalloc(nth, sizeof(pthread_t)); + td = dcalloc(nth, sizeof(WT_THREAD_DATA)); if (chdir(home) != 0) testutil_die(errno, "Child chdir: %s", home); if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0) @@ -177,7 +166,7 @@ fill_db(uint32_t nth) * it is killed. */ for (i = 0; i < nth; ++i) - pthread_join(thr[i], NULL); + testutil_assert(pthread_join(thr[i], NULL) == 0); /* * NOTREACHED */ @@ -203,7 +192,7 @@ main(int argc, char *argv[]) uint32_t absent, count, i, nth, timeout; int ch, status, ret; pid_t pid; - bool rand_th, rand_time; + bool rand_th, rand_time, verify_only; const char *working_dir; char fname[64], kname[64]; @@ -215,9 +204,10 @@ main(int argc, char *argv[]) nth = MIN_TH; rand_th = rand_time = true; timeout = MIN_TIME; + verify_only = false; working_dir = "WT_TEST.random-abort"; - while ((ch = __wt_getopt(progname, argc, argv, "h:T:t:")) != EOF) + while ((ch = __wt_getopt(progname, argc, argv, "h:T:t:v")) != EOF) switch (ch) { case 'h': working_dir = __wt_optarg; @@ -230,6 +220,9 @@ main(int argc, char *argv[]) rand_time = false; timeout = (uint32_t)atoi(__wt_optarg); break; + case 'v': + verify_only = true; + break; default: usage(); } @@ -239,48 +232,62 @@ main(int argc, char *argv[]) usage(); testutil_work_dir_from_path(home, 512, working_dir); - testutil_make_work_dir(home); - - __wt_random_init_seed(NULL, &rnd); - if (rand_time) { - timeout = __wt_random(&rnd) % MAX_TIME; - if (timeout < MIN_TIME) - timeout = MIN_TIME; - } - if 
(rand_th) { - nth = __wt_random(&rnd) % MAX_TH; - if (nth < MIN_TH) - nth = MIN_TH; - } - printf("Parent: Create %u threads; sleep %" PRIu32 " seconds\n", - nth, timeout); /* - * Fork a child to insert as many items. We will then randomly - * kill the child, run recovery and make sure all items we wrote - * exist after recovery runs. + * If the user wants to verify they need to tell us how many threads + * there were so we can find the old record files. */ - if ((pid = fork()) < 0) - testutil_die(errno, "fork"); - - if (pid == 0) { /* child */ - fill_db(nth); - return (EXIT_SUCCESS); + if (verify_only && rand_th) { + fprintf(stderr, + "Verify option requires specifying number of threads\n"); + exit (EXIT_FAILURE); } + if (!verify_only) { + testutil_make_work_dir(home); + + testutil_assert(__wt_random_init_seed(NULL, &rnd) == 0); + if (rand_time) { + timeout = __wt_random(&rnd) % MAX_TIME; + if (timeout < MIN_TIME) + timeout = MIN_TIME; + } + if (rand_th) { + nth = __wt_random(&rnd) % MAX_TH; + if (nth < MIN_TH) + nth = MIN_TH; + } + printf("Parent: Create %" PRIu32 + " threads; sleep %" PRIu32 " seconds\n", nth, timeout); + /* + * Fork a child to insert as many items. We will then randomly + * kill the child, run recovery and make sure all items we wrote + * exist after recovery runs. + */ + if ((pid = fork()) < 0) + testutil_die(errno, "fork"); - /* parent */ - /* Sleep for the configured amount of time before killing the child. */ - sleep(timeout); + if (pid == 0) { /* child */ + fill_db(nth); + return (EXIT_SUCCESS); + } - /* - * !!! It should be plenty long enough to make sure more than one - * log file exists. If wanted, that check would be added here. - */ - printf("Kill child\n"); - if (kill(pid, SIGKILL) != 0) - testutil_die(errno, "kill"); - if (waitpid(pid, &status, 0) == -1) - testutil_die(errno, "waitpid"); + /* parent */ + /* + * Sleep for the configured amount of time before killing + * the child. + */ + sleep(timeout); + /* + * !!! 
It should be plenty long enough to make sure more than + * one log file exists. If wanted, that check would be added + * here. + */ + printf("Kill child\n"); + if (kill(pid, SIGKILL) != 0) + testutil_die(errno, "kill"); + if (waitpid(pid, &status, 0) == -1) + testutil_die(errno, "waitpid"); + } /* * !!! If we wanted to take a copy of the directory before recovery, * this is the place to do it. @@ -300,7 +307,8 @@ main(int argc, char *argv[]) for (i = 0; i < nth; ++i) { snprintf(fname, sizeof(fname), RECORDS_FILE, i); if ((fp = fopen(fname, "r")) == NULL) { - fprintf(stderr, "Failed to open %s. i %u\n", fname, i); + fprintf(stderr, + "Failed to open %s. i %" PRIu32 "\n", fname, i); testutil_die(errno, "fopen"); } @@ -309,7 +317,7 @@ main(int argc, char *argv[]) * in the table after recovery. Since we did write-no-sync, we * expect every key to have been recovered. */ - for (count = 0;; ++count) { + for (;; ++count) { ret = fscanf(fp, "%" SCNu64 "\n", &key); if (ret != EOF && ret != 1) testutil_die(errno, "fscanf"); @@ -320,7 +328,8 @@ main(int argc, char *argv[]) if ((ret = cursor->search(cursor)) != 0) { if (ret != WT_NOTFOUND) testutil_die(ret, "search"); - printf("no record with key %" PRIu64 "\n", key); + printf("%s: no record with key %" PRIu64 "\n", + fname, key); ++absent; } } diff --git a/src/third_party/wiredtiger/test/recovery/truncated-log.c b/src/third_party/wiredtiger/test/recovery/truncated-log.c index e099873e5b9..a7509c27566 100644 --- a/src/third_party/wiredtiger/test/recovery/truncated-log.c +++ b/src/third_party/wiredtiger/test/recovery/truncated-log.c @@ -26,23 +26,15 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include "test_util.h" + #include <sys/wait.h> -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef _WIN32 -#include <unistd.h> -#else + +#ifdef _WIN32 /* snprintf is not supported on <= VS2013 */ #define snprintf _snprintf #endif -#include <wiredtiger.h> - -#include "test_util.i" - static char home[512]; /* Program working dir */ static const char *progname; /* Program name */ static const char * const uri = "table:main"; @@ -58,6 +50,8 @@ static const char * const uri = "table:main"; #define K_SIZE 16 #define V_SIZE 256 +static void usage(void) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void usage(void) { @@ -69,6 +63,7 @@ usage(void) * Child process creates the database and table, and then writes data into * the table until it is killed by the parent. */ +static void fill_db(void)WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void fill_db(void) { diff --git a/src/third_party/wiredtiger/test/salvage/Makefile.am b/src/third_party/wiredtiger/test/salvage/Makefile.am index 3e686dd2951..a3c49b9c41a 100644 --- a/src/third_party/wiredtiger/test/salvage/Makefile.am +++ b/src/third_party/wiredtiger/test/salvage/Makefile.am @@ -1,9 +1,12 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/test/utility +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = t t_SOURCES = salvage.c -t_LDADD = $(top_builddir)/libwiredtiger.la + +t_LDADD = $(top_builddir)/test/utility/libtest_util.la +t_LDADD +=$(top_builddir)/libwiredtiger.la t_LDFLAGS = -static # Run this during a "make check" smoke test. 
@@ -11,4 +14,4 @@ TESTS = $(noinst_PROGRAMS) LOG_COMPILER = $(TEST_WRAPPER) clean-local: - rm -rf WiredTiger* *.core __* + rm -rf WiredTiger* __slvg* *.core diff --git a/src/third_party/wiredtiger/test/salvage/salvage.c b/src/third_party/wiredtiger/test/salvage/salvage.c index a1517d70787..c3349188623 100644 --- a/src/third_party/wiredtiger/test/salvage/salvage.c +++ b/src/third_party/wiredtiger/test/salvage/salvage.c @@ -26,7 +26,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "test_util.i" +#include "test_util.h" #include <assert.h> @@ -159,7 +159,7 @@ int usage(void) { (void)fprintf(stderr, - "usage: %s [-v] [-r run] [-t fix|rle|var|row]\n", progname); + "usage: %s [-v] [-r run] [-t fix|var|row]\n", progname); return (EXIT_FAILURE); } @@ -170,7 +170,7 @@ run(int r) printf("\t%s: run %d\n", __wt_page_type_string(page_type), r); - CHECK(system("rm -f WiredTiger* __slvg.* __schema.*") == 0); + CHECK(system("rm -f WiredTiger* __slvg.*") == 0); CHECK((res_fp = fopen(RSLT, "w")) != NULL); /* @@ -701,7 +701,7 @@ print_res(int key, int value, int cnt) switch (page_type) { /* Print value */ case WT_PAGE_COL_FIX: ch = value & 0x7f; - if (isprint(ch)) { + if (__wt_isprint((u_char)ch)) { if (ch == '\\') fputc('\\', res_fp); fputc(ch, res_fp); diff --git a/src/third_party/wiredtiger/test/suite/run.py b/src/third_party/wiredtiger/test/suite/run.py index f7f0d1399ff..6e7421b8b96 100644 --- a/src/third_party/wiredtiger/test/suite/run.py +++ b/src/third_party/wiredtiger/test/suite/run.py @@ -51,7 +51,7 @@ elif os.path.isfile(os.path.join(wt_disttop, 'wt.exe')): wt_builddir = wt_disttop else: print 'Unable to find useable WiredTiger build' - sys.exit(False) + sys.exit(1) # Cannot import wiredtiger and supporting utils until we set up paths # We want our local tree in front of any installed versions of WiredTiger. 
@@ -241,7 +241,7 @@ if __name__ == '__main__': if option == '-dir' or option == 'D': if dirarg != None or len(args) == 0: usage() - sys.exit(False) + sys.exit(2) dirarg = args.pop(0) continue if option == '-debug' or option == 'd': @@ -252,14 +252,14 @@ if __name__ == '__main__': continue if option == '-help' or option == 'h': usage() - sys.exit(True) + sys.exit(0) if option == '-long' or option == 'l': longtest = True continue if option == '-parallel' or option == 'j': if parallel != 0 or len(args) == 0: usage() - sys.exit(False) + sys.exit(2) parallel = int(args.pop(0)) continue if option == '-preserve' or option == 'p': @@ -271,7 +271,7 @@ if __name__ == '__main__': if option == '-verbose' or option == 'v': if len(args) == 0: usage() - sys.exit(False) + sys.exit(2) verbose = int(args.pop(0)) if verbose > 3: verbose = 3 @@ -281,19 +281,19 @@ if __name__ == '__main__': if option == '-config' or option == 'c': if configfile != None or len(args) == 0: usage() - sys.exit(False) + sys.exit(2) configfile = args.pop(0) continue if option == '-configcreate' or option == 'C': if configfile != None or len(args) == 0: usage() - sys.exit(False) + sys.exit(2) configfile = args.pop(0) configwrite = True continue print 'unknown arg: ' + arg usage() - sys.exit(False) + sys.exit(2) testargs.append(arg) # All global variables should be set before any test classes are loaded. 
@@ -318,4 +318,4 @@ if __name__ == '__main__': pdb.set_trace() result = wttest.runsuite(tests, parallel) - sys.exit(not result.wasSuccessful()) + sys.exit(0 if result.wasSuccessful() else 1) diff --git a/src/third_party/wiredtiger/test/suite/suite_subprocess.py b/src/third_party/wiredtiger/test/suite/suite_subprocess.py index df89d82e4c9..c56c8d8e933 100644 --- a/src/third_party/wiredtiger/test/suite/suite_subprocess.py +++ b/src/third_party/wiredtiger/test/suite/suite_subprocess.py @@ -117,13 +117,12 @@ class suite_subprocess: print 'ERROR: ' + filename + ' should not be empty (this command expected error output)' self.assertNotEqual(filesize, 0, filename + ': expected to not be empty') - def runWt(self, args, infilename=None, outfilename=None, errfilename=None, reopensession=True): - """ - Run the 'wt' process - """ + # Run the wt utility. + def runWt(self, args, infilename=None, + outfilename=None, errfilename=None, reopensession=True, failure=False): - # we close the connection to guarantee everything is - # flushed, and that we can open it from another process + # Close the connection to guarantee everything is flushed, and that + # we can open it from another process. 
self.close_conn() wtoutname = outfilename or "wt.out" @@ -141,14 +140,26 @@ class suite_subprocess: infilepart = "<" + infilename + " " print str(procargs) print "*********************************************" - print "**** Run 'wt' via: run " + " ".join(procargs[3:]) + infilepart + ">" + wtoutname + " 2>" + wterrname + print "**** Run 'wt' via: run " + \ + " ".join(procargs[3:]) + infilepart + \ + ">" + wtoutname + " 2>" + wterrname print "*********************************************" - subprocess.call(procargs) + returncode = subprocess.call(procargs) elif infilename: with open(infilename, "r") as wtin: - subprocess.call(procargs, stdin=wtin, stdout=wtout, stderr=wterr) + returncode = subprocess.call( + procargs, stdin=wtin, stdout=wtout, stderr=wterr) else: - subprocess.call(procargs, stdout=wtout, stderr=wterr) + returncode = subprocess.call( + procargs, stdout=wtout, stderr=wterr) + if failure: + self.assertNotEqual(returncode, 0, + 'expected failure: "' + \ + str(procargs) + '": exited ' + str(returncode)) + else: + self.assertEqual(returncode, 0, + 'expected success: "' + \ + str(procargs) + '": exited ' + str(returncode)) if errfilename == None: self.check_empty_file(wterrname) if outfilename == None: diff --git a/src/third_party/wiredtiger/test/suite/test_backup05.py b/src/third_party/wiredtiger/test/suite/test_backup05.py index 991a9f71b19..fbe219d8de8 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup05.py +++ b/src/third_party/wiredtiger/test/suite/test_backup05.py @@ -37,10 +37,12 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios from helper import copy_wiredtiger_home -import wttest +import wiredtiger, wttest class test_backup05(wttest.WiredTigerTestCase, suite_subprocess): uri = 'table:test_backup05' + emptyuri = 'table:test_empty05' + newuri = 'table:test_new05' create_params = 'key_format=i,value_format=i' freq = 5 @@ -51,12 
+53,35 @@ class test_backup05(wttest.WiredTigerTestCase, suite_subprocess): # With the connection still open, copy files to new directory. # Half the time use an unaligned copy. - aligned = (i % (self.freq * 2) != 0) or os.name == "nt" + even = i % (self.freq * 2) == 0 + aligned = even or os.name == "nt" copy_wiredtiger_home(olddir, newdir, aligned) + # Half the time try to rename a table and the other half try + # to remove a table. They should fail. + if not even: + self.assertRaises(wiredtiger.WiredTigerError, + lambda: self.session.rename( + self.emptyuri, self.newuri, None)) + else: + self.assertRaises(wiredtiger.WiredTigerError, + lambda: self.session.drop(self.emptyuri, None)) + # Now simulate fsyncUnlock by closing the backup cursor. cbkup.close() + # Once the backup cursor is closed we should be able to perform + # schema operations. Test that and then reset the files to their + # expected initial names. + if not even: + self.session.rename(self.emptyuri, self.newuri, None) + self.session.drop(self.newuri, None) + self.session.create(self.emptyuri, self.create_params) + else: + self.session.drop(self.emptyuri, None) + self.session.create(self.emptyuri, self.create_params) + + # Open the new directory and verify conn = self.setUpConnectionOpen(newdir) session = self.setUpSessionOpen(conn) @@ -77,6 +102,10 @@ class test_backup05(wttest.WiredTigerTestCase, suite_subprocess): # # If the metadata isn't flushed, eventually the metadata we copy will # be sufficiently out-of-sync with the data file that it won't verify. 
+ + self.session.create(self.emptyuri, self.create_params) + self.reopen_conn() + self.session.create(self.uri, self.create_params) for i in range(100): c = self.session.open_cursor(self.uri) @@ -88,7 +117,7 @@ class test_backup05(wttest.WiredTigerTestCase, suite_subprocess): self.session.verify(self.uri) def test_backup(self): - with self.expectedStdoutPattern('Recreating metadata'): + with self.expectedStdoutPattern('recreating metadata'): self.backup() if __name__ == '__main__': diff --git a/src/third_party/wiredtiger/test/suite/test_config04.py b/src/third_party/wiredtiger/test/suite/test_config04.py index 7186bc3a716..dffa7479f1b 100644 --- a/src/third_party/wiredtiger/test/suite/test_config04.py +++ b/src/third_party/wiredtiger/test/suite/test_config04.py @@ -26,7 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. -import os +import os, shutil import wiredtiger, wttest from wiredtiger import stat @@ -34,6 +34,7 @@ from wiredtiger import stat # Individually test config options class test_config04(wttest.WiredTigerTestCase): table_name1 = 'test_config04' + log1 = 'WiredTigerLog.0000000001' nentries = 100 K = 1024 @@ -86,6 +87,10 @@ class test_config04(wttest.WiredTigerTestCase): self.assertEqual(cursor[stat.conn.cache_bytes_max][2], size) cursor.close() + def common_log_test(self, path, dirname): + self.common_test('log=(archive=false,enabled,' + path + ')') + self.assertTrue(os.path.exists(dirname + os.sep + self.log1)) + def test_bad_config(self): msg = '/unknown configuration key/' self.assertRaisesWithMessage(wiredtiger.WiredTigerError, @@ -168,24 +173,46 @@ class test_config04(wttest.WiredTigerTestCase): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.wiredtiger_open('.', '(create='), msg) - def test_session_max(self): - # Note: There isn't any direct way to know that this was set, - # but we'll have a separate functionality test to test for - # this indirectly. 
- self.common_test('session_max=99') - - def test_multiprocess(self): - self.common_test('multiprocess') - # TODO: how do we verify that it was set? - def test_error_prefix(self): self.common_test('error_prefix="MyOwnPrefix"') # TODO: how do we verify that it was set? def test_logging(self): - self.common_test('log=(enabled=true)') - # TODO: how do we verify that it was set? For this we could look - # for the existence of the log file in the home dir. + # Test variations on the log configuration. The log test takes + # a configuration string as the first arg and the directory pathname + # to confirm the existence of the log file. For now we're testing + # the log pathname only. + # + # Test the default in the home directory. + self.common_log_test('', '.') + self.conn.close() + + # Test a subdir of the home directory. + logdirname = 'logdir' + logdir = '.' + os.sep + logdirname + os.mkdir(logdir) + confstr = 'path=' + logdirname + self.common_log_test(confstr, logdir) + self.conn.close() + + # Test an absolute path directory. + if os.name == 'posix': + logdir = '/tmp/logdir' + os.mkdir(logdir) + confstr = 'path=' + logdir + self.common_log_test(confstr, logdir) + self.conn.close() + shutil.rmtree(logdir, ignore_errors=True) + + def test_multiprocess(self): + self.common_test('multiprocess') + # TODO: how do we verify that it was set? + + def test_session_max(self): + # Note: There isn't any direct way to know that this was set, + # but we'll have a separate functionality test to test for + # this indirectly. + self.common_test('session_max=99') def test_transactional(self): # Note: this will have functional tests in the future. 
diff --git a/src/third_party/wiredtiger/test/suite/test_dump.py b/src/third_party/wiredtiger/test/suite/test_dump.py index fc1422155e2..85196174c1b 100644 --- a/src/third_party/wiredtiger/test/suite/test_dump.py +++ b/src/third_party/wiredtiger/test/suite/test_dump.py @@ -26,7 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. -import os +import os, shutil import wiredtiger, wttest from helper import \ complex_populate, complex_populate_check, \ @@ -42,6 +42,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): dir='dump.dir' # Backup directory name name = 'test_dump' + name2 = 'test_dumpb' nentries = 2500 dumpfmt = [ @@ -109,6 +110,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): # Create the object. uri = self.uri + self.name + uri2 = self.uri + self.name2 self.populate(self, uri, self.config + ',key_format=' + self.keyfmt, self.nentries) @@ -130,23 +132,19 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): self.assertEqual(not s1.symmetric_difference(s2), True) # Check the object's contents - conn = self.wiredtiger_open(self.dir) - session = conn.open_session() + self.reopen_conn(self.dir) self.populate_check(self, uri, self.nentries) - conn.close() - # Re-load the object again. + # Re-load the object again in the original directory. + self.reopen_conn('.') self.runWt(['-h', self.dir, 'load', '-f', 'dump.out']) # Check the contents, they shouldn't have changed. - conn = self.wiredtiger_open(self.dir) - session = conn.open_session() self.populate_check(self, uri, self.nentries) - conn.close() # Re-load the object again, but confirm -n (no overwrite) fails. - self.runWt(['-h', self.dir, - 'load', '-n', '-f', 'dump.out'], errfilename='errfile.out') + self.runWt(['-h', self.dir, 'load', '-n', '-f', 'dump.out'], + errfilename='errfile.out', failure=True) self.check_non_empty_file('errfile.out') # If there are indices, dump one of them and check the output. 
@@ -158,5 +156,14 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): self.check_non_empty_file('dumpidx.out') self.compare_dump_values('dump.out', 'dumpidx.out') + # Re-load the object into a different table uri + shutil.rmtree(self.dir) + os.mkdir(self.dir) + self.runWt(['-h', self.dir, 'load', '-r', self.name2, '-f', 'dump.out']) + + # Check the contents in the new table. + self.reopen_conn(self.dir) + self.populate_check(self, uri2, self.nentries) + if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_join01.py b/src/third_party/wiredtiger/test/suite/test_join01.py index 4aa2bc6e269..f8d96a2718a 100644 --- a/src/third_party/wiredtiger/test/suite/test_join01.py +++ b/src/third_party/wiredtiger/test/suite/test_join01.py @@ -35,10 +35,44 @@ from wtscenario import check_scenarios, multiply_scenarios, number_scenarios class test_join01(wttest.WiredTigerTestCase): nentries = 100 - scenarios = [ + type_scen = [ ('table', dict(ref='table')), ('index', dict(ref='index')) ] + bloom0_scen = [ + ('bloom0=0', dict(joincfg0='')), + ('bloom0=1000', dict(joincfg0=',strategy=bloom,count=1000')), + ('bloom0=10000', dict(joincfg0=',strategy=bloom,count=10000')), + ] + bloom1_scen = [ + ('bloom1=0', dict(joincfg1='')), + ('bloom1=1000', dict(joincfg1=',strategy=bloom,count=1000')), + ('bloom1=10000', dict(joincfg1=',strategy=bloom,count=10000')), + ] + projection_scen = [ + ('no-projection', dict(do_proj=False)), + ('projection', dict(do_proj=True)) + ] + nested_scen = [ + ('simple', dict(do_nested=False)), + ('nested', dict(do_nested=True)) + ] + stats_scen = [ + ('no-stats', dict(do_stats=False)), + ('stats', dict(do_stats=True)) + ] + order_scen = [ + ('order=0', dict(join_order=0)), + ('order=1', dict(join_order=1)), + ('order=2', dict(join_order=2)), + ('order=3', dict(join_order=3)), + ] + scenarios = number_scenarios(multiply_scenarios('.', type_scen, + bloom0_scen, bloom1_scen, + projection_scen, + nested_scen, 
stats_scen, + order_scen)) + # We need statistics for these tests. conn_config = 'statistics=(all)' @@ -52,9 +86,29 @@ class test_join01(wttest.WiredTigerTestCase): return [s, rs, sort3] # Common function for testing iteration of join cursors - def iter_common(self, jc, do_proj): + def iter_common(self, jc, do_proj, do_nested, join_order): # See comments in join_common() - expect = [73, 82, 62, 83, 92] + # The order that the results are seen depends on + # the ordering of the joins. Specifically, the first + # join drives the order that results are seen. + if do_nested: + if join_order == 0: + expect = [73, 82, 83, 92] + elif join_order == 1: + expect = [73, 82, 83, 92] + elif join_order == 2: + expect = [82, 92, 73, 83] + elif join_order == 3: + expect = [92, 73, 82, 83] + else: + if join_order == 0: + expect = [73, 82, 62, 83, 92] + elif join_order == 1: + expect = [62, 73, 82, 83, 92] + elif join_order == 2: + expect = [62, 82, 92, 73, 83] + elif join_order == 3: + expect = [73, 82, 62, 83, 92] while jc.next() == 0: [k] = jc.get_keys() i = k - 1 @@ -64,7 +118,9 @@ class test_join01(wttest.WiredTigerTestCase): [v0,v1,v2] = jc.get_values() self.assertEquals(self.gen_values(i), [v0,v1,v2]) if len(expect) == 0 or i != expect[0]: - self.tty(' result ' + str(i) + ' is not in: ' + str(expect)) + self.tty('ERROR: ' + str(i) + ' is not next in: ' + + str(expect)) + self.tty('JOIN ORDER=' + str(join_order) + ', NESTED=' + str(do_nested)) self.assertTrue(i == expect[0]) expect.remove(i) self.assertEquals(0, len(expect)) @@ -81,6 +137,8 @@ class test_join01(wttest.WiredTigerTestCase): 'join: index:join01:index2: ' + statdesc ] if self.ref == 'index': expectstats.append('join: index:join01:index0: ' + statdesc) + elif self.do_proj: + expectstats.append('join: table:join01(v2,v1,v0): ' + statdesc) else: expectstats.append('join: table:join01: ' + statdesc) self.check_stats(statcur, expectstats) @@ -118,11 +176,46 @@ class test_join01(wttest.WiredTigerTestCase): 
self.assertTrue(len(expectstats) == 0, 'missing expected values in stats: ' + str(expectstats)) + def session_record_join(self, jc, refc, config, order, joins): + joins.append([order, [jc, refc, config]]) + + def session_play_one_join(self, firsturi, jc, refc, config): + if refc.uri == firsturi and config != None: + config = config.replace('strategy=bloom','') + #self.tty('->join(jc, uri="' + refc.uri + + # '", config="' + str(config) + '"') + self.session.join(jc, refc, config) + + def session_play_joins(self, joins, join_order): + #self.tty('->') + firsturi = None + for [i, joinargs] in joins: + if i >= join_order: + if firsturi == None: + firsturi = joinargs[1].uri + self.session_play_one_join(firsturi, *joinargs) + for [i, joinargs] in joins: + if i < join_order: + if firsturi == None: + firsturi = joinargs[1].uri + self.session_play_one_join(firsturi, *joinargs) + # Common function for testing the most basic functionality # of joins - def join_common(self, joincfg0, joincfg1, do_proj, do_stats): + def test_join(self): + joincfg0 = self.joincfg0 + joincfg1 = self.joincfg1 + do_proj = self.do_proj + do_nested = self.do_nested + do_stats = self.do_stats + join_order = self.join_order #self.tty('join_common(' + joincfg0 + ',' + joincfg1 + ',' + - # str(do_proj) + ')') + # str(do_proj) + ',' + str(do_nested) + ',' + + # str(do_stats) + ',' + str(join_order) + ')') + + closeme = [] + joins = [] # cursors to be joined + self.session.create('table:join01', 'key_format=r' + ',value_format=SSi,columns=(k,v0,v1,v2)') self.session.create('index:join01:index0','columns=(v0)') @@ -143,7 +236,7 @@ class test_join01(wttest.WiredTigerTestCase): # We join on index2 first, not using bloom indices. # This defines the order that items are returned. - # index2 is sorts multiples of 3 first (see gen_values()) + # index2 sorts multiples of 3 first (see gen_values()) # and by using 'gt' and key 99, we'll skip multiples of 3, # and examine primary keys 2,5,8,...,95,98,1,4,7,...,94,97. 
jc = self.session.open_cursor('join:table:join01' + proj_suffix, @@ -152,7 +245,7 @@ class test_join01(wttest.WiredTigerTestCase): c2 = self.session.open_cursor('index:join01:index2(v1)', None, None) c2.set_key(99) # skips all entries w/ primary key divisible by three self.assertEquals(0, c2.search()) - self.session.join(jc, c2, 'compare=gt') + self.session_record_join(jc, c2, 'compare=gt', 0, joins) # Then select all the numbers 0-99 whose string representation # sort >= '60'. @@ -163,285 +256,87 @@ class test_join01(wttest.WiredTigerTestCase): c0 = self.session.open_cursor('table:join01', None, None) c0.set_key(60) self.assertEquals(0, c0.search()) - self.session.join(jc, c0, 'compare=ge' + joincfg0) + self.session_record_join(jc, c0, 'compare=ge' + joincfg0, 1, joins) # Then select all numbers whose reverse string representation # is in '20' < x < '40'. c1a = self.session.open_cursor('index:join01:index1(v1)', None, None) c1a.set_key('21') self.assertEquals(0, c1a.search()) - self.session.join(jc, c1a, 'compare=gt' + joincfg1) + self.session_record_join(jc, c1a, 'compare=gt' + joincfg1, 2, joins) c1b = self.session.open_cursor('index:join01:index1(v1)', None, None) c1b.set_key('41') self.assertEquals(0, c1b.search()) - self.session.join(jc, c1b, 'compare=lt' + joincfg1) + self.session_record_join(jc, c1b, 'compare=lt' + joincfg1, 2, joins) # Numbers that satisfy these 3 conditions (with ordering implied by c2): # [73, 82, 62, 83, 92]. # # After iterating, we should be able to reset and iterate again. + if do_nested: + # To test nesting, we create two new levels of conditions: + # + # x == 72 or x == 73 or x == 82 or x == 83 or + # (x >= 90 and x <= 99) + # + # that will get AND-ed into our existing join. The expected + # result is [73, 82, 83, 92]. + # + # We don't specify the projection here, it should be picked up + # from the 'enclosing' join. 
+ nest1 = self.session.open_cursor('join:table:join01', None, None) + nest2 = self.session.open_cursor('join:table:join01', None, None) + + nc = self.session.open_cursor('index:join01:index0', None, None) + nc.set_key('90') + self.assertEquals(0, nc.search()) + self.session.join(nest2, nc, 'compare=ge') # joincfg left out + closeme.append(nc) + + nc = self.session.open_cursor('index:join01:index0', None, None) + nc.set_key('99') + self.assertEquals(0, nc.search()) + self.session.join(nest2, nc, 'compare=le') + closeme.append(nc) + + self.session.join(nest1, nest2, "operation=or") + + for val in [ '72', '73', '82', '83' ]: + nc = self.session.open_cursor('index:join01:index0', None, None) + nc.set_key(val) + self.assertEquals(0, nc.search()) + self.session.join(nest1, nc, 'compare=eq,operation=or' + + joincfg0) + closeme.append(nc) + self.session_record_join(jc, nest1, None, 3, joins) + + self.session_play_joins(joins, join_order) + self.iter_common(jc, do_proj, do_nested, join_order) if do_stats: self.stats(jc, 0) - self.iter_common(jc, do_proj) + jc.reset() + self.iter_common(jc, do_proj, do_nested, join_order) if do_stats: self.stats(jc, 1) jc.reset() - self.iter_common(jc, do_proj) + self.iter_common(jc, do_proj, do_nested, join_order) if do_stats: self.stats(jc, 2) jc.reset() - self.iter_common(jc, do_proj) + self.iter_common(jc, do_proj, do_nested, join_order) jc.close() c2.close() c1a.close() c1b.close() c0.close() + if do_nested: + nest1.close() + nest2.close() + for c in closeme: + c.close() self.session.drop('table:join01') - # Test joins with basic functionality - def test_join(self): - bloomcfg1000 = ',strategy=bloom,count=1000' - bloomcfg10000 = ',strategy=bloom,count=10000' - for cfga in [ '', bloomcfg1000, bloomcfg10000 ]: - for cfgb in [ '', bloomcfg1000, bloomcfg10000 ]: - for do_proj in [ False, True ]: - #self.tty('cfga=' + cfga + - # ', cfgb=' + cfgb + - # ', doproj=' + str(do_proj)) - self.join_common(cfga, cfgb, do_proj, False) - - def 
test_join_errors(self): - self.session.create('table:join01', 'key_format=r,value_format=SS' - ',columns=(k,v0,v1)') - self.session.create('table:join01B', 'key_format=r,value_format=SS' - ',columns=(k,v0,v1)') - self.session.create('index:join01:index0','columns=(v0)') - self.session.create('index:join01:index1','columns=(v1)') - self.session.create('index:join01B:index0','columns=(v0)') - jc = self.session.open_cursor('join:table:join01', None, None) - tc = self.session.open_cursor('table:join01', None, None) - fc = self.session.open_cursor('file:join01.wt', None, None) - ic0 = self.session.open_cursor('index:join01:index0', None, None) - ic0again = self.session.open_cursor('index:join01:index0', None, None) - ic1 = self.session.open_cursor('index:join01:index1', None, None) - icB = self.session.open_cursor('index:join01B:index0', None, None) - tcB = self.session.open_cursor('table:join01B', None, None) - - tc.set_key(1) - tc.set_value('val1', 'val1') - tc.insert() - tcB.set_key(1) - tcB.set_value('val1', 'val1') - tcB.insert() - fc.next() - - # Joining using a non join-cursor - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(tc, ic0, 'compare=ge'), - '/not a join cursor/') - # Joining a table cursor, not index - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, fc, 'compare=ge'), - '/not an index or table cursor/') - # Joining a non positioned cursor - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, ic0, 'compare=ge'), - '/requires reference cursor be positioned/') - ic0.set_key('val1') - # Joining a non positioned cursor (no search or next has been done) - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, ic0, 'compare=ge'), - '/requires reference cursor be positioned/') - ic0.set_key('valXX') - self.assertEqual(ic0.search(), wiredtiger.WT_NOTFOUND) - # Joining a non positioned cursor after failed search - 
self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, ic0, 'compare=ge'), - '/requires reference cursor be positioned/') - - # position the cursors now - ic0.set_key('val1') - ic0.search() - ic0again.next() - icB.next() - - # Joining non matching index - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, icB, 'compare=ge'), - '/table for join cursor does not match/') - - # The cursor must be positioned - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, ic1, 'compare=ge'), - '/requires reference cursor be positioned/') - ic1.next() - - # The first cursor joined cannot be bloom - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, ic1, - 'compare=ge,strategy=bloom,count=1000'), - '/first joined cursor cannot specify strategy=bloom/') - - # This succeeds. - self.session.join(jc, ic1, 'compare=ge'), - - # With bloom filters, a count is required - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, ic0, 'compare=ge,strategy=bloom'), - '/count must be nonzero/') - - # This succeeds. 
- self.session.join(jc, ic0, 'compare=ge,strategy=bloom,count=1000'), - - bloom_config = ',strategy=bloom,count=1000' - # Cannot use the same index cursor - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, ic0, - 'compare=le' + bloom_config), - '/index cursor already used in a join/') - - # When joining with the same index, need compatible compares - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, ic0again, 'compare=ge' + bloom_config), - '/join has overlapping ranges/') - - # Another incompatible compare - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, ic0again, 'compare=gt' + bloom_config), - '/join has overlapping ranges/') - - # Compare is compatible, but bloom args need to match - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, ic0again, 'compare=le'), - '/join has incompatible strategy/') - - # Counts need to match for bloom filters - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(jc, ic0again, 'compare=le,strategy=bloom,' - 'count=100'), '/count.* does not match previous count/') - - # This succeeds - self.session.join(jc, ic0again, 'compare=le,strategy=bloom,count=1000') - - # Need to do initial next() before getting key/values - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: jc.get_keys(), - '/join cursor must be advanced with next/') - - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: jc.get_values(), - '/join cursor must be advanced with next/') - - # Operations on the joined cursor are frozen until the join is closed. - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: ic0.next(), - '/index cursor is being used in a join/') - - # Operations on the joined cursor are frozen until the join is closed. 
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: ic0.prev(), - '/index cursor is being used in a join/') - - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: ic0.reset(), - '/index cursor is being used in a join/') - - # Only a small number of operations allowed on a join cursor - msg = "/Unsupported cursor/" - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: jc.search(), msg) - - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: jc.prev(), msg) - - self.assertEquals(jc.next(), 0) - self.assertEquals(jc.next(), wiredtiger.WT_NOTFOUND) - - # Only after the join cursor is closed can we use the index cursor - # normally - jc.close() - self.assertEquals(ic0.next(), wiredtiger.WT_NOTFOUND) - self.assertEquals(ic0.prev(), 0) - - # common code for making sure that cursors can be - # implicitly closed, no matter the order they are created - def cursor_close_common(self, joinfirst): - self.session.create('table:join01', 'key_format=r' + - ',value_format=SS,columns=(k,v0,v1)') - self.session.create('index:join01:index0','columns=(v0)') - self.session.create('index:join01:index1','columns=(v1)') - c = self.session.open_cursor('table:join01', None, None) - for i in range(0, self.nentries): - c.set_key(*self.gen_key(i)) - c.set_value(*self.gen_values(i)) - c.insert() - c.close() - - if joinfirst: - jc = self.session.open_cursor('join:table:join01', None, None) - c0 = self.session.open_cursor('index:join01:index0', None, None) - c1 = self.session.open_cursor('index:join01:index1', None, None) - c0.next() # index cursors must be positioned - c1.next() - if not joinfirst: - jc = self.session.open_cursor('join:table:join01', None, None) - self.session.join(jc, c0, 'compare=ge') - self.session.join(jc, c1, 'compare=ge') - self.session.close() - self.session = None - - def test_cursor_close1(self): - self.cursor_close_common(True) - - def test_cursor_close2(self): - self.cursor_close_common(False) - - # test 
statistics using the framework set up for this test - def test_stats(self): - bloomcfg1000 = ',strategy=bloom,count=1000' - bloomcfg10 = ',strategy=bloom,count=10' - self.join_common(bloomcfg1000, bloomcfg1000, False, True) - - # Intentially run with an underconfigured Bloom filter, - # statistics should pick up some false positives. - self.join_common(bloomcfg10, bloomcfg10, False, True) - - # test statistics with a simple one index join cursor - def test_simple_stats(self): - self.session.create("table:join01b", - "key_format=i,value_format=i,columns=(k,v)") - self.session.create("index:join01b:index", "columns=(v)") - - cursor = self.session.open_cursor("table:join01b", None, None) - cursor[1] = 11 - cursor[2] = 12 - cursor[3] = 13 - cursor.close() - - cursor = self.session.open_cursor("index:join01b:index", None, None) - cursor.set_key(11) - cursor.search() - - jcursor = self.session.open_cursor("join:table:join01b", None, None) - self.session.join(jcursor, cursor, "compare=gt") - - while jcursor.next() == 0: - [k] = jcursor.get_keys() - [v] = jcursor.get_values() - - statcur = self.session.open_cursor("statistics:join", jcursor, None) - found = False - while statcur.next() == 0: - [desc, pvalue, value] = statcur.get_values() - #self.tty(str(desc) + "=" + str(pvalue)) - found = True - self.assertEquals(found, True) - - jcursor.close() - cursor.close() - - if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_join07.py b/src/third_party/wiredtiger/test/suite/test_join07.py new file mode 100644 index 00000000000..36e91361329 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_join07.py @@ -0,0 +1,548 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. 
+# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +import os, re, run +import wiredtiger, wttest, suite_random +from wtscenario import check_scenarios, multiply_scenarios, number_scenarios + +class ParseException(Exception): + def __init__(self, msg): + super(ParseException, self).__init__(msg) + +class Token: + UNKNOWN = '<unknown>' + NUMBER = 'Number' + STRING = 'String' + COLUMN = 'Column' + LPAREN = '(' + RPAREN = ')' + LBRACKET = '{' + RBRACKET = '}' + COMMA = ',' + OR = '||' + AND = '&&' + LT = '<' + GT = '>' + LE = '<=' + GE = '>=' + EQ = '==' + ATTRIBUTE = 'Attribute' # bracketed key value pair + + COMPARE_OPS = [LT, GT, LE, GE, EQ] + COMPARATORS = [NUMBER, STRING] + + def __init__(self, kind, tokenizer): + self.kind = kind + self.pos = tokenizer.off + tokenizer.pos + self.n = 0 + self.s = '' + self.index = '' + self.attr_key = '' + self.attr_value = '' + self.groups = None + + def __str__(self): + return '<Token ' + self.kind + ' at char ' + str(self.pos) + '>' + +class Tokenizer: + def __init__(self, s): + self.off = 0 + self.s = s + '?' 
# add a char that won't match anything + self.pos = 0 + self.end = len(s) + self.re_num = re.compile(r"(\d+)") + self.re_quote1 = re.compile(r"'([^']*)'") + self.re_quote2 = re.compile(r"\"([^\"]*)\"") + self.re_attr = re.compile(r"\[(\w+)=(\w+)\]") + self.pushed = None + + def newToken(self, kind, sz): + t = Token(kind, self) + self.pos += sz + return t + + def error(self, s): + raise ParseException(str(self.pos) + ': ' + s) + + def matched(self, kind, repat): + pos = self.pos + match = re.match(repat, self.s[pos:]) + if not match: + end = pos + 10 + if end > self.end: + end = self.end + self.error('matching ' + kind + ' at "' + + self.s[pos:end] + '..."') + t = self.newToken(kind, match.end()) + t.groups = match.groups() + t.s = self.s[pos:pos + match.end()] + return t + + def available(self): + if self.pushed == None: + self.pushback(self.token()) + return (self.pushed != None) + + def pushback(self, token): + if self.pushed != None: + raise AssertionError('pushback more than once') + self.pushed = token + + def peek(self): + token = self.token() + self.pushback(token) + return token + + def scan(self): + while self.pos < self.end and self.s[self.pos].isspace(): + self.pos += 1 + return '' if self.pos >= self.end else self.s[self.pos] + + def token(self): + if self.pushed != None: + ret = self.pushed + self.pushed = None + return ret + c = self.scan() + if self.pos >= self.end: + return None + lookahead = '' if self.pos + 1 >= self.end else self.s[self.pos+1] + #self.tty("Tokenizer.token char=" + c + ", lookahead=" + lookahead) + if c == "'": + t = self.matched(Token.STRING, self.re_quote1) + t.s = t.groups[0] + return t + if c == '"': + t = self.matched(Token.STRING, self.re_quote2) + t.s = t.groups[0] + return t + if c in "{}(),": + return self.newToken(c, 1) + if c == "|": + if lookahead != "|": + self.error('matching OR') + return self.newToken(Token.OR, 2) + if c == "&": + if lookahead != "&": + self.error('matching AND') + return self.newToken(Token.AND, 
2) + if c in "0123456789": + t = self.matched(Token.NUMBER, self.re_num) + t.s = t.groups[0] + t.n = int(t.s) + return t + if c in "ABCDEFGHIJ": + t = self.newToken(Token.COLUMN, 1) + t.s = c + return t + if c == '<': + if lookahead == '=': + return self.newToken(Token.LE, 2) + else: + return self.newToken(Token.LT, 1) + if c == '>': + if lookahead == '=': + return self.newToken(Token.GE, 2) + else: + return self.newToken(Token.GT, 1) + if c in "=": + if lookahead != "=": + self.error('matching EQ') + return self.newToken(Token.EQ, 2) + if c in "[": + t = self.matched(Token.ATTRIBUTE, self.re_attr) + t.attr_key = t.groups[0] + t.attr_value = t.groups[1] + return t + return None + + def tty(self, s): + wttest.WiredTigerTestCase.tty(s) + +# test_join07.py +# Join interpreter +class test_join07(wttest.WiredTigerTestCase): + reverseop = { '==' : '==', '<=' : '>=', '<' : '>', '>=' : '<=', '>' : '<' } + compareop = { '==' : 'eq', '<=' : 'le', '<' : 'lt', '>=' : 'ge', + '>' : 'gt' } + columnmult = { 'A' : 1, 'B' : 2, 'C' : 3, 'D' : 4, 'E' : 5, + 'F' : 6, 'G' : 7, 'H' : 8, 'I' : 9, 'J' : 10 } + + extractscen = [ + ('extractor', dict(extractor=True)), + ('noextractor', dict(extractor=False)) + ] + + scenarios = number_scenarios(extractscen) + + # Return the wiredtiger_open extension argument for a shared library. + def extensionArg(self, exts): + extfiles = [] + for ext in exts: + (dirname, name, libname) = ext + if name != None and name != 'none': + testdir = os.path.dirname(__file__) + extdir = os.path.join(run.wt_builddir, 'ext', dirname) + extfile = os.path.join( + extdir, name, '.libs', 'libwiredtiger_' + libname + '.so') + if not os.path.exists(extfile): + self.skipTest('extension "' + extfile + '" not built') + if not extfile in extfiles: + extfiles.append(extfile) + if len(extfiles) == 0: + return '' + else: + return ',extensions=["' + '","'.join(extfiles) + '"]' + + # Override WiredTigerTestCase, we have extensions. 
+ def setUpConnectionOpen(self, dir): + extarg = self.extensionArg([('extractors', 'csv', 'csv_extractor')]) + connarg = 'create,error_prefix="{0}: ",{1}'.format( + self.shortid(), extarg) + conn = self.wiredtiger_open(dir, connarg) + self.pr(`conn`) + return conn + + def expect(self, token, expected): + if token == None or token.kind not in expected: + self.err(token, 'expected one of: ' + str(expected)) + return token + + def err(self, token, msg): + self.assertTrue(False, 'ERROR at token ' + str(token) + ': ' + msg) + + def gen_key(self, i): + if self.keyformat == 'S': + return [ 'key%06d' % i ] # zero pad so it sorts expectedly + else: + return [ i ] + + def gen_values(self, i): + s = "" + ret = [] + for x in range(1, 11): + v = (i * x) % self.N + if x <= 5: + ret.append(v) + else: + ret.append(str(v)) + if s != "": + s += "," + s += str(v) + ret.insert(0, s) + return ret + + def iterate(self, jc, mbr): + mbr = set(mbr) # we need a mutable set + gotkeys = [] + #self.tty('iteration expects ' + str(len(mbr)) + + # ' entries: ' + str(mbr)) + while jc.next() == 0: + [k] = jc.get_keys() + values = jc.get_values() + if self.keyformat == 'S': + i = int(str(k[3:])) + else: + i = k + #self.tty('GOT key=' + str(k) + ', values=' + str(values)) + + # Duplicates may be returned when the disjunctions are used, + # so we ignore them. 
+ if not i in gotkeys: + self.assertEquals(self.gen_values(i), values) + if not i in mbr: + self.tty('ERROR: result ' + str(i) + ' is not in: ' + + str(mbr)) + self.assertTrue(i in mbr) + mbr.remove(i) + gotkeys.append(i) + self.assertEquals(0, len(mbr)) + + def token_literal(self, token): + if token.kind == Token.STRING: + return token.s + elif token.kind == Token.NUMBER: + return token.n + + def idx_sim(self, x, mult, isstr): + if isstr: + return str(int(x) * mult % self.N) + else: + return (x * mult % self.N) + + def mkmbr(self, expr): + return frozenset([x for x in self.allN if expr(x)]) + + def join_one_side(self, jc, coltok, littok, optok, conjunction, + isright, mbr): + idxname = 'index:join07:' + coltok.s + cursor = self.session.open_cursor(idxname, None, None) + jc.cursors.append(cursor) + literal = self.token_literal(littok) + cursor.set_key(literal) + searchret = cursor.search() + if searchret != 0: + self.tty('ERROR: cannot find value ' + str(literal) + + ' in ' + idxname) + self.assertEquals(0, searchret) + op = optok.kind + if not isright: + op = self.reverseop[op] + mult = self.columnmult[coltok.s] + config = 'compare=' + self.compareop[op] + ',operation=' + \ + ('and' if conjunction else 'or') + if hasattr(coltok, 'bloom'): + config += ',strategy=bloom,count=' + str(coltok.bloom) + #self.tty('join(jc, cursor=' + str(literal) + ', ' + config) + self.session.join(jc, cursor, config) + isstr = type(literal) is str + if op == '==': + tmbr = self.mkmbr(lambda x: self.idx_sim(x, mult, isstr) == literal) + elif op == '<=': + tmbr = self.mkmbr(lambda x: self.idx_sim(x, mult, isstr) <= literal) + elif op == '<': + tmbr = self.mkmbr(lambda x: self.idx_sim(x, mult, isstr) < literal) + elif op == '>=': + tmbr = self.mkmbr(lambda x: self.idx_sim(x, mult, isstr) >= literal) + elif op == '>': + tmbr = self.mkmbr(lambda x: self.idx_sim(x, mult, isstr) > literal) + if conjunction: + mbr = mbr.intersection(tmbr) + else: + mbr = mbr.union(tmbr) + return mbr + + def 
parse_join(self, jc, tokenizer, conjunction, mbr): + left = None + right = None + leftop = None + rightop = None + col = None + token = tokenizer.token() + if token.kind == Token.LPAREN: + subjc = self.session.open_cursor('join:table:join07', None, None) + jc.cursors.append(subjc) + submbr = self.parse_junction(subjc, tokenizer) + config = 'operation=' + ('and' if conjunction else 'or') + self.session.join(jc, subjc, config) + if conjunction: + mbr = mbr.intersection(submbr) + else: + mbr = mbr.union(submbr) + return mbr + if token.kind in Token.COMPARATORS: + left = token + leftop = self.expect(tokenizer.token(), Token.COMPARE_OPS) + token = tokenizer.token() + col = self.expect(token, [Token.COLUMN]) + token = tokenizer.token() + if token.kind in Token.ATTRIBUTE: + tokenizer.pushback(token) + self.parse_column_attributes(tokenizer, col) + token = tokenizer.token() + if token.kind in Token.COMPARE_OPS: + rightop = token + right = self.expect(tokenizer.token(), Token.COMPARATORS) + token = tokenizer.token() + tokenizer.pushback(token) + + # Now we have everything we need to do a join. 
+ if left != None: + mbr = self.join_one_side(jc, col, left, leftop, conjunction, + False, mbr) + if right != None: + mbr = self.join_one_side(jc, col, right, rightop, conjunction, + True, mbr) + return mbr + + # Parse a set of joins, grouped by && or || + def parse_junction(self, jc, tokenizer): + jc.cursors = [] + + # Take a peek at the tokenizer's stream to see if we + # have a conjunction or disjunction + token = tokenizer.peek() + s = tokenizer.s[token.pos:] + (andpos, orpos) = self.find_nonparen(s, ['&', '|']) + if orpos >= 0 and (andpos < 0 or orpos < andpos): + conjunction = False + mbr = frozenset() + else: + conjunction = True + mbr = frozenset(self.allN) + + while tokenizer.available(): + mbr = self.parse_join(jc, tokenizer, conjunction, mbr) + token = tokenizer.token() + if token != None: + if token.kind == Token.OR: + self.assertTrue(not conjunction) + elif token.kind == Token.AND: + self.assertTrue(conjunction) + elif token.kind == Token.RPAREN: + break + else: + self.err(token, 'unexpected token') + return mbr + + def parse_attributes(self, tokenizer): + attributes = [] + token = tokenizer.token() + while token != None and token.kind == Token.ATTRIBUTE: + attributes.append(token) + token = tokenizer.token() + tokenizer.pushback(token) + return attributes + + # Find a set of chars that aren't within parentheses. + # For this simple language, we don't allow parentheses in quoted literals. 
+ def find_nonparen(self, s, matchlist): + pos = 0 + end = len(s) + nmatch = len(matchlist) + nfound = 0 + result = [-1 for i in range(0, nmatch)] + parennest = 0 + while pos < end and nfound < nmatch: + c = s[pos] + if c == '(': + parennest += 1 + elif c == ')': + parennest -= 1 + if parennest < 0: + break + elif parennest == 0 and c in matchlist: + m = matchlist.index(c) + if result[m] < 0: + result[m] = pos + nfound += 1 + pos += 1 + return result + + def parse_toplevel(self, jc, tokenizer): + return self.parse_junction(jc, tokenizer) + + def parse_toplevel_attributes(self, tokenizer): + for attrtoken in self.parse_attributes(tokenizer): + key = attrtoken.attr_key + value = attrtoken.attr_value + #self.tty('ATTR:' + str([key,value])) + if key == 'N': + self.N = int(value) + elif key == 'key': + self.keyformat = value + else: + tokenizer.error('bad attribute key: ' + str(key)) + + def parse_column_attributes(self, tokenizer, c): + for attrtoken in self.parse_attributes(tokenizer): + key = attrtoken.attr_key + value = attrtoken.attr_value + #self.tty('ATTR:' + str([key,value])) + if key == 'bloom': + c.bloom = int(value) + else: + tokenizer.error('bad column attribute key: ' + str(key)) + + def close_cursors(self, jc): + jc.close() + for c in jc.cursors: + if c.uri[0:5] == 'join:': + self.close_cursors(c) + else: + c.close() + + def interpret(self, s): + #self.tty('INTERPRET: ' + s) + self.N = 1000 + self.keyformat = "r" + self.keycols = 'k' + + # Grab attributes before creating anything, as some attributes + # may override needed parameters. 
+ tokenizer = Tokenizer(s) + self.parse_toplevel_attributes(tokenizer) + self.allN = range(1, self.N + 1) + + self.session.create('table:join07', 'key_format=' + self.keyformat + + ',value_format=SiiiiiSSSSS,' + + 'columns=(' + self.keycols + + ',S,A,B,C,D,E,F,G,H,I,J)') + mdfieldnum = 0 + mdformat = 'i' + mdconfig = '' + for colname in [ 'A','B','C','D','E','F','G','H','I','J' ]: + if self.extractor: + if colname == 'F': + mdformat = 'S' + mdconfig = 'app_metadata={"format" : "%s","field" : "%d"}' % \ + (mdformat, mdfieldnum) + config = 'extractor=csv,key_format=%s' % mdformat + mdfieldnum += 1 + else: + config = 'columns=(%s)' % colname + self.session.create('index:join07:%s' % colname, + '%s,%s' % (config, mdconfig)) + c = self.session.open_cursor('table:join07', None, None) + for i in self.allN: + c.set_key(*self.gen_key(i)) + c.set_value(*self.gen_values(i)) + c.insert() + c.close() + + jc = self.session.open_cursor('join:table:join07', None, None) + mbr = self.parse_toplevel(jc, tokenizer) + self.iterate(jc, mbr) + + self.close_cursors(jc) + self.session.drop('table:join07') + + def test_join_string(self): + self.interpret("[N=1000][key=r] 7 < A <= 500 && B < 150 && C > 17") + self.interpret("[N=1001][key=r] 7 < A <= 500 && B < 150 && F > '234'") + self.interpret("[N=10000][key=r] 7 < A <= 500 && B < 150 && " + + "(F > '234' || G < '100')") + self.interpret("[N=7919][key=r](7 < A <= 9)&&(F > '234')") + self.interpret("[N=1000][key=S](A>=0 && A<0)||(A>999)") + self.interpret("[N=2000][key=S](A>=0 && A<0)||(A>1999)") + self.interpret("(7<A<=10 && B < 150)||(B>998)") + self.interpret("(7<A<=10 && B < 150)||(J=='990')") + clause1 = "(7 < A <= 500 && B < 150)" + clause2 = "(F > '234' || G < '100')" + self.interpret("[N=1000][key=r]" + clause1 + "&&" + clause2) + self.interpret("(7<A<=10)||(B>994||C<12)") + self.interpret("(7<A<=10 && B < 150)||(B>996||C<6)") + self.interpret("[N=1000][key=r]" + clause2 + "||" + clause1) + self.interpret("[N=1000][key=r]" + clause1 
+ "||" + clause2) + self.interpret("[N=1000][key=S]" + clause2 + "&&" + clause1) + clause1 = "(7 < A <= 500 && B[bloom=300] < 150)" + clause2 = "(F[bloom=500] > '234' || G[bloom=20] < '100')" + self.interpret("[N=1000][key=S]" + clause1 + "&&" + clause2) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_join08.py b/src/third_party/wiredtiger/test/suite/test_join08.py new file mode 100644 index 00000000000..6d674ab8193 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_join08.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +import wiredtiger, wttest +from wtscenario import check_scenarios, multiply_scenarios, number_scenarios + +# test_join08.py +# Test join error paths +class test_join08(wttest.WiredTigerTestCase): + nentries = 100 + + # We need statistics for these tests. + conn_config = 'statistics=(all)' + + def gen_key(self, i): + return [ i + 1 ] + + def gen_values(self, i): + s = str(i) + rs = s[::-1] + sort3 = (self.nentries * (i % 3)) + i # multiples of 3 sort first + return [s, rs, sort3] + + def test_join_errors(self): + self.session.create('table:join08', 'key_format=r,value_format=SS' + ',columns=(k,v0,v1)') + self.session.create('table:join08B', 'key_format=r,value_format=SS' + ',columns=(k,v0,v1)') + self.session.create('index:join08:index0','columns=(v0)') + self.session.create('index:join08:index1','columns=(v1)') + self.session.create('index:join08B:index0','columns=(v0)') + jc = self.session.open_cursor('join:table:join08', None, None) + tc = self.session.open_cursor('table:join08', None, None) + fc = self.session.open_cursor('file:join08.wt', None, None) + ic0 = self.session.open_cursor('index:join08:index0', None, None) + ic0again = self.session.open_cursor('index:join08:index0', None, None) + ic1 = self.session.open_cursor('index:join08:index1', None, None) + icB = self.session.open_cursor('index:join08B:index0', None, None) + tcB = self.session.open_cursor('table:join08B', None, None) + + tc.set_key(1) + tc.set_value('val1', 'val1') + tc.insert() + tcB.set_key(1) + tcB.set_value('val1', 'val1') + tcB.insert() + fc.next() + + # Joining using a non join-cursor + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(tc, ic0, 'compare=ge'), + '/not a join cursor/') + # Joining a table cursor, not index + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, fc, 'compare=ge'), + '/must be an index, table or join cursor/') + # Joining a non positioned cursor + 
self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, ic0, 'compare=ge'), + '/requires reference cursor be positioned/') + ic0.set_key('val1') + # Joining a non positioned cursor (no search or next has been done) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, ic0, 'compare=ge'), + '/requires reference cursor be positioned/') + ic0.set_key('valXX') + self.assertEqual(ic0.search(), wiredtiger.WT_NOTFOUND) + # Joining a non positioned cursor after failed search + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, ic0, 'compare=ge'), + '/requires reference cursor be positioned/') + + # position the cursors now + ic0.set_key('val1') + ic0.search() + ic0again.next() + icB.next() + + # Joining non matching index + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, icB, 'compare=ge'), + '/table for join cursor does not match/') + + # The cursor must be positioned + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, ic1, 'compare=ge'), + '/requires reference cursor be positioned/') + ic1.next() + + # This succeeds. + self.session.join(jc, ic1, 'compare=ge'), + + # With bloom filters, a count is required + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, ic0, 'compare=ge,strategy=bloom'), + '/count must be nonzero/') + + # This succeeds. 
+ self.session.join(jc, ic0, 'compare=ge,strategy=bloom,count=1000'), + + bloom_config = ',strategy=bloom,count=1000' + # Cannot use the same index cursor + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, ic0, + 'compare=le' + bloom_config), + '/cursor already used in a join/') + + # When joining with the same index, need compatible compares + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, ic0again, 'compare=ge' + bloom_config), + '/join has overlapping ranges/') + + # Another incompatible compare + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, ic0again, 'compare=gt' + bloom_config), + '/join has overlapping ranges/') + + # Compare is compatible, but bloom args need to match + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, ic0again, 'compare=le'), + '/join has incompatible strategy/') + + # Counts need to match for bloom filters + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.join(jc, ic0again, 'compare=le,strategy=bloom,' + 'count=100'), '/count.* does not match previous count/') + + # This succeeds + self.session.join(jc, ic0again, 'compare=le,strategy=bloom,count=1000') + + # Need to do initial next() before getting key/values + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: jc.get_keys(), + '/join cursor must be advanced with next/') + + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: jc.get_values(), + '/join cursor must be advanced with next/') + + # Operations on the joined cursor are frozen until the join is closed. + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: ic0.next(), + '/cursor is being used in a join/') + + # Operations on the joined cursor are frozen until the join is closed. 
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: ic0.prev(), + '/cursor is being used in a join/') + + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: ic0.reset(), + '/cursor is being used in a join/') + + # Only a small number of operations allowed on a join cursor + msg = "/Unsupported cursor/" + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: jc.search(), msg) + + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: jc.prev(), msg) + + self.assertEquals(jc.next(), 0) + self.assertEquals(jc.next(), wiredtiger.WT_NOTFOUND) + + # Only after the join cursor is closed can we use the index cursor + # normally + jc.close() + self.assertEquals(ic0.next(), wiredtiger.WT_NOTFOUND) + self.assertEquals(ic0.prev(), 0) + + # common code for making sure that cursors can be + # implicitly closed, no matter the order they are created + def cursor_close_common(self, joinfirst): + self.session.create('table:join08', 'key_format=r' + + ',value_format=SS,columns=(k,v0,v1)') + self.session.create('index:join08:index0','columns=(v0)') + self.session.create('index:join08:index1','columns=(v1)') + c = self.session.open_cursor('table:join08', None, None) + for i in range(0, self.nentries): + c.set_key(*self.gen_key(i)) + c.set_value(*self.gen_values(i)) + c.insert() + c.close() + + if joinfirst: + jc = self.session.open_cursor('join:table:join08', None, None) + c0 = self.session.open_cursor('index:join08:index0', None, None) + c1 = self.session.open_cursor('index:join08:index1', None, None) + c0.next() # index cursors must be positioned + c1.next() + if not joinfirst: + jc = self.session.open_cursor('join:table:join08', None, None) + self.session.join(jc, c0, 'compare=ge') + self.session.join(jc, c1, 'compare=ge') + self.session.close() + self.session = None + + def test_cursor_close1(self): + self.cursor_close_common(True) + + def test_cursor_close2(self): + self.cursor_close_common(False) + + # test statistics with 
a simple one index join cursor + def test_simple_stats(self): + self.session.create("table:join01b", + "key_format=i,value_format=i,columns=(k,v)") + self.session.create("index:join01b:index", "columns=(v)") + + cursor = self.session.open_cursor("table:join01b", None, None) + cursor[1] = 11 + cursor[2] = 12 + cursor[3] = 13 + cursor.close() + + cursor = self.session.open_cursor("index:join01b:index", None, None) + cursor.set_key(11) + cursor.search() + + jcursor = self.session.open_cursor("join:table:join01b", None, None) + self.session.join(jcursor, cursor, "compare=gt") + + while jcursor.next() == 0: + [k] = jcursor.get_keys() + [v] = jcursor.get_values() + + statcur = self.session.open_cursor("statistics:join", jcursor, None) + found = False + while statcur.next() == 0: + [desc, pvalue, value] = statcur.get_values() + #self.tty(str(desc) + "=" + str(pvalue)) + found = True + self.assertEquals(found, True) + + jcursor.close() + cursor.close() + + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_jsondump01.py b/src/third_party/wiredtiger/test/suite/test_jsondump01.py index ddf871d9a24..10262edc777 100644 --- a/src/third_party/wiredtiger/test/suite/test_jsondump01.py +++ b/src/third_party/wiredtiger/test/suite/test_jsondump01.py @@ -77,16 +77,22 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): ('string', dict(keyfmt='S')) ] types = [ - ('file', dict(type='file:', - name='file', + ('file', dict(uri='file:', config='', lsm=False, populate=simple_populate, populate_check=simple_populate_check_cursor)), - ('table-simple', dict(type='table:', - name='table-simple', + ('lsm', dict(uri='lsm:', config='', lsm=True, populate=simple_populate, populate_check=simple_populate_check_cursor)), - ('table-complex', dict(type='table:', - name='table-complex', + ('table-simple', dict(uri='table:', config='', lsm=False, + populate=simple_populate, + populate_check=simple_populate_check_cursor)), + 
('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True, + populate=simple_populate, + populate_check=simple_populate_check_cursor)), + ('table-complex', dict(uri='table:', config='', lsm=False, + populate=complex_populate, + populate_check=complex_populate_check_cursor)), + ('table-complex-lsm', dict(uri='table:', config='type=lsm', lsm=True, populate=complex_populate, populate_check=complex_populate_check_cursor)) ] @@ -95,9 +101,14 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): # Dump using util, re-load using python's JSON, and do a content comparison. def test_jsondump_util(self): + # LSM and column-store isn't a valid combination. + if self.lsm and self.keyfmt == 'r': + return + # Create the object. - uri = self.type + self.name - self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries) + uri = self.uri + self.name + self.populate(self, uri, self.config + ',key_format=' + self.keyfmt, + self.nentries) # Dump the object. self.runWt(['dump', '-j', uri], outfilename='jsondump.out') @@ -125,9 +136,13 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): # Dump using util, re-load using python's JSON, and do a content comparison. def test_jsonload_util(self): + # LSM and column-store isn't a valid combination. + if self.lsm and self.keyfmt == 'r': + return + # Create the object. - uri = self.type + self.name - uri2 = self.type + self.name2 + uri = self.uri + self.name + uri2 = self.uri + self.name2 self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries) # Dump the object. 
diff --git a/src/third_party/wiredtiger/test/suite/test_jsondump02.py b/src/third_party/wiredtiger/test/suite/test_jsondump02.py index c6cd464e453..50931f0f5e6 100644 --- a/src/third_party/wiredtiger/test/suite/test_jsondump02.py +++ b/src/third_party/wiredtiger/test/suite/test_jsondump02.py @@ -28,16 +28,19 @@ import os import wiredtiger, wttest +from suite_subprocess import suite_subprocess # test_jsondump.py # Test dump output from json cursors. -class test_jsondump02(wttest.WiredTigerTestCase): +class test_jsondump02(wttest.WiredTigerTestCase, suite_subprocess): table_uri1 = 'table:jsondump02a.wt' table_uri2 = 'table:jsondump02b.wt' table_uri3 = 'table:jsondump02c.wt' basename_uri4 = 'jsondump02d.wt' table_uri4 = 'table:' + basename_uri4 + table_uri5 = 'table:jsondump02e.wt' + table_uri6 = 'table:jsondump02f.wt' def set_kv(self, uri, key, val): cursor = self.session.open_cursor(uri, None, None) @@ -80,15 +83,14 @@ class test_jsondump02(wttest.WiredTigerTestCase): pos = 0 try: for insert in inserts: - #tty_pr('Insert: ' + str(insert)) cursor[insert[0]] = insert[1] finally: cursor.close() - # Create JSON cursors and test them directly. def test_json_cursor(self): """ - Create a table, add a key, get it back + Create JSON cursors and test them directly, also test + dump/load commands. """ extra_params = ',allocation_size=512,' +\ 'internal_page_max=16384,leaf_page_max=131072' @@ -112,7 +114,12 @@ class test_jsondump02(wttest.WiredTigerTestCase): self.session.create(uri4index3, "columns=(i2,i4)") self.set_kv(self.table_uri1, 'KEY000', 'string value') - self.set_kv(self.table_uri1, 'KEY001', '\'\"({[]})\"\', etc. allowed') + self.set_kv(self.table_uri1, 'KEY001', '\'\"({[]})\"\'\\, etc. allowed') + # \u03c0 is pi in Unicode, converted by Python to UTF-8: 0xcf 0x80. + # Here's how UTF-8 might be used. + self.set_kv(self.table_uri1, 'KEY002', u'\u03c0'.encode('utf-8')) + # 0xf5-0xff are illegal in Unicode, but may occur legally in C strings. 
+ self.set_kv(self.table_uri1, 'KEY003', '\xff\xfe') self.set_kv2(self.table_uri2, 'KEY000', 123, 'str0') self.set_kv2(self.table_uri2, 'KEY001', 234, 'str1') self.set_kv(self.table_uri3, 1, '\x01\x02\x03') @@ -122,7 +129,9 @@ class test_jsondump02(wttest.WiredTigerTestCase): table1_json = ( ('"key0" : "KEY000"', '"value0" : "string value"'), ('"key0" : "KEY001"', '"value0" : ' + - '"\'\\\"({[]})\\\"\', etc. allowed"')) + '"\'\\\"({[]})\\\"\'\\\\, etc. allowed"'), + ('"key0" : "KEY002"', '"value0" : "\\u00cf\\u0080"'), + ('"key0" : "KEY003"', '"value0" : "\\u00ff\\u00fe"')) self.check_json(self.table_uri1, table1_json) self.session.truncate(self.table_uri1, None, None, None) @@ -199,18 +208,23 @@ class test_jsondump02(wttest.WiredTigerTestCase): # this one should work self.load_json(self.table_uri2, - (('"key0" : "KEY002"', '"value0" : 345,\n"value1" : "str2"'),)) + (('"key0" : "KEY002"', '"value0" : 34,\n"value1" : "str2"'),)) # extraneous/missing space is okay self.load_json(self.table_uri2, ((' "key0"\n:\t"KEY003" ', - '"value0":456,"value1"\n\n\r\n:\t\n"str3"'),)) + '"value0":45,"value1"\n\n\r\n:\t\n"str3"'),)) - self.check_json(self.table_uri3, ( - ('"key0" : 1', '"value0" : "\\u0001\\u0002\\u0003"'), - ('"key0" : 2', - '"value0" : "\\u0077\\u0088\\u0099\\u0000\\u00ff\\u00fe"'))) - self.check_json(self.table_uri4, ( + table2_json = ( + ('"key0" : "KEY002"', '"value0" : 34,\n"value1" : "str2"'), + ('"key0" : "KEY003"', '"value0" : 45,\n"value1" : "str3"')) + + table3_json = ( + ('"key0" : 1', '"value0" : "\\u0001\\u0002\\u0003"'), + ('"key0" : 2', + '"value0" : "\\u0077\\u0088\\u0099\\u0000\\u00ff\\u00fe"')) + self.check_json(self.table_uri3, table3_json) + table4_json = ( ('"ikey" : 1,\n"Skey" : "key1"', '"S1" : "val1",\n"i2" : 1,\n"S3" : "val1",\n"i4" : 1'), ('"ikey" : 2,\n"Skey" : "key2"', @@ -218,7 +232,8 @@ class test_jsondump02(wttest.WiredTigerTestCase): ('"ikey" : 3,\n"Skey" : "key3"', '"S1" : "val9",\n"i2" : 9,\n"S3" : "val27",\n"i4" : 27'), ('"ikey" : 
4,\n"Skey" : "key4"', - '"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64'))) + '"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64')) + self.check_json(self.table_uri4, table4_json) # The dump config currently is not supported for the index type. self.check_json(uri4index1, ( ('"Skey" : "key1"', @@ -248,5 +263,141 @@ class test_jsondump02(wttest.WiredTigerTestCase): ('"i2" : 16,\n"i4" : 64', '"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64'))) + # Dump all the tables into a single file, and also each + # table into its own file. + self.runWt(['dump', '-j', + self.table_uri1, + self.table_uri2, + self.table_uri3, + self.table_uri4], + outfilename='jsondump-all.out') + self.runWt(['dump', '-j', self.table_uri1], outfilename='jsondump1.out') + self.runWt(['dump', '-j', self.table_uri2], outfilename='jsondump2.out') + self.runWt(['dump', '-j', self.table_uri3], outfilename='jsondump3.out') + self.runWt(['dump', '-j', self.table_uri4], outfilename='jsondump4.out') + self.session.drop(self.table_uri1) + self.session.drop(self.table_uri2) + self.session.drop(self.table_uri3) + self.session.drop(self.table_uri4) + self.runWt(['load', '-jf', 'jsondump1.out']) + self.session.drop(self.table_uri1) + self.runWt(['load', '-jf', 'jsondump2.out']) + self.session.drop(self.table_uri2) + self.runWt(['load', '-jf', 'jsondump3.out']) + self.session.drop(self.table_uri3) + self.runWt(['load', '-jf', 'jsondump4.out']) + self.session.drop(self.table_uri4) + + self.runWt(['load', '-jf', 'jsondump-all.out']) + self.check_json(self.table_uri1, table1_json) + self.check_json(self.table_uri2, table2_json) + self.check_json(self.table_uri3, table3_json) + self.check_json(self.table_uri4, table4_json) + + # Generate two byte keys that cover some range of byte values. + # For simplicity, the keys are monotonically increasing. + # A null byte is disallowed in a string key, so we don't use it. 
+ def generate_key(self, i, k): + k[0] = ((i & 0xffc0) >> 6) + 1 + k[1] = (i & 0x3f) + 1 + + # Generate three byte values: + # i==0 : v:[0x00, 0x01, 0x02] + # i==1 : v:[0x01, 0x02, 0x03] + # etc. + # A null byte is disallowed in a string value, it is replaced by 'X' + def generate_value(self, i, v, isstring): + for j in range(0, 3): + val = (i + j) % 256 + if isstring and val == 0: + val = 88 # 'X' + v[j] = val + + def test_json_all_bytes(self): + """ + Test the generated JSON for all byte values in byte array and + string formats. + """ + self.session.create(self.table_uri5, 'key_format=u,value_format=u') + self.session.create(self.table_uri6, 'key_format=S,value_format=S') + + c5 = self.session.open_cursor(self.table_uri5, None, None) + c6 = self.session.open_cursor(self.table_uri6, None, None) + k = bytearray(b'\x00\x00') + v = bytearray(b'\x00\x00\x00') + for i in range(0, 512): + self.generate_key(i, k) + self.generate_value(i, v, False) + c5[str(k)] = str(v) + self.generate_value(i, v, True) # no embedded nuls + c6[str(k)] = str(v) + c5.close() + c6.close() + + # Build table5_json, we want it to look like this: + # ('"key0" : "\u0001\u0001"', '"value0" : "\u0000\u0001\u0002"'), + # ('"key0" : "\u0001\u0002"', '"value0" : "\u0001\u0002\u0003"')) + # ('"key0" : "\u0001\u0003"', '"value0" : "\u0003\u0003\u0004"')) + # ... + # table6_json is similar, except that printable values like '\u0041' + # would appear as 'A'. The string type cannot have embedded nulls, + # so '\u0000' in table6_json appears instead as an 'X'. + # + # Start by creating two tables of individual Unicode values. + # bin_unicode[] contains only the \u escape sequences. + # mix_unicode[] contains printable characters or \t \n etc. escapes + bin_unicode = [] + mix_unicode = [] + for i in range(0, 256): + u = "\\u00" + hex(256 + i)[3:] # e.g. 
"\u00ab") + bin_unicode.append(u) + mix_unicode.append(u) + for i in range(0x20, 0x7f): + mix_unicode[i] = chr(i) + mix_unicode[ord('"')] = '\\"' + mix_unicode[ord('\\')] = '\\\\' + mix_unicode[ord('\f')] = '\\f' + mix_unicode[ord('\n')] = '\\n' + mix_unicode[ord('\r')] = '\\r' + mix_unicode[ord('\t')] = '\\t' + + table5_json = [] + table6_json = [] + for i in range(0, 512): + self.generate_key(i, k) + self.generate_value(i, v, False) + j = i if (i > 0 and i < 254) or (i > 256 and i < 510) else 88 + table5_json.append(('"key0" : "' + bin_unicode[k[0]] + + bin_unicode[k[1]] + '"', + '"value0" : "' + bin_unicode[v[0]] + + bin_unicode[v[1]] + + bin_unicode[v[2]] + '"')) + self.generate_value(i, v, True) + table6_json.append(('"key0" : "' + mix_unicode[k[0]] + + mix_unicode[k[1]] + '"', + '"value0" : "' + mix_unicode[v[0]] + + mix_unicode[v[1]] + + mix_unicode[v[2]] + '"')) + + self.check_json(self.table_uri5, table5_json) + self.check_json(self.table_uri6, table6_json) + + self.session.truncate(self.table_uri5, None, None, None) + self.session.truncate(self.table_uri6, None, None, None) + self.load_json(self.table_uri5, table5_json) + self.load_json(self.table_uri6, table6_json) + self.check_json(self.table_uri5, table5_json) + self.check_json(self.table_uri6, table6_json) + + self.runWt(['dump', '-j', self.table_uri5], outfilename='jsondump5.out') + self.runWt(['dump', '-j', self.table_uri6], outfilename='jsondump6.out') + self.session.drop(self.table_uri5) + self.session.drop(self.table_uri6) + self.runWt(['load', '-jf', 'jsondump5.out']) + self.runWt(['load', '-jf', 'jsondump6.out']) + self.session.drop(self.table_uri5) + self.session.drop(self.table_uri6) + + if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_reconfig02.py b/src/third_party/wiredtiger/test/suite/test_reconfig02.py index aee8ee4458b..85a9ceb2a34 100644 --- a/src/third_party/wiredtiger/test/suite/test_reconfig02.py +++ 
b/src/third_party/wiredtiger/test/suite/test_reconfig02.py @@ -74,9 +74,15 @@ class test_reconfig02(wttest.WiredTigerTestCase): # Now turn on pre-allocation. Sleep to give the worker thread # a chance to run and verify pre-allocated log files exist. + # + # Potentially loop a few times in case it is a very slow system. self.conn.reconfigure("log=(prealloc=true)") - time.sleep(2) - prep_logs = fnmatch.filter(os.listdir('.'), "*Prep*") + for x in xrange(0, 20): + time.sleep(1) + prep_logs = fnmatch.filter(os.listdir('.'), "*Prep*") + if len(prep_logs) != 0: + break + self.assertNotEqual(0, len(prep_logs)) # Logging starts on, but archive is off. Verify it is off. diff --git a/src/third_party/wiredtiger/test/suite/test_stat05.py b/src/third_party/wiredtiger/test/suite/test_stat05.py index 6a93ec2c84d..9bcedd65089 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat05.py +++ b/src/third_party/wiredtiger/test/suite/test_stat05.py @@ -37,9 +37,13 @@ from helper import complex_value_populate, key_populate, value_populate # Statistics cursor using size only class test_stat_cursor_config(wttest.WiredTigerTestCase): pfx = 'test_stat_cursor_size' + conn_config = 'statistics=(fast)' + uri = [ ('file', dict(uri='file:' + pfx, pop=simple_populate, cfg='')), ('table', dict(uri='table:' + pfx, pop=simple_populate, cfg='')), + ('inmem', dict(uri='table:' + pfx, pop=simple_populate, cfg='', + conn_config='in_memory,statistics=(fast)')), ('table-lsm', dict(uri='table:' + pfx, pop=simple_populate, cfg=',type=lsm,lsm=(chunk_size=1MB,merge_min=2)')), ('complex', dict(uri='table:' + pfx, pop=complex_populate, cfg='')), @@ -49,7 +53,6 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase): ] scenarios = number_scenarios(uri) - conn_config = 'statistics=(fast)' def openAndWalkStatCursor(self): c = self.session.open_cursor( diff --git a/src/third_party/wiredtiger/test/suite/test_txn04.py b/src/third_party/wiredtiger/test/suite/test_txn04.py index bbd6ce8c4e2..9d9d2db62c6 100644 
--- a/src/third_party/wiredtiger/test/suite/test_txn04.py +++ b/src/third_party/wiredtiger/test/suite/test_txn04.py @@ -193,7 +193,7 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess): self.hot_backup(self.uri, committed) def test_ops(self): - with self.expectedStdoutPattern('Recreating metadata'): + with self.expectedStdoutPattern('recreating metadata'): self.ops() if __name__ == '__main__': diff --git a/src/third_party/wiredtiger/test/suite/test_util02.py b/src/third_party/wiredtiger/test/suite/test_util02.py index 51e03d8d105..475e856052a 100644 --- a/src/third_party/wiredtiger/test/suite/test_util02.py +++ b/src/third_party/wiredtiger/test/suite/test_util02.py @@ -173,7 +173,7 @@ class test_load_commandline(wttest.WiredTigerTestCase, suite_subprocess): complex_populate(self, self.uri, "key_format=S,value_format=S", 20) self.runWt(["dump", self.uri], outfilename="dump.out") loadargs = ["load", "-f", "dump.out"] + args - self.runWt(loadargs, errfilename=errfile) + self.runWt(loadargs, errfilename=errfile, failure=fail) if fail: self.check_non_empty_file(errfile) else: @@ -181,23 +181,24 @@ class test_load_commandline(wttest.WiredTigerTestCase, suite_subprocess): # Empty arguments should suceed. def test_load_commandline_1(self): - self.load_commandline([], 0) + self.load_commandline([], False) # Arguments are in pairs. def test_load_commandline_2(self): - self.load_commandline(["table"], 1) - self.load_commandline([self.uri, "block_allocation=first", self.uri], 1) + self.load_commandline(["table"], True) + self.load_commandline( + [self.uri, "block_allocation=first", self.uri], True) # You can use short-hand URIs for a single object, but cannot match multiple # objects. 
def test_load_commandline_3(self): - self.load_commandline(["table", "block_allocation=first"], 0) - self.load_commandline(["colgroup", "block_allocation=first"], 1) + self.load_commandline(["table", "block_allocation=first"], False) + self.load_commandline(["colgroup", "block_allocation=first"], True) # You can't reference non-existent objects. def test_load_commandline_4(self): - self.load_commandline([self.uri, "block_allocation=first"], 0) - self.load_commandline(["table:bar", "block_allocation=first"], 1) + self.load_commandline([self.uri, "block_allocation=first"], False) + self.load_commandline(["table:bar", "block_allocation=first"], True) # You can specify multipleconfiguration arguments for the same object. def test_load_commandline_5(self): @@ -205,19 +206,19 @@ class test_load_commandline(wttest.WiredTigerTestCase, suite_subprocess): self.uri, "block_allocation=first", self.uri, "block_allocation=best", self.uri, "block_allocation=first", - self.uri, "block_allocation=best"], 0) + self.uri, "block_allocation=best"], False) # You can't modify a format. def test_load_commandline_6(self): - self.load_commandline(["table", "key_format=d"], 1) - self.load_commandline(["table", "value_format=d"], 1) + self.load_commandline(["table", "key_format=d"], True) + self.load_commandline(["table", "value_format=d"], True) # You can set the source or version, but it gets stripped; confirm the # attempt succeeds, so we know they configuration values are stripped. 
def test_load_commandline_7(self): - self.load_commandline(["table", "filename=bar"], 0) - self.load_commandline(["table", "source=bar"], 0) - self.load_commandline(["table", "version=(100,200)"], 0) + self.load_commandline(["table", "filename=bar"], False) + self.load_commandline(["table", "source=bar"], False) + self.load_commandline(["table", "version=(100,200)"], False) if __name__ == '__main__': diff --git a/src/third_party/wiredtiger/test/suite/test_util07.py b/src/third_party/wiredtiger/test/suite/test_util07.py index 2bbb40422bd..602ddbba5ff 100644 --- a/src/third_party/wiredtiger/test/suite/test_util07.py +++ b/src/third_party/wiredtiger/test/suite/test_util07.py @@ -71,7 +71,8 @@ class test_util07(wttest.WiredTigerTestCase, suite_subprocess): self.session.create('table:' + self.tablename, self.session_params) outfile = "readout.txt" errfile = "readerr.txt" - self.runWt(["read", 'table:' + self.tablename, 'NoMatch'], outfilename=outfile, errfilename=errfile) + self.runWt(["read", 'table:' + self.tablename, 'NoMatch'], + outfilename=outfile, errfilename=errfile, failure=True) self.check_empty_file(outfile) self.check_file_contains(errfile, 'NoMatch: not found\n') @@ -83,10 +84,12 @@ class test_util07(wttest.WiredTigerTestCase, suite_subprocess): self.populate(self.tablename) outfile = "readout.txt" errfile = "readerr.txt" - self.runWt(["read", 'table:' + self.tablename, 'KEY49'], outfilename=outfile, errfilename=errfile) + self.runWt(["read", 'table:' + self.tablename, 'KEY49'], + outfilename=outfile, errfilename=errfile) self.check_file_content(outfile, 'VAL49\n') self.check_empty_file(errfile) - self.runWt(["read", 'table:' + self.tablename, 'key49'], outfilename=outfile, errfilename=errfile) + self.runWt(["read", 'table:' + self.tablename, 'key49'], + outfilename=outfile, errfilename=errfile, failure=True) self.check_empty_file(outfile) self.check_file_contains(errfile, 'key49: not found\n') diff --git 
a/src/third_party/wiredtiger/test/suite/test_util12.py b/src/third_party/wiredtiger/test/suite/test_util12.py index e8226a3146c..f407c2ce7d6 100644 --- a/src/third_party/wiredtiger/test/suite/test_util12.py +++ b/src/third_party/wiredtiger/test/suite/test_util12.py @@ -57,7 +57,8 @@ class test_util12(wttest.WiredTigerTestCase, suite_subprocess): self.session.create('table:' + self.tablename, self.session_params) errfile = 'writeerr.txt' - self.runWt(['write', 'table:' + self.tablename], errfilename=errfile) + self.runWt(['write', 'table:' + self.tablename], + errfilename=errfile, failure=True) self.check_file_contains(errfile, 'usage:') def test_write_overwrite(self): @@ -82,7 +83,7 @@ class test_util12(wttest.WiredTigerTestCase, suite_subprocess): self.session.create('table:' + self.tablename, self.session_params) errfile = 'writeerr.txt' self.runWt(['write', 'table:' + self.tablename, - 'def', '456', 'abc'], errfilename=errfile) + 'def', '456', 'abc'], errfilename=errfile, failure=True) self.check_file_contains(errfile, 'usage:') diff --git a/src/third_party/wiredtiger/test/suite/test_verify.py b/src/third_party/wiredtiger/test/suite/test_verify.py index 5ce926027ef..28a66415b9d 100644 --- a/src/third_party/wiredtiger/test/suite/test_verify.py +++ b/src/third_party/wiredtiger/test/suite/test_verify.py @@ -151,7 +151,8 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess): with self.open_and_position(self.tablename, 75) as f: for i in range(0, 4096): f.write(struct.pack('B', 0)) - self.runWt(["verify", "table:" + self.tablename], errfilename="verifyerr.out") + self.runWt(["verify", "table:" + self.tablename], + errfilename="verifyerr.out", failure=True) self.check_non_empty_file("verifyerr.out") def test_verify_process_25pct_junk(self): @@ -165,7 +166,8 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess): with self.open_and_position(self.tablename, 25) as f: for i in range(0, 100): f.write('\x01\xff\x80') - self.runWt(["verify", "table:" 
+ self.tablename], errfilename="verifyerr.out") + self.runWt(["verify", "table:" + self.tablename], + errfilename="verifyerr.out", failure=True) self.check_non_empty_file("verifyerr.out") def test_verify_process_truncated(self): @@ -178,7 +180,8 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess): self.populate(self.tablename) with self.open_and_position(self.tablename, 75) as f: f.truncate(0) - self.runWt(["verify", "table:" + self.tablename], errfilename="verifyerr.out") + self.runWt(["verify", "table:" + self.tablename], + errfilename="verifyerr.out", failure=True) self.check_non_empty_file("verifyerr.out") def test_verify_process_zero_length(self): @@ -190,7 +193,8 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess): self.populate(self.tablename) with self.open_and_position(self.tablename, 0) as f: f.truncate(0) - self.runWt(["verify", "table:" + self.tablename], errfilename="verifyerr.out") + self.runWt(["verify", "table:" + self.tablename], + errfilename="verifyerr.out", failure=True) self.check_non_empty_file("verifyerr.out") diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py index a1945b4325d..9e430fcdba7 100644 --- a/src/third_party/wiredtiger/test/suite/wttest.py +++ b/src/third_party/wiredtiger/test/suite/wttest.py @@ -259,20 +259,20 @@ class WiredTigerTestCase(unittest.TestCase): self.conn.close() self.conn = None - def open_conn(self): + def open_conn(self, directory="."): """ Open the connection if already closed. """ if self.conn == None: - self.conn = self.setUpConnectionOpen(".") + self.conn = self.setUpConnectionOpen(directory) self.session = self.setUpSessionOpen(self.conn) - def reopen_conn(self): + def reopen_conn(self, directory="."): """ Reopen the connection. 
""" self.close_conn() - self.open_conn() + self.open_conn(directory) def setUp(self): if not hasattr(self.__class__, 'wt_ntests'): @@ -551,4 +551,4 @@ def runsuite(suite, parallel): def run(name='__main__'): result = runsuite(unittest.TestLoader().loadTestsFromName(name), False) - sys.exit(not result.wasSuccessful()) + sys.exit(0 if result.wasSuccessful() else 1) diff --git a/src/third_party/wiredtiger/test/thread/Makefile.am b/src/third_party/wiredtiger/test/thread/Makefile.am index a58f019b513..58b715d4a80 100644 --- a/src/third_party/wiredtiger/test/thread/Makefile.am +++ b/src/third_party/wiredtiger/test/thread/Makefile.am @@ -1,12 +1,15 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/test/utility +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = t -t_LDADD = $(top_builddir)/libwiredtiger.la -t_SOURCES = thread.h file.c rw.c stats.c t.c +t_SOURCES = file.c rw.c stats.c t.c + +t_LDADD = $(top_builddir)/test/utility/libtest_util.la +t_LDADD +=$(top_builddir)/libwiredtiger.la t_LDFLAGS = -static TESTS = smoke.sh clean-local: - rm -rf WiredTiger* wt.* *.core __stats + rm -rf WT_TEST __stats *.core diff --git a/src/third_party/wiredtiger/test/thread/rw.c b/src/third_party/wiredtiger/test/thread/rw.c index 913fa6e6c25..10f13b9eb04 100644 --- a/src/third_party/wiredtiger/test/thread/rw.c +++ b/src/third_party/wiredtiger/test/thread/rw.c @@ -59,16 +59,13 @@ rw_start(u_int readers, u_int writers) total_nops = 0; /* Create per-thread structures. */ - if ((run_info = calloc( - (size_t)(readers + writers), sizeof(*run_info))) == NULL || - (tids = calloc((size_t)(readers + writers), sizeof(*tids))) == NULL) - testutil_die(errno, "calloc"); + run_info = dcalloc((size_t)(readers + writers), sizeof(*run_info)); + tids = dcalloc((size_t)(readers + writers), sizeof(*tids)); /* Create the files and load the initial records. 
*/ for (i = 0; i < writers; ++i) { if (i == 0 || multiple_files) { - if ((run_info[i].name = malloc(64)) == NULL) - testutil_die(errno, "malloc"); + run_info[i].name = dmalloc(64); snprintf(run_info[i].name, 64, FNAME, i); /* Vary by orders of magnitude */ @@ -88,8 +85,7 @@ rw_start(u_int readers, u_int writers) for (i = 0; i < readers; ++i) { offset = i + writers; if (multiple_files) { - if ((run_info[offset].name = malloc(64)) == NULL) - testutil_die(errno, "malloc"); + run_info[offset].name = dmalloc(64); /* Have readers read from tables with writes. */ name_index = i % writers; snprintf( diff --git a/src/third_party/wiredtiger/test/thread/t.c b/src/third_party/wiredtiger/test/thread/t.c index 22334076ee1..5b53532e8a6 100644 --- a/src/third_party/wiredtiger/test/thread/t.c +++ b/src/third_party/wiredtiger/test/thread/t.c @@ -42,7 +42,8 @@ static FILE *logfp; /* Log file */ static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *); static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *); -static void onint(int); +static void onint(int) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void shutdown(void); static int usage(void); static void wt_connect(char *); diff --git a/src/third_party/wiredtiger/test/thread/thread.h b/src/third_party/wiredtiger/test/thread/thread.h index 36cdbebd210..edcb919ec32 100644 --- a/src/third_party/wiredtiger/test/thread/thread.h +++ b/src/third_party/wiredtiger/test/thread/thread.h @@ -26,19 +26,9 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include <sys/types.h> -#include <sys/time.h> +#include "test_util.h" -#include <errno.h> -#include <inttypes.h> -#include <pthread.h> #include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include "test_util.i" #define FNAME "file:wt.%03d" /* File name */ #define FNAME_STAT "__stats" /* File name for statistics */ diff --git a/src/third_party/wiredtiger/test/utility/Makefile.am b/src/third_party/wiredtiger/test/utility/Makefile.am new file mode 100644 index 00000000000..a2923eb41a8 --- /dev/null +++ b/src/third_party/wiredtiger/test/utility/Makefile.am @@ -0,0 +1,4 @@ +AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include + +libtest_util_la_SOURCES = misc.c parse_opts.c thread.c +noinst_LTLIBRARIES = libtest_util.la diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c new file mode 100644 index 00000000000..dfc655dec1a --- /dev/null +++ b/src/third_party/wiredtiger/test/utility/misc.c @@ -0,0 +1,194 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * die -- + * Report an error and quit. + */ +void +testutil_die(int e, const char *fmt, ...) +{ + va_list ap; + + /* Allow test programs to cleanup on fatal error. */ + if (custom_die != NULL) + (*custom_die)(); + + if (fmt != NULL) { + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } + if (e != 0) + fprintf(stderr, ": %s", wiredtiger_strerror(e)); + fprintf(stderr, "\n"); + + exit(EXIT_FAILURE); +} + +/* + * testutil_work_dir_from_path -- + * Takes a buffer, its size and the intended work directory. + * Creates the full intended work directory in buffer. + */ +void +testutil_work_dir_from_path(char *buffer, size_t len, const char *dir) +{ + /* If no directory is provided, use the default. */ + if (dir == NULL) + dir = DEFAULT_DIR; + + if (len < strlen(dir) + 1) + testutil_die(ENOMEM, + "Not enough memory in buffer for directory %s", dir); + + strcpy(buffer, dir); +} + +/* + * testutil_clean_work_dir -- + * Remove the work directory. + */ +void +testutil_clean_work_dir(char *dir) +{ + size_t len; + int ret; + char *buf; + + /* Additional bytes for the Windows rd command. */ + len = strlen(dir) + strlen(RM_COMMAND) + 1; + if ((buf = malloc(len)) == NULL) + testutil_die(ENOMEM, "Failed to allocate memory"); + + snprintf(buf, len, "%s%s", RM_COMMAND, dir); + + if ((ret = system(buf)) != 0 && ret != ENOENT) + testutil_die(ret, "%s", buf); + free(buf); +} + +/* + * testutil_make_work_dir -- + * Delete the existing work directory, then create a new one. 
+ */ +void +testutil_make_work_dir(char *dir) +{ + size_t len; + int ret; + char *buf; + + testutil_clean_work_dir(dir); + + /* Additional bytes for the mkdir command */ + len = strlen(dir) + strlen(MKDIR_COMMAND) + 1; + if ((buf = malloc(len)) == NULL) + testutil_die(ENOMEM, "Failed to allocate memory"); + + /* mkdir shares syntax between Windows and Linux */ + snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir); + if ((ret = system(buf)) != 0) + testutil_die(ret, "%s", buf); + free(buf); +} + +/* + * testutil_cleanup -- + * Delete the existing work directory and free the options structure. + */ +void +testutil_cleanup(TEST_OPTS *opts) +{ + if (opts->conn != NULL) + testutil_check(opts->conn->close(opts->conn, NULL)); + + if (!opts->preserve) + testutil_clean_work_dir(opts->home); + + free(opts->conn_config); + free(opts->table_config); + free(opts->uri); + free(opts->home); +} + +/* + * dcalloc -- + * Call calloc, dying on failure. + */ +void * +dcalloc(size_t number, size_t size) +{ + void *p; + + if ((p = calloc(number, size)) != NULL) + return (p); + testutil_die(errno, "calloc: %" WT_SIZET_FMT "B", number * size); +} + +/* + * dmalloc -- + * Call malloc, dying on failure. + */ +void * +dmalloc(size_t len) +{ + void *p; + + if ((p = malloc(len)) != NULL) + return (p); + testutil_die(errno, "malloc: %" WT_SIZET_FMT "B", len); +} + +/* + * drealloc -- + * Call realloc, dying on failure. + */ +void * +drealloc(void *p, size_t len) +{ + void *t; + if ((t = realloc(p, len)) != NULL) + return (t); + testutil_die(errno, "realloc: %" WT_SIZET_FMT "B", len); +} + +/* + * dstrdup -- + * Call strdup, dying on failure. 
+ */ +void * +dstrdup(const void *str) +{ + char *p; + + if ((p = strdup(str)) != NULL) + return (p); + testutil_die(errno, "strdup"); +} diff --git a/src/third_party/wiredtiger/test/utility/parse_opts.c b/src/third_party/wiredtiger/test/utility/parse_opts.c new file mode 100644 index 00000000000..4054f318259 --- /dev/null +++ b/src/third_party/wiredtiger/test/utility/parse_opts.c @@ -0,0 +1,132 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "test_util.h" + +extern int __wt_opterr; /* if error message should be printed */ +extern int __wt_optind; /* index into parent argv vector */ +extern int __wt_optopt; /* character checked for validity */ +extern int __wt_optreset; /* reset getopt */ +extern char *__wt_optarg; /* argument associated with option */ + +/* + * testutil_parse_opts -- + * Parse command line options for a test case. + */ +int +testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts) +{ + int ch; + size_t len; + + opts->preserve = false; + opts->running = true; + opts->verbose = false; + + if ((opts->progname = strrchr(argv[0], '/')) == NULL) + opts->progname = argv[0]; + else + ++opts->progname; + + while ((ch = __wt_getopt(opts->progname, + argc, argv, "A:h:n:o:pR:T:t:vW:")) != EOF) + switch (ch) { + case 'A': /* Number of append threads */ + opts->n_append_threads = (uint64_t)atoll(__wt_optarg); + break; + case 'h': /* Home directory */ + opts->home = __wt_optarg; + break; + case 'n': /* Number of records */ + opts->nrecords = (uint64_t)atoll(__wt_optarg); + break; + case 'o': /* Number of operations */ + opts->nops = (uint64_t)atoll(__wt_optarg); + break; + case 'p': /* Preserve directory contents */ + opts->preserve = true; + break; + case 'R': /* Number of reader threads */ + opts->n_read_threads = (uint64_t)atoll(__wt_optarg); + break; + case 'T': /* Number of threads */ + opts->nthreads = (uint64_t)atoll(__wt_optarg); + break; + case 't': /* Table type */ + switch (__wt_optarg[0]) { + case 'C': + case 'c': + opts->table_type = TABLE_COL; + break; + case 'F': + case 'f': + opts->table_type = TABLE_FIX; + break; + case 'R': + case 'r': + opts->table_type = TABLE_ROW; + break; + } + break; + case 'v': + opts->verbose = true; + break; + case 'W': /* Number of writer threads */ + opts->n_write_threads = (uint64_t)atoll(__wt_optarg); + break; + case '?': + default: + (void)fprintf(stderr, "usage: %s " + "[-A append thread count] " + "[-h home] " + "[-n record 
count] " + "[-o op count] " + "[-p] " + "[-R read thread count] " + "[-T thread count] " + "[-t c|f|r table type] " + "[-v] " + "[-W write thread count] ", + opts->progname); + return (1); + } + + /* + * Setup the home directory. It needs to be unique for every test + * or the auto make parallel tester gets upset. + */ + len = strlen("WT_TEST.") + strlen(opts->progname) + 10; + opts->home = dmalloc(len); + snprintf(opts->home, len, "WT_TEST.%s", opts->progname); + + /* Setup the default URI string */ + len = strlen("table:") + strlen(opts->progname) + 10; + opts->uri = dmalloc(len); + snprintf(opts->uri, len, "table:%s", opts->progname); + + return (0); +} diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h new file mode 100644 index 00000000000..66ff8de2d19 --- /dev/null +++ b/src/third_party/wiredtiger/test/utility/test_util.h @@ -0,0 +1,125 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "wt_internal.h" /* For __wt_XXX */ + +#ifdef _WIN32 + #define DIR_DELIM '\\' + #define RM_COMMAND "rd /s /q " +#else + #define DIR_DELIM '/' + #define RM_COMMAND "rm -rf " +#endif + +#define DEFAULT_DIR "WT_TEST" +#define MKDIR_COMMAND "mkdir " + +#ifdef _WIN32 +#include "windows_shim.h" +#endif + +/* Generic option parsing structure shared by all test cases. */ +typedef struct { + char *home; + char *progname; + enum { TABLE_COL=1, /* Fixed-length column store */ + TABLE_FIX=2, /* Variable-length column store */ + TABLE_ROW=3 /* Row-store */ + } table_type; + bool preserve; /* Don't remove files on exit */ + bool verbose; /* Run in verbose mode */ + uint64_t nrecords; /* Number of records */ + uint64_t nops; /* Number of operations */ + uint64_t nthreads; /* Number of threads */ + uint64_t n_append_threads; /* Number of append threads */ + uint64_t n_read_threads; /* Number of read threads */ + uint64_t n_write_threads; /* Number of write threads */ + + /* + * Fields commonly shared within a test program. The test cleanup + * function will attempt to automatically free and close non-null + * resources. + */ + WT_CONNECTION *conn; + char *conn_config; + WT_SESSION *session; + bool running; + char *table_config; + char *uri; + volatile uint64_t next_threadid; + uint64_t max_inserted_id; +} TEST_OPTS; + +/* + * testutil_assert -- + * Complain and quit if something isn't true. 
+ */ +#define testutil_assert(a) do { \ + if (!(a)) \ + testutil_die(0, "%s/%d: %s", __func__, __LINE__, #a); \ +} while (0) + +/* + * testutil_check -- + * Complain and quit if a function call fails. + */ +#define testutil_check(call) do { \ + int __r; \ + if ((__r = (call)) != 0) \ + testutil_die( \ + __r, "%s/%d: %s", __func__, __LINE__, #call); \ +} while (0) + +/* + * testutil_checkfmt -- + * Complain and quit if a function call fails, with additional arguments. + */ +#define testutil_checkfmt(call, fmt, ...) do { \ + int __r; \ + if ((__r = (call)) != 0) \ + testutil_die(__r, "%s/%d: %s: " fmt, \ + __func__, __LINE__, #call, __VA_ARGS__); \ +} while (0) + +/* Allow tests to add their own death handling. */ +extern void (*custom_die)(void); + +void testutil_die(int, const char *, ...) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); + +void *dcalloc(size_t, size_t); +void *dmalloc(size_t); +void *drealloc(void *, size_t); +void *dstrdup(const void *); +void testutil_clean_work_dir(char *); +void testutil_cleanup(TEST_OPTS *); +void testutil_make_work_dir(char *); +int testutil_parse_opts(int, char * const *, TEST_OPTS *); +void testutil_work_dir_from_path(char *, size_t, const char *); +void *thread_append(void *); +void *thread_insert_append(void *); +void *thread_prev(void *); diff --git a/src/third_party/wiredtiger/test/utility/thread.c b/src/third_party/wiredtiger/test/utility/thread.c new file mode 100644 index 00000000000..38465b2f02b --- /dev/null +++ b/src/third_party/wiredtiger/test/utility/thread.c @@ -0,0 +1,141 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. 
+ * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "test_util.h" + +/* + * A thread dedicated to appending records into a table. Works with fixed + * length column stores and variable length column stores. + * One thread (the first thread created by an application) checks for a + * terminating condition after each insert. 
+ */ +void * +thread_append(void *arg) +{ + TEST_OPTS *opts; + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_SESSION *session; + uint64_t id, recno; + char buf[64]; + + opts = (TEST_OPTS *)arg; + conn = opts->conn; + + id = __wt_atomic_fetch_addv64(&opts->next_threadid, 1); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check( + session->open_cursor(session, opts->uri, NULL, "append", &cursor)); + + buf[0] = '\2'; + for (recno = 1; opts->running; ++recno) { + if (opts->table_type == TABLE_FIX) + cursor->set_value(cursor, buf[0]); + else { + snprintf(buf, sizeof(buf), + "%" PRIu64 " VALUE ------", recno); + cursor->set_value(cursor, buf); + } + testutil_check(cursor->insert(cursor)); + if (id == 0) { + testutil_check( + cursor->get_key(cursor, &opts->max_inserted_id)); + if (opts->max_inserted_id >= opts->nrecords) + opts->running = false; + } + } + + return (NULL); +} + +/* + * Append into a row store table. + */ +void * +thread_insert_append(void *arg) +{ + TEST_OPTS *opts; + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_SESSION *session; + uint64_t i; + char kbuf[64]; + + opts = (TEST_OPTS *)arg; + conn = opts->conn; + + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check(session->open_cursor( + session, opts->uri, NULL, NULL, &cursor)); + + for (i = 0; i < opts->nrecords; ++i) { + snprintf(kbuf, sizeof(kbuf), "%010d KEY------", (int)i); + cursor->set_key(cursor, kbuf); + cursor->set_value(cursor, "========== VALUE ======="); + testutil_check(cursor->insert(cursor)); + if (i % 100000 == 0) { + printf("insert: %" PRIu64 "\r", i); + fflush(stdout); + } + } + printf("\n"); + + opts->running = false; + + return (NULL); +} + +/* + * Repeatedly walk backwards through the records in a table. 
+ */ +void * +thread_prev(void *arg) +{ + TEST_OPTS *opts; + WT_CURSOR *cursor; + WT_SESSION *session; + int ret; + + opts = (TEST_OPTS *)arg; + ret = 0; + + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + testutil_check( + session->open_cursor(session, opts->uri, NULL, NULL, &cursor)); + while (opts->running) { + while (opts->running && (ret = cursor->prev(cursor)) == 0) + ; + if (ret == WT_NOTFOUND) + ret = 0; + testutil_check(ret); + } + + testutil_check(session->close(session, NULL)); + return (NULL); +} diff --git a/src/third_party/wiredtiger/tools/wtstats/stat_data.py b/src/third_party/wiredtiger/tools/wtstats/stat_data.py index 7c42ab4d926..a79cf1faf5e 100644 --- a/src/third_party/wiredtiger/tools/wtstats/stat_data.py +++ b/src/third_party/wiredtiger/tools/wtstats/stat_data.py @@ -6,6 +6,7 @@ no_scale_per_second_list = [ 'cache: bytes currently in the cache', 'cache: eviction currently operating in aggressive mode', 'cache: files with active eviction walks', + 'cache: hazard pointer maximum array length', 'cache: maximum bytes configured', 'cache: maximum page size at eviction', 'cache: pages currently held in the cache', @@ -27,6 +28,9 @@ no_scale_per_second_list = [ 'reconciliation: split objects currently awaiting free', 'session: open cursor count', 'session: open session count', + 'thread-state: active filesystem fsync calls', + 'thread-state: active filesystem read calls', + 'thread-state: active filesystem write calls', 'transaction: transaction checkpoint currently running', 'transaction: transaction checkpoint generation', 'transaction: transaction checkpoint max time (msecs)', @@ -60,7 +64,6 @@ no_scale_per_second_list = [ 'btree: overflow pages', 'btree: row-store internal pages', 'btree: row-store leaf pages', - 'cache: bytes currently in the cache', 'cache: overflow values cached in memory', 'LSM: bloom filters in the LSM tree', 'LSM: chunks in the LSM tree', @@ -95,6 +98,9 @@ no_clear_list = [ 'reconciliation: 
split objects currently awaiting free', 'session: open cursor count', 'session: open session count', + 'thread-state: active filesystem fsync calls', + 'thread-state: active filesystem read calls', + 'thread-state: active filesystem write calls', 'transaction: transaction checkpoint currently running', 'transaction: transaction checkpoint generation', 'transaction: transaction checkpoint max time (msecs)', @@ -105,7 +111,6 @@ no_clear_list = [ 'transaction: transaction range of IDs currently pinned by a checkpoint', 'transaction: transaction range of IDs currently pinned by named snapshots', 'btree: btree checkpoint generation', - 'cache: bytes currently in the cache', 'session: open cursor count', ] prefix_list = [ @@ -122,6 +127,7 @@ prefix_list = [ 'thread-yield', 'async', 'btree', + 'thread-state', 'compression', ] -groups = {'cursor': ['cursor', 'session'], 'lsm': ['LSM', 'transaction'], 'system': ['connection', 'data-handle', 'session'], 'evict': ['cache', 'connection', 'block-manager'], 'memory': ['cache', 'connection', 'reconciliation']}
\ No newline at end of file +groups = {'cursor': ['cursor', 'session'], 'lsm': ['LSM', 'transaction'], 'system': ['connection', 'data-handle', 'session', 'thread-state'], 'evict': ['block-manager', 'cache', 'connection', 'thread-state'], 'memory': ['cache', 'connection', 'reconciliation']}
\ No newline at end of file |