diff options
89 files changed, 2758 insertions, 2263 deletions
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c index f079d6272d7..5d3b334785d 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c @@ -100,7 +100,7 @@ static uint64_t wtperf_value_range(CONFIG *); static inline uint64_t get_next_incr(CONFIG *cfg) { - return (WT_ATOMIC_ADD8(cfg->insert_key, 1)); + return (__wt_atomic_add64(&cfg->insert_key, 1)); } static void @@ -151,7 +151,7 @@ cb_asyncop(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int ret, uint32_t flags) switch (type) { case WT_AOP_COMPACT: tables = (uint32_t *)op->app_private; - WT_ATOMIC_ADD4(*tables, (uint32_t)-1); + (void)__wt_atomic_add32(tables, (uint32_t)-1); break; case WT_AOP_INSERT: trk = &thread->insert; @@ -186,7 +186,7 @@ cb_asyncop(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int ret, uint32_t flags) return (0); if (ret == 0 || (ret == WT_NOTFOUND && type != WT_AOP_INSERT)) { if (!cfg->in_warmup) - (void)WT_ATOMIC_ADD8(trk->ops, 1); + (void)__wt_atomic_add64(&trk->ops, 1); return (0); } err: diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 43b585a6c6d..d9830191d94 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -981,8 +981,10 @@ methods = { connection_runtime_config + common_wiredtiger_open + [ Config('config_base', 'true', r''' - write the base configuration file if creating the database, - see @ref config_base for more information''', + write the base configuration file if creating the database. If + \c false in the config passed directly to ::wiredtiger_open, will + ignore any existing base configuration file in addition to not creating + one. 
See @ref config_base for more information''', type='boolean'), Config('create', 'false', r''' create the database if it does not exist''', @@ -1011,8 +1013,10 @@ methods = { connection_runtime_config + common_wiredtiger_open + [ Config('config_base', 'true', r''' - write the base configuration file if creating the database, - see @ref config_base for more information''', + write the base configuration file if creating the database. If + \c false in the config passed directly to ::wiredtiger_open, will + ignore any existing base configuration file in addition to not creating + one. See @ref config_base for more information''', type='boolean'), Config('create', 'false', r''' create the database if it does not exist''', diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py index c8d9bcc6a5e..d861eabc7ff 100644 --- a/src/third_party/wiredtiger/dist/flags.py +++ b/src/third_party/wiredtiger/dist/flags.py @@ -12,7 +12,6 @@ flags = { 'SYNC_CHECKPOINT', 'SYNC_CLOSE', 'SYNC_DISCARD', - 'SYNC_DISCARD_FORCE', 'SYNC_WRITE_LEAVES', ], 'file_types' : [ @@ -107,7 +106,6 @@ flags = { 'session' : [ 'SESSION_CAN_WAIT', 'SESSION_CLEAR_EVICT_WALK', - 'SESSION_DISCARD_FORCE', 'SESSION_LOCKED_CHECKPOINT', 'SESSION_LOCKED_HANDLE_LIST', 'SESSION_LOCKED_SCHEMA', @@ -119,7 +117,7 @@ flags = { 'SESSION_NO_DATA_HANDLES', 'SESSION_NO_LOGGING', 'SESSION_NO_SCHEMA_LOCK', - 'SESSION_SALVAGE_CORRUPT_OK', + 'SESSION_QUIET_CORRUPT_FILE', 'SESSION_SERVER_ASYNC', ], } diff --git a/src/third_party/wiredtiger/dist/s_all b/src/third_party/wiredtiger/dist/s_all index c624db06a97..8e3f265e79b 100755 --- a/src/third_party/wiredtiger/dist/s_all +++ b/src/third_party/wiredtiger/dist/s_all @@ -2,7 +2,7 @@ # Run standard scripts. t=__wt.$$ -t_pfx=__s_all_tmp +t_pfx=__s_all_tmp_ trap 'rm -f $t *.pyc __tmp __wt.* __s_all_tmp*' 0 1 2 3 13 15 # We require python which may not be installed. 
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list index 9e64d81bc93..f3858da477e 100644 --- a/src/third_party/wiredtiger/dist/s_define.list +++ b/src/third_party/wiredtiger/dist/s_define.list @@ -16,22 +16,14 @@ TXN_API_CALL TXN_API_CALL_NOCONF TXN_API_END WIN32_LEAN_AND_MEAN -WT_ATOMIC_ADD1 -WT_ATOMIC_ADD2 -WT_ATOMIC_CAS1 -WT_ATOMIC_CAS2 -WT_ATOMIC_FETCH_ADD1 -WT_ATOMIC_FETCH_ADD4 -WT_ATOMIC_FETCH_ADD8 -WT_ATOMIC_STORE1 -WT_ATOMIC_STORE2 -WT_ATOMIC_SUB1 -WT_ATOMIC_SUB2 +WT_ATOMIC_CAS +WT_ATOMIC_FUNC WT_BARRIER WT_BLOCK_DESC_SIZE WT_CACHE_LINE_ALIGNMENT WT_COMPILER_TYPE_ALIGN WT_CONN_CHECK_PANIC +WT_COUNTER_SLOTS WT_DEADLOCK WT_DEBUG_BYTE WT_HANDLE_CLOSED @@ -41,30 +33,19 @@ WT_PACKED_STRUCT_END WT_READ_BARRIER WT_REF_SIZE WT_SESSION_LOCKED_CHECKPOINT -WT_STAT_ATOMIC_DECR -WT_STAT_ATOMIC_DECRV -WT_STAT_ATOMIC_INCR -WT_STAT_ATOMIC_INCRV +WT_STATS_FIELD_TO_SLOT +WT_STATS_SLOT_ID WT_STAT_DECR WT_STAT_DECRV -WT_STAT_FAST_ATOMIC_DECR -WT_STAT_FAST_ATOMIC_DECRV -WT_STAT_FAST_ATOMIC_INCR -WT_STAT_FAST_ATOMIC_INCRV -WT_STAT_FAST_CONN_ATOMIC_DECRV -WT_STAT_FAST_CONN_ATOMIC_INCRV WT_STAT_FAST_CONN_DECRV WT_STAT_FAST_DATA_DECRV WT_STAT_FAST_DECR WT_STAT_FAST_DECRV +WT_STAT_FAST_INCR WT_STAT_FAST_INCRV WT_STAT_FAST_SET +WT_STAT_WRITE WT_WITH_LOCK __F __WIREDTIGER_EXT_H_ __WIREDTIGER_H_ -__WT_ATOMIC_ADD -__WT_ATOMIC_CAS -__WT_ATOMIC_FETCH_ADD -__WT_ATOMIC_STORE -__WT_ATOMIC_SUB diff --git a/src/third_party/wiredtiger/dist/s_stat b/src/third_party/wiredtiger/dist/s_stat index 152097f14be..44c22ab56bb 100755 --- a/src/third_party/wiredtiger/dist/s_stat +++ b/src/third_party/wiredtiger/dist/s_stat @@ -16,7 +16,7 @@ l="$l `echo ../src/include/*.i`" ( # Get the list of statistics fields. 
search=`sed \ - -e 's/^ WT_STATS \([a-z_*]*\);$/\1/p' \ + -e 's/^ int64_t \([a-z_*]*\);$/\1/p' \ -e d ../src/include/stat.h | sort` diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index a104bb011da..fc706226c0a 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -395,6 +395,7 @@ bzalloc bzfree bzip calloc +cas catfmt cd centric @@ -493,6 +494,7 @@ desc dest destSize dev +dh dhandle dhandles dir @@ -797,6 +799,7 @@ progname ps psp pthread +ptr pushms putK putV diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style index 2237b520020..0e013852914 100755 --- a/src/third_party/wiredtiger/dist/s_style +++ b/src/third_party/wiredtiger/dist/s_style @@ -74,6 +74,13 @@ else cat $t } + # Alignment directive before "struct". + egrep 'WT_COMPILER_TYPE_ALIGN.*struct' $f > $t + test -s $t && { + echo "$f: compiler alignment direction must precede \"struct\"" + cat $t + } + # Direct calls to functions we're not supposed to use in the library. # We don't check for all of them, just a few of the common ones. if ! 
expr "$f" : 'bench/.*' > /dev/null && diff --git a/src/third_party/wiredtiger/dist/s_typedef b/src/third_party/wiredtiger/dist/s_typedef index 2e206757f48..233f432f0e5 100755 --- a/src/third_party/wiredtiger/dist/s_typedef +++ b/src/third_party/wiredtiger/dist/s_typedef @@ -25,7 +25,7 @@ build() { $l | sed -e 's/WT_PACKED_STRUCT_BEGIN(\(.*\))/struct \1 {/' \ -e 's/WT_COMPILER_TYPE_ALIGN(.*)[ ]*//' \ - -e 's/^[ ]*//' -e 's/[ ]*{.*//' | sort | \ + -e 's/^[ ]*//' -e 's/[ ]*{.*//' | sort -u | \ while read t n; do upper=`echo $n | sed -e 's/^__//' | tr [a-z] [A-Z]` echo "$t $n;" diff --git a/src/third_party/wiredtiger/dist/s_whitespace b/src/third_party/wiredtiger/dist/s_whitespace index 3a51b251bfe..dfc031e3ea4 100755 --- a/src/third_party/wiredtiger/dist/s_whitespace +++ b/src/third_party/wiredtiger/dist/s_whitespace @@ -4,7 +4,16 @@ t=__wt.$$ trap 'rm -f $t; exit 0' 0 1 2 3 13 15 -ws() +# Clear lines that only contain whitespace. +whitespace() +{ + sed -e 's/[ ][ ]*$//' < $1 > $t + cmp $t $1 > /dev/null 2>&1 || (echo "$1" && cp $t $1) +} + +# Clear lines that only contain whitespace, compress multiple empty lines +# into a single line, discarding trailing empty lines. +whitespace_and_empty_line() { sed -e 's/[ ][ ]*$//' \ -e '/^$/N' \ @@ -14,10 +23,12 @@ ws() cd .. +# Scripts. for f in `find dist -name '*.py' -name 's_*'`; do - ws $f + whitespace_and_empty_line $f done +# C-language sources. for f in `find examples ext src test \ -name '*.[chi]' -o \ -name '*.dox' -o \ @@ -26,5 +37,11 @@ for f in `find examples ext src test \ if expr "$f" : ".*/Makefile.in" > /dev/null; then continue fi - ws $f + whitespace_and_empty_line $f +done + +# Python sources. 
+for f in `find test \ + -name '*.py' | sed '/3rdparty/d'`; do + whitespace $f done diff --git a/src/third_party/wiredtiger/dist/stat.py b/src/third_party/wiredtiger/dist/stat.py index 2a87d4425e6..c9684665a53 100644 --- a/src/third_party/wiredtiger/dist/stat.py +++ b/src/third_party/wiredtiger/dist/stat.py @@ -12,12 +12,11 @@ def print_struct(title, name, base, stats): f.write('/*\n') f.write(' * Statistics entries for ' + title + '.\n') f.write(' */\n') - f.write( - '#define\tWT_' + name.upper() + '_STATS_BASE\t' + str(base) + '\n') + f.write('#define\tWT_' + name.upper() + '_STATS_BASE\t' + str(base) + '\n') f.write('struct __wt_' + name + '_stats {\n') for l in stats: - f.write('\tWT_STATS ' + l.name + ';\n') + f.write('\tint64_t ' + l.name + ';\n') f.write('};\n\n') # Update the #defines in the stat.h file. @@ -90,67 +89,113 @@ for line in open('../src/include/wiredtiger.in', 'r'): f.close() compare_srcfile(tmp_file, '../src/include/wiredtiger.in') -def print_func(name, list): - '''Print the functions for the stat.c file.''' +def print_func(name, handle, list): + '''Print the structures/functions for the stat.c file.''' + f.write('\n') + f.write('static const char * const __stats_' + name + '_desc[] = {\n') + for l in list: + f.write('\t"' + l.desc + '",\n') + f.write('};\n') + + f.write(''' +const char * +__wt_stat_''' + name + '''_desc(int slot) +{ +\treturn (__stats_''' + name + '''_desc[slot]); +} +''') + f.write(''' void -__wt_stat_init_''' + name + '''_stats(WT_''' + name.upper() + '''_STATS *stats) +__wt_stat_''' + name + '_init_single(WT_' + name.upper() + '''_STATS *stats) { -\t/* Clear, so can also be called for reinitialization. 
*/ \tmemset(stats, 0, sizeof(*stats)); - -''') - for l in sorted(list): - o = '\tstats->' + l.name + '.desc = "' + l.desc + '";\n' - if len(o) + 7 > 80: - o = o.replace('= ', '=\n\t ') - f.write(o) - f.write('''} +} ''') f.write(''' void -__wt_stat_refresh_''' + name + '''_stats(void *stats_arg) +__wt_stat_''' + name + '_init(' + handle + ''' *handle) { -\tWT_''' + name.upper() + '''_STATS *stats; +\tint i; + +\tfor (i = 0; i < WT_COUNTER_SLOTS; ++i) { +\t\thandle->stats[i] = &handle->stat_array[i]; +\t\t__wt_stat_''' + name + '''_init_single(handle->stats[i]); +\t} +} +''') -\tstats = (WT_''' + name.upper() + '''_STATS *)stats_arg; + f.write(''' +void +__wt_stat_''' + name + '_clear_single(WT_' + name.upper() + '''_STATS *stats) +{ ''') for l in sorted(list): # no_clear: don't clear the value. - if not 'no_clear' in l.flags: - f.write('\tstats->' + l.name + '.v = 0;\n'); + if 'no_clear' in l.flags: + f.write('\t\t/* not clearing ' + l.name + ' */\n') + else: + f.write('\tstats->' + l.name + ' = 0;\n') f.write('}\n') - # Aggregation is only interesting for data-source statistics. - # Complain if any aggregation flags are set. - if name == 'connection': + f.write(''' +void +__wt_stat_''' + name + '_clear_all(WT_' + name.upper() + '''_STATS **stats) +{ +\tu_int i; + +\tfor (i = 0; i < WT_COUNTER_SLOTS; ++i) +\t\t__wt_stat_''' + name + '''_clear_single(stats[i]); +} +''') + + # Single structure aggregation is currently only used by data sources. + if name == 'dsrc': + f.write(''' +void +__wt_stat_''' + name + '''_aggregate_single( + WT_''' + name.upper() + '_STATS *from, WT_' + name.upper() + '''_STATS *to) +{ +''') for l in sorted(list): - if 'no_aggregate' in l.flags or 'max_aggregate' in l.flags: - print >>sys.stdout,\ - "Aggregation configuration for " +\ - name + "." 
+ l.name + " statistics not supported" - return; + if 'no_aggregate' in l.flags: + o = '\tto->' + l.name + ' = from->' + l.name + ';\n' + elif 'max_aggregate' in l.flags: + o = '\tif (from->' + l.name + ' > to->' + l.name + ')\n' +\ + '\t\tto->' + l.name + ' = from->' + l.name + ';\n' + else: + o = '\tto->' + l.name + ' += from->' + l.name + ';\n' + if len(o) > 72: # Account for the leading tab. + o = o.replace(' += ', ' +=\n\t ') + f.write(o) + f.write('}\n') f.write(''' void -__wt_stat_aggregate_''' + name + -'''_stats(const void *child, const void *parent) +__wt_stat_''' + name + '''_aggregate( + WT_''' + name.upper() + '_STATS **from, WT_' + name.upper() + '''_STATS *to) { -\tWT_''' + name.upper() + '''_STATS *c, *p; - -\tc = (WT_''' + name.upper() + '''_STATS *)child; -\tp = (WT_''' + name.upper() + '''_STATS *)parent; ''') + # Connection level aggregation does not currently have any computation + # of a maximum value; I'm leaving in support for it, but don't declare + # a temporary variable until it's needed. + for l in sorted(list): + if 'max_aggregate' in l.flags: + f.write('\tint64_t v;\n\n') + break; for l in sorted(list): if 'no_aggregate' in l.flags: - continue; + o = '\tto->' + l.name + ' = from[0]->' + l.name + ';\n' elif 'max_aggregate' in l.flags: - o = 'if (c->' + l.name + '.v > p->' + l.name +\ - '.v)\n\t p->' + l.name + '.v = c->' + l.name + '.v;' + o = '\tif ((v = WT_STAT_READ(from, ' + l.name + ')) >\n' +\ + '\t to->' + l.name + ')\n' +\ + '\t\tto->' + l.name + ' = v;\n' else: - o = 'p->' + l.name + '.v += c->' + l.name + '.v;' - f.write('\t' + o + '\n') + o = '\tto->' + l.name + ' += WT_STAT_READ(from, ' + l.name + ');\n' + if len(o) > 72: # Account for the leading tab. + o = o.replace(' += ', ' +=\n\t ') + f.write(o) f.write('}\n') # Write the stat initialization and refresh routines to the stat.c file. @@ -158,12 +203,11 @@ f = open(tmp_file, 'w') f.write('/* DO NOT EDIT: automatically built by dist/stat.py. 
*/\n\n') f.write('#include "wt_internal.h"\n') -print_func('dsrc', dsrc_stats) -print_func('connection', connection_stats) +print_func('dsrc', 'WT_DATA_HANDLE', dsrc_stats) +print_func('connection', 'WT_CONNECTION_IMPL', connection_stats) f.close() compare_srcfile(tmp_file, '../src/support/stat.c') - # Update the statlog file with the entries we can scale per second. scale_info = 'no_scale_per_second_list = [\n' clear_info = 'no_clear_list = [\n' diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 51152fcc6af..70e1d32843c 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -7,14 +7,21 @@ # currently open'. # NOTE: All statistics descriptions must have a prefix string followed by ':'. # -# Optional configuration flags: -# no_clear Value not cleared when statistics cleared -# no_scale Don't scale value per second in the logging tool script -# # Data-source statistics are normally aggregated across the set of underlying # objects. Additional optionaly configuration flags are available: # no_aggregate Ignore the value when aggregating statistics # max_aggregate Take the maximum value when aggregating statistics +# +# Optional configuration flags: +# no_clear Value not cleared when statistics cleared +# no_scale Don't scale value per second in the logging tool script +# +# The no_clear flag is a little complicated: it means we don't clear the values +# when resetting statistics after each run (necessary when the WiredTiger engine +# is updating values that persist over multiple runs, for example the count of +# cursors), but it also causes the underlying display routines to not treat the +# change between displays as relative to the number of seconds, that is, it's an +# absolute value. The no_clear flag should be set in either case. 
from operator import attrgetter import sys @@ -326,6 +333,7 @@ connection_stats = [ CursorStat('cursor_prev', 'cursor prev calls'), CursorStat('cursor_remove', 'cursor remove calls'), CursorStat('cursor_reset', 'cursor reset calls'), + CursorStat('cursor_restart', 'cursor restarted searches'), CursorStat('cursor_search', 'cursor search calls'), CursorStat('cursor_search_near', 'cursor search near calls'), CursorStat('cursor_update', 'cursor update calls'), @@ -366,6 +374,7 @@ dsrc_stats = [ CursorStat('cursor_remove', 'remove calls'), CursorStat('cursor_remove_bytes', 'cursor-remove key bytes removed'), CursorStat('cursor_reset', 'reset calls'), + CursorStat('cursor_restart', 'restarted searches'), CursorStat('cursor_search', 'search calls'), CursorStat('cursor_search_near', 'search near calls'), CursorStat('cursor_update', 'update calls'), diff --git a/src/third_party/wiredtiger/ext/encryptors/rotn/rotn_encrypt.c b/src/third_party/wiredtiger/ext/encryptors/rotn/rotn_encrypt.c index 503dcae83a7..7d68717e3ca 100644 --- a/src/third_party/wiredtiger/ext/encryptors/rotn/rotn_encrypt.c +++ b/src/third_party/wiredtiger/ext/encryptors/rotn/rotn_encrypt.c @@ -68,7 +68,7 @@ typedef struct { WT_ENCRYPTOR encryptor; /* Must come first */ - WT_EXTENSION_API *wt_api; /* Extension API */ + WT_EXTENSION_API *wtext; /* Extension API */ int rot_N; /* rotN value */ char *keyid; /* Saved keyid */ @@ -76,6 +76,7 @@ typedef struct { u_char *shift_forw; /* Encrypt shift data from secretkey */ u_char *shift_back; /* Decrypt shift data from secretkey */ size_t shift_len; /* Length of shift* byte arrays */ + int force_error; /* Force a decrypt error for testing */ } ROTN_ENCRYPTOR; /*! [WT_ENCRYPTOR initialization structure] */ @@ -84,6 +85,22 @@ typedef struct { #define IV_LEN 16 /* + * rotn_error -- + * Display an error from this module in a standard way. 
+ */ +static int +rotn_error(ROTN_ENCRYPTOR *encryptor, WT_SESSION *session, int err, + const char *msg) +{ + WT_EXTENSION_API *wtext; + + wtext = encryptor->wtext; + (void)wtext->err_printf(wtext, session, + "rotn encryption: %s: %s", msg, wtext->strerror(wtext, NULL, err)); + return (err); +} + +/* * make_cksum -- * This is where one would call a checksum function on the encrypted * buffer. Here we just put a constant value in it. @@ -221,13 +238,18 @@ rotn_decrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session, (void)session; /* Unused */ /* + * For certain tests, force an error we can recognize. + */ + if (rotn_encryptor->force_error) + return (-1000); + + /* * Make sure it is big enough. */ mylen = src_len - (CHKSUM_LEN + IV_LEN); - if (dst_len < mylen) { - fprintf(stderr, "Rotate: ENOMEM ERROR\n"); - return (ENOMEM); - } + if (dst_len < mylen) + return (rotn_error(rotn_encryptor, session, + ENOMEM, "decrypt buffer not big enough")); /* * !!! Most implementations would verify the checksum here. @@ -286,7 +308,7 @@ rotn_customize(WT_ENCRYPTOR *encryptor, WT_SESSION *session, const ROTN_ENCRYPTOR *orig; ROTN_ENCRYPTOR *rotn_encryptor; WT_CONFIG_ITEM keyid, secret; - WT_EXTENSION_API *wt_api; + WT_EXTENSION_API *wtext; size_t i, len; int ret, keyid_val; u_char base; @@ -295,7 +317,7 @@ rotn_customize(WT_ENCRYPTOR *encryptor, WT_SESSION *session, keyid_val = 0; orig = (const ROTN_ENCRYPTOR *)encryptor; - wt_api = orig->wt_api; + wtext = orig->wtext; if ((rotn_encryptor = calloc(1, sizeof(ROTN_ENCRYPTOR))) == NULL) return (errno); @@ -305,7 +327,7 @@ rotn_customize(WT_ENCRYPTOR *encryptor, WT_SESSION *session, /* * Stash the keyid from the configuration string. */ - if ((ret = wt_api->config_get(wt_api, session, encrypt_config, + if ((ret = wtext->config_get(wtext, session, encrypt_config, "keyid", &keyid)) == 0 && keyid.len != 0) { /* * In this demonstration, we expect keyid to be a number. 
@@ -327,7 +349,7 @@ rotn_customize(WT_ENCRYPTOR *encryptor, WT_SESSION *session, * We stash the secret key from the configuration string * and build some shift bytes to make encryption/decryption easy. */ - if ((ret = wt_api->config_get(wt_api, session, encrypt_config, + if ((ret = wtext->config_get(wtext, session, encrypt_config, "secretkey", &secret)) == 0 && secret.len != 0) { len = secret.len; if ((rotn_encryptor->secretkey = malloc(len + 1)) == NULL || @@ -396,6 +418,53 @@ rotn_terminate(WT_ENCRYPTOR *encryptor, WT_SESSION *session) } /*! [WT_ENCRYPTOR terminate] */ +/* + * rotn_configure -- + * WiredTiger no-op encryption configuration. + */ +static int +rotn_configure(ROTN_ENCRYPTOR *rotn_encryptor, WT_CONFIG_ARG *config) +{ + WT_CONFIG_ITEM k, v; + WT_CONFIG_PARSER *config_parser; + WT_EXTENSION_API *wtext; /* Extension API */ + int ret, t_ret; + + wtext = rotn_encryptor->wtext; + + /* Get the configuration string. */ + if ((ret = wtext->config_get(wtext, NULL, config, "config", &v)) != 0) + return (rotn_error(rotn_encryptor, NULL, ret, + "WT_EXTENSION_API.config_get")); + + /* Step through the list of configuration options. */ + if ((ret = wtext->config_parser_open( + wtext, NULL, v.str, v.len, &config_parser)) != 0) + return (rotn_error(rotn_encryptor, NULL, ret, + "WT_EXTENSION_API.config_parser_open")); + + while ((ret = config_parser->next(config_parser, &k, &v)) == 0) { + if (strncmp("rotn_force_error", k.str, k.len) == 0 && + strlen("rotn_force_error") == k.len) { + rotn_encryptor->force_error = v.val == 0 ? 0 : 1; + continue; + } + else { + (void)config_parser->close(config_parser); + return (rotn_error(rotn_encryptor, NULL, EINVAL, + "unknown config key")); + } + } + if ((t_ret = config_parser->close(config_parser)) != 0) + return (rotn_error(rotn_encryptor, NULL, t_ret, + "WT_CONFIG_PARSER.close")); + if (ret != WT_NOTFOUND) + return (rotn_error(rotn_encryptor, NULL, ret, + "WT_CONFIG_PARSER.next")); + + return (0); +} + /*! 
[WT_ENCRYPTOR initialization function] */ /* * wiredtiger_extension_init -- @@ -405,8 +474,7 @@ int wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) { ROTN_ENCRYPTOR *rotn_encryptor; - - (void)config; /* Unused parameters */ + int ret; if ((rotn_encryptor = calloc(1, sizeof(ROTN_ENCRYPTOR))) == NULL) return (errno); @@ -423,8 +491,10 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) rotn_encryptor->encryptor.sizing = rotn_sizing; rotn_encryptor->encryptor.customize = rotn_customize; rotn_encryptor->encryptor.terminate = rotn_terminate; + rotn_encryptor->wtext = connection->get_extension_api(connection); - rotn_encryptor->wt_api = connection->get_extension_api(connection); + if ((ret = rotn_configure(rotn_encryptor, config)) != 0) + return (ret); /* Load the encryptor */ return (connection->add_encryptor( diff --git a/src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c b/src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c index 0dd110955ad..34b8d7c7c64 100644 --- a/src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c +++ b/src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c @@ -128,7 +128,7 @@ csv_customize(WT_EXTRACTOR *extractor, WT_SESSION *session, return (errno); *csv_extractor = *orig; - csv_extractor->field_num = field_num; + csv_extractor->field_num = (int)field_num; *customp = (WT_EXTRACTOR *)csv_extractor; return (0); } diff --git a/src/third_party/wiredtiger/src/async/async_api.c b/src/third_party/wiredtiger/src/async/async_api.c index 9874d7aab00..416c3c84f7b 100644 --- a/src/third_party/wiredtiger/src/async/async_api.c +++ b/src/third_party/wiredtiger/src/async/async_api.c @@ -151,15 +151,16 @@ retry: * If we can set the state then the op entry is ours. * Start the next search at the next entry after this one. 
*/ - if (!WT_ATOMIC_CAS4(op->state, WT_ASYNCOP_FREE, WT_ASYNCOP_READY)) { + if (!__wt_atomic_cas32(&op->state, WT_ASYNCOP_FREE, WT_ASYNCOP_READY)) { WT_STAT_FAST_CONN_INCR(session, async_alloc_race); goto retry; } WT_STAT_FAST_CONN_INCRV(session, async_alloc_view, view); WT_RET(__async_get_format(conn, uri, config, op)); - op->unique_id = WT_ATOMIC_ADD8(async->op_id, 1); + op->unique_id = __wt_atomic_add64(&async->op_id, 1); op->optype = WT_AOP_NONE; - (void)WT_ATOMIC_STORE4(async->ops_index, (i + 1) % conn->async_size); + (void)__wt_atomic_store32( + &async->ops_index, (i + 1) % conn->async_size); *opp = op; return (0); } @@ -206,15 +207,15 @@ __wt_async_stats_update(WT_SESSION_IMPL *session) { WT_ASYNC *async; WT_CONNECTION_IMPL *conn; - WT_CONNECTION_STATS *stats; + WT_CONNECTION_STATS **stats; conn = S2C(session); async = conn->async; if (async == NULL) return; - stats = &conn->stats; - WT_STAT_SET(stats, async_cur_queue, async->cur_queue); - WT_STAT_SET(stats, async_max_queue, async->max_queue); + stats = conn->stats; + WT_STAT_SET(session, stats, async_cur_queue, async->cur_queue); + WT_STAT_SET(session, stats, async_max_queue, async->max_queue); F_SET(conn, WT_CONN_SERVER_ASYNC); } @@ -514,7 +515,7 @@ retry: */ __wt_sleep(0, 100000); - if (!WT_ATOMIC_CAS4(async->flush_state, WT_ASYNC_FLUSH_NONE, + if (!__wt_atomic_cas32(&async->flush_state, WT_ASYNC_FLUSH_NONE, WT_ASYNC_FLUSH_IN_PROGRESS)) goto retry; /* @@ -524,7 +525,7 @@ retry: * things off the work queue with the lock. 
*/ async->flush_count = 0; - (void)WT_ATOMIC_ADD8(async->flush_gen, 1); + (void)__wt_atomic_add64(&async->flush_gen, 1); WT_ASSERT(session, async->flush_op.state == WT_ASYNCOP_FREE); async->flush_op.state = WT_ASYNCOP_READY; WT_ERR(__wt_async_op_enqueue(session, &async->flush_op)); diff --git a/src/third_party/wiredtiger/src/async/async_op.c b/src/third_party/wiredtiger/src/async/async_op.c index d0c58f584cc..7e1920933c2 100644 --- a/src/third_party/wiredtiger/src/async/async_op.c +++ b/src/third_party/wiredtiger/src/async/async_op.c @@ -280,7 +280,7 @@ __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op) * Enqueue op at the tail of the work queue. * We get our slot in the ring buffer to use. */ - my_alloc = WT_ATOMIC_ADD8(async->alloc_head, 1); + my_alloc = __wt_atomic_add64(&async->alloc_head, 1); my_slot = my_alloc % async->async_qsize; /* @@ -300,7 +300,7 @@ __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op) #endif WT_PUBLISH(async->async_queue[my_slot], op); op->state = WT_ASYNCOP_ENQUEUED; - if (WT_ATOMIC_ADD4(async->cur_queue, 1) > async->max_queue) + if (__wt_atomic_add32(&async->cur_queue, 1) > async->max_queue) WT_PUBLISH(async->max_queue, async->cur_queue); /* * Multiple threads may be adding ops to the queue. We need to wait diff --git a/src/third_party/wiredtiger/src/async/async_worker.c b/src/third_party/wiredtiger/src/async/async_worker.c index 473e7103832..6a5ec5feeb0 100644 --- a/src/third_party/wiredtiger/src/async/async_worker.c +++ b/src/third_party/wiredtiger/src/async/async_worker.c @@ -67,7 +67,7 @@ retry: * a race, try again. */ my_consume = last_consume + 1; - if (!WT_ATOMIC_CAS8(async->alloc_tail, last_consume, my_consume)) + if (!__wt_atomic_cas64(&async->alloc_tail, last_consume, my_consume)) goto retry; /* * This item of work is ours to process. 
Clear it out of the @@ -81,7 +81,7 @@ retry: WT_ASSERT(session, async->cur_queue > 0); WT_ASSERT(session, *op != NULL); WT_ASSERT(session, (*op)->state == WT_ASYNCOP_ENQUEUED); - (void)WT_ATOMIC_SUB4(async->cur_queue, 1); + (void)__wt_atomic_sub32(&async->cur_queue, 1); (*op)->state = WT_ASYNCOP_WORKING; if (*op == &async->flush_op) @@ -316,7 +316,7 @@ __wt_async_worker(void *arg) * the queue. */ WT_ORDERED_READ(flush_gen, async->flush_gen); - if (WT_ATOMIC_ADD4(async->flush_count, 1) == + if (__wt_atomic_add32(&async->flush_count, 1) == conn->async_workers) { /* * We're last. All workers accounted for so diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c index d593537446b..cdef1682faf 100644 --- a/src/third_party/wiredtiger/src/block/block_ext.c +++ b/src/third_party/wiredtiger/src/block/block_ext.c @@ -283,7 +283,7 @@ __wt_block_misplaced(WT_SESSION_IMPL *session, * Don't check during the salvage read phase, we might be reading an * already freed overflow page. */ - if (F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK)) + if (F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) return (0); /* diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index c005b226bfc..cfb5b000092 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -158,9 +158,9 @@ __wt_block_configure_first_fit(WT_BLOCK *block, int on) * as long as any operation wants it. 
*/ if (on) - (void)WT_ATOMIC_ADD4(block->allocfirst, 1); + (void)__wt_atomic_add32(&block->allocfirst, 1); else - (void)WT_ATOMIC_SUB4(block->allocfirst, 1); + (void)__wt_atomic_sub32(&block->allocfirst, 1); } /* @@ -398,21 +398,19 @@ err: __wt_scr_free(session, &buf); void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats) { + WT_UNUSED(session); + /* - * We're looking inside the live system's structure, which normally - * requires locking: the chances of a corrupted read are probably - * non-existent, and it's statistics information regardless, but it - * isn't like this is a common function for an application to call. + * Reading from the live system's structure normally requires locking, + * but it's an 8B statistics read, there's no need. */ - __wt_spin_lock(session, &block->live_lock); - WT_STAT_SET(stats, allocation_size, block->allocsize); - WT_STAT_SET(stats, block_checkpoint_size, block->live.ckpt_size); - WT_STAT_SET(stats, block_magic, WT_BLOCK_MAGIC); - WT_STAT_SET(stats, block_major, WT_BLOCK_MAJOR_VERSION); - WT_STAT_SET(stats, block_minor, WT_BLOCK_MINOR_VERSION); - WT_STAT_SET(stats, block_reuse_bytes, block->live.avail.bytes); - WT_STAT_SET(stats, block_size, block->fh->size); - __wt_spin_unlock(session, &block->live_lock); + stats->allocation_size = block->allocsize; + stats->block_checkpoint_size = (int64_t)block->live.ckpt_size; + stats->block_magic = WT_BLOCK_MAGIC; + stats->block_major = WT_BLOCK_MAJOR_VERSION; + stats->block_minor = WT_BLOCK_MINOR_VERSION; + stats->block_reuse_bytes = (int64_t)block->live.avail.bytes; + stats->block_size = block->fh->size; } /* @@ -426,7 +424,7 @@ __wt_block_manager_size( wt_off_t filesize; WT_RET(__wt_filesize_name(session, filename, &filesize)); - WT_STAT_SET(stats, block_size, filesize); + stats->block_size = filesize; return (0); } diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c index 0d631396b41..9f7c869dd38 
100644 --- a/src/third_party/wiredtiger/src/block/block_read.c +++ b/src/third_party/wiredtiger/src/block/block_read.c @@ -200,7 +200,7 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, if (page_cksum == cksum) return (0); - if (!F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK)) + if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) __wt_errx(session, "read checksum error for %" PRIu32 "B block at " "offset %" PRIuMAX ": calculated block checksum " @@ -208,7 +208,7 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, "of %" PRIu32, size, (uintmax_t)offset, page_cksum, cksum); } else - if (!F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK)) + if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) __wt_errx(session, "read checksum error for %" PRIu32 "B block at " "offset %" PRIuMAX ": block header checksum " @@ -218,6 +218,6 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, /* Panic if a checksum fails during an ordinary read. */ return (block->verify || - F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK) ? + F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE) ? WT_ERROR : __wt_illegal_value(session, block->name)); } diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index 0aed5940533..9f41e3ae684 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -549,8 +549,11 @@ retry: WT_RET(__cursor_func_init(cbt, 1)); WT_ILLEGAL_VALUE_ERR(session); } -err: if (ret == WT_RESTART) +err: if (ret == WT_RESTART) { + WT_STAT_FAST_CONN_INCR(session, cursor_restart); + WT_STAT_FAST_DATA_INCR(session, cursor_restart); goto retry; + } /* Insert doesn't maintain a position across calls, clear resources. 
*/ if (ret == 0) WT_TRET(__curfile_leave(cbt)); @@ -624,8 +627,11 @@ retry: WT_RET(__cursor_func_init(cbt, 1)); WT_ILLEGAL_VALUE_ERR(session); } -err: if (ret == WT_RESTART) +err: if (ret == WT_RESTART) { + WT_STAT_FAST_CONN_INCR(session, cursor_restart); + WT_STAT_FAST_DATA_INCR(session, cursor_restart); goto retry; + } WT_TRET(__curfile_leave(cbt)); if (ret != 0) WT_TRET(__cursor_reset(cbt)); @@ -702,8 +708,11 @@ retry: WT_RET(__cursor_func_init(cbt, 1)); WT_ILLEGAL_VALUE_ERR(session); } -err: if (ret == WT_RESTART) +err: if (ret == WT_RESTART) { + WT_STAT_FAST_CONN_INCR(session, cursor_restart); + WT_STAT_FAST_DATA_INCR(session, cursor_restart); goto retry; + } /* * If the cursor is configured to overwrite and the record is not * found, that is exactly what we want. @@ -790,8 +799,11 @@ retry: WT_RET(__cursor_func_init(cbt, 1)); WT_ILLEGAL_VALUE_ERR(session); } -err: if (ret == WT_RESTART) +err: if (ret == WT_RESTART) { + WT_STAT_FAST_CONN_INCR(session, cursor_restart); + WT_STAT_FAST_DATA_INCR(session, cursor_restart); goto retry; + } /* * If successful, point the cursor at internal copies of the data. We @@ -993,22 +1005,27 @@ __cursor_truncate(WT_SESSION_IMPL *session, * instantiated the end cursor, so we know that page is pinned in memory * and we can proceed without concern. */ - do { - WT_RET(__wt_btcur_remove(start)); - /* - * Reset ret each time through so that we don't loop forever in - * the cursor equals case. - */ - for (ret = 0;;) { - if (stop != NULL && __cursor_equals(start, stop)) - break; - if ((ret = __wt_btcur_next(start, 1)) != 0) - break; - start->compare = 0; /* Exact match */ - if ((ret = rmfunc(session, start, 1)) != 0) - break; - } - } while (ret == WT_RESTART); +retry: WT_RET(__wt_btcur_remove(start)); + + /* + * Reset ret each time through so that we don't loop forever in + * the cursor equals case. 
+ */ + for (ret = 0;;) { + if (stop != NULL && __cursor_equals(start, stop)) + break; + if ((ret = __wt_btcur_next(start, 1)) != 0) + break; + start->compare = 0; /* Exact match */ + if ((ret = rmfunc(session, start, 1)) != 0) + break; + } + + if (ret == WT_RESTART) { + WT_STAT_FAST_CONN_INCR(session, cursor_restart); + WT_STAT_FAST_DATA_INCR(session, cursor_restart); + goto retry; + } WT_RET_NOTFOUND_OK(ret); return (0); @@ -1042,24 +1059,28 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session, * other thread of control; in that case, repeat the full search to * refresh the page's modification information. */ - do { - WT_RET(__wt_btcur_remove(start)); - /* - * Reset ret each time through so that we don't loop forever in - * the cursor equals case. - */ - for (ret = 0;;) { - if (stop != NULL && __cursor_equals(start, stop)) - break; - if ((ret = __wt_btcur_next(start, 1)) != 0) - break; - start->compare = 0; /* Exact match */ - value = (uint8_t *)start->iface.value.data; - if (*value != 0 && - (ret = rmfunc(session, start, 1)) != 0) - break; - } - } while (ret == WT_RESTART); +retry: WT_RET(__wt_btcur_remove(start)); + /* + * Reset ret each time through so that we don't loop forever in + * the cursor equals case. 
+ */ + for (ret = 0;;) { + if (stop != NULL && __cursor_equals(start, stop)) + break; + if ((ret = __wt_btcur_next(start, 1)) != 0) + break; + start->compare = 0; /* Exact match */ + value = (uint8_t *)start->iface.value.data; + if (*value != 0 && + (ret = rmfunc(session, start, 1)) != 0) + break; + } + + if (ret == WT_RESTART) { + WT_STAT_FAST_CONN_INCR(session, cursor_restart); + WT_STAT_FAST_DATA_INCR(session, cursor_restart); + goto retry; + } WT_RET_NOTFOUND_OK(ret); return (0); diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c index 8cca6328f21..cddfa0ef801 100644 --- a/src/third_party/wiredtiger/src/btree/bt_delete.c +++ b/src/third_party/wiredtiger/src/btree/bt_delete.c @@ -70,15 +70,15 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, int *skipp) /* If we have a clean page in memory, attempt to evict it. */ if (ref->state == WT_REF_MEM && - WT_ATOMIC_CAS4(ref->state, WT_REF_MEM, WT_REF_LOCKED)) { + __wt_atomic_casv32(&ref->state, WT_REF_MEM, WT_REF_LOCKED)) { if (__wt_page_is_modified(ref->page)) { WT_PUBLISH(ref->state, WT_REF_MEM); return (0); } - (void)WT_ATOMIC_ADD4(S2BT(session)->evict_busy, 1); + (void)__wt_atomic_addv32(&S2BT(session)->evict_busy, 1); ret = __wt_evict_page(session, ref); - (void)WT_ATOMIC_SUB4(S2BT(session)->evict_busy, 1); + (void)__wt_atomic_subv32(&S2BT(session)->evict_busy, 1); WT_RET_BUSY_OK(ret); } @@ -93,7 +93,7 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, int *skipp) * unclear optimizing for overlapping range deletes is worth the effort. */ if (ref->state != WT_REF_DISK || - !WT_ATOMIC_CAS4(ref->state, WT_REF_DISK, WT_REF_LOCKED)) + !__wt_atomic_casv32(&ref->state, WT_REF_DISK, WT_REF_LOCKED)) return (0); /* @@ -176,8 +176,8 @@ __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) * If the page is still "deleted", it's as we left it, * reset the state. 
*/ - if (WT_ATOMIC_CAS4( - ref->state, WT_REF_DELETED, WT_REF_DISK)) + if (__wt_atomic_casv32( + &ref->state, WT_REF_DELETED, WT_REF_DISK)) return; break; case WT_REF_LOCKED: @@ -242,7 +242,7 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) if (ref->page_del == NULL) return (1); - if (!WT_ATOMIC_CAS4(ref->state, WT_REF_DELETED, WT_REF_LOCKED)) + if (!__wt_atomic_casv32(&ref->state, WT_REF_DELETED, WT_REF_LOCKED)) return (0); skip = (ref->page_del == NULL || diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c index a05c6217338..060a93f543f 100644 --- a/src/third_party/wiredtiger/src/btree/bt_discard.c +++ b/src/third_party/wiredtiger/src/btree/bt_discard.c @@ -414,7 +414,7 @@ __free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd) for (; upd != NULL; upd = next) { /* Everything we free should be visible to everyone. */ WT_ASSERT(session, - F_ISSET(session, WT_SESSION_DISCARD_FORCE) || + F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || upd->txnid == WT_TXN_ABORTED || __wt_txn_visible_all(session, upd->txnid)); diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index c1a8ab61054..0cc6b6eb25f 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -352,8 +352,6 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) session, &btree->ovfl_lock, "btree overflow lock")); WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush lock")); - __wt_stat_init_dsrc_stats(&btree->dhandle->stats); - btree->write_gen = ckpt->write_gen; /* Write generation */ btree->modified = 0; /* Clean */ @@ -385,12 +383,15 @@ int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) { + WT_BM *bm; WT_BTREE *btree; + WT_DECL_ITEM(tmp); WT_DECL_RET; WT_ITEM dsk; WT_PAGE *page; btree = S2BT(session); + bm = btree->bm; /* * A buffer into which we read a root 
page; don't use a scratch buffer, @@ -399,12 +400,43 @@ __wt_btree_tree_open( WT_CLEAR(dsk); /* - * Read the page, then build the in-memory version of the page. Clear - * any local reference to an allocated copy of the disk image on return, - * the page steals it. + * Read and verify the page (verify to catch encrypted objects we can't + * decrypt, where we read the object successfully but we can't decrypt + * it, and we want to fail gracefully). + * + * Create a printable version of the address to pass to verify. + */ + WT_ERR(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(bm->addr_string(bm, session, tmp, addr, addr_size)); + + F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE); + if ((ret = __wt_bt_read(session, &dsk, addr, addr_size)) == 0) + ret = __wt_verify_dsk(session, tmp->data, &dsk); + F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE); + if (ret != 0) + __wt_err(session, ret, + "unable to read root page from %s", session->dhandle->name); + /* + * Failure to open metadata means that the database is unavailable. + * Try to provide a helpful failure message. + */ + if (ret != 0 && WT_IS_METADATA(session->dhandle)) { + __wt_errx(session, + "WiredTiger has failed to open its metadata"); + __wt_errx(session, "This may be due to the database" + " files being encrypted, being from an older" + " version or due to corruption on disk"); + __wt_errx(session, "You should confirm that you have" + " opened the database with the correct options including" + " all encryption and compression options"); + } + WT_ERR(ret); + + /* + * Build the in-memory version of the page. Clear our local reference to + * the allocated copy of the disk image on return, the in-memory object + * steals it. */ - WT_ERR(__wt_bt_read(session, &dsk, addr, addr_size)); - WT_ERR(__wt_verify_dsk(session, (const char *)addr, &dsk)); WT_ERR(__wt_page_inmem(session, NULL, dsk.data, dsk.memsize, WT_DATA_IN_ITEM(&dsk) ? 
WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED, &page)); @@ -414,6 +446,8 @@ __wt_btree_tree_open( __wt_root_ref_init(&btree->root, page, btree->type != BTREE_ROW); err: __wt_buf_free(session, &dsk); + __wt_scr_free(session, &tmp); + return (ret); } diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c index a8bbf8a0266..836c1540c5f 100644 --- a/src/third_party/wiredtiger/src/btree/bt_io.c +++ b/src/third_party/wiredtiger/src/btree/bt_io.c @@ -24,10 +24,12 @@ __wt_bt_read(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor; WT_ITEM *ip; const WT_PAGE_HEADER *dsk; + const char *fail_msg; size_t result_len; btree = S2BT(session); bm = btree->bm; + fail_msg = NULL; /* -Wuninitialized */ /* * If anticipating a compressed or encrypted block, read into a scratch @@ -52,40 +54,36 @@ __wt_bt_read(WT_SESSION_IMPL *session, if (F_ISSET(dsk, WT_PAGE_ENCRYPTED)) { if (btree->kencryptor == NULL || (encryptor = btree->kencryptor->encryptor) == NULL || - encryptor->decrypt == NULL) - WT_ERR_MSG(session, WT_ERROR, - "read encrypted block where no decryption engine " - "configured"); + encryptor->decrypt == NULL) { + fail_msg = + "encrypted block in file for which no encryption " + "configured"; + goto corrupt; + } WT_ERR(__wt_scr_alloc(session, 0, &etmp)); - ret = __wt_decrypt(session, - encryptor, WT_BLOCK_ENCRYPT_SKIP, ip, etmp); - /* - * It may be file corruption, which is really, really bad, or - * may be a mismatch of encryption configuration, for example, - * an incorrect secretkey. - */ - if (ret != 0) - WT_ERR(F_ISSET(btree, WT_BTREE_VERIFY) || - F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK) ? 
- WT_ERROR : - __wt_illegal_value(session, btree->dhandle->name)); + if ((ret = __wt_decrypt(session, + encryptor, WT_BLOCK_ENCRYPT_SKIP, ip, etmp)) != 0) { + fail_msg = "block decryption failed"; + goto corrupt; + } ip = etmp; dsk = ip->data; - } else if (btree->kencryptor != NULL && - !F_ISSET(btree, WT_BTREE_VERIFY) && - !F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK)) - WT_ERR_MSG(session, WT_ERROR, - "encryption configured, and existing file is not " - "encrypted"); + } else if (btree->kencryptor != NULL) { + fail_msg = + "unencrypted block in file for which encryption configured"; + goto corrupt; + } if (F_ISSET(dsk, WT_PAGE_COMPRESSED)) { if (btree->compressor == NULL || - btree->compressor->decompress == NULL) - WT_ERR_MSG(session, WT_ERROR, - "read compressed block where no compression engine " - "configured"); + btree->compressor->decompress == NULL) { + fail_msg = + "compressed block in file for which no compression " + "configured"; + goto corrupt; + } /* * Size the buffer based on the in-memory bytes we're expecting @@ -118,11 +116,10 @@ __wt_bt_read(WT_SESSION_IMPL *session, * it's OK, otherwise it's really, really bad. */ if (ret != 0 || - result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP) - WT_ERR(F_ISSET(btree, WT_BTREE_VERIFY) || - F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK) ? - WT_ERROR : - __wt_illegal_value(session, btree->dhandle->name)); + result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP) { + fail_msg = "block decryption failed"; + goto corrupt; + } } else /* * If we uncompressed above, the page is in the correct buffer. 
@@ -139,7 +136,7 @@ __wt_bt_read(WT_SESSION_IMPL *session, if (tmp == NULL) WT_ERR(__wt_scr_alloc(session, 0, &tmp)); WT_ERR(bm->addr_string(bm, session, tmp, addr, addr_size)); - WT_ERR(__wt_verify_dsk(session, (const char *)tmp->data, buf)); + WT_ERR(__wt_verify_dsk(session, tmp->data, buf)); } WT_STAT_FAST_CONN_INCR(session, cache_read); @@ -149,6 +146,16 @@ __wt_bt_read(WT_SESSION_IMPL *session, WT_STAT_FAST_CONN_INCRV(session, cache_bytes_read, dsk->mem_size); WT_STAT_FAST_DATA_INCRV(session, cache_bytes_read, dsk->mem_size); + if (0) { +corrupt: if (ret == 0) + ret = WT_ERROR; + if (!F_ISSET(btree, WT_BTREE_VERIFY) && + !F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) { + __wt_err(session, ret, "%s", fail_msg); + ret = __wt_illegal_value(session, btree->dhandle->name); + } + } + err: __wt_scr_free(session, &tmp); __wt_scr_free(session, &etmp); return (ret); diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index 86edd992b28..922dc2892b8 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -80,10 +80,13 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags return (WT_NOTFOUND); /* - * The page isn't in memory, attempt to read it. - * Make sure there is space in the cache. + * The page isn't in memory, read it. If this thread is + * allowed to do eviction work, check for space in the + * cache. */ - WT_RET(__wt_cache_eviction_check(session, 1, NULL)); + if (!LF_ISSET(WT_READ_NO_EVICT)) + WT_RET(__wt_cache_eviction_check( + session, 1, NULL)); WT_RET(__wt_cache_read(session, ref)); oldgen = LF_ISSET(WT_READ_WONT_NEED) || F_ISSET(session, WT_SESSION_NO_CACHE); @@ -208,18 +211,20 @@ stall: wait_cnt += 1000; } /* - * If stalling, check if the cache needs help. If we do - * work for the cache, substitute that for a sleep. + * If stalling and this thread is allowed to do eviction + * work, check if the cache needs help. 
If we do work + * for the cache, substitute that for a sleep. */ - WT_RET( - __wt_cache_eviction_check(session, 1, &cache_work)); - if (!cache_work) { - sleep_cnt = WT_MIN(wait_cnt, 10000); - wait_cnt *= 2; - WT_STAT_FAST_CONN_INCRV( - session, page_sleep, sleep_cnt); - __wt_sleep(0, sleep_cnt); + if (!LF_ISSET(WT_READ_NO_EVICT)) { + WT_RET(__wt_cache_eviction_check( + session, 1, &cache_work)); + if (cache_work) + continue; } + sleep_cnt = WT_MIN(wait_cnt, 10000); + wait_cnt *= 2; + WT_STAT_FAST_CONN_INCRV(session, page_sleep, sleep_cnt); + __wt_sleep(0, sleep_cnt); } } } @@ -326,8 +331,8 @@ err: if ((pindex = WT_INTL_INDEX_GET_SAFE(page)) != NULL) { /* Increment the cache statistics. */ __wt_cache_page_inmem_incr(session, page, size); - (void)WT_ATOMIC_ADD8(cache->bytes_read, size); - (void)WT_ATOMIC_ADD8(cache->pages_inmem, 1); + (void)__wt_atomic_add64(&cache->bytes_read, size); + (void)__wt_atomic_add64(&cache->pages_inmem, 1); *pagep = page; return (0); diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index e27f7c3398c..a3ce39b7758 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -18,8 +18,8 @@ __wt_cache_read(WT_SESSION_IMPL *session, WT_REF *ref) WT_DECL_RET; WT_ITEM tmp; WT_PAGE *page; - WT_PAGE_STATE previous_state; size_t addr_size; + uint32_t previous_state; const uint8_t *addr; page = NULL; @@ -35,9 +35,9 @@ __wt_cache_read(WT_SESSION_IMPL *session, WT_REF *ref) * WT_REF_LOCKED, for deleted pages. If successful, we've won the * race, read the page. 
*/ - if (WT_ATOMIC_CAS4(ref->state, WT_REF_DISK, WT_REF_READING)) + if (__wt_atomic_casv32(&ref->state, WT_REF_DISK, WT_REF_READING)) previous_state = WT_REF_DISK; - else if (WT_ATOMIC_CAS4(ref->state, WT_REF_DELETED, WT_REF_LOCKED)) + else if (__wt_atomic_casv32(&ref->state, WT_REF_DELETED, WT_REF_LOCKED)) previous_state = WT_REF_DELETED; else return (0); diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c index f41a5d86e9f..22d4948e07d 100644 --- a/src/third_party/wiredtiger/src/btree/bt_slvg.c +++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c @@ -197,9 +197,9 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) * Turn off read checksum and verification error messages while we're * reading the file, we expect to see corrupted blocks. */ - F_SET(session, WT_SESSION_SALVAGE_CORRUPT_OK); + F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE); ret = __slvg_read(session, ss); - F_CLR(session, WT_SESSION_SALVAGE_CORRUPT_OK); + F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE); WT_ERR(ret); /* diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index dbd4042129d..a63eadcaeab 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -45,10 +45,13 @@ static int __split_stash_add( WT_SESSION_IMPL *session, uint64_t split_gen, void *p, size_t len) { + WT_CONNECTION_IMPL *conn; WT_SPLIT_STASH *stash; WT_ASSERT(session, p != NULL); + conn = S2C(session); + /* Grow the list as necessary. 
*/ WT_RET(__wt_realloc_def(session, &session->split_stash_alloc, session->split_stash_cnt + 1, &session->split_stash)); @@ -58,8 +61,8 @@ __split_stash_add( stash->p = p; stash->len = len; - WT_STAT_FAST_CONN_ATOMIC_INCRV(session, rec_split_stashed_bytes, len); - WT_STAT_FAST_CONN_ATOMIC_INCR(session, rec_split_stashed_objects); + (void)__wt_atomic_add64(&conn->split_stashed_bytes, len); + (void)__wt_atomic_add64(&conn->split_stashed_objects, 1); /* See if we can free any previous entries. */ if (session->split_stash_cnt > 1) @@ -75,10 +78,13 @@ __split_stash_add( void __wt_split_stash_discard(WT_SESSION_IMPL *session) { + WT_CONNECTION_IMPL *conn; WT_SPLIT_STASH *stash; uint64_t oldest; size_t i; + conn = S2C(session); + /* Get the oldest split generation. */ oldest = __split_oldest_gen(session); @@ -93,10 +99,8 @@ __wt_split_stash_discard(WT_SESSION_IMPL *session) * It's a bad thing if another thread is in this memory after * we free it, make sure nothing good happens to that thread. */ - WT_STAT_FAST_CONN_ATOMIC_DECRV( - session, rec_split_stashed_bytes, stash->len); - WT_STAT_FAST_CONN_ATOMIC_DECR( - session, rec_split_stashed_objects); + (void)__wt_atomic_sub64(&conn->split_stashed_bytes, stash->len); + (void)__wt_atomic_sub64(&conn->split_stashed_objects, 1); __wt_overwrite_and_free_len(session, stash->p, stash->len); } @@ -557,7 +561,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent) */ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(parent) == pindex); WT_INTL_INDEX_SET(parent, alloc_index); - split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1); + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); panic = 1; #ifdef HAVE_DIAGNOSTIC @@ -887,13 +891,14 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, F_CLR_ATOMIC(parent, WT_PAGE_SPLIT_LOCKED); continue; } + /* * If we're attempting an in-memory split and we can't lock the - * parent, give up. 
This avoids an infinite loop where we are - * trying to split a page while its parent is being - * checkpointed. + * parent while there is a checkpoint in progress, give up. + * This avoids an infinite loop where we are trying to split a + * page while its parent is being checkpointed. */ - if (LF_ISSET(WT_SPLIT_INMEM)) + if (LF_ISSET(WT_SPLIT_INMEM) && S2BT(session)->checkpointing) return (EBUSY); __wt_yield(); } @@ -905,9 +910,10 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, * could conceivably be evicted. Get a hazard pointer on the parent * now, so that we can safely access it after updating the index. * - * Take care that getting the page doesn't trigger eviction, or we - * could block trying to split a different child of our parent and - * deadlock. + * Take care getting the page doesn't trigger eviction work: we could + * block trying to split a different child of our parent and deadlock + * or we could be the eviction server relied upon by other threads to + * populate the eviction queue. 
*/ if (!__wt_ref_is_root(parent_ref = parent->pg_intl_parent_ref)) { WT_ERR(__wt_page_in(session, parent_ref, WT_READ_NO_EVICT)); @@ -933,8 +939,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_ASSERT(session, next_ref->state != WT_REF_SPLIT); if (next_ref->state == WT_REF_DELETED && __wt_delete_page_skip(session, next_ref) && - WT_ATOMIC_CAS4(next_ref->state, - WT_REF_DELETED, WT_REF_SPLIT)) + __wt_atomic_casv32( + &next_ref->state, WT_REF_DELETED, WT_REF_SPLIT)) deleted_entries++; } @@ -994,7 +1000,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, */ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(parent) == pindex); WT_INTL_INDEX_SET(parent, alloc_index); - split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1); + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); alloc_index = NULL; #ifdef HAVE_DIAGNOSTIC diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c index 6285edde217..9a0584d3217 100644 --- a/src/third_party/wiredtiger/src/btree/bt_stat.c +++ b/src/third_party/wiredtiger/src/btree/bt_stat.c @@ -8,10 +8,11 @@ #include "wt_internal.h" -static int __stat_page(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS *); -static void __stat_page_col_var(WT_PAGE *, WT_DSRC_STATS *); -static void __stat_page_row_int(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS *); -static void __stat_page_row_leaf(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS *); +static int __stat_page(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **); +static void __stat_page_col_var(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **); +static void __stat_page_row_int(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **); +static void + __stat_page_row_leaf(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **); /* * __wt_btree_stat_init -- @@ -23,22 +24,22 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_BM *bm; WT_BTREE *btree; WT_DECL_RET; - WT_DSRC_STATS *stats; + WT_DSRC_STATS **stats; WT_REF *next_walk; btree = 
S2BT(session); bm = btree->bm; - stats = &btree->dhandle->stats; + stats = btree->dhandle->stats; - WT_RET(bm->stat(bm, session, stats)); + WT_RET(bm->stat(bm, session, stats[0])); - WT_STAT_SET(stats, btree_fixed_len, btree->bitcnt); - WT_STAT_SET(stats, btree_maximum_depth, btree->maximum_depth); - WT_STAT_SET(stats, btree_maxintlpage, btree->maxintlpage); - WT_STAT_SET(stats, btree_maxintlkey, btree->maxintlkey); - WT_STAT_SET(stats, btree_maxleafpage, btree->maxleafpage); - WT_STAT_SET(stats, btree_maxleafkey, btree->maxleafkey); - WT_STAT_SET(stats, btree_maxleafvalue, btree->maxleafvalue); + WT_STAT_SET(session, stats, btree_fixed_len, btree->bitcnt); + WT_STAT_SET(session, stats, btree_maximum_depth, btree->maximum_depth); + WT_STAT_SET(session, stats, btree_maxintlpage, btree->maxintlpage); + WT_STAT_SET(session, stats, btree_maxintlkey, btree->maxintlkey); + WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage); + WT_STAT_SET(session, stats, btree_maxleafkey, btree->maxleafkey); + WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue); /* Everything else is really, really expensive. */ if (!F_ISSET(cst, WT_CONN_STAT_ALL)) @@ -47,14 +48,14 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) /* * Clear the statistics we're about to count. 
*/ - WT_STAT_SET(stats, btree_column_deleted, 0); - WT_STAT_SET(stats, btree_column_fix, 0); - WT_STAT_SET(stats, btree_column_internal, 0); - WT_STAT_SET(stats, btree_column_variable, 0); - WT_STAT_SET(stats, btree_entries, 0); - WT_STAT_SET(stats, btree_overflow, 0); - WT_STAT_SET(stats, btree_row_internal, 0); - WT_STAT_SET(stats, btree_row_leaf, 0); + WT_STAT_SET(session, stats, btree_column_deleted, 0); + WT_STAT_SET(session, stats, btree_column_fix, 0); + WT_STAT_SET(session, stats, btree_column_internal, 0); + WT_STAT_SET(session, stats, btree_column_variable, 0); + WT_STAT_SET(session, stats, btree_entries, 0); + WT_STAT_SET(session, stats, btree_overflow, 0); + WT_STAT_SET(session, stats, btree_row_internal, 0); + WT_STAT_SET(session, stats, btree_row_leaf, 0); next_walk = NULL; while ((ret = __wt_tree_walk(session, &next_walk, NULL, 0)) == 0 && @@ -71,7 +72,7 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) * Stat any Btree page. */ static int -__stat_page(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS *stats) +__stat_page(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats) { /* * All internal pages and overflow pages are trivial, all we track is @@ -79,14 +80,15 @@ __stat_page(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS *stats) */ switch (page->type) { case WT_PAGE_COL_FIX: - WT_STAT_INCR(stats, btree_column_fix); - WT_STAT_INCRV(stats, btree_entries, page->pg_fix_entries); + WT_STAT_INCR(session, stats, btree_column_fix); + WT_STAT_INCRV( + session, stats, btree_entries, page->pg_fix_entries); break; case WT_PAGE_COL_INT: - WT_STAT_INCR(stats, btree_column_internal); + WT_STAT_INCR(session, stats, btree_column_internal); break; case WT_PAGE_COL_VAR: - __stat_page_col_var(page, stats); + __stat_page_col_var(session, page, stats); break; case WT_PAGE_ROW_INT: __stat_page_row_int(session, page, stats); @@ -104,7 +106,8 @@ __stat_page(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS *stats) * Stat a 
WT_PAGE_COL_VAR page. */ static void -__stat_page_col_var(WT_PAGE *page, WT_DSRC_STATS *stats) +__stat_page_col_var( + WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats) { WT_CELL *cell; WT_CELL_UNPACK *unpack, _unpack; @@ -118,7 +121,7 @@ __stat_page_col_var(WT_PAGE *page, WT_DSRC_STATS *stats) unpack = &_unpack; deleted_cnt = entry_cnt = ovfl_cnt = 0; - WT_STAT_INCR(stats, btree_column_variable); + WT_STAT_INCR(session, stats, btree_column_variable); /* * Walk the page counting regular items, adjusting if the item has been @@ -169,9 +172,9 @@ __stat_page_col_var(WT_PAGE *page, WT_DSRC_STATS *stats) else ++entry_cnt; - WT_STAT_INCRV(stats, btree_column_deleted, deleted_cnt); - WT_STAT_INCRV(stats, btree_entries, entry_cnt); - WT_STAT_INCRV(stats, btree_overflow, ovfl_cnt); + WT_STAT_INCRV(session, stats, btree_column_deleted, deleted_cnt); + WT_STAT_INCRV(session, stats, btree_entries, entry_cnt); + WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt); } /* @@ -180,7 +183,7 @@ __stat_page_col_var(WT_PAGE *page, WT_DSRC_STATS *stats) */ static void __stat_page_row_int( - WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS *stats) + WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats) { WT_BTREE *btree; WT_CELL *cell; @@ -190,7 +193,7 @@ __stat_page_row_int( btree = S2BT(session); ovfl_cnt = 0; - WT_STAT_INCR(stats, btree_row_internal); + WT_STAT_INCR(session, stats, btree_row_internal); /* * Overflow keys are hard: we have to walk the disk image to count them, @@ -204,7 +207,7 @@ __stat_page_row_int( ++ovfl_cnt; } - WT_STAT_INCRV(stats, btree_overflow, ovfl_cnt); + WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt); } /* @@ -213,7 +216,7 @@ __stat_page_row_int( */ static void __stat_page_row_leaf( - WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS *stats) + WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats) { WT_BTREE *btree; WT_CELL *cell; @@ -226,7 +229,7 @@ __stat_page_row_leaf( btree = S2BT(session); 
entry_cnt = ovfl_cnt = 0; - WT_STAT_INCR(stats, btree_row_leaf); + WT_STAT_INCR(session, stats, btree_row_leaf); /* * Walk any K/V pairs inserted into the page before the first from-disk @@ -267,6 +270,6 @@ __stat_page_row_leaf( ++ovfl_cnt; } - WT_STAT_INCRV(stats, btree_entries, entry_cnt); - WT_STAT_INCRV(stats, btree_overflow, ovfl_cnt); + WT_STAT_INCRV(session, stats, btree_entries, entry_cnt); + WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt); } diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index 838d778dadf..29ae5b185cd 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -259,7 +259,6 @@ __wt_cache_op(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, int op) break; case WT_SYNC_CLOSE: case WT_SYNC_DISCARD: - case WT_SYNC_DISCARD_FORCE: WT_ERR(__wt_evict_file(session, op)); break; WT_ILLEGAL_VALUE_ERR(session); diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c index 904a16a7548..095e439786c 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c @@ -26,13 +26,13 @@ static int __verify_dsk_row( WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *); #define WT_ERR_VRFY(session, ...) do { \ - if (!(F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK))) \ + if (!(F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))) \ __wt_errx(session, __VA_ARGS__); \ goto err; \ } while (0) #define WT_RET_VRFY(session, ...) 
do { \ - if (!(F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK))) \ + if (!(F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))) \ __wt_errx(session, __VA_ARGS__); \ return (WT_ERROR); \ } while (0) @@ -43,7 +43,7 @@ static int __verify_dsk_row( */ int __wt_verify_dsk_image(WT_SESSION_IMPL *session, - const char *addr, const WT_PAGE_HEADER *dsk, size_t size, int empty_page_ok) + const char *tag, const WT_PAGE_HEADER *dsk, size_t size, int empty_page_ok) { const uint8_t *p, *end; u_int i; @@ -63,7 +63,7 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, default: WT_RET_VRFY(session, "page at %s has an invalid type of %" PRIu32, - addr, dsk->type); + tag, dsk->type); } /* Check the page record number. */ @@ -75,7 +75,7 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, break; WT_RET_VRFY(session, "%s page at %s has a record number of zero", - __wt_page_type_string(dsk->type), addr); + __wt_page_type_string(dsk->type), tag); case WT_PAGE_BLOCK_MANAGER: case WT_PAGE_OVFL: case WT_PAGE_ROW_INT: @@ -84,7 +84,7 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, break; WT_RET_VRFY(session, "%s page at %s has a non-zero record number", - __wt_page_type_string(dsk->type), addr); + __wt_page_type_string(dsk->type), tag); } /* Check the page flags. 
*/ @@ -99,7 +99,7 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, WT_RET_VRFY(session, "page at %s has invalid flags combination: 0x%" PRIx8, - addr, dsk->flags); + tag, dsk->flags); if (LF_ISSET(WT_PAGE_EMPTY_V_ALL)) LF_CLR(WT_PAGE_EMPTY_V_ALL); if (LF_ISSET(WT_PAGE_EMPTY_V_NONE)) @@ -108,14 +108,14 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, if (flags != 0) WT_RET_VRFY(session, "page at %s has invalid flags set: 0x%" PRIx8, - addr, flags); + tag, flags); /* Unused bytes */ for (p = dsk->unused, i = sizeof(dsk->unused); i > 0; --i) if (*p != '\0') WT_RET_VRFY(session, "page at %s has non-zero unused page header bytes", - addr); + tag); /* * Any bytes after the data chunk should be nul bytes; ignore if the @@ -129,7 +129,7 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, if (*p != '\0') WT_RET_VRFY(session, "%s page at %s has non-zero trailing bytes", - __wt_page_type_string(dsk->type), addr); + __wt_page_type_string(dsk->type), tag); } /* Check for empty pages, then verify the items on the page. 
*/ @@ -141,28 +141,28 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, case WT_PAGE_ROW_LEAF: if (!empty_page_ok && dsk->u.entries == 0) WT_RET_VRFY(session, "%s page at %s has no entries", - __wt_page_type_string(dsk->type), addr); + __wt_page_type_string(dsk->type), tag); break; case WT_PAGE_BLOCK_MANAGER: case WT_PAGE_OVFL: if (dsk->u.datalen == 0) WT_RET_VRFY(session, "%s page at %s has no data", - __wt_page_type_string(dsk->type), addr); + __wt_page_type_string(dsk->type), tag); break; } switch (dsk->type) { case WT_PAGE_COL_INT: - return (__verify_dsk_col_int(session, addr, dsk)); + return (__verify_dsk_col_int(session, tag, dsk)); case WT_PAGE_COL_FIX: - return (__verify_dsk_col_fix(session, addr, dsk)); + return (__verify_dsk_col_fix(session, tag, dsk)); case WT_PAGE_COL_VAR: - return (__verify_dsk_col_var(session, addr, dsk)); + return (__verify_dsk_col_var(session, tag, dsk)); case WT_PAGE_ROW_INT: case WT_PAGE_ROW_LEAF: - return (__verify_dsk_row(session, addr, dsk)); + return (__verify_dsk_row(session, tag, dsk)); case WT_PAGE_BLOCK_MANAGER: case WT_PAGE_OVFL: - return (__verify_dsk_chunk(session, addr, dsk, dsk->u.datalen)); + return (__verify_dsk_chunk(session, tag, dsk, dsk->u.datalen)); WT_ILLEGAL_VALUE(session); } /* NOTREACHED */ @@ -173,9 +173,9 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, * Verify a single Btree page as read from disk. 
*/ int -__wt_verify_dsk(WT_SESSION_IMPL *session, const char *addr, WT_ITEM *buf) +__wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf) { - return (__wt_verify_dsk_image(session, addr, buf->data, buf->size, 0)); + return (__wt_verify_dsk_image(session, tag, buf->data, buf->size, 0)); } /* @@ -184,7 +184,7 @@ __wt_verify_dsk(WT_SESSION_IMPL *session, const char *addr, WT_ITEM *buf) */ static int __verify_dsk_row( - WT_SESSION_IMPL *session, const char *addr, const WT_PAGE_HEADER *dsk) + WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk) { WT_BM *bm; WT_BTREE *btree; @@ -221,15 +221,15 @@ __verify_dsk_row( /* Carefully unpack the cell. */ if (__wt_cell_unpack_safe(cell, unpack, end) != 0) { - ret = __err_cell_corrupted(session, cell_num, addr); + ret = __err_cell_corrupted(session, cell_num, tag); goto err; } /* Check the raw and collapsed cell types. */ WT_ERR(__err_cell_type( - session, cell_num, addr, unpack->raw, dsk->type)); + session, cell_num, tag, unpack->raw, dsk->type)); WT_ERR(__err_cell_type( - session, cell_num, addr, unpack->type, dsk->type)); + session, cell_num, tag, unpack->type, dsk->type)); cell_type = unpack->type; /* @@ -256,7 +256,7 @@ __verify_dsk_row( WT_ERR_VRFY(session, "cell %" PRIu32 " on page at %s is the " "first of two adjacent keys", - cell_num - 1, addr); + cell_num - 1, tag); } last_cell_type = WAS_KEY; break; @@ -269,14 +269,14 @@ __verify_dsk_row( switch (last_cell_type) { case FIRST: WT_ERR_VRFY(session, - "page at %s begins with a value", addr); + "page at %s begins with a value", tag); case WAS_KEY: break; case WAS_VALUE: WT_ERR_VRFY(session, "cell %" PRIu32 " on page at %s is the " "first of two adjacent values", - cell_num - 1, addr); + cell_num - 1, tag); } last_cell_type = WAS_VALUE; break; @@ -327,7 +327,7 @@ __verify_dsk_row( "the %" PRIu32 " key on page at %s is the first " "non-overflow key on the page and has a non-zero " "prefix compression value", - cell_num, addr); + cell_num, 
tag); /* Confirm the prefix compression count is possible. */ if (cell_num > 1 && prefix > last->size) @@ -335,7 +335,7 @@ __verify_dsk_row( "key %" PRIu32 " on page at %s has a prefix " "compression count of %" PRIu32 ", larger than " "the length of the previous key, %" WT_SIZET_FMT, - cell_num, addr, prefix, last->size); + cell_num, tag, prefix, last->size); /* * If Huffman decoding required, unpack the cell to build the @@ -394,7 +394,7 @@ key_compare: /* WT_ERR_VRFY(session, "the %" PRIu32 " and %" PRIu32 " keys on " "page at %s are incorrectly sorted", - cell_num - 2, cell_num, addr); + cell_num - 2, cell_num, tag); } /* @@ -414,7 +414,7 @@ key_compare: /* } WT_ASSERT(session, last != current); } - WT_ERR(__verify_dsk_memsize(session, addr, dsk, cell)); + WT_ERR(__verify_dsk_memsize(session, tag, dsk, cell)); /* * On row-store internal pages, and on row-store leaf pages, where the @@ -428,7 +428,7 @@ key_compare: /* "%s page at %s has a key count of %" PRIu32 " and a " "physical entry count of %" PRIu32, __wt_page_type_string(dsk->type), - addr, key_cnt, dsk->u.entries); + tag, key_cnt, dsk->u.entries); if (dsk->type == WT_PAGE_ROW_LEAF && F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL) && key_cnt != dsk->u.entries) @@ -437,7 +437,7 @@ key_compare: /* "key count of %" PRIu32 " and a physical entry count of %" PRIu32, __wt_page_type_string(dsk->type), - addr, key_cnt, dsk->u.entries); + tag, key_cnt, dsk->u.entries); if (dsk->type == WT_PAGE_ROW_LEAF && F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE) && key_cnt * 2 != dsk->u.entries) @@ -446,10 +446,10 @@ key_compare: /* "key count of %" PRIu32 " and a physical entry count of %" PRIu32, __wt_page_type_string(dsk->type), - addr, key_cnt, dsk->u.entries); + tag, key_cnt, dsk->u.entries); if (0) { -eof: ret = __err_eof(session, cell_num, addr); +eof: ret = __err_eof(session, cell_num, tag); } if (0) { @@ -468,7 +468,7 @@ err: if (ret == 0) */ static int __verify_dsk_col_int( - WT_SESSION_IMPL *session, const char *addr, const WT_PAGE_HEADER 
*dsk) + WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk) { WT_BM *bm; WT_BTREE *btree; @@ -488,19 +488,19 @@ __verify_dsk_col_int( /* Carefully unpack the cell. */ if (__wt_cell_unpack_safe(cell, unpack, end) != 0) - return (__err_cell_corrupted(session, cell_num, addr)); + return (__err_cell_corrupted(session, cell_num, tag)); /* Check the raw and collapsed cell types. */ WT_RET(__err_cell_type( - session, cell_num, addr, unpack->raw, dsk->type)); + session, cell_num, tag, unpack->raw, dsk->type)); WT_RET(__err_cell_type( - session, cell_num, addr, unpack->type, dsk->type)); + session, cell_num, tag, unpack->type, dsk->type)); /* Check if any referenced item is entirely in the file. */ if (!bm->addr_valid(bm, session, unpack->data, unpack->size)) - return (__err_eof(session, cell_num, addr)); + return (__err_eof(session, cell_num, tag)); } - WT_RET(__verify_dsk_memsize(session, addr, dsk, cell)); + WT_RET(__verify_dsk_memsize(session, tag, dsk, cell)); return (0); } @@ -511,7 +511,7 @@ __verify_dsk_col_int( */ static int __verify_dsk_col_fix( - WT_SESSION_IMPL *session, const char *addr, const WT_PAGE_HEADER *dsk) + WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk) { WT_BTREE *btree; uint32_t datalen; @@ -519,7 +519,7 @@ __verify_dsk_col_fix( btree = S2BT(session); datalen = __bitstr_size(btree->bitcnt * dsk->u.entries); - return (__verify_dsk_chunk(session, addr, dsk, datalen)); + return (__verify_dsk_chunk(session, tag, dsk, datalen)); } /* @@ -528,7 +528,7 @@ __verify_dsk_col_fix( */ static int __verify_dsk_col_var( - WT_SESSION_IMPL *session, const char *addr, const WT_PAGE_HEADER *dsk) + WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk) { WT_BM *bm; WT_BTREE *btree; @@ -555,19 +555,19 @@ __verify_dsk_col_var( /* Carefully unpack the cell. 
*/ if (__wt_cell_unpack_safe(cell, unpack, end) != 0) - return (__err_cell_corrupted(session, cell_num, addr)); + return (__err_cell_corrupted(session, cell_num, tag)); /* Check the raw and collapsed cell types. */ WT_RET(__err_cell_type( - session, cell_num, addr, unpack->raw, dsk->type)); + session, cell_num, tag, unpack->raw, dsk->type)); WT_RET(__err_cell_type( - session, cell_num, addr, unpack->type, dsk->type)); + session, cell_num, tag, unpack->type, dsk->type)); cell_type = unpack->type; /* Check if any referenced item is entirely in the file. */ if (cell_type == WT_CELL_VALUE_OVFL && !bm->addr_valid(bm, session, unpack->data, unpack->size)) - return (__err_eof(session, cell_num, addr)); + return (__err_eof(session, cell_num, tag)); /* * Compare the last two items and see if reconciliation missed @@ -586,7 +586,7 @@ match_err: WT_RET_VRFY(session, "data entries %" PRIu32 " and %" PRIu32 " on page at %s are identical and should " "have been run-length encoded", - cell_num - 1, cell_num, addr); + cell_num - 1, cell_num, tag); switch (cell_type) { case WT_CELL_DEL: @@ -604,7 +604,7 @@ match_err: WT_RET_VRFY(session, break; } } - WT_RET(__verify_dsk_memsize(session, addr, dsk, cell)); + WT_RET(__verify_dsk_memsize(session, tag, dsk, cell)); return (0); } @@ -615,7 +615,7 @@ match_err: WT_RET_VRFY(session, */ static int __verify_dsk_memsize(WT_SESSION_IMPL *session, - const char *addr, const WT_PAGE_HEADER *dsk, WT_CELL *cell) + const char *tag, const WT_PAGE_HEADER *dsk, WT_CELL *cell) { size_t len; @@ -630,7 +630,7 @@ __verify_dsk_memsize(WT_SESSION_IMPL *session, WT_RET_VRFY(session, "%s page at %s has %" WT_SIZET_FMT " unexpected bytes of data " "after the last cell", - __wt_page_type_string(dsk->type), addr, len); + __wt_page_type_string(dsk->type), tag, len); } /* @@ -639,7 +639,7 @@ __verify_dsk_memsize(WT_SESSION_IMPL *session, */ static int __verify_dsk_chunk(WT_SESSION_IMPL *session, - const char *addr, const WT_PAGE_HEADER *dsk, uint32_t datalen) + 
const char *tag, const WT_PAGE_HEADER *dsk, uint32_t datalen) { WT_BTREE *btree; uint8_t *p, *end; @@ -655,14 +655,14 @@ __verify_dsk_chunk(WT_SESSION_IMPL *session, if (p + datalen > end) WT_RET_VRFY(session, "data on page at %s extends past the end of the page", - addr); + tag); /* Any bytes after the data chunk should be nul bytes. */ for (p += datalen; p < end; ++p) if (*p != '\0') WT_RET_VRFY(session, "%s page at %s has non-zero trailing bytes", - __wt_page_type_string(dsk->type), addr); + __wt_page_type_string(dsk->type), tag); return (0); } @@ -673,11 +673,11 @@ __verify_dsk_chunk(WT_SESSION_IMPL *session, */ static int __err_cell_corrupted( - WT_SESSION_IMPL *session, uint32_t entry_num, const char *addr) + WT_SESSION_IMPL *session, uint32_t entry_num, const char *tag) { WT_RET_VRFY(session, "item %" PRIu32 " on page at %s is a corrupted cell", - entry_num, addr); + entry_num, tag); } /* @@ -686,7 +686,7 @@ __err_cell_corrupted( */ static int __err_cell_type(WT_SESSION_IMPL *session, - uint32_t entry_num, const char *addr, uint8_t cell_type, uint8_t dsk_type) + uint32_t entry_num, const char *tag, uint8_t cell_type, uint8_t dsk_type) { switch (cell_type) { case WT_CELL_ADDR_DEL: @@ -735,7 +735,7 @@ __err_cell_type(WT_SESSION_IMPL *session, WT_RET_VRFY(session, "illegal cell and page type combination: cell %" PRIu32 " on page at %s is a %s cell on a %s page", - entry_num, addr, + entry_num, tag, __wt_cell_type_string(cell_type), __wt_page_type_string(dsk_type)); } @@ -744,10 +744,10 @@ __err_cell_type(WT_SESSION_IMPL *session, * Generic item references non-existent file pages error. 
*/ static int -__err_eof(WT_SESSION_IMPL *session, uint32_t entry_num, const char *addr) +__err_eof(WT_SESSION_IMPL *session, uint32_t entry_num, const char *tag) { WT_RET_VRFY(session, "off-page item %" PRIu32 " on page at %s references non-existent file pages", - entry_num, addr); + entry_num, tag); } diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c index f2868afe13a..4affa7fa62a 100644 --- a/src/third_party/wiredtiger/src/btree/row_key.c +++ b/src/third_party/wiredtiger/src/btree/row_key.c @@ -448,7 +448,8 @@ next: switch (direction) { * update the page's memory footprint, on failure, free * the allocated memory. */ - if (WT_ATOMIC_CAS8(WT_ROW_KEY_COPY(rip), copy, ikey)) + if (__wt_atomic_cas_ptr( + (void *)&WT_ROW_KEY_COPY(rip), copy, ikey)) __wt_cache_page_inmem_incr(session, page, sizeof(WT_IKEY) + ikey->size); else @@ -525,7 +526,7 @@ __wt_row_ikey(WT_SESSION_IMPL *session, WT_ASSERT(session, oldv == 0 || (oldv & WT_IK_FLAG) != 0); WT_ASSERT(session, ref->state != WT_REF_SPLIT); WT_ASSERT(session, - WT_ATOMIC_CAS8(ref->key.ikey, (WT_IKEY *)oldv, ikey)); + __wt_atomic_cas_ptr(&ref->key.ikey, (WT_IKEY *)oldv, ikey)); } #else ref->key.ikey = ikey; diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c index 3331632b725..49a749b8a02 100644 --- a/src/third_party/wiredtiger/src/btree/row_modify.c +++ b/src/third_party/wiredtiger/src/btree/row_modify.c @@ -26,7 +26,7 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) * Select a spinlock for the page; let the barrier immediately below * keep things from racing too badly. 
*/ - modify->page_lock = ++conn->page_lock_cnt % WT_PAGE_LOCKS(conn); + modify->page_lock = ++conn->page_lock_cnt % WT_PAGE_LOCKS; /* * Multiple threads of control may be searching and deciding to modify @@ -34,7 +34,7 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) * footprint, else discard the modify structure, another thread did the * work. */ - if (WT_ATOMIC_CAS8(page->modify, NULL, modify)) + if (__wt_atomic_cas_ptr(&page->modify, NULL, modify)) __wt_cache_page_inmem_incr(session, page, sizeof(*modify)); else __wt_free(session, modify); @@ -316,7 +316,7 @@ __wt_update_obsolete_check( */ if (first != NULL && (next = first->next) != NULL && - WT_ATOMIC_CAS8(first->next, next, NULL)) + __wt_atomic_cas_ptr(&first->next, next, NULL)) return (next); /* diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 62114ec8b5a..b28fca3a71b 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1724,7 +1724,8 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) "encryption=(secretkey=)," "exclusive=," "log=(recover=)," - "use_environment_priv=,", &base_config)); + "use_environment_priv=," + "verbose=,", &base_config)); WT_ERR(__wt_config_init(session, &parser, base_config)); while ((ret = __wt_config_next(&parser, &k, &v)) == 0) { /* Fix quoting for non-trivial settings. */ @@ -1794,6 +1795,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_DECL_RET; const WT_NAME_FLAG *ft; WT_SESSION_IMPL *session; + int64_t config_base_set; const char *enc_cfg[] = { NULL, NULL }; char version[64]; @@ -1835,6 +1837,10 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open); cfg[1] = config; + /* Capture the config_base setting file for later use. 
*/ + WT_ERR(__wt_config_gets(session, cfg, "config_base", &cval)); + config_base_set = cval.val; + /* Configure error messages so we get them right early. */ WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval)); if (cval.len != 0) @@ -1872,7 +1878,10 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR) >= (int)sizeof(version), ENOMEM); __conn_config_append(cfg, version); - WT_ERR(__conn_config_file(session, WT_BASECONFIG, 0, cfg, i1)); + + /* Ignore the base_config file if we config_base set to false. */ + if (config_base_set != 0) + WT_ERR(__conn_config_file(session, WT_BASECONFIG, 0, cfg, i1)); __conn_config_append(cfg, config); WT_ERR(__conn_config_file(session, WT_USERCONFIG, 1, cfg, i2)); WT_ERR(__conn_config_env(session, cfg, i3)); diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c index d62425fe536..85e4074d15b 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache.c @@ -178,12 +178,12 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) { WT_CACHE *cache; WT_CONNECTION_IMPL *conn; - WT_CONNECTION_STATS *stats; + WT_CONNECTION_STATS **stats; uint64_t inuse, leaf, used; conn = S2C(session); cache = conn->cache; - stats = &conn->stats; + stats = conn->stats; inuse = __wt_cache_bytes_inuse(cache); /* @@ -193,19 +193,23 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) used = cache->bytes_overflow + cache->bytes_internal; leaf = inuse > used ? 
inuse - used : 0; - WT_STAT_SET(stats, cache_bytes_max, conn->cache_size); - WT_STAT_SET(stats, cache_bytes_inuse, inuse); + WT_STAT_SET(session, stats, cache_bytes_max, conn->cache_size); + WT_STAT_SET(session, stats, cache_bytes_inuse, inuse); - WT_STAT_SET(stats, cache_overhead, cache->overhead_pct); - WT_STAT_SET(stats, cache_pages_inuse, __wt_cache_pages_inuse(cache)); - WT_STAT_SET(stats, cache_bytes_dirty, __wt_cache_dirty_inuse(cache)); - WT_STAT_SET(stats, + WT_STAT_SET(session, stats, cache_overhead, cache->overhead_pct); + WT_STAT_SET( + session, stats, cache_pages_inuse, __wt_cache_pages_inuse(cache)); + WT_STAT_SET( + session, stats, cache_bytes_dirty, __wt_cache_dirty_inuse(cache)); + WT_STAT_SET(session, stats, cache_eviction_maximum_page_size, cache->evict_max_page_size); - WT_STAT_SET(stats, cache_pages_dirty, cache->pages_dirty); + WT_STAT_SET(session, stats, cache_pages_dirty, cache->pages_dirty); - WT_STAT_SET(stats, cache_bytes_internal, cache->bytes_internal); - WT_STAT_SET(stats, cache_bytes_overflow, cache->bytes_overflow); - WT_STAT_SET(stats, cache_bytes_leaf, leaf); + WT_STAT_SET( + session, stats, cache_bytes_internal, cache->bytes_internal); + WT_STAT_SET( + session, stats, cache_bytes_overflow, cache->bytes_overflow); + WT_STAT_SET(session, stats, cache_bytes_leaf, leaf); } /* diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index f1e67e2882b..bb3ffbf2b68 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -55,6 +55,8 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session, WT_ERR(__wt_spin_init( session, &dhandle->close_lock, "data handle close")); + __wt_stat_dsrc_init(dhandle); + *dhandlep = dhandle; return (0); diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c index c02f145a09a..1c4a631cc59 100644 --- 
a/src/third_party/wiredtiger/src/conn/conn_handle.c +++ b/src/third_party/wiredtiger/src/conn/conn_handle.c @@ -45,7 +45,7 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_conn_config_init(session)); /* Statistics. */ - __wt_stat_init_connection_stats(&conn->stats); + __wt_stat_connection_init(conn); /* Locks. */ WT_RET(__wt_spin_init(session, &conn->api_lock, "api")); @@ -58,8 +58,10 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure")); WT_RET(__wt_spin_init(session, &conn->schema_lock, "schema")); WT_RET(__wt_spin_init(session, &conn->table_lock, "table creation")); - WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS(conn), &conn->page_lock)); - for (i = 0; i < WT_PAGE_LOCKS(conn); ++i) + + WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock)); + WT_CACHE_LINE_ALIGNMENT_VERIFY(session, conn->page_lock); + for (i = 0; i < WT_PAGE_LOCKS; ++i) WT_RET( __wt_spin_init(session, &conn->page_lock[i], "btree page")); @@ -141,7 +143,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &conn->reconfig_lock); __wt_spin_destroy(session, &conn->schema_lock); __wt_spin_destroy(session, &conn->table_lock); - for (i = 0; i < WT_PAGE_LOCKS(conn); ++i) + for (i = 0; i < WT_PAGE_LOCKS; ++i) __wt_spin_destroy(session, &conn->page_lock[i]); __wt_free(session, conn->page_lock); diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index 51e42cc0aa4..397f3ff8c38 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -30,6 +30,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) /* WT_SESSION_IMPL array. */ WT_RET(__wt_calloc(session, conn->session_size, sizeof(WT_SESSION_IMPL), &conn->sessions)); + WT_CACHE_LINE_ALIGNMENT_VERIFY(session, conn->sessions); /* * Open the default session. 
We open this before starting service diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c index be842378cec..80698c536cd 100644 --- a/src/third_party/wiredtiger/src/conn/conn_stat.c +++ b/src/third_party/wiredtiger/src/conn/conn_stat.c @@ -42,13 +42,24 @@ __stat_sources_free(WT_SESSION_IMPL *session, char ***sources) void __wt_conn_stat_init(WT_SESSION_IMPL *session) { + WT_CONNECTION_IMPL *conn; + WT_CONNECTION_STATS **stats; + + conn = S2C(session); + stats = conn->stats; + __wt_async_stats_update(session); __wt_cache_stats_update(session); __wt_txn_stats_update(session); - WT_CONN_STAT(session, dh_conn_handle_count) = - S2C(session)->dhandle_count; - WT_CONN_STAT(session, file_open) = S2C(session)->open_file_count; + WT_STAT_SET(session, stats, file_open, conn->open_file_count); + WT_STAT_SET(session, + stats, session_cursor_open, conn->open_cursor_count); + WT_STAT_SET(session, stats, dh_conn_handle_count, conn->dhandle_count); + WT_STAT_SET(session, + stats, rec_split_stashed_objects, conn->split_stashed_objects); + WT_STAT_SET(session, + stats, rec_split_stashed_bytes, conn->split_stashed_bytes); } /* @@ -137,11 +148,11 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, int conn_stats) { WT_CONNECTION_IMPL *conn; WT_CURSOR *cursor; + WT_CURSOR_STAT *cst; WT_DECL_ITEM(tmp); WT_DECL_RET; - WT_STATS *stats; - u_int i; - uint64_t max; + int64_t *stats; + int i; const char *uri; const char *cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL }; @@ -165,15 +176,14 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, int conn_stats) */ switch (ret = __wt_curstat_open(session, uri, cfg, &cursor)) { case 0: - max = conn_stats ? 
- sizeof(WT_CONNECTION_STATS) / sizeof(WT_STATS) : - sizeof(WT_DSRC_STATS) / sizeof(WT_STATS); - for (i = 0, - stats = WT_CURSOR_STATS(cursor); i < max; ++i, ++stats) + cst = (WT_CURSOR_STAT *)cursor; + for (stats = cst->stats, i = 0; i < cst->stats_count; ++i) WT_ERR(__wt_fprintf(conn->stat_fp, - "%s %" PRIu64 " %s %s\n", - conn->stat_stamp, - stats->v, name, stats->desc)); + "%s %" PRId64 " %s %s\n", + conn->stat_stamp, stats[i], + name, conn_stats ? + __wt_stat_connection_desc(i) : + __wt_stat_dsrc_desc(i))); WT_ERR(cursor->close(cursor)); break; case EBUSY: diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c index da9838292d6..2f844baaa00 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_stat.c +++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c @@ -113,12 +113,12 @@ __curstat_get_value(WT_CURSOR *cursor, ...) if (F_ISSET(cursor, WT_CURSTD_RAW)) { WT_ERR(__wt_struct_size(session, &size, cursor->value_format, - cst->stats[WT_STAT_KEY_OFFSET(cst)].desc, + cst->stats_desc(WT_STAT_KEY_OFFSET(cst)), cst->pv.data, cst->v)); WT_ERR(__wt_buf_initsize(session, &cursor->value, size)); WT_ERR(__wt_struct_pack(session, cursor->value.mem, size, cursor->value_format, - cst->stats[WT_STAT_KEY_OFFSET(cst)].desc, + cst->stats_desc(WT_STAT_KEY_OFFSET(cst)), cst->pv.data, cst->v)); item = va_arg(ap, WT_ITEM *); @@ -130,7 +130,7 @@ __curstat_get_value(WT_CURSOR *cursor, ...) * pointer support isn't documented, but it's a cheap test. 
*/ if ((p = va_arg(ap, const char **)) != NULL) - *p = cst->stats[WT_STAT_KEY_OFFSET(cst)].desc; + *p = cst->stats_desc(WT_STAT_KEY_OFFSET(cst)); if ((p = va_arg(ap, const char **)) != NULL) *p = cst->pv.data; if ((v = va_arg(ap, uint64_t *)) != NULL) @@ -215,7 +215,7 @@ __curstat_next(WT_CURSOR *cursor) F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); WT_ERR(WT_NOTFOUND); } - cst->v = cst->stats[WT_STAT_KEY_OFFSET(cst)].v; + cst->v = (uint64_t)cst->stats[WT_STAT_KEY_OFFSET(cst)]; WT_ERR(__curstat_print_value(session, cst->v, &cst->pv)); F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); @@ -254,7 +254,7 @@ __curstat_prev(WT_CURSOR *cursor) WT_ERR(WT_NOTFOUND); } - cst->v = cst->stats[WT_STAT_KEY_OFFSET(cst)].v; + cst->v = (uint64_t)cst->stats[WT_STAT_KEY_OFFSET(cst)]; WT_ERR(__curstat_print_value(session, cst->v, &cst->pv)); F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); @@ -308,7 +308,7 @@ __curstat_search(WT_CURSOR *cursor) if (cst->key < WT_STAT_KEY_MIN(cst) || cst->key > WT_STAT_KEY_MAX(cst)) WT_ERR(WT_NOTFOUND); - cst->v = cst->stats[WT_STAT_KEY_OFFSET(cst)].v; + cst->v = (uint64_t)cst->stats[WT_STAT_KEY_OFFSET(cst)]; WT_ERR(__curstat_print_value(session, cst->v, &cst->pv)); F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); @@ -354,13 +354,14 @@ __curstat_conn_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) * Optionally clear the connection statistics. 
*/ __wt_conn_stat_init(session); - cst->u.conn_stats = conn->stats; + __wt_stat_connection_aggregate(conn->stats, &cst->u.conn_stats); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) - __wt_stat_refresh_connection_stats(&conn->stats); + __wt_stat_connection_clear_all(conn->stats); - cst->stats = (WT_STATS *)&cst->u.conn_stats; + cst->stats = (int64_t *)&cst->u.conn_stats; cst->stats_base = WT_CONNECTION_STATS_BASE; - cst->stats_count = sizeof(WT_CONNECTION_STATS) / sizeof(WT_STATS); + cst->stats_count = sizeof(WT_CONNECTION_STATS) / sizeof(int64_t); + cst->stats_desc = __wt_stat_connection_desc; } /* @@ -383,7 +384,7 @@ __curstat_file_init(WT_SESSION_IMPL *session, filename = uri; if (!WT_PREFIX_SKIP(filename, "file:")) return (EINVAL); - __wt_stat_init_dsrc_stats(&cst->u.dsrc_stats); + __wt_stat_dsrc_init_single(&cst->u.dsrc_stats); WT_RET(__wt_block_manager_size( session, filename, &cst->u.dsrc_stats)); __wt_curstat_dsrc_final(cst); @@ -398,9 +399,10 @@ __curstat_file_init(WT_SESSION_IMPL *session, * Optionally clear the data source statistics. 
*/ if ((ret = __wt_btree_stat_init(session, cst)) == 0) { - cst->u.dsrc_stats = dhandle->stats; + __wt_stat_dsrc_init_single(&cst->u.dsrc_stats); + __wt_stat_dsrc_aggregate(dhandle->stats, &cst->u.dsrc_stats); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) - __wt_stat_refresh_dsrc_stats(&dhandle->stats); + __wt_stat_dsrc_clear_all(dhandle->stats); __wt_curstat_dsrc_final(cst); } @@ -417,10 +419,10 @@ __curstat_file_init(WT_SESSION_IMPL *session, void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst) { - - cst->stats = (WT_STATS *)&cst->u.dsrc_stats; + cst->stats = (int64_t *)&cst->u.dsrc_stats; cst->stats_base = WT_DSRC_STATS_BASE; - cst->stats_count = sizeof(WT_DSRC_STATS) / sizeof(WT_STATS); + cst->stats_count = sizeof(WT_DSRC_STATS) / sizeof(int64_t); + cst->stats_desc = __wt_stat_dsrc_desc; } /* diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c index 858c6af6853..b7d8be14e5c 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_std.c +++ b/src/third_party/wiredtiger/src/cursor/cur_std.c @@ -463,16 +463,17 @@ __wt_cursor_close(WT_CURSOR *cursor) WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)cursor->session; - __wt_buf_free(session, &cursor->key); - __wt_buf_free(session, &cursor->value); if (F_ISSET(cursor, WT_CURSTD_OPEN)) { TAILQ_REMOVE(&session->cursors, cursor, q); + (void)__wt_atomic_sub32(&S2C(session)->open_cursor_count, 1); WT_STAT_FAST_DATA_DECR(session, session_cursor_open); - WT_STAT_FAST_CONN_ATOMIC_DECR(session, session_cursor_open); } + __wt_buf_free(session, &cursor->key); + __wt_buf_free(session, &cursor->value); + __wt_free(session, cursor->internal_uri); __wt_free(session, cursor->uri); __wt_overwrite_and_free(session, cursor); @@ -683,8 +684,8 @@ __wt_cursor_init(WT_CURSOR *cursor, TAILQ_INSERT_HEAD(&session->cursors, cursor, q); F_SET(cursor, WT_CURSTD_OPEN); + (void)__wt_atomic_add32(&S2C(session)->open_cursor_count, 1); WT_STAT_FAST_DATA_INCR(session, session_cursor_open); - 
WT_STAT_FAST_CONN_ATOMIC_INCR(session, session_cursor_open); *cursorp = (cdump != NULL) ? cdump : cursor; return (0); diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c index 38cfc07ac5b..35ff0e4329e 100644 --- a/src/third_party/wiredtiger/src/evict/evict_file.c +++ b/src/third_party/wiredtiger/src/evict/evict_file.c @@ -79,26 +79,22 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop) WT_ERR(__wt_evict(session, ref, 1)); break; case WT_SYNC_DISCARD: - WT_ASSERT(session, - __wt_page_can_evict(session, page, 0, NULL)); - __wt_evict_page_clean_update(session, ref, 1); - break; - case WT_SYNC_DISCARD_FORCE: /* - * Forced discard of the page, whether clean or dirty. - * If we see a dirty page in a forced discard, clean - * the page, both to keep statistics correct, and to - * let the page-discard function assert no dirty page - * is ever discarded. + * If we see a dirty page in a dead handle, clean the + * page, both to keep statistics correct, and to let + * the page-discard function assert no dirty page is + * ever discarded. 
*/ - if (__wt_page_is_modified(page)) { + if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) && + __wt_page_is_modified(page)) { page->modify->write_gen = 0; __wt_cache_dirty_decr(session, page); } - F_SET(session, WT_SESSION_DISCARD_FORCE); + WT_ASSERT(session, + F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || + __wt_page_can_evict(session, page, 0, NULL)); __wt_evict_page_clean_update(session, ref, 1); - F_CLR(session, WT_SESSION_DISCARD_FORCE); break; WT_ILLEGAL_VALUE_ERR(session); } diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index ced592cf9f4..d442a34de71 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -159,6 +159,7 @@ __evict_server(void *arg) WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION_IMPL *session; + u_int spins; session = arg; conn = S2C(session); @@ -176,7 +177,27 @@ __evict_server(void *arg) * otherwise we can block applications evicting large pages. */ if (!F_ISSET(cache, WT_CACHE_STUCK)) { - WT_ERR(__evict_clear_walks(session)); + for (spins = 0; (ret = __wt_spin_trylock( + session, &conn->dhandle_lock)) == EBUSY && + !F_ISSET(cache, WT_CACHE_CLEAR_WALKS); + spins++) { + if (spins < 1000) + __wt_yield(); + else + __wt_sleep(0, 1000); + } + /* + * If we gave up acquiring the lock, that indicates a + * session is waiting for us to clear walks. Do that + * as part of a normal pass (without the handle list + * lock) to avoid deadlock. + */ + if (ret == EBUSY) + continue; + WT_ERR(ret); + ret = __evict_clear_all_walks(session); + __wt_spin_unlock(session, &conn->dhandle_lock); + WT_ERR(ret); /* Next time we wake up, reverse the sweep direction. */ cache->flags ^= WT_CACHE_WALK_REVERSE; @@ -469,6 +490,18 @@ __evict_pass(WT_SESSION_IMPL *session) */ __wt_cache_read_gen_incr(session); + /* + * Update the oldest ID: we use it to decide whether pages are + * candidates for eviction. 
Without this, if all threads are + * blocked after a long-running transaction (such as a + * checkpoint) completes, we may never start evicting again. + * + * Do this every time the eviction server wakes up, regardless + * of whether the cache is full, to prevent the oldest ID + * falling too far behind. + */ + __wt_txn_update_oldest(session, 1); + WT_RET(__evict_has_work(session, &flags)); if (flags == 0) break; @@ -912,14 +945,6 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t flags) incr = dhandle_locked = 0; retries = 0; - /* - * Update the oldest ID: we use it to decide whether pages are - * candidates for eviction. Without this, if all threads are blocked - * after a long-running transaction (such as a checkpoint) completes, - * we may never start evicting again. - */ - __wt_txn_update_oldest(session, 1); - if (cache->evict_current == NULL) WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_empty); else @@ -972,7 +997,8 @@ retry: while (slot < max_entries && ret == 0) { } else { if (incr) { WT_ASSERT(session, dhandle->session_inuse > 0); - (void)WT_ATOMIC_SUB4(dhandle->session_inuse, 1); + (void)__wt_atomic_subi32( + &dhandle->session_inuse, 1); incr = 0; } dhandle = TAILQ_NEXT(dhandle, q); @@ -1016,7 +1042,7 @@ retry: while (slot < max_entries && ret == 0) { btree->evict_walk_skips = 0; prev_slot = slot; - (void)WT_ATOMIC_ADD4(dhandle->session_inuse, 1); + (void)__wt_atomic_addi32(&dhandle->session_inuse, 1); incr = 1; __wt_spin_unlock(session, &conn->dhandle_lock); dhandle_locked = 0; @@ -1051,7 +1077,7 @@ retry: while (slot < max_entries && ret == 0) { cache->evict_file_next = dhandle; WT_ASSERT(session, dhandle->session_inuse > 0); - (void)WT_ATOMIC_SUB4(dhandle->session_inuse, 1); + (void)__wt_atomic_subi32(&dhandle->session_inuse, 1); incr = 0; } @@ -1253,14 +1279,15 @@ fast: /* If the page can't be evicted, give up. */ * If we happen to end up on the root page, clear it. 
We have to track * hazard pointers, and the root page complicates that calculation. * - * Also clear the walk if we land on a page requiring forced eviction. - * The eviction server may go to sleep, and we want this page evicted - * as quickly as possible. + * If we land on a page requiring forced eviction, move on to the next + * page: we want this page evicted as quickly as possible. */ - if ((ref = btree->evict_ref) != NULL && (__wt_ref_is_root(ref) || - ref->page->read_gen == WT_READGEN_OLDEST)) { - btree->evict_ref = NULL; - WT_RET(__wt_page_release(session, ref, WT_READ_NO_EVICT)); + if ((ref = btree->evict_ref) != NULL) { + if (__wt_ref_is_root(ref)) + WT_RET(__evict_clear_walk(session)); + else if (ref->page->read_gen == WT_READGEN_OLDEST) + WT_RET_NOTFOUND_OK(__wt_tree_walk(session, + &btree->evict_ref, &pages_walked, walk_flags)); } WT_STAT_FAST_CONN_INCRV(session, cache_eviction_walk, pages_walked); @@ -1320,8 +1347,8 @@ __evict_get_ref( * multiple attempts to evict it. For pages that are already * being evicted, this operation will fail and we will move on. */ - if (!WT_ATOMIC_CAS4( - evict->ref->state, WT_REF_MEM, WT_REF_LOCKED)) { + if (!__wt_atomic_casv32( + &evict->ref->state, WT_REF_MEM, WT_REF_LOCKED)) { __evict_list_clear(session, evict); continue; } @@ -1330,7 +1357,7 @@ __evict_get_ref( * Increment the busy count in the btree handle to prevent it * from being closed under us. 
*/ - (void)WT_ATOMIC_ADD4(evict->btree->evict_busy, 1); + (void)__wt_atomic_addv32(&evict->btree->evict_busy, 1); *btreep = evict->btree; *refp = evict->ref; @@ -1409,7 +1436,7 @@ __evict_page(WT_SESSION_IMPL *session, int is_server) WT_WITH_BTREE(session, btree, ret = __wt_evict_page(session, ref)); - (void)WT_ATOMIC_SUB4(btree->evict_busy, 1); + (void)__wt_atomic_subv32(&btree->evict_busy, 1); WT_RET(ret); diff --git a/src/third_party/wiredtiger/src/include/async.h b/src/third_party/wiredtiger/src/include/async.h index c8d9fa5aa91..fb9a64e774d 100644 --- a/src/third_party/wiredtiger/src/include/async.h +++ b/src/third_party/wiredtiger/src/include/async.h @@ -6,20 +6,6 @@ * See the file LICENSE for redistribution information. */ -typedef enum { - WT_ASYNCOP_ENQUEUED, /* Placed on the work queue */ - WT_ASYNCOP_FREE, /* Able to be allocated to user */ - WT_ASYNCOP_READY, /* Allocated and ready for user to use */ - WT_ASYNCOP_WORKING /* Operation in progress by worker */ -} WT_ASYNC_STATE; - -typedef enum { - WT_ASYNC_FLUSH_NONE=0, /* No flush in progress */ - WT_ASYNC_FLUSH_COMPLETE, /* Notify flush caller it's done */ - WT_ASYNC_FLUSH_IN_PROGRESS, /* Prevent other callers */ - WT_ASYNC_FLUSHING /* Notify workers */ -} WT_ASYNC_FLUSH_STATE; - #define MAX_ASYNC_SLEEP_USECS 100000 /* Maximum sleep waiting for work */ #define MAX_ASYNC_YIELD 200 /* Maximum number of yields for work */ @@ -53,7 +39,13 @@ struct __wt_async_op_impl { uint64_t unique_id; /* Unique identifier. 
*/ WT_ASYNC_FORMAT *format; /* Format structure */ - WT_ASYNC_STATE state; /* Op state */ + +#define WT_ASYNCOP_ENQUEUED 0 /* Placed on the work queue */ +#define WT_ASYNCOP_FREE 1 /* Able to be allocated to user */ +#define WT_ASYNCOP_READY 2 /* Allocated, ready for user to use */ +#define WT_ASYNCOP_WORKING 3 /* Operation in progress by worker */ + uint32_t state; + WT_ASYNC_OPTYPE optype; /* Operation type */ }; @@ -89,9 +81,15 @@ struct __wt_async { uint64_t tail_slot; /* Worker slot consumed */ TAILQ_HEAD(__wt_async_format_qh, __wt_async_format) formatqh; - int cur_queue; /* Currently enqueued */ - int max_queue; /* Maximum enqueued */ - WT_ASYNC_FLUSH_STATE flush_state; /* Queue flush state */ + uint32_t cur_queue; /* Currently enqueued */ + uint32_t max_queue; /* Maximum enqueued */ + +#define WT_ASYNC_FLUSH_NONE 0 /* No flush in progress */ +#define WT_ASYNC_FLUSH_COMPLETE 1 /* Notify flush caller done */ +#define WT_ASYNC_FLUSH_IN_PROGRESS 2 /* Prevent other callers */ +#define WT_ASYNC_FLUSHING 3 /* Notify workers */ + uint32_t flush_state; + /* Notify any waiting threads when flushing is done. */ WT_CONDVAR *flush_cond; WT_ASYNC_OP_IMPL flush_op; /* Special flush op */ diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index f13504d66ca..4aa2b1c7a7d 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -656,14 +656,6 @@ struct __wt_page { * to the readers. If the evicting thread does not find a hazard pointer, * the page is evicted. 
*/ -typedef enum __wt_page_state { - WT_REF_DISK=0, /* Page is on disk */ - WT_REF_DELETED, /* Page is on disk, but deleted */ - WT_REF_LOCKED, /* Page locked for exclusive access */ - WT_REF_MEM, /* Page is in cache and valid */ - WT_REF_READING, /* Page being read */ - WT_REF_SPLIT /* Parent page split (WT_REF dead) */ -} WT_PAGE_STATE; /* * WT_PAGE_DELETED -- @@ -691,7 +683,13 @@ struct __wt_ref { WT_PAGE * volatile home; /* Reference page */ uint32_t pindex_hint; /* Reference page index hint */ - volatile WT_PAGE_STATE state; /* Page state */ +#define WT_REF_DISK 0 /* Page is on disk */ +#define WT_REF_DELETED 1 /* Page is on disk, but deleted */ +#define WT_REF_LOCKED 2 /* Page locked for exclusive access */ +#define WT_REF_MEM 3 /* Page is in cache and valid */ +#define WT_REF_READING 4 /* Page being read */ +#define WT_REF_SPLIT 5 /* Parent page split (WT_REF dead) */ + volatile uint32_t state; /* Page state */ /* * Address: on-page cell if read from backing block, off-page WT_ADDR @@ -958,7 +956,7 @@ struct __wt_insert { #define WT_PAGE_ALLOC_AND_SWAP(s, page, dest, v, count) do { \ if (((v) = (dest)) == NULL) { \ WT_ERR(__wt_calloc_def(s, count, &(v))); \ - if (WT_ATOMIC_CAS8(dest, NULL, v)) \ + if (__wt_atomic_cas_ptr(&dest, NULL, v)) \ __wt_cache_page_inmem_incr( \ s, page, (count) * sizeof(*(v))); \ else \ diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index da014a14e35..058a00d5a78 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -49,46 +49,68 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) WT_ASSERT(session, size < WT_EXABYTE); cache = S2C(session)->cache; - (void)WT_ATOMIC_ADD8(cache->bytes_inmem, size); - (void)WT_ATOMIC_ADD8(page->memory_footprint, size); + (void)__wt_atomic_add64(&cache->bytes_inmem, size); + (void)__wt_atomic_addsize(&page->memory_footprint, size); if 
(__wt_page_is_modified(page)) { - (void)WT_ATOMIC_ADD8(cache->bytes_dirty, size); - (void)WT_ATOMIC_ADD8(page->modify->bytes_dirty, size); + (void)__wt_atomic_add64(&cache->bytes_dirty, size); + (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); } /* Track internal and overflow size in cache. */ if (WT_PAGE_IS_INTERNAL(page)) - (void)WT_ATOMIC_ADD8(cache->bytes_internal, size); + (void)__wt_atomic_add64(&cache->bytes_internal, size); else if (page->type == WT_PAGE_OVFL) - (void)WT_ATOMIC_ADD8(cache->bytes_overflow, size); + (void)__wt_atomic_add64(&cache->bytes_overflow, size); } -/* - * WT_CACHE_DECR -- - * Macro to decrement a field by a size. - * - * Be defensive and don't underflow: a band-aid on a gaping wound, but underflow - * won't make things better no matter the problem (specifically, underflow makes - * eviction crazy trying to evict non-existent memory). +/* + * __wt_cache_decr_check_size -- + * Decrement a size_t cache value and check for underflow. */ +static inline void +__wt_cache_decr_check_size( + WT_SESSION_IMPL *session, size_t *vp, size_t v, const char *fld) +{ + if (__wt_atomic_subsize(vp, v) < WT_EXABYTE) + return; + #ifdef HAVE_DIAGNOSTIC -#define WT_CACHE_DECR(session, f, sz) do { \ - static int __first = 1; \ - if (WT_ATOMIC_SUB8(f, sz) > WT_EXABYTE) { \ - (void)WT_ATOMIC_ADD8(f, sz); \ - if (__first) { \ - __wt_errx(session, \ - "%s underflow: decrementing %" WT_SIZET_FMT,\ - #f, sz); \ - __first = 0; \ - } \ - } \ -} while (0) -#else -#define WT_CACHE_DECR(s, f, sz) do { \ - if (WT_ATOMIC_SUB8(f, sz) > WT_EXABYTE) \ - (void)WT_ATOMIC_ADD8(f, sz); \ -} while (0) + (void)__wt_atomic_addsize(vp, v); + + { + static int first = 1; + + if (!first) + return; + __wt_errx(session, "%s underflow: decrementing %" WT_SIZET_FMT, fld, v); + first = 0; + } #endif +} + +/* + * __wt_cache_decr_check_uint64 -- + * Decrement a uint64_t cache value and check for underflow. 
+ */ +static inline void +__wt_cache_decr_check_uint64( + WT_SESSION_IMPL *session, uint64_t *vp, size_t v, const char *fld) +{ + if (__wt_atomic_sub64(vp, v) < WT_EXABYTE) + return; + +#ifdef HAVE_DIAGNOSTIC + (void)__wt_atomic_add64(vp, v); + + { + static int first = 1; + + if (!first) + return; + __wt_errx(session, "%s underflow: decrementing %" WT_SIZET_FMT, fld, v); + first = 0; + } +#endif +} /* * __wt_cache_page_byte_dirty_decr -- @@ -128,9 +150,10 @@ __wt_cache_page_byte_dirty_decr( */ orig = page->modify->bytes_dirty; decr = WT_MIN(size, orig); - if (WT_ATOMIC_CAS8( - page->modify->bytes_dirty, orig, orig - decr)) { - WT_CACHE_DECR(session, cache->bytes_dirty, decr); + if (__wt_atomic_cassize( + &page->modify->bytes_dirty, orig, orig - decr)) { + __wt_cache_decr_check_uint64(session, + &cache->bytes_dirty, decr, "WT_CACHE.bytes_dirty"); break; } } @@ -149,15 +172,19 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) WT_ASSERT(session, size < WT_EXABYTE); - WT_CACHE_DECR(session, cache->bytes_inmem, size); - WT_CACHE_DECR(session, page->memory_footprint, size); + __wt_cache_decr_check_uint64( + session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem"); + __wt_cache_decr_check_size( + session, &page->memory_footprint, size, "WT_PAGE.memory_footprint"); if (__wt_page_is_modified(page)) __wt_cache_page_byte_dirty_decr(session, page, size); /* Track internal and overflow size in cache. 
*/ if (WT_PAGE_IS_INTERNAL(page)) - WT_CACHE_DECR(session, cache->bytes_internal, size); + __wt_cache_decr_check_uint64(session, + &cache->bytes_internal, size, "WT_CACHE.bytes_internal"); else if (page->type == WT_PAGE_OVFL) - WT_CACHE_DECR(session, cache->bytes_overflow, size); + __wt_cache_decr_check_uint64(session, + &cache->bytes_overflow, size, "WT_CACHE.bytes_overflow"); } /* @@ -172,15 +199,15 @@ __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page) size_t size; cache = S2C(session)->cache; - (void)WT_ATOMIC_ADD8(cache->pages_dirty, 1); + (void)__wt_atomic_add64(&cache->pages_dirty, 1); /* * Take care to read the memory_footprint once in case we are racing * with updates. */ size = page->memory_footprint; - (void)WT_ATOMIC_ADD8(cache->bytes_dirty, size); - (void)WT_ATOMIC_ADD8(page->modify->bytes_dirty, size); + (void)__wt_atomic_add64(&cache->bytes_dirty, size); + (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); } /* @@ -202,7 +229,7 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page) "count went negative"); cache->pages_dirty = 0; } else - (void)WT_ATOMIC_SUB8(cache->pages_dirty, 1); + (void)__wt_atomic_sub64(&cache->pages_dirty, 1); modify = page->modify; if (modify != NULL && modify->bytes_dirty != 0) @@ -224,12 +251,15 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) modify = page->modify; /* Update the bytes in-memory to reflect the eviction. */ - WT_CACHE_DECR(session, cache->bytes_inmem, page->memory_footprint); + __wt_cache_decr_check_uint64(session, + &cache->bytes_inmem, + page->memory_footprint, "WT_CACHE.bytes_inmem"); /* Update the bytes_internal value to reflect the eviction */ if (WT_PAGE_IS_INTERNAL(page)) - WT_CACHE_DECR(session, - cache->bytes_internal, page->memory_footprint); + __wt_cache_decr_check_uint64(session, + &cache->bytes_internal, + page->memory_footprint, "WT_CACHE.bytes_internal"); /* Update the cache's dirty-byte count. 
*/ if (modify != NULL && modify->bytes_dirty != 0) { @@ -239,13 +269,14 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) "dirty byte count went negative"); cache->bytes_dirty = 0; } else - WT_CACHE_DECR( - session, cache->bytes_dirty, modify->bytes_dirty); + __wt_cache_decr_check_uint64(session, + &cache->bytes_dirty, + modify->bytes_dirty, "WT_CACHE.bytes_dirty"); } /* Update pages and bytes evicted. */ - (void)WT_ATOMIC_ADD8(cache->bytes_evict, page->memory_footprint); - (void)WT_ATOMIC_ADD8(cache->pages_evict, 1); + (void)__wt_atomic_add64(&cache->bytes_evict, page->memory_footprint); + (void)__wt_atomic_add64(&cache->pages_evict, 1); } /* @@ -306,7 +337,7 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page) * Every time the page transitions from clean to dirty, update the cache * and transactional information. */ - if (WT_ATOMIC_ADD4(page->modify->write_gen, 1) == 1) { + if (__wt_atomic_add32(&page->modify->write_gen, 1) == 1) { __wt_cache_dirty_incr(session, page); /* @@ -1059,14 +1090,14 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) * reference without first locking the page, it could be evicted in * between. */ - locked = WT_ATOMIC_CAS4(ref->state, WT_REF_MEM, WT_REF_LOCKED); + locked = __wt_atomic_casv32(&ref->state, WT_REF_MEM, WT_REF_LOCKED); if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) { if (locked) ref->state = WT_REF_MEM; return (ret == 0 ? EBUSY : ret); } - (void)WT_ATOMIC_ADD4(btree->evict_busy, 1); + (void)__wt_atomic_addv32(&btree->evict_busy, 1); too_big = (page->memory_footprint > btree->maxmempage) ? 
1 : 0; if ((ret = __wt_evict_page(session, ref)) == 0) { @@ -1083,7 +1114,7 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) } else WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail); - (void)WT_ATOMIC_SUB4(btree->evict_busy, 1); + (void)__wt_atomic_subv32(&btree->evict_busy, 1); return (ret); } diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index b3c69c68964..64043035e76 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -180,13 +180,17 @@ struct __wt_connection_impl { WT_SPINLOCK table_lock; /* Table creation spinlock */ /* - * We distribute the btree page locks across a set of spin locks; it - * can't be an array, we impose cache-line alignment and gcc doesn't - * support that for arrays. Don't use too many: they are only held for - * very short operations, each one is 64 bytes, so 256 will fill the L1 - * cache on most CPUs. + * We distribute the btree page locks across a set of spin locks. Don't + * use too many: they are only held for very short operations, each one + * is 64 bytes, so 256 will fill the L1 cache on most CPUs. + * + * Use a prime number of buckets rather than assuming a good hash + * (Reference Sedgewick, Algorithms in C, "Hash Functions"). + * + * Note: this can't be an array, we impose cache-line alignment and gcc + * doesn't support that for arrays smaller than the alignment. */ -#define WT_PAGE_LOCKS(conn) 16 +#define WT_PAGE_LOCKS 17 WT_SPINLOCK *page_lock; /* Btree page spinlocks */ u_int page_lock_cnt; /* Next spinlock to use */ @@ -211,6 +215,8 @@ struct __wt_connection_impl { WT_FH *lock_fh; /* Lock file handle */ volatile uint64_t split_gen; /* Generation number for splits */ + uint64_t split_stashed_bytes; /* Atomic: split statistics */ + uint64_t split_stashed_objects; /* * The connection keeps a cache of data handles. 
The set of handles @@ -238,6 +244,7 @@ struct __wt_connection_impl { u_int open_btree_count; /* Locked: open writable btree count */ uint32_t next_file_id; /* Locked: file ID counter */ uint32_t open_file_count; /* Atomic: open file handle count */ + uint32_t open_cursor_count; /* Atomic: open cursor handle count */ /* * WiredTiger allocates space for 50 simultaneous sessions (threads of @@ -278,7 +285,12 @@ struct __wt_connection_impl { #define WT_CKPT_LOGSIZE(conn) ((conn)->ckpt_logsize != 0) wt_off_t ckpt_logsize; /* Checkpoint log size period */ uint32_t ckpt_signalled;/* Checkpoint signalled */ - uint64_t ckpt_usecs; /* Checkpoint period */ + + uint64_t ckpt_usecs; /* Checkpoint timer */ + uint64_t ckpt_time_max; /* Checkpoint time min/max */ + uint64_t ckpt_time_min; + uint64_t ckpt_time_recent; /* Checkpoint time recent/total */ + uint64_t ckpt_time_total; int compact_in_memory_pass; /* Compaction serialization */ @@ -290,7 +302,9 @@ struct __wt_connection_impl { #define WT_CONN_STAT_SIZE 0x20 /* "size" statistics configured */ uint32_t stat_flags; - WT_CONNECTION_STATS stats; /* Connection statistics */ + /* Connection statistics */ + WT_CONNECTION_STATS *stats[WT_COUNTER_SLOTS]; + WT_CONNECTION_STATS stat_array[WT_COUNTER_SLOTS]; WT_ASYNC *async; /* Async structure */ int async_cfg; /* Global async configuration */ diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index c53c5d762d5..2b3a3221004 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -303,9 +303,10 @@ struct __wt_cursor_stat { int notinitialized; /* Cursor not initialized */ int notpositioned; /* Cursor not positioned */ - WT_STATS *stats; /* Stats owned by the cursor */ - int stats_base; /* Base statistics value */ - int stats_count; /* Count of stats elements */ + int64_t *stats; /* Statistics */ + int stats_base; /* Base statistics value */ + int stats_count; /* 
Count of statistics values */ + const char *(*stats_desc)(int); /* Statistics descriptions */ union { /* Copies of the statistics */ WT_DSRC_STATS dsrc_stats; diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i index 143a8e87449..484af0b4a58 100644 --- a/src/third_party/wiredtiger/src/include/cursor.i +++ b/src/third_party/wiredtiger/src/include/cursor.i @@ -150,7 +150,7 @@ __wt_cursor_dhandle_incr_use(WT_SESSION_IMPL *session) dhandle = session->dhandle; /* If we open a handle with a time of death set, clear it. */ - if (WT_ATOMIC_ADD4(dhandle->session_inuse, 1) == 1 && + if (__wt_atomic_addi32(&dhandle->session_inuse, 1) == 1 && dhandle->timeofdeath != 0) dhandle->timeofdeath = 0; } @@ -168,7 +168,7 @@ __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session) /* If we close a handle with a time of death set, clear it. */ WT_ASSERT(session, dhandle->session_inuse > 0); - if (WT_ATOMIC_SUB4(dhandle->session_inuse, 1) == 0 && + if (__wt_atomic_subi32(&dhandle->session_inuse, 1) == 0 && dhandle->timeofdeath != 0) dhandle->timeofdeath = 0; } diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h index 3eb117b3341..75e3c9b4607 100644 --- a/src/third_party/wiredtiger/src/include/dhandle.h +++ b/src/third_party/wiredtiger/src/include/dhandle.h @@ -67,7 +67,9 @@ struct __wt_data_handle { */ WT_SPINLOCK close_lock; /* Lock to close the handle */ - WT_DSRC_STATS stats; /* Data-source statistics */ + /* Data-source statistics */ + WT_DSRC_STATS *stats[WT_COUNTER_SLOTS]; + WT_DSRC_STATS stat_array[WT_COUNTER_SLOTS]; /* Flags values over 0xff are reserved for WT_BTREE_* */ #define WT_DHANDLE_DEAD 0x01 /* Dead, awaiting discard */ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index e98545c3466..a7b02ec4a75 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ 
b/src/third_party/wiredtiger/src/include/extern.h @@ -159,8 +159,8 @@ extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst); extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, int op); extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]); -extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *addr, const WT_PAGE_HEADER *dsk, size_t size, int empty_page_ok); -extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *addr, WT_ITEM *buf); +extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, size_t size, int empty_page_ok); +extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf); extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags); extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd, int is_remove); extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt); @@ -654,11 +654,19 @@ __wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp extern void __wt_scr_discard(WT_SESSION_IMPL *session); extern void *__wt_ext_scr_alloc( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t size); extern void __wt_ext_scr_free(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *p); -extern void __wt_stat_init_dsrc_stats(WT_DSRC_STATS *stats); -extern void __wt_stat_refresh_dsrc_stats(void *stats_arg); -extern void __wt_stat_aggregate_dsrc_stats(const void *child, const void *parent); -extern void __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats); -extern void __wt_stat_refresh_connection_stats(void *stats_arg); +extern const char *__wt_stat_dsrc_desc(int slot); +extern void __wt_stat_dsrc_init_single(WT_DSRC_STATS *stats); +extern void 
__wt_stat_dsrc_init(WT_DATA_HANDLE *handle); +extern void __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats); +extern void __wt_stat_dsrc_clear_all(WT_DSRC_STATS **stats); +extern void __wt_stat_dsrc_aggregate_single( WT_DSRC_STATS *from, WT_DSRC_STATS *to); +extern void __wt_stat_dsrc_aggregate( WT_DSRC_STATS **from, WT_DSRC_STATS *to); +extern const char *__wt_stat_connection_desc(int slot); +extern void __wt_stat_connection_init_single(WT_CONNECTION_STATS *stats); +extern void __wt_stat_connection_init(WT_CONNECTION_IMPL *handle); +extern void __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats); +extern void __wt_stat_connection_clear_all(WT_CONNECTION_STATS **stats); +extern void __wt_stat_connection_aggregate( WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to); extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session); extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session); extern void __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force); diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h index 675ede9a8a0..031be7e7c59 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ b/src/third_party/wiredtiger/src/include/flags.h @@ -42,27 +42,25 @@ #define WT_READ_WONT_NEED 0x00000100 #define WT_SESSION_CAN_WAIT 0x00000001 #define WT_SESSION_CLEAR_EVICT_WALK 0x00000002 -#define WT_SESSION_DISCARD_FORCE 0x00000004 -#define WT_SESSION_INTERNAL 0x00000008 -#define WT_SESSION_LOCKED_CHECKPOINT 0x00000010 -#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000020 -#define WT_SESSION_LOCKED_SCHEMA 0x00000040 -#define WT_SESSION_LOCKED_TABLE 0x00000080 -#define WT_SESSION_LOGGING_INMEM 0x00000100 -#define WT_SESSION_NO_CACHE 0x00000200 -#define WT_SESSION_NO_CACHE_CHECK 0x00000400 -#define WT_SESSION_NO_DATA_HANDLES 0x00000800 -#define WT_SESSION_NO_LOGGING 0x00001000 -#define WT_SESSION_NO_SCHEMA_LOCK 0x00002000 -#define WT_SESSION_SALVAGE_CORRUPT_OK 0x00004000 -#define 
WT_SESSION_SERVER_ASYNC 0x00008000 +#define WT_SESSION_INTERNAL 0x00000004 +#define WT_SESSION_LOCKED_CHECKPOINT 0x00000008 +#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000010 +#define WT_SESSION_LOCKED_SCHEMA 0x00000020 +#define WT_SESSION_LOCKED_TABLE 0x00000040 +#define WT_SESSION_LOGGING_INMEM 0x00000080 +#define WT_SESSION_NO_CACHE 0x00000100 +#define WT_SESSION_NO_CACHE_CHECK 0x00000200 +#define WT_SESSION_NO_DATA_HANDLES 0x00000400 +#define WT_SESSION_NO_LOGGING 0x00000800 +#define WT_SESSION_NO_SCHEMA_LOCK 0x00001000 +#define WT_SESSION_QUIET_CORRUPT_FILE 0x00002000 +#define WT_SESSION_SERVER_ASYNC 0x00004000 #define WT_SKIP_UPDATE_ERR 0x00000002 #define WT_SKIP_UPDATE_RESTORE 0x00000004 #define WT_SYNC_CHECKPOINT 0x00000001 #define WT_SYNC_CLOSE 0x00000002 #define WT_SYNC_DISCARD 0x00000004 -#define WT_SYNC_DISCARD_FORCE 0x00000008 -#define WT_SYNC_WRITE_LEAVES 0x00000010 +#define WT_SYNC_WRITE_LEAVES 0x00000008 #define WT_TXN_LOG_CKPT_CLEANUP 0x00000001 #define WT_TXN_LOG_CKPT_PREPARE 0x00000002 #define WT_TXN_LOG_CKPT_START 0x00000004 diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h index 1c61768d372..3472985745e 100644 --- a/src/third_party/wiredtiger/src/include/gcc.h +++ b/src/third_party/wiredtiger/src/include/gcc.h @@ -85,56 +85,71 @@ * In summary, locking > barriers > volatile. * * To avoid locking shared data structures such as statistics and to permit - * atomic state changes, we rely on the WT_ATOMIC_ADD and WT_ATOMIC_CAS - * (compare and swap) operations. + * atomic state changes, we rely on the atomic-add and atomic-cas (compare and + * swap) operations. 
*/ -#define __WT_ATOMIC_ADD(v, val, n) \ - (WT_STATIC_ASSERT(sizeof(v) == (n)), __sync_add_and_fetch(&(v), val)) -#define __WT_ATOMIC_FETCH_ADD(v, val, n) \ - (WT_STATIC_ASSERT(sizeof(v) == (n)), __sync_fetch_and_add(&(v), val)) + #ifdef __clang__ /* - * We avoid __sync_bool_compare_and_swap with due to problems with - * optimization with some versions of clang. See - * http://llvm.org/bugs/show_bug.cgi?id=21499 for details. + * We avoid __sync_bool_compare_and_swap with due to problems with optimization + * with some versions of clang. See http://llvm.org/bugs/show_bug.cgi?id=21499 + * for details. */ -#define __WT_ATOMIC_CAS(v, old, new, n) \ - (WT_STATIC_ASSERT(sizeof(v) == (n)), \ - __sync_val_compare_and_swap(&(v), old, new) == (old)) +#define WT_ATOMIC_CAS(ptr, oldval, newval) \ + (__sync_val_compare_and_swap(ptr, oldval, newval) == oldval) #else -#define __WT_ATOMIC_CAS(v, old, new, n) \ - (WT_STATIC_ASSERT(sizeof(v) == (n)), \ - __sync_bool_compare_and_swap(&(v), old, new)) +#define WT_ATOMIC_CAS(ptr, oldval, newval) \ + __sync_bool_compare_and_swap(ptr, oldval, newval) #endif -#define __WT_ATOMIC_STORE(v, val, n) \ - (WT_STATIC_ASSERT(sizeof(v) == (n)), \ - __sync_lock_test_and_set(&(v), val)) -#define __WT_ATOMIC_SUB(v, val, n) \ - (WT_STATIC_ASSERT(sizeof(v) == (n)), __sync_sub_and_fetch(&(v), val)) - -#define WT_ATOMIC_ADD1(v, val) __WT_ATOMIC_ADD(v, val, 1) -#define WT_ATOMIC_FETCH_ADD1(v, val) __WT_ATOMIC_FETCH_ADD(v, val, 1) -#define WT_ATOMIC_CAS1(v, old, new) __WT_ATOMIC_CAS(v, old, new, 1) -#define WT_ATOMIC_STORE1(v, val) __WT_ATOMIC_STORE(v, val, 1) -#define WT_ATOMIC_SUB1(v, val) __WT_ATOMIC_SUB(v, val, 1) - -#define WT_ATOMIC_ADD2(v, val) __WT_ATOMIC_ADD(v, val, 2) -#define WT_ATOMIC_FETCH_ADD2(v, val) __WT_ATOMIC_FETCH_ADD(v, val, 2) -#define WT_ATOMIC_CAS2(v, old, new) __WT_ATOMIC_CAS(v, old, new, 2) -#define WT_ATOMIC_STORE2(v, val) __WT_ATOMIC_STORE(v, val, 2) -#define WT_ATOMIC_SUB2(v, val) __WT_ATOMIC_SUB(v, val, 2) - -#define 
WT_ATOMIC_ADD4(v, val) __WT_ATOMIC_ADD(v, val, 4) -#define WT_ATOMIC_FETCH_ADD4(v, val) __WT_ATOMIC_FETCH_ADD(v, val, 4) -#define WT_ATOMIC_CAS4(v, old, new) __WT_ATOMIC_CAS(v, old, new, 4) -#define WT_ATOMIC_STORE4(v, val) __WT_ATOMIC_STORE(v, val, 4) -#define WT_ATOMIC_SUB4(v, val) __WT_ATOMIC_SUB(v, val, 4) - -#define WT_ATOMIC_ADD8(v, val) __WT_ATOMIC_ADD(v, val, 8) -#define WT_ATOMIC_FETCH_ADD8(v, val) __WT_ATOMIC_FETCH_ADD(v, val, 8) -#define WT_ATOMIC_CAS8(v, old, new) __WT_ATOMIC_CAS(v, old, new, 8) -#define WT_ATOMIC_STORE8(v, val) __WT_ATOMIC_STORE(v, val, 8) -#define WT_ATOMIC_SUB8(v, val) __WT_ATOMIC_SUB(v, val, 8) + +#define WT_ATOMIC_FUNC(name, ret, type) \ +static inline ret \ +__wt_atomic_add##name(type *vp, type v) \ +{ \ + return (__sync_add_and_fetch(vp, v)); \ +} \ +static inline ret \ +__wt_atomic_fetch_add##name(type *vp, type v) \ +{ \ + return (__sync_fetch_and_add(vp, v)); \ +} \ +static inline ret \ +__wt_atomic_store##name(type *vp, type v) \ +{ \ + return (__sync_lock_test_and_set(vp, v)); \ +} \ +static inline ret \ +__wt_atomic_sub##name(type *vp, type v) \ +{ \ + return (__sync_sub_and_fetch(vp, v)); \ +} \ +static inline int \ +__wt_atomic_cas##name(type *vp, type old, type new) \ +{ \ + return (WT_ATOMIC_CAS(vp, old, new)); \ +} + +WT_ATOMIC_FUNC(8, uint8_t, uint8_t) +WT_ATOMIC_FUNC(16, uint16_t, uint16_t) +WT_ATOMIC_FUNC(32, uint32_t, uint32_t) +WT_ATOMIC_FUNC(v32, uint32_t, volatile uint32_t) +WT_ATOMIC_FUNC(i32, int32_t, int32_t) +WT_ATOMIC_FUNC(iv32, int32_t, volatile int32_t) +WT_ATOMIC_FUNC(64, uint64_t, uint64_t) +WT_ATOMIC_FUNC(v64, uint64_t, volatile uint64_t) +WT_ATOMIC_FUNC(i64, int64_t, int64_t) +WT_ATOMIC_FUNC(iv64, int64_t, volatile int64_t) +WT_ATOMIC_FUNC(size, size_t, size_t) + +/* + * __wt_atomic_cas_ptr -- + * Pointer compare and swap. 
+ */ +static inline int +__wt_atomic_cas_ptr(void *vp, void *old, void *new) +{ + return (WT_ATOMIC_CAS((void **)vp, old, new)); +} /* Compile read-write barrier */ #define WT_BARRIER() __asm__ volatile("" ::: "memory") diff --git a/src/third_party/wiredtiger/src/include/hardware.h b/src/third_party/wiredtiger/src/include/hardware.h index e3c098826d0..c9b72f8a609 100644 --- a/src/third_party/wiredtiger/src/include/hardware.h +++ b/src/third_party/wiredtiger/src/include/hardware.h @@ -33,8 +33,8 @@ uint8_t __orig; \ do { \ __orig = (p)->flags_atomic; \ - } while (!WT_ATOMIC_CAS1((p)->flags_atomic, \ - __orig, __orig | (uint8_t)(mask))); \ + } while (!__wt_atomic_cas8( \ + &(p)->flags_atomic, __orig, __orig | (uint8_t)(mask))); \ } while (0) #define F_CAS_ATOMIC(p, mask, ret) do { \ @@ -46,16 +46,20 @@ ret = EBUSY; \ break; \ } \ - } while (!WT_ATOMIC_CAS1((p)->flags_atomic, \ - __orig, __orig | (uint8_t)(mask))); \ + } while (!__wt_atomic_cas8( \ + &(p)->flags_atomic, __orig, __orig | (uint8_t)(mask))); \ } while (0) #define F_CLR_ATOMIC(p, mask) do { \ uint8_t __orig; \ do { \ __orig = (p)->flags_atomic; \ - } while (!WT_ATOMIC_CAS1((p)->flags_atomic, \ - __orig, __orig & ~(uint8_t)(mask))); \ + } while (!__wt_atomic_cas8( \ + &(p)->flags_atomic, __orig, __orig & ~(uint8_t)(mask))); \ } while (0) #define WT_CACHE_LINE_ALIGNMENT 64 /* Cache line alignment */ +#define WT_CACHE_LINE_ALIGNMENT_VERIFY(session, a) \ + WT_ASSERT(session, \ + WT_PTRDIFF(&(a)[1], &(a)[0]) >= WT_CACHE_LINE_ALIGNMENT && \ + WT_PTRDIFF(&(a)[1], &(a)[0]) % WT_CACHE_LINE_ALIGNMENT == 0) diff --git a/src/third_party/wiredtiger/src/include/lint.h b/src/third_party/wiredtiger/src/include/lint.h index 964aa5c118f..eba4a1c3b3f 100644 --- a/src/third_party/wiredtiger/src/include/lint.h +++ b/src/third_party/wiredtiger/src/include/lint.h @@ -18,40 +18,71 @@ #define WT_GCC_FUNC_ATTRIBUTE(x) #define WT_GCC_FUNC_DECL_ATTRIBUTE(x) -#define __WT_ATOMIC_ADD(v, val) \ - ((v) += (val)) -#define 
__WT_ATOMIC_FETCH_ADD(v, val) \ - ((v) += (val), (v)) -#define __WT_ATOMIC_CAS(v, old, new) \ - ((v) = ((v) == (old) ? (new) : (old)), (v) == (old)) -#define __WT_ATOMIC_STORE(v, val) \ - ((v) = (val)) -#define __WT_ATOMIC_SUB(v, val) \ - ((v) -= (val), (v)) - -#define WT_ATOMIC_ADD1(v, val) __WT_ATOMIC_ADD(v, val) -#define WT_ATOMIC_FETCH_ADD1(v, val) __WT_ATOMIC_FETCH_ADD(v, val) -#define WT_ATOMIC_CAS1(v, old, new) __WT_ATOMIC_CAS(v, old, new) -#define WT_ATOMIC_STORE1(v, val) __WT_ATOMIC_STORE(v, val) -#define WT_ATOMIC_SUB1(v, val) __WT_ATOMIC_SUB(v, val) - -#define WT_ATOMIC_ADD2(v, val) __WT_ATOMIC_ADD(v, val) -#define WT_ATOMIC_FETCH_ADD2(v, val) __WT_ATOMIC_FETCH_ADD(v, val) -#define WT_ATOMIC_CAS2(v, old, new) __WT_ATOMIC_CAS(v, old, new) -#define WT_ATOMIC_STORE2(v, val) __WT_ATOMIC_STORE(v, val) -#define WT_ATOMIC_SUB2(v, val) __WT_ATOMIC_SUB(v, val) - -#define WT_ATOMIC_ADD4(v, val) __WT_ATOMIC_ADD(v, val) -#define WT_ATOMIC_FETCH_ADD4(v, val) __WT_ATOMIC_FETCH_ADD(v, val) -#define WT_ATOMIC_CAS4(v, old, new) __WT_ATOMIC_CAS(v, old, new) -#define WT_ATOMIC_STORE4(v, val) __WT_ATOMIC_STORE(v, val) -#define WT_ATOMIC_SUB4(v, val) __WT_ATOMIC_SUB(v, val) - -#define WT_ATOMIC_ADD8(v, val) __WT_ATOMIC_ADD(v, val) -#define WT_ATOMIC_FETCH_ADD8(v, val) __WT_ATOMIC_FETCH_ADD(v, val) -#define WT_ATOMIC_CAS8(v, old, new) __WT_ATOMIC_CAS(v, old, new) -#define WT_ATOMIC_STORE8(v, val) __WT_ATOMIC_STORE(v, val) -#define WT_ATOMIC_SUB8(v, val) __WT_ATOMIC_SUB(v, val) +#define WT_ATOMIC_FUNC(name, ret, type) \ +static inline ret \ +__wt_atomic_add##name(type *vp, type v) \ +{ \ + *vp += v; \ + return (*vp); \ +} \ +static inline ret \ +__wt_atomic_fetch_add##name(type *vp, type v) \ +{ \ + type orig; \ + \ + old = *vp; \ + *vp += v; \ + return (old); \ +} \ +static inline ret \ +__wt_atomic_store##name(type *vp, type v) \ +{ \ + type orig; \ + \ + orig = *vp; \ + *vp = v; \ + return (old); \ +} \ +static inline ret \ +__wt_atomic_sub##name(type *vp, type v) \ +{ \ + 
*vp -= v; \ + return (*vp); \ +} \ +static inline int \ +__wt_atomic_cas##name(type *vp, type old, type new) \ +{ \ + if (*vp == old) { \ + *vp = new; \ + return (1); \ + } \ + return (0); \ +} + +WT_ATOMIC_FUNC(8, uint8_t, uint8_t) +WT_ATOMIC_FUNC(16, uint16_t, uint16_t) +WT_ATOMIC_FUNC(32, uint32_t, uint32_t) +WT_ATOMIC_FUNC(v32, uint32_t, volatile uint32_t) +WT_ATOMIC_FUNC(i32, int32_t, int32_t) +WT_ATOMIC_FUNC(iv32, int32_t, volatile int32_t) +WT_ATOMIC_FUNC(64, uint64_t, uint64_t) +WT_ATOMIC_FUNC(v64, uint64_t, volatile uint64_t) +WT_ATOMIC_FUNC(i64, int64_t, int64_t) +WT_ATOMIC_FUNC(iv64, int64_t, volatile int64_t) +WT_ATOMIC_FUNC(size, size_t, size_t) + +/* + * __wt_atomic_cas_ptr -- + * Pointer compare and swap. + */ +static inline int +__wt_atomic_cas_ptr(void *vp, void *old, void *new) { + if (*(void **)vp == old) { + *(void **)vp = new; + return (1); + } + return (0); +} static inline void WT_BARRIER(void) { return; } static inline void WT_FULL_BARRIER(void) { return; } diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h index fbb0a3e3842..949eb09ca30 100644 --- a/src/third_party/wiredtiger/src/include/log.h +++ b/src/third_party/wiredtiger/src/include/log.h @@ -77,7 +77,7 @@ #define WT_LOG_SLOT_PENDING 2 #define WT_LOG_SLOT_WRITTEN 3 #define WT_LOG_SLOT_READY 4 -typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct { +struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_logslot { volatile int64_t slot_state; /* Slot state */ uint64_t slot_group_size; /* Group size */ int32_t slot_error; /* Error value */ @@ -96,19 +96,18 @@ typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct { #define WT_SLOT_SYNC 0x04 /* Needs sync on release */ #define WT_SLOT_SYNC_DIR 0x08 /* Directory sync on release */ uint32_t flags; /* Flags */ -} WT_LOGSLOT; +}; #define WT_SLOT_INIT_FLAGS (WT_SLOT_BUFFERED) -typedef struct { +struct __wt_myslot { WT_LOGSLOT *slot; wt_off_t offset; -} WT_MYSLOT; - +}; 
/* Offset of first record */ #define WT_LOG_FIRST_RECORD log->allocsize -typedef struct { +struct __wt_log { uint32_t allocsize; /* Allocation alignment size */ wt_off_t log_written; /* Amount of log written this period */ /* @@ -155,18 +154,21 @@ typedef struct { * Our testing shows that the more consolidation we generate the * better the performance we see which equates to an active slot * slot count of one. + * + * Note: this can't be an array, we impose cache-line alignment and + * gcc doesn't support that for arrays. */ #define WT_SLOT_ACTIVE 1 #define WT_SLOT_POOL 128 - WT_LOGSLOT *slot_array[WT_SLOT_ACTIVE]; /* Active slots */ - WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */ - size_t slot_buf_size; /* Buffer size for slots */ + WT_LOGSLOT *slot_array[WT_SLOT_ACTIVE]; /* Active slots */ + WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */ + size_t slot_buf_size; /* Buffer size for slots */ #define WT_LOG_FORCE_CONSOLIDATE 0x01 /* Disable direct writes */ uint32_t flags; -} WT_LOG; +}; -typedef struct { +struct __wt_log_record { uint32_t len; /* 00-03: Record length including hdr */ uint32_t checksum; /* 04-07: Checksum of the record */ @@ -176,7 +178,7 @@ typedef struct { uint8_t unused[2]; /* 10-11: Padding */ uint32_t mem_len; /* 12-15: Uncompressed len if needed */ uint8_t record[0]; /* Beginning of actual data */ -} WT_LOG_RECORD; +}; /* * WT_LOG_DESC -- diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h index 08e57794fb8..11cf8204aec 100644 --- a/src/third_party/wiredtiger/src/include/lsm.h +++ b/src/third_party/wiredtiger/src/include/lsm.h @@ -74,7 +74,7 @@ struct __wt_cursor_lsm { * WT_LSM_CHUNK -- * A single chunk (file) in an LSM tree. 
*/ -struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_lsm_chunk { +struct __wt_lsm_chunk { const char *uri; /* Data source for this chunk */ const char *bloom_uri; /* URI of Bloom filter, if any */ struct timespec create_ts; /* Creation time (for rate limiting) */ @@ -177,16 +177,14 @@ struct __wt_lsm_tree { const char *collator_name; int collator_owned; - int refcnt; /* Number of users of the tree */ - int8_t exclusive; /* Tree is locked exclusively */ + uint32_t refcnt; /* Number of users of the tree */ + uint8_t exclusive; /* Tree is locked exclusively */ #define LSM_TREE_MAX_QUEUE 100 - int queue_ref; + uint32_t queue_ref; WT_RWLOCK *rwlock; TAILQ_ENTRY(__wt_lsm_tree) q; - WT_DSRC_STATS stats; /* LSM-level statistics */ - uint64_t dsk_gen; uint64_t ckpt_throttle; /* Rate limiting due to checkpoints */ @@ -221,9 +219,28 @@ struct __wt_lsm_tree { WT_LSM_CHUNK **old_chunks; /* Array of old LSM chunks */ size_t old_alloc; /* Space allocated for old chunks */ u_int nold_chunks; /* Number of old chunks */ - int freeing_old_chunks; /* Whether chunks are being freed */ + uint32_t freeing_old_chunks; /* Whether chunks are being freed */ uint32_t merge_aggressiveness; /* Increase amount of work per merge */ + /* + * We maintain a set of statistics outside of the normal statistics + * area, copying them into place when a statistics cursor is created. 
+ */ +#define WT_LSM_TREE_STAT_INCR(session, fld) do { \ + if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \ + ++(fld); \ +} while (0) +#define WT_LSM_TREE_STAT_INCRV(session, fld, v) do { \ + if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \ + (fld) += (int64_t)(v); \ +} while (0) + int64_t bloom_false_positive; + int64_t bloom_hit; + int64_t bloom_miss; + int64_t lsm_checkpoint_throttle; + int64_t lsm_lookup_no_bloom; + int64_t lsm_merge_throttle; + #define WT_LSM_TREE_ACTIVE 0x01 /* Workers are active */ #define WT_LSM_TREE_AGGRESSIVE_TIMER 0x02 /* Timer for merge aggression */ #define WT_LSM_TREE_COMPACTING 0x04 /* Tree being compacted */ diff --git a/src/third_party/wiredtiger/src/include/msvc.h b/src/third_party/wiredtiger/src/include/msvc.h index bc72ddf8193..f4d8ba52fc1 100644 --- a/src/third_party/wiredtiger/src/include/msvc.h +++ b/src/third_party/wiredtiger/src/include/msvc.h @@ -31,52 +31,56 @@ #define WT_GCC_FUNC_ATTRIBUTE(x) #define WT_GCC_FUNC_DECL_ATTRIBUTE(x) -#define __WT_ATOMIC_ADD(v, val, n, s, t) \ - (WT_STATIC_ASSERT(sizeof(v) == (n)), \ - _InterlockedExchangeAdd ## s((t*)&(v), (t)(val)) + (val)) -#define __WT_ATOMIC_FETCH_ADD(v, val, n, s, t) \ - (WT_STATIC_ASSERT(sizeof(v) == (n)), \ - _InterlockedExchangeAdd ## s((t*)&(v), (t)(val))) -#define __WT_ATOMIC_CAS(v, old, new, n, s, t) \ - (WT_STATIC_ASSERT(sizeof(v) == (n)), \ - _InterlockedCompareExchange ## s \ - ((t*)&(v), (t)(new), (t)(old)) == (t)(old)) -#define __WT_ATOMIC_STORE(v, val, n, s, t) \ - (WT_STATIC_ASSERT(sizeof(v) == (n)), \ - _InterlockedExchange ## s((t*)&(v), (t)(val))) -#define __WT_ATOMIC_SUB(v, val, n, s, t) \ - (WT_STATIC_ASSERT(sizeof(v) == (n)), \ - _InterlockedExchangeAdd ## s((t*)&(v), -(t) val) - (val)) +#define WT_ATOMIC_FUNC(name, ret, type, s, t) \ +static inline ret \ +__wt_atomic_add##name(type *vp, type v) \ +{ \ + return (_InterlockedExchangeAdd ## s((t *)(vp), (t)(v)) + (v)); \ +} \ +static inline ret \ 
+__wt_atomic_fetch_add##name(type *vp, type v) \ +{ \ + return (_InterlockedExchangeAdd ## s((t *)(vp), (t)(v))); \ +} \ +static inline ret \ +__wt_atomic_store##name(type *vp, type v) \ +{ \ + return (_InterlockedExchange ## s((t *)(vp), (t)(v))); \ +} \ +static inline ret \ +__wt_atomic_sub##name(type *vp, type v) \ +{ \ + return (_InterlockedExchangeAdd ## s((t *)(vp), - (t)v) - (v)); \ +} \ +static inline int \ +__wt_atomic_cas##name(type *vp, type old, type new) \ +{ \ + return (_InterlockedCompareExchange ## s \ + ((t *)(vp), (t)(new), (t)(old)) == (t)(old)); \ +} -#define WT_ATOMIC_ADD1(v, val) __WT_ATOMIC_ADD(v, val, 1, 8, char) -#define WT_ATOMIC_FETCH_ADD1(v, val) \ - __WT_ATOMIC_FETCH_ADD(v, val, 1, 8, char) -#define WT_ATOMIC_CAS1(v, old, new) __WT_ATOMIC_CAS(v, old, new, 1, 8, char) -#define WT_ATOMIC_STORE1(v, val) __WT_ATOMIC_STORE(v, val, 1, 8, char) -#define WT_ATOMIC_SUB1(v, val) __WT_ATOMIC_SUB(v, val, 1, 8, char) +WT_ATOMIC_FUNC(8, uint8_t, uint8_t, 8, char) +WT_ATOMIC_FUNC(16, uint16_t, uint16_t, 16, short) +WT_ATOMIC_FUNC(32, uint32_t, uint32_t, , long) +WT_ATOMIC_FUNC(v32, uint32_t, volatile uint32_t, , long) +WT_ATOMIC_FUNC(i32, int32_t, int32_t, , long) +WT_ATOMIC_FUNC(iv32, int32_t, volatile int32_t, , long) +WT_ATOMIC_FUNC(64, uint64_t, uint64_t, 64, __int64) +WT_ATOMIC_FUNC(v64, uint64_t, volatile uint64_t, 64, __int64) +WT_ATOMIC_FUNC(i64, int64_t, int64_t, 64, __int64) +WT_ATOMIC_FUNC(iv64, int64_t, volatile int64_t, 64, __int64) +WT_ATOMIC_FUNC(size, size_t, size_t, 64, __int64) -#define WT_ATOMIC_ADD2(v, val) __WT_ATOMIC_ADD(v, val, 2, 16, short) -#define WT_ATOMIC_FETCH_ADD2(v, val) \ - __WT_ATOMIC_FETCH_ADD(v, val, 2, 16, short) -#define WT_ATOMIC_CAS2(v, old, new) \ - __WT_ATOMIC_CAS(v, old, new, 2, 16, short) -#define WT_ATOMIC_STORE2(v, val) __WT_ATOMIC_STORE(v, val, 2, 16, short) -#define WT_ATOMIC_SUB2(v, val) __WT_ATOMIC_SUB(v, val, 2, 16, short) - -#define WT_ATOMIC_ADD4(v, val) __WT_ATOMIC_ADD(v, val, 4, , long) -#define 
WT_ATOMIC_FETCH_ADD4(v, val) __WT_ATOMIC_FETCH_ADD(v, val, 4, , long) -#define WT_ATOMIC_CAS4(v, old, new) __WT_ATOMIC_CAS(v, old, new, 4, , long) -#define WT_ATOMIC_STORE4(v, val) __WT_ATOMIC_STORE(v, val, 4, , long) -#define WT_ATOMIC_SUB4(v, val) __WT_ATOMIC_SUB(v, val, 4, , long) - -#define WT_ATOMIC_ADD8(v, val) __WT_ATOMIC_ADD(v, val, 8, 64, __int64) -#define WT_ATOMIC_FETCH_ADD8(v, val) \ - __WT_ATOMIC_FETCH_ADD(v, val, 8, 64, __int64) -#define WT_ATOMIC_CAS8(v, old, new) \ - __WT_ATOMIC_CAS(v, old, new, 8, 64, __int64) -#define WT_ATOMIC_STORE8(v, val) \ - __WT_ATOMIC_STORE(v, val, 8, 64, __int64) -#define WT_ATOMIC_SUB8(v, val) __WT_ATOMIC_SUB(v, val, 8, 64, __int64) +/* + * __wt_atomic_cas_ptr -- + * Pointer compare and swap. + */ +static inline int +__wt_atomic_cas_ptr(void *vp, void *old, void *new) +{ + return (_InterlockedCompareExchange64( + vp, (int64_t)new, (int64_t)old) == ((int64_t)old)); +} static inline void WT_BARRIER(void) { _ReadWriteBarrier(); } static inline void WT_FULL_BARRIER(void) { _mm_mfence(); } diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h index 7d901a38d0d..1f1bb8f4b5c 100644 --- a/src/third_party/wiredtiger/src/include/mutex.h +++ b/src/third_party/wiredtiger/src/include/mutex.h @@ -65,20 +65,21 @@ struct __wt_rwlock { #if SPINLOCK_TYPE == SPINLOCK_GCC -typedef volatile int WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) - WT_SPINLOCK; +struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_spinlock { + volatile int lock; +}; #elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\ SPINLOCK_TYPE == SPINLOCK_MSVC -typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct { +struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_spinlock { wt_mutex_t lock; const char *name; /* Statistics: mutex name */ int8_t initialized; /* Lock initialized, for cleanup */ -} WT_SPINLOCK; +}; #else diff --git 
a/src/third_party/wiredtiger/src/include/mutex.i b/src/third_party/wiredtiger/src/include/mutex.i index 8bca50635e6..5ea4583a2ab 100644 --- a/src/third_party/wiredtiger/src/include/mutex.i +++ b/src/third_party/wiredtiger/src/include/mutex.i @@ -31,7 +31,7 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) WT_UNUSED(session); WT_UNUSED(name); - *(t) = 0; + t->lock = 0; return (0); } @@ -44,7 +44,7 @@ __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { WT_UNUSED(session); - *(t) = 0; + t->lock = 0; } /* @@ -56,7 +56,7 @@ __wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { WT_UNUSED(session); - return (__sync_lock_test_and_set(t, 1) == 0 ? 0 : EBUSY); + return (__sync_lock_test_and_set(&t->lock, 1) == 0 ? 0 : EBUSY); } /* @@ -70,10 +70,10 @@ __wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) WT_UNUSED(session); - while (__sync_lock_test_and_set(t, 1)) { - for (i = 0; *t && i < WT_SPIN_COUNT; i++) + while (__sync_lock_test_and_set(&t->lock, 1)) { + for (i = 0; t->lock && i < WT_SPIN_COUNT; i++) WT_PAUSE(); - if (*t) + if (t->lock) __wt_yield(); } } @@ -87,7 +87,7 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { WT_UNUSED(session); - __sync_lock_release(t); + __sync_lock_release(&t->lock); } #elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ diff --git a/src/third_party/wiredtiger/src/include/queue.h b/src/third_party/wiredtiger/src/include/queue.h index 42e736e7b09..1d494875cf6 100644 --- a/src/third_party/wiredtiger/src/include/queue.h +++ b/src/third_party/wiredtiger/src/include/queue.h @@ -38,71 +38,17 @@ extern "C" { #endif /* - * This file defines four types of data structures: singly-linked lists, - * singly-linked tail queues, lists and tail queues. 
+ * WiredTiger only uses the TAILQ macros (we've gotten into trouble in the past
+ * by trying to use simpler queues and subsequently discovering a list we didn't
+ * think would ever get to be large could, under some workloads, become large,
+ * and the linear performance for removal of elements from simpler macros proved
+ * to be more trouble than the memory savings were worth).
 *
- * A singly-linked list is headed by a single forward pointer.  The elements
- * are singly linked for minimum space and pointer manipulation overhead at
- * the expense of O(n) removal for arbitrary elements.  New elements can be
- * added to the list after an existing element or at the head of the list.
- * Elements being removed from the head of the list should use the explicit
- * macro for this purpose for optimum efficiency.  A singly-linked list may
- * only be traversed in the forward direction.  Singly-linked lists are ideal
- * for applications with large datasets and few or no removals or for
- * implementing a LIFO queue.
+ * Additionally, we've altered the TAILQ_INSERT_XXX functions to include a write
+ * barrier, in order to ensure we never insert a partially built structure onto
+ * a list (this is required because the spinlocks we use don't necessarily imply
+ * a write barrier).
 *
- * A singly-linked tail queue is headed by a pair of pointers, one to the
- * head of the list and the other to the tail of the list.  The elements are
- * singly linked for minimum space and pointer manipulation overhead at the
- * expense of O(n) removal for arbitrary elements.  New elements can be added
- * to the list after an existing element, at the head of the list, or at the
- * end of the list.  Elements being removed from the head of the tail queue
- * should use the explicit macro for this purpose for optimum efficiency.
- * A singly-linked tail queue may only be traversed in the forward direction.
- * Singly-linked tail queues are ideal for applications with large datasets - * and few or no removals or for implementing a FIFO queue. - * - * A list is headed by a single forward pointer (or an array of forward - * pointers for a hash table header). The elements are doubly linked - * so that an arbitrary element can be removed without a need to - * traverse the list. New elements can be added to the list before - * or after an existing element or at the head of the list. A list - * may only be traversed in the forward direction. - * - * A tail queue is headed by a pair of pointers, one to the head of the - * list and the other to the tail of the list. The elements are doubly - * linked so that an arbitrary element can be removed without a need to - * traverse the list. New elements can be added to the list before or - * after an existing element, at the head of the list, or at the end of - * the list. A tail queue may be traversed in either direction. - * - * For details on the use of these macros, see the queue(3) manual page. - * - * - * SLIST LIST STAILQ TAILQ - * _HEAD + + + + - * _HEAD_INITIALIZER + + + + - * _ENTRY + + + + - * _INIT + + + + - * _EMPTY + + + + - * _FIRST + + + + - * _NEXT + + + + - * _PREV - - - + - * _LAST - - + + - * _FOREACH + + + + - * _FOREACH_REVERSE - - - + - * _INSERT_HEAD + + + + - * _INSERT_BEFORE - + - + - * _INSERT_AFTER + + + + - * _INSERT_TAIL - - + + - * _CONCAT - - + + - * _REMOVE_HEAD + - + - - * _REMOVE + + + + - * - */ - -/* - * XXX * We #undef all of the macros because there are incompatible versions of this * file and these macros on various systems. What makes the problem worse is * they are included and/or defined by system include files which we may have @@ -111,50 +57,7 @@ extern "C" { * several of the LIST_XXX macros. Visual C.NET 7.0 also defines some of these * same macros in Vc7\PlatformSDK\Include\WinNT.h. Make sure we use ours. 
*/ -#undef LIST_EMPTY -#undef LIST_ENTRY -#undef LIST_FIRST -#undef LIST_FOREACH -#undef LIST_HEAD -#undef LIST_HEAD_INITIALIZER -#undef LIST_INIT -#undef LIST_INSERT_AFTER -#undef LIST_INSERT_BEFORE -#undef LIST_INSERT_HEAD -#undef LIST_NEXT -#undef LIST_REMOVE -#undef QMD_TRACE_ELEM -#undef QMD_TRACE_HEAD -#undef QUEUE_MACRO_DEBUG -#undef SLIST_EMPTY -#undef SLIST_ENTRY -#undef SLIST_FIRST -#undef SLIST_FOREACH -#undef SLIST_FOREACH_PREVPTR -#undef SLIST_HEAD -#undef SLIST_HEAD_INITIALIZER -#undef SLIST_INIT -#undef SLIST_INSERT_AFTER -#undef SLIST_INSERT_HEAD -#undef SLIST_NEXT -#undef SLIST_REMOVE -#undef SLIST_REMOVE_HEAD -#undef STAILQ_CONCAT -#undef STAILQ_EMPTY -#undef STAILQ_ENTRY -#undef STAILQ_FIRST -#undef STAILQ_FOREACH -#undef STAILQ_HEAD -#undef STAILQ_HEAD_INITIALIZER -#undef STAILQ_INIT -#undef STAILQ_INSERT_AFTER -#undef STAILQ_INSERT_HEAD -#undef STAILQ_INSERT_TAIL -#undef STAILQ_LAST -#undef STAILQ_NEXT -#undef STAILQ_REMOVE -#undef STAILQ_REMOVE_HEAD -#undef STAILQ_REMOVE_HEAD_UNTIL + #undef TAILQ_CONCAT #undef TAILQ_EMPTY #undef TAILQ_ENTRY @@ -210,230 +113,6 @@ struct qm_trace { #endif /* QUEUE_MACRO_DEBUG */ /* - * Singly-linked List declarations. - */ -#define SLIST_HEAD(name, type) \ -struct name { \ - struct type *slh_first; /* first element */ \ -} - -#define SLIST_HEAD_INITIALIZER(head) \ - { NULL } - -#define SLIST_ENTRY(type) \ -struct { \ - struct type *sle_next; /* next element */ \ -} - -/* - * Singly-linked List functions. 
- */ -#define SLIST_EMPTY(head) ((head)->slh_first == NULL) - -#define SLIST_FIRST(head) ((head)->slh_first) - -#define SLIST_FOREACH(var, head, field) \ - for ((var) = SLIST_FIRST((head)); \ - (var); \ - (var) = SLIST_NEXT((var), field)) - -#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ - for ((varp) = &SLIST_FIRST((head)); \ - ((var) = *(varp)) != NULL; \ - (varp) = &SLIST_NEXT((var), field)) - -#define SLIST_INIT(head) do { \ - SLIST_FIRST((head)) = NULL; \ -} while (0) - -#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ - SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \ - SLIST_NEXT((slistelm), field) = (elm); \ -} while (0) - -#define SLIST_INSERT_HEAD(head, elm, field) do { \ - SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \ - SLIST_FIRST((head)) = (elm); \ -} while (0) - -#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) - -#define SLIST_REMOVE(head, elm, type, field) do { \ - if (SLIST_FIRST((head)) == (elm)) { \ - SLIST_REMOVE_HEAD((head), field); \ - } \ - else { \ - struct type *curelm = SLIST_FIRST((head)); \ - while (SLIST_NEXT(curelm, field) != (elm)) \ - curelm = SLIST_NEXT(curelm, field); \ - SLIST_NEXT(curelm, field) = \ - SLIST_NEXT(SLIST_NEXT(curelm, field), field); \ - } \ -} while (0) - -#define SLIST_REMOVE_HEAD(head, field) do { \ - SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ -} while (0) - -/* - * Singly-linked Tail queue declarations. - */ -#define STAILQ_HEAD(name, type) \ -struct name { \ - struct type *stqh_first;/* first element */ \ - struct type **stqh_last;/* addr of last next element */ \ -} - -#define STAILQ_HEAD_INITIALIZER(head) \ - { NULL, &(head).stqh_first } - -#define STAILQ_ENTRY(type) \ -struct { \ - struct type *stqe_next; /* next element */ \ -} - -/* - * Singly-linked Tail queue functions. 
- */ -#define STAILQ_CONCAT(head1, head2) do { \ - if (!STAILQ_EMPTY((head2))) { \ - *(head1)->stqh_last = (head2)->stqh_first; \ - (head1)->stqh_last = (head2)->stqh_last; \ - STAILQ_INIT((head2)); \ - } \ -} while (0) - -#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) - -#define STAILQ_FIRST(head) ((head)->stqh_first) - -#define STAILQ_FOREACH(var, head, field) \ - for ((var) = STAILQ_FIRST((head)); \ - (var); \ - (var) = STAILQ_NEXT((var), field)) - -#define STAILQ_INIT(head) do { \ - STAILQ_FIRST((head)) = NULL; \ - (head)->stqh_last = &STAILQ_FIRST((head)); \ -} while (0) - -#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ - if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\ - (head)->stqh_last = &STAILQ_NEXT((elm), field); \ - STAILQ_NEXT((tqelm), field) = (elm); \ -} while (0) - -#define STAILQ_INSERT_HEAD(head, elm, field) do { \ - if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \ - (head)->stqh_last = &STAILQ_NEXT((elm), field); \ - STAILQ_FIRST((head)) = (elm); \ -} while (0) - -#define STAILQ_INSERT_TAIL(head, elm, field) do { \ - STAILQ_NEXT((elm), field) = NULL; \ - *(head)->stqh_last = (elm); \ - (head)->stqh_last = &STAILQ_NEXT((elm), field); \ -} while (0) - -#define STAILQ_LAST(head, type, field) \ - (STAILQ_EMPTY((head)) ? 
\ - NULL : \ - ((struct type *) \ - ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) - -#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) - -#define STAILQ_REMOVE(head, elm, type, field) do { \ - if (STAILQ_FIRST((head)) == (elm)) { \ - STAILQ_REMOVE_HEAD((head), field); \ - } \ - else { \ - struct type *curelm = STAILQ_FIRST((head)); \ - while (STAILQ_NEXT(curelm, field) != (elm)) \ - curelm = STAILQ_NEXT(curelm, field); \ - if ((STAILQ_NEXT(curelm, field) = \ - STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\ - (head)->stqh_last = &STAILQ_NEXT((curelm), field);\ - } \ -} while (0) - -#define STAILQ_REMOVE_HEAD(head, field) do { \ - if ((STAILQ_FIRST((head)) = \ - STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \ - (head)->stqh_last = &STAILQ_FIRST((head)); \ -} while (0) - -#define STAILQ_REMOVE_HEAD_UNTIL(head, elm, field) do { \ - if ((STAILQ_FIRST((head)) = STAILQ_NEXT((elm), field)) == NULL) \ - (head)->stqh_last = &STAILQ_FIRST((head)); \ -} while (0) - -/* - * List declarations. - */ -#define LIST_HEAD(name, type) \ -struct name { \ - struct type *lh_first; /* first element */ \ -} - -#define LIST_HEAD_INITIALIZER(head) \ - { NULL } - -#define LIST_ENTRY(type) \ -struct { \ - struct type *le_next; /* next element */ \ - struct type **le_prev; /* address of previous next element */ \ -} - -/* - * List functions. 
- */ - -#define LIST_EMPTY(head) ((head)->lh_first == NULL) - -#define LIST_FIRST(head) ((head)->lh_first) - -#define LIST_FOREACH(var, head, field) \ - for ((var) = LIST_FIRST((head)); \ - (var); \ - (var) = LIST_NEXT((var), field)) - -#define LIST_INIT(head) do { \ - LIST_FIRST((head)) = NULL; \ -} while (0) - -#define LIST_INSERT_AFTER(listelm, elm, field) do { \ - if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\ - LIST_NEXT((listelm), field)->field.le_prev = \ - &LIST_NEXT((elm), field); \ - LIST_NEXT((listelm), field) = (elm); \ - (elm)->field.le_prev = &LIST_NEXT((listelm), field); \ -} while (0) - -#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ - (elm)->field.le_prev = (listelm)->field.le_prev; \ - LIST_NEXT((elm), field) = (listelm); \ - *(listelm)->field.le_prev = (elm); \ - (listelm)->field.le_prev = &LIST_NEXT((elm), field); \ -} while (0) - -#define LIST_INSERT_HEAD(head, elm, field) do { \ - if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \ - LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\ - LIST_FIRST((head)) = (elm); \ - (elm)->field.le_prev = &LIST_FIRST((head)); \ -} while (0) - -#define LIST_NEXT(elm, field) ((elm)->field.le_next) - -#define LIST_REMOVE(elm, field) do { \ - if (LIST_NEXT((elm), field) != NULL) \ - LIST_NEXT((elm), field)->field.le_prev = \ - (elm)->field.le_prev; \ - *(elm)->field.le_prev = LIST_NEXT((elm), field); \ -} while (0) - -/* * Tail queue declarations. 
*/ #define TAILQ_HEAD(name, type) \ @@ -488,6 +167,7 @@ struct { \ } while (0) #define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + WT_WRITE_BARRIER(); \ if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ TAILQ_NEXT((elm), field)->field.tqe_prev = \ &TAILQ_NEXT((elm), field); \ @@ -502,6 +182,7 @@ struct { \ } while (0) #define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + WT_WRITE_BARRIER(); \ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ TAILQ_NEXT((elm), field) = (listelm); \ *(listelm)->field.tqe_prev = (elm); \ @@ -511,6 +192,7 @@ struct { \ } while (0) #define TAILQ_INSERT_HEAD(head, elm, field) do { \ + WT_WRITE_BARRIER(); \ if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ TAILQ_FIRST((head))->field.tqe_prev = \ &TAILQ_NEXT((elm), field); \ @@ -523,6 +205,7 @@ struct { \ } while (0) #define TAILQ_INSERT_TAIL(head, elm, field) do { \ + WT_WRITE_BARRIER(); \ TAILQ_NEXT((elm), field) = NULL; \ (elm)->field.tqe_prev = (head)->tqh_last; \ *(head)->tqh_last = (elm); \ diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i index 0fc23348800..7b62e66eccb 100644 --- a/src/third_party/wiredtiger/src/include/serial.i +++ b/src/third_party/wiredtiger/src/include/serial.i @@ -56,7 +56,7 @@ __insert_simple_func(WT_SESSION_IMPL *session, for (i = 0; i < skipdepth; i++) { WT_INSERT *old_ins = *ins_stack[i]; if (old_ins != new_ins->next[i] || - !WT_ATOMIC_CAS8(*ins_stack[i], old_ins, new_ins)) + !__wt_atomic_cas_ptr(ins_stack[i], old_ins, new_ins)) return (i == 0 ? WT_RESTART : 0); } @@ -93,7 +93,7 @@ __insert_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head, for (i = 0; i < skipdepth; i++) { WT_INSERT *old_ins = *ins_stack[i]; if (old_ins != new_ins->next[i] || - !WT_ATOMIC_CAS8(*ins_stack[i], old_ins, new_ins)) + !__wt_atomic_cas_ptr(ins_stack[i], old_ins, new_ins)) return (i == 0 ? 
WT_RESTART : 0); if (ins_head->tail[i] == NULL || ins_stack[i] == &ins_head->tail[i]->next[i]) @@ -271,7 +271,7 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, * Swap the update into place. If that fails, a new update was added * after our search, we raced. Check if our update is still permitted. */ - while (!WT_ATOMIC_CAS8(*srch_upd, upd->next, upd)) { + while (!__wt_atomic_cas_ptr(srch_upd, upd->next, upd)) { if ((ret = __wt_txn_update_check( session, upd->next = *srch_upd)) != 0) { /* Free unused memory on error. */ diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 82df9a1a784..6ecb6b3a3c7 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -6,122 +6,217 @@ * See the file LICENSE for redistribution information. */ -struct __wt_stats { - const char *desc; /* text description */ - uint64_t v; /* 64-bit value */ -}; +/* + * Statistics counters: + * + * We use an array of statistics structures; threads write different structures + * to avoid writing the same cache line and incurring cache coherency overheads, + * which can dramatically slow fast and otherwise read-mostly workloads. + * + * With an 8B statistics value and 64B cache-line alignment, 8 values share the + * same cache line. There are collisions when different threads choose the same + * statistics structure and update values that live on the cache line. There is + * likely some locality however: a thread updating the cursor search statistic + * is likely to update other cursor statistics with a chance of hitting already + * cached values. + * + * The actual statistic value must be signed, because one thread might increment + * the value in its structure, and then another thread might decrement the same + * value in another structure (where the value was initially zero), so the value + * in the second thread's slot will go negative. 
+ * + * When reading a statistics value, the array values are summed and returned to + * the caller. The summation is performed without locking, so the value read + * may be inconsistent (and might be negative, if increments/decrements race + * with the reader). + * + * Choosing how many structures isn't easy: obviously, a smaller number creates + * more conflicts while a larger number uses more memory. + * + * Ideally, if the application running on the system is CPU-intensive, and using + * all CPUs on the system, we want to use the same number of slots as there are + * CPUs (because their L1 caches are the units of coherency). However, in + * practice we cannot easily determine how many CPUs are actually available to + * the application. + * + * Our next best option is to use the number of threads in the application as a + * heuristic for the number of CPUs (presumably, the application architect has + * figured out how many CPUs are available). However, inside WiredTiger we don't + * know when the application creates its threads. + * + * For now, we use a fixed number of slots. Ideally, we would approximate the + * largest number of cores we expect on any machine where WiredTiger might be + * run, however, we don't want to waste that much memory on smaller machines. + * As of 2015, machines with more than 24 CPUs are relatively rare. + * + * Default hash table size; use a prime number of buckets rather than assuming + * a good hash (Reference Sedgewick, Algorithms in C, "Hash Functions"). + */ +#define WT_COUNTER_SLOTS 23 /* - * Read/write statistics without any test for statistics configuration. + * WT_STATS_SLOT_ID is the thread's slot ID for the array of structures. + * + * Ideally, we want a slot per CPU, and we want each thread to index the slot + * corresponding to the CPU it runs on. 
Unfortunately, getting the ID of the + * current CPU is difficult: some operating systems provide a system call to + * acquire a CPU ID, but not all (regardless, making a system call to increment + * a statistics value is far too expensive). + * + * Our second-best option is to use the thread ID. Unfortunately, there is no + * portable way to obtain a unique thread ID that's a small-enough number to + * be used as an array index (portable thread IDs are usually a pointer or an + * opaque chunk, not a simple integer). + * + * Our solution is to use the session ID; there is normally a session per thread + * and the session ID is a small, monotonically increasing number. */ -#define WT_STAT(stats, fld) \ - ((stats)->fld.v) -#define WT_STAT_ATOMIC_DECRV(stats, fld, value) do { \ - (void)WT_ATOMIC_SUB8(WT_STAT(stats, fld), (value)); \ -} while (0) -#define WT_STAT_ATOMIC_DECR(stats, fld) WT_STAT_ATOMIC_DECRV(stats, fld, 1) -#define WT_STAT_ATOMIC_INCRV(stats, fld, value) do { \ - (void)WT_ATOMIC_ADD8(WT_STAT(stats, fld), (value)); \ -} while (0) -#define WT_STAT_ATOMIC_INCR(stats, fld) WT_STAT_ATOMIC_INCRV(stats, fld, 1) -#define WT_STAT_DECRV(stats, fld, value) do { \ - (stats)->fld.v -= (value); \ -} while (0) -#define WT_STAT_DECR(stats, fld) WT_STAT_DECRV(stats, fld, 1) -#define WT_STAT_INCRV(stats, fld, value) do { \ - (stats)->fld.v += (value); \ -} while (0) -#define WT_STAT_INCR(stats, fld) WT_STAT_INCRV(stats, fld, 1) -#define WT_STAT_SET(stats, fld, value) do { \ - (stats)->fld.v = (uint64_t)(value); \ -} while (0) +#define WT_STATS_SLOT_ID(session) \ + ((session)->id) % WT_COUNTER_SLOTS /* - * Read/write statistics if "fast" statistics are configured. + * Statistic structures are arrays of int64_t's. We have functions to read/write + * those structures regardless of the specific statistic structure we're working + * with, by translating statistics structure field names to structure offsets. + * + * Translate a statistic's value name to an offset. 
*/ -#define WT_STAT_FAST_ATOMIC_DECRV(session, stats, fld, value) do { \ - if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \ - WT_STAT_ATOMIC_DECRV(stats, fld, value); \ -} while (0) -#define WT_STAT_FAST_ATOMIC_DECR(session, stats, fld) \ - WT_STAT_FAST_ATOMIC_DECRV(session, stats, fld, 1) -#define WT_STAT_FAST_ATOMIC_INCRV(session, stats, fld, value) do { \ - if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \ - WT_STAT_ATOMIC_INCRV(stats, fld, value); \ +#define WT_STATS_FIELD_TO_SLOT(stats, fld) \ + (int)(&(stats)[0]->fld - (int64_t *)(stats)[0]) + +/* + * Sum the values from all structures in the array. + */ +static inline int64_t +__wt_stats_aggregate(void *stats_arg, int slot) +{ + int64_t **stats, aggr_v; + int i; + + stats = stats_arg; + for (aggr_v = 0, i = 0; i < WT_COUNTER_SLOTS; i++) + aggr_v += stats[i][slot]; + + /* + * This can race. However, any implementation with a single value can + * race as well, different threads could set the same counter value + * simultaneously. While we are making races more likely, we are not + * fundamentally weakening the isolation semantics found in updating a + * single value. + * + * Additionally, the aggregation can go negative (imagine a thread + * incrementing a value after aggregation has passed its slot and a + * second thread decrementing a value before aggregation has reached + * its slot). + * + * For historic API compatibility, the external type is a uint64_t; + * limit our return to positive values, negative numbers would just + * look really, really large. + */ + if (aggr_v < 0) + aggr_v = 0; + return (aggr_v); +} + +/* + * Clear the values in all structures in the array. + */ +static inline void +__wt_stats_clear(void *stats_arg, int slot) +{ + int64_t **stats; + int i; + + stats = stats_arg; + for (i = 0; i < WT_COUNTER_SLOTS; i++) + stats[i][slot] = 0; +} + +/* + * Read/write statistics without any test for statistics configuration. 
Reading + * and writing the field requires different actions: reading sums the values + * across the array of structures, writing updates a single structure's value. + */ +#define WT_STAT_READ(stats, fld) \ + __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_SLOT(stats, fld)) +#define WT_STAT_WRITE(session, stats, fld) \ + ((stats)[WT_STATS_SLOT_ID(session)]->fld); + +#define WT_STAT_DECRV(session, stats, fld, value) \ + (stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value) +#define WT_STAT_DECR(session, stats, fld) \ + WT_STAT_DECRV(session, stats, fld, 1) +#define WT_STAT_INCRV(session, stats, fld, value) \ + (stats)[WT_STATS_SLOT_ID(session)]->fld += (int64_t)(value) +#define WT_STAT_INCR(session, stats, fld) \ + WT_STAT_INCRV(session, stats, fld, 1) +#define WT_STAT_SET(session, stats, fld, value) do { \ + __wt_stats_clear(stats, WT_STATS_FIELD_TO_SLOT(stats, fld)); \ + (stats)[0]->fld = (int64_t)(value); \ } while (0) -#define WT_STAT_FAST_ATOMIC_INCR(session, stats, fld) \ - WT_STAT_FAST_ATOMIC_INCRV(session, stats, fld, 1) + +/* + * Update statistics if "fast" statistics are configured. 
+ */ #define WT_STAT_FAST_DECRV(session, stats, fld, value) do { \ if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \ - WT_STAT_DECRV(stats, fld, value); \ + WT_STAT_DECRV(session, stats, fld, value); \ } while (0) #define WT_STAT_FAST_DECR(session, stats, fld) \ WT_STAT_FAST_DECRV(session, stats, fld, 1) #define WT_STAT_FAST_INCRV(session, stats, fld, value) do { \ if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \ - WT_STAT_INCRV(stats, fld, value); \ + WT_STAT_INCRV(session, stats, fld, value); \ } while (0) #define WT_STAT_FAST_INCR(session, stats, fld) \ WT_STAT_FAST_INCRV(session, stats, fld, 1) #define WT_STAT_FAST_SET(session, stats, fld, value) do { \ if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \ - WT_STAT_SET(stats, fld, value); \ + WT_STAT_SET(session, stats, fld, value); \ } while (0) /* - * Read/write connection handle statistics if "fast" statistics are configured. + * Update connection handle statistics if "fast" statistics are configured. 
*/ -#define WT_STAT_FAST_CONN_ATOMIC_DECRV(session, fld, value) \ - WT_STAT_FAST_ATOMIC_DECRV(session, &S2C(session)->stats, fld, value) -#define WT_STAT_FAST_CONN_ATOMIC_DECR(session, fld) \ - WT_STAT_FAST_ATOMIC_DECR(session, &S2C(session)->stats, fld) -#define WT_STAT_FAST_CONN_ATOMIC_INCRV(session, fld, value) \ - WT_STAT_FAST_ATOMIC_INCRV(session, &S2C(session)->stats, fld, value) -#define WT_STAT_FAST_CONN_ATOMIC_INCR(session, fld) \ - WT_STAT_FAST_ATOMIC_INCR(session, &S2C(session)->stats, fld) #define WT_STAT_FAST_CONN_DECR(session, fld) \ - WT_STAT_FAST_DECR(session, &S2C(session)->stats, fld) + WT_STAT_FAST_DECR(session, S2C(session)->stats, fld) #define WT_STAT_FAST_CONN_DECRV(session, fld, value) \ - WT_STAT_FAST_DECRV(session, &S2C(session)->stats, fld, value) + WT_STAT_FAST_DECRV(session, S2C(session)->stats, fld, value) #define WT_STAT_FAST_CONN_INCR(session, fld) \ - WT_STAT_FAST_INCR(session, &S2C(session)->stats, fld) + WT_STAT_FAST_INCR(session, S2C(session)->stats, fld) #define WT_STAT_FAST_CONN_INCRV(session, fld, value) \ - WT_STAT_FAST_INCRV(session, &S2C(session)->stats, fld, value) + WT_STAT_FAST_INCRV(session, S2C(session)->stats, fld, value) #define WT_STAT_FAST_CONN_SET(session, fld, value) \ - WT_STAT_FAST_SET(session, &S2C(session)->stats, fld, value) + WT_STAT_FAST_SET(session, S2C(session)->stats, fld, value) /* - * Read/write data-source handle statistics if the data-source handle is set - * and "fast" statistics are configured. + * Update data-source handle statistics if "fast" statistics are configured + * and the data-source handle is set. * * XXX * We shouldn't have to check if the data-source handle is NULL, but it's - * useful until everything is converted to using data-source handles. + * necessary until everything is converted to using data-source handles. 
*/ #define WT_STAT_FAST_DATA_DECRV(session, fld, value) do { \ if ((session)->dhandle != NULL) \ WT_STAT_FAST_DECRV( \ - session, &(session)->dhandle->stats, fld, value); \ + session, (session)->dhandle->stats, fld, value); \ } while (0) #define WT_STAT_FAST_DATA_DECR(session, fld) \ WT_STAT_FAST_DATA_DECRV(session, fld, 1) #define WT_STAT_FAST_DATA_INCRV(session, fld, value) do { \ if ((session)->dhandle != NULL) \ WT_STAT_FAST_INCRV( \ - session, &(session)->dhandle->stats, fld, value); \ + session, (session)->dhandle->stats, fld, value); \ } while (0) #define WT_STAT_FAST_DATA_INCR(session, fld) \ WT_STAT_FAST_DATA_INCRV(session, fld, 1) #define WT_STAT_FAST_DATA_SET(session, fld, value) do { \ if ((session)->dhandle != NULL) \ WT_STAT_FAST_SET( \ - session, &(session)->dhandle->stats, fld, value); \ + session, (session)->dhandle->stats, fld, value); \ } while (0) -/* Connection handle statistics value. */ -#define WT_CONN_STAT(session, fld) \ - WT_STAT(&S2C(session)->stats, fld) - /* * DO NOT EDIT: automatically built by dist/stat.py. 
*/ @@ -132,150 +227,151 @@ struct __wt_stats { */ #define WT_CONNECTION_STATS_BASE 1000 struct __wt_connection_stats { - WT_STATS async_alloc_race; - WT_STATS async_alloc_view; - WT_STATS async_cur_queue; - WT_STATS async_flush; - WT_STATS async_full; - WT_STATS async_max_queue; - WT_STATS async_nowork; - WT_STATS async_op_alloc; - WT_STATS async_op_compact; - WT_STATS async_op_insert; - WT_STATS async_op_remove; - WT_STATS async_op_search; - WT_STATS async_op_update; - WT_STATS block_byte_map_read; - WT_STATS block_byte_read; - WT_STATS block_byte_write; - WT_STATS block_map_read; - WT_STATS block_preload; - WT_STATS block_read; - WT_STATS block_write; - WT_STATS cache_bytes_dirty; - WT_STATS cache_bytes_internal; - WT_STATS cache_bytes_inuse; - WT_STATS cache_bytes_leaf; - WT_STATS cache_bytes_max; - WT_STATS cache_bytes_overflow; - WT_STATS cache_bytes_read; - WT_STATS cache_bytes_write; - WT_STATS cache_eviction_app; - WT_STATS cache_eviction_checkpoint; - WT_STATS cache_eviction_clean; - WT_STATS cache_eviction_deepen; - WT_STATS cache_eviction_dirty; - WT_STATS cache_eviction_fail; - WT_STATS cache_eviction_force; - WT_STATS cache_eviction_force_delete; - WT_STATS cache_eviction_force_fail; - WT_STATS cache_eviction_hazard; - WT_STATS cache_eviction_internal; - WT_STATS cache_eviction_maximum_page_size; - WT_STATS cache_eviction_queue_empty; - WT_STATS cache_eviction_queue_not_empty; - WT_STATS cache_eviction_server_evicting; - WT_STATS cache_eviction_server_not_evicting; - WT_STATS cache_eviction_slow; - WT_STATS cache_eviction_split; - WT_STATS cache_eviction_walk; - WT_STATS cache_eviction_worker_evicting; - WT_STATS cache_inmem_split; - WT_STATS cache_overhead; - WT_STATS cache_pages_dirty; - WT_STATS cache_pages_inuse; - WT_STATS cache_read; - WT_STATS cache_write; - WT_STATS cond_wait; - WT_STATS cursor_create; - WT_STATS cursor_insert; - WT_STATS cursor_next; - WT_STATS cursor_prev; - WT_STATS cursor_remove; - WT_STATS cursor_reset; - WT_STATS 
cursor_search; - WT_STATS cursor_search_near; - WT_STATS cursor_update; - WT_STATS dh_conn_handle_count; - WT_STATS dh_session_handles; - WT_STATS dh_session_sweeps; - WT_STATS dh_sweep_close; - WT_STATS dh_sweep_ref; - WT_STATS dh_sweep_remove; - WT_STATS dh_sweep_tod; - WT_STATS dh_sweeps; - WT_STATS file_open; - WT_STATS log_buffer_size; - WT_STATS log_bytes_payload; - WT_STATS log_bytes_written; - WT_STATS log_close_yields; - WT_STATS log_compress_len; - WT_STATS log_compress_mem; - WT_STATS log_compress_small; - WT_STATS log_compress_write_fails; - WT_STATS log_compress_writes; - WT_STATS log_max_filesize; - WT_STATS log_prealloc_files; - WT_STATS log_prealloc_max; - WT_STATS log_prealloc_used; - WT_STATS log_release_write_lsn; - WT_STATS log_scan_records; - WT_STATS log_scan_rereads; - WT_STATS log_scans; - WT_STATS log_slot_closes; - WT_STATS log_slot_coalesced; - WT_STATS log_slot_consolidated; - WT_STATS log_slot_joins; - WT_STATS log_slot_races; - WT_STATS log_slot_toobig; - WT_STATS log_slot_toosmall; - WT_STATS log_slot_transitions; - WT_STATS log_sync; - WT_STATS log_sync_dir; - WT_STATS log_write_lsn; - WT_STATS log_writes; - WT_STATS lsm_checkpoint_throttle; - WT_STATS lsm_merge_throttle; - WT_STATS lsm_rows_merged; - WT_STATS lsm_work_queue_app; - WT_STATS lsm_work_queue_manager; - WT_STATS lsm_work_queue_max; - WT_STATS lsm_work_queue_switch; - WT_STATS lsm_work_units_created; - WT_STATS lsm_work_units_discarded; - WT_STATS lsm_work_units_done; - WT_STATS memory_allocation; - WT_STATS memory_free; - WT_STATS memory_grow; - WT_STATS page_busy_blocked; - WT_STATS page_forcible_evict_blocked; - WT_STATS page_locked_blocked; - WT_STATS page_read_blocked; - WT_STATS page_sleep; - WT_STATS read_io; - WT_STATS rec_pages; - WT_STATS rec_pages_eviction; - WT_STATS rec_split_stashed_bytes; - WT_STATS rec_split_stashed_objects; - WT_STATS rwlock_read; - WT_STATS rwlock_write; - WT_STATS session_cursor_open; - WT_STATS session_open; - WT_STATS txn_begin; - 
WT_STATS txn_checkpoint; - WT_STATS txn_checkpoint_generation; - WT_STATS txn_checkpoint_running; - WT_STATS txn_checkpoint_time_max; - WT_STATS txn_checkpoint_time_min; - WT_STATS txn_checkpoint_time_recent; - WT_STATS txn_checkpoint_time_total; - WT_STATS txn_commit; - WT_STATS txn_fail_cache; - WT_STATS txn_pinned_checkpoint_range; - WT_STATS txn_pinned_range; - WT_STATS txn_rollback; - WT_STATS txn_sync; - WT_STATS write_io; + int64_t async_alloc_race; + int64_t async_alloc_view; + int64_t async_cur_queue; + int64_t async_flush; + int64_t async_full; + int64_t async_max_queue; + int64_t async_nowork; + int64_t async_op_alloc; + int64_t async_op_compact; + int64_t async_op_insert; + int64_t async_op_remove; + int64_t async_op_search; + int64_t async_op_update; + int64_t block_byte_map_read; + int64_t block_byte_read; + int64_t block_byte_write; + int64_t block_map_read; + int64_t block_preload; + int64_t block_read; + int64_t block_write; + int64_t cache_bytes_dirty; + int64_t cache_bytes_internal; + int64_t cache_bytes_inuse; + int64_t cache_bytes_leaf; + int64_t cache_bytes_max; + int64_t cache_bytes_overflow; + int64_t cache_bytes_read; + int64_t cache_bytes_write; + int64_t cache_eviction_app; + int64_t cache_eviction_checkpoint; + int64_t cache_eviction_clean; + int64_t cache_eviction_deepen; + int64_t cache_eviction_dirty; + int64_t cache_eviction_fail; + int64_t cache_eviction_force; + int64_t cache_eviction_force_delete; + int64_t cache_eviction_force_fail; + int64_t cache_eviction_hazard; + int64_t cache_eviction_internal; + int64_t cache_eviction_maximum_page_size; + int64_t cache_eviction_queue_empty; + int64_t cache_eviction_queue_not_empty; + int64_t cache_eviction_server_evicting; + int64_t cache_eviction_server_not_evicting; + int64_t cache_eviction_slow; + int64_t cache_eviction_split; + int64_t cache_eviction_walk; + int64_t cache_eviction_worker_evicting; + int64_t cache_inmem_split; + int64_t cache_overhead; + int64_t cache_pages_dirty; + 
int64_t cache_pages_inuse; + int64_t cache_read; + int64_t cache_write; + int64_t cond_wait; + int64_t cursor_create; + int64_t cursor_insert; + int64_t cursor_next; + int64_t cursor_prev; + int64_t cursor_remove; + int64_t cursor_reset; + int64_t cursor_restart; + int64_t cursor_search; + int64_t cursor_search_near; + int64_t cursor_update; + int64_t dh_conn_handle_count; + int64_t dh_session_handles; + int64_t dh_session_sweeps; + int64_t dh_sweep_close; + int64_t dh_sweep_ref; + int64_t dh_sweep_remove; + int64_t dh_sweep_tod; + int64_t dh_sweeps; + int64_t file_open; + int64_t log_buffer_size; + int64_t log_bytes_payload; + int64_t log_bytes_written; + int64_t log_close_yields; + int64_t log_compress_len; + int64_t log_compress_mem; + int64_t log_compress_small; + int64_t log_compress_write_fails; + int64_t log_compress_writes; + int64_t log_max_filesize; + int64_t log_prealloc_files; + int64_t log_prealloc_max; + int64_t log_prealloc_used; + int64_t log_release_write_lsn; + int64_t log_scan_records; + int64_t log_scan_rereads; + int64_t log_scans; + int64_t log_slot_closes; + int64_t log_slot_coalesced; + int64_t log_slot_consolidated; + int64_t log_slot_joins; + int64_t log_slot_races; + int64_t log_slot_toobig; + int64_t log_slot_toosmall; + int64_t log_slot_transitions; + int64_t log_sync; + int64_t log_sync_dir; + int64_t log_write_lsn; + int64_t log_writes; + int64_t lsm_checkpoint_throttle; + int64_t lsm_merge_throttle; + int64_t lsm_rows_merged; + int64_t lsm_work_queue_app; + int64_t lsm_work_queue_manager; + int64_t lsm_work_queue_max; + int64_t lsm_work_queue_switch; + int64_t lsm_work_units_created; + int64_t lsm_work_units_discarded; + int64_t lsm_work_units_done; + int64_t memory_allocation; + int64_t memory_free; + int64_t memory_grow; + int64_t page_busy_blocked; + int64_t page_forcible_evict_blocked; + int64_t page_locked_blocked; + int64_t page_read_blocked; + int64_t page_sleep; + int64_t read_io; + int64_t rec_pages; + int64_t 
rec_pages_eviction; + int64_t rec_split_stashed_bytes; + int64_t rec_split_stashed_objects; + int64_t rwlock_read; + int64_t rwlock_write; + int64_t session_cursor_open; + int64_t session_open; + int64_t txn_begin; + int64_t txn_checkpoint; + int64_t txn_checkpoint_generation; + int64_t txn_checkpoint_running; + int64_t txn_checkpoint_time_max; + int64_t txn_checkpoint_time_min; + int64_t txn_checkpoint_time_recent; + int64_t txn_checkpoint_time_total; + int64_t txn_commit; + int64_t txn_fail_cache; + int64_t txn_pinned_checkpoint_range; + int64_t txn_pinned_range; + int64_t txn_rollback; + int64_t txn_sync; + int64_t write_io; }; /* @@ -283,96 +379,97 @@ struct __wt_connection_stats { */ #define WT_DSRC_STATS_BASE 2000 struct __wt_dsrc_stats { - WT_STATS allocation_size; - WT_STATS block_alloc; - WT_STATS block_checkpoint_size; - WT_STATS block_extension; - WT_STATS block_free; - WT_STATS block_magic; - WT_STATS block_major; - WT_STATS block_minor; - WT_STATS block_reuse_bytes; - WT_STATS block_size; - WT_STATS bloom_count; - WT_STATS bloom_false_positive; - WT_STATS bloom_hit; - WT_STATS bloom_miss; - WT_STATS bloom_page_evict; - WT_STATS bloom_page_read; - WT_STATS bloom_size; - WT_STATS btree_checkpoint_generation; - WT_STATS btree_column_deleted; - WT_STATS btree_column_fix; - WT_STATS btree_column_internal; - WT_STATS btree_column_variable; - WT_STATS btree_compact_rewrite; - WT_STATS btree_entries; - WT_STATS btree_fixed_len; - WT_STATS btree_maximum_depth; - WT_STATS btree_maxintlkey; - WT_STATS btree_maxintlpage; - WT_STATS btree_maxleafkey; - WT_STATS btree_maxleafpage; - WT_STATS btree_maxleafvalue; - WT_STATS btree_overflow; - WT_STATS btree_row_internal; - WT_STATS btree_row_leaf; - WT_STATS cache_bytes_read; - WT_STATS cache_bytes_write; - WT_STATS cache_eviction_checkpoint; - WT_STATS cache_eviction_clean; - WT_STATS cache_eviction_deepen; - WT_STATS cache_eviction_dirty; - WT_STATS cache_eviction_fail; - WT_STATS cache_eviction_hazard; - WT_STATS 
cache_eviction_internal; - WT_STATS cache_eviction_split; - WT_STATS cache_inmem_split; - WT_STATS cache_overflow_value; - WT_STATS cache_read; - WT_STATS cache_read_overflow; - WT_STATS cache_write; - WT_STATS compress_raw_fail; - WT_STATS compress_raw_fail_temporary; - WT_STATS compress_raw_ok; - WT_STATS compress_read; - WT_STATS compress_write; - WT_STATS compress_write_fail; - WT_STATS compress_write_too_small; - WT_STATS cursor_create; - WT_STATS cursor_insert; - WT_STATS cursor_insert_bulk; - WT_STATS cursor_insert_bytes; - WT_STATS cursor_next; - WT_STATS cursor_prev; - WT_STATS cursor_remove; - WT_STATS cursor_remove_bytes; - WT_STATS cursor_reset; - WT_STATS cursor_search; - WT_STATS cursor_search_near; - WT_STATS cursor_update; - WT_STATS cursor_update_bytes; - WT_STATS lsm_checkpoint_throttle; - WT_STATS lsm_chunk_count; - WT_STATS lsm_generation_max; - WT_STATS lsm_lookup_no_bloom; - WT_STATS lsm_merge_throttle; - WT_STATS rec_dictionary; - WT_STATS rec_multiblock_internal; - WT_STATS rec_multiblock_leaf; - WT_STATS rec_multiblock_max; - WT_STATS rec_overflow_key_internal; - WT_STATS rec_overflow_key_leaf; - WT_STATS rec_overflow_value; - WT_STATS rec_page_delete; - WT_STATS rec_page_match; - WT_STATS rec_pages; - WT_STATS rec_pages_eviction; - WT_STATS rec_prefix_compression; - WT_STATS rec_suffix_compression; - WT_STATS session_compact; - WT_STATS session_cursor_open; - WT_STATS txn_update_conflict; + int64_t allocation_size; + int64_t block_alloc; + int64_t block_checkpoint_size; + int64_t block_extension; + int64_t block_free; + int64_t block_magic; + int64_t block_major; + int64_t block_minor; + int64_t block_reuse_bytes; + int64_t block_size; + int64_t bloom_count; + int64_t bloom_false_positive; + int64_t bloom_hit; + int64_t bloom_miss; + int64_t bloom_page_evict; + int64_t bloom_page_read; + int64_t bloom_size; + int64_t btree_checkpoint_generation; + int64_t btree_column_deleted; + int64_t btree_column_fix; + int64_t btree_column_internal; + 
int64_t btree_column_variable; + int64_t btree_compact_rewrite; + int64_t btree_entries; + int64_t btree_fixed_len; + int64_t btree_maximum_depth; + int64_t btree_maxintlkey; + int64_t btree_maxintlpage; + int64_t btree_maxleafkey; + int64_t btree_maxleafpage; + int64_t btree_maxleafvalue; + int64_t btree_overflow; + int64_t btree_row_internal; + int64_t btree_row_leaf; + int64_t cache_bytes_read; + int64_t cache_bytes_write; + int64_t cache_eviction_checkpoint; + int64_t cache_eviction_clean; + int64_t cache_eviction_deepen; + int64_t cache_eviction_dirty; + int64_t cache_eviction_fail; + int64_t cache_eviction_hazard; + int64_t cache_eviction_internal; + int64_t cache_eviction_split; + int64_t cache_inmem_split; + int64_t cache_overflow_value; + int64_t cache_read; + int64_t cache_read_overflow; + int64_t cache_write; + int64_t compress_raw_fail; + int64_t compress_raw_fail_temporary; + int64_t compress_raw_ok; + int64_t compress_read; + int64_t compress_write; + int64_t compress_write_fail; + int64_t compress_write_too_small; + int64_t cursor_create; + int64_t cursor_insert; + int64_t cursor_insert_bulk; + int64_t cursor_insert_bytes; + int64_t cursor_next; + int64_t cursor_prev; + int64_t cursor_remove; + int64_t cursor_remove_bytes; + int64_t cursor_reset; + int64_t cursor_restart; + int64_t cursor_search; + int64_t cursor_search_near; + int64_t cursor_update; + int64_t cursor_update_bytes; + int64_t lsm_checkpoint_throttle; + int64_t lsm_chunk_count; + int64_t lsm_generation_max; + int64_t lsm_lookup_no_bloom; + int64_t lsm_merge_throttle; + int64_t rec_dictionary; + int64_t rec_multiblock_internal; + int64_t rec_multiblock_leaf; + int64_t rec_multiblock_max; + int64_t rec_overflow_key_internal; + int64_t rec_overflow_key_leaf; + int64_t rec_overflow_value; + int64_t rec_page_delete; + int64_t rec_page_match; + int64_t rec_pages; + int64_t rec_pages_eviction; + int64_t rec_prefix_compression; + int64_t rec_suffix_compression; + int64_t session_compact; + 
int64_t session_cursor_open; + int64_t txn_update_conflict; }; /* Statistics section: END */ diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index a8e052ec5eb..1228893871f 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -300,7 +300,7 @@ __wt_txn_new_id(WT_SESSION_IMPL *session) * global current ID, so we want post-increment semantics. Our atomic * add primitive does pre-increment, so adjust the result here. */ - return (WT_ATOMIC_ADD8(S2C(session)->txn_global.current, 1) - 1); + return (__wt_atomic_addv64(&S2C(session)->txn_global.current, 1) - 1); } /* @@ -376,9 +376,9 @@ __wt_txn_id_check(WT_SESSION_IMPL *session) */ do { txn_state->id = txn->id = txn_global->current; - } while (!WT_ATOMIC_CAS8( - txn_global->current, txn->id, txn->id + 1) || - WT_TXNID_LT(txn->id, txn_global->last_running)); + } while (!__wt_atomic_casv64( + &txn_global->current, txn->id, txn->id + 1) || + WT_TXNID_LT(txn->id, txn_global->last_running)); /* * If we have used 64-bits of transaction IDs, there is nothing diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index a538bb211fc..ddcbf19b847 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -2072,8 +2072,10 @@ struct __wt_connection { * @config{checkpoint_sync, flush files to stable storage when closing or * writing checkpoints., a boolean flag; default \c true.} * @config{config_base, write the base configuration file if creating the - * database\, see @ref config_base for more information., a boolean flag; - * default \c true.} + * database. If \c false in the config passed directly to ::wiredtiger_open\, + * will ignore any existing base configuration file in addition to not creating + * one. 
See @ref config_base for more information., a boolean flag; default \c + * true.} * @config{create, create the database if it does not exist., a boolean flag; * default \c false.} * @config{direct_io, Use \c O_DIRECT to access files. Options are given as a @@ -3664,172 +3666,174 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CURSOR_REMOVE 1059 /*! cursor: cursor reset calls */ #define WT_STAT_CONN_CURSOR_RESET 1060 +/*! cursor: cursor restarted searches */ +#define WT_STAT_CONN_CURSOR_RESTART 1061 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1061 +#define WT_STAT_CONN_CURSOR_SEARCH 1062 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1062 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1063 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1063 +#define WT_STAT_CONN_CURSOR_UPDATE 1064 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1064 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1065 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1065 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1066 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1066 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1067 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1067 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1068 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1068 +#define WT_STAT_CONN_DH_SWEEP_REF 1069 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1069 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1070 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1070 +#define WT_STAT_CONN_DH_SWEEP_TOD 1071 /*! 
data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1071 +#define WT_STAT_CONN_DH_SWEEPS 1072 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1072 +#define WT_STAT_CONN_FILE_OPEN 1073 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1073 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1074 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1074 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1075 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1075 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1076 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1076 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1077 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1077 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1078 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1078 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1079 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1079 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1080 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1080 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1081 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1081 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1082 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1082 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1083 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1083 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1084 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1084 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1085 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1085 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1086 /*! 
log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1086 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1087 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1087 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1088 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1088 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1089 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1089 +#define WT_STAT_CONN_LOG_SCANS 1090 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1090 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1091 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1091 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1092 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1092 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1093 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1093 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1094 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1094 +#define WT_STAT_CONN_LOG_SLOT_RACES 1095 /*! log: record size exceeded maximum */ -#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1095 +#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1096 /*! log: failed to find a slot large enough for record */ -#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1096 +#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1097 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1097 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1098 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1098 +#define WT_STAT_CONN_LOG_SYNC 1099 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1099 +#define WT_STAT_CONN_LOG_SYNC_DIR 1100 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1100 +#define WT_STAT_CONN_LOG_WRITE_LSN 1101 /*! 
log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1101 +#define WT_STAT_CONN_LOG_WRITES 1102 /*! LSM: sleep for LSM checkpoint throttle */ -#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1102 +#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1103 /*! LSM: sleep for LSM merge throttle */ -#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1103 +#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1104 /*! LSM: rows merged in an LSM tree */ -#define WT_STAT_CONN_LSM_ROWS_MERGED 1104 +#define WT_STAT_CONN_LSM_ROWS_MERGED 1105 /*! LSM: application work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1105 +#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1106 /*! LSM: merge work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1106 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1107 /*! LSM: tree queue hit maximum */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1107 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1108 /*! LSM: switch work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1108 +#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1109 /*! LSM: tree maintenance operations scheduled */ -#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1109 +#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1110 /*! LSM: tree maintenance operations discarded */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1110 +#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1111 /*! LSM: tree maintenance operations executed */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1111 +#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1112 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1112 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1113 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1113 +#define WT_STAT_CONN_MEMORY_FREE 1114 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1114 +#define WT_STAT_CONN_MEMORY_GROW 1115 /*! 
thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1115 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1116 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1116 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1117 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1117 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1118 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1118 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1119 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1119 +#define WT_STAT_CONN_PAGE_SLEEP 1120 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1120 +#define WT_STAT_CONN_READ_IO 1121 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1121 +#define WT_STAT_CONN_REC_PAGES 1122 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1122 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1123 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1123 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1124 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1124 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1125 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1125 +#define WT_STAT_CONN_RWLOCK_READ 1126 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1126 +#define WT_STAT_CONN_RWLOCK_WRITE 1127 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1127 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1128 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1128 +#define WT_STAT_CONN_SESSION_OPEN 1129 /*! 
transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1129 +#define WT_STAT_CONN_TXN_BEGIN 1130 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1130 +#define WT_STAT_CONN_TXN_CHECKPOINT 1131 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1131 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1132 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1132 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1133 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1133 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1134 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1134 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1135 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1135 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1136 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1136 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1137 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1137 +#define WT_STAT_CONN_TXN_COMMIT 1138 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1138 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1139 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1139 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1140 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1140 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1141 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1141 +#define WT_STAT_CONN_TXN_ROLLBACK 1142 /*! 
transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1142 +#define WT_STAT_CONN_TXN_SYNC 1143 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1143 +#define WT_STAT_CONN_WRITE_IO 1144 /*! * @} @@ -3967,58 +3971,60 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2063 /*! cursor: reset calls */ #define WT_STAT_DSRC_CURSOR_RESET 2064 +/*! cursor: restarted searches */ +#define WT_STAT_DSRC_CURSOR_RESTART 2065 /*! cursor: search calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH 2065 +#define WT_STAT_DSRC_CURSOR_SEARCH 2066 /*! cursor: search near calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2066 +#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2067 /*! cursor: update calls */ -#define WT_STAT_DSRC_CURSOR_UPDATE 2067 +#define WT_STAT_DSRC_CURSOR_UPDATE 2068 /*! cursor: cursor-update value bytes updated */ -#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2068 +#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2069 /*! LSM: sleep for LSM checkpoint throttle */ -#define WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE 2069 +#define WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE 2070 /*! LSM: chunks in the LSM tree */ -#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2070 +#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2071 /*! LSM: highest merge generation in the LSM tree */ -#define WT_STAT_DSRC_LSM_GENERATION_MAX 2071 +#define WT_STAT_DSRC_LSM_GENERATION_MAX 2072 /*! LSM: queries that could have benefited from a Bloom filter that did * not exist */ -#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2072 +#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2073 /*! LSM: sleep for LSM merge throttle */ -#define WT_STAT_DSRC_LSM_MERGE_THROTTLE 2073 +#define WT_STAT_DSRC_LSM_MERGE_THROTTLE 2074 /*! reconciliation: dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2074 +#define WT_STAT_DSRC_REC_DICTIONARY 2075 /*! 
reconciliation: internal page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2075 +#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2076 /*! reconciliation: leaf page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2076 +#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2077 /*! reconciliation: maximum blocks required for a page */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2077 +#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2078 /*! reconciliation: internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2078 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2079 /*! reconciliation: leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2079 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2080 /*! reconciliation: overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2080 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2081 /*! reconciliation: pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2081 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2082 /*! reconciliation: page checksum matches */ -#define WT_STAT_DSRC_REC_PAGE_MATCH 2082 +#define WT_STAT_DSRC_REC_PAGE_MATCH 2083 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2083 +#define WT_STAT_DSRC_REC_PAGES 2084 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2084 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2085 /*! reconciliation: leaf page key bytes discarded using prefix compression */ -#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2085 +#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2086 /*! reconciliation: internal page key bytes discarded using suffix * compression */ -#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2086 +#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2087 /*! session: object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2087 +#define WT_STAT_DSRC_SESSION_COMPACT 2088 /*! 
session: open cursor count */ -#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2088 +#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2089 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2089 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2090 /*! @} */ /* * Statistics section: END diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index 64e29e104bc..9cc2ce2135a 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -55,11 +55,6 @@ extern "C" { #include <windows.h> #endif -/******************************************* - * WiredTiger externally maintained include files. - *******************************************/ -#include "queue.h" - /* * DO NOT EDIT: automatically built by dist/s_typedef. * Forward type declarations for internal types: BEGIN @@ -182,12 +177,18 @@ struct __wt_insert_head; typedef struct __wt_insert_head WT_INSERT_HEAD; struct __wt_keyed_encryptor; typedef struct __wt_keyed_encryptor WT_KEYED_ENCRYPTOR; +struct __wt_log; + typedef struct __wt_log WT_LOG; struct __wt_log_desc; typedef struct __wt_log_desc WT_LOG_DESC; struct __wt_log_op_desc; typedef struct __wt_log_op_desc WT_LOG_OP_DESC; struct __wt_log_rec_desc; typedef struct __wt_log_rec_desc WT_LOG_REC_DESC; +struct __wt_log_record; + typedef struct __wt_log_record WT_LOG_RECORD; +struct __wt_logslot; + typedef struct __wt_logslot WT_LOGSLOT; struct __wt_lsm_chunk; typedef struct __wt_lsm_chunk WT_LSM_CHUNK; struct __wt_lsm_data_source; @@ -204,6 +205,8 @@ struct __wt_lsm_worker_cookie; typedef struct __wt_lsm_worker_cookie WT_LSM_WORKER_COOKIE; struct __wt_multi; typedef struct __wt_multi WT_MULTI; +struct __wt_myslot; + typedef struct __wt_myslot WT_MYSLOT; struct __wt_named_collator; typedef struct __wt_named_collator WT_NAMED_COLLATOR; struct __wt_named_compressor; @@ -248,10 +251,10 @@ struct __wt_session_impl; typedef struct 
__wt_session_impl WT_SESSION_IMPL; struct __wt_size; typedef struct __wt_size WT_SIZE; +struct __wt_spinlock; + typedef struct __wt_spinlock WT_SPINLOCK; struct __wt_split_stash; typedef struct __wt_split_stash WT_SPLIT_STASH; -struct __wt_stats; - typedef struct __wt_stats WT_STATS; struct __wt_table; typedef struct __wt_table WT_TABLE; struct __wt_txn; @@ -285,6 +288,8 @@ union __wt_rand_state; #endif #include "hardware.h" +#include "queue.h" + #ifdef _WIN32 #include "os_windows.h" #else diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 4242571fe53..26ba34c7f93 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -791,7 +791,7 @@ __wt_log_allocfile( */ WT_RET(__wt_scr_alloc(session, 0, &from_path)); WT_ERR(__wt_scr_alloc(session, 0, &to_path)); - tmp_id = WT_ATOMIC_ADD4(log->tmp_fileid, 1); + tmp_id = __wt_atomic_add32(&log->tmp_fileid, 1); WT_ERR(__log_filename(session, tmp_id, WT_LOG_TMPNAME, from_path)); WT_ERR(__log_filename(session, lognum, dest, to_path)); /* diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index 0b580af4526..07878d1ae1e 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -35,6 +35,8 @@ __wt_log_slot_init(WT_SESSION_IMPL *session) conn = S2C(session); log = conn->log; + + WT_CACHE_LINE_ALIGNMENT_VERIFY(session, log->slot_pool); for (i = 0; i < WT_SLOT_POOL; i++) { log->slot_pool[i].slot_state = WT_LOG_SLOT_FREE; log->slot_pool[i].slot_index = WT_SLOT_INVALID_INDEX; @@ -168,7 +170,7 @@ join_slot: * We lost a race to add our size into this slot. Check the state * and try again. 
*/ - if (!WT_ATOMIC_CAS8(slot->slot_state, old_state, new_state)) { + if (!__wt_atomic_casiv64(&slot->slot_state, old_state, new_state)) { WT_STAT_FAST_CONN_INCR(session, log_slot_races); goto join_slot; } @@ -247,7 +249,8 @@ __wt_log_slot_close(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) newslot->slot_state = WT_LOG_SLOT_READY; newslot->slot_index = slot->slot_index; log->slot_array[newslot->slot_index] = newslot; - old_state = WT_ATOMIC_STORE8(slot->slot_state, WT_LOG_SLOT_PENDING); + old_state = + __wt_atomic_storeiv64(&slot->slot_state, WT_LOG_SLOT_PENDING); slot->slot_group_size = (uint64_t)(old_state - WT_LOG_SLOT_READY); /* * Note that this statistic may be much bigger than in reality, @@ -303,14 +306,11 @@ __wt_log_slot_wait(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) int64_t __wt_log_slot_release(WT_LOGSLOT *slot, uint64_t size) { - int64_t newsize; - /* * Add my size into the state. When it reaches WT_LOG_SLOT_DONE * all participatory threads have completed copying their piece. 
*/ - newsize = WT_ATOMIC_ADD8(slot->slot_state, (int64_t)size); - return (newsize); + return (__wt_atomic_addiv64(&slot->slot_state, (int64_t)size)); } /* diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c index 84b8d5c9532..674b9e6d3a8 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c @@ -1066,12 +1066,12 @@ __clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value) ret = __wt_bloom_hash_get(bloom, &bhash); if (ret == WT_NOTFOUND) { - WT_STAT_FAST_INCR(session, - &clsm->lsm_tree->stats, bloom_miss); + WT_LSM_TREE_STAT_INCR( + session, clsm->lsm_tree->bloom_miss); continue; } else if (ret == 0) - WT_STAT_FAST_INCR(session, - &clsm->lsm_tree->stats, bloom_hit); + WT_LSM_TREE_STAT_INCR( + session, clsm->lsm_tree->bloom_hit); WT_ERR(ret); } c->set_key(c, &cursor->key); @@ -1086,11 +1086,11 @@ __clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value) F_CLR(c, WT_CURSTD_KEY_SET); /* Update stats: the active chunk can't have a bloom filter. 
*/ if (bloom != NULL) - WT_STAT_FAST_INCR(session, - &clsm->lsm_tree->stats, bloom_false_positive); + WT_LSM_TREE_STAT_INCR(session, + clsm->lsm_tree->bloom_false_positive); else if (clsm->primary_chunk == NULL || i != clsm->nchunks) - WT_STAT_FAST_INCR(session, - &clsm->lsm_tree->stats, lsm_lookup_no_bloom); + WT_LSM_TREE_STAT_INCR(session, + clsm->lsm_tree->lsm_lookup_no_bloom); } WT_ERR(WT_NOTFOUND); @@ -1331,12 +1331,12 @@ __clsm_put(WT_SESSION_IMPL *session, ++clsm->update_count >= 100) && lsm_tree->merge_throttle + lsm_tree->ckpt_throttle > 0) { clsm->update_count = 0; - WT_STAT_FAST_INCRV(session, &clsm->lsm_tree->stats, - lsm_checkpoint_throttle, lsm_tree->ckpt_throttle); + WT_LSM_TREE_STAT_INCRV(session, + lsm_tree->lsm_checkpoint_throttle, lsm_tree->ckpt_throttle); WT_STAT_FAST_CONN_INCRV(session, lsm_checkpoint_throttle, lsm_tree->ckpt_throttle); - WT_STAT_FAST_INCRV(session, &clsm->lsm_tree->stats, - lsm_merge_throttle, lsm_tree->merge_throttle); + WT_LSM_TREE_STAT_INCRV(session, + lsm_tree->lsm_merge_throttle, lsm_tree->merge_throttle); WT_STAT_FAST_CONN_INCRV(session, lsm_merge_throttle, lsm_tree->merge_throttle); __wt_sleep(0, diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c index e2f7ebb45dd..6c59232b619 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c @@ -258,7 +258,7 @@ __wt_lsm_manager_free_work_unit( if (entry != NULL) { WT_ASSERT(session, entry->lsm_tree->queue_ref > 0); - (void)WT_ATOMIC_SUB4(entry->lsm_tree->queue_ref, 1); + (void)__wt_atomic_sub32(&entry->lsm_tree->queue_ref, 1); __wt_free(session, entry); } } @@ -639,9 +639,9 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, * on close, the flag is cleared and then the queue reference count * is checked. 
*/ - (void)WT_ATOMIC_ADD4(lsm_tree->queue_ref, 1); + (void)__wt_atomic_add32(&lsm_tree->queue_ref, 1); if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { - (void)WT_ATOMIC_SUB4(lsm_tree->queue_ref, 1); + (void)__wt_atomic_sub32(&lsm_tree->queue_ref, 1); return (0); } @@ -668,6 +668,6 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, return (0); err: if (!pushed) - (void)WT_ATOMIC_SUB4(lsm_tree->queue_ref, 1); + (void)__wt_atomic_sub32(&lsm_tree->queue_ref, 1); return (ret); } diff --git a/src/third_party/wiredtiger/src/lsm/lsm_merge.c b/src/third_party/wiredtiger/src/lsm/lsm_merge.c index d7e684b8f51..40991f845e4 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_merge.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_merge.c @@ -398,7 +398,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) locked = 0; /* Allocate an ID for the merge. */ - dest_id = WT_ATOMIC_ADD4(lsm_tree->last, 1); + dest_id = __wt_atomic_add32(&lsm_tree->last, 1); /* * We only want to do the chunk loop if we're running with verbose, @@ -493,7 +493,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) * merge_syncing field so that compact knows it is still in * progress. */ - (void)WT_ATOMIC_ADD4(lsm_tree->merge_syncing, 1); + (void)__wt_atomic_add32(&lsm_tree->merge_syncing, 1); in_sync = 1; /* * We've successfully created the new chunk. Now install it. 
We need @@ -544,7 +544,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) WT_TRET(dest->close(dest)); dest = NULL; ++lsm_tree->merge_progressing; - (void)WT_ATOMIC_SUB4(lsm_tree->merge_syncing, 1); + (void)__wt_atomic_sub32(&lsm_tree->merge_syncing, 1); in_sync = 0; WT_ERR_NOTFOUND_OK(ret); @@ -600,7 +600,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) err: if (locked) WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); if (in_sync) - (void)WT_ATOMIC_SUB4(lsm_tree->merge_syncing, 1); + (void)__wt_atomic_sub32(&lsm_tree->merge_syncing, 1); if (src != NULL) WT_TRET(src->close(src)); if (dest != NULL) diff --git a/src/third_party/wiredtiger/src/lsm/lsm_stat.c b/src/third_party/wiredtiger/src/lsm/lsm_stat.c index 126a59af0d1..2817ec9eeb7 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_stat.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_stat.c @@ -22,6 +22,7 @@ __curstat_lsm_init( WT_DSRC_STATS *new, *stats; WT_LSM_CHUNK *chunk; WT_LSM_TREE *lsm_tree; + int64_t bloom_count; u_int i; int locked; char config[64]; @@ -49,25 +50,22 @@ __curstat_lsm_init( cfg[1] = disk_cfg[1] = config; } - /* - * Set the cursor to reference the data source statistics; we don't - * initialize it, instead we copy (rather than aggregate), the first - * chunk's statistics, which has the same effect. - */ - stats = &cst->u.dsrc_stats; - /* Hold the LSM lock so that we can safely walk through the chunks. */ WT_ERR(__wt_lsm_tree_readlock(session, lsm_tree)); locked = 1; - /* Initialize the statistics. */ - __wt_stat_init_dsrc_stats(stats); + /* + * Set the cursor to reference the data source statistics into which + * we're going to aggregate statistics from the underlying objects. + */ + stats = &cst->u.dsrc_stats; + __wt_stat_dsrc_init_single(stats); /* * For each chunk, aggregate its statistics, as well as any associated * bloom filter statistics, into the total statistics. 
*/ - for (i = 0; i < lsm_tree->nchunks; i++) { + for (bloom_count = 0, i = 0; i < lsm_tree->nchunks; i++) { chunk = lsm_tree->chunk[i]; /* @@ -93,17 +91,17 @@ __curstat_lsm_init( * top-level. */ new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor); - WT_STAT_SET(new, lsm_generation_max, chunk->generation); + new->lsm_generation_max = chunk->generation; /* Aggregate statistics from each new chunk. */ - __wt_stat_aggregate_dsrc_stats(new, stats); + __wt_stat_dsrc_aggregate_single(new, stats); WT_ERR(stat_cursor->close(stat_cursor)); if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) continue; /* Maintain a count of bloom filters. */ - WT_STAT_INCR(&lsm_tree->stats, bloom_count); + ++bloom_count; /* Get the bloom filter's underlying object. */ WT_ERR(__wt_buf_fmt( @@ -117,24 +115,39 @@ __curstat_lsm_init( * into the top-level. */ new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor); - WT_STAT_SET(new, - bloom_size, (chunk->count * lsm_tree->bloom_bit_count) / 8); - WT_STAT_SET(new, bloom_page_evict, - WT_STAT(new, cache_eviction_clean) + - WT_STAT(new, cache_eviction_dirty)); - WT_STAT_SET(new, bloom_page_read, WT_STAT(new, cache_read)); - - __wt_stat_aggregate_dsrc_stats(new, stats); + new->bloom_size = + (int64_t)((chunk->count * lsm_tree->bloom_bit_count) / 8); + new->bloom_page_evict = + new->cache_eviction_clean + new->cache_eviction_dirty; + new->bloom_page_read = new->cache_read; + + __wt_stat_dsrc_aggregate_single(new, stats); WT_ERR(stat_cursor->close(stat_cursor)); } /* Set statistics that aren't aggregated directly into the cursor */ - WT_STAT_SET(stats, lsm_chunk_count, lsm_tree->nchunks); + stats->bloom_count = bloom_count; + stats->lsm_chunk_count = lsm_tree->nchunks; - /* Aggregate, and optionally clear, LSM-level specific information. */ - __wt_stat_aggregate_dsrc_stats(&lsm_tree->stats, stats); + /* Include, and optionally clear, LSM-level specific information. 
*/ + stats->bloom_miss = lsm_tree->bloom_miss; + if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) + lsm_tree->bloom_miss = 0; + stats->bloom_hit = lsm_tree->bloom_hit; + if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) + lsm_tree->bloom_hit = 0; + stats->bloom_false_positive = lsm_tree->bloom_false_positive; + if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) + lsm_tree->bloom_false_positive = 0; + stats->lsm_lookup_no_bloom = lsm_tree->lsm_lookup_no_bloom; + if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) + lsm_tree->lsm_lookup_no_bloom = 0; + stats->lsm_checkpoint_throttle = lsm_tree->lsm_checkpoint_throttle; + if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) + lsm_tree->lsm_checkpoint_throttle = 0; + stats->lsm_merge_throttle = lsm_tree->lsm_merge_throttle; if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) - __wt_stat_refresh_dsrc_stats(&lsm_tree->stats); + lsm_tree->lsm_merge_throttle = 0; __wt_curstat_dsrc_final(cst); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c index 6c6b185f821..f34f0598261 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c @@ -141,7 +141,7 @@ __wt_lsm_tree_close_all(WT_SESSION_IMPL *session) * is no need to decrement the reference count since discard * is unconditional. */ - (void)WT_ATOMIC_ADD4(lsm_tree->refcnt, 1); + (void)__wt_atomic_add32(&lsm_tree->refcnt, 1); WT_TRET(__lsm_tree_close(session, lsm_tree)); WT_TRET(__lsm_tree_discard(session, lsm_tree, 1)); } @@ -486,15 +486,17 @@ __lsm_tree_find(WT_SESSION_IMPL *session, * Make sure we win the race to switch on the * exclusive flag. 
*/ - if (!WT_ATOMIC_CAS1(lsm_tree->exclusive, 0, 1)) + if (!__wt_atomic_cas8( + &lsm_tree->exclusive, 0, 1)) return (EBUSY); /* Make sure there are no readers */ - if (!WT_ATOMIC_CAS4(lsm_tree->refcnt, 0, 1)) { + if (!__wt_atomic_cas32( + &lsm_tree->refcnt, 0, 1)) { lsm_tree->exclusive = 0; return (EBUSY); } } else { - (void)WT_ATOMIC_ADD4(lsm_tree->refcnt, 1); + (void)__wt_atomic_add32(&lsm_tree->refcnt, 1); /* * We got a reference, check if an exclusive @@ -503,8 +505,8 @@ __lsm_tree_find(WT_SESSION_IMPL *session, if (lsm_tree->exclusive) { WT_ASSERT(session, lsm_tree->refcnt > 0); - (void)WT_ATOMIC_SUB4( - lsm_tree->refcnt, 1); + (void)__wt_atomic_sub32( + &lsm_tree->refcnt, 1); return (EBUSY); } } @@ -565,7 +567,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session, WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); /* Start the LSM manager thread if it isn't running. */ - if (WT_ATOMIC_CAS4(conn->lsm_manager.lsm_workers, 0, 1)) + if (__wt_atomic_cas32(&conn->lsm_manager.lsm_workers, 0, 1)) WT_RET(__wt_lsm_manager_start(session)); /* Make sure no one beat us to it. */ @@ -596,7 +598,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session, * with getting handles exclusive. */ lsm_tree->refcnt = 1; - lsm_tree->exclusive = (int8_t)exclusive; + lsm_tree->exclusive = exclusive ? 1 : 0; lsm_tree->queue_ref = 0; /* Set a flush timestamp as a baseline. */ @@ -644,7 +646,7 @@ __wt_lsm_tree_release(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_ASSERT(session, lsm_tree->refcnt > 0); if (lsm_tree->exclusive) lsm_tree->exclusive = 0; - (void)WT_ATOMIC_SUB4(lsm_tree->refcnt, 1); + (void)__wt_atomic_sub32(&lsm_tree->refcnt, 1); } /* How aggressively to ramp up or down throttle due to level 0 merging */ @@ -839,7 +841,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) /* Update the throttle time. 
*/ __wt_lsm_tree_throttle(session, lsm_tree, 0); - new_id = WT_ATOMIC_ADD4(lsm_tree->last, 1); + new_id = __wt_atomic_add32(&lsm_tree->last, 1); WT_ERR(__wt_realloc_def(session, &lsm_tree->chunk_alloc, nchunks + 1, &lsm_tree->chunk)); @@ -1097,7 +1099,7 @@ __wt_lsm_tree_truncate( /* Create the new chunk. */ WT_ERR(__wt_calloc_one(session, &chunk)); - chunk->id = WT_ATOMIC_ADD4(lsm_tree->last, 1); + chunk->id = __wt_atomic_add32(&lsm_tree->last, 1); WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk)); /* Mark all chunks old. */ @@ -1207,7 +1209,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) WT_LSM_TREE *lsm_tree; time_t begin, end; uint64_t progress; - int i, compacting, flushing, locked, ref; + uint32_t i; + int compacting, flushing, locked, ref; compacting = flushing = locked = ref = 0; chunk = NULL; @@ -1282,7 +1285,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) * If we have a chunk, we want to look for it to be on-disk. * So we need to add a reference to keep it available. */ - (void)WT_ATOMIC_ADD4(chunk->refcnt, 1); + (void)__wt_atomic_add32(&chunk->refcnt, 1); ref = 1; } @@ -1330,7 +1333,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) "Start compacting progress %" PRIu64, name, chunk->id, lsm_tree->merge_progressing)); - (void)WT_ATOMIC_SUB4(chunk->refcnt, 1); + (void)__wt_atomic_sub32(&chunk->refcnt, 1); flushing = ref = 0; compacting = 1; F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); @@ -1384,7 +1387,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) err: /* Ensure anything we set is cleared. 
*/ if (ref) - (void)WT_ATOMIC_SUB4(chunk->refcnt, 1); + (void)__wt_atomic_sub32(&chunk->refcnt, 1); if (compacting) { F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING); lsm_tree->merge_aggressiveness = 0; diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c index c3bee162ea1..0c36c68e9f5 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c @@ -53,7 +53,7 @@ __lsm_copy_chunks(WT_SESSION_IMPL *session, * it's safe. */ for (i = 0; i < nchunks; i++) - (void)WT_ATOMIC_ADD4(cookie->chunk_array[i]->refcnt, 1); + (void)__wt_atomic_add32(&cookie->chunk_array[i]->refcnt, 1); err: WT_TRET(__wt_lsm_tree_readunlock(session, lsm_tree)); @@ -122,7 +122,7 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, force ? " w/ force" : "", i, lsm_tree->nchunks, chunk->uri)); - (void)WT_ATOMIC_ADD4(chunk->refcnt, 1); + (void)__wt_atomic_add32(&chunk->refcnt, 1); } err: WT_RET(__wt_lsm_tree_readunlock(session, lsm_tree)); @@ -145,7 +145,7 @@ __lsm_unpin_chunks(WT_SESSION_IMPL *session, WT_LSM_WORKER_COOKIE *cookie) if (cookie->chunk_array[i] == NULL) continue; WT_ASSERT(session, cookie->chunk_array[i]->refcnt > 0); - (void)WT_ATOMIC_SUB4(cookie->chunk_array[i]->refcnt, 1); + (void)__wt_atomic_sub32(&cookie->chunk_array[i]->refcnt, 1); } /* Ensure subsequent calls don't double decrement. */ cookie->nchunks = 0; @@ -223,7 +223,7 @@ __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) * See if we win the race to switch on the "busy" flag and * recheck that the chunk still needs a Bloom filter. 
*/ - if (WT_ATOMIC_CAS4(chunk->bloom_busy, 0, 1)) { + if (__wt_atomic_cas32(&chunk->bloom_busy, 0, 1)) { if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) { ret = __lsm_bloom_create( session, lsm_tree, chunk, (u_int)i); @@ -528,7 +528,7 @@ __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) * Make sure only a single thread is freeing the old chunk array * at any time. */ - if (!WT_ATOMIC_CAS4(lsm_tree->freeing_old_chunks, 0, 1)) + if (!__wt_atomic_cas32(&lsm_tree->freeing_old_chunks, 0, 1)) return (0); /* * Take a copy of the current state of the LSM tree and look for chunks diff --git a/src/third_party/wiredtiger/src/lsm/lsm_worker.c b/src/third_party/wiredtiger/src/lsm/lsm_worker.c index 8ed4a117641..3add3155e17 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_worker.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_worker.c @@ -65,7 +65,7 @@ __lsm_worker_general_op( ret = __wt_lsm_checkpoint_chunk( session, entry->lsm_tree, chunk); WT_ASSERT(session, chunk->refcnt > 0); - (void)WT_ATOMIC_SUB4(chunk->refcnt, 1); + (void)__wt_atomic_sub32(&chunk->refcnt, 1); WT_ERR(ret); } } else if (entry->type == WT_LSM_WORK_DROP) diff --git a/src/third_party/wiredtiger/src/os_posix/os_alloc.c b/src/third_party/wiredtiger/src/os_posix/os_alloc.c index 4d04f9ac579..eb2482723ec 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_alloc.c +++ b/src/third_party/wiredtiger/src/os_posix/os_alloc.c @@ -58,7 +58,9 @@ __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp) WT_STAT_FAST_CONN_INCR(session, memory_allocation); if ((p = calloc(number, size)) == NULL) - WT_RET_MSG(session, __wt_errno(), "memory allocation"); + WT_RET_MSG(session, __wt_errno(), + "memory allocation of %" WT_SIZET_FMT " bytes failed", + size * number); *(void **)retp = p; return (0); @@ -100,7 +102,9 @@ __wt_realloc(WT_SESSION_IMPL *session, } if ((p = realloc(p, bytes_to_allocate)) == NULL) - WT_RET_MSG(session, __wt_errno(), "memory allocation"); + WT_RET_MSG(session, 
__wt_errno(), + "memory allocation of %" WT_SIZET_FMT " bytes failed", + bytes_to_allocate); /* * Clear the allocated memory -- an application might: allocate memory, @@ -171,7 +175,9 @@ __wt_realloc_aligned(WT_SESSION_IMPL *session, if ((ret = posix_memalign(&newp, S2C(session)->buffer_alignment, bytes_to_allocate)) != 0) - WT_RET_MSG(session, ret, "memory allocation"); + WT_RET_MSG(session, ret, + "memory allocation of %" WT_SIZET_FMT + " bytes failed", bytes_to_allocate); if (p != NULL) memcpy(newp, p, bytes_allocated); diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c index dfd72dd0cd2..baf9b475777 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c +++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c @@ -54,7 +54,7 @@ __wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs) locked = 0; /* Fast path if already signalled. */ - if (WT_ATOMIC_ADD4(cond->waiters, 1) == 0) + if (__wt_atomic_addi32(&cond->waiters, 1) == 0) return (0); /* @@ -91,7 +91,7 @@ __wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs) ret == ETIMEDOUT) ret = 0; - (void)WT_ATOMIC_SUB4(cond->waiters, 1); + (void)__wt_atomic_subi32(&cond->waiters, 1); err: if (locked) WT_TRET(pthread_mutex_unlock(&cond->mtx)); @@ -124,7 +124,7 @@ __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) if (cond->waiters == -1) return (0); - if (cond->waiters > 0 || !WT_ATOMIC_CAS4(cond->waiters, 0, -1)) { + if (cond->waiters > 0 || !__wt_atomic_casi32(&cond->waiters, 0, -1)) { WT_ERR(pthread_mutex_lock(&cond->mtx)); locked = 1; WT_ERR(pthread_cond_broadcast(&cond->cond)); diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c index df558b12bef..d47ab197643 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c +++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c @@ -165,7 +165,7 @@ 
__wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) * incrementing the reader value to match it. */ new.s.readers = new.s.users = old.s.users + 1; - return (WT_ATOMIC_CAS8(l->u, old.u, new.u) ? 0 : EBUSY); + return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY); } /* @@ -190,7 +190,7 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) * value will wrap and two lockers will simultaneously be granted the * lock. */ - ticket = WT_ATOMIC_FETCH_ADD2(l->s.users, 1); + ticket = __wt_atomic_fetch_add16(&l->s.users, 1); for (pause_cnt = 0; ticket != l->s.readers;) { /* * We failed to get the lock; pause before retrying and if we've @@ -234,7 +234,7 @@ __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) * Increment the writers value (other readers are doing the same, make * sure we don't race). */ - WT_ATOMIC_ADD2(l->s.writers, 1); + (void)__wt_atomic_add16(&l->s.writers, 1); return (0); } @@ -267,7 +267,7 @@ __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) /* The replacement lock value is a result of allocating a new ticket. */ ++new.s.users; - return (WT_ATOMIC_CAS8(l->u, old.u, new.u) ? 0 : EBUSY); + return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY); } /* @@ -292,7 +292,7 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) * value will wrap and two lockers will simultaneously be granted the * lock. 
*/ - ticket = WT_ATOMIC_FETCH_ADD2(l->s.users, 1); + ticket = __wt_atomic_fetch_add16(&l->s.users, 1); for (pause_cnt = 0; ticket != l->s.writers;) { /* * We failed to get the lock; pause before retrying and if we've diff --git a/src/third_party/wiredtiger/src/os_posix/os_open.c b/src/third_party/wiredtiger/src/os_posix/os_open.c index 619a4afe4b6..8622bb5b4ca 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_open.c +++ b/src/third_party/wiredtiger/src/os_posix/os_open.c @@ -177,7 +177,7 @@ setupfh: } if (!matched) { WT_CONN_FILE_INSERT(conn, fh, bucket); - (void)WT_ATOMIC_ADD4(conn->open_file_count, 1); + (void)__wt_atomic_add32(&conn->open_file_count, 1); *fhp = fh; } __wt_spin_unlock(session, &conn->fh_lock); @@ -222,7 +222,7 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) /* Remove from the list. */ bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; WT_CONN_FILE_REMOVE(conn, fh, bucket); - (void)WT_ATOMIC_SUB4(conn->open_file_count, 1); + (void)__wt_atomic_sub32(&conn->open_file_count, 1); __wt_spin_unlock(session, &conn->fh_lock); diff --git a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c index 51f6d6533c8..565928cb863 100644 --- a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c +++ b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c @@ -51,7 +51,7 @@ __wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs) locked = 0; /* Fast path if already signalled. 
*/ - if (WT_ATOMIC_ADD4(cond->waiters, 1) == 0) + if (__wt_atomic_addi32(&cond->waiters, 1) == 0) return (0); /* @@ -97,7 +97,7 @@ __wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs) } } - (void)WT_ATOMIC_SUB4(cond->waiters, 1); + (void)__wt_atomic_subi32(&cond->waiters, 1); if (locked) LeaveCriticalSection(&cond->mtx); @@ -130,7 +130,7 @@ __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) if (cond->waiters == -1) return (0); - if (cond->waiters > 0 || !WT_ATOMIC_CAS4(cond->waiters, 0, -1)) { + if (cond->waiters > 0 || !__wt_atomic_casi32(&cond->waiters, 0, -1)) { EnterCriticalSection(&cond->mtx); locked = 1; WakeAllConditionVariable(&cond->cond); diff --git a/src/third_party/wiredtiger/src/os_win/os_open.c b/src/third_party/wiredtiger/src/os_win/os_open.c index 1c6f5636501..3bd24369242 100644 --- a/src/third_party/wiredtiger/src/os_win/os_open.c +++ b/src/third_party/wiredtiger/src/os_win/os_open.c @@ -169,7 +169,7 @@ setupfh: } if (!matched) { WT_CONN_FILE_INSERT(conn, fh, bucket); - (void)WT_ATOMIC_ADD4(conn->open_file_count, 1); + (void)__wt_atomic_add32(&conn->open_file_count, 1); *fhp = fh; } @@ -217,7 +217,7 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) /* Remove from the list. 
*/ bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; WT_CONN_FILE_REMOVE(conn, fh, bucket); - (void)WT_ATOMIC_SUB4(conn->open_file_count, 1); + (void)__wt_atomic_sub32(&conn->open_file_count, 1); __wt_spin_unlock(session, &conn->fh_lock); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 37acb28a00b..6b0ca54065e 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -277,7 +277,7 @@ typedef struct { WT_SALVAGE_COOKIE *salvage; /* If it's a salvage operation */ - int tested_ref_state; /* Debugging information */ + uint32_t tested_ref_state; /* Debugging information */ } WT_RECONCILE; static void __rec_bnd_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *, int); @@ -1104,8 +1104,8 @@ __rec_child_modify(WT_SESSION_IMPL *session, * to see if the delete is visible to us. Lock down the * structure. */ - if (!WT_ATOMIC_CAS4( - ref->state, WT_REF_DELETED, WT_REF_LOCKED)) + if (!__wt_atomic_casv32( + &ref->state, WT_REF_DELETED, WT_REF_LOCKED)) break; ret = __rec_child_deleted(session, r, ref, statep); WT_PUBLISH(ref->state, WT_REF_DELETED); @@ -5108,7 +5108,7 @@ err: __wt_scr_free(session, &tkey); * write generation changed, the page has been written since * we started reconciliation and remains dirty. 
*/ - if (WT_ATOMIC_CAS4(mod->write_gen, r->orig_write_gen, 0)) + if (__wt_atomic_cas32(&mod->write_gen, r->orig_write_gen, 0)) __wt_cache_dirty_decr(session, page); } diff --git a/src/third_party/wiredtiger/src/schema/schema_stat.c b/src/third_party/wiredtiger/src/schema/schema_stat.c index dea797f823d..e9439abe16f 100644 --- a/src/third_party/wiredtiger/src/schema/schema_stat.c +++ b/src/third_party/wiredtiger/src/schema/schema_stat.c @@ -90,7 +90,7 @@ __wt_curstat_table_init(WT_SESSION_IMPL *session, if (i == 0) *stats = *new; else - __wt_stat_aggregate_dsrc_stats(new, stats); + __wt_stat_dsrc_aggregate_single(new, stats); WT_ERR(stat_cursor->close(stat_cursor)); } @@ -102,7 +102,7 @@ __wt_curstat_table_init(WT_SESSION_IMPL *session, WT_ERR(__wt_curstat_open( session, buf->data, cfg, &stat_cursor)); new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor); - __wt_stat_aggregate_dsrc_stats(new, stats); + __wt_stat_dsrc_aggregate_single(new, stats); WT_ERR(stat_cursor->close(stat_cursor)); } diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c index 9fc9fd1d81d..66b669dde22 100644 --- a/src/third_party/wiredtiger/src/session/session_dhandle.c +++ b/src/third_party/wiredtiger/src/session/session_dhandle.c @@ -49,7 +49,7 @@ __session_discard_dhandle( TAILQ_REMOVE(&session->dhandles, dhandle_cache, q); TAILQ_REMOVE(&session->dhhash[bucket], dhandle_cache, hashq); - (void)WT_ATOMIC_SUB4(dhandle_cache->dhandle->session_ref, 1); + (void)__wt_atomic_sub32(&dhandle_cache->dhandle->session_ref, 1); __wt_overwrite_and_free(session, dhandle_cache); } @@ -393,7 +393,7 @@ __session_find_shared_dhandle( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) { WT_RET(__wt_conn_dhandle_find(session, uri, checkpoint)); - (void)WT_ATOMIC_ADD4(session->dhandle->session_ref, 1); + (void)__wt_atomic_add32(&session->dhandle->session_ref, 1); return (0); } diff --git 
a/src/third_party/wiredtiger/src/support/rand.c b/src/third_party/wiredtiger/src/support/rand.c index caac04d3529..f5ecb12633e 100644 --- a/src/third_party/wiredtiger/src/support/rand.c +++ b/src/third_party/wiredtiger/src/support/rand.c @@ -84,8 +84,11 @@ __wt_random(WT_RAND_STATE volatile * rnd_state) * to initialize the state, or initializes with a seed that results in a * short period. */ - if (z == 0 || w == 0) - __wt_random_init(rnd_state); + if (z == 0 || w == 0) { + __wt_random_init(&rnd); + w = M_W(rnd); + z = M_Z(rnd); + } M_Z(rnd) = z = 36969 * (z & 65535) + (z >> 16); M_W(rnd) = w = 18000 * (w & 65535) + (w >> 16); diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 2f638790060..6af357202be 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -2,679 +2,972 @@ #include "wt_internal.h" +static const char * const __stats_dsrc_desc[] = { + "block-manager: file allocation unit size", + "block-manager: blocks allocated", + "block-manager: checkpoint size", + "block-manager: allocations requiring file extension", + "block-manager: blocks freed", + "block-manager: file magic number", + "block-manager: file major version number", + "block-manager: minor version number", + "block-manager: file bytes available for reuse", + "block-manager: file size in bytes", + "LSM: bloom filters in the LSM tree", + "LSM: bloom filter false positives", + "LSM: bloom filter hits", + "LSM: bloom filter misses", + "LSM: bloom filter pages evicted from cache", + "LSM: bloom filter pages read into cache", + "LSM: total size of bloom filters", + "btree: btree checkpoint generation", + "btree: column-store variable-size deleted values", + "btree: column-store fixed-size leaf pages", + "btree: column-store internal pages", + "btree: column-store variable-size leaf pages", + "btree: pages rewritten by compaction", + "btree: number of key/value pairs", + "btree: 
fixed-record size", + "btree: maximum tree depth", + "btree: maximum internal page key size", + "btree: maximum internal page size", + "btree: maximum leaf page key size", + "btree: maximum leaf page size", + "btree: maximum leaf page value size", + "btree: overflow pages", + "btree: row-store internal pages", + "btree: row-store leaf pages", + "cache: bytes read into cache", + "cache: bytes written from cache", + "cache: checkpoint blocked page eviction", + "cache: unmodified pages evicted", + "cache: page split during eviction deepened the tree", + "cache: modified pages evicted", + "cache: data source pages selected for eviction unable to be evicted", + "cache: hazard pointer blocked page eviction", + "cache: internal pages evicted", + "cache: pages split during eviction", + "cache: in-memory page splits", + "cache: overflow values cached in memory", + "cache: pages read into cache", + "cache: overflow pages read into cache", + "cache: pages written from cache", + "compression: raw compression call failed, no additional data available", + "compression: raw compression call failed, additional data available", + "compression: raw compression call succeeded", + "compression: compressed pages read", + "compression: compressed pages written", + "compression: page written failed to compress", + "compression: page written was too small to compress", + "cursor: create calls", + "cursor: insert calls", + "cursor: bulk-loaded cursor-insert calls", + "cursor: cursor-insert key and value bytes inserted", + "cursor: next calls", + "cursor: prev calls", + "cursor: remove calls", + "cursor: cursor-remove key bytes removed", + "cursor: reset calls", + "cursor: restarted searches", + "cursor: search calls", + "cursor: search near calls", + "cursor: update calls", + "cursor: cursor-update value bytes updated", + "LSM: sleep for LSM checkpoint throttle", + "LSM: chunks in the LSM tree", + "LSM: highest merge generation in the LSM tree", + "LSM: queries that could have benefited 
from a Bloom filter that did not exist", + "LSM: sleep for LSM merge throttle", + "reconciliation: dictionary matches", + "reconciliation: internal page multi-block writes", + "reconciliation: leaf page multi-block writes", + "reconciliation: maximum blocks required for a page", + "reconciliation: internal-page overflow keys", + "reconciliation: leaf-page overflow keys", + "reconciliation: overflow values written", + "reconciliation: pages deleted", + "reconciliation: page checksum matches", + "reconciliation: page reconciliation calls", + "reconciliation: page reconciliation calls for eviction", + "reconciliation: leaf page key bytes discarded using prefix compression", + "reconciliation: internal page key bytes discarded using suffix compression", + "session: object compaction", + "session: open cursor count", + "transaction: update conflicts", +}; + +const char * +__wt_stat_dsrc_desc(int slot) +{ + return (__stats_dsrc_desc[slot]); +} + void -__wt_stat_init_dsrc_stats(WT_DSRC_STATS *stats) +__wt_stat_dsrc_init_single(WT_DSRC_STATS *stats) { - /* Clear, so can also be called for reinitialization. 
*/ memset(stats, 0, sizeof(*stats)); +} + +void +__wt_stat_dsrc_init(WT_DATA_HANDLE *handle) +{ + int i; - stats->block_extension.desc = - "block-manager: allocations requiring file extension"; - stats->block_alloc.desc = "block-manager: blocks allocated"; - stats->block_free.desc = "block-manager: blocks freed"; - stats->block_checkpoint_size.desc = "block-manager: checkpoint size"; - stats->allocation_size.desc = - "block-manager: file allocation unit size"; - stats->block_reuse_bytes.desc = - "block-manager: file bytes available for reuse"; - stats->block_magic.desc = "block-manager: file magic number"; - stats->block_major.desc = "block-manager: file major version number"; - stats->block_size.desc = "block-manager: file size in bytes"; - stats->block_minor.desc = "block-manager: minor version number"; - stats->btree_checkpoint_generation.desc = - "btree: btree checkpoint generation"; - stats->btree_column_fix.desc = - "btree: column-store fixed-size leaf pages"; - stats->btree_column_internal.desc = - "btree: column-store internal pages"; - stats->btree_column_deleted.desc = - "btree: column-store variable-size deleted values"; - stats->btree_column_variable.desc = - "btree: column-store variable-size leaf pages"; - stats->btree_fixed_len.desc = "btree: fixed-record size"; - stats->btree_maxintlkey.desc = "btree: maximum internal page key size"; - stats->btree_maxintlpage.desc = "btree: maximum internal page size"; - stats->btree_maxleafkey.desc = "btree: maximum leaf page key size"; - stats->btree_maxleafpage.desc = "btree: maximum leaf page size"; - stats->btree_maxleafvalue.desc = "btree: maximum leaf page value size"; - stats->btree_maximum_depth.desc = "btree: maximum tree depth"; - stats->btree_entries.desc = "btree: number of key/value pairs"; - stats->btree_overflow.desc = "btree: overflow pages"; - stats->btree_compact_rewrite.desc = - "btree: pages rewritten by compaction"; - stats->btree_row_internal.desc = "btree: row-store internal pages"; - 
stats->btree_row_leaf.desc = "btree: row-store leaf pages"; - stats->cache_bytes_read.desc = "cache: bytes read into cache"; - stats->cache_bytes_write.desc = "cache: bytes written from cache"; - stats->cache_eviction_checkpoint.desc = - "cache: checkpoint blocked page eviction"; - stats->cache_eviction_fail.desc = - "cache: data source pages selected for eviction unable to be evicted"; - stats->cache_eviction_hazard.desc = - "cache: hazard pointer blocked page eviction"; - stats->cache_inmem_split.desc = "cache: in-memory page splits"; - stats->cache_eviction_internal.desc = "cache: internal pages evicted"; - stats->cache_eviction_dirty.desc = "cache: modified pages evicted"; - stats->cache_read_overflow.desc = - "cache: overflow pages read into cache"; - stats->cache_overflow_value.desc = - "cache: overflow values cached in memory"; - stats->cache_eviction_deepen.desc = - "cache: page split during eviction deepened the tree"; - stats->cache_read.desc = "cache: pages read into cache"; - stats->cache_eviction_split.desc = - "cache: pages split during eviction"; - stats->cache_write.desc = "cache: pages written from cache"; - stats->cache_eviction_clean.desc = "cache: unmodified pages evicted"; - stats->compress_read.desc = "compression: compressed pages read"; - stats->compress_write.desc = "compression: compressed pages written"; - stats->compress_write_fail.desc = - "compression: page written failed to compress"; - stats->compress_write_too_small.desc = - "compression: page written was too small to compress"; - stats->compress_raw_fail_temporary.desc = - "compression: raw compression call failed, additional data available"; - stats->compress_raw_fail.desc = - "compression: raw compression call failed, no additional data available"; - stats->compress_raw_ok.desc = - "compression: raw compression call succeeded"; - stats->cursor_insert_bulk.desc = - "cursor: bulk-loaded cursor-insert calls"; - stats->cursor_create.desc = "cursor: create calls"; - 
stats->cursor_insert_bytes.desc = - "cursor: cursor-insert key and value bytes inserted"; - stats->cursor_remove_bytes.desc = - "cursor: cursor-remove key bytes removed"; - stats->cursor_update_bytes.desc = - "cursor: cursor-update value bytes updated"; - stats->cursor_insert.desc = "cursor: insert calls"; - stats->cursor_next.desc = "cursor: next calls"; - stats->cursor_prev.desc = "cursor: prev calls"; - stats->cursor_remove.desc = "cursor: remove calls"; - stats->cursor_reset.desc = "cursor: reset calls"; - stats->cursor_search.desc = "cursor: search calls"; - stats->cursor_search_near.desc = "cursor: search near calls"; - stats->cursor_update.desc = "cursor: update calls"; - stats->bloom_false_positive.desc = "LSM: bloom filter false positives"; - stats->bloom_hit.desc = "LSM: bloom filter hits"; - stats->bloom_miss.desc = "LSM: bloom filter misses"; - stats->bloom_page_evict.desc = - "LSM: bloom filter pages evicted from cache"; - stats->bloom_page_read.desc = - "LSM: bloom filter pages read into cache"; - stats->bloom_count.desc = "LSM: bloom filters in the LSM tree"; - stats->lsm_chunk_count.desc = "LSM: chunks in the LSM tree"; - stats->lsm_generation_max.desc = - "LSM: highest merge generation in the LSM tree"; - stats->lsm_lookup_no_bloom.desc = - "LSM: queries that could have benefited from a Bloom filter that did not exist"; - stats->lsm_checkpoint_throttle.desc = - "LSM: sleep for LSM checkpoint throttle"; - stats->lsm_merge_throttle.desc = "LSM: sleep for LSM merge throttle"; - stats->bloom_size.desc = "LSM: total size of bloom filters"; - stats->rec_dictionary.desc = "reconciliation: dictionary matches"; - stats->rec_suffix_compression.desc = - "reconciliation: internal page key bytes discarded using suffix compression"; - stats->rec_multiblock_internal.desc = - "reconciliation: internal page multi-block writes"; - stats->rec_overflow_key_internal.desc = - "reconciliation: internal-page overflow keys"; - stats->rec_prefix_compression.desc = - 
"reconciliation: leaf page key bytes discarded using prefix compression"; - stats->rec_multiblock_leaf.desc = - "reconciliation: leaf page multi-block writes"; - stats->rec_overflow_key_leaf.desc = - "reconciliation: leaf-page overflow keys"; - stats->rec_multiblock_max.desc = - "reconciliation: maximum blocks required for a page"; - stats->rec_overflow_value.desc = - "reconciliation: overflow values written"; - stats->rec_page_match.desc = "reconciliation: page checksum matches"; - stats->rec_pages.desc = "reconciliation: page reconciliation calls"; - stats->rec_pages_eviction.desc = - "reconciliation: page reconciliation calls for eviction"; - stats->rec_page_delete.desc = "reconciliation: pages deleted"; - stats->session_compact.desc = "session: object compaction"; - stats->session_cursor_open.desc = "session: open cursor count"; - stats->txn_update_conflict.desc = "transaction: update conflicts"; + for (i = 0; i < WT_COUNTER_SLOTS; ++i) { + handle->stats[i] = &handle->stat_array[i]; + __wt_stat_dsrc_init_single(handle->stats[i]); + } } void -__wt_stat_refresh_dsrc_stats(void *stats_arg) +__wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) { - WT_DSRC_STATS *stats; + stats->block_extension = 0; + stats->block_alloc = 0; + stats->block_free = 0; + stats->block_checkpoint_size = 0; + stats->allocation_size = 0; + stats->block_reuse_bytes = 0; + stats->block_magic = 0; + stats->block_major = 0; + stats->block_size = 0; + stats->block_minor = 0; + /* not clearing btree_checkpoint_generation */ + stats->btree_column_fix = 0; + stats->btree_column_internal = 0; + stats->btree_column_deleted = 0; + stats->btree_column_variable = 0; + stats->btree_fixed_len = 0; + stats->btree_maxintlkey = 0; + stats->btree_maxintlpage = 0; + stats->btree_maxleafkey = 0; + stats->btree_maxleafpage = 0; + stats->btree_maxleafvalue = 0; + stats->btree_maximum_depth = 0; + stats->btree_entries = 0; + stats->btree_overflow = 0; + stats->btree_compact_rewrite = 0; + stats->btree_row_internal = 
0; + stats->btree_row_leaf = 0; + stats->cache_bytes_read = 0; + stats->cache_bytes_write = 0; + stats->cache_eviction_checkpoint = 0; + stats->cache_eviction_fail = 0; + stats->cache_eviction_hazard = 0; + stats->cache_inmem_split = 0; + stats->cache_eviction_internal = 0; + stats->cache_eviction_dirty = 0; + stats->cache_read_overflow = 0; + stats->cache_overflow_value = 0; + stats->cache_eviction_deepen = 0; + stats->cache_read = 0; + stats->cache_eviction_split = 0; + stats->cache_write = 0; + stats->cache_eviction_clean = 0; + stats->compress_read = 0; + stats->compress_write = 0; + stats->compress_write_fail = 0; + stats->compress_write_too_small = 0; + stats->compress_raw_fail_temporary = 0; + stats->compress_raw_fail = 0; + stats->compress_raw_ok = 0; + stats->cursor_insert_bulk = 0; + stats->cursor_create = 0; + stats->cursor_insert_bytes = 0; + stats->cursor_remove_bytes = 0; + stats->cursor_update_bytes = 0; + stats->cursor_insert = 0; + stats->cursor_next = 0; + stats->cursor_prev = 0; + stats->cursor_remove = 0; + stats->cursor_reset = 0; + stats->cursor_restart = 0; + stats->cursor_search = 0; + stats->cursor_search_near = 0; + stats->cursor_update = 0; + stats->bloom_false_positive = 0; + stats->bloom_hit = 0; + stats->bloom_miss = 0; + stats->bloom_page_evict = 0; + stats->bloom_page_read = 0; + stats->bloom_count = 0; + stats->lsm_chunk_count = 0; + stats->lsm_generation_max = 0; + stats->lsm_lookup_no_bloom = 0; + stats->lsm_checkpoint_throttle = 0; + stats->lsm_merge_throttle = 0; + stats->bloom_size = 0; + stats->rec_dictionary = 0; + stats->rec_suffix_compression = 0; + stats->rec_multiblock_internal = 0; + stats->rec_overflow_key_internal = 0; + stats->rec_prefix_compression = 0; + stats->rec_multiblock_leaf = 0; + stats->rec_overflow_key_leaf = 0; + stats->rec_multiblock_max = 0; + stats->rec_overflow_value = 0; + stats->rec_page_match = 0; + stats->rec_pages = 0; + stats->rec_pages_eviction = 0; + stats->rec_page_delete = 0; + 
stats->session_compact = 0; + /* not clearing session_cursor_open */ + stats->txn_update_conflict = 0; +} + +void +__wt_stat_dsrc_clear_all(WT_DSRC_STATS **stats) +{ + u_int i; - stats = (WT_DSRC_STATS *)stats_arg; - stats->block_extension.v = 0; - stats->block_alloc.v = 0; - stats->block_free.v = 0; - stats->block_checkpoint_size.v = 0; - stats->allocation_size.v = 0; - stats->block_reuse_bytes.v = 0; - stats->block_magic.v = 0; - stats->block_major.v = 0; - stats->block_size.v = 0; - stats->block_minor.v = 0; - stats->btree_column_fix.v = 0; - stats->btree_column_internal.v = 0; - stats->btree_column_deleted.v = 0; - stats->btree_column_variable.v = 0; - stats->btree_fixed_len.v = 0; - stats->btree_maxintlkey.v = 0; - stats->btree_maxintlpage.v = 0; - stats->btree_maxleafkey.v = 0; - stats->btree_maxleafpage.v = 0; - stats->btree_maxleafvalue.v = 0; - stats->btree_maximum_depth.v = 0; - stats->btree_entries.v = 0; - stats->btree_overflow.v = 0; - stats->btree_compact_rewrite.v = 0; - stats->btree_row_internal.v = 0; - stats->btree_row_leaf.v = 0; - stats->cache_bytes_read.v = 0; - stats->cache_bytes_write.v = 0; - stats->cache_eviction_checkpoint.v = 0; - stats->cache_eviction_fail.v = 0; - stats->cache_eviction_hazard.v = 0; - stats->cache_inmem_split.v = 0; - stats->cache_eviction_internal.v = 0; - stats->cache_eviction_dirty.v = 0; - stats->cache_read_overflow.v = 0; - stats->cache_overflow_value.v = 0; - stats->cache_eviction_deepen.v = 0; - stats->cache_read.v = 0; - stats->cache_eviction_split.v = 0; - stats->cache_write.v = 0; - stats->cache_eviction_clean.v = 0; - stats->compress_read.v = 0; - stats->compress_write.v = 0; - stats->compress_write_fail.v = 0; - stats->compress_write_too_small.v = 0; - stats->compress_raw_fail_temporary.v = 0; - stats->compress_raw_fail.v = 0; - stats->compress_raw_ok.v = 0; - stats->cursor_insert_bulk.v = 0; - stats->cursor_create.v = 0; - stats->cursor_insert_bytes.v = 0; - stats->cursor_remove_bytes.v = 0; - 
stats->cursor_update_bytes.v = 0; - stats->cursor_insert.v = 0; - stats->cursor_next.v = 0; - stats->cursor_prev.v = 0; - stats->cursor_remove.v = 0; - stats->cursor_reset.v = 0; - stats->cursor_search.v = 0; - stats->cursor_search_near.v = 0; - stats->cursor_update.v = 0; - stats->bloom_false_positive.v = 0; - stats->bloom_hit.v = 0; - stats->bloom_miss.v = 0; - stats->bloom_page_evict.v = 0; - stats->bloom_page_read.v = 0; - stats->bloom_count.v = 0; - stats->lsm_chunk_count.v = 0; - stats->lsm_generation_max.v = 0; - stats->lsm_lookup_no_bloom.v = 0; - stats->lsm_checkpoint_throttle.v = 0; - stats->lsm_merge_throttle.v = 0; - stats->bloom_size.v = 0; - stats->rec_dictionary.v = 0; - stats->rec_suffix_compression.v = 0; - stats->rec_multiblock_internal.v = 0; - stats->rec_overflow_key_internal.v = 0; - stats->rec_prefix_compression.v = 0; - stats->rec_multiblock_leaf.v = 0; - stats->rec_overflow_key_leaf.v = 0; - stats->rec_multiblock_max.v = 0; - stats->rec_overflow_value.v = 0; - stats->rec_page_match.v = 0; - stats->rec_pages.v = 0; - stats->rec_pages_eviction.v = 0; - stats->rec_page_delete.v = 0; - stats->session_compact.v = 0; - stats->txn_update_conflict.v = 0; + for (i = 0; i < WT_COUNTER_SLOTS; ++i) + __wt_stat_dsrc_clear_single(stats[i]); +} + +void +__wt_stat_dsrc_aggregate_single( + WT_DSRC_STATS *from, WT_DSRC_STATS *to) +{ + to->block_extension += from->block_extension; + to->block_alloc += from->block_alloc; + to->block_free += from->block_free; + to->block_checkpoint_size += from->block_checkpoint_size; + to->allocation_size = from->allocation_size; + to->block_reuse_bytes += from->block_reuse_bytes; + to->block_magic = from->block_magic; + to->block_major = from->block_major; + to->block_size += from->block_size; + to->block_minor = from->block_minor; + to->btree_checkpoint_generation += from->btree_checkpoint_generation; + to->btree_column_fix += from->btree_column_fix; + to->btree_column_internal += from->btree_column_internal; + 
to->btree_column_deleted += from->btree_column_deleted; + to->btree_column_variable += from->btree_column_variable; + to->btree_fixed_len = from->btree_fixed_len; + if (from->btree_maxintlkey > to->btree_maxintlkey) + to->btree_maxintlkey = from->btree_maxintlkey; + if (from->btree_maxintlpage > to->btree_maxintlpage) + to->btree_maxintlpage = from->btree_maxintlpage; + if (from->btree_maxleafkey > to->btree_maxleafkey) + to->btree_maxleafkey = from->btree_maxleafkey; + if (from->btree_maxleafpage > to->btree_maxleafpage) + to->btree_maxleafpage = from->btree_maxleafpage; + if (from->btree_maxleafvalue > to->btree_maxleafvalue) + to->btree_maxleafvalue = from->btree_maxleafvalue; + if (from->btree_maximum_depth > to->btree_maximum_depth) + to->btree_maximum_depth = from->btree_maximum_depth; + to->btree_entries += from->btree_entries; + to->btree_overflow += from->btree_overflow; + to->btree_compact_rewrite += from->btree_compact_rewrite; + to->btree_row_internal += from->btree_row_internal; + to->btree_row_leaf += from->btree_row_leaf; + to->cache_bytes_read += from->cache_bytes_read; + to->cache_bytes_write += from->cache_bytes_write; + to->cache_eviction_checkpoint += from->cache_eviction_checkpoint; + to->cache_eviction_fail += from->cache_eviction_fail; + to->cache_eviction_hazard += from->cache_eviction_hazard; + to->cache_inmem_split += from->cache_inmem_split; + to->cache_eviction_internal += from->cache_eviction_internal; + to->cache_eviction_dirty += from->cache_eviction_dirty; + to->cache_read_overflow += from->cache_read_overflow; + to->cache_overflow_value += from->cache_overflow_value; + to->cache_eviction_deepen += from->cache_eviction_deepen; + to->cache_read += from->cache_read; + to->cache_eviction_split += from->cache_eviction_split; + to->cache_write += from->cache_write; + to->cache_eviction_clean += from->cache_eviction_clean; + to->compress_read += from->compress_read; + to->compress_write += from->compress_write; + to->compress_write_fail += 
from->compress_write_fail; + to->compress_write_too_small += from->compress_write_too_small; + to->compress_raw_fail_temporary += from->compress_raw_fail_temporary; + to->compress_raw_fail += from->compress_raw_fail; + to->compress_raw_ok += from->compress_raw_ok; + to->cursor_insert_bulk += from->cursor_insert_bulk; + to->cursor_create += from->cursor_create; + to->cursor_insert_bytes += from->cursor_insert_bytes; + to->cursor_remove_bytes += from->cursor_remove_bytes; + to->cursor_update_bytes += from->cursor_update_bytes; + to->cursor_insert += from->cursor_insert; + to->cursor_next += from->cursor_next; + to->cursor_prev += from->cursor_prev; + to->cursor_remove += from->cursor_remove; + to->cursor_reset += from->cursor_reset; + to->cursor_restart += from->cursor_restart; + to->cursor_search += from->cursor_search; + to->cursor_search_near += from->cursor_search_near; + to->cursor_update += from->cursor_update; + to->bloom_false_positive += from->bloom_false_positive; + to->bloom_hit += from->bloom_hit; + to->bloom_miss += from->bloom_miss; + to->bloom_page_evict += from->bloom_page_evict; + to->bloom_page_read += from->bloom_page_read; + to->bloom_count += from->bloom_count; + to->lsm_chunk_count += from->lsm_chunk_count; + if (from->lsm_generation_max > to->lsm_generation_max) + to->lsm_generation_max = from->lsm_generation_max; + to->lsm_lookup_no_bloom += from->lsm_lookup_no_bloom; + to->lsm_checkpoint_throttle += from->lsm_checkpoint_throttle; + to->lsm_merge_throttle += from->lsm_merge_throttle; + to->bloom_size += from->bloom_size; + to->rec_dictionary += from->rec_dictionary; + to->rec_suffix_compression += from->rec_suffix_compression; + to->rec_multiblock_internal += from->rec_multiblock_internal; + to->rec_overflow_key_internal += from->rec_overflow_key_internal; + to->rec_prefix_compression += from->rec_prefix_compression; + to->rec_multiblock_leaf += from->rec_multiblock_leaf; + to->rec_overflow_key_leaf += from->rec_overflow_key_leaf; + if 
(from->rec_multiblock_max > to->rec_multiblock_max) + to->rec_multiblock_max = from->rec_multiblock_max; + to->rec_overflow_value += from->rec_overflow_value; + to->rec_page_match += from->rec_page_match; + to->rec_pages += from->rec_pages; + to->rec_pages_eviction += from->rec_pages_eviction; + to->rec_page_delete += from->rec_page_delete; + to->session_compact += from->session_compact; + to->session_cursor_open += from->session_cursor_open; + to->txn_update_conflict += from->txn_update_conflict; } void -__wt_stat_aggregate_dsrc_stats(const void *child, const void *parent) +__wt_stat_dsrc_aggregate( + WT_DSRC_STATS **from, WT_DSRC_STATS *to) { - WT_DSRC_STATS *c, *p; + int64_t v; - c = (WT_DSRC_STATS *)child; - p = (WT_DSRC_STATS *)parent; - p->block_extension.v += c->block_extension.v; - p->block_alloc.v += c->block_alloc.v; - p->block_free.v += c->block_free.v; - p->block_checkpoint_size.v += c->block_checkpoint_size.v; - p->block_reuse_bytes.v += c->block_reuse_bytes.v; - p->block_size.v += c->block_size.v; - p->btree_checkpoint_generation.v += c->btree_checkpoint_generation.v; - p->btree_column_fix.v += c->btree_column_fix.v; - p->btree_column_internal.v += c->btree_column_internal.v; - p->btree_column_deleted.v += c->btree_column_deleted.v; - p->btree_column_variable.v += c->btree_column_variable.v; - if (c->btree_maxintlkey.v > p->btree_maxintlkey.v) - p->btree_maxintlkey.v = c->btree_maxintlkey.v; - if (c->btree_maxintlpage.v > p->btree_maxintlpage.v) - p->btree_maxintlpage.v = c->btree_maxintlpage.v; - if (c->btree_maxleafkey.v > p->btree_maxleafkey.v) - p->btree_maxleafkey.v = c->btree_maxleafkey.v; - if (c->btree_maxleafpage.v > p->btree_maxleafpage.v) - p->btree_maxleafpage.v = c->btree_maxleafpage.v; - if (c->btree_maxleafvalue.v > p->btree_maxleafvalue.v) - p->btree_maxleafvalue.v = c->btree_maxleafvalue.v; - if (c->btree_maximum_depth.v > p->btree_maximum_depth.v) - p->btree_maximum_depth.v = c->btree_maximum_depth.v; - p->btree_entries.v += 
c->btree_entries.v; - p->btree_overflow.v += c->btree_overflow.v; - p->btree_compact_rewrite.v += c->btree_compact_rewrite.v; - p->btree_row_internal.v += c->btree_row_internal.v; - p->btree_row_leaf.v += c->btree_row_leaf.v; - p->cache_bytes_read.v += c->cache_bytes_read.v; - p->cache_bytes_write.v += c->cache_bytes_write.v; - p->cache_eviction_checkpoint.v += c->cache_eviction_checkpoint.v; - p->cache_eviction_fail.v += c->cache_eviction_fail.v; - p->cache_eviction_hazard.v += c->cache_eviction_hazard.v; - p->cache_inmem_split.v += c->cache_inmem_split.v; - p->cache_eviction_internal.v += c->cache_eviction_internal.v; - p->cache_eviction_dirty.v += c->cache_eviction_dirty.v; - p->cache_read_overflow.v += c->cache_read_overflow.v; - p->cache_overflow_value.v += c->cache_overflow_value.v; - p->cache_eviction_deepen.v += c->cache_eviction_deepen.v; - p->cache_read.v += c->cache_read.v; - p->cache_eviction_split.v += c->cache_eviction_split.v; - p->cache_write.v += c->cache_write.v; - p->cache_eviction_clean.v += c->cache_eviction_clean.v; - p->compress_read.v += c->compress_read.v; - p->compress_write.v += c->compress_write.v; - p->compress_write_fail.v += c->compress_write_fail.v; - p->compress_write_too_small.v += c->compress_write_too_small.v; - p->compress_raw_fail_temporary.v += c->compress_raw_fail_temporary.v; - p->compress_raw_fail.v += c->compress_raw_fail.v; - p->compress_raw_ok.v += c->compress_raw_ok.v; - p->cursor_insert_bulk.v += c->cursor_insert_bulk.v; - p->cursor_create.v += c->cursor_create.v; - p->cursor_insert_bytes.v += c->cursor_insert_bytes.v; - p->cursor_remove_bytes.v += c->cursor_remove_bytes.v; - p->cursor_update_bytes.v += c->cursor_update_bytes.v; - p->cursor_insert.v += c->cursor_insert.v; - p->cursor_next.v += c->cursor_next.v; - p->cursor_prev.v += c->cursor_prev.v; - p->cursor_remove.v += c->cursor_remove.v; - p->cursor_reset.v += c->cursor_reset.v; - p->cursor_search.v += c->cursor_search.v; - p->cursor_search_near.v += 
c->cursor_search_near.v; - p->cursor_update.v += c->cursor_update.v; - p->bloom_false_positive.v += c->bloom_false_positive.v; - p->bloom_hit.v += c->bloom_hit.v; - p->bloom_miss.v += c->bloom_miss.v; - p->bloom_page_evict.v += c->bloom_page_evict.v; - p->bloom_page_read.v += c->bloom_page_read.v; - p->bloom_count.v += c->bloom_count.v; - p->lsm_chunk_count.v += c->lsm_chunk_count.v; - if (c->lsm_generation_max.v > p->lsm_generation_max.v) - p->lsm_generation_max.v = c->lsm_generation_max.v; - p->lsm_lookup_no_bloom.v += c->lsm_lookup_no_bloom.v; - p->lsm_checkpoint_throttle.v += c->lsm_checkpoint_throttle.v; - p->lsm_merge_throttle.v += c->lsm_merge_throttle.v; - p->bloom_size.v += c->bloom_size.v; - p->rec_dictionary.v += c->rec_dictionary.v; - p->rec_suffix_compression.v += c->rec_suffix_compression.v; - p->rec_multiblock_internal.v += c->rec_multiblock_internal.v; - p->rec_overflow_key_internal.v += c->rec_overflow_key_internal.v; - p->rec_prefix_compression.v += c->rec_prefix_compression.v; - p->rec_multiblock_leaf.v += c->rec_multiblock_leaf.v; - p->rec_overflow_key_leaf.v += c->rec_overflow_key_leaf.v; - if (c->rec_multiblock_max.v > p->rec_multiblock_max.v) - p->rec_multiblock_max.v = c->rec_multiblock_max.v; - p->rec_overflow_value.v += c->rec_overflow_value.v; - p->rec_page_match.v += c->rec_page_match.v; - p->rec_pages.v += c->rec_pages.v; - p->rec_pages_eviction.v += c->rec_pages_eviction.v; - p->rec_page_delete.v += c->rec_page_delete.v; - p->session_compact.v += c->session_compact.v; - p->session_cursor_open.v += c->session_cursor_open.v; - p->txn_update_conflict.v += c->txn_update_conflict.v; + to->block_extension += WT_STAT_READ(from, block_extension); + to->block_alloc += WT_STAT_READ(from, block_alloc); + to->block_free += WT_STAT_READ(from, block_free); + to->block_checkpoint_size += + WT_STAT_READ(from, block_checkpoint_size); + to->allocation_size = from[0]->allocation_size; + to->block_reuse_bytes += WT_STAT_READ(from, block_reuse_bytes); + 
to->block_magic = from[0]->block_magic; + to->block_major = from[0]->block_major; + to->block_size += WT_STAT_READ(from, block_size); + to->block_minor = from[0]->block_minor; + to->btree_checkpoint_generation += + WT_STAT_READ(from, btree_checkpoint_generation); + to->btree_column_fix += WT_STAT_READ(from, btree_column_fix); + to->btree_column_internal += + WT_STAT_READ(from, btree_column_internal); + to->btree_column_deleted += WT_STAT_READ(from, btree_column_deleted); + to->btree_column_variable += + WT_STAT_READ(from, btree_column_variable); + to->btree_fixed_len = from[0]->btree_fixed_len; + if ((v = WT_STAT_READ(from, btree_maxintlkey)) > + to->btree_maxintlkey) + to->btree_maxintlkey = v; + if ((v = WT_STAT_READ(from, btree_maxintlpage)) > + to->btree_maxintlpage) + to->btree_maxintlpage = v; + if ((v = WT_STAT_READ(from, btree_maxleafkey)) > + to->btree_maxleafkey) + to->btree_maxleafkey = v; + if ((v = WT_STAT_READ(from, btree_maxleafpage)) > + to->btree_maxleafpage) + to->btree_maxleafpage = v; + if ((v = WT_STAT_READ(from, btree_maxleafvalue)) > + to->btree_maxleafvalue) + to->btree_maxleafvalue = v; + if ((v = WT_STAT_READ(from, btree_maximum_depth)) > + to->btree_maximum_depth) + to->btree_maximum_depth = v; + to->btree_entries += WT_STAT_READ(from, btree_entries); + to->btree_overflow += WT_STAT_READ(from, btree_overflow); + to->btree_compact_rewrite += + WT_STAT_READ(from, btree_compact_rewrite); + to->btree_row_internal += WT_STAT_READ(from, btree_row_internal); + to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf); + to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read); + to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write); + to->cache_eviction_checkpoint += + WT_STAT_READ(from, cache_eviction_checkpoint); + to->cache_eviction_fail += WT_STAT_READ(from, cache_eviction_fail); + to->cache_eviction_hazard += + WT_STAT_READ(from, cache_eviction_hazard); + to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split); + 
to->cache_eviction_internal += + WT_STAT_READ(from, cache_eviction_internal); + to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty); + to->cache_read_overflow += WT_STAT_READ(from, cache_read_overflow); + to->cache_overflow_value += WT_STAT_READ(from, cache_overflow_value); + to->cache_eviction_deepen += + WT_STAT_READ(from, cache_eviction_deepen); + to->cache_read += WT_STAT_READ(from, cache_read); + to->cache_eviction_split += WT_STAT_READ(from, cache_eviction_split); + to->cache_write += WT_STAT_READ(from, cache_write); + to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean); + to->compress_read += WT_STAT_READ(from, compress_read); + to->compress_write += WT_STAT_READ(from, compress_write); + to->compress_write_fail += WT_STAT_READ(from, compress_write_fail); + to->compress_write_too_small += + WT_STAT_READ(from, compress_write_too_small); + to->compress_raw_fail_temporary += + WT_STAT_READ(from, compress_raw_fail_temporary); + to->compress_raw_fail += WT_STAT_READ(from, compress_raw_fail); + to->compress_raw_ok += WT_STAT_READ(from, compress_raw_ok); + to->cursor_insert_bulk += WT_STAT_READ(from, cursor_insert_bulk); + to->cursor_create += WT_STAT_READ(from, cursor_create); + to->cursor_insert_bytes += WT_STAT_READ(from, cursor_insert_bytes); + to->cursor_remove_bytes += WT_STAT_READ(from, cursor_remove_bytes); + to->cursor_update_bytes += WT_STAT_READ(from, cursor_update_bytes); + to->cursor_insert += WT_STAT_READ(from, cursor_insert); + to->cursor_next += WT_STAT_READ(from, cursor_next); + to->cursor_prev += WT_STAT_READ(from, cursor_prev); + to->cursor_remove += WT_STAT_READ(from, cursor_remove); + to->cursor_reset += WT_STAT_READ(from, cursor_reset); + to->cursor_restart += WT_STAT_READ(from, cursor_restart); + to->cursor_search += WT_STAT_READ(from, cursor_search); + to->cursor_search_near += WT_STAT_READ(from, cursor_search_near); + to->cursor_update += WT_STAT_READ(from, cursor_update); + to->bloom_false_positive += 
WT_STAT_READ(from, bloom_false_positive); + to->bloom_hit += WT_STAT_READ(from, bloom_hit); + to->bloom_miss += WT_STAT_READ(from, bloom_miss); + to->bloom_page_evict += WT_STAT_READ(from, bloom_page_evict); + to->bloom_page_read += WT_STAT_READ(from, bloom_page_read); + to->bloom_count += WT_STAT_READ(from, bloom_count); + to->lsm_chunk_count += WT_STAT_READ(from, lsm_chunk_count); + if ((v = WT_STAT_READ(from, lsm_generation_max)) > + to->lsm_generation_max) + to->lsm_generation_max = v; + to->lsm_lookup_no_bloom += WT_STAT_READ(from, lsm_lookup_no_bloom); + to->lsm_checkpoint_throttle += + WT_STAT_READ(from, lsm_checkpoint_throttle); + to->lsm_merge_throttle += WT_STAT_READ(from, lsm_merge_throttle); + to->bloom_size += WT_STAT_READ(from, bloom_size); + to->rec_dictionary += WT_STAT_READ(from, rec_dictionary); + to->rec_suffix_compression += + WT_STAT_READ(from, rec_suffix_compression); + to->rec_multiblock_internal += + WT_STAT_READ(from, rec_multiblock_internal); + to->rec_overflow_key_internal += + WT_STAT_READ(from, rec_overflow_key_internal); + to->rec_prefix_compression += + WT_STAT_READ(from, rec_prefix_compression); + to->rec_multiblock_leaf += WT_STAT_READ(from, rec_multiblock_leaf); + to->rec_overflow_key_leaf += + WT_STAT_READ(from, rec_overflow_key_leaf); + if ((v = WT_STAT_READ(from, rec_multiblock_max)) > + to->rec_multiblock_max) + to->rec_multiblock_max = v; + to->rec_overflow_value += WT_STAT_READ(from, rec_overflow_value); + to->rec_page_match += WT_STAT_READ(from, rec_page_match); + to->rec_pages += WT_STAT_READ(from, rec_pages); + to->rec_pages_eviction += WT_STAT_READ(from, rec_pages_eviction); + to->rec_page_delete += WT_STAT_READ(from, rec_page_delete); + to->session_compact += WT_STAT_READ(from, session_compact); + to->session_cursor_open += WT_STAT_READ(from, session_cursor_open); + to->txn_update_conflict += WT_STAT_READ(from, txn_update_conflict); +} + +static const char * const __stats_connection_desc[] = { + "async: number of 
allocation state races", + "async: number of operation slots viewed for allocation", + "async: current work queue length", + "async: number of flush calls", + "async: number of times operation allocation failed", + "async: maximum work queue length", + "async: number of times worker found no work", + "async: total allocations", + "async: total compact calls", + "async: total insert calls", + "async: total remove calls", + "async: total search calls", + "async: total update calls", + "block-manager: mapped bytes read", + "block-manager: bytes read", + "block-manager: bytes written", + "block-manager: mapped blocks read", + "block-manager: blocks pre-loaded", + "block-manager: blocks read", + "block-manager: blocks written", + "cache: tracked dirty bytes in the cache", + "cache: tracked bytes belonging to internal pages in the cache", + "cache: bytes currently in the cache", + "cache: tracked bytes belonging to leaf pages in the cache", + "cache: maximum bytes configured", + "cache: tracked bytes belonging to overflow pages in the cache", + "cache: bytes read into cache", + "cache: bytes written from cache", + "cache: pages evicted by application threads", + "cache: checkpoint blocked page eviction", + "cache: unmodified pages evicted", + "cache: page split during eviction deepened the tree", + "cache: modified pages evicted", + "cache: pages selected for eviction unable to be evicted", + "cache: pages evicted because they exceeded the in-memory maximum", + "cache: pages evicted because they had chains of deleted items", + "cache: failed eviction of pages that exceeded the in-memory maximum", + "cache: hazard pointer blocked page eviction", + "cache: internal pages evicted", + "cache: maximum page size at eviction", + "cache: eviction server candidate queue empty when topping up", + "cache: eviction server candidate queue not empty when topping up", + "cache: eviction server evicting pages", + "cache: eviction server populating queue, but not evicting pages", + 
"cache: eviction server unable to reach eviction goal", + "cache: pages split during eviction", + "cache: pages walked for eviction", + "cache: eviction worker thread evicting pages", + "cache: in-memory page splits", + "cache: percentage overhead", + "cache: tracked dirty pages in the cache", + "cache: pages currently held in the cache", + "cache: pages read into cache", + "cache: pages written from cache", + "connection: pthread mutex condition wait calls", + "cursor: cursor create calls", + "cursor: cursor insert calls", + "cursor: cursor next calls", + "cursor: cursor prev calls", + "cursor: cursor remove calls", + "cursor: cursor reset calls", + "cursor: cursor restarted searches", + "cursor: cursor search calls", + "cursor: cursor search near calls", + "cursor: cursor update calls", + "data-handle: connection data handles currently active", + "data-handle: session dhandles swept", + "data-handle: session sweep attempts", + "data-handle: connection sweep dhandles closed", + "data-handle: connection sweep candidate became referenced", + "data-handle: connection sweep dhandles removed from hash list", + "data-handle: connection sweep time-of-death sets", + "data-handle: connection sweeps", + "connection: files currently open", + "log: total log buffer size", + "log: log bytes of payload data", + "log: log bytes written", + "log: yields waiting for previous log file close", + "log: total size of compressed records", + "log: total in-memory size of compressed records", + "log: log records too small to compress", + "log: log records not compressed", + "log: log records compressed", + "log: maximum log file size", + "log: pre-allocated log files prepared", + "log: number of pre-allocated log files to create", + "log: pre-allocated log files used", + "log: log release advances write LSN", + "log: records processed by log scan", + "log: log scan records requiring two reads", + "log: log scan operations", + "log: consolidated slot closures", + "log: written slots 
coalesced", + "log: logging bytes consolidated", + "log: consolidated slot joins", + "log: consolidated slot join races", + "log: record size exceeded maximum", + "log: failed to find a slot large enough for record", + "log: consolidated slot join transitions", + "log: log sync operations", + "log: log sync_dir operations", + "log: log server thread advances write LSN", + "log: log write operations", + "LSM: sleep for LSM checkpoint throttle", + "LSM: sleep for LSM merge throttle", + "LSM: rows merged in an LSM tree", + "LSM: application work units currently queued", + "LSM: merge work units currently queued", + "LSM: tree queue hit maximum", + "LSM: switch work units currently queued", + "LSM: tree maintenance operations scheduled", + "LSM: tree maintenance operations discarded", + "LSM: tree maintenance operations executed", + "connection: memory allocations", + "connection: memory frees", + "connection: memory re-allocations", + "thread-yield: page acquire busy blocked", + "thread-yield: page acquire eviction blocked", + "thread-yield: page acquire locked blocked", + "thread-yield: page acquire read blocked", + "thread-yield: page acquire time sleeping (usecs)", + "connection: total read I/Os", + "reconciliation: page reconciliation calls", + "reconciliation: page reconciliation calls for eviction", + "reconciliation: split bytes currently awaiting free", + "reconciliation: split objects currently awaiting free", + "connection: pthread mutex shared lock read-lock calls", + "connection: pthread mutex shared lock write-lock calls", + "session: open cursor count", + "session: open session count", + "transaction: transaction begins", + "transaction: transaction checkpoints", + "transaction: transaction checkpoint generation", + "transaction: transaction checkpoint currently running", + "transaction: transaction checkpoint max time (msecs)", + "transaction: transaction checkpoint min time (msecs)", + "transaction: transaction checkpoint most recent time (msecs)", + 
"transaction: transaction checkpoint total time (msecs)", + "transaction: transactions committed", + "transaction: transaction failures due to cache overflow", + "transaction: transaction range of IDs currently pinned by a checkpoint", + "transaction: transaction range of IDs currently pinned", + "transaction: transactions rolled back", + "transaction: transaction sync calls", + "connection: total write I/Os", +}; + +const char * +__wt_stat_connection_desc(int slot) +{ + return (__stats_connection_desc[slot]); } void -__wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats) +__wt_stat_connection_init_single(WT_CONNECTION_STATS *stats) { - /* Clear, so can also be called for reinitialization. */ memset(stats, 0, sizeof(*stats)); +} + +void +__wt_stat_connection_init(WT_CONNECTION_IMPL *handle) +{ + int i; - stats->async_cur_queue.desc = "async: current work queue length"; - stats->async_max_queue.desc = "async: maximum work queue length"; - stats->async_alloc_race.desc = - "async: number of allocation state races"; - stats->async_flush.desc = "async: number of flush calls"; - stats->async_alloc_view.desc = - "async: number of operation slots viewed for allocation"; - stats->async_full.desc = - "async: number of times operation allocation failed"; - stats->async_nowork.desc = - "async: number of times worker found no work"; - stats->async_op_alloc.desc = "async: total allocations"; - stats->async_op_compact.desc = "async: total compact calls"; - stats->async_op_insert.desc = "async: total insert calls"; - stats->async_op_remove.desc = "async: total remove calls"; - stats->async_op_search.desc = "async: total search calls"; - stats->async_op_update.desc = "async: total update calls"; - stats->block_preload.desc = "block-manager: blocks pre-loaded"; - stats->block_read.desc = "block-manager: blocks read"; - stats->block_write.desc = "block-manager: blocks written"; - stats->block_byte_read.desc = "block-manager: bytes read"; - stats->block_byte_write.desc = 
"block-manager: bytes written"; - stats->block_map_read.desc = "block-manager: mapped blocks read"; - stats->block_byte_map_read.desc = "block-manager: mapped bytes read"; - stats->cache_bytes_inuse.desc = "cache: bytes currently in the cache"; - stats->cache_bytes_read.desc = "cache: bytes read into cache"; - stats->cache_bytes_write.desc = "cache: bytes written from cache"; - stats->cache_eviction_checkpoint.desc = - "cache: checkpoint blocked page eviction"; - stats->cache_eviction_queue_empty.desc = - "cache: eviction server candidate queue empty when topping up"; - stats->cache_eviction_queue_not_empty.desc = - "cache: eviction server candidate queue not empty when topping up"; - stats->cache_eviction_server_evicting.desc = - "cache: eviction server evicting pages"; - stats->cache_eviction_server_not_evicting.desc = - "cache: eviction server populating queue, but not evicting pages"; - stats->cache_eviction_slow.desc = - "cache: eviction server unable to reach eviction goal"; - stats->cache_eviction_worker_evicting.desc = - "cache: eviction worker thread evicting pages"; - stats->cache_eviction_force_fail.desc = - "cache: failed eviction of pages that exceeded the in-memory maximum"; - stats->cache_eviction_hazard.desc = - "cache: hazard pointer blocked page eviction"; - stats->cache_inmem_split.desc = "cache: in-memory page splits"; - stats->cache_eviction_internal.desc = "cache: internal pages evicted"; - stats->cache_bytes_max.desc = "cache: maximum bytes configured"; - stats->cache_eviction_maximum_page_size.desc = - "cache: maximum page size at eviction"; - stats->cache_eviction_dirty.desc = "cache: modified pages evicted"; - stats->cache_eviction_deepen.desc = - "cache: page split during eviction deepened the tree"; - stats->cache_pages_inuse.desc = - "cache: pages currently held in the cache"; - stats->cache_eviction_force.desc = - "cache: pages evicted because they exceeded the in-memory maximum"; - stats->cache_eviction_force_delete.desc = - "cache: 
pages evicted because they had chains of deleted items"; - stats->cache_eviction_app.desc = - "cache: pages evicted by application threads"; - stats->cache_read.desc = "cache: pages read into cache"; - stats->cache_eviction_fail.desc = - "cache: pages selected for eviction unable to be evicted"; - stats->cache_eviction_split.desc = - "cache: pages split during eviction"; - stats->cache_eviction_walk.desc = "cache: pages walked for eviction"; - stats->cache_write.desc = "cache: pages written from cache"; - stats->cache_overhead.desc = "cache: percentage overhead"; - stats->cache_bytes_internal.desc = - "cache: tracked bytes belonging to internal pages in the cache"; - stats->cache_bytes_leaf.desc = - "cache: tracked bytes belonging to leaf pages in the cache"; - stats->cache_bytes_overflow.desc = - "cache: tracked bytes belonging to overflow pages in the cache"; - stats->cache_bytes_dirty.desc = - "cache: tracked dirty bytes in the cache"; - stats->cache_pages_dirty.desc = - "cache: tracked dirty pages in the cache"; - stats->cache_eviction_clean.desc = "cache: unmodified pages evicted"; - stats->file_open.desc = "connection: files currently open"; - stats->memory_allocation.desc = "connection: memory allocations"; - stats->memory_free.desc = "connection: memory frees"; - stats->memory_grow.desc = "connection: memory re-allocations"; - stats->cond_wait.desc = - "connection: pthread mutex condition wait calls"; - stats->rwlock_read.desc = - "connection: pthread mutex shared lock read-lock calls"; - stats->rwlock_write.desc = - "connection: pthread mutex shared lock write-lock calls"; - stats->read_io.desc = "connection: total read I/Os"; - stats->write_io.desc = "connection: total write I/Os"; - stats->cursor_create.desc = "cursor: cursor create calls"; - stats->cursor_insert.desc = "cursor: cursor insert calls"; - stats->cursor_next.desc = "cursor: cursor next calls"; - stats->cursor_prev.desc = "cursor: cursor prev calls"; - stats->cursor_remove.desc = "cursor: 
cursor remove calls"; - stats->cursor_reset.desc = "cursor: cursor reset calls"; - stats->cursor_search.desc = "cursor: cursor search calls"; - stats->cursor_search_near.desc = "cursor: cursor search near calls"; - stats->cursor_update.desc = "cursor: cursor update calls"; - stats->dh_conn_handle_count.desc = - "data-handle: connection data handles currently active"; - stats->dh_sweep_ref.desc = - "data-handle: connection sweep candidate became referenced"; - stats->dh_sweep_close.desc = - "data-handle: connection sweep dhandles closed"; - stats->dh_sweep_remove.desc = - "data-handle: connection sweep dhandles removed from hash list"; - stats->dh_sweep_tod.desc = - "data-handle: connection sweep time-of-death sets"; - stats->dh_sweeps.desc = "data-handle: connection sweeps"; - stats->dh_session_handles.desc = "data-handle: session dhandles swept"; - stats->dh_session_sweeps.desc = "data-handle: session sweep attempts"; - stats->log_slot_closes.desc = "log: consolidated slot closures"; - stats->log_slot_races.desc = "log: consolidated slot join races"; - stats->log_slot_transitions.desc = - "log: consolidated slot join transitions"; - stats->log_slot_joins.desc = "log: consolidated slot joins"; - stats->log_slot_toosmall.desc = - "log: failed to find a slot large enough for record"; - stats->log_bytes_payload.desc = "log: log bytes of payload data"; - stats->log_bytes_written.desc = "log: log bytes written"; - stats->log_compress_writes.desc = "log: log records compressed"; - stats->log_compress_write_fails.desc = - "log: log records not compressed"; - stats->log_compress_small.desc = - "log: log records too small to compress"; - stats->log_release_write_lsn.desc = - "log: log release advances write LSN"; - stats->log_scans.desc = "log: log scan operations"; - stats->log_scan_rereads.desc = - "log: log scan records requiring two reads"; - stats->log_write_lsn.desc = - "log: log server thread advances write LSN"; - stats->log_sync.desc = "log: log sync operations"; - 
stats->log_sync_dir.desc = "log: log sync_dir operations"; - stats->log_writes.desc = "log: log write operations"; - stats->log_slot_consolidated.desc = "log: logging bytes consolidated"; - stats->log_max_filesize.desc = "log: maximum log file size"; - stats->log_prealloc_max.desc = - "log: number of pre-allocated log files to create"; - stats->log_prealloc_files.desc = - "log: pre-allocated log files prepared"; - stats->log_prealloc_used.desc = "log: pre-allocated log files used"; - stats->log_slot_toobig.desc = "log: record size exceeded maximum"; - stats->log_scan_records.desc = "log: records processed by log scan"; - stats->log_compress_mem.desc = - "log: total in-memory size of compressed records"; - stats->log_buffer_size.desc = "log: total log buffer size"; - stats->log_compress_len.desc = "log: total size of compressed records"; - stats->log_slot_coalesced.desc = "log: written slots coalesced"; - stats->log_close_yields.desc = - "log: yields waiting for previous log file close"; - stats->lsm_work_queue_app.desc = - "LSM: application work units currently queued"; - stats->lsm_work_queue_manager.desc = - "LSM: merge work units currently queued"; - stats->lsm_rows_merged.desc = "LSM: rows merged in an LSM tree"; - stats->lsm_checkpoint_throttle.desc = - "LSM: sleep for LSM checkpoint throttle"; - stats->lsm_merge_throttle.desc = "LSM: sleep for LSM merge throttle"; - stats->lsm_work_queue_switch.desc = - "LSM: switch work units currently queued"; - stats->lsm_work_units_discarded.desc = - "LSM: tree maintenance operations discarded"; - stats->lsm_work_units_done.desc = - "LSM: tree maintenance operations executed"; - stats->lsm_work_units_created.desc = - "LSM: tree maintenance operations scheduled"; - stats->lsm_work_queue_max.desc = "LSM: tree queue hit maximum"; - stats->rec_pages.desc = "reconciliation: page reconciliation calls"; - stats->rec_pages_eviction.desc = - "reconciliation: page reconciliation calls for eviction"; - 
stats->rec_split_stashed_bytes.desc = - "reconciliation: split bytes currently awaiting free"; - stats->rec_split_stashed_objects.desc = - "reconciliation: split objects currently awaiting free"; - stats->session_cursor_open.desc = "session: open cursor count"; - stats->session_open.desc = "session: open session count"; - stats->page_busy_blocked.desc = - "thread-yield: page acquire busy blocked"; - stats->page_forcible_evict_blocked.desc = - "thread-yield: page acquire eviction blocked"; - stats->page_locked_blocked.desc = - "thread-yield: page acquire locked blocked"; - stats->page_read_blocked.desc = - "thread-yield: page acquire read blocked"; - stats->page_sleep.desc = - "thread-yield: page acquire time sleeping (usecs)"; - stats->txn_begin.desc = "transaction: transaction begins"; - stats->txn_checkpoint_running.desc = - "transaction: transaction checkpoint currently running"; - stats->txn_checkpoint_generation.desc = - "transaction: transaction checkpoint generation"; - stats->txn_checkpoint_time_max.desc = - "transaction: transaction checkpoint max time (msecs)"; - stats->txn_checkpoint_time_min.desc = - "transaction: transaction checkpoint min time (msecs)"; - stats->txn_checkpoint_time_recent.desc = - "transaction: transaction checkpoint most recent time (msecs)"; - stats->txn_checkpoint_time_total.desc = - "transaction: transaction checkpoint total time (msecs)"; - stats->txn_checkpoint.desc = "transaction: transaction checkpoints"; - stats->txn_fail_cache.desc = - "transaction: transaction failures due to cache overflow"; - stats->txn_pinned_range.desc = - "transaction: transaction range of IDs currently pinned"; - stats->txn_pinned_checkpoint_range.desc = - "transaction: transaction range of IDs currently pinned by a checkpoint"; - stats->txn_sync.desc = "transaction: transaction sync calls"; - stats->txn_commit.desc = "transaction: transactions committed"; - stats->txn_rollback.desc = "transaction: transactions rolled back"; + for (i = 0; i < 
WT_COUNTER_SLOTS; ++i) { + handle->stats[i] = &handle->stat_array[i]; + __wt_stat_connection_init_single(handle->stats[i]); + } } void -__wt_stat_refresh_connection_stats(void *stats_arg) +__wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) { - WT_CONNECTION_STATS *stats; + stats->async_cur_queue = 0; + /* not clearing async_max_queue */ + stats->async_alloc_race = 0; + stats->async_flush = 0; + stats->async_alloc_view = 0; + stats->async_full = 0; + stats->async_nowork = 0; + stats->async_op_alloc = 0; + stats->async_op_compact = 0; + stats->async_op_insert = 0; + stats->async_op_remove = 0; + stats->async_op_search = 0; + stats->async_op_update = 0; + stats->block_preload = 0; + stats->block_read = 0; + stats->block_write = 0; + stats->block_byte_read = 0; + stats->block_byte_write = 0; + stats->block_map_read = 0; + stats->block_byte_map_read = 0; + /* not clearing cache_bytes_inuse */ + stats->cache_bytes_read = 0; + stats->cache_bytes_write = 0; + stats->cache_eviction_checkpoint = 0; + stats->cache_eviction_queue_empty = 0; + stats->cache_eviction_queue_not_empty = 0; + stats->cache_eviction_server_evicting = 0; + stats->cache_eviction_server_not_evicting = 0; + stats->cache_eviction_slow = 0; + stats->cache_eviction_worker_evicting = 0; + stats->cache_eviction_force_fail = 0; + stats->cache_eviction_hazard = 0; + stats->cache_inmem_split = 0; + stats->cache_eviction_internal = 0; + /* not clearing cache_bytes_max */ + /* not clearing cache_eviction_maximum_page_size */ + stats->cache_eviction_dirty = 0; + stats->cache_eviction_deepen = 0; + /* not clearing cache_pages_inuse */ + stats->cache_eviction_force = 0; + stats->cache_eviction_force_delete = 0; + stats->cache_eviction_app = 0; + stats->cache_read = 0; + stats->cache_eviction_fail = 0; + stats->cache_eviction_split = 0; + stats->cache_eviction_walk = 0; + stats->cache_write = 0; + /* not clearing cache_overhead */ + /* not clearing cache_bytes_internal */ + /* not clearing cache_bytes_leaf 
*/ + /* not clearing cache_bytes_overflow */ + /* not clearing cache_bytes_dirty */ + /* not clearing cache_pages_dirty */ + stats->cache_eviction_clean = 0; + /* not clearing file_open */ + stats->memory_allocation = 0; + stats->memory_free = 0; + stats->memory_grow = 0; + stats->cond_wait = 0; + stats->rwlock_read = 0; + stats->rwlock_write = 0; + stats->read_io = 0; + stats->write_io = 0; + stats->cursor_create = 0; + stats->cursor_insert = 0; + stats->cursor_next = 0; + stats->cursor_prev = 0; + stats->cursor_remove = 0; + stats->cursor_reset = 0; + stats->cursor_restart = 0; + stats->cursor_search = 0; + stats->cursor_search_near = 0; + stats->cursor_update = 0; + /* not clearing dh_conn_handle_count */ + stats->dh_sweep_ref = 0; + stats->dh_sweep_close = 0; + stats->dh_sweep_remove = 0; + stats->dh_sweep_tod = 0; + stats->dh_sweeps = 0; + stats->dh_session_handles = 0; + stats->dh_session_sweeps = 0; + stats->log_slot_closes = 0; + stats->log_slot_races = 0; + stats->log_slot_transitions = 0; + stats->log_slot_joins = 0; + stats->log_slot_toosmall = 0; + stats->log_bytes_payload = 0; + stats->log_bytes_written = 0; + stats->log_compress_writes = 0; + stats->log_compress_write_fails = 0; + stats->log_compress_small = 0; + stats->log_release_write_lsn = 0; + stats->log_scans = 0; + stats->log_scan_rereads = 0; + stats->log_write_lsn = 0; + stats->log_sync = 0; + stats->log_sync_dir = 0; + stats->log_writes = 0; + stats->log_slot_consolidated = 0; + /* not clearing log_max_filesize */ + /* not clearing log_prealloc_max */ + stats->log_prealloc_files = 0; + stats->log_prealloc_used = 0; + stats->log_slot_toobig = 0; + stats->log_scan_records = 0; + stats->log_compress_mem = 0; + /* not clearing log_buffer_size */ + stats->log_compress_len = 0; + stats->log_slot_coalesced = 0; + stats->log_close_yields = 0; + /* not clearing lsm_work_queue_app */ + /* not clearing lsm_work_queue_manager */ + stats->lsm_rows_merged = 0; + stats->lsm_checkpoint_throttle = 0; + 
stats->lsm_merge_throttle = 0; + /* not clearing lsm_work_queue_switch */ + stats->lsm_work_units_discarded = 0; + stats->lsm_work_units_done = 0; + stats->lsm_work_units_created = 0; + stats->lsm_work_queue_max = 0; + stats->rec_pages = 0; + stats->rec_pages_eviction = 0; + /* not clearing rec_split_stashed_bytes */ + /* not clearing rec_split_stashed_objects */ + /* not clearing session_cursor_open */ + /* not clearing session_open */ + stats->page_busy_blocked = 0; + stats->page_forcible_evict_blocked = 0; + stats->page_locked_blocked = 0; + stats->page_read_blocked = 0; + stats->page_sleep = 0; + stats->txn_begin = 0; + /* not clearing txn_checkpoint_running */ + /* not clearing txn_checkpoint_generation */ + /* not clearing txn_checkpoint_time_max */ + /* not clearing txn_checkpoint_time_min */ + /* not clearing txn_checkpoint_time_recent */ + /* not clearing txn_checkpoint_time_total */ + stats->txn_checkpoint = 0; + stats->txn_fail_cache = 0; + /* not clearing txn_pinned_range */ + /* not clearing txn_pinned_checkpoint_range */ + stats->txn_sync = 0; + stats->txn_commit = 0; + stats->txn_rollback = 0; +} + +void +__wt_stat_connection_clear_all(WT_CONNECTION_STATS **stats) +{ + u_int i; - stats = (WT_CONNECTION_STATS *)stats_arg; - stats->async_cur_queue.v = 0; - stats->async_alloc_race.v = 0; - stats->async_flush.v = 0; - stats->async_alloc_view.v = 0; - stats->async_full.v = 0; - stats->async_nowork.v = 0; - stats->async_op_alloc.v = 0; - stats->async_op_compact.v = 0; - stats->async_op_insert.v = 0; - stats->async_op_remove.v = 0; - stats->async_op_search.v = 0; - stats->async_op_update.v = 0; - stats->block_preload.v = 0; - stats->block_read.v = 0; - stats->block_write.v = 0; - stats->block_byte_read.v = 0; - stats->block_byte_write.v = 0; - stats->block_map_read.v = 0; - stats->block_byte_map_read.v = 0; - stats->cache_bytes_read.v = 0; - stats->cache_bytes_write.v = 0; - stats->cache_eviction_checkpoint.v = 0; - stats->cache_eviction_queue_empty.v = 0; 
- stats->cache_eviction_queue_not_empty.v = 0; - stats->cache_eviction_server_evicting.v = 0; - stats->cache_eviction_server_not_evicting.v = 0; - stats->cache_eviction_slow.v = 0; - stats->cache_eviction_worker_evicting.v = 0; - stats->cache_eviction_force_fail.v = 0; - stats->cache_eviction_hazard.v = 0; - stats->cache_inmem_split.v = 0; - stats->cache_eviction_internal.v = 0; - stats->cache_eviction_dirty.v = 0; - stats->cache_eviction_deepen.v = 0; - stats->cache_eviction_force.v = 0; - stats->cache_eviction_force_delete.v = 0; - stats->cache_eviction_app.v = 0; - stats->cache_read.v = 0; - stats->cache_eviction_fail.v = 0; - stats->cache_eviction_split.v = 0; - stats->cache_eviction_walk.v = 0; - stats->cache_write.v = 0; - stats->cache_eviction_clean.v = 0; - stats->memory_allocation.v = 0; - stats->memory_free.v = 0; - stats->memory_grow.v = 0; - stats->cond_wait.v = 0; - stats->rwlock_read.v = 0; - stats->rwlock_write.v = 0; - stats->read_io.v = 0; - stats->write_io.v = 0; - stats->cursor_create.v = 0; - stats->cursor_insert.v = 0; - stats->cursor_next.v = 0; - stats->cursor_prev.v = 0; - stats->cursor_remove.v = 0; - stats->cursor_reset.v = 0; - stats->cursor_search.v = 0; - stats->cursor_search_near.v = 0; - stats->cursor_update.v = 0; - stats->dh_sweep_ref.v = 0; - stats->dh_sweep_close.v = 0; - stats->dh_sweep_remove.v = 0; - stats->dh_sweep_tod.v = 0; - stats->dh_sweeps.v = 0; - stats->dh_session_handles.v = 0; - stats->dh_session_sweeps.v = 0; - stats->log_slot_closes.v = 0; - stats->log_slot_races.v = 0; - stats->log_slot_transitions.v = 0; - stats->log_slot_joins.v = 0; - stats->log_slot_toosmall.v = 0; - stats->log_bytes_payload.v = 0; - stats->log_bytes_written.v = 0; - stats->log_compress_writes.v = 0; - stats->log_compress_write_fails.v = 0; - stats->log_compress_small.v = 0; - stats->log_release_write_lsn.v = 0; - stats->log_scans.v = 0; - stats->log_scan_rereads.v = 0; - stats->log_write_lsn.v = 0; - stats->log_sync.v = 0; - 
stats->log_sync_dir.v = 0; - stats->log_writes.v = 0; - stats->log_slot_consolidated.v = 0; - stats->log_prealloc_files.v = 0; - stats->log_prealloc_used.v = 0; - stats->log_slot_toobig.v = 0; - stats->log_scan_records.v = 0; - stats->log_compress_mem.v = 0; - stats->log_compress_len.v = 0; - stats->log_slot_coalesced.v = 0; - stats->log_close_yields.v = 0; - stats->lsm_rows_merged.v = 0; - stats->lsm_checkpoint_throttle.v = 0; - stats->lsm_merge_throttle.v = 0; - stats->lsm_work_units_discarded.v = 0; - stats->lsm_work_units_done.v = 0; - stats->lsm_work_units_created.v = 0; - stats->lsm_work_queue_max.v = 0; - stats->rec_pages.v = 0; - stats->rec_pages_eviction.v = 0; - stats->page_busy_blocked.v = 0; - stats->page_forcible_evict_blocked.v = 0; - stats->page_locked_blocked.v = 0; - stats->page_read_blocked.v = 0; - stats->page_sleep.v = 0; - stats->txn_begin.v = 0; - stats->txn_checkpoint.v = 0; - stats->txn_fail_cache.v = 0; - stats->txn_sync.v = 0; - stats->txn_commit.v = 0; - stats->txn_rollback.v = 0; + for (i = 0; i < WT_COUNTER_SLOTS; ++i) + __wt_stat_connection_clear_single(stats[i]); +} + +void +__wt_stat_connection_aggregate( + WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to) +{ + to->async_cur_queue += WT_STAT_READ(from, async_cur_queue); + to->async_max_queue += WT_STAT_READ(from, async_max_queue); + to->async_alloc_race += WT_STAT_READ(from, async_alloc_race); + to->async_flush += WT_STAT_READ(from, async_flush); + to->async_alloc_view += WT_STAT_READ(from, async_alloc_view); + to->async_full += WT_STAT_READ(from, async_full); + to->async_nowork += WT_STAT_READ(from, async_nowork); + to->async_op_alloc += WT_STAT_READ(from, async_op_alloc); + to->async_op_compact += WT_STAT_READ(from, async_op_compact); + to->async_op_insert += WT_STAT_READ(from, async_op_insert); + to->async_op_remove += WT_STAT_READ(from, async_op_remove); + to->async_op_search += WT_STAT_READ(from, async_op_search); + to->async_op_update += WT_STAT_READ(from, async_op_update); 
+ to->block_preload += WT_STAT_READ(from, block_preload); + to->block_read += WT_STAT_READ(from, block_read); + to->block_write += WT_STAT_READ(from, block_write); + to->block_byte_read += WT_STAT_READ(from, block_byte_read); + to->block_byte_write += WT_STAT_READ(from, block_byte_write); + to->block_map_read += WT_STAT_READ(from, block_map_read); + to->block_byte_map_read += WT_STAT_READ(from, block_byte_map_read); + to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse); + to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read); + to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write); + to->cache_eviction_checkpoint += + WT_STAT_READ(from, cache_eviction_checkpoint); + to->cache_eviction_queue_empty += + WT_STAT_READ(from, cache_eviction_queue_empty); + to->cache_eviction_queue_not_empty += + WT_STAT_READ(from, cache_eviction_queue_not_empty); + to->cache_eviction_server_evicting += + WT_STAT_READ(from, cache_eviction_server_evicting); + to->cache_eviction_server_not_evicting += + WT_STAT_READ(from, cache_eviction_server_not_evicting); + to->cache_eviction_slow += WT_STAT_READ(from, cache_eviction_slow); + to->cache_eviction_worker_evicting += + WT_STAT_READ(from, cache_eviction_worker_evicting); + to->cache_eviction_force_fail += + WT_STAT_READ(from, cache_eviction_force_fail); + to->cache_eviction_hazard += + WT_STAT_READ(from, cache_eviction_hazard); + to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split); + to->cache_eviction_internal += + WT_STAT_READ(from, cache_eviction_internal); + to->cache_bytes_max += WT_STAT_READ(from, cache_bytes_max); + to->cache_eviction_maximum_page_size += + WT_STAT_READ(from, cache_eviction_maximum_page_size); + to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty); + to->cache_eviction_deepen += + WT_STAT_READ(from, cache_eviction_deepen); + to->cache_pages_inuse += WT_STAT_READ(from, cache_pages_inuse); + to->cache_eviction_force += WT_STAT_READ(from, cache_eviction_force); + 
to->cache_eviction_force_delete += + WT_STAT_READ(from, cache_eviction_force_delete); + to->cache_eviction_app += WT_STAT_READ(from, cache_eviction_app); + to->cache_read += WT_STAT_READ(from, cache_read); + to->cache_eviction_fail += WT_STAT_READ(from, cache_eviction_fail); + to->cache_eviction_split += WT_STAT_READ(from, cache_eviction_split); + to->cache_eviction_walk += WT_STAT_READ(from, cache_eviction_walk); + to->cache_write += WT_STAT_READ(from, cache_write); + to->cache_overhead += WT_STAT_READ(from, cache_overhead); + to->cache_bytes_internal += WT_STAT_READ(from, cache_bytes_internal); + to->cache_bytes_leaf += WT_STAT_READ(from, cache_bytes_leaf); + to->cache_bytes_overflow += WT_STAT_READ(from, cache_bytes_overflow); + to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty); + to->cache_pages_dirty += WT_STAT_READ(from, cache_pages_dirty); + to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean); + to->file_open += WT_STAT_READ(from, file_open); + to->memory_allocation += WT_STAT_READ(from, memory_allocation); + to->memory_free += WT_STAT_READ(from, memory_free); + to->memory_grow += WT_STAT_READ(from, memory_grow); + to->cond_wait += WT_STAT_READ(from, cond_wait); + to->rwlock_read += WT_STAT_READ(from, rwlock_read); + to->rwlock_write += WT_STAT_READ(from, rwlock_write); + to->read_io += WT_STAT_READ(from, read_io); + to->write_io += WT_STAT_READ(from, write_io); + to->cursor_create += WT_STAT_READ(from, cursor_create); + to->cursor_insert += WT_STAT_READ(from, cursor_insert); + to->cursor_next += WT_STAT_READ(from, cursor_next); + to->cursor_prev += WT_STAT_READ(from, cursor_prev); + to->cursor_remove += WT_STAT_READ(from, cursor_remove); + to->cursor_reset += WT_STAT_READ(from, cursor_reset); + to->cursor_restart += WT_STAT_READ(from, cursor_restart); + to->cursor_search += WT_STAT_READ(from, cursor_search); + to->cursor_search_near += WT_STAT_READ(from, cursor_search_near); + to->cursor_update += WT_STAT_READ(from, 
cursor_update); + to->dh_conn_handle_count += WT_STAT_READ(from, dh_conn_handle_count); + to->dh_sweep_ref += WT_STAT_READ(from, dh_sweep_ref); + to->dh_sweep_close += WT_STAT_READ(from, dh_sweep_close); + to->dh_sweep_remove += WT_STAT_READ(from, dh_sweep_remove); + to->dh_sweep_tod += WT_STAT_READ(from, dh_sweep_tod); + to->dh_sweeps += WT_STAT_READ(from, dh_sweeps); + to->dh_session_handles += WT_STAT_READ(from, dh_session_handles); + to->dh_session_sweeps += WT_STAT_READ(from, dh_session_sweeps); + to->log_slot_closes += WT_STAT_READ(from, log_slot_closes); + to->log_slot_races += WT_STAT_READ(from, log_slot_races); + to->log_slot_transitions += WT_STAT_READ(from, log_slot_transitions); + to->log_slot_joins += WT_STAT_READ(from, log_slot_joins); + to->log_slot_toosmall += WT_STAT_READ(from, log_slot_toosmall); + to->log_bytes_payload += WT_STAT_READ(from, log_bytes_payload); + to->log_bytes_written += WT_STAT_READ(from, log_bytes_written); + to->log_compress_writes += WT_STAT_READ(from, log_compress_writes); + to->log_compress_write_fails += + WT_STAT_READ(from, log_compress_write_fails); + to->log_compress_small += WT_STAT_READ(from, log_compress_small); + to->log_release_write_lsn += + WT_STAT_READ(from, log_release_write_lsn); + to->log_scans += WT_STAT_READ(from, log_scans); + to->log_scan_rereads += WT_STAT_READ(from, log_scan_rereads); + to->log_write_lsn += WT_STAT_READ(from, log_write_lsn); + to->log_sync += WT_STAT_READ(from, log_sync); + to->log_sync_dir += WT_STAT_READ(from, log_sync_dir); + to->log_writes += WT_STAT_READ(from, log_writes); + to->log_slot_consolidated += + WT_STAT_READ(from, log_slot_consolidated); + to->log_max_filesize += WT_STAT_READ(from, log_max_filesize); + to->log_prealloc_max += WT_STAT_READ(from, log_prealloc_max); + to->log_prealloc_files += WT_STAT_READ(from, log_prealloc_files); + to->log_prealloc_used += WT_STAT_READ(from, log_prealloc_used); + to->log_slot_toobig += WT_STAT_READ(from, log_slot_toobig); + 
to->log_scan_records += WT_STAT_READ(from, log_scan_records); + to->log_compress_mem += WT_STAT_READ(from, log_compress_mem); + to->log_buffer_size += WT_STAT_READ(from, log_buffer_size); + to->log_compress_len += WT_STAT_READ(from, log_compress_len); + to->log_slot_coalesced += WT_STAT_READ(from, log_slot_coalesced); + to->log_close_yields += WT_STAT_READ(from, log_close_yields); + to->lsm_work_queue_app += WT_STAT_READ(from, lsm_work_queue_app); + to->lsm_work_queue_manager += + WT_STAT_READ(from, lsm_work_queue_manager); + to->lsm_rows_merged += WT_STAT_READ(from, lsm_rows_merged); + to->lsm_checkpoint_throttle += + WT_STAT_READ(from, lsm_checkpoint_throttle); + to->lsm_merge_throttle += WT_STAT_READ(from, lsm_merge_throttle); + to->lsm_work_queue_switch += + WT_STAT_READ(from, lsm_work_queue_switch); + to->lsm_work_units_discarded += + WT_STAT_READ(from, lsm_work_units_discarded); + to->lsm_work_units_done += WT_STAT_READ(from, lsm_work_units_done); + to->lsm_work_units_created += + WT_STAT_READ(from, lsm_work_units_created); + to->lsm_work_queue_max += WT_STAT_READ(from, lsm_work_queue_max); + to->rec_pages += WT_STAT_READ(from, rec_pages); + to->rec_pages_eviction += WT_STAT_READ(from, rec_pages_eviction); + to->rec_split_stashed_bytes += + WT_STAT_READ(from, rec_split_stashed_bytes); + to->rec_split_stashed_objects += + WT_STAT_READ(from, rec_split_stashed_objects); + to->session_cursor_open += WT_STAT_READ(from, session_cursor_open); + to->session_open += WT_STAT_READ(from, session_open); + to->page_busy_blocked += WT_STAT_READ(from, page_busy_blocked); + to->page_forcible_evict_blocked += + WT_STAT_READ(from, page_forcible_evict_blocked); + to->page_locked_blocked += WT_STAT_READ(from, page_locked_blocked); + to->page_read_blocked += WT_STAT_READ(from, page_read_blocked); + to->page_sleep += WT_STAT_READ(from, page_sleep); + to->txn_begin += WT_STAT_READ(from, txn_begin); + to->txn_checkpoint_running += + WT_STAT_READ(from, txn_checkpoint_running); + 
to->txn_checkpoint_generation += + WT_STAT_READ(from, txn_checkpoint_generation); + to->txn_checkpoint_time_max += + WT_STAT_READ(from, txn_checkpoint_time_max); + to->txn_checkpoint_time_min += + WT_STAT_READ(from, txn_checkpoint_time_min); + to->txn_checkpoint_time_recent += + WT_STAT_READ(from, txn_checkpoint_time_recent); + to->txn_checkpoint_time_total += + WT_STAT_READ(from, txn_checkpoint_time_total); + to->txn_checkpoint += WT_STAT_READ(from, txn_checkpoint); + to->txn_fail_cache += WT_STAT_READ(from, txn_fail_cache); + to->txn_pinned_range += WT_STAT_READ(from, txn_pinned_range); + to->txn_pinned_checkpoint_range += + WT_STAT_READ(from, txn_pinned_checkpoint_range); + to->txn_sync += WT_STAT_READ(from, txn_sync); + to->txn_commit += WT_STAT_READ(from, txn_commit); + to->txn_rollback += WT_STAT_READ(from, txn_rollback); } diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 43bafbfe9f0..e81f8a68251 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -134,7 +134,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) if ((count = txn_global->scan_count) < 0) WT_PAUSE(); } while (count < 0 || - !WT_ATOMIC_CAS4(txn_global->scan_count, count, count + 1)); + !__wt_atomic_casiv32(&txn_global->scan_count, count, count + 1)); current_id = snap_min = txn_global->current; prev_oldest_id = txn_global->oldest_id; @@ -147,7 +147,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) /* Check that the oldest ID has not moved in the meantime. 
*/ if (prev_oldest_id == txn_global->oldest_id) { WT_ASSERT(session, txn_global->scan_count > 0); - (void)WT_ATOMIC_SUB4(txn_global->scan_count, 1); + (void)__wt_atomic_subiv32(&txn_global->scan_count, 1); return; } } @@ -183,7 +183,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) txn_state->snap_min = snap_min; WT_ASSERT(session, txn_global->scan_count > 0); - (void)WT_ATOMIC_SUB4(txn_global->scan_count, 1); + (void)__wt_atomic_subiv32(&txn_global->scan_count, 1); __txn_sort_snapshot(session, n, current_id); } @@ -237,7 +237,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) if ((count = txn_global->scan_count) < 0) WT_PAUSE(); } while (count < 0 || - !WT_ATOMIC_CAS4(txn_global->scan_count, count, count + 1)); + !__wt_atomic_casiv32(&txn_global->scan_count, count, count + 1)); /* The oldest ID cannot change until the scan count goes to zero. */ prev_oldest_id = txn_global->oldest_id; @@ -288,7 +288,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) /* Update the oldest ID. 
*/ if ((WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) && - WT_ATOMIC_CAS4(txn_global->scan_count, 1, -1)) { + __wt_atomic_casiv32(&txn_global->scan_count, 1, -1)) { WT_ORDERED_READ(session_cnt, conn->session_cnt); for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { if ((id = s->id) != WT_TXN_NONE && @@ -333,7 +333,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) oldest_session->txn.snap_min); } WT_ASSERT(session, txn_global->scan_count > 0); - (void)WT_ATOMIC_SUB4(txn_global->scan_count, 1); + (void)__wt_atomic_subiv32(&txn_global->scan_count, 1); } } @@ -399,7 +399,6 @@ __wt_txn_release(WT_SESSION_IMPL *session) WT_TXN *txn; WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *txn_state; - int was_oldest; txn = &session->txn; WT_ASSERT(session, txn->mod_count == 0); @@ -407,7 +406,6 @@ __wt_txn_release(WT_SESSION_IMPL *session) txn_global = &S2C(session)->txn_global; txn_state = WT_SESSION_TXN_STATE(session); - was_oldest = 0; /* Clear the transaction's ID from the global table. */ if (WT_SESSION_IS_CHECKPOINT(session)) { @@ -424,9 +422,6 @@ __wt_txn_release(WT_SESSION_IMPL *session) WT_ASSERT(session, txn_state->id != WT_TXN_NONE && txn->id != WT_TXN_NONE); WT_PUBLISH(txn_state->id, WT_TXN_NONE); - - /* Quick check for the oldest transaction. */ - was_oldest = (txn->id == txn_global->last_running); txn->id = WT_TXN_NONE; } @@ -445,14 +440,6 @@ __wt_txn_release(WT_SESSION_IMPL *session) txn->isolation = session->isolation; /* Ensure the transaction flags are cleared on exit */ txn->flags = 0; - - /* - * When the oldest transaction in the system completes, bump the oldest - * ID. This is racy and so not guaranteed, but in practice it keeps - * the oldest ID from falling too far behind. 
- */ - if (was_oldest) - __wt_txn_update_oldest(session, 1); } /* @@ -663,20 +650,29 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session) { WT_TXN_GLOBAL *txn_global; WT_CONNECTION_IMPL *conn; - WT_CONNECTION_STATS *stats; + WT_CONNECTION_STATS **stats; uint64_t checkpoint_pinned; conn = S2C(session); txn_global = &conn->txn_global; - stats = &conn->stats; + stats = conn->stats; checkpoint_pinned = txn_global->checkpoint_pinned; - WT_STAT_SET(stats, txn_pinned_range, - txn_global->current - txn_global->oldest_id); + WT_STAT_SET(session, stats, txn_pinned_range, + txn_global->current - txn_global->oldest_id); - WT_STAT_SET(stats, txn_pinned_checkpoint_range, + WT_STAT_SET(session, stats, txn_pinned_checkpoint_range, checkpoint_pinned == WT_TXN_NONE ? 0 : txn_global->current - checkpoint_pinned); + + WT_STAT_SET( + session, stats, txn_checkpoint_time_max, conn->ckpt_time_max); + WT_STAT_SET( + session, stats, txn_checkpoint_time_min, conn->ckpt_time_min); + WT_STAT_SET( + session, stats, txn_checkpoint_time_recent, conn->ckpt_time_recent); + WT_STAT_SET( + session, stats, txn_checkpoint_time_total, conn->ckpt_time_total); } /* @@ -719,6 +715,7 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_calloc_def( session, conn->session_size, &txn_global->states)); + WT_CACHE_LINE_ALIGNMENT_VERIFY(session, txn_global->states); for (i = 0, s = txn_global->states; i < conn->session_size; i++, s++) s->id = s->snap_min = WT_TXN_NONE; diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 49fcd69ffed..e671ce28ffb 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -285,19 +285,22 @@ static void __checkpoint_stats( WT_SESSION_IMPL *session, struct timespec *start, struct timespec *stop) { + WT_CONNECTION_IMPL *conn; uint64_t msec; + conn = S2C(session); + /* * Get time diff in microseconds. 
*/ msec = WT_TIMEDIFF(*stop, *start) / WT_MILLION; - if (msec > WT_CONN_STAT(session, txn_checkpoint_time_max)) - WT_STAT_FAST_CONN_SET(session, txn_checkpoint_time_max, msec); - if (WT_CONN_STAT(session, txn_checkpoint_time_min) == 0 || - msec < WT_CONN_STAT(session, txn_checkpoint_time_min)) - WT_STAT_FAST_CONN_SET(session, txn_checkpoint_time_min, msec); - WT_STAT_FAST_CONN_SET(session, txn_checkpoint_time_recent, msec); - WT_STAT_FAST_CONN_INCRV(session, txn_checkpoint_time_total, msec); + + if (msec > conn->ckpt_time_max) + conn->ckpt_time_max = msec; + if (conn->ckpt_time_min == 0 || msec < conn->ckpt_time_min) + conn->ckpt_time_min = msec; + conn->ckpt_time_recent = msec; + conn->ckpt_time_total += msec; } /* @@ -1161,9 +1164,9 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, int final) btree = S2BT(session); bulk = F_ISSET(btree, WT_BTREE_BULK) ? 1 : 0; - /* If the handle is already dead, force the discard. */ + /* If the handle is already dead, discard it. */ if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD)) - return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD_FORCE)); + return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD)); /* * If closing an unmodified file, check that no update is required |