diff options
59 files changed, 926 insertions, 425 deletions
diff --git a/src/third_party/wiredtiger/NEWS b/src/third_party/wiredtiger/NEWS index 8fd06136b18..1b288c74b18 100644 --- a/src/third_party/wiredtiger/NEWS +++ b/src/third_party/wiredtiger/NEWS @@ -1,3 +1,103 @@ +WiredTiger release 2.6.1, 2015-05-13 +------------------------------------ + +The WiredTiger 2.6.1 release contains new features, minor API changes and bug +fixes. + +New features: + +* Move the sync configuration setting from WT_SESSION::begin_transaction to + WT_SESSION::commit_transaction. Change the setting from a boolean to a + string. See upgrading documentation for more information. + refs WT-1908 + +* Add the ability to flag a transaction to be flushed asynchronously on + commit via a new sync=[background] configuration option. Add a new + WT_SESSION::transaction_sync API to wait for asynchronous flushes to + complete. + refs WT-1908, #1943 + +* Add the ability to create a named in-memory snapshot via a new + WT_SESSION::snapshot API. + refs WT-1839 + +* Add the ability to disable write ahead logging at a per-table granularity. + Accessed via log=(enabled) configuration for WT_SESSION::create API. + Partial logging has serious implications for recovery, it should be used + with caution. + refs #1989 + + +Other noteworthy changes: + +* Fix several bugs related to syncing files for checkpoint durability. + refs WT-1944 + +* Fix a segfault during checkpoint where we could attempt to access a file + that was in the process of being dropped in the background. + refs SERVER-18014 + +* Fix a segfault during eviction where we could attempt to evict a page from + a tree that was in the process of being dropped in the background. + refs SERVER-18460 + +* Fix a bug where WiredTiger could segfault in a workload with lots of cache + pressure. + refs WT-1937 + +* Fix a performance issue with WT_SESSION::compact, where it would spend a + long time compacting tables that had no space to reclaim. 
+ refs WT-1953 + +* Fix a bug where accessing an overflow item could return WT_NOTFOUND + incorrectly. The issue was related to an invalid transaction visibility + check. + refs WT-1745 + +* Improve performance and avoid changing files on startup if no recovery is + required by avoiding the creation of unnecessary checkpoints and log + records for files that haven't changed. + refs WT-1936 + +* Improve how we handle create of a table, if a file with the same name + already exists (possibly from an earlier failed create). + refs #1974 + +* Fix compiler warnings for LZ4 implementation on Windows. + refs #2006 + +* Fix a bug in the WiredTiger command line utility where it could create a + base configuration file for an existing database, if there had been a + crash while creating the database. + refs WT-1943 + +* Fix a build problem where recent versions of RedHat would fail to detect + posix_memalign presence correctly. + refs WT-1951 + +* Fix several problems with how we create, recover and backup databases. + Related to order of creation and differences between Windows and POSIX + file system semantics. + refs #1993 + +* Fix a bug where we could flush the log file more often than required if + using auto-commit transactions. + refs WT-1949 + +* Fix a performance problem in LSM, where trees created with an initial bulk + load could choose poor merges. + refs WT-1947 + +* Improve how we decide whether to deepen a tree during an internal split + operation. Append workloads could create trees that were excessively deep. + +* Fix a bug in LSM which could lead to a hang on connection close. + refs WT-1935 + +* Fix a bug in the internal random number generator, where concurrent calls + could lead to invalid sequences. Never seen in the wild. 
+ + WiredTiger release 2.6.0, 2015-05-13 ------------------------------------ diff --git a/src/third_party/wiredtiger/README b/src/third_party/wiredtiger/README index 7463219341f..c30b210029d 100644 --- a/src/third_party/wiredtiger/README +++ b/src/third_party/wiredtiger/README @@ -1,6 +1,6 @@ -WiredTiger 2.6.1: (May 28, 2015) +WiredTiger 2.6.2: (June 4, 2015) -This is version 2.6.1 of WiredTiger. +This is version 2.6.2 of WiredTiger. WiredTiger release packages and documentation can be found at: @@ -8,7 +8,7 @@ WiredTiger release packages and documentation can be found at: The documentation for this specific release can be found at: - http://source.wiredtiger.com/2.6.1/index.html + http://source.wiredtiger.com/2.6.2/index.html The WiredTiger source code can be found at: diff --git a/src/third_party/wiredtiger/RELEASE_INFO b/src/third_party/wiredtiger/RELEASE_INFO index 92b831dc3de..a178c2e40fb 100644 --- a/src/third_party/wiredtiger/RELEASE_INFO +++ b/src/third_party/wiredtiger/RELEASE_INFO @@ -1,6 +1,6 @@ WIREDTIGER_VERSION_MAJOR=2 WIREDTIGER_VERSION_MINOR=6 -WIREDTIGER_VERSION_PATCH=1 +WIREDTIGER_VERSION_PATCH=2 WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH" WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"` diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree.wtperf index f8c270f3d1f..24da4dd7902 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree.wtperf @@ -6,3 +6,6 @@ report_interval=5 run_time=120 populate_threads=1 threads=((count=16,reads=1)) +# Add throughput/latency monitoring +max_latency=2000 +sample_interval=5 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/evict-lsm.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/evict-lsm.wtperf index c51cb7859c1..ad885d98eb7 100644 --- 
a/src/third_party/wiredtiger/bench/wtperf/runners/evict-lsm.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/evict-lsm.wtperf @@ -7,3 +7,6 @@ report_interval=5 run_time=120 populate_threads=1 threads=((count=16,reads=1)) +# Add throughput/latency monitoring +max_latency=2000 +sample_interval=5 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/medium-btree.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/medium-btree.wtperf index 92c63e73480..ea182ef5144 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/medium-btree.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/medium-btree.wtperf @@ -6,3 +6,6 @@ report_interval=5 run_time=120 populate_threads=1 threads=((count=16,reads=1)) +# Add throughput/latency monitoring +max_latency=2000 +sample_interval=5 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/medium-lsm-async.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/medium-lsm-async.wtperf index 1d729bcee23..2661fe0b14b 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/medium-lsm-async.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/medium-lsm-async.wtperf @@ -7,3 +7,6 @@ report_interval=5 run_time=120 populate_threads=1 threads=((count=5,reads=1,updates=2)) +# Add throughput/latency monitoring +max_latency=2000 +sample_interval=5 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/medium-lsm.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/medium-lsm.wtperf index 3e676dc0b70..33c652c65fe 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/medium-lsm.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/medium-lsm.wtperf @@ -6,3 +6,6 @@ report_interval=5 run_time=120 populate_threads=1 threads=((count=16,reads=1)) +# Add throughput/latency monitoring +max_latency=2000 +sample_interval=5 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-btree-log-partial.wtperf 
b/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-btree-log-partial.wtperf new file mode 100644 index 00000000000..513c1387665 --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-btree-log-partial.wtperf @@ -0,0 +1,12 @@ +# wtperf options file: medium btree configuration, with multiple tables. +conn_config="cache_size=1G,log=(enabled,file_max=10MB)" +table_config="type=file,os_cache_dirty_max=16MB" +icount=5000000 +log_partial=true +populate_threads=5 +populate_ops_per_txn=100 +threads=((count=1,read=1),(count=1,update=1)) +#threads=((count=8,read=1)) +run_time=120 +report_interval=5 +table_count=4 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-btree-log.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-btree-log.wtperf new file mode 100644 index 00000000000..d0a2e913186 --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-btree-log.wtperf @@ -0,0 +1,11 @@ +# wtperf options file: medium btree configuration, with multiple tables. 
+conn_config="cache_size=1G,log=(enabled,file_max=10MB)" +table_config="type=file,os_cache_dirty_max=16MB" +icount=5000000 +populate_threads=5 +populate_ops_per_txn=100 +#threads=((count=1,read=1),(count=1,update=1)) +threads=((count=8,read=1)) +run_time=120 +report_interval=5 +table_count=4 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm.wtperf index d8433352311..362bc8e00e8 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm.wtperf @@ -8,3 +8,6 @@ threads=((count=8,read=1),(count=8,update=1)) run_time=180 report_interval=5 table_count=4 +# Add throughput/latency monitoring +max_latency=2000 +sample_interval=5 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/update-checkpoint-btree.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/update-checkpoint-btree.wtperf index a2e4caab159..76cc63451d8 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/update-checkpoint-btree.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/update-checkpoint-btree.wtperf @@ -10,3 +10,6 @@ checkpoint_interval=10 run_time=300 populate_threads=1 threads=((count=1,inserts=1),(count=2,reads=1),(count=2,updates=1)) +# Add throughput/latency monitoring +max_latency=2000 +sample_interval=5 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/update-checkpoint-lsm.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/update-checkpoint-lsm.wtperf index ebd95db910f..8a195fb1e93 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/update-checkpoint-lsm.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/update-checkpoint-lsm.wtperf @@ -10,3 +10,6 @@ checkpoint_interval=10 run_time=300 populate_threads=1 threads=((count=1,inserts=1),(count=2,reads=1),(count=2,updates=1)) +# Add throughput/latency monitoring +max_latency=2000 
+sample_interval=5 diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c index 3a2e1709ddc..1c9ce963c9a 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c @@ -32,6 +32,7 @@ static const CONFIG default_cfg = { "WT_TEST", /* home */ "WT_TEST", /* monitor dir */ + NULL, /* partial logging */ NULL, /* base_uri */ NULL, /* uris */ NULL, /* helium_mount */ @@ -119,13 +120,13 @@ randomize_value(CONFIG_THREAD *thread, char *value_buf) * randomly chosen byte (other than the trailing NUL). * Make sure we don't write a NUL: keep the value the same length. */ - i = __wt_random(thread->rnd) % (thread->cfg->value_sz - 1); + i = __wt_random(&thread->rnd) % (thread->cfg->value_sz - 1); while (value_buf[i] == '\0' && i > 0) --i; if (i > 0) { vb = (uint8_t *)value_buf; - vb[0] = (__wt_random(thread->rnd) % 255) + 1; - vb[i] = (__wt_random(thread->rnd) % 255) + 1; + vb[0] = (__wt_random(&thread->rnd) % 255) + 1; + vb[i] = (__wt_random(&thread->rnd) % 255) + 1; } } @@ -1673,7 +1674,14 @@ create_tables(CONFIG *cfg) } for (i = 0; i < cfg->table_count; i++) { - if ((ret = session->create( + if (cfg->log_partial && i > 0) { + if (((ret = session->create(session, + cfg->uris[i], cfg->partial_config)) != 0)) { + lprintf(cfg, ret, 0, + "Error creating table %s", cfg->uris[i]); + return (ret); + } + } else if ((ret = session->create( session, cfg->uris[i], cfg->table_config)) != 0) { lprintf(cfg, ret, 0, "Error creating table %s", cfg->uris[i]); @@ -2155,6 +2163,16 @@ main(int argc, char *argv[]) if ((ret = config_opt_str(cfg, "table_config", tc_buf)) != 0) goto err; } + if (cfg->log_partial && cfg->table_count > 1) { + req_len = strlen(cfg->table_config) + + strlen(LOG_PARTIAL_CONFIG) + 1; + if ((cfg->partial_config = calloc(req_len, 1)) == NULL) { + ret = enomem(cfg); + goto err; + } + snprintf((char *)cfg->partial_config, req_len, "%s%s", + (char *)cfg->table_config, 
LOG_PARTIAL_CONFIG); + } /* Sanity-check the configuration. */ if ((ret = config_sanity(cfg)) != 0) @@ -2198,13 +2216,11 @@ start_threads(CONFIG *cfg, * new RNG state further along in the sequence. */ if (i == 0) - __wt_random_init(thread->rnd); - else { - thread->rnd[0] = (thread - 1)->rnd[0]; - thread->rnd[1] = (thread - 1)->rnd[1]; - } + __wt_random_init(&thread->rnd); + else + thread->rnd = (thread - 1)->rnd; for (j = 0; j < 1000; ++j) - (void)__wt_random(thread->rnd); + (void)__wt_random(&thread->rnd); /* * Every thread gets a key/data buffer because we don't bother @@ -2362,7 +2378,7 @@ wtperf_rand(CONFIG_THREAD *thread) * Use WiredTiger's random number routine: it's lock-free and fairly * good. */ - rval = (uint64_t)__wt_random(thread->rnd); + rval = __wt_random(&thread->rnd); /* Use Pareto distribution to give 80/20 hot/cold values. */ if (cfg->pareto != 0) { diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.h b/src/third_party/wiredtiger/bench/wtperf/wtperf.h index f176f62320e..874cdc499b1 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.h +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.h @@ -98,6 +98,7 @@ typedef struct { uint8_t ops[100]; /* Operation schedule */ } WORKLOAD; +#define LOG_PARTIAL_CONFIG ",log=(enabled=false)" /* * NOTE: If you add any fields to this structure here, you must also add * an initialization in wtperf.c in the default_cfg. 
@@ -105,6 +106,7 @@ typedef struct { struct __config { /* Configuration structure */ const char *home; /* WiredTiger home */ const char *monitor_dir; /* Monitor output dir */ + const char *partial_config; /* Config string for partial logging */ char *base_uri; /* Object URI */ char **uris; /* URIs if multiple tables */ const char *helium_mount; /* Optional Helium mount point */ @@ -209,7 +211,7 @@ typedef struct { struct __config_thread { /* Per-thread structure */ CONFIG *cfg; /* Enclosing configuration */ - uint32_t rnd[2]; /* Random number generation state */ + uint64_t rnd; /* Random number generation state */ pthread_t handle; /* Handle */ diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i index cc3fd34e227..6cb39ac3cc4 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i @@ -115,6 +115,7 @@ DEF_OPT_AS_BOOL(index, 0, DEF_OPT_AS_BOOL(insert_rmw, 0, "execute a read prior to each insert in workload phase") DEF_OPT_AS_UINT32(key_sz, 20, "key size") +DEF_OPT_AS_BOOL(log_partial, 0, "perform partial logging on first table only.") DEF_OPT_AS_UINT32(min_throughput, 0, "abort if any throughput measured is less than this amount. Requires " "sample_interval to be configured") diff --git a/src/third_party/wiredtiger/build_posix/aclocal/ax_func_posix_memalign.m4 b/src/third_party/wiredtiger/build_posix/aclocal/ax_func_posix_memalign.m4 index bd60adcbc81..1e21d429b42 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/ax_func_posix_memalign.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/ax_func_posix_memalign.m4 @@ -27,7 +27,7 @@ AC_DEFUN([AX_FUNC_POSIX_MEMALIGN], [AC_CACHE_CHECK([for working posix_memalign], [ax_cv_func_posix_memalign_works], - [AC_TRY_RUN([ + [AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include <stdlib.h> int @@ -39,7 +39,7 @@ main () * the size word. 
*/ exit (posix_memalign (&buffer, sizeof(void *), 123) != 0); } - ], + ]])], [ax_cv_func_posix_memalign_works=yes], [ax_cv_func_posix_memalign_works=no], [ax_cv_func_posix_memalign_works=no])]) diff --git a/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 b/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 index 9115a647042..cec54f5e842 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 @@ -2,8 +2,8 @@ dnl build by dist/s_version VERSION_MAJOR=2 VERSION_MINOR=6 -VERSION_PATCH=1 -VERSION_STRING='"WiredTiger 2.6.1: (May 15, 2015)"' +VERSION_PATCH=2 +VERSION_STRING='"WiredTiger 2.6.2: (June 4, 2015)"' AC_SUBST(VERSION_MAJOR) AC_SUBST(VERSION_MINOR) diff --git a/src/third_party/wiredtiger/build_posix/aclocal/version.m4 b/src/third_party/wiredtiger/build_posix/aclocal/version.m4 index f6524764dcd..4a4f3427df7 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/version.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/version.m4 @@ -1,2 +1,2 @@ dnl WiredTiger product version for AC_INIT. Maintained by dist/s_version -2.6.1 +2.6.2 diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index db1bc85add4..fd63bc059b5 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -236,6 +236,14 @@ file_config = format_meta + [ Config('leaf_item_max', '0', r''' historic term for leaf_key_max and leaf_value_max''', min=0, undoc=True), + Config('log', '', r''' + the transaction log configuration for this object. Only valid if + log is enabled in ::wiredtiger_open.''', + type='category', subconfig=[ + Config('enabled', 'true', r''' + if false, this object has checkpoint-level durability.''', + type='boolean'), + ]), Config('memory_page_max', '5MB', r''' the maximum size a page can grow to in memory before being reconciled to disk. 
The specified size will be adjusted to a lower @@ -372,7 +380,12 @@ connection_runtime_config = [ continue evicting until the cache has less dirty memory than the value, as a percentage of the total cache size. Dirty pages will only be evicted if the cache is full enough to trigger eviction''', - min=10, max=99), + min=5, max=99), + Config('eviction_dirty_trigger', '95', r''' + trigger eviction when the cache is using this much memory for dirty + content, as a percentage of the total cache size. This setting only + alters behavior if it is lower than eviction_trigger''', + min=5, max=99), Config('eviction_target', '80', r''' continue evicting until the cache has less total memory than the value, as a percentage of the total cache size. Must be less than @@ -503,6 +516,7 @@ connection_runtime_config = [ 'fileops', 'log', 'lsm', + 'lsm_manager', 'metadata', 'mutex', 'overflow', @@ -568,7 +582,7 @@ common_wiredtiger_open = [ Config('secretkey', '', r''' A string that is passed to the WT_ENCRYPTOR::customize function. It is never stored in clear text, so must be given to any - subsequent wiredtiger_open calls to reopen the database. + subsequent ::wiredtiger_open calls to reopen the database. It must also be provided to any "wt" commands used with this database.'''), ]), @@ -760,22 +774,22 @@ methods = { type='boolean', undoc=True), Config('statistics', '', r''' Specify the statistics to be gathered. Choosing "all" gathers - statistics regardless of cost and may include traversing - on-disk files; "fast" gathers a subset of relatively - inexpensive statistics. The selection must agree with the - database \c statistics configuration specified to - ::wiredtiger_open or WT_CONNECTION::reconfigure. 
For example, - "all" or "fast" can be configured when the database is - configured with "all", but the cursor open will fail if "all" - is specified when the database is configured with "fast", - and the cursor open will fail in all cases when the database - is configured with "none". If \c statistics is not configured, - the default configuration is the database configuration. - The "clear" configuration resets statistics after gathering - them, where appropriate (for example, a cache size statistic - is not cleared, while the count of cursor insert operations - will be cleared). See @ref statistics for more information''', - type='list', choices=['all', 'fast', 'clear']), + statistics regardless of cost and may include traversing on-disk files; + "fast" gathers a subset of relatively inexpensive statistics. The + selection must agree with the database \c statistics configuration + specified to ::wiredtiger_open or WT_CONNECTION::reconfigure. For + example, "all" or "fast" can be configured when the database is + configured with "all", but the cursor open will fail if "all" is + specified when the database is configured with "fast", and the cursor + open will fail in all cases when the database is configured with + "none". If "size" is configured, only the underlying size of the + object on disk is filled in and the object is not opened. If \c + statistics is not configured, the default configuration is the database + configuration. The "clear" configuration resets statistics after + gathering them, where appropriate (for example, a cache size statistic + is not cleared, while the count of cursor insert operations will be + cleared). 
See @ref statistics for more information''', + type='list', choices=['all', 'fast', 'clear', 'size']), Config('target', '', r''' if non-empty, backup the list of objects; valid only for a backup data source''', diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py index ae97740073d..c8d9bcc6a5e 100644 --- a/src/third_party/wiredtiger/dist/flags.py +++ b/src/third_party/wiredtiger/dist/flags.py @@ -45,11 +45,6 @@ flags = { 'READ_TRUNCATE', 'READ_WONT_NEED', ], - 'page_eviction' : [ - 'EVICT_CHECK_SPLITS', - 'EVICT_EXCLUSIVE', - 'EVICT_INMEM_SPLIT', - ], 'rec_write' : [ 'EVICTING', 'SKIP_UPDATE_ERR', @@ -72,6 +67,7 @@ flags = { 'VERB_FILEOPS', 'VERB_LOG', 'VERB_LSM', + 'VERB_LSM_MANAGER', 'VERB_METADATA', 'VERB_MUTEX', 'VERB_OVERFLOW', diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index fa87c0086b6..e526b119282 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -219,6 +219,7 @@ PARAM POSIX PREDEFINE PRIu +PRNG PTHREAD PTR Pandis diff --git a/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c b/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c index 0906e1d131d..125aaa11db3 100644 --- a/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c +++ b/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c @@ -39,6 +39,9 @@ * is being built into the WiredTiger library. */ #include "wiredtiger_config.h" +#ifdef _MSC_VER +#define inline __inline +#endif /* Local compressor structure. 
*/ typedef struct { diff --git a/src/third_party/wiredtiger/src/block/block_compact.c b/src/third_party/wiredtiger/src/block/block_compact.c index 4fcf6ea24ff..c48f27af243 100644 --- a/src/third_party/wiredtiger/src/block/block_compact.c +++ b/src/third_party/wiredtiger/src/block/block_compact.c @@ -67,7 +67,7 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, int *skipp) * worth doing. Ignore small files, and files where we are unlikely * to recover 10% of the file. */ - if (fh->size <= 10 * 1024) + if (fh->size <= WT_MEGABYTE) return (0); __wt_spin_lock(session, &block->live_lock); @@ -106,6 +106,8 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, int *skipp) *skipp ? "skipped" : "proceeding")); /* + * Skip files where we can't recover at least 1MB. + * * If at least 20% of the total file is available and in the first 80% * of the file, we'll try compaction on the last 20% of the file; else, * if at least 10% of the total file is available and in the first 90% @@ -115,11 +117,14 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, int *skipp) * empty file can be processed quickly, so more aggressive compaction is * less useful. 
*/ - if (avail_ninety >= fh->size / 10) { + if (avail_eighty > WT_MEGABYTE && + avail_eighty >= ((fh->size / 10) * 2)) { + *skipp = 0; + block->compact_pct_tenths = 2; + } else if (avail_ninety > WT_MEGABYTE && + avail_ninety >= fh->size / 10) { *skipp = 0; block->compact_pct_tenths = 1; - if (avail_eighty >= ((fh->size / 10) * 2)) - block->compact_pct_tenths = 2; } err: __wt_spin_unlock(session, &block->live_lock); diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index 5a882f0fb7c..8e45ec85a97 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -388,7 +388,7 @@ err: __wt_scr_free(session, &buf); /* * __wt_block_stat -- - * Block statistics + * Set the statistics for a live block handle. */ void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats) @@ -409,3 +409,19 @@ __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats) WT_STAT_SET(stats, block_size, block->fh->size); __wt_spin_unlock(session, &block->live_lock); } + +/* + * __wt_block_manager_size -- + * Set the size statistic for a file. + */ +int +__wt_block_manager_size( + WT_SESSION_IMPL *session, const char *filename, WT_DSRC_STATS *stats) +{ + wt_off_t filesize; + + WT_RET(__wt_filesize_name(session, filename, &filesize)); + WT_STAT_SET(stats, block_size, filesize); + + return (0); +} diff --git a/src/third_party/wiredtiger/src/btree/bt_compact.c b/src/third_party/wiredtiger/src/btree/bt_compact.c index 4709ac3260e..18f8ca54601 100644 --- a/src/third_party/wiredtiger/src/btree/bt_compact.c +++ b/src/third_party/wiredtiger/src/btree/bt_compact.c @@ -142,7 +142,6 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[]) block_manager_begin = 1; /* Walk the tree reviewing pages to see if they should be re-written. 
*/ - session->compaction = 1; for (;;) { /* * Pages read for compaction aren't "useful"; don't update the @@ -159,6 +158,7 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[]) if (skip) continue; + session->compaction = 1; /* Rewrite the page: mark the page and tree dirty. */ WT_ERR(__wt_page_modify_init(session, ref->page)); __wt_page_modify_set(session, ref->page); diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 7c4a4a57e3a..9725248e523 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -255,15 +255,26 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) /* Page sizes */ WT_RET(__btree_page_sizes(session)); - /* Eviction; the metadata file is never evicted. */ - if (WT_IS_METADATA(btree->dhandle)) + /* + * Set special flags for the metadata file. + * Eviction; the metadata file is never evicted. + * Logging; the metadata file is always logged if possible. + */ + if (WT_IS_METADATA(btree->dhandle)) { F_SET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION); - else { + F_CLR(btree, WT_BTREE_NO_LOGGING); + } else { WT_RET(__wt_config_gets(session, cfg, "cache_resident", &cval)); if (cval.val) F_SET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION); else F_CLR(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION); + + WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval)); + if (cval.val) + F_CLR(btree, WT_BTREE_NO_LOGGING); + else + F_SET(btree, WT_BTREE_NO_LOGGING); } /* Checksums */ diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index 17d9442e1a4..b55f2196a25 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -53,8 +53,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) __wt_txn_update_oldest(session, 0); /* If eviction cannot succeed, don't try. 
*/ - return ( - __wt_page_can_evict(session, page, WT_EVICT_CHECK_SPLITS, NULL)); + return (__wt_page_can_evict(session, page, 1, NULL)); } /* @@ -95,13 +94,17 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags return (WT_NOTFOUND); if (LF_ISSET(WT_READ_NO_WAIT)) return (WT_NOTFOUND); + + /* Waiting on another thread's read, stall. */ WT_STAT_FAST_CONN_INCR(session, page_read_blocked); - break; + goto stall; case WT_REF_LOCKED: if (LF_ISSET(WT_READ_NO_WAIT)) return (WT_NOTFOUND); + + /* Waiting on eviction, stall. */ WT_STAT_FAST_CONN_INCR(session, page_locked_blocked); - break; + goto stall; case WT_REF_SPLIT: return (WT_RESTART); case WT_REF_MEM: @@ -151,12 +154,11 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags /* If forced eviction fails, stall. */ if (ret == EBUSY) { ret = 0; - wait_cnt += 1000; WT_STAT_FAST_CONN_INCR(session, page_forcible_evict_blocked); - break; - } else - WT_RET(ret); + goto stall; + } + WT_RET(ret); /* * The result of a successful forced eviction @@ -201,6 +203,9 @@ skip_evict: if (++wait_cnt < 1000) __wt_yield(); else { + if (0) { +stall: wait_cnt += 1000; + } sleep_cnt = WT_MIN(wait_cnt, 10000); wait_cnt *= 2; WT_STAT_FAST_CONN_INCRV(session, page_sleep, sleep_cnt); diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c index 1bfd03f58cb..896ab23f1c2 100644 --- a/src/third_party/wiredtiger/src/btree/bt_slvg.c +++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c @@ -327,7 +327,7 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) */ if (ss->root_ref.page != NULL) { btree->ckpt = ckptbase; - ret = __wt_evict(session, &ss->root_ref, WT_EVICT_EXCLUSIVE); + ret = __wt_evict(session, &ss->root_ref, 1); ss->root_ref.page = NULL; btree->ckpt = NULL; } @@ -1313,7 +1313,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) ret = __wt_page_release(session, ref, 0); if (ret == 0) - ret = 
__wt_evict(session, ref, WT_EVICT_EXCLUSIVE); + ret = __wt_evict(session, ref, 1); if (0) { err: WT_TRET(__wt_page_release(session, ref, 0)); @@ -2022,7 +2022,7 @@ __slvg_row_build_leaf( */ ret = __wt_page_release(session, ref, 0); if (ret == 0) - ret = __wt_evict(session, ref, WT_EVICT_EXCLUSIVE); + ret = __wt_evict(session, ref, 1); if (0) { err: WT_TRET(__wt_page_release(session, ref, 0)); diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index c316be6f908..0d6428349dc 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -837,6 +837,9 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, return (0); } +#define WT_SPLIT_EXCLUSIVE 0x01 /* Page held exclusively */ +#define WT_SPLIT_INMEM 0x02 /* In-memory split */ + /* * __split_parent -- * Resolve a multi-page split, inserting new information into the parent. @@ -890,7 +893,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, * trying to split a page while its parent is being * checkpointed. */ - if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) + if (LF_ISSET(WT_SPLIT_INMEM)) return (EBUSY); __wt_yield(); } @@ -1087,7 +1090,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, */ size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *); WT_TRET(__split_safe_free(session, - split_gen, LF_ISSET(WT_EVICT_EXCLUSIVE), pindex, size)); + split_gen, LF_ISSET(WT_SPLIT_EXCLUSIVE) ? 1 : 0, pindex, size)); parent_decr += size; /* @@ -1112,7 +1115,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, * Do the check here because we've just grown the parent page and * are holding it locked. 
*/ - if (ret == 0 && !LF_ISSET(WT_EVICT_EXCLUSIVE) && + if (ret == 0 && !LF_ISSET(WT_SPLIT_EXCLUSIVE) && !F_ISSET_ATOMIC(parent, WT_PAGE_REFUSE_DEEPEN) && __split_should_deepen(session, parent_ref)) { /* @@ -1375,8 +1378,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) * longer locked, so we cannot safely look at it. */ page = NULL; - if ((ret = __split_parent(session, - ref, split_ref, 2, parent_incr, WT_EVICT_INMEM_SPLIT)) != 0) { + if ((ret = __split_parent( + session, ref, split_ref, 2, parent_incr, WT_SPLIT_INMEM)) != 0) { /* * Move the insert list element back to the original page list. * For simplicity, the previous skip list pointers originally @@ -1467,7 +1470,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref) * Resolve a page split. */ int -__wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive) +__wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing) { WT_DECL_RET; WT_PAGE *page; @@ -1491,9 +1494,12 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive) WT_ERR(__wt_multi_to_ref(session, page, &mod->mod_multi[i], &ref_new[i], &parent_incr)); - /* Split into the parent. */ + /* + * Split into the parent; if we're closing the file, we hold it + * exclusively. + */ WT_ERR(__split_parent( session, ref, ref_new, - new_entries, parent_incr, exclusive ? WT_EVICT_EXCLUSIVE : 0)); + new_entries, parent_incr, closing ? 
WT_SPLIT_EXCLUSIVE : 0)); WT_STAT_FAST_CONN_INCR(session, cache_eviction_split); WT_STAT_FAST_DATA_INCR(session, cache_eviction_split); diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c index 6d65a89c15a..9803b924355 100644 --- a/src/third_party/wiredtiger/src/btree/row_srch.c +++ b/src/third_party/wiredtiger/src/btree/row_srch.c @@ -486,7 +486,7 @@ restart: WT_INTL_INDEX_GET(session, page, pindex); descent = pindex->index[ - __wt_random(session->rnd) % pindex->entries]; + __wt_random(&session->rnd) % pindex->entries]; /* * Swap the parent page for the child page; return on error, @@ -520,7 +520,7 @@ restart: cbt->compare = 0; WT_INTL_INDEX_GET(session, btree->root.page, pindex); cbt->slot = pindex->entries < 2 ? - __wt_random(session->rnd) % page->pg_row_entries : 0; + __wt_random(&session->rnd) % page->pg_row_entries : 0; return (__wt_row_leaf_key(session, page, page->pg_row_d + cbt->slot, cbt->tmp, 0)); diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 64fc802160c..6e9c1c2d01b 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -105,7 +105,10 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=10,max=99", + NULL, "min=5,max=99", + NULL, 0 }, + { "eviction_dirty_trigger", "int", + NULL, "min=5,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -128,9 +131,10 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"metadata\",\"mutex\",\"overflow\",\"read\",\"reconcile\"," - 
"\"recovery\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"transaction\",\"verify\",\"version\",\"write\"]", + "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," + "\"reconcile\",\"recovery\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"transaction\",\"verify\",\"version\"," + "\"write\"]", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -181,6 +185,12 @@ static const WT_CONFIG_CHECK }; static const WT_CONFIG_CHECK + confchk_WT_SESSION_create_log_subconfigs[] = { + { "enabled", "boolean", NULL, NULL, NULL, 0 }, + { NULL, NULL, NULL, NULL, NULL, 0 } +}; + +static const WT_CONFIG_CHECK confchk_WT_SESSION_create_lsm_subconfigs[] = { { "auto_throttle", "boolean", NULL, NULL, NULL, 0 }, { "bloom", "boolean", NULL, NULL, NULL, 0 }, @@ -236,6 +246,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = { NULL, "min=512B,max=512MB", NULL, 0 }, { "leaf_value_max", "int", NULL, "min=0", NULL, 0 }, + { "log", "category", + NULL, NULL, + confchk_WT_SESSION_create_log_subconfigs, 1 }, { "lsm", "category", NULL, NULL, confchk_WT_SESSION_create_lsm_subconfigs, 11 }, @@ -276,7 +289,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = { { "readonly", "boolean", NULL, NULL, NULL, 0 }, { "skip_sort_check", "boolean", NULL, NULL, NULL, 0 }, { "statistics", "list", - NULL, "choices=[\"all\",\"fast\",\"clear\"]", + NULL, "choices=[\"all\",\"fast\",\"clear\",\"size\"]", NULL, 0 }, { "target", "list", NULL, NULL, NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } @@ -374,6 +387,9 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = { NULL, "min=512B,max=512MB", NULL, 0 }, { "leaf_value_max", "int", NULL, "min=0", NULL, 0 }, + { "log", "category", + NULL, NULL, + confchk_WT_SESSION_create_log_subconfigs, 1 }, { "memory_page_max", "int", NULL, "min=512B,max=10TB", NULL, 0 }, @@ -474,7 +490,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { 
"eviction_dirty_target", "int", - NULL, "min=10,max=99", + NULL, "min=5,max=99", + NULL, 0 }, + { "eviction_dirty_trigger", "int", + NULL, "min=5,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -514,9 +533,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"metadata\",\"mutex\",\"overflow\",\"read\",\"reconcile\"," - "\"recovery\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"transaction\",\"verify\",\"version\",\"write\"]", + "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," + "\"reconcile\",\"recovery\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"transaction\",\"verify\",\"version\"," + "\"write\"]", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -545,7 +565,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=10,max=99", + NULL, "min=5,max=99", + NULL, 0 }, + { "eviction_dirty_trigger", "int", + NULL, "min=5,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -585,9 +608,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"metadata\",\"mutex\",\"overflow\",\"read\",\"reconcile\"," - "\"recovery\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"transaction\",\"verify\",\"version\",\"write\"]", + "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," + "\"reconcile\",\"recovery\",\"salvage\",\"shared_cache\"," + 
"\"split\",\"temporary\",\"transaction\",\"verify\",\"version\"," + "\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } @@ -615,7 +639,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=10,max=99", + NULL, "min=5,max=99", + NULL, 0 }, + { "eviction_dirty_trigger", "int", + NULL, "min=5,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -653,9 +680,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"metadata\",\"mutex\",\"overflow\",\"read\",\"reconcile\"," - "\"recovery\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"transaction\",\"verify\",\"version\",\"write\"]", + "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," + "\"reconcile\",\"recovery\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"transaction\",\"verify\",\"version\"," + "\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } @@ -683,7 +711,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=10,max=99", + NULL, "min=5,max=99", + NULL, 0 }, + { "eviction_dirty_trigger", "int", + NULL, "min=5,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -721,9 +752,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," 
"\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"metadata\",\"mutex\",\"overflow\",\"read\",\"reconcile\"," - "\"recovery\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"transaction\",\"verify\",\"version\",\"write\"]", + "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," + "\"reconcile\",\"recovery\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"transaction\",\"verify\",\"version\"," + "\"write\"]", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -771,14 +803,14 @@ static const WT_CONFIG_ENTRY config_entries[] = { "cache_size=100MB,checkpoint=(log_size=0," "name=\"WiredTigerCheckpoint\",wait=0),error_prefix=," "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," - "eviction_target=80,eviction_trigger=95," - "file_manager=(close_handle_minimum=250,close_idle_time=30," + "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" + ",file_manager=(close_handle_minimum=250,close_idle_time=30," "close_scan_interval=10),lsm_manager=(merge=,worker_thread_max=4)" ",lsm_merge=,shared_cache=(chunk=10MB,name=,reserve=0,size=500MB)" ",statistics=none,statistics_log=(on_close=0," "path=\"WiredTigerStat.%d.%H\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=", - confchk_WT_CONNECTION_reconfigure, 16 + confchk_WT_CONNECTION_reconfigure, 17 }, { "WT_CURSOR.close", "", @@ -816,14 +848,15 @@ static const WT_CONFIG_ENTRY config_entries[] = { "huffman_value=,immutable=0,internal_item_max=0," "internal_key_max=0,internal_key_truncate=,internal_page_max=4KB," "key_format=u,key_gap=10,leaf_item_max=0,leaf_key_max=0," - "leaf_page_max=32KB,leaf_value_max=0,lsm=(auto_throttle=,bloom=," - "bloom_bit_count=16,bloom_config=,bloom_hash_count=8," - "bloom_oldest=0,chunk_count_limit=0,chunk_max=5GB,chunk_size=10MB" - ",merge_max=15,merge_min=0),memory_page_max=5MB," - "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0," - "prefix_compression_min=4,source=,split_deepen_min_child=0," - 
"split_deepen_per_child=0,split_pct=75,type=file,value_format=u", - confchk_WT_SESSION_create, 39 + "leaf_page_max=32KB,leaf_value_max=0,log=(enabled=)," + "lsm=(auto_throttle=,bloom=,bloom_bit_count=16,bloom_config=," + "bloom_hash_count=8,bloom_oldest=0,chunk_count_limit=0," + "chunk_max=5GB,chunk_size=10MB,merge_max=15,merge_min=0)," + "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0," + "prefix_compression=0,prefix_compression_min=4,source=," + "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75," + "type=file,value_format=u", + confchk_WT_SESSION_create, 40 }, { "WT_SESSION.drop", "force=0,remove_files=", @@ -891,11 +924,12 @@ static const WT_CONFIG_ENTRY config_entries[] = { "huffman_value=,id=,internal_item_max=0,internal_key_max=0," "internal_key_truncate=,internal_page_max=4KB,key_format=u," "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB," - "leaf_value_max=0,memory_page_max=5MB,os_cache_dirty_max=0," - "os_cache_max=0,prefix_compression=0,prefix_compression_min=4," - "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75," - "value_format=u,version=(major=0,minor=0)", - confchk_file_meta, 36 + "leaf_value_max=0,log=(enabled=),memory_page_max=5MB," + "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0," + "prefix_compression_min=4,split_deepen_min_child=0," + "split_deepen_per_child=0,split_pct=75,value_format=u," + "version=(major=0,minor=0)", + confchk_file_meta, 37 }, { "index.meta", "app_metadata=,collator=,columns=,extractor=,immutable=0," @@ -913,19 +947,19 @@ static const WT_CONFIG_ENTRY config_entries[] = { "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," "config_base=,create=0,direct_io=,encryption=(keyid=,name=," "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)" - ",eviction_dirty_target=80,eviction_target=80,eviction_trigger=95" - ",exclusive=0,extensions=,file_extend=," - "file_manager=(close_handle_minimum=250,close_idle_time=30," - 
"close_scan_interval=10),hazard_max=1000,log=(archive=," - "compressor=,enabled=0,file_max=100MB,path=,prealloc=,recover=on)" - ",lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=," - "multiprocess=0,session_max=100,session_scratch_max=2MB," - "shared_cache=(chunk=10MB,name=,reserve=0,size=500MB)," - "statistics=none,statistics_log=(on_close=0," + ",eviction_dirty_target=80,eviction_dirty_trigger=95," + "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=," + "file_extend=,file_manager=(close_handle_minimum=250," + "close_idle_time=30,close_scan_interval=10),hazard_max=1000," + "log=(archive=,compressor=,enabled=0,file_max=100MB,path=," + "prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4)," + "lsm_merge=,mmap=,multiprocess=0,session_max=100," + "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,reserve=0" + ",size=500MB),statistics=none,statistics_log=(on_close=0," "path=\"WiredTigerStat.%d.%H\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" ",method=fsync),use_environment_priv=0,verbose=", - confchk_wiredtiger_open, 33 + confchk_wiredtiger_open, 34 }, { "wiredtiger_open_all", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," @@ -933,20 +967,20 @@ static const WT_CONFIG_ENTRY config_entries[] = { "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," "config_base=,create=0,direct_io=,encryption=(keyid=,name=," "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)" - ",eviction_dirty_target=80,eviction_target=80,eviction_trigger=95" - ",exclusive=0,extensions=,file_extend=," - "file_manager=(close_handle_minimum=250,close_idle_time=30," - "close_scan_interval=10),hazard_max=1000,log=(archive=," - "compressor=,enabled=0,file_max=100MB,path=,prealloc=,recover=on)" - ",lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=," - "multiprocess=0,session_max=100,session_scratch_max=2MB," - "shared_cache=(chunk=10MB,name=,reserve=0,size=500MB)," - 
"statistics=none,statistics_log=(on_close=0," + ",eviction_dirty_target=80,eviction_dirty_trigger=95," + "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=," + "file_extend=,file_manager=(close_handle_minimum=250," + "close_idle_time=30,close_scan_interval=10),hazard_max=1000," + "log=(archive=,compressor=,enabled=0,file_max=100MB,path=," + "prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4)," + "lsm_merge=,mmap=,multiprocess=0,session_max=100," + "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,reserve=0" + ",size=500MB),statistics=none,statistics_log=(on_close=0," "path=\"WiredTigerStat.%d.%H\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" ",method=fsync),use_environment_priv=0,verbose=,version=(major=0," "minor=0)", - confchk_wiredtiger_open_all, 34 + confchk_wiredtiger_open_all, 35 }, { "wiredtiger_open_basecfg", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," @@ -954,18 +988,18 @@ static const WT_CONFIG_ENTRY config_entries[] = { "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=," "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," - "eviction_target=80,eviction_trigger=95,extensions=,file_extend=," - "file_manager=(close_handle_minimum=250,close_idle_time=30," - "close_scan_interval=10),hazard_max=1000,log=(archive=," - "compressor=,enabled=0,file_max=100MB,path=,prealloc=,recover=on)" - ",lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=," - "multiprocess=0,session_max=100,session_scratch_max=2MB," - "shared_cache=(chunk=10MB,name=,reserve=0,size=500MB)," - "statistics=none,statistics_log=(on_close=0," + "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" + ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" + ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," + "log=(archive=,compressor=,enabled=0,file_max=100MB,path=," + 
"prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4)," + "lsm_merge=,mmap=,multiprocess=0,session_max=100," + "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,reserve=0" + ",size=500MB),statistics=none,statistics_log=(on_close=0," "path=\"WiredTigerStat.%d.%H\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" ",method=fsync),verbose=,version=(major=0,minor=0)", - confchk_wiredtiger_open_basecfg, 30 + confchk_wiredtiger_open_basecfg, 31 }, { "wiredtiger_open_usercfg", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," @@ -973,18 +1007,18 @@ static const WT_CONFIG_ENTRY config_entries[] = { "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=," "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," - "eviction_target=80,eviction_trigger=95,extensions=,file_extend=," - "file_manager=(close_handle_minimum=250,close_idle_time=30," - "close_scan_interval=10),hazard_max=1000,log=(archive=," - "compressor=,enabled=0,file_max=100MB,path=,prealloc=,recover=on)" - ",lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=," - "multiprocess=0,session_max=100,session_scratch_max=2MB," - "shared_cache=(chunk=10MB,name=,reserve=0,size=500MB)," - "statistics=none,statistics_log=(on_close=0," + "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" + ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" + ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," + "log=(archive=,compressor=,enabled=0,file_max=100MB,path=," + "prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4)," + "lsm_merge=,mmap=,multiprocess=0,session_max=100," + "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,reserve=0" + ",size=500MB),statistics=none,statistics_log=(on_close=0," "path=\"WiredTigerStat.%d.%H\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" ",method=fsync),verbose=", - 
confchk_wiredtiger_open_usercfg, 29 + confchk_wiredtiger_open_usercfg, 30 }, { NULL, NULL, NULL, 0 } }; diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index d42287497a5..067ad00560e 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1720,7 +1720,12 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) * merge the rest to be written. */ WT_ERR(__wt_config_merge(session, cfg + 1, - "create=,encryption=(secretkey=),log=(recover=)", &base_config)); + "config_base=," + "create=," + "encryption=(secretkey=)," + "exclusive=," + "log=(recover=)," + "use_environment_priv=,", &base_config)); WT_ERR(__wt_config_init(session, &parser, base_config)); while ((ret = __wt_config_next(&parser, &k, &v)) == 0) { /* Fix quoting for non-trivial settings. */ @@ -1739,8 +1744,7 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) if (0) { /* Close open file handle, remove any temporary file. */ -err: if (fp != NULL) - WT_TRET(__wt_fclose(&fp, WT_FHANDLE_WRITE)); +err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_WRITE)); WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET)); } @@ -1820,7 +1824,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, session = conn->default_session = &conn->dummy_session; session->iface.connection = &conn->iface; session->name = "wiredtiger_open"; - __wt_random_init(session->rnd); + __wt_random_init(&session->rnd); __wt_event_handler_set(session, event_handler); /* Remaining basic initialization of the connection structure. 
*/ diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c index 1edd9dac7fb..d62425fe536 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache.c @@ -45,6 +45,9 @@ __cache_config_local(WT_SESSION_IMPL *session, int shared, const char *cfg[]) WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_target", &cval)); cache->eviction_dirty_target = (u_int)cval.val; + WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_trigger", &cval)); + cache->eviction_dirty_trigger = (u_int)cval.val; + /* * The eviction thread configuration options include the main eviction * thread and workers. Our implementation splits them out. Adjust for diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c index 85442592c39..82568401319 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_stat.c +++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c @@ -373,6 +373,22 @@ __curstat_file_init(WT_SESSION_IMPL *session, { WT_DATA_HANDLE *dhandle; WT_DECL_RET; + const char *filename; + + /* + * If we are only getting the size of the file, we don't need to open + * the tree. 
+ */ + if (F_ISSET(cst, WT_CONN_STAT_SIZE)) { + filename = uri; + if (!WT_PREFIX_SKIP(filename, "file:")) + return (EINVAL); + __wt_stat_init_dsrc_stats(&cst->u.dsrc_stats); + WT_RET(__wt_block_manager_size( + session, filename, &cst->u.dsrc_stats)); + __wt_curstat_dsrc_final(cst); + return (0); + } WT_RET(__wt_session_get_btree_ckpt(session, uri, cfg, 0)); dhandle = session->dhandle; @@ -508,8 +524,22 @@ __wt_curstat_open(WT_SESSION_IMPL *session, } WT_ERR_NOTFOUND_OK(ret); if ((ret = __wt_config_subgets( - session, &cval, "clear", &sval)) == 0 && sval.val != 0) + session, &cval, "size", &sval)) == 0 && sval.val != 0) { + if (F_ISSET(cst, WT_CONN_STAT_FAST | WT_CONN_STAT_ALL)) + WT_ERR_MSG(session, EINVAL, + "only one statistics configuration value " + "may be specified"); + F_SET(cst, WT_CONN_STAT_SIZE); + } + WT_ERR_NOTFOUND_OK(ret); + if ((ret = __wt_config_subgets( + session, &cval, "clear", &sval)) == 0 && sval.val != 0) { + if (F_ISSET(cst, WT_CONN_STAT_SIZE)) + WT_ERR_MSG(session, EINVAL, + "clear is incompatible with size " + "statistics"); F_SET(cst, WT_CONN_STAT_CLEAR); + } WT_ERR_NOTFOUND_OK(ret); /* If no configuration, use the connection's configuration. */ diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c index 21f6a1f016a..38cfc07ac5b 100644 --- a/src/third_party/wiredtiger/src/evict/evict_file.c +++ b/src/third_party/wiredtiger/src/evict/evict_file.c @@ -76,12 +76,12 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop) /* * Evict the page. 
*/ - WT_ERR(__wt_evict(session, ref, WT_EVICT_EXCLUSIVE)); + WT_ERR(__wt_evict(session, ref, 1)); break; case WT_SYNC_DISCARD: WT_ASSERT(session, __wt_page_can_evict(session, page, 0, NULL)); - __wt_evict_page_clean_update(session, ref); + __wt_evict_page_clean_update(session, ref, 1); break; case WT_SYNC_DISCARD_FORCE: /* @@ -97,7 +97,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop) } F_SET(session, WT_SESSION_DISCARD_FORCE); - __wt_evict_page_clean_update(session, ref); + __wt_evict_page_clean_update(session, ref, 1); F_CLR(session, WT_SESSION_DISCARD_FORCE); break; WT_ILLEGAL_VALUE_ERR(session); diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index da7583339d4..a16d2743536 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -418,30 +418,22 @@ __evict_has_work(WT_SESSION_IMPL *session, uint32_t *flagsp) WT_CACHE *cache; WT_CONNECTION_IMPL *conn; uint32_t flags; - uint64_t bytes_inuse, bytes_max, dirty_inuse; + int evict, dirty; conn = S2C(session); cache = conn->cache; - flags = 0; - *flagsp = 0; + *flagsp = flags = 0; if (!F_ISSET(conn, WT_CONN_EVICTION_RUN)) return (0); - /* - * Figure out whether the cache usage exceeds either the eviction - * target or the dirty target. - */ - bytes_inuse = __wt_cache_bytes_inuse(cache); - dirty_inuse = __wt_cache_dirty_inuse(cache); - bytes_max = conn->cache_size; - /* Check to see if the eviction server should run. */ - if (bytes_inuse > (cache->eviction_target * bytes_max) / 100) + __wt_cache_status(session, &evict, &dirty); + if (evict) + /* The cache is too small. */ LF_SET(WT_EVICT_PASS_ALL); - else if (dirty_inuse > - (cache->eviction_dirty_target * bytes_max) / 100) - /* Ignore clean pages unless the cache is too large */ + else if (dirty) + /* Too many dirty pages, ignore clean pages. 
*/ LF_SET(WT_EVICT_PASS_DIRTY); else if (F_ISSET(cache, WT_CACHE_WOULD_BLOCK)) { /* @@ -1232,8 +1224,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) } fast: /* If the page can't be evicted, give up. */ - if (!__wt_page_can_evict( - session, page, WT_EVICT_CHECK_SPLITS, NULL)) + if (!__wt_page_can_evict(session, page, 1, NULL)) continue; /* @@ -1513,7 +1504,12 @@ __wt_cache_wait(WT_SESSION_IMPL *session, int full) WT_RET(__wt_eviction_check(session, &full, 0)); if (full < 100) return (0); - else if (ret == 0) + /* + * The value of ret is set in the switch statement above (and + * not altered by WT_RET), so it's 0 or WT_NOTFOUND depending + * on whether or not there was a page to evict in the queue. + */ + if (ret == 0) continue; /* @@ -1539,6 +1535,7 @@ __wt_cache_wait(WT_SESSION_IMPL *session, int full) busy = count = 1; } } + #ifdef HAVE_DIAGNOSTIC /* * __wt_cache_dump -- diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index ac95032748d..8680a644421 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -9,7 +9,7 @@ #include "wt_internal.h" static int __evict_page_dirty_update(WT_SESSION_IMPL *, WT_REF *, int); -static int __evict_review(WT_SESSION_IMPL *, WT_REF *, int *, uint32_t); +static int __evict_review(WT_SESSION_IMPL *, WT_REF *, int *, int); /* * __evict_exclusive_clear -- @@ -49,7 +49,7 @@ __evict_exclusive(WT_SESSION_IMPL *session, WT_REF *ref) * Evict a page. 
*/ int -__wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) +__wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int closing) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -60,7 +60,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) conn = S2C(session); page = ref->page; - forced_eviction = (page->read_gen == WT_READGEN_OLDEST); + forced_eviction = page->read_gen == WT_READGEN_OLDEST; inmem_split = 0; WT_RET(__wt_verbose(session, WT_VERB_EVICT, @@ -73,7 +73,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) * to make this check for clean pages, too: while unlikely eviction * would choose an internal page with children, it's not disallowed. */ - WT_ERR(__evict_review(session, ref, &inmem_split, flags)); + WT_ERR(__evict_review(session, ref, &inmem_split, closing)); /* * If there was an in-memory split, the tree has been left in the state @@ -89,7 +89,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) mod = page->modify; /* Count evictions of internal pages during normal operation. 
*/ - if (!LF_ISSET(WT_EVICT_EXCLUSIVE) && WT_PAGE_IS_INTERNAL(page)) { + if (!closing && WT_PAGE_IS_INTERNAL(page)) { WT_STAT_FAST_CONN_INCR(session, cache_eviction_internal); WT_STAT_FAST_DATA_INCR(session, cache_eviction_internal); } @@ -107,7 +107,8 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) if (__wt_ref_is_root(ref)) __wt_ref_out(session, ref); else - __wt_evict_page_clean_update(session, ref); + WT_ERR(__wt_evict_page_clean_update( + session, ref, closing)); WT_STAT_FAST_CONN_INCR(session, cache_eviction_clean); WT_STAT_FAST_DATA_INCR(session, cache_eviction_clean); @@ -116,14 +117,14 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) __wt_ref_out(session, ref); else WT_ERR(__evict_page_dirty_update( - session, ref, LF_ISSET(WT_EVICT_EXCLUSIVE))); + session, ref, closing)); WT_STAT_FAST_CONN_INCR(session, cache_eviction_dirty); WT_STAT_FAST_DATA_INCR(session, cache_eviction_dirty); } if (0) { -err: if (!LF_ISSET(WT_EVICT_EXCLUSIVE)) +err: if (!closing) __evict_exclusive_clear(session, ref); WT_STAT_FAST_CONN_INCR(session, cache_eviction_fail); @@ -143,9 +144,21 @@ done: if (((inmem_split && ret == 0) || (forced_eviction && ret == EBUSY)) && * __wt_evict_page_clean_update -- * Update a clean page's reference on eviction. */ -void -__wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref) +int +__wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, int closing) { + int evict; + + /* + * If doing normal system eviction, but only in the service of reducing + * the number of dirty pages, leave the clean page in cache. 
+ */ + if (!closing) { + __wt_cache_status(session, &evict, NULL); + if (!evict) + return (EBUSY); + } + /* * Discard the page and update the reference structure; if the page has * an address, it's a disk page; if it has no address, it's a deleted @@ -154,6 +167,8 @@ __wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref) __wt_ref_out(session, ref); WT_PUBLISH(ref->state, ref->addr == NULL ? WT_REF_DELETED : WT_REF_DISK); + + return (0); } /* @@ -161,17 +176,19 @@ __wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref) * Update a dirty page's reference on eviction. */ static int -__evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive) +__evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, int closing) { WT_ADDR *addr; WT_PAGE *parent; WT_PAGE_MODIFY *mod; + int evict; parent = ref->home; mod = ref->page->modify; switch (F_ISSET(mod, WT_PM_REC_MASK)) { case WT_PM_REC_EMPTY: /* Page is empty */ + /* Discard the parent's address. */ if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) { __wt_free(session, ((WT_ADDR *)ref->addr)->addr); __wt_free(session, ref->addr); @@ -198,9 +215,24 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive) * A real split where we reconciled a page and it turned into a * lot of pages. */ - WT_RET(__wt_split_multi(session, ref, exclusive)); + WT_RET(__wt_split_multi(session, ref, closing)); break; case WT_PM_REC_REPLACE: /* 1-for-1 page swap */ + /* + * If doing normal system eviction, but only in the service of + * reducing the number of dirty pages, leave the clean page in + * cache. Only do this when replacing a page with another one, + * because when a page splits into multiple pages, we want to + * push it out of cache (and read it back in, when needed), we + * would rather have more, smaller pages than fewer large pages. 
+ */ + if (!closing) { + __wt_cache_status(session, &evict, NULL); + if (!evict) + return (EBUSY); + } + + /* Discard the parent's address. */ if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) { __wt_free(session, ((WT_ADDR *)ref->addr)->addr); __wt_free(session, ref->addr); @@ -270,7 +302,7 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent) */ static int __evict_review( - WT_SESSION_IMPL *session, WT_REF *ref, int *inmem_splitp, uint32_t flags) + WT_SESSION_IMPL *session, WT_REF *ref, int *inmem_splitp, int closing) { WT_DECL_RET; WT_PAGE *page; @@ -281,7 +313,7 @@ __evict_review( * Get exclusive access to the page if our caller doesn't have the tree * locked down. */ - if (!LF_ISSET(WT_EVICT_EXCLUSIVE)) { + if (!closing) { WT_RET(__evict_exclusive(session, ref)); /* @@ -311,8 +343,8 @@ __evict_review( } /* Check if the page can be evicted. */ - if (!LF_ISSET(WT_EVICT_EXCLUSIVE)) { - if (!__wt_page_can_evict(session, page, flags, inmem_splitp)) + if (!closing) { + if (!__wt_page_can_evict(session, page, 0, inmem_splitp)) return (EBUSY); /* @@ -347,7 +379,7 @@ __evict_review( */ reconcile_flags = WT_EVICTING; if (__wt_page_is_modified(page)) { - if (LF_ISSET(WT_EVICT_EXCLUSIVE)) + if (closing) FLD_SET(reconcile_flags, WT_SKIP_UPDATE_ERR); else if (!WT_PAGE_IS_INTERNAL(page) && page->read_gen == WT_READGEN_OLDEST) @@ -362,7 +394,7 @@ __evict_review( * If the page was ever modified, make sure all of the updates * on the page are old enough they can be discarded from cache. 
*/ - if (!LF_ISSET(WT_EVICT_EXCLUSIVE) && mod != NULL && + if (!closing && mod != NULL && !__wt_txn_visible_all(session, mod->rec_max_txn) && !FLD_ISSET(reconcile_flags, WT_SKIP_UPDATE_RESTORE)) return (EBUSY); diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index 76a25639ffd..deecd8f6d88 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -147,10 +147,11 @@ struct __wt_btree { #define WT_BTREE_BULK 0x00100 /* Bulk-load handle */ #define WT_BTREE_IN_MEMORY 0x00200 /* Cache-resident object */ #define WT_BTREE_NO_EVICTION 0x00400 /* Disable eviction */ -#define WT_BTREE_SALVAGE 0x00800 /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x01000 /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x02000 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x04000 /* Handle is for verify */ +#define WT_BTREE_NO_LOGGING 0x00800 /* Disable logging */ +#define WT_BTREE_SALVAGE 0x01000 /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 0x02000 /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x04000 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x08000 /* Handle is for verify */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 06d41b89036..e3cbe22c1b6 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -1022,18 +1022,19 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) * Check whether a page can be evicted. 
*/ static inline int -__wt_page_can_evict( - WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags, int *inmem_splitp) +__wt_page_can_evict(WT_SESSION_IMPL *session, + WT_PAGE *page, int check_splits, int *inmem_splitp) { WT_BTREE *btree; WT_PAGE_MODIFY *mod; WT_TXN_GLOBAL *txn_global; + if (inmem_splitp != NULL) + *inmem_splitp = 0; + btree = S2BT(session); mod = page->modify; txn_global = &S2C(session)->txn_global; - if (inmem_splitp != NULL) - *inmem_splitp = 0; /* Pages that have never been modified can always be evicted. */ if (mod == NULL) @@ -1048,7 +1049,7 @@ __wt_page_can_evict( * a transaction value, once that's globally visible, we know we can * evict the created page. */ - if (LF_ISSET(WT_EVICT_CHECK_SPLITS) && WT_PAGE_IS_INTERNAL(page) && + if (check_splits && WT_PAGE_IS_INTERNAL(page) && !__wt_txn_visible_all(session, mod->mod_split_txn)) return (0); @@ -1105,10 +1106,10 @@ __wt_page_can_evict( /* * If the page was recently split in-memory, don't force it out: we * hope an eviction thread will find it first. The check here is - * similar to __wt_txn_visible_all, but ignores the checkpoints + * similar to __wt_txn_visible_all, but ignores the checkpoint's * transaction. */ - if (LF_ISSET(WT_EVICT_CHECK_SPLITS) && + if (check_splits && WT_TXNID_LE(txn_global->oldest_id, mod->inmem_split_txn)) return (0); @@ -1117,7 +1118,7 @@ __wt_page_can_evict( /* * __wt_page_release_evict -- - * Attempt to release and immediately evict a page. + * Release a reference to a page, and attempt to immediately evict it. */ static inline int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) @@ -1166,7 +1167,7 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) /* * __wt_page_release -- - * Release a reference to a page, fail if busy during forced eviction. + * Release a reference to a page. 
*/ static inline int __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) @@ -1196,17 +1197,14 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) * memory_page_max setting, when we see many deleted items, and when we * are attempting to scan without trashing the cache. * - * Skip this if eviction is disabled for this operation or this tree, - * or if there is no chance of eviction succeeding for dirty pages due - * to a checkpoint or because we've already tried writing this page and - * it contains an update that isn't stable. Also skip forced eviction - * if we just did an in-memory split. + * Fast checks if eviction is disabled for this operation or this tree, + * then perform a general check if eviction will be possible. */ page = ref->page; - if (F_ISSET(btree, WT_BTREE_NO_EVICTION) || + if (page->read_gen != WT_READGEN_OLDEST || LF_ISSET(WT_READ_NO_EVICT) || - page->read_gen != WT_READGEN_OLDEST || - !__wt_page_can_evict(session, page, WT_EVICT_CHECK_SPLITS, NULL)) + F_ISSET(btree, WT_BTREE_NO_EVICTION) || + !__wt_page_can_evict(session, page, 1, NULL)) return (__wt_hazard_clear(session, page)); WT_RET_BUSY_OK(__wt_page_release_evict(session, ref)); @@ -1314,7 +1312,7 @@ __wt_skip_choose_depth(WT_SESSION_IMPL *session) u_int d; for (d = 1; d < WT_SKIP_MAXDEPTH && - __wt_random(session->rnd) < WT_SKIP_PROBABILITY; d++) + __wt_random(&session->rnd) < WT_SKIP_PROBABILITY; d++) ; return (d); } diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index 11f631416af..0e426c88ec9 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -28,8 +28,8 @@ * Encapsulation of an eviction candidate. 
*/ struct __wt_evict_entry { - WT_BTREE *btree; /* Enclosing btree object */ - WT_REF *ref; /* Page to flush/evict */ + WT_BTREE *btree; /* Enclosing btree object */ + WT_REF *ref; /* Page to flush/evict */ }; /* @@ -84,6 +84,7 @@ struct __wt_cache { u_int eviction_trigger; /* Percent to trigger eviction */ u_int eviction_target; /* Percent to end eviction */ u_int eviction_dirty_target; /* Percent to allow dirty */ + u_int eviction_dirty_trigger; /* Percent to trigger dirty eviction */ u_int overhead_pct; /* Cache percent adjustment */ @@ -100,12 +101,6 @@ struct __wt_cache { *evict_file_next; /* LRU next file to search */ /* - * Sync/flush request information. - */ - volatile uint64_t sync_request; /* File sync requests */ - volatile uint64_t sync_complete;/* File sync requests completed */ - - /* * Cache pool information. */ uint64_t cp_saved_read; /* Read count from last pass */ diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i index 0c976800b38..31bae1ac679 100644 --- a/src/third_party/wiredtiger/src/include/cache.i +++ b/src/third_party/wiredtiger/src/include/cache.i @@ -91,6 +91,48 @@ __wt_cache_dirty_inuse(WT_CACHE *cache) } /* + * __wt_cache_status -- + * Return if the cache usage exceeds the eviction or dirty targets. + */ +static inline void +__wt_cache_status(WT_SESSION_IMPL *session, int *evictp, int *dirtyp) +{ + WT_CONNECTION_IMPL *conn; + WT_CACHE *cache; + uint64_t bytes_inuse, bytes_max, dirty_inuse; + + conn = S2C(session); + cache = conn->cache; + + /* + * There's an assumption "evict" overrides "dirty", that is, if eviction + * is required, we no longer care where we are with respect to the dirty + * target. + * + * Avoid division by zero if the cache size has not yet been set in a + * shared cache. 
+ */ + bytes_max = conn->cache_size + 1; + if (evictp != NULL) { + bytes_inuse = __wt_cache_bytes_inuse(cache); + if (bytes_inuse > (cache->eviction_target * bytes_max) / 100) { + *evictp = 1; + return; + } + *evictp = 0; + } + if (dirtyp != NULL) { + dirty_inuse = __wt_cache_dirty_inuse(cache); + if (dirty_inuse > + (cache->eviction_dirty_target * bytes_max) / 100) { + *dirtyp = 1; + return; + } + *dirtyp = 0; + } +} + +/* * __wt_eviction_check -- * Wake the eviction server if necessary. */ @@ -108,50 +150,30 @@ __wt_eviction_check(WT_SESSION_IMPL *session, int *fullp, int wake) * If we're over the maximum cache, shut out reads (which include page * allocations) until we evict to back under the maximum cache. * Eviction will keep pushing out pages so we don't run on the edge all - * the time. Avoid division by zero if the cache size has not yet been - * in a shared cache. + * the time. + * + * Avoid division by zero if the cache size has not yet been set in a + * shared cache. */ bytes_inuse = __wt_cache_bytes_inuse(cache); - dirty_inuse = __wt_cache_dirty_inuse(cache); bytes_max = conn->cache_size + 1; - /* Calculate the cache full percentage. */ + /* Return the cache full percentage. */ *fullp = (int)((100 * bytes_inuse) / bytes_max); - - /* Wake eviction when we're over the trigger cache size. */ - if (wake && - (bytes_inuse > (cache->eviction_trigger * bytes_max) / 100 || - dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100)) - WT_RET(__wt_evict_server_wake(session)); - - return (0); -} - -/* - * __wt_session_can_wait -- - * Return if a session available for a potentially slow operation. - */ -static inline int -__wt_session_can_wait(WT_SESSION_IMPL *session) -{ - /* - * Return if a session available for a potentially slow operation; - * for example, used by the block manager in the case of flushing - * the system cache. 
- */ - if (!F_ISSET(session, WT_SESSION_CAN_WAIT)) + if (!wake) return (0); /* - * LSM sets the no-cache-check flag when holding the LSM tree lock, - * in that case, or when holding the schema lock, we don't want to - * highjack the thread for eviction. + * Wake eviction if we're over the trigger cache size or there are too + * many dirty pages. */ - if (F_ISSET(session, - WT_SESSION_NO_CACHE_CHECK | WT_SESSION_LOCKED_SCHEMA)) - return (0); - - return (1); + if (bytes_inuse <= (cache->eviction_trigger * bytes_max) / 100) { + dirty_inuse = __wt_cache_dirty_inuse(cache); + if (dirty_inuse <= + (cache->eviction_dirty_trigger * bytes_max) / 100) + return (0); + } + return (__wt_evict_server_wake(session)); } /* @@ -196,3 +218,30 @@ __wt_cache_full_check(WT_SESSION_IMPL *session) return (__wt_cache_wait(session, full)); } + +/* + * __wt_session_can_wait -- + * Return if a session available for a potentially slow operation. + */ +static inline int +__wt_session_can_wait(WT_SESSION_IMPL *session) +{ + /* + * Return if a session available for a potentially slow operation; + * for example, used by the block manager in the case of flushing + * the system cache. + */ + if (!F_ISSET(session, WT_SESSION_CAN_WAIT)) + return (0); + + /* + * LSM sets the no-cache-check flag when holding the LSM tree lock, + * in that case, or when holding the schema lock, we don't want to + * highjack the thread for eviction. 
+ */ + if (F_ISSET(session, + WT_SESSION_NO_CACHE_CHECK | WT_SESSION_LOCKED_SCHEMA)) + return (0); + + return (1); +} diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 209eabea91c..d6a2bb0b17a 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -286,6 +286,7 @@ struct __wt_connection_impl { #define WT_CONN_STAT_FAST 0x04 /* "fast" statistics configured */ #define WT_CONN_STAT_NONE 0x08 /* don't gather statistics */ #define WT_CONN_STAT_ON_CLOSE 0x10 /* output statistics on close */ +#define WT_CONN_STAT_SIZE 0x20 /* "size" statistics configured */ uint32_t stat_flags; WT_CONNECTION_STATS stats; /* Connection statistics */ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 3d3c851daad..871adb2d25d 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -50,6 +50,7 @@ extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block); extern int __wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize); extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats); +extern int __wt_block_manager_size( WT_SESSION_IMPL *session, const char *filename, WT_DSRC_STATS *stats); extern int __wt_bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size); extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size); extern int __wt_block_read_off_blind( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset); @@ -153,7 +154,7 @@ extern void __wt_split_stash_discard_all( WT_SESSION_IMPL *session_safe, WT_SESS extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, 
WT_MULTI *multi, WT_REF **refp, size_t *incrp); extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref); -extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive); +extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing); extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst); extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, int op); extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]); @@ -312,8 +313,8 @@ extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session); extern int __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_server); extern int __wt_cache_wait(WT_SESSION_IMPL *session, int full); extern void __wt_cache_dump(WT_SESSION_IMPL *session); -extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags); -extern void __wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref); +extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int closing); +extern int __wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, int closing); extern int __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn); extern int __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn); extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn); @@ -639,8 +640,8 @@ extern uint32_t __wt_nlpo2(uint32_t v); extern uint32_t __wt_log2_int(uint32_t n); extern int __wt_ispo2(uint32_t v); extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2); -extern void __wt_random_init(uint32_t *rnd); -extern uint32_t __wt_random(uint32_t *rnd); +extern void __wt_random_init(uint64_t volatile *rnd_state); +extern uint32_t __wt_random(uint64_t volatile *rnd_state); extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size); extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))); extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))); diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h index faada258c09..675ede9a8a0 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ b/src/third_party/wiredtiger/src/include/flags.h @@ -18,9 +18,6 @@ #define WT_CONN_SERVER_SWEEP 0x00002000 #define WT_CONN_WAS_BACKUP 0x00004000 #define WT_EVICTING 0x00000001 -#define WT_EVICT_CHECK_SPLITS 0x00000001 -#define WT_EVICT_EXCLUSIVE 0x00000002 -#define WT_EVICT_INMEM_SPLIT 0x00000004 #define WT_FILE_TYPE_CHECKPOINT 0x00000001 #define WT_FILE_TYPE_DATA 0x00000002 #define WT_FILE_TYPE_DIRECTORY 0x00000004 @@ -80,20 +77,21 @@ #define WT_VERB_FILEOPS 0x00000040 #define WT_VERB_LOG 0x00000080 #define WT_VERB_LSM 0x00000100 -#define WT_VERB_METADATA 0x00000200 -#define WT_VERB_MUTEX 0x00000400 -#define WT_VERB_OVERFLOW 0x00000800 -#define WT_VERB_READ 0x00001000 -#define WT_VERB_RECONCILE 0x00002000 -#define WT_VERB_RECOVERY 0x00004000 -#define WT_VERB_SALVAGE 0x00008000 -#define WT_VERB_SHARED_CACHE 0x00010000 -#define WT_VERB_SPLIT 0x00020000 -#define WT_VERB_TEMPORARY 0x00040000 -#define WT_VERB_TRANSACTION 0x00080000 -#define WT_VERB_VERIFY 0x00100000 -#define WT_VERB_VERSION 0x00200000 -#define WT_VERB_WRITE 0x00400000 +#define WT_VERB_LSM_MANAGER 0x00000200 +#define WT_VERB_METADATA 0x00000400 +#define WT_VERB_MUTEX 0x00000800 +#define WT_VERB_OVERFLOW 0x00001000 +#define WT_VERB_READ 0x00002000 +#define WT_VERB_RECONCILE 0x00004000 +#define WT_VERB_RECOVERY 0x00008000 +#define WT_VERB_SALVAGE 0x00010000 +#define WT_VERB_SHARED_CACHE 0x00020000 +#define WT_VERB_SPLIT 0x00040000 +#define WT_VERB_TEMPORARY 0x00080000 +#define WT_VERB_TRANSACTION 0x00100000 +#define WT_VERB_VERIFY 0x00200000 +#define WT_VERB_VERSION 0x00400000 +#define WT_VERB_WRITE 0x00800000 /* 
* flags section: END * DO NOT EDIT: automatically built by dist/flags.py. diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h index dc6a0d7e027..08e57794fb8 100644 --- a/src/third_party/wiredtiger/src/include/lsm.h +++ b/src/third_party/wiredtiger/src/include/lsm.h @@ -157,7 +157,13 @@ struct __wt_lsm_manager { WT_LSM_WORKER_ARGS lsm_worker_cookies[WT_LSM_MAX_WORKERS]; }; -#define WT_LSM_AGGRESSIVE_THRESHOLD 5 +/* + * The value aggressive needs to get to before it influences how merges + * are chosen. The default value translates to enough level 0 chunks being + * generated to create a second level merge. + */ +#define WT_LSM_AGGRESSIVE_THRESHOLD 2 + /* * WT_LSM_TREE -- * An LSM tree. @@ -187,6 +193,8 @@ struct __wt_lsm_tree { uint64_t merge_throttle; /* Rate limiting due to merges */ uint64_t chunk_fill_ms; /* Estimate of time to fill a chunk */ struct timespec last_flush_ts; /* Timestamp last flush finished */ + uint64_t chunks_flushed; /* Count of chunks flushed since open */ + struct timespec merge_aggressive_ts;/* Timestamp for merge aggression */ struct timespec work_push_ts; /* Timestamp last work unit added */ uint64_t merge_progressing; /* Bumped when merges are active */ uint32_t merge_syncing; /* Bumped when merges are syncing */ @@ -199,8 +207,6 @@ struct __wt_lsm_tree { uint64_t chunk_max; /* Maximum chunk a merge creates */ u_int merge_min, merge_max; - u_int merge_idle; /* Count of idle merge threads */ - #define WT_LSM_BLOOM_MERGED 0x00000001 #define WT_LSM_BLOOM_OFF 0x00000002 #define WT_LSM_BLOOM_OLDEST 0x00000004 @@ -219,11 +225,12 @@ struct __wt_lsm_tree { uint32_t merge_aggressiveness; /* Increase amount of work per merge */ #define WT_LSM_TREE_ACTIVE 0x01 /* Workers are active */ -#define WT_LSM_TREE_COMPACTING 0x02 /* Tree being compacted */ -#define WT_LSM_TREE_MERGES 0x04 /* Tree should run merges */ -#define WT_LSM_TREE_NEED_SWITCH 0x08 /* New chunk needs creating */ -#define 
WT_LSM_TREE_OPEN 0x10 /* The tree is open */ -#define WT_LSM_TREE_THROTTLE 0x20 /* Throttle updates */ +#define WT_LSM_TREE_AGGRESSIVE_TIMER 0x02 /* Timer for merge aggression */ +#define WT_LSM_TREE_COMPACTING 0x04 /* Tree being compacted */ +#define WT_LSM_TREE_MERGES 0x08 /* Tree should run merges */ +#define WT_LSM_TREE_NEED_SWITCH 0x10 /* New chunk needs creating */ +#define WT_LSM_TREE_OPEN 0x20 /* The tree is open */ +#define WT_LSM_TREE_THROTTLE 0x40 /* Throttle updates */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index 8a8b229dbc0..bf1aa98d8d3 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -93,8 +93,8 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { SLIST_HEAD(__tables, __wt_table) tables; WT_ITEM **scratch; /* Temporary memory for any function */ - u_int scratch_alloc; /* Currently allocated */ - size_t scratch_cached; /* Scratch bytes cached */ + u_int scratch_alloc; /* Currently allocated */ + size_t scratch_cached; /* Scratch bytes cached */ #ifdef HAVE_DIAGNOSTIC /* * It's hard to figure out from where a buffer was allocated after it's @@ -146,9 +146,9 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { * to clear everything but the fields that persist. 
*/ #define WT_SESSION_CLEAR_SIZE(s) \ - (WT_PTRDIFF(&(s)->rnd[0], s)) + (WT_PTRDIFF(&(s)->rnd, s)) - uint32_t rnd[2]; /* Random number generation state */ + uint64_t rnd; /* Random number generation state */ /* Hashed handle reference list array */ SLIST_HEAD(__dhandles_hash, __wt_data_handle_cache) *dhhash; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index fe27c4f1c62..8c4e58e02fe 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -946,14 +946,16 @@ struct __wt_session { * configured when the database is configured with "all"\, but the * cursor open will fail if "all" is specified when the database is * configured with "fast"\, and the cursor open will fail in all cases - * when the database is configured with "none". If \c statistics is not + * when the database is configured with "none". If "size" is + * configured\, only the underlying size of the object on disk is filled + * in and the object is not opened. If \c statistics is not * configured\, the default configuration is the database configuration. * The "clear" configuration resets statistics after gathering them\, * where appropriate (for example\, a cache size statistic is not * cleared\, while the count of cursor insert operations will be * cleared). See @ref statistics for more information., a list\, with * values chosen from the following options: \c "all"\, \c "fast"\, \c - * "clear"; default empty.} + * "clear"\, \c "size"; default empty.} * @config{target, if non-empty\, backup the list of objects; valid only * for a backup data source., a list of strings; default empty.} * @configend @@ -1104,6 +1106,12 @@ struct __wt_session { * temporarily ignored when large values are written. 
The default is * one-half the size of a newly split leaf page., an integer greater * than or equal to 0; default \c 0.} + * @config{log = (, the transaction log configuration for this object. + * Only valid if log is enabled in ::wiredtiger_open., a set of related + * configuration options defined below.} + * @config{ enabled, if false\, this object has + * checkpoint-level durability., a boolean flag; default \c true.} + * @config{ ),,} * @config{lsm = (, options only relevant for LSM data sources., a set * of related configuration options defined below.} * @config{ auto_throttle, Throttle inserts into @@ -1674,7 +1682,11 @@ struct __wt_connection { * @config{eviction_dirty_target, continue evicting until the cache has * less dirty memory than the value\, as a percentage of the total cache * size. Dirty pages will only be evicted if the cache is full enough - * to trigger eviction., an integer between 10 and 99; default \c 80.} + * to trigger eviction., an integer between 5 and 99; default \c 80.} + * @config{eviction_dirty_trigger, trigger eviction when the cache is + * using this much memory for dirty content\, as a percentage of the + * total cache size. This setting only alters behavior if it is lower + * than eviction_trigger., an integer between 5 and 99; default \c 95.} * @config{eviction_target, continue evicting until the cache has less * total memory than the value\, as a percentage of the total cache * size. 
Must be less than \c eviction_trigger., an integer between 10 @@ -1768,11 +1780,11 @@ struct __wt_connection { * as a list\, such as <code>"verbose=[evictserver\,read]"</code>., a * list\, with values chosen from the following options: \c "api"\, \c * "block"\, \c "checkpoint"\, \c "compact"\, \c "evict"\, \c - * "evictserver"\, \c "fileops"\, \c "log"\, \c "lsm"\, \c "metadata"\, - * \c "mutex"\, \c "overflow"\, \c "read"\, \c "reconcile"\, \c - * "recovery"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c - * "temporary"\, \c "transaction"\, \c "verify"\, \c "version"\, \c - * "write"; default empty.} + * "evictserver"\, \c "fileops"\, \c "log"\, \c "lsm"\, \c + * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c + * "read"\, \c "reconcile"\, \c "recovery"\, \c "salvage"\, \c + * "shared_cache"\, \c "split"\, \c "temporary"\, \c "transaction"\, \c + * "verify"\, \c "version"\, \c "write"; default empty.} * @configend * @errors */ @@ -2068,7 +2080,7 @@ struct __wt_connection { * string; default \c none.} * @config{ secretkey, A string * that is passed to the WT_ENCRYPTOR::customize function. It is never stored - * in clear text\, so must be given to any subsequent wiredtiger_open calls to + * in clear text\, so must be given to any subsequent ::wiredtiger_open calls to * reopen the database. It must also be provided to any "wt" commands used with * this database., a string; default empty.} * @config{ ),,} @@ -2090,7 +2102,11 @@ struct __wt_connection { * @config{eviction_dirty_target, continue evicting until the cache has less * dirty memory than the value\, as a percentage of the total cache size. Dirty * pages will only be evicted if the cache is full enough to trigger eviction., - * an integer between 10 and 99; default \c 80.} + * an integer between 5 and 99; default \c 80.} + * @config{eviction_dirty_trigger, trigger eviction when the cache is using this + * much memory for dirty content\, as a percentage of the total cache size. 
+ * This setting only alters behavior if it is lower than eviction_trigger., an + * integer between 5 and 99; default \c 95.} * @config{eviction_target, continue evicting until the cache has less total * memory than the value\, as a percentage of the total cache size. Must be * less than \c eviction_trigger., an integer between 10 and 99; default \c 80.} @@ -2238,10 +2254,10 @@ struct __wt_connection { * list\, such as <code>"verbose=[evictserver\,read]"</code>., a list\, with * values chosen from the following options: \c "api"\, \c "block"\, \c * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\, - * \c "log"\, \c "lsm"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c - * "read"\, \c "reconcile"\, \c "recovery"\, \c "salvage"\, \c "shared_cache"\, - * \c "split"\, \c "temporary"\, \c "transaction"\, \c "verify"\, \c "version"\, - * \c "write"; default empty.} + * \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c + * "overflow"\, \c "read"\, \c "reconcile"\, \c "recovery"\, \c "salvage"\, \c + * "shared_cache"\, \c "split"\, \c "temporary"\, \c "transaction"\, \c + * "verify"\, \c "version"\, \c "write"; default empty.} * @configend * Additionally, if files named \c WiredTiger.config or \c WiredTiger.basecfg * appear in the WiredTiger home directory, they are read for configuration diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index 45455b59e6b..d6abdf9e33b 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -108,7 +108,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, slot_grow_attempts = 0; find_slot: allocated_slot = WT_SLOT_ACTIVE == 1 ? 0 : - __wt_random(session->rnd) % WT_SLOT_ACTIVE; + __wt_random(&session->rnd) % WT_SLOT_ACTIVE; /* * Get the selected slot. Use a barrier to prevent the compiler from * caching this read. 
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c index 0d3ce5da2d8..cb078d991d8 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c @@ -8,7 +8,6 @@ #include "wt_internal.h" -static int __lsm_manager_aggressive_update(WT_SESSION_IMPL *, WT_LSM_TREE *); static int __lsm_manager_run_server(WT_SESSION_IMPL *); static WT_THREAD_RET __lsm_worker_manager(void *); @@ -339,43 +338,6 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) } /* - * __lsm_manager_aggressive_update -- - * Update the merge aggressiveness for a single LSM tree. - */ -static int -__lsm_manager_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) -{ - struct timespec now; - uint64_t chunk_wait, stallms; - u_int new_aggressive; - - WT_RET(__wt_epoch(session, &now)); - stallms = WT_TIMEDIFF(now, lsm_tree->last_flush_ts) / WT_MILLION; - /* - * Get aggressive if more than enough chunks for a merge should have - * been created by now. Use 10 seconds as a default if we don't have an - * estimate. - */ - if (lsm_tree->nchunks > 1) - chunk_wait = stallms / (lsm_tree->chunk_fill_ms == 0 ? - 10000 : lsm_tree->chunk_fill_ms); - else - chunk_wait = 0; - new_aggressive = (u_int)(chunk_wait / lsm_tree->merge_min); - - if (new_aggressive > lsm_tree->merge_aggressiveness) { - WT_RET(__wt_verbose(session, WT_VERB_LSM, - "LSM merge %s got aggressive (old %u new %u), " - "merge_min %d, %u / %" PRIu64, - lsm_tree->name, lsm_tree->merge_aggressiveness, - new_aggressive, lsm_tree->merge_min, stallms, - lsm_tree->chunk_fill_ms)); - lsm_tree->merge_aggressiveness = new_aggressive; - } - return (0); -} - -/* * __lsm_manager_worker_shutdown -- * Shutdown the LSM manager and worker threads. 
*/ @@ -428,8 +390,6 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) { if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) continue; - WT_ERR(__lsm_manager_aggressive_update( - session, lsm_tree)); WT_ERR(__wt_epoch(session, &now)); pushms = lsm_tree->work_push_ts.tv_sec == 0 ? 0 : WT_TIMEDIFF( @@ -458,7 +418,8 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) lsm_tree->nchunks > 1) || (lsm_tree->queue_ref == 0 && lsm_tree->nchunks > 1) || - (lsm_tree->merge_aggressiveness > 3 && + (lsm_tree->merge_aggressiveness > + WT_LSM_AGGRESSIVE_THRESHOLD && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) || pushms > fillms) { WT_ERR(__wt_lsm_manager_push_entry( @@ -469,7 +430,8 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) session, WT_LSM_WORK_FLUSH, 0, lsm_tree)); WT_ERR(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_BLOOM, 0, lsm_tree)); - WT_ERR(__wt_verbose(session, WT_VERB_LSM, + WT_ERR(__wt_verbose(session, + WT_VERB_LSM_MANAGER, "MGR %s: queue %d mod %d nchunks %d" " flags 0x%x aggressive %d pushms %" PRIu64 " fillms %" PRIu64, diff --git a/src/third_party/wiredtiger/src/lsm/lsm_merge.c b/src/third_party/wiredtiger/src/lsm/lsm_merge.c index d75f3b0619b..d7e684b8f51 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_merge.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_merge.c @@ -40,6 +40,101 @@ __wt_lsm_merge_update_tree(WT_SESSION_IMPL *session, } /* + * __lsm_merge_aggressive_clear -- + * We found a merge to do - clear the aggressive timer. + */ +static int +__lsm_merge_aggressive_clear(WT_LSM_TREE *lsm_tree) +{ + F_CLR(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER); + lsm_tree->merge_aggressiveness = 0; + return (0); +} + +/* + * __lsm_merge_aggressive_update -- + * Update the merge aggressiveness for an LSM tree. 
+ */ +static int +__lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) +{ + struct timespec now; + uint64_t msec_since_last_merge, msec_to_create_merge; + u_int new_aggressive; + + new_aggressive = 0; + + /* + * If the tree is open read-only or we are compacting, be very + * aggressive. Otherwise, we can spend a long time waiting for merges + * to start in read-only applications. + */ + if (!lsm_tree->modified || + F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) { + lsm_tree->merge_aggressiveness = 10; + return (0); + } + + /* + * Only get aggressive if a reasonable number of flushes have been + * completed since opening the tree. + */ + if (lsm_tree->chunks_flushed <= lsm_tree->merge_min) + return (__lsm_merge_aggressive_clear(lsm_tree)); + + /* + * Start the timer if it isn't running. Use a flag to define whether + * the timer is running - since clearing and checking a special + * timer value isn't simple. + */ + if (!F_ISSET(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER)) { + F_SET(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER); + return (__wt_epoch(session, &lsm_tree->merge_aggressive_ts)); + } + + WT_RET(__wt_epoch(session, &now)); + msec_since_last_merge = + WT_TIMEDIFF(now, lsm_tree->merge_aggressive_ts) / WT_MILLION; + + /* + * If there is no estimate for how long it's taking to fill chunks + * pick 10 seconds. + */ + msec_to_create_merge = lsm_tree->merge_min * + (lsm_tree->chunk_fill_ms == 0 ? 10000 : lsm_tree->chunk_fill_ms); + + /* + * Don't consider getting aggressive until enough time has passed that + * we should have created enough chunks to trigger a new merge. We + * track average chunk-creation time - hence the "should"; the average + * fill time may not reflect the actual state if an application + * generates a variable load. + */ + if (msec_since_last_merge < msec_to_create_merge) + return (0); + + /* + * Bump how aggressively we look for merges based on how long since + * the last merge complete. 
The aggressive setting only increases + * slowly - triggering merges across generations of chunks isn't + * an efficient use of resources. + */ + while ((msec_since_last_merge /= msec_to_create_merge) > 1) + ++new_aggressive; + + if (new_aggressive > lsm_tree->merge_aggressiveness) { + WT_RET(__wt_verbose(session, WT_VERB_LSM, + "LSM merge %s got aggressive (old %u new %u), " + "merge_min %d, %u / %" PRIu64, + lsm_tree->name, lsm_tree->merge_aggressiveness, + new_aggressive, lsm_tree->merge_min, + msec_since_last_merge, lsm_tree->chunk_fill_ms)); + lsm_tree->merge_aggressiveness = new_aggressive; + } + return (0); +} + +/* * __lsm_merge_span -- * Figure out the best span of chunks to merge. Return an error if * there is no need to do any merges. Called with the LSM tree @@ -53,6 +148,7 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t aggressive, max_gap, max_gen, max_level; uint64_t record_count, chunk_size; u_int end_chunk, i, merge_max, merge_min, nchunks, start_chunk; + u_int oldest_gen, youngest_gen; chunk_size = 0; nchunks = 0; @@ -64,18 +160,9 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, *end = 0; *records = 0; - /* - * If the tree is open read-only or we are compacting, be very - * aggressive. Otherwise, we can spend a long time waiting for merges - * to start in read-only applications. - */ - if (!lsm_tree->modified || - F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) - lsm_tree->merge_aggressiveness = 10; - aggressive = lsm_tree->merge_aggressiveness; merge_max = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ? - 100 : lsm_tree->merge_min; + 100 : lsm_tree->merge_max; merge_min = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ? 2 : lsm_tree->merge_min; max_gap = (aggressive + 4) / 5; @@ -127,6 +214,8 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, * with the most recent set of chunks and work backwards until going * further becomes significantly less efficient. 
*/ +retry_find: + oldest_gen = youngest_gen = lsm_tree->chunk[end_chunk]->generation; for (start_chunk = end_chunk + 1, record_count = 0; start_chunk > 0; ) { chunk = lsm_tree->chunk[start_chunk - 1]; @@ -159,6 +248,15 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, chunk_size - youngest->size > lsm_tree->chunk_max)) break; + /* Track chunk generations seen while looking for a merge */ + if (chunk->generation < youngest_gen) + youngest_gen = chunk->generation; + else if (chunk->generation > oldest_gen) + oldest_gen = chunk->generation; + + if (oldest_gen - youngest_gen > max_gap) + break; + /* * If we have enough chunks for a merge and the next chunk is * in too high a generation, stop. @@ -176,18 +274,23 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, --start_chunk; /* - * If we have a full window, or the merge would be too big, - * remove the youngest chunk. + * If the merge would be too big, or we have a full window + * and we could include an older chunk if the window wasn't + * full, remove the youngest chunk. */ - if (nchunks == merge_max || - chunk_size > lsm_tree->chunk_max) { + if (chunk_size > lsm_tree->chunk_max || + (nchunks == merge_max && start_chunk > 0 && + chunk->generation == + lsm_tree->chunk[start_chunk - 1]->generation)) { WT_ASSERT(session, F_ISSET(youngest, WT_LSM_CHUNK_MERGING)); F_CLR(youngest, WT_LSM_CHUNK_MERGING); record_count -= youngest->count; chunk_size -= youngest->size; --end_chunk; - } + } else if (nchunks == merge_max) + /* We've found the best full merge we can */ + break; } nchunks = (end_chunk + 1) - start_chunk; @@ -208,17 +311,28 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, * generations. 
*/ if (nchunks < merge_min || - lsm_tree->chunk[end_chunk]->generation > - youngest->generation + max_gap) { + oldest_gen - youngest_gen > max_gap) { for (i = 0; i < nchunks; i++) { chunk = lsm_tree->chunk[start_chunk + i]; WT_ASSERT(session, F_ISSET(chunk, WT_LSM_CHUNK_MERGING)); F_CLR(chunk, WT_LSM_CHUNK_MERGING); } + /* + * If we didn't find a merge with appropriate gaps, try again + * with a smaller range. + */ + if (end_chunk > lsm_tree->merge_min && + oldest_gen - youngest_gen > max_gap) { + --end_chunk; + goto retry_find; + } + /* Consider getting aggressive if no merge was found */ + WT_RET(__lsm_merge_aggressive_update(session, lsm_tree)); return (WT_NOTFOUND); } + WT_RET(__lsm_merge_aggressive_clear(lsm_tree)); *records = record_count; *start = start_chunk; *end = end_chunk; @@ -299,8 +413,12 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) start_chunk, end_chunk, dest_id, record_count, generation)); for (verb = start_chunk; verb <= end_chunk; verb++) WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "%s: Chunk[%u] id %u", - lsm_tree->name, verb, lsm_tree->chunk[verb]->id)); + "Merging %s: Chunk[%u] id %u, gen: %" PRIu32 + ", size: %" PRIu64 ", records: %" PRIu64, + lsm_tree->name, verb, lsm_tree->chunk[verb]->id, + lsm_tree->chunk[verb]->generation, + lsm_tree->chunk[verb]->size, + lsm_tree->chunk[verb]->count)); } WT_ERR(__wt_calloc_one(session, &chunk)); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_stat.c b/src/third_party/wiredtiger/src/lsm/lsm_stat.c index bc694000900..126a59af0d1 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_stat.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_stat.c @@ -40,11 +40,12 @@ __curstat_lsm_init( /* Propagate all, fast and/or clear to the cursors we open. */ if (!F_ISSET(cst, WT_CONN_STAT_NONE)) { (void)snprintf(config, sizeof(config), - "statistics=(%s%s%s)", - F_ISSET(cst, WT_CONN_STAT_CLEAR) ? "clear," : "", + "statistics=(%s%s%s%s)", F_ISSET(cst, WT_CONN_STAT_ALL) ? 
"all," : "", + F_ISSET(cst, WT_CONN_STAT_CLEAR) ? "clear," : "", !F_ISSET(cst, WT_CONN_STAT_ALL) && - F_ISSET(cst, WT_CONN_STAT_FAST) ? "fast," : ""); + F_ISSET(cst, WT_CONN_STAT_FAST) ? "fast," : "", + F_ISSET(cst, WT_CONN_STAT_SIZE) ? "size," : ""); cfg[1] = disk_cfg[1] = config; } diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c index 7a2c0ebb190..c3bee162ea1 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c @@ -109,7 +109,7 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, * enough to trigger checkpoints. */ if (evict_chunk != NULL && flush_chunk != NULL) { - chunk = (__wt_random(session->rnd) & 1) ? + chunk = (__wt_random(&session->rnd) & 1) ? evict_chunk : flush_chunk; WT_ERR(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_FLUSH, 0, lsm_tree)); @@ -333,6 +333,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, /* Update the flush timestamp to help track ongoing progress. */ WT_RET(__wt_epoch(session, &lsm_tree->last_flush_ts)); + ++lsm_tree->chunks_flushed; /* Lock the tree, mark the chunk as on disk and update the metadata. 
*/ WT_RET(__wt_lsm_tree_writelock(session, lsm_tree)); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_worker.c b/src/third_party/wiredtiger/src/lsm/lsm_worker.c index d1272df763d..8ed4a117641 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_worker.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_worker.c @@ -19,7 +19,7 @@ static WT_THREAD_RET __lsm_worker(void *); int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) { - WT_RET(__wt_verbose(session, WT_VERB_LSM, + WT_RET(__wt_verbose(session, WT_VERB_LSM_MANAGER, "Start LSM worker %d type 0x%x", args->id, args->type)); return (__wt_thread_create(session, &args->tid, __lsm_worker, args)); } diff --git a/src/third_party/wiredtiger/src/schema/schema_truncate.c b/src/third_party/wiredtiger/src/schema/schema_truncate.c index 0124ec70ca2..899c46470ca 100644 --- a/src/third_party/wiredtiger/src/schema/schema_truncate.c +++ b/src/third_party/wiredtiger/src/schema/schema_truncate.c @@ -13,19 +13,19 @@ * WT_SESSION::truncate for a file. */ static int -__truncate_file(WT_SESSION_IMPL *session, const char *name) +__truncate_file(WT_SESSION_IMPL *session, const char *uri) { WT_DECL_RET; const char *filename; uint32_t allocsize; - filename = name; + filename = uri; if (!WT_PREFIX_SKIP(filename, "file:")) return (EINVAL); /* Open and lock the file. */ WT_RET(__wt_session_get_btree( - session, name, NULL, NULL, WT_DHANDLE_EXCLUSIVE)); + session, uri, NULL, NULL, WT_DHANDLE_EXCLUSIVE)); /* Get the allocation size. */ allocsize = S2BT(session)->allocsize; @@ -34,11 +34,11 @@ __truncate_file(WT_SESSION_IMPL *session, const char *name) /* Close any btree handles in the file. */ WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_conn_dhandle_close_all(session, name, 0)); + ret = __wt_conn_dhandle_close_all(session, uri, 0)); WT_RET(ret); /* Delete the root address and truncate the file. 
*/ - WT_RET(__wt_meta_checkpoint_clear(session, name)); + WT_RET(__wt_meta_checkpoint_clear(session, uri)); WT_RET(__wt_block_manager_truncate(session, filename, allocsize)); return (0); @@ -49,13 +49,13 @@ __truncate_file(WT_SESSION_IMPL *session, const char *name) * WT_SESSION::truncate for a table. */ static int -__truncate_table(WT_SESSION_IMPL *session, const char *name, const char *cfg[]) +__truncate_table(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) { WT_DECL_RET; WT_TABLE *table; u_int i; - WT_RET(__wt_schema_get_table(session, name, strlen(name), 0, &table)); + WT_RET(__wt_schema_get_table(session, uri, strlen(uri), 0, &table)); /* Truncate the column groups. */ for (i = 0; i < WT_COLGROUPS(table); i++) diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index 7e331c530fd..9301fb24ea0 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -869,21 +869,21 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config) int forever; session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL(session, transaction_sync, config, cfg); + WT_STAT_FAST_CONN_INCR(session, txn_sync); + conn = S2C(session); txn = &session->txn; if (F_ISSET(txn, WT_TXN_RUNNING)) - WT_RET_MSG(session, EINVAL, "transaction in progress"); + WT_ERR_MSG(session, EINVAL, "transaction in progress"); /* * If logging is not enabled there is nothing to do. 
*/ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) - return (0); - SESSION_API_CALL(session, transaction_sync, config, cfg); - WT_STAT_FAST_CONN_INCR(session, txn_sync); + WT_ERR_MSG(session, EINVAL, "logging not enabled"); log = conn->log; - ret = 0; timeout_ms = waited_ms = 0; forever = 1; @@ -907,7 +907,7 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config) */ WT_ERR(__wt_config_gets_def( session, cfg, "timeout_ms", (int)UINT_MAX, &cval)); - if ((unsigned int)cval.len != UINT_MAX) { + if ((unsigned int)cval.val != UINT_MAX) { timeout_ms = (uint64_t)cval.val; forever = 0; } @@ -1166,7 +1166,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, WT_ERR(__wt_cond_alloc(session, "session", 0, &session_ret->cond)); if (WT_SESSION_FIRST_USE(session_ret)) - __wt_random_init(session_ret->rnd); + __wt_random_init(&session_ret->rnd); __wt_event_handler_set(session_ret, event_handler == NULL ? session->event_handler : event_handler); diff --git a/src/third_party/wiredtiger/src/support/rand.c b/src/third_party/wiredtiger/src/support/rand.c index bd51b2ea0d5..7dfb98c5ca4 100644 --- a/src/third_party/wiredtiger/src/support/rand.c +++ b/src/third_party/wiredtiger/src/support/rand.c @@ -28,43 +28,68 @@ #include "wt_internal.h" +/* + * This is an implementation of George Marsaglia's multiply-with-carry pseudo- + * random number generator. Computationally fast, with reasonable randomness + * properties, and a claimed period of > 2^60. + * + * Be very careful about races here. Multiple threads can call __wt_random + * concurrently, and it is okay if those concurrent calls get the same return + * value. What is *not* okay is if reading/writing the shared state races and + * uses two different values for m_w or m_z. That can result in a stored value + * of zero, in which case they will be stuck on zero forever. Take a local copy + * of the values to avoid that, and read/write in atomic, 8B chunks. 
+ */ #undef M_W -#define M_W (rnd)[0] +#define M_W(p) ((uint32_t *)&(p))[0] #undef M_Z -#define M_Z (rnd)[1] +#define M_Z(p) ((uint32_t *)&(p))[1] /* * __wt_random_init -- * Initialize return of a 32-bit pseudo-random number. */ void -__wt_random_init(uint32_t *rnd) +__wt_random_init(uint64_t volatile * rnd_state) { - M_W = 521288629; - M_Z = 362436069; + uint64_t rnd; + + M_W(rnd) = 521288629; + M_Z(rnd) = 362436069; + *rnd_state = rnd; } /* * __wt_random -- * Return a 32-bit pseudo-random number. - * - * This is an implementation of George Marsaglia's multiply-with-carry pseudo- - * random number generator. Computationally fast, with reasonable randomness - * properties. - * - * We have to be very careful about races here. Multiple threads can call - * __wt_random concurrently, and it is okay if those concurrent calls get the - * same return value. What is *not* okay is if reading the shared state races - * with an update and uses two different values for m_w or m_z. That could - * result in a value of zero, in which case they would be stuck on zero - * forever. Take local copies of the shared values to avoid this. */ uint32_t -__wt_random(uint32_t *rnd) +__wt_random(uint64_t volatile * rnd_state) { - uint32_t w = M_W, z = M_Z; + uint64_t rnd; + uint32_t w, z; + + /* + * Take a copy of the random state so we can ensure that the + * calculation operates on the state consistently regardless of + * concurrent calls with the same random state. + */ + rnd = *rnd_state; + w = M_W(rnd); + z = M_Z(rnd); + + /* + * Check if the value goes to 0 (from which we won't recover), and reset + * to the initial state. This has additional benefits if a caller fails + * to initialize the state, or initializes with a seed that results in a + * short period. 
+ */ + if (z == 0 || w == 0) + __wt_random_init(rnd_state); + + M_Z(rnd) = z = 36969 * (z & 65535) + (z >> 16); + M_W(rnd) = w = 18000 * (w & 65535) + (w >> 16); + *rnd_state = rnd; - M_Z = z = 36969 * (z & 65535) + (z >> 16); - M_W = w = 18000 * (w & 65535) + (w >> 16); - return (z << 16) + (w & 65535); + return ((z << 16) + (w & 65535)); } diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index d8032b49b17..9dc4fca223e 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -349,17 +349,17 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN *txn; WT_TXN_GLOBAL *txn_global; WT_TXN_ISOLATION saved_isolation; - const char *txn_cfg[] = { WT_CONFIG_BASE(session, - WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; void *saved_meta_next; - int full, idle, logging, tracking; u_int i; + int full, fullckpt_logging, idle, tracking; + const char *txn_cfg[] = { WT_CONFIG_BASE(session, + WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; conn = S2C(session); + txn = &session->txn; txn_global = &conn->txn_global; saved_isolation = session->isolation; - txn = &session->txn; - full = idle = logging = tracking = 0; + full = fullckpt_logging = idle = tracking = 0; /* Ensure the metadata table is open before taking any locks. */ WT_RET(__wt_metadata_open(session)); @@ -370,6 +370,10 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) */ WT_RET(__checkpoint_apply_all(session, cfg, NULL, &full)); + /* Configure logging only if doing a full checkpoint. */ + fullckpt_logging = + full && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED); + /* * Get a list of handles we want to flush; this may pull closed objects * into the session cache, but we're going to do that eventually anyway. 
@@ -418,7 +422,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) tracking = 1; /* Tell logging that we are about to start a database checkpoint. */ - if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && full) + if (fullckpt_logging) WT_ERR(__wt_txn_checkpoint_log( session, full, WT_TXN_LOG_CKPT_PREPARE, NULL)); @@ -455,11 +459,9 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) txn_checkpoint_generation, txn_global->checkpoint_gen); /* Tell logging that we have started a database checkpoint. */ - if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && full) { + if (fullckpt_logging) WT_ERR(__wt_txn_checkpoint_log( session, full, WT_TXN_LOG_CKPT_START, NULL)); - logging = 1; - } WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint)); @@ -561,8 +563,8 @@ err: /* * Tell logging that we have finished a database checkpoint. Do not * write a log record if the database was idle. */ - if (logging) { - if (ret == 0 && full && + if (fullckpt_logging) { + if (ret == 0 && F_ISSET((WT_BTREE *)session->meta_dhandle->handle, WT_BTREE_SKIP_CKPT)) idle = 1; diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c index 63e4c50aff5..0d66eccd7dc 100644 --- a/src/third_party/wiredtiger/src/txn/txn_log.c +++ b/src/third_party/wiredtiger/src/txn/txn_log.c @@ -158,7 +158,8 @@ __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) txn = &session->txn; if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) || - F_ISSET(session, WT_SESSION_NO_LOGGING)) + F_ISSET(session, WT_SESSION_NO_LOGGING) || + F_ISSET(S2BT(session), WT_BTREE_NO_LOGGING)) return (0); /* We'd better have a transaction. */ @@ -204,6 +205,11 @@ __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_UNUSED(cfg); txn = &session->txn; + /* + * If there are no log records there is nothing to do. + */ + if (txn->logrec == NULL) + return (0); /* Write updates to the log. 
*/ return (__wt_log_write(session, txn->logrec, NULL, txn->txn_logsync)); @@ -295,8 +301,8 @@ __wt_txn_checkpoint_log( if (lsnp != NULL) *lsnp = *ckpt_lsn; return (0); - } else - return (__txn_log_file_sync(session, flags, lsnp)); + } + return (__txn_log_file_sync(session, flags, lsnp)); } switch (flags) { @@ -310,7 +316,6 @@ __wt_txn_checkpoint_log( */ WT_ERR(__wt_log_force_sync(session, ckpt_lsn)); break; - case WT_TXN_LOG_CKPT_START: /* Take a copy of the transaction snapshot. */ txn->ckpt_nsnapshot = txn->snapshot_count; @@ -322,7 +327,6 @@ __wt_txn_checkpoint_log( WT_ERR(__wt_vpack_uint( &p, WT_PTRDIFF(end, p), txn->snapshot[i])); break; - case WT_TXN_LOG_CKPT_STOP: /* * During a clean connection close, we get here without the @@ -368,7 +372,6 @@ __wt_txn_checkpoint_log( __wt_scr_free(session, &txn->ckpt_snapshot); txn->full_ckpt = 0; break; - WT_ILLEGAL_VALUE_ERR(session); } |