summary refs log tree commit diff
diff options
context:
space:
mode:
author Dan Pasette <dan@10gen.com> 2015-02-19 07:25:04 -0500
committer Ramon Fernandez <ramon.fernandez@mongodb.com> 2015-02-19 14:00:18 -0500
commit ec116a4be6ca7347fe8688c14aeba3c4d4e40873 (patch)
tree 5e97bd11f77f13d2eb7673a7b3aaf677f171dba7
parent 74aabd913f3cafb2fd3adbafb2e7ec3d5aa8597e (diff)
download mongo-ec116a4be6ca7347fe8688c14aeba3c4d4e40873.tar.gz
Import wiredtiger-wiredtiger-mongodb-3.0-rc9-1-gfc15500.tar.gz from wiredtiger branch mongodb-3.0
(cherry picked from commit 27e6efa8fee6a78ae549365093736934dc089eeb)
-rw-r--r-- src/third_party/wiredtiger/.hgtags 4
-rw-r--r-- src/third_party/wiredtiger/NEWS.MONGODB 126
-rw-r--r-- src/third_party/wiredtiger/bench/wtperf/wtperf.c 12
-rw-r--r-- src/third_party/wiredtiger/build_win/wiredtiger.def 1
-rw-r--r-- src/third_party/wiredtiger/dist/api_data.py 39
-rw-r--r-- src/third_party/wiredtiger/dist/api_err.py 68
-rw-r--r-- src/third_party/wiredtiger/dist/s_export.list 1
-rw-r--r-- src/third_party/wiredtiger/dist/s_string.ok 2
-rw-r--r-- src/third_party/wiredtiger/ext/datasources/helium/helium.c 55
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_compact.c 14
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curnext.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curprev.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_handle.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_page.c 22
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_stat.c 16
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_sync.c 21
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy.c 197
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_walk.c 32
-rw-r--r-- src/third_party/wiredtiger/src/config/config_def.c 21
-rw-r--r-- src/third_party/wiredtiger/src/conn/api_strerror.c 65
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_api.c 2
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_cache.c 9
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_log.c 3
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_file.c 8
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c 152
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_page.c 53
-rw-r--r-- src/third_party/wiredtiger/src/include/api.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i 77
-rw-r--r-- src/third_party/wiredtiger/src/include/cache.h 9
-rw-r--r-- src/third_party/wiredtiger/src/include/cache.i 33
-rw-r--r-- src/third_party/wiredtiger/src/include/config.h 17
-rw-r--r-- src/third_party/wiredtiger/src/include/connection.h 3
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 8
-rw-r--r-- src/third_party/wiredtiger/src/include/session.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in 65
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger_ext.h 11
-rw-r--r-- src/third_party/wiredtiger/src/log/log.c 39
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_merge.c 3
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_meta.c 9
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_errno.c 48
-rw-r--r-- src/third_party/wiredtiger/src/os_win/os_errno.c 65
-rw-r--r-- src/third_party/wiredtiger/src/session/session_api.c 24
-rw-r--r-- src/third_party/wiredtiger/src/support/err.c 14
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_recover.c 17
-rw-r--r-- src/third_party/wiredtiger/src/utilities/util_main.c 44
-rw-r--r-- src/third_party/wiredtiger/src/utilities/util_verify.c 32
-rwxr-xr-x src/third_party/wiredtiger/tools/wtstats/wtstats.py 328
47 files changed, 1103 insertions, 676 deletions
diff --git a/src/third_party/wiredtiger/.hgtags b/src/third_party/wiredtiger/.hgtags
index 2221be02e16..7138c8b03f5 100644
--- a/src/third_party/wiredtiger/.hgtags
+++ b/src/third_party/wiredtiger/.hgtags
@@ -36,3 +36,7 @@ e5c5feac1267bbe2195ba2b421d3f9bab335d5c3 mongodb-3.0-rc9
0000000000000000000000000000000000000000 mongodb-3.0-rc9
0000000000000000000000000000000000000000 mongodb-3.0-rc9
800508e9933ef8a213816dedfcf4bd5f9c05af0a mongodb-3.0-rc9
+800508e9933ef8a213816dedfcf4bd5f9c05af0a mongodb-3.0-rc9
+0000000000000000000000000000000000000000 mongodb-3.0-rc9
+0000000000000000000000000000000000000000 mongodb-3.0-rc9
+c1adbb056d9d959f37a967c00605745a5b1a352d mongodb-3.0-rc9
diff --git a/src/third_party/wiredtiger/NEWS.MONGODB b/src/third_party/wiredtiger/NEWS.MONGODB
index 85a68a084a0..303276a3b18 100644
--- a/src/third_party/wiredtiger/NEWS.MONGODB
+++ b/src/third_party/wiredtiger/NEWS.MONGODB
@@ -1,6 +1,126 @@
-3.0-RC9, Feb 12 2015
+3.0-RC9, Feb 18 2015
--------------------
+commit bf3ee2cd064b46cf0175d75950c825aa1f42c694
+Author: Michael Cahill <michael.cahill@wiredtiger.com>
+Date: Wed Feb 18 12:53:41 2015 +1100
+
+ Flip cache overhead to apply to the allocated bytes rather than the total size. Include the overhead in stats so that tools (e.g., mongostat) report accurate cache full and dirty percentages. This also makes eviction triggers and targets meaningful: with the default trigger of 95% and overhead 8%, eviction was previously never triggered until the cache was completely full.
+
+commit f9e6f942cf73c8a53aaadbc587c1b7efad6cc832
+Author: Keith Bostic <keith@wiredtiger.com>
+Date: Tue Feb 17 11:33:13 2015 -0500
+
+ Coverity notes the TXN_API_END_RETRY macro has an unnecessary test for "(ret == 0)" at the end of the do {} while loop.
+
+commit c34a56f357e21d134a2d9d0fefc032544069d8d7
+Author: Michael Cahill <michael.cahill@wiredtiger.com>
+Date: Tue Feb 17 21:56:34 2015 +1100
+
+ Allow the maximum number of eviction threads to be reconfigured. This was previously permitted by the API, but the array of thread contexts was not correctly resized, leading to segfaults.
+
+ refs SERVER-17293
+
+commit 67527fc235406469e69dbaec3dcd571469e660c0
+Author: Michael Cahill <michael.cahill@wiredtiger.com>
+Date: Tue Feb 17 21:50:53 2015 +1100
+
+ Make the eviction walk incremental: don't spend too long in any one file, fix tracking of whether we are making progress.
+
+commit 788265ed273c63183053e6325a9aa03c89c02860
+Author: Michael Cahill <michael.cahill@wiredtiger.com>
+Date: Tue Feb 17 21:48:44 2015 +1100
+
+ Combine the various checks for whether a page can be evicted into one place.
+
+commit 748e7b0c58b358b14340bacae41f9c46f3c06f7e
+Author: Michael Cahill <michael.cahill@wiredtiger.com>
+Date: Tue Feb 17 21:47:57 2015 +1100
+
+ Skip hot pages during write leaves: checkpoint will have to visit them anyway.
+
+commit a9de0f7ac8ad373d7aef6a480c69a2a7e0b55c59
+Author: Michael Cahill <michael.cahill@wiredtiger.com>
+Date: Tue Feb 17 21:18:41 2015 +1100
+
+ Run recovery after crashing test/format in the recovery test.
+
+commit 4733961a3c1fa37988178d1b1dd4eb44d83b63f6
+Author: Thomas Rueckstiess <thomas@rueckstiess.net>
+Date: Mon Feb 16 12:50:26 2015 +1100
+
+ fixes and improvements for wtperf parsing
+
+ - convert wtperf microsec to millisec
+ - don't skip monitor* files when parsing directory
+ - parsing code reorganization
+ - renamed wtperf stats fixture
+ - added tests
+
+commit dc396e1cd64871219b9e5a1b6558707feb70706e
+Author: Keith Bostic <keith@wiredtiger.com>
+Date: Sun Feb 15 13:44:30 2015 -0500
+
+ Clear the btree object statistics we're about to count, otherwise each cursor gets a cumulative value.
+
+commit 195b144bb37814b31cfa413029cda0b28f13f261
+Author: Keith Bostic <keith@wiredtiger.com>
+Date: Fri Feb 13 12:06:36 2015 -0500
+
+ Don't map WT_NOTFOUND to ENOENT unless a uri was specified, that's the only interesting case. Reference SERVER-17141.
+
+commit e9d7fee2c2c08985b8e2d2716e899853c5198290
+Author: Thomas Rueckstiess <thomas@rueckstiess.net>
+Date: Fri Feb 13 17:22:05 2015 +1100
+
+ added support to parse wtperf files.
+
+ they go into a separate stats section named "wtperf".
+
+commit 29d0d26fd1cd76392ea8225c1c4022ca54443737
+Author: Keith Bostic <keith@wiredtiger.com>
+Date: Thu Feb 12 18:05:33 2015 -0500
+
+ Ignore unexpected information in the metadata entry, the metadata entry might have been created by a future release, with unknown options.
+
+commit 05f07753059a4fa7f0f1bab7a107a9e6d17bf4af
+Author: Keith Bostic <keith@wiredtiger.com>
+Date: Thu Feb 12 15:42:35 2015 -0500
+
+ Remove the requirement of a HAVE_DIAGNOSTIC build for the verify commands to work (except for dump_offsets, that requires the btree debugging code and so won't work anywhere but a HAVE_DIAGNOSTIC build).
+
+commit 006ed9f17c7fc0fe65dc43717ed0239b3bac564c
+Author: Keith Bostic <keith@wiredtiger.com>
+Date: Thu Feb 12 15:31:36 2015 -0500
+
+ Add support for a new verify debug option, "dump_shape", that reports the levels of the tree.
+
+ __wt_config_gets() returns WT_NOTFOUND when there's no entry, don't fail every command when DIAGNOSTIC #defined and debug options aren't set.
+
+commit 46b7721215856d08ca3a37f7ffc27c57b1d4c1d7
+Author: Susan LoVerso <sue@wiredtiger.com>
+Date: Thu Feb 12 13:27:32 2015 -0500
+
+ Add recover config setting and use it in the wt command. #1651
+
+commit 0305a51ffba383af13d6078d409a03b249c502c5
+Author: Don Anderson <dda@ddanderson.com>
+Date: Wed Feb 11 10:21:04 2015 -0500
+
+ Add test to detect file ID problems in recovery. Refs #1622.
+
+commit fc0ff5a9ea09e54512353d2275126cb54dbc5451
+Author: Susan LoVerso <sue@wiredtiger.com>
+Date: Tue Feb 10 13:02:28 2015 -0500
+
+ Allow 'wt' command to run with or without recovery. #1651
+
+commit a26d87a53eb2ac2dcae9312b7979499c34c11613
+Author: Keith Bostic <keith@wiredtiger.com>
+Date: Mon Feb 2 19:20:33 2015 -0500
+
+ Replace wiredtiger_strerror_r with WT_SESSION.strerror, reference #1516.
+
commit 33c146b51fdac86999e2eaa67f5636490eb441fb
Author: Michael Cahill <michael.cahill@wiredtiger.com>
Date: Thu Feb 12 13:44:35 2015 +1100
@@ -133,9 +253,7 @@ Date: Mon Feb 9 12:25:10 2015 +1100
Refs SERVER-17204
- The bug is that we weren't doing an fsync of the file after it was
- created. Recovery assumes that if there are records for a particular
- file, then it will exist on disk.
+ The bug is that we weren't doing an fsync of the file after it was created. Recovery assumes that if there are records for a particular file, then it will exist on disk.
commit 4d50f5878073e582567848ae03ee506bb5058227
Author: Alex Gorrod <alexg@wiredtiger.com>
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
index b9d72e45184..8780d270664 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
@@ -833,7 +833,17 @@ populate_thread(void *arg)
if (cfg->random_value)
randomize_value(thread, value_buf);
cursor->set_value(cursor, value_buf);
- if ((ret = cursor->insert(cursor)) != 0) {
+ if ((ret = cursor->insert(cursor)) == WT_ROLLBACK) {
+ lprintf(cfg, ret, 0, "insert retrying");
+ if ((ret = session->rollback_transaction(
+ session, NULL)) != 0) {
+ lprintf(cfg, ret, 0,
+ "Failed rollback_transaction");
+ goto err;
+ }
+ intxn = 0;
+ continue;
+ } else if (ret != 0) {
lprintf(cfg, ret, 0, "Failed inserting");
goto err;
}
diff --git a/src/third_party/wiredtiger/build_win/wiredtiger.def b/src/third_party/wiredtiger/build_win/wiredtiger.def
index 02884e4fd65..86096fb778d 100644
--- a/src/third_party/wiredtiger/build_win/wiredtiger.def
+++ b/src/third_party/wiredtiger/build_win/wiredtiger.def
@@ -9,7 +9,6 @@ EXPORTS
wiredtiger_pack_str
wiredtiger_pack_uint
wiredtiger_strerror
- wiredtiger_strerror_r
wiredtiger_struct_pack
wiredtiger_struct_size
wiredtiger_struct_unpack
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 82335e3f831..feb51011309 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -325,13 +325,13 @@ connection_runtime_config = [
min='1MB', max='10TB'),
Config('cache_overhead', '8', r'''
assume the heap allocator overhead is the specified percentage, and
- adjust the cache size by that amount (for example, if the cache size is
- 100GB, a percentage of 10 means WiredTiger limits itself to allocating
- 90GB of memory). This value is configurable because different heap
- allocators have different overhead and different workloads will have
- different heap allocation sizes and patterns, therefore applications
- may need to adjust this value based on allocator choice and behavior
- in measured workloads''',
+ adjust the cache usage by that amount (for example, if there is 10GB
+ of data in cache, a percentage of 10 means WiredTiger treats this as
+ 11GB). This value is configurable because different heap allocators
+ have different overhead and different workloads will have different
+ heap allocation sizes and patterns, therefore applications may need to
+ adjust this value based on allocator choice and behavior in measured
+ workloads''',
min='0', max='30'),
Config('checkpoint', '', r'''
periodically checkpoint the database''',
@@ -550,6 +550,10 @@ common_wiredtiger_open = [
Config('prealloc', 'true', r'''
pre-allocate log files.''',
type='boolean'),
+ Config('recover', 'on', r'''
+ run recovery or error if recovery needs to run after an
+ unclean shutdown.''',
+ choices=['error','on']),
]),
Config('mmap', 'true', r'''
Use memory mapping to access files when possible''',
@@ -719,24 +723,29 @@ methods = {
files''',
type='boolean'),
]),
+'session.strerror' : Method([]),
'session.truncate' : Method([]),
'session.upgrade' : Method([]),
'session.verify' : Method([
Config('dump_address', 'false', r'''
- Display addresses and page types as pages are verified, using
- the application's message handler, intended for debugging''',
+ Display addresses and page types as pages are verified,
+ using the application's message handler, intended for debugging''',
type='boolean'),
Config('dump_blocks', 'false', r'''
- Display the contents of on-disk blocks as they are verified, using
- the application's message handler, intended for debugging''',
+ Display the contents of on-disk blocks as they are verified,
+ using the application's message handler, intended for debugging''',
type='boolean'),
Config('dump_offsets', '', r'''
- Display the contents of specific on-disk blocks, using
- the application's message handler, intended for debugging''',
+ Display the contents of specific on-disk blocks,
+ using the application's message handler, intended for debugging''',
type='list'),
Config('dump_pages', 'false', r'''
- Display the contents of in-memory pages as they are verified, using
- the application's message handler, intended for debugging''',
+ Display the contents of in-memory pages as they are verified,
+ using the application's message handler, intended for debugging''',
+ type='boolean'),
+ Config('dump_shape', 'false', r'''
+ Display the shape of the tree after verification,
+ using the application's message handler, intended for debugging''',
type='boolean')
]),
diff --git a/src/third_party/wiredtiger/dist/api_err.py b/src/third_party/wiredtiger/dist/api_err.py
index 6c893c9af82..d39f076656f 100644
--- a/src/third_party/wiredtiger/dist/api_err.py
+++ b/src/third_party/wiredtiger/dist/api_err.py
@@ -47,6 +47,10 @@ errors = [
interface, no further WiredTiger calls are required.'''),
Error('WT_RESTART', -31805,
'restart the operation (internal)', undoc=True),
+ Error('WT_RUN_RECOVERY', -31806,
+ 'recovery must be run to continue', '''
+ This error is generated when wiredtiger_open is configured
+ to return an error if recovery is required to use the database.'''),
]
# Update the #defines in the wiredtiger.in file.
@@ -88,28 +92,41 @@ tfile.write('''/* DO NOT EDIT: automatically built by dist/api_err.py. */
/*
* Historically, there was only the wiredtiger_strerror call because the POSIX
* port didn't need anything more complex; Windows requires memory allocation
- * of error strings, so we added the wiredtiger_strerror_r call. Because we
+ * of error strings, so we added the WT_SESSION.strerror method. Because we
* want wiredtiger_strerror to continue to be as thread-safe as possible, errors
- * are split into three categories: WiredTiger constant strings, system constant
- * strings and Everything Else, and we check constant strings before Everything
- * Else.
+ * are split into two categories: WiredTiger's or the system's constant strings
+ * and Everything Else, and we check constant strings before Everything Else.
*/
/*
- * __wiredtiger_error --
- *\tReturn a constant string for the WiredTiger errors.
+ * __wt_wiredtiger_error --
+ *\tReturn a constant string for WiredTiger POSIX-standard and errors.
*/
-static const char *
-__wiredtiger_error(int error)
+const char *
+__wt_wiredtiger_error(int error)
{
+\tconst char *p;
+
+\t/*
+\t * Check for WiredTiger specific errors.
+\t */
\tswitch (error) {
''')
for err in errors:
tfile.write('\tcase ' + err.name + ':\n')
tfile.write('\t\treturn ("' + err.name + ': ' + err.desc + '");\n')
-
tfile.write('''\t}
+
+\t/*
+\t * POSIX errors are non-negative integers; check for 0 explicitly
+\t * in-case the underlying strerror doesn't handle 0, some don't.
+\t */
+\tif (error == 0)
+\t\treturn ("Successful return: 0");
+\tif (error > 0 && (p = strerror(error)) != NULL)
+\t\treturn (p);
+
\treturn (NULL);
}
@@ -121,39 +138,8 @@ const char *
wiredtiger_strerror(int error)
{
\tstatic char buf[128];
-\tconst char *p;
-
-\t/* Check for a constant string. */
-\tif ((p = __wiredtiger_error(error)) != NULL ||
-\t (p = __wt_strerror(error)) != NULL)
-\t\treturn (p);
-
-\t/* Else, fill in the non-thread-safe static buffer. */
-\tif (wiredtiger_strerror_r(error, buf, sizeof(buf)) != 0)
-\t\t(void)snprintf(buf, sizeof(buf), "error return: %d", error);
-
-\treturn (buf);
-}
-
-/*
- * wiredtiger_strerror_r --
- *\tReturn a string for any error value, thread-safe version.
- */
-int
-wiredtiger_strerror_r(int error, char *buf, size_t buflen)
-{
-\tconst char *p;
-
-\t/* Require at least 2 bytes, printable character and trailing nul. */
-\tif (buflen < 2)
-\t\treturn (ENOMEM);
-
-\t/* Check for a constant string. */
-\tif ((p = __wiredtiger_error(error)) != NULL ||
-\t (p = __wt_strerror(error)) != NULL)
-\t\treturn (snprintf(buf, buflen, "%s", p) > 0 ? 0 : ENOMEM);
-\treturn (__wt_strerror_r(error, buf, buflen));
+\treturn (__wt_strerror(NULL, error, buf, sizeof(buf)));
}
''')
tfile.close()
diff --git a/src/third_party/wiredtiger/dist/s_export.list b/src/third_party/wiredtiger/dist/s_export.list
index 8f469e94433..d3803bc3afa 100644
--- a/src/third_party/wiredtiger/dist/s_export.list
+++ b/src/third_party/wiredtiger/dist/s_export.list
@@ -8,7 +8,6 @@ wiredtiger_pack_start
wiredtiger_pack_str
wiredtiger_pack_uint
wiredtiger_strerror
-wiredtiger_strerror_r
wiredtiger_struct_pack
wiredtiger_struct_size
wiredtiger_struct_unpack
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 1658684313c..66439faf161 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -283,6 +283,7 @@ RNG
ROCKSDB
RPC
RUNDIR
+RVv
Radu
Recno
Recurse
@@ -1073,6 +1074,7 @@ treplacement
trk
trk's
troot
+trun
trunc
trylock
trywrlock
diff --git a/src/third_party/wiredtiger/ext/datasources/helium/helium.c b/src/third_party/wiredtiger/ext/datasources/helium/helium.c
index d62ecb846e9..3fc521d93b2 100644
--- a/src/third_party/wiredtiger/ext/datasources/helium/helium.c
+++ b/src/third_party/wiredtiger/ext/datasources/helium/helium.c
@@ -1913,7 +1913,7 @@ bad_name: ERET(wtext, session, EINVAL, "%s: illegal name format", uri);
if (ret != 0 && ret != WT_NOTFOUND)
EMSG_ERR(wtext, session, ret,
"helium_o_truncate configuration: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, session, ret));
if ((ret = ws_source_open_object(
wtds, session, hs, uri, NULL, oflags, &ws->he)) != 0)
@@ -2041,7 +2041,8 @@ master_uri_set(WT_DATA_SOURCE *wtds,
exclusive = a.val != 0;
else if (ret != WT_NOTFOUND)
ERET(wtext, session, ret,
- "exclusive configuration: %s", wtext->strerror(ret));
+ "exclusive configuration: %s",
+ wtext->strerror(wtext, session, ret));
/* Get the key/value format strings. */
if ((ret = wtext->config_get(
@@ -2052,7 +2053,7 @@ master_uri_set(WT_DATA_SOURCE *wtds,
} else
ERET(wtext, session, ret,
"key_format configuration: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, session, ret));
}
if ((ret = wtext->config_get(
wtext, session, config, "value_format", &b)) != 0) {
@@ -2062,7 +2063,7 @@ master_uri_set(WT_DATA_SOURCE *wtds,
} else
ERET(wtext, session, ret,
"value_format configuration: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, session, ret));
}
/* Get the compression configuration. */
@@ -2073,7 +2074,7 @@ master_uri_set(WT_DATA_SOURCE *wtds,
else
ERET(wtext, session, ret,
"helium_o_compress configuration: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, session, ret));
}
/*
@@ -2090,7 +2091,8 @@ master_uri_set(WT_DATA_SOURCE *wtds,
return (0);
if (ret == WT_DUPLICATE_KEY)
return (exclusive ? EEXIST : 0);
- ERET(wtext, session, ret, "%s: %s", uri, wtext->strerror(ret));
+ ERET(wtext,
+ session, ret, "%s: %s", uri, wtext->strerror(wtext, session, ret));
}
/*
@@ -2129,19 +2131,22 @@ helium_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session,
if ((ret = wtext->config_get( /* Parse configuration */
wtext, session, config, "append", &v)) != 0)
EMSG_ERR(wtext, session, ret,
- "append configuration: %s", wtext->strerror(ret));
+ "append configuration: %s",
+ wtext->strerror(wtext, session, ret));
cursor->config_append = v.val != 0;
if ((ret = wtext->config_get(
wtext, session, config, "overwrite", &v)) != 0)
EMSG_ERR(wtext, session, ret,
- "overwrite configuration: %s", wtext->strerror(ret));
+ "overwrite configuration: %s",
+ wtext->strerror(wtext, session, ret));
cursor->config_overwrite = v.val != 0;
if ((ret = wtext->collator_config(
wtext, session, uri, config, NULL, &own)) != 0)
EMSG_ERR(wtext, session, ret,
- "collator configuration: %s", wtext->strerror(ret));
+ "collator configuration: %s",
+ wtext->strerror(wtext, session, ret));
/* Finish initializing the cursor. */
cursor->wtcursor.close = helium_cursor_close;
@@ -2178,19 +2183,19 @@ helium_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session,
session, value, strlen(value), &config_parser)) != 0)
EMSG_ERR(wtext, session, ret,
"Configuration string parser: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, session, ret));
if ((ret = config_parser->get(
config_parser, "key_format", &v)) != 0)
EMSG_ERR(wtext, session, ret,
"key_format configuration: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, session, ret));
ws->config_recno = v.len == 1 && v.str[0] == 'r';
if ((ret = config_parser->get(
config_parser, "value_format", &v)) != 0)
EMSG_ERR(wtext, session, ret,
"value_format configuration: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, session, ret));
ws->config_bitfield =
v.len == 2 && isdigit(v.str[0]) && v.str[1] == 't';
@@ -2198,7 +2203,7 @@ helium_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session,
config_parser, "helium_o_compress", &v)) != 0)
EMSG_ERR(wtext, session, ret,
"helium_o_compress configuration: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, session, ret));
ws->config_compress = v.val ? 1 : 0;
/*
@@ -2237,7 +2242,8 @@ err: if (ws != NULL && locked)
if (config_parser != NULL &&
(tret = config_parser->close(config_parser)) != 0)
EMSG(wtext, session, tret,
- "WT_CONFIG_PARSER.close: %s", wtext->strerror(tret));
+ "WT_CONFIG_PARSER.close: %s",
+ wtext->strerror(wtext, session, tret));
free((void *)value);
return (ret);
@@ -2913,7 +2919,7 @@ helium_config_read(WT_EXTENSION_API *wtext, WT_CONFIG_ITEM *config,
wtext, NULL, config->str, config->len, &config_parser)) != 0)
ERET(wtext, NULL, ret,
"WT_EXTENSION_API.config_parser_open: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, NULL, ret));
while ((ret = config_parser->next(config_parser, &k, &v)) == 0) {
if (string_match("helium_devices", k.str, k.len)) {
if ((*devicep = calloc(1, v.len + 1)) == NULL)
@@ -2944,11 +2950,13 @@ helium_config_read(WT_EXTENSION_API *wtext, WT_CONFIG_ITEM *config,
ret = 0;
if (ret != 0)
EMSG_ERR(wtext, NULL, ret,
- "WT_CONFIG_PARSER.next: %s", wtext->strerror(ret));
+ "WT_CONFIG_PARSER.next: %s",
+ wtext->strerror(wtext, NULL, ret));
err: if ((tret = config_parser->close(config_parser)) != 0)
EMSG(wtext, NULL, tret,
- "WT_CONFIG_PARSER.close: %s", wtext->strerror(tret));
+ "WT_CONFIG_PARSER.close: %s",
+ wtext->strerror(wtext, NULL, tret));
return (ret);
}
@@ -3373,14 +3381,14 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
if ((ret = wtext->config_get(wtext, NULL, config, "config", &v)) != 0)
EMSG_ERR(wtext, NULL, ret,
"WT_EXTENSION_API.config_get: config: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, NULL, ret));
/* Step through the list of Helium sources, opening each one. */
if ((ret = wtext->config_parser_open(
wtext, NULL, v.str, v.len, &config_parser)) != 0)
EMSG_ERR(wtext, NULL, ret,
"WT_EXTENSION_API.config_parser_open: config: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, NULL, ret));
while ((ret = config_parser->next(config_parser, &k, &v)) == 0) {
if (string_match("helium_verbose", k.str, k.len)) {
verbose = v.val == 0 ? 0 : 1;
@@ -3392,11 +3400,11 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
if (ret != WT_NOTFOUND)
EMSG_ERR(wtext, NULL, ret,
"WT_CONFIG_PARSER.next: config: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, NULL, ret));
if ((ret = config_parser->close(config_parser)) != 0)
EMSG_ERR(wtext, NULL, ret,
"WT_CONFIG_PARSER.close: config: %s",
- wtext->strerror(ret));
+ wtext->strerror(wtext, NULL, ret));
config_parser = NULL;
/* Find and open the database transaction store. */
@@ -3423,13 +3431,14 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
EMSG_ERR(wtext, NULL, ret,
"WT_CONNECTION.configure_method: session.create: "
"%s: %s",
- *p, wtext->strerror(ret));
+ *p, wtext->strerror(wtext, NULL, ret));
/* Add the data source */
if ((ret = connection->add_data_source(
connection, "helium:", (WT_DATA_SOURCE *)ds, NULL)) != 0)
EMSG_ERR(wtext, NULL, ret,
- "WT_CONNECTION.add_data_source: %s", wtext->strerror(ret));
+ "WT_CONNECTION.add_data_source: %s",
+ wtext->strerror(wtext, NULL, ret));
return (0);
err: if (ds != NULL)
diff --git a/src/third_party/wiredtiger/src/btree/bt_compact.c b/src/third_party/wiredtiger/src/btree/bt_compact.c
index 405410c6a1c..d8b3a638de3 100644
--- a/src/third_party/wiredtiger/src/btree/bt_compact.c
+++ b/src/third_party/wiredtiger/src/btree/bt_compact.c
@@ -105,8 +105,8 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
* writing the page modify information.
*
* There are three ways we call reconciliation: checkpoints, threads
- * writing leaf pages (usually in preparation for a checkpoint), and
- * eviction.
+ * writing leaf pages (usually in preparation for a checkpoint or if
+ * closing a file), and eviction.
*
* We're holding the schema lock which serializes with checkpoints.
*/
@@ -149,7 +149,7 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
* read, set its generation to a low value so it is evicted
* quickly.
*/
- WT_ERR(__wt_tree_walk(session, &ref,
+ WT_ERR(__wt_tree_walk(session, &ref, NULL,
WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED));
if (ref == NULL)
break;
@@ -171,10 +171,12 @@ err: if (ref != NULL)
if (block_manager_begin)
WT_TRET(bm->compact_end(bm, session));
- __wt_spin_unlock(session, &btree->flush_lock);
-
+ /*
+ * Unlock will be a release barrier, use it to update the compaction
+ * status for reconciliation.
+ */
conn->compact_in_memory_pass = 0;
- WT_FULL_BARRIER();
+ __wt_spin_unlock(session, &btree->flush_lock);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 6140dca1fad..d80a5f4740d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -487,7 +487,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
__wt_page_evict_soon(page);
cbt->page_deleted_count = 0;
- WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
+ WT_ERR(__wt_tree_walk(session, &cbt->ref, NULL, flags));
WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 880cb777954..f1ca81ee145 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -574,7 +574,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, int truncating)
__wt_page_evict_soon(page);
cbt->page_deleted_count = 0;
- WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
+ WT_ERR(__wt_tree_walk(session, &cbt->ref, NULL, flags));
WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 6a2789c909b..299849ad365 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -566,7 +566,7 @@ __btree_get_last_recno(WT_SESSION_IMPL *session)
btree = S2BT(session);
next_walk = NULL;
- WT_RET(__wt_tree_walk(session, &next_walk, WT_READ_PREV));
+ WT_RET(__wt_tree_walk(session, &next_walk, NULL, WT_READ_PREV));
if (next_walk == NULL)
return (WT_NOTFOUND);
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index d1da615dafe..2f2ce4cf4f7 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -32,17 +32,11 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
return (0);
/* Leaf pages only. */
- if (page->type != WT_PAGE_COL_FIX &&
- page->type != WT_PAGE_COL_VAR &&
- page->type != WT_PAGE_ROW_LEAF)
+ if (WT_PAGE_IS_INTERNAL(page))
return (0);
- /*
- * Eviction may be turned off (although that's rare), or we may be in
- * the middle of a checkpoint.
- */
- if (LF_ISSET(WT_READ_NO_EVICT) ||
- F_ISSET(btree, WT_BTREE_NO_EVICTION) || btree->checkpointing)
+ /* Eviction may be turned off. */
+ if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(btree, WT_BTREE_NO_EVICTION))
return (0);
/*
@@ -52,17 +46,11 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
if (page->modify == NULL)
return (0);
- /*
- * If the page was recently split in-memory, don't force it out: we
- * hope eviction will find it first.
- */
- if (!__wt_txn_visible_all(session, page->modify->first_dirty_txn))
- return (0);
-
/* Trigger eviction on the next page release. */
__wt_page_evict_soon(page);
- return (1);
+ /* If eviction cannot succeed, don't try. */
+ return (__wt_page_can_evict(session, page, 1));
}
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c
index b7108b52395..2e34a925f84 100644
--- a/src/third_party/wiredtiger/src/btree/bt_stat.c
+++ b/src/third_party/wiredtiger/src/btree/bt_stat.c
@@ -43,9 +43,21 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
if (!F_ISSET(cst, WT_CONN_STAT_ALL))
return (0);
+ /*
+ * Clear the statistics we're about to count.
+ */
+ WT_STAT_SET(stats, btree_column_deleted, 0);
+ WT_STAT_SET(stats, btree_column_fix, 0);
+ WT_STAT_SET(stats, btree_column_internal, 0);
+ WT_STAT_SET(stats, btree_column_variable, 0);
+ WT_STAT_SET(stats, btree_entries, 0);
+ WT_STAT_SET(stats, btree_overflow, 0);
+ WT_STAT_SET(stats, btree_row_internal, 0);
+ WT_STAT_SET(stats, btree_row_leaf, 0);
+
next_walk = NULL;
- while ((ret =
- __wt_tree_walk(session, &next_walk, 0)) == 0 && next_walk != NULL) {
+ while ((ret = __wt_tree_walk(session, &next_walk, NULL, 0)) == 0 &&
+ next_walk != NULL) {
WT_WITH_PAGE_INDEX(session,
ret = __stat_page(session, next_walk->page, stats));
WT_RET(ret);
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index f038ebf3ecc..a75af03d8c8 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -56,13 +56,19 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
flags |= WT_READ_NO_WAIT | WT_READ_SKIP_INTL;
for (walk = NULL;;) {
- WT_ERR(__wt_tree_walk(session, &walk, flags));
+ WT_ERR(__wt_tree_walk(session, &walk, NULL, flags));
if (walk == NULL)
break;
- /* Write dirty pages if nobody beat us to it. */
+ /*
+ * Write dirty pages if nobody beat us to it. Don't
+ * try to write the hottest pages: checkpoint will have
+ * to visit them anyway.
+ */
page = walk->page;
- if (__wt_page_is_modified(page)) {
+ if (__wt_page_is_modified(page) &&
+ __wt_txn_visible_all(
+ session, page->modify->update_txn)) {
if (txn->isolation == TXN_ISO_READ_COMMITTED)
__wt_txn_refresh(session, 1);
leaf_bytes += page->memory_footprint;
@@ -102,7 +108,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
/* Write all dirty in-cache pages. */
flags |= WT_READ_NO_EVICT;
for (walk = NULL;;) {
- WT_ERR(__wt_tree_walk(session, &walk, flags));
+ WT_ERR(__wt_tree_walk(session, &walk, NULL, flags));
if (walk == NULL)
break;
@@ -137,7 +143,6 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
}
}
break;
- WT_ILLEGAL_VALUE_ERR(session);
}
if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) {
@@ -170,6 +175,12 @@ err: /* On error, clear any left-over tree walk. */
WT_FULL_BARRIER();
/*
+ * If this tree was being skipped by the eviction server during
+ * the checkpoint, clear the wait.
+ */
+ btree->evict_walk_period = 0;
+
+ /*
* Wake the eviction server, in case application threads have
* stalled while the eviction server decided it couldn't make
* progress. Without this, application threads will be stalled
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 2957eda3a49..45c2029f6ed 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -20,17 +20,21 @@ typedef struct {
uint64_t fcnt; /* Progress counter */
+#define WT_VRFY_DUMP(vs) \
+ ((vs)->dump_address || \
+ (vs)->dump_blocks || (vs)->dump_pages || (vs)->dump_shape)
int dump_address; /* Debugging hooks */
- int dump_pages;
int dump_blocks;
+ int dump_pages;
+ int dump_shape;
+
+ u_int depth, depth_internal[100], depth_leaf[100];
WT_ITEM *tmp1; /* Temporary buffer */
WT_ITEM *tmp2; /* Temporary buffer */
} WT_VSTUFF;
static void __verify_checkpoint_reset(WT_VSTUFF *);
-static int __verify_config(WT_SESSION_IMPL *, const char *[], WT_VSTUFF *);
-static int __verify_config_offsets(WT_SESSION_IMPL *, const char *[], int *);
static int __verify_overflow(
WT_SESSION_IMPL *, const uint8_t *, size_t, WT_VSTUFF *);
static int __verify_overflow_cell(
@@ -42,6 +46,96 @@ static int __verify_row_leaf_key_order(
static int __verify_tree(WT_SESSION_IMPL *, WT_REF *, WT_VSTUFF *);
/*
+ * __verify_config --
+ * Debugging: verification supports dumping pages in various formats.
+ * Reads the dump_address, dump_blocks, dump_pages and dump_shape
+ * settings from the configuration into the verify state, each stored
+ * as a 0/1 flag. When HAVE_DIAGNOSTIC is not defined, requesting
+ * dump_blocks or dump_pages fails with ENOTSUP; dump_address and
+ * dump_shape are not rejected.
+ */
+static int
+__verify_config(WT_SESSION_IMPL *session, const char *cfg[], WT_VSTUFF *vs)
+{
+ WT_CONFIG_ITEM cval;
+
+ WT_RET(__wt_config_gets(session, cfg, "dump_address", &cval));
+ vs->dump_address = cval.val != 0;
+
+ WT_RET(__wt_config_gets(session, cfg, "dump_blocks", &cval));
+ vs->dump_blocks = cval.val != 0;
+
+ WT_RET(__wt_config_gets(session, cfg, "dump_pages", &cval));
+ vs->dump_pages = cval.val != 0;
+
+ WT_RET(__wt_config_gets(session, cfg, "dump_shape", &cval));
+ vs->dump_shape = cval.val != 0;
+
+#if !defined(HAVE_DIAGNOSTIC)
+ if (vs->dump_blocks || vs->dump_pages)
+ WT_RET_MSG(session, ENOTSUP,
+ "the WiredTiger library was not built in diagnostic mode");
+#endif
+ return (0);
+}
+
+/*
+ * __verify_config_offsets --
+ * Debugging: optionally dump specific blocks from the file.
+ * Walks the "dump_offsets" configuration list. *quitp is cleared on
+ * entry and set to 1 as soon as any list entry is seen. An entry
+ * that carries a value, or whose key does not scan as an unsigned
+ * long, is rejected with EINVAL. When HAVE_DIAGNOSTIC is not defined
+ * a well-formed entry is rejected with ENOTSUP instead of being
+ * dumped.
+ */
+static int
+__verify_config_offsets(WT_SESSION_IMPL *session, const char *cfg[], int *quitp)
+{
+ WT_CONFIG list;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_DECL_RET;
+ u_long offset;
+
+ *quitp = 0;
+
+ WT_RET(__wt_config_gets(session, cfg, "dump_offsets", &cval));
+ WT_RET(__wt_config_subinit(session, &list, &cval));
+ while ((ret = __wt_config_next(&list, &k, &v)) == 0) {
+ /*
+ * Quit after dumping the requested blocks. (That's hopefully
+ * what the user wanted, all of this stuff is just hooked into
+ * verify because that's where we "dump blocks" for debugging.)
+ */
+ *quitp = 1;
+ if (v.len != 0 || sscanf(k.str, "%lu", &offset) != 1)
+ WT_RET_MSG(session, EINVAL,
+ "unexpected dump offset format");
+#if !defined(HAVE_DIAGNOSTIC)
+ WT_RET_MSG(session, ENOTSUP,
+ "the WiredTiger library was not built in diagnostic mode");
+#else
+ WT_TRET(
+ __wt_debug_offset_blind(session, (wt_off_t)offset, NULL));
+#endif
+ }
+ return (ret == WT_NOTFOUND ? 0 : ret); /* WT_NOTFOUND (presumably end of list) is not reported as an error. */
+}
+
+/*
+ * __verify_tree_shape --
+ * Dump the tree shape.
+ * Prints a heading for internal pages followed by one line per depth
+ * slot whose counter is non-zero, then does the same for leaf pages.
+ * Returns the first error from the message call, otherwise 0.
+ */
+static int
+__verify_tree_shape(WT_SESSION_IMPL *session, WT_VSTUFF *vs)
+{
+ size_t i;
+
+ WT_RET(__wt_msg(session, "Internal page tree-depth:"));
+ for (i = 0; i < WT_ELEMENTS(vs->depth_internal); ++i)
+ if (vs->depth_internal[i] != 0)
+ WT_RET(__wt_msg(session,
+ "\t%03zu: %u", i, vs->depth_internal[i]));
+
+ WT_RET(__wt_msg(session, "Leaf page tree-depth:"));
+ for (i = 0; i < WT_ELEMENTS(vs->depth_leaf); ++i)
+ if (vs->depth_leaf[i] != 0)
+ WT_RET(__wt_msg(session,
+ "\t%03zu: %u", i, vs->depth_leaf[i]));
+
+ return (0);
+}
+
+/*
* __wt_verify --
* Verify a file.
*/
@@ -97,11 +191,10 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
/* House-keeping between checkpoints. */
__verify_checkpoint_reset(vs);
-#ifdef HAVE_DIAGNOSTIC
- if (vs->dump_address || vs->dump_blocks || vs->dump_pages)
+ if (WT_VRFY_DUMP(vs))
WT_ERR(__wt_msg(session, "%s: checkpoint %s",
btree->dhandle->name, ckpt->name));
-#endif
+
/* Load the checkpoint. */
WT_ERR(bm->checkpoint_load(bm, session,
ckpt->raw.data, ckpt->raw.size,
@@ -114,15 +207,13 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
if (root_addr_size != 0 &&
(ret = __wt_btree_tree_open(
session, root_addr, root_addr_size)) == 0) {
-#ifdef HAVE_DIAGNOSTIC
- if (vs->dump_address ||
- vs->dump_blocks || vs->dump_pages)
+ if (WT_VRFY_DUMP(vs))
WT_ERR(__wt_msg(session, "Root: %s %s",
__wt_addr_string(session,
root_addr, root_addr_size, vs->tmp1),
__wt_page_type_string(
btree->root.page->type)));
-#endif
+
WT_WITH_PAGE_INDEX(session,
ret = __verify_tree(session, &btree->root, vs));
@@ -132,6 +223,10 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
/* Unload the checkpoint. */
WT_TRET(bm->checkpoint_unload(bm, session));
WT_ERR(ret);
+
+ /* Display the tree shape. */
+ if (vs->dump_shape)
+ WT_ERR(__verify_tree_shape(session, vs));
}
done:
@@ -156,69 +251,6 @@ err: /* Inform the underlying block manager we're done. */
}
/*
- * __verify_config --
- * Debugging: verification supports dumping pages in various formats.
- */
-static int
-__verify_config(WT_SESSION_IMPL *session, const char *cfg[], WT_VSTUFF *vs)
-{
- WT_CONFIG_ITEM cval;
-
- WT_RET(__wt_config_gets(session, cfg, "dump_address", &cval));
- vs->dump_address = cval.val != 0;
-
- WT_RET(__wt_config_gets(session, cfg, "dump_blocks", &cval));
- vs->dump_blocks = cval.val != 0;
-
- WT_RET(__wt_config_gets(session, cfg, "dump_pages", &cval));
- vs->dump_pages = cval.val != 0;
-
-#if !defined(HAVE_DIAGNOSTIC)
- if (vs->dump_address || vs->dump_blocks || vs->dump_pages)
- WT_RET_MSG(session, ENOTSUP,
- "the WiredTiger library was not built in diagnostic mode");
-#endif
- return (0);
-}
-
-/*
- * __verify_config_offsets --
- * Debugging: optionally dump specific blocks from the file.
- */
-static int
-__verify_config_offsets(WT_SESSION_IMPL *session, const char *cfg[], int *quitp)
-{
- WT_CONFIG list;
- WT_CONFIG_ITEM cval, k, v;
- WT_DECL_RET;
- u_long offset;
-
- *quitp = 0;
-
- WT_RET(__wt_config_gets(session, cfg, "dump_offsets", &cval));
- WT_RET(__wt_config_subinit(session, &list, &cval));
- while ((ret = __wt_config_next(&list, &k, &v)) == 0) {
- /*
- * Quit after dumping the requested blocks. (That's hopefully
- * what the user wanted, all of this stuff is just hooked into
- * verify because that's where we "dump blocks" for debugging.)
- */
- *quitp = 1;
- if (v.len != 0 || sscanf(k.str, "%lu", &offset) != 1)
- WT_RET_MSG(session, EINVAL,
- "unexpected dump offset format");
-#if !defined(HAVE_DIAGNOSTIC)
- WT_RET_MSG(session, ENOTSUP,
- "the WiredTiger library was not built in diagnostic mode");
-#else
- WT_TRET(
- __wt_debug_offset_blind(session, (wt_off_t)offset, NULL));
-#endif
- }
- return (ret == WT_NOTFOUND ? 0 : ret);
-}
-
-/*
* __verify_checkpoint_reset --
* Reset anything needing to be reset for each new checkpoint verification.
*/
@@ -233,6 +265,9 @@ __verify_checkpoint_reset(WT_VSTUFF *vs)
/* Record total is per checkpoint, reset the record count. */
vs->record_total = 0;
+
+ /* Tree depth. */
+ vs->depth = 1;
}
/*
@@ -265,12 +300,20 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
WT_RET(__wt_verbose(session, WT_VERB_VERIFY, "%s %s",
__wt_page_addr_string(session, ref, vs->tmp1),
__wt_page_type_string(page->type)));
-#ifdef HAVE_DIAGNOSTIC
+
+ /* Optionally dump the address. */
if (vs->dump_address)
WT_RET(__wt_msg(session, "%s %s",
__wt_page_addr_string(session, ref, vs->tmp1),
__wt_page_type_string(page->type)));
-#endif
+
+ /*
+ * Track the shape of the tree. Depths beyond the end of a counter
+ * array are clamped into its last slot; each array is bounded by its
+ * own element count so the two can be resized independently.
+ */
+ if (WT_PAGE_IS_INTERNAL(page))
+ ++vs->depth_internal[
+ WT_MIN(vs->depth, WT_ELEMENTS(vs->depth_internal) - 1)];
+ else
+ ++vs->depth_leaf[
+ WT_MIN(vs->depth, WT_ELEMENTS(vs->depth_leaf) - 1)];
/*
* The page's physical structure was verified when it was read into
@@ -447,9 +490,11 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
}
/* Verify the subtree. */
+ ++vs->depth;
WT_RET(__wt_page_in(session, child_ref, 0));
ret = __verify_tree(session, child_ref, vs);
WT_TRET(__wt_page_release(session, child_ref, 0));
+ --vs->depth;
WT_RET(ret);
__wt_cell_unpack(child_ref->addr, unpack);
@@ -475,9 +520,11 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
session, page, child_ref, entry, vs));
/* Verify the subtree. */
+ ++vs->depth;
WT_RET(__wt_page_in(session, child_ref, 0));
ret = __verify_tree(session, child_ref, vs);
WT_TRET(__wt_page_release(session, child_ref, 0));
+ --vs->depth;
WT_RET(ret);
__wt_cell_unpack(child_ref->addr, unpack);
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index a2b2a6bb7c8..10dd5b12936 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -13,14 +13,14 @@
* Move to the next/previous page in the tree.
*/
int
-__wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
+__wt_tree_walk(WT_SESSION_IMPL *session,
+ WT_REF **refp, uint64_t *walkcntp, uint32_t flags)
{
WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_INDEX *pindex;
WT_REF *couple, *ref;
- WT_TXN_STATE *txn_state;
int descending, prev, skip;
uint32_t slot;
@@ -44,16 +44,6 @@ __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
prev = LF_ISSET(WT_READ_PREV) ? 1 : 0;
/*
- * Pin a transaction ID, required to safely look at page index
- * structures, if our caller has not already done so.
- */
- txn_state = WT_SESSION_TXN_STATE(session);
- if (txn_state->snap_min == WT_TXN_NONE)
- txn_state->snap_min = S2C(session)->txn_global.last_running;
- else
- txn_state = NULL;
-
- /*
* There are multiple reasons and approaches to walking the in-memory
* tree:
*
@@ -95,11 +85,8 @@ __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
/* If no page is active, begin a walk from the start of the tree. */
if (ref == NULL) {
ref = &btree->root;
- if (ref->page == NULL) {
- if (txn_state != NULL)
- txn_state->snap_min = WT_TXN_NONE;
+ if (ref->page == NULL)
goto done;
- }
goto descend;
}
@@ -129,11 +116,8 @@ restart: /*
ref = couple;
if (ref == &btree->root) {
ref = &btree->root;
- if (ref->page == NULL) {
- if (txn_state != NULL)
- txn_state->snap_min = WT_TXN_NONE;
+ if (ref->page == NULL)
goto done;
- }
goto descend;
}
__wt_page_refp(session, ref, &pindex, &slot);
@@ -195,6 +179,9 @@ restart: /*
else
++slot;
+ if (walkcntp != NULL)
+ ++*walkcntp;
+
for (descending = 0;;) {
ref = pindex->index[slot];
@@ -283,9 +270,6 @@ descend: couple = ref;
}
done:
-err: if (txn_state != NULL)
- txn_state->snap_min = WT_TXN_NONE;
-
- WT_LEAVE_PAGE_INDEX(session);
+err: WT_LEAVE_PAGE_INDEX(session);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 052fbf3d0b4..43d87c518e4 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -299,6 +299,7 @@ static const WT_CONFIG_CHECK confchk_session_verify[] = {
{ "dump_blocks", "boolean", NULL, NULL },
{ "dump_offsets", "list", NULL, NULL },
{ "dump_pages", "boolean", NULL, NULL },
+ { "dump_shape", "boolean", NULL, NULL },
{ NULL, NULL, NULL, NULL }
};
@@ -318,6 +319,7 @@ static const WT_CONFIG_CHECK confchk_log_subconfigs[] = {
{ "file_max", "int", "min=100KB,max=2GB", NULL },
{ "path", "string", NULL, NULL },
{ "prealloc", "boolean", NULL, NULL },
+ { "recover", "string", "choices=[\"error\",\"on\"]", NULL },
{ NULL, NULL, NULL, NULL }
};
@@ -666,6 +668,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"force=0",
confchk_session_salvage
},
+ { "session.strerror",
+ "",
+ NULL
+ },
{ "session.truncate",
"",
NULL
@@ -675,7 +681,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
NULL
},
{ "session.verify",
- "dump_address=0,dump_blocks=0,dump_offsets=,dump_pages=0",
+ "dump_address=0,dump_blocks=0,dump_offsets=,dump_pages=0,"
+ "dump_shape=0",
confchk_session_verify
},
{ "table.meta",
@@ -690,7 +697,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
"eviction_target=80,eviction_trigger=95,exclusive=0,extensions=,"
"file_extend=,hazard_max=1000,log=(archive=,compressor=,enabled=0"
- ",file_max=100MB,path=,prealloc=),lsm_manager=(merge=,"
+ ",file_max=100MB,path=,prealloc=,recover=on),lsm_manager=(merge=,"
"worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,"
"session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
",name=,reserve=0,size=500MB),statistics=none,"
@@ -708,7 +715,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
"eviction_target=80,eviction_trigger=95,exclusive=0,extensions=,"
"file_extend=,hazard_max=1000,log=(archive=,compressor=,enabled=0"
- ",file_max=100MB,path=,prealloc=),lsm_manager=(merge=,"
+ ",file_max=100MB,path=,prealloc=,recover=on),lsm_manager=(merge=,"
"worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,"
"session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
",name=,reserve=0,size=500MB),statistics=none,"
@@ -725,8 +732,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"direct_io=,error_prefix=,eviction=(threads_max=1,threads_min=1),"
"eviction_dirty_target=80,eviction_target=80,eviction_trigger=95,"
"extensions=,file_extend=,hazard_max=1000,log=(archive=,"
- "compressor=,enabled=0,file_max=100MB,path=,prealloc=),"
- "lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=,"
+ "compressor=,enabled=0,file_max=100MB,path=,prealloc=,recover=on)"
+ ",lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=,"
"multiprocess=0,session_max=100,session_scratch_max=2MB,"
"shared_cache=(chunk=10MB,name=,reserve=0,size=500MB),"
"statistics=none,statistics_log=(on_close=0,"
@@ -742,8 +749,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"direct_io=,error_prefix=,eviction=(threads_max=1,threads_min=1),"
"eviction_dirty_target=80,eviction_target=80,eviction_trigger=95,"
"extensions=,file_extend=,hazard_max=1000,log=(archive=,"
- "compressor=,enabled=0,file_max=100MB,path=,prealloc=),"
- "lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=,"
+ "compressor=,enabled=0,file_max=100MB,path=,prealloc=,recover=on)"
+ ",lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=,"
"multiprocess=0,session_max=100,session_scratch_max=2MB,"
"shared_cache=(chunk=10MB,name=,reserve=0,size=500MB),"
"statistics=none,statistics_log=(on_close=0,"
diff --git a/src/third_party/wiredtiger/src/conn/api_strerror.c b/src/third_party/wiredtiger/src/conn/api_strerror.c
index 396ae7a3e0f..e41e402a1fd 100644
--- a/src/third_party/wiredtiger/src/conn/api_strerror.c
+++ b/src/third_party/wiredtiger/src/conn/api_strerror.c
@@ -5,20 +5,24 @@
/*
* Historically, there was only the wiredtiger_strerror call because the POSIX
* port didn't need anything more complex; Windows requires memory allocation
- * of error strings, so we added the wiredtiger_strerror_r call. Because we
+ * of error strings, so we added the WT_SESSION.strerror method. Because we
* want wiredtiger_strerror to continue to be as thread-safe as possible, errors
- * are split into three categories: WiredTiger constant strings, system constant
- * strings and Everything Else, and we check constant strings before Everything
- * Else.
+ * are split into two categories: WiredTiger's or the system's constant strings
+ * and Everything Else, and we check constant strings before Everything Else.
*/
/*
- * __wiredtiger_error --
- * Return a constant string for the WiredTiger errors.
+ * __wt_wiredtiger_error --
+ * Return a constant string for POSIX-standard and WiredTiger errors.
*/
-static const char *
-__wiredtiger_error(int error)
+const char *
+__wt_wiredtiger_error(int error)
{
+ const char *p;
+
+ /*
+ * Check for WiredTiger specific errors.
+ */
switch (error) {
case WT_ROLLBACK:
return ("WT_ROLLBACK: conflict between concurrent operations");
@@ -32,7 +36,19 @@ __wiredtiger_error(int error)
return ("WT_PANIC: WiredTiger library panic");
case WT_RESTART:
return ("WT_RESTART: restart the operation (internal)");
+ case WT_RUN_RECOVERY:
+ return ("WT_RUN_RECOVERY: recovery must be run to continue");
}
+
+ /*
+ * POSIX errors are non-negative integers; check for 0 explicitly
+ * in case the underlying strerror doesn't handle 0 (some don't).
+ */
+ if (error == 0)
+ return ("Successful return: 0");
+ if (error > 0 && (p = strerror(error)) != NULL)
+ return (p);
+
return (NULL);
}
@@ -44,37 +60,6 @@ const char *
wiredtiger_strerror(int error)
{
static char buf[128];
- const char *p;
-
- /* Check for a constant string. */
- if ((p = __wiredtiger_error(error)) != NULL ||
- (p = __wt_strerror(error)) != NULL)
- return (p);
-
- /* Else, fill in the non-thread-safe static buffer. */
- if (wiredtiger_strerror_r(error, buf, sizeof(buf)) != 0)
- (void)snprintf(buf, sizeof(buf), "error return: %d", error);
-
- return (buf);
-}
-
-/*
- * wiredtiger_strerror_r --
- * Return a string for any error value, thread-safe version.
- */
-int
-wiredtiger_strerror_r(int error, char *buf, size_t buflen)
-{
- const char *p;
-
- /* Require at least 2 bytes, printable character and trailing nul. */
- if (buflen < 2)
- return (ENOMEM);
-
- /* Check for a constant string. */
- if ((p = __wiredtiger_error(error)) != NULL ||
- (p = __wt_strerror(error)) != NULL)
- return (snprintf(buf, buflen, "%s", p) > 0 ? 0 : ENOMEM);
- return (__wt_strerror_r(error, buf, buflen));
+ return (__wt_strerror(NULL, error, buf, sizeof(buf)));
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 5be55a77f24..0562f9cfc34 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -117,7 +117,7 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn)
conn->extension_api.conn = wt_conn;
conn->extension_api.err_printf = __wt_ext_err_printf;
conn->extension_api.msg_printf = __wt_ext_msg_printf;
- conn->extension_api.strerror = wiredtiger_strerror;
+ conn->extension_api.strerror = __wt_ext_strerror;
conn->extension_api.scr_alloc = __wt_ext_scr_alloc;
conn->extension_api.scr_free = __wt_ext_scr_free;
conn->extension_api.collator_config = ext_collator_config;
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index b278d7a6b8a..c513d46137c 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -29,8 +29,6 @@ __wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[])
if (!F_ISSET(conn, WT_CONN_CACHE_POOL)) {
WT_RET(__wt_config_gets(session, cfg, "cache_size", &cval));
conn->cache_size = (uint64_t)cval.val;
- WT_RET(__wt_config_gets(session, cfg, "cache_overhead", &cval));
- conn->cache_overhead = (int)cval.val;
} else {
WT_RET(__wt_config_gets(
session, cfg, "shared_cache.reserve", &cval));
@@ -40,6 +38,9 @@ __wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[])
cache->cp_reserved = (uint64_t)cval.val;
}
+ WT_RET(__wt_config_gets(session, cfg, "cache_overhead", &cval));
+ cache->overhead_pct = (u_int)cval.val;
+
WT_RET(__wt_config_gets(session, cfg, "eviction_target", &cval));
cache->eviction_target = (u_int)cval.val;
@@ -145,9 +146,9 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session)
WT_STAT_SET(stats, cache_bytes_max, conn->cache_size);
WT_STAT_SET(stats, cache_bytes_inuse, __wt_cache_bytes_inuse(cache));
- WT_STAT_SET(stats, cache_overhead, conn->cache_overhead);
+ WT_STAT_SET(stats, cache_overhead, cache->overhead_pct);
WT_STAT_SET(stats, cache_pages_inuse, __wt_cache_pages_inuse(cache));
- WT_STAT_SET(stats, cache_bytes_dirty, cache->bytes_dirty);
+ WT_STAT_SET(stats, cache_bytes_dirty, __wt_cache_dirty_inuse(cache));
WT_STAT_SET(stats,
cache_eviction_maximum_page_size, cache->evict_max_page_size);
WT_STAT_SET(stats, cache_pages_dirty, cache->pages_dirty);
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index 11abc7c1e2b..36d4d539d92 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -98,6 +98,9 @@ __logmgr_config(WT_SESSION_IMPL *session, const char **cfg, int *runp)
FLD_SET(conn->log_flags, WT_CONN_LOG_PREALLOC);
conn->log_prealloc = 1;
}
+ WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval));
+ if (cval.len != 0 && WT_STRING_MATCH("error", cval.str, cval.len))
+ FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
WT_RET(__logmgr_sync_cfg(session, cfg));
return (0);
diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c
index 5aa85872a3b..910aef070ca 100644
--- a/src/third_party/wiredtiger/src/evict/evict_file.c
+++ b/src/third_party/wiredtiger/src/evict/evict_file.c
@@ -36,8 +36,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
/* Walk the tree, discarding pages. */
next_ref = NULL;
- WT_ERR(__wt_tree_walk(
- session, &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT));
+ WT_ERR(__wt_tree_walk(session, &next_ref, NULL,
+ WT_READ_CACHE | WT_READ_NO_EVICT));
while ((ref = next_ref) != NULL) {
page = ref->page;
@@ -73,8 +73,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
* the reconciliation, the next walk call could miss a page in
* the tree.
*/
- WT_ERR(__wt_tree_walk(
- session, &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT));
+ WT_ERR(__wt_tree_walk(session, &next_ref, NULL,
+ WT_READ_CACHE | WT_READ_NO_EVICT));
switch (syncop) {
case WT_SYNC_CLOSE:
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index c6b962f9f5d..83a9aa5c8c5 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -225,6 +225,46 @@ err: WT_PANIC_MSG(session, ret, "cache eviction server error");
}
/*
+ * __evict_workers_resize --
+ * Resize the array of eviction workers (as needed after a reconfigure).
+ * We don't do this during the reconfigure because the eviction server
+ * thread owns these structures.
+ *
+ * Reallocates the worker array to hold evict_workers_max entries,
+ * then, for each slot from evict_workers_alloc up to
+ * evict_workers_max, opens an internal "eviction-worker" session and
+ * records the slot id. Slots below evict_workers_min are also marked
+ * running and have a worker thread created for them.
+ *
+ * NOTE(review): evict_workers_alloc is updated at the err label, so
+ * it is set to evict_workers_max even if the loop exits early; slots
+ * past the failing index may then have no session -- confirm cleanup
+ * tolerates that.
+ */
+static int
+__evict_workers_resize(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_EVICT_WORKER *workers;
+ size_t alloc;
+ u_int i;
+
+ conn = S2C(session);
+
+ alloc = conn->evict_workers_alloc * sizeof(*workers);
+ WT_RET(__wt_realloc(session, &alloc,
+ conn->evict_workers_max * sizeof(*workers), &conn->evict_workctx));
+ workers = conn->evict_workctx;
+
+ for (i = conn->evict_workers_alloc; i < conn->evict_workers_max; i++) {
+ WT_ERR(__wt_open_internal_session(conn,
+ "eviction-worker", 0, 0, &workers[i].session));
+ workers[i].id = i;
+ F_SET(workers[i].session, WT_SESSION_CAN_WAIT);
+
+ if (i < conn->evict_workers_min) {
+ ++conn->evict_workers;
+ F_SET(&workers[i], WT_EVICT_WORKER_RUN);
+ WT_ERR(__wt_thread_create(workers[i].session,
+ &workers[i].tid, __evict_worker, &workers[i]));
+ }
+ }
+
+err: conn->evict_workers_alloc = conn->evict_workers_max; /* Also reached by falling out of the loop. */
+ return (ret);
+}
+
+/*
* __wt_evict_create --
* Start the eviction server thread.
*/
@@ -232,8 +272,6 @@ int
__wt_evict_create(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
- WT_EVICT_WORKER *workers;
- u_int i;
conn = S2C(session);
@@ -253,27 +291,6 @@ __wt_evict_create(WT_SESSION_IMPL *session)
if (conn->evict_workers_max == 0)
F_SET(session, WT_SESSION_CAN_WAIT);
- if (conn->evict_workers_max > 0) {
- WT_RET(__wt_calloc_def(
- session, conn->evict_workers_max, &workers));
- conn->evict_workctx = workers;
-
- for (i = 0; i < conn->evict_workers_max; i++) {
- WT_RET(__wt_open_internal_session(conn,
- "eviction-worker", 0, 0, &workers[i].session));
- workers[i].id = i;
- F_SET(workers[i].session, WT_SESSION_CAN_WAIT);
-
- if (i < conn->evict_workers_min) {
- ++conn->evict_workers;
- F_SET(&workers[i], WT_EVICT_WORKER_RUN);
- WT_RET(__wt_thread_create(
- workers[i].session, &workers[i].tid,
- __evict_worker, &workers[i]));
- }
- }
- }
-
/*
* Start the primary eviction server thread after the worker threads
* have started to avoid it starting additional worker threads before
@@ -312,11 +329,12 @@ __wt_evict_destroy(WT_SESSION_IMPL *session)
WT_TRET(__wt_cond_signal(session, cache->evict_waiter_cond));
WT_TRET(__wt_thread_join(session, workers[i].tid));
}
- /* Handle shutdown when cleaning up after a failed open */
+ /* Handle shutdown when cleaning up after a failed open. */
if (conn->evict_workctx != NULL) {
- for (i = 0; i < conn->evict_workers_max; i++) {
+ for (i = 0; i < conn->evict_workers_alloc; i++) {
wt_session = &conn->evict_workctx[i].session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
+ if (wt_session != NULL)
+ WT_TRET(wt_session->close(wt_session, NULL));
}
__wt_free(session, conn->evict_workctx);
}
@@ -398,7 +416,7 @@ __evict_has_work(WT_SESSION_IMPL *session, uint32_t *flagsp)
* target or the dirty target.
*/
bytes_inuse = __wt_cache_bytes_inuse(cache);
- dirty_inuse = cache->bytes_dirty;
+ dirty_inuse = __wt_cache_dirty_inuse(cache);
bytes_max = conn->cache_size;
/* Check to see if the eviction server should run. */
@@ -435,9 +453,9 @@ __evict_pass(WT_SESSION_IMPL *session)
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_EVICT_WORKER *worker;
- int loop;
+ uint64_t pages_evicted;
uint32_t flags;
- uint64_t bytes_inuse, dirty_target_size, pages_evicted, target_size;
+ int loop;
conn = S2C(session);
cache = conn->cache;
@@ -469,16 +487,13 @@ __evict_pass(WT_SESSION_IMPL *session)
* Start a worker if we have capacity and we haven't reached
* the eviction targets.
*/
- bytes_inuse = __wt_cache_bytes_inuse(cache);
- target_size = (conn->cache_size * cache->eviction_target) / 100;
- dirty_target_size =
- (conn->cache_size * cache->eviction_dirty_target) / 100;
- if ((bytes_inuse > target_size ||
- cache->bytes_dirty > dirty_target_size) &&
+ if (LF_ISSET(WT_EVICT_PASS_ALL | WT_EVICT_PASS_DIRTY) &&
conn->evict_workers < conn->evict_workers_max) {
WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER,
"Starting evict worker: %"PRIu32"\n",
conn->evict_workers));
+ if (conn->evict_workers >= conn->evict_workers_alloc)
+ WT_RET(__evict_workers_resize(session));
worker = &conn->evict_workctx[conn->evict_workers++];
F_SET(worker, WT_EVICT_WORKER_RUN);
WT_RET(__wt_thread_create(session,
@@ -488,7 +503,7 @@ __evict_pass(WT_SESSION_IMPL *session)
WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER,
"Eviction pass with: Max: %" PRIu64
" In use: %" PRIu64 " Dirty: %" PRIu64,
- conn->cache_size, bytes_inuse, cache->bytes_dirty));
+ conn->cache_size, cache->bytes_inmem, cache->bytes_dirty));
WT_RET(__evict_lru_walk(session, flags));
WT_RET(__evict_server_work(session));
@@ -839,7 +854,7 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t flags)
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- u_int max_entries, old_slot, retries, slot;
+ u_int max_entries, prev_slot, retries, slot, start_slot, spins;
int incr, dhandle_locked;
WT_DECL_SPINLOCK_ID(id);
@@ -869,7 +884,7 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t flags)
* Set the starting slot in the queue and the maximum pages added
* per walk.
*/
- slot = cache->evict_entries;
+ start_slot = slot = cache->evict_entries;
max_entries = slot + WT_EVICT_WALK_INCR;
retry: while (slot < max_entries && ret == 0) {
@@ -885,8 +900,16 @@ retry: while (slot < max_entries && ret == 0) {
* reference count to keep it alive while we sweep.
*/
if (!dhandle_locked) {
- if ((ret = __wt_spin_trylock(
- session, &conn->dhandle_lock, &id)) != 0)
+ for (spins = 0; (ret = __wt_spin_trylock(
+ session, &conn->dhandle_lock, &id)) == EBUSY &&
+ !F_ISSET(cache, WT_EVICT_CLEAR_WALKS);
+ spins++) {
+ if (spins < 1000)
+ __wt_yield();
+ else
+ __wt_sleep(0, 1000);
+ }
+ if (ret != 0)
break;
dhandle_locked = 1;
}
@@ -926,10 +949,10 @@ retry: while (slot < max_entries && ret == 0) {
continue;
/*
- * Also skip files that are configured to stick in cache until
- * we get aggressive.
+ * Also skip files that are checkpointing or configured to
+ * stick in cache until we get aggressive.
*/
- if (btree->evict_priority != 0 &&
+ if ((btree->checkpointing || btree->evict_priority != 0) &&
!LF_ISSET(WT_EVICT_PASS_AGGRESSIVE))
continue;
@@ -942,7 +965,7 @@ retry: while (slot < max_entries && ret == 0) {
btree->evict_walk_skips++ < btree->evict_walk_period)
continue;
btree->evict_walk_skips = 0;
- old_slot = slot;
+ prev_slot = slot;
(void)WT_ATOMIC_ADD4(dhandle->session_inuse, 1);
incr = 1;
@@ -964,15 +987,14 @@ retry: while (slot < max_entries && ret == 0) {
__wt_spin_unlock(session, &cache->evict_walk_lock);
/*
- * If we didn't find enough candidates in the file, skip it
- * next time.
+ * If we didn't find any candidates in the file, skip it next
+ * time.
*/
- if (slot >= old_slot + WT_EVICT_WALK_PER_FILE ||
- slot >= max_entries)
- btree->evict_walk_period = 0;
- else
+ if (slot == prev_slot)
btree->evict_walk_period = WT_MIN(
- WT_MAX(1, 2 * btree->evict_walk_period), 1000);
+ WT_MAX(1, 2 * btree->evict_walk_period), 100);
+ else
+ btree->evict_walk_period = 0;
}
if (incr) {
@@ -988,15 +1010,16 @@ retry: while (slot < max_entries && ret == 0) {
/*
* Walk the list of files a few times if we don't find enough pages.
- * Try two passes through all the files, then only keep going if we
- * are finding more candidates. Take care not to skip files on
- * subsequent passes.
+ * Try two passes through all the files, give up when we have some
+ * candidates and we aren't finding more. Take care not to skip files
+ * on subsequent passes.
*/
if (!F_ISSET(cache, WT_EVICT_CLEAR_WALKS) && ret == 0 &&
slot < max_entries && (retries < 2 ||
- (!LF_ISSET(WT_EVICT_PASS_WOULD_BLOCK) &&
- retries < 10 && slot > 0))) {
+ (!LF_ISSET(WT_EVICT_PASS_WOULD_BLOCK) && retries < 10 &&
+ (slot == cache->evict_entries || slot > start_slot)))) {
cache->evict_file_next = NULL;
+ start_slot = slot;
++retries;
goto retry;
}
@@ -1064,9 +1087,10 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
* Get some more eviction candidate pages.
*/
for (evict = start, pages_walked = 0, internal_pages = restarts = 0;
- evict < end && (ret == 0 || ret == WT_NOTFOUND);
- ret = __wt_tree_walk(session, &btree->evict_ref, walk_flags),
- ++pages_walked) {
+ evict < end && pages_walked < WT_EVICT_MAX_PER_FILE &&
+ (ret == 0 || ret == WT_NOTFOUND);
+ ret = __wt_tree_walk(
+ session, &btree->evict_ref, &pages_walked, walk_flags)) {
if (btree->evict_ref == NULL) {
/*
* Take care with terminating this loop.
@@ -1126,12 +1150,8 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
continue;
}
-fast: /*
- * If the file is being checkpointed, there's a period of time
- * where we can't discard dirty pages because of possible races
- * with the checkpointing thread.
- */
- if (modified && btree->checkpointing)
+fast: /* If the page can't be evicted, give up. */
+ if (!__wt_page_can_evict(session, page, 0))
continue;
/*
@@ -1447,7 +1467,7 @@ __wt_cache_dump(WT_SESSION_IMPL *session)
next_walk = NULL;
session->dhandle = dhandle;
while (__wt_tree_walk(session,
- &next_walk, WT_READ_CACHE | WT_READ_NO_WAIT) == 0 &&
+ &next_walk, NULL, WT_READ_CACHE | WT_READ_NO_WAIT) == 0 &&
next_walk != NULL) {
page = next_walk->page;
if (page->type == WT_PAGE_COL_INT ||
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 0cff584f2ab..9ba1af897a4 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -320,13 +320,11 @@ static int
__evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
int exclusive, int top, int *inmem_splitp, int *istreep)
{
- WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
uint32_t flags;
- btree = S2BT(session);
flags = WT_EVICTING;
/*
@@ -370,19 +368,8 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
}
/*
- * If the tree was deepened, there's a requirement that newly created
- * internal pages not be evicted until all threads are known to have
- * exited the original page index array, because evicting an internal
- * page discards its WT_REF array, and a thread traversing the original
- * page index array might see an freed WT_REF. During the split we set
- * a transaction value, once that's globally visible, we know we can
- * evict the created page.
- */
- if (!exclusive && mod != NULL && WT_PAGE_IS_INTERNAL(page) &&
- !__wt_txn_visible_all(session, mod->mod_split_txn))
- return (EBUSY);
-
- /*
+ * Check whether the page can be evicted.
+ *
* If the file is being checkpointed, we can't evict dirty pages:
* if we write a page and free the previous version of the page, that
* previous version might be referenced by an internal page already
@@ -402,13 +389,8 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
* internal page acquires hazard pointers on child pages it reads, and
* is blocked by the exclusive lock.
*/
- if (mod != NULL && btree->checkpointing &&
- (__wt_page_is_modified(page) ||
- F_ISSET(mod, WT_PM_REC_MULTIBLOCK))) {
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint);
- WT_STAT_FAST_DATA_INCR(session, cache_eviction_checkpoint);
+ if (!exclusive && !__wt_page_can_evict(session, page, 0))
return (EBUSY);
- }
/*
* Check for an append-only workload needing an in-memory split.
@@ -448,29 +430,18 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
* If we have an exclusive lock (we're discarding the tree), assert
* there are no updates we cannot read.
*
- * Otherwise, if the top-level page we're evicting is a leaf page, set
- * the update-restore flag, so reconciliation will write blocks it can
- * write and create a list of skipped updates for blocks it cannot
- * write. This is how forced eviction of huge pages works: we take a
- * big page and reconcile it into blocks, some of which we write and
- * discard, the rest of which we re-create as smaller in-memory pages,
- * (restoring the updates that stopped us from writing the block), and
- * inserting the whole mess into the page's parent.
+ * Otherwise, if the top-level page we're evicting is a leaf page
+ * marked for forced eviction, set the update-restore flag, so
+ * reconciliation will write blocks it can write and create a list of
+ * skipped updates for blocks it cannot write. This is how forced
+ * eviction of huge pages works: we take a big page and reconcile it
+ * into blocks, some of which we write and discard, the rest of which
+ * we re-create as smaller in-memory pages, (restoring the updates that
+ * stopped us from writing the block), and inserting the whole mess
+ * into the page's parent.
*
* Don't set the update-restore flag for internal pages, they don't
* have updates that can be saved and restored.
- *
- * Don't set the update-restore flag for small pages. (If a small
- * page were selected by eviction and then modified, and we configure it
- * for update-restore, we'll end up splitting one or two pages into the
- * parent, which is a waste of effort. If we don't set update-restore,
- * eviction will return EBUSY, which makes more sense, the page was just
- * modified.)
- *
- * Don't set the update-restore flag for any page other than the
- * top one; only the reconciled top page goes through the split path
- * (and child pages are pages we expect to merge into the top page, they
- * they are not expected to split).
*/
if (__wt_page_is_modified(page)) {
if (exclusive)
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index e610b3b3e1b..70068e32b9b 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -76,7 +76,7 @@
} \
} \
break; \
-} while (ret == 0)
+} while (1)
/* End a transactional API call, retry on deadlock. */
#define TXN_API_END(s, ret) TXN_API_END_RETRY(s, ret, 1)
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 2896620e503..032178b4755 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -941,6 +941,70 @@ __wt_ref_info(WT_SESSION_IMPL *session,
}
/*
+ * __wt_page_can_evict --
+ *	Return non-zero if the page can be evicted now, zero otherwise.
+ */
+static inline int
+__wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits)
+{
+	WT_BTREE *btree;
+	WT_PAGE_MODIFY *mod;
+
+	btree = S2BT(session);
+	mod = page->modify;
+
+	/* Pages that have never been modified can always be evicted. */
+	if (mod == NULL)
+		return (1);
+
+	/*
+	 * If the tree was deepened, there's a requirement that newly created
+	 * internal pages not be evicted until all threads are known to have
+	 * exited the original page index array, because evicting an internal
+	 * page discards its WT_REF array, and a thread traversing the original
+	 * page index array might see a freed WT_REF.  During the split we set
+	 * a transaction value, once that's globally visible, we know we can
+	 * evict the created page.
+	 */
+	if (WT_PAGE_IS_INTERNAL(page) &&
+	    !__wt_txn_visible_all(session, mod->mod_split_txn))
+		return (0);
+
+	/*
+	 * If the file is being checkpointed, we can't evict dirty pages:
+	 * if we write a page and free the previous version of the page, that
+	 * previous version might be referenced by an internal page already
+	 * written in the checkpoint, leaving the checkpoint inconsistent.
+	 */
+	if (btree->checkpointing &&
+	    (__wt_page_is_modified(page) ||
+	    F_ISSET(mod, WT_PM_REC_MULTIBLOCK))) {
+		WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint);
+		WT_STAT_FAST_DATA_INCR(session, cache_eviction_checkpoint);
+		return (0);
+	}
+
+	/*
+	 * If we aren't (potentially) doing eviction that can restore updates
+	 * and the updates on this page are too recent, give up.
+	 */
+	if (page->read_gen != WT_READGEN_OLDEST &&
+	    !__wt_txn_visible_all(session, __wt_page_is_modified(page) ?
+	    mod->update_txn : mod->rec_max_txn))
+		return (0);
+
+	/*
+	 * If asked to check splits and the page was recently split in-memory,
+	 * don't force it out: we hope eviction will find it first.
+	 */
+	if (check_splits &&
+	    !__wt_txn_visible_all(session, mod->inmem_split_txn))
+		return (0);
+
+	return (1);
+}
+
+/*
* __wt_page_release_evict --
* Attempt to release and immediately evict a page.
*/
@@ -1010,10 +1074,9 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
/*
* Attempt to evict pages with the special "oldest" read generation.
- *
* This is set for pages that grow larger than the configured
- * memory_page_max setting, and when we are attempting to scan without
- * trashing the cache.
+ * memory_page_max setting, when we see many deleted items, and when we
+ * are attempting to scan without trashing the cache.
*
* Skip this if eviction is disabled for this operation or this tree,
* or if there is no chance of eviction succeeding for dirty pages due
@@ -1021,12 +1084,10 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
* it contains an update that isn't stable. Also skip forced eviction
* if we just did an in-memory split.
*/
- if (LF_ISSET(WT_READ_NO_EVICT) ||
- page->read_gen != WT_READGEN_OLDEST ||
+ if (page->read_gen != WT_READGEN_OLDEST ||
+ LF_ISSET(WT_READ_NO_EVICT) ||
F_ISSET(btree, WT_BTREE_NO_EVICTION) ||
- (__wt_page_is_modified(page) && (btree->checkpointing ||
- !__wt_txn_visible_all(session, page->modify->first_dirty_txn) ||
- !__wt_txn_visible_all(session, page->modify->inmem_split_txn))))
+ !__wt_page_can_evict(session, page, 1))
return (__wt_hazard_clear(session, page));
WT_RET_BUSY_OK(__wt_page_release_evict(session, ref));
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index de6faad608a..84b18082a25 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -13,9 +13,10 @@
#define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal
pages by this many increments of the
read generation. */
-#define WT_EVICT_WALK_PER_FILE 10 /* Pages to visit per file */
-#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
-#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
+#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */
+#define WT_EVICT_MAX_PER_FILE 100 /* Max pages to visit per file */
+#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
+#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
#define WT_EVICT_PASS_AGGRESSIVE 0x01
#define WT_EVICT_PASS_ALL 0x02
@@ -82,6 +83,8 @@ struct __wt_cache {
u_int eviction_target; /* Percent to end eviction */
u_int eviction_dirty_target; /* Percent to allow dirty */
+ u_int overhead_pct; /* Cache percent adjustment */
+
/*
* LRU eviction list information.
*/
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index 4bceb5c0d6c..f952f1bf698 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -62,7 +62,32 @@ __wt_cache_pages_inuse(WT_CACHE *cache)
static inline uint64_t
__wt_cache_bytes_inuse(WT_CACHE *cache)
{
-	return (cache->bytes_inmem);
+	uint64_t bytes_inuse;
+
+	/* Adjust the cache usage to take allocation overhead into account. */
+	bytes_inuse = cache->bytes_inmem;
+	if (cache->overhead_pct != 0)
+		bytes_inuse +=
+		    (bytes_inuse * (uint64_t)cache->overhead_pct) / 100;
+
+	return (bytes_inuse);
+}
+
+/*
+ * __wt_cache_dirty_inuse --
+ *	Return the number of dirty bytes in use, adjusted for overhead.
+ */
+static inline uint64_t
+__wt_cache_dirty_inuse(WT_CACHE *cache)
+{
+	uint64_t dirty_inuse;
+
+	dirty_inuse = cache->bytes_dirty;
+	if (cache->overhead_pct != 0)
+		dirty_inuse +=
+		    (dirty_inuse * (uint64_t)cache->overhead_pct) / 100;
+
+	return (dirty_inuse);
}
/*
@@ -87,13 +112,9 @@ __wt_eviction_check(WT_SESSION_IMPL *session, int *fullp, int wake)
* in a shared cache.
*/
bytes_inuse = __wt_cache_bytes_inuse(cache);
- dirty_inuse = cache->bytes_dirty;
+ dirty_inuse = __wt_cache_dirty_inuse(cache);
bytes_max = conn->cache_size + 1;
- /* Adjust the cache size to take allocation overhead into account. */
- if (conn->cache_overhead != 0)
- bytes_max -= (bytes_max * (uint64_t)conn->cache_overhead) / 100;
-
/* Calculate the cache full percentage. */
*fullp = (int)((100 * bytes_inuse) / bytes_max);
diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h
index 65757c2ef6d..046f515188c 100644
--- a/src/third_party/wiredtiger/src/include/config.h
+++ b/src/third_party/wiredtiger/src/include/config.h
@@ -73,14 +73,15 @@ struct __wt_config_parser_impl {
#define WT_CONFIG_ENTRY_session_rename 24
#define WT_CONFIG_ENTRY_session_rollback_transaction 25
#define WT_CONFIG_ENTRY_session_salvage 26
-#define WT_CONFIG_ENTRY_session_truncate 27
-#define WT_CONFIG_ENTRY_session_upgrade 28
-#define WT_CONFIG_ENTRY_session_verify 29
-#define WT_CONFIG_ENTRY_table_meta 30
-#define WT_CONFIG_ENTRY_wiredtiger_open 31
-#define WT_CONFIG_ENTRY_wiredtiger_open_all 32
-#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 33
-#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 34
+#define WT_CONFIG_ENTRY_session_strerror 27
+#define WT_CONFIG_ENTRY_session_truncate 28
+#define WT_CONFIG_ENTRY_session_upgrade 29
+#define WT_CONFIG_ENTRY_session_verify 30
+#define WT_CONFIG_ENTRY_table_meta 31
+#define WT_CONFIG_ENTRY_wiredtiger_open 32
+#define WT_CONFIG_ENTRY_wiredtiger_open_all 33
+#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 34
+#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 35
/*
* configuration section: END
* DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index dd97ea50ce9..9cb42ae7c80 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -227,7 +227,6 @@ struct __wt_connection_impl {
uint32_t hazard_max; /* Hazard array size */
WT_CACHE *cache; /* Page cache */
- int cache_overhead; /* Cache percent adjustment */
uint64_t cache_size; /* Configured cache size */
WT_TXN_GLOBAL txn_global; /* Global transaction state */
@@ -290,6 +289,7 @@ struct __wt_connection_impl {
wt_thread_t evict_tid; /* Eviction server thread ID */
int evict_tid_set; /* Eviction server thread ID set */
+ uint32_t evict_workers_alloc;/* Allocated eviction workers */
uint32_t evict_workers_max;/* Max eviction workers */
uint32_t evict_workers_min;/* Min eviction workers */
uint32_t evict_workers; /* Number of eviction workers */
@@ -310,6 +310,7 @@ struct __wt_connection_impl {
#define WT_CONN_LOG_ENABLED 0x02 /* Logging is enabled */
#define WT_CONN_LOG_EXISTED 0x04 /* Log files found */
#define WT_CONN_LOG_PREALLOC 0x08 /* Pre-allocation is enabled */
+#define WT_CONN_LOG_RECOVER_ERR 0x10 /* Error if recovery required */
uint32_t log_flags; /* Global logging configuration */
WT_CONDVAR *log_cond; /* Log server wait mutex */
WT_SESSION_IMPL *log_session; /* Log server session */
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 7716336bff1..23bb36623e5 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -157,7 +157,7 @@ extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *addr, const WT_PAGE_HEADER *dsk, size_t size, int empty_page_ok);
extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *addr, WT_ITEM *buf);
-extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags);
+extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags);
extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd, int is_remove);
extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt);
extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page);
@@ -198,6 +198,7 @@ extern void __wt_conn_config_discard(WT_SESSION_IMPL *session);
extern int __wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, const char *config, size_t len, WT_CONFIG_PARSER **config_parserp);
extern int __wt_ext_config_get(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, const char *key, WT_CONFIG_ITEM *cval);
extern int __wt_config_upgrade(WT_SESSION_IMPL *session, WT_ITEM *buf);
+extern const char *__wt_wiredtiger_error(int error);
extern int __wt_collator_config(WT_SESSION_IMPL *session, const char *uri, WT_CONFIG_ITEM *cname, WT_CONFIG_ITEM *metadata, WT_COLLATOR **collatorp, int *ownp);
extern int __wt_conn_remove_collator(WT_SESSION_IMPL *session);
extern int __wt_conn_remove_compressor(WT_SESSION_IMPL *session);
@@ -304,6 +305,7 @@ extern void __wt_cache_dump(WT_SESSION_IMPL *session);
extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive);
extern void __wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn);
+extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, int *rec);
extern void __wt_log_written_reset(WT_SESSION_IMPL *session);
extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, int active_only);
extern void __wt_log_files_free(WT_SESSION_IMPL *session, char **files, u_int count);
@@ -436,8 +438,7 @@ extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp
extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, int fail, void *sym_ret);
extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh);
extern int __wt_errno(void);
-extern const char *__wt_strerror(int error);
-extern int __wt_strerror_r(int error, char *buf, size_t buflen);
+extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen);
extern int __wt_exist(WT_SESSION_IMPL *session, const char *filename, int *existp);
extern void __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh);
extern int __wt_fallocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len);
@@ -573,6 +574,7 @@ extern void __wt_errx(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUN
extern int __wt_ext_err_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
extern int __wt_ext_msg_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
+extern const char *__wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, int error);
extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v);
extern void __wt_assert(WT_SESSION_IMPL *session, int error, const char *file_name, int line_number, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 5, 6)));
extern int __wt_panic(WT_SESSION_IMPL *session);
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index 909f1daf5a4..36df35a104e 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -111,6 +111,8 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
} *scratch_track;
#endif
+ WT_ITEM err; /* Error buffer */
+
WT_TXN_ISOLATION isolation;
WT_TXN txn; /* Transaction state */
u_int ncursors; /* Count of active file cursors. */
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 80e0975d7e6..d0d0f9eec77 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -835,6 +835,17 @@ struct __wt_session {
int __F(reconfigure)(WT_SESSION *session, const char *config);
/*!
+ * Return information about an error as a string.
+ *
+ * @snippet ex_all.c Display an error thread safe
+ *
+ * @param session the session handle
+ * @param error a return value from a WiredTiger function
+ * @returns a string representation of the error
+ */
+ const char *__F(strerror)(WT_SESSION *session, int error);
+
+ /*!
* @name Cursor handles
* @{
*/
@@ -1319,6 +1330,9 @@ struct __wt_session {
* @config{dump_pages, Display the contents of in-memory pages as they
* are verified\, using the application's message handler\, intended for
* debugging., a boolean flag; default \c false.}
+ * @config{dump_shape, Display the shape of the tree after
+ * verification\, using the application's message handler\, intended for
+ * debugging., a boolean flag; default \c false.}
* @configend
* @ebusy_errors
*/
@@ -1543,14 +1557,14 @@ struct __wt_connection {
* integer between 1 and 20; default \c 2.}
* @config{ ),,}
* @config{cache_overhead, assume the heap allocator overhead is the
- * specified percentage\, and adjust the cache size by that amount (for
- * example\, if the cache size is 100GB\, a percentage of 10 means
- * WiredTiger limits itself to allocating 90GB of memory). This value is
- * configurable because different heap allocators have different
- * overhead and different workloads will have different heap allocation
- * sizes and patterns\, therefore applications may need to adjust this
- * value based on allocator choice and behavior in measured workloads.,
- * an integer between 0 and 30; default \c 8.}
+ * specified percentage\, and adjust the cache usage by that amount (for
+ * example\, if there is 10GB of data in cache\, a percentage of 10
+ * means WiredTiger treats this as 11GB). This value is configurable
+ * because different heap allocators have different overhead and
+ * different workloads will have different heap allocation sizes and
+ * patterns\, therefore applications may need to adjust this value based
+ * on allocator choice and behavior in measured workloads., an integer
+ * between 0 and 30; default \c 8.}
* @config{cache_size, maximum heap memory to allocate for the cache. A
* database should configure either \c cache_size or \c shared_cache but
* not both., an integer between 1MB and 10TB; default \c 100MB.}
@@ -1879,13 +1893,13 @@ struct __wt_connection {
* should be used (4KB on Linux systems\, zero elsewhere)., an integer between
* -1 and 1MB; default \c -1.}
* @config{cache_overhead, assume the heap allocator overhead is the specified
- * percentage\, and adjust the cache size by that amount (for example\, if the
- * cache size is 100GB\, a percentage of 10 means WiredTiger limits itself to
- * allocating 90GB of memory). This value is configurable because different heap
- * allocators have different overhead and different workloads will have
- * different heap allocation sizes and patterns\, therefore applications may
- * need to adjust this value based on allocator choice and behavior in measured
- * workloads., an integer between 0 and 30; default \c 8.}
+ * percentage\, and adjust the cache usage by that amount (for example\, if
+ * there is 10GB of data in cache\, a percentage of 10 means WiredTiger treats
+ * this as 11GB). This value is configurable because different heap allocators
+ * have different overhead and different workloads will have different heap
+ * allocation sizes and patterns\, therefore applications may need to adjust
+ * this value based on allocator choice and behavior in measured workloads., an
+ * integer between 0 and 30; default \c 8.}
* @config{cache_size, maximum heap memory to allocate for the cache. A
* database should configure either \c cache_size or \c shared_cache but not
* both., an integer between 1MB and 10TB; default \c 100MB.}
@@ -1973,6 +1987,9 @@ struct __wt_connection {
* are created relative to the database home., a string; default empty.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc, pre-allocate log files., a boolean
* flag; default \c true.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;recover, run recovery
+ * or error if recovery needs to run after an unclean shutdown., a string\,
+ * chosen from the following options: \c "error"\, \c "on"; default \c on.}
* @config{ ),,}
* @config{lsm_manager = (, configure database wide options for LSM tree
* management., a set of related configuration options defined below.}
@@ -2086,18 +2103,6 @@ int wiredtiger_open(const char *home,
*/
const char *wiredtiger_strerror(int error);
-/*!
- * Return information about a WiredTiger error as a string, thread-safe version.
- *
- * @snippet ex_all.c Display an error thread safe
- *
- * @param error a return value from a WiredTiger call
- * @param buf a buffer of at least \c buflen bytes
- * @param buflen the length of the buffer
- * @returns zero for success, non-zero to indicate an error.
- */
-int wiredtiger_strerror_r(int error, char *buf, size_t buflen);
-
#if !defined(SWIG)
/*!
* The interface implemented by applications to accept notifications
@@ -2618,6 +2623,12 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp);
/*! Restart the operation (internal). */
#define WT_RESTART -31805
/*! @endcond */
+/*!
+ * Recovery must be run to continue.
+ * This error is generated when wiredtiger_open is configured to return an error
+ * if recovery is required to use the database.
+ */
+#define WT_RUN_RECOVERY -31806
/*
* Error return section: END
* DO NOT EDIT: automatically built by dist/api_err.py.
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
index ee27b32ddf7..28fd8e18329 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
+++ b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
@@ -118,16 +118,17 @@ struct __wt_extension_api {
WT_EXTENSION_API *, WT_SESSION *session, const char *fmt, ...);
/*!
- * Return information about an error as a string; the strerror method
- * is a superset of the ISO C99/POSIX 1003.1-2001 function strerror.
+ * Return information about an error as a string.
*
* @snippet ex_data_source.c WT_EXTENSION_API strerror
*
- * @param err a return value from a WiredTiger, C library or POSIX
- * function
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param error a return value from a WiredTiger function
* @returns a string representation of the error
*/
- const char *(*strerror)(int err);
+ const char *(*strerror)(
+ WT_EXTENSION_API *, WT_SESSION *session, int error);
/*!
* Allocate short-term use scratch memory.
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 38c953d7835..f6c8602faff 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -36,6 +36,45 @@ __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn)
}
/*
+ * __wt_log_needs_recovery --
+ *	Set the given variable to 0 if we encounter a clean shutdown and
+ *	to 1 if recovery must be run.
+ */
+int
+__wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, int *rec)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_CURSOR *c;
+	WT_DECL_RET;
+	WT_LOG *log;
+
+	conn = S2C(session);
+	log = conn->log;
+	c = NULL;
+	/*
+	 * Default is to run recovery always.
+	 */
+	*rec = 1;
+
+	if (log == NULL)
+		return (0);
+	WT_RET(__wt_curlog_open(session, "log:", NULL, &c));
+	c->set_key(c, ckp_lsn->file, ckp_lsn->offset, 0);
+	WT_ERR(c->search(c));
+	/*
+	 * If the checkpoint LSN we're given is the last record,
+	 * then recovery is not needed.
+	 */
+	if ((ret = c->next(c)) == WT_NOTFOUND) {
+		*rec = 0;
+		ret = 0;
+	}
+err:	if (c != NULL)
+		(void)c->close(c);
+	return (ret);
+}
+
+/*
* __wt_log_written_reset --
* Interface to reset the amount of log written during this
* checkpoint period. Called from the checkpoint code.
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_merge.c b/src/third_party/wiredtiger/src/lsm/lsm_merge.c
index 923d8fd2837..6ca1b0f04ab 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_merge.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_merge.c
@@ -511,7 +511,8 @@ err: if (locked)
"Merge aborted due to close"));
else
WT_TRET(__wt_verbose(session, WT_VERB_LSM,
- "Merge failed with %s", wiredtiger_strerror(ret)));
+ "Merge failed with %s",
+ __wt_strerror(session, ret, NULL, 0)));
}
F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_CACHE_CHECK);
return (ret);
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_meta.c b/src/third_party/wiredtiger/src/lsm/lsm_meta.c
index fa051413c5e..2fcced4d1c4 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_meta.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_meta.c
@@ -153,10 +153,11 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
}
WT_ERR_NOTFOUND_OK(ret);
lsm_tree->nold_chunks = nchunks;
- /* Values included for backward compatibility */
- } else if (WT_STRING_MATCH("merge_threads", ck.str, ck.len)) {
- } else
- WT_ERR(__wt_illegal_value(session, "LSM metadata"));
+ }
+ /*
+ * Ignore any other values: the metadata entry might have been
+ * created by a future release, with unknown options.
+ */
}
WT_ERR_NOTFOUND_OK(ret);
diff --git a/src/third_party/wiredtiger/src/os_posix/os_errno.c b/src/third_party/wiredtiger/src/os_posix/os_errno.c
index a58f13583ce..229b68e0008 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_errno.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_errno.c
@@ -24,46 +24,32 @@ __wt_errno(void)
/*
* __wt_strerror --
- * POSIX implementation of wiredtiger_strerror.
+ * POSIX implementation of WT_SESSION.strerror and wiredtiger_strerror.
*/
const char *
-__wt_strerror(int error)
+__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen)
{
const char *p;
/*
- * POSIX errors are non-negative integers; check for 0 explicitly
- * in-case the underlying strerror doesn't handle 0, some don't.
+ * Check for a WiredTiger or POSIX constant string, no buffer needed.
*/
- if (error == 0)
- return ("Successful return: 0");
- if (error > 0 && (p = strerror(error)) != NULL)
+ if ((p = __wt_wiredtiger_error(error)) != NULL)
return (p);
- return (NULL);
-}
-
-/*
- * __wt_strerror_r --
- * POSIX implementation of wiredtiger_strerror_r.
- */
-int
-__wt_strerror_r(int error, char *buf, size_t buflen)
-{
- const char *p;
-
- /* Require at least 2 bytes, printable character and trailing nul. */
- if (buflen < 2)
- return (ENOMEM);
/*
- * Check for POSIX errors then fallback to something generic. Copy the
- * string into the user's buffer, return success if anything printed.
+ * When called from wiredtiger_strerror, write a passed-in buffer.
+ * When called from WT_SESSION.strerror, write the session's buffer.
+ *
+ * Fallback to a generic message.
*/
- p = __wt_strerror(error);
- if (p != NULL && snprintf(buf, buflen, "%s", p) > 0)
- return (0);
-
- /* Fallback to a generic message, then guess it's a memory problem. */
- return (
- snprintf(buf, buflen, "error return: %d", error) > 0 ? 0 : ENOMEM);
+ if (session == NULL &&
+ snprintf(errbuf, errlen, "error return: %d", error) > 0)
+ return (errbuf);
+ if (session != NULL && __wt_buf_fmt(
+ session, &session->err, "error return: %d", error) == 0)
+ return (session->err.data);
+
+ /* Defeated. */
+ return ("Unable to return error string");
}
diff --git a/src/third_party/wiredtiger/src/os_win/os_errno.c b/src/third_party/wiredtiger/src/os_win/os_errno.c
index 00ee638fbe3..81bcdf9089e 100644
--- a/src/third_party/wiredtiger/src/os_win/os_errno.c
+++ b/src/third_party/wiredtiger/src/os_win/os_errno.c
@@ -58,47 +58,27 @@ __wt_errno(void)
/*
* __wt_strerror --
- * Windows implementation of wiredtiger_strerror.
+ * Windows implementation of WT_SESSION.strerror and wiredtiger_strerror.
*/
const char *
-__wt_strerror(int error)
+__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen)
{
+ DWORD lasterror;
const char *p;
+ char buf[512];
/*
- * POSIX errors are non-negative integers; check for 0 explicitly
- * in-case the underlying strerror doesn't handle 0, some don't.
+ * Check for a WiredTiger or POSIX constant string, no buffer needed.
*/
- if (error == 0)
- return ("Successful return: 0");
- if (error > 0 && (p = strerror(error)) != NULL)
+ if ((p = __wt_wiredtiger_error(error)) != NULL)
return (p);
- return (NULL);
-}
-
-/*
- * __wt_strerror_r --
- * Windows implementation of wiredtiger_strerror_r.
- */
-int
-__wt_strerror_r(int error, char *buf, size_t buflen)
-{
- DWORD lasterror;
- const char *p;
-
- /* Require at least 2 bytes, printable character and trailing nul. */
- if (buflen < 2)
- return (ENOMEM);
/*
- * Check for POSIX errors, Windows errors, then fallback to something
- * generic. Copy the string into the user's buffer, return success if
- * anything printed.
+ * When called from wiredtiger_strerror, write a passed-in buffer.
+ * When called from WT_SESSION.strerror, write the session's buffer.
+ *
+ * Check for Windows errors.
*/
- p = __wt_strerror(error);
- if (p != NULL && snprintf(buf, buflen, "%s", p) > 0)
- return (0);
-
if (error < 0) {
error = __wt_map_error_to_windows_error(error);
@@ -109,16 +89,25 @@ __wt_strerror_r(int error, char *buf, size_t buflen)
error,
0, /* let system choose the correct LANGID */
buf,
- buflen,
+ sizeof(buf),
NULL);
- if (lasterror != 0)
- return (0);
-
- /* Fall through to the fallback error code */
+ if (lasterror != 0 && session == NULL &&
+ snprintf(errbuf, errlen, "%s", buf) > 0)
+ return (errbuf);
+ if (lasterror != 0 && session != NULL &&
+ __wt_buf_set(session, &session->err, buf, strlen(buf)) == 0)
+ return (session->err.data);
}
- /* Fallback to a generic message, then guess it's a memory problem. */
- return (
- snprintf(buf, buflen, "error return: %d", error) > 0 ? 0 : ENOMEM);
+ /* Fall back to a generic message. */
+ if (session == NULL &&
+ snprintf(errbuf, errlen, "error return: %d", error) > 0)
+ return (errbuf);
+ if (session != NULL && __wt_buf_fmt(
+ session, &session->err, "error return: %d", error) == 0)
+ return (session->err.data);
+
+ /* Defeated. */
+ return ("Unable to return error string");
}
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index 8ee143133ae..e54553aa071 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -128,8 +128,9 @@ __session_close(WT_SESSION *wt_session, const char *config)
/* Discard metadata tracking. */
__wt_meta_track_discard(session);
- /* Discard scratch buffers. */
+ /* Discard scratch buffers, error memory. */
__wt_scr_discard(session);
+ __wt_buf_free(session, &session->err);
/* Free transaction information. */
__wt_txn_destroy(session);
@@ -670,7 +671,11 @@ __session_truncate(WT_SESSION *wt_session,
done:
err: TXN_API_END_RETRY(session, ret, 0);
- return ((ret) == WT_NOTFOUND ? ENOENT : (ret));
+
+ /*
+ * Only map WT_NOTFOUND to ENOENT if a URI was specified.
+ */
+ return (ret == WT_NOTFOUND && uri != NULL ? ENOENT : ret);
}
/*
@@ -898,6 +903,20 @@ err: F_CLR(session, WT_SESSION_CAN_WAIT | WT_SESSION_NO_CACHE_CHECK);
}
/*
+ * __session_strerror --
+ *	WT_SESSION->strerror method: hand the error code to __wt_strerror
+ *	together with the session handle and no caller-supplied buffer.
+ */
+static const char *
+__session_strerror(WT_SESSION *wt_session, int error)
+{
+	/* Only the session is passed; the buffer arguments are NULL and 0. */
+	return (__wt_strerror((WT_SESSION_IMPL *)wt_session, error, NULL, 0));
+}
+
+/*
* __wt_open_internal_session --
* Allocate a session for WiredTiger's use.
*/
@@ -959,6 +978,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn,
NULL,
__session_close,
__session_reconfigure,
+ __session_strerror,
__session_open_cursor,
__session_create,
__session_compact,
diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c
index d766fcba33a..49a3891c58a 100644
--- a/src/third_party/wiredtiger/src/support/err.c
+++ b/src/third_party/wiredtiger/src/support/err.c
@@ -409,6 +409,20 @@ __wt_ext_msg_printf(
}
/*
+ * __wt_ext_strerror --
+ *	Extension API call to return an error as a string.
+ */
+const char *
+__wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, int error)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_SESSION *session;
+
+	/* Without a caller session, use the connection's default session. */
+	if ((session = wt_session) == NULL) {
+		conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+		session = (WT_SESSION *)conn->default_session;
+	}
+
+	return (session->strerror(session, error));
+}
+
+/*
* __wt_progress --
* Progress message.
*/
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index 71f5df9dda2..2c834083691 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -412,7 +412,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
WT_RECOVERY r;
struct WT_RECOVERY_FILE *metafile;
char *config;
- int was_backup;
+ int needs_rec, was_backup;
conn = S2C(session);
WT_CLEAR(r);
@@ -483,14 +483,25 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY,
"Main recovery loop: starting at %u/%" PRIuMAX,
r.ckpt_lsn.file, (uintmax_t)r.ckpt_lsn.offset));
+ WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec));
+ /*
+ * Check if the database was shut down cleanly. If not
+ * return an error if the user does not want automatic
+ * recovery.
+ */
+ if (needs_rec && FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR))
+ WT_ERR(WT_RUN_RECOVERY);
+ /*
+ * Always run recovery even if it was a clean shutdown.
+ * We can consider skipping it in the future.
+ */
if (WT_IS_INIT_LSN(&r.ckpt_lsn))
WT_ERR(__wt_log_scan(session, NULL,
WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER,
__txn_log_recover, &r));
else
WT_ERR(__wt_log_scan(session, &r.ckpt_lsn,
- WT_LOGSCAN_RECOVER,
- __txn_log_recover, &r));
+ WT_LOGSCAN_RECOVER, __txn_log_recover, &r));
conn->next_file_id = r.max_fileid;
diff --git a/src/third_party/wiredtiger/src/utilities/util_main.c b/src/third_party/wiredtiger/src/utilities/util_main.c
index ecfffb81e4b..3274f3a0fd0 100644
--- a/src/third_party/wiredtiger/src/utilities/util_main.c
+++ b/src/third_party/wiredtiger/src/utilities/util_main.c
@@ -11,11 +11,15 @@
const char *home = "."; /* Home directory */
const char *progname; /* Program name */
/* Global arguments */
-const char *usage_prefix = "[-Vv] [-C config] [-h home]";
+const char *usage_prefix = "[-Vv] [-R] [-C config] [-h home]";
int verbose; /* Verbose flag */
static const char *command; /* Command name */
+#define REC_ERROR "log=(recover=error)"
+#define REC_LOGOFF "log=(enabled=false)"
+#define REC_RECOVER "log=(recover=on)"
+
static int usage(void);
int
@@ -27,7 +31,7 @@ main(int argc, char *argv[])
size_t len;
int ch, major_v, minor_v, tret, (*func)(WT_SESSION *, int, char *[]);
char *p;
- const char *cmd_config, *config;
+ const char *cmd_config, *config, *rec_config;
conn = NULL;
p = NULL;
@@ -52,9 +56,16 @@ main(int argc, char *argv[])
return (EXIT_FAILURE);
}
- /* Check for standard options. */
cmd_config = config = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "C:h:Vv")) != EOF)
+ /*
+ * We default to returning an error if recovery needs to be run.
+ * Generally we expect this to be run after a clean shutdown.
+ * The printlog command disables logging entirely. If recovery is
+ * needed, the user can specify -R to run recovery.
+ */
+ rec_config = REC_ERROR;
+ /* Check for standard options. */
+ while ((ch = __wt_getopt(progname, argc, argv, "C:h:RVv")) != EOF)
switch (ch) {
case 'C': /* wiredtiger_open config */
cmd_config = __wt_optarg;
@@ -62,6 +73,9 @@ main(int argc, char *argv[])
case 'h': /* home directory */
home = __wt_optarg;
break;
+ case 'R': /* recovery */
+ rec_config = REC_RECOVER;
+ break;
case 'V': /* version */
printf("%s\n", wiredtiger_version(NULL, NULL, NULL));
return (EXIT_SUCCESS);
@@ -118,8 +132,10 @@ main(int argc, char *argv[])
}
break;
case 'p':
- if (strcmp(command, "printlog") == 0)
+ if (strcmp(command, "printlog") == 0) {
func = util_printlog;
+ rec_config = REC_LOGOFF;
+ }
break;
case 'r':
if (strcmp(command, "read") == 0)
@@ -154,15 +170,22 @@ main(int argc, char *argv[])
return (usage());
/* Build the configuration string, as necessary. */
- if (config == NULL)
- config = cmd_config;
- else if (cmd_config != NULL) {
- len = strlen(cmd_config) + strlen(config) + 10;
+ if (cmd_config != NULL || rec_config != NULL) {
+ len = 10; /* some slop */
+ if (config != NULL)
+ len += strlen(config);
+ if (cmd_config != NULL)
+ len += strlen(cmd_config);
+ if (rec_config != NULL)
+ len += strlen(rec_config);
if ((p = malloc(len)) == NULL) {
ret = util_err(errno, NULL);
goto err;
}
- (void)snprintf(p, len, "%s,%s", config, cmd_config);
+ (void)snprintf(p, len, "%s,%s,%s",
+ config == NULL ? "" : config,
+ cmd_config == NULL ? "" : cmd_config,
+ rec_config == NULL ? "" : rec_config);
config = p;
}
@@ -201,6 +224,7 @@ usage(void)
"global options:\n"
"\t" "-C\twiredtiger_open configuration\n"
"\t" "-h\tdatabase directory\n"
+ "\t" "-R\trun recovery if configured\n"
"\t" "-V\tdisplay library version and exit\n"
"\t" "-v\tverbose\n");
fprintf(stderr,
diff --git a/src/third_party/wiredtiger/src/utilities/util_verify.c b/src/third_party/wiredtiger/src/utilities/util_verify.c
index 7b3fffdd03b..796d24a8a6d 100644
--- a/src/third_party/wiredtiger/src/utilities/util_verify.c
+++ b/src/third_party/wiredtiger/src/utilities/util_verify.c
@@ -10,28 +10,17 @@
static int usage(void);
-#undef OPT_ARGS
-#undef USAGE_ARGS
-#ifdef HAVE_DIAGNOSTIC
-#define OPT_ARGS "d:"
-#define USAGE_ARGS \
- "[-d dump_address | dump_blocks | dump_offsets=#,# | dump_pages] uri"
-#else
-#define OPT_ARGS ""
-#define USAGE_ARGS "uri"
-#endif
-
int
util_verify(WT_SESSION *session, int argc, char *argv[])
{
WT_DECL_RET;
size_t size;
- int ch, dump_address, dump_blocks, dump_pages;
+ int ch, dump_address, dump_blocks, dump_pages, dump_shape;
char *config, *dump_offsets, *name;
- dump_address = dump_blocks = dump_pages = 0;
+ dump_address = dump_blocks = dump_pages = dump_shape = 0;
config = dump_offsets = name = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, OPT_ARGS)) != EOF)
+ while ((ch = __wt_getopt(progname, argc, argv, "d:")) != EOF)
switch (ch) {
case 'd':
if (strcmp(__wt_optarg, "dump_address") == 0)
@@ -50,6 +39,8 @@ util_verify(WT_SESSION *session, int argc, char *argv[])
__wt_optarg + strlen("dump_offsets=");
} else if (strcmp(__wt_optarg, "dump_pages") == 0)
dump_pages = 1;
+ else if (strcmp(__wt_optarg, "dump_shape") == 0)
+ dump_shape = 1;
else
return (usage());
break;
@@ -67,11 +58,13 @@ util_verify(WT_SESSION *session, int argc, char *argv[])
return (1);
/* Build the configuration string as necessary. */
- if (dump_address || dump_blocks || dump_offsets != NULL || dump_pages) {
+ if (dump_address ||
+ dump_blocks || dump_offsets != NULL || dump_pages || dump_shape) {
size =
strlen("dump_address,") +
strlen("dump_blocks,") +
strlen("dump_pages,") +
+ strlen("dump_shape,") +
strlen("dump_offsets[],") +
(dump_offsets == NULL ? 0 : strlen(dump_offsets)) + 20;
if ((config = malloc(size)) == NULL) {
@@ -79,13 +72,14 @@ util_verify(WT_SESSION *session, int argc, char *argv[])
goto err;
}
snprintf(config, size,
- "%s%s%s%s%s%s",
+ "%s%s%s%s%s%s%s",
dump_address ? "dump_address," : "",
dump_blocks ? "dump_blocks," : "",
dump_offsets != NULL ? "dump_offsets=[" : "",
dump_offsets != NULL ? dump_offsets : "",
dump_offsets != NULL ? "]," : "",
- dump_pages ? "dump_pages" : "");
+ dump_pages ? "dump_pages," : "",
+ dump_shape ? "dump_shape," : "");
}
if ((ret = session->verify(session, name, config)) != 0) {
fprintf(stderr, "%s: verify(%s): %s\n",
@@ -115,6 +109,8 @@ usage(void)
(void)fprintf(stderr,
"usage: %s %s "
"verify %s\n",
- progname, usage_prefix, USAGE_ARGS);
+ progname, usage_prefix,
+ "[-d dump_address | dump_blocks | "
+ "dump_offsets=#,# | dump_pages | dump_shape] uri");
return (1);
}
diff --git a/src/third_party/wiredtiger/tools/wtstats/wtstats.py b/src/third_party/wiredtiger/tools/wtstats/wtstats.py
index 031b7cb546f..3749ffd6c63 100755
--- a/src/third_party/wiredtiger/tools/wtstats/wtstats.py
+++ b/src/third_party/wiredtiger/tools/wtstats/wtstats.py
@@ -58,15 +58,18 @@ def munge(args, title, values):
if title.split(' ')[1] != 'spinlock' and \
title.split(' ', 1)[1] in no_scale_per_second_list:
seconds = 1
+ elif 'wtperf' in title and 'per second' not in title:
+ seconds = 1
else:
t1, v1 = values[1]
seconds = (parsetime(t1) - start_time).seconds
- ylabel += ' per second'
+ if not ylabel.endswith('per second'):
+ ylabel += ' per second'
if seconds == 0:
seconds = 1
stats_cleared = False
- if args.clear or title.split(' ', 1)[1] in no_clear_list:
+ if args.clear or title.split(' ', 1)[1] in no_clear_list or 'wtperf' in title:
stats_cleared = True
# Split the values into a dictionary of y-axis values keyed by the x axis
@@ -85,9 +88,186 @@ def munge(args, title, values):
return ylabel, ydata
+
# Parse the command line
import argparse
+def common_prefix(a, b):
+    """ return the longest leading part of a that b also starts with """
+    # Shrink the candidate from the right one character at a time; the
+    # empty string is a prefix of every string, so the loop always ends.
+    end = len(a)
+    while not b.startswith(a[:end]):
+        end -= 1
+    return a[:end]
+
+
+def common_suffix(a, b):
+    """ return the longest trailing part of b that a also ends with """
+    # Drop characters from the left of the candidate until it matches; the
+    # empty string is a suffix of every string, so the loop always ends.
+    start = 0
+    while not a.endswith(b[start:]):
+        start += 1
+    return b[start:]
+
+
+def parse_wtstats_file(file, result):
+    """ parse wtstats file, one stat per line, example format:
+        Dec 05 14:43:14 0 /data/b block-manager: mapped bytes read
+
+    Each line is split on single spaces into month, day, time, value and
+    title (the title keeps any embedded spaces).  The pair
+    ("<month> <day> <time>", value) is appended to result[title]; the
+    value is kept as the string read from the file.
+    """
+    print 'Processing wtstats file: ' + file
+
+    # Parse file; the context manager closes it even if a line is malformed
+    with open(file, 'rU') as fh:
+        for line in fh:
+            month, day, time, v, title = line.strip('\n').split(" ", 4)
+            result[title].append((month + " " + day + " " + time, v))
+
+
+
+def parse_wtperf_file(file, result):
+    """ parse wtperf file, all stats on single line, example format:
+        Feb 13 17:55:14,0,0,156871,0,N,0,0,0,49,6,6146,0,0,0
+
+    The first line holds the column headings; its first column is dropped
+    and '(uS)' is rewritten to ' (ms)'.  For every following line the pair
+    ("<month> <day> <time>", value) is appended to
+    result['wtperf: ' + heading].  A value of 'N' is stored as 0, values
+    in '(ms)' columns are divided by 1000 and stored as floats, and all
+    other values are kept as the strings read from the file.
+    """
+    print 'Processing wtperf file: ' + file
+
+    # the context manager closes the file even if a line is malformed
+    with open(file, 'rU') as fh:
+        # first line contains headings, replace microseconds with milliseconds
+        headings = [h.replace('(uS)', ' (ms)')
+                    for h in next(fh).strip('\n').split(',')[1:]]
+
+        # parse rest of file
+        for line in fh:
+            month, day, time, values = re.split(r'[ ,]', line.strip('\n'), 3)
+            stamp = month + " " + day + " " + time
+            for i, v in enumerate(values.split(',')):
+                if v == 'N':
+                    v = 0
+                # convert us to ms
+                if '(ms)' in headings[i]:
+                    v = float(v) / 1000.0
+                result['wtperf: ' + headings[i]].append((stamp, v))
+
+
+def skip_constants(result):
+    """ remove series whose value never changes, and the wtperf totalsec
+    series, from result; result is modified in place and returned """
+    # Snapshot the titles first because entries are deleted while walking.
+    for title in list(result.keys()):
+        series = result[title]
+        first_value = series[0][1]
+        constant = all(v == first_value for _, v in series)
+
+        if constant or title == 'wtperf: totalsec':
+            del result[title]
+
+    return result
+
+
+def parse_files(files_or_dir):
+    """ parse every entry of files_or_dir into one dictionary of series.
+
+    A regular file is treated as wtperf output when its first line starts
+    with '#time' and as wtstats output otherwise.  In a directory, files
+    matching 'WiredTigerStat*' are parsed as wtstats and files matching
+    'monitor*' as wtperf.  Entries that are neither are ignored.
+    """
+    result = defaultdict(list)
+
+    for path in files_or_dir:
+        if os.path.isfile(path):
+            # peek at first line to determine type
+            with open(path, 'rU') as fh:
+                first_line = fh.readline()
+            if first_line.startswith('#time'):
+                parser = parse_wtperf_file
+            else:
+                parser = parse_wtstats_file
+            parser(path, result)
+            continue
+
+        if not os.path.isdir(path):
+            continue
+
+        for stat_file in glob(os.path.join(path, 'WiredTigerStat*')):
+            parse_wtstats_file(stat_file, result)
+
+        for monitor_file in glob(os.path.join(path, 'monitor*')):
+            parse_wtperf_file(monitor_file, result)
+
+    return result
+
+
+
+def output_series(results, args, prefix=None, grouplist=None):
+    """ Write the data into the html template
+
+    results   -- list of (title, ydata) pairs.
+    args      -- parsed command-line arguments; args.output names the
+                 output file and args.json requests an extra JSON dump.
+    prefix    -- when given without grouplist, only titles containing it
+                 are written and it becomes part of the output file name.
+    grouplist -- optional list of substrings; when given together with
+                 prefix, a title is written if it contains any of them
+                 and prefix only names the output file.
+
+    Nothing is created when the selection is empty.  Exits with status -1
+    when the html template cannot be opened or read.
+    """
+    # avoid a shared mutable default argument
+    if grouplist is None:
+        grouplist = []
+
+    # add .html ending if not present
+    filename, ext = os.path.splitext(args.output)
+    if ext == '':
+        ext = '.html'
+
+    # choose the output file name and the series to write based on prefix
+    if prefix is None:
+        outputname = filename + ext
+        this_series = results
+    elif len(grouplist) == 0:
+        outputname = filename + '.' + prefix + ext
+        this_series = [(title, ydata) for title, ydata in results
+                       if prefix in title]
+    else:
+        outputname = filename + '.group.' + prefix + ext
+        # add each title at most once, even when several subgroups match it
+        this_series = [(title, ydata) for title, ydata in results
+                       if any(subgroup in title for subgroup in grouplist)]
+
+    if len(this_series) == 0:
+        print 'Output: ' + outputname + ' has no data. Do not create.'
+        return
+
+    json_output = {
+        "series": [{"key": title, "values": ydata}
+                   for title, ydata in this_series],
+    }
+
+    # load template; the context manager closes it even if the read fails
+    this_path = os.path.dirname(os.path.realpath(__file__))
+    srcfile = os.path.join(this_path, 'wtstats.html.template')
+    try:
+        with open(srcfile) as src:
+            contents = src.read()
+    except IOError:
+        print >>sys.stderr, "Cannot find template file 'wtstats.html." \
+            "template'. See ./template/README.md for more information."
+        sys.exit(-1)
+
+    # if --json write data to <filename>.json
+    # NOTE(review): the JSON name does not include prefix, so successive
+    # calls with different prefixes write the same file -- confirm intended.
+    if args.json:
+        jsonfile = filename + '.json'
+        with open(jsonfile, 'w') as f:
+            json.dump(json_output, f)
+        print "created %s" % jsonfile
+
+    # write output file
+    replaced_contents = contents.replace('"### INSERT DATA HERE ###"',
+        json.dumps(json_output))
+    with open(outputname, 'wt') as dstfile:
+        dstfile.write(replaced_contents)
+    print "created %s" % outputname
+
+
+
+
def main():
parser = argparse.ArgumentParser(description='Create graphs from' \
'WiredTiger statistics.')
@@ -109,155 +289,51 @@ def main():
'logging')
args = parser.parse_args()
- # Read the input file(s) into a dictionary of lists.
- def getfiles(l):
- for f in l:
- if os.path.isfile(f):
- yield f
- elif os.path.isdir(f):
- for s in glob(os.path.join(f, 'WiredTigerStat*')):
- print 'Processing ' + s
- yield s
-
- d = defaultdict(list)
- for f in getfiles(args.files):
- for line in open(f, 'rU'):
- month, day, time, v, title = line.strip('\n').split(" ", 4)
- d[title].append((month + " " + day + " " + time, v))
-
- # Process the series, eliminate constants
- for title, values in sorted(d.iteritems()):
- skip = True
- t0, v0 = values[0]
- for t, v in values:
- if v != v0:
- skip = False
- break
- if skip:
- #print "Skipping", title
- del d[title]
+ # Parse files or directory and skip constants
+ parsed = skip_constants(parse_files(args.files))
- # Common prefix / suffix elimination
+ # filter results based on --include, compute common prefix and suffix
+ results = []
prefix = suffix = None
- def common_prefix(a, b):
- while not b.startswith(a):
- a = a[:-1]
- return a
-
- def common_suffix(a, b):
- while not a.endswith(b):
- b = b[1:]
- return b
-
- def output_series(results, prefix=None, grouplist=[]):
- # add .html ending if not present
- filename, ext = os.path.splitext(args.output)
- if ext == '':
- ext = '.html'
-
- # open the output file based on prefix
- if prefix == None:
- outputname = filename + ext
- elif len(grouplist) == 0:
- outputname = filename +'.' + prefix + ext
- else:
- outputname = filename +'.group.' + prefix + ext
-
- if prefix != None and len(grouplist) == 0:
- this_series = []
- for title, ydata in results:
- if not prefix in title:
- continue
- #print 'Appending to dataset: ' + title
- this_series.append((title, ydata))
- elif prefix != None and len(grouplist) > 0:
- this_series = []
- for title, ydata in results:
- for subgroup in grouplist:
- if not subgroup in title:
- continue
- # print 'Appending to dataset: ' + title
- this_series.append((title, ydata))
- else:
- this_series = results
-
- if len(this_series) == 0:
- print 'Output: ' + outputname + ' has no data. Do not create.'
- return
-
-
- json_output = { "series": [] }
-
- for title, ydata in this_series:
- json_output["series"].append({
- "key": title,
- "values": ydata,
- });
-
- # load template
- this_path = os.path.dirname(os.path.realpath(__file__))
- srcfile = os.path.join(this_path, 'wtstats.html.template')
- try:
- srcfile = open(srcfile)
- contents = srcfile.read()
- except IOError:
- print >>sys.stderr, "Cannot find template file 'wtstats.html." \
- "template'. See ./template/README.md for more information."
- sys.exit(-1)
-
- srcfile.close()
-
- # if --json write data to <filename>.json
- if args.json:
- jsonfile = filename + '.json'
- with open(jsonfile, 'w') as f:
- json.dump(json_output, f)
- print "created %s" % jsonfile
-
- # write output file
- dstfile = open(outputname, 'wt')
- replaced_contents = contents.replace('"### INSERT DATA HERE ###"',
- json.dumps(json_output))
- dstfile.write(replaced_contents)
- dstfile.close()
- print "created %s" % dstfile.name
-
- # Split out the data, convert timestamps
- results = []
- for title, values in sorted(d.iteritems()):
+ for title, values in sorted(parsed.iteritems()):
title, ydata = munge(args, title, values)
- # Ignore entries if a list of regular expressions was given
+
+ # ignore entries if a list of regular expressions was given
if args.include and not [r for r in args.include if r.search(title)]:
continue
- prefix = title if prefix is None else common_prefix(prefix, title)
- suffix = title if suffix is None else common_suffix(title, suffix)
+ if not 'wtperf' in title:
+ prefix = title if prefix is None else common_prefix(prefix, title)
+ suffix = title if suffix is None else common_suffix(title, suffix)
results.append((title, ydata))
# Process titles, eliminate common prefixes and suffixes
if prefix or suffix:
new_results = []
for title, ydata in results:
- title = title[len(prefix):]
- if suffix:
- title = title[:-len(suffix)]
+ if 'wtperf' not in title:
+ title = title[len(prefix):]
+ if suffix:
+ title = title[:-len(suffix)]
new_results.append((title, ydata))
results = new_results
# Are we just listing the results?
if args.list:
+ print
+ print "Parsed stats:"
for title, ydata in results:
- print title
+ print " ", title
sys.exit(0)
- output_series(results)
+ output_series(results, args)
# If the user wants the stats split up by prefix type do so.
if args.all:
for prefix in prefix_list:
- output_series(results, prefix)
+ output_series(results, args, prefix)
for group in groups.keys():
- output_series(results, group, groups[group])
+ output_series(results, args, group, groups[group])
if __name__ == '__main__':