summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2020-02-11 04:37:44 +0000
committerevergreen <evergreen@mongodb.com>2020-02-11 04:37:44 +0000
commit7e0e302388b5fa71d29c0145a445e105b72864cd (patch)
treeaaeb3eea7963eebab64f5774c7d0f47e86e07749 /src
parente94778714638cb20f93ae94d4e16c38ed2d987bc (diff)
downloadmongo-7e0e302388b5fa71d29c0145a445e105b72864cd.tar.gz
Import wiredtiger: 4a7bbce5cb744d9026f083314746e85fa851338e from branch mongodb-4.2
ref: 35ba2ab887..4a7bbce5cb for: 4.2.4 WT-4999 Migrate Jenkins “wiredtiger-test-format-stress-zseries” job to Evergreen WT-5159 Make wiredtiger work with SWIG 4.0.0 beyond WT-5206 Return the correct checkpoint-modified list of blocks WT-5219 Btree walk code read the lock WT_REF.addr field without locking WT-5376 WT_UPDATE.type field can race with visibility checks when returning key/value pairs WT-5387 Prepared transaction resolution can stall eviction on active pages WT-5393 Prepared transaction rollback and API error handling fixes WT-5405 Make format LSM test a separate Evergreen task WT-5437 Salvage's excessive consumption of cache memory causing eviction to stall WT-5444 Re-enable PPC format tests in Evergreen WT-5449 Increase contention in history store stress workload WT-5458 Fix Evergreen timeout failures in linux-directio test WT-5460 Buffer alignment failure captured by linux-directio test WT-5468 Improve documentation for "wt load" WT-5480 Don't take threads resolving prepared transactions to assist with eviction WT-5481 DIAGNOSTIC split code assert can race with WT_REF locking WT-5488 Dump the failing CONFIG for Evergreen test/format tasks WT-5489 page-read can race with threads locking in-memory page structures WT-5534 Incremental backup needs to accept older metadata WT-5537 Use correct WT_ITEM fields per memory sanitizer
Diffstat (limited to 'src')
-rwxr-xr-xsrc/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py12
-rwxr-xr-xsrc/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py2
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/split_heavy.wtperf2
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py2
-rw-r--r--src/third_party/wiredtiger/dist/s_funcs.list1
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_python3
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok1
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_backup_block.c127
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/lang/python/Makefile.am9
-rwxr-xr-xsrc/third_party/wiredtiger/lang/python/setup_pip.py13
-rw-r--r--src/third_party/wiredtiger/lang/python/wiredtiger.i2
-rwxr-xr-xsrc/third_party/wiredtiger/lang/python/wiredtiger/init.py (renamed from src/third_party/wiredtiger/lang/python/wiredtiger/pip_init.py)12
-rw-r--r--src/third_party/wiredtiger/src/block/block_ckpt.c83
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_compact.c69
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_cursor.c18
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_import.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_misc.c21
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c13
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_slvg.c362
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c8
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy.c71
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_walk.c12
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c11
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c6
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_dhandle.c32
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_handle.c8
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_open.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_backup.c220
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_backup_incr.c260
-rw-r--r--src/third_party/wiredtiger/src/docs/command-line.dox11
-rw-r--r--src/third_party/wiredtiger/src/include/api.h42
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i68
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h2
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.h51
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h16
-rw-r--r--src/third_party/wiredtiger/src/include/meta.h50
-rw-r--r--src/third_party/wiredtiger/src/include/session.h57
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i52
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h4
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_ckpt.c208
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_turtle.c13
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c79
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c316
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c1
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_load.c2
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c8
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml53
-rw-r--r--src/third_party/wiredtiger/test/format/config.c24
-rwxr-xr-xsrc/third_party/wiredtiger/test/format/format.sh4
-rw-r--r--src/third_party/wiredtiger/test/suite/test_assert02.py7
-rw-r--r--src/third_party/wiredtiger/test/suite/test_assert05.py10
-rw-r--r--src/third_party/wiredtiger/test/suite/test_assert06.py36
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup12.py8
-rw-r--r--src/third_party/wiredtiger/test/suite/test_durable_ts02.py12
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare02.py56
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare04.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare05.py14
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare06.py15
-rw-r--r--src/third_party/wiredtiger/test/suite/test_stat08.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp13.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_txn17.py10
62 files changed, 1637 insertions, 988 deletions
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py b/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py
index 333da4b178c..9f7cb2941c0 100755
--- a/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py
+++ b/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py
@@ -81,7 +81,7 @@ from workgen import *
context = Context()
homedir = "WT_TEST"
conn_config = "cache_size=1G,checkpoint=(wait=60,log_size=2GB),\
- eviction=(threads_min=12,threads_max=12),log=(enabled=true),session_max=600,\
+ eviction=(threads_min=12,threads_max=12),log=(enabled=true),session_max=800,\
eviction_target=60,statistics=(fast),statistics_log=(wait=1,json)"# explicitly added
conn = wiredtiger_open(homedir, "create," + conn_config)
s = conn.open_session("")
@@ -119,7 +119,7 @@ s.create(log_name, wtperf_table_config + "key_format=S,value_format=S," +\
compress_table_config + table_config + ",log=(enabled=true)")
log_table = Table(log_name)
-ops = Operation(Operation.OP_SEARCH, tables[0])
+ops = Operation(Operation.OP_SEARCH, tables[0],Key(Key.KEYGEN_PARETO, 0, ParetoOptions(1)))
ops = op_multi_table(ops, tables, False)
ops = op_log_like(ops, log_table, 0)
thread0 = Thread(ops)
@@ -131,8 +131,6 @@ thread1 = Thread(ops)
# These operations include log_like operations, which will increase the number
# of insert/update operations by a factor of 2.0. This may cause the
# actual operations performed to be above the throttle.
-thread1.options.throttle=500
-thread1.options.throttle_burst=1.0
ops = Operation(Operation.OP_UPDATE, tables[0])
ops = op_multi_table(ops, tables, False)
@@ -156,10 +154,10 @@ ops = Operation(Operation.OP_SLEEP, "0.1") + \
Operation(Operation.OP_LOG_FLUSH, "")
logging_thread = Thread(ops)
-workload = Workload(context, 350 * thread0 + 10 * thread1 +\
- 50 * thread2 + 100 * thread3 + logging_thread)
+workload = Workload(context, 400 * thread0 + 100 * thread1 +\
+ 10 * thread2 + 100 * thread3 + logging_thread)
workload.options.report_interval=5
-workload.options.run_time=300
+workload.options.run_time=500
workload.options.max_latency=50000
workload.run(conn)
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py b/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py
index e27bd479730..487e1e90ff1 100755
--- a/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py
+++ b/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
#
# runner/__init__.py
-# Used as a first import by runners, does any common initialization.
+# Used as a first import by runners, does any common initialization.
from __future__ import print_function
import os, shutil, sys
diff --git a/src/third_party/wiredtiger/bench/wtperf/split_heavy.wtperf b/src/third_party/wiredtiger/bench/wtperf/split_heavy.wtperf
index 4f11340c095..4460aa99491 100644
--- a/src/third_party/wiredtiger/bench/wtperf/split_heavy.wtperf
+++ b/src/third_party/wiredtiger/bench/wtperf/split_heavy.wtperf
@@ -10,4 +10,4 @@ run_time=10
threads=((count=20,inserts=1))
value_sz=200
key_sz=64
-reopen_connection=false \ No newline at end of file
+reopen_connection=false
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index cf942be2f84..45cca0ef829 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -377,6 +377,8 @@ file_config = format_meta + file_runtime_config + [
file_meta = file_config + [
Config('checkpoint', '', r'''
the file checkpoint entries'''),
+ Config('checkpoint_backup_info', '', r'''
+ the incremental backup durable information'''),
Config('checkpoint_lsn', '', r'''
LSN of the last checkpoint'''),
Config('id', '', r'''
diff --git a/src/third_party/wiredtiger/dist/s_funcs.list b/src/third_party/wiredtiger/dist/s_funcs.list
index 54928fc03b4..0218937fffc 100644
--- a/src/third_party/wiredtiger/dist/s_funcs.list
+++ b/src/third_party/wiredtiger/dist/s_funcs.list
@@ -34,7 +34,6 @@ __wt_stat_join_aggregate
__wt_stat_join_clear_all
__wt_stream_set_no_buffer
__wt_try_readlock
-__wt_txn_err_chk
wiredtiger_calc_modify
wiredtiger_config_parser_open
wiredtiger_config_validate
diff --git a/src/third_party/wiredtiger/dist/s_python b/src/third_party/wiredtiger/dist/s_python
index 7ecb97059b5..323a92dca79 100755
--- a/src/third_party/wiredtiger/dist/s_python
+++ b/src/third_party/wiredtiger/dist/s_python
@@ -7,9 +7,10 @@ trap 'rm -f $t' 0 1 2 3 13 15
cd ..
# Check Python coding standards: check for tab characters.
+# Ignore generated files.
egrep ' ' `find . -name '*.py'` |
sed -e 's/:.*//' \
- -e '/__init__.py/d' \
+ -e '/swig_wiredtiger.py/d' \
-e '/\/wiredtiger.py/d' \
-e '/src\/docs\/tools\/doxypy.py/d' |
sort -u |
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 6cda20d9b3d..185ed5aac8d 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -510,6 +510,7 @@ bitpos
bitstring
bitwise
blk
+blkmod
bm
bnd
bool
diff --git a/src/third_party/wiredtiger/examples/c/ex_backup_block.c b/src/third_party/wiredtiger/examples/c/ex_backup_block.c
index fbae6e6da5d..422bf728540 100644
--- a/src/third_party/wiredtiger/examples/c/ex_backup_block.c
+++ b/src/third_party/wiredtiger/examples/c/ex_backup_block.c
@@ -136,11 +136,11 @@ setup_directories(void)
}
static void
-add_work(WT_SESSION *session, int iter)
+add_work(WT_SESSION *session, int iter, int iterj)
{
WT_CURSOR *cursor, *cursor2;
int i;
- char k[32], v[32];
+ char k[64], v[64];
error_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
/*
@@ -154,8 +154,8 @@ add_work(WT_SESSION *session, int iter)
* Perform some operations with individual auto-commit transactions.
*/
for (i = 0; i < MAX_KEYS; i++) {
- (void)snprintf(k, sizeof(k), "key.%d.%d", iter, i);
- (void)snprintf(v, sizeof(v), "value.%d.%d", iter, i);
+ (void)snprintf(k, sizeof(k), "key.%d.%d.%d", iter, iterj, i);
+ (void)snprintf(v, sizeof(v), "value.%d.%d.%d", iter, iterj, i);
cursor->set_key(cursor, k);
cursor->set_value(cursor, v);
error_check(cursor->insert(cursor));
@@ -260,7 +260,8 @@ take_full_backup(WT_SESSION *session, int i)
} else
hdir = home_incr;
if (i == 0) {
- (void)snprintf(buf, sizeof(buf), "incremental=(enabled=true,this_id=ID%d)", i);
+ (void)snprintf(
+ buf, sizeof(buf), "incremental=(granularity=1M,enabled=true,this_id=ID%d)", i);
error_check(session->open_cursor(session, "backup:", NULL, buf, &cursor));
} else
error_check(session->open_cursor(session, "backup:", NULL, NULL, &cursor));
@@ -279,13 +280,17 @@ take_full_backup(WT_SESSION *session, int i)
for (j = 0; j < MAX_ITERATIONS; j++) {
(void)snprintf(h, sizeof(h), "%s.%d", home_incr, j);
(void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename);
+#if 0
printf("FULL: Copy: %s\n", buf);
+#endif
error_check(system(buf));
}
else {
(void)snprintf(h, sizeof(h), "%s.%d", home_full, i);
(void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, hdir, filename);
+#if 0
printf("FULL %d: Copy: %s\n", i, buf);
+#endif
error_check(system(buf));
}
}
@@ -300,13 +305,16 @@ take_incr_backup(WT_SESSION *session, int i)
FILELIST *flist;
WT_CURSOR *backup_cur, *incr_cur;
uint64_t offset, size, type;
- size_t alloc, count;
+ size_t alloc, count, rdsize, tmp_sz;
int j, ret, rfd, wfd;
- char buf[1024], h[256];
+ char buf[1024], h[256], *tmp;
const char *filename;
+ bool first;
/*! [incremental backup using block transfer]*/
+ tmp = NULL;
+ tmp_sz = 0;
/* Open the backup data source for incremental backup. */
(void)snprintf(buf, sizeof(buf), "incremental=(src_id=ID%d,this_id=ID%d)", i - 1, i);
error_check(session->open_cursor(session, "backup:", NULL, buf, &backup_cur));
@@ -321,51 +329,61 @@ take_incr_backup(WT_SESSION *session, int i)
error_check(process_file(&flist, &count, &alloc, filename));
(void)snprintf(h, sizeof(h), "%s.0", home_incr);
(void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename);
- printf("Copying backup: %s\n", buf);
- error_check(system(buf));
#if 0
- (void)snprintf(buf, sizeof(buf), "%s/%s", home, filename);
- printf("Open source %s for reading\n", buf);
- error_check(rfd = open(buf, O_RDONLY, 0));
- (void)snprintf(h, sizeof(h), "%s.%d", home_incr, i);
- (void)snprintf(buf, sizeof(buf), "%s/%s", h, filename);
- printf("Open dest %s for writing\n", buf);
- error_check(wfd = open(buf, O_WRONLY, 0));
+ printf("Copying backup: %s\n", buf);
#endif
+ error_check(system(buf));
+ first = true;
(void)snprintf(buf, sizeof(buf), "incremental=(file=%s)", filename);
error_check(session->open_cursor(session, NULL, backup_cur, buf, &incr_cur));
+#if 0
printf("Taking incremental %d: File %s\n", i, filename);
+#endif
while ((ret = incr_cur->next(incr_cur)) == 0) {
error_check(incr_cur->get_key(incr_cur, &offset, &size, &type));
- printf("Incremental %s: KEY: Off %" PRIu64 " Size: %" PRIu64 " Type: %" PRIu64 "\n",
- filename, offset, size, type);
scan_end_check(type == WT_BACKUP_FILE || type == WT_BACKUP_RANGE);
+#if 0
+ printf("Incremental %s: KEY: Off %" PRIu64 " Size: %" PRIu64 " %s\n", filename, offset,
+ size, type == WT_BACKUP_FILE ? "WT_BACKUP_FILE" : "WT_BACKUP_RANGE");
+#endif
if (type == WT_BACKUP_RANGE) {
/*
* We should never get a range key after a whole file so the read file descriptor
- * should be valid. If the read descriptor is valid, so it the write one.
+ * should be valid. If the read descriptor is valid, so is the write one.
*/
- scan_end_check(rfd != -1);
- printf("Incremental %s: Range Offset: %" PRIu64 " Size: %" PRIu64 "\n", filename,
- offset, size);
+ if (tmp_sz < size) {
+ tmp = realloc(tmp, size);
+ testutil_assert(tmp != NULL);
+ tmp_sz = size;
+ }
+ if (first) {
+ (void)snprintf(buf, sizeof(buf), "%s/%s", home, filename);
+ error_sys_check(rfd = open(buf, O_RDONLY, 0));
+ (void)snprintf(h, sizeof(h), "%s.%d", home_incr, i);
+ (void)snprintf(buf, sizeof(buf), "%s/%s", h, filename);
+ error_sys_check(wfd = open(buf, O_WRONLY, 0));
+ first = false;
+ }
+
error_sys_check(lseek(rfd, (wt_off_t)offset, SEEK_SET));
- error_sys_check(read(rfd, buf, (size_t)size));
+ error_sys_check(rdsize = (size_t)read(rfd, tmp, (size_t)size));
error_sys_check(lseek(wfd, (wt_off_t)offset, SEEK_SET));
- error_sys_check(write(wfd, buf, (size_t)size));
+ /* Use the read size since we may have read less than the granularity. */
+ error_sys_check(write(wfd, tmp, rdsize));
} else {
-/* Whole file, so close both files and just copy the whole thing. */
-#if 0
- error_check(close(rfd));
- error_check(close(wfd));
-#endif
+ /* Whole file, so close both files and just copy the whole thing. */
+ testutil_assert(first == true);
rfd = wfd = -1;
(void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename);
+#if 0
printf("Incremental: Whole file copy: %s\n", buf);
+#endif
error_check(system(buf));
}
}
scan_end_check(ret == WT_NOTFOUND);
+ /* Done processing this file. Close incremental cursor. */
error_check(incr_cur->close(incr_cur));
/* Close file descriptors if they're open. */
@@ -386,18 +404,21 @@ take_incr_backup(WT_SESSION *session, int i)
}
scan_end_check(ret == WT_NOTFOUND);
+ /* Done processing all files. Close backup cursor. */
error_check(backup_cur->close(backup_cur));
error_check(finalize_files(flist, count));
+ free(tmp);
/*! [incremental backup using block transfer]*/
}
int
main(int argc, char *argv[])
{
+ struct stat sb;
WT_CONNECTION *wt_conn;
WT_CURSOR *backup_cur;
WT_SESSION *session;
- int i;
+ int i, j, ret;
char cmd_buf[256];
(void)argc; /* Unused variable */
@@ -412,7 +433,7 @@ main(int argc, char *argv[])
error_check(session->create(session, uri, "key_format=S,value_format=S"));
error_check(session->create(session, uri2, "key_format=S,value_format=S"));
printf("Adding initial data\n");
- add_work(session, 0);
+ add_work(session, 0, 0);
printf("Taking initial backup\n");
take_full_backup(session, 0);
@@ -421,8 +442,12 @@ main(int argc, char *argv[])
for (i = 1; i < MAX_ITERATIONS; i++) {
printf("Iteration %d: adding data\n", i);
- add_work(session, i);
- error_check(session->checkpoint(session, NULL));
+ /* For each iteration we may add work and checkpoint multiple times. */
+ for (j = 0; j < i; j++) {
+ add_work(session, i, j);
+ error_check(session->checkpoint(session, NULL));
+ }
+
/*
* The full backup here is only needed for testing and comparison purposes. A normal
* incremental backup procedure would not include this.
@@ -440,6 +465,20 @@ main(int argc, char *argv[])
error_check(compare_backups(i));
}
+ printf("Close and reopen the connection\n");
+ /*
+ * Close and reopen the connection to illustrate the durability of id information.
+ */
+ error_check(wt_conn->close(wt_conn, NULL));
+ error_check(wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn));
+ error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session));
+ /*
+ * We should have an entry for i-1 and i-2. Use the older one.
+ */
+ (void)snprintf(cmd_buf, sizeof(cmd_buf), "incremental=(src_id=ID%d,this_id=ID%d)", i - 2, i);
+ error_check(session->open_cursor(session, "backup:", NULL, cmd_buf, &backup_cur));
+ error_check(backup_cur->close(backup_cur));
+
/*
* After we're done, release resources. Test the force stop setting.
*/
@@ -455,6 +494,28 @@ main(int argc, char *argv[])
printf("Final comparison: dumping and comparing data\n");
error_check(compare_backups(0));
+ for (i = 0; i < (int)filelist_count; ++i) {
+ if (last_flist[i].name == NULL)
+ break;
+ free((void *)last_flist[i].name);
+ }
+ free(last_flist);
+
+ /*
+ * Reopen the connection to verify that the forced stop should remove incremental information.
+ */
+ error_check(wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn));
+ error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session));
+ /*
+ * We should not have any information.
+ */
+ (void)snprintf(cmd_buf, sizeof(cmd_buf), "incremental=(src_id=ID%d,this_id=ID%d)", i - 2, i);
+ testutil_assert(session->open_cursor(session, "backup:", NULL, cmd_buf, &backup_cur) == ENOENT);
+ error_check(wt_conn->close(wt_conn, NULL));
+
+ (void)snprintf(cmd_buf, sizeof(cmd_buf), "%s/WiredTiger.backup.block", home);
+ ret = stat(cmd_buf, &sb);
+ testutil_assert(ret == -1 && errno == ENOENT);
return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index f3969285fd8..f56574c8ece 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.2",
- "commit": "35ba2ab88763ca8a25743a9ef78b2eaed28f4a6c"
+ "commit": "4a7bbce5cb744d9026f083314746e85fa851338e"
}
diff --git a/src/third_party/wiredtiger/lang/python/Makefile.am b/src/third_party/wiredtiger/lang/python/Makefile.am
index ace11dfa5b9..b15e66e0e83 100644
--- a/src/third_party/wiredtiger/lang/python/Makefile.am
+++ b/src/third_party/wiredtiger/lang/python/Makefile.am
@@ -2,7 +2,7 @@ PYSRC = $(top_srcdir)/lang/python
PYDIRS = -t $(abs_builddir) -I $(abs_top_srcdir):$(abs_top_builddir) -L $(abs_top_builddir)/.libs
PYDST = $(abs_builddir)/wiredtiger
PYFILES = $(PYDST)/fpacking.py $(PYDST)/intpacking.py $(PYDST)/packing.py \
- $(PYDST)/packutil.py $(PYDST)/__init__.py
+ $(PYDST)/packutil.py $(PYDST)/swig_wiredtiger.py $(PYDST)/__init__.py
PY_MAJOR_VERSION := $$($(PYTHON) -c \
'import sys; print(int(sys.version_info.major))')
@@ -25,7 +25,12 @@ pyfiles: $(PYFILES)
$(PYDST)/%: $(PYSRC)/wiredtiger/%
mkdir -p $(PYDST) && cp -f $< $@
-$(PYDST)/__init__.py: $(PYSRC)/wiredtiger.py
+$(PYDST)/__init__.py: $(PYSRC)/wiredtiger/init.py
+ mkdir -p $(PYDST) && cp -f $< $@
+
+# Note: this cannot be named wiredtiger.py in the target directory,
+# we won't be able to import it.
+$(PYDST)/swig_wiredtiger.py: $(PYSRC)/wiredtiger.py
mkdir -p $(PYDST) && cp -f $< $@
install-exec-local:
diff --git a/src/third_party/wiredtiger/lang/python/setup_pip.py b/src/third_party/wiredtiger/lang/python/setup_pip.py
index ce0fd1b0d77..c7b64c0d90f 100755
--- a/src/third_party/wiredtiger/lang/python/setup_pip.py
+++ b/src/third_party/wiredtiger/lang/python/setup_pip.py
@@ -188,8 +188,10 @@ def get_library_dirs():
return dirs
# source_filter
-# Make any needed changes to the sources list. Any entry that
-# needs to be moved is returned in a dictionary.
+# Make any needed changes to the original sources list and return a manifest,
+# a list of source file names relative to the new staging root. Any entry
+# that needs to be renamed returned in a dictionary where the entry's key
+# is the new name and the value is the old source name.
def source_filter(sources):
result = []
movers = dict()
@@ -205,15 +207,16 @@ def source_filter(sources):
# move all lang/python files to the top level.
if dest.startswith(pywt_prefix):
dest = os.path.basename(dest)
- if dest == 'pip_init.py':
+ if dest == 'init.py':
dest = '__init__.py'
if dest != src:
movers[dest] = src
result.append(dest)
# Add SWIG generated files
- result.append('wiredtiger.py')
- movers['wiredtiger.py'] = os.path.join(pywt_build_dir, '__init__.py')
result.append(os.path.join(py_dir, 'wiredtiger_wrap.c'))
+ wiredtiger_py = 'swig_wiredtiger.py'
+ result.append('swig_wiredtiger.py')
+ movers['swig_wiredtiger.py'] = os.path.join(py_dir, 'wiredtiger.py')
return result, movers
################################################################
diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger.i b/src/third_party/wiredtiger/lang/python/wiredtiger.i
index 86182b1716c..ea298cd4f10 100644
--- a/src/third_party/wiredtiger/lang/python/wiredtiger.i
+++ b/src/third_party/wiredtiger/lang/python/wiredtiger.i
@@ -50,7 +50,7 @@ This provides an API similar to the C API, with the following modifications:
%feature("autodoc", "0");
%pythoncode %{
-from .packing import pack, unpack
+from packing import pack, unpack
## @endcond
%}
diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger/pip_init.py b/src/third_party/wiredtiger/lang/python/wiredtiger/init.py
index 187a21443b7..28aef041a73 100755
--- a/src/third_party/wiredtiger/lang/python/wiredtiger/pip_init.py
+++ b/src/third_party/wiredtiger/lang/python/wiredtiger/init.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
#
-# pip_init.py
+# init.py
# This is installed as __init__.py, and imports the file created by SWIG.
# This is needed because SWIG's import helper code created by certain SWIG
# versions may be broken, see: https://github.com/swig/swig/issues/769 .
@@ -42,11 +42,11 @@ if fname != '__init__.py' and fname != '__init__.pyc':
# to this module so they will appear in the wiredtiger namespace.
me = sys.modules[__name__]
sys.path.append(os.path.dirname(__file__))
-try:
- import wiredtiger.wiredtiger as swig_wiredtiger
-except ImportError:
- # for Python2
- import wiredtiger as swig_wiredtiger
+
+# explicitly importing _wiredtiger in advance of SWIG allows us to not
+# use relative importing, as SWIG does. It doesn't work for us with Python2.
+import _wiredtiger
+import swig_wiredtiger
for name in dir(swig_wiredtiger):
value = getattr(swig_wiredtiger, name)
setattr(me, name, value)
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index 158fc919820..e1cf8982daf 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -9,8 +9,7 @@
#include "wt_internal.h"
static int __ckpt_process(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *);
-static int __ckpt_update(
- WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *, WT_CKPT *, WT_BLOCK_CKPT *, bool);
+static int __ckpt_update(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *, WT_CKPT *, WT_BLOCK_CKPT *);
/*
* __wt_block_ckpt_init --
@@ -331,6 +330,7 @@ __ckpt_verify(WT_SESSION_IMPL *session, WT_CKPT *ckptbase)
case WT_CKPT_DELETE | WT_CKPT_FAKE:
case WT_CKPT_FAKE:
break;
+ case WT_CKPT_ADD | WT_CKPT_BLOCK_MODS:
case WT_CKPT_ADD:
if (ckpt[1].name == NULL)
break;
@@ -570,7 +570,7 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
/* Update checkpoints marked for update. */
WT_CKPT_FOREACH (ckptbase, ckpt)
if (F_ISSET(ckpt, WT_CKPT_UPDATE))
- WT_ERR(__ckpt_update(session, block, ckptbase, ckpt, ckpt->bpriv, false));
+ WT_ERR(__ckpt_update(session, block, ckptbase, ckpt, ckpt->bpriv));
live_update:
/* Truncate the file if that's possible. */
@@ -607,7 +607,7 @@ live_update:
*/
ci->ckpt_size = WT_MIN(ckpt_size, (uint64_t)block->size);
- WT_ERR(__ckpt_update(session, block, ckptbase, ckpt, ci, true));
+ WT_ERR(__ckpt_update(session, block, ckptbase, ckpt, ci));
}
/*
@@ -654,16 +654,80 @@ err:
}
/*
+ * __ckpt_add_blkmod_entry --
+ * Add an offset/length entry to the bitstring based on granularity.
+ */
+static int
+__ckpt_add_blkmod_entry(
+ WT_SESSION_IMPL *session, WT_BLOCK_MODS *blk_mod, wt_off_t offset, wt_off_t len)
+{
+ uint64_t end, start;
+ uint32_t end_rdup;
+
+ WT_ASSERT(session, blk_mod->granularity != 0);
+ start = (uint64_t)offset / blk_mod->granularity;
+ end = (uint64_t)(offset + len) / blk_mod->granularity;
+ WT_ASSERT(session, end < UINT32_MAX);
+ end_rdup = WT_MAX(__wt_rduppo2((uint32_t)end, 8), WT_BLOCK_MODS_LIST_MIN);
+ if ((end_rdup << 3) > blk_mod->nbits) {
+ /* If we don't have enough, extend the buffer. */
+ if (blk_mod->nbits == 0) {
+ WT_RET(__wt_buf_initsize(session, &blk_mod->bitstring, end_rdup));
+ memset(blk_mod->bitstring.mem, 0, end_rdup);
+ } else
+ WT_RET(__wt_buf_extend(session, &blk_mod->bitstring, end_rdup));
+ blk_mod->nbits = end_rdup << 3;
+ }
+
+ /* Set all the bits needed to record this offset/length pair. */
+ __bit_nset(blk_mod->bitstring.mem, start, end);
+ return (0);
+}
+
+/*
+ * __ckpt_add_blk_mods --
+ * Add the blocks to all valid incremental backup source identifiers.
+ */
+static int
+__ckpt_add_blk_mods(WT_SESSION_IMPL *session, WT_CKPT *ckpt, WT_BLOCK_CKPT *ci)
+{
+ WT_BLOCK_MODS *blk_mod;
+ WT_EXT *ext;
+ u_int i;
+
+ for (i = 0; i < WT_BLKINCR_MAX; ++i) {
+ blk_mod = &ckpt->backup_blocks[i];
+ /* If there is no information at this entry, we're done. */
+ if (!F_ISSET(blk_mod, WT_BLOCK_MODS_VALID))
+ continue;
+
+ WT_EXT_FOREACH (ext, ci->alloc.off)
+ WT_RET(__ckpt_add_blkmod_entry(session, blk_mod, ext->off, ext->size));
+
+ if (ci->alloc.offset != WT_BLOCK_INVALID_OFFSET)
+ WT_RET(__ckpt_add_blkmod_entry(session, blk_mod, ci->alloc.offset, ci->alloc.size));
+ if (ci->discard.offset != WT_BLOCK_INVALID_OFFSET)
+ WT_RET(__ckpt_add_blkmod_entry(session, blk_mod, ci->discard.offset, ci->discard.size));
+ if (ci->avail.offset != WT_BLOCK_INVALID_OFFSET)
+ WT_RET(__ckpt_add_blkmod_entry(session, blk_mod, ci->avail.offset, ci->avail.size));
+ }
+ return (0);
+}
+
+/*
* __ckpt_update --
* Update a checkpoint.
*/
static int
-__ckpt_update(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, WT_CKPT *ckpt,
- WT_BLOCK_CKPT *ci, bool is_live)
+__ckpt_update(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, WT_CKPT *ckpt, WT_BLOCK_CKPT *ci)
{
WT_DECL_ITEM(a);
WT_DECL_RET;
uint8_t *endp;
+ bool is_live;
+
+ is_live = F_ISSET(ckpt, WT_CKPT_ADD);
#ifdef HAVE_DIAGNOSTIC
/* Check the extent list combinations for overlaps. */
@@ -723,6 +787,13 @@ __ckpt_update(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, WT_C
}
/*
+ * If this is the live system, we need to record the list of blocks written for this checkpoint
+ * (including the blocks we allocated to write the extent lists).
+ */
+ if (F_ISSET(ckpt, WT_CKPT_BLOCK_MODS))
+ WT_RET(__ckpt_add_blk_mods(session, ckpt, ci));
+
+ /*
* Set the file size for the live system.
*
* !!!
diff --git a/src/third_party/wiredtiger/src/btree/bt_compact.c b/src/third_party/wiredtiger/src/btree/bt_compact.c
index d396f87ab49..42c9d92c21b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_compact.c
+++ b/src/third_party/wiredtiger/src/btree/bt_compact.c
@@ -10,31 +10,19 @@
/*
* __compact_rewrite --
- * Return if a page needs to be re-written.
+ * Return if a modified page needs to be re-written.
*/
static int
__compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
WT_BM *bm;
WT_MULTI *multi;
- WT_PAGE *page;
WT_PAGE_MODIFY *mod;
- size_t addr_size;
uint32_t i;
- const uint8_t *addr;
*skipp = true; /* Default skip. */
bm = S2BT(session)->bm;
- page = ref->page;
-
- /* If the page is clean, test the original addresses. */
- if (__wt_page_evict_clean(page)) {
- __wt_ref_info(session, ref, &addr, &addr_size, NULL);
- if (addr == NULL)
- return (0);
- return (bm->compact_page_skip(bm, session, addr, addr_size, skipp));
- }
/*
* If the page is a replacement, test the replacement addresses. Ignore empty pages, they get
@@ -44,7 +32,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
* looking at it, so the page modified structure may appear at any time (but cannot disappear).
* We've confirmed there is a page modify structure, it's OK to look at it.
*/
- mod = page->modify;
+ mod = ref->page->modify;
if (mod->rec_result == WT_PM_REC_REPLACE)
return (
bm->compact_page_skip(bm, session, mod->mod_replace.addr, mod->mod_replace.size, skipp));
@@ -63,17 +51,32 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
/*
* __compact_rewrite_lock --
- * Lock out checkpoints and return if a page needs to be re-written.
+ * Return if a page needs to be re-written.
*/
static int
__compact_rewrite_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
+ WT_BM *bm;
WT_BTREE *btree;
WT_DECL_RET;
+ size_t addr_size;
+ const uint8_t *addr;
*skipp = true; /* Default skip. */
btree = S2BT(session);
+ bm = btree->bm;
+
+ /*
+ * If the page is clean, test the original addresses. We're holding a hazard pointer on the
+ * page, so we're safe from eviction, no additional locking is required.
+ */
+ if (__wt_page_evict_clean(ref->page)) {
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
+ if (addr == NULL)
+ return (0);
+ return (bm->compact_page_skip(bm, session, addr, addr_size, skipp));
+ }
/*
* Reviewing in-memory pages requires looking at page reconciliation results, because we care
@@ -83,8 +86,8 @@ __compact_rewrite_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
*
* There are two ways we call reconciliation: checkpoints and eviction. Get the tree's flush
* lock which blocks threads writing pages for checkpoints. If checkpoint is holding the lock,
- * quit working this file, we'll visit it again in our next pass. We don't have to worry about
- * eviction, we're holding a hazard pointer on the WT_REF, it's not going anywhere.
+ * quit working this file, we'll visit it again in our next pass. As noted above, we're holding
+ * a hazard pointer on the page, we're safe from eviction.
*/
WT_RET(__wt_spin_trylock(session, &btree->flush_lock));
@@ -225,12 +228,12 @@ int
__wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
{
WT_BM *bm;
- WT_DECL_RET;
size_t addr_size;
- const uint8_t *addr;
- u_int type;
+ uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE];
+ bool is_leaf;
WT_UNUSED(context);
+
*skipp = false; /* Default to reading */
/*
@@ -254,29 +257,17 @@ __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, void *context, boo
return (0);
/*
- * There's nothing to prevent the WT_REF state from changing underfoot, which can change its
- * address. For example, the WT_REF address might reference an on-page cell, and page eviction
- * can free that memory. Lock the WT_REF so we can look at its address.
- */
- if (!WT_REF_CAS_STATE(session, ref, WT_REF_DISK, WT_REF_LOCKED))
- return (0);
-
- /*
- * The page is on disk, so there had better be an address; assert that fact, test at run-time to
- * avoid the core dump.
- *
* Internal pages must be read to walk the tree; ask the block-manager if it's useful to rewrite
* leaf pages, don't do the I/O if a rewrite won't help.
+ *
+ * There can be NULL WT_REF.addr values, where the underlying call won't return a valid address.
+ * The "it's a leaf page" return is enough to confirm we have a valid address for a leaf page.
*/
- __wt_ref_info(session, ref, &addr, &addr_size, &type);
- WT_ASSERT(session, addr != NULL);
- if (addr != NULL && type != WT_CELL_ADDR_INT) {
+ __wt_ref_info_lock(session, ref, addr, &addr_size, &is_leaf);
+ if (is_leaf) {
bm = S2BT(session)->bm;
- ret = bm->compact_page_skip(bm, session, addr, addr_size, skipp);
+ return (bm->compact_page_skip(bm, session, addr, addr_size, skipp));
}
- /* Reset the WT_REF state. */
- WT_REF_SET_STATE(ref, WT_REF_DISK);
-
- return (ret);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 0f6b569b755..e4f6b17786c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -473,17 +473,27 @@ __wt_btcur_reset(WT_CURSOR_BTREE *cbt)
* Search and return exact matching records only, including uncommitted ones.
*/
int
-__wt_btcur_search_uncommitted(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
+__wt_btcur_search_uncommitted(WT_CURSOR *cursor, WT_UPDATE **updp)
{
WT_BTREE *btree;
- WT_CURSOR *cursor;
+ WT_CURSOR_BTREE *cbt;
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
+ *updp = NULL;
+
+ cbt = (WT_CURSOR_BTREE *)cursor;
btree = cbt->btree;
- cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cursor->session;
- *updp = upd = NULL; /* -Wuninitialized */
+ upd = NULL; /* -Wuninitialized */
+
+ /*
+ * Not calling the cursor initialization functions, we don't want to be tapped for eviction nor
+ * do we want other standard cursor semantics like snapshots, just discard the hazard pointer
+ * from the last operation. This also depends on the fact we're not setting the cursor's active
+ * flag, this is really a special chunk of code and not to be modified without careful thought.
+ */
+ WT_RET(__cursor_reset(cbt));
WT_RET(btree->type == BTREE_ROW ? __cursor_row_search(cbt, false, NULL, NULL) :
__cursor_col_search(cbt, NULL, NULL));
diff --git a/src/third_party/wiredtiger/src/btree/bt_import.c b/src/third_party/wiredtiger/src/btree/bt_import.c
index 02f023567f5..9d63a3cc959 100644
--- a/src/third_party/wiredtiger/src/btree/bt_import.c
+++ b/src/third_party/wiredtiger/src/btree/bt_import.c
@@ -91,12 +91,14 @@ __wt_import(WT_SESSION_IMPL *session, const char *uri)
* Build and flatten the metadata and the checkpoint list, then insert it into the metadata for
* this file.
*
- * Strip out the checkpoint-LSN, an imported file isn't associated with any log files. Assign a
+ * Strip out any incremental backup information, an imported file has not been part of a backup.
+ * Strip out the checkpoint LSN, an imported file isn't associated with any log files. Assign a
* unique file ID.
*/
filecfg[1] = a->data;
filecfg[2] = checkpoint_list;
- filecfg[3] = "checkpoint_lsn=";
+ filecfg[3] = "checkpoint_backup_info=";
+ filecfg[4] = "checkpoint_lsn=";
WT_WITH_SCHEMA_LOCK(session,
ret = __wt_snprintf(fileid, sizeof(fileid), "id=%" PRIu32, ++S2C(session)->next_file_id));
WT_ERR(ret);
diff --git a/src/third_party/wiredtiger/src/btree/bt_misc.c b/src/third_party/wiredtiger/src/btree/bt_misc.c
index d06b0b33bf6..4ff9afa3b66 100644
--- a/src/third_party/wiredtiger/src/btree/bt_misc.c
+++ b/src/third_party/wiredtiger/src/btree/bt_misc.c
@@ -85,27 +85,6 @@ __wt_cell_type_string(uint8_t type)
}
/*
- * __wt_page_addr_string --
- * Figure out a page's "address" and load a buffer with a printable, nul-terminated
- * representation of that address.
- */
-const char *
-__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf)
-{
- size_t addr_size;
- const uint8_t *addr;
-
- if (__wt_ref_is_root(ref)) {
- buf->data = "[Root]";
- buf->size = strlen("[Root]");
- return (buf->data);
- }
-
- __wt_ref_info(session, ref, &addr, &addr_size, NULL);
- return (__wt_addr_string(session, addr, addr_size, buf));
-}
-
-/*
* __wt_addr_string --
* Load a buffer with a printable, nul-terminated representation of an address.
*/
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index ddf47a43c2e..e8355692ce2 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -639,9 +639,7 @@ read:
session, true, !F_ISSET(&session->txn, WT_TXN_HAS_ID), NULL));
WT_RET(__page_read(session, ref, flags));
- /*
- * We just read a page, don't evict it before we have a chance to use it.
- */
+ /* We just read a page, don't evict it before we have a chance to use it. */
evict_skip = true;
/*
@@ -713,10 +711,13 @@ read:
__wt_tree_modify_set(session);
/*
- * Check if the page requires forced eviction.
+ * If a page has grown too large, we'll try and forcibly evict it before making it
+ * available to the caller. There are a variety of cases where that's not possible.
+ * Don't involve a thread resolving a transaction in forced eviction, they're usually
+ * making the problem better.
*/
- if (evict_skip || LF_ISSET(WT_READ_NO_SPLIT) || btree->evict_disabled > 0 ||
- btree->lsm_primary)
+ if (evict_skip || F_ISSET(session, WT_SESSION_RESOLVING_TXN) ||
+ LF_ISSET(WT_READ_NO_SPLIT) || btree->evict_disabled > 0 || btree->lsm_primary)
goto skip_evict;
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index d918b5d856c..2ba005bc096 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -250,13 +250,17 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp2));
/*
- * Step 1: Inform the underlying block manager that we're salvaging the file.
+ * !!! (Don't format the comment.)
+ * Step 1:
+ * Inform the underlying block manager that we're salvaging the file.
*/
WT_ERR(bm->salvage_start(bm, session));
/*
- * Step 2: Read the file and build in-memory structures that reference any leaf or overflow
- * page. Any pages other than leaf or overflow pages are added to the free list.
+ * !!! (Don't format the comment.)
+ * Step 2:
+ * Read the file and build in-memory structures that reference any leaf or overflow page. Any
+ * pages other than leaf or overflow pages are added to the free list.
*
* Turn off read checksum and verification error messages while we're reading the file, we
* expect to see corrupted blocks.
@@ -267,60 +271,54 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(ret);
/*
+ * !!! (Don't format the comment.)
* Step 3:
- * Discard any page referencing a non-existent overflow page. We do
- * this before checking overlapping key ranges on the grounds that a
- * bad key range we can use is better than a terrific key range that
- * references pages we don't have. On the other hand, we subsequently
- * discard key ranges where there are better overlapping ranges, and
- * it would be better if we let the availability of an overflow value
- * inform our choices as to the key ranges we select, ideally on a
- * per-key basis.
+ * Discard any page referencing a non-existent overflow page. We do this before checking
+ * overlapping key ranges on the grounds that a bad key range we can use is better than a
+ * terrific key range that references pages we don't have. On the other hand, we subsequently
+ * discard key ranges where there are better overlapping ranges, and it would be better if
+ * we let the availability of an overflow value inform our choices as to the key ranges we
+ * select, ideally on a per-key basis.
*
- * A complicating problem is found in variable-length column-store
- * objects, where we potentially split key ranges within RLE units.
- * For example, if there's a page with rows 15-20 and we later find
- * row 17 with a larger LSN, the range splits into 3 chunks, 15-16,
- * 17, and 18-20. If rows 15-20 were originally a single value (an
- * RLE of 6), and that record is an overflow record, we end up with
- * two chunks, both of which want to reference the same overflow value.
+ * A complicating problem is found in variable-length column-store objects, where we
+ * potentially split key ranges within RLE units. For example, if there's a page with rows
+ * 15-20 and we later find row 17 with a larger LSN, the range splits into 3 chunks, 15-16,
+ * 17, and 18-20. If rows 15-20 were originally a single value (an RLE of 6), and that
+ * record is an overflow record, we end up with two chunks, both of which want to reference
+ * the same overflow value.
*
- * Instead of the approach just described, we're first discarding any
- * pages referencing non-existent overflow pages, then we're reviewing
- * our key ranges and discarding any that overlap. We're doing it that
- * way for a few reasons: absent corruption, missing overflow items are
- * strong arguments the page was replaced (on the other hand, some kind
- * of file corruption is probably why we're here); it's a significant
- * amount of additional complexity to simultaneously juggle overlapping
- * ranges and missing overflow items; finally, real-world applications
- * usually don't have a lot of overflow items, as WiredTiger supports
+ * Instead of the approach just described, we're first discarding any pages referencing
+ * non-existent overflow pages, then we're reviewing our key ranges and discarding any
+ * that overlap. We're doing it that way for a few reasons: absent corruption, missing
+ * overflow items are strong arguments the page was replaced (on the other hand, some kind
+ * of file corruption is probably why we're here); it's a significant amount of additional
+ * complexity to simultaneously juggle overlapping ranges and missing overflow items; finally,
+ * real-world applications usually don't have a lot of overflow items, as WiredTiger supports
* very large page sizes, overflow items shouldn't be common.
*
* Step 4:
- * Add unreferenced overflow page blocks to the free list so they are
- * reused immediately.
+ * Add unreferenced overflow page blocks to the free list so they are reused immediately.
*/
WT_ERR(__slvg_ovfl_reconcile(session, ss));
WT_ERR(__slvg_ovfl_discard(session, ss));
/*
+ * !!! (Don't format the comment.)
* Step 5:
- * Walk the list of pages looking for overlapping ranges to resolve.
- * If we find a range that needs to be resolved, set a global flag
- * and a per WT_TRACK flag on the pages requiring modification.
+ * Walk the list of pages looking for overlapping ranges to resolve. If we find a range
+ * that needs to be resolved, set a global flag and a per WT_TRACK flag on the pages requiring
+ * modification.
*
* This requires sorting the page list by key, and secondarily by LSN.
*
* !!!
- * It's vanishingly unlikely and probably impossible for fixed-length
- * column-store files to have overlapping key ranges. It's possible
- * for an entire key range to go missing (if a page is corrupted and
- * lost), but because pages can't split, it shouldn't be possible to
- * find pages where the key ranges overlap. That said, we check for
- * it and clean up after it in reconciliation because it doesn't cost
- * much and future column-store formats or operations might allow for
- * fixed-length format ranges to overlap during salvage, and I don't
- * want to have to retrofit the code later.
+ * It's vanishingly unlikely and probably impossible for fixed-length column-store files
+ * to have overlapping key ranges. It's possible for an entire key range to go missing (if
+ * a page is corrupted and lost), but because pages can't split, it shouldn't be possible to
+ * find pages where the key ranges overlap. That said, we check for it and clean up after
+ * it in reconciliation because it doesn't cost much and future column-store formats or
+ * operations might allow for fixed-length format ranges to overlap during salvage, and I
+ * don't want to have to retrofit the code later.
*/
__wt_qsort(ss->pages, (size_t)ss->pages_next, sizeof(WT_TRACK *), __slvg_trk_compare_key);
if (ss->page_type == WT_PAGE_ROW_LEAF)
@@ -329,8 +327,10 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__slvg_col_range(session, ss));
/*
- * Step 6: We may have lost key ranges in column-store databases, that is, some part of the
- * record number space is gone; look for missing ranges.
+ * !!! (Don't format the comment.)
+ * Step 6:
+ * We may have lost key ranges in column-store databases, that is, some part of the record
+ * number space is gone; look for missing ranges.
*/
switch (ss->page_type) {
case WT_PAGE_COL_FIX:
@@ -342,8 +342,10 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[])
}
/*
- * Step 7: Build an internal page that references all of the leaf pages, and write it, as well
- * as any merged pages, to the file.
+ * !!! (Don't format the comment.)
+ * Step 7:
+ * Build an internal page that references all of the leaf pages, and write it, as well as any
+ * merged pages, to the file.
*
* Count how many leaf pages we have (we could track this during the array shuffling/splitting,
* but that's a lot harder).
@@ -365,25 +367,31 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[])
}
/*
- * Step 8: If we had to merge key ranges, we have to do a final pass through the leaf page array
- * and discard file pages used during key merges. We can't do it earlier: if we free'd the leaf
- * pages we're merging as we merged them, the write of subsequent leaf pages or the internal
- * page might allocate those free'd file blocks, and if the salvage run subsequently fails, we'd
- * have overwritten pages used to construct the final key range. In other words, if the salvage
- * run fails, we don't want to overwrite data the next salvage run might need.
+ * !!! (Don't format the comment.)
+ * Step 8:
+ * If we had to merge key ranges, we have to do a final pass through the leaf page array
+ * and discard file pages used during key merges. We can't do it earlier: if we free'd the
+ * leaf pages we're merging as we merged them, the write of subsequent leaf pages or the
+ * internal page might allocate those free'd file blocks, and if the salvage run subsequently
+ * fails, we'd have overwritten pages used to construct the final key range. In other words,
+ * if the salvage run fails, we don't want to overwrite data the next salvage run might need.
*/
if (ss->merge_free)
WT_ERR(__slvg_merge_block_free(session, ss));
/*
- * Step 9: Evict any newly created root page, creating a checkpoint.
+ * !!! (Don't format the comment.)
+ * Step 9:
+ * Evict any newly created root page, creating a checkpoint.
*/
WT_ERR(__slvg_checkpoint(session, &ss->root_ref));
-/*
- * Step 10: Inform the underlying block manager that we're done.
- */
err:
+ /*
+ * !!! (Don't format the comment.)
+ * Step 10:
+ * Inform the underlying block manager that we're done.
+ */
WT_TRET(bm->salvage_end(bm, session));
/* Discard any root page we created. */
@@ -714,40 +722,89 @@ __slvg_trk_leaf_ovfl(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_TRA
}
/*
+ * !!! (Don't format the comment.)
+ * When pages split, the key range is split across multiple pages. If not all
+ * of the old versions of the page are overwritten, or not all of the new pages
+ * are written, or some of the pages are corrupted, salvage will read different
+ * pages with overlapping key ranges, at different LSNs.
+ *
+ * We salvage all of the key ranges we find, at the latest LSN value: this means
+ * we may resurrect pages of deleted items, as page deletion doesn't write leaf
+ * pages and salvage will read and instantiate the contents of an old version of
+ * the deleted page.
+ *
+ * The leaf page array is sorted in key order, and secondarily on LSN: what this
+ * means is that for each new key range, the first page we find is the best page
+ * for that key. The process is to walk forward from each page until we reach a
+ * page with a starting key after the current page's stopping key.
+ *
+ * For each of page, check to see if they overlap the current page's key range.
+ * If they do, resolve the overlap. Because WiredTiger rarely splits pages,
+ * overlap resolution usually means discarding a page because the key ranges
+ * are the same, and one of the pages is simply an old version of the other.
+ *
+ * However, it's possible more complex resolution is necessary. For example,
+ * here's an improbably complex list of page ranges and LSNs:
+ *
+ * Page Range LSN
+ * 30 A-G 3
+ * 31 C-D 4
+ * 32 B-C 5
+ * 33 C-F 6
+ * 34 C-D 7
+ * 35 F-M 8
+ * 36 H-O 9
+ *
+ * We walk forward from each page reviewing all other pages in the array that
+ * overlap the range. For each overlap, the current or the overlapping
+ * page is updated so the page with the most recent information for any range
+ * "owns" that range. Here's an example for page 30.
+ *
+ * Review page 31: because page 31 has the range C-D and a higher LSN than page
+ * 30, page 30 would "split" into two ranges, A-C and E-G, conceding the C-D
+ * range to page 31. The new track element would be inserted into array with
+ * the following result:
+ *
+ * Page Range LSN
+ * 30 A-C 3 << Changed WT_TRACK element
+ * 31 C-D 4
+ * 32 B-C 5
+ * 33 C-F 6
+ * 34 C-D 7
+ * 30 E-G 3 << New WT_TRACK element
+ * 35 F-M 8
+ * 36 H-O 9
+ *
+ * Continue the review of the first element, using its new values.
+ *
+ * Review page 32: because page 31 has the range B-C and a higher LSN than page
+ * 30, page 30's A-C range would be truncated, conceding the B-C range to page
+ * 32.
+ * 30 A-B 3
+ * E-G 3
+ * 31 C-D 4
+ * 32 B-C 5
+ * 33 C-F 6
+ * 34 C-D 7
+ *
+ * Review page 33: because page 33 has a starting key (C) past page 30's ending
+ * key (B), we stop evaluating page 30's A-B range, as there can be no further
+ * overlaps.
+ *
+ * This process is repeated for each page in the array.
+ *
+ * When page 33 is processed, we'd discover that page 33's C-F range overlaps
+ * page 30's E-G range, and page 30's E-G range would be updated, conceding the
+ * E-F range to page 33.
+ *
+ * This is not computationally expensive because we don't walk far forward in
+ * the leaf array because it's sorted by starting key, and because WiredTiger
+ * splits are rare, the chance of finding the kind of range overlap requiring
+ * re-sorting the array is small.
+ */
+/*
* __slvg_col_range --
- * Figure out the leaf pages we need and free the leaf pages we don't. When pages split, the key
- * range is split across multiple pages. If not all of the old versions of the page are
- * overwritten, or not all of the new pages are written, or some of the pages are corrupted,
- * salvage will read different pages with overlapping key ranges, at different LSNs. We salvage
- * all of the key ranges we find, at the latest LSN value: this means we may resurrect pages of
- * deleted items, as page deletion doesn't write leaf pages and salvage will read and
- * instantiate the contents of an old version of the deleted page. The leaf page array is sorted
- * in key order, and secondarily on LSN: what this means is that for each new key range, the
- * first page we find is the best page for that key. The process is to walk forward from each
- * page until we reach a page with a starting key after the current page's stopping key. For
- * each of page, check to see if they overlap the current page's key range. If they do, resolve
- * the overlap. Because WiredTiger rarely splits pages, overlap resolution usually means
- * discarding a page because the key ranges are the same, and one of the pages is simply an old
- * version of the other. However, it's possible more complex resolution is necessary. For
- * example, here's an improbably complex list of page ranges and LSNs: Page Range LSN 30 A-G 3
- * 31 C-D 4 32 B-C 5 33 C-F 6 34 C-D 7 35 F-M 8 36 H-O 9 We walk forward from each page
- * reviewing all other pages in the array that overlap the range. For each overlap, the current
- * or the overlapping page is updated so the page with the most recent information for any range
- * "owns" that range. Here's an example for page 30. Review page 31: because page 31 has the
- * range C-D and a higher LSN than page 30, page 30 would "split" into two ranges, A-C and E-G,
- * conceding the C-D range to page 31. The new track element would be inserted into array with
- * the following result: Page Range LSN 30 A-C 3 << Changed WT_TRACK element 31 C-D 4 32 B-C 5
- * 33 C-F 6 34 C-D 7 30 E-G 3 << New WT_TRACK element 35 F-M 8 36 H-O 9 Continue the review of
- * the first element, using its new values. Review page 32: because page 31 has the range B-C
- * and a higher LSN than page 30, page 30's A-C range would be truncated, conceding the B-C
- * range to page 32. 30 A-B 3 E-G 3 31 C-D 4 32 B-C 5 33 C-F 6 34 C-D 7 Review page 33: because
- * page 33 has a starting key (C) past page 30's ending key (B), we stop evaluating page 30's
- * A-B range, as there can be no further overlaps. This process is repeated for each page in the
- * array. When page 33 is processed, we'd discover that page 33's C-F range overlaps page 30's
- * E-G range, and page 30's E-G range would be updated, conceding the E-F range to page 33. This
- * is not computationally expensive because we don't walk far forward in the leaf array because
- * it's sorted by starting key, and because WiredTiger splits are rare, the chance of finding
- * the kind of range overlap requiring re-sorting the array is small.
+ * Figure out the leaf pages we need and free the leaf pages we don't.
*/
static int
__slvg_col_range(WT_SESSION_IMPL *session, WT_STUFF *ss)
@@ -820,6 +877,7 @@ __slvg_col_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s
__wt_addr_string(session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2));
/*
+ * !!! (Don't format the comment.)
* The key ranges of two WT_TRACK pages in the array overlap -- choose
* the ranges we're going to take from each.
*
@@ -919,15 +977,12 @@ __slvg_col_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s
if (a_trk->trk_gen > b_trk->trk_gen) {
delete_b:
/*
- * After page and overflow reconciliation, one (and only one)
- * page can reference an overflow record. But, if we split a
- * page into multiple chunks, any of the chunks might own any
- * of the backing overflow records, so overflow records won't
- * normally be discarded until after the merge phase completes.
- * (The merge phase is where the final pages are written, and
- * we figure out which overflow records are actually used.)
- * If freeing a chunk and there are no other references to the
- * underlying shared information, the overflow records must be
+ * After page and overflow reconciliation, one (and only one) page can reference an overflow
+ * record. But, if we split a page into multiple chunks, any of the chunks might own any of
+ * the backing overflow records, so overflow records won't normally be discarded until after
+ * the merge phase completes. (The merge phase is where the final pages are written, and we
+ * figure out which overflow records are actually used.) If freeing a chunk and there are no
+ * other references to the underlying shared information, the overflow records must be
* useless, discard them to keep the final file size small.
*/
if (b_trk->shared->ref == 1)
@@ -1009,11 +1064,11 @@ __slvg_col_trk_update_start(uint32_t slot, WT_STUFF *ss)
* longer be in the right location.
*
* For example, imagine page #1 has the key range 30-50, it split, and
- * we wrote page #2 with key range 30-40, and page #3 key range with
- * 40-50, where pages #2 and #3 have larger LSNs than page #1. When the
+ * we wrote page #2 with key range 30-40, and page #3 key range with 40-50, where pages #2 and
+ * #3 have larger LSNs than page #1. When the
* key ranges were sorted, page #2 came first, then page #1 (because of
- * their earlier start keys than page #3), and page #2 came before page
- * #1 because of its LSN. When we resolve the overlap between page #2
+ * their earlier start keys than page #3), and page #2 came before page #1 because of its LSN.
+ * When we resolve the overlap between page #2
* and page #1, we truncate the initial key range of page #1, and it now
* sorts after page #3, because it has the same starting key of 40, and
* a lower LSN.
@@ -1124,15 +1179,12 @@ __slvg_col_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF
WT_REF_SET_STATE(ref, WT_REF_DISK);
/*
- * If the page's key range is unmodified from when we read it
- * (in other words, we didn't merge part of this page with
- * another page), we can use the page without change, and the
- * only thing we need to do is mark all overflow records the
- * page references as in-use.
+ * If the page's key range is unmodified from when we read it (in other words, we didn't
+ * merge part of this page with another page), we can use the page without change, and the
+ * only thing we need to do is mark all overflow records the page references as in-use.
*
- * If we did merge with another page, we have to build a page
- * reflecting the updated key range. Note, that requires an
- * additional pass to free the merge page's backing blocks.
+ * If we did merge with another page, we have to build a page reflecting the updated key
+ * range. Note, that requires an additional pass to free the merge page's backing blocks.
*/
if (F_ISSET(trk, WT_TRACK_MERGE)) {
ss->merge_free = true;
@@ -1210,13 +1262,11 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
}
/*
- * We can't discard the original blocks associated with this page now.
- * (The problem is we don't want to overwrite any original information
- * until the salvage run succeeds -- if we free the blocks now, the next
- * merge page we write might allocate those blocks and overwrite them,
- * and should the salvage run eventually fail, the original information
- * would have been lost.) Clear the reference addr so eviction doesn't
- * free the underlying blocks.
+ * We can't discard the original blocks associated with this page now. (The problem is we don't
+ * want to overwrite any original information until the salvage run succeeds -- if we free the
+ * blocks now, the next merge page we write might allocate those blocks and overwrite them, and
+ * should the salvage run eventually fail, the original information would have been lost.) Clear
+ * the reference addr so eviction doesn't free the underlying blocks.
*/
__wt_ref_addr_free(session, ref);
@@ -1410,8 +1460,9 @@ __slvg_row_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s
__wt_addr_string(session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2));
/*
- * The key ranges of two WT_TRACK pages in the array overlap -- choose
- * the ranges we're going to take from each.
+ * !!! (Don't format the comment.)
+ * The key ranges of two WT_TRACK pages in the array overlap -- choose the ranges we're going to
+ * take from each.
*
* We can think of the overlap possibilities as 11 different cases:
*
@@ -1432,11 +1483,11 @@ __slvg_row_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s
* #10 AAAAAA A is middle of B
* #11 AAAAAAAAAA A is a suffix of B
*
- * Note the leaf page array was sorted by key and a_trk appears earlier
- * in the array than b_trk, so cases #2/8, #10 and #11 are impossible.
+ * Note the leaf page array was sorted by key and a_trk appears earlier in the array than b_trk, so
+ * cases #2/8, #10 and #11 are impossible.
*
- * Finally, there's one additional complicating factor -- final ranges
- * are assigned based on the page's LSN.
+ * Finally, there's one additional complicating factor -- final ranges are assigned based on the
+ * page's LSN.
*/
#define A_TRK_START (&a_trk->row_start)
#define A_TRK_STOP (&a_trk->row_stop)
@@ -1514,15 +1565,12 @@ __slvg_row_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s
if (a_trk->trk_gen > b_trk->trk_gen) {
delete_b:
/*
- * After page and overflow reconciliation, one (and only one)
- * page can reference an overflow record. But, if we split a
- * page into multiple chunks, any of the chunks might own any
- * of the backing overflow records, so overflow records won't
- * normally be discarded until after the merge phase completes.
- * (The merge phase is where the final pages are written, and
- * we figure out which overflow records are actually used.)
- * If freeing a chunk and there are no other references to the
- * underlying shared information, the overflow records must be
+ * After page and overflow reconciliation, one (and only one) page can reference an overflow
+ * record. But, if we split a page into multiple chunks, any of the chunks might own any of
+ * the backing overflow records, so overflow records won't normally be discarded until after
+ * the merge phase completes. (The merge phase is where the final pages are written, and we
+ * figure out which overflow records are actually used.) If freeing a chunk and there are no
+ * other references to the underlying shared information, the overflow records must be
* useless, discard them to keep the final file size small.
*/
if (b_trk->shared->ref == 1)
@@ -1617,11 +1665,11 @@ __slvg_row_trk_update_start(WT_SESSION_IMPL *session, WT_ITEM *stop, uint32_t sl
* longer be in the right location.
*
* For example, imagine page #1 has the key range 30-50, it split, and
- * we wrote page #2 with key range 30-40, and page #3 key range with
- * 40-50, where pages #2 and #3 have larger LSNs than page #1. When the
+ * we wrote page #2 with key range 30-40, and page #3 key range with 40-50, where pages #2 and
+ * #3 have larger LSNs than page #1. When the
* key ranges were sorted, page #2 came first, then page #1 (because of
- * their earlier start keys than page #3), and page #2 came before page
- * #1 because of its LSN. When we resolve the overlap between page #2
+ * their earlier start keys than page #3), and page #2 came before page #1 because of its LSN.
+ * When we resolve the overlap between page #2
* and page #1, we truncate the initial key range of page #1, and it now
* sorts after page #3, because it has the same starting key of 40, and
* a lower LSN.
@@ -1701,8 +1749,10 @@ __slvg_row_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF
WT_REF *ref, **refp;
WT_TRACK *trk;
uint32_t i;
+ u_int decr_cnt;
addr = NULL;
+ decr_cnt = 0;
/* Allocate a row-store root (internal) page and fill it in. */
WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_INT, leaf_cnt, true, &page));
@@ -1736,15 +1786,12 @@ __slvg_row_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF
WT_REF_SET_STATE(ref, WT_REF_DISK);
/*
- * If the page's key range is unmodified from when we read it
- * (in other words, we didn't merge part of this page with
- * another page), we can use the page without change, and the
- * only thing we need to do is mark all overflow records the
- * page references as in-use.
+ * If the page's key range is unmodified from when we read it (in other words, we didn't
+ * merge part of this page with another page), we can use the page without change, and the
+ * only thing we need to do is mark all overflow records the page references as in-use.
*
- * If we did merge with another page, we have to build a page
- * reflecting the updated key range. Note, that requires an
- * additional pass to free the merge page's backing blocks.
+ * If we did merge with another page, we have to build a page reflecting the updated key
+ * range. Note, that requires an additional pass to free the merge page's backing blocks.
*/
if (F_ISSET(trk, WT_TRACK_MERGE)) {
ss->merge_free = true;
@@ -1757,13 +1804,30 @@ __slvg_row_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF
WT_ERR(__slvg_ovfl_ref_all(session, trk));
}
++ref;
+
+ /*
+ * !!!
+ * There's a risk the page we're building is too large for the cache. The right fix would be
+ * to write the keys out to an on-disk file and delay allocating the page image until we're
+ * ready to reconcile the new root page, and then read keys in from that backing file during
+ * the reconciliation of the root page. For now, make sure the eviction threads don't see us
+ * as a threat.
+ */
+ if (page->memory_footprint > WT_MEGABYTE) {
+ ++decr_cnt;
+ __wt_cache_page_inmem_decr(session, page, WT_MEGABYTE);
+ }
}
+ if (decr_cnt != 0)
+ __wt_cache_page_inmem_incr(session, page, decr_cnt * WT_MEGABYTE);
__wt_root_ref_init(session, &ss->root_ref, page, false);
if (0) {
err:
__wt_free(session, addr);
+ if (decr_cnt != 0)
+ __wt_cache_page_inmem_incr(session, page, decr_cnt * WT_MEGABYTE);
__wt_page_out(session, &page);
}
return (ret);
@@ -1866,13 +1930,11 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref, WT_S
cookie->skip = skip_start;
/*
- * We can't discard the original blocks associated with this page now.
- * (The problem is we don't want to overwrite any original information
- * until the salvage run succeeds -- if we free the blocks now, the next
- * merge page we write might allocate those blocks and overwrite them,
- * and should the salvage run eventually fail, the original information
- * would have been lost.) Clear the reference addr so eviction doesn't
- * free the underlying blocks.
+ * We can't discard the original blocks associated with this page now. (The problem is we don't
+ * want to overwrite any original information until the salvage run succeeds -- if we free the
+ * blocks now, the next merge page we write might allocate those blocks and overwrite them, and
+ * should the salvage run eventually fail, the original information would have been lost.) Clear
+ * the reference addr so eviction doesn't free the underlying blocks.
*/
__wt_ref_addr_free(session, ref);
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 4212d820e60..bba9f370682 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -778,7 +778,13 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, uint32_t
*/
for (i = 0, deleted_refs = scr->mem; i < deleted_entries; ++i) {
next_ref = pindex->index[deleted_refs[i]];
- WT_ASSERT(session, next_ref->state == WT_REF_SPLIT);
+#ifdef HAVE_DIAGNOSTIC
+ {
+ uint32_t ref_state;
+ WT_ORDERED_READ(ref_state, next_ref->state);
+ WT_ASSERT(session, ref_state == WT_REF_LOCKED || ref_state == WT_REF_SPLIT);
+ }
+#endif
/*
* We set the WT_REF to split, discard it, freeing any resources it holds.
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 175a4010db0..2c05b742ec8 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -303,6 +303,27 @@ __verify_checkpoint_reset(WT_VSTUFF *vs)
}
/*
+ * __verify_addr_string --
+ * Figure out a page's "address" and load a buffer with a printable, nul-terminated
+ * representation of that address.
+ */
+static const char *
+__verify_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf)
+{
+ size_t addr_size;
+ const uint8_t *addr;
+
+ if (__wt_ref_is_root(ref)) {
+ buf->data = "[Root]";
+ buf->size = strlen("[Root]");
+ return (buf->data);
+ }
+
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
+ return (__wt_addr_string(session, addr, addr_size, buf));
+}
+
+/*
* __verify_addr_ts --
* Check an address block's timestamps.
*/
@@ -315,26 +336,26 @@ __verify_addr_ts(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *unpack,
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has a newest stop "
"timestamp of 0",
- __wt_page_addr_string(session, ref, vs->tmp1));
+ __verify_addr_string(session, ref, vs->tmp1));
if (unpack->oldest_start_ts > unpack->newest_stop_ts)
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has an oldest start "
"timestamp %s newer than its newest stop timestamp %s",
- __wt_page_addr_string(session, ref, vs->tmp1),
+ __verify_addr_string(session, ref, vs->tmp1),
__wt_timestamp_to_string(unpack->oldest_start_ts, ts_string[0]),
__wt_timestamp_to_string(unpack->newest_stop_ts, ts_string[1]));
if (unpack->newest_stop_txn == WT_TXN_NONE)
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has a newest stop "
"transaction of 0",
- __wt_page_addr_string(session, ref, vs->tmp1));
+ __verify_addr_string(session, ref, vs->tmp1));
if (unpack->oldest_start_txn > unpack->newest_stop_txn)
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has an oldest start "
"transaction (%" PRIu64
") newer than its newest stop "
"transaction (%" PRIu64 ")",
- __wt_page_addr_string(session, ref, vs->tmp1), unpack->oldest_start_txn,
+ __verify_addr_string(session, ref, vs->tmp1), unpack->oldest_start_txn,
unpack->newest_stop_txn);
return (0);
}
@@ -363,12 +384,12 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *addr_unpack
unpack = &_unpack;
- __wt_verbose(session, WT_VERB_VERIFY, "%s %s", __wt_page_addr_string(session, ref, vs->tmp1),
+ __wt_verbose(session, WT_VERB_VERIFY, "%s %s", __verify_addr_string(session, ref, vs->tmp1),
__wt_page_type_string(page->type));
/* Optionally dump the address. */
if (vs->dump_address)
- WT_RET(__wt_msg(session, "%s %s", __wt_page_addr_string(session, ref, vs->tmp1),
+ WT_RET(__wt_msg(session, "%s %s", __verify_addr_string(session, ref, vs->tmp1),
__wt_page_type_string(page->type)));
/* Track the shape of the tree. */
@@ -427,7 +448,7 @@ recno_chk:
if (recno != vs->record_total + 1)
WT_RET_MSG(session, WT_ERROR, "page at %s has a starting record of %" PRIu64
" when the expected starting record is %" PRIu64,
- __wt_page_addr_string(session, ref, vs->tmp1), recno, vs->record_total + 1);
+ __verify_addr_string(session, ref, vs->tmp1), recno, vs->record_total + 1);
break;
}
switch (page->type) {
@@ -477,7 +498,7 @@ celltype_err:
WT_RET_MSG(session, WT_ERROR,
"page at %s, of type %s, is referenced in "
"its parent by a cell of type %s",
- __wt_page_addr_string(session, ref, vs->tmp1), __wt_page_type_string(page->type),
+ __verify_addr_string(session, ref, vs->tmp1), __wt_page_type_string(page->type),
__wt_cell_type_string(addr_unpack->raw));
break;
}
@@ -512,7 +533,7 @@ celltype_err:
"%s is %" PRIu64
" and the expected "
"starting record number is %" PRIu64,
- entry, __wt_page_addr_string(session, child_ref, vs->tmp1), child_ref->ref_recno,
+ entry, __verify_addr_string(session, child_ref, vs->tmp1), child_ref->ref_recno,
vs->record_total + 1);
}
@@ -594,13 +615,13 @@ __verify_row_int_key_order(
" on the page at %s "
"sorts before the last key appearing on page %s, earlier "
"in the tree: %s, %s",
- entry, __wt_page_addr_string(session, ref, vs->tmp1), (char *)vs->max_addr->data,
+ entry, __verify_addr_string(session, ref, vs->tmp1), (char *)vs->max_addr->data,
__wt_buf_set_printable(session, item.data, item.size, vs->tmp2),
__wt_buf_set_printable(session, vs->max_key->data, vs->max_key->size, vs->tmp3));
/* Update the largest key we've seen to the key just checked. */
WT_RET(__wt_buf_set(session, vs->max_key, item.data, item.size));
- WT_IGNORE_RET_PTR(__wt_page_addr_string(session, ref, vs->max_addr));
+ WT_IGNORE_RET_PTR(__verify_addr_string(session, ref, vs->max_addr));
return (0);
}
@@ -647,14 +668,14 @@ __verify_row_leaf_key_order(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs
"the first key on the page at %s sorts equal to "
"or less than the last key appearing on the page "
"at %s, earlier in the tree: %s, %s",
- __wt_page_addr_string(session, ref, vs->tmp2), (char *)vs->max_addr->data,
+ __verify_addr_string(session, ref, vs->tmp2), (char *)vs->max_addr->data,
__wt_buf_set_printable(session, vs->tmp1->data, vs->tmp1->size, vs->tmp3),
__wt_buf_set_printable(session, vs->max_key->data, vs->max_key->size, vs->tmp4));
}
/* Update the largest key we've seen to the last key on this page. */
WT_RET(__wt_row_leaf_key_copy(session, page, page->pg_row + (page->entries - 1), vs->max_key));
- WT_IGNORE_RET_PTR(__wt_page_addr_string(session, ref, vs->max_addr));
+ WT_IGNORE_RET_PTR(__verify_addr_string(session, ref, vs->max_addr));
return (0);
}
@@ -728,7 +749,7 @@ __verify_ts_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num, c
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s failed verification with %s "
"timestamp of %s, %s the parent's %s timestamp of %s",
- cell_num, __wt_page_addr_string(session, ref, vs->tmp1), ts1_name, ts1_bp,
+ cell_num, __verify_addr_string(session, ref, vs->tmp1), ts1_name, ts1_bp,
gt ? "less than" : "greater than", ts2_name, ts2_bp);
}
@@ -751,7 +772,7 @@ __verify_txn_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num,
"transaction of %" PRIu64
", %s the parent's %s transaction of "
"%" PRIu64,
- cell_num, __wt_page_addr_string(session, ref, vs->tmp1), txn1_name, txn1,
+ cell_num, __verify_addr_string(session, ref, vs->tmp1), txn1_name, txn1,
gt ? "less than" : "greater than", txn2_name, txn2);
}
@@ -794,7 +815,7 @@ __verify_page_cell(
" on page at %s references "
"an overflow item at %s that failed "
"verification",
- cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1),
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
__wt_addr_string(session, unpack.data, unpack.size, vs->tmp2));
break;
}
@@ -812,18 +833,18 @@ __verify_page_cell(
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a "
"newest stop timestamp of 0",
- cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1));
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1));
if (unpack.newest_stop_txn == WT_TXN_NONE)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a "
"newest stop transaction of 0",
- cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1));
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1));
if (unpack.oldest_start_ts > unpack.newest_stop_ts)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has an "
"oldest start timestamp %s newer than "
"its newest stop timestamp %s",
- cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1),
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
__wt_timestamp_to_string(unpack.oldest_start_ts, ts_string[0]),
__wt_timestamp_to_string(unpack.newest_stop_ts, ts_string[1]));
if (unpack.oldest_start_txn > unpack.newest_stop_txn) {
@@ -833,7 +854,7 @@ __verify_page_cell(
") "
"newer than its newest stop transaction "
"(%" PRIu64 ")",
- cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1),
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
unpack.oldest_start_txn, unpack.newest_stop_txn);
}
@@ -858,27 +879,27 @@ __verify_page_cell(
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a stop "
"timestamp of 0",
- cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1));
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1));
if (unpack.start_ts > unpack.stop_ts)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a "
"start timestamp %s newer than its stop "
"timestamp %s",
- cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1),
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
__wt_timestamp_to_string(unpack.start_ts, ts_string[0]),
__wt_timestamp_to_string(unpack.stop_ts, ts_string[1]));
if (unpack.stop_txn == WT_TXN_NONE)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a stop "
"transaction of 0",
- cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1));
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1));
if (unpack.start_txn > unpack.stop_txn)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a "
"start transaction %" PRIu64
"newer than "
"its stop transaction %" PRIu64,
- cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1), unpack.start_txn,
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1), unpack.start_txn,
unpack.stop_txn);
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start", unpack.start_ts,
@@ -903,7 +924,7 @@ __verify_page_cell(
WT_RET_MSG(session, WT_ERROR,
"page at %s, of type %s and referenced in its parent by a "
"cell of type %s, contains overflow items",
- __wt_page_addr_string(session, ref, vs->tmp1), __wt_page_type_string(ref->page->type),
+ __verify_addr_string(session, ref, vs->tmp1), __wt_page_type_string(ref->page->type),
__wt_cell_type_string(addr_unpack->raw));
return (0);
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index 22b3b717fe0..252b245a8dc 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -79,16 +79,10 @@ found:
static inline bool
__ref_is_leaf(WT_SESSION_IMPL *session, WT_REF *ref)
{
- size_t addr_size;
- const uint8_t *addr;
- u_int type;
+ bool is_leaf;
- /*
- * If the page has a disk address, we can crack it to figure out if this page is a leaf page or
- * not. If there's no address, the page isn't on disk and we don't know the page type.
- */
- __wt_ref_info(session, ref, &addr, &addr_size, &type);
- return (addr == NULL ? false : type == WT_CELL_ADDR_LEAF || type == WT_CELL_ADDR_LEAF_NO);
+ __wt_ref_info_lock(session, ref, NULL, NULL, &is_leaf);
+ return (is_leaf);
}
/*
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 629c5316e53..bba4a9b914b 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -415,6 +415,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = {
{"block_allocation", "string", NULL, "choices=[\"first\",\"best\"]", NULL, 0},
{"block_compressor", "string", NULL, NULL, NULL, 0},
{"cache_resident", "boolean", NULL, NULL, NULL, 0}, {"checkpoint", "string", NULL, NULL, NULL, 0},
+ {"checkpoint_backup_info", "string", NULL, NULL, NULL, 0},
{"checkpoint_lsn", "string", NULL, NULL, NULL, 0},
{"checksum", "string", NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", NULL, 0},
{"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0},
@@ -941,10 +942,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
"assert=(commit_timestamp=none,durable_timestamp=none,"
"read_timestamp=none),block_allocation=best,block_compressor=,"
- "cache_resident=false,checkpoint=,checkpoint_lsn=,"
- "checksum=uncompressed,collator=,columns=,dictionary=0,"
- "encryption=(keyid=,name=),format=btree,huffman_key=,"
- "huffman_value=,id=,ignore_in_memory_cache_size=false,"
+ "cache_resident=false,checkpoint=,checkpoint_backup_info=,"
+ "checkpoint_lsn=,checksum=uncompressed,collator=,columns=,"
+ "dictionary=0,encryption=(keyid=,name=),format=btree,huffman_key="
+ ",huffman_value=,id=,ignore_in_memory_cache_size=false,"
"internal_item_max=0,internal_key_max=0,"
"internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
"key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
@@ -953,7 +954,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"prefix_compression=false,prefix_compression_min=4,"
"split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
"value_format=u,version=(major=0,minor=0)",
- confchk_file_meta, 41},
+ confchk_file_meta, 42},
{"index.meta",
"app_metadata=,collator=,columns=,extractor=,immutable=false,"
"index_key_columns=,key_format=u,source=,type=file,value_format=u",
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index ace1505e6dd..fcf46b48f95 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -2671,6 +2671,12 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c
WT_ERR(__wt_metadata_cursor(session, NULL));
+ /*
+ * Load any incremental backup information. This reads the metadata so must be done after the
+ * turtle file is initialized.
+ */
+ WT_ERR(__wt_backup_open(session));
+
/* Start the worker threads and run recovery. */
WT_ERR(__wt_connection_workers(session, cfg));
diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
index c947954e75c..1d79064186c 100644
--- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
@@ -38,7 +38,7 @@ __conn_dhandle_config_set(WT_SESSION_IMPL *session)
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
char *metaconf, *tmp;
- const char *base, *cfg[3];
+ const char *base, *cfg[4];
dhandle = session->dhandle;
base = NULL;
@@ -68,30 +68,32 @@ __conn_dhandle_config_set(WT_SESSION_IMPL *session)
switch (dhandle->type) {
case WT_DHANDLE_TYPE_BTREE:
/*
- * We are stripping out the checkpoint and checkpoint_lsn information from the config
- * string. We save the rest of the metadata string, that is essentially static and
- * unchanging and then concatenate the new checkpoint and LSN information on each
- * checkpoint. The reason is performance and avoiding a lot of calls to the config parsing
- * functions during a checkpoint for information that changes in a very well known way.
+ * We are stripping out all checkpoint related information from the config string. We save
+ * the rest of the metadata string, that is essentially static and unchanging and then
+ * concatenate the new checkpoint related information on each checkpoint. The reason is
+ * performance and avoiding a lot of calls to the config parsing functions during a
+ * checkpoint for information that changes in a very well known way.
+ *
+ * First collapse and overwrite checkpoint information because we do not know the name of or
+ * how many checkpoints may be in this metadata. Similarly, for backup information, we want
+ * an empty category to strip out since we don't know any backup ids. Set them empty and
+ * call collapse to overwrite anything existing.
*/
cfg[0] = metaconf;
cfg[1] = "checkpoint=()";
- cfg[2] = NULL;
+ cfg[2] = "checkpoint_backup_info=()";
+ cfg[3] = NULL;
WT_ERR(__wt_strdup(session, WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0]));
WT_ASSERT(session, dhandle->meta_base == NULL);
- /*
- * First collapse and overwrite any checkpoint information because we do not know the name
- * or how many checkpoints may be in this metadata. So first we have to set the string to
- * the empty checkpoint string and call collapse to overwrite anything existing.
- */
WT_ERR(__wt_config_collapse(session, cfg, &tmp));
/*
- * Now strip out the checkpoint and checkpoint LSN items from the configuration string and
- * that is now our base metadata string.
+ * Now strip out the checkpoint related items from the configuration string and that is now
+ * our base metadata string.
*/
cfg[0] = tmp;
cfg[1] = NULL;
- WT_ERR(__wt_config_merge(session, cfg, "checkpoint=,checkpoint_lsn=", &base));
+ WT_ERR(__wt_config_merge(
+ session, cfg, "checkpoint=,checkpoint_backup_info=,checkpoint_lsn=", &base));
__wt_free(session, tmp);
break;
case WT_DHANDLE_TYPE_TABLE:
diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c
index 37136926060..e5c82d49a48 100644
--- a/src/third_party/wiredtiger/src/conn/conn_handle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_handle.c
@@ -94,7 +94,6 @@ void
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
{
WT_SESSION_IMPL *session;
- u_int i;
/* Check there's something to destroy. */
if (conn == NULL)
@@ -132,13 +131,6 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
__wt_cond_destroy(session, &conn->lsm_manager.work_cond);
/* Free allocated memory. */
- /*
- * XXX we need to persist this information when we are working on making incremental backups
- * persistent across restarts.
- */
- for (i = 0; i < WT_BLKINCR_MAX; ++i)
- __wt_free(session, conn->incr_backups[i].id_str);
-
__wt_free(session, conn->cfg);
__wt_free(session, conn->debug_ckpt);
__wt_free(session, conn->error_prefix);
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index f7e338ac9bb..df6e6b79300 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -141,6 +141,8 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
/* Close operation tracking */
WT_TRET(__wt_conn_optrack_teardown(session, false));
+ __wt_backup_destroy(session);
+
/* Close any file handles left open. */
WT_TRET(__wt_close_connection_close(session));
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c
index be781118895..3fdbafce445 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c
@@ -19,6 +19,101 @@ static int __backup_stop(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *);
WT_ERR(F_ISSET(((WT_CURSOR_BACKUP *)(cursor)), WT_CURBACKUP_FORCE_STOP) ? EINVAL : 0);
/*
+ * __wt_backup_destroy --
+ * Destroy any backup information.
+ */
+void
+__wt_backup_destroy(WT_SESSION_IMPL *session)
+{
+ WT_BLKINCR *blkincr;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t i;
+
+ conn = S2C(session);
+ /* Free any incremental backup information. */
+ for (i = 0; i < WT_BLKINCR_MAX; ++i) {
+ blkincr = &conn->incr_backups[i];
+ __wt_free(session, blkincr->id_str);
+ F_CLR(blkincr, WT_BLKINCR_VALID);
+ }
+ conn->incr_granularity = 0;
+ F_CLR(conn, WT_CONN_INCR_BACKUP);
+}
+
+/*
+ * __wt_backup_open --
+ * Restore any incremental backup information. We use the metadata's block information as the
+ * authority on whether incremental backup was in use on last shutdown.
+ */
+int
+__wt_backup_open(WT_SESSION_IMPL *session)
+{
+ WT_BLKINCR *blkincr;
+ WT_CONFIG blkconf;
+ WT_CONFIG_ITEM b, k, v;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ uint64_t i;
+ char *config;
+
+ conn = S2C(session);
+ config = NULL;
+
+ WT_RET(__wt_metadata_search(session, WT_METAFILE_URI, &config));
+ WT_ERR(__wt_config_getones(session, config, "checkpoint_backup_info", &v));
+ __wt_config_subinit(session, &blkconf, &v);
+ /*
+ * Walk each item in the metadata and set up our last known global incremental information.
+ */
+ F_CLR(conn, WT_CONN_INCR_BACKUP);
+ i = 0;
+ while (__wt_config_next(&blkconf, &k, &v) == 0) {
+ WT_ASSERT(session, i < WT_BLKINCR_MAX);
+ /*
+ * If we get here, we have at least one valid incremental backup. We want to set up its
+ * general configuration in the global table.
+ */
+ blkincr = &conn->incr_backups[i++];
+ F_SET(conn, WT_CONN_INCR_BACKUP);
+ WT_ERR(__wt_strndup(session, k.str, k.len, &blkincr->id_str));
+ WT_ERR(__wt_config_subgets(session, &v, "granularity", &b));
+ /*
+ * NOTE: For now the granularity is in the connection because it cannot change. We may be
+ * able to relax that.
+ */
+ conn->incr_granularity = blkincr->granularity = (uint64_t)b.val;
+ F_SET(blkincr, WT_BLKINCR_VALID);
+ }
+
+err:
+ if (ret != 0 && ret != WT_NOTFOUND)
+ __wt_backup_destroy(session);
+ __wt_free(session, config);
+ return (ret == WT_NOTFOUND ? 0 : ret);
+}
+
+/*
+ * __wt_backup_file_remove --
+ * Remove the incremental and meta-data backup files.
+ */
+int
+__wt_backup_file_remove(WT_SESSION_IMPL *session)
+{
+ WT_DECL_RET;
+
+ /*
+ * Note that order matters for removing the incremental files. We must remove the backup file
+ * before removing the source file so that we always know we were a source directory while
+ * there's any chance of an incremental backup file existing.
+ */
+ WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_LOGINCR_BACKUP, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_LOGINCR_SRC, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP, true));
+ return (ret);
+}
+
+/*
* __curbackup_next --
* WT_CURSOR->next method for the backup cursor type.
*/
@@ -71,35 +166,6 @@ err:
}
/*
- * __backup_incr_release --
- * Free all resources relating to incremental backup.
- */
-static int
-__backup_incr_release(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, bool force)
-{
- WT_BLKINCR *blk;
- WT_CONNECTION_IMPL *conn;
- u_int i;
-
- WT_UNUSED(cb);
- WT_UNUSED(force);
- conn = S2C(session);
- /*
- * Clear flags. Remove file. Release any memory information.
- */
- F_CLR(conn, WT_CONN_INCR_BACKUP);
- for (i = 0; i < WT_BLKINCR_MAX; ++i) {
- blk = &conn->incr_backups[i];
- F_CLR(blk, WT_BLKINCR_VALID);
- }
- /* __wt_block_backup_remove... */
- conn->ckpt_incr_granularity = 0;
- WT_RET(__wt_remove_if_exists(session, WT_BLKINCR_BACKUP, true));
-
- return (0);
-}
-
-/*
* __backup_free --
* Free list resources for a backup cursor.
*/
@@ -115,10 +181,6 @@ __backup_free(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
}
if (cb->incr_file != NULL)
__wt_free(session, cb->incr_file);
- if (cb->incr_src != NULL)
- __wt_free(session, cb->incr_src);
- if (cb->incr_this != NULL)
- __wt_free(session, cb->incr_this);
__wt_curbackup_free_incr(session, cb);
}
@@ -140,7 +202,7 @@ err:
if (F_ISSET(cb, WT_CURBACKUP_FORCE_STOP)) {
__wt_verbose(
session, WT_VERB_BACKUP, "%s", "Releasing resources from forced stop incremental");
- __backup_incr_release(session, cb, true);
+ __wt_backup_destroy(session);
}
/*
@@ -232,26 +294,6 @@ err:
}
/*
- * __backup_get_ckpt --
- * Get the most recent checkpoint information and store it in the structure.
- *
- * XXX - Currently set return to static void for the compiler, when this function has real content
- * it should be static int.
- */
-static void
-__backup_get_ckpt(WT_SESSION_IMPL *session, WT_BLKINCR *incr)
-{
- WT_UNUSED(session);
- WT_UNUSED(incr);
- /*
- * Look up the most recent checkpoint and store information about it in incr.
- *
- * XXX When this function has content, return a real value. return (0);
- */
- return;
-}
-
-/*
* __backup_add_id --
* Add the identifier for block based incremental backup.
*/
@@ -262,11 +304,13 @@ __backup_add_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
u_int i;
+ const char *ckpt;
conn = S2C(session);
blk = NULL;
for (i = 0; i < WT_BLKINCR_MAX; ++i) {
blk = &conn->incr_backups[i];
+ __wt_verbose(session, WT_VERB_BACKUP, "blk[%u] flags 0x%" PRIx64, i, blk->flags);
/* If it isn't use, we can use it. */
if (!F_ISSET(blk, WT_BLKINCR_INUSE))
break;
@@ -281,16 +325,28 @@ __backup_add_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval)
if (blk->id_str != NULL)
__wt_verbose(
session, WT_VERB_BACKUP, "Freeing and reusing backup slot with old id %s", blk->id_str);
- /* Free any string that was there. */
+ /* Free anything that was there. */
__wt_free(session, blk->id_str);
WT_ERR(__wt_strndup(session, cval->str, cval->len, &blk->id_str));
- __wt_verbose(session, WT_VERB_BACKUP, "Using backup slot %u for id %s", i, blk->id_str);
/*
- * XXX This function can error in the future.
- *
- * WT_ERR(__backup_get_ckpt(session, blk));
+ * Get the most recent checkpoint name. For now just use the one that is part of the metadata.
+ * We only care whether or not a checkpoint exists, so immediately free it.
*/
- __backup_get_ckpt(session, blk);
+ ret = __wt_meta_checkpoint_last_name(session, WT_METAFILE_URI, &ckpt);
+ __wt_free(session, ckpt);
+ if (ret != 0 && ret != WT_NOTFOUND)
+ WT_ERR(ret);
+ if (ret == WT_NOTFOUND) {
+ /*
+ * If we don't find any checkpoint, backup files need to be full copy.
+ */
+ __wt_verbose(session, WT_VERB_BACKUP, "ID %s: Did not find any metadata checkpoint for %s.",
+ blk->id_str, WT_METAFILE_URI);
+ F_SET(blk, WT_BLKINCR_FULL);
+ } else {
+ __wt_verbose(session, WT_VERB_BACKUP, "Using backup slot %u for id %s", i, blk->id_str);
+ F_CLR(blk, WT_BLKINCR_FULL);
+ }
F_SET(blk, WT_BLKINCR_VALID);
return (0);
@@ -400,12 +456,11 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[
if (cval.val) {
if (!F_ISSET(conn, WT_CONN_INCR_BACKUP)) {
WT_RET(__wt_config_gets(session, cfg, "incremental.granularity", &cval));
- /* XXX may not need cb->incr_granularity */
- if (conn->ckpt_incr_granularity != 0)
+ if (conn->incr_granularity != 0)
WT_RET_MSG(session, EINVAL, "Cannot change the incremental backup granularity");
- conn->ckpt_incr_granularity = cb->incr_granularity = (uint64_t)cval.val;
+ conn->incr_granularity = (uint64_t)cval.val;
}
- /* XXX Granularity can only be set once at the beginning */
+ /* Granularity can only be set once at the beginning */
F_SET(conn, WT_CONN_INCR_BACKUP);
incremental_config = true;
}
@@ -432,10 +487,8 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[
if (is_dup)
WT_RET_MSG(session, EINVAL,
"Incremental source identifier can only be specified on a primary backup cursor");
- WT_RET(__backup_find_id(session, &cval, &cb->incr));
- /* XXX might not need this incr_src field */
- WT_RET(__wt_strndup(session, cval.str, cval.len, &cb->incr_src));
- F_SET(cb->incr, WT_BLKINCR_INUSE);
+ WT_RET(__backup_find_id(session, &cval, &cb->incr_src));
+ F_SET(cb->incr_src, WT_BLKINCR_INUSE);
incremental_config = true;
}
/*
@@ -455,8 +508,6 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[
WT_ERR_MSG(session, EINVAL, "Incremental identifier already exists");
WT_ERR(__backup_add_id(session, &cval));
- /* XXX might not need this incr_this field */
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &cb->incr_this));
incremental_config = true;
}
@@ -530,8 +581,8 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[
F_SET(cb, WT_CURBACKUP_INCR);
}
err:
- if (ret != 0 && cb->incr != NULL)
- F_CLR(cb->incr, WT_BLKINCR_INUSE);
+ if (ret != 0 && cb->incr_src != NULL)
+ F_CLR(cb->incr_src, WT_BLKINCR_INUSE);
__wt_scr_free(session, &tmp);
return (ret);
}
@@ -716,8 +767,8 @@ __backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
/* If it's not a dup backup cursor, make sure one isn't open. */
WT_ASSERT(session, !F_ISSET(session, WT_SESSION_BACKUP_DUP));
WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup_list = NULL);
- if (cb->incr != NULL)
- F_CLR(cb->incr, WT_BLKINCR_INUSE);
+ if (cb->incr_src != NULL)
+ F_CLR(cb->incr_src, WT_BLKINCR_INUSE);
__backup_free(session, cb);
/* Remove any backup specific file. */
@@ -742,27 +793,6 @@ __backup_all(WT_SESSION_IMPL *session)
}
/*
- * __wt_backup_file_remove --
- * Remove the incremental and meta-data backup files.
- */
-int
-__wt_backup_file_remove(WT_SESSION_IMPL *session)
-{
- WT_DECL_RET;
-
- /*
- * Note that order matters for removing the incremental files. We must remove the backup file
- * before removing the source file so that we always know we were a source directory while
- * there's any chance of an incremental backup file existing.
- */
- WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP, true));
- WT_TRET(__wt_remove_if_exists(session, WT_LOGINCR_BACKUP, true));
- WT_TRET(__wt_remove_if_exists(session, WT_LOGINCR_SRC, true));
- WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP, true));
- return (ret);
-}
-
-/*
* __backup_list_uri_append --
* Append a new file name to the list, allocate space as necessary. Called via the schema_worker
* function.
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
index c8c59d3f9db..5403e2308ff 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
@@ -9,53 +9,74 @@
#include "wt_internal.h"
/*
- * __alloc_merge --
- * Merge two allocation lists.
+ * __wt_backup_load_incr --
+ * Load the incremental.
*/
-static void
-__alloc_merge(
- uint64_t *a, uint64_t a_cnt, uint64_t *b, uint64_t b_cnt, uint64_t *res, uint64_t *res_cnt)
+int
+__wt_backup_load_incr(
+ WT_SESSION_IMPL *session, WT_CONFIG_ITEM *blkcfg, WT_ITEM *bitstring, uint64_t nbits)
{
- uint64_t total;
-
- for (total = 0; a_cnt > 0 || b_cnt > 0; ++total, res += 2) {
- if (a_cnt > 0 && b_cnt > 0) {
- if (a[0] <= b[0]) {
- res[0] = a[0];
- if (a[0] + a[1] < b[0])
- res[1] = a[1];
- else {
- res[1] = (b[0] + b[1]) - a[0];
- b += 2;
- --b_cnt;
- }
- a += 2;
- --a_cnt;
- } else if (b[0] <= a[0]) {
- res[0] = b[0];
- if (b[0] + b[1] < a[0])
- res[1] = b[1];
- else {
- res[1] = (a[0] + a[1]) - b[0];
- a += 2;
- --a_cnt;
- }
- b += 2;
- --b_cnt;
- }
- } else if (a_cnt > 0) {
- res[0] = a[0];
- res[1] = a[1];
- a += 2;
- --a_cnt;
- } else if (b_cnt > 0) {
- res[0] = b[0];
- res[1] = b[1];
- b += 2;
- --b_cnt;
+ if (blkcfg->len != 0)
+ WT_RET(__wt_nhex_to_raw(session, blkcfg->str, blkcfg->len, bitstring));
+ if (bitstring->size != (nbits >> 3))
+ WT_RET_MSG(session, WT_ERROR, "corrupted modified block list");
+ return (0);
+}
+
+/*
+ * __curbackup_incr_blkmod --
+ * Get the block modifications for a tree from its metadata and fill in the backup cursor's
+ * information with it.
+ */
+static int
+__curbackup_incr_blkmod(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_CURSOR_BACKUP *cb)
+{
+ WT_CONFIG blkconf;
+ WT_CONFIG_ITEM b, k, v;
+ WT_DECL_RET;
+ char *config;
+
+ WT_ASSERT(session, btree != NULL);
+ WT_ASSERT(session, btree->dhandle != NULL);
+ WT_ASSERT(session, cb->incr_src != NULL);
+
+ WT_RET(__wt_metadata_search(session, btree->dhandle->name, &config));
+ WT_ERR(__wt_config_getones(session, config, "checkpoint_backup_info", &v));
+ __wt_config_subinit(session, &blkconf, &v);
+ while ((ret = __wt_config_next(&blkconf, &k, &v)) == 0) {
+ /*
+ * First see if we have information for this source identifier.
+ */
+ if (WT_STRING_MATCH(cb->incr_src->id_str, k.str, k.len) == 0)
+ continue;
+
+ /*
+ * We found a match. If we have a name, then there should be granularity and nbits. The
+ * granularity should be set to something. But nbits may be 0 if there are no blocks
+ * currently modified.
+ */
+ WT_ERR(__wt_config_subgets(session, &v, "granularity", &b));
+ cb->granularity = (uint64_t)b.val;
+ WT_ERR(__wt_config_subgets(session, &v, "nbits", &b));
+ cb->nbits = (uint64_t)b.val;
+ WT_ERR(__wt_config_subgets(session, &v, "offset", &b));
+ cb->offset = (uint64_t)b.val;
+
+ /*
+ * We found a match. Load the block information into the cursor.
+ */
+ ret = __wt_config_subgets(session, &v, "blocks", &b);
+ if (ret != WT_NOTFOUND) {
+ WT_ERR(__wt_backup_load_incr(session, &b, &cb->bitstring, cb->nbits));
+ cb->bit_offset = 0;
+ cb->incr_init = true;
}
}
- *res_cnt = total;
+ WT_ERR_NOTFOUND_OK(ret);
+
+err:
+ __wt_free(session, config);
+ return (ret == WT_NOTFOUND ? 0 : ret);
}
/*
@@ -66,18 +87,11 @@ static int
__curbackup_incr_next(WT_CURSOR *cursor)
{
WT_BTREE *btree;
- WT_CKPT *ckpt, *ckptbase;
WT_CURSOR_BACKUP *cb;
WT_DECL_RET;
WT_SESSION_IMPL *session;
wt_off_t size;
- uint64_t *a, *b, *current, *next;
- uint64_t entries, total;
uint32_t raw;
- bool start, stop;
-
- ckptbase = NULL;
- a = b = NULL;
cb = (WT_CURSOR_BACKUP *)cursor;
btree = cb->incr_cursor == NULL ? NULL : ((WT_CURSOR_BTREE *)cb->incr_cursor)->btree;
@@ -86,104 +100,46 @@ __curbackup_incr_next(WT_CURSOR *cursor)
F_CLR(cursor, WT_CURSTD_RAW);
if (cb->incr_init) {
- /* We have this object's incremental information, Check if we're done. */
- if (cb->incr_list_offset >= cb->incr_list_count - WT_BACKUP_INCR_COMPONENTS)
- return (WT_NOTFOUND);
+ /* Look for the next chunk that had modifications. */
+ while (cb->bit_offset < cb->nbits)
+ if (__bit_test(cb->bitstring.mem, cb->bit_offset))
+ break;
+ else
+ ++cb->bit_offset;
- /*
- * If we returned all of the data, step to the next block, otherwise return the next chunk
- * of the current block.
- */
- if (cb->incr_list[cb->incr_list_offset + 1] <= cb->incr_granularity)
- cb->incr_list_offset += WT_BACKUP_INCR_COMPONENTS;
- else {
- cb->incr_list[cb->incr_list_offset] += cb->incr_granularity;
- cb->incr_list[cb->incr_list_offset + 1] -= cb->incr_granularity;
- cb->incr_list[cb->incr_list_offset + 2] = WT_BACKUP_RANGE;
- }
- } else if (btree == NULL) {
+ /* We either have this object's incremental information or we're done. */
+ if (cb->bit_offset >= cb->nbits)
+ WT_ERR(WT_NOTFOUND);
+ __wt_cursor_set_key(cursor, cb->offset + cb->granularity * cb->bit_offset++,
+ cb->granularity, WT_BACKUP_RANGE);
+ } else if (btree == NULL || F_ISSET(cb, WT_CURBACKUP_FORCE_FULL)) {
/* We don't have this object's incremental information, and it's a full file copy. */
WT_ERR(__wt_fs_size(session, cb->incr_file, &size));
- cb->incr_list_count = WT_BACKUP_INCR_COMPONENTS;
+ cb->nbits = 0;
+ cb->offset = 0;
+ cb->bit_offset = 0;
cb->incr_init = true;
- cb->incr_list_offset = 0;
__wt_cursor_set_key(cursor, 0, size, WT_BACKUP_FILE);
} else {
/*
* We don't have this object's incremental information, and it's not a full file copy. Get a
- * list of the checkpoints available for the file and flag the starting/stopping ones. It
- * shouldn't be possible to specify checkpoints that no longer exist, but check anyway.
+ * list of the block modifications for the file. The block modifications are from the
+ * incremental identifier starting point. Walk the list looking for one with a source of our
+ * id.
*/
- ret = __wt_meta_ckptlist_get(session, cb->incr_file, false, &ckptbase);
- WT_ERR(ret == WT_NOTFOUND ? ENOENT : ret);
-
+ WT_ERR(__curbackup_incr_blkmod(session, btree, cb));
/*
- * Count up the maximum number of block entries we might have to merge, and allocate a pair
- * of temporary arrays in which to do the merge.
+ * If there is no block modification information for this file, there is no information to
+ * return to the user.
*/
- entries = 0;
- WT_CKPT_FOREACH (ckptbase, ckpt)
- entries += ckpt->alloc_list_entries;
- WT_ERR(__wt_calloc_def(session, entries * WT_BACKUP_INCR_COMPONENTS, &a));
- WT_ERR(__wt_calloc_def(session, entries * WT_BACKUP_INCR_COMPONENTS, &b));
-
- /* Merge the block lists into a final list of blocks to copy. */
- start = stop = false;
- total = 0;
- current = NULL;
- next = a;
- WT_CKPT_FOREACH (ckptbase, ckpt) {
- if (strcmp(ckpt->name, cb->incr_checkpoint_start) == 0) {
- start = true;
- WT_ERR_ASSERT(session, ckpt->alloc_list_entries == 0, __wt_panic(session),
- "incremental backup start checkpoint has allocation list blocks");
- continue;
- }
- if (start == true) {
- if (strcmp(ckpt->name, cb->incr_checkpoint_stop) == 0)
- stop = true;
-
- __alloc_merge(
- current, total, ckpt->alloc_list, ckpt->alloc_list_entries, next, &total);
- current = next;
- next = next == a ? b : a;
- }
-
- if (stop == true)
- break;
- }
-
- if (!start)
- WT_ERR_MSG(session, ENOENT, "incremental backup start checkpoint %s not found",
- cb->incr_checkpoint_start);
- if (!stop)
- WT_ERR_MSG(session, ENOENT, "incremental backup stop checkpoint %s not found",
- cb->incr_checkpoint_stop);
-
- /* There may be nothing that needs copying. */
- if (total == 0)
+ if (cb->bitstring.mem == NULL)
WT_ERR(WT_NOTFOUND);
-
- if (next == a) {
- cb->incr_list = b;
- b = NULL;
- } else {
- cb->incr_list = a;
- a = NULL;
- }
- cb->incr_list_count = total;
- cb->incr_list_offset = 0;
- WT_ERR(__wt_scr_alloc(session, 0, &cb->incr_block));
- cb->incr_init = true;
-
- F_SET(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+ __wt_cursor_set_key(cursor, cb->offset + cb->granularity * cb->bit_offset++,
+ cb->granularity, WT_BACKUP_RANGE);
}
err:
- __wt_free(session, a);
- __wt_free(session, b);
- __wt_meta_ckptlist_free(session, &ckptbase);
F_SET(cursor, raw);
API_END_RET(session, ret);
}
@@ -198,10 +154,7 @@ __wt_curbackup_free_incr(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
__wt_free(session, cb->incr_file);
if (cb->incr_cursor != NULL)
__wt_cursor_close(cb->incr_cursor);
- __wt_free(session, cb->incr_checkpoint_start);
- __wt_free(session, cb->incr_checkpoint_stop);
- __wt_free(session, cb->incr_list);
- __wt_scr_free(session, &cb->incr_block);
+ __wt_buf_free(session, &cb->bitstring);
}
/*
@@ -213,21 +166,48 @@ __wt_curbackup_open_incr(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *o
WT_CURSOR *cursor, const char *cfg[], WT_CURSOR **cursorp)
{
WT_CURSOR_BACKUP *cb, *other_cb;
+ WT_DECL_ITEM(open_uri);
+ WT_DECL_RET;
cb = (WT_CURSOR_BACKUP *)cursor;
other_cb = (WT_CURSOR_BACKUP *)other;
- WT_UNUSED(session);
cursor->key_format = WT_UNCHECKED_STRING(qqq);
cursor->value_format = "";
+ WT_ASSERT(session, other_cb->incr_src != NULL);
+
/*
* Inherit from the backup cursor but reset specific functions for incremental.
*/
cursor->next = __curbackup_incr_next;
cursor->get_key = __wt_cursor_get_key;
cursor->get_value = __wt_cursor_get_value_notsup;
- cb->incr_granularity = other_cb->incr_granularity;
+ cb->incr_src = other_cb->incr_src;
+
+ /* All WiredTiger owned files are full file copies. */
+ if (F_ISSET(other_cb->incr_src, WT_BLKINCR_FULL) ||
+ WT_PREFIX_MATCH(cb->incr_file, "WiredTiger")) {
+ __wt_verbose(session, WT_VERB_BACKUP, "Forcing full file copies for id %s",
+ other_cb->incr_src->id_str);
+ F_SET(cb, WT_CURBACKUP_FORCE_FULL);
+ }
+ /*
+ * Set up the incremental backup information, if we are not forcing a full file copy. We need an
+ * open cursor on the file. Open the backup checkpoint, confirming it exists.
+ */
+ if (!F_ISSET(cb, WT_CURBACKUP_FORCE_FULL)) {
+ WT_ERR(__wt_scr_alloc(session, 0, &open_uri));
+ WT_ERR(__wt_buf_fmt(session, open_uri, "file:%s", cb->incr_file));
+ __wt_free(session, cb->incr_file);
+ WT_ERR(__wt_strdup(session, open_uri->data, &cb->incr_file));
+
+ WT_ERR(__wt_curfile_open(session, cb->incr_file, NULL, cfg, &cb->incr_cursor));
+ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
+ WT_ERR(__wt_strdup(session, cb->incr_cursor->internal_uri, &cb->incr_cursor->internal_uri));
+ } else
+ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
- /* XXX Return full file info for all files for now. */
- return (__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
+err:
+ __wt_scr_free(session, &open_uri);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/docs/command-line.dox b/src/third_party/wiredtiger/src/docs/command-line.dox
index 986a634e786..de845ae9f6f 100644
--- a/src/third_party/wiredtiger/src/docs/command-line.dox
+++ b/src/third_party/wiredtiger/src/docs/command-line.dox
@@ -204,9 +204,10 @@ a table, creating the table if it does not yet exist. The data should
be the format produced by the \c dump command; see @ref dump_formats for
details.
-By default, if the table already exists, data in the table will be
-overwritten by the new data (use the \c -n option to make an attempt to
-overwrite existing data return an error).
+By default, if the table already exists, key/value pairs in the table
+will be overwritten by new data with matching keys (use the \c -n option
+to make an attempt to overwrite existing data return an error). Existing
+keys will not be removed.
@subsection util_load_synopsis Synopsis
<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] load [-ajn] [-f input] [-r name] [uri configuration ...]</code>
@@ -279,7 +280,9 @@ In the case of inserting values into a column-store table, each value
is appended to the table; in the case of inserting values into a
row-store table, lines are handled in pairs, where the first line is the
key and the second line is the value. If the row-store table already
-exists, data in the table will be overwritten by the new data.
+exists, key/value pairs in the table will be overwritten by new data
+with matching keys. For either column-store or row-store tables, existing
+keys will not be removed.
@subsection util_loadtext_synopsis Synopsis
<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] loadtext [-f input] uri</code>
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index 95946dfbc65..7157d9392b6 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -51,7 +51,6 @@
*/ \
WT_ERR(WT_SESSION_CHECK_PANIC(s)); \
WT_SINGLE_THREAD_CHECK_START(s); \
- WT_ERR(__wt_txn_err_chk(s)); \
WT_TRACK_OP_INIT(s); \
__wt_op_timer_start(s); \
/* Reset wait time if this isn't an API reentry. */ \
@@ -70,21 +69,20 @@
if ((config) != NULL) \
WT_ERR(__wt_config_check((s), WT_CONFIG_REF(session, h##_##n), (config), 0))
-#define API_END(s, ret) \
- if ((s) != NULL) { \
- WT_TRACK_OP_END(s); \
- WT_SINGLE_THREAD_CHECK_STOP(s); \
- if ((ret) != 0 && (ret) != WT_NOTFOUND && (ret) != WT_DUPLICATE_KEY && \
- (ret) != WT_PREPARE_CONFLICT && F_ISSET(&(s)->txn, WT_TXN_RUNNING)) \
- F_SET(&(s)->txn, WT_TXN_ERROR); \
- __wt_op_timer_stop(s); \
- /* \
- * No code after this line, otherwise error handling \
- * won't be correct. \
- */ \
- API_SESSION_POP(s); \
- } \
- } \
+#define API_END(s, ret) \
+ if ((s) != NULL) { \
+ WT_TRACK_OP_END(s); \
+ WT_SINGLE_THREAD_CHECK_STOP(s); \
+ if ((ret) != 0) \
+ __wt_txn_err_set(s, ret); \
+ __wt_op_timer_stop(s); \
+ /* \
+ * No code after this line, otherwise error handling \
+ * won't be correct. \
+ */ \
+ API_SESSION_POP(s); \
+ } \
+ } \
while (0)
/* An API call wrapped in a transaction if necessary. */
@@ -173,6 +171,14 @@
#define SESSION_API_CALL_PREPARE_ALLOWED(s, n, config, cfg) \
API_CALL(s, WT_SESSION, n, NULL, config, cfg)
+#define SESSION_API_CALL_PREPARE_NOT_ALLOWED(s, n, config, cfg) \
+ SESSION_API_PREPARE_CHECK(s, WT_SESSION, n); \
+ API_CALL(s, WT_SESSION, n, NULL, config, cfg)
+
+#define SESSION_API_CALL_PREPARE_NOT_ALLOWED_NOCONF(s, n) \
+ SESSION_API_PREPARE_CHECK(s, WT_SESSION, n); \
+ API_CALL_NOCONF(s, WT_SESSION, n, NULL)
+
#define SESSION_API_PREPARE_CHECK(s, h, n) \
do { \
int __prepare_ret; \
@@ -188,10 +194,6 @@
#define SESSION_API_CALL_NOCONF(s, n) API_CALL_NOCONF(s, WT_SESSION, n, NULL)
-#define SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(s, n) \
- SESSION_API_PREPARE_CHECK(s, WT_SESSION, n); \
- API_CALL_NOCONF(s, WT_SESSION, n, NULL)
-
#define SESSION_TXN_API_CALL(s, n, config, cfg) \
SESSION_API_PREPARE_CHECK(s, WT_SESSION, n); \
TXN_API_CALL(s, WT_SESSION, n, NULL, config, cfg)
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 5099bc4dce9..50f6fcf9759 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1053,7 +1053,7 @@ __wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value)
*/
static inline void
__wt_ref_info(
- WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep)
+ WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, bool *is_leafp)
{
WT_ADDR *addr;
WT_CELL_UNPACK *unpack, _unpack;
@@ -1072,33 +1072,61 @@ __wt_ref_info(
if (addr == NULL) {
*addrp = NULL;
*sizep = 0;
- if (typep != NULL)
- *typep = 0;
+ if (is_leafp != NULL)
+ *is_leafp = false;
} else if (__wt_off_page(page, addr)) {
*addrp = addr->addr;
*sizep = addr->size;
- if (typep != NULL)
- switch (addr->type) {
- case WT_ADDR_INT:
- *typep = WT_CELL_ADDR_INT;
- break;
- case WT_ADDR_LEAF:
- *typep = WT_CELL_ADDR_LEAF;
- break;
- case WT_ADDR_LEAF_NO:
- *typep = WT_CELL_ADDR_LEAF_NO;
- break;
- default:
- *typep = 0;
- break;
- }
+ if (is_leafp != NULL)
+ *is_leafp = addr->type != WT_ADDR_INT;
} else {
__wt_cell_unpack(session, page, (WT_CELL *)addr, unpack);
*addrp = unpack->data;
*sizep = unpack->size;
- if (typep != NULL)
- *typep = unpack->type;
+
+ if (is_leafp != NULL)
+ *is_leafp = unpack->type != WT_ADDR_INT;
+ }
+}
+
+/*
+ * __wt_ref_info_lock --
+ * Lock the WT_REF and return the addr/size and type triplet for a reference.
+ */
+static inline void
+__wt_ref_info_lock(
+ WT_SESSION_IMPL *session, WT_REF *ref, uint8_t *addr_buf, size_t *sizep, bool *is_leafp)
+{
+ size_t size;
+ uint32_t previous_state;
+ const uint8_t *addr;
+ bool is_leaf;
+
+ /*
+ * The WT_REF address references either an on-page cell or in-memory structure, and eviction
+ * frees both. If our caller is already blocking eviction (either because the WT_REF is locked
+ * or there's a hazard pointer on the page), no locking is required, and the caller should call
+ * the underlying function directly. Otherwise, our caller is not blocking eviction and we lock
+ * here, and copy out the address instead of returning a reference.
+ */
+ for (;; __wt_yield()) {
+ previous_state = ref->state;
+ if (previous_state != WT_REF_LOCKED && previous_state != WT_REF_READING &&
+ WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED))
+ break;
}
+
+ __wt_ref_info(session, ref, &addr, &size, &is_leaf);
+
+ if (addr_buf != NULL) {
+ if (addr != NULL)
+ memcpy(addr_buf, addr, size);
+ *sizep = size;
+ }
+ if (is_leafp != NULL)
+ *is_leafp = is_leaf;
+
+ WT_REF_SET_STATE(ref, previous_state);
}
/*
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 9498eb5d6c6..2a275449284 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -299,7 +299,7 @@ struct __wt_connection_impl {
uint64_t ckpt_write_pages;
/* Checkpoint and incremental backup data */
- uint64_t ckpt_incr_granularity;
+ uint64_t incr_granularity;
WT_BLKINCR incr_backups[WT_BLKINCR_MAX];
/* Connection's maximum and base write generations. */
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index 45a4ac01a9f..3ea011b8fc9 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -32,21 +32,6 @@
0 /* uint32_t flags */ \
}
-/*
- * Block based incremental backup structure. These live in the connection.
- */
-#define WT_BLKINCR_MAX 2
-struct __wt_blkincr {
- const char *id_str; /* User's name for this backup. */
- const char *ckpt_name; /* Requires WT-5115. All checkpoints must be this name */
- void *data;
-/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_BLKINCR_INUSE 0x1u /* This entry is active */
-#define WT_BLKINCR_VALID 0x2u /* This entry is valid */
- /* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint64_t flags;
-};
-
struct __wt_cursor_backup {
WT_CURSOR iface;
@@ -61,31 +46,25 @@ struct __wt_cursor_backup {
size_t list_next;
/* File offset-based incremental backup. */
- WT_BLKINCR *incr; /* Incremental backup in use */
- char *incr_file; /* File name */
- char *incr_src; /* Source identifier */
- char *incr_this; /* New base identifier */
- uint64_t incr_granularity; /* Maximum transfer size */
+ WT_BLKINCR *incr_src; /* Incremental backup source */
+ char *incr_file; /* File name */
WT_CURSOR *incr_cursor; /* File cursor */
- /* Start/stop checkpoints */
- char *incr_checkpoint_start;
- char *incr_checkpoint_stop;
-
-#define WT_BACKUP_INCR_COMPONENTS 3
- bool incr_init; /* Cursor traversal initialized */
- uint64_t *incr_list; /* List of file offset/size/type triples */
- uint64_t incr_list_count; /* Count of file offset/size/type triples */
- uint64_t incr_list_offset; /* Current offset */
- uint64_t incr_size; /* Maximum transfer size */
- WT_ITEM *incr_block; /* Current block of data */
+
+ bool incr_init; /* Cursor traversal initialized */
+ WT_ITEM bitstring; /* List of modified blocks */
+ uint64_t nbits; /* Number of bits in bitstring */
+ uint64_t offset; /* Zero bit offset in bitstring */
+ uint64_t bit_offset; /* Current offset */
+ uint64_t granularity; /* Length, transfer size */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CURBACKUP_DUP 0x1u /* Duplicated backup cursor */
-#define WT_CURBACKUP_FORCE_STOP 0x2u /* Force stop incremental backup */
-#define WT_CURBACKUP_INCR 0x4u /* Incremental backup cursor */
-#define WT_CURBACKUP_LOCKER 0x8u /* Hot-backup started */
- /* AUTOMATIC FLAG VALUE GENERATION STOP */
+#define WT_CURBACKUP_DUP 0x01u /* Duplicated backup cursor */
+#define WT_CURBACKUP_FORCE_FULL 0x02u /* Force full file copy for this cursor */
+#define WT_CURBACKUP_FORCE_STOP 0x04u /* Force stop incremental backup */
+#define WT_CURBACKUP_INCR 0x08u /* Incremental backup cursor */
+#define WT_CURBACKUP_LOCKER 0x10u /* Hot-backup started */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
uint8_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 46de0e10f7f..2cf1408525e 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -58,8 +58,6 @@ extern const char *__wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_se
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern const char *__wt_json_tokname(int toktype) WT_GCC_FUNC_DECL_ATTRIBUTE(
(visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern const char *__wt_page_type_string(u_int type) WT_GCC_FUNC_DECL_ATTRIBUTE(
(visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern const char *__wt_session_strerror(WT_SESSION *wt_session, int error)
@@ -86,6 +84,10 @@ extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_backup_file_remove(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_backup_load_incr(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *blkcfg,
+ WT_ITEM *bitstring, uint64_t nbits) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_backup_open(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri)
WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_addr_invalid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr,
@@ -280,7 +282,7 @@ extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((wa
extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_search_uncommitted(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
+extern int __wt_btcur_search_uncommitted(WT_CURSOR *cursor, WT_UPDATE **updp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btree_close(WT_SESSION_IMPL *session)
@@ -1566,6 +1568,7 @@ extern void *__wt_ext_scr_alloc(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session
extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn))
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_async_stats_update(WT_SESSION_IMPL *session);
+extern void __wt_backup_destroy(WT_SESSION_IMPL *session);
extern void __wt_block_ckpt_destroy(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci);
extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on);
extern void __wt_block_ext_free(WT_SESSION_IMPL *session, WT_EXT *ext);
@@ -1964,8 +1967,6 @@ static inline int __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_txn_context_prepare_check(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_err_chk(WT_SESSION_IMPL *session)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_txn_id_check(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_txn_idle_cache_check(WT_SESSION_IMPL *session)
@@ -2135,7 +2136,9 @@ static inline void __wt_rec_incr(
WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size);
static inline void __wt_ref_addr_free(WT_SESSION_IMPL *session, WT_REF *ref);
static inline void __wt_ref_info(
- WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep);
+ WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, bool *is_leafp);
+static inline void __wt_ref_info_lock(
+ WT_SESSION_IMPL *session, WT_REF *ref, uint8_t *addr_buf, size_t *sizep, bool *is_leafp);
static inline void __wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep);
static inline void __wt_ref_key_clear(WT_REF *ref);
static inline void __wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK *unpack);
@@ -2156,6 +2159,7 @@ static inline void __wt_struct_size_adjust(WT_SESSION_IMPL *session, size_t *siz
static inline void __wt_timing_stress(WT_SESSION_IMPL *session, u_int flag);
static inline void __wt_tree_modify_set(WT_SESSION_IMPL *session);
static inline void __wt_txn_cursor_op(WT_SESSION_IMPL *session);
+static inline void __wt_txn_err_set(WT_SESSION_IMPL *session, int ret);
static inline void __wt_txn_op_apply_prepare_state(
WT_SESSION_IMPL *session, WT_REF *ref, bool commit);
static inline void __wt_txn_op_delete_commit_apply_timestamps(
diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h
index 9845dbd7f7d..b29d9665069 100644
--- a/src/third_party/wiredtiger/src/include/meta.h
+++ b/src/third_party/wiredtiger/src/include/meta.h
@@ -17,11 +17,10 @@
/*
* Backup related WiredTiger files.
*/
-#define WT_BACKUP_TMP "WiredTiger.backup.tmp" /* Backup tmp file */
-#define WT_BLKINCR_BACKUP "WiredTiger.backup.block" /* Block incremental durable file */
-#define WT_METADATA_BACKUP "WiredTiger.backup" /* Hot backup file */
-#define WT_LOGINCR_BACKUP "WiredTiger.ibackup" /* Log incremental backup */
-#define WT_LOGINCR_SRC "WiredTiger.isrc" /* Log incremental source */
+#define WT_BACKUP_TMP "WiredTiger.backup.tmp" /* Backup tmp file */
+#define WT_METADATA_BACKUP "WiredTiger.backup" /* Hot backup file */
+#define WT_LOGINCR_BACKUP "WiredTiger.ibackup" /* Log incremental backup */
+#define WT_LOGINCR_SRC "WiredTiger.isrc" /* Log incremental source */
#define WT_METADATA_TURTLE "WiredTiger.turtle" /* Metadata metadata */
#define WT_METADATA_TURTLE_SET "WiredTiger.turtle.set" /* Turtle temp file */
@@ -58,6 +57,42 @@
} while (0)
/*
+ * Block based incremental backup structure. These live in the connection.
+ */
+#define WT_BLKINCR_MAX 2
+struct __wt_blkincr {
+ const char *id_str; /* User's name for this backup. */
+ uint64_t granularity; /* Granularity of this backup. */
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_BLKINCR_FULL 0x1u /* There is no checkpoint, always do full file */
+#define WT_BLKINCR_INUSE 0x2u /* This entry is active */
+#define WT_BLKINCR_VALID 0x4u /* This entry is valid */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint64_t flags;
+};
+
+/*
+ * Block modifications from an incremental identifier going forward.
+ */
+/*
+ * At the default granularity, this is enough for blocks in a 2G file.
+ */
+#define WT_BLOCK_MODS_LIST_MIN 16 /* Initial bytes for bitmap. */
+struct __wt_block_mods {
+ const char *id_str;
+
+ WT_ITEM bitstring;
+ uint64_t nbits; /* Number of bits in bitstring */
+
+ uint64_t offset; /* Zero bit offset for bitstring */
+ uint64_t granularity;
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_BLOCK_MODS_VALID 0x1u /* Entry is valid */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
+};
+
+/*
* WT_CKPT --
* Encapsulation of checkpoint information, shared by the metadata, the
* btree engine, and the block manager.
@@ -88,6 +123,8 @@ struct __wt_ckpt {
char *block_metadata; /* Block-stored metadata */
char *block_checkpoint; /* Block-stored checkpoint */
+ WT_BLOCK_MODS backup_blocks[WT_BLKINCR_MAX];
+
/* Validity window */
wt_timestamp_t newest_durable_ts;
wt_timestamp_t oldest_start_ts;
@@ -95,9 +132,6 @@ struct __wt_ckpt {
wt_timestamp_t newest_stop_ts;
uint64_t newest_stop_txn;
- uint64_t *alloc_list; /* Checkpoint allocation list */
- uint64_t alloc_list_entries;
-
WT_ITEM addr; /* Checkpoint cookie string */
WT_ITEM raw; /* Checkpoint cookie raw */
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index f4b82b8f5e9..03d3ff72f8f 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -166,34 +166,35 @@ struct __wt_session_impl {
u_int stat_bucket; /* Statistics bucket offset */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_SESSION_BACKUP_CURSOR 0x0000001u
-#define WT_SESSION_BACKUP_DUP 0x0000002u
-#define WT_SESSION_CACHE_CURSORS 0x0000004u
-#define WT_SESSION_CAN_WAIT 0x0000008u
-#define WT_SESSION_IGNORE_CACHE_SIZE 0x0000010u
-#define WT_SESSION_INTERNAL 0x0000020u
-#define WT_SESSION_LOCKED_CHECKPOINT 0x0000040u
-#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x0000080u
-#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x0000100u
-#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x0000200u
-#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x0000400u
-#define WT_SESSION_LOCKED_METADATA 0x0000800u
-#define WT_SESSION_LOCKED_PASS 0x0001000u
-#define WT_SESSION_LOCKED_SCHEMA 0x0002000u
-#define WT_SESSION_LOCKED_SLOT 0x0004000u
-#define WT_SESSION_LOCKED_TABLE_READ 0x0008000u
-#define WT_SESSION_LOCKED_TABLE_WRITE 0x0010000u
-#define WT_SESSION_LOCKED_TURTLE 0x0020000u
-#define WT_SESSION_LOGGING_INMEM 0x0040000u
-#define WT_SESSION_LOOKASIDE_CURSOR 0x0080000u
-#define WT_SESSION_NO_DATA_HANDLES 0x0100000u
-#define WT_SESSION_NO_LOGGING 0x0200000u
-#define WT_SESSION_NO_RECONCILE 0x0400000u
-#define WT_SESSION_NO_SCHEMA_LOCK 0x0800000u
-#define WT_SESSION_QUIET_CORRUPT_FILE 0x1000000u
-#define WT_SESSION_READ_WONT_NEED 0x2000000u
-#define WT_SESSION_SCHEMA_TXN 0x4000000u
-#define WT_SESSION_SERVER_ASYNC 0x8000000u
+#define WT_SESSION_BACKUP_CURSOR 0x00000001u
+#define WT_SESSION_BACKUP_DUP 0x00000002u
+#define WT_SESSION_CACHE_CURSORS 0x00000004u
+#define WT_SESSION_CAN_WAIT 0x00000008u
+#define WT_SESSION_IGNORE_CACHE_SIZE 0x00000010u
+#define WT_SESSION_INTERNAL 0x00000020u
+#define WT_SESSION_LOCKED_CHECKPOINT 0x00000040u
+#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000080u
+#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000100u
+#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00000200u
+#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00000400u
+#define WT_SESSION_LOCKED_METADATA 0x00000800u
+#define WT_SESSION_LOCKED_PASS 0x00001000u
+#define WT_SESSION_LOCKED_SCHEMA 0x00002000u
+#define WT_SESSION_LOCKED_SLOT 0x00004000u
+#define WT_SESSION_LOCKED_TABLE_READ 0x00008000u
+#define WT_SESSION_LOCKED_TABLE_WRITE 0x00010000u
+#define WT_SESSION_LOCKED_TURTLE 0x00020000u
+#define WT_SESSION_LOGGING_INMEM 0x00040000u
+#define WT_SESSION_LOOKASIDE_CURSOR 0x00080000u
+#define WT_SESSION_NO_DATA_HANDLES 0x00100000u
+#define WT_SESSION_NO_LOGGING 0x00200000u
+#define WT_SESSION_NO_RECONCILE 0x00400000u
+#define WT_SESSION_NO_SCHEMA_LOCK 0x00800000u
+#define WT_SESSION_QUIET_CORRUPT_FILE 0x01000000u
+#define WT_SESSION_READ_WONT_NEED 0x02000000u
+#define WT_SESSION_RESOLVING_TXN 0x04000000u
+#define WT_SESSION_SCHEMA_TXN 0x08000000u
+#define WT_SESSION_SERVER_ASYNC 0x10000000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t flags;
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 332f3a3735c..7f29b10e23b 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -61,21 +61,34 @@ __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn)
}
/*
- * __wt_txn_err_chk --
- * Check the transaction hasn't already failed.
+ * __wt_txn_err_set --
+ * Set an error in the current transaction.
*/
-static inline int
-__wt_txn_err_chk(WT_SESSION_IMPL *session)
+static inline void
+__wt_txn_err_set(WT_SESSION_IMPL *session, int ret)
{
- /* Allow transaction rollback, but nothing else. */
- if (!F_ISSET(&(session->txn), WT_TXN_ERROR) ||
- strcmp(session->name, "rollback_transaction") != 0)
- return (0);
+ WT_TXN *txn;
-#ifdef HAVE_DIAGNOSTIC
- WT_ASSERT(session, !F_ISSET(&(session->txn), WT_TXN_ERROR));
-#endif
- WT_RET_MSG(session, EINVAL, "additional transaction operations attempted after error");
+ txn = &session->txn;
+
+ /* Ignore standard errors that don't fail the transaction. */
+ if (ret == WT_NOTFOUND || ret == WT_DUPLICATE_KEY || ret == WT_PREPARE_CONFLICT)
+ return;
+
+ /* Less commonly, it's not a running transaction. */
+ if (!F_ISSET(txn, WT_TXN_RUNNING))
+ return;
+
+ /* The transaction has to be rolled back. */
+ F_SET(txn, WT_TXN_ERROR);
+
+ /*
+ * Check for a prepared transaction, and quit: we can't ignore the error and we can't roll back
+ * a prepared transaction.
+ */
+ if (F_ISSET(txn, WT_TXN_PREPARE))
+ WT_PANIC_MSG(session, ret,
+ "transactional error logged after transaction was prepared, failing the system");
}
/*
@@ -750,12 +763,17 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp)
{
static WT_UPDATE tombstone = {.txnid = WT_TXN_NONE, .type = WT_UPDATE_TOMBSTONE};
WT_VISIBLE_TYPE upd_visible;
+ uint8_t type;
bool skipped_birthmark;
*updp = NULL;
+
+ type = WT_UPDATE_INVALID; /* [-Wconditional-uninitialized] */
for (skipped_birthmark = false; upd != NULL; upd = upd->next) {
+ WT_ORDERED_READ(type, upd->type);
+
/* Skip reserved place-holders, they're never visible. */
- if (upd->type != WT_UPDATE_RESERVE) {
+ if (type != WT_UPDATE_RESERVE) {
upd_visible = __wt_txn_upd_visible_type(session, upd);
if (upd_visible == WT_VISIBLE_TRUE)
break;
@@ -763,14 +781,16 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp)
return (WT_PREPARE_CONFLICT);
}
/* An invisible birthmark is equivalent to a tombstone. */
- if (upd->type == WT_UPDATE_BIRTHMARK)
+ if (type == WT_UPDATE_BIRTHMARK)
skipped_birthmark = true;
}
- if (upd == NULL && skipped_birthmark)
+ if (upd == NULL && skipped_birthmark) {
upd = &tombstone;
+ type = upd->type;
+ }
- *updp = upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? NULL : upd;
+ *updp = upd == NULL || type == WT_UPDATE_BIRTHMARK ? NULL : upd;
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 1cd8753a0ac..7ae570d3e59 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -87,6 +87,8 @@ struct __wt_block_desc;
typedef struct __wt_block_desc WT_BLOCK_DESC;
struct __wt_block_header;
typedef struct __wt_block_header WT_BLOCK_HEADER;
+struct __wt_block_mods;
+typedef struct __wt_block_mods WT_BLOCK_MODS;
struct __wt_bloom;
typedef struct __wt_bloom WT_BLOOM;
struct __wt_bloom_hash;
@@ -390,7 +392,7 @@ typedef uint64_t wt_timestamp_t;
#include "error.h"
#include "log.h"
#include "lsm.h"
-#include "meta.h"
+#include "meta.h" /* required by block.h */
#include "optrack.h"
#include "os.h"
#include "reconcile.h"
diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
index 93a01fa6abb..34c8b643c08 100644
--- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c
+++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
@@ -11,11 +11,79 @@
static int __ckpt_last(WT_SESSION_IMPL *, const char *, WT_CKPT *);
static int __ckpt_last_name(WT_SESSION_IMPL *, const char *, const char **);
static int __ckpt_load(WT_SESSION_IMPL *, WT_CONFIG_ITEM *, WT_CONFIG_ITEM *, WT_CKPT *);
+static int __ckpt_load_blk_mods(WT_SESSION_IMPL *, const char *, WT_CKPT *);
static int __ckpt_named(WT_SESSION_IMPL *, const char *, const char *, WT_CKPT *);
static int __ckpt_set(WT_SESSION_IMPL *, const char *, const char *, bool);
static int __ckpt_version_chk(WT_SESSION_IMPL *, const char *, const char *);
/*
+ * __ckpt_load_blk_mods --
+ * Load the block information from the config string.
+ */
+static int
+__ckpt_load_blk_mods(WT_SESSION_IMPL *session, const char *config, WT_CKPT *ckpt)
+{
+ WT_BLKINCR *blkincr;
+ WT_BLOCK_MODS *blk_mod;
+ WT_CONFIG blkconf;
+ WT_CONFIG_ITEM b, k, v;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ uint64_t i;
+
+ conn = S2C(session);
+ if (config == NULL)
+ return (0);
+ /*
+ * We could be reading in a configuration from an earlier release. If the string doesn't exist
+ * then we're done.
+ */
+ if ((ret = __wt_config_getones(session, config, "checkpoint_backup_info", &v)) != 0)
+ return (ret == WT_NOTFOUND ? 0 : ret);
+ __wt_config_subinit(session, &blkconf, &v);
+ /*
+ * Load block lists. Ignore any that have an id string that is not known.
+ *
+ * Remove those not known (TODO).
+ */
+ blkincr = NULL;
+ while ((ret = __wt_config_next(&blkconf, &k, &v)) == 0) {
+ /*
+ * See if this is a valid backup string.
+ */
+ for (i = 0; i < WT_BLKINCR_MAX; ++i) {
+ blkincr = &conn->incr_backups[i];
+ if (blkincr->id_str != NULL && WT_STRING_MATCH(blkincr->id_str, k.str, k.len))
+ break;
+ }
+ if (i == WT_BLKINCR_MAX)
+ /*
+ * This is the place to note that we want to remove an unknown id.
+ */
+ continue;
+
+ /*
+ * We have a valid entry. Load the block information.
+ */
+ blk_mod = &ckpt->backup_blocks[i];
+ WT_RET(__wt_strdup(session, blkincr->id_str, &blk_mod->id_str));
+ WT_RET(__wt_config_subgets(session, &v, "granularity", &b));
+ blk_mod->granularity = (uint64_t)b.val;
+ WT_RET(__wt_config_subgets(session, &v, "nbits", &b));
+ blk_mod->nbits = (uint64_t)b.val;
+ WT_RET(__wt_config_subgets(session, &v, "offset", &b));
+ blk_mod->offset = (uint64_t)b.val;
+ ret = __wt_config_subgets(session, &v, "blocks", &b);
+ WT_RET_NOTFOUND_OK(ret);
+ if (ret != WT_NOTFOUND) {
+ WT_RET(__wt_backup_load_incr(session, &b, &blk_mod->bitstring, blk_mod->nbits));
+ F_SET(blk_mod, WT_BLOCK_MODS_VALID);
+ }
+ }
+ return (ret == WT_NOTFOUND ? 0 : ret);
+}
+
+/*
* __wt_meta_checkpoint --
* Return a file's checkpoint information.
*/
@@ -118,7 +186,7 @@ __ckpt_set(WT_SESSION_IMPL *session, const char *fname, const char *v, bool use_
* use the slower path through configuration parsing functions.
*/
config = newcfg = NULL;
- str = v == NULL ? "checkpoint=(),checkpoint_lsn=" : v;
+ str = v == NULL ? "checkpoint=(),checkpoint_backup_info=(),checkpoint_lsn=" : v;
if (use_base && session->dhandle != NULL) {
WT_ERR(__wt_scr_alloc(session, 0, &tmp));
WT_ASSERT(session, strcmp(session->dhandle->name, fname) == 0);
@@ -315,6 +383,75 @@ __ckpt_compare_order(const void *a, const void *b)
}
/*
+ * __ckpt_valid_blk_mods --
+ * Make sure that this set of block mods reflects the current valid backup identifiers. If so,
+ * there is nothing to do. If not, free up old information and set it up for the current
+ * information.
+ */
+static int
+__ckpt_valid_blk_mods(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
+{
+ WT_BLKINCR *blk;
+ WT_BLOCK_MODS *blk_mod;
+ uint64_t i;
+ bool free, setup;
+
+ WT_ASSERT(session, F_ISSET(ckpt, WT_CKPT_ADD));
+ for (i = 0; i < WT_BLKINCR_MAX; ++i) {
+ blk = &S2C(session)->incr_backups[i];
+ blk_mod = &ckpt->backup_blocks[i];
+
+ /*
+ * Check the state of our block list array compared to the global one. There are
+ * several possibilities:
+ * - There is no global information for this index, nothing to do but free our resources.
+ * - We don't have any backup information locally. Set up our entry.
+ * - Our entry's id string matches the current global information. We just want to add our
+ * information to the existing list.
+ * - Our entry's id string does not match the current one. It is outdated. Free old
+ * resources
+ * and then set up our entry.
+ */
+
+ /* Check if the global entry is valid at our index. */
+ if (!F_ISSET(blk, WT_BLKINCR_VALID)) {
+ free = true;
+ setup = false;
+ } else if (F_ISSET(blk_mod, WT_BLOCK_MODS_VALID) &&
+ WT_STRING_MATCH(blk_mod->id_str, blk->id_str, strlen(blk->id_str))) {
+ /* We match, keep our entry and don't set up. */
+ setup = false;
+ free = false;
+ } else {
+ /* We don't match, free any old information. */
+ free = true;
+ setup = true;
+ }
+
+ /* Free any old information if we need to do so. */
+ if (free && F_ISSET(blk_mod, WT_BLOCK_MODS_VALID)) {
+ __wt_free(session, blk_mod->id_str);
+ __wt_buf_free(session, &blk_mod->bitstring);
+ blk_mod->nbits = 0;
+ blk_mod->granularity = 0;
+ blk_mod->offset = 0;
+ F_CLR(blk_mod, WT_BLOCK_MODS_VALID);
+ }
+
+ /* Set up the block list to point to the current information. */
+ if (setup) {
+ WT_RET(__wt_strdup(session, blk->id_str, &blk_mod->id_str));
+ WT_CLEAR(blk_mod->bitstring);
+ blk_mod->granularity = S2C(session)->incr_granularity;
+ blk_mod->nbits = 0;
+ blk_mod->offset = 0;
+ F_SET(blk_mod, WT_BLOCK_MODS_VALID);
+ }
+ }
+ return (0);
+}
+
+/*
* __wt_meta_ckptlist_get --
* Load all available checkpoint information for a file.
*/
@@ -378,10 +515,22 @@ __wt_meta_ckptlist_get(
maxorder = ckpt->order;
ckpt->order = maxorder + 1;
__wt_seconds(session, &ckpt->sec);
+ /*
+ * Load most recent checkpoint backup blocks to this checkpoint.
+ */
+ WT_ERR(__ckpt_load_blk_mods(session, config, ckpt));
WT_ERR(__wt_meta_block_metadata(session, config, ckpt));
+ /*
+ * Set the add-a-checkpoint flag, and if we're doing incremental backups, request a list of
+ * the checkpoint's modified blocks from the block manager.
+ */
F_SET(ckpt, WT_CKPT_ADD);
+ if (F_ISSET(S2C(session), WT_CONN_INCR_BACKUP)) {
+ F_SET(ckpt, WT_CKPT_BLOCK_MODS);
+ WT_ERR(__ckpt_valid_blk_mods(session, ckpt));
+ }
}
/* Return the array to our caller. */
@@ -578,6 +727,50 @@ __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM
}
/*
+ * __ckpt_blkmod_to_meta --
+ * Add in any modification block string needed, including an empty one.
+ */
+static int
+__ckpt_blkmod_to_meta(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_CKPT *ckpt)
+{
+ WT_BLOCK_MODS *blk;
+ WT_ITEM bitstring;
+ u_int i;
+ bool valid;
+
+ WT_CLEAR(bitstring);
+ valid = false;
+ for (i = 0, blk = &ckpt->backup_blocks[0]; i < WT_BLKINCR_MAX; ++i, ++blk)
+ if (F_ISSET(blk, WT_BLOCK_MODS_VALID))
+ valid = true;
+
+ /*
+ * If the existing block modifications are not valid, there is nothing to do.
+ */
+ if (!valid) {
+ WT_RET(__wt_buf_catfmt(session, buf, ",checkpoint_backup_info="));
+ return (0);
+ }
+
+ /*
+ * We have at least one valid modified block list.
+ */
+ WT_RET(__wt_buf_catfmt(session, buf, ",checkpoint_backup_info=("));
+ for (i = 0, blk = &ckpt->backup_blocks[0]; i < WT_BLKINCR_MAX; ++i, ++blk) {
+ if (!F_ISSET(blk, WT_BLOCK_MODS_VALID))
+ continue;
+ WT_RET(__wt_raw_to_hex(session, blk->bitstring.data, blk->bitstring.size, &bitstring));
+ WT_RET(__wt_buf_catfmt(session, buf, "%s%s=(id=%" PRIu32 ",granularity=%" PRIu64
+ ",nbits=%" PRIu64 ",offset=%" PRIu64 ",blocks=%.*s)",
+ i == 0 ? "" : ",", blk->id_str, i, blk->granularity, blk->nbits, blk->offset,
+ (int)bitstring.size, (char *)bitstring.data));
+ __wt_buf_free(session, &bitstring);
+ }
+ WT_RET(__wt_buf_catfmt(session, buf, ")"));
+ return (0);
+}
+
+/*
* __wt_meta_ckptlist_set --
* Set a file's checkpoint value from the WT_CKPT list.
*/
@@ -593,6 +786,10 @@ __wt_meta_ckptlist_set(
WT_RET(__wt_scr_alloc(session, 1024, &buf));
WT_ERR(__wt_meta_ckptlist_to_meta(session, ckptbase, buf));
+ /* Add backup block modifications for any added checkpoint. */
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (F_ISSET(ckpt, WT_CKPT_ADD))
+ WT_ERR(__ckpt_blkmod_to_meta(session, buf, ckpt));
has_lsn = ckptlsn != NULL;
if (ckptlsn != NULL)
@@ -634,6 +831,9 @@ __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep)
void
__wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
{
+ WT_BLOCK_MODS *blk_mod;
+ uint64_t i;
+
if (ckpt == NULL)
return;
@@ -643,6 +843,12 @@ __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
__wt_buf_free(session, &ckpt->addr);
__wt_buf_free(session, &ckpt->raw);
__wt_free(session, ckpt->bpriv);
+ for (i = 0; i < WT_BLKINCR_MAX; ++i) {
+ blk_mod = &ckpt->backup_blocks[i];
+ __wt_buf_free(session, &blk_mod->bitstring);
+ __wt_free(session, blk_mod->id_str);
+ F_CLR(blk_mod, WT_BLOCK_MODS_VALID);
+ }
WT_CLEAR(*ckpt); /* Clear to prepare for re-use. */
}
diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c
index 044094133ce..a7b2e740caf 100644
--- a/src/third_party/wiredtiger/src/meta/meta_turtle.c
+++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c
@@ -185,7 +185,7 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
{
WT_DECL_RET;
char *metaconf, *unused_value;
- bool exist_backup, exist_bincr, exist_incr, exist_isrc, exist_turtle;
+ bool exist_backup, exist_incr, exist_isrc, exist_turtle;
bool load, loadTurtle;
load = loadTurtle = false;
@@ -212,17 +212,6 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
WT_RET(__wt_fs_exist(session, WT_LOGINCR_SRC, &exist_isrc));
WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist_backup));
WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist_turtle));
- /*
- * Block incremental is different. If it exists, then we have block incremental information we
- * need to keep. Mark the connection as having block-based incremental backup turned on. XXX -
- * Need to call something to read it in and set this up. Maybe here, maybe not.
- */
- WT_RET(__wt_fs_exist(session, WT_BLKINCR_BACKUP, &exist_bincr));
- if (exist_bincr) {
- F_SET(S2C(session), WT_CONN_INCR_BACKUP);
- /* Load content into some structure. Not sure this is the right place. It may be too early.
- */
- }
if (exist_turtle) {
/*
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index d956c9692d9..34dcb676c7c 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -365,22 +365,17 @@ __session_reconfigure(WT_SESSION *wt_session, const char *config)
WT_SESSION_IMPL *session;
session = (WT_SESSION_IMPL *)wt_session;
- /*
- * Indicated as allowed in prepared state, even though not allowed, so that running transaction
- * check below take precedence.
- */
- SESSION_API_CALL_PREPARE_ALLOWED(session, reconfigure, config, cfg);
-
- /*
- * Note that this method only checks keys that are passed in by the application: we don't want
- * to reset other session settings to their default values.
- */
+ SESSION_API_CALL_PREPARE_NOT_ALLOWED(session, reconfigure, config, cfg);
WT_UNUSED(cfg);
WT_ERR(__wt_txn_context_check(session, false));
WT_ERR(__wt_session_reset_cursors(session, false));
+ /*
+ * Note that this method only checks keys that are passed in by the application: we don't want
+ * to reset other session settings to their default values.
+ */
WT_ERR(__wt_txn_reconfigure(session, config));
ret = __wt_config_getones(session, config, "ignore_cache_size", &cval);
@@ -820,7 +815,7 @@ __session_log_printf(WT_SESSION *wt_session, const char *fmt, ...)
va_list ap;
session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, log_printf);
+ SESSION_API_CALL_PREPARE_NOT_ALLOWED_NOCONF(session, log_printf);
va_start(ap, fmt);
ret = __wt_log_vprintf(session, fmt, ap);
@@ -967,8 +962,7 @@ __session_reset(WT_SESSION *wt_session)
WT_SESSION_IMPL *session;
session = (WT_SESSION_IMPL *)wt_session;
-
- SESSION_API_CALL_NOCONF(session, reset);
+ SESSION_API_CALL_PREPARE_NOT_ALLOWED_NOCONF(session, reset);
WT_ERR(__wt_txn_context_check(session, false));
@@ -1084,7 +1078,7 @@ __session_import(WT_SESSION *wt_session, const char *uri, const char *config)
value = NULL;
session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, import);
+ SESSION_API_CALL_PREPARE_NOT_ALLOWED_NOCONF(session, import);
WT_ERR(__wt_inmem_unsupported_op(session, NULL));
@@ -1605,11 +1599,7 @@ __session_begin_transaction(WT_SESSION *wt_session, const char *config)
WT_SESSION_IMPL *session;
session = (WT_SESSION_IMPL *)wt_session;
- /*
- * Indicated as allowed in prepared state, even though not allowed, so that running transaction
- * check below take precedence.
- */
- SESSION_API_CALL_PREPARE_ALLOWED(session, begin_transaction, config, cfg);
+ SESSION_API_CALL_PREPARE_NOT_ALLOWED(session, begin_transaction, config, cfg);
WT_STAT_CONN_INCR(session, txn_begin);
WT_ERR(__wt_txn_context_check(session, false));
@@ -1632,10 +1622,10 @@ __session_commit_transaction(WT_SESSION *wt_session, const char *config)
WT_TXN *txn;
session = (WT_SESSION_IMPL *)wt_session;
+ txn = &session->txn;
SESSION_API_CALL_PREPARE_ALLOWED(session, commit_transaction, config, cfg);
WT_STAT_CONN_INCR(session, txn_commit);
- txn = &session->txn;
if (F_ISSET(txn, WT_TXN_PREPARE)) {
WT_STAT_CONN_INCR(session, txn_prepare_commit);
WT_STAT_CONN_DECR(session, txn_prepare_active);
@@ -1645,20 +1635,34 @@ __session_commit_transaction(WT_SESSION *wt_session, const char *config)
/* Permit the commit if the transaction failed, but was read-only. */
if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0) {
- __wt_err(session, EINVAL, "failed transaction requires rollback%s%s",
- txn->rollback_reason == NULL ? "" : ": ",
+ __wt_err(session, EINVAL,
+ "failed %s"
+ "transaction requires rollback%s%s",
+ F_ISSET(txn, WT_TXN_PREPARE) ? "prepared " : "", txn->rollback_reason == NULL ? "" : ": ",
txn->rollback_reason == NULL ? "" : txn->rollback_reason);
ret = EINVAL;
}
- if (ret == 0)
+err:
+ /*
+ * We might have failed because an illegal configuration was specified or because there wasn't a
+ * transaction running, and we check the former as part of the api macros before we check the
+ * latter. Deal with it here: if there's an error and a transaction is running, roll it back.
+ */
+ if (ret == 0) {
+ F_SET(session, WT_SESSION_RESOLVING_TXN);
ret = __wt_txn_commit(session, cfg);
- else {
+ F_CLR(session, WT_SESSION_RESOLVING_TXN);
+ } else if (F_ISSET(txn, WT_TXN_RUNNING)) {
+ if (F_ISSET(txn, WT_TXN_PREPARE))
+ WT_PANIC_RET(session, ret, "failed to commit prepared transaction, failing the system");
+
WT_TRET(__wt_session_reset_cursors(session, false));
+ F_SET(session, WT_SESSION_RESOLVING_TXN);
WT_TRET(__wt_txn_rollback(session, cfg));
+ F_CLR(session, WT_SESSION_RESOLVING_TXN);
}
-err:
API_END_RET(session, ret);
}
@@ -1679,7 +1683,9 @@ __session_prepare_transaction(WT_SESSION *wt_session, const char *config)
WT_ERR(__wt_txn_context_check(session, true));
+ F_SET(session, WT_SESSION_RESOLVING_TXN);
WT_ERR(__wt_txn_prepare(session, cfg));
+ F_CLR(session, WT_SESSION_RESOLVING_TXN);
err:
API_END_RET(session, ret);
@@ -1730,7 +1736,9 @@ __session_rollback_transaction(WT_SESSION *wt_session, const char *config)
WT_TRET(__wt_session_reset_cursors(session, false));
+ F_SET(session, WT_SESSION_RESOLVING_TXN);
WT_TRET(__wt_txn_rollback(session, cfg));
+ F_CLR(session, WT_SESSION_RESOLVING_TXN);
err:
API_END_RET(session, ret);
@@ -1753,7 +1761,8 @@ __session_timestamp_transaction(WT_SESSION *wt_session, const char *config)
SESSION_API_CALL_PREPARE_ALLOWED(session, timestamp_transaction, NULL, cfg);
cfg[1] = config;
#endif
- WT_TRET(__wt_txn_set_timestamp(session, cfg));
+
+ ret = __wt_txn_set_timestamp(session, cfg);
err:
API_END_RET(session, ret);
}
@@ -1770,7 +1779,8 @@ __session_query_timestamp(WT_SESSION *wt_session, char *hex_timestamp, const cha
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL_PREPARE_ALLOWED(session, query_timestamp, config, cfg);
- WT_TRET(__wt_txn_query_timestamp(session, hex_timestamp, cfg, false));
+
+ ret = __wt_txn_query_timestamp(session, hex_timestamp, cfg, false);
err:
API_END_RET(session, ret);
}
@@ -1788,7 +1798,7 @@ __session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange)
uint64_t pinned;
session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, pinned_range);
+ SESSION_API_CALL_PREPARE_NOT_ALLOWED_NOCONF(session, transaction_pinned_range);
txn_state = WT_SESSION_TXN_STATE(session);
@@ -1837,11 +1847,7 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config)
uint64_t time_start, time_stop;
session = (WT_SESSION_IMPL *)wt_session;
- /*
- * Indicated as allowed in prepared state, even though not allowed, so that running transaction
- * check below take precedence.
- */
- SESSION_API_CALL_PREPARE_ALLOWED(session, transaction_sync, config, cfg);
+ SESSION_API_CALL_PREPARE_NOT_ALLOWED(session, transaction_sync, config, cfg);
WT_STAT_CONN_INCR(session, txn_sync);
conn = S2C(session);
@@ -1930,13 +1936,8 @@ __session_checkpoint(WT_SESSION *wt_session, const char *config)
WT_SESSION_IMPL *session;
session = (WT_SESSION_IMPL *)wt_session;
-
WT_STAT_CONN_INCR(session, txn_checkpoint);
- /*
- * Indicated as allowed in prepared state, even though not allowed, so that running transaction
- * check below take precedence.
- */
- SESSION_API_CALL_PREPARE_ALLOWED(session, checkpoint, config, cfg);
+ SESSION_API_CALL_PREPARE_NOT_ALLOWED(session, checkpoint, config, cfg);
WT_ERR(__wt_inmem_unsupported_op(session, NULL));
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 8dc0a44fe50..5d98ce93152 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -623,26 +623,37 @@ __wt_txn_release(WT_SESSION_IMPL *session)
}
/*
- * __txn_resolve_prepared_op --
- * Resolve a transaction's operations indirect references. In case of prepared transactions, the
- * prepared updates could be evicted using cache overflow mechanism. Transaction operations
- * referring to these prepared updates would be referring to them using indirect references (i.e
- * keys/recnos), which need to be resolved as part of that transaction commit/rollback. If no
- * updates are resolved throw an error. Increment resolved update count for each resolved update
- * count we locate.
+ * __txn_search_prepared_op --
+ * Search for an operation's prepared update.
*/
static int
-__txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit)
+__txn_search_prepared_op(
+ WT_SESSION_IMPL *session, WT_TXN_OP *op, WT_CURSOR **cursorp, WT_UPDATE **updp)
{
WT_CURSOR *cursor;
WT_DECL_RET;
WT_TXN *txn;
- WT_UPDATE *upd;
+ uint32_t txn_flags;
const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL};
+ *updp = NULL;
+
txn = &session->txn;
- WT_RET(__wt_open_cursor(session, op->btree->dhandle->name, NULL, open_cursor_cfg, &cursor));
+ cursor = *cursorp;
+ if (cursor == NULL || ((WT_CURSOR_BTREE *)cursor)->btree->id != op->btree->id) {
+ *cursorp = NULL;
+ if (cursor != NULL)
+ WT_RET(cursor->close(cursor));
+ WT_RET(__wt_open_cursor(session, op->btree->dhandle->name, NULL, open_cursor_cfg, &cursor));
+ *cursorp = cursor;
+ }
+
+ /*
+ * Transaction error and prepare are cleared temporarily as cursor functions are not allowed
+ * after an error or a prepared transaction.
+ */
+ txn_flags = FLD_MASK(txn->flags, WT_TXN_ERROR | WT_TXN_PREPARE);
switch (op->type) {
case WT_TXN_OP_BASIC_COL:
@@ -651,30 +662,42 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit)
break;
case WT_TXN_OP_BASIC_ROW:
case WT_TXN_OP_INMEM_ROW:
- /*
- * Transaction prepare is cleared temporarily as cursor functions are not allowed for
- * prepared transactions.
- */
- F_CLR(txn, WT_TXN_PREPARE);
+ F_CLR(txn, txn_flags);
__wt_cursor_set_raw_key(cursor, &op->u.op_row.key);
- F_SET(txn, WT_TXN_PREPARE);
+ F_SET(txn, txn_flags);
break;
case WT_TXN_OP_NONE:
case WT_TXN_OP_REF_DELETE:
case WT_TXN_OP_TRUNCATE_COL:
case WT_TXN_OP_TRUNCATE_ROW:
- WT_ERR_ASSERT(session, false, WT_PANIC, "invalid prepared operation update type");
+ WT_RET_ASSERT(session, false, WT_PANIC, "invalid prepared operation update type");
break;
}
- WT_WITH_BTREE(
- session, op->btree, ret = __wt_btcur_search_uncommitted((WT_CURSOR_BTREE *)cursor, &upd));
- WT_ERR(ret);
-
- /* If we haven't found anything then there's an error. */
- WT_ERR_ASSERT(session, upd != NULL, WT_NOTFOUND,
+ F_CLR(txn, txn_flags);
+ WT_WITH_BTREE(session, op->btree, ret = __wt_btcur_search_uncommitted(cursor, updp));
+ F_SET(txn, txn_flags);
+ WT_RET(ret);
+ WT_RET_ASSERT(session, *updp != NULL, WT_NOTFOUND,
"unable to locate update associated with a prepared operation");
+ return (0);
+}
+
+/*
+ * __txn_resolve_prepared_op --
+ * Resolve a transaction's operations indirect references.
+ */
+static int
+__txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, WT_CURSOR **cursorp)
+{
+ WT_TXN *txn;
+ WT_UPDATE *upd;
+
+ txn = &session->txn;
+
+ WT_RET(__txn_search_prepared_op(session, op, cursorp, &upd));
+
for (; upd != NULL; upd = upd->next) {
/*
* Aborted updates can exist in the update chain of our txn. Generally this will occur due
@@ -718,9 +741,7 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit)
__txn_resolve_prepared_update(session, upd);
}
-err:
- WT_TRET(cursor->close(cursor));
- return (ret);
+ return (0);
}
/*
@@ -737,7 +758,6 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session)
WT_UPDATE *upd;
wt_timestamp_t durable_op_timestamp, op_timestamp, prev_op_timestamp;
u_int i;
- const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL};
bool op_zero_ts, upd_zero_ts;
txn = &session->txn;
@@ -749,24 +769,18 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session)
*/
if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) && !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
txn->mod_count != 0)
- WT_RET_MSG(session, EINVAL,
- "commit_timestamp required and "
- "none set on this transaction");
+ WT_RET_MSG(session, EINVAL, "commit_timestamp required and none set on this transaction");
if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) && F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
txn->mod_count != 0)
- WT_RET_MSG(session, EINVAL,
- "no commit_timestamp required and "
- "timestamp set on this transaction");
+ WT_RET_MSG(
+ session, EINVAL, "no commit_timestamp required and timestamp set on this transaction");
if (F_ISSET(txn, WT_TXN_TS_DURABLE_ALWAYS) && !F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) &&
txn->mod_count != 0)
- WT_RET_MSG(session, EINVAL,
- "durable_timestamp required and "
- "none set on this transaction");
+ WT_RET_MSG(session, EINVAL, "durable_timestamp required and none set on this transaction");
if (F_ISSET(txn, WT_TXN_TS_DURABLE_NEVER) && F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) &&
txn->mod_count != 0)
WT_RET_MSG(session, EINVAL,
- "no durable_timestamp required and "
- "durable timestamp set on this transaction");
+ "no durable_timestamp required and durable timestamp set on this transaction");
/*
* If we're not doing any key consistency checking, we're done.
@@ -778,92 +792,110 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session)
* Error on any valid update structures for the same key that are at a later timestamp or use
* timestamps inconsistently.
*/
- for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++)
- if (op->type == WT_TXN_OP_BASIC_COL || op->type == WT_TXN_OP_BASIC_ROW) {
- /*
- * Search for prepared updates, so that they will be restored, if moved to lookaside.
- */
- if (F_ISSET(txn, WT_TXN_PREPARE)) {
- WT_RET(__wt_open_cursor(
- session, op->btree->dhandle->name, NULL, open_cursor_cfg, &cursor));
- F_CLR(txn, WT_TXN_PREPARE);
- if (op->type == WT_TXN_OP_BASIC_ROW)
- __wt_cursor_set_raw_key(cursor, &op->u.op_row.key);
- else
- ((WT_CURSOR_BTREE *)cursor)->iface.recno = op->u.op_col.recno;
- F_SET(txn, WT_TXN_PREPARE);
- WT_WITH_BTREE(session, op->btree,
- ret = __wt_btcur_search_uncommitted((WT_CURSOR_BTREE *)cursor, &upd));
- if (ret != 0)
- WT_RET_MSG(session, EINVAL, "prepared update restore failed");
- } else
- upd = op->u.op_upd;
-
- WT_ASSERT(session, upd != NULL);
- op_timestamp = upd->start_ts;
+ for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
+ switch (op->type) {
+ case WT_TXN_OP_BASIC_COL:
+ case WT_TXN_OP_INMEM_COL:
+ case WT_TXN_OP_BASIC_ROW:
+ case WT_TXN_OP_INMEM_ROW:
+ break;
+ case WT_TXN_OP_NONE:
+ case WT_TXN_OP_REF_DELETE:
+ case WT_TXN_OP_TRUNCATE_COL:
+ case WT_TXN_OP_TRUNCATE_ROW:
+ continue;
+ }
- /*
- * Skip over any aborted update structures, internally created update structures or ones
- * from our own transaction.
- */
- while (upd != NULL &&
- (upd->txnid == WT_TXN_ABORTED || upd->txnid == WT_TXN_NONE || upd->txnid == txn->id))
- upd = upd->next;
+ /* Search for prepared updates, so that they will be restored, if moved to lookaside. */
+ if (F_ISSET(txn, WT_TXN_PREPARE))
+ WT_ERR(__txn_search_prepared_op(session, op, &cursor, &upd));
+ else
+ upd = op->u.op_upd;
- /*
- * Check the timestamp on this update with the first valid update in the chain. They're
- * in most recent order.
- */
- if (upd != NULL) {
- prev_op_timestamp = upd->start_ts;
- durable_op_timestamp = upd->durable_ts;
- }
+ op_timestamp = upd->start_ts;
- /*
- * We no longer need to access the update structure so it's safe to release our
- * reference to the page.
- */
- if (cursor != NULL) {
- WT_ASSERT(session, F_ISSET(txn, WT_TXN_PREPARE));
- WT_RET(cursor->close(cursor));
- cursor = NULL;
- }
+ /*
+ * Skip over any aborted update structures, internally created update structures or ones
+ * from our own transaction.
+ */
+ while (upd != NULL &&
+ (upd->txnid == WT_TXN_ABORTED || upd->txnid == WT_TXN_NONE || upd->txnid == txn->id))
+ upd = upd->next;
- if (upd == NULL)
- continue;
- /*
- * Check for consistent per-key timestamp usage. If timestamps are or are not used
- * originally then they should be used the same way always. For this transaction,
- * timestamps are in use anytime the commit timestamp is set. Check timestamps are used
- * in order.
- */
- op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
- upd_zero_ts = prev_op_timestamp == WT_TS_NONE;
- if (op_zero_ts != upd_zero_ts) {
- WT_RET(__wt_verbose_dump_update(session, upd));
- WT_RET(__wt_verbose_dump_txn_one(session, &session->txn, EINVAL,
- "per-key timestamps used inconsistently, dumping relevant information"));
- }
- /*
- * If we aren't using timestamps for this transaction then we are done checking. Don't
- * check the timestamp because the one in the transaction is not cleared.
- */
- if (op_zero_ts)
- continue;
+ /*
+ * Check the timestamp on this update with the first valid update in the chain. They're in
+ * most recent order.
+ */
+ if (upd != NULL) {
+ prev_op_timestamp = upd->start_ts;
+ durable_op_timestamp = upd->durable_ts;
+ }
- /*
- * Only if the update structure doesn't have a timestamp then use the one in the
- * transaction structure.
- */
- if (op_timestamp == WT_TS_NONE)
- op_timestamp = txn->commit_timestamp;
- if (F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS) && op_timestamp < prev_op_timestamp)
- WT_RET_MSG(session, EINVAL, "out of order commit timestamps");
- if (F_ISSET(txn, WT_TXN_TS_DURABLE_KEYS) &&
- txn->durable_timestamp < durable_op_timestamp)
- WT_RET_MSG(session, EINVAL, "out of order durable timestamps");
+ if (upd == NULL)
+ continue;
+ /*
+ * Check for consistent per-key timestamp usage. If timestamps are or are not used
+ * originally then they should be used the same way always. For this transaction, timestamps
+ * are in use anytime the commit timestamp is set. Check timestamps are used in order.
+ */
+ op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
+ upd_zero_ts = prev_op_timestamp == WT_TS_NONE;
+ if (op_zero_ts != upd_zero_ts) {
+ WT_ERR(__wt_verbose_dump_update(session, upd));
+ WT_ERR(__wt_verbose_dump_txn_one(session, &session->txn, EINVAL,
+ "per-key timestamps used inconsistently, dumping relevant information"));
}
- return (0);
+ /*
+ * If we aren't using timestamps for this transaction then we are done checking. Don't check
+ * the timestamp because the one in the transaction is not cleared.
+ */
+ if (op_zero_ts)
+ continue;
+
+ /*
+ * Only if the update structure doesn't have a timestamp then use the one in the transaction
+ * structure.
+ */
+ if (op_timestamp == WT_TS_NONE)
+ op_timestamp = txn->commit_timestamp;
+ if (F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS) && op_timestamp < prev_op_timestamp)
+ WT_ERR_MSG(session, EINVAL, "out of order commit timestamps");
+ if (F_ISSET(txn, WT_TXN_TS_DURABLE_KEYS) && txn->durable_timestamp < durable_op_timestamp)
+ WT_ERR_MSG(session, EINVAL, "out of order durable timestamps");
+ }
+
+err:
+ if (cursor != NULL)
+ WT_TRET(cursor->close(cursor));
+ return (ret);
+}
+
+/*
+ * __txn_mod_compare --
+ * Qsort comparison routine for transaction modify list.
+ */
+static int WT_CDECL
+__txn_mod_compare(const void *a, const void *b)
+{
+ WT_TXN_OP *aopt, *bopt;
+
+ aopt = (WT_TXN_OP *)a;
+ bopt = (WT_TXN_OP *)b;
+
+ /* If the files are different, order by ID. */
+ if (aopt->btree->id != bopt->btree->id)
+ return (aopt->btree->id < bopt->btree->id);
+
+ /*
+ * If the files are the same, order by the key. Row-store collators require WT_SESSION pointers,
+ * and we don't have one. Compare the keys if there's no collator, otherwise return equality.
+ * Column-store is always easy.
+ */
+ if (aopt->type == WT_TXN_OP_BASIC_ROW || aopt->type == WT_TXN_OP_INMEM_ROW)
+ return (aopt->btree->collator == NULL ?
+ __wt_lex_compare(&aopt->u.op_row.key, &bopt->u.op_row.key) :
+ 0);
+ return (aopt->u.op_col.recno < bopt->u.op_col.recno);
}
/*
@@ -875,6 +907,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_CONFIG_ITEM cval;
WT_CONNECTION_IMPL *conn;
+ WT_CURSOR *cursor;
WT_DECL_RET;
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
@@ -887,6 +920,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
txn = &session->txn;
conn = S2C(session);
+ cursor = NULL;
txn_global = &conn->txn_global;
locked = false;
prepare = F_ISSET(txn, WT_TXN_PREPARE);
@@ -923,8 +957,15 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
"durable_timestamp should not be specified for non-prepared transaction");
}
- if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
- WT_ASSERT(session, txn->commit_timestamp <= txn->durable_timestamp);
+ WT_ASSERT(session,
+ !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) || txn->commit_timestamp <= txn->durable_timestamp);
+
+ /*
+ * Resolving prepared updates is expensive. Sort prepared modifications so all updates for each
+ * page within each file are done at the same time.
+ */
+ if (prepare)
+ __wt_qsort(txn->mod, txn->mod_count, sizeof(WT_TXN_OP), __txn_mod_compare);
WT_ERR(__txn_commit_timestamps_assert(session));
@@ -1035,7 +1076,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
* the work will happen on a different modification in this txn.
*/
if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED))
- WT_ERR(__txn_resolve_prepared_op(session, op, true));
+ WT_ERR(__txn_resolve_prepared_op(session, op, true, &cursor));
}
break;
case WT_TXN_OP_REF_DELETE:
@@ -1051,6 +1092,11 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
}
txn->mod_count = 0;
+ if (cursor != NULL) {
+ WT_ERR(cursor->close(cursor));
+ cursor = NULL;
+ }
+
/*
* If durable is set, we'll try to update the global durable timestamp with that value. If
* durable isn't set, durable is implied to be the same as commit so we'll use that instead.
@@ -1103,6 +1149,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
return (0);
err:
+ if (cursor != NULL)
+ WT_TRET(cursor->close(cursor));
+
/*
* If anything went wrong, roll back.
*
@@ -1111,6 +1160,14 @@ err:
*/
if (locked)
__wt_readunlock(session, &txn_global->visibility_rwlock);
+
+ /*
+ * Check for a prepared transaction, and quit: we can't ignore the error and we can't roll back
+ * a prepared transaction.
+ */
+ if (prepare)
+ WT_PANIC_RET(session, ret, "failed to commit prepared transaction, failing the system");
+
WT_TRET(__wt_txn_rollback(session, cfg));
return (ret);
}
@@ -1242,6 +1299,7 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
int
__wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
{
+ WT_CURSOR *cursor;
WT_DECL_RET;
WT_TXN *txn;
WT_TXN_OP *op;
@@ -1251,6 +1309,7 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
WT_UNUSED(cfg);
+ cursor = NULL;
txn = &session->txn;
prepare = F_ISSET(txn, WT_TXN_PREPARE);
readonly = txn->mod_count == 0;
@@ -1261,6 +1320,13 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
if (txn->notify != NULL)
WT_TRET(txn->notify->notify(txn->notify, (WT_SESSION *)session, txn->id, 0));
+ /*
+ * Resolving prepared updates is expensive. Sort prepared modifications so all updates for each
+ * page within each file are done at the same time.
+ */
+ if (prepare)
+ __wt_qsort(txn->mod, txn->mod_count, sizeof(WT_TXN_OP), __txn_mod_compare);
+
/* Rollback and free updates. */
for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
/* Metadata updates should never be rolled back. */
@@ -1286,7 +1352,7 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
* the work will happen on a different modification in this txn.
*/
if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED))
- WT_RET(__txn_resolve_prepared_op(session, op, false));
+ WT_TRET(__txn_resolve_prepared_op(session, op, false, &cursor));
}
break;
case WT_TXN_OP_REF_DELETE:
@@ -1306,6 +1372,11 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
}
txn->mod_count = 0;
+ if (cursor != NULL) {
+ WT_TRET(cursor->close(cursor));
+ cursor = NULL;
+ }
+
__wt_txn_release(session);
/*
* We're between transactions, if we need to block for eviction, it's a good time to do so. Note
@@ -1313,6 +1384,7 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
*/
if (!readonly)
WT_IGNORE_RET(__wt_cache_eviction_check(session, false, false, NULL));
+
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index e960ec03d48..200f84cc8c1 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -1226,6 +1226,7 @@ __checkpoint_lock_dirty_tree_int(WT_SESSION_IMPL *session, bool is_checkpoint, b
"cannot be deleted during a hot backup",
ckpt->name);
}
+
/*
* Mark old checkpoints that are being deleted and figure out which trees we can skip in this
* checkpoint.
diff --git a/src/third_party/wiredtiger/src/utilities/util_load.c b/src/third_party/wiredtiger/src/utilities/util_load.c
index 4f1d1bcb1f1..44672a66854 100644
--- a/src/third_party/wiredtiger/src/utilities/util_load.c
+++ b/src/third_party/wiredtiger/src/utilities/util_load.c
@@ -449,7 +449,7 @@ config_update(WT_SESSION *session, char **list)
if ((ret = __wt_config_merge((WT_SESSION_IMPL *)session, cfg,
"filename=,id=,"
- "checkpoint=,checkpoint_lsn=,version=,source=,",
+ "checkpoint=,checkpoint_backup_info=,checkpoint_lsn=,version=,source=,",
&p)) != 0)
break;
diff --git a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
index 388b079f842..5631dfb1016 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
@@ -278,8 +278,12 @@ thread_insert(void *arg)
else if (ret == WT_ROLLBACK)
threadargs->rollbacks++;
}
- if (sharedopts->remove)
- testutil_check(session->commit_transaction(session, NULL));
+ if (sharedopts->remove) {
+ if (ret == WT_ROLLBACK)
+ testutil_check(session->rollback_transaction(session, NULL));
+ else
+ testutil_check(session->commit_transaction(session, NULL));
+ }
if (i % 1000 == 0 && i != 0) {
if (i % 10000 == 0)
fprintf(stderr, "*");
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index e230bd0ad0a..e7eaaa15de5 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -162,7 +162,7 @@ functions:
set -o errexit
set -o verbose
for i in $(seq ${times|1}); do
- ./t -1 -c ${config|../../../test/format/CONFIG.stress} ${extra_args|} 2>&1
+ ./t -1 -c ${config|../../../test/format/CONFIG.stress} ${extra_args|} || ( [ -f RUNDIR/CONFIG ] && cat RUNDIR/CONFIG ) 2>&1
done
"format test script":
command: shell.exec
@@ -172,7 +172,8 @@ functions:
set -o errexit
set -o verbose
for i in $(seq ${times|1}); do
- ${test_env_vars|} ${format_test_setting|} ./format.sh ${smp_command|} ${format_test_script_args|} 2>&1
+ ${format_test_setting|}
+ ${test_env_vars|} ./format.sh ${smp_command|} ${format_test_script_args|} 2>&1
done
"many dbs test":
command: shell.exec
@@ -1637,18 +1638,20 @@ tasks:
vars:
make_command: make all
- func: "make check all"
-
+
+ # Use format.sh to run tests in parallel (x4) for just under two hours (the
+ # default Evergreen timeout) on the higher spec build distros. This allows
+ # us to perform multiple test runs while ensuring a long-running config does
+ # not result in an Evergreen test timeout failure.
- name: linux-directio
depends_on:
- name: compile
commands:
- func: "fetch artifacts"
- func: "compile wiredtiger"
- - func: "format test"
+ - func: "format test script"
vars:
- times: 3
- config: ../../../test/format/CONFIG.stress
- extra_args: -C "direct_io=[data]"
+ format_test_script_args: -t 110 -j 4 direct_io=1
- name: format-linux-no-ftruncate
depends_on:
@@ -1940,6 +1943,16 @@ tasks:
# run for 24 hours ( 24 * 60 = 1440 minutes), don't stop at failed tests, use default config
format_test_script_args: -t 1440
+ - name: format-stress-sanitizer-lsm-test
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger address sanitizer"
+ - func: "format test script"
+ vars:
+ test_env_vars: ASAN_OPTIONS="detect_leaks=1:abort_on_error=1:disable_coredump=0" ASAN_SYMBOLIZER_PATH=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer
+ # Run for 30 mins, and explicitly set data_source to LSM with a large cache
+ format_test_script_args: -t 30 data_source=lsm cache_minimum=5000
+
- name: format-stress-sanitizer-smoke-test
#set a 7 hours timeout
exec_timeout_secs: 25200
@@ -1983,22 +1996,6 @@ tasks:
# At the time of writing this script, one call to underlying scripts takes about ~15 mins to finish in worst case.
# We are giving an extra ~20% room for vairance in execution time.
times: 80
-
- # This is special task until lz4 issues are resolved for zSeries distros
- - name: recovery-stress-test-without-lz4
- #set a 25 hours timeout
- exec_timeout_secs: 90000
- commands:
- - func: "get project"
- - func: "compile wiredtiger"
- vars:
- posix_configure_flags: --enable-strict --enable-diagnostic --with-builtins=snappy,zlib
- - func: "recovery stress test script"
- vars:
- # Repeat this script enough times to make this task run for 24 hours
- # At the time of writing this script, one call to underlying scripts takes about 8 mins to finish in worst case.
- # We are giving an extra ~20% room for vairance in execution time.
- times: 120
- name: split-stress-test
commands:
@@ -2140,6 +2137,7 @@ buildvariants:
- name: compile-ubsan
- name: ubsan-test
- name: linux-directio
+ distros: ubuntu1804-build
- name: syscall-linux
- name: make-check-asan-test
- name: configure-combinations
@@ -2155,6 +2153,7 @@ buildvariants:
- name: recovery-stress-test
- name: format-stress-sanitizer-test
- name: format-stress-sanitizer-smoke-test
+ - name: format-stress-sanitizer-lsm-test
- name: split-stress-test
- name: format-stress-test
- name: format-stress-smoke-test
@@ -2231,6 +2230,7 @@ buildvariants:
- name: compile-ubsan
- name: ubsan-test
- name: linux-directio
+ distros: rhel80-build
- name: syscall-linux
- name: compile-asan
- name: make-check-asan-test
@@ -2328,6 +2328,9 @@ buildvariants:
tasks:
- name: compile
- name: unit-test
+ - name: format-stress-ppc-zseries-test
+ - name: format-stress-smoke-test
+ - name: format-wtperf-test
- name: ubuntu1804-zseries
display_name: Ubuntu 18.04 zSeries
@@ -2341,5 +2344,7 @@ buildvariants:
tasks:
- name: compile
- name: unit-test
- - name: recovery-stress-test-without-lz4
+ - name: recovery-stress-test
- name: split-stress-test
+ - name: format-stress-ppc-zseries-test
+ - name: format-stress-smoke-test
diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c
index 41e3e6d374f..82863516877 100644
--- a/src/third_party/wiredtiger/test/format/config.c
+++ b/src/third_party/wiredtiger/test/format/config.c
@@ -52,6 +52,12 @@ static void config_reset(void);
static void config_transaction(void);
/*
+ * We currently disable random LSM testing, that is, it can be specified explicitly but we won't
+ * randomly choose LSM as a data_source configuration.
+ */
+#define DISABLE_RANDOM_LSM_TESTING 1
+
+/*
* config_setup --
* Initialize configuration for a run.
*/
@@ -106,14 +112,15 @@ config_setup(void)
config_single("data_source=file", false);
break;
case 2: /* 20% */
- /*
- * LSM requires a row-store and backing disk.
- *
- * Configuring truncation or timestamps results in LSM cache problems, don't
- * configure LSM if those set.
- *
- * XXX Remove the timestamp test when WT-4162 resolved.
- */
+#if !defined(DISABLE_RANDOM_LSM_TESTING)
+ /*
+ * LSM requires a row-store and backing disk.
+ *
+ * Configuring truncation or timestamps results in LSM cache problems, don't configure
+ * LSM if those set.
+ *
+ * XXX Remove the timestamp test when WT-4162 resolved.
+ */
if (g.type != ROW || g.c_in_memory)
break;
if (config_is_perm("transaction_timestamps") && g.c_txn_timestamps)
@@ -121,6 +128,7 @@ config_setup(void)
if (config_is_perm("truncate") && g.c_truncate)
break;
config_single("data_source=lsm", false);
+#endif
break;
case 3:
case 4:
diff --git a/src/third_party/wiredtiger/test/format/format.sh b/src/third_party/wiredtiger/test/format/format.sh
index 83a56e4d84d..0efc9858e82 100755
--- a/src/third_party/wiredtiger/test/format/format.sh
+++ b/src/third_party/wiredtiger/test/format/format.sh
@@ -214,7 +214,9 @@ report_failure()
echo "$name: job in $dir failed"
echo "$name: $dir log:"
- sed 's/^/ > /' < $log
+ sed 's/^/ /' < $log
+ echo "$name: $dir/CONFIG:"
+ sed 's/^/ /' < $dir/CONFIG
}
# Resolve/cleanup completed jobs.
diff --git a/src/third_party/wiredtiger/test/suite/test_assert02.py b/src/third_party/wiredtiger/test/suite/test_assert02.py
index 46b4f191917..d4bcaf639ac 100644
--- a/src/third_party/wiredtiger/test/suite/test_assert02.py
+++ b/src/third_party/wiredtiger/test/suite/test_assert02.py
@@ -68,8 +68,7 @@ class test_assert02(wttest.WiredTigerTestCase, suite_subprocess):
c_never = self.session.open_cursor(uri_never)
c_none = self.session.open_cursor(uri_none)
self.session.begin_transaction()
- self.session.timestamp_transaction(
- 'commit_timestamp=' + timestamp_str(1))
+ self.session.timestamp_transaction('commit_timestamp=' + timestamp_str(1))
c_always['key1'] = 'value1'
c_def['key1'] = 'value1'
c_never['key1'] = 'value1'
@@ -103,7 +102,7 @@ class test_assert02(wttest.WiredTigerTestCase, suite_subprocess):
msg = "/timestamp set on this transaction/"
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:self.assertEquals(c_never.search(), 0), msg)
- self.session.commit_transaction()
+ self.session.rollback_transaction()
c_always.close()
c_def.close()
c_never.close()
@@ -131,7 +130,7 @@ class test_assert02(wttest.WiredTigerTestCase, suite_subprocess):
msg = "/none set on this transaction/"
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:self.assertEquals(c_always.search(), 0), msg)
- self.session.commit_transaction()
+ self.session.rollback_transaction()
c_always.close()
c_def.close()
c_never.close()
diff --git a/src/third_party/wiredtiger/test/suite/test_assert05.py b/src/third_party/wiredtiger/test/suite/test_assert05.py
index ab7f8265930..d5c697989ef 100644
--- a/src/third_party/wiredtiger/test/suite/test_assert05.py
+++ b/src/third_party/wiredtiger/test/suite/test_assert05.py
@@ -73,10 +73,15 @@ class test_assert05(wttest.WiredTigerTestCase, suite_subprocess):
if (use_ts != 'never'):
self.session.commit_transaction()
else:
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
msg = "/timestamp set on this transaction/"
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:self.assertEquals(self.session.commit_transaction(),
0), msg)
+ '''
+ self.session.rollback_transaction()
c.close()
self.count += 1
@@ -96,10 +101,15 @@ class test_assert05(wttest.WiredTigerTestCase, suite_subprocess):
if (use_ts != 'always'):
self.session.commit_transaction()
else:
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
msg = "/durable_timestamp is required for a prepared/"
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:self.assertEquals(self.session.commit_transaction(),
0), msg)
+ '''
+ self.session.rollback_transaction()
self.count += 1
c.close()
diff --git a/src/third_party/wiredtiger/test/suite/test_assert06.py b/src/third_party/wiredtiger/test/suite/test_assert06.py
index bc7532cd648..250077cf36f 100644
--- a/src/third_party/wiredtiger/test/suite/test_assert06.py
+++ b/src/third_party/wiredtiger/test/suite/test_assert06.py
@@ -111,6 +111,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.session.commit_transaction()
c.close()
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
c = self.session.open_cursor(uri)
self.session.begin_transaction()
c['key_ts1'] = 'value4'
@@ -118,6 +121,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.commit_transaction(), msg_ooo)
c.close()
+ '''
# Detect not using a timestamp.
c = self.session.open_cursor(uri)
@@ -137,6 +141,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.session.commit_transaction()
c.close()
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
c = self.session.open_cursor(uri)
self.session.begin_transaction()
c['key_nots'] = 'value3'
@@ -145,6 +152,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
lambda: self.session.commit_transaction(), msg_usage)
c.close()
self.session.checkpoint()
+ '''
c = self.session.open_cursor(uri)
self.assertEquals(c['key_ts1'], 'value5')
@@ -191,6 +199,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.session.commit_transaction()
c.close()
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
# Modify the data item at timestamp 1. We should detect it is wrong.
c = self.session.open_cursor(uri)
self.session.begin_transaction()
@@ -199,6 +210,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.commit_transaction(), msg_ooo)
c.close()
+ '''
# Make sure we can successfully add a different key at timestamp 1.
c = self.session.open_cursor(uri)
@@ -224,6 +236,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.apply_timestamps(15)
self.session.commit_transaction()
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
c = self.session.open_cursor(uri)
self.session.begin_transaction()
c['key_ts3'] = 'value13'
@@ -232,6 +247,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.commit_transaction(), msg_ooo)
c.close()
+ '''
c = self.session.open_cursor(uri)
self.assertEquals(c['key_ts3'], 'value10')
@@ -248,6 +264,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.apply_timestamps(13)
self.session.commit_transaction()
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
c = self.session.open_cursor(uri)
self.session.begin_transaction()
c['key_ts4'] = 'value13'
@@ -255,6 +274,10 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.commit_transaction(), msg_ooo)
c.close()
+ '''
+
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
# Make sure multiple update attempts still fail and eventually
# succeed with a later timestamp. This tests that aborted entries
@@ -269,6 +292,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
c = self.session.open_cursor(uri)
self.assertEquals(c['key_ts4'], 'value15')
c.close()
+ '''
c = self.session.open_cursor(uri)
self.session.begin_transaction()
@@ -308,6 +332,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.session.commit_transaction()
c.close()
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
c = self.session.open_cursor(uri)
self.session.begin_transaction()
c['key_nots'] = 'value16'
@@ -315,6 +342,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.commit_transaction(), msg_usage)
c.close()
+ '''
c = self.session.open_cursor(uri)
self.session.begin_transaction()
@@ -322,6 +350,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.session.commit_transaction()
c.close()
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
c = self.session.open_cursor(uri)
self.session.begin_transaction()
c['key_nots'] = 'value17'
@@ -329,6 +360,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.commit_transaction(), msg_usage)
c.close()
+ '''
c = self.session.open_cursor(uri)
self.assertEquals(c['key_nots'], 'value_nots1')
@@ -375,6 +407,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
self.session.commit_transaction()
c.close()
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
c = self.session.open_cursor(uri)
self.session.begin_transaction()
c['key_nots'] = 'value23'
@@ -386,6 +421,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess):
lambda: self.session.commit_transaction(
'durable_timestamp=' + timestamp_str(23)), msg_usage)
c.close()
+ '''
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_backup12.py b/src/third_party/wiredtiger/test/suite/test_backup12.py
index 6726164d038..1148ed84a45 100644
--- a/src/third_party/wiredtiger/test/suite/test_backup12.py
+++ b/src/third_party/wiredtiger/test/suite/test_backup12.py
@@ -83,6 +83,9 @@ class test_backup12(wttest.WiredTigerTestCase, suite_subprocess):
# That log file is not part of the list returned. This is a full backup
# primary cursor with incremental configured.
os.mkdir(self.dir)
+ #
+ # Note, this first backup is actually done before a checkpoint is taken.
+ #
config = 'incremental=(enabled,this_id="ID1")'
bkup_c = self.session.open_cursor('backup:', None, config)
@@ -168,12 +171,9 @@ class test_backup12(wttest.WiredTigerTestCase, suite_subprocess):
offset = incrlist[0]
size = incrlist[1]
curtype = incrlist[2]
- self.assertEqual(offset, 0)
- # For now assert WT_BACKUP_FILE (which is 1).
- self.assertEqual(curtype, 1)
+ self.assertTrue(curtype == 1 or curtype == 2)
dup_cnt += 1
dupc.close()
- self.assertEqual(dup_cnt, 1)
self.pr('Copy from: ' + newfile + ' (' + str(sz) + ') to ' + self.dir)
shutil.copy(newfile, self.dir)
self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
diff --git a/src/third_party/wiredtiger/test/suite/test_durable_ts02.py b/src/third_party/wiredtiger/test/suite/test_durable_ts02.py
index 15b9bfdec82..27d0d3f3146 100644
--- a/src/third_party/wiredtiger/test/suite/test_durable_ts02.py
+++ b/src/third_party/wiredtiger/test/suite/test_durable_ts02.py
@@ -78,6 +78,9 @@ class test_durable_ts03(wttest.WiredTigerTestCase):
self.conn.set_timestamp('stable_timestamp=' + timestamp_str(100))
self.session.checkpoint()
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
# Scenario: 1
# Check to see commit timestamp > durable timestamap, returns error.
session.begin_transaction()
@@ -90,7 +93,9 @@ class test_durable_ts03(wttest.WiredTigerTestCase):
session.prepare_transaction('prepare_timestamp=' + timestamp_str(150))
msg = "/is less than the commit timestamp/"
# Check for error when commit timestamp > durable timestamp.
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: session.commit_transaction('commit_timestamp=' + timestamp_str(200) + ',durable_timestamp=' + timestamp_str(180)), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: session.commit_transaction('commit_timestamp=' +\
+ timestamp_str(200) + ',durable_timestamp=' + timestamp_str(180)), msg)
# Set a stable timestamp so that first update value is durable.
self.conn.set_timestamp('stable_timestamp=' + timestamp_str(250))
@@ -110,7 +115,10 @@ class test_durable_ts03(wttest.WiredTigerTestCase):
msg = "/is less than the stable timestamp/"
# Check that error is returned when durable timestamp < stable timestamp.
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: session.commit_transaction('commit_timestamp=' + timestamp_str(200) + ',durable_timestamp=' + timestamp_str(240)), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: session.commit_transaction('commit_timestamp=' +\
+ timestamp_str(200) + ',durable_timestamp=' + timestamp_str(240)), msg)
+ '''
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare02.py b/src/third_party/wiredtiger/test/suite/test_prepare02.py
index 7546cc44d59..ebe79cf729c 100644
--- a/src/third_party/wiredtiger/test/suite/test_prepare02.py
+++ b/src/third_party/wiredtiger/test/suite/test_prepare02.py
@@ -40,38 +40,33 @@ class test_prepare02(wttest.WiredTigerTestCase, suite_subprocess):
session_config = 'isolation=snapshot'
def test_prepare_session_operations(self):
- self.session.create("table:mytable", "key_format=S,value_format=S")
- cursor = self.session.open_cursor("table:mytable", None)
- # Test the session methods that are forbidden after the transaction is
- # prepared.
+ # Test the session methods forbidden after the transaction is prepared.
+ self.session.create("table:mytable", "key_format=S,value_format=S")
self.session.begin_transaction()
+ cursor = self.session.open_cursor("table:mytable", None)
+ cursor["key"] = "value"
self.session.prepare_transaction("prepare_timestamp=2a")
- msg = "/ not permitted in a/"
- #
- # The operations listed below are not supported in the prepared state.
- #
- # The operations are listed in the same order as they are declared in
- # the session structure. Any function missing below is allowed in the
- # prepared state.
- #
+ msg = "/not permitted in a prepared transaction/"
+
+ # The operations are listed in the same order as they are declared in the session structure.
+ # WT_SESSION.close permitted.
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:self.session.reconfigure(), msg)
+ # WT_SESSION.strerror permitted, but currently broken in the Python API (WT-5399).
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.open_cursor("table:mytable", None), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.alter("table:mytable",
- "access_pattern_hint=random"), msg)
+ lambda: self.session.alter("table:mytable", "access_pattern_hint=random"), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.create("table:mytable1",
- "key_format=S,value_format=S"), msg)
+ lambda: self.session.create("table:mytable1", "key_format=S,value_format=S"), msg)
+ # WT_SESSION.import permitted, not supported in the Python API.
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.compact("table:mytable"), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.drop("table:mytable", None), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(cursor, cursor,
- "compare=gt,count=10"), msg)
+ lambda: self.session.join(cursor, cursor, "compare=gt,count=10"), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.log_flush("sync=on"), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
@@ -79,33 +74,39 @@ class test_prepare02(wttest.WiredTigerTestCase, suite_subprocess):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.rebalance("table:mytable", None), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.rename("table:mytable", "table:mynewtable",
- None), msg)
+ lambda: self.session.rename("table:mytable", "table:mynewtable", None), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:self.session.reset(), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.salvage("table:mytable", None), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.truncate("table:mytable",
- None, None, None), msg)
+ lambda: self.session.truncate("table:mytable", None, None, None), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.upgrade("table:mytable", None), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.verify("table:mytable", None), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:self.session.begin_transaction(), msg)
+ # WT_SESSION.commit_transaction permitted, tested below.
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:self.session.prepare_transaction("prepare_timestamp=2a"), msg)
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.timestamp_transaction(
- "read_timestamp=2a"), msg)
+ # WT_SESSION.rollback_transaction permitted, tested below.
+ self.session.timestamp_transaction("commit_timestamp=2b")
+ self.assertTimestampsEqual(self.session.query_timestamp('get=prepare'), '2a')
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:self.session.checkpoint(), msg)
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.snapshot("name=test"), msg)
+ # WT_SESSION.transaction_pinned_range permitted, not supported in the Python API.
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:self.session.transaction_sync(), msg)
- self.session.rollback_transaction()
+ self.session.breakpoint()
+
+ # Commit the transaction. Test that no "not permitted in a prepared transaction" error has
+ # set a transaction error flag, that is, we should still be able to commit successfully.
+ self.session.timestamp_transaction("commit_timestamp=2b")
+ self.session.timestamp_transaction("durable_timestamp=2b")
+ self.session.commit_transaction('commit_timestamp=2a')
# Commit after prepare is permitted.
self.session.begin_transaction()
@@ -115,8 +116,7 @@ class test_prepare02(wttest.WiredTigerTestCase, suite_subprocess):
self.session.timestamp_transaction("durable_timestamp=2b")
self.session.commit_transaction()
- # Setting commit timestamp via timestamp_transaction after
- # prepare is also permitted.
+ # Setting commit timestamp via timestamp_transaction after prepare is also permitted.
self.session.begin_transaction()
c1 = self.session.open_cursor("table:mytable", None)
self.session.prepare_transaction("prepare_timestamp=2a")
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare04.py b/src/third_party/wiredtiger/test/suite/test_prepare04.py
index d65b96adefe..fd27a244747 100644
--- a/src/third_party/wiredtiger/test/suite/test_prepare04.py
+++ b/src/third_party/wiredtiger/test/suite/test_prepare04.py
@@ -116,7 +116,7 @@ class test_prepare04(wttest.WiredTigerTestCase, suite_subprocess):
# Make sure we detect the conflict between operations.
self.assertRaisesException(wiredtiger.WiredTigerError, lambda:c_other.update(), conflictmsg)
- s_other.commit_transaction()
+ s_other.rollback_transaction()
self.session.timestamp_transaction('commit_timestamp=' + timestamp_str(300))
self.session.timestamp_transaction('durable_timestamp=' + timestamp_str(300))
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare05.py b/src/third_party/wiredtiger/test/suite/test_prepare05.py
index 9c812879315..0b4769ea566 100644
--- a/src/third_party/wiredtiger/test/suite/test_prepare05.py
+++ b/src/third_party/wiredtiger/test/suite/test_prepare05.py
@@ -52,7 +52,7 @@ class test_prepare05(wttest.WiredTigerTestCase, suite_subprocess):
lambda: self.session.prepare_transaction(
'prepare_timestamp=' + timestamp_str(1)),
"/older than the oldest timestamp/")
- self.session.commit_transaction('commit_timestamp=' + timestamp_str(3))
+ self.session.rollback_transaction()
# Check setting the prepare timestamp same as oldest timestamp is valid.
self.session.begin_transaction()
@@ -72,7 +72,7 @@ class test_prepare05(wttest.WiredTigerTestCase, suite_subprocess):
lambda: self.session.prepare_transaction(
'prepare_timestamp=' + timestamp_str(2)),
"/should not have been set before/")
- self.session.commit_transaction('commit_timestamp=' + timestamp_str(3))
+ self.session.rollback_transaction()
# It is illegal to set a prepare timestamp same as or earlier than an
# active read timestamp.
@@ -97,16 +97,18 @@ class test_prepare05(wttest.WiredTigerTestCase, suite_subprocess):
s_reader.rollback_transaction()
self.session.rollback_transaction()
- # It is illegal to set a commit timestamp older than prepare
- # timestamp of a transaction.
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
+ # It is illegal to set a commit timestamp older than prepare timestamp of a transaction.
self.session.begin_transaction()
c[1] = 1
- self.session.prepare_transaction(
- 'prepare_timestamp=' + timestamp_str(5))
+ self.session.prepare_transaction('prepare_timestamp=' + timestamp_str(5))
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.commit_transaction(
'commit_timestamp=' + timestamp_str(4)),
"/less than the prepare timestamp/")
+ '''
# It is legal to set a commit timestamp as same as prepare
# timestamp.
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare06.py b/src/third_party/wiredtiger/test/suite/test_prepare06.py
index 6255630d4ef..173f23b6142 100644
--- a/src/third_party/wiredtiger/test/suite/test_prepare06.py
+++ b/src/third_party/wiredtiger/test/suite/test_prepare06.py
@@ -64,6 +64,9 @@ class test_prepare06(wttest.WiredTigerTestCase, suite_subprocess):
self.session.timestamp_transaction('durable_timestamp=' + timestamp_str(35))
self.session.commit_transaction()
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
# Check setting a prepared transaction timestamps earlier than the
# oldest timestamp is invalid, if durable timestamp is less than the
# stable timestamp.
@@ -75,6 +78,7 @@ class test_prepare06(wttest.WiredTigerTestCase, suite_subprocess):
'durable_timestamp=' + timestamp_str(25)),
"/is less than the stable timestamp/")
self.session.rollback_transaction()
+ '''
# Check the cases with an active reader.
# Start a new reader to have an active read timestamp.
@@ -101,16 +105,22 @@ class test_prepare06(wttest.WiredTigerTestCase, suite_subprocess):
"/must be greater than the latest active read timestamp/")
self.session.rollback_transaction()
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
+
# It is illegal to set a commit timestamp less than the prepare
# timestamp of a transaction.
self.session.begin_transaction()
c[1] = 1
- self.session.prepare_transaction(
- 'prepare_timestamp=' + timestamp_str(45))
+ self.session.prepare_transaction('prepare_timestamp=' + timestamp_str(45))
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.commit_transaction(
'commit_timestamp=' + timestamp_str(30)),
"/less than the prepare timestamp/")
+ '''
+
+ '''
+ Commented out for now: the system panics if we fail after preparing a transaction.
# It is legal to set a commit timestamp older than prepare timestamp of
# a transaction with roundup_timestamps settings.
@@ -125,6 +135,7 @@ class test_prepare06(wttest.WiredTigerTestCase, suite_subprocess):
'durable_timestamp=' + timestamp_str(30)),
"/is less than the commit timestamp/")
self.session.rollback_transaction()
+ '''
s_reader.commit_transaction()
diff --git a/src/third_party/wiredtiger/test/suite/test_stat08.py b/src/third_party/wiredtiger/test/suite/test_stat08.py
index db7b0ad00d9..e82bc661f60 100644
--- a/src/third_party/wiredtiger/test/suite/test_stat08.py
+++ b/src/third_party/wiredtiger/test/suite/test_stat08.py
@@ -33,7 +33,7 @@ import wiredtiger, wttest
class test_stat08(wttest.WiredTigerTestCase):
nentries = 350000
- conn_config = 'cache_size=50MB,statistics=(all)'
+ conn_config = 'cache_size=10MB,statistics=(all)'
entry_value = "abcde" * 40
BYTES_READ = 4000
READ_TIME = 4003
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp13.py b/src/third_party/wiredtiger/test/suite/test_timestamp13.py
index 0d230da848b..ad2542f6181 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp13.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp13.py
@@ -70,7 +70,7 @@ class test_timestamp13(wttest.WiredTigerTestCase, suite_subprocess):
lambda: self.session.query_timestamp('get=unknown'),
'/not a permitted choice for key/')
- self.session.commit_transaction()
+ self.session.rollback_transaction()
# Querying a session's timestamps will error when not in a transaction.
for query in query_choices:
self.assertRaises(
diff --git a/src/third_party/wiredtiger/test/suite/test_txn17.py b/src/third_party/wiredtiger/test/suite/test_txn17.py
index 97bc5cf6d50..b7e843eaccc 100644
--- a/src/third_party/wiredtiger/test/suite/test_txn17.py
+++ b/src/third_party/wiredtiger/test/suite/test_txn17.py
@@ -57,24 +57,26 @@ class test_txn17(wttest.WiredTigerTestCase, suite_subprocess):
lambda: self.session.rollback_transaction(),
'/only permitted in a running transaction/')
- # Test API functionality tagged as requires_notransaction.
- # Begin a transaction and execute all the following tests under it.
- self.session.begin_transaction()
-
# Cannot begin a transaction while a transaction is already running.
+ self.session.begin_transaction()
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.begin_transaction(),
'/not permitted in a running transaction/')
+ self.session.rollback_transaction()
# Cannot take a checkpoint while a transaction is running.
+ self.session.begin_transaction()
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.checkpoint(),
'/not permitted in a running transaction/')
+ self.session.rollback_transaction()
# Cannot call transaction_sync while a transaction is running.
+ self.session.begin_transaction()
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.transaction_sync(),
'/not permitted in a running transaction/')
+ self.session.rollback_transaction()
if __name__ == '__main__':
wttest.run()