diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-02-11 04:37:44 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2020-02-11 04:37:44 +0000 |
commit | 7e0e302388b5fa71d29c0145a445e105b72864cd (patch) | |
tree | aaeb3eea7963eebab64f5774c7d0f47e86e07749 /src | |
parent | e94778714638cb20f93ae94d4e16c38ed2d987bc (diff) | |
download | mongo-7e0e302388b5fa71d29c0145a445e105b72864cd.tar.gz |
Import wiredtiger: 4a7bbce5cb744d9026f083314746e85fa851338e from branch mongodb-4.2
ref: 35ba2ab887..4a7bbce5cb
for: 4.2.4
WT-4999 Migrate Jenkins “wiredtiger-test-format-stress-zseries” job to Evergreen
WT-5159 Make wiredtiger work with SWIG 4.0.0 and beyond
WT-5206 Return the correct checkpoint-modified list of blocks
WT-5219 Btree walk code reads the WT_REF.addr field without locking
WT-5376 WT_UPDATE.type field can race with visibility checks when returning key/value pairs
WT-5387 Prepared transaction resolution can stall eviction on active pages
WT-5393 Prepared transaction rollback and API error handling fixes
WT-5405 Make format LSM test a separate Evergreen task
WT-5437 Salvage's excessive consumption of cache memory causing eviction to stall
WT-5444 Re-enable PPC format tests in Evergreen
WT-5449 Increase contention in history store stress workload
WT-5458 Fix Evergreen timeout failures in linux-directio test
WT-5460 Buffer alignment failure captured by linux-directio test
WT-5468 Improve documentation for "wt load"
WT-5480 Don't take threads resolving prepared transactions to assist with eviction
WT-5481 DIAGNOSTIC split code assert can race with WT_REF locking
WT-5488 Dump the failing CONFIG for Evergreen test/format tasks
WT-5489 page-read can race with threads locking in-memory page structures
WT-5534 Incremental backup needs to accept older metadata
WT-5537 Use correct WT_ITEM fields per memory sanitizer
Diffstat (limited to 'src')
62 files changed, 1637 insertions, 988 deletions
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py b/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py index 333da4b178c..9f7cb2941c0 100755 --- a/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py +++ b/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py @@ -81,7 +81,7 @@ from workgen import * context = Context() homedir = "WT_TEST" conn_config = "cache_size=1G,checkpoint=(wait=60,log_size=2GB),\ - eviction=(threads_min=12,threads_max=12),log=(enabled=true),session_max=600,\ + eviction=(threads_min=12,threads_max=12),log=(enabled=true),session_max=800,\ eviction_target=60,statistics=(fast),statistics_log=(wait=1,json)"# explicitly added conn = wiredtiger_open(homedir, "create," + conn_config) s = conn.open_session("") @@ -119,7 +119,7 @@ s.create(log_name, wtperf_table_config + "key_format=S,value_format=S," +\ compress_table_config + table_config + ",log=(enabled=true)") log_table = Table(log_name) -ops = Operation(Operation.OP_SEARCH, tables[0]) +ops = Operation(Operation.OP_SEARCH, tables[0],Key(Key.KEYGEN_PARETO, 0, ParetoOptions(1))) ops = op_multi_table(ops, tables, False) ops = op_log_like(ops, log_table, 0) thread0 = Thread(ops) @@ -131,8 +131,6 @@ thread1 = Thread(ops) # These operations include log_like operations, which will increase the number # of insert/update operations by a factor of 2.0. This may cause the # actual operations performed to be above the throttle. 
-thread1.options.throttle=500 -thread1.options.throttle_burst=1.0 ops = Operation(Operation.OP_UPDATE, tables[0]) ops = op_multi_table(ops, tables, False) @@ -156,10 +154,10 @@ ops = Operation(Operation.OP_SLEEP, "0.1") + \ Operation(Operation.OP_LOG_FLUSH, "") logging_thread = Thread(ops) -workload = Workload(context, 350 * thread0 + 10 * thread1 +\ - 50 * thread2 + 100 * thread3 + logging_thread) +workload = Workload(context, 400 * thread0 + 100 * thread1 +\ + 10 * thread2 + 100 * thread3 + logging_thread) workload.options.report_interval=5 -workload.options.run_time=300 +workload.options.run_time=500 workload.options.max_latency=50000 workload.run(conn) diff --git a/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py b/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py index e27bd479730..487e1e90ff1 100755 --- a/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py +++ b/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. # # runner/__init__.py -# Used as a first import by runners, does any common initialization. +# Used as a first import by runners, does any common initialization. from __future__ import print_function import os, shutil, sys diff --git a/src/third_party/wiredtiger/bench/wtperf/split_heavy.wtperf b/src/third_party/wiredtiger/bench/wtperf/split_heavy.wtperf index 4f11340c095..4460aa99491 100644 --- a/src/third_party/wiredtiger/bench/wtperf/split_heavy.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/split_heavy.wtperf @@ -10,4 +10,4 @@ run_time=10 threads=((count=20,inserts=1)) value_sz=200 key_sz=64 -reopen_connection=false
\ No newline at end of file +reopen_connection=false diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index cf942be2f84..45cca0ef829 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -377,6 +377,8 @@ file_config = format_meta + file_runtime_config + [ file_meta = file_config + [ Config('checkpoint', '', r''' the file checkpoint entries'''), + Config('checkpoint_backup_info', '', r''' + the incremental backup durable information'''), Config('checkpoint_lsn', '', r''' LSN of the last checkpoint'''), Config('id', '', r''' diff --git a/src/third_party/wiredtiger/dist/s_funcs.list b/src/third_party/wiredtiger/dist/s_funcs.list index 54928fc03b4..0218937fffc 100644 --- a/src/third_party/wiredtiger/dist/s_funcs.list +++ b/src/third_party/wiredtiger/dist/s_funcs.list @@ -34,7 +34,6 @@ __wt_stat_join_aggregate __wt_stat_join_clear_all __wt_stream_set_no_buffer __wt_try_readlock -__wt_txn_err_chk wiredtiger_calc_modify wiredtiger_config_parser_open wiredtiger_config_validate diff --git a/src/third_party/wiredtiger/dist/s_python b/src/third_party/wiredtiger/dist/s_python index 7ecb97059b5..323a92dca79 100755 --- a/src/third_party/wiredtiger/dist/s_python +++ b/src/third_party/wiredtiger/dist/s_python @@ -7,9 +7,10 @@ trap 'rm -f $t' 0 1 2 3 13 15 cd .. # Check Python coding standards: check for tab characters. +# Ignore generated files. egrep ' ' `find . 
-name '*.py'` | sed -e 's/:.*//' \ - -e '/__init__.py/d' \ + -e '/swig_wiredtiger.py/d' \ -e '/\/wiredtiger.py/d' \ -e '/src\/docs\/tools\/doxypy.py/d' | sort -u | diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 6cda20d9b3d..185ed5aac8d 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -510,6 +510,7 @@ bitpos bitstring bitwise blk +blkmod bm bnd bool diff --git a/src/third_party/wiredtiger/examples/c/ex_backup_block.c b/src/third_party/wiredtiger/examples/c/ex_backup_block.c index fbae6e6da5d..422bf728540 100644 --- a/src/third_party/wiredtiger/examples/c/ex_backup_block.c +++ b/src/third_party/wiredtiger/examples/c/ex_backup_block.c @@ -136,11 +136,11 @@ setup_directories(void) } static void -add_work(WT_SESSION *session, int iter) +add_work(WT_SESSION *session, int iter, int iterj) { WT_CURSOR *cursor, *cursor2; int i; - char k[32], v[32]; + char k[64], v[64]; error_check(session->open_cursor(session, uri, NULL, NULL, &cursor)); /* @@ -154,8 +154,8 @@ add_work(WT_SESSION *session, int iter) * Perform some operations with individual auto-commit transactions. 
*/ for (i = 0; i < MAX_KEYS; i++) { - (void)snprintf(k, sizeof(k), "key.%d.%d", iter, i); - (void)snprintf(v, sizeof(v), "value.%d.%d", iter, i); + (void)snprintf(k, sizeof(k), "key.%d.%d.%d", iter, iterj, i); + (void)snprintf(v, sizeof(v), "value.%d.%d.%d", iter, iterj, i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); error_check(cursor->insert(cursor)); @@ -260,7 +260,8 @@ take_full_backup(WT_SESSION *session, int i) } else hdir = home_incr; if (i == 0) { - (void)snprintf(buf, sizeof(buf), "incremental=(enabled=true,this_id=ID%d)", i); + (void)snprintf( + buf, sizeof(buf), "incremental=(granularity=1M,enabled=true,this_id=ID%d)", i); error_check(session->open_cursor(session, "backup:", NULL, buf, &cursor)); } else error_check(session->open_cursor(session, "backup:", NULL, NULL, &cursor)); @@ -279,13 +280,17 @@ take_full_backup(WT_SESSION *session, int i) for (j = 0; j < MAX_ITERATIONS; j++) { (void)snprintf(h, sizeof(h), "%s.%d", home_incr, j); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); +#if 0 printf("FULL: Copy: %s\n", buf); +#endif error_check(system(buf)); } else { (void)snprintf(h, sizeof(h), "%s.%d", home_full, i); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, hdir, filename); +#if 0 printf("FULL %d: Copy: %s\n", i, buf); +#endif error_check(system(buf)); } } @@ -300,13 +305,16 @@ take_incr_backup(WT_SESSION *session, int i) FILELIST *flist; WT_CURSOR *backup_cur, *incr_cur; uint64_t offset, size, type; - size_t alloc, count; + size_t alloc, count, rdsize, tmp_sz; int j, ret, rfd, wfd; - char buf[1024], h[256]; + char buf[1024], h[256], *tmp; const char *filename; + bool first; /*! [incremental backup using block transfer]*/ + tmp = NULL; + tmp_sz = 0; /* Open the backup data source for incremental backup. 
*/ (void)snprintf(buf, sizeof(buf), "incremental=(src_id=ID%d,this_id=ID%d)", i - 1, i); error_check(session->open_cursor(session, "backup:", NULL, buf, &backup_cur)); @@ -321,51 +329,61 @@ take_incr_backup(WT_SESSION *session, int i) error_check(process_file(&flist, &count, &alloc, filename)); (void)snprintf(h, sizeof(h), "%s.0", home_incr); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); - printf("Copying backup: %s\n", buf); - error_check(system(buf)); #if 0 - (void)snprintf(buf, sizeof(buf), "%s/%s", home, filename); - printf("Open source %s for reading\n", buf); - error_check(rfd = open(buf, O_RDONLY, 0)); - (void)snprintf(h, sizeof(h), "%s.%d", home_incr, i); - (void)snprintf(buf, sizeof(buf), "%s/%s", h, filename); - printf("Open dest %s for writing\n", buf); - error_check(wfd = open(buf, O_WRONLY, 0)); + printf("Copying backup: %s\n", buf); #endif + error_check(system(buf)); + first = true; (void)snprintf(buf, sizeof(buf), "incremental=(file=%s)", filename); error_check(session->open_cursor(session, NULL, backup_cur, buf, &incr_cur)); +#if 0 printf("Taking incremental %d: File %s\n", i, filename); +#endif while ((ret = incr_cur->next(incr_cur)) == 0) { error_check(incr_cur->get_key(incr_cur, &offset, &size, &type)); - printf("Incremental %s: KEY: Off %" PRIu64 " Size: %" PRIu64 " Type: %" PRIu64 "\n", - filename, offset, size, type); scan_end_check(type == WT_BACKUP_FILE || type == WT_BACKUP_RANGE); +#if 0 + printf("Incremental %s: KEY: Off %" PRIu64 " Size: %" PRIu64 " %s\n", filename, offset, + size, type == WT_BACKUP_FILE ? "WT_BACKUP_FILE" : "WT_BACKUP_RANGE"); +#endif if (type == WT_BACKUP_RANGE) { /* * We should never get a range key after a whole file so the read file descriptor - * should be valid. If the read descriptor is valid, so it the write one. + * should be valid. If the read descriptor is valid, so is the write one. 
*/ - scan_end_check(rfd != -1); - printf("Incremental %s: Range Offset: %" PRIu64 " Size: %" PRIu64 "\n", filename, - offset, size); + if (tmp_sz < size) { + tmp = realloc(tmp, size); + testutil_assert(tmp != NULL); + tmp_sz = size; + } + if (first) { + (void)snprintf(buf, sizeof(buf), "%s/%s", home, filename); + error_sys_check(rfd = open(buf, O_RDONLY, 0)); + (void)snprintf(h, sizeof(h), "%s.%d", home_incr, i); + (void)snprintf(buf, sizeof(buf), "%s/%s", h, filename); + error_sys_check(wfd = open(buf, O_WRONLY, 0)); + first = false; + } + error_sys_check(lseek(rfd, (wt_off_t)offset, SEEK_SET)); - error_sys_check(read(rfd, buf, (size_t)size)); + error_sys_check(rdsize = (size_t)read(rfd, tmp, (size_t)size)); error_sys_check(lseek(wfd, (wt_off_t)offset, SEEK_SET)); - error_sys_check(write(wfd, buf, (size_t)size)); + /* Use the read size since we may have read less than the granularity. */ + error_sys_check(write(wfd, tmp, rdsize)); } else { -/* Whole file, so close both files and just copy the whole thing. */ -#if 0 - error_check(close(rfd)); - error_check(close(wfd)); -#endif + /* Whole file, so close both files and just copy the whole thing. */ + testutil_assert(first == true); rfd = wfd = -1; (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); +#if 0 printf("Incremental: Whole file copy: %s\n", buf); +#endif error_check(system(buf)); } } scan_end_check(ret == WT_NOTFOUND); + /* Done processing this file. Close incremental cursor. */ error_check(incr_cur->close(incr_cur)); /* Close file descriptors if they're open. */ @@ -386,18 +404,21 @@ take_incr_backup(WT_SESSION *session, int i) } scan_end_check(ret == WT_NOTFOUND); + /* Done processing all files. Close backup cursor. */ error_check(backup_cur->close(backup_cur)); error_check(finalize_files(flist, count)); + free(tmp); /*! 
[incremental backup using block transfer]*/ } int main(int argc, char *argv[]) { + struct stat sb; WT_CONNECTION *wt_conn; WT_CURSOR *backup_cur; WT_SESSION *session; - int i; + int i, j, ret; char cmd_buf[256]; (void)argc; /* Unused variable */ @@ -412,7 +433,7 @@ main(int argc, char *argv[]) error_check(session->create(session, uri, "key_format=S,value_format=S")); error_check(session->create(session, uri2, "key_format=S,value_format=S")); printf("Adding initial data\n"); - add_work(session, 0); + add_work(session, 0, 0); printf("Taking initial backup\n"); take_full_backup(session, 0); @@ -421,8 +442,12 @@ main(int argc, char *argv[]) for (i = 1; i < MAX_ITERATIONS; i++) { printf("Iteration %d: adding data\n", i); - add_work(session, i); - error_check(session->checkpoint(session, NULL)); + /* For each iteration we may add work and checkpoint multiple times. */ + for (j = 0; j < i; j++) { + add_work(session, i, j); + error_check(session->checkpoint(session, NULL)); + } + /* * The full backup here is only needed for testing and comparison purposes. A normal * incremental backup procedure would not include this. @@ -440,6 +465,20 @@ main(int argc, char *argv[]) error_check(compare_backups(i)); } + printf("Close and reopen the connection\n"); + /* + * Close and reopen the connection to illustrate the durability of id information. + */ + error_check(wt_conn->close(wt_conn, NULL)); + error_check(wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn)); + error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session)); + /* + * We should have an entry for i-1 and i-2. Use the older one. + */ + (void)snprintf(cmd_buf, sizeof(cmd_buf), "incremental=(src_id=ID%d,this_id=ID%d)", i - 2, i); + error_check(session->open_cursor(session, "backup:", NULL, cmd_buf, &backup_cur)); + error_check(backup_cur->close(backup_cur)); + /* * After we're done, release resources. Test the force stop setting. 
*/ @@ -455,6 +494,28 @@ main(int argc, char *argv[]) printf("Final comparison: dumping and comparing data\n"); error_check(compare_backups(0)); + for (i = 0; i < (int)filelist_count; ++i) { + if (last_flist[i].name == NULL) + break; + free((void *)last_flist[i].name); + } + free(last_flist); + + /* + * Reopen the connection to verify that the forced stop should remove incremental information. + */ + error_check(wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn)); + error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session)); + /* + * We should not have any information. + */ + (void)snprintf(cmd_buf, sizeof(cmd_buf), "incremental=(src_id=ID%d,this_id=ID%d)", i - 2, i); + testutil_assert(session->open_cursor(session, "backup:", NULL, cmd_buf, &backup_cur) == ENOENT); + error_check(wt_conn->close(wt_conn, NULL)); + + (void)snprintf(cmd_buf, sizeof(cmd_buf), "%s/WiredTiger.backup.block", home); + ret = stat(cmd_buf, &sb); + testutil_assert(ret == -1 && errno == ENOENT); return (EXIT_SUCCESS); } diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index f3969285fd8..f56574c8ece 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.2", - "commit": "35ba2ab88763ca8a25743a9ef78b2eaed28f4a6c" + "commit": "4a7bbce5cb744d9026f083314746e85fa851338e" } diff --git a/src/third_party/wiredtiger/lang/python/Makefile.am b/src/third_party/wiredtiger/lang/python/Makefile.am index ace11dfa5b9..b15e66e0e83 100644 --- a/src/third_party/wiredtiger/lang/python/Makefile.am +++ b/src/third_party/wiredtiger/lang/python/Makefile.am @@ -2,7 +2,7 @@ PYSRC = $(top_srcdir)/lang/python PYDIRS = -t $(abs_builddir) -I $(abs_top_srcdir):$(abs_top_builddir) -L $(abs_top_builddir)/.libs PYDST = $(abs_builddir)/wiredtiger PYFILES = $(PYDST)/fpacking.py $(PYDST)/intpacking.py $(PYDST)/packing.py \ - $(PYDST)/packutil.py 
$(PYDST)/__init__.py + $(PYDST)/packutil.py $(PYDST)/swig_wiredtiger.py $(PYDST)/__init__.py PY_MAJOR_VERSION := $$($(PYTHON) -c \ 'import sys; print(int(sys.version_info.major))') @@ -25,7 +25,12 @@ pyfiles: $(PYFILES) $(PYDST)/%: $(PYSRC)/wiredtiger/% mkdir -p $(PYDST) && cp -f $< $@ -$(PYDST)/__init__.py: $(PYSRC)/wiredtiger.py +$(PYDST)/__init__.py: $(PYSRC)/wiredtiger/init.py + mkdir -p $(PYDST) && cp -f $< $@ + +# Note: this cannot be named wiredtiger.py in the target directory, +# we won't be able to import it. +$(PYDST)/swig_wiredtiger.py: $(PYSRC)/wiredtiger.py mkdir -p $(PYDST) && cp -f $< $@ install-exec-local: diff --git a/src/third_party/wiredtiger/lang/python/setup_pip.py b/src/third_party/wiredtiger/lang/python/setup_pip.py index ce0fd1b0d77..c7b64c0d90f 100755 --- a/src/third_party/wiredtiger/lang/python/setup_pip.py +++ b/src/third_party/wiredtiger/lang/python/setup_pip.py @@ -188,8 +188,10 @@ def get_library_dirs(): return dirs # source_filter -# Make any needed changes to the sources list. Any entry that -# needs to be moved is returned in a dictionary. +# Make any needed changes to the original sources list and return a manifest, +# a list of source file names relative to the new staging root. Any entry +# that needs to be renamed returned in a dictionary where the entry's key +# is the new name and the value is the old source name. def source_filter(sources): result = [] movers = dict() @@ -205,15 +207,16 @@ def source_filter(sources): # move all lang/python files to the top level. 
if dest.startswith(pywt_prefix): dest = os.path.basename(dest) - if dest == 'pip_init.py': + if dest == 'init.py': dest = '__init__.py' if dest != src: movers[dest] = src result.append(dest) # Add SWIG generated files - result.append('wiredtiger.py') - movers['wiredtiger.py'] = os.path.join(pywt_build_dir, '__init__.py') result.append(os.path.join(py_dir, 'wiredtiger_wrap.c')) + wiredtiger_py = 'swig_wiredtiger.py' + result.append('swig_wiredtiger.py') + movers['swig_wiredtiger.py'] = os.path.join(py_dir, 'wiredtiger.py') return result, movers ################################################################ diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger.i b/src/third_party/wiredtiger/lang/python/wiredtiger.i index 86182b1716c..ea298cd4f10 100644 --- a/src/third_party/wiredtiger/lang/python/wiredtiger.i +++ b/src/third_party/wiredtiger/lang/python/wiredtiger.i @@ -50,7 +50,7 @@ This provides an API similar to the C API, with the following modifications: %feature("autodoc", "0"); %pythoncode %{ -from .packing import pack, unpack +from packing import pack, unpack ## @endcond %} diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger/pip_init.py b/src/third_party/wiredtiger/lang/python/wiredtiger/init.py index 187a21443b7..28aef041a73 100755 --- a/src/third_party/wiredtiger/lang/python/wiredtiger/pip_init.py +++ b/src/third_party/wiredtiger/lang/python/wiredtiger/init.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. # -# pip_init.py +# init.py # This is installed as __init__.py, and imports the file created by SWIG. # This is needed because SWIG's import helper code created by certain SWIG # versions may be broken, see: https://github.com/swig/swig/issues/769 . @@ -42,11 +42,11 @@ if fname != '__init__.py' and fname != '__init__.pyc': # to this module so they will appear in the wiredtiger namespace. 
me = sys.modules[__name__] sys.path.append(os.path.dirname(__file__)) -try: - import wiredtiger.wiredtiger as swig_wiredtiger -except ImportError: - # for Python2 - import wiredtiger as swig_wiredtiger + +# explicitly importing _wiredtiger in advance of SWIG allows us to not +# use relative importing, as SWIG does. It doesn't work for us with Python2. +import _wiredtiger +import swig_wiredtiger for name in dir(swig_wiredtiger): value = getattr(swig_wiredtiger, name) setattr(me, name, value) diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c index 158fc919820..e1cf8982daf 100644 --- a/src/third_party/wiredtiger/src/block/block_ckpt.c +++ b/src/third_party/wiredtiger/src/block/block_ckpt.c @@ -9,8 +9,7 @@ #include "wt_internal.h" static int __ckpt_process(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *); -static int __ckpt_update( - WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *, WT_CKPT *, WT_BLOCK_CKPT *, bool); +static int __ckpt_update(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *, WT_CKPT *, WT_BLOCK_CKPT *); /* * __wt_block_ckpt_init -- @@ -331,6 +330,7 @@ __ckpt_verify(WT_SESSION_IMPL *session, WT_CKPT *ckptbase) case WT_CKPT_DELETE | WT_CKPT_FAKE: case WT_CKPT_FAKE: break; + case WT_CKPT_ADD | WT_CKPT_BLOCK_MODS: case WT_CKPT_ADD: if (ckpt[1].name == NULL) break; @@ -570,7 +570,7 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase) /* Update checkpoints marked for update. */ WT_CKPT_FOREACH (ckptbase, ckpt) if (F_ISSET(ckpt, WT_CKPT_UPDATE)) - WT_ERR(__ckpt_update(session, block, ckptbase, ckpt, ckpt->bpriv, false)); + WT_ERR(__ckpt_update(session, block, ckptbase, ckpt, ckpt->bpriv)); live_update: /* Truncate the file if that's possible. 
*/ @@ -607,7 +607,7 @@ live_update: */ ci->ckpt_size = WT_MIN(ckpt_size, (uint64_t)block->size); - WT_ERR(__ckpt_update(session, block, ckptbase, ckpt, ci, true)); + WT_ERR(__ckpt_update(session, block, ckptbase, ckpt, ci)); } /* @@ -654,16 +654,80 @@ err: } /* + * __ckpt_add_blkmod_entry -- + * Add an offset/length entry to the bitstring based on granularity. + */ +static int +__ckpt_add_blkmod_entry( + WT_SESSION_IMPL *session, WT_BLOCK_MODS *blk_mod, wt_off_t offset, wt_off_t len) +{ + uint64_t end, start; + uint32_t end_rdup; + + WT_ASSERT(session, blk_mod->granularity != 0); + start = (uint64_t)offset / blk_mod->granularity; + end = (uint64_t)(offset + len) / blk_mod->granularity; + WT_ASSERT(session, end < UINT32_MAX); + end_rdup = WT_MAX(__wt_rduppo2((uint32_t)end, 8), WT_BLOCK_MODS_LIST_MIN); + if ((end_rdup << 3) > blk_mod->nbits) { + /* If we don't have enough, extend the buffer. */ + if (blk_mod->nbits == 0) { + WT_RET(__wt_buf_initsize(session, &blk_mod->bitstring, end_rdup)); + memset(blk_mod->bitstring.mem, 0, end_rdup); + } else + WT_RET(__wt_buf_extend(session, &blk_mod->bitstring, end_rdup)); + blk_mod->nbits = end_rdup << 3; + } + + /* Set all the bits needed to record this offset/length pair. */ + __bit_nset(blk_mod->bitstring.mem, start, end); + return (0); +} + +/* + * __ckpt_add_blk_mods -- + * Add the blocks to all valid incremental backup source identifiers. + */ +static int +__ckpt_add_blk_mods(WT_SESSION_IMPL *session, WT_CKPT *ckpt, WT_BLOCK_CKPT *ci) +{ + WT_BLOCK_MODS *blk_mod; + WT_EXT *ext; + u_int i; + + for (i = 0; i < WT_BLKINCR_MAX; ++i) { + blk_mod = &ckpt->backup_blocks[i]; + /* If there is no information at this entry, we're done. 
*/ + if (!F_ISSET(blk_mod, WT_BLOCK_MODS_VALID)) + continue; + + WT_EXT_FOREACH (ext, ci->alloc.off) + WT_RET(__ckpt_add_blkmod_entry(session, blk_mod, ext->off, ext->size)); + + if (ci->alloc.offset != WT_BLOCK_INVALID_OFFSET) + WT_RET(__ckpt_add_blkmod_entry(session, blk_mod, ci->alloc.offset, ci->alloc.size)); + if (ci->discard.offset != WT_BLOCK_INVALID_OFFSET) + WT_RET(__ckpt_add_blkmod_entry(session, blk_mod, ci->discard.offset, ci->discard.size)); + if (ci->avail.offset != WT_BLOCK_INVALID_OFFSET) + WT_RET(__ckpt_add_blkmod_entry(session, blk_mod, ci->avail.offset, ci->avail.size)); + } + return (0); +} + +/* * __ckpt_update -- * Update a checkpoint. */ static int -__ckpt_update(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, WT_CKPT *ckpt, - WT_BLOCK_CKPT *ci, bool is_live) +__ckpt_update( + WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, WT_CKPT *ckpt, WT_BLOCK_CKPT *ci) { WT_DECL_ITEM(a); WT_DECL_RET; uint8_t *endp; + bool is_live; + + is_live = F_ISSET(ckpt, WT_CKPT_ADD); #ifdef HAVE_DIAGNOSTIC /* Check the extent list combinations for overlaps. */ @@ -723,6 +787,13 @@ __ckpt_update(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, WT_C } /* + * If this is the live system, we need to record the list of blocks written for this checkpoint + * (including the blocks we allocated to write the extent lists). + */ + if (F_ISSET(ckpt, WT_CKPT_BLOCK_MODS)) + WT_RET(__ckpt_add_blk_mods(session, ckpt, ci)); + + /* * Set the file size for the live system. * * !!! diff --git a/src/third_party/wiredtiger/src/btree/bt_compact.c b/src/third_party/wiredtiger/src/btree/bt_compact.c index d396f87ab49..42c9d92c21b 100644 --- a/src/third_party/wiredtiger/src/btree/bt_compact.c +++ b/src/third_party/wiredtiger/src/btree/bt_compact.c @@ -10,31 +10,19 @@ /* * __compact_rewrite -- - * Return if a page needs to be re-written. + * Return if a modified page needs to be re-written. 
*/ static int __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) { WT_BM *bm; WT_MULTI *multi; - WT_PAGE *page; WT_PAGE_MODIFY *mod; - size_t addr_size; uint32_t i; - const uint8_t *addr; *skipp = true; /* Default skip. */ bm = S2BT(session)->bm; - page = ref->page; - - /* If the page is clean, test the original addresses. */ - if (__wt_page_evict_clean(page)) { - __wt_ref_info(session, ref, &addr, &addr_size, NULL); - if (addr == NULL) - return (0); - return (bm->compact_page_skip(bm, session, addr, addr_size, skipp)); - } /* * If the page is a replacement, test the replacement addresses. Ignore empty pages, they get @@ -44,7 +32,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) * looking at it, so the page modified structure may appear at any time (but cannot disappear). * We've confirmed there is a page modify structure, it's OK to look at it. */ - mod = page->modify; + mod = ref->page->modify; if (mod->rec_result == WT_PM_REC_REPLACE) return ( bm->compact_page_skip(bm, session, mod->mod_replace.addr, mod->mod_replace.size, skipp)); @@ -63,17 +51,32 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) /* * __compact_rewrite_lock -- - * Lock out checkpoints and return if a page needs to be re-written. + * Return if a page needs to be re-written. */ static int __compact_rewrite_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) { + WT_BM *bm; WT_BTREE *btree; WT_DECL_RET; + size_t addr_size; + const uint8_t *addr; *skipp = true; /* Default skip. */ btree = S2BT(session); + bm = btree->bm; + + /* + * If the page is clean, test the original addresses. We're holding a hazard pointer on the + * page, so we're safe from eviction, no additional locking is required. 
+ */ + if (__wt_page_evict_clean(ref->page)) { + __wt_ref_info(session, ref, &addr, &addr_size, NULL); + if (addr == NULL) + return (0); + return (bm->compact_page_skip(bm, session, addr, addr_size, skipp)); + } /* * Reviewing in-memory pages requires looking at page reconciliation results, because we care @@ -83,8 +86,8 @@ __compact_rewrite_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) * * There are two ways we call reconciliation: checkpoints and eviction. Get the tree's flush * lock which blocks threads writing pages for checkpoints. If checkpoint is holding the lock, - * quit working this file, we'll visit it again in our next pass. We don't have to worry about - * eviction, we're holding a hazard pointer on the WT_REF, it's not going anywhere. + * quit working this file, we'll visit it again in our next pass. As noted above, we're holding + * a hazard pointer on the page, we're safe from eviction. */ WT_RET(__wt_spin_trylock(session, &btree->flush_lock)); @@ -225,12 +228,12 @@ int __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp) { WT_BM *bm; - WT_DECL_RET; size_t addr_size; - const uint8_t *addr; - u_int type; + uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE]; + bool is_leaf; WT_UNUSED(context); + *skipp = false; /* Default to reading */ /* @@ -254,29 +257,17 @@ __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, void *context, boo return (0); /* - * There's nothing to prevent the WT_REF state from changing underfoot, which can change its - * address. For example, the WT_REF address might reference an on-page cell, and page eviction - * can free that memory. Lock the WT_REF so we can look at its address. - */ - if (!WT_REF_CAS_STATE(session, ref, WT_REF_DISK, WT_REF_LOCKED)) - return (0); - - /* - * The page is on disk, so there had better be an address; assert that fact, test at run-time to - * avoid the core dump. 
- * * Internal pages must be read to walk the tree; ask the block-manager if it's useful to rewrite * leaf pages, don't do the I/O if a rewrite won't help. + * + * There can be NULL WT_REF.addr values, where the underlying call won't return a valid address. + * The "it's a leaf page" return is enough to confirm we have a valid address for a leaf page. */ - __wt_ref_info(session, ref, &addr, &addr_size, &type); - WT_ASSERT(session, addr != NULL); - if (addr != NULL && type != WT_CELL_ADDR_INT) { + __wt_ref_info_lock(session, ref, addr, &addr_size, &is_leaf); + if (is_leaf) { bm = S2BT(session)->bm; - ret = bm->compact_page_skip(bm, session, addr, addr_size, skipp); + return (bm->compact_page_skip(bm, session, addr, addr_size, skipp)); } - /* Reset the WT_REF state. */ - WT_REF_SET_STATE(ref, WT_REF_DISK); - - return (ret); + return (0); } diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index 0f6b569b755..e4f6b17786c 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -473,17 +473,27 @@ __wt_btcur_reset(WT_CURSOR_BTREE *cbt) * Search and return exact matching records only, including uncommitted ones. */ int -__wt_btcur_search_uncommitted(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) +__wt_btcur_search_uncommitted(WT_CURSOR *cursor, WT_UPDATE **updp) { WT_BTREE *btree; - WT_CURSOR *cursor; + WT_CURSOR_BTREE *cbt; WT_SESSION_IMPL *session; WT_UPDATE *upd; + *updp = NULL; + + cbt = (WT_CURSOR_BTREE *)cursor; btree = cbt->btree; - cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - *updp = upd = NULL; /* -Wuninitialized */ + upd = NULL; /* -Wuninitialized */ + + /* + * Not calling the cursor initialization functions, we don't want to be tapped for eviction nor + * do we want other standard cursor semantics like snapshots, just discard the hazard pointer + * from the last operation. 
This also depends on the fact we're not setting the cursor's active + * flag, this is really a special chunk of code and not to be modified without careful thought. + */ + WT_RET(__cursor_reset(cbt)); WT_RET(btree->type == BTREE_ROW ? __cursor_row_search(cbt, false, NULL, NULL) : __cursor_col_search(cbt, NULL, NULL)); diff --git a/src/third_party/wiredtiger/src/btree/bt_import.c b/src/third_party/wiredtiger/src/btree/bt_import.c index 02f023567f5..9d63a3cc959 100644 --- a/src/third_party/wiredtiger/src/btree/bt_import.c +++ b/src/third_party/wiredtiger/src/btree/bt_import.c @@ -91,12 +91,14 @@ __wt_import(WT_SESSION_IMPL *session, const char *uri) * Build and flatten the metadata and the checkpoint list, then insert it into the metadata for * this file. * - * Strip out the checkpoint-LSN, an imported file isn't associated with any log files. Assign a + * Strip out any incremental backup information, an imported file has not been part of a backup. + * Strip out the checkpoint LSN, an imported file isn't associated with any log files. Assign a * unique file ID. */ filecfg[1] = a->data; filecfg[2] = checkpoint_list; - filecfg[3] = "checkpoint_lsn="; + filecfg[3] = "checkpoint_backup_info="; + filecfg[4] = "checkpoint_lsn="; WT_WITH_SCHEMA_LOCK(session, ret = __wt_snprintf(fileid, sizeof(fileid), "id=%" PRIu32, ++S2C(session)->next_file_id)); WT_ERR(ret); diff --git a/src/third_party/wiredtiger/src/btree/bt_misc.c b/src/third_party/wiredtiger/src/btree/bt_misc.c index d06b0b33bf6..4ff9afa3b66 100644 --- a/src/third_party/wiredtiger/src/btree/bt_misc.c +++ b/src/third_party/wiredtiger/src/btree/bt_misc.c @@ -85,27 +85,6 @@ __wt_cell_type_string(uint8_t type) } /* - * __wt_page_addr_string -- - * Figure out a page's "address" and load a buffer with a printable, nul-terminated - * representation of that address. 
- */ -const char * -__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf) -{ - size_t addr_size; - const uint8_t *addr; - - if (__wt_ref_is_root(ref)) { - buf->data = "[Root]"; - buf->size = strlen("[Root]"); - return (buf->data); - } - - __wt_ref_info(session, ref, &addr, &addr_size, NULL); - return (__wt_addr_string(session, addr, addr_size, buf)); -} - -/* * __wt_addr_string -- * Load a buffer with a printable, nul-terminated representation of an address. */ diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index ddf47a43c2e..e8355692ce2 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -639,9 +639,7 @@ read: session, true, !F_ISSET(&session->txn, WT_TXN_HAS_ID), NULL)); WT_RET(__page_read(session, ref, flags)); - /* - * We just read a page, don't evict it before we have a chance to use it. - */ + /* We just read a page, don't evict it before we have a chance to use it. */ evict_skip = true; /* @@ -713,10 +711,13 @@ read: __wt_tree_modify_set(session); /* - * Check if the page requires forced eviction. + * If a page has grown too large, we'll try and forcibly evict it before making it + * available to the caller. There are a variety of cases where that's not possible. + * Don't involve a thread resolving a transaction in forced eviction, they're usually + * making the problem better. 
*/ - if (evict_skip || LF_ISSET(WT_READ_NO_SPLIT) || btree->evict_disabled > 0 || - btree->lsm_primary) + if (evict_skip || F_ISSET(session, WT_SESSION_RESOLVING_TXN) || + LF_ISSET(WT_READ_NO_SPLIT) || btree->evict_disabled > 0 || btree->lsm_primary) goto skip_evict; /* diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c index d918b5d856c..2ba005bc096 100644 --- a/src/third_party/wiredtiger/src/btree/bt_slvg.c +++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c @@ -250,13 +250,17 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp2)); /* - * Step 1: Inform the underlying block manager that we're salvaging the file. + * !!! (Don't format the comment.) + * Step 1: + * Inform the underlying block manager that we're salvaging the file. */ WT_ERR(bm->salvage_start(bm, session)); /* - * Step 2: Read the file and build in-memory structures that reference any leaf or overflow - * page. Any pages other than leaf or overflow pages are added to the free list. + * !!! (Don't format the comment.) + * Step 2: + * Read the file and build in-memory structures that reference any leaf or overflow page. Any + * pages other than leaf or overflow pages are added to the free list. * * Turn off read checksum and verification error messages while we're reading the file, we * expect to see corrupted blocks. @@ -267,60 +271,54 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(ret); /* + * !!! (Don't format the comment.) * Step 3: - * Discard any page referencing a non-existent overflow page. We do - * this before checking overlapping key ranges on the grounds that a - * bad key range we can use is better than a terrific key range that - * references pages we don't have. 
On the other hand, we subsequently - * discard key ranges where there are better overlapping ranges, and - * it would be better if we let the availability of an overflow value - * inform our choices as to the key ranges we select, ideally on a - * per-key basis. + * Discard any page referencing a non-existent overflow page. We do this before checking + * overlapping key ranges on the grounds that a bad key range we can use is better than a + * terrific key range that references pages we don't have. On the other hand, we subsequently + * discard key ranges where there are better overlapping ranges, and it would be better if + * we let the availability of an overflow value inform our choices as to the key ranges we + * select, ideally on a per-key basis. * - * A complicating problem is found in variable-length column-store - * objects, where we potentially split key ranges within RLE units. - * For example, if there's a page with rows 15-20 and we later find - * row 17 with a larger LSN, the range splits into 3 chunks, 15-16, - * 17, and 18-20. If rows 15-20 were originally a single value (an - * RLE of 6), and that record is an overflow record, we end up with - * two chunks, both of which want to reference the same overflow value. + * A complicating problem is found in variable-length column-store objects, where we + * potentially split key ranges within RLE units. For example, if there's a page with rows + * 15-20 and we later find row 17 with a larger LSN, the range splits into 3 chunks, 15-16, + * 17, and 18-20. If rows 15-20 were originally a single value (an RLE of 6), and that + * record is an overflow record, we end up with two chunks, both of which want to reference + * the same overflow value. * - * Instead of the approach just described, we're first discarding any - * pages referencing non-existent overflow pages, then we're reviewing - * our key ranges and discarding any that overlap. 
We're doing it that - * way for a few reasons: absent corruption, missing overflow items are - * strong arguments the page was replaced (on the other hand, some kind - * of file corruption is probably why we're here); it's a significant - * amount of additional complexity to simultaneously juggle overlapping - * ranges and missing overflow items; finally, real-world applications - * usually don't have a lot of overflow items, as WiredTiger supports + * Instead of the approach just described, we're first discarding any pages referencing + * non-existent overflow pages, then we're reviewing our key ranges and discarding any + * that overlap. We're doing it that way for a few reasons: absent corruption, missing + * overflow items are strong arguments the page was replaced (on the other hand, some kind + * of file corruption is probably why we're here); it's a significant amount of additional + * complexity to simultaneously juggle overlapping ranges and missing overflow items; finally, + * real-world applications usually don't have a lot of overflow items, as WiredTiger supports * very large page sizes, overflow items shouldn't be common. * * Step 4: - * Add unreferenced overflow page blocks to the free list so they are - * reused immediately. + * Add unreferenced overflow page blocks to the free list so they are reused immediately. */ WT_ERR(__slvg_ovfl_reconcile(session, ss)); WT_ERR(__slvg_ovfl_discard(session, ss)); /* + * !!! (Don't format the comment.) * Step 5: - * Walk the list of pages looking for overlapping ranges to resolve. - * If we find a range that needs to be resolved, set a global flag - * and a per WT_TRACK flag on the pages requiring modification. + * Walk the list of pages looking for overlapping ranges to resolve. If we find a range + * that needs to be resolved, set a global flag and a per WT_TRACK flag on the pages requiring + * modification. * * This requires sorting the page list by key, and secondarily by LSN. * * !!! 
- * It's vanishingly unlikely and probably impossible for fixed-length - * column-store files to have overlapping key ranges. It's possible - * for an entire key range to go missing (if a page is corrupted and - * lost), but because pages can't split, it shouldn't be possible to - * find pages where the key ranges overlap. That said, we check for - * it and clean up after it in reconciliation because it doesn't cost - * much and future column-store formats or operations might allow for - * fixed-length format ranges to overlap during salvage, and I don't - * want to have to retrofit the code later. + * It's vanishingly unlikely and probably impossible for fixed-length column-store files + * to have overlapping key ranges. It's possible for an entire key range to go missing (if + * a page is corrupted and lost), but because pages can't split, it shouldn't be possible to + * find pages where the key ranges overlap. That said, we check for it and clean up after + * it in reconciliation because it doesn't cost much and future column-store formats or + * operations might allow for fixed-length format ranges to overlap during salvage, and I + * don't want to have to retrofit the code later. */ __wt_qsort(ss->pages, (size_t)ss->pages_next, sizeof(WT_TRACK *), __slvg_trk_compare_key); if (ss->page_type == WT_PAGE_ROW_LEAF) @@ -329,8 +327,10 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__slvg_col_range(session, ss)); /* - * Step 6: We may have lost key ranges in column-store databases, that is, some part of the - * record number space is gone; look for missing ranges. + * !!! (Don't format the comment.) + * Step 6: + * We may have lost key ranges in column-store databases, that is, some part of the record + * number space is gone; look for missing ranges. 
*/ switch (ss->page_type) { case WT_PAGE_COL_FIX: @@ -342,8 +342,10 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) } /* - * Step 7: Build an internal page that references all of the leaf pages, and write it, as well - * as any merged pages, to the file. + * !!! (Don't format the comment.) + * Step 7: + * Build an internal page that references all of the leaf pages, and write it, as well as any + * merged pages, to the file. * * Count how many leaf pages we have (we could track this during the array shuffling/splitting, * but that's a lot harder). @@ -365,25 +367,31 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) } /* - * Step 8: If we had to merge key ranges, we have to do a final pass through the leaf page array - * and discard file pages used during key merges. We can't do it earlier: if we free'd the leaf - * pages we're merging as we merged them, the write of subsequent leaf pages or the internal - * page might allocate those free'd file blocks, and if the salvage run subsequently fails, we'd - * have overwritten pages used to construct the final key range. In other words, if the salvage - * run fails, we don't want to overwrite data the next salvage run might need. + * !!! (Don't format the comment.) + * Step 8: + * If we had to merge key ranges, we have to do a final pass through the leaf page array + * and discard file pages used during key merges. We can't do it earlier: if we free'd the + * leaf pages we're merging as we merged them, the write of subsequent leaf pages or the + * internal page might allocate those free'd file blocks, and if the salvage run subsequently + * fails, we'd have overwritten pages used to construct the final key range. In other words, + * if the salvage run fails, we don't want to overwrite data the next salvage run might need. */ if (ss->merge_free) WT_ERR(__slvg_merge_block_free(session, ss)); /* - * Step 9: Evict any newly created root page, creating a checkpoint. + * !!! (Don't format the comment.) 
+ * Step 9: + * Evict any newly created root page, creating a checkpoint. */ WT_ERR(__slvg_checkpoint(session, &ss->root_ref)); -/* - * Step 10: Inform the underlying block manager that we're done. - */ err: + /* + * !!! (Don't format the comment.) + * Step 10: + * Inform the underlying block manager that we're done. + */ WT_TRET(bm->salvage_end(bm, session)); /* Discard any root page we created. */ @@ -714,40 +722,89 @@ __slvg_trk_leaf_ovfl(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_TRA } /* + * !!! (Don't format the comment.) + * When pages split, the key range is split across multiple pages. If not all + * of the old versions of the page are overwritten, or not all of the new pages + * are written, or some of the pages are corrupted, salvage will read different + * pages with overlapping key ranges, at different LSNs. + * + * We salvage all of the key ranges we find, at the latest LSN value: this means + * we may resurrect pages of deleted items, as page deletion doesn't write leaf + * pages and salvage will read and instantiate the contents of an old version of + * the deleted page. + * + * The leaf page array is sorted in key order, and secondarily on LSN: what this + * means is that for each new key range, the first page we find is the best page + * for that key. The process is to walk forward from each page until we reach a + * page with a starting key after the current page's stopping key. + * + * For each of page, check to see if they overlap the current page's key range. + * If they do, resolve the overlap. Because WiredTiger rarely splits pages, + * overlap resolution usually means discarding a page because the key ranges + * are the same, and one of the pages is simply an old version of the other. + * + * However, it's possible more complex resolution is necessary. 
For example, + * here's an improbably complex list of page ranges and LSNs: + * + * Page Range LSN + * 30 A-G 3 + * 31 C-D 4 + * 32 B-C 5 + * 33 C-F 6 + * 34 C-D 7 + * 35 F-M 8 + * 36 H-O 9 + * + * We walk forward from each page reviewing all other pages in the array that + * overlap the range. For each overlap, the current or the overlapping + * page is updated so the page with the most recent information for any range + * "owns" that range. Here's an example for page 30. + * + * Review page 31: because page 31 has the range C-D and a higher LSN than page + * 30, page 30 would "split" into two ranges, A-C and E-G, conceding the C-D + * range to page 31. The new track element would be inserted into array with + * the following result: + * + * Page Range LSN + * 30 A-C 3 << Changed WT_TRACK element + * 31 C-D 4 + * 32 B-C 5 + * 33 C-F 6 + * 34 C-D 7 + * 30 E-G 3 << New WT_TRACK element + * 35 F-M 8 + * 36 H-O 9 + * + * Continue the review of the first element, using its new values. + * + * Review page 32: because page 31 has the range B-C and a higher LSN than page + * 30, page 30's A-C range would be truncated, conceding the B-C range to page + * 32. + * 30 A-B 3 + * E-G 3 + * 31 C-D 4 + * 32 B-C 5 + * 33 C-F 6 + * 34 C-D 7 + * + * Review page 33: because page 33 has a starting key (C) past page 30's ending + * key (B), we stop evaluating page 30's A-B range, as there can be no further + * overlaps. + * + * This process is repeated for each page in the array. + * + * When page 33 is processed, we'd discover that page 33's C-F range overlaps + * page 30's E-G range, and page 30's E-G range would be updated, conceding the + * E-F range to page 33. + * + * This is not computationally expensive because we don't walk far forward in + * the leaf array because it's sorted by starting key, and because WiredTiger + * splits are rare, the chance of finding the kind of range overlap requiring + * re-sorting the array is small. 
+ */ +/* * __slvg_col_range -- - * Figure out the leaf pages we need and free the leaf pages we don't. When pages split, the key - * range is split across multiple pages. If not all of the old versions of the page are - * overwritten, or not all of the new pages are written, or some of the pages are corrupted, - * salvage will read different pages with overlapping key ranges, at different LSNs. We salvage - * all of the key ranges we find, at the latest LSN value: this means we may resurrect pages of - * deleted items, as page deletion doesn't write leaf pages and salvage will read and - * instantiate the contents of an old version of the deleted page. The leaf page array is sorted - * in key order, and secondarily on LSN: what this means is that for each new key range, the - * first page we find is the best page for that key. The process is to walk forward from each - * page until we reach a page with a starting key after the current page's stopping key. For - * each of page, check to see if they overlap the current page's key range. If they do, resolve - * the overlap. Because WiredTiger rarely splits pages, overlap resolution usually means - * discarding a page because the key ranges are the same, and one of the pages is simply an old - * version of the other. However, it's possible more complex resolution is necessary. For - * example, here's an improbably complex list of page ranges and LSNs: Page Range LSN 30 A-G 3 - * 31 C-D 4 32 B-C 5 33 C-F 6 34 C-D 7 35 F-M 8 36 H-O 9 We walk forward from each page - * reviewing all other pages in the array that overlap the range. For each overlap, the current - * or the overlapping page is updated so the page with the most recent information for any range - * "owns" that range. Here's an example for page 30. Review page 31: because page 31 has the - * range C-D and a higher LSN than page 30, page 30 would "split" into two ranges, A-C and E-G, - * conceding the C-D range to page 31. 
The new track element would be inserted into array with - * the following result: Page Range LSN 30 A-C 3 << Changed WT_TRACK element 31 C-D 4 32 B-C 5 - * 33 C-F 6 34 C-D 7 30 E-G 3 << New WT_TRACK element 35 F-M 8 36 H-O 9 Continue the review of - * the first element, using its new values. Review page 32: because page 31 has the range B-C - * and a higher LSN than page 30, page 30's A-C range would be truncated, conceding the B-C - * range to page 32. 30 A-B 3 E-G 3 31 C-D 4 32 B-C 5 33 C-F 6 34 C-D 7 Review page 33: because - * page 33 has a starting key (C) past page 30's ending key (B), we stop evaluating page 30's - * A-B range, as there can be no further overlaps. This process is repeated for each page in the - * array. When page 33 is processed, we'd discover that page 33's C-F range overlaps page 30's - * E-G range, and page 30's E-G range would be updated, conceding the E-F range to page 33. This - * is not computationally expensive because we don't walk far forward in the leaf array because - * it's sorted by starting key, and because WiredTiger splits are rare, the chance of finding - * the kind of range overlap requiring re-sorting the array is small. + * Figure out the leaf pages we need and free the leaf pages we don't. */ static int __slvg_col_range(WT_SESSION_IMPL *session, WT_STUFF *ss) @@ -820,6 +877,7 @@ __slvg_col_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s __wt_addr_string(session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2)); /* + * !!! (Don't format the comment.) * The key ranges of two WT_TRACK pages in the array overlap -- choose * the ranges we're going to take from each. * @@ -919,15 +977,12 @@ __slvg_col_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s if (a_trk->trk_gen > b_trk->trk_gen) { delete_b: /* - * After page and overflow reconciliation, one (and only one) - * page can reference an overflow record. 
But, if we split a - * page into multiple chunks, any of the chunks might own any - * of the backing overflow records, so overflow records won't - * normally be discarded until after the merge phase completes. - * (The merge phase is where the final pages are written, and - * we figure out which overflow records are actually used.) - * If freeing a chunk and there are no other references to the - * underlying shared information, the overflow records must be + * After page and overflow reconciliation, one (and only one) page can reference an overflow + * record. But, if we split a page into multiple chunks, any of the chunks might own any of + * the backing overflow records, so overflow records won't normally be discarded until after + * the merge phase completes. (The merge phase is where the final pages are written, and we + * figure out which overflow records are actually used.) If freeing a chunk and there are no + * other references to the underlying shared information, the overflow records must be * useless, discard them to keep the final file size small. */ if (b_trk->shared->ref == 1) @@ -1009,11 +1064,11 @@ __slvg_col_trk_update_start(uint32_t slot, WT_STUFF *ss) * longer be in the right location. * * For example, imagine page #1 has the key range 30-50, it split, and - * we wrote page #2 with key range 30-40, and page #3 key range with - * 40-50, where pages #2 and #3 have larger LSNs than page #1. When the + * we wrote page #2 with key range 30-40, and page #3 key range with 40-50, where pages #2 and + * #3 have larger LSNs than page #1. When the * key ranges were sorted, page #2 came first, then page #1 (because of - * their earlier start keys than page #3), and page #2 came before page - * #1 because of its LSN. When we resolve the overlap between page #2 + * their earlier start keys than page #3), and page #2 came before page #1 because of its LSN. 
+ * When we resolve the overlap between page #2 * and page #1, we truncate the initial key range of page #1, and it now * sorts after page #3, because it has the same starting key of 40, and * a lower LSN. @@ -1124,15 +1179,12 @@ __slvg_col_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF WT_REF_SET_STATE(ref, WT_REF_DISK); /* - * If the page's key range is unmodified from when we read it - * (in other words, we didn't merge part of this page with - * another page), we can use the page without change, and the - * only thing we need to do is mark all overflow records the - * page references as in-use. + * If the page's key range is unmodified from when we read it (in other words, we didn't + * merge part of this page with another page), we can use the page without change, and the + * only thing we need to do is mark all overflow records the page references as in-use. * - * If we did merge with another page, we have to build a page - * reflecting the updated key range. Note, that requires an - * additional pass to free the merge page's backing blocks. + * If we did merge with another page, we have to build a page reflecting the updated key + * range. Note, that requires an additional pass to free the merge page's backing blocks. */ if (F_ISSET(trk, WT_TRACK_MERGE)) { ss->merge_free = true; @@ -1210,13 +1262,11 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) } /* - * We can't discard the original blocks associated with this page now. - * (The problem is we don't want to overwrite any original information - * until the salvage run succeeds -- if we free the blocks now, the next - * merge page we write might allocate those blocks and overwrite them, - * and should the salvage run eventually fail, the original information - * would have been lost.) Clear the reference addr so eviction doesn't - * free the underlying blocks. + * We can't discard the original blocks associated with this page now. 
(The problem is we don't + * want to overwrite any original information until the salvage run succeeds -- if we free the + * blocks now, the next merge page we write might allocate those blocks and overwrite them, and + * should the salvage run eventually fail, the original information would have been lost.) Clear + * the reference addr so eviction doesn't free the underlying blocks. */ __wt_ref_addr_free(session, ref); @@ -1410,8 +1460,9 @@ __slvg_row_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s __wt_addr_string(session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2)); /* - * The key ranges of two WT_TRACK pages in the array overlap -- choose - * the ranges we're going to take from each. + * !!! (Don't format the comment.) + * The key ranges of two WT_TRACK pages in the array overlap -- choose the ranges we're going to + * take from each. * * We can think of the overlap possibilities as 11 different cases: * @@ -1432,11 +1483,11 @@ __slvg_row_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s * #10 AAAAAA A is middle of B * #11 AAAAAAAAAA A is a suffix of B * - * Note the leaf page array was sorted by key and a_trk appears earlier - * in the array than b_trk, so cases #2/8, #10 and #11 are impossible. + * Note the leaf page array was sorted by key and a_trk appears earlier in the array than b_trk, so + * cases #2/8, #10 and #11 are impossible. * - * Finally, there's one additional complicating factor -- final ranges - * are assigned based on the page's LSN. + * Finally, there's one additional complicating factor -- final ranges are assigned based on the + * page's LSN. */ #define A_TRK_START (&a_trk->row_start) #define A_TRK_STOP (&a_trk->row_stop) @@ -1514,15 +1565,12 @@ __slvg_row_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s if (a_trk->trk_gen > b_trk->trk_gen) { delete_b: /* - * After page and overflow reconciliation, one (and only one) - * page can reference an overflow record. 
But, if we split a - * page into multiple chunks, any of the chunks might own any - * of the backing overflow records, so overflow records won't - * normally be discarded until after the merge phase completes. - * (The merge phase is where the final pages are written, and - * we figure out which overflow records are actually used.) - * If freeing a chunk and there are no other references to the - * underlying shared information, the overflow records must be + * After page and overflow reconciliation, one (and only one) page can reference an overflow + * record. But, if we split a page into multiple chunks, any of the chunks might own any of + * the backing overflow records, so overflow records won't normally be discarded until after + * the merge phase completes. (The merge phase is where the final pages are written, and we + * figure out which overflow records are actually used.) If freeing a chunk and there are no + * other references to the underlying shared information, the overflow records must be * useless, discard them to keep the final file size small. */ if (b_trk->shared->ref == 1) @@ -1617,11 +1665,11 @@ __slvg_row_trk_update_start(WT_SESSION_IMPL *session, WT_ITEM *stop, uint32_t sl * longer be in the right location. * * For example, imagine page #1 has the key range 30-50, it split, and - * we wrote page #2 with key range 30-40, and page #3 key range with - * 40-50, where pages #2 and #3 have larger LSNs than page #1. When the + * we wrote page #2 with key range 30-40, and page #3 key range with 40-50, where pages #2 and + * #3 have larger LSNs than page #1. When the * key ranges were sorted, page #2 came first, then page #1 (because of - * their earlier start keys than page #3), and page #2 came before page - * #1 because of its LSN. When we resolve the overlap between page #2 + * their earlier start keys than page #3), and page #2 came before page #1 because of its LSN. 
+ * When we resolve the overlap between page #2 * and page #1, we truncate the initial key range of page #1, and it now * sorts after page #3, because it has the same starting key of 40, and * a lower LSN. @@ -1701,8 +1749,10 @@ __slvg_row_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF WT_REF *ref, **refp; WT_TRACK *trk; uint32_t i; + u_int decr_cnt; addr = NULL; + decr_cnt = 0; /* Allocate a row-store root (internal) page and fill it in. */ WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_INT, leaf_cnt, true, &page)); @@ -1736,15 +1786,12 @@ __slvg_row_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF WT_REF_SET_STATE(ref, WT_REF_DISK); /* - * If the page's key range is unmodified from when we read it - * (in other words, we didn't merge part of this page with - * another page), we can use the page without change, and the - * only thing we need to do is mark all overflow records the - * page references as in-use. + * If the page's key range is unmodified from when we read it (in other words, we didn't + * merge part of this page with another page), we can use the page without change, and the + * only thing we need to do is mark all overflow records the page references as in-use. * - * If we did merge with another page, we have to build a page - * reflecting the updated key range. Note, that requires an - * additional pass to free the merge page's backing blocks. + * If we did merge with another page, we have to build a page reflecting the updated key + * range. Note, that requires an additional pass to free the merge page's backing blocks. */ if (F_ISSET(trk, WT_TRACK_MERGE)) { ss->merge_free = true; @@ -1757,13 +1804,30 @@ __slvg_row_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF WT_ERR(__slvg_ovfl_ref_all(session, trk)); } ++ref; + + /* + * !!! + * There's a risk the page we're building is too large for the cache. 
The right fix would be + * to write the keys out to an on-disk file and delay allocating the page image until we're + * ready to reconcile the new root page, and then read keys in from that backing file during + * the reconciliation of the root page. For now, make sure the eviction threads don't see us + * as a threat. + */ + if (page->memory_footprint > WT_MEGABYTE) { + ++decr_cnt; + __wt_cache_page_inmem_decr(session, page, WT_MEGABYTE); + } } + if (decr_cnt != 0) + __wt_cache_page_inmem_incr(session, page, decr_cnt * WT_MEGABYTE); __wt_root_ref_init(session, &ss->root_ref, page, false); if (0) { err: __wt_free(session, addr); + if (decr_cnt != 0) + __wt_cache_page_inmem_incr(session, page, decr_cnt * WT_MEGABYTE); __wt_page_out(session, &page); } return (ret); @@ -1866,13 +1930,11 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref, WT_S cookie->skip = skip_start; /* - * We can't discard the original blocks associated with this page now. - * (The problem is we don't want to overwrite any original information - * until the salvage run succeeds -- if we free the blocks now, the next - * merge page we write might allocate those blocks and overwrite them, - * and should the salvage run eventually fail, the original information - * would have been lost.) Clear the reference addr so eviction doesn't - * free the underlying blocks. + * We can't discard the original blocks associated with this page now. (The problem is we don't + * want to overwrite any original information until the salvage run succeeds -- if we free the + * blocks now, the next merge page we write might allocate those blocks and overwrite them, and + * should the salvage run eventually fail, the original information would have been lost.) Clear + * the reference addr so eviction doesn't free the underlying blocks. 
*/ __wt_ref_addr_free(session, ref); diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 4212d820e60..bba9f370682 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -778,7 +778,13 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, uint32_t */ for (i = 0, deleted_refs = scr->mem; i < deleted_entries; ++i) { next_ref = pindex->index[deleted_refs[i]]; - WT_ASSERT(session, next_ref->state == WT_REF_SPLIT); +#ifdef HAVE_DIAGNOSTIC + { + uint32_t ref_state; + WT_ORDERED_READ(ref_state, next_ref->state); + WT_ASSERT(session, ref_state == WT_REF_LOCKED || ref_state == WT_REF_SPLIT); + } +#endif /* * We set the WT_REF to split, discard it, freeing any resources it holds. diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c index 175a4010db0..2c05b742ec8 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c @@ -303,6 +303,27 @@ __verify_checkpoint_reset(WT_VSTUFF *vs) } /* + * __verify_addr_string -- + * Figure out a page's "address" and load a buffer with a printable, nul-terminated + * representation of that address. + */ +static const char * +__verify_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf) +{ + size_t addr_size; + const uint8_t *addr; + + if (__wt_ref_is_root(ref)) { + buf->data = "[Root]"; + buf->size = strlen("[Root]"); + return (buf->data); + } + + __wt_ref_info(session, ref, &addr, &addr_size, NULL); + return (__wt_addr_string(session, addr, addr_size, buf)); +} + +/* * __verify_addr_ts -- * Check an address block's timestamps. 
*/ @@ -315,26 +336,26 @@ __verify_addr_ts(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *unpack, WT_RET_MSG(session, WT_ERROR, "internal page reference at %s has a newest stop " "timestamp of 0", - __wt_page_addr_string(session, ref, vs->tmp1)); + __verify_addr_string(session, ref, vs->tmp1)); if (unpack->oldest_start_ts > unpack->newest_stop_ts) WT_RET_MSG(session, WT_ERROR, "internal page reference at %s has an oldest start " "timestamp %s newer than its newest stop timestamp %s", - __wt_page_addr_string(session, ref, vs->tmp1), + __verify_addr_string(session, ref, vs->tmp1), __wt_timestamp_to_string(unpack->oldest_start_ts, ts_string[0]), __wt_timestamp_to_string(unpack->newest_stop_ts, ts_string[1])); if (unpack->newest_stop_txn == WT_TXN_NONE) WT_RET_MSG(session, WT_ERROR, "internal page reference at %s has a newest stop " "transaction of 0", - __wt_page_addr_string(session, ref, vs->tmp1)); + __verify_addr_string(session, ref, vs->tmp1)); if (unpack->oldest_start_txn > unpack->newest_stop_txn) WT_RET_MSG(session, WT_ERROR, "internal page reference at %s has an oldest start " "transaction (%" PRIu64 ") newer than its newest stop " "transaction (%" PRIu64 ")", - __wt_page_addr_string(session, ref, vs->tmp1), unpack->oldest_start_txn, + __verify_addr_string(session, ref, vs->tmp1), unpack->oldest_start_txn, unpack->newest_stop_txn); return (0); } @@ -363,12 +384,12 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *addr_unpack unpack = &_unpack; - __wt_verbose(session, WT_VERB_VERIFY, "%s %s", __wt_page_addr_string(session, ref, vs->tmp1), + __wt_verbose(session, WT_VERB_VERIFY, "%s %s", __verify_addr_string(session, ref, vs->tmp1), __wt_page_type_string(page->type)); /* Optionally dump the address. 
*/ if (vs->dump_address) - WT_RET(__wt_msg(session, "%s %s", __wt_page_addr_string(session, ref, vs->tmp1), + WT_RET(__wt_msg(session, "%s %s", __verify_addr_string(session, ref, vs->tmp1), __wt_page_type_string(page->type))); /* Track the shape of the tree. */ @@ -427,7 +448,7 @@ recno_chk: if (recno != vs->record_total + 1) WT_RET_MSG(session, WT_ERROR, "page at %s has a starting record of %" PRIu64 " when the expected starting record is %" PRIu64, - __wt_page_addr_string(session, ref, vs->tmp1), recno, vs->record_total + 1); + __verify_addr_string(session, ref, vs->tmp1), recno, vs->record_total + 1); break; } switch (page->type) { @@ -477,7 +498,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR, "page at %s, of type %s, is referenced in " "its parent by a cell of type %s", - __wt_page_addr_string(session, ref, vs->tmp1), __wt_page_type_string(page->type), + __verify_addr_string(session, ref, vs->tmp1), __wt_page_type_string(page->type), __wt_cell_type_string(addr_unpack->raw)); break; } @@ -512,7 +533,7 @@ celltype_err: "%s is %" PRIu64 " and the expected " "starting record number is %" PRIu64, - entry, __wt_page_addr_string(session, child_ref, vs->tmp1), child_ref->ref_recno, + entry, __verify_addr_string(session, child_ref, vs->tmp1), child_ref->ref_recno, vs->record_total + 1); } @@ -594,13 +615,13 @@ __verify_row_int_key_order( " on the page at %s " "sorts before the last key appearing on page %s, earlier " "in the tree: %s, %s", - entry, __wt_page_addr_string(session, ref, vs->tmp1), (char *)vs->max_addr->data, + entry, __verify_addr_string(session, ref, vs->tmp1), (char *)vs->max_addr->data, __wt_buf_set_printable(session, item.data, item.size, vs->tmp2), __wt_buf_set_printable(session, vs->max_key->data, vs->max_key->size, vs->tmp3)); /* Update the largest key we've seen to the key just checked. 
*/ WT_RET(__wt_buf_set(session, vs->max_key, item.data, item.size)); - WT_IGNORE_RET_PTR(__wt_page_addr_string(session, ref, vs->max_addr)); + WT_IGNORE_RET_PTR(__verify_addr_string(session, ref, vs->max_addr)); return (0); } @@ -647,14 +668,14 @@ __verify_row_leaf_key_order(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs "the first key on the page at %s sorts equal to " "or less than the last key appearing on the page " "at %s, earlier in the tree: %s, %s", - __wt_page_addr_string(session, ref, vs->tmp2), (char *)vs->max_addr->data, + __verify_addr_string(session, ref, vs->tmp2), (char *)vs->max_addr->data, __wt_buf_set_printable(session, vs->tmp1->data, vs->tmp1->size, vs->tmp3), __wt_buf_set_printable(session, vs->max_key->data, vs->max_key->size, vs->tmp4)); } /* Update the largest key we've seen to the last key on this page. */ WT_RET(__wt_row_leaf_key_copy(session, page, page->pg_row + (page->entries - 1), vs->max_key)); - WT_IGNORE_RET_PTR(__wt_page_addr_string(session, ref, vs->max_addr)); + WT_IGNORE_RET_PTR(__verify_addr_string(session, ref, vs->max_addr)); return (0); } @@ -728,7 +749,7 @@ __verify_ts_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num, c WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32 " on page at %s failed verification with %s " "timestamp of %s, %s the parent's %s timestamp of %s", - cell_num, __wt_page_addr_string(session, ref, vs->tmp1), ts1_name, ts1_bp, + cell_num, __verify_addr_string(session, ref, vs->tmp1), ts1_name, ts1_bp, gt ? "less than" : "greater than", ts2_name, ts2_bp); } @@ -751,7 +772,7 @@ __verify_txn_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num, "transaction of %" PRIu64 ", %s the parent's %s transaction of " "%" PRIu64, - cell_num, __wt_page_addr_string(session, ref, vs->tmp1), txn1_name, txn1, + cell_num, __verify_addr_string(session, ref, vs->tmp1), txn1_name, txn1, gt ? 
"less than" : "greater than", txn2_name, txn2); } @@ -794,7 +815,7 @@ __verify_page_cell( " on page at %s references " "an overflow item at %s that failed " "verification", - cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1), + cell_num - 1, __verify_addr_string(session, ref, vs->tmp1), __wt_addr_string(session, unpack.data, unpack.size, vs->tmp2)); break; } @@ -812,18 +833,18 @@ __verify_page_cell( WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32 " on page at %s has a " "newest stop timestamp of 0", - cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1)); + cell_num - 1, __verify_addr_string(session, ref, vs->tmp1)); if (unpack.newest_stop_txn == WT_TXN_NONE) WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32 " on page at %s has a " "newest stop transaction of 0", - cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1)); + cell_num - 1, __verify_addr_string(session, ref, vs->tmp1)); if (unpack.oldest_start_ts > unpack.newest_stop_ts) WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32 " on page at %s has an " "oldest start timestamp %s newer than " "its newest stop timestamp %s", - cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1), + cell_num - 1, __verify_addr_string(session, ref, vs->tmp1), __wt_timestamp_to_string(unpack.oldest_start_ts, ts_string[0]), __wt_timestamp_to_string(unpack.newest_stop_ts, ts_string[1])); if (unpack.oldest_start_txn > unpack.newest_stop_txn) { @@ -833,7 +854,7 @@ __verify_page_cell( ") " "newer than its newest stop transaction " "(%" PRIu64 ")", - cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1), + cell_num - 1, __verify_addr_string(session, ref, vs->tmp1), unpack.oldest_start_txn, unpack.newest_stop_txn); } @@ -858,27 +879,27 @@ __verify_page_cell( WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32 " on page at %s has a stop " "timestamp of 0", - cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1)); + cell_num - 1, __verify_addr_string(session, ref, vs->tmp1)); if (unpack.start_ts > unpack.stop_ts) 
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32 " on page at %s has a " "start timestamp %s newer than its stop " "timestamp %s", - cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1), + cell_num - 1, __verify_addr_string(session, ref, vs->tmp1), __wt_timestamp_to_string(unpack.start_ts, ts_string[0]), __wt_timestamp_to_string(unpack.stop_ts, ts_string[1])); if (unpack.stop_txn == WT_TXN_NONE) WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32 " on page at %s has a stop " "transaction of 0", - cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1)); + cell_num - 1, __verify_addr_string(session, ref, vs->tmp1)); if (unpack.start_txn > unpack.stop_txn) WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32 " on page at %s has a " "start transaction %" PRIu64 "newer than " "its stop transaction %" PRIu64, - cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1), unpack.start_txn, + cell_num - 1, __verify_addr_string(session, ref, vs->tmp1), unpack.start_txn, unpack.stop_txn); WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start", unpack.start_ts, @@ -903,7 +924,7 @@ __verify_page_cell( WT_RET_MSG(session, WT_ERROR, "page at %s, of type %s and referenced in its parent by a " "cell of type %s, contains overflow items", - __wt_page_addr_string(session, ref, vs->tmp1), __wt_page_type_string(ref->page->type), + __verify_addr_string(session, ref, vs->tmp1), __wt_page_type_string(ref->page->type), __wt_cell_type_string(addr_unpack->raw)); return (0); diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c index 22b3b717fe0..252b245a8dc 100644 --- a/src/third_party/wiredtiger/src/btree/bt_walk.c +++ b/src/third_party/wiredtiger/src/btree/bt_walk.c @@ -79,16 +79,10 @@ found: static inline bool __ref_is_leaf(WT_SESSION_IMPL *session, WT_REF *ref) { - size_t addr_size; - const uint8_t *addr; - u_int type; + bool is_leaf; - /* - * If the page has a disk address, we can crack it to figure out if this page is a leaf 
page or - * not. If there's no address, the page isn't on disk and we don't know the page type. - */ - __wt_ref_info(session, ref, &addr, &addr_size, &type); - return (addr == NULL ? false : type == WT_CELL_ADDR_LEAF || type == WT_CELL_ADDR_LEAF_NO); + __wt_ref_info_lock(session, ref, NULL, NULL, &is_leaf); + return (is_leaf); } /* diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 629c5316e53..bba4a9b914b 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -415,6 +415,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = { {"block_allocation", "string", NULL, "choices=[\"first\",\"best\"]", NULL, 0}, {"block_compressor", "string", NULL, NULL, NULL, 0}, {"cache_resident", "boolean", NULL, NULL, NULL, 0}, {"checkpoint", "string", NULL, NULL, NULL, 0}, + {"checkpoint_backup_info", "string", NULL, NULL, NULL, 0}, {"checkpoint_lsn", "string", NULL, NULL, NULL, 0}, {"checksum", "string", NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", NULL, 0}, {"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0}, @@ -941,10 +942,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "access_pattern_hint=none,allocation_size=4KB,app_metadata=," "assert=(commit_timestamp=none,durable_timestamp=none," "read_timestamp=none),block_allocation=best,block_compressor=," - "cache_resident=false,checkpoint=,checkpoint_lsn=," - "checksum=uncompressed,collator=,columns=,dictionary=0," - "encryption=(keyid=,name=),format=btree,huffman_key=," - "huffman_value=,id=,ignore_in_memory_cache_size=false," + "cache_resident=false,checkpoint=,checkpoint_backup_info=," + "checkpoint_lsn=,checksum=uncompressed,collator=,columns=," + "dictionary=0,encryption=(keyid=,name=),format=btree,huffman_key=" + ",huffman_value=,id=,ignore_in_memory_cache_size=false," "internal_item_max=0,internal_key_max=0," 
"internal_key_truncate=true,internal_page_max=4KB,key_format=u," "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB," @@ -953,7 +954,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "prefix_compression=false,prefix_compression_min=4," "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90," "value_format=u,version=(major=0,minor=0)", - confchk_file_meta, 41}, + confchk_file_meta, 42}, {"index.meta", "app_metadata=,collator=,columns=,extractor=,immutable=false," "index_key_columns=,key_format=u,source=,type=file,value_format=u", diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index ace1505e6dd..fcf46b48f95 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -2671,6 +2671,12 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c WT_ERR(__wt_metadata_cursor(session, NULL)); + /* + * Load any incremental backup information. This reads the metadata so must be done after the + * turtle file is initialized. + */ + WT_ERR(__wt_backup_open(session)); + /* Start the worker threads and run recovery. */ WT_ERR(__wt_connection_workers(session, cfg)); diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index c947954e75c..1d79064186c 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -38,7 +38,7 @@ __conn_dhandle_config_set(WT_SESSION_IMPL *session) WT_DATA_HANDLE *dhandle; WT_DECL_RET; char *metaconf, *tmp; - const char *base, *cfg[3]; + const char *base, *cfg[4]; dhandle = session->dhandle; base = NULL; @@ -68,30 +68,32 @@ __conn_dhandle_config_set(WT_SESSION_IMPL *session) switch (dhandle->type) { case WT_DHANDLE_TYPE_BTREE: /* - * We are stripping out the checkpoint and checkpoint_lsn information from the config - * string. 
We save the rest of the metadata string, that is essentially static and - * unchanging and then concatenate the new checkpoint and LSN information on each - * checkpoint. The reason is performance and avoiding a lot of calls to the config parsing - * functions during a checkpoint for information that changes in a very well known way. + * We are stripping out all checkpoint related information from the config string. We save + * the rest of the metadata string, that is essentially static and unchanging and then + * concatenate the new checkpoint related information on each checkpoint. The reason is + * performance and avoiding a lot of calls to the config parsing functions during a + * checkpoint for information that changes in a very well known way. + * + * First collapse and overwrite checkpoint information because we do not know the name of or + * how many checkpoints may be in this metadata. Similarly, for backup information, we want + * an empty category to strip out since we don't know any backup ids. Set them empty and + * call collapse to overwrite anything existing. */ cfg[0] = metaconf; cfg[1] = "checkpoint=()"; - cfg[2] = NULL; + cfg[2] = "checkpoint_backup_info=()"; + cfg[3] = NULL; WT_ERR(__wt_strdup(session, WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0])); WT_ASSERT(session, dhandle->meta_base == NULL); - /* - * First collapse and overwrite any checkpoint information because we do not know the name - * or how many checkpoints may be in this metadata. So first we have to set the string to - * the empty checkpoint string and call collapse to overwrite anything existing. - */ WT_ERR(__wt_config_collapse(session, cfg, &tmp)); /* - * Now strip out the checkpoint and checkpoint LSN items from the configuration string and - * that is now our base metadata string. + * Now strip out the checkpoint related items from the configuration string and that is now + * our base metadata string. 
*/ cfg[0] = tmp; cfg[1] = NULL; - WT_ERR(__wt_config_merge(session, cfg, "checkpoint=,checkpoint_lsn=", &base)); + WT_ERR(__wt_config_merge( + session, cfg, "checkpoint=,checkpoint_backup_info=,checkpoint_lsn=", &base)); __wt_free(session, tmp); break; case WT_DHANDLE_TYPE_TABLE: diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c index 37136926060..e5c82d49a48 100644 --- a/src/third_party/wiredtiger/src/conn/conn_handle.c +++ b/src/third_party/wiredtiger/src/conn/conn_handle.c @@ -94,7 +94,6 @@ void __wt_connection_destroy(WT_CONNECTION_IMPL *conn) { WT_SESSION_IMPL *session; - u_int i; /* Check there's something to destroy. */ if (conn == NULL) @@ -132,13 +131,6 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_cond_destroy(session, &conn->lsm_manager.work_cond); /* Free allocated memory. */ - /* - * XXX we need to persist this information when we are working on making incremental backups - * persistent across restarts. - */ - for (i = 0; i < WT_BLKINCR_MAX; ++i) - __wt_free(session, conn->incr_backups[i].id_str); - __wt_free(session, conn->cfg); __wt_free(session, conn->debug_ckpt); __wt_free(session, conn->error_prefix); diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index f7e338ac9bb..df6e6b79300 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -141,6 +141,8 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) /* Close operation tracking */ WT_TRET(__wt_conn_optrack_teardown(session, false)); + __wt_backup_destroy(session); + /* Close any file handles left open. 
*/ WT_TRET(__wt_close_connection_close(session)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index be781118895..3fdbafce445 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -19,6 +19,101 @@ static int __backup_stop(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *); WT_ERR(F_ISSET(((WT_CURSOR_BACKUP *)(cursor)), WT_CURBACKUP_FORCE_STOP) ? EINVAL : 0); /* + * __wt_backup_destroy -- + * Destroy any backup information. + */ +void +__wt_backup_destroy(WT_SESSION_IMPL *session) +{ + WT_BLKINCR *blkincr; + WT_CONNECTION_IMPL *conn; + uint64_t i; + + conn = S2C(session); + /* Free any incremental backup information. */ + for (i = 0; i < WT_BLKINCR_MAX; ++i) { + blkincr = &conn->incr_backups[i]; + __wt_free(session, blkincr->id_str); + F_CLR(blkincr, WT_BLKINCR_VALID); + } + conn->incr_granularity = 0; + F_CLR(conn, WT_CONN_INCR_BACKUP); +} + +/* + * __wt_backup_open -- + * Restore any incremental backup information. We use the metadata's block information as the + * authority on whether incremental backup was in use on last shutdown. + */ +int +__wt_backup_open(WT_SESSION_IMPL *session) +{ + WT_BLKINCR *blkincr; + WT_CONFIG blkconf; + WT_CONFIG_ITEM b, k, v; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + uint64_t i; + char *config; + + conn = S2C(session); + config = NULL; + + WT_RET(__wt_metadata_search(session, WT_METAFILE_URI, &config)); + WT_ERR(__wt_config_getones(session, config, "checkpoint_backup_info", &v)); + __wt_config_subinit(session, &blkconf, &v); + /* + * Walk each item in the metadata and set up our last known global incremental information. + */ + F_CLR(conn, WT_CONN_INCR_BACKUP); + i = 0; + while (__wt_config_next(&blkconf, &k, &v) == 0) { + WT_ASSERT(session, i < WT_BLKINCR_MAX); + /* + * If we get here, we have at least one valid incremental backup. We want to set up its + * general configuration in the global table. 
+ */ + blkincr = &conn->incr_backups[i++]; + F_SET(conn, WT_CONN_INCR_BACKUP); + WT_ERR(__wt_strndup(session, k.str, k.len, &blkincr->id_str)); + WT_ERR(__wt_config_subgets(session, &v, "granularity", &b)); + /* + * NOTE: For now the granularity is in the connection because it cannot change. We may be + * able to relax that. + */ + conn->incr_granularity = blkincr->granularity = (uint64_t)b.val; + F_SET(blkincr, WT_BLKINCR_VALID); + } + +err: + if (ret != 0 && ret != WT_NOTFOUND) + __wt_backup_destroy(session); + __wt_free(session, config); + return (ret == WT_NOTFOUND ? 0 : ret); +} + +/* + * __wt_backup_file_remove -- + * Remove the incremental and meta-data backup files. + */ +int +__wt_backup_file_remove(WT_SESSION_IMPL *session) +{ + WT_DECL_RET; + + /* + * Note that order matters for removing the incremental files. We must remove the backup file + * before removing the source file so that we always know we were a source directory while + * there's any chance of an incremental backup file existing. + */ + WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP, true)); + WT_TRET(__wt_remove_if_exists(session, WT_LOGINCR_BACKUP, true)); + WT_TRET(__wt_remove_if_exists(session, WT_LOGINCR_SRC, true)); + WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP, true)); + return (ret); +} + +/* * __curbackup_next -- * WT_CURSOR->next method for the backup cursor type. */ @@ -71,35 +166,6 @@ err: } /* - * __backup_incr_release -- - * Free all resources relating to incremental backup. - */ -static int -__backup_incr_release(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, bool force) -{ - WT_BLKINCR *blk; - WT_CONNECTION_IMPL *conn; - u_int i; - - WT_UNUSED(cb); - WT_UNUSED(force); - conn = S2C(session); - /* - * Clear flags. Remove file. Release any memory information. - */ - F_CLR(conn, WT_CONN_INCR_BACKUP); - for (i = 0; i < WT_BLKINCR_MAX; ++i) { - blk = &conn->incr_backups[i]; - F_CLR(blk, WT_BLKINCR_VALID); - } - /* __wt_block_backup_remove... 
*/ - conn->ckpt_incr_granularity = 0; - WT_RET(__wt_remove_if_exists(session, WT_BLKINCR_BACKUP, true)); - - return (0); -} - -/* * __backup_free -- * Free list resources for a backup cursor. */ @@ -115,10 +181,6 @@ __backup_free(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) } if (cb->incr_file != NULL) __wt_free(session, cb->incr_file); - if (cb->incr_src != NULL) - __wt_free(session, cb->incr_src); - if (cb->incr_this != NULL) - __wt_free(session, cb->incr_this); __wt_curbackup_free_incr(session, cb); } @@ -140,7 +202,7 @@ err: if (F_ISSET(cb, WT_CURBACKUP_FORCE_STOP)) { __wt_verbose( session, WT_VERB_BACKUP, "%s", "Releasing resources from forced stop incremental"); - __backup_incr_release(session, cb, true); + __wt_backup_destroy(session); } /* @@ -232,26 +294,6 @@ err: } /* - * __backup_get_ckpt -- - * Get the most recent checkpoint information and store it in the structure. - * - * XXX - Currently set return to static void for the compiler, when this function has real content - * it should be static int. - */ -static void -__backup_get_ckpt(WT_SESSION_IMPL *session, WT_BLKINCR *incr) -{ - WT_UNUSED(session); - WT_UNUSED(incr); - /* - * Look up the most recent checkpoint and store information about it in incr. - * - * XXX When this function has content, return a real value. return (0); - */ - return; -} - -/* * __backup_add_id -- * Add the identifier for block based incremental backup. */ @@ -262,11 +304,13 @@ __backup_add_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval) WT_CONNECTION_IMPL *conn; WT_DECL_RET; u_int i; + const char *ckpt; conn = S2C(session); blk = NULL; for (i = 0; i < WT_BLKINCR_MAX; ++i) { blk = &conn->incr_backups[i]; + __wt_verbose(session, WT_VERB_BACKUP, "blk[%u] flags 0x%" PRIx64, i, blk->flags); /* If it isn't use, we can use it. 
*/ if (!F_ISSET(blk, WT_BLKINCR_INUSE)) break; @@ -281,16 +325,28 @@ __backup_add_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval) if (blk->id_str != NULL) __wt_verbose( session, WT_VERB_BACKUP, "Freeing and reusing backup slot with old id %s", blk->id_str); - /* Free any string that was there. */ + /* Free anything that was there. */ __wt_free(session, blk->id_str); WT_ERR(__wt_strndup(session, cval->str, cval->len, &blk->id_str)); - __wt_verbose(session, WT_VERB_BACKUP, "Using backup slot %u for id %s", i, blk->id_str); /* - * XXX This function can error in the future. - * - * WT_ERR(__backup_get_ckpt(session, blk)); + * Get the most recent checkpoint name. For now just use the one that is part of the metadata. + * We only care whether or not a checkpoint exists, so immediately free it. */ - __backup_get_ckpt(session, blk); + ret = __wt_meta_checkpoint_last_name(session, WT_METAFILE_URI, &ckpt); + __wt_free(session, ckpt); + if (ret != 0 && ret != WT_NOTFOUND) + WT_ERR(ret); + if (ret == WT_NOTFOUND) { + /* + * If we don't find any checkpoint, backup files need to be full copy. 
+ */ + __wt_verbose(session, WT_VERB_BACKUP, "ID %s: Did not find any metadata checkpoint for %s.", + blk->id_str, WT_METAFILE_URI); + F_SET(blk, WT_BLKINCR_FULL); + } else { + __wt_verbose(session, WT_VERB_BACKUP, "Using backup slot %u for id %s", i, blk->id_str); + F_CLR(blk, WT_BLKINCR_FULL); + } F_SET(blk, WT_BLKINCR_VALID); return (0); @@ -400,12 +456,11 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[ if (cval.val) { if (!F_ISSET(conn, WT_CONN_INCR_BACKUP)) { WT_RET(__wt_config_gets(session, cfg, "incremental.granularity", &cval)); - /* XXX may not need cb->incr_granularity */ - if (conn->ckpt_incr_granularity != 0) + if (conn->incr_granularity != 0) WT_RET_MSG(session, EINVAL, "Cannot change the incremental backup granularity"); - conn->ckpt_incr_granularity = cb->incr_granularity = (uint64_t)cval.val; + conn->incr_granularity = (uint64_t)cval.val; } - /* XXX Granularity can only be set once at the beginning */ + /* Granularity can only be set once at the beginning */ F_SET(conn, WT_CONN_INCR_BACKUP); incremental_config = true; } @@ -432,10 +487,8 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[ if (is_dup) WT_RET_MSG(session, EINVAL, "Incremental source identifier can only be specified on a primary backup cursor"); - WT_RET(__backup_find_id(session, &cval, &cb->incr)); - /* XXX might not need this incr_src field */ - WT_RET(__wt_strndup(session, cval.str, cval.len, &cb->incr_src)); - F_SET(cb->incr, WT_BLKINCR_INUSE); + WT_RET(__backup_find_id(session, &cval, &cb->incr_src)); + F_SET(cb->incr_src, WT_BLKINCR_INUSE); incremental_config = true; } /* @@ -455,8 +508,6 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[ WT_ERR_MSG(session, EINVAL, "Incremental identifier already exists"); WT_ERR(__backup_add_id(session, &cval)); - /* XXX might not need this incr_this field */ - WT_ERR(__wt_strndup(session, cval.str, cval.len, &cb->incr_this)); incremental_config 
= true; } @@ -530,8 +581,8 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[ F_SET(cb, WT_CURBACKUP_INCR); } err: - if (ret != 0 && cb->incr != NULL) - F_CLR(cb->incr, WT_BLKINCR_INUSE); + if (ret != 0 && cb->incr_src != NULL) + F_CLR(cb->incr_src, WT_BLKINCR_INUSE); __wt_scr_free(session, &tmp); return (ret); } @@ -716,8 +767,8 @@ __backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) /* If it's not a dup backup cursor, make sure one isn't open. */ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_BACKUP_DUP)); WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup_list = NULL); - if (cb->incr != NULL) - F_CLR(cb->incr, WT_BLKINCR_INUSE); + if (cb->incr_src != NULL) + F_CLR(cb->incr_src, WT_BLKINCR_INUSE); __backup_free(session, cb); /* Remove any backup specific file. */ @@ -742,27 +793,6 @@ __backup_all(WT_SESSION_IMPL *session) } /* - * __wt_backup_file_remove -- - * Remove the incremental and meta-data backup files. - */ -int -__wt_backup_file_remove(WT_SESSION_IMPL *session) -{ - WT_DECL_RET; - - /* - * Note that order matters for removing the incremental files. We must remove the backup file - * before removing the source file so that we always know we were a source directory while - * there's any chance of an incremental backup file existing. - */ - WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP, true)); - WT_TRET(__wt_remove_if_exists(session, WT_LOGINCR_BACKUP, true)); - WT_TRET(__wt_remove_if_exists(session, WT_LOGINCR_SRC, true)); - WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP, true)); - return (ret); -} - -/* * __backup_list_uri_append -- * Append a new file name to the list, allocate space as necessary. Called via the schema_worker * function. 
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c index c8c59d3f9db..5403e2308ff 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c @@ -9,53 +9,74 @@ #include "wt_internal.h" /* - * __alloc_merge -- - * Merge two allocation lists. + * __wt_backup_load_incr -- + * Load the incremental. */ -static void -__alloc_merge( - uint64_t *a, uint64_t a_cnt, uint64_t *b, uint64_t b_cnt, uint64_t *res, uint64_t *res_cnt) +int +__wt_backup_load_incr( + WT_SESSION_IMPL *session, WT_CONFIG_ITEM *blkcfg, WT_ITEM *bitstring, uint64_t nbits) { - uint64_t total; - - for (total = 0; a_cnt > 0 || b_cnt > 0; ++total, res += 2) { - if (a_cnt > 0 && b_cnt > 0) { - if (a[0] <= b[0]) { - res[0] = a[0]; - if (a[0] + a[1] < b[0]) - res[1] = a[1]; - else { - res[1] = (b[0] + b[1]) - a[0]; - b += 2; - --b_cnt; - } - a += 2; - --a_cnt; - } else if (b[0] <= a[0]) { - res[0] = b[0]; - if (b[0] + b[1] < a[0]) - res[1] = b[1]; - else { - res[1] = (a[0] + a[1]) - b[0]; - a += 2; - --a_cnt; - } - b += 2; - --b_cnt; - } - } else if (a_cnt > 0) { - res[0] = a[0]; - res[1] = a[1]; - a += 2; - --a_cnt; - } else if (b_cnt > 0) { - res[0] = b[0]; - res[1] = b[1]; - b += 2; - --b_cnt; + if (blkcfg->len != 0) + WT_RET(__wt_nhex_to_raw(session, blkcfg->str, blkcfg->len, bitstring)); + if (bitstring->size != (nbits >> 3)) + WT_RET_MSG(session, WT_ERROR, "corrupted modified block list"); + return (0); +} + +/* + * __curbackup_incr_blkmod -- + * Get the block modifications for a tree from its metadata and fill in the backup cursor's + * information with it. 
+ */ +static int +__curbackup_incr_blkmod(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_CURSOR_BACKUP *cb) +{ + WT_CONFIG blkconf; + WT_CONFIG_ITEM b, k, v; + WT_DECL_RET; + char *config; + + WT_ASSERT(session, btree != NULL); + WT_ASSERT(session, btree->dhandle != NULL); + WT_ASSERT(session, cb->incr_src != NULL); + + WT_RET(__wt_metadata_search(session, btree->dhandle->name, &config)); + WT_ERR(__wt_config_getones(session, config, "checkpoint_backup_info", &v)); + __wt_config_subinit(session, &blkconf, &v); + while ((ret = __wt_config_next(&blkconf, &k, &v)) == 0) { + /* + * First see if we have information for this source identifier. + */ + if (WT_STRING_MATCH(cb->incr_src->id_str, k.str, k.len) == 0) + continue; + + /* + * We found a match. If we have a name, then there should be granularity and nbits. The + * granularity should be set to something. But nbits may be 0 if there are no blocks + * currently modified. + */ + WT_ERR(__wt_config_subgets(session, &v, "granularity", &b)); + cb->granularity = (uint64_t)b.val; + WT_ERR(__wt_config_subgets(session, &v, "nbits", &b)); + cb->nbits = (uint64_t)b.val; + WT_ERR(__wt_config_subgets(session, &v, "offset", &b)); + cb->offset = (uint64_t)b.val; + + /* + * We found a match. Load the block information into the cursor. + */ + ret = __wt_config_subgets(session, &v, "blocks", &b); + if (ret != WT_NOTFOUND) { + WT_ERR(__wt_backup_load_incr(session, &b, &cb->bitstring, cb->nbits)); + cb->bit_offset = 0; + cb->incr_init = true; } } - *res_cnt = total; + WT_ERR_NOTFOUND_OK(ret); + +err: + __wt_free(session, config); + return (ret == WT_NOTFOUND ? 
0 : ret); } /* @@ -66,18 +87,11 @@ static int __curbackup_incr_next(WT_CURSOR *cursor) { WT_BTREE *btree; - WT_CKPT *ckpt, *ckptbase; WT_CURSOR_BACKUP *cb; WT_DECL_RET; WT_SESSION_IMPL *session; wt_off_t size; - uint64_t *a, *b, *current, *next; - uint64_t entries, total; uint32_t raw; - bool start, stop; - - ckptbase = NULL; - a = b = NULL; cb = (WT_CURSOR_BACKUP *)cursor; btree = cb->incr_cursor == NULL ? NULL : ((WT_CURSOR_BTREE *)cb->incr_cursor)->btree; @@ -86,104 +100,46 @@ __curbackup_incr_next(WT_CURSOR *cursor) F_CLR(cursor, WT_CURSTD_RAW); if (cb->incr_init) { - /* We have this object's incremental information, Check if we're done. */ - if (cb->incr_list_offset >= cb->incr_list_count - WT_BACKUP_INCR_COMPONENTS) - return (WT_NOTFOUND); + /* Look for the next chunk that had modifications. */ + while (cb->bit_offset < cb->nbits) + if (__bit_test(cb->bitstring.mem, cb->bit_offset)) + break; + else + ++cb->bit_offset; - /* - * If we returned all of the data, step to the next block, otherwise return the next chunk - * of the current block. - */ - if (cb->incr_list[cb->incr_list_offset + 1] <= cb->incr_granularity) - cb->incr_list_offset += WT_BACKUP_INCR_COMPONENTS; - else { - cb->incr_list[cb->incr_list_offset] += cb->incr_granularity; - cb->incr_list[cb->incr_list_offset + 1] -= cb->incr_granularity; - cb->incr_list[cb->incr_list_offset + 2] = WT_BACKUP_RANGE; - } - } else if (btree == NULL) { + /* We either have this object's incremental information or we're done. */ + if (cb->bit_offset >= cb->nbits) + WT_ERR(WT_NOTFOUND); + __wt_cursor_set_key(cursor, cb->offset + cb->granularity * cb->bit_offset++, + cb->granularity, WT_BACKUP_RANGE); + } else if (btree == NULL || F_ISSET(cb, WT_CURBACKUP_FORCE_FULL)) { /* We don't have this object's incremental information, and it's a full file copy. 
*/ WT_ERR(__wt_fs_size(session, cb->incr_file, &size)); - cb->incr_list_count = WT_BACKUP_INCR_COMPONENTS; + cb->nbits = 0; + cb->offset = 0; + cb->bit_offset = 0; cb->incr_init = true; - cb->incr_list_offset = 0; __wt_cursor_set_key(cursor, 0, size, WT_BACKUP_FILE); } else { /* * We don't have this object's incremental information, and it's not a full file copy. Get a - * list of the checkpoints available for the file and flag the starting/stopping ones. It - * shouldn't be possible to specify checkpoints that no longer exist, but check anyway. + * list of the block modifications for the file. The block modifications are from the + * incremental identifier starting point. Walk the list looking for one with a source of our + * id. */ - ret = __wt_meta_ckptlist_get(session, cb->incr_file, false, &ckptbase); - WT_ERR(ret == WT_NOTFOUND ? ENOENT : ret); - + WT_ERR(__curbackup_incr_blkmod(session, btree, cb)); /* - * Count up the maximum number of block entries we might have to merge, and allocate a pair - * of temporary arrays in which to do the merge. + * If there is no block modification information for this file, there is no information to + * return to the user. */ - entries = 0; - WT_CKPT_FOREACH (ckptbase, ckpt) - entries += ckpt->alloc_list_entries; - WT_ERR(__wt_calloc_def(session, entries * WT_BACKUP_INCR_COMPONENTS, &a)); - WT_ERR(__wt_calloc_def(session, entries * WT_BACKUP_INCR_COMPONENTS, &b)); - - /* Merge the block lists into a final list of blocks to copy. 
*/ - start = stop = false; - total = 0; - current = NULL; - next = a; - WT_CKPT_FOREACH (ckptbase, ckpt) { - if (strcmp(ckpt->name, cb->incr_checkpoint_start) == 0) { - start = true; - WT_ERR_ASSERT(session, ckpt->alloc_list_entries == 0, __wt_panic(session), - "incremental backup start checkpoint has allocation list blocks"); - continue; - } - if (start == true) { - if (strcmp(ckpt->name, cb->incr_checkpoint_stop) == 0) - stop = true; - - __alloc_merge( - current, total, ckpt->alloc_list, ckpt->alloc_list_entries, next, &total); - current = next; - next = next == a ? b : a; - } - - if (stop == true) - break; - } - - if (!start) - WT_ERR_MSG(session, ENOENT, "incremental backup start checkpoint %s not found", - cb->incr_checkpoint_start); - if (!stop) - WT_ERR_MSG(session, ENOENT, "incremental backup stop checkpoint %s not found", - cb->incr_checkpoint_stop); - - /* There may be nothing that needs copying. */ - if (total == 0) + if (cb->bitstring.mem == NULL) WT_ERR(WT_NOTFOUND); - - if (next == a) { - cb->incr_list = b; - b = NULL; - } else { - cb->incr_list = a; - a = NULL; - } - cb->incr_list_count = total; - cb->incr_list_offset = 0; - WT_ERR(__wt_scr_alloc(session, 0, &cb->incr_block)); - cb->incr_init = true; - - F_SET(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); + __wt_cursor_set_key(cursor, cb->offset + cb->granularity * cb->bit_offset++, + cb->granularity, WT_BACKUP_RANGE); } err: - __wt_free(session, a); - __wt_free(session, b); - __wt_meta_ckptlist_free(session, &ckptbase); F_SET(cursor, raw); API_END_RET(session, ret); } @@ -198,10 +154,7 @@ __wt_curbackup_free_incr(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) __wt_free(session, cb->incr_file); if (cb->incr_cursor != NULL) __wt_cursor_close(cb->incr_cursor); - __wt_free(session, cb->incr_checkpoint_start); - __wt_free(session, cb->incr_checkpoint_stop); - __wt_free(session, cb->incr_list); - __wt_scr_free(session, &cb->incr_block); + __wt_buf_free(session, &cb->bitstring); } /* @@ -213,21 
+166,48 @@ __wt_curbackup_open_incr(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *o WT_CURSOR *cursor, const char *cfg[], WT_CURSOR **cursorp) { WT_CURSOR_BACKUP *cb, *other_cb; + WT_DECL_ITEM(open_uri); + WT_DECL_RET; cb = (WT_CURSOR_BACKUP *)cursor; other_cb = (WT_CURSOR_BACKUP *)other; - WT_UNUSED(session); cursor->key_format = WT_UNCHECKED_STRING(qqq); cursor->value_format = ""; + WT_ASSERT(session, other_cb->incr_src != NULL); + /* * Inherit from the backup cursor but reset specific functions for incremental. */ cursor->next = __curbackup_incr_next; cursor->get_key = __wt_cursor_get_key; cursor->get_value = __wt_cursor_get_value_notsup; - cb->incr_granularity = other_cb->incr_granularity; + cb->incr_src = other_cb->incr_src; + + /* All WiredTiger owned files are full file copies. */ + if (F_ISSET(other_cb->incr_src, WT_BLKINCR_FULL) || + WT_PREFIX_MATCH(cb->incr_file, "WiredTiger")) { + __wt_verbose(session, WT_VERB_BACKUP, "Forcing full file copies for id %s", + other_cb->incr_src->id_str); + F_SET(cb, WT_CURBACKUP_FORCE_FULL); + } + /* + * Set up the incremental backup information, if we are not forcing a full file copy. We need an + * open cursor on the file. Open the backup checkpoint, confirming it exists. + */ + if (!F_ISSET(cb, WT_CURBACKUP_FORCE_FULL)) { + WT_ERR(__wt_scr_alloc(session, 0, &open_uri)); + WT_ERR(__wt_buf_fmt(session, open_uri, "file:%s", cb->incr_file)); + __wt_free(session, cb->incr_file); + WT_ERR(__wt_strdup(session, open_uri->data, &cb->incr_file)); + + WT_ERR(__wt_curfile_open(session, cb->incr_file, NULL, cfg, &cb->incr_cursor)); + WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp)); + WT_ERR(__wt_strdup(session, cb->incr_cursor->internal_uri, &cb->incr_cursor->internal_uri)); + } else + WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp)); - /* XXX Return full file info for all files for now. 
*/ - return (__wt_cursor_init(cursor, uri, NULL, cfg, cursorp)); +err: + __wt_scr_free(session, &open_uri); + return (ret); } diff --git a/src/third_party/wiredtiger/src/docs/command-line.dox b/src/third_party/wiredtiger/src/docs/command-line.dox index 986a634e786..de845ae9f6f 100644 --- a/src/third_party/wiredtiger/src/docs/command-line.dox +++ b/src/third_party/wiredtiger/src/docs/command-line.dox @@ -204,9 +204,10 @@ a table, creating the table if it does not yet exist. The data should be the format produced by the \c dump command; see @ref dump_formats for details. -By default, if the table already exists, data in the table will be -overwritten by the new data (use the \c -n option to make an attempt to -overwrite existing data return an error). +By default, if the table already exists, key/value pairs in the table +will be overwritten by new data with matching keys (use the \c -n option +to make an attempt to overwrite existing data return an error). Existing +keys will not be removed. @subsection util_load_synopsis Synopsis <code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] load [-ajn] [-f input] [-r name] [uri configuration ...]</code> @@ -279,7 +280,9 @@ In the case of inserting values into a column-store table, each value is appended to the table; in the case of inserting values into a row-store table, lines are handled in pairs, where the first line is the key and the second line is the value. If the row-store table already -exists, data in the table will be overwritten by the new data. +exists, key/value pairs in the table will be overwritten by new data +with matching keys. For either column-store or row-store tables, existing +keys will not be removed. 
@subsection util_loadtext_synopsis Synopsis <code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] loadtext [-f input] uri</code> diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h index 95946dfbc65..7157d9392b6 100644 --- a/src/third_party/wiredtiger/src/include/api.h +++ b/src/third_party/wiredtiger/src/include/api.h @@ -51,7 +51,6 @@ */ \ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ WT_SINGLE_THREAD_CHECK_START(s); \ - WT_ERR(__wt_txn_err_chk(s)); \ WT_TRACK_OP_INIT(s); \ __wt_op_timer_start(s); \ /* Reset wait time if this isn't an API reentry. */ \ @@ -70,21 +69,20 @@ if ((config) != NULL) \ WT_ERR(__wt_config_check((s), WT_CONFIG_REF(session, h##_##n), (config), 0)) -#define API_END(s, ret) \ - if ((s) != NULL) { \ - WT_TRACK_OP_END(s); \ - WT_SINGLE_THREAD_CHECK_STOP(s); \ - if ((ret) != 0 && (ret) != WT_NOTFOUND && (ret) != WT_DUPLICATE_KEY && \ - (ret) != WT_PREPARE_CONFLICT && F_ISSET(&(s)->txn, WT_TXN_RUNNING)) \ - F_SET(&(s)->txn, WT_TXN_ERROR); \ - __wt_op_timer_stop(s); \ - /* \ - * No code after this line, otherwise error handling \ - * won't be correct. \ - */ \ - API_SESSION_POP(s); \ - } \ - } \ +#define API_END(s, ret) \ + if ((s) != NULL) { \ + WT_TRACK_OP_END(s); \ + WT_SINGLE_THREAD_CHECK_STOP(s); \ + if ((ret) != 0) \ + __wt_txn_err_set(s, ret); \ + __wt_op_timer_stop(s); \ + /* \ + * No code after this line, otherwise error handling \ + * won't be correct. \ + */ \ + API_SESSION_POP(s); \ + } \ + } \ while (0) /* An API call wrapped in a transaction if necessary. 
*/ @@ -173,6 +171,14 @@ #define SESSION_API_CALL_PREPARE_ALLOWED(s, n, config, cfg) \ API_CALL(s, WT_SESSION, n, NULL, config, cfg) +#define SESSION_API_CALL_PREPARE_NOT_ALLOWED(s, n, config, cfg) \ + SESSION_API_PREPARE_CHECK(s, WT_SESSION, n); \ + API_CALL(s, WT_SESSION, n, NULL, config, cfg) + +#define SESSION_API_CALL_PREPARE_NOT_ALLOWED_NOCONF(s, n) \ + SESSION_API_PREPARE_CHECK(s, WT_SESSION, n); \ + API_CALL_NOCONF(s, WT_SESSION, n, NULL) + #define SESSION_API_PREPARE_CHECK(s, h, n) \ do { \ int __prepare_ret; \ @@ -188,10 +194,6 @@ #define SESSION_API_CALL_NOCONF(s, n) API_CALL_NOCONF(s, WT_SESSION, n, NULL) -#define SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(s, n) \ - SESSION_API_PREPARE_CHECK(s, WT_SESSION, n); \ - API_CALL_NOCONF(s, WT_SESSION, n, NULL) - #define SESSION_TXN_API_CALL(s, n, config, cfg) \ SESSION_API_PREPARE_CHECK(s, WT_SESSION, n); \ TXN_API_CALL(s, WT_SESSION, n, NULL, config, cfg) diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 5099bc4dce9..50f6fcf9759 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -1053,7 +1053,7 @@ __wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value) */ static inline void __wt_ref_info( - WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep) + WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, bool *is_leafp) { WT_ADDR *addr; WT_CELL_UNPACK *unpack, _unpack; @@ -1072,33 +1072,61 @@ __wt_ref_info( if (addr == NULL) { *addrp = NULL; *sizep = 0; - if (typep != NULL) - *typep = 0; + if (is_leafp != NULL) + *is_leafp = false; } else if (__wt_off_page(page, addr)) { *addrp = addr->addr; *sizep = addr->size; - if (typep != NULL) - switch (addr->type) { - case WT_ADDR_INT: - *typep = WT_CELL_ADDR_INT; - break; - case WT_ADDR_LEAF: - *typep = WT_CELL_ADDR_LEAF; - break; - case WT_ADDR_LEAF_NO: - *typep = 
WT_CELL_ADDR_LEAF_NO; - break; - default: - *typep = 0; - break; - } + if (is_leafp != NULL) + *is_leafp = addr->type != WT_ADDR_INT; } else { __wt_cell_unpack(session, page, (WT_CELL *)addr, unpack); *addrp = unpack->data; *sizep = unpack->size; - if (typep != NULL) - *typep = unpack->type; + + if (is_leafp != NULL) + *is_leafp = unpack->type != WT_ADDR_INT; + } +} + +/* + * __wt_ref_info_lock -- + * Lock the WT_REF and return the addr/size and type triplet for a reference. + */ +static inline void +__wt_ref_info_lock( + WT_SESSION_IMPL *session, WT_REF *ref, uint8_t *addr_buf, size_t *sizep, bool *is_leafp) +{ + size_t size; + uint32_t previous_state; + const uint8_t *addr; + bool is_leaf; + + /* + * The WT_REF address references either an on-page cell or in-memory structure, and eviction + * frees both. If our caller is already blocking eviction (either because the WT_REF is locked + * or there's a hazard pointer on the page), no locking is required, and the caller should call + * the underlying function directly. Otherwise, our caller is not blocking eviction and we lock + * here, and copy out the address instead of returning a reference. 
+ */ + for (;; __wt_yield()) { + previous_state = ref->state; + if (previous_state != WT_REF_LOCKED && previous_state != WT_REF_READING && + WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED)) + break; } + + __wt_ref_info(session, ref, &addr, &size, &is_leaf); + + if (addr_buf != NULL) { + if (addr != NULL) + memcpy(addr_buf, addr, size); + *sizep = size; + } + if (is_leafp != NULL) + *is_leafp = is_leaf; + + WT_REF_SET_STATE(ref, previous_state); } /* diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 9498eb5d6c6..2a275449284 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -299,7 +299,7 @@ struct __wt_connection_impl { uint64_t ckpt_write_pages; /* Checkpoint and incremental backup data */ - uint64_t ckpt_incr_granularity; + uint64_t incr_granularity; WT_BLKINCR incr_backups[WT_BLKINCR_MAX]; /* Connection's maximum and base write generations. */ diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 45a4ac01a9f..3ea011b8fc9 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -32,21 +32,6 @@ 0 /* uint32_t flags */ \ } -/* - * Block based incremental backup structure. These live in the connection. - */ -#define WT_BLKINCR_MAX 2 -struct __wt_blkincr { - const char *id_str; /* User's name for this backup. */ - const char *ckpt_name; /* Requires WT-5115. All checkpoints must be this name */ - void *data; -/* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_BLKINCR_INUSE 0x1u /* This entry is active */ -#define WT_BLKINCR_VALID 0x2u /* This entry is valid */ - /* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint64_t flags; -}; - struct __wt_cursor_backup { WT_CURSOR iface; @@ -61,31 +46,25 @@ struct __wt_cursor_backup { size_t list_next; /* File offset-based incremental backup. 
*/ - WT_BLKINCR *incr; /* Incremental backup in use */ - char *incr_file; /* File name */ - char *incr_src; /* Source identifier */ - char *incr_this; /* New base identifier */ - uint64_t incr_granularity; /* Maximum transfer size */ + WT_BLKINCR *incr_src; /* Incremental backup source */ + char *incr_file; /* File name */ WT_CURSOR *incr_cursor; /* File cursor */ - /* Start/stop checkpoints */ - char *incr_checkpoint_start; - char *incr_checkpoint_stop; - -#define WT_BACKUP_INCR_COMPONENTS 3 - bool incr_init; /* Cursor traversal initialized */ - uint64_t *incr_list; /* List of file offset/size/type triples */ - uint64_t incr_list_count; /* Count of file offset/size/type triples */ - uint64_t incr_list_offset; /* Current offset */ - uint64_t incr_size; /* Maximum transfer size */ - WT_ITEM *incr_block; /* Current block of data */ + + bool incr_init; /* Cursor traversal initialized */ + WT_ITEM bitstring; /* List of modified blocks */ + uint64_t nbits; /* Number of bits in bitstring */ + uint64_t offset; /* Zero bit offset in bitstring */ + uint64_t bit_offset; /* Current offset */ + uint64_t granularity; /* Length, transfer size */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CURBACKUP_DUP 0x1u /* Duplicated backup cursor */ -#define WT_CURBACKUP_FORCE_STOP 0x2u /* Force stop incremental backup */ -#define WT_CURBACKUP_INCR 0x4u /* Incremental backup cursor */ -#define WT_CURBACKUP_LOCKER 0x8u /* Hot-backup started */ - /* AUTOMATIC FLAG VALUE GENERATION STOP */ +#define WT_CURBACKUP_DUP 0x01u /* Duplicated backup cursor */ +#define WT_CURBACKUP_FORCE_FULL 0x02u /* Force full file copy for this cursor */ +#define WT_CURBACKUP_FORCE_STOP 0x04u /* Force stop incremental backup */ +#define WT_CURBACKUP_INCR 0x08u /* Incremental backup cursor */ +#define WT_CURBACKUP_LOCKER 0x10u /* Hot-backup started */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint8_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/extern.h 
b/src/third_party/wiredtiger/src/include/extern.h index 46de0e10f7f..2cf1408525e 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -58,8 +58,6 @@ extern const char *__wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_se WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern const char *__wt_json_tokname(int toktype) WT_GCC_FUNC_DECL_ATTRIBUTE( (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern const char *__wt_page_type_string(u_int type) WT_GCC_FUNC_DECL_ATTRIBUTE( (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern const char *__wt_session_strerror(WT_SESSION *wt_session, int error) @@ -86,6 +84,10 @@ extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_backup_file_remove(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_backup_load_incr(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *blkcfg, + WT_ITEM *bitstring, uint64_t nbits) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_backup_open(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_addr_invalid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, @@ -280,7 +282,7 @@ extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((wa extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern 
int __wt_btcur_search_uncommitted(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) +extern int __wt_btcur_search_uncommitted(WT_CURSOR *cursor, WT_UPDATE **updp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btree_close(WT_SESSION_IMPL *session) @@ -1566,6 +1568,7 @@ extern void *__wt_ext_scr_alloc(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_async_stats_update(WT_SESSION_IMPL *session); +extern void __wt_backup_destroy(WT_SESSION_IMPL *session); extern void __wt_block_ckpt_destroy(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci); extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on); extern void __wt_block_ext_free(WT_SESSION_IMPL *session, WT_EXT *ext); @@ -1964,8 +1967,6 @@ static inline int __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline int __wt_txn_context_prepare_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_err_chk(WT_SESSION_IMPL *session) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline int __wt_txn_id_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline int __wt_txn_idle_cache_check(WT_SESSION_IMPL *session) @@ -2135,7 +2136,9 @@ static inline void __wt_rec_incr( WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size); static inline void __wt_ref_addr_free(WT_SESSION_IMPL *session, WT_REF *ref); static inline void __wt_ref_info( - WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep); + WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, bool *is_leafp); +static inline void __wt_ref_info_lock( + 
WT_SESSION_IMPL *session, WT_REF *ref, uint8_t *addr_buf, size_t *sizep, bool *is_leafp); static inline void __wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep); static inline void __wt_ref_key_clear(WT_REF *ref); static inline void __wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK *unpack); @@ -2156,6 +2159,7 @@ static inline void __wt_struct_size_adjust(WT_SESSION_IMPL *session, size_t *siz static inline void __wt_timing_stress(WT_SESSION_IMPL *session, u_int flag); static inline void __wt_tree_modify_set(WT_SESSION_IMPL *session); static inline void __wt_txn_cursor_op(WT_SESSION_IMPL *session); +static inline void __wt_txn_err_set(WT_SESSION_IMPL *session, int ret); static inline void __wt_txn_op_apply_prepare_state( WT_SESSION_IMPL *session, WT_REF *ref, bool commit); static inline void __wt_txn_op_delete_commit_apply_timestamps( diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h index 9845dbd7f7d..b29d9665069 100644 --- a/src/third_party/wiredtiger/src/include/meta.h +++ b/src/third_party/wiredtiger/src/include/meta.h @@ -17,11 +17,10 @@ /* * Backup related WiredTiger files. 
*/ -#define WT_BACKUP_TMP "WiredTiger.backup.tmp" /* Backup tmp file */ -#define WT_BLKINCR_BACKUP "WiredTiger.backup.block" /* Block incremental durable file */ -#define WT_METADATA_BACKUP "WiredTiger.backup" /* Hot backup file */ -#define WT_LOGINCR_BACKUP "WiredTiger.ibackup" /* Log incremental backup */ -#define WT_LOGINCR_SRC "WiredTiger.isrc" /* Log incremental source */ +#define WT_BACKUP_TMP "WiredTiger.backup.tmp" /* Backup tmp file */ +#define WT_METADATA_BACKUP "WiredTiger.backup" /* Hot backup file */ +#define WT_LOGINCR_BACKUP "WiredTiger.ibackup" /* Log incremental backup */ +#define WT_LOGINCR_SRC "WiredTiger.isrc" /* Log incremental source */ #define WT_METADATA_TURTLE "WiredTiger.turtle" /* Metadata metadata */ #define WT_METADATA_TURTLE_SET "WiredTiger.turtle.set" /* Turtle temp file */ @@ -58,6 +57,42 @@ } while (0) /* + * Block based incremental backup structure. These live in the connection. + */ +#define WT_BLKINCR_MAX 2 +struct __wt_blkincr { + const char *id_str; /* User's name for this backup. */ + uint64_t granularity; /* Granularity of this backup. */ +/* AUTOMATIC FLAG VALUE GENERATION START */ +#define WT_BLKINCR_FULL 0x1u /* There is no checkpoint, always do full file */ +#define WT_BLKINCR_INUSE 0x2u /* This entry is active */ +#define WT_BLKINCR_VALID 0x4u /* This entry is valid */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint64_t flags; +}; + +/* + * Block modifications from an incremental identifier going forward. + */ +/* + * At the default granularity, this is enough for blocks in a 2G file. + */ +#define WT_BLOCK_MODS_LIST_MIN 16 /* Initial bytes for bitmap. 
*/ +struct __wt_block_mods { + const char *id_str; + + WT_ITEM bitstring; + uint64_t nbits; /* Number of bits in bitstring */ + + uint64_t offset; /* Zero bit offset for bitstring */ + uint64_t granularity; +/* AUTOMATIC FLAG VALUE GENERATION START */ +#define WT_BLOCK_MODS_VALID 0x1u /* Entry is valid */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; +}; + +/* * WT_CKPT -- * Encapsulation of checkpoint information, shared by the metadata, the * btree engine, and the block manager. @@ -88,6 +123,8 @@ struct __wt_ckpt { char *block_metadata; /* Block-stored metadata */ char *block_checkpoint; /* Block-stored checkpoint */ + WT_BLOCK_MODS backup_blocks[WT_BLKINCR_MAX]; + /* Validity window */ wt_timestamp_t newest_durable_ts; wt_timestamp_t oldest_start_ts; @@ -95,9 +132,6 @@ struct __wt_ckpt { wt_timestamp_t newest_stop_ts; uint64_t newest_stop_txn; - uint64_t *alloc_list; /* Checkpoint allocation list */ - uint64_t alloc_list_entries; - WT_ITEM addr; /* Checkpoint cookie string */ WT_ITEM raw; /* Checkpoint cookie raw */ diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index f4b82b8f5e9..03d3ff72f8f 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -166,34 +166,35 @@ struct __wt_session_impl { u_int stat_bucket; /* Statistics bucket offset */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_SESSION_BACKUP_CURSOR 0x0000001u -#define WT_SESSION_BACKUP_DUP 0x0000002u -#define WT_SESSION_CACHE_CURSORS 0x0000004u -#define WT_SESSION_CAN_WAIT 0x0000008u -#define WT_SESSION_IGNORE_CACHE_SIZE 0x0000010u -#define WT_SESSION_INTERNAL 0x0000020u -#define WT_SESSION_LOCKED_CHECKPOINT 0x0000040u -#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x0000080u -#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x0000100u -#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x0000200u -#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x0000400u -#define 
WT_SESSION_LOCKED_METADATA 0x0000800u -#define WT_SESSION_LOCKED_PASS 0x0001000u -#define WT_SESSION_LOCKED_SCHEMA 0x0002000u -#define WT_SESSION_LOCKED_SLOT 0x0004000u -#define WT_SESSION_LOCKED_TABLE_READ 0x0008000u -#define WT_SESSION_LOCKED_TABLE_WRITE 0x0010000u -#define WT_SESSION_LOCKED_TURTLE 0x0020000u -#define WT_SESSION_LOGGING_INMEM 0x0040000u -#define WT_SESSION_LOOKASIDE_CURSOR 0x0080000u -#define WT_SESSION_NO_DATA_HANDLES 0x0100000u -#define WT_SESSION_NO_LOGGING 0x0200000u -#define WT_SESSION_NO_RECONCILE 0x0400000u -#define WT_SESSION_NO_SCHEMA_LOCK 0x0800000u -#define WT_SESSION_QUIET_CORRUPT_FILE 0x1000000u -#define WT_SESSION_READ_WONT_NEED 0x2000000u -#define WT_SESSION_SCHEMA_TXN 0x4000000u -#define WT_SESSION_SERVER_ASYNC 0x8000000u +#define WT_SESSION_BACKUP_CURSOR 0x00000001u +#define WT_SESSION_BACKUP_DUP 0x00000002u +#define WT_SESSION_CACHE_CURSORS 0x00000004u +#define WT_SESSION_CAN_WAIT 0x00000008u +#define WT_SESSION_IGNORE_CACHE_SIZE 0x00000010u +#define WT_SESSION_INTERNAL 0x00000020u +#define WT_SESSION_LOCKED_CHECKPOINT 0x00000040u +#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000080u +#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000100u +#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00000200u +#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00000400u +#define WT_SESSION_LOCKED_METADATA 0x00000800u +#define WT_SESSION_LOCKED_PASS 0x00001000u +#define WT_SESSION_LOCKED_SCHEMA 0x00002000u +#define WT_SESSION_LOCKED_SLOT 0x00004000u +#define WT_SESSION_LOCKED_TABLE_READ 0x00008000u +#define WT_SESSION_LOCKED_TABLE_WRITE 0x00010000u +#define WT_SESSION_LOCKED_TURTLE 0x00020000u +#define WT_SESSION_LOGGING_INMEM 0x00040000u +#define WT_SESSION_LOOKASIDE_CURSOR 0x00080000u +#define WT_SESSION_NO_DATA_HANDLES 0x00100000u +#define WT_SESSION_NO_LOGGING 0x00200000u +#define WT_SESSION_NO_RECONCILE 0x00400000u +#define WT_SESSION_NO_SCHEMA_LOCK 0x00800000u +#define WT_SESSION_QUIET_CORRUPT_FILE 0x01000000u +#define WT_SESSION_READ_WONT_NEED 
0x02000000u +#define WT_SESSION_RESOLVING_TXN 0x04000000u +#define WT_SESSION_SCHEMA_TXN 0x08000000u +#define WT_SESSION_SERVER_ASYNC 0x10000000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t flags; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 332f3a3735c..7f29b10e23b 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -61,21 +61,34 @@ __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn) } /* - * __wt_txn_err_chk -- - * Check the transaction hasn't already failed. + * __wt_txn_err_set -- + * Set an error in the current transaction. */ -static inline int -__wt_txn_err_chk(WT_SESSION_IMPL *session) +static inline void +__wt_txn_err_set(WT_SESSION_IMPL *session, int ret) { - /* Allow transaction rollback, but nothing else. */ - if (!F_ISSET(&(session->txn), WT_TXN_ERROR) || - strcmp(session->name, "rollback_transaction") != 0) - return (0); + WT_TXN *txn; -#ifdef HAVE_DIAGNOSTIC - WT_ASSERT(session, !F_ISSET(&(session->txn), WT_TXN_ERROR)); -#endif - WT_RET_MSG(session, EINVAL, "additional transaction operations attempted after error"); + txn = &session->txn; + + /* Ignore standard errors that don't fail the transaction. */ + if (ret == WT_NOTFOUND || ret == WT_DUPLICATE_KEY || ret == WT_PREPARE_CONFLICT) + return; + + /* Less commonly, it's not a running transaction. */ + if (!F_ISSET(txn, WT_TXN_RUNNING)) + return; + + /* The transaction has to be rolled back. */ + F_SET(txn, WT_TXN_ERROR); + + /* + * Check for a prepared transaction, and quit: we can't ignore the error and we can't roll back + * a prepared transaction. 
+ */ + if (F_ISSET(txn, WT_TXN_PREPARE)) + WT_PANIC_MSG(session, ret, + "transactional error logged after transaction was prepared, failing the system"); } /* @@ -750,12 +763,17 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp) { static WT_UPDATE tombstone = {.txnid = WT_TXN_NONE, .type = WT_UPDATE_TOMBSTONE}; WT_VISIBLE_TYPE upd_visible; + uint8_t type; bool skipped_birthmark; *updp = NULL; + + type = WT_UPDATE_INVALID; /* [-Wconditional-uninitialized] */ for (skipped_birthmark = false; upd != NULL; upd = upd->next) { + WT_ORDERED_READ(type, upd->type); + /* Skip reserved place-holders, they're never visible. */ - if (upd->type != WT_UPDATE_RESERVE) { + if (type != WT_UPDATE_RESERVE) { upd_visible = __wt_txn_upd_visible_type(session, upd); if (upd_visible == WT_VISIBLE_TRUE) break; @@ -763,14 +781,16 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp) return (WT_PREPARE_CONFLICT); } /* An invisible birthmark is equivalent to a tombstone. */ - if (upd->type == WT_UPDATE_BIRTHMARK) + if (type == WT_UPDATE_BIRTHMARK) skipped_birthmark = true; } - if (upd == NULL && skipped_birthmark) + if (upd == NULL && skipped_birthmark) { upd = &tombstone; + type = upd->type; + } - *updp = upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? NULL : upd; + *updp = upd == NULL || type == WT_UPDATE_BIRTHMARK ? 
NULL : upd; return (0); } diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index 1cd8753a0ac..7ae570d3e59 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -87,6 +87,8 @@ struct __wt_block_desc; typedef struct __wt_block_desc WT_BLOCK_DESC; struct __wt_block_header; typedef struct __wt_block_header WT_BLOCK_HEADER; +struct __wt_block_mods; +typedef struct __wt_block_mods WT_BLOCK_MODS; struct __wt_bloom; typedef struct __wt_bloom WT_BLOOM; struct __wt_bloom_hash; @@ -390,7 +392,7 @@ typedef uint64_t wt_timestamp_t; #include "error.h" #include "log.h" #include "lsm.h" -#include "meta.h" +#include "meta.h" /* required by block.h */ #include "optrack.h" #include "os.h" #include "reconcile.h" diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c index 93a01fa6abb..34c8b643c08 100644 --- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c +++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c @@ -11,11 +11,79 @@ static int __ckpt_last(WT_SESSION_IMPL *, const char *, WT_CKPT *); static int __ckpt_last_name(WT_SESSION_IMPL *, const char *, const char **); static int __ckpt_load(WT_SESSION_IMPL *, WT_CONFIG_ITEM *, WT_CONFIG_ITEM *, WT_CKPT *); +static int __ckpt_load_blk_mods(WT_SESSION_IMPL *, const char *, WT_CKPT *); static int __ckpt_named(WT_SESSION_IMPL *, const char *, const char *, WT_CKPT *); static int __ckpt_set(WT_SESSION_IMPL *, const char *, const char *, bool); static int __ckpt_version_chk(WT_SESSION_IMPL *, const char *, const char *); /* + * __ckpt_load_blk_mods -- + * Load the block information from the config string. 
+ */ +static int +__ckpt_load_blk_mods(WT_SESSION_IMPL *session, const char *config, WT_CKPT *ckpt) +{ + WT_BLKINCR *blkincr; + WT_BLOCK_MODS *blk_mod; + WT_CONFIG blkconf; + WT_CONFIG_ITEM b, k, v; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + uint64_t i; + + conn = S2C(session); + if (config == NULL) + return (0); + /* + * We could be reading in a configuration from an earlier release. If the string doesn't exist + * then we're done. + */ + if ((ret = __wt_config_getones(session, config, "checkpoint_backup_info", &v)) != 0) + return (ret == WT_NOTFOUND ? 0 : ret); + __wt_config_subinit(session, &blkconf, &v); + /* + * Load block lists. Ignore any that have an id string that is not known. + * + * Remove those not known (TODO). + */ + blkincr = NULL; + while ((ret = __wt_config_next(&blkconf, &k, &v)) == 0) { + /* + * See if this is a valid backup string. + */ + for (i = 0; i < WT_BLKINCR_MAX; ++i) { + blkincr = &conn->incr_backups[i]; + if (blkincr->id_str != NULL && WT_STRING_MATCH(blkincr->id_str, k.str, k.len)) + break; + } + if (i == WT_BLKINCR_MAX) + /* + * This is the place to note that we want to remove an unknown id. + */ + continue; + + /* + * We have a valid entry. Load the block information. + */ + blk_mod = &ckpt->backup_blocks[i]; + WT_RET(__wt_strdup(session, blkincr->id_str, &blk_mod->id_str)); + WT_RET(__wt_config_subgets(session, &v, "granularity", &b)); + blk_mod->granularity = (uint64_t)b.val; + WT_RET(__wt_config_subgets(session, &v, "nbits", &b)); + blk_mod->nbits = (uint64_t)b.val; + WT_RET(__wt_config_subgets(session, &v, "offset", &b)); + blk_mod->offset = (uint64_t)b.val; + ret = __wt_config_subgets(session, &v, "blocks", &b); + WT_RET_NOTFOUND_OK(ret); + if (ret != WT_NOTFOUND) { + WT_RET(__wt_backup_load_incr(session, &b, &blk_mod->bitstring, blk_mod->nbits)); + F_SET(blk_mod, WT_BLOCK_MODS_VALID); + } + } + return (ret == WT_NOTFOUND ? 0 : ret); +} + +/* * __wt_meta_checkpoint -- * Return a file's checkpoint information. 
*/ @@ -118,7 +186,7 @@ __ckpt_set(WT_SESSION_IMPL *session, const char *fname, const char *v, bool use_ * use the slower path through configuration parsing functions. */ config = newcfg = NULL; - str = v == NULL ? "checkpoint=(),checkpoint_lsn=" : v; + str = v == NULL ? "checkpoint=(),checkpoint_backup_info=(),checkpoint_lsn=" : v; if (use_base && session->dhandle != NULL) { WT_ERR(__wt_scr_alloc(session, 0, &tmp)); WT_ASSERT(session, strcmp(session->dhandle->name, fname) == 0); @@ -315,6 +383,75 @@ __ckpt_compare_order(const void *a, const void *b) } /* + * __ckpt_valid_blk_mods -- + * Make sure that this set of block mods reflects the current valid backup identifiers. If so, + * there is nothing to do. If not, free up old information and set it up for the current + * information. + */ +static int +__ckpt_valid_blk_mods(WT_SESSION_IMPL *session, WT_CKPT *ckpt) +{ + WT_BLKINCR *blk; + WT_BLOCK_MODS *blk_mod; + uint64_t i; + bool free, setup; + + WT_ASSERT(session, F_ISSET(ckpt, WT_CKPT_ADD)); + for (i = 0; i < WT_BLKINCR_MAX; ++i) { + blk = &S2C(session)->incr_backups[i]; + blk_mod = &ckpt->backup_blocks[i]; + + /* + * Check the state of our block list array compared to the global one. There are + * several possibilities: + * - There is no global information for this index, nothing to do but free our resources. + * - We don't have any backup information locally. Set up our entry. + * - Our entry's id string matches the current global information. We just want to add our + * information to the existing list. + * - Our entry's id string does not match the current one. It is outdated. Free old + * resources + * and then set up our entry. + */ + + /* Check if the global entry is valid at our index. */ + if (!F_ISSET(blk, WT_BLKINCR_VALID)) { + free = true; + setup = false; + } else if (F_ISSET(blk_mod, WT_BLOCK_MODS_VALID) && + WT_STRING_MATCH(blk_mod->id_str, blk->id_str, strlen(blk->id_str))) { + /* We match, keep our entry and don't set up. 
*/ + setup = false; + free = false; + } else { + /* We don't match, free any old information. */ + free = true; + setup = true; + } + + /* Free any old information if we need to do so. */ + if (free && F_ISSET(blk_mod, WT_BLOCK_MODS_VALID)) { + __wt_free(session, blk_mod->id_str); + __wt_buf_free(session, &blk_mod->bitstring); + blk_mod->nbits = 0; + blk_mod->granularity = 0; + blk_mod->offset = 0; + F_CLR(blk_mod, WT_BLOCK_MODS_VALID); + } + + /* Set up the block list to point to the current information. */ + if (setup) { + WT_RET(__wt_strdup(session, blk->id_str, &blk_mod->id_str)); + WT_CLEAR(blk_mod->bitstring); + blk_mod->granularity = S2C(session)->incr_granularity; + blk_mod->nbits = 0; + blk_mod->offset = 0; + F_SET(blk_mod, WT_BLOCK_MODS_VALID); + } + } + return (0); +} + +/* * __wt_meta_ckptlist_get -- * Load all available checkpoint information for a file. */ @@ -378,10 +515,22 @@ __wt_meta_ckptlist_get( maxorder = ckpt->order; ckpt->order = maxorder + 1; __wt_seconds(session, &ckpt->sec); + /* + * Load most recent checkpoint backup blocks to this checkpoint. + */ + WT_ERR(__ckpt_load_blk_mods(session, config, ckpt)); WT_ERR(__wt_meta_block_metadata(session, config, ckpt)); + /* + * Set the add-a-checkpoint flag, and if we're doing incremental backups, request a list of + * the checkpoint's modified blocks from the block manager. + */ F_SET(ckpt, WT_CKPT_ADD); + if (F_ISSET(S2C(session), WT_CONN_INCR_BACKUP)) { + F_SET(ckpt, WT_CKPT_BLOCK_MODS); + WT_ERR(__ckpt_valid_blk_mods(session, ckpt)); + } } /* Return the array to our caller. */ @@ -578,6 +727,50 @@ __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM } /* + * __ckpt_blkmod_to_meta -- + * Add in any modification block string needed, including an empty one. 
+ */ +static int +__ckpt_blkmod_to_meta(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_CKPT *ckpt) +{ + WT_BLOCK_MODS *blk; + WT_ITEM bitstring; + u_int i; + bool valid; + + WT_CLEAR(bitstring); + valid = false; + for (i = 0, blk = &ckpt->backup_blocks[0]; i < WT_BLKINCR_MAX; ++i, ++blk) + if (F_ISSET(blk, WT_BLOCK_MODS_VALID)) + valid = true; + + /* + * If the existing block modifications are not valid, there is nothing to do. + */ + if (!valid) { + WT_RET(__wt_buf_catfmt(session, buf, ",checkpoint_backup_info=")); + return (0); + } + + /* + * We have at least one valid modified block list. + */ + WT_RET(__wt_buf_catfmt(session, buf, ",checkpoint_backup_info=(")); + for (i = 0, blk = &ckpt->backup_blocks[0]; i < WT_BLKINCR_MAX; ++i, ++blk) { + if (!F_ISSET(blk, WT_BLOCK_MODS_VALID)) + continue; + WT_RET(__wt_raw_to_hex(session, blk->bitstring.data, blk->bitstring.size, &bitstring)); + WT_RET(__wt_buf_catfmt(session, buf, "%s%s=(id=%" PRIu32 ",granularity=%" PRIu64 + ",nbits=%" PRIu64 ",offset=%" PRIu64 ",blocks=%.*s)", + i == 0 ? "" : ",", blk->id_str, i, blk->granularity, blk->nbits, blk->offset, + (int)bitstring.size, (char *)bitstring.data)); + __wt_buf_free(session, &bitstring); + } + WT_RET(__wt_buf_catfmt(session, buf, ")")); + return (0); +} + +/* * __wt_meta_ckptlist_set -- * Set a file's checkpoint value from the WT_CKPT list. */ @@ -593,6 +786,10 @@ __wt_meta_ckptlist_set( WT_RET(__wt_scr_alloc(session, 1024, &buf)); WT_ERR(__wt_meta_ckptlist_to_meta(session, ckptbase, buf)); + /* Add backup block modifications for any added checkpoint. 
*/ + WT_CKPT_FOREACH (ckptbase, ckpt) + if (F_ISSET(ckpt, WT_CKPT_ADD)) + WT_ERR(__ckpt_blkmod_to_meta(session, buf, ckpt)); has_lsn = ckptlsn != NULL; if (ckptlsn != NULL) @@ -634,6 +831,9 @@ __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep) void __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt) { + WT_BLOCK_MODS *blk_mod; + uint64_t i; + if (ckpt == NULL) return; @@ -643,6 +843,12 @@ __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt) __wt_buf_free(session, &ckpt->addr); __wt_buf_free(session, &ckpt->raw); __wt_free(session, ckpt->bpriv); + for (i = 0; i < WT_BLKINCR_MAX; ++i) { + blk_mod = &ckpt->backup_blocks[i]; + __wt_buf_free(session, &blk_mod->bitstring); + __wt_free(session, blk_mod->id_str); + F_CLR(blk_mod, WT_BLOCK_MODS_VALID); + } WT_CLEAR(*ckpt); /* Clear to prepare for re-use. */ } diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c index 044094133ce..a7b2e740caf 100644 --- a/src/third_party/wiredtiger/src/meta/meta_turtle.c +++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c @@ -185,7 +185,7 @@ __wt_turtle_init(WT_SESSION_IMPL *session) { WT_DECL_RET; char *metaconf, *unused_value; - bool exist_backup, exist_bincr, exist_incr, exist_isrc, exist_turtle; + bool exist_backup, exist_incr, exist_isrc, exist_turtle; bool load, loadTurtle; load = loadTurtle = false; @@ -212,17 +212,6 @@ __wt_turtle_init(WT_SESSION_IMPL *session) WT_RET(__wt_fs_exist(session, WT_LOGINCR_SRC, &exist_isrc)); WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist_backup)); WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist_turtle)); - /* - * Block incremental is different. If it exists, then we have block incremental information we - * need to keep. Mark the connection as having block-based incremental backup turned on. XXX - - * Need to call something to read it in and set this up. Maybe here, maybe not. 
- */ - WT_RET(__wt_fs_exist(session, WT_BLKINCR_BACKUP, &exist_bincr)); - if (exist_bincr) { - F_SET(S2C(session), WT_CONN_INCR_BACKUP); - /* Load content into some structure. Not sure this is the right place. It may be too early. - */ - } if (exist_turtle) { /* diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index d956c9692d9..34dcb676c7c 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -365,22 +365,17 @@ __session_reconfigure(WT_SESSION *wt_session, const char *config) WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; - /* - * Indicated as allowed in prepared state, even though not allowed, so that running transaction - * check below take precedence. - */ - SESSION_API_CALL_PREPARE_ALLOWED(session, reconfigure, config, cfg); - - /* - * Note that this method only checks keys that are passed in by the application: we don't want - * to reset other session settings to their default values. - */ + SESSION_API_CALL_PREPARE_NOT_ALLOWED(session, reconfigure, config, cfg); WT_UNUSED(cfg); WT_ERR(__wt_txn_context_check(session, false)); WT_ERR(__wt_session_reset_cursors(session, false)); + /* + * Note that this method only checks keys that are passed in by the application: we don't want + * to reset other session settings to their default values. + */ WT_ERR(__wt_txn_reconfigure(session, config)); ret = __wt_config_getones(session, config, "ignore_cache_size", &cval); @@ -820,7 +815,7 @@ __session_log_printf(WT_SESSION *wt_session, const char *fmt, ...) 
va_list ap; session = (WT_SESSION_IMPL *)wt_session; - SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, log_printf); + SESSION_API_CALL_PREPARE_NOT_ALLOWED_NOCONF(session, log_printf); va_start(ap, fmt); ret = __wt_log_vprintf(session, fmt, ap); @@ -967,8 +962,7 @@ __session_reset(WT_SESSION *wt_session) WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; - - SESSION_API_CALL_NOCONF(session, reset); + SESSION_API_CALL_PREPARE_NOT_ALLOWED_NOCONF(session, reset); WT_ERR(__wt_txn_context_check(session, false)); @@ -1084,7 +1078,7 @@ __session_import(WT_SESSION *wt_session, const char *uri, const char *config) value = NULL; session = (WT_SESSION_IMPL *)wt_session; - SESSION_API_CALL_NOCONF(session, import); + SESSION_API_CALL_PREPARE_NOT_ALLOWED_NOCONF(session, import); WT_ERR(__wt_inmem_unsupported_op(session, NULL)); @@ -1605,11 +1599,7 @@ __session_begin_transaction(WT_SESSION *wt_session, const char *config) WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; - /* - * Indicated as allowed in prepared state, even though not allowed, so that running transaction - * check below take precedence. 
- */ - SESSION_API_CALL_PREPARE_ALLOWED(session, begin_transaction, config, cfg); + SESSION_API_CALL_PREPARE_NOT_ALLOWED(session, begin_transaction, config, cfg); WT_STAT_CONN_INCR(session, txn_begin); WT_ERR(__wt_txn_context_check(session, false)); @@ -1632,10 +1622,10 @@ __session_commit_transaction(WT_SESSION *wt_session, const char *config) WT_TXN *txn; session = (WT_SESSION_IMPL *)wt_session; + txn = &session->txn; SESSION_API_CALL_PREPARE_ALLOWED(session, commit_transaction, config, cfg); WT_STAT_CONN_INCR(session, txn_commit); - txn = &session->txn; if (F_ISSET(txn, WT_TXN_PREPARE)) { WT_STAT_CONN_INCR(session, txn_prepare_commit); WT_STAT_CONN_DECR(session, txn_prepare_active); @@ -1645,20 +1635,34 @@ __session_commit_transaction(WT_SESSION *wt_session, const char *config) /* Permit the commit if the transaction failed, but was read-only. */ if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0) { - __wt_err(session, EINVAL, "failed transaction requires rollback%s%s", - txn->rollback_reason == NULL ? "" : ": ", + __wt_err(session, EINVAL, + "failed %s" + "transaction requires rollback%s%s", + F_ISSET(txn, WT_TXN_PREPARE) ? "prepared " : "", txn->rollback_reason == NULL ? "" : ": ", txn->rollback_reason == NULL ? "" : txn->rollback_reason); ret = EINVAL; } - if (ret == 0) +err: + /* + * We might have failed because an illegal configuration was specified or because there wasn't a + * transaction running, and we check the former as part of the api macros before we check the + * latter. Deal with it here: if there's an error and a transaction is running, roll it back. 
+ */ + if (ret == 0) { + F_SET(session, WT_SESSION_RESOLVING_TXN); ret = __wt_txn_commit(session, cfg); - else { + F_CLR(session, WT_SESSION_RESOLVING_TXN); + } else if (F_ISSET(txn, WT_TXN_RUNNING)) { + if (F_ISSET(txn, WT_TXN_PREPARE)) + WT_PANIC_RET(session, ret, "failed to commit prepared transaction, failing the system"); + WT_TRET(__wt_session_reset_cursors(session, false)); + F_SET(session, WT_SESSION_RESOLVING_TXN); WT_TRET(__wt_txn_rollback(session, cfg)); + F_CLR(session, WT_SESSION_RESOLVING_TXN); } -err: API_END_RET(session, ret); } @@ -1679,7 +1683,9 @@ __session_prepare_transaction(WT_SESSION *wt_session, const char *config) WT_ERR(__wt_txn_context_check(session, true)); + F_SET(session, WT_SESSION_RESOLVING_TXN); WT_ERR(__wt_txn_prepare(session, cfg)); + F_CLR(session, WT_SESSION_RESOLVING_TXN); err: API_END_RET(session, ret); @@ -1730,7 +1736,9 @@ __session_rollback_transaction(WT_SESSION *wt_session, const char *config) WT_TRET(__wt_session_reset_cursors(session, false)); + F_SET(session, WT_SESSION_RESOLVING_TXN); WT_TRET(__wt_txn_rollback(session, cfg)); + F_CLR(session, WT_SESSION_RESOLVING_TXN); err: API_END_RET(session, ret); @@ -1753,7 +1761,8 @@ __session_timestamp_transaction(WT_SESSION *wt_session, const char *config) SESSION_API_CALL_PREPARE_ALLOWED(session, timestamp_transaction, NULL, cfg); cfg[1] = config; #endif - WT_TRET(__wt_txn_set_timestamp(session, cfg)); + + ret = __wt_txn_set_timestamp(session, cfg); err: API_END_RET(session, ret); } @@ -1770,7 +1779,8 @@ __session_query_timestamp(WT_SESSION *wt_session, char *hex_timestamp, const cha session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL_PREPARE_ALLOWED(session, query_timestamp, config, cfg); - WT_TRET(__wt_txn_query_timestamp(session, hex_timestamp, cfg, false)); + + ret = __wt_txn_query_timestamp(session, hex_timestamp, cfg, false); err: API_END_RET(session, ret); } @@ -1788,7 +1798,7 @@ __session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange) uint64_t 
pinned; session = (WT_SESSION_IMPL *)wt_session; - SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, pinned_range); + SESSION_API_CALL_PREPARE_NOT_ALLOWED_NOCONF(session, transaction_pinned_range); txn_state = WT_SESSION_TXN_STATE(session); @@ -1837,11 +1847,7 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config) uint64_t time_start, time_stop; session = (WT_SESSION_IMPL *)wt_session; - /* - * Indicated as allowed in prepared state, even though not allowed, so that running transaction - * check below take precedence. - */ - SESSION_API_CALL_PREPARE_ALLOWED(session, transaction_sync, config, cfg); + SESSION_API_CALL_PREPARE_NOT_ALLOWED(session, transaction_sync, config, cfg); WT_STAT_CONN_INCR(session, txn_sync); conn = S2C(session); @@ -1930,13 +1936,8 @@ __session_checkpoint(WT_SESSION *wt_session, const char *config) WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; - WT_STAT_CONN_INCR(session, txn_checkpoint); - /* - * Indicated as allowed in prepared state, even though not allowed, so that running transaction - * check below take precedence. - */ - SESSION_API_CALL_PREPARE_ALLOWED(session, checkpoint, config, cfg); + SESSION_API_CALL_PREPARE_NOT_ALLOWED(session, checkpoint, config, cfg); WT_ERR(__wt_inmem_unsupported_op(session, NULL)); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 8dc0a44fe50..5d98ce93152 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -623,26 +623,37 @@ __wt_txn_release(WT_SESSION_IMPL *session) } /* - * __txn_resolve_prepared_op -- - * Resolve a transaction's operations indirect references. In case of prepared transactions, the - * prepared updates could be evicted using cache overflow mechanism. 
Transaction operations - * referring to these prepared updates would be referring to them using indirect references (i.e - * keys/recnos), which need to be resolved as part of that transaction commit/rollback. If no - * updates are resolved throw an error. Increment resolved update count for each resolved update - * count we locate. + * __txn_search_prepared_op -- + * Search for an operation's prepared update. */ static int -__txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit) +__txn_search_prepared_op( + WT_SESSION_IMPL *session, WT_TXN_OP *op, WT_CURSOR **cursorp, WT_UPDATE **updp) { WT_CURSOR *cursor; WT_DECL_RET; WT_TXN *txn; - WT_UPDATE *upd; + uint32_t txn_flags; const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL}; + *updp = NULL; + txn = &session->txn; - WT_RET(__wt_open_cursor(session, op->btree->dhandle->name, NULL, open_cursor_cfg, &cursor)); + cursor = *cursorp; + if (cursor == NULL || ((WT_CURSOR_BTREE *)cursor)->btree->id != op->btree->id) { + *cursorp = NULL; + if (cursor != NULL) + WT_RET(cursor->close(cursor)); + WT_RET(__wt_open_cursor(session, op->btree->dhandle->name, NULL, open_cursor_cfg, &cursor)); + *cursorp = cursor; + } + + /* + * Transaction error and prepare are cleared temporarily as cursor functions are not allowed + * after an error or a prepared transaction. + */ + txn_flags = FLD_MASK(txn->flags, WT_TXN_ERROR | WT_TXN_PREPARE); switch (op->type) { case WT_TXN_OP_BASIC_COL: @@ -651,30 +662,42 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit) break; case WT_TXN_OP_BASIC_ROW: case WT_TXN_OP_INMEM_ROW: - /* - * Transaction prepare is cleared temporarily as cursor functions are not allowed for - * prepared transactions. 
- */ - F_CLR(txn, WT_TXN_PREPARE); + F_CLR(txn, txn_flags); __wt_cursor_set_raw_key(cursor, &op->u.op_row.key); - F_SET(txn, WT_TXN_PREPARE); + F_SET(txn, txn_flags); break; case WT_TXN_OP_NONE: case WT_TXN_OP_REF_DELETE: case WT_TXN_OP_TRUNCATE_COL: case WT_TXN_OP_TRUNCATE_ROW: - WT_ERR_ASSERT(session, false, WT_PANIC, "invalid prepared operation update type"); + WT_RET_ASSERT(session, false, WT_PANIC, "invalid prepared operation update type"); break; } - WT_WITH_BTREE( - session, op->btree, ret = __wt_btcur_search_uncommitted((WT_CURSOR_BTREE *)cursor, &upd)); - WT_ERR(ret); - - /* If we haven't found anything then there's an error. */ - WT_ERR_ASSERT(session, upd != NULL, WT_NOTFOUND, + F_CLR(txn, txn_flags); + WT_WITH_BTREE(session, op->btree, ret = __wt_btcur_search_uncommitted(cursor, updp)); + F_SET(txn, txn_flags); + WT_RET(ret); + WT_RET_ASSERT(session, *updp != NULL, WT_NOTFOUND, "unable to locate update associated with a prepared operation"); + return (0); +} + +/* + * __txn_resolve_prepared_op -- + * Resolve a transaction's operations indirect references. + */ +static int +__txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, WT_CURSOR **cursorp) +{ + WT_TXN *txn; + WT_UPDATE *upd; + + txn = &session->txn; + + WT_RET(__txn_search_prepared_op(session, op, cursorp, &upd)); + for (; upd != NULL; upd = upd->next) { /* * Aborted updates can exist in the update chain of our txn. 
Generally this will occur due @@ -718,9 +741,7 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit) __txn_resolve_prepared_update(session, upd); } -err: - WT_TRET(cursor->close(cursor)); - return (ret); + return (0); } /* @@ -737,7 +758,6 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session) WT_UPDATE *upd; wt_timestamp_t durable_op_timestamp, op_timestamp, prev_op_timestamp; u_int i; - const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL}; bool op_zero_ts, upd_zero_ts; txn = &session->txn; @@ -749,24 +769,18 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session) */ if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) && !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) && txn->mod_count != 0) - WT_RET_MSG(session, EINVAL, - "commit_timestamp required and " - "none set on this transaction"); + WT_RET_MSG(session, EINVAL, "commit_timestamp required and none set on this transaction"); if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) && F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) && txn->mod_count != 0) - WT_RET_MSG(session, EINVAL, - "no commit_timestamp required and " - "timestamp set on this transaction"); + WT_RET_MSG( + session, EINVAL, "no commit_timestamp required and timestamp set on this transaction"); if (F_ISSET(txn, WT_TXN_TS_DURABLE_ALWAYS) && !F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) && txn->mod_count != 0) - WT_RET_MSG(session, EINVAL, - "durable_timestamp required and " - "none set on this transaction"); + WT_RET_MSG(session, EINVAL, "durable_timestamp required and none set on this transaction"); if (F_ISSET(txn, WT_TXN_TS_DURABLE_NEVER) && F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) && txn->mod_count != 0) WT_RET_MSG(session, EINVAL, - "no durable_timestamp required and " - "durable timestamp set on this transaction"); + "no durable_timestamp required and durable timestamp set on this transaction"); /* * If we're not doing any key consistency checking, we're done. 
@@ -778,92 +792,110 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session) * Error on any valid update structures for the same key that are at a later timestamp or use * timestamps inconsistently. */ - for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) - if (op->type == WT_TXN_OP_BASIC_COL || op->type == WT_TXN_OP_BASIC_ROW) { - /* - * Search for prepared updates, so that they will be restored, if moved to lookaside. - */ - if (F_ISSET(txn, WT_TXN_PREPARE)) { - WT_RET(__wt_open_cursor( - session, op->btree->dhandle->name, NULL, open_cursor_cfg, &cursor)); - F_CLR(txn, WT_TXN_PREPARE); - if (op->type == WT_TXN_OP_BASIC_ROW) - __wt_cursor_set_raw_key(cursor, &op->u.op_row.key); - else - ((WT_CURSOR_BTREE *)cursor)->iface.recno = op->u.op_col.recno; - F_SET(txn, WT_TXN_PREPARE); - WT_WITH_BTREE(session, op->btree, - ret = __wt_btcur_search_uncommitted((WT_CURSOR_BTREE *)cursor, &upd)); - if (ret != 0) - WT_RET_MSG(session, EINVAL, "prepared update restore failed"); - } else - upd = op->u.op_upd; - - WT_ASSERT(session, upd != NULL); - op_timestamp = upd->start_ts; + for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) { + switch (op->type) { + case WT_TXN_OP_BASIC_COL: + case WT_TXN_OP_INMEM_COL: + case WT_TXN_OP_BASIC_ROW: + case WT_TXN_OP_INMEM_ROW: + break; + case WT_TXN_OP_NONE: + case WT_TXN_OP_REF_DELETE: + case WT_TXN_OP_TRUNCATE_COL: + case WT_TXN_OP_TRUNCATE_ROW: + continue; + } - /* - * Skip over any aborted update structures, internally created update structures or ones - * from our own transaction. - */ - while (upd != NULL && - (upd->txnid == WT_TXN_ABORTED || upd->txnid == WT_TXN_NONE || upd->txnid == txn->id)) - upd = upd->next; + /* Search for prepared updates, so that they will be restored, if moved to lookaside. */ + if (F_ISSET(txn, WT_TXN_PREPARE)) + WT_ERR(__txn_search_prepared_op(session, op, &cursor, &upd)); + else + upd = op->u.op_upd; - /* - * Check the timestamp on this update with the first valid update in the chain. 
They're - * in most recent order. - */ - if (upd != NULL) { - prev_op_timestamp = upd->start_ts; - durable_op_timestamp = upd->durable_ts; - } + op_timestamp = upd->start_ts; - /* - * We no longer need to access the update structure so it's safe to release our - * reference to the page. - */ - if (cursor != NULL) { - WT_ASSERT(session, F_ISSET(txn, WT_TXN_PREPARE)); - WT_RET(cursor->close(cursor)); - cursor = NULL; - } + /* + * Skip over any aborted update structures, internally created update structures or ones + * from our own transaction. + */ + while (upd != NULL && + (upd->txnid == WT_TXN_ABORTED || upd->txnid == WT_TXN_NONE || upd->txnid == txn->id)) + upd = upd->next; - if (upd == NULL) - continue; - /* - * Check for consistent per-key timestamp usage. If timestamps are or are not used - * originally then they should be used the same way always. For this transaction, - * timestamps are in use anytime the commit timestamp is set. Check timestamps are used - * in order. - */ - op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT); - upd_zero_ts = prev_op_timestamp == WT_TS_NONE; - if (op_zero_ts != upd_zero_ts) { - WT_RET(__wt_verbose_dump_update(session, upd)); - WT_RET(__wt_verbose_dump_txn_one(session, &session->txn, EINVAL, - "per-key timestamps used inconsistently, dumping relevant information")); - } - /* - * If we aren't using timestamps for this transaction then we are done checking. Don't - * check the timestamp because the one in the transaction is not cleared. - */ - if (op_zero_ts) - continue; + /* + * Check the timestamp on this update with the first valid update in the chain. They're in + * most recent order. + */ + if (upd != NULL) { + prev_op_timestamp = upd->start_ts; + durable_op_timestamp = upd->durable_ts; + } - /* - * Only if the update structure doesn't have a timestamp then use the one in the - * transaction structure. 
- */ - if (op_timestamp == WT_TS_NONE) - op_timestamp = txn->commit_timestamp; - if (F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS) && op_timestamp < prev_op_timestamp) - WT_RET_MSG(session, EINVAL, "out of order commit timestamps"); - if (F_ISSET(txn, WT_TXN_TS_DURABLE_KEYS) && - txn->durable_timestamp < durable_op_timestamp) - WT_RET_MSG(session, EINVAL, "out of order durable timestamps"); + if (upd == NULL) + continue; + /* + * Check for consistent per-key timestamp usage. If timestamps are or are not used + * originally then they should be used the same way always. For this transaction, timestamps + * are in use anytime the commit timestamp is set. Check timestamps are used in order. + */ + op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT); + upd_zero_ts = prev_op_timestamp == WT_TS_NONE; + if (op_zero_ts != upd_zero_ts) { + WT_ERR(__wt_verbose_dump_update(session, upd)); + WT_ERR(__wt_verbose_dump_txn_one(session, &session->txn, EINVAL, + "per-key timestamps used inconsistently, dumping relevant information")); } - return (0); + /* + * If we aren't using timestamps for this transaction then we are done checking. Don't check + * the timestamp because the one in the transaction is not cleared. + */ + if (op_zero_ts) + continue; + + /* + * Only if the update structure doesn't have a timestamp then use the one in the transaction + * structure. + */ + if (op_timestamp == WT_TS_NONE) + op_timestamp = txn->commit_timestamp; + if (F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS) && op_timestamp < prev_op_timestamp) + WT_ERR_MSG(session, EINVAL, "out of order commit timestamps"); + if (F_ISSET(txn, WT_TXN_TS_DURABLE_KEYS) && txn->durable_timestamp < durable_op_timestamp) + WT_ERR_MSG(session, EINVAL, "out of order durable timestamps"); + } + +err: + if (cursor != NULL) + WT_TRET(cursor->close(cursor)); + return (ret); +} + +/* + * __txn_mod_compare -- + * Qsort comparison routine for transaction modify list. 
+ */ +static int WT_CDECL +__txn_mod_compare(const void *a, const void *b) +{ + WT_TXN_OP *aopt, *bopt; + + aopt = (WT_TXN_OP *)a; + bopt = (WT_TXN_OP *)b; + + /* If the files are different, order by ID. */ + if (aopt->btree->id != bopt->btree->id) + return (aopt->btree->id < bopt->btree->id); + + /* + * If the files are the same, order by the key. Row-store collators require WT_SESSION pointers, + * and we don't have one. Compare the keys if there's no collator, otherwise return equality. + * Column-store is always easy. + */ + if (aopt->type == WT_TXN_OP_BASIC_ROW || aopt->type == WT_TXN_OP_INMEM_ROW) + return (aopt->btree->collator == NULL ? + __wt_lex_compare(&aopt->u.op_row.key, &bopt->u.op_row.key) : + 0); + return (aopt->u.op_col.recno < bopt->u.op_col.recno); } /* @@ -875,6 +907,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) { WT_CONFIG_ITEM cval; WT_CONNECTION_IMPL *conn; + WT_CURSOR *cursor; WT_DECL_RET; WT_TXN *txn; WT_TXN_GLOBAL *txn_global; @@ -887,6 +920,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) txn = &session->txn; conn = S2C(session); + cursor = NULL; txn_global = &conn->txn_global; locked = false; prepare = F_ISSET(txn, WT_TXN_PREPARE); @@ -923,8 +957,15 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) "durable_timestamp should not be specified for non-prepared transaction"); } - if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) - WT_ASSERT(session, txn->commit_timestamp <= txn->durable_timestamp); + WT_ASSERT(session, + !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) || txn->commit_timestamp <= txn->durable_timestamp); + + /* + * Resolving prepared updates is expensive. Sort prepared modifications so all updates for each + * page within each file are done at the same time. 
+ */ + if (prepare) + __wt_qsort(txn->mod, txn->mod_count, sizeof(WT_TXN_OP), __txn_mod_compare); WT_ERR(__txn_commit_timestamps_assert(session)); @@ -1035,7 +1076,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) * the work will happen on a different modification in this txn. */ if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED)) - WT_ERR(__txn_resolve_prepared_op(session, op, true)); + WT_ERR(__txn_resolve_prepared_op(session, op, true, &cursor)); } break; case WT_TXN_OP_REF_DELETE: @@ -1051,6 +1092,11 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) } txn->mod_count = 0; + if (cursor != NULL) { + WT_ERR(cursor->close(cursor)); + cursor = NULL; + } + /* * If durable is set, we'll try to update the global durable timestamp with that value. If * durable isn't set, durable is implied to be the same as commit so we'll use that instead. @@ -1103,6 +1149,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) return (0); err: + if (cursor != NULL) + WT_TRET(cursor->close(cursor)); + /* * If anything went wrong, roll back. * @@ -1111,6 +1160,14 @@ err: */ if (locked) __wt_readunlock(session, &txn_global->visibility_rwlock); + + /* + * Check for a prepared transaction, and quit: we can't ignore the error and we can't roll back + * a prepared transaction. 
+ */ + if (prepare) + WT_PANIC_RET(session, ret, "failed to commit prepared transaction, failing the system"); + WT_TRET(__wt_txn_rollback(session, cfg)); return (ret); } @@ -1242,6 +1299,7 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[]) int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) { + WT_CURSOR *cursor; WT_DECL_RET; WT_TXN *txn; WT_TXN_OP *op; @@ -1251,6 +1309,7 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_UNUSED(cfg); + cursor = NULL; txn = &session->txn; prepare = F_ISSET(txn, WT_TXN_PREPARE); readonly = txn->mod_count == 0; @@ -1261,6 +1320,13 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) if (txn->notify != NULL) WT_TRET(txn->notify->notify(txn->notify, (WT_SESSION *)session, txn->id, 0)); + /* + * Resolving prepared updates is expensive. Sort prepared modifications so all updates for each + * page within each file are done at the same time. + */ + if (prepare) + __wt_qsort(txn->mod, txn->mod_count, sizeof(WT_TXN_OP), __txn_mod_compare); + /* Rollback and free updates. */ for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) { /* Metadata updates should never be rolled back. */ @@ -1286,7 +1352,7 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) * the work will happen on a different modification in this txn. */ if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED)) - WT_RET(__txn_resolve_prepared_op(session, op, false)); + WT_TRET(__txn_resolve_prepared_op(session, op, false, &cursor)); } break; case WT_TXN_OP_REF_DELETE: @@ -1306,6 +1372,11 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) } txn->mod_count = 0; + if (cursor != NULL) { + WT_TRET(cursor->close(cursor)); + cursor = NULL; + } + __wt_txn_release(session); /* * We're between transactions, if we need to block for eviction, it's a good time to do so. 
Note @@ -1313,6 +1384,7 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) */ if (!readonly) WT_IGNORE_RET(__wt_cache_eviction_check(session, false, false, NULL)); + return (ret); } diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index e960ec03d48..200f84cc8c1 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -1226,6 +1226,7 @@ __checkpoint_lock_dirty_tree_int(WT_SESSION_IMPL *session, bool is_checkpoint, b "cannot be deleted during a hot backup", ckpt->name); } + /* * Mark old checkpoints that are being deleted and figure out which trees we can skip in this * checkpoint. diff --git a/src/third_party/wiredtiger/src/utilities/util_load.c b/src/third_party/wiredtiger/src/utilities/util_load.c index 4f1d1bcb1f1..44672a66854 100644 --- a/src/third_party/wiredtiger/src/utilities/util_load.c +++ b/src/third_party/wiredtiger/src/utilities/util_load.c @@ -449,7 +449,7 @@ config_update(WT_SESSION *session, char **list) if ((ret = __wt_config_merge((WT_SESSION_IMPL *)session, cfg, "filename=,id=," - "checkpoint=,checkpoint_lsn=,version=,source=,", + "checkpoint=,checkpoint_backup_info=,checkpoint_lsn=,version=,source=,", &p)) != 0) break; diff --git a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c index 388b079f842..5631dfb1016 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c @@ -278,8 +278,12 @@ thread_insert(void *arg) else if (ret == WT_ROLLBACK) threadargs->rollbacks++; } - if (sharedopts->remove) - testutil_check(session->commit_transaction(session, NULL)); + if (sharedopts->remove) { + if (ret == WT_ROLLBACK) + testutil_check(session->rollback_transaction(session, NULL)); + else + testutil_check(session->commit_transaction(session, 
NULL)); + } if (i % 1000 == 0 && i != 0) { if (i % 10000 == 0) fprintf(stderr, "*"); diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml index e230bd0ad0a..e7eaaa15de5 100755 --- a/src/third_party/wiredtiger/test/evergreen.yml +++ b/src/third_party/wiredtiger/test/evergreen.yml @@ -162,7 +162,7 @@ functions: set -o errexit set -o verbose for i in $(seq ${times|1}); do - ./t -1 -c ${config|../../../test/format/CONFIG.stress} ${extra_args|} 2>&1 + ./t -1 -c ${config|../../../test/format/CONFIG.stress} ${extra_args|} || ( [ -f RUNDIR/CONFIG ] && cat RUNDIR/CONFIG ) 2>&1 done "format test script": command: shell.exec @@ -172,7 +172,8 @@ functions: set -o errexit set -o verbose for i in $(seq ${times|1}); do - ${test_env_vars|} ${format_test_setting|} ./format.sh ${smp_command|} ${format_test_script_args|} 2>&1 + ${format_test_setting|} + ${test_env_vars|} ./format.sh ${smp_command|} ${format_test_script_args|} 2>&1 done "many dbs test": command: shell.exec @@ -1637,18 +1638,20 @@ tasks: vars: make_command: make all - func: "make check all" - + + # Use format.sh to run tests in parallel (x4) for just under two hours (the + # default Evergreen timeout) on the higher spec build distros. This allows + # us to perform multiple test runs while ensuring a long-running config does + # not result in an Evergreen test timeout failure. 
- name: linux-directio depends_on: - name: compile commands: - func: "fetch artifacts" - func: "compile wiredtiger" - - func: "format test" + - func: "format test script" vars: - times: 3 - config: ../../../test/format/CONFIG.stress - extra_args: -C "direct_io=[data]" + format_test_script_args: -t 110 -j 4 direct_io=1 - name: format-linux-no-ftruncate depends_on: @@ -1940,6 +1943,16 @@ tasks: # run for 24 hours ( 24 * 60 = 1440 minutes), don't stop at failed tests, use default config format_test_script_args: -t 1440 + - name: format-stress-sanitizer-lsm-test + commands: + - func: "get project" + - func: "compile wiredtiger address sanitizer" + - func: "format test script" + vars: + test_env_vars: ASAN_OPTIONS="detect_leaks=1:abort_on_error=1:disable_coredump=0" ASAN_SYMBOLIZER_PATH=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer + # Run for 30 mins, and explicitly set data_source to LSM with a large cache + format_test_script_args: -t 30 data_source=lsm cache_minimum=5000 + - name: format-stress-sanitizer-smoke-test #set a 7 hours timeout exec_timeout_secs: 25200 @@ -1983,22 +1996,6 @@ tasks: # At the time of writing this script, one call to underlying scripts takes about ~15 mins to finish in worst case. # We are giving an extra ~20% room for vairance in execution time. times: 80 - - # This is special task until lz4 issues are resolved for zSeries distros - - name: recovery-stress-test-without-lz4 - #set a 25 hours timeout - exec_timeout_secs: 90000 - commands: - - func: "get project" - - func: "compile wiredtiger" - vars: - posix_configure_flags: --enable-strict --enable-diagnostic --with-builtins=snappy,zlib - - func: "recovery stress test script" - vars: - # Repeat this script enough times to make this task run for 24 hours - # At the time of writing this script, one call to underlying scripts takes about 8 mins to finish in worst case. - # We are giving an extra ~20% room for vairance in execution time. 
- times: 120 - name: split-stress-test commands: @@ -2140,6 +2137,7 @@ buildvariants: - name: compile-ubsan - name: ubsan-test - name: linux-directio + distros: ubuntu1804-build - name: syscall-linux - name: make-check-asan-test - name: configure-combinations @@ -2155,6 +2153,7 @@ buildvariants: - name: recovery-stress-test - name: format-stress-sanitizer-test - name: format-stress-sanitizer-smoke-test + - name: format-stress-sanitizer-lsm-test - name: split-stress-test - name: format-stress-test - name: format-stress-smoke-test @@ -2231,6 +2230,7 @@ buildvariants: - name: compile-ubsan - name: ubsan-test - name: linux-directio + distros: rhel80-build - name: syscall-linux - name: compile-asan - name: make-check-asan-test @@ -2328,6 +2328,9 @@ buildvariants: tasks: - name: compile - name: unit-test + - name: format-stress-ppc-zseries-test + - name: format-stress-smoke-test + - name: format-wtperf-test - name: ubuntu1804-zseries display_name: Ubuntu 18.04 zSeries @@ -2341,5 +2344,7 @@ buildvariants: tasks: - name: compile - name: unit-test - - name: recovery-stress-test-without-lz4 + - name: recovery-stress-test - name: split-stress-test + - name: format-stress-ppc-zseries-test + - name: format-stress-smoke-test diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index 41e3e6d374f..82863516877 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -52,6 +52,12 @@ static void config_reset(void); static void config_transaction(void); /* + * We currently disable random LSM testing, that is, it can be specified explicitly but we won't + * randomly choose LSM as a data_source configuration. + */ +#define DISABLE_RANDOM_LSM_TESTING 1 + +/* * config_setup -- * Initialize configuration for a run. 
*/ @@ -106,14 +112,15 @@ config_setup(void) config_single("data_source=file", false); break; case 2: /* 20% */ - /* - * LSM requires a row-store and backing disk. - * - * Configuring truncation or timestamps results in LSM cache problems, don't - * configure LSM if those set. - * - * XXX Remove the timestamp test when WT-4162 resolved. - */ +#if !defined(DISABLE_RANDOM_LSM_TESTING) + /* + * LSM requires a row-store and backing disk. + * + * Configuring truncation or timestamps results in LSM cache problems, don't configure + * LSM if those set. + * + * XXX Remove the timestamp test when WT-4162 resolved. + */ if (g.type != ROW || g.c_in_memory) break; if (config_is_perm("transaction_timestamps") && g.c_txn_timestamps) @@ -121,6 +128,7 @@ config_setup(void) if (config_is_perm("truncate") && g.c_truncate) break; config_single("data_source=lsm", false); +#endif break; case 3: case 4: diff --git a/src/third_party/wiredtiger/test/format/format.sh b/src/third_party/wiredtiger/test/format/format.sh index 83a56e4d84d..0efc9858e82 100755 --- a/src/third_party/wiredtiger/test/format/format.sh +++ b/src/third_party/wiredtiger/test/format/format.sh @@ -214,7 +214,9 @@ report_failure() echo "$name: job in $dir failed" echo "$name: $dir log:" - sed 's/^/ > /' < $log + sed 's/^/ /' < $log + echo "$name: $dir/CONFIG:" + sed 's/^/ /' < $dir/CONFIG } # Resolve/cleanup completed jobs. 
diff --git a/src/third_party/wiredtiger/test/suite/test_assert02.py b/src/third_party/wiredtiger/test/suite/test_assert02.py index 46b4f191917..d4bcaf639ac 100644 --- a/src/third_party/wiredtiger/test/suite/test_assert02.py +++ b/src/third_party/wiredtiger/test/suite/test_assert02.py @@ -68,8 +68,7 @@ class test_assert02(wttest.WiredTigerTestCase, suite_subprocess): c_never = self.session.open_cursor(uri_never) c_none = self.session.open_cursor(uri_none) self.session.begin_transaction() - self.session.timestamp_transaction( - 'commit_timestamp=' + timestamp_str(1)) + self.session.timestamp_transaction('commit_timestamp=' + timestamp_str(1)) c_always['key1'] = 'value1' c_def['key1'] = 'value1' c_never['key1'] = 'value1' @@ -103,7 +102,7 @@ class test_assert02(wttest.WiredTigerTestCase, suite_subprocess): msg = "/timestamp set on this transaction/" self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:self.assertEquals(c_never.search(), 0), msg) - self.session.commit_transaction() + self.session.rollback_transaction() c_always.close() c_def.close() c_never.close() @@ -131,7 +130,7 @@ class test_assert02(wttest.WiredTigerTestCase, suite_subprocess): msg = "/none set on this transaction/" self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:self.assertEquals(c_always.search(), 0), msg) - self.session.commit_transaction() + self.session.rollback_transaction() c_always.close() c_def.close() c_never.close() diff --git a/src/third_party/wiredtiger/test/suite/test_assert05.py b/src/third_party/wiredtiger/test/suite/test_assert05.py index ab7f8265930..d5c697989ef 100644 --- a/src/third_party/wiredtiger/test/suite/test_assert05.py +++ b/src/third_party/wiredtiger/test/suite/test_assert05.py @@ -73,10 +73,15 @@ class test_assert05(wttest.WiredTigerTestCase, suite_subprocess): if (use_ts != 'never'): self.session.commit_transaction() else: + ''' + Commented out for now: the system panics if we fail after preparing a transaction. 
+ msg = "/timestamp set on this transaction/" self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:self.assertEquals(self.session.commit_transaction(), 0), msg) + ''' + self.session.rollback_transaction() c.close() self.count += 1 @@ -96,10 +101,15 @@ class test_assert05(wttest.WiredTigerTestCase, suite_subprocess): if (use_ts != 'always'): self.session.commit_transaction() else: + ''' + Commented out for now: the system panics if we fail after preparing a transaction. + msg = "/durable_timestamp is required for a prepared/" self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:self.assertEquals(self.session.commit_transaction(), 0), msg) + ''' + self.session.rollback_transaction() self.count += 1 c.close() diff --git a/src/third_party/wiredtiger/test/suite/test_assert06.py b/src/third_party/wiredtiger/test/suite/test_assert06.py index bc7532cd648..250077cf36f 100644 --- a/src/third_party/wiredtiger/test/suite/test_assert06.py +++ b/src/third_party/wiredtiger/test/suite/test_assert06.py @@ -111,6 +111,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.session.commit_transaction() c.close() + ''' + Commented out for now: the system panics if we fail after preparing a transaction. + c = self.session.open_cursor(uri) self.session.begin_transaction() c['key_ts1'] = 'value4' @@ -118,6 +121,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.commit_transaction(), msg_ooo) c.close() + ''' # Detect not using a timestamp. c = self.session.open_cursor(uri) @@ -137,6 +141,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.session.commit_transaction() c.close() + ''' + Commented out for now: the system panics if we fail after preparing a transaction. 
+ c = self.session.open_cursor(uri) self.session.begin_transaction() c['key_nots'] = 'value3' @@ -145,6 +152,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): lambda: self.session.commit_transaction(), msg_usage) c.close() self.session.checkpoint() + ''' c = self.session.open_cursor(uri) self.assertEquals(c['key_ts1'], 'value5') @@ -191,6 +199,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.session.commit_transaction() c.close() + ''' + Commented out for now: the system panics if we fail after preparing a transaction. + # Modify the data item at timestamp 1. We should detect it is wrong. c = self.session.open_cursor(uri) self.session.begin_transaction() @@ -199,6 +210,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.commit_transaction(), msg_ooo) c.close() + ''' # Make sure we can successfully add a different key at timestamp 1. c = self.session.open_cursor(uri) @@ -224,6 +236,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.apply_timestamps(15) self.session.commit_transaction() + ''' + Commented out for now: the system panics if we fail after preparing a transaction. + c = self.session.open_cursor(uri) self.session.begin_transaction() c['key_ts3'] = 'value13' @@ -232,6 +247,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.commit_transaction(), msg_ooo) c.close() + ''' c = self.session.open_cursor(uri) self.assertEquals(c['key_ts3'], 'value10') @@ -248,6 +264,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.apply_timestamps(13) self.session.commit_transaction() + ''' + Commented out for now: the system panics if we fail after preparing a transaction. 
+ c = self.session.open_cursor(uri) self.session.begin_transaction() c['key_ts4'] = 'value13' @@ -255,6 +274,10 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.commit_transaction(), msg_ooo) c.close() + ''' + + ''' + Commented out for now: the system panics if we fail after preparing a transaction. # Make sure multiple update attempts still fail and eventually # succeed with a later timestamp. This tests that aborted entries @@ -269,6 +292,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): c = self.session.open_cursor(uri) self.assertEquals(c['key_ts4'], 'value15') c.close() + ''' c = self.session.open_cursor(uri) self.session.begin_transaction() @@ -308,6 +332,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.session.commit_transaction() c.close() + ''' + Commented out for now: the system panics if we fail after preparing a transaction. + c = self.session.open_cursor(uri) self.session.begin_transaction() c['key_nots'] = 'value16' @@ -315,6 +342,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.commit_transaction(), msg_usage) c.close() + ''' c = self.session.open_cursor(uri) self.session.begin_transaction() @@ -322,6 +350,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.session.commit_transaction() c.close() + ''' + Commented out for now: the system panics if we fail after preparing a transaction. 
+ c = self.session.open_cursor(uri) self.session.begin_transaction() c['key_nots'] = 'value17' @@ -329,6 +360,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.commit_transaction(), msg_usage) c.close() + ''' c = self.session.open_cursor(uri) self.assertEquals(c['key_nots'], 'value_nots1') @@ -375,6 +407,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.session.commit_transaction() c.close() + ''' + Commented out for now: the system panics if we fail after preparing a transaction. + c = self.session.open_cursor(uri) self.session.begin_transaction() c['key_nots'] = 'value23' @@ -386,6 +421,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): lambda: self.session.commit_transaction( 'durable_timestamp=' + timestamp_str(23)), msg_usage) c.close() + ''' if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_backup12.py b/src/third_party/wiredtiger/test/suite/test_backup12.py index 6726164d038..1148ed84a45 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup12.py +++ b/src/third_party/wiredtiger/test/suite/test_backup12.py @@ -83,6 +83,9 @@ class test_backup12(wttest.WiredTigerTestCase, suite_subprocess): # That log file is not part of the list returned. This is a full backup # primary cursor with incremental configured. os.mkdir(self.dir) + # + # Note, this first backup is actually done before a checkpoint is taken. + # config = 'incremental=(enabled,this_id="ID1")' bkup_c = self.session.open_cursor('backup:', None, config) @@ -168,12 +171,9 @@ class test_backup12(wttest.WiredTigerTestCase, suite_subprocess): offset = incrlist[0] size = incrlist[1] curtype = incrlist[2] - self.assertEqual(offset, 0) - # For now assert WT_BACKUP_FILE (which is 1). 
- self.assertEqual(curtype, 1) + self.assertTrue(curtype == 1 or curtype == 2) dup_cnt += 1 dupc.close() - self.assertEqual(dup_cnt, 1) self.pr('Copy from: ' + newfile + ' (' + str(sz) + ') to ' + self.dir) shutil.copy(newfile, self.dir) self.assertEqual(ret, wiredtiger.WT_NOTFOUND) diff --git a/src/third_party/wiredtiger/test/suite/test_durable_ts02.py b/src/third_party/wiredtiger/test/suite/test_durable_ts02.py index 15b9bfdec82..27d0d3f3146 100644 --- a/src/third_party/wiredtiger/test/suite/test_durable_ts02.py +++ b/src/third_party/wiredtiger/test/suite/test_durable_ts02.py @@ -78,6 +78,9 @@ class test_durable_ts03(wttest.WiredTigerTestCase): self.conn.set_timestamp('stable_timestamp=' + timestamp_str(100)) self.session.checkpoint() + ''' + Commented out for now: the system panics if we fail after preparing a transaction. + # Scenario: 1 # Check to see commit timestamp > durable timestamap, returns error. session.begin_transaction() @@ -90,7 +93,9 @@ class test_durable_ts03(wttest.WiredTigerTestCase): session.prepare_transaction('prepare_timestamp=' + timestamp_str(150)) msg = "/is less than the commit timestamp/" # Check for error when commit timestamp > durable timestamp. - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: session.commit_transaction('commit_timestamp=' + timestamp_str(200) + ',durable_timestamp=' + timestamp_str(180)), msg) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: session.commit_transaction('commit_timestamp=' +\ + timestamp_str(200) + ',durable_timestamp=' + timestamp_str(180)), msg) # Set a stable timestamp so that first update value is durable. self.conn.set_timestamp('stable_timestamp=' + timestamp_str(250)) @@ -110,7 +115,10 @@ class test_durable_ts03(wttest.WiredTigerTestCase): msg = "/is less than the stable timestamp/" # Check that error is returned when durable timestamp < stable timestamp. 
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: session.commit_transaction('commit_timestamp=' + timestamp_str(200) + ',durable_timestamp=' + timestamp_str(240)), msg) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: session.commit_transaction('commit_timestamp=' +\ + timestamp_str(200) + ',durable_timestamp=' + timestamp_str(240)), msg) + ''' if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_prepare02.py b/src/third_party/wiredtiger/test/suite/test_prepare02.py index 7546cc44d59..ebe79cf729c 100644 --- a/src/third_party/wiredtiger/test/suite/test_prepare02.py +++ b/src/third_party/wiredtiger/test/suite/test_prepare02.py @@ -40,38 +40,33 @@ class test_prepare02(wttest.WiredTigerTestCase, suite_subprocess): session_config = 'isolation=snapshot' def test_prepare_session_operations(self): - self.session.create("table:mytable", "key_format=S,value_format=S") - cursor = self.session.open_cursor("table:mytable", None) - # Test the session methods that are forbidden after the transaction is - # prepared. + # Test the session methods forbidden after the transaction is prepared. + self.session.create("table:mytable", "key_format=S,value_format=S") self.session.begin_transaction() + cursor = self.session.open_cursor("table:mytable", None) + cursor["key"] = "value" self.session.prepare_transaction("prepare_timestamp=2a") - msg = "/ not permitted in a/" - # - # The operations listed below are not supported in the prepared state. - # - # The operations are listed in the same order as they are declared in - # the session structure. Any function missing below is allowed in the - # prepared state. - # + msg = "/not permitted in a prepared transaction/" + + # The operations are listed in the same order as they are declared in the session structure. + # WT_SESSION.close permitted. 
self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:self.session.reconfigure(), msg) + # WT_SESSION.strerror permitted, but currently broken in the Python API (WT-5399). self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.open_cursor("table:mytable", None), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.alter("table:mytable", - "access_pattern_hint=random"), msg) + lambda: self.session.alter("table:mytable", "access_pattern_hint=random"), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.create("table:mytable1", - "key_format=S,value_format=S"), msg) + lambda: self.session.create("table:mytable1", "key_format=S,value_format=S"), msg) + # WT_SESSION.import permitted, not supported in the Python API. self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.compact("table:mytable"), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.drop("table:mytable", None), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.join(cursor, cursor, - "compare=gt,count=10"), msg) + lambda: self.session.join(cursor, cursor, "compare=gt,count=10"), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.log_flush("sync=on"), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, @@ -79,33 +74,39 @@ class test_prepare02(wttest.WiredTigerTestCase, suite_subprocess): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.rebalance("table:mytable", None), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.rename("table:mytable", "table:mynewtable", - None), msg) + lambda: self.session.rename("table:mytable", "table:mynewtable", None), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:self.session.reset(), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: 
self.session.salvage("table:mytable", None), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.truncate("table:mytable", - None, None, None), msg) + lambda: self.session.truncate("table:mytable", None, None, None), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.upgrade("table:mytable", None), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.verify("table:mytable", None), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:self.session.begin_transaction(), msg) + # WT_SESSION.commit_transaction permitted, tested below. self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:self.session.prepare_transaction("prepare_timestamp=2a"), msg) - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.timestamp_transaction( - "read_timestamp=2a"), msg) + # WT_SESSION.rollback_transaction permitted, tested below. + self.session.timestamp_transaction("commit_timestamp=2b") + self.assertTimestampsEqual(self.session.query_timestamp('get=prepare'), '2a') self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:self.session.checkpoint(), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.snapshot("name=test"), msg) + # WT_SESSION.transaction_pinned_range permitted, not supported in the Python API. self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:self.session.transaction_sync(), msg) - self.session.rollback_transaction() + self.session.breakpoint() + + # Commit the transaction. Test that no "not permitted in a prepared transaction" error has + # set a transaction error flag, that is, we should still be able to commit successfully. + self.session.timestamp_transaction("commit_timestamp=2b") + self.session.timestamp_transaction("durable_timestamp=2b") + self.session.commit_transaction('commit_timestamp=2a') # Commit after prepare is permitted. 
self.session.begin_transaction() @@ -115,8 +116,7 @@ class test_prepare02(wttest.WiredTigerTestCase, suite_subprocess): self.session.timestamp_transaction("durable_timestamp=2b") self.session.commit_transaction() - # Setting commit timestamp via timestamp_transaction after - # prepare is also permitted. + # Setting commit timestamp via timestamp_transaction after prepare is also permitted. self.session.begin_transaction() c1 = self.session.open_cursor("table:mytable", None) self.session.prepare_transaction("prepare_timestamp=2a") diff --git a/src/third_party/wiredtiger/test/suite/test_prepare04.py b/src/third_party/wiredtiger/test/suite/test_prepare04.py index d65b96adefe..fd27a244747 100644 --- a/src/third_party/wiredtiger/test/suite/test_prepare04.py +++ b/src/third_party/wiredtiger/test/suite/test_prepare04.py @@ -116,7 +116,7 @@ class test_prepare04(wttest.WiredTigerTestCase, suite_subprocess): # Make sure we detect the conflict between operations. self.assertRaisesException(wiredtiger.WiredTigerError, lambda:c_other.update(), conflictmsg) - s_other.commit_transaction() + s_other.rollback_transaction() self.session.timestamp_transaction('commit_timestamp=' + timestamp_str(300)) self.session.timestamp_transaction('durable_timestamp=' + timestamp_str(300)) diff --git a/src/third_party/wiredtiger/test/suite/test_prepare05.py b/src/third_party/wiredtiger/test/suite/test_prepare05.py index 9c812879315..0b4769ea566 100644 --- a/src/third_party/wiredtiger/test/suite/test_prepare05.py +++ b/src/third_party/wiredtiger/test/suite/test_prepare05.py @@ -52,7 +52,7 @@ class test_prepare05(wttest.WiredTigerTestCase, suite_subprocess): lambda: self.session.prepare_transaction( 'prepare_timestamp=' + timestamp_str(1)), "/older than the oldest timestamp/") - self.session.commit_transaction('commit_timestamp=' + timestamp_str(3)) + self.session.rollback_transaction() # Check setting the prepare timestamp same as oldest timestamp is valid. 
self.session.begin_transaction() @@ -72,7 +72,7 @@ class test_prepare05(wttest.WiredTigerTestCase, suite_subprocess): lambda: self.session.prepare_transaction( 'prepare_timestamp=' + timestamp_str(2)), "/should not have been set before/") - self.session.commit_transaction('commit_timestamp=' + timestamp_str(3)) + self.session.rollback_transaction() # It is illegal to set a prepare timestamp same as or earlier than an # active read timestamp. @@ -97,16 +97,18 @@ class test_prepare05(wttest.WiredTigerTestCase, suite_subprocess): s_reader.rollback_transaction() self.session.rollback_transaction() - # It is illegal to set a commit timestamp older than prepare - # timestamp of a transaction. + ''' + Commented out for now: the system panics if we fail after preparing a transaction. + + # It is illegal to set a commit timestamp older than prepare timestamp of a transaction. self.session.begin_transaction() c[1] = 1 - self.session.prepare_transaction( - 'prepare_timestamp=' + timestamp_str(5)) + self.session.prepare_transaction('prepare_timestamp=' + timestamp_str(5)) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.commit_transaction( 'commit_timestamp=' + timestamp_str(4)), "/less than the prepare timestamp/") + ''' # It is legal to set a commit timestamp as same as prepare # timestamp. diff --git a/src/third_party/wiredtiger/test/suite/test_prepare06.py b/src/third_party/wiredtiger/test/suite/test_prepare06.py index 6255630d4ef..173f23b6142 100644 --- a/src/third_party/wiredtiger/test/suite/test_prepare06.py +++ b/src/third_party/wiredtiger/test/suite/test_prepare06.py @@ -64,6 +64,9 @@ class test_prepare06(wttest.WiredTigerTestCase, suite_subprocess): self.session.timestamp_transaction('durable_timestamp=' + timestamp_str(35)) self.session.commit_transaction() + ''' + Commented out for now: the system panics if we fail after preparing a transaction. 
+ # Check setting a prepared transaction timestamps earlier than the # oldest timestamp is invalid, if durable timestamp is less than the # stable timestamp. @@ -75,6 +78,7 @@ class test_prepare06(wttest.WiredTigerTestCase, suite_subprocess): 'durable_timestamp=' + timestamp_str(25)), "/is less than the stable timestamp/") self.session.rollback_transaction() + ''' # Check the cases with an active reader. # Start a new reader to have an active read timestamp. @@ -101,16 +105,22 @@ class test_prepare06(wttest.WiredTigerTestCase, suite_subprocess): "/must be greater than the latest active read timestamp/") self.session.rollback_transaction() + ''' + Commented out for now: the system panics if we fail after preparing a transaction. + # It is illegal to set a commit timestamp less than the prepare # timestamp of a transaction. self.session.begin_transaction() c[1] = 1 - self.session.prepare_transaction( - 'prepare_timestamp=' + timestamp_str(45)) + self.session.prepare_transaction('prepare_timestamp=' + timestamp_str(45)) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.commit_transaction( 'commit_timestamp=' + timestamp_str(30)), "/less than the prepare timestamp/") + ''' + + ''' + Commented out for now: the system panics if we fail after preparing a transaction. # It is legal to set a commit timestamp older than prepare timestamp of # a transaction with roundup_timestamps settings. 
@@ -125,6 +135,7 @@ class test_prepare06(wttest.WiredTigerTestCase, suite_subprocess): 'durable_timestamp=' + timestamp_str(30)), "/is less than the commit timestamp/") self.session.rollback_transaction() + ''' s_reader.commit_transaction() diff --git a/src/third_party/wiredtiger/test/suite/test_stat08.py b/src/third_party/wiredtiger/test/suite/test_stat08.py index db7b0ad00d9..e82bc661f60 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat08.py +++ b/src/third_party/wiredtiger/test/suite/test_stat08.py @@ -33,7 +33,7 @@ import wiredtiger, wttest class test_stat08(wttest.WiredTigerTestCase): nentries = 350000 - conn_config = 'cache_size=50MB,statistics=(all)' + conn_config = 'cache_size=10MB,statistics=(all)' entry_value = "abcde" * 40 BYTES_READ = 4000 READ_TIME = 4003 diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp13.py b/src/third_party/wiredtiger/test/suite/test_timestamp13.py index 0d230da848b..ad2542f6181 100644 --- a/src/third_party/wiredtiger/test/suite/test_timestamp13.py +++ b/src/third_party/wiredtiger/test/suite/test_timestamp13.py @@ -70,7 +70,7 @@ class test_timestamp13(wttest.WiredTigerTestCase, suite_subprocess): lambda: self.session.query_timestamp('get=unknown'), '/not a permitted choice for key/') - self.session.commit_transaction() + self.session.rollback_transaction() # Querying a session's timestamps will error when not in a transaction. for query in query_choices: self.assertRaises( diff --git a/src/third_party/wiredtiger/test/suite/test_txn17.py b/src/third_party/wiredtiger/test/suite/test_txn17.py index 97bc5cf6d50..b7e843eaccc 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn17.py +++ b/src/third_party/wiredtiger/test/suite/test_txn17.py @@ -57,24 +57,26 @@ class test_txn17(wttest.WiredTigerTestCase, suite_subprocess): lambda: self.session.rollback_transaction(), '/only permitted in a running transaction/') - # Test API functionality tagged as requires_notransaction. 
- # Begin a transaction and execute all the following tests under it. - self.session.begin_transaction() - # Cannot begin a transaction while a transaction is already running. + self.session.begin_transaction() self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.begin_transaction(), '/not permitted in a running transaction/') + self.session.rollback_transaction() # Cannot take a checkpoint while a transaction is running. + self.session.begin_transaction() self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.checkpoint(), '/not permitted in a running transaction/') + self.session.rollback_transaction() # Cannot call transaction_sync while a transaction is running. + self.session.begin_transaction() self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.transaction_sync(), '/not permitted in a running transaction/') + self.session.rollback_transaction() if __name__ == '__main__': wttest.run() |