From 70f793946b5e3872d07f73908b2d45a31cce051d Mon Sep 17 00:00:00 2001 From: Luke Chen Date: Tue, 5 Mar 2019 13:25:53 +1100 Subject: Import wiredtiger: afdead1093b5c5b41dd54ddddf6f42d92bef1666 from branch mongodb-4.2 ref: 37e1570f82..afdead1093 for: 4.1.9 WT-4413 Add optional compact progress messages WT-4465 Add documentation for a dhandle's lifecycle WT-4514 Implement assert=durable_timestamp WT-4517 Change WT data format to avoid writing stable timestamps WT-4537 Fix WiredTiger cursor prev/next traversal failure on prepare retry WT-4554 Enhance WT salvage to handle the case of corrupted WiredTiger.turtle WT-4556 workgen: add synchronization points during run time WT-4558 WiredTiger connection statistics cursor incorrectly provides doubled-up values WT-4568 Operation tracking visualization incorrectly displays call stacks WT-4573 Reducing calls to __wt_epoch from session reset WT-4587 Parallelize the script that parses operation tracking files WT-4595 Coverity "null pointer dereference" complaints WT-4596 Simplify wt utility's session/connection handling WT-4599 Show latency threshold violations in operation visualizations WT-4605 workgen: read_write_storms.py needs a public domain notice WT-4606 workgen: remove lsm from the default table type in wtperf emulation WT-4613 Skip the wt4333_handle_locks test on OS X WT-4615 Sync backup file before returning backup cursor WT-4617 Cursor next/prev returns PREPARE_CONFLICT only once --- .../bench/workgen/runner/read_write_storms.py | 35 +- .../bench/workgen/runner/read_write_sync_long.py | 136 ++++++++ .../bench/workgen/runner/read_write_sync_short.py | 151 ++++++++ .../bench/workgen/runner/runner/__init__.py | 2 +- .../wiredtiger/bench/workgen/runner/runner/core.py | 17 + .../wiredtiger/bench/workgen/workgen.cxx | 115 +++++- src/third_party/wiredtiger/bench/workgen/workgen.h | 8 + .../wiredtiger/bench/workgen/workgen.swig | 2 +- .../wiredtiger/bench/workgen/workgen_int.h | 4 + .../wiredtiger/bench/workgen/workgen_time.h | 6 + src/third_party/wiredtiger/bench/workgen/wtperf.py | 2 +- src/third_party/wiredtiger/dist/api_data.py | 9 +- src/third_party/wiredtiger/dist/s_string.ok | 1 + src/third_party/wiredtiger/import.data | 2 +- .../wiredtiger/src/block/block_compact.c | 3 +- src/third_party/wiredtiger/src/btree/bt_compact.c | 34 ++ src/third_party/wiredtiger/src/btree/bt_curnext.c | 94 +++-- src/third_party/wiredtiger/src/btree/bt_curprev.c | 96 ++--- src/third_party/wiredtiger/src/btree/bt_debug.c | 26 ++ src/third_party/wiredtiger/src/btree/bt_handle.c | 16 + src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c | 62 ++-- src/third_party/wiredtiger/src/config/config_def.c | 159 +++++---- src/third_party/wiredtiger/src/conn/conn_api.c | 1 + src/third_party/wiredtiger/src/cursor/cur_backup.c | 15 +- src/third_party/wiredtiger/src/cursor/cur_index.c | 4 +- src/third_party/wiredtiger/src/cursor/cur_join.c | 2 +- src/third_party/wiredtiger/src/cursor/cur_stat.c | 1 + src/third_party/wiredtiger/src/cursor/cur_table.c | 4 +- .../src/docs/devdoc-dhandle-lifecycle.dox | 92 +++++ .../wiredtiger/src/docs/devdoc-index.dox | 10 + src/third_party/wiredtiger/src/docs/spell.ok | 5 + .../wiredtiger/src/docs/transactions.dox | 7 + src/third_party/wiredtiger/src/include/btree.h | 7 +- src/third_party/wiredtiger/src/include/cell.i | 288 +++++++++------ src/third_party/wiredtiger/src/include/compact.h | 1 + .../wiredtiger/src/include/connection.h | 59 ++-- src/third_party/wiredtiger/src/include/cursor.h | 8 + src/third_party/wiredtiger/src/include/cursor.i | 42 --- 
src/third_party/wiredtiger/src/include/extern.h | 2 +- src/third_party/wiredtiger/src/include/misc.h | 26 +- src/third_party/wiredtiger/src/include/session.h | 2 +- src/third_party/wiredtiger/src/include/txn.h | 40 ++- src/third_party/wiredtiger/src/include/txn.i | 6 + .../wiredtiger/src/include/wiredtiger.in | 33 +- .../wiredtiger/src/include/wt_internal.h | 4 +- src/third_party/wiredtiger/src/meta/meta_turtle.c | 30 +- .../wiredtiger/src/reconcile/rec_write.c | 117 ++++--- .../wiredtiger/src/schema/schema_create.c | 9 +- .../wiredtiger/src/schema/schema_drop.c | 8 +- .../wiredtiger/src/schema/schema_list.c | 16 +- .../wiredtiger/src/schema/schema_open.c | 6 +- .../wiredtiger/src/schema/schema_rename.c | 4 +- .../wiredtiger/src/schema/schema_stat.c | 2 +- .../wiredtiger/src/schema/schema_truncate.c | 2 +- .../wiredtiger/src/schema/schema_worker.c | 3 +- .../wiredtiger/src/session/session_api.c | 6 +- src/third_party/wiredtiger/src/txn/txn.c | 116 ++++-- src/third_party/wiredtiger/src/txn/txn_ckpt.c | 7 +- src/third_party/wiredtiger/src/txn/txn_timestamp.c | 33 +- src/third_party/wiredtiger/src/utilities/util.h | 2 +- .../wiredtiger/src/utilities/util_downgrade.c | 6 +- .../wiredtiger/src/utilities/util_main.c | 20 +- .../test/csuite/wt4156_metadata_salvage/main.c | 23 +- .../test/csuite/wt4333_handle_locks/main.c | 38 +- src/third_party/wiredtiger/test/format/config.c | 3 - .../wiredtiger/test/suite/test_assert04.py | 3 + .../wiredtiger/test/suite/test_assert05.py | 120 +++++++ .../wiredtiger/test/suite/test_assert06.py | 388 +++++++++++++++++++++ .../wiredtiger/test/suite/test_prepare_cursor02.py | 101 ++++++ .../tools/optrack/find-latency-spikes.py | 314 +++++++++++++---- .../wiredtiger/tools/optrack/wt_optrack_decode.py | 1 - 71 files changed, 2329 insertions(+), 688 deletions(-) create mode 100755 src/third_party/wiredtiger/bench/workgen/runner/read_write_sync_long.py create mode 100755 src/third_party/wiredtiger/bench/workgen/runner/read_write_sync_short.py mode change 100644 => 100755 src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py mode change 100644 => 100755 src/third_party/wiredtiger/bench/workgen/runner/runner/core.py create mode 100644 src/third_party/wiredtiger/src/docs/devdoc-dhandle-lifecycle.dox create mode 100644 src/third_party/wiredtiger/test/suite/test_assert05.py create mode 100644 src/third_party/wiredtiger/test/suite/test_assert06.py create mode 100644 src/third_party/wiredtiger/test/suite/test_prepare_cursor02.py (limited to 'src/third_party') diff --git a/src/third_party/wiredtiger/bench/workgen/runner/read_write_storms.py b/src/third_party/wiredtiger/bench/workgen/runner/read_write_storms.py index 2f774d0c902..0a4dab510f7 100644 --- a/src/third_party/wiredtiger/bench/workgen/runner/read_write_storms.py +++ b/src/third_party/wiredtiger/bench/workgen/runner/read_write_storms.py @@ -1,5 +1,34 @@ -#/usr/bin/env python -# generated from runner/read_write_heavy.wtperf originally, then hand edited. +#!/usr/bin/env python +# +# Public Domain 2014-2019 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. 
+# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +# Generated from runner/read_write_heavy.wtperf originally, then hand edited. +# Create waves of extra read and write activity (storms). from runner import * from wiredtiger import * @@ -11,7 +40,7 @@ conn_config += ",cache_size=2GB,eviction=(threads_max=8),log=(enabled=true),sess conn = wiredtiger_open("WT_TEST", "create," + conn_config) s = conn.open_session("") -wtperf_table_config = "key_format=S,value_format=S,type=lsm," +\ +wtperf_table_config = "key_format=S,value_format=S," +\ "exclusive=true,allocation_size=4kb," +\ "internal_page_max=64kb,leaf_page_max=4kb,split_pct=100," compress_table_config = "block_compressor=snappy," diff --git a/src/third_party/wiredtiger/bench/workgen/runner/read_write_sync_long.py b/src/third_party/wiredtiger/bench/workgen/runner/read_write_sync_long.py new file mode 100755 index 00000000000..c3374f4d3f2 --- /dev/null +++ b/src/third_party/wiredtiger/bench/workgen/runner/read_write_sync_long.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2019 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +# Generated from runner/read_write_heavy.wtperf originally, then hand edited. +# A long run demonstrating how read or write threads can be synchronized. 
+import sys + +from runner import * +from wiredtiger import * +from workgen import * + +context = Context() +conn_config = "" +conn_config += ",cache_size=2GB,eviction=(threads_max=8),log=(enabled=true),session_max=250,statistics=(fast),statistics_log=(wait=1,json)" # explicitly added +conn = wiredtiger_open("WT_TEST", "create," + conn_config) +s = conn.open_session("") + +wtperf_table_config = "key_format=S,value_format=S," +\ + "exclusive=true,allocation_size=4kb," +\ + "internal_page_max=64kb,leaf_page_max=4kb,split_pct=100," +compress_table_config = "block_compressor=snappy," +table_config = "memory_page_max=10m,leaf_value_max=64MB,checksum=on,split_pct=90,type=file,log=(enabled=false),leaf_page_max=32k,block_compressor=snappy" +tables = [] +table_count = 100 +for i in range(0, table_count): + tname = "table:test" + str(i) + table = Table(tname) + s.create(tname, wtperf_table_config +\ + compress_table_config + table_config) + table.options.key_size = 20 + table.options.value_size = 7000 + tables.append(table) + +populate_threads = 4 +icount = 4000000 +# There are multiple tables to be filled during populate, +# the icount is split between them all. +pop_ops = Operation(Operation.OP_INSERT, tables[0]) +pop_ops = op_multi_table(pop_ops, tables) +nops_per_thread = icount / (populate_threads * table_count) +pop_thread = Thread(pop_ops * nops_per_thread) +pop_workload = Workload(context, populate_threads * pop_thread) +pop_workload.run(conn) +print('populate complete') + +# Log like file, requires that logging be enabled in the connection config. +log_name = "table:log" +s.create(log_name, wtperf_table_config + "key_format=S,value_format=S," + compress_table_config + table_config + ",log=(enabled=true)") +log_table = Table(log_name) + +ops = Operation(Operation.OP_UPDATE, tables[0]) +ops = op_multi_table(ops, tables, False) +ops = op_log_like(ops, log_table, 0) +thread0 = Thread(ops) +# These operations include log_like operations, which will increase the number +# of insert/update operations by a factor of 2.0. This may cause the +# actual operations performed to be above the throttle. +thread0.options.throttle=11 +thread0.options.throttle_burst=0 + +ops = Operation(Operation.OP_SEARCH, tables[0]) +ops = op_multi_table(ops, tables, False) +ops = op_log_like(ops, log_table, 0) +thread1 = Thread(ops) +thread1.options.throttle=60 +thread1.options.throttle_burst=0 + +ops = Operation(Operation.OP_SLEEP, "60") + \ + Operation(Operation.OP_CHECKPOINT, "") +checkpoint_thread = Thread(ops) + +ops = Operation(Operation.OP_SLEEP, "0.1") + \ + Operation(Operation.OP_LOG_FLUSH, "") +logging_thread = Thread(ops) + +############################################################################ +# This part was added to the generated file. +# Add unthrottled threads that are synchronized so we get two minutes with +# unthrottled readers + writers, two minutes of unthrottled writers, two minutes +# of unthrottled readers and two minutes of idle. That is repeated. This is +# against a background of light reading and writing by the throttled threads. 
+ +ops = Operation(Operation.OP_UPDATE, tables[0]) +ops = op_multi_table(ops, tables, False) +ops = op_log_like(ops, log_table, 0) +ops = timed(240.0, ops) + sleep(240.0) +thread_big_writer = Thread(ops) +thread_big_writer.synchronized = True + +ops = Operation(Operation.OP_SEARCH, tables[0]) +ops = op_multi_table(ops, tables, False) +ops = op_log_like(ops, log_table, 0) +ops = timed(120.0, ops) + timed(240.0, ops) + sleep(120.0) +thread_big_reader = Thread(ops) +thread_big_reader.synchronized = True + +# End of added section. +# The new threads will also be added to the workload below. +############################################################################ + +workload = Workload(context, 20 * thread0 + 20 * thread1 + checkpoint_thread + logging_thread + 50 * thread_big_writer + 50 * thread_big_reader) +workload.options.report_interval=1 +workload.options.run_time=1800 +workload.options.sample_rate=1 +workload.options.warmup=0 +workload.options.sample_interval_ms = 1000 +workload.run(conn) + +latency_filename = "WT_TEST/latency.out" +latency.workload_latency(workload, latency_filename) diff --git a/src/third_party/wiredtiger/bench/workgen/runner/read_write_sync_short.py b/src/third_party/wiredtiger/bench/workgen/runner/read_write_sync_short.py new file mode 100755 index 00000000000..a3121b323d8 --- /dev/null +++ b/src/third_party/wiredtiger/bench/workgen/runner/read_write_sync_short.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2019 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +# Generated from runner/read_write_heavy.wtperf originally, then hand edited. +# A short run demonstrating how read or write threads can be synchronized. 
+import sys + +from runner import * +from wiredtiger import * +from workgen import * + +context = Context() +conn_config = "" +conn_config += ",cache_size=2GB,eviction=(threads_max=8),log=(enabled=true),session_max=250,statistics=(fast),statistics_log=(wait=1,json)" # explicitly added +conn = wiredtiger_open("WT_TEST", "create," + conn_config) +s = conn.open_session("") + +wtperf_table_config = "key_format=S,value_format=S," +\ + "exclusive=true,allocation_size=4kb," +\ + "internal_page_max=64kb,leaf_page_max=4kb,split_pct=100," +compress_table_config = "block_compressor=snappy," +table_config = "memory_page_max=10m,leaf_value_max=64MB,checksum=on,split_pct=90,type=file,log=(enabled=false),leaf_page_max=32k,block_compressor=snappy" +tables = [] +table_count = 100 +for i in range(0, table_count): + tname = "table:test" + str(i) + table = Table(tname) + s.create(tname, wtperf_table_config +\ + compress_table_config + table_config) + table.options.key_size = 20 + table.options.value_size = 7000 + tables.append(table) + +populate_threads = 4 +icount = 4000000 +# There are multiple tables to be filled during populate, +# the icount is split between them all. +pop_ops = Operation(Operation.OP_INSERT, tables[0]) +pop_ops = op_multi_table(pop_ops, tables) +nops_per_thread = icount / (populate_threads * table_count) +pop_thread = Thread(pop_ops * nops_per_thread) +pop_workload = Workload(context, populate_threads * pop_thread) +pop_workload.run(conn) +print('populate complete') + +# Log like file, requires that logging be enabled in the connection config. +log_name = "table:log" +s.create(log_name, wtperf_table_config + "key_format=S,value_format=S," + compress_table_config + table_config + ",log=(enabled=true)") +log_table = Table(log_name) + +ops = Operation(Operation.OP_UPDATE, tables[0]) +ops = op_multi_table(ops, tables, False) +ops = op_log_like(ops, log_table, 0) +thread0 = Thread(ops) +# These operations include log_like operations, which will increase the number +# of insert/update operations by a factor of 2.0. This may cause the +# actual operations performed to be above the throttle. +thread0.options.throttle=11 +thread0.options.throttle_burst=0 + +ops = Operation(Operation.OP_SEARCH, tables[0]) +ops = op_multi_table(ops, tables, False) +ops = op_log_like(ops, log_table, 0) +thread1 = Thread(ops) +thread1.options.throttle=60 +thread1.options.throttle_burst=0 + +ops = Operation(Operation.OP_SLEEP, "60") + \ + Operation(Operation.OP_CHECKPOINT, "") +checkpoint_thread = Thread(ops) + +ops = Operation(Operation.OP_SLEEP, "0.1") + \ + Operation(Operation.OP_LOG_FLUSH, "") +logging_thread = Thread(ops) + +############################################################################ +# This part was added to the generated file. +# Add threads that do a bunch of operations and sleep, all in a loop. +# The write threads are in two groups that synchronize with each other +# every 20 seconds. The read threads are in two groups that synchronize +# with each other every 16 seconds. There is a collective synchronization +# every 80 seconds. 
+ +ops = Operation(Operation.OP_UPDATE, tables[0]) +ops = op_multi_table(ops, tables, False) +ops = op_log_like(ops, log_table, 0) +ops = timed(5.0, ops) + sleep(5.0) +thread_big_10 = Thread(ops) +thread_big_10.synchronized = True + +ops = Operation(Operation.OP_UPDATE, tables[0]) +ops = op_multi_table(ops, tables, False) +ops = op_log_like(ops, log_table, 0) +ops = timed(5.0, ops) + sleep(15.0) +thread_big_20 = Thread(ops) +thread_big_20.synchronized = True + +ops = Operation(Operation.OP_SEARCH, tables[0]) +ops = op_multi_table(ops, tables, False) +ops = op_log_like(ops, log_table, 0) +ops = timed(4.0, ops) + sleep(4.0) +thread_bigread_8 = Thread(ops) +thread_bigread_8.synchronized = True + +ops = Operation(Operation.OP_SEARCH, tables[0]) +ops = op_multi_table(ops, tables, False) +ops = op_log_like(ops, log_table, 0) +ops = timed(4.0, ops) + sleep(12.0) +thread_bigread_16 = Thread(ops) +thread_bigread_16.synchronized = True + +# End of added section. +# The new threads will also be added to the workload below. +############################################################################ + +workload = Workload(context, 20 * thread0 + 20 * thread1 + checkpoint_thread + logging_thread + 50 * thread_big_10 + 50 * thread_big_20 + 50 * thread_bigread_8 + 50 * thread_bigread_16) +workload.options.report_interval=1 +workload.options.run_time=900 +workload.options.sample_rate=1 +workload.options.warmup=0 +workload.options.sample_interval_ms = 1000 +workload.run(conn) + +latency_filename = "WT_TEST/latency.out" +latency.workload_latency(workload, latency_filename) diff --git a/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py b/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py old mode 100644 new mode 100755 index 7c352aa6cda..42ffad6b247 --- a/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py +++ b/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py @@ -88,5 +88,5 @@ except: shutil.rmtree('WT_TEST', True) os.mkdir('WT_TEST') -from .core import txn, extensions_config, op_append, op_group_transaction, op_log_like, op_multi_table, op_populate_with_range +from .core import txn, extensions_config, op_append, op_group_transaction, op_log_like, op_multi_table, op_populate_with_range, sleep, timed from .latency import workload_latency diff --git a/src/third_party/wiredtiger/bench/workgen/runner/runner/core.py b/src/third_party/wiredtiger/bench/workgen/runner/runner/core.py old mode 100644 new mode 100755 index a79efc9c547..be660aecb88 --- a/src/third_party/wiredtiger/bench/workgen/runner/runner/core.py +++ b/src/third_party/wiredtiger/bench/workgen/runner/runner/core.py @@ -38,6 +38,23 @@ def txn(op, config=None): op._transaction = t return op +# sleep -- +# Create an operation to sleep a given number of seconds. +def sleep(seconds): + return Operation(Operation.OP_SLEEP, str(seconds)) + +# timed -- +# Configure the operation (and suboperations) to run until the time elapses. +def timed(seconds, op): + if op._group == None: + result = Operation() + result._group = OpList([op]) + result._repeatgroup = 1 + else: + result = op + result._timed = seconds + return result + # Check for a local build that contains the wt utility. First check in # current working directory, then in build_posix and finally in the disttop # directory. 
This isn't ideal - if a user has multiple builds in a tree we diff --git a/src/third_party/wiredtiger/bench/workgen/workgen.cxx b/src/third_party/wiredtiger/bench/workgen/workgen.cxx index 9bfa29e3136..9b6a2f76020 100644 --- a/src/third_party/wiredtiger/bench/workgen/workgen.cxx +++ b/src/third_party/wiredtiger/bench/workgen/workgen.cxx @@ -450,7 +450,8 @@ ThreadRunner::ThreadRunner() : _errno(0), _exception(), _thread(NULL), _context(NULL), _icontext(NULL), _workload(NULL), _wrunner(NULL), _rand_state(NULL), _throttle(NULL), _throttle_ops(0), _throttle_limit(0), - _in_transaction(false), _number(0), _stats(false), _table_usage(), + _in_transaction(false), _start_time_us(0), _op_time_us(0), + _number(0), _stats(false), _table_usage(), _cursors(NULL), _stop(false), _session(NULL), _keybuf(NULL), _valuebuf(NULL), _repeat(false) { } @@ -464,6 +465,8 @@ int ThreadRunner::create_all(WT_CONNECTION *conn) { WT_RET(close_all()); ASSERT(_session == NULL); + if (_thread->options.synchronized) + _thread->_op.synchronized_check(); WT_RET(conn->open_session(conn, NULL, NULL, &_session)); _table_usage.clear(); _stats.track_latency(_workload->options.sample_interval_ms > 0); @@ -562,6 +565,11 @@ int ThreadRunner::run() { ThreadOptions *options = &_thread->options; std::string name = options->name; + timespec start_time; + workgen_epoch(&start_time); + _start_time_us = ts_us(start_time); + _op_time_us = _start_time_us; + VERBOSE(*this, "thread " << name << " running"); if (options->throttle != 0) { _throttle = new Throttle(*this, options->throttle, @@ -855,6 +863,7 @@ int ThreadRunner::op_run(Operation *op) { // Never retry on an internal op. retry_op = false; WT_ERR(op->_internal->run(this, _session)); + _op_time_us += op->_internal->sync_time_us(); } } @@ -865,12 +874,28 @@ int ThreadRunner::op_run(Operation *op) { } else if (track != NULL) track->complete(); - if (op->_group != NULL) - VERBOSE(*this, "GROUP operation " << op->_repeatgroup << " times"); - for (int count = 0; !_stop && count < op->_repeatgroup; count++) - for (std::vector::iterator i = op->_group->begin(); - i != op->_group->end(); i++) - WT_ERR(op_run(&*i)); + if (op->_group != NULL) { + uint64_t endtime = 0; + timespec now; + + if (op->_timed != 0.0) + endtime = _op_time_us + secs_us(op->_timed); + + VERBOSE(*this, "GROUP operation " << op->_timed << " secs, " + << op->_repeatgroup << "times"); + + do { + for (int count = 0; !_stop && count < op->_repeatgroup; count++) { + for (std::vector::iterator i = op->_group->begin(); + i != op->_group->end(); i++) + WT_ERR(op_run(&*i)); + } + workgen_epoch(&now); + } while (!_stop && ts_us(now) < endtime); + + if (op->_timed != 0.0) + _op_time_us = endtime; + } err: if (own_cursor) WT_TRET(cursor->close(cursor)); @@ -995,7 +1020,7 @@ int Throttle::throttle(uint64_t op_count, uint64_t *op_limit) { } ThreadOptions::ThreadOptions() : name(), throttle(0.0), throttle_burst(1.0), - _options() { + synchronized(false), _options() { _options.add_string("name", name, "name of the thread"); _options.add_double("throttle", throttle, "Limit to this number of operations per second"); @@ -1005,7 +1030,8 @@ ThreadOptions::ThreadOptions() : name(), throttle(0.0), throttle_burst(1.0), } ThreadOptions::ThreadOptions(const ThreadOptions &other) : name(other.name), throttle(other.throttle), - throttle_burst(other.throttle_burst), _options(other._options) {} + throttle_burst(other.throttle_burst), synchronized(other.synchronized), + _options(other._options) {} ThreadOptions::~ThreadOptions() {} void @@ -1051,27 
+1077,27 @@ void Thread::describe(std::ostream &os) const { Operation::Operation() : _optype(OP_NONE), _internal(NULL), _table(), _key(), _value(), _config(), - _transaction(NULL), _group(NULL), _repeatgroup(0) { + _transaction(NULL), _group(NULL), _repeatgroup(0), _timed(0.0) { init_internal(NULL); } Operation::Operation(OpType optype, Table table, Key key, Value value) : _optype(optype), _internal(NULL), _table(table), _key(key), _value(value), - _config(), _transaction(NULL), _group(NULL), _repeatgroup(0) { + _config(), _transaction(NULL), _group(NULL), _repeatgroup(0), _timed(0.0) { init_internal(NULL); size_check(); } Operation::Operation(OpType optype, Table table, Key key) : _optype(optype), _internal(NULL), _table(table), _key(key), _value(), - _config(), _transaction(NULL), _group(NULL), _repeatgroup(0) { + _config(), _transaction(NULL), _group(NULL), _repeatgroup(0), _timed(0.0) { init_internal(NULL); size_check(); } Operation::Operation(OpType optype, Table table) : _optype(optype), _internal(NULL), _table(table), _key(), _value(), - _config(), _transaction(NULL), _group(NULL), _repeatgroup(0) { + _config(), _transaction(NULL), _group(NULL), _repeatgroup(0), _timed(0.0) { init_internal(NULL); size_check(); } @@ -1080,7 +1106,7 @@ Operation::Operation(const Operation &other) : _optype(other._optype), _internal(NULL), _table(other._table), _key(other._key), _value(other._value), _config(other._config), _transaction(other._transaction), _group(other._group), - _repeatgroup(other._repeatgroup) { + _repeatgroup(other._repeatgroup), _timed(other._timed) { // Creation and destruction of _group and _transaction is managed // by Python. init_internal(other._internal); @@ -1088,7 +1114,8 @@ Operation::Operation(const Operation &other) : Operation::Operation(OpType optype, const char *config) : _optype(optype), _internal(NULL), _table(), _key(), _value(), - _config(config), _transaction(NULL), _group(NULL), _repeatgroup(0) { + _config(config), _transaction(NULL), _group(NULL), _repeatgroup(0), + _timed(0.0) { init_internal(NULL); } @@ -1105,6 +1132,7 @@ Operation& Operation::operator=(const Operation &other) { _transaction = other._transaction; _group = other._group; _repeatgroup = other._repeatgroup; + _timed = other._timed; delete _internal; _internal = NULL; init_internal(other._internal); @@ -1154,6 +1182,11 @@ void Operation::init_internal(OperationInternal *other) { } } +bool Operation::combinable() const { + return (_group != NULL && _repeatgroup == 1 && _timed == 0.0 && + _transaction == NULL && _config == ""); +} + void Operation::create_all() { size_check(); @@ -1169,14 +1202,19 @@ void Operation::describe(std::ostream &os) const { os << ", "; _value.describe(os); } if (!_config.empty()) - os << ", '" << _config; + os << ", '" << _config << "'"; if (_transaction != NULL) { os << ", ["; _transaction->describe(os); os << "]"; } + if (_timed != 0.0) + os << ", [timed " << _timed << " secs]"; if (_group != NULL) { - os << ", group[" << _repeatgroup << "]: {"; + os << ", group"; + if (_repeatgroup != 1) + os << "[repeat " << _repeatgroup << "]"; + os << ": {"; bool first = true; for (std::vector::const_iterator i = _group->begin(); i != _group->end(); i++) { @@ -1331,6 +1369,19 @@ void Operation::size_check() const { } } +void Operation::synchronized_check() const { + if (_timed != 0.0) + return; + if (_optype != Operation::OP_NONE) { + if (is_table_op() || _internal->sync_time_us() == 0) + THROW("operation cannot be synchronized, needs to be timed()"); + } else if (_group != NULL) { 
+ for (std::vector::iterator i = _group->begin(); + i != _group->end(); i++) + i->synchronized_check(); + } +} + int CheckpointOperationInternal::run(ThreadRunner *runner, WT_SESSION *session) { (void)runner; /* not used */ @@ -1357,12 +1408,40 @@ void SleepOperationInternal::parse_config(const std::string &config) int SleepOperationInternal::run(ThreadRunner *runner, WT_SESSION *session) { + uint64_t endtime; + timespec now; + uint64_t now_us; + (void)runner; /* not used */ (void)session; /* not used */ - sleep(_sleepvalue); + + workgen_epoch(&now); + now_us = ts_us(now); + if (runner->_thread->options.synchronized) + endtime = runner->_op_time_us + secs_us(_sleepvalue); + else + endtime = now_us + secs_us(_sleepvalue); + + // Sleep for up to a second at a time, so we'll break out if + // we should stop. + while (!runner->_stop && now_us < endtime) { + uint64_t sleep_us = endtime - now_us; + if (sleep_us >= WT_MILLION) // one second + sleep(1); + else + usleep(sleep_us); + + workgen_epoch(&now); + now_us = ts_us(now); + } return (0); } +uint64_t SleepOperationInternal::sync_time_us() const +{ + return (secs_us(_sleepvalue)); +} + void TableOperationInternal::parse_config(const std::string &config) { if (!config.empty()) { diff --git a/src/third_party/wiredtiger/bench/workgen/workgen.h b/src/third_party/wiredtiger/bench/workgen/workgen.h index d29b42f17d7..c6c739d54cf 100644 --- a/src/third_party/wiredtiger/bench/workgen/workgen.h +++ b/src/third_party/wiredtiger/bench/workgen/workgen.h @@ -295,6 +295,7 @@ struct Operation { Transaction *_transaction; std::vector *_group; int _repeatgroup; + double _timed; Operation(); Operation(OpType optype, Table table, Key key, Value value); @@ -305,6 +306,9 @@ struct Operation { Operation(const Operation &other); ~Operation(); + // Check if adding (via Python '+') another operation to this one is + // as easy as appending the new operation to the _group. 
+ bool combinable() const; void describe(std::ostream &os) const; #ifndef SWIG Operation& operator=(const Operation &other); @@ -317,6 +321,7 @@ struct Operation { uint64_t n, char *result) const; void kv_size_buffer(bool iskey, size_t &size) const; void size_check() const; + void synchronized_check() const; #endif }; @@ -327,6 +332,7 @@ struct ThreadOptions { std::string name; double throttle; double throttle_burst; + bool synchronized; ThreadOptions(); ThreadOptions(const ThreadOptions &other); @@ -334,6 +340,8 @@ struct ThreadOptions { void describe(std::ostream &os) const { os << "throttle " << throttle; + os << ", throttle_burst " << throttle_burst; + os << ", synchronized " << synchronized; } std::string help() const { return _options.help(); } diff --git a/src/third_party/wiredtiger/bench/workgen/workgen.swig b/src/third_party/wiredtiger/bench/workgen/workgen.swig index 05866c1b3e2..5a0d03f6c69 100644 --- a/src/third_party/wiredtiger/bench/workgen/workgen.swig +++ b/src/third_party/wiredtiger/bench/workgen/workgen.swig @@ -158,7 +158,7 @@ WorkgenFrozenClass(WorkloadOptions) def __add__(self, other): if not isinstance(other, Operation): raise Exception('Operation.__sum__ requires an Operation') - if self._group == None or self._repeatgroup != 1 or self._transaction != None or self._config != '': + if not self.combinable(): op = Operation() op._group = OpList([self, other]) op._repeatgroup = 1 diff --git a/src/third_party/wiredtiger/bench/workgen/workgen_int.h b/src/third_party/wiredtiger/bench/workgen/workgen_int.h index 50eeada0e30..e4cc948c7b9 100644 --- a/src/third_party/wiredtiger/bench/workgen/workgen_int.h +++ b/src/third_party/wiredtiger/bench/workgen/workgen_int.h @@ -101,6 +101,8 @@ struct ThreadRunner { Throttle *_throttle; uint64_t _throttle_ops; uint64_t _throttle_limit; + uint64_t _start_time_us; + uint64_t _op_time_us; // time that current operation starts bool _in_transaction; uint32_t _number; Stats _stats; @@ -188,6 +190,7 @@ struct OperationInternal { virtual void parse_config(const std::string &config) { (void)config; } virtual int run(ThreadRunner *runner, WT_SESSION *session) { (void)runner; (void)session; return (0); } + virtual uint64_t sync_time_us() const { return (0); } }; struct CheckpointOperationInternal : OperationInternal { @@ -227,6 +230,7 @@ struct SleepOperationInternal : OperationInternal { OperationInternal(other),_sleepvalue(other._sleepvalue) {} virtual void parse_config(const std::string &config); virtual int run(ThreadRunner *runner, WT_SESSION *session); + virtual uint64_t sync_time_us() const; }; struct TableInternal { diff --git a/src/third_party/wiredtiger/bench/workgen/workgen_time.h b/src/third_party/wiredtiger/bench/workgen/workgen_time.h index 9e045087d3a..187a452cb86 100644 --- a/src/third_party/wiredtiger/bench/workgen/workgen_time.h +++ b/src/third_party/wiredtiger/bench/workgen/workgen_time.h @@ -199,3 +199,9 @@ ts_us(const timespec &ts) { return (ns_to_us(ts.tv_nsec) + sec_to_us(ts.tv_sec)); } + +inline uint64_t +secs_us(double secs) +{ + return (secs * USEC_PER_SEC); +} diff --git a/src/third_party/wiredtiger/bench/workgen/wtperf.py b/src/third_party/wiredtiger/bench/workgen/wtperf.py index e4ce0393276..24e7dd8f733 100755 --- a/src/third_party/wiredtiger/bench/workgen/wtperf.py +++ b/src/third_party/wiredtiger/bench/workgen/wtperf.py @@ -393,7 +393,7 @@ class Translator: def translate_table_create(self): opts = self.options s = '' - s += 'wtperf_table_config = "key_format=S,value_format=S,type=lsm," +\\\n' + s += 
'wtperf_table_config = "key_format=S,value_format=S," +\\\n' s += ' "exclusive=true,allocation_size=4kb," +\\\n' s += ' "internal_page_max=64kb,leaf_page_max=4kb,split_pct=100,"\n' if opts.compression != '': diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 4db94e19cf3..d585557d968 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -162,12 +162,16 @@ file_runtime_config = common_runtime_config + [ if mixed update use is allowed. If 'key_consistent' is set then all updates to a specific key must be the same with respect to timestamp usage or not.''', - choices=['always','key_consistent', 'never','none']), + choices=['always', 'key_consistent', 'never', 'none']), + Config('durable_timestamp', 'none', r''' + verify that durable timestamps should 'always' or 'never' be used + on modifications with this table.''', + choices=['always', 'key_consistent', 'never', 'none']), Config('read_timestamp', 'none', r''' verify that timestamps should 'always' or 'never' be used on reads with this table. Verification is 'none' if mixed read use is allowed.''', - choices=['always','never','none']) + choices=['always', 'never', 'none']) ], undoc=True), Config('cache_resident', 'false', r''' do not ever evict the object's pages from cache. Not compatible with @@ -620,6 +624,7 @@ connection_runtime_config = [ 'checkpoint', 'checkpoint_progress', 'compact', + 'compact_progress', 'error_returns', 'evict', 'evict_stuck', diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 3db38f0ea18..1d49ea15866 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -1257,6 +1257,7 @@ th tid timedwait timestamp +timestamped timestamps tmp todo diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 85fe66ac667..0842ac032b3 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "37e1570f8205f3e2d32a7abef6d0e8b09421f3ad", + "commit": "afdead1093b5c5b41dd54ddddf6f42d92bef1666", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-4.2" diff --git a/src/third_party/wiredtiger/src/block/block_compact.c b/src/third_party/wiredtiger/src/block/block_compact.c index 85fc7303bc2..c6d02e5b514 100644 --- a/src/third_party/wiredtiger/src/block/block_compact.c +++ b/src/third_party/wiredtiger/src/block/block_compact.c @@ -178,7 +178,8 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, } __wt_spin_unlock(session, &block->live_lock); - if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) { + if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT) || + WT_VERBOSE_ISSET(session, WT_VERB_COMPACT_PROGRESS)) { ++block->compact_pages_reviewed; if (*skipp) ++block->compact_pages_skipped; diff --git a/src/third_party/wiredtiger/src/btree/bt_compact.c b/src/third_party/wiredtiger/src/btree/bt_compact.c index 37ee36634ff..e358e993ec4 100644 --- a/src/third_party/wiredtiger/src/btree/bt_compact.c +++ b/src/third_party/wiredtiger/src/btree/bt_compact.c @@ -103,6 +103,39 @@ __compact_rewrite_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) return (ret); } +/* + * __compact_progress -- + * Output a compact progress message. 
+ */ +static void +__compact_progress(WT_SESSION_IMPL *session) +{ + struct timespec cur_time; + WT_BM *bm; + uint64_t time_diff; + + if (!WT_VERBOSE_ISSET(session, WT_VERB_COMPACT_PROGRESS)) + return; + + bm = S2BT(session)->bm; + __wt_epoch(session, &cur_time); + + /* Log one progress message every twenty seconds. */ + time_diff = WT_TIMEDIFF_SEC(cur_time, session->compact->begin); + if (time_diff / WT_PROGRESS_MSG_PERIOD > + session->compact->prog_msg_count) { + __wt_verbose(session, + WT_VERB_COMPACT_PROGRESS, "Compact running" + " for %" PRIu64 " seconds; reviewed %" + PRIu64 " pages, skipped %" PRIu64 " pages," + " wrote %" PRIu64 " pages", time_diff, + bm->block->compact_pages_reviewed, + bm->block->compact_pages_skipped, + bm->block->compact_pages_written); + session->compact->prog_msg_count++; + } +} + /* * __wt_compact -- * Compact a file. @@ -137,6 +170,7 @@ __wt_compact(WT_SESSION_IMPL *session) * Quit if eviction is stuck, we're making the problem worse. */ if (++i > 100) { + __compact_progress(session); WT_ERR(__wt_session_compact_check_timeout(session)); if (__wt_cache_stuck(session)) diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c index d12548b008e..f504bdeddf4 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curnext.c +++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c @@ -13,13 +13,17 @@ * Return the next entry on the append list. */ static inline int -__cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage) +__cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) { WT_SESSION_IMPL *session; WT_UPDATE *upd; session = (WT_SESSION_IMPL *)cbt->iface.session; + /* If restarting after a prepare conflict, jump to the right spot. */ + if (restart) + goto restart_read; + if (newpage) { if ((cbt->ins = WT_SKIP_FIRST(cbt->ins_head)) == NULL) return (WT_NOTFOUND); @@ -58,7 +62,7 @@ __cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage) cbt->v = 0; cbt->iface.value.data = &cbt->v; } else { - WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); if (upd == NULL) { cbt->v = 0; cbt->iface.value.data = &cbt->v; @@ -74,7 +78,7 @@ __cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage) * Move to the next, fixed-length column-store item. */ static inline int -__cursor_fix_next(WT_CURSOR_BTREE *cbt, bool newpage) +__cursor_fix_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) { WT_BTREE *btree; WT_PAGE *page; @@ -86,6 +90,10 @@ __cursor_fix_next(WT_CURSOR_BTREE *cbt, bool newpage) page = cbt->ref->page; upd = NULL; + /* If restarting after a prepare conflict, jump to the right spot. */ + if (restart) + goto restart_read; + /* Initialize for each new page. */ if (newpage) { cbt->last_standard_recno = __col_fix_last_recno(cbt->ref); @@ -108,7 +116,7 @@ new_page: if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins)) cbt->ins = NULL; if (cbt->ins != NULL) - WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); if (upd == NULL) { cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt); cbt->iface.value.data = &cbt->v; @@ -123,13 +131,17 @@ new_page: * Return the next variable-length entry on the append list. 
*/ static inline int -__cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage) +__cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) { WT_SESSION_IMPL *session; WT_UPDATE *upd; session = (WT_SESSION_IMPL *)cbt->iface.session; + /* If restarting after a prepare conflict, jump to the right spot. */ + if (restart) + goto restart_read; + if (newpage) { cbt->ins = WT_SKIP_FIRST(cbt->ins_head); goto new_page; @@ -141,7 +153,7 @@ new_page: if (cbt->ins == NULL) return (WT_NOTFOUND); __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins)); - WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); if (upd == NULL) continue; if (upd->type == WT_UPDATE_TOMBSTONE) { @@ -160,7 +172,7 @@ new_page: if (cbt->ins == NULL) * Move to the next, variable-length column-store item. */ static inline int -__cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage) +__cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) { WT_CELL *cell; WT_CELL_UNPACK unpack; @@ -176,6 +188,10 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage) rle_start = 0; /* -Werror=maybe-uninitialized */ + /* If restarting after a prepare conflict, jump to the right spot. */ + if (restart) + goto restart_read; + /* Initialize for each new page. */ if (newpage) { /* @@ -197,7 +213,8 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->recno + 1); -new_page: /* Find the matching WT_COL slot. */ +new_page: +restart_read: /* Find the matching WT_COL slot. */ if ((cip = __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL) return (WT_NOTFOUND); @@ -282,7 +299,7 @@ new_page: /* Find the matching WT_COL slot. */ * Move to the next row-store item. */ static inline int -__cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage) +__cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) { WT_INSERT *ins; WT_ITEM *key; @@ -295,6 +312,15 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage) page = cbt->ref->page; key = &cbt->iface.key; + /* If restarting after a prepare conflict, jump to the right spot. 
*/ + if (restart) { + if (cbt->iter_retry == WT_CBT_RETRY_INSERT) + goto restart_read_insert; + if (cbt->iter_retry == WT_CBT_RETRY_PAGE) + goto restart_read_page; + } + cbt->iter_retry = WT_CBT_RETRY_NOTSET; + /* * For row-store pages, we need a single item that tells us the part * of the page we're walking (otherwise switching from next to prev @@ -329,7 +355,10 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage) if (cbt->ins != NULL) cbt->ins = WT_SKIP_NEXT(cbt->ins); -new_insert: if ((ins = cbt->ins) != NULL) { +new_insert: + cbt->iter_retry = WT_CBT_RETRY_INSERT; +restart_read_insert: + if ((ins = cbt->ins) != NULL) { WT_RET(__wt_txn_read(session, ins->upd, &upd)); if (upd == NULL) continue; @@ -362,7 +391,9 @@ new_insert: if ((ins = cbt->ins) != NULL) { cbt->ins_head = NULL; cbt->ins = NULL; + cbt->iter_retry = WT_CBT_RETRY_PAGE; cbt->slot = cbt->row_iteration_slot / 2 - 1; +restart_read_page: rip = &page->pg_row[cbt->slot]; WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd)); if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) { @@ -593,7 +624,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_PAGE *page; WT_SESSION_IMPL *session; uint32_t flags; - bool newpage, visible; + bool newpage, restart; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; @@ -601,23 +632,11 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_STAT_CONN_INCR(session, cursor_next); WT_STAT_DATA_INCR(session, cursor_next); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + flags = WT_READ_NO_SPLIT | WT_READ_SKIP_INTL; /* tree walk flags */ + if (truncating) + LF_SET(WT_READ_TRUNCATE); - /* - * If this cursor has returned prepare conflict earlier, check to see - * whether that prepared update is resolved or not. If not resolved, - * continue returning prepare conflict. If resolved, return the value - * based on the visibility rules. - */ - if (F_ISSET(cbt, WT_CBT_ITERATE_RETRY_NEXT)) { - WT_ERR(__cursor_check_prepared_update(cbt, &visible)); - if (visible) { -#ifdef HAVE_DIAGNOSTIC - WT_ERR(__wt_cursor_key_order_check(session, cbt, true)); -#endif - return (0); - } - } + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); WT_ERR(__cursor_func_init(cbt, false)); @@ -633,19 +652,20 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) * found. Then, move to the next page, until we reach the end of the * file. */ - flags = WT_READ_NO_SPLIT | WT_READ_SKIP_INTL; /* tree walk flags */ - if (truncating) - LF_SET(WT_READ_TRUNCATE); - for (newpage = false;; newpage = true) { + restart = F_ISSET(cbt, WT_CBT_ITERATE_RETRY_NEXT); + F_CLR(cbt, WT_CBT_ITERATE_RETRY_NEXT); + for (newpage = false;; newpage = true, restart = false) { page = cbt->ref == NULL ? 
NULL : cbt->ref->page; if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) { switch (page->type) { case WT_PAGE_COL_FIX: - ret = __cursor_fix_append_next(cbt, newpage); + ret = __cursor_fix_append_next( + cbt, newpage, restart); break; case WT_PAGE_COL_VAR: - ret = __cursor_var_append_next(cbt, newpage); + ret = __cursor_var_append_next( + cbt, newpage, restart); break; WT_ILLEGAL_VALUE_ERR(session, page->type); } @@ -657,13 +677,13 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) } else if (page != NULL) { switch (page->type) { case WT_PAGE_COL_FIX: - ret = __cursor_fix_next(cbt, newpage); + ret = __cursor_fix_next(cbt, newpage, restart); break; case WT_PAGE_COL_VAR: - ret = __cursor_var_next(cbt, newpage); + ret = __cursor_var_next(cbt, newpage, restart); break; case WT_PAGE_ROW_LEAF: - ret = __cursor_row_next(cbt, newpage); + ret = __cursor_row_next(cbt, newpage, restart); break; WT_ILLEGAL_VALUE_ERR(session, page->type); } @@ -712,7 +732,7 @@ err: switch (ret) { * at a prepared update, hence current key returned could be * same as earlier returned key. * - * eg: Initial data set : {1,2,3,...10) + * eg: Initial data set : (1,2,3,...10) * insert key 11 in a prepare transaction. * loop on next will return 1,2,3...10 and subsequent call to * next will return a prepare conflict. Now if we call prev diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c index 32310b8a341..22effc47553 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curprev.c +++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c @@ -125,13 +125,17 @@ restart: * Return the previous fixed-length entry on the append list. */ static inline int -__cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage) +__cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) { WT_SESSION_IMPL *session; WT_UPDATE *upd; session = (WT_SESSION_IMPL *)cbt->iface.session; + /* If restarting after a prepare conflict, jump to the right spot. */ + if (restart) + goto restart_read; + if (newpage) { if ((cbt->ins = WT_SKIP_LAST(cbt->ins_head)) == NULL) return (WT_NOTFOUND); @@ -204,7 +208,7 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage) cbt->iface.value.data = &cbt->v; } else { upd = NULL; - WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); if (upd == NULL) { cbt->v = 0; cbt->iface.value.data = &cbt->v; @@ -220,7 +224,7 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage) * Move to the previous, fixed-length column-store item. */ static inline int -__cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage) +__cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) { WT_BTREE *btree; WT_PAGE *page; @@ -231,6 +235,10 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage) page = cbt->ref->page; btree = S2BT(session); + /* If restarting after a prepare conflict, jump to the right spot. */ + if (restart) + goto restart_read; + /* Initialize for each new page. */ if (newpage) { cbt->last_standard_recno = __col_fix_last_recno(cbt->ref); @@ -254,7 +262,7 @@ new_page: cbt->ins = NULL; upd = NULL; if (cbt->ins != NULL) - WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); if (upd == NULL) { cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt); cbt->iface.value.data = &cbt->v; @@ -269,13 +277,17 @@ new_page: * Return the previous variable-length entry on the append list. 
*/ static inline int -__cursor_var_append_prev(WT_CURSOR_BTREE *cbt, bool newpage) +__cursor_var_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) { WT_SESSION_IMPL *session; WT_UPDATE *upd; session = (WT_SESSION_IMPL *)cbt->iface.session; + /* If restarting after a prepare conflict, jump to the right spot. */ + if (restart) + goto restart_read; + if (newpage) { cbt->ins = WT_SKIP_LAST(cbt->ins_head); goto new_page; @@ -287,7 +299,7 @@ new_page: if (cbt->ins == NULL) return (WT_NOTFOUND); __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins)); - WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); if (upd == NULL) continue; if (upd->type == WT_UPDATE_TOMBSTONE) { @@ -306,7 +318,7 @@ new_page: if (cbt->ins == NULL) * Move to the previous, variable-length column-store item. */ static inline int -__cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage) +__cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) { WT_CELL *cell; WT_CELL_UNPACK unpack; @@ -322,6 +334,10 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage) rle_start = 0; /* -Werror=maybe-uninitialized */ + /* If restarting after a prepare conflict, jump to the right spot. */ + if (restart) + goto restart_read; + /* Initialize for each new page. */ if (newpage) { /* @@ -344,7 +360,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage) new_page: if (cbt->recno < cbt->ref->ref_recno) return (WT_NOTFOUND); - /* Find the matching WT_COL slot. */ +restart_read: /* Find the matching WT_COL slot. */ if ((cip = __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL) return (WT_NOTFOUND); @@ -428,7 +444,7 @@ new_page: if (cbt->recno < cbt->ref->ref_recno) * Move to the previous row-store item. */ static inline int -__cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage) +__cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) { WT_INSERT *ins; WT_ITEM *key; @@ -441,6 +457,15 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage) page = cbt->ref->page; key = &cbt->iface.key; + /* If restarting after a prepare conflict, jump to the right spot. 
*/ + if (restart) { + if (cbt->iter_retry == WT_CBT_RETRY_INSERT) + goto restart_read_insert; + if (cbt->iter_retry == WT_CBT_RETRY_PAGE) + goto restart_read_page; + } + cbt->iter_retry = WT_CBT_RETRY_NOTSET; + /* * For row-store pages, we need a single item that tells us the part * of the page we're walking (otherwise switching from next to prev @@ -486,7 +511,10 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage) if (cbt->ins != NULL) WT_RET(__cursor_skip_prev(cbt)); -new_insert: if ((ins = cbt->ins) != NULL) { +new_insert: + cbt->iter_retry = WT_CBT_RETRY_INSERT; +restart_read_insert: + if ((ins = cbt->ins) != NULL) { WT_RET(__wt_txn_read(session, ins->upd, &upd)); if (upd == NULL) continue; @@ -521,7 +549,9 @@ new_insert: if ((ins = cbt->ins) != NULL) { cbt->ins_head = NULL; cbt->ins = NULL; + cbt->iter_retry = WT_CBT_RETRY_PAGE; cbt->slot = cbt->row_iteration_slot / 2 - 1; +restart_read_page: rip = &page->pg_row[cbt->slot]; WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd)); if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) { @@ -547,7 +577,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) WT_PAGE *page; WT_SESSION_IMPL *session; uint32_t flags; - bool newpage, visible; + bool newpage, restart; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; @@ -555,24 +585,12 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) WT_STAT_CONN_INCR(session, cursor_prev); WT_STAT_DATA_INCR(session, cursor_prev); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + flags = /* tree walk flags */ + WT_READ_NO_SPLIT | WT_READ_PREV | WT_READ_SKIP_INTL; + if (truncating) + LF_SET(WT_READ_TRUNCATE); - /* - * If this cursor has returned prepare conflict earlier, check to see - * whether that prepared update is resolved or not. If not resolved, - * continue returning prepare conflict. If resolved, return the value - * based on the visibility rules. - */ - if (F_ISSET(cbt, WT_CBT_ITERATE_RETRY_PREV)) { - WT_ERR(__cursor_check_prepared_update(cbt, &visible)); - if (visible) { -#ifdef HAVE_DIAGNOSTIC - WT_ERR( - __wt_cursor_key_order_check(session, cbt, false)); -#endif - return (0); - } - } + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); WT_ERR(__cursor_func_init(cbt, false)); @@ -588,11 +606,9 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) * found. Then, move to the previous page, until we reach the start * of the file. */ - flags = /* tree walk flags */ - WT_READ_NO_SPLIT | WT_READ_PREV | WT_READ_SKIP_INTL; - if (truncating) - LF_SET(WT_READ_TRUNCATE); - for (newpage = false;; newpage = true) { + restart = F_ISSET(cbt, WT_CBT_ITERATE_RETRY_PREV); + F_CLR(cbt, WT_CBT_ITERATE_RETRY_PREV); + for (newpage = false;; newpage = true, restart = false) { page = cbt->ref == NULL ? 
NULL : cbt->ref->page; /* @@ -607,10 +623,12 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) { switch (page->type) { case WT_PAGE_COL_FIX: - ret = __cursor_fix_append_prev(cbt, newpage); + ret = __cursor_fix_append_prev( + cbt, newpage, restart); break; case WT_PAGE_COL_VAR: - ret = __cursor_var_append_prev(cbt, newpage); + ret = __cursor_var_append_prev( + cbt, newpage, restart); break; WT_ILLEGAL_VALUE_ERR(session, page->type); } @@ -624,13 +642,13 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) if (page != NULL) { switch (page->type) { case WT_PAGE_COL_FIX: - ret = __cursor_fix_prev(cbt, newpage); + ret = __cursor_fix_prev(cbt, newpage, restart); break; case WT_PAGE_COL_VAR: - ret = __cursor_var_prev(cbt, newpage); + ret = __cursor_var_prev(cbt, newpage, restart); break; case WT_PAGE_ROW_LEAF: - ret = __cursor_row_prev(cbt, newpage); + ret = __cursor_row_prev(cbt, newpage, restart); break; WT_ILLEGAL_VALUE_ERR(session, page->type); } @@ -668,7 +686,7 @@ err: switch (ret) { * at a prepared update, hence current key returned could be * same as earlier returned key. * - * eg: Initial data set : {2,3,...10) + * eg: Initial data set : (2,3,...10) * insert key 1 in a prepare transaction. * loop on prev will return 10,...3,2 and subsequent call to * prev will return a prepare conflict. Now if we call next diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index 1ce403dba7f..29c784c4198 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -1169,6 +1169,7 @@ __debug_modify(WT_DBG *ds, WT_UPDATE *upd) static int __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) { + const char *prepare_state; char ts_string[WT_TS_INT_STRING_SIZE]; for (; upd != NULL; upd = upd->next) { @@ -1200,16 +1201,41 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) WT_RET(ds->f(ds, "\tvalue {tombstone}\n")); break; } + if (upd->txnid == WT_TXN_ABORTED) WT_RET(ds->f(ds, "\t" "txn id aborted")); else WT_RET(ds->f(ds, "\t" "txn id %" PRIu64, upd->txnid)); + __wt_timestamp_to_string( upd->start_ts, ts_string, sizeof(ts_string)); WT_RET(ds->f(ds, ", start_ts %s", ts_string)); __wt_timestamp_to_string( upd->stop_ts, ts_string, sizeof(ts_string)); WT_RET(ds->f(ds, ", stop_ts %s", ts_string)); + if (upd->durable_ts != WT_TS_NONE) { + __wt_timestamp_to_string(upd->durable_ts, + ts_string, sizeof(ts_string)); + WT_RET(ds->f(ds, ", durable-ts %s", ts_string)); + } + + prepare_state = NULL; + switch (upd->prepare_state) { + case WT_PREPARE_INIT: + break; + case WT_PREPARE_INPROGRESS: + prepare_state = "in-progress"; + break; + case WT_PREPARE_LOCKED: + prepare_state = "locked"; + break; + case WT_PREPARE_RESOLVED: + prepare_state = "resolved"; + break; + } + if (prepare_state != NULL) + WT_RET(ds->f(ds, ", prepare %s", prepare_state)); + WT_RET(ds->f(ds, "\n")); } return (0); diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 6d96c2537b3..aa26a627a94 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -437,6 +437,22 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_KEYS); else if (WT_STRING_MATCH("never", cval.str, cval.len)) FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER); + + /* + * A durable timestamp always implies a commit timestamp. 
But never + * having a durable timestamp does not imply anything about a commit + * timestamp. + */ + WT_RET(__wt_config_gets(session, + cfg, "assert.durable_timestamp", &cval)); + if (WT_STRING_MATCH("always", cval.str, cval.len)) + FLD_SET(btree->assert_flags, + WT_ASSERT_COMMIT_TS_ALWAYS | WT_ASSERT_DURABLE_TS_ALWAYS); + else if (WT_STRING_MATCH("key_consistent", cval.str, cval.len)) + FLD_SET(btree->assert_flags, WT_ASSERT_DURABLE_TS_KEYS); + else if (WT_STRING_MATCH("never", cval.str, cval.len)) + FLD_SET(btree->assert_flags, WT_ASSERT_DURABLE_TS_NEVER); + WT_RET(__wt_config_gets(session, cfg, "assert.read_timestamp", &cval)); if (WT_STRING_MATCH("always", cval.str, cval.len)) FLD_SET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS); diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c index 5896852c1bf..82045edea31 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c @@ -8,8 +8,9 @@ #include "wt_internal.h" -static int __err_cell_corrupt(WT_SESSION_IMPL *, uint32_t, const char *); -static int __err_cell_corrupt_or_eof(WT_SESSION_IMPL *, uint32_t, const char *); +static int __err_cell_corrupt(WT_SESSION_IMPL *, int, uint32_t, const char *); +static int __err_cell_corrupt_or_eof( + WT_SESSION_IMPL *, int, uint32_t, const char *); static int __err_cell_type( WT_SESSION_IMPL *, uint32_t, const char *, uint8_t, uint8_t); static int __verify_dsk_chunk( @@ -31,12 +32,19 @@ static int __verify_dsk_row( goto err; \ } while (0) -#define WT_RET_VRFY(session, ...) do { \ - if (!(F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))) \ - __wt_errx(session, __VA_ARGS__); \ - return (WT_ERROR); \ +#define WT_RET_VRFY_RETVAL(session, ret, ...) do { \ + if (!(F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))) { \ + if ((ret) == 0) \ + __wt_errx(session, __VA_ARGS__); \ + else \ + __wt_err(session, ret, __VA_ARGS__); \ + } \ + return ((ret) == 0 ? ret : WT_ERROR); \ } while (0) +#define WT_RET_VRFY(session, ...) \ + WT_RET_VRFY_RETVAL(session, 0, __VA_ARGS__); + /* * WT_CELL_FOREACH_VRFY -- * Iterate through each cell on a page. Verify-specific version of the @@ -394,9 +402,9 @@ __verify_dsk_row(WT_SESSION_IMPL *session, ++cell_num; /* Carefully unpack the cell. 
*/ - if (__wt_cell_unpack_safe( - session, dsk, cell, unpack, end) != 0) { - ret = __err_cell_corrupt(session, cell_num, tag); + ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end); + if (ret != 0) { + (void)__err_cell_corrupt(session, ret, cell_num, tag); goto err; } @@ -470,8 +478,8 @@ __verify_dsk_row(WT_SESSION_IMPL *session, case WT_CELL_VALUE_OVFL: if ((ret = bm->addr_invalid( bm, session, unpack->data, unpack->size)) == EINVAL) - ret = __err_cell_corrupt_or_eof( - session, cell_num, tag); + (void)__err_cell_corrupt_or_eof( + session, ret, cell_num, tag); WT_ERR(ret); break; } @@ -615,8 +623,7 @@ key_compare: /* __wt_page_type_string(dsk->type), tag, key_cnt, dsk->u.entries); if (dsk->type == WT_PAGE_ROW_LEAF && - F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL) && - key_cnt != dsk->u.entries) + F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL) && key_cnt != dsk->u.entries) WT_ERR_VRFY(session, "%s page at %s with the 'all empty values' flag set has a " "key count of %" PRIu32 " and a physical entry count of %" @@ -624,8 +631,7 @@ key_compare: /* __wt_page_type_string(dsk->type), tag, key_cnt, dsk->u.entries); if (dsk->type == WT_PAGE_ROW_LEAF && - F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE) && - key_cnt * 2 != dsk->u.entries) + F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE) && key_cnt * 2 != dsk->u.entries) WT_ERR_VRFY(session, "%s page at %s with the 'no empty values' flag set has a " "key count of %" PRIu32 " and a physical entry count of %" @@ -671,8 +677,10 @@ __verify_dsk_col_int(WT_SESSION_IMPL *session, ++cell_num; /* Carefully unpack the cell. */ - if (__wt_cell_unpack_safe(session, dsk, cell, unpack, end) != 0) - return (__err_cell_corrupt(session, cell_num, tag)); + ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end); + if (ret != 0) + return ( + __err_cell_corrupt(session, ret, cell_num, tag)); /* Check the raw and collapsed cell types. */ WT_RET(__err_cell_type( @@ -687,8 +695,8 @@ __verify_dsk_col_int(WT_SESSION_IMPL *session, ret = bm->addr_invalid(bm, session, unpack->data, unpack->size); WT_RET_ERROR_OK(ret, EINVAL); if (ret == EINVAL) - return ( - __err_cell_corrupt_or_eof(session, cell_num, tag)); + return (__err_cell_corrupt_or_eof( + session, ret, cell_num, tag)); } WT_RET(__verify_dsk_memsize(session, tag, dsk, cell)); @@ -749,8 +757,10 @@ __verify_dsk_col_var(WT_SESSION_IMPL *session, ++cell_num; /* Carefully unpack the cell. */ - if (__wt_cell_unpack_safe(session, dsk, cell, unpack, end) != 0) - return (__err_cell_corrupt(session, cell_num, tag)); + ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end); + if (ret != 0) + return (__err_cell_corrupt( + session, ret, cell_num, tag)); /* Check the raw and collapsed cell types. 
*/ WT_RET(__err_cell_type( @@ -769,7 +779,7 @@ __verify_dsk_col_var(WT_SESSION_IMPL *session, WT_RET_ERROR_OK(ret, EINVAL); if (ret == EINVAL) return (__err_cell_corrupt_or_eof( - session, cell_num, tag)); + session, ret, cell_num, tag)); } /* @@ -881,9 +891,9 @@ __verify_dsk_chunk(WT_SESSION_IMPL *session, */ static int __err_cell_corrupt( - WT_SESSION_IMPL *session, uint32_t entry_num, const char *tag) + WT_SESSION_IMPL *session, int retval, uint32_t entry_num, const char *tag) { - WT_RET_VRFY(session, + WT_RET_VRFY_RETVAL(session, retval, "item %" PRIu32 " on page at %s is a corrupted cell", entry_num, tag); } @@ -894,9 +904,9 @@ __err_cell_corrupt( */ static int __err_cell_corrupt_or_eof( - WT_SESSION_IMPL *session, uint32_t entry_num, const char *tag) + WT_SESSION_IMPL *session, int retval, uint32_t entry_num, const char *tag) { - WT_RET_VRFY(session, + WT_RET_VRFY_RETVAL(session, retval, "item %" PRIu32 " on page at %s is a corrupted cell or references " "non-existent file pages", entry_num, tag); diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 90b1dc023ec..1b2f6ce5ed6 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -206,14 +206,14 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { NULL, 0 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\"," - "\"checkpoint_progress\",\"compact\",\"error_returns\",\"evict\"," - "\"evict_stuck\",\"evictserver\",\"fileops\",\"handleops\"," - "\"log\",\"lookaside\",\"lookaside_activity\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," - "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"checkpoint_progress\",\"compact\",\"compact_progress\"," + "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\"," + "\"fileops\",\"handleops\",\"log\",\"lookaside\"," + "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"timestamp\"," + "\"transaction\",\"verify\",\"version\",\"write\"]", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -237,6 +237,10 @@ static const WT_CONFIG_CHECK confchk_assert_subconfigs[] = { NULL, "choices=[\"always\",\"key_consistent\",\"never\"," "\"none\"]", NULL, 0 }, + { "durable_timestamp", "string", + NULL, "choices=[\"always\",\"key_consistent\",\"never\"," + "\"none\"]", + NULL, 0 }, { "read_timestamp", "string", NULL, "choices=[\"always\",\"never\",\"none\"]", NULL, 0 }, @@ -256,7 +260,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_alter[] = { { "app_metadata", "string", NULL, NULL, NULL, 0 }, { "assert", "category", NULL, NULL, - confchk_assert_subconfigs, 2 }, + confchk_assert_subconfigs, 3 }, { "cache_resident", "boolean", NULL, NULL, NULL, 0 }, { "exclusive_refreshed", "boolean", NULL, NULL, NULL, 0 }, { "log", "category", @@ -349,7 +353,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = { { "app_metadata", "string", NULL, NULL, NULL, 0 }, { "assert", "category", NULL, NULL, - confchk_assert_subconfigs, 2 }, + confchk_assert_subconfigs, 3 }, { "block_allocation", "string", NULL, "choices=[\"first\",\"best\"]", NULL, 0 }, @@ -556,7 
+560,7 @@ static const WT_CONFIG_CHECK confchk_file_config[] = { { "app_metadata", "string", NULL, NULL, NULL, 0 }, { "assert", "category", NULL, NULL, - confchk_assert_subconfigs, 2 }, + confchk_assert_subconfigs, 3 }, { "block_allocation", "string", NULL, "choices=[\"first\",\"best\"]", NULL, 0 }, @@ -621,7 +625,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = { { "app_metadata", "string", NULL, NULL, NULL, 0 }, { "assert", "category", NULL, NULL, - confchk_assert_subconfigs, 2 }, + confchk_assert_subconfigs, 3 }, { "block_allocation", "string", NULL, "choices=[\"first\",\"best\"]", NULL, 0 }, @@ -706,7 +710,7 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = { { "app_metadata", "string", NULL, NULL, NULL, 0 }, { "assert", "category", NULL, NULL, - confchk_assert_subconfigs, 2 }, + confchk_assert_subconfigs, 3 }, { "block_allocation", "string", NULL, "choices=[\"first\",\"best\"]", NULL, 0 }, @@ -927,14 +931,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\"," - "\"checkpoint_progress\",\"compact\",\"error_returns\",\"evict\"," - "\"evict_stuck\",\"evictserver\",\"fileops\",\"handleops\"," - "\"log\",\"lookaside\",\"lookaside_activity\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," - "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"checkpoint_progress\",\"compact\",\"compact_progress\"," + "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\"," + "\"fileops\",\"handleops\",\"log\",\"lookaside\"," + "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"timestamp\"," + "\"transaction\",\"verify\",\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", @@ -1036,14 +1040,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\"," - "\"checkpoint_progress\",\"compact\",\"error_returns\",\"evict\"," - "\"evict_stuck\",\"evictserver\",\"fileops\",\"handleops\"," - "\"log\",\"lookaside\",\"lookaside_activity\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," - "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"checkpoint_progress\",\"compact\",\"compact_progress\"," + "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\"," + "\"fileops\",\"handleops\",\"log\",\"lookaside\"," + "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"timestamp\"," + "\"transaction\",\"verify\",\"version\",\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -1140,14 +1144,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { 
confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\"," - "\"checkpoint_progress\",\"compact\",\"error_returns\",\"evict\"," - "\"evict_stuck\",\"evictserver\",\"fileops\",\"handleops\"," - "\"log\",\"lookaside\",\"lookaside_activity\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," - "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"checkpoint_progress\",\"compact\",\"compact_progress\"," + "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\"," + "\"fileops\",\"handleops\",\"log\",\"lookaside\"," + "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"timestamp\"," + "\"transaction\",\"verify\",\"version\",\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -1244,14 +1248,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\"," - "\"checkpoint_progress\",\"compact\",\"error_returns\",\"evict\"," - "\"evict_stuck\",\"evictserver\",\"fileops\",\"handleops\"," - "\"log\",\"lookaside\",\"lookaside_activity\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," - "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"checkpoint_progress\",\"compact\",\"compact_progress\"," + "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\"," + "\"fileops\",\"handleops\",\"log\",\"lookaside\"," + "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"timestamp\"," + "\"transaction\",\"verify\",\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", @@ -1348,9 +1352,10 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "WT_SESSION.alter", "access_pattern_hint=none,app_metadata=," - "assert=(commit_timestamp=none,read_timestamp=none)," - "cache_resident=false,exclusive_refreshed=true,log=(enabled=true)" - ",os_cache_dirty_max=0,os_cache_max=0", + "assert=(commit_timestamp=none,durable_timestamp=none," + "read_timestamp=none),cache_resident=false," + "exclusive_refreshed=true,log=(enabled=true),os_cache_dirty_max=0" + ",os_cache_max=0", confchk_WT_SESSION_alter, 8 }, { "WT_SESSION.begin_transaction", @@ -1376,11 +1381,11 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "WT_SESSION.create", "access_pattern_hint=none,allocation_size=4KB,app_metadata=," - "assert=(commit_timestamp=none,read_timestamp=none)," - "block_allocation=best,block_compressor=,cache_resident=false," - "checksum=uncompressed,colgroups=,collator=,columns=,dictionary=0" - ",encryption=(keyid=,name=),exclusive=false,extractor=," - "format=btree,huffman_key=,huffman_value=," + "assert=(commit_timestamp=none,durable_timestamp=none," + 
"read_timestamp=none),block_allocation=best,block_compressor=," + "cache_resident=false,checksum=uncompressed,colgroups=,collator=," + "columns=,dictionary=0,encryption=(keyid=,name=),exclusive=false," + "extractor=,format=btree,huffman_key=,huffman_value=," "ignore_in_memory_cache_size=false,immutable=false," "internal_item_max=0,internal_key_max=0," "internal_key_truncate=true,internal_page_max=4KB,key_format=u," @@ -1491,11 +1496,11 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "file.config", "access_pattern_hint=none,allocation_size=4KB,app_metadata=," - "assert=(commit_timestamp=none,read_timestamp=none)," - "block_allocation=best,block_compressor=,cache_resident=false," - "checksum=uncompressed,collator=,columns=,dictionary=0," - "encryption=(keyid=,name=),format=btree,huffman_key=," - "huffman_value=,ignore_in_memory_cache_size=false," + "assert=(commit_timestamp=none,durable_timestamp=none," + "read_timestamp=none),block_allocation=best,block_compressor=," + "cache_resident=false,checksum=uncompressed,collator=,columns=," + "dictionary=0,encryption=(keyid=,name=),format=btree,huffman_key=" + ",huffman_value=,ignore_in_memory_cache_size=false," "internal_item_max=0,internal_key_max=0," "internal_key_truncate=true,internal_page_max=4KB,key_format=u," "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB," @@ -1508,20 +1513,20 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "file.meta", "access_pattern_hint=none,allocation_size=4KB,app_metadata=," - "assert=(commit_timestamp=none,read_timestamp=none)," - "block_allocation=best,block_compressor=,cache_resident=false," - "checkpoint=,checkpoint_lsn=,checksum=uncompressed,collator=," - "columns=,dictionary=0,encryption=(keyid=,name=),format=btree," - "huffman_key=,huffman_value=,id=," - "ignore_in_memory_cache_size=false,internal_item_max=0," - "internal_key_max=0,internal_key_truncate=true," - "internal_page_max=4KB,key_format=u,key_gap=10,leaf_item_max=0," - "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0," - "log=(enabled=true),memory_page_image_max=0,memory_page_max=5MB," - "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," - "prefix_compression_min=4,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=90,value_format=u," - "version=(major=0,minor=0)", + "assert=(commit_timestamp=none,durable_timestamp=none," + "read_timestamp=none),block_allocation=best,block_compressor=," + "cache_resident=false,checkpoint=,checkpoint_lsn=," + "checksum=uncompressed,collator=,columns=,dictionary=0," + "encryption=(keyid=,name=),format=btree,huffman_key=," + "huffman_value=,id=,ignore_in_memory_cache_size=false," + "internal_item_max=0,internal_key_max=0," + "internal_key_truncate=true,internal_page_max=4KB,key_format=u," + "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB," + "leaf_value_max=0,log=(enabled=true),memory_page_image_max=0," + "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0," + "prefix_compression=false,prefix_compression_min=4," + "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90," + "value_format=u,version=(major=0,minor=0)", confchk_file_meta, 41 }, { "index.meta", @@ -1531,11 +1536,11 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "lsm.meta", "access_pattern_hint=none,allocation_size=4KB,app_metadata=," - "assert=(commit_timestamp=none,read_timestamp=none)," - "block_allocation=best,block_compressor=,cache_resident=false," - "checksum=uncompressed,chunks=,collator=,columns=,dictionary=0," - 
"encryption=(keyid=,name=),format=btree,huffman_key=," - "huffman_value=,ignore_in_memory_cache_size=false," + "assert=(commit_timestamp=none,durable_timestamp=none," + "read_timestamp=none),block_allocation=best,block_compressor=," + "cache_resident=false,checksum=uncompressed,chunks=,collator=," + "columns=,dictionary=0,encryption=(keyid=,name=),format=btree," + "huffman_key=,huffman_value=,ignore_in_memory_cache_size=false," "internal_item_max=0,internal_key_max=0," "internal_key_truncate=true,internal_page_max=4KB,key_format=u," "key_gap=10,last=,leaf_item_max=0,leaf_key_max=0," diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 0630bdb3711..392cbad87d3 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1835,6 +1835,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) { "checkpoint", WT_VERB_CHECKPOINT }, { "checkpoint_progress",WT_VERB_CHECKPOINT_PROGRESS }, { "compact", WT_VERB_COMPACT }, + { "compact_progress", WT_VERB_COMPACT_PROGRESS }, { "error_returns", WT_VERB_ERROR_RETURNS }, { "evict", WT_VERB_EVICT }, { "evict_stuck", WT_VERB_EVICT_STUCK }, diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index 315a822cc13..04882e527ce 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -359,18 +359,25 @@ __backup_start(WT_SESSION_IMPL *session, } err: /* Close the hot backup file. */ - if (cb->bfs != NULL) - WT_TRET(__wt_fclose(session, &cb->bfs)); if (srcfs != NULL) WT_TRET(__wt_fclose(session, &srcfs)); + /* + * Sync and rename the temp file into place. + */ + if (ret == 0) + ret = __wt_sync_and_rename(session, + &cb->bfs, WT_BACKUP_TMP, dest); if (ret == 0) { - WT_ASSERT(session, dest != NULL); - WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest, false)); __wt_writelock(session, &conn->hot_backup_lock); conn->hot_backup_list = cb->list; __wt_writeunlock(session, &conn->hot_backup_lock); F_SET(session, WT_SESSION_BACKUP_CURSOR); } + /* + * If the file hasn't been closed, do it now. + */ + if (cb->bfs != NULL) + WT_TRET(__wt_fclose(session, &cb->bfs)); done: return (ret); diff --git a/src/third_party/wiredtiger/src/cursor/cur_index.c b/src/third_party/wiredtiger/src/cursor/cur_index.c index a5d77df7b47..baabcd0182c 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_index.c +++ b/src/third_party/wiredtiger/src/cursor/cur_index.c @@ -382,7 +382,7 @@ err: if (cindex->child != NULL) WT_TRET(cindex->child->close(cindex->child)); - WT_TRET(__wt_schema_release_table(session, cindex->table)); + WT_TRET(__wt_schema_release_table(session, &cindex->table)); /* The URI is owned by the index. 
*/ cursor->internal_uri = NULL; __wt_cursor_close(cursor); @@ -489,7 +489,7 @@ __wt_curindex_open(WT_SESSION_IMPL *session, if ((ret = __wt_schema_open_index( session, table, idxname, namesize, &idx)) != 0) { - WT_TRET(__wt_schema_release_table(session, table)); + WT_TRET(__wt_schema_release_table(session, &table)); return (ret); } WT_RET(__wt_calloc_one(session, &cindex)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c index a10e6cc1053..1a8f6ab28af 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_join.c +++ b/src/third_party/wiredtiger/src/cursor/cur_join.c @@ -327,7 +327,7 @@ __curjoin_close(WT_CURSOR *cursor) JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL); err: - WT_TRET(__wt_schema_release_table(session, cjoin->table)); + WT_TRET(__wt_schema_release_table(session, &cjoin->table)); /* This is owned by the table */ cursor->key_format = NULL; diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c index 0118f342903..f4aee9a8c90 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_stat.c +++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c @@ -357,6 +357,7 @@ __curstat_conn_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) * Optionally clear the connection statistics. */ __wt_conn_stat_init(session); + __wt_stat_connection_init_single(&cst->u.conn_stats); __wt_stat_connection_aggregate(conn->stats, &cst->u.conn_stats); if (F_ISSET(cst, WT_STAT_CLEAR)) __wt_stat_connection_clear_all(conn->stats); diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c index 807bd668096..77c6018778c 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_table.c +++ b/src/third_party/wiredtiger/src/cursor/cur_table.c @@ -839,7 +839,7 @@ err: __wt_free(session, ctable->cg_valcopy); __wt_free(session, ctable->idx_cursors); - WT_TRET(__wt_schema_release_table(session, ctable->table)); + WT_TRET(__wt_schema_release_table(session, &ctable->table)); /* The URI is owned by the table. */ cursor->internal_uri = NULL; __wt_cursor_close(cursor); @@ -999,7 +999,7 @@ __wt_curtable_open(WT_SESSION_IMPL *session, ret = __wt_open_cursor(session, table->cgroups[0]->source, NULL, cfg, cursorp); - WT_TRET(__wt_schema_release_table(session, table)); + WT_TRET(__wt_schema_release_table(session, &table)); if (ret == 0) { /* Fix up the public URI to match what was passed in. */ cursor = *cursorp; diff --git a/src/third_party/wiredtiger/src/docs/devdoc-dhandle-lifecycle.dox b/src/third_party/wiredtiger/src/docs/devdoc-dhandle-lifecycle.dox new file mode 100644 index 00000000000..8f79a0da22e --- /dev/null +++ b/src/third_party/wiredtiger/src/docs/devdoc-dhandle-lifecycle.dox @@ -0,0 +1,92 @@ +/*! @page devdoc-dhandle-lifecycle Data Handle Lifecycle + +A WiredTiger Data Handle (dhandle) is a generic representation of any named +data source. This representation contains information such as its name, +how many references there are to it, individual data source statistics and +what type of underlying data object it is. + +WiredTiger maintains all dhandles in a global dhandle list accessed +from the connection. Multiple sessions access this list, which is +protected by a R/W lock. Each session also maintains a session +dhandle cache which is a cache of dhandles a session has operated +upon. The entries in the cache are references into the global dhandle +list. 
+ +@section dhandle-creation dhandle creation + +When a cursor in a session attempts to access a WiredTiger table +that has not been accessed before, the dhandle for the table will +neither be in the session's dhandle cache nor in the connection's +global dhandle list. A new dhandle will have to be created for this +table. The cursor trying to access a table first attempts to find +the dhandle in the session dhandle cache. When it doesn't find the +dhandle in the session cache, it searches in the global dhandle +list while holding the read lock. When it doesn't find the dhandle +there, it creates a dhandle for this table and puts it in the global +dhandle list while holding the write lock on the global dhandle +list. The cursor operation then puts a reference to the dhandle in +the session's dhandle cache. + +There are two relevant reference counters in the dhandle structure, +\c session_ref and \c session_inuse. \c session_ref counts the +number of session dhandle cache lists that contain this dhandle. +\c session_inuse is a count of cursors opened and operating on this +dhandle. Both of these counters are incremented by the session as the +cursor attempts to use this dhandle. When the operation completes +and the cursor is closed, the \c session_inuse count is decremented. The +dhandle reference is not removed immediately from the session dhandle +cache. If this session accesses the same table again in the near +future, having the dhandle reference already in the session dhandle +cache is a performance optimization. + +@section session-cache-sweep dhandle session cache sweep + +A dhandle cache sweep is the only way cleanup is performed on a +session's dhandle cache list. References to dhandles that have not +been accessed by this session in a long time are removed from the +cache. Since a session is single-threaded, a session's dhandle cache +can only be altered by that session. + +Each time a session accesses a dhandle, it checks if enough +time has elapsed to do a session cache sweep for that session. +As it walks the session dhandle cache list, it notices if any dhandle +on its list has been marked dead (idle too long). If it has, the +session removes that dhandle from its list and decrements the +\c session_ref count. + +Since accessing a dhandle involves walking the session dhandle cache +list anyway, cache cleanup is piggy-backed on this operation. + +@section dhandle-sweep sweep-server dhandle sweep + +WiredTiger maintains a sweep server in the background for the cleanup of the +global dhandle list. The sweep server periodically (\c close_scan_interval) +revisits the dhandles in the global list and, if a dhandle is not being used, +i.e., its \c session_inuse count is 0, assigns the current time as the +dhandle's time of death, if one is not already set. + +If a dhandle already has a time of death set from a previous iteration +of the dhandle sweep and has remained unused, the sweep server +compares the time of death with the current time to check if the dhandle has +remained idle for the configured idle time (\c close_idle_time). If the dhandle +has remained idle, the sweep server closes the associated btree contained in the +dhandle and releases some of the resources for that dhandle. It also marks +the dhandle as dead so that the next time a session with a reference walks its +own cache list, it will see the handle marked dead and remove it from the +session's dhandle cache list (see above). 
+ +The sweep server then checks whether any session is still referencing this +dhandle, i.e., whether a session's dhandle cache still contains a reference to +this dhandle. If a dhandle is still referenced by at least one session, i.e., the +\c session_ref count is not 0, the dhandle can't be removed from the global +list. If this dhandle is not referenced by any session, i.e., the \c session_ref +count is 0, the sweep server removes the dhandle from the global dhandle list +and frees any remaining resources associated with it. Removing the dhandle +from the global list completes this dhandle's lifecycle. Any future access +of the associated table would need to start by creating the dhandle again. + +Note: The sweep server's scan interval and a dhandle's close idle time can be +configured using the \c file_manager configuration settings of the connection +handle. + +*/ diff --git a/src/third_party/wiredtiger/src/docs/devdoc-index.dox b/src/third_party/wiredtiger/src/docs/devdoc-index.dox index ba809d7af43..7ada556aa1a 100644 --- a/src/third_party/wiredtiger/src/docs/devdoc-index.dox +++ b/src/third_party/wiredtiger/src/docs/devdoc-index.dox @@ -9,4 +9,14 @@ see: - @subpage devdoc-schema +An internal structure called a Data Handle (dhandle) is used to represent and +access a table in WiredTiger. A dhandle is created when a table is accessed +for the first time. It is kept in a global list and is shared across +sessions. When a dhandle is no longer needed and has been idle for a while, +it is closed and destroyed, releasing all the resources associated with it. + +For more information on the lifecycle of a dhandle, see: + +- @subpage devdoc-dhandle-lifecycle + */ diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok index b336b5ab17a..78741bc0ba8 100644 --- a/src/third_party/wiredtiger/src/docs/spell.ok +++ b/src/third_party/wiredtiger/src/docs/spell.ok @@ -181,6 +181,7 @@ dbformat dbm dbt decl +decrementing decrypt decrypted del @@ -190,6 +191,8 @@ destructors dev devdoc dhandle +dhandles +dhandle's disjunction disjunctions distclean @@ -272,6 +275,7 @@ intl inuse io ip +ie je jemalloc jitter @@ -296,6 +300,7 @@ libhe libkvs libtool libwiredtiger +lifecycle lmin ln loadtext diff --git a/src/third_party/wiredtiger/src/docs/transactions.dox b/src/third_party/wiredtiger/src/docs/transactions.dox index 255cba4cb52..25845ce0209 100644 --- a/src/third_party/wiredtiger/src/docs/transactions.dox +++ b/src/third_party/wiredtiger/src/docs/transactions.dox @@ -202,6 +202,13 @@ WT_PREPARE_CONFLICT error will be returned indicating that it is not possible to choose a version of data to return until a prepared transaction is resolved, it is reasonable to retry such operations. +Durability of the data updates performed by a prepared transaction, on tables +configured with log=(enabled=false), can be controlled by specifying a durable +timestamp during WT_SESSION::commit_transaction. Checkpoints will use the +durable timestamp, instead of the commit timestamp, when persisting the data +updates. If a durable timestamp is not specified, the commit timestamp is used +as the durable timestamp. 
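As an illustration of the durable timestamp usage described in the paragraph above, the following is a minimal sketch of the standard WiredTiger C API calls involved. The table URI, key/value formats and hexadecimal timestamp values are hypothetical, and error handling is omitted; it is a sketch of the documented usage, not code from this patch.

    /*
     * Minimal sketch (hypothetical table and timestamp values): commit a
     * prepared transaction with a durable timestamp later than its commit
     * timestamp. Error checking omitted for brevity.
     */
    WT_SESSION *session;
    WT_CURSOR *cursor;

    conn->open_session(conn, NULL, NULL, &session);
    session->open_cursor(session, "table:example", NULL, NULL, &cursor);

    session->begin_transaction(session, NULL);
    cursor->set_key(cursor, "key1");
    cursor->set_value(cursor, "value1");
    cursor->insert(cursor);

    /* Prepare, then commit with distinct commit and durable timestamps. */
    session->prepare_transaction(session, "prepare_timestamp=2a");
    session->commit_transaction(session,
        "commit_timestamp=2b,durable_timestamp=2c");

A checkpoint taken after this commit persists the update according to the durable timestamp rather than the commit timestamp, which is the behavior the added documentation describes.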
+ @subsection timestamp_connection Managing global timestamp state Applications that use timestamps need to manage some global state in order diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index f7ff274cfb8..27e26af0e18 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -102,8 +102,11 @@ struct __wt_btree { #define WT_ASSERT_COMMIT_TS_ALWAYS 0x01u #define WT_ASSERT_COMMIT_TS_KEYS 0x02u #define WT_ASSERT_COMMIT_TS_NEVER 0x04u -#define WT_ASSERT_READ_TS_ALWAYS 0x08u -#define WT_ASSERT_READ_TS_NEVER 0x10u +#define WT_ASSERT_DURABLE_TS_ALWAYS 0x08u +#define WT_ASSERT_DURABLE_TS_KEYS 0x10u +#define WT_ASSERT_DURABLE_TS_NEVER 0x20u +#define WT_ASSERT_READ_TS_ALWAYS 0x40u +#define WT_ASSERT_READ_TS_NEVER 0x80u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t assert_flags; /* Debugging assertion information */ diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i index 3b45426f797..1812bd1a816 100644 --- a/src/third_party/wiredtiger/src/include/cell.i +++ b/src/third_party/wiredtiger/src/include/cell.i @@ -17,8 +17,8 @@ * There are 4 basic cell types: keys and data (each of which has an overflow * form), deleted cells and off-page references. The cell is usually followed * by additional data, varying by type: keys are followed by a chunk of data, - * data is followed by a pair of timestamps and a chunk of data, overflow and - * off-page cells are followed by an address cookie. + * data is followed by optional timestamps and a chunk of data, overflow and + * off-page cells are followed by optional timestamps and an address cookie. * * Deleted cells are place-holders for column-store files, where entries cannot * be removed in order to preserve the record count. @@ -58,7 +58,8 @@ * Bit 3 marks an 8B packed, uint64_t value following the cell description byte. * (A run-length counter or a record number for variable-length column store.) * - * Bit 4 is unused. + * Bit 4 marks a value with associated timestamps (globally visible values don't + * require timestamps). * * Bits 5-8 are cell "types". */ @@ -71,13 +72,7 @@ #define WT_CELL_SHORT_SHIFT 2 /* Shift for short key/value */ #define WT_CELL_64V 0x04 /* Associated value */ - -/* - * We could use bit 4 as a single bit (similar to bit 3), or as a type bit in a - * backward compatible way by adding bit 4 to the type mask and adding new types - * that incorporate it. - */ -#define WT_CELL_UNUSED_BIT4 0x08 /* Unused */ +#define WT_CELL_TIMESTAMPS 0x08 /* Associated timestamps */ /* * WT_CELL_ADDR_INT is an internal block location, WT_CELL_ADDR_LEAF is a leaf @@ -110,13 +105,13 @@ #define WT_CELL_TYPE(v) ((v) & WT_CELL_TYPE_MASK) /* - * When we aren't able to create a short key or value (and, in the case of a - * value, there's no associated RLE), the key or value is at least 64B, else - * we'd have been able to store it as a short cell. Decrement/Increment the - * size before storing it, in the hopes that relatively small key/value sizes - * will pack into a single byte instead of two bytes. + * When unable to create a short key or value (and where it wasn't an associated + * RLE or timestamps that prevented creating a short value), the data must be at + * least 64B, else we'd have used a short cell. When packing/unpacking the size, + * decrement/increment the size, in the hopes that a smaller size will pack into + * a single byte instead of two. 
*/ -#define WT_CELL_SIZE_ADJUST 64 +#define WT_CELL_SIZE_ADJUST (WT_CELL_SHORT_MAX + 1) /* * WT_CELL -- @@ -193,12 +188,40 @@ __wt_timestamp_value_check( */ static inline void __cell_pack_timestamp_value(WT_SESSION_IMPL *session, - uint8_t **pp, wt_timestamp_t start_ts, wt_timestamp_t stop_ts) + uint8_t **pp, wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp) { - __wt_timestamp_value_check(session, start_ts, stop_ts); + wt_timestamp_t start_ts, stop_ts; - if (__wt_process.page_version_ts) { - /* Start timestamp, stop timestamp difference. */ + __wt_timestamp_value_check(session, *start_tsp, *stop_tsp); + + /* + * Finalize the timestamps, checking if they're globally visible and + * won't need to be written. + * + * TIMESTAMP-FIXME + * Values (presumably) have associated transaction IDs, but we haven't + * yet decided how to handle them. For now, ignore them in determining + * value durability. + */ + if (*start_tsp != WT_TS_NONE && + __wt_txn_visible_all(session, WT_TXN_NONE, *start_tsp)) + *start_tsp = WT_TS_NONE; + + start_ts = *start_tsp; + stop_ts = *stop_tsp; + + /* + * Historic versions and globally visible values don't have associated + * timestamps, else set a flag bit and store the packed timestamp pair. + */ + if (!__wt_process.page_version_ts || + (start_ts == WT_TS_NONE && stop_ts == WT_TS_MAX)) + ++*pp; + else { + **pp |= WT_CELL_TIMESTAMPS; + ++*pp; + + /* Store differences, not absolutes. */ (void)__wt_vpack_uint(pp, 0, start_ts); (void)__wt_vpack_uint(pp, 0, stop_ts - start_ts); } @@ -235,11 +258,11 @@ __cell_pack_timestamp_addr(WT_SESSION_IMPL *session, __wt_timestamp_addr_check(session, oldest_start_ts, newest_start_ts, newest_stop_ts); + ++*pp; if (__wt_process.page_version_ts) { + /* Store differences, not absolutes. */ (void)__wt_vpack_uint(pp, 0, oldest_start_ts); - - /* Newest start timestamp, stop timestamp difference. */ - (void)__wt_vpack_uint(pp, 0, newest_start_ts); + (void)__wt_vpack_uint(pp, 0, newest_start_ts - oldest_start_ts); (void)__wt_vpack_uint(pp, 0, newest_stop_ts - newest_start_ts); } } @@ -256,7 +279,10 @@ __wt_cell_pack_addr(WT_SESSION_IMPL *session, { uint8_t *p; - p = cell->__chunk + 1; + /* Start building a cell: the descriptor byte starts zero. */ + p = cell->__chunk; + *p = '\0'; + __cell_pack_timestamp_addr(session, &p, oldest_start_ts, newest_start_ts, newest_stop_ts); @@ -271,32 +297,43 @@ __wt_cell_pack_addr(WT_SESSION_IMPL *session, } /* - * __wt_cell_pack_data -- - * Set a data item's WT_CELL contents. + * __wt_cell_pack_value -- + * Set a value item's WT_CELL contents. */ static inline size_t -__wt_cell_pack_data(WT_SESSION_IMPL *session, WT_CELL *cell, - wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, size_t size) +__wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, + wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, + uint64_t rle, size_t size) { uint8_t byte, *p; + bool ts; - p = cell->__chunk + 1; - __cell_pack_timestamp_value(session, &p, start_ts, stop_ts); + /* Start building a cell: the descriptor byte starts zero. */ + p = cell->__chunk; + *p = '\0'; + + __cell_pack_timestamp_value(session, &p, start_tsp, stop_tsp); /* - * Short data cells without run-length encoding have 6 bits of data - * length in the descriptor byte. + * Short data cells without timestamps or run-length encoding have 6 + * bits of data length in the descriptor byte. 
*/ - if (rle < 2 && size <= WT_CELL_SHORT_MAX) { + ts = (cell->__chunk[0] & WT_CELL_TIMESTAMPS) != 0; + if (!ts && rle < 2 && size <= WT_CELL_SHORT_MAX) { byte = (uint8_t)size; /* Type + length */ cell->__chunk[0] = (uint8_t) ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT); } else { - if (rle < 2) { + /* + * If the size was what prevented us from using a short cell, + * it's larger than the adjustment size. Decrement/increment + * it when packing/unpacking so it takes up less room. + */ + if (!ts && rle < 2) { size -= WT_CELL_SIZE_ADJUST; - cell->__chunk[0] = WT_CELL_VALUE; /* Type */ + cell->__chunk[0] |= WT_CELL_VALUE; /* Type */ } else { - cell->__chunk[0] = WT_CELL_VALUE | WT_CELL_64V; + cell->__chunk[0] |= WT_CELL_VALUE | WT_CELL_64V; (void)__wt_vpack_uint(&p, 0, rle); /* RLE */ } (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */ @@ -305,17 +342,17 @@ __wt_cell_pack_data(WT_SESSION_IMPL *session, WT_CELL *cell, } /* - * __wt_cell_pack_data_match -- - * Return if two items would have identical WT_CELLs (except for timestamps - * and any RLE). + * __wt_cell_pack_value_match -- + * Return if two value items would have identical WT_CELLs (except for + * timestamps and any RLE). */ static inline int -__wt_cell_pack_data_match(WT_CELL *page_cell, +__wt_cell_pack_value_match(WT_CELL *page_cell, WT_CELL *val_cell, const uint8_t *val_data, bool *matchp) { uint64_t alen, blen, v; const uint8_t *a, *b; - bool rle; + bool rle, ts; *matchp = false; /* Default to no-match */ @@ -323,8 +360,8 @@ __wt_cell_pack_data_match(WT_CELL *page_cell, * This is a special-purpose function used by reconciliation to support * dictionary lookups. We're passed an on-page cell and a created cell * plus a chunk of data we're about to write on the page, and we return - * if they would match on the page. The column-store comparison ignores - * the timestamps and the RLE because the copied cell will have its own. + * if they would match on the page. Ignore timestamps and column-store + * RLE because the copied cell will have its own. 
*/ a = (uint8_t *)page_cell; b = (uint8_t *)val_cell; @@ -332,14 +369,11 @@ __wt_cell_pack_data_match(WT_CELL *page_cell, if (WT_CELL_SHORT_TYPE(a[0]) == WT_CELL_VALUE_SHORT) { alen = a[0] >> WT_CELL_SHORT_SHIFT; ++a; - if (__wt_process.page_version_ts) { - WT_RET(__wt_vunpack_uint(&a, 0, &v)); /* Skip TS */ - WT_RET(__wt_vunpack_uint(&a, 0, &v)); - } } else if (WT_CELL_TYPE(a[0]) == WT_CELL_VALUE) { rle = (a[0] & WT_CELL_64V) != 0; + ts = (a[0] & WT_CELL_TIMESTAMPS) != 0; ++a; - if (__wt_process.page_version_ts) { + if (ts) { WT_RET(__wt_vunpack_uint(&a, 0, &v)); /* Skip TS */ WT_RET(__wt_vunpack_uint(&a, 0, &v)); } @@ -352,14 +386,11 @@ __wt_cell_pack_data_match(WT_CELL *page_cell, if (WT_CELL_SHORT_TYPE(b[0]) == WT_CELL_VALUE_SHORT) { blen = b[0] >> WT_CELL_SHORT_SHIFT; ++b; - if (__wt_process.page_version_ts) { - WT_RET(__wt_vunpack_uint(&b, 0, &v)); /* Skip TS */ - WT_RET(__wt_vunpack_uint(&b, 0, &v)); - } } else if (WT_CELL_TYPE(b[0]) == WT_CELL_VALUE) { rle = (b[0] & WT_CELL_64V) != 0; + ts = (b[0] & WT_CELL_TIMESTAMPS) != 0; ++b; - if (__wt_process.page_version_ts) { + if (ts) { WT_RET(__wt_vunpack_uint(&b, 0, &v)); /* Skip TS */ WT_RET(__wt_vunpack_uint(&b, 0, &v)); } @@ -380,17 +411,21 @@ __wt_cell_pack_data_match(WT_CELL *page_cell, */ static inline size_t __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, - wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, uint64_t v) + wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, + uint64_t rle, uint64_t v) { uint8_t *p; - p = cell->__chunk + 1; - __cell_pack_timestamp_value(session, &p, start_ts, stop_ts); + /* Start building a cell: the descriptor byte starts zero. */ + p = cell->__chunk; + *p = '\0'; + + __cell_pack_timestamp_value(session, &p, start_tsp, stop_tsp); if (rle < 2) - cell->__chunk[0] = WT_CELL_VALUE_COPY; /* Type */ + cell->__chunk[0] |= WT_CELL_VALUE_COPY; /* Type */ else { - cell->__chunk[0] = /* Type */ + cell->__chunk[0] |= /* Type */ WT_CELL_VALUE_COPY | WT_CELL_64V; (void)__wt_vpack_uint(&p, 0, rle); /* RLE */ } @@ -404,17 +439,20 @@ __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, */ static inline size_t __wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, - wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle) + wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, uint64_t rle) { uint8_t *p; - p = cell->__chunk + 1; - __cell_pack_timestamp_value(session, &p, start_ts, stop_ts); + /* Start building a cell: the descriptor byte starts zero. */ + p = cell->__chunk; + *p = '\0'; + + __cell_pack_timestamp_value(session, &p, start_tsp, stop_tsp); if (rle < 2) - cell->__chunk[0] = WT_CELL_DEL; /* Type */ + cell->__chunk[0] |= WT_CELL_DEL; /* Type */ else { - cell->__chunk[0] = /* Type */ + cell->__chunk[0] |= /* Type */ WT_CELL_DEL | WT_CELL_64V; (void)__wt_vpack_uint(&p, 0, rle); /* RLE */ } @@ -441,6 +479,11 @@ __wt_cell_pack_int_key(WT_CELL *cell, size_t size) cell->__chunk[0] = WT_CELL_KEY; /* Type */ p = cell->__chunk + 1; + /* + * If the size prevented us from using a short cell, it's larger than + * the adjustment size. Decrement/increment it when packing/unpacking + * so it takes up less room. 
+ */ size -= WT_CELL_SIZE_ADJUST; (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */ return (WT_PTRDIFF(p, cell)); @@ -463,10 +506,10 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT); return (1); } - byte = (uint8_t)size; /* Type + length */ + byte = (uint8_t)size; /* Type + length */ cell->__chunk[0] = (uint8_t) ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT_PFX); - cell->__chunk[1] = prefix; /* Prefix */ + cell->__chunk[1] = prefix; /* Prefix */ return (2); } @@ -479,6 +522,11 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) p = cell->__chunk + 2; } + /* + * If the size prevented us from using a short cell, it's larger than + * the adjustment size. Decrement/increment it when packing/unpacking + * so it takes up less room. + */ size -= WT_CELL_SIZE_ADJUST; (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */ return (WT_PTRDIFF(p, cell)); @@ -490,25 +538,30 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) */ static inline size_t __wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, - wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, size_t size) + wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, + uint64_t rle, size_t size) { uint8_t *p; - p = cell->__chunk + 1; + /* Start building a cell: the descriptor byte starts zero. */ + p = cell->__chunk; + *p = '\0'; + switch (type) { case WT_CELL_KEY_OVFL: case WT_CELL_KEY_OVFL_RM: + ++p; break; case WT_CELL_VALUE_OVFL: case WT_CELL_VALUE_OVFL_RM: - __cell_pack_timestamp_value(session, &p, start_ts, stop_ts); + __cell_pack_timestamp_value(session, &p, start_tsp, stop_tsp); break; } if (rle < 2) - cell->__chunk[0] = type; /* Type */ + cell->__chunk[0] |= type; /* Type */ else { - cell->__chunk[0] = type | WT_CELL_64V; /* Type */ + cell->__chunk[0] |= type | WT_CELL_64V; /* Type */ (void)__wt_vpack_uint(&p, 0, rle); /* RLE */ } (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */ @@ -690,7 +743,7 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, * NB: when unpacking a WT_CELL_VALUE_COPY cell, unpack.cell is returned * as the original cell, not the copied cell (in other words, data from * the copied cell must be available from unpack after we return, as our - * caller has no way to find the copied cell. + * caller has no way to find the copied cell). */ WT_CELL_LEN_CHK(cell, 0); unpack->cell = cell; @@ -714,8 +767,8 @@ restart: /* * Handle cells with neither RLE counts, timestamps or a data length: - * short key cells have 6 bits of data length in the descriptor byte - * and nothing else. + * short key/data cells have 6 bits of data length in the descriptor + * byte and nothing else. */ switch (unpack->raw) { case WT_CELL_KEY_SHORT_PFX: @@ -726,6 +779,7 @@ restart: unpack->__len = 2 + unpack->size; goto done; case WT_CELL_KEY_SHORT: + case WT_CELL_VALUE_SHORT: unpack->prefix = 0; unpack->data = cell->__chunk + 1; unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT; @@ -742,55 +796,52 @@ restart: /* * Check for a prefix byte that optionally follows the cell descriptor - * byte on row-store leaf pages. + * byte in keys on row-store leaf pages. */ if (unpack->raw == WT_CELL_KEY_PFX) { - ++p; /* skip prefix */ + unpack->prefix = *p++; /* skip prefix */ WT_CELL_LEN_CHK(p, 0); - unpack->prefix = cell->__chunk[1]; } /* Check for timestamps. 
*/ - if (dsk->version >= WT_PAGE_VERSION_TS) - switch (unpack->raw) { - case WT_CELL_ADDR_DEL: - case WT_CELL_ADDR_INT: - case WT_CELL_ADDR_LEAF: - case WT_CELL_ADDR_LEAF_NO: - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : - WT_PTRDIFF(end, p), &unpack->oldest_start_ts)); - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : - WT_PTRDIFF(end, p), &unpack->newest_start_ts)); - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : - WT_PTRDIFF(end, p), &unpack->newest_stop_ts)); - unpack->newest_stop_ts += unpack->newest_start_ts; - - __wt_timestamp_addr_check(session, - unpack->oldest_start_ts, - unpack->newest_start_ts, unpack->newest_stop_ts); + switch (unpack->raw) { + case WT_CELL_ADDR_DEL: + case WT_CELL_ADDR_INT: + case WT_CELL_ADDR_LEAF: + case WT_CELL_ADDR_LEAF_NO: + if (!__wt_process.page_version_ts) break; - case WT_CELL_DEL: - case WT_CELL_VALUE: - case WT_CELL_VALUE_COPY: - case WT_CELL_VALUE_OVFL: - case WT_CELL_VALUE_OVFL_RM: - case WT_CELL_VALUE_SHORT: - WT_RET(__wt_vunpack_uint(&p, end == NULL ? - 0 : WT_PTRDIFF(end, p), &unpack->start_ts)); - WT_RET(__wt_vunpack_uint(&p, end == NULL ? - 0 : WT_PTRDIFF(end, p), &unpack->stop_ts)); - unpack->stop_ts += unpack->start_ts; - - __wt_timestamp_value_check( - session, unpack->start_ts, unpack->stop_ts); + + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : + WT_PTRDIFF(end, p), &unpack->oldest_start_ts)); + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : + WT_PTRDIFF(end, p), &unpack->newest_start_ts)); + unpack->newest_start_ts += unpack->oldest_start_ts; + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : + WT_PTRDIFF(end, p), &unpack->newest_stop_ts)); + unpack->newest_stop_ts += unpack->newest_start_ts; + + __wt_timestamp_addr_check(session, + unpack->oldest_start_ts, + unpack->newest_start_ts, unpack->newest_stop_ts); + break; + case WT_CELL_DEL: + case WT_CELL_VALUE: + case WT_CELL_VALUE_COPY: + case WT_CELL_VALUE_OVFL: + case WT_CELL_VALUE_OVFL_RM: + if ((cell->__chunk[0] & WT_CELL_TIMESTAMPS) == 0) break; - } - if (unpack->raw == WT_CELL_VALUE_SHORT) { - unpack->data = p; - unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT; - unpack->__len = WT_PTRDIFF32(p, cell) + unpack->size; - goto done; + WT_RET(__wt_vunpack_uint(&p, end == NULL ? + 0 : WT_PTRDIFF(end, p), &unpack->start_ts)); + WT_RET(__wt_vunpack_uint(&p, end == NULL ? + 0 : WT_PTRDIFF(end, p), &unpack->stop_ts)); + unpack->stop_ts += unpack->start_ts; + + __wt_timestamp_value_check( + session, unpack->start_ts, unpack->stop_ts); + break; } /* @@ -848,14 +899,21 @@ restart: WT_RET(__wt_vunpack_uint( &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v)); + /* + * If the size was what prevented us from using a short cell, + * it's larger than the adjustment size. Decrement/increment + * it when packing/unpacking so it takes up less room. 
+ */ if (unpack->raw == WT_CELL_KEY || unpack->raw == WT_CELL_KEY_PFX || - (unpack->raw == WT_CELL_VALUE && unpack->v == 0)) + (unpack->raw == WT_CELL_VALUE && + unpack->v == 0 && + (cell->__chunk[0] & WT_CELL_TIMESTAMPS) == 0)) v += WT_CELL_SIZE_ADJUST; unpack->data = p; unpack->size = (uint32_t)v; - unpack->__len = WT_PTRDIFF32(p + unpack->size, cell); + unpack->__len = WT_PTRDIFF32(p, cell) + unpack->size; break; case WT_CELL_DEL: diff --git a/src/third_party/wiredtiger/src/include/compact.h b/src/third_party/wiredtiger/src/include/compact.h index df14ed6c5e4..3a1f54ca294 100644 --- a/src/third_party/wiredtiger/src/include/compact.h +++ b/src/third_party/wiredtiger/src/include/compact.h @@ -10,6 +10,7 @@ struct __wt_compact_state { uint32_t lsm_count; /* Number of LSM trees seen */ uint32_t file_count; /* Number of files seen */ uint64_t max_time; /* Configured timeout */ + uint64_t prog_msg_count; /* Progress message count */ struct timespec begin; /* Starting time */ }; diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 280d7e32f7d..b6100ae134d 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -425,35 +425,36 @@ struct __wt_connection_impl { #define WT_VERB_CHECKPOINT 0x000000004u #define WT_VERB_CHECKPOINT_PROGRESS 0x000000008u #define WT_VERB_COMPACT 0x000000010u -#define WT_VERB_ERROR_RETURNS 0x000000020u -#define WT_VERB_EVICT 0x000000040u -#define WT_VERB_EVICTSERVER 0x000000080u -#define WT_VERB_EVICT_STUCK 0x000000100u -#define WT_VERB_FILEOPS 0x000000200u -#define WT_VERB_HANDLEOPS 0x000000400u -#define WT_VERB_LOG 0x000000800u -#define WT_VERB_LOOKASIDE 0x000001000u -#define WT_VERB_LOOKASIDE_ACTIVITY 0x000002000u -#define WT_VERB_LSM 0x000004000u -#define WT_VERB_LSM_MANAGER 0x000008000u -#define WT_VERB_METADATA 0x000010000u -#define WT_VERB_MUTEX 0x000020000u -#define WT_VERB_OVERFLOW 0x000040000u -#define WT_VERB_READ 0x000080000u -#define WT_VERB_REBALANCE 0x000100000u -#define WT_VERB_RECONCILE 0x000200000u -#define WT_VERB_RECOVERY 0x000400000u -#define WT_VERB_RECOVERY_PROGRESS 0x000800000u -#define WT_VERB_SALVAGE 0x001000000u -#define WT_VERB_SHARED_CACHE 0x002000000u -#define WT_VERB_SPLIT 0x004000000u -#define WT_VERB_TEMPORARY 0x008000000u -#define WT_VERB_THREAD_GROUP 0x010000000u -#define WT_VERB_TIMESTAMP 0x020000000u -#define WT_VERB_TRANSACTION 0x040000000u -#define WT_VERB_VERIFY 0x080000000u -#define WT_VERB_VERSION 0x100000000u -#define WT_VERB_WRITE 0x200000000u +#define WT_VERB_COMPACT_PROGRESS 0x000000020u +#define WT_VERB_ERROR_RETURNS 0x000000040u +#define WT_VERB_EVICT 0x000000080u +#define WT_VERB_EVICTSERVER 0x000000100u +#define WT_VERB_EVICT_STUCK 0x000000200u +#define WT_VERB_FILEOPS 0x000000400u +#define WT_VERB_HANDLEOPS 0x000000800u +#define WT_VERB_LOG 0x000001000u +#define WT_VERB_LOOKASIDE 0x000002000u +#define WT_VERB_LOOKASIDE_ACTIVITY 0x000004000u +#define WT_VERB_LSM 0x000008000u +#define WT_VERB_LSM_MANAGER 0x000010000u +#define WT_VERB_METADATA 0x000020000u +#define WT_VERB_MUTEX 0x000040000u +#define WT_VERB_OVERFLOW 0x000080000u +#define WT_VERB_READ 0x000100000u +#define WT_VERB_REBALANCE 0x000200000u +#define WT_VERB_RECONCILE 0x000400000u +#define WT_VERB_RECOVERY 0x000800000u +#define WT_VERB_RECOVERY_PROGRESS 0x001000000u +#define WT_VERB_SALVAGE 0x002000000u +#define WT_VERB_SHARED_CACHE 0x004000000u +#define WT_VERB_SPLIT 0x008000000u +#define WT_VERB_TEMPORARY 0x010000000u 
+#define WT_VERB_THREAD_GROUP 0x020000000u +#define WT_VERB_TIMESTAMP 0x040000000u +#define WT_VERB_TRANSACTION 0x080000000u +#define WT_VERB_VERIFY 0x100000000u +#define WT_VERB_VERSION 0x200000000u +#define WT_VERB_WRITE 0x400000000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint64_t verbose; diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 0e5905f491e..33d6660e687 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -217,6 +217,14 @@ struct __wt_cursor_btree { uint8_t append_tree; /* Cursor appended to the tree */ + /* + * We have to restart cursor next/prev after a prepare conflict. Keep + * the state of the cursor separately so we can restart at exactly the + * right point. + */ + enum { WT_CBT_RETRY_NOTSET=0, + WT_CBT_RETRY_INSERT, WT_CBT_RETRY_PAGE } iter_retry; + #ifdef HAVE_DIAGNOSTIC /* Check that cursor next/prev never returns keys out-of-order. */ WT_ITEM *lastkey, _lastkey; diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i index 351a5cd7abe..3c08f808c62 100644 --- a/src/third_party/wiredtiger/src/include/cursor.i +++ b/src/third_party/wiredtiger/src/include/cursor.i @@ -473,45 +473,3 @@ value: __wt_row_leaf_value_cell(session, page, rip, kpack, vpack); return (__wt_page_cell_data_ref(session, cbt->ref->page, vpack, vb)); } -/* - * __cursor_check_prepared_update -- - * Return whether prepared update at current position is visible or not. - */ -static inline int -__cursor_check_prepared_update(WT_CURSOR_BTREE *cbt, bool *visiblep) -{ - WT_SESSION_IMPL *session; - WT_UPDATE *upd; - - session = (WT_SESSION_IMPL *)cbt->iface.session; - /* - * When retrying an operation due to a prepared conflict, the cursor is - * at an update list which resulted in conflict. So, when retrying we - * should examine the same update again instead of iterating to the next - * object. We'll eventually find a valid update, else return - * prepare-conflict until resolved. - */ - WT_RET(__wt_cursor_valid(cbt, &upd, visiblep)); - - /* The update that returned prepared conflict is now visible. */ - F_CLR(cbt, WT_CBT_ITERATE_RETRY_NEXT | WT_CBT_ITERATE_RETRY_PREV); - if (*visiblep) { - /* - * The underlying key-return function uses a comparison value - * of 0 to indicate the search function has pre-built the key - * we want to return. That's not the case, don't take that path. - */ - cbt->compare = 1; - /* - * If a prepared delete operation is resolved, it will be - * visible, but key is not valid. The update will be null in - * that case and we continue with cursor navigation. 
- */ - if (upd != NULL) - WT_RET(__cursor_kv_return(session, cbt, upd)); - else - *visiblep = false; - } - - return (0); -} diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 4fbb1d8065c..2bc60a1a85d 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -621,7 +621,7 @@ extern int __wt_schema_create(WT_SESSION_IMPL *session, const char *uri, const c extern int __wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_schema_get_table_uri(WT_SESSION_IMPL *session, const char *uri, bool ok_incomplete, uint32_t flags, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_schema_get_table(WT_SESSION_IMPL *session, const char *name, size_t namelen, bool ok_incomplete, uint32_t flags, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_schema_destroy_colgroup(WT_SESSION_IMPL *session, WT_COLGROUP **colgroupp); extern int __wt_schema_destroy_index(WT_SESSION_IMPL *session, WT_INDEX **idxp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_schema_close_table(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h index c9de286c34c..32d2433a9af 100644 --- a/src/third_party/wiredtiger/src/include/misc.h +++ b/src/third_party/wiredtiger/src/include/misc.h @@ -24,18 +24,20 @@ #define WT_DIVIDER "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" /* Basic constants. */ -#define WT_THOUSAND (1000) -#define WT_MILLION (1000000) -#define WT_BILLION (1000000000) - -#define WT_MINUTE (60) - -#define WT_KILOBYTE (1024) -#define WT_MEGABYTE (1048576) -#define WT_GIGABYTE (1073741824) -#define WT_TERABYTE ((uint64_t)1099511627776) -#define WT_PETABYTE ((uint64_t)1125899906842624) -#define WT_EXABYTE ((uint64_t)1152921504606846976) +#define WT_THOUSAND (1000) +#define WT_MILLION (1000000) +#define WT_BILLION (1000000000) + +#define WT_MINUTE (60) + +#define WT_PROGRESS_MSG_PERIOD (20) + +#define WT_KILOBYTE (1024) +#define WT_MEGABYTE (1048576) +#define WT_GIGABYTE (1073741824) +#define WT_TERABYTE ((uint64_t)1099511627776) +#define WT_PETABYTE ((uint64_t)1125899906842624) +#define WT_EXABYTE ((uint64_t)1152921504606846976) /* * Sizes that cannot be larger than 2**32 are stored in uint32_t fields in diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index a7b252b0bef..f7b41bc7784 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -40,7 +40,7 @@ struct __wt_hazard { typedef TAILQ_HEAD(__wt_cursor_list, __wt_cursor) WT_CURSOR_LIST; /* Number of cursors cached to trigger cursor sweep. */ -#define WT_SESSION_CURSOR_SWEEP_COUNTDOWN 20 +#define WT_SESSION_CURSOR_SWEEP_COUNTDOWN 40 /* Minimum number of buckets to visit during cursor sweep. 
*/ #define WT_SESSION_CURSOR_SWEEP_MIN 5 diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index 37b0421962b..cce5bd4cbc7 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -319,24 +319,28 @@ struct __wt_txn { const char *rollback_reason; /* If rollback, the reason */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_TXN_AUTOCOMMIT 0x00001u -#define WT_TXN_ERROR 0x00002u -#define WT_TXN_HAS_ID 0x00004u -#define WT_TXN_HAS_SNAPSHOT 0x00008u -#define WT_TXN_HAS_TS_COMMIT 0x00010u -#define WT_TXN_HAS_TS_READ 0x00020u -#define WT_TXN_IGNORE_PREPARE 0x00040u -#define WT_TXN_NAMED_SNAPSHOT 0x00080u -#define WT_TXN_PREPARE 0x00100u -#define WT_TXN_PUBLIC_TS_COMMIT 0x00200u -#define WT_TXN_PUBLIC_TS_READ 0x00400u -#define WT_TXN_READONLY 0x00800u -#define WT_TXN_RUNNING 0x01000u -#define WT_TXN_SYNC_SET 0x02000u -#define WT_TXN_TS_COMMIT_ALWAYS 0x04000u -#define WT_TXN_TS_COMMIT_KEYS 0x08000u -#define WT_TXN_TS_COMMIT_NEVER 0x10000u -#define WT_TXN_UPDATE 0x20000u +#define WT_TXN_AUTOCOMMIT 0x000001u +#define WT_TXN_ERROR 0x000002u +#define WT_TXN_HAS_ID 0x000004u +#define WT_TXN_HAS_SNAPSHOT 0x000008u +#define WT_TXN_HAS_TS_COMMIT 0x000010u +#define WT_TXN_HAS_TS_DURABLE 0x000020u +#define WT_TXN_HAS_TS_READ 0x000040u +#define WT_TXN_IGNORE_PREPARE 0x000080u +#define WT_TXN_NAMED_SNAPSHOT 0x000100u +#define WT_TXN_PREPARE 0x000200u +#define WT_TXN_PUBLIC_TS_COMMIT 0x000400u +#define WT_TXN_PUBLIC_TS_READ 0x000800u +#define WT_TXN_READONLY 0x001000u +#define WT_TXN_RUNNING 0x002000u +#define WT_TXN_SYNC_SET 0x004000u +#define WT_TXN_TS_COMMIT_ALWAYS 0x008000u +#define WT_TXN_TS_COMMIT_KEYS 0x010000u +#define WT_TXN_TS_COMMIT_NEVER 0x020000u +#define WT_TXN_TS_DURABLE_ALWAYS 0x040000u +#define WT_TXN_TS_DURABLE_KEYS 0x080000u +#define WT_TXN_TS_DURABLE_NEVER 0x100000u +#define WT_TXN_UPDATE 0x200000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 7ba90887513..6ce224ec65a 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -35,6 +35,12 @@ __wt_txn_timestamp_flags(WT_SESSION_IMPL *session) F_SET(&session->txn, WT_TXN_TS_COMMIT_KEYS); if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER)) F_SET(&session->txn, WT_TXN_TS_COMMIT_NEVER); + if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_ALWAYS)) + F_SET(&session->txn, WT_TXN_TS_DURABLE_ALWAYS); + if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_KEYS)) + F_SET(&session->txn, WT_TXN_TS_DURABLE_KEYS); + if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_NEVER)) + F_SET(&session->txn, WT_TXN_TS_DURABLE_NEVER); } /* diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 332bd58f7e6..c2fcb720575 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -2410,14 +2410,15 @@ struct __wt_connection { * "verbose=[evictserver\,read]"., a list\, with values * chosen from the following options: \c "api"\, \c "block"\, \c * "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c - * "error_returns"\, \c "evict"\, \c "evict_stuck"\, \c "evictserver"\, - * \c "fileops"\, \c "handleops"\, \c "log"\, \c "lookaside"\, \c - * "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, - * \c 
"mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c - * "reconcile"\, \c "recovery"\, \c "recovery_progress"\, \c "salvage"\, - * \c "shared_cache"\, \c "split"\, \c "temporary"\, \c "thread_group"\, - * \c "timestamp"\, \c "transaction"\, \c "verify"\, \c "version"\, \c - * "write"; default empty.} + * "compact_progress"\, \c "error_returns"\, \c "evict"\, \c + * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c + * "log"\, \c "lookaside"\, \c "lookaside_activity"\, \c "lsm"\, \c + * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c + * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c + * "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, + * \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c + * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default + * empty.} * @configend * @errors */ @@ -3120,14 +3121,14 @@ struct __wt_connection { * @config{verbose, enable messages for various events. Options are given as a * list\, such as "verbose=[evictserver\,read]"., a list\, with * values chosen from the following options: \c "api"\, \c "block"\, \c - * "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c "error_returns"\, - * \c "evict"\, \c "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c - * "handleops"\, \c "log"\, \c "lookaside"\, \c "lookaside_activity"\, \c - * "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c - * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c - * "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c - * "temporary"\, \c "thread_group"\, \c "timestamp"\, \c "transaction"\, \c - * "verify"\, \c "version"\, \c "write"; default empty.} + * "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c + * "compact_progress"\, \c "error_returns"\, \c "evict"\, \c "evict_stuck"\, \c + * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lookaside"\, + * \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c + * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c + * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c + * "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c + * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.} * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to * files. Ignored on non-Windows systems. Options are given as a list\, such * as "write_through=[data]". 
Configuring \c write_through requires diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index d93f6a3be7f..87437cd9ed3 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -393,9 +393,9 @@ typedef uint64_t wt_timestamp_t; #include "buf.i" /* required by cell.i */ #include "cache.i" /* required by txn.i */ +#include "mutex.i" /* required by txn.i */ +#include "txn.i" /* required by cell.i */ #include "cell.i" /* required by btree.i */ -#include "mutex.i" /* required by btree.i */ -#include "txn.i" /* required by btree.i */ #include "bitstring.i" #include "btree.i" /* required by cursor.i */ diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c index 3bd717b35cf..3531440e76f 100644 --- a/src/third_party/wiredtiger/src/meta/meta_turtle.c +++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c @@ -189,11 +189,12 @@ int __wt_turtle_init(WT_SESSION_IMPL *session) { WT_DECL_RET; - char *metaconf; - bool exist_backup, exist_incr, exist_isrc, exist_turtle, load; + char *metaconf, *unused_value; + bool exist_backup, exist_incr, exist_isrc, exist_turtle; + bool load, loadTurtle; metaconf = NULL; - load = false; + load = loadTurtle = false; /* * Discard any turtle setup file left-over from previous runs. This @@ -202,6 +203,7 @@ __wt_turtle_init(WT_SESSION_IMPL *session) WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false)); /* + * If we found a corrupted turtle file, then delete it and create a new. * We could die after creating the turtle file and before creating the * metadata file, or worse, the metadata file might be in some random * state. Make sure that doesn't happen: if we don't find the turtle @@ -220,6 +222,21 @@ __wt_turtle_init(WT_SESSION_IMPL *session) WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist_backup)); WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist_turtle)); if (exist_turtle) { + /* + * Failure to read means a bad turtle file. Remove it and create + * a new turtle file. + */ + if (F_ISSET(S2C(session), WT_CONN_SALVAGE)) + WT_WITH_TURTLE_LOCK(session, + ret = __wt_turtle_read(session, + WT_METAFILE_URI, &unused_value)); + + if (ret != 0) { + WT_RET(__wt_remove_if_exists( + session, WT_METADATA_TURTLE, false)); + loadTurtle = true; + } + /* * We need to detect the difference between a source database * that may have crashed with an incremental backup file @@ -258,7 +275,9 @@ __wt_turtle_init(WT_SESSION_IMPL *session) /* Create any bulk-loaded file stubs. */ WT_RET(__metadata_load_bulk(session)); + } + if (load || loadTurtle) { /* Create the turtle file. */ WT_RET(__metadata_config(session, &metaconf)); WT_WITH_TURTLE_LOCK(session, ret = @@ -329,8 +348,11 @@ err: WT_TRET(__wt_fclose(session, &fs)); * A file error or a missing key/value pair in the turtle file means * something has gone horribly wrong, except for the compatibility * setting which is optional. + * Failure to read the turtle file when salvaging means it can't be + * used for salvage. 
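/*
 * Summarizing the __wt_turtle_init() change above: when running salvage, the
 * turtle file is read back and, if the read fails, the corrupted file is
 * removed and a fresh one is written alongside the normal metadata setup.
 * Below is a standalone sketch of that validate-else-recreate pattern for a
 * small text file; the file name, expected key and default contents are
 * placeholders, not WiredTiger's.
 */
#include <stdio.h>
#include <string.h>

#define CONF_FILE    "example.conf"     /* placeholder name */
#define CONF_DEFAULT "version=1\n"

/* Return 0 if the file exists and contains the expected key, else -1. */
static int
conf_validate(const char *path)
{
    char buf[256];
    FILE *fp;

    if ((fp = fopen(path, "r")) == NULL)
        return (-1);
    if (fgets(buf, sizeof(buf), fp) == NULL ||
        strstr(buf, "version=") == NULL) {
        (void)fclose(fp);
        return (-1);
    }
    return (fclose(fp) == 0 ? 0 : -1);
}

/* If validation fails, remove the corrupted file and write defaults. */
static int
conf_load_or_recreate(const char *path)
{
    FILE *fp;

    if (conf_validate(path) == 0)
        return (0);
    (void)remove(path);                     /* discard the corrupted copy */
    if ((fp = fopen(path, "w")) == NULL)
        return (-1);
    if (fputs(CONF_DEFAULT, fp) == EOF) {
        (void)fclose(fp);
        return (-1);
    }
    return (fclose(fp) == 0 ? 0 : -1);
}

int
main(void)
{
    return (conf_load_or_recreate(CONF_FILE) == 0 ? 0 : 1);
}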
*/ - if (ret == 0 || strcmp(key, WT_METADATA_COMPAT) == 0) + if (ret == 0 || strcmp(key, WT_METADATA_COMPAT) == 0 || + F_ISSET(S2C(session), WT_CONN_SALVAGE)) return (ret); WT_PANIC_RET(session, ret, "%s: fatal turtle file read error", WT_METADATA_TURTLE); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 866eab349b7..a63d3eab361 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -295,9 +295,10 @@ static int __rec_cell_build_int_key(WT_SESSION_IMPL *, static int __rec_cell_build_leaf_key(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, bool *); static int __rec_cell_build_ovfl(WT_SESSION_IMPL *, WT_RECONCILE *, - WT_KV *, uint8_t, wt_timestamp_t, wt_timestamp_t, uint64_t); + WT_KV *, uint8_t, wt_timestamp_t *, wt_timestamp_t *, uint64_t); static int __rec_cell_build_val(WT_SESSION_IMPL *, WT_RECONCILE *, - const void *, size_t, wt_timestamp_t, wt_timestamp_t, uint64_t); + const void *, size_t, wt_timestamp_t *, wt_timestamp_t *, + uint64_t); static void __rec_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *); static int __rec_col_fix(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *); static int __rec_col_fix_slvg(WT_SESSION_IMPL *, @@ -1933,7 +1934,8 @@ __rec_image_copy(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *kv) */ static int __rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, - wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, WT_KV *val) + wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, + uint64_t rle, WT_KV *val) { WT_DICTIONARY *dp; uint64_t offset; @@ -1976,7 +1978,7 @@ __rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, offset = (uint64_t)WT_PTRDIFF(r->first_free, (uint8_t *)r->cur_ptr->image.mem + dp->offset); val->len = val->cell_len = __wt_cell_pack_copy( - session, &val->cell, start_ts, stop_ts, rle, offset); + session, &val->cell, start_tsp, stop_tsp, rle, offset); val->buf.data = NULL; val->buf.size = 0; } @@ -3473,18 +3475,21 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_CURSOR *cursor; WT_KV *key, *val; WT_RECONCILE *r; + wt_timestamp_t start_ts, stop_ts; bool ovfl_key; r = cbulk->reconcile; btree = S2BT(session); cursor = &cbulk->cbt.iface; + start_ts = WT_TS_NONE; + stop_ts = WT_TS_MAX; key = &r->k; val = &r->v; WT_RET(__rec_cell_build_leaf_key(session, r, /* Build key cell */ cursor->key.data, cursor->key.size, &ovfl_key)); WT_RET(__rec_cell_build_val(session, r, /* Build value cell */ - cursor->value.data, cursor->value.size, WT_TS_NONE, WT_TS_MAX, 0)); + cursor->value.data, cursor->value.size, &start_ts, &stop_ts, 0)); /* Boundary: split or write the page. */ if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) { @@ -3511,10 +3516,10 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) r->all_empty_value = false; if (btree->dictionary) WT_RET(__rec_dict_replace( - session, r, WT_TS_NONE, WT_TS_MAX, 0, val)); + session, r, &start_ts, &stop_ts, 0, val)); __rec_image_copy(session, r, val); } - __rec_addr_ts_update(r, WT_TS_NONE, WT_TS_NONE, WT_TS_MAX); + __rec_addr_ts_update(r, start_ts, start_ts, stop_ts); /* Update compression state. 
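/*
 * The cell-building and cell-packing calls above now receive wt_timestamp_t
 * pointers instead of values, and the callers reuse start_ts/stop_ts
 * afterwards in __rec_addr_ts_update(). The apparent intent, inferred from
 * those call sites rather than stated anywhere in this patch, is that the
 * packer may adjust a timestamp it chooses not to write and the caller then
 * aggregates the adjusted value. A simplified sketch of that in/out-parameter
 * pattern follows, with a made-up packing rule.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t timestamp_t;
#define TS_NONE ((timestamp_t)0)

/*
 * Pack a fictional cell: if the start timestamp is at or below the "stable"
 * point, don't store it and clear the caller's copy so later aggregation
 * matches what was actually written.
 */
static size_t
pack_cell(timestamp_t stable, timestamp_t *start_tsp, size_t value_len)
{
    size_t cell_len = 1 + value_len;        /* 1-byte descriptor, say */

    if (*start_tsp <= stable)
        *start_tsp = TS_NONE;               /* nothing to store */
    else
        cell_len += sizeof(timestamp_t);
    return (cell_len);
}

int
main(void)
{
    timestamp_t start_ts = 5;
    size_t len = pack_cell(10, &start_ts, 20);

    /* The caller aggregates the adjusted timestamp, not the original. */
    printf("len=%zu start_ts=%llu\n", len, (unsigned long long)start_ts);
    return (0);
}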
*/ __rec_key_state_update(r, ovfl_key); @@ -3632,14 +3637,17 @@ __wt_bulk_insert_var( WT_BTREE *btree; WT_KV *val; WT_RECONCILE *r; + wt_timestamp_t start_ts, stop_ts; r = cbulk->reconcile; btree = S2BT(session); + start_ts = WT_TS_NONE; + stop_ts = WT_TS_MAX; val = &r->v; if (deleted) { val->cell_len = __wt_cell_pack_del( - session, &val->cell, WT_TS_NONE, WT_TS_MAX, cbulk->rle); + session, &val->cell, &start_ts, &stop_ts, cbulk->rle); val->buf.data = NULL; val->buf.size = 0; val->len = val->cell_len; @@ -3651,7 +3659,7 @@ __wt_bulk_insert_var( */ WT_RET(__rec_cell_build_val(session, r, cbulk->last.data, cbulk->last.size, - WT_TS_NONE, WT_TS_MAX, cbulk->rle)); + &start_ts, &stop_ts, cbulk->rle)); /* Boundary: split or write the page. */ if (WT_CROSSING_SPLIT_BND(r, val->len)) @@ -3660,9 +3668,9 @@ __wt_bulk_insert_var( /* Copy the value onto the page. */ if (btree->dictionary) WT_RET(__rec_dict_replace( - session, r, WT_TS_NONE, WT_TS_MAX, cbulk->rle, val)); + session, r, &start_ts, &stop_ts, cbulk->rle, val)); __rec_image_copy(session, r, val); - __rec_addr_ts_update(r, WT_TS_NONE, WT_TS_NONE, WT_TS_MAX); + __rec_addr_ts_update(r, start_ts, start_ts, stop_ts); /* Update the starting record number in case we split. */ r->recno += cbulk->rle; @@ -4090,19 +4098,19 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, if (deleted) { val->cell_len = __wt_cell_pack_del( - session, &val->cell, start_ts, stop_ts, rle); + session, &val->cell, &start_ts, &stop_ts, rle); val->buf.data = NULL; val->buf.size = 0; val->len = val->cell_len; } else if (overflow_type) { val->cell_len = __wt_cell_pack_ovfl(session, &val->cell, - WT_CELL_VALUE_OVFL, start_ts, stop_ts, rle, value->size); + WT_CELL_VALUE_OVFL, &start_ts, &stop_ts, rle, value->size); val->buf.data = value->data; val->buf.size = value->size; val->len = val->cell_len + value->size; } else WT_RET(__rec_cell_build_val(session, - r, value->data, value->size, start_ts, stop_ts, rle)); + r, value->data, value->size, &start_ts, &stop_ts, rle)); /* Boundary: split or write the page. */ if (__rec_need_split(r, val->len)) @@ -4111,7 +4119,7 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, /* Copy the value onto the page. */ if (!deleted && !overflow_type && btree->dictionary) WT_RET(__rec_dict_replace( - session, r, start_ts, stop_ts, rle, val)); + session, r, &start_ts, &stop_ts, rle, val)); __rec_image_copy(session, r, val); __rec_addr_ts_update(r, start_ts, start_ts, stop_ts); @@ -4985,6 +4993,27 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) return (0); } +/* + * __rec_row_zero_len -- + * Return if a zero-length item can be written. + */ +static bool +__rec_row_zero_len(WT_SESSION_IMPL *session, + uint64_t txnid, wt_timestamp_t start_ts, wt_timestamp_t stop_ts) +{ + /* Before timestamps were stored on pages, it was always possible. */ + if (!__wt_process.page_version_ts) + return (true); + + /* + * The item must be globally visible because we're not writing anything + * on the page. + */ + return ((start_ts == WT_TS_NONE || + __wt_txn_visible_all(session, txnid, start_ts)) && + stop_ts == WT_TS_MAX); +} + /* * __rec_row_leaf -- * Reconcile a row-store leaf page. 
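/*
 * __rec_row_zero_len() above lets a zero-length value be omitted from the
 * page only when nothing about it needs to be recorded: its start timestamp
 * is unset or globally visible, and it has no stop timestamp. The predicate
 * restated as a standalone function, with global visibility reduced to a
 * plain timestamp comparison for illustration.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t timestamp_t;

#define TS_NONE ((timestamp_t)0)    /* "no timestamp" */
#define TS_MAX  UINT64_MAX          /* "no stop timestamp" */

/* A zero-length value can be skipped only if it is visible to everyone. */
static bool
zero_len_skippable(timestamp_t oldest, timestamp_t start_ts, timestamp_t stop_ts)
{
    return ((start_ts == TS_NONE || start_ts <= oldest) && stop_ts == TS_MAX);
}

int
main(void)
{
    printf("%d\n", zero_len_skippable(10, TS_NONE, TS_MAX)); /* 1 */
    printf("%d\n", zero_len_skippable(10, 5, TS_MAX));       /* 1 */
    printf("%d\n", zero_len_skippable(10, 5, 20));           /* 0 */
    return (0);
}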
@@ -5110,8 +5139,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session, p = tmpval->data; size = tmpval->size; } - WT_ERR(__rec_cell_build_val( - session, r, p, size, start_ts, stop_ts, 0)); + WT_ERR(__rec_cell_build_val(session, + r, p, size, &start_ts, &stop_ts, 0)); dictionary = true; } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) { /* @@ -5157,7 +5186,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, */ WT_ERR(__rec_cell_build_val(session, r, "ovfl-unused", strlen("ovfl-unused"), - start_ts, stop_ts, 0)); + &start_ts, &stop_ts, 0)); } else { val->buf.data = vpack->cell; val->buf.size = __wt_cell_total_len(vpack); @@ -5185,14 +5214,14 @@ __rec_row_leaf(WT_SESSION_IMPL *session, WT_ERR(__rec_cell_build_val(session, r, cbt->iface.value.data, cbt->iface.value.size, - start_ts, stop_ts, 0)); + &start_ts, &stop_ts, 0)); dictionary = true; break; case WT_UPDATE_STANDARD: /* Take the value from the update. */ WT_ERR(__rec_cell_build_val(session, r, upd->data, upd->size, - start_ts, stop_ts, 0)); + &start_ts, &stop_ts, 0)); dictionary = true; break; case WT_UPDATE_TOMBSTONE: @@ -5338,20 +5367,16 @@ build: session, r, key->len + val->len)); } - /* - * Copy the key/value pair onto the page. Zero-length items must - * be globally visible as we're writing nothing to the page. - */ + /* Copy the key/value pair onto the page. */ __rec_image_copy(session, r, key); if (val->len == 0 && - (!__wt_process.page_version_ts || - __wt_txn_visible_all(session, txnid, stop_ts))) + __rec_row_zero_len(session, txnid, start_ts, stop_ts)) r->any_empty_value = true; else { r->all_empty_value = false; if (dictionary && btree->dictionary) WT_ERR(__rec_dict_replace( - session, r, start_ts, stop_ts, 0, val)); + session, r, &start_ts, &stop_ts, 0, val)); __rec_image_copy(session, r, val); } __rec_addr_ts_update(r, start_ts, start_ts, stop_ts); @@ -5438,12 +5463,12 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL))); WT_RET(__rec_cell_build_val(session, r, cbt->iface.value.data, cbt->iface.value.size, - start_ts, stop_ts, 0)); + &start_ts, &stop_ts, 0)); break; case WT_UPDATE_STANDARD: /* Take the value from the update. */ WT_RET(__rec_cell_build_val(session, r, - upd->data, upd->size, start_ts, stop_ts, 0)); + upd->data, upd->size, &start_ts, &stop_ts, 0)); break; case WT_UPDATE_TOMBSTONE: continue; @@ -5472,20 +5497,16 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) session, r, key->len + val->len)); } - /* - * Copy the key/value pair onto the page. Zero-length items must - * be globally visible as we're writing nothing to the page. - */ + /* Copy the key/value pair onto the page. */ __rec_image_copy(session, r, key); if (val->len == 0 && - (!__wt_process.page_version_ts && - __wt_txn_visible_all(session, txnid, stop_ts))) + __rec_row_zero_len(session, txnid, start_ts, stop_ts)) r->any_empty_value = true; else { r->all_empty_value = false; if (btree->dictionary) WT_RET(__rec_dict_replace( - session, r, start_ts, stop_ts, 0, val)); + session, r, &start_ts, &stop_ts, 0, val)); __rec_image_copy(session, r, val); } __rec_addr_ts_update(r, start_ts, start_ts, stop_ts); @@ -5909,12 +5930,13 @@ __rec_cell_build_int_key(WT_SESSION_IMPL *session, { WT_BTREE *btree; WT_KV *key; + wt_timestamp_t start_ts, stop_ts; *is_ovflp = false; btree = S2BT(session); - key = &r->k; + start_ts = stop_ts = WT_TS_NONE; /* Keys aren't timestamped. */ /* Copy the bytes into the "current" and key buffers. 
*/ WT_RET(__wt_buf_set(session, r->cur, data, size)); @@ -5926,7 +5948,7 @@ __rec_cell_build_int_key(WT_SESSION_IMPL *session, *is_ovflp = true; return (__rec_cell_build_ovfl(session, r, - key, WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TS_NONE, 0)); + key, WT_CELL_KEY_OVFL, &start_ts, &stop_ts, 0)); } key->cell_len = __wt_cell_pack_int_key(&key->cell, key->buf.size); @@ -5946,6 +5968,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session, { WT_BTREE *btree; WT_KV *key; + wt_timestamp_t start_ts, stop_ts; size_t pfx_max; const uint8_t *a, *b; uint8_t pfx; @@ -5953,7 +5976,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session, *is_ovflp = false; btree = S2BT(session); - + start_ts = stop_ts = WT_TS_NONE; /* Keys aren't timestamped. */ key = &r->k; pfx = 0; @@ -6026,7 +6049,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session, *is_ovflp = true; return (__rec_cell_build_ovfl(session, r, key, - WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TS_NONE, 0)); + WT_CELL_KEY_OVFL, &start_ts, &stop_ts, 0)); } return ( __rec_cell_build_leaf_key(session, r, NULL, 0, is_ovflp)); @@ -6104,7 +6127,7 @@ __rec_cell_build_addr(WT_SESSION_IMPL *session, static int __rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *data, size_t size, - wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle) + wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, uint64_t rle) { WT_BTREE *btree; WT_KV *val; @@ -6133,11 +6156,11 @@ __rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_STAT_DATA_INCR(session, rec_overflow_value); return (__rec_cell_build_ovfl(session, r, - val, WT_CELL_VALUE_OVFL, start_ts, stop_ts, rle)); + val, WT_CELL_VALUE_OVFL, start_tsp, stop_tsp, rle)); } } - val->cell_len = __wt_cell_pack_data( - session, &val->cell, start_ts, stop_ts, rle, val->buf.size); + val->cell_len = __wt_cell_pack_value( + session, &val->cell, start_tsp, stop_tsp, rle, val->buf.size); val->len = val->cell_len + val->buf.size; return (0); @@ -6150,7 +6173,7 @@ __rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, static int __rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *kv, uint8_t type, - wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle) + wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, uint64_t rle) { WT_BM *bm; WT_BTREE *btree; @@ -6210,7 +6233,7 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session, /* Build the cell and return. 
*/ kv->cell_len = __wt_cell_pack_ovfl( - session, &kv->cell, type, start_ts, stop_ts, rle, kv->buf.size); + session, &kv->cell, type, start_tsp, stop_tsp, rle, kv->buf.size); kv->len = kv->cell_len + kv->buf.size; err: __wt_scr_free(session, &tmp); @@ -6373,7 +6396,7 @@ __rec_dictionary_lookup( hash = __wt_hash_fnv64(val->buf.data, val->buf.size); for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash); dp != NULL && dp->hash == hash; dp = dp->next[0]) { - WT_RET(__wt_cell_pack_data_match( + WT_RET(__wt_cell_pack_value_match( (WT_CELL *)((uint8_t *)r->cur_ptr->image.mem + dp->offset), &val->cell, val->buf.data, &match)); if (match) { diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c index f96f053470d..21b7e6c305c 100644 --- a/src/third_party/wiredtiger/src/schema/schema_create.c +++ b/src/third_party/wiredtiger/src/schema/schema_create.c @@ -278,7 +278,7 @@ err: __wt_free(session, cgconf); __wt_buf_free(session, &namebuf); if (!tracked) - WT_TRET(__wt_schema_release_table(session, table)); + WT_TRET(__wt_schema_release_table(session, &table)); return (ret); } @@ -415,7 +415,7 @@ __create_index(WT_SESSION_IMPL *session, WT_RET_MSG(session, ret, "Can't create an index for table: %.*s", (int)tlen, tablename); - WT_RET(__wt_schema_release_table(session, table)); + WT_RET(__wt_schema_release_table(session, &table)); if ((ret = __wt_schema_get_table( session, tablename, tlen, true, 0, &table)) != 0) @@ -565,7 +565,7 @@ err: __wt_free(session, idxconf); __wt_buf_free(session, &fmt); __wt_buf_free(session, &namebuf); - WT_TRET(__wt_schema_release_table(session, table)); + WT_TRET(__wt_schema_release_table(session, &table)); return (ret); } @@ -636,8 +636,7 @@ __create_table(WT_SESSION_IMPL *session, table = NULL; } -err: if (table != NULL) - WT_TRET(__wt_schema_release_table(session, table)); +err: WT_TRET(__wt_schema_release_table(session, &table)); __wt_free(session, cgname); __wt_free(session, tableconf); return (ret); diff --git a/src/third_party/wiredtiger/src/schema/schema_drop.c b/src/third_party/wiredtiger/src/schema/schema_drop.c index 6b43e93643b..f52caff4a22 100644 --- a/src/third_party/wiredtiger/src/schema/schema_drop.c +++ b/src/third_party/wiredtiger/src/schema/schema_drop.c @@ -131,7 +131,7 @@ __drop_table( */ WT_ERR(__wt_schema_get_table_uri(session, uri, true, WT_DHANDLE_EXCLUSIVE, &table)); - WT_ERR(__wt_schema_release_table(session, table)); + WT_ERR(__wt_schema_release_table(session, &table)); WT_ERR(__wt_schema_get_table_uri(session, uri, true, 0, &table)); /* Drop the column groups. */ @@ -162,7 +162,7 @@ __drop_table( } /* Make sure the table data handle is closed. */ - WT_TRET(__wt_schema_release_table(session, table)); + WT_ERR(__wt_schema_release_table(session, &table)); WT_ERR(__wt_schema_get_table_uri( session, uri, true, WT_DHANDLE_EXCLUSIVE, &table)); F_SET(&table->iface, WT_DHANDLE_DISCARD); @@ -176,8 +176,8 @@ __drop_table( /* Remove the metadata entry (ignore missing items). 
*/ WT_ERR(__wt_metadata_remove(session, uri)); -err: if (table != NULL && !tracked) - WT_TRET(__wt_schema_release_table(session, table)); +err: if (!tracked) + WT_TRET(__wt_schema_release_table(session, &table)); return (ret); } diff --git a/src/third_party/wiredtiger/src/schema/schema_list.c b/src/third_party/wiredtiger/src/schema/schema_list.c index 9d0aae8fb1d..bda188d0ff4 100644 --- a/src/third_party/wiredtiger/src/schema/schema_list.c +++ b/src/third_party/wiredtiger/src/schema/schema_list.c @@ -14,22 +14,21 @@ */ int __wt_schema_get_table_uri(WT_SESSION_IMPL *session, - const char *uri, bool ok_incomplete, uint32_t flags, - WT_TABLE **tablep) + const char *uri, bool ok_incomplete, uint32_t flags, WT_TABLE **tablep) { WT_DATA_HANDLE *saved_dhandle; WT_DECL_RET; WT_TABLE *table; - saved_dhandle = session->dhandle; - *tablep = NULL; + saved_dhandle = session->dhandle; + WT_ERR(__wt_session_get_dhandle(session, uri, NULL, NULL, flags)); table = (WT_TABLE *)session->dhandle; if (!ok_incomplete && !table->cg_complete) { + WT_ERR(__wt_session_release_dhandle(session)); ret = __wt_set_return(session, EINVAL); - WT_TRET(__wt_session_release_dhandle(session)); WT_ERR_MSG(session, ret, "'%s' cannot be used " "until all column groups are created", table->iface.name); @@ -68,9 +67,14 @@ err: __wt_scr_free(session, &namebuf); * Release a table handle. */ int -__wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE *table) +__wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep) { WT_DECL_RET; + WT_TABLE *table; + + if ((table = *tablep) == NULL) + return (0); + *tablep = NULL; WT_WITH_DHANDLE(session, &table->iface, ret = __wt_session_release_dhandle(session)); diff --git a/src/third_party/wiredtiger/src/schema/schema_open.c b/src/third_party/wiredtiger/src/schema/schema_open.c index 89d2232da64..3796b1502b3 100644 --- a/src/third_party/wiredtiger/src/schema/schema_open.c +++ b/src/third_party/wiredtiger/src/schema/schema_open.c @@ -525,12 +525,12 @@ __wt_schema_get_colgroup(WT_SESSION_IMPL *session, *tablep = table; else WT_RET( - __wt_schema_release_table(session, table)); + __wt_schema_release_table(session, &table)); return (0); } } - WT_RET(__wt_schema_release_table(session, table)); + WT_RET(__wt_schema_release_table(session, &table)); if (quiet) WT_RET(ENOENT); WT_RET_MSG(session, ENOENT, "%s not found in table", uri); @@ -576,7 +576,7 @@ __wt_schema_get_index(WT_SESSION_IMPL *session, done: if (invalidate) table->idx_complete = false; -err: WT_TRET(__wt_schema_release_table(session, table)); +err: WT_TRET(__wt_schema_release_table(session, &table)); WT_RET(ret); if (*indexp != NULL) diff --git a/src/third_party/wiredtiger/src/schema/schema_rename.c b/src/third_party/wiredtiger/src/schema/schema_rename.c index 89d2756bdc2..9dbc1e0fce9 100644 --- a/src/third_party/wiredtiger/src/schema/schema_rename.c +++ b/src/third_party/wiredtiger/src/schema/schema_rename.c @@ -236,7 +236,7 @@ __rename_table(WT_SESSION_IMPL *session, table->indices[i]->name, cfg)); /* Make sure the table data handle is closed. 
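/*
 * __wt_schema_release_table() now takes a WT_TABLE ** so it can tolerate a
 * NULL table and clear the caller's pointer, which is why the call sites in
 * this patch drop their "if (table != NULL)" guards and call it
 * unconditionally on error paths. A generic sketch of that release-and-clear
 * idiom with a toy handle type.
 */
#include <stdio.h>
#include <stdlib.h>

struct table {
    char *name;
};

/* Release a handle and clear the caller's pointer; safe to call repeatedly. */
static int
table_release(struct table **tablep)
{
    struct table *table;

    if ((table = *tablep) == NULL)
        return (0);
    *tablep = NULL;

    free(table->name);
    free(table);
    return (0);
}

int
main(void)
{
    struct table *t;

    if ((t = calloc(1, sizeof(*t))) == NULL)
        return (1);

    (void)table_release(&t);
    (void)table_release(&t);        /* second call is a harmless no-op */
    printf("%p\n", (void *)t);      /* prints a null pointer */
    return (0);
}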
*/ - WT_TRET(__wt_schema_release_table(session, table)); + WT_ERR(__wt_schema_release_table(session, &table)); WT_ERR(__wt_schema_get_table_uri( session, uri, true, WT_DHANDLE_EXCLUSIVE, &table)); F_SET(&table->iface, WT_DHANDLE_DISCARD); @@ -251,7 +251,7 @@ __rename_table(WT_SESSION_IMPL *session, ret = __metadata_rename(session, uri, newuri); err: if (!tracked) - WT_TRET(__wt_schema_release_table(session, table)); + WT_TRET(__wt_schema_release_table(session, &table)); return (ret); } diff --git a/src/third_party/wiredtiger/src/schema/schema_stat.c b/src/third_party/wiredtiger/src/schema/schema_stat.c index 2999f92d7b6..e8535513d01 100644 --- a/src/third_party/wiredtiger/src/schema/schema_stat.c +++ b/src/third_party/wiredtiger/src/schema/schema_stat.c @@ -184,7 +184,7 @@ __wt_curstat_table_init(WT_SESSION_IMPL *session, __wt_curstat_dsrc_final(cst); -err: WT_TRET(__wt_schema_release_table(session, table)); +err: WT_TRET(__wt_schema_release_table(session, &table)); __wt_scr_free(session, &buf); return (ret); diff --git a/src/third_party/wiredtiger/src/schema/schema_truncate.c b/src/third_party/wiredtiger/src/schema/schema_truncate.c index 40511b0572a..0c60a327d9c 100644 --- a/src/third_party/wiredtiger/src/schema/schema_truncate.c +++ b/src/third_party/wiredtiger/src/schema/schema_truncate.c @@ -34,7 +34,7 @@ __truncate_table(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_ERR(__wt_schema_truncate( session, table->indices[i]->source, cfg)); -err: WT_TRET(__wt_schema_release_table(session, table)); +err: WT_TRET(__wt_schema_release_table(session, &table)); return (ret); } diff --git a/src/third_party/wiredtiger/src/schema/schema_worker.c b/src/third_party/wiredtiger/src/schema/schema_worker.c index eff8e7a83e1..a9362c228fe 100644 --- a/src/third_party/wiredtiger/src/schema/schema_worker.c +++ b/src/third_party/wiredtiger/src/schema/schema_worker.c @@ -152,7 +152,6 @@ __wt_schema_worker(WT_SESSION_IMPL *session, } else WT_ERR(__wt_bad_object_type(session, uri)); -err: if (table != NULL) - WT_TRET(__wt_schema_release_table(session, table)); +err: WT_TRET(__wt_schema_release_table(session, &table)); return (ret); } diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index e90d37ce6cb..379ec69c77d 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -1016,7 +1016,11 @@ __session_reset(WT_SESSION *wt_session) WT_TRET(__wt_session_reset_cursors(session, true)); - WT_TRET(__wt_session_cursor_cache_sweep(session)); + if (--session->cursor_sweep_countdown == 0) { + session->cursor_sweep_countdown = + WT_SESSION_CURSOR_SWEEP_COUNTDOWN; + WT_TRET(__wt_session_cursor_cache_sweep(session)); + } /* Release common session resources. */ WT_TRET(__wt_session_release_resources(session)); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 13fd1ee1233..f3d9f762750 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -576,17 +576,21 @@ __wt_txn_release(WT_SESSION_IMPL *session) } /* - * __txn_commit_timestamp_validate -- - * Validate that timestamp provided to commit is legal. + * __txn_commit_timestamps_validate -- + * Validate that timestamps provided to commit are legal. 
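/*
 * The __session_reset() change above runs the cached-cursor sweep only once
 * every WT_SESSION_CURSOR_SWEEP_COUNTDOWN resets, decrementing a per-session
 * counter and reloading it when it hits zero. A tiny standalone sketch of
 * that countdown gate; the counter value matches the new default, everything
 * else is simplified.
 */
#include <stdint.h>
#include <stdio.h>

#define SWEEP_COUNTDOWN 40

struct session {
    uint32_t sweep_countdown;
    uint32_t sweeps;        /* how many sweeps actually ran */
};

/* Called on every "reset"; the expensive work runs on every 40th call. */
static void
session_reset(struct session *s)
{
    if (--s->sweep_countdown == 0) {
        s->sweep_countdown = SWEEP_COUNTDOWN;
        s->sweeps++;        /* stand-in for the cursor-cache sweep */
    }
}

int
main(void)
{
    struct session s = { SWEEP_COUNTDOWN, 0 };
    int i;

    for (i = 0; i < 400; i++)
        session_reset(&s);
    printf("%u sweeps\n", s.sweeps);    /* 10 */
    return (0);
}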
*/ static inline int -__txn_commit_timestamp_validate(WT_SESSION_IMPL *session) +__txn_commit_timestamps_validate(WT_SESSION_IMPL *session) { + WT_CURSOR *cursor; + WT_DECL_RET; WT_TXN *txn; WT_TXN_OP *op; WT_UPDATE *upd; wt_timestamp_t op_timestamp; u_int i; + const char *open_cursor_cfg[] = { + WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL }; bool op_zero_ts, upd_zero_ts; txn = &session->txn; @@ -604,11 +608,21 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session) txn->mod_count != 0) WT_RET_MSG(session, EINVAL, "no commit_timestamp required and " "timestamp set on this transaction"); + if (F_ISSET(txn, WT_TXN_TS_DURABLE_ALWAYS) && + !F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) && + txn->mod_count != 0) + WT_RET_MSG(session, EINVAL, "durable_timestamp required and " + "none set on this transaction"); + if (F_ISSET(txn, WT_TXN_TS_DURABLE_NEVER) && + F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) && + txn->mod_count != 0) + WT_RET_MSG(session, EINVAL, "no durable_timestamp required and " + "durable timestamp set on this transaction"); /* * If we're not doing any key consistency checking, we're done. */ - if (!F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS)) + if (!F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS | WT_TXN_TS_DURABLE_KEYS)) return (0); /* @@ -618,11 +632,36 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session) for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) if (op->type == WT_TXN_OP_BASIC_COL || op->type == WT_TXN_OP_BASIC_ROW) { + /* + * Search for prepared updates, so that they will be + * restored, if moved to lookaside. + */ + if (F_ISSET(txn, WT_TXN_PREPARE)) { + WT_RET(__wt_open_cursor(session, + op->btree->dhandle->name, NULL, + open_cursor_cfg, &cursor)); + F_CLR(txn, WT_TXN_PREPARE); + if (op->type == WT_TXN_OP_BASIC_ROW) + __wt_cursor_set_raw_key( + cursor, &op->u.op_row.key); + else + ((WT_CURSOR_BTREE*)cursor)->iface.recno + = op->u.op_col.recno; + F_SET(txn, WT_TXN_PREPARE); + WT_WITH_BTREE(session, op->btree, + ret = __wt_btcur_search_uncommitted( + (WT_CURSOR_BTREE *)cursor, &upd)); + WT_TRET(cursor->close(cursor)); + if (ret != 0) + WT_RET_MSG(session, EINVAL, + "prepared update restore failed"); + op->u.op_upd = upd; + } else + upd = op->u.op_upd->next; /* * Skip over any aborted update structures or ones * from our own transaction. */ - upd = op->u.op_upd->next; while (upd != NULL && (upd->txnid == WT_TXN_ABORTED || upd->txnid == txn->id)) upd = upd->next; @@ -662,9 +701,14 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session) */ if (op_timestamp == WT_TS_NONE) op_timestamp = txn->commit_timestamp; - if (op_timestamp < upd->start_ts) + if (F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS) && + op_timestamp < upd->start_ts) WT_RET_MSG(session, EINVAL, - "out of order timestamps"); + "out of order commit timestamps"); + if (F_ISSET(txn, WT_TXN_TS_DURABLE_KEYS) && + txn->durable_timestamp < upd->durable_ts) + WT_RET_MSG(session, EINVAL, + "out of order durable timestamps"); } return (0); } @@ -699,6 +743,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) txn->mod_count == 0); readonly = txn->mod_count == 0; + + prepare = F_ISSET(txn, WT_TXN_PREPARE); /* Look for a commit timestamp. */ WT_ERR( __wt_config_gets_def(session, cfg, "commit_timestamp", 0, &cval)); @@ -709,45 +755,43 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) * than stable timestamp. 
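/*
 * Collecting the durable-timestamp rules enforced above: a prepared
 * transaction must supply a commit timestamp and, absent an explicit
 * durable_timestamp, uses the commit timestamp as its durable timestamp; a
 * non-prepared transaction must not supply durable_timestamp at all; and a
 * durable timestamp may not precede the commit timestamp. A small checker
 * restating those rules with simplified flags, not the WT_TXN structure.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t timestamp_t;

struct txn {
    bool prepared;
    bool has_commit_ts;
    bool has_durable_ts;
    timestamp_t commit_ts, durable_ts;
};

/* Return NULL if the combination is legal, otherwise an error string. */
static const char *
commit_ts_check(const struct txn *txn)
{
    if (txn->prepared && !txn->has_commit_ts)
        return ("commit_timestamp is required for a prepared transaction");
    if (!txn->prepared && txn->has_durable_ts)
        return ("durable_timestamp should not be given for "
            "non-prepared transaction");
    if (txn->has_durable_ts && txn->durable_ts < txn->commit_ts)
        return ("durable timestamp older than the commit timestamp");
    return (NULL);
}

int
main(void)
{
    struct txn ok = { true, true, true, 10, 12 };
    struct txn bad = { false, true, true, 10, 12 };
    const char *msg;

    printf("%s\n", commit_ts_check(&ok) == NULL ? "ok" : "error");
    if ((msg = commit_ts_check(&bad)) != NULL)
        printf("error: %s\n", msg);
    return (0);
}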
*/ WT_ERR(__wt_timestamp_validate( - session, "commit", ts, &cval, false)); + session, "commit", ts, &cval, !prepare)); txn->commit_timestamp = ts; __wt_txn_set_commit_timestamp(session); + if (!prepare) + txn->durable_timestamp = txn->commit_timestamp; } - prepare = F_ISSET(txn, WT_TXN_PREPARE); if (prepare && !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) WT_ERR_MSG(session, EINVAL, "commit_timestamp is required for a prepared transaction"); - /* Durable timestamp is required for a prepared transaction. */ - if (prepare) { - WT_ERR(__wt_config_gets_def( - session, cfg, "durable_timestamp", 0, &cval)); - if (cval.len != 0) { - WT_ERR(__wt_txn_parse_timestamp( - session, "durable", &ts, &cval)); - WT_ERR(__wt_timestamp_validate( - session, "durable", ts, &cval, true)); - txn->durable_timestamp = ts; - } else - /* - * If durable timestamp is not given, commit timestamp - * will be considered as durable timestamp. - * TODO : error if durable timestamp is not given. - */ - txn->durable_timestamp = txn->commit_timestamp; - - } else - txn->durable_timestamp = txn->commit_timestamp; - - /* Durable timestamp should be later than stable timestamp. */ - if (cval.len != 0) + /* + * Durable timestamp is required for a prepared transaction. + * If durable timestamp is not given, commit timestamp will be + * considered as durable timestamp. We don't flag error if durable + * timestamp is not specified for prepared transactions, but will flag + * error if durable timestamp is specified for non-prepared + * transactions. + */ + WT_ERR(__wt_config_gets_def( + session, cfg, "durable_timestamp", 0, &cval)); + if (cval.len != 0) { + if (!prepare) + WT_ERR_MSG(session, EINVAL, + "durable_timestamp should not be given for " + "non-prepared transaction"); + + WT_ERR(__wt_txn_parse_timestamp( + session, "durable", &ts, &cval)); + /* Durable timestamp should be later than stable timestamp. */ + F_SET(txn, WT_TXN_HAS_TS_DURABLE); + txn->durable_timestamp = ts; WT_ERR(__wt_timestamp_validate( - session, "durable", txn->durable_timestamp, &cval, true)); - - WT_ERR(__txn_commit_timestamp_validate(session)); + session, "durable", ts, &cval, true)); + } - /* TODO : assert durable_timestamp. */ + WT_ERR(__txn_commit_timestamps_validate(session)); /* * The default sync setting is inherited from the connection, but can diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 3a111d271ca..a1c700661ce 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -467,7 +467,8 @@ __wt_checkpoint_progress(WT_SESSION_IMPL *session, bool closing) time_diff = WT_TIMEDIFF_SEC(cur_time, conn->ckpt_timer_start); - if (closing || (time_diff / 20) > conn->ckpt_progress_msg_count) { + if (closing || (time_diff / WT_PROGRESS_MSG_PERIOD) > + conn->ckpt_progress_msg_count) { __wt_verbose(session, WT_VERB_CHECKPOINT_PROGRESS, "Checkpoint %s for %" PRIu64 " seconds and wrote: %" PRIu64 " pages (%" PRIu64 " MB)", @@ -1537,7 +1538,7 @@ __checkpoint_mark_skip( /* * __wt_checkpoint_tree_reconcile_update -- - * Update a checkpoint based on reconciliation's results. + * Update a checkpoint based on reconciliation results. */ void __wt_checkpoint_tree_reconcile_update( @@ -1554,7 +1555,7 @@ __wt_checkpoint_tree_reconcile_update( /* * Reconciliation just wrote a checkpoint, everything has been written. - * Update the checkpoint with reconciliation's information. The reason + * Update the checkpoint with reconciliation information. 
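/*
 * The checkpoint progress message above fires whenever the elapsed seconds
 * divided by WT_PROGRESS_MSG_PERIOD exceed the number of messages already
 * issued, i.e. roughly one message per 20-second period plus a final one when
 * closing. The rate limiter restated as a standalone sketch.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PROGRESS_MSG_PERIOD 20      /* seconds, matches the constant above */

struct progress {
    uint64_t start_sec;             /* when the operation started */
    uint64_t msg_count;             /* messages printed so far */
};

/* Print at most one progress message per period, and always when closing. */
static void
progress_maybe_report(struct progress *p, uint64_t now_sec, bool closing)
{
    uint64_t elapsed = now_sec - p->start_sec;

    if (closing || elapsed / PROGRESS_MSG_PERIOD > p->msg_count) {
        printf("running for %llu seconds\n", (unsigned long long)elapsed);
        p->msg_count++;
    }
}

int
main(void)
{
    struct progress p = { 0, 0 };
    uint64_t t;

    for (t = 0; t <= 65; t += 5)
        progress_maybe_report(&p, t, false);
    progress_maybe_report(&p, 70, true);    /* final message at close */
    return (0);
}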
The reason * for this function is the reconciliation code just passes through the * btree structure's checkpoint array, it doesn't know any more. */ diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c index 66ec0536d2b..de474dba222 100644 --- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c +++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c @@ -591,7 +591,7 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, WT_TXN *txn = &session->txn; WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global; wt_timestamp_t oldest_ts, stable_ts; - char ts_string[WT_TS_INT_STRING_SIZE]; + char ts_string[2][WT_TS_INT_STRING_SIZE]; bool has_oldest_ts, has_stable_ts; /* @@ -611,17 +611,17 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, if (has_oldest_ts && ts < oldest_ts) { __wt_timestamp_to_string( - oldest_ts, ts_string, sizeof(ts_string)); + oldest_ts, ts_string[0], sizeof(ts_string[0])); WT_RET_MSG(session, EINVAL, "%s timestamp %.*s older than oldest timestamp %s", - name, (int)cval->len, cval->str, ts_string); + name, (int)cval->len, cval->str, ts_string[0]); } if (compare_stable && has_stable_ts && ts < stable_ts) { __wt_timestamp_to_string( - stable_ts, ts_string, sizeof(ts_string)); + stable_ts, ts_string[0], sizeof(ts_string[0])); WT_RET_MSG(session, EINVAL, "%s timestamp %.*s older than stable timestamp %s", - name, (int)cval->len, cval->str, ts_string); + name, (int)cval->len, cval->str, ts_string[0]); } /* @@ -631,12 +631,12 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, */ if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) && ts < txn->first_commit_timestamp) { - __wt_timestamp_to_string( - txn->first_commit_timestamp, ts_string, sizeof(ts_string)); + __wt_timestamp_to_string(txn->first_commit_timestamp, + ts_string[0], sizeof(ts_string[0])); WT_RET_MSG(session, EINVAL, "%s timestamp %.*s older than the first " "commit timestamp %s for this transaction", - name, (int)cval->len, cval->str, ts_string); + name, (int)cval->len, cval->str, ts_string[0]); } /* @@ -646,11 +646,23 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, */ if (F_ISSET(txn, WT_TXN_PREPARE) && ts < txn->prepare_timestamp) { __wt_timestamp_to_string( - txn->prepare_timestamp, ts_string, sizeof(ts_string)); + txn->prepare_timestamp, ts_string[0], sizeof(ts_string[0])); WT_RET_MSG(session, EINVAL, "%s timestamp %.*s older than the prepare timestamp %s " "for this transaction", - name, (int)cval->len, cval->str, ts_string); + name, (int)cval->len, cval->str, ts_string[0]); + } + + if (F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) && + txn->durable_timestamp < txn->commit_timestamp) { + __wt_timestamp_to_string( + txn->durable_timestamp, ts_string[0], sizeof(ts_string[0])); + __wt_timestamp_to_string( + txn->commit_timestamp, ts_string[1], sizeof(ts_string[1])); + WT_RET_MSG(session, EINVAL, + "%s timestamp %s older than the commit timestamp %s " + "for this transaction", + name, ts_string[0], ts_string[1]); } return (0); @@ -705,6 +717,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_txn_parse_timestamp( session, "durable", &ts, &cval)); txn->durable_timestamp = ts; + F_SET(txn, WT_TXN_HAS_TS_DURABLE); prepare_allowed = true; } } diff --git a/src/third_party/wiredtiger/src/utilities/util.h b/src/third_party/wiredtiger/src/utilities/util.h index 05d65440958..be58f8eae0e 100644 --- a/src/third_party/wiredtiger/src/utilities/util.h +++ 
b/src/third_party/wiredtiger/src/utilities/util.h @@ -32,7 +32,7 @@ int util_cerr(WT_CURSOR *, const char *, int); int util_compact(WT_SESSION *, int, char *[]); void util_copyright(void); int util_create(WT_SESSION *, int, char *[]); -int util_downgrade(WT_SESSION *, WT_CONNECTION *, int, char *[]); +int util_downgrade(WT_SESSION *, int, char *[]); int util_drop(WT_SESSION *, int, char *[]); int util_dump(WT_SESSION *, int, char *[]); int util_err(WT_SESSION *, int, const char *, ...) diff --git a/src/third_party/wiredtiger/src/utilities/util_downgrade.c b/src/third_party/wiredtiger/src/utilities/util_downgrade.c index e03544f8422..ce780c8614c 100644 --- a/src/third_party/wiredtiger/src/utilities/util_downgrade.c +++ b/src/third_party/wiredtiger/src/utilities/util_downgrade.c @@ -11,8 +11,9 @@ static int usage(void); int -util_downgrade(WT_SESSION *session, WT_CONNECTION *conn, int argc, char *argv[]) +util_downgrade(WT_SESSION *session, int argc, char *argv[]) { + WT_CONNECTION *conn; WT_DECL_RET; int ch; char config_str[128], *release; @@ -39,8 +40,9 @@ util_downgrade(WT_SESSION *session, WT_CONNECTION *conn, int argc, char *argv[]) if ((ret = __wt_snprintf(config_str, sizeof(config_str), "compatibility=(release=%s)", release)) != 0) return (util_err(session, ret, NULL)); + conn = session->connection; if ((ret = conn->reconfigure(conn, config_str)) != 0) - return (util_err(session, ret, "conn.downgrade")); + return (util_err(session, ret, "WT_CONNECTION.downgrade")); return (0); } diff --git a/src/third_party/wiredtiger/src/utilities/util_main.c b/src/third_party/wiredtiger/src/utilities/util_main.c index de4f31fcf23..9b94acdc9ed 100644 --- a/src/third_party/wiredtiger/src/utilities/util_main.c +++ b/src/third_party/wiredtiger/src/utilities/util_main.c @@ -67,11 +67,10 @@ main(int argc, char *argv[]) WT_DECL_RET; WT_SESSION *session; size_t len; - int (*cfunc)(WT_SESSION *, WT_CONNECTION *, int, char *[]); int ch, major_v, minor_v, tret, (*func)(WT_SESSION *, int, char *[]); const char *cmd_config, *config, *p1, *p2, *p3, *rec_config; char *p, *secretkey; - bool logoff, needconn, recover, salvage; + bool logoff, recover, salvage; conn = NULL; p = NULL; @@ -83,8 +82,6 @@ main(int argc, char *argv[]) ++progname; command = ""; - needconn = false; - /* Check the version against the library build. */ (void)wiredtiger_version(&major_v, & minor_v, NULL); if (major_v != WIREDTIGER_VERSION_MAJOR || @@ -166,7 +163,6 @@ main(int argc, char *argv[]) __wt_optreset = __wt_optind = 1; func = NULL; - cfunc = NULL; switch (command[0]) { case 'a': if (strcmp(command, "alter") == 0) @@ -188,10 +184,9 @@ main(int argc, char *argv[]) } break; case 'd': - if (strcmp(command, "downgrade") == 0) { - cfunc = util_downgrade; - needconn = true; - } else if (strcmp(command, "drop") == 0) + if (strcmp(command, "downgrade") == 0) + func = util_downgrade; + else if (strcmp(command, "drop") == 0) func = util_drop; else if (strcmp(command, "dump") == 0) func = util_dump; @@ -248,7 +243,7 @@ main(int argc, char *argv[]) default: break; } - if (func == NULL && cfunc == NULL) { + if (func == NULL) { usage(); goto err; } @@ -293,10 +288,7 @@ main(int argc, char *argv[]) } /* Call the function. 
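/*
 * util_downgrade() now reaches the connection through session->connection
 * instead of taking a WT_CONNECTION argument, which is what lets util_main.c
 * drop the separate connection-taking function-pointer path. A sketch of the
 * same pattern using the public API; the caller-supplied release string is an
 * example value only and this is not the utility's actual code.
 */
#include <errno.h>
#include <stdio.h>
#include <wiredtiger.h>

/* Reconfigure the connection that owns "session" to an older release level. */
static int
downgrade(WT_SESSION *session, const char *release)
{
    WT_CONNECTION *conn;
    char config[128];
    int ret;

    if ((ret = snprintf(config, sizeof(config),
        "compatibility=(release=%s)", release)) < 0 ||
        (size_t)ret >= sizeof(config))
        return (EINVAL);

    conn = session->connection;     /* the session's owning connection */
    return (conn->reconfigure(conn, config));
}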
*/ - if (needconn) - ret = cfunc(session, conn, argc, argv); - else - ret = func(session, argc, argv); + ret = func(session, argc, argv); if (0) { err: ret = 1; diff --git a/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c b/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c index 614ee01486a..fd734b1a4a2 100644 --- a/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c @@ -188,10 +188,10 @@ create_data(TABLE_INFO *t) /* * corrupt_metadata -- - * Corrupt the metadata by scribbling on the "corrupt" URI string. + * Corrupt the file by scribbling on the provided URI string. */ static void -corrupt_metadata(void) +corrupt_file(const char *file_name, const char *uri) { struct stat sb; FILE *fp; @@ -207,7 +207,7 @@ corrupt_metadata(void) * when WiredTiger next reads it. */ testutil_check(__wt_snprintf( - path, sizeof(path), "%s/%s", home, WT_METAFILE)); + path, sizeof(path), "%s/%s", home, file_name)); if ((fp = fopen(path, "r+")) == NULL) testutil_die(errno, "fopen: %s", path); testutil_check(fstat(fileno(fp), &sb)); @@ -219,7 +219,7 @@ corrupt_metadata(void) /* * Corrupt all occurrences of the string in the file. */ - while ((corrupt = byte_str(buf, meta_size, CORRUPT)) != NULL) { + while ((corrupt = byte_str(buf, meta_size, uri)) != NULL) { corrupted = true; testutil_assert(*(char *)corrupt != 'X'); *(char *)corrupt = 'X'; @@ -715,7 +715,7 @@ main(int argc, char *argv[]) * Damage/corrupt WiredTiger.wt. */ printf("corrupt metadata\n"); - corrupt_metadata(); + corrupt_file(WT_METAFILE, CORRUPT); testutil_check(__wt_snprintf(buf, sizeof(buf), "cp -p %s/WiredTiger.wt ./%s.SAVE/WiredTiger.wt.CORRUPT", home, home)); @@ -724,6 +724,19 @@ main(int argc, char *argv[]) testutil_die(ret, "system: %s", buf); run_all_verification(NULL, &table_data[0]); + /* + * Damage/corrupt WiredTiger.turtle. + */ + printf("corrupt turtle\n"); + corrupt_file(WT_METADATA_TURTLE, WT_METAFILE_URI); + testutil_check(__wt_snprintf(buf, sizeof(buf), + "cp -p %s/WiredTiger.turtle ./%s.SAVE/WiredTiger.turtle.CORRUPT", + home, home)); + printf("copy: %s\n", buf); + if ((ret = system(buf)) < 0) + testutil_die(ret, "system: %s", buf); + run_all_verification(NULL, &table_data[0]); + out_of_sync(&table_data[0]); /* diff --git a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c index 5c8b965ceb5..a551cbf29de 100644 --- a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c @@ -252,7 +252,7 @@ sweep_stats(void) } static void -run(bool config_cache) +runone(bool config_cache) { pthread_t idlist[1000]; u_int i, j; @@ -309,8 +309,8 @@ run(bool config_cache) testutil_check(conn->close(conn, NULL)); } -int -main(int argc, char *argv[]) +static int +run(int argc, char *argv[]) { static const struct { u_int workers; @@ -338,12 +338,6 @@ main(int argc, char *argv[]) u_int i, n; int ch; - /* - * Bypass this test for valgrind. It has a fairly low thread limit. 
- */ - if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND")) - return (EXIT_SUCCESS); - (void)testutil_set_progname(argv); __wt_random_init_seed(NULL, &rnd); @@ -365,10 +359,34 @@ main(int argc, char *argv[]) n = __wt_random(&rnd) % WT_ELEMENTS(runs); workers = runs[n].workers; uris = runs[n].uris; - run(runs[n].cache_cursors); + runone(runs[n].cache_cursors); } uri_teardown(); return (EXIT_SUCCESS); } + +int +main(int argc, char *argv[]) +{ + bool skip; + + skip = false; + + /* + * Bypass this test for valgrind. It has a fairly low thread limit. + */ + if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND")) + skip = true; + + /* + * Bypass this test for OS X. We periodically see it hang without error, + * leaving a zombie process that never exits (WT-4613, BUILD-7616). + */ +#if defined(__APPLE__) + skip = true; +#endif + + return (skip ? EXIT_SUCCESS : run(argc, argv)); +} diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index f439c5bd2bb..d19963d75a5 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -778,9 +778,6 @@ config_pct(void) static void config_prepare(void) { - /* WT-4537: REMOVE when that merges */ - config_single("prepare=off", 0); - /* * We cannot prepare a transaction if logging is configured, or if * timestamps are not configured. diff --git a/src/third_party/wiredtiger/test/suite/test_assert04.py b/src/third_party/wiredtiger/test/suite/test_assert04.py index 57ee267ab01..59aafd2d4cc 100644 --- a/src/third_party/wiredtiger/test/suite/test_assert04.py +++ b/src/third_party/wiredtiger/test/suite/test_assert04.py @@ -124,6 +124,9 @@ class test_assert04(wttest.WiredTigerTestCase, suite_subprocess): c.close() # Detect using a timestamp on the non-timestamp key. + # We must first use a non timestamped operation on the key + # in order to violate the key consistency condition in the + # following transaction. c = self.session.open_cursor(uri) self.session.begin_transaction() c['key_nots'] = 'value_nots3' diff --git a/src/third_party/wiredtiger/test/suite/test_assert05.py b/src/third_party/wiredtiger/test/suite/test_assert05.py new file mode 100644 index 00000000000..b709e8df8dd --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_assert05.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2019 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_assert05.py +# Timestamps: assert durable timestamp settings +# + +from suite_subprocess import suite_subprocess +import wiredtiger, wttest + +def timestamp_str(t): + return '%x' % t + +class test_assert05(wttest.WiredTigerTestCase, suite_subprocess): + base = 'assert05' + base_uri = 'file:' + base + uri_always = base_uri + '.always.wt' + uri_def = base_uri + '.def.wt' + uri_never = base_uri + '.never.wt' + uri_none = base_uri + '.none.wt' + cfg = 'key_format=S,value_format=S,' + cfg_always = 'assert=(durable_timestamp=always)' + cfg_def = '' + cfg_never = 'assert=(durable_timestamp=never)' + cfg_none = 'assert=(durable_timestamp=none)' + + count = 1 + # + # Commit a k/v pair making sure that it detects an error if needed, when + # used with and without a durable timestamp. + # + def insert_check(self, uri, use_ts): + c = self.session.open_cursor(uri) + key = 'key' + str(self.count) + val = 'value' + str(self.count) + + # Commit with a timestamp + self.session.begin_transaction() + c[key] = val + self.session.prepare_transaction( + 'prepare_timestamp=' + timestamp_str(self.count)) + self.session.timestamp_transaction( + 'commit_timestamp=' + timestamp_str(self.count)) + self.session.timestamp_transaction( + 'durable_timestamp=' + timestamp_str(self.count)) + # All settings other than never should commit successfully + if (use_ts != 'never'): + self.session.commit_transaction() + else: + msg = "/timestamp set on this transaction/" + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda:self.assertEquals(self.session.commit_transaction(), + 0), msg) + c.close() + self.count += 1 + + # Commit without a timestamp + key = 'key' + str(self.count) + val = 'value' + str(self.count) + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c[key] = val + self.session.prepare_transaction( + 'prepare_timestamp=' + timestamp_str(self.count)) + self.session.timestamp_transaction( + 'commit_timestamp=' + timestamp_str(self.count)) + # All settings other than always should commit successfully + if (use_ts != 'always'): + self.session.commit_transaction() + else: + msg = "/none set on this transaction/" + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda:self.assertEquals(self.session.commit_transaction(), + 0), msg) + self.count += 1 + c.close() + + def test_durable_timestamp(self): + #if not wiredtiger.diagnostic_build(): + # self.skipTest('requires a diagnostic build') + + # Create a data item at a timestamp + self.session.create(self.uri_always, self.cfg + self.cfg_always) + self.session.create(self.uri_def, self.cfg + self.cfg_def) + self.session.create(self.uri_never, self.cfg + self.cfg_never) + self.session.create(self.uri_none, self.cfg + self.cfg_none) + + # Check inserting into each table + self.insert_check(self.uri_always, 'always') + self.insert_check(self.uri_def, 'none') + self.insert_check(self.uri_never, 'never') + self.insert_check(self.uri_none, 'none') + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_assert06.py b/src/third_party/wiredtiger/test/suite/test_assert06.py new file mode 100644 index 00000000000..828968441a9 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_assert06.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python +# 
+# Public Domain 2014-2019 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_assert06.py +# Timestamps: verify key consistent setting for durable timestamps +# + +from suite_subprocess import suite_subprocess +import wiredtiger, wttest + +def timestamp_str(t): + return '%x' % t + +class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): + def apply_timestamps(self, timestamp): + self.session.prepare_transaction( + 'prepare_timestamp=' + timestamp_str(timestamp)) + self.session.timestamp_transaction( + 'commit_timestamp=' + timestamp_str(timestamp)) + self.session.timestamp_transaction( + 'durable_timestamp=' + timestamp_str(timestamp)) + + def test_timestamp_alter(self): + base = 'assert06' + uri = 'file:' + base + cfg_on = 'assert=(durable_timestamp=key_consistent)' + cfg_off = 'assert=(durable_timestamp=none)' + msg_ooo='/out of order/' + msg_usage='/used inconsistently/' + + # Create the table without the key consistency checking turned on. + # Create a few items breaking the rules. Then alter the setting and + # verify the inconsistent usage is detected. + self.session.create(uri, 'key_format=S,value_format=S') + # Insert a data item at timestamp 2. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts1'] = 'value2' + self.apply_timestamps(2) + self.session.commit_transaction() + c.close() + + # Modify the data item at timestamp 1. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts1'] = 'value1' + self.apply_timestamps(1) + self.session.commit_transaction() + c.close() + + # Insert a non-timestamped item. Then modify with a timestamp. And + # again modify without a timestamp. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value_nots' + self.session.commit_transaction() + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value2' + self.apply_timestamps(2) + self.session.commit_transaction() + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value_nots2' + self.session.commit_transaction() + c.close() + + # We must move the oldest timestamp forward in order to alter. + # Otherwise alter closing the file will fail with EBUSY. 
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(2)) + + # Now alter the setting and make sure we detect incorrect usage. + self.session.alter(uri, cfg_on) + + # Detect decreasing timestamp. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts1'] = 'value5' + self.apply_timestamps(5) + self.session.commit_transaction() + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts1'] = 'value4' + self.apply_timestamps(4) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), msg_ooo) + c.close() + + # Detect not using a timestamp. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts1'] = 'value_nots3' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), msg_usage) + c.close() + + # Detect using a timestamp on the non-timestamp key. + # We must first use a non-timestamped operation on the key + # in order to violate the key consistency condition in the + # following transaction. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value_nots3' + self.session.commit_transaction() + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value3' + self.apply_timestamps(3) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), msg_usage) + c.close() + self.session.checkpoint() + + c = self.session.open_cursor(uri) + self.assertEquals(c['key_ts1'], 'value5') + self.assertEquals(c['key_nots'], 'value_nots3') + c.close() + + # Test to make sure that key consistency can be turned off + # after turning it on. + self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(5)) + self.session.alter(uri, cfg_off) + + # Detection is off, so we can successfully change the same key with and + # without a timestamp. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value_nots4' + self.session.commit_transaction() + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value6' + self.apply_timestamps(6) + self.session.commit_transaction() + c.close() + + def test_timestamp_usage(self): + base = 'assert06' + uri = 'file:' + base + msg_ooo='/out of order/' + msg_usage='/used inconsistently/' + + # Create the table with the key consistency checking turned on. + # That checking will verify that any individual key is always or never + # used with a timestamp, and that if it is used with a timestamp, + # the timestamps are in increasing order for that key. + self.session.create(uri, 'key_format=S,value_format=S,assert=(durable_timestamp=key_consistent)') + + # Insert a data item at timestamp 2. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts1'] = 'value2' + self.apply_timestamps(2) + self.session.commit_transaction() + c.close() + + # Modify the data item at timestamp 1. We should detect it is wrong. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts1'] = 'value1' + self.apply_timestamps(1) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), msg_ooo) + c.close() + + # Make sure we can successfully add a different key at timestamp 1.
+ c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts2'] = 'value1' + self.apply_timestamps(1) + self.session.commit_transaction() + c.close() + + # + # Insert key_ts3 at timestamp 10 and key_ts4 at 15. + # Then modify both keys in one transaction at timestamp 13. + # We should not be allowed to modify the one from 15. + # So the whole transaction should fail. + # + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts3'] = 'value10' + self.apply_timestamps(10) + self.session.commit_transaction() + self.session.begin_transaction() + c['key_ts4'] = 'value15' + self.apply_timestamps(15) + self.session.commit_transaction() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts3'] = 'value13' + c['key_ts4'] = 'value13' + self.apply_timestamps(13) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), msg_ooo) + c.close() + + c = self.session.open_cursor(uri) + self.assertEquals(c['key_ts3'], 'value10') + self.assertEquals(c['key_ts4'], 'value15') + c.close() + + # + # Separately, we should be able to update key_ts3 at timestamp 10 + # but not update key_ts4 inserted at timestamp 15. + # + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts3'] = 'value13' + self.apply_timestamps(13) + self.session.commit_transaction() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts4'] = 'value13' + self.apply_timestamps(13) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), msg_ooo) + c.close() + + # Make sure multiple update attempts still fail and eventually + # succeed with a later timestamp. This tests that aborted entries + # in the update chain are not considered for the timestamp check. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts4'] = 'value14' + self.apply_timestamps(14) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), msg_ooo) + c.close() + c = self.session.open_cursor(uri) + self.assertEquals(c['key_ts4'], 'value15') + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts4'] = 'value16' + self.apply_timestamps(16) + self.session.commit_transaction() + c.close() + c = self.session.open_cursor(uri) + self.assertEquals(c['key_ts4'], 'value16') + c.close() + + # Now try to modify a key previously used with timestamps without + # one. We should get the inconsistent usage message. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts4'] = 'value_nots' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), msg_usage) + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts4'] = 'value_nots' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), msg_usage) + c.close() + c = self.session.open_cursor(uri) + self.assertEquals(c['key_ts4'], 'value16') + c.close() + + # Now confirm the other way. Create a key without a timestamp and then + # attempt to modify it with a timestamp. The only error checking that + # makes sense here is the inconsistent usage. 
+ c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value_nots' + self.session.commit_transaction() + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value16' + self.apply_timestamps(16) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), msg_usage) + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value_nots1' + self.session.commit_transaction() + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value17' + self.apply_timestamps(17) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), msg_usage) + c.close() + + c = self.session.open_cursor(uri) + self.assertEquals(c['key_nots'], 'value_nots1') + c.close() + + # Confirm it is okay to set the timestamp in the middle or end of the + # transaction. That should set the timestamp for the whole thing. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts5'] = 'value_notsyet' + c['key_ts5'] = 'value20' + self.apply_timestamps(20) + self.session.commit_transaction() + c.close() + + c = self.session.open_cursor(uri) + self.assertEquals(c['key_ts5'], 'value20') + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts6'] = 'value_notsyet' + c['key_ts6'] = 'value21_after' + self.apply_timestamps(21) + self.session.commit_transaction() + c.close() + + c = self.session.open_cursor(uri) + self.assertEquals(c['key_ts6'], 'value21_after') + c.close() + + # Confirm it is okay to set the durable timestamp on the commit call. + # That should set the timestamp for the whole thing. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts6'] = 'value_committs1' + c['key_ts6'] = 'value22' + self.session.prepare_transaction( + 'prepare_timestamp=' + timestamp_str(22)) + self.session.timestamp_transaction( + 'commit_timestamp=' + timestamp_str(22)) + self.session.commit_transaction('durable_timestamp=' + + timestamp_str(22)) + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value23' + self.session.prepare_transaction( + 'prepare_timestamp=' + timestamp_str(23)) + self.session.timestamp_transaction( + 'commit_timestamp=' + timestamp_str(23)) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction( + 'durable_timestamp=' + timestamp_str(23)), msg_usage) + c.close() + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_prepare_cursor02.py b/src/third_party/wiredtiger/test/suite/test_prepare_cursor02.py new file mode 100644 index 00000000000..0aeb3f62dbf --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_prepare_cursor02.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2019 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. 
We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from wtdataset import SimpleDataSet, SimpleIndexDataSet +from wtdataset import SimpleLSMDataSet, ComplexDataSet, ComplexLSMDataSet +from wtscenario import make_scenarios + +def timestamp_str(t): + return '%x' %t + +# test_prepare_cursor02.py +# WT_CURSOR navigation (next/prev) tests with prepared transactions +class test_prepare_cursor02(wttest.WiredTigerTestCase): + + keyfmt = [ + ('row-store', dict(keyfmt='i')), + ('column-store', dict(keyfmt='r')), + ] + types = [ + ('table-simple', dict(uri='table', ds=SimpleDataSet)), + ] + + iso_types = [ + ('isolation_read_committed', dict(isolation='read-committed')), + ('isolation_snapshot', dict(isolation='snapshot')) + ] + scenarios = make_scenarios(types, keyfmt, iso_types) + + def skip(self): + return self.keyfmt == 'r' and \ + (self.ds.is_lsm() or self.uri == 'lsm') + + # Test cursor navigate (next/prev) with prepared transactions. + def test_cursor_navigate_prepare_transaction(self): + if self.skip(): + return + + # Build an object. + uri = self.uri + ':test_prepare_cursor02' + ds = self.ds(self, uri, 0, key_format=self.keyfmt) + ds.populate() + + session = self.conn.open_session() + cursor = session.open_cursor(uri, None) + session.begin_transaction() + cursor.set_key(ds.key(1)) + cursor.set_value(ds.value(1)) + cursor.insert() + session.prepare_transaction('prepare_timestamp=' + timestamp_str(100)) + + prep_session = self.conn.open_session() + prep_cursor = prep_session.open_cursor(uri, None) + + # Check cursor navigate with insert in prepared transaction. + # Data set is empty + # Insert key 1 in prepared state. + prep_session.begin_transaction() + # Check next operation. + prep_cursor.set_key(ds.key(1)) + self.assertRaisesException(wiredtiger.WiredTigerError, lambda: prep_cursor.search()) + self.assertRaisesException(wiredtiger.WiredTigerError, lambda: prep_cursor.next()) + self.assertRaisesException(wiredtiger.WiredTigerError, lambda: prep_cursor.next()) + + # Check prev operation. 
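# [Editorial sketch, not part of the diff] The behaviour the next/prev checks
# here assert, written against the raw Python API: while another session holds
# an update in prepared state, cursor traversal keeps raising a prepare
# conflict on every retry (not just once, per WT-4617) until that transaction
# resolves. The home directory, URI and timestamps are illustrative.
import os
import wiredtiger

home = 'WT_SKETCH_PREPARE'
os.path.isdir(home) or os.makedirs(home)
conn = wiredtiger.wiredtiger_open(home, 'create')
writer = conn.open_session()
reader = conn.open_session()
uri = 'table:sketch_prepare'
writer.create(uri, 'key_format=i,value_format=S')

# Insert key 1 and leave the transaction in the prepared state.
wc = writer.open_cursor(uri)
writer.begin_transaction()
wc[1] = 'one'
writer.prepare_transaction('prepare_timestamp=64')

# A concurrent reader repeatedly hits the prepared update.
rc = reader.open_cursor(uri)
reader.begin_transaction()
for attempt in range(2):
    try:
        rc.next()
    except wiredtiger.WiredTigerError as e:
        print('attempt %d raised: %s' % (attempt, e))
reader.rollback_transaction()

# Resolve the prepared transaction; traversal then proceeds normally.
writer.timestamp_transaction('commit_timestamp=64')
writer.timestamp_transaction('durable_timestamp=64')
writer.commit_transaction()
conn.close()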
+ prep_cursor.set_key(ds.key(1)) + self.assertRaisesException(wiredtiger.WiredTigerError, lambda: prep_cursor.search()) + self.assertRaisesException(wiredtiger.WiredTigerError, lambda: prep_cursor.prev()) + self.assertRaisesException(wiredtiger.WiredTigerError, lambda: prep_cursor.prev()) + prep_cursor.close() + prep_session.commit_transaction() + + session.rollback_transaction() + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py index 43331f7d169..8a0dfb00e88 100755 --- a/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py +++ b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py @@ -36,11 +36,15 @@ from bokeh.models.annotations import Label from bokeh.plotting import figure, output_file, reset_output, save, show from bokeh.resources import CDN import matplotlib +from multiprocessing import Process, Queue, Array +import multiprocessing import numpy as np import os import pandas as pd import sys +import statvfs import traceback +import time # A directory where we store cross-file plots for each bucket of the outlier # histogram. @@ -78,8 +82,8 @@ lastTimeStamp = 0; # us when the function is to be considered an outlier. These values # would be read from a config file, if supplied by the user. # -outlierThresholdDict = {}; -outlierPrettyNames = {}; +userDefinedLatencyThresholds = {}; +userDefinedThresholdNames = {}; # A dictionary that holds a reference to the raw dataframe for each file. # @@ -98,16 +102,19 @@ pixelsForTitle = 30; pixelsPerHeightUnit = 30; pixelsPerWidthUnit = 5; +# How many work units to perform in parallel. +targetParallelism = 0; + # The name of the time units that were used when recording timestamps. # We assume that it's nanoseconds by default. Alternative units can be # set in the configuration file. # timeUnitString = "nanoseconds"; -# The coefficient by which we multiply the standard deviation when -# setting the outlier threshold, in case it is not specified by the user. +# The percentile threshold. A function duration above that percentile +# is deemed an outlier. # -STDEV_MULT = 2; +PERCENTILE = 0.999; def initColorList(): @@ -119,6 +126,12 @@ def initColorList(): # Some browsers break if you try to give them 'sage' if (color == "sage"): colorList.remove(color); + # We reserve red to highlight occurrences of functions + # that exceeded the user-defined latency threshold. Do + # not use red for regular function colors. + # + elif (color == "red"): + colorList.remove(color); # # Each unique function name gets a unique color.
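Earlier in this file's changes, the mean-plus-two-standard-deviations outlier cutoff (STDEV_MULT) is replaced by a fixed 99.9th-percentile cutoff (PERCENTILE). A minimal, self-contained sketch of how the two cutoffs behave on heavy-tailed latency data; the durations below are synthetic and purely illustrative, not taken from the tool.

import numpy as np
import pandas as pd

np.random.seed(42)
# Mostly fast calls plus a handful of long stalls, i.e. a heavy-tailed sample.
durations = pd.Series(np.concatenate([
    np.random.exponential(100, 10000),
    np.random.exponential(5000, 10)]))

PERCENTILE = 0.999
stdev_cutoff = durations.mean() + 2 * durations.std()
percentile_cutoff = durations.quantile(PERCENTILE)

# The stdev-based cutoff is dragged around by the outliers themselves (they
# inflate both the mean and the standard deviation); the percentile cutoff is
# pinned to the shape of the bulk of the distribution.
print('mean + 2*stdev cutoff: %8.0f, flags %d calls'
      % (stdev_cutoff, (durations >= stdev_cutoff).sum()))
print('99.9th pctile cutoff : %8.0f, flags %d calls'
      % (percentile_cutoff, (durations >= percentile_cutoff).sum()))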
@@ -180,7 +193,9 @@ def getIntervalData(intervalBeginningsStack, intervalEnd, logfile): return intervalBegin[0], intervalEnd[0], intervalEnd[2], errorOccurred; -def plotOutlierHistogram(dataframe, maxOutliers, func, durationThreshold, +def plotOutlierHistogram(dataframe, maxOutliers, func, + statisticalOutlierThreshold, + userLatencyThreshold, averageDuration, maxDuration): global pixelsForTitle; @@ -191,7 +206,7 @@ def plotOutlierHistogram(dataframe, maxOutliers, func, durationThreshold, cds = ColumnDataSource(dataframe); figureTitle = "Occurrences of " + func + " that took longer than " \ - + durationThreshold + "."; + + statisticalOutlierThreshold + "."; hover = HoverTool(tooltips = [ ("interval start", "@lowerbound{0,0}"), @@ -209,7 +224,7 @@ def plotOutlierHistogram(dataframe, maxOutliers, func, durationThreshold, y_ticker_max = p.plot_height / pixelsPerHeightUnit; y_ticker_step = max(1, (maxOutliers + 1)/y_ticker_max); - y_upper_bound = (maxOutliers / y_ticker_step + 1) * y_ticker_step; + y_upper_bound = (maxOutliers / y_ticker_step + 2) * y_ticker_step; p.yaxis.ticker = FixedTicker(ticks = range(0, y_upper_bound, y_ticker_step)); @@ -221,18 +236,28 @@ def plotOutlierHistogram(dataframe, maxOutliers, func, durationThreshold, p.quad(left = 'lowerbound', right = 'upperbound', bottom = 'bottom', top = 'height', color = funcToColor[func], source = cds, - nonselection_fill_color=funcToColor[func], + nonselection_fill_color= funcToColor[func], nonselection_fill_alpha = 1.0, line_color = "lightgrey", selection_fill_color = funcToColor[func], selection_line_color="grey" ); + p.x(x='markerX', y='markerY', size='markersize', color = 'navy', + line_width = 1, source = cds); + # Add an annotation to the chart # y_max = dataframe['height'].max(); - text = "Average duration: " + '{0:,.0f}'.format(averageDuration) + \ - ". Maximum duration: " + '{0:,.0f}'.format(maxDuration) + "."; + text = "Average duration: " + '{0:,.0f}'.format(averageDuration) + " " + \ + timeUnitString + \ + ". Maximum duration: " + '{0:,.0f}'.format(maxDuration) + " " + \ + timeUnitString + ". "; + if (userLatencyThreshold is not None): + text = text + \ + "An \'x\' shows intervals with operations exceeding " + \ + "a user-defined threshold of " + \ + userLatencyThreshold + "."; mytext = Label(x=0, y=y_upper_bound-y_ticker_step, text=text, text_color = "grey", text_font = "helvetica", text_font_size = "10pt", @@ -280,13 +305,13 @@ def assignStackDepths(dataframe): for i in range(len(df.index)): - myStartTime = df.at[i, 'start']; + myEndTime = df.at[i, 'end']; # Pop all items off stack whose end time is earlier than my - # start time. They are not part of my stack, so I don't want to + # end time. They are not the callers on my stack, so I don't want to # count them. # - while (len(stack) > 0 and stack[-1] < myStartTime): + while (len(stack) > 0 and stack[-1] < myEndTime): stack.pop(); df.at[i, 'stackdepth'] = len(stack); @@ -469,11 +494,12 @@ def createLegendFigure(legendDict): def generateBucketChartForFile(figureName, dataframe, y_max, x_min, x_max): - global colorAlreadyUsedInLegend; global funcToColor; global plotWidth; global timeUnitString; + colorAlreadyUsedInLegend = {}; + MAX_ITEMS_PER_LEGEND = 10; numLegends = 0; legendItems = {}; @@ -562,10 +588,10 @@ def generateEmptyDataset(): # across the timelines for all files. We call it a bucket, because it # corresponds to a bucket in the outlier histogram. 
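The assignStackDepths hunk above changes the stack-pop comparison from the current record's start time to its end time: a frame already on the stack can only be one of my callers if it ends after I do. A small sketch of the difference on made-up intervals; the records and helper below are illustrative and not taken from the script.

# (function, start, end), sorted by start time, as assignStackDepths expects.
records = [
    ('parent', 0, 5),
    ('skewed', 3, 8),   # overlaps 'parent' but is not nested inside it
]

def stack_depths(records, compare_against_end):
    stack, depths = [], []
    for _, start, end in records:
        boundary = end if compare_against_end else start
        # Pop every frame that ends before the boundary: it cannot enclose
        # the current record, so it is not one of its callers.
        while stack and stack[-1] < boundary:
            stack.pop()
        depths.append(len(stack))
        stack.append(end)
    return depths

print(stack_depths(records, compare_against_end=False))  # old rule -> [0, 1]
print(stack_depths(records, compare_against_end=True))   # new rule -> [0, 0]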
# -def generateCrossFilePlotsForBucket(i, lowerBound, upperBound, navigatorDF): +def generateCrossFilePlotsForBucket(i, lowerBound, upperBound, navigatorDF, + retFilename): global bucketDir; - global colorAlreadyUsedInLegend; global timeUnitString; aggregateLegendDict = {}; @@ -584,12 +610,6 @@ def generateCrossFilePlotsForBucket(i, lowerBound, upperBound, navigatorDF): navigatorFigure = generateNavigatorFigure(navigatorDF, i, intervalTitle); figuresForAllFiles.append(navigatorFigure); - # The following dictionary keeps track of legends. We need - # a legend for each new HTML file. So we reset the dictionary - # before generating a new file. - # - colorAlreadyUsedInLegend = {}; - # Select from the dataframe for this file the records whose 'start' # and 'end' timestamps fall within the lower and upper bound. # @@ -653,7 +673,8 @@ def generateCrossFilePlotsForBucket(i, lowerBound, upperBound, navigatorDF): save(column(figuresForAllFiles), filename = fileName, title=intervalTitle, resources=CDN); - return fileName; + retFilename.value = fileName; + # Generate a plot that shows a view of the entire timeline in a form of # intervals. By clicking on an interval we can navigate to that interval. @@ -752,6 +773,31 @@ def createIntervalNavigatorDF(numBuckets, timeUnitsPerBucket): dataframe['intervalnumbernext'] = dataframe['intervalnumber'] + 1; return dataframe; + +def waitOnOneProcess(runningProcesses): + + success = False; + for i, p in runningProcesses.items(): + if (not p.is_alive()): + del runningProcesses[i]; + success = True; + + # If we have not found a terminated process, sleep for a while + if (not success): + time.sleep(1); + +# Update the UI message showing what percentage of work done by +# parallel processes has completed. +# +def updatePercentComplete(runnableProcesses, runningProcesses, + totalWorkItems, workName): + + percentComplete = float(totalWorkItems - len(runnableProcesses) \ + - len(runningProcesses)) / float(totalWorkItems) * 100; + print(color.BLUE + color.BOLD + " " + workName), + sys.stdout.write(" %d%% complete \r" % (percentComplete) ); + sys.stdout.flush(); + # Generate plots of time series slices across all files for each bucket # in the outlier histogram. Save each cross-file slice to an HTML file. # @@ -761,32 +807,99 @@ def generateTSSlicesForBuckets(): global lastTimeStamp; global plotWidth; global pixelsPerWidthUnit; + global targetParallelism; bucketFilenames = []; + runnableProcesses = {}; + returnValues = {}; + spawnedProcesses = {}; numBuckets = plotWidth / pixelsPerWidthUnit; timeUnitsPerBucket = (lastTimeStamp - firstTimeStamp) / numBuckets; navigatorDF = createIntervalNavigatorDF(numBuckets, timeUnitsPerBucket); + print(color.BLUE + color.BOLD + + "Will process " + str(targetParallelism) + " work units in parallel." + + color.END); + for i in range(numBuckets): + retFilename = Array('c', os.statvfs('/')[statvfs.F_NAMEMAX]); lowerBound = i * timeUnitsPerBucket; upperBound = (i+1) * timeUnitsPerBucket; - fileName = generateCrossFilePlotsForBucket(i, lowerBound, upperBound, - navigatorDF); - - percentComplete = float(i) / float(numBuckets) * 100; - print(color.BLUE + color.BOLD + " Generating timeline charts... 
"), - sys.stdout.write("%d%% complete \r" % (percentComplete) ); - sys.stdout.flush(); - bucketFilenames.append(fileName); - + p = Process(target=generateCrossFilePlotsForBucket, + args=(i, lowerBound, upperBound, + navigatorDF, retFilename)); + runnableProcesses[i] = p; + returnValues[i] = retFilename; + + while (len(runnableProcesses) > 0): + while (len(spawnedProcesses) < targetParallelism + and len(runnableProcesses) > 0): + + i, p = runnableProcesses.popitem(); + p.start(); + spawnedProcesses[i] = p; + + # Find at least one terminated process + waitOnOneProcess(spawnedProcesses); + updatePercentComplete(runnableProcesses, spawnedProcesses, + numBuckets, "Generating timeline charts"); + + # Wait for all processes to terminate + while (len(spawnedProcesses) > 0): + waitOnOneProcess(spawnedProcesses); + updatePercentComplete(runnableProcesses, spawnedProcesses, + numBuckets, "Generating timeline charts"); + + for i, fname in returnValues.items(): + bucketFilenames.append(str(fname.value)); print(color.END); return bucketFilenames; -def processFile(fname): +# +# After we have cleaned up the data by getting rid of incomplete function +# call records (e.g., a function begin record is presend but a function end +# is not or vice versa), we optionally dump this clean data into a file, so +# it can be re-processed by other visualization tools. The output format is +# +# <0/1> +# +# We use '0' if it's a function entry, '1' if it's a function exit. +# +def dumpCleanData(fname, df): + + enterExit = []; + timestamps = []; + functionNames = []; + + outfile = None; + fnameParts = fname.split(".txt"); + newfname = fnameParts[0] + "-clean.txt"; + + for index, row in df.iterrows(): + # Append the function enter record: + enterExit.append(0); + timestamps.append(row['start']); + functionNames.append(row['function']); + + # Append the function exit record: + enterExit.append(1); + timestamps.append(row['end']); + functionNames.append(row['function']); + + newDF = pd.DataFrame({'enterExit' : enterExit, 'timestamp' : timestamps, + 'function' : functionNames}); + newDF = newDF.set_index('timestamp', drop=False); + newDF.sort_index(inplace = True); + + print("Dumping clean data to " + newfname); + newDF.to_csv(newfname, sep=' ', index=False, header=False, + columns = ['enterExit', 'function', 'timestamp']); + +def processFile(fname, dumpCleanDataBool): global perFileDataFrame; global perFuncDF; @@ -802,6 +915,10 @@ def processFile(fname): "Processing file " + str(fname) + color.END); iDF = createCallstackSeries(rawData, "." + fname + ".log"); + if (dumpCleanDataBool): + dumpCleanData(fname, iDF); + + perFileDataFrame[fname] = iDF; for func in funcToColor.keys(): @@ -820,11 +937,6 @@ def processFile(fname): # show how many times this function took an unusually long time to # execute. # -# The parameter durationThreshold tells us when a function should be -# considered as unusually long. If this parameter is "-1" we count -# all functions whose duration exceeded the average by more than -# two standard deviations. 
-# def createOutlierHistogramForFunction(func, funcDF, bucketFilenames): global firstTimeStamp; @@ -832,10 +944,13 @@ def createOutlierHistogramForFunction(func, funcDF, bucketFilenames): global plotWidth; global pixelsPerWidthUnit; global timeUnitString; - global STDEV_MULT; + global PERCENTILE; + + statisticalOutlierThreshold = 0; + statisticalOutlierThresholdDescr = ""; + userLatencyThreshold = sys.maxint; + userLatencyThresholdDescr = None; - durationThreshold = 0; - durationThresholdDescr = ""; + # # funcDF is a list of functions along with their start and end @@ -853,46 +968,78 @@ def createOutlierHistogramForFunction(func, funcDF, bucketFilenames): averageDuration = funcDF['durations'].mean(); maxDuration = funcDF['durations'].max(); - if (outlierThresholdDict.has_key(func)): - durationThreshold = outlierThresholdDict[func]; - durationThresholdDescr = outlierPrettyNames[func]; - elif (outlierThresholdDict.has_key("*")): - durationThreshold = outlierThresholdDict["*"]; - durationThresholdDescr = outlierPrettyNames["*"]; - else: - # Signal that we will use standard deviation - durationThreshold = -STDEV_MULT; + # There are two things that we want to capture on the + # outlier charts: statistical outliers and functions exceeding the + # user-defined latency threshold. An outlier is a function + # whose duration is in the 99.9th percentile. For each + # time period we will show a bar whose height corresponds + # to the number of outliers observed during this execution + # period. + # + # Not all outliers are indicative of performance problems. + # To highlight real performance problems (as defined by the user) + # we will highlight those bars that contain operations whose + # duration exceeded the user-defined threshold. + # + if (userDefinedLatencyThresholds.has_key(func)): + userLatencyThreshold = userDefinedLatencyThresholds[func]; + userLatencyThresholdDescr = userDefinedThresholdNames[func]; + elif (userDefinedLatencyThresholds.has_key("*")): + userLatencyThreshold = userDefinedLatencyThresholds["*"]; + userLatencyThresholdDescr = userDefinedThresholdNames["*"]; + + statisticalOutlierThreshold = funcDF['durations'].quantile(PERCENTILE); + statisticalOutlierThresholdDescr = \ + '{0:,.0f}'.format(statisticalOutlierThreshold) \ + + " " + timeUnitString + \ + " (" + str(PERCENTILE * 100) + \ + "th percentile)"; - if (durationThreshold < 0): # this is a stdev multiplier - mult = -durationThreshold; - stdDev = funcDF['durations'].std(); - durationThreshold = averageDuration + mult * stdDev; - durationThresholdDescr = '{0:,.0f}'.format(durationThreshold) \ + " " + timeUnitString + " (" + str(mult) + \ + " standard deviations)"; numBuckets = plotWidth / pixelsPerWidthUnit; timeUnitsPerBucket = (lastTimeStamp - firstTimeStamp) / numBuckets; - lowerBounds = []; - upperBounds = []; + bucketHeights = []; + markers = []; + lowerBounds = []; maxOutliers = 0; + upperBounds = []; for i in range(numBuckets): + markerSize = 0; lowerBound = i * timeUnitsPerBucket; upperBound = (i+1) * timeUnitsPerBucket; + # Find out how many statistical outliers we have in the + # current period.
bucketDF = funcDF.loc[(funcDF['start'] >= lowerBound) - & (funcDF['start'] < upperBound) - & (funcDF['durations'] >= durationThreshold)]; + & (funcDF['start'] < upperBound) + & (funcDF['durations'] >= + statisticalOutlierThreshold)]; + # The number of statistical outliers is the height of the bar numOutliers = bucketDF.size; if (numOutliers > maxOutliers): maxOutliers = numOutliers; + # Find out whether we have any functions whose duration exceeded + # the user-defined threshold. + if (userLatencyThresholdDescr is not None): + bucketDF = funcDF.loc[(funcDF['start'] >= lowerBound) + & (funcDF['start'] < upperBound) + & (funcDF['durations'] >= + userLatencyThreshold)]; + + # If there is at least one element in this dataframe, then the + # operations that exceeded the user defined latency threshold are + # present in this period. Highlight this bucket with a bright color. + if (bucketDF.size > 0): + markerSize = 6; + lowerBounds.append(lowerBound); upperBounds.append(upperBound); bucketHeights.append(numOutliers); + markers.append(markerSize); if (maxOutliers == 0): return None; @@ -903,11 +1050,18 @@ def createOutlierHistogramForFunction(func, funcDF, bucketFilenames): dict['height'] = bucketHeights; dict['bottom'] = [0] * len(lowerBounds); dict['bucketfiles'] = bucketFilenames; + dict['markersize'] = markers; dataframe = pd.DataFrame(data=dict); + dataframe['markerX'] = dataframe['lowerbound'] + \ + (dataframe['upperbound'] - + dataframe['lowerbound']) / 2 ; + dataframe['markerY'] = dataframe['height'] + 0.2; return plotOutlierHistogram(dataframe, maxOutliers, func, - durationThresholdDescr, averageDuration, + statisticalOutlierThresholdDescr, + userLatencyThresholdDescr, + averageDuration, maxDuration); # @@ -967,8 +1121,8 @@ def getTimeUnitString(unitsPerSecond): # def parseConfigFile(fname): - global outlierThresholdDict; - global outlierPrettyNames; + global userDefinedLatencyThresholds; + global userDefinedThresholdNames; global timeUnitString; configFile = None; @@ -1046,14 +1200,13 @@ def parseConfigFile(fname): print(line); continue; - outlierThresholdDict[func] = threshold; - outlierPrettyNames[func] = str(number) + " " + units; + userDefinedLatencyThresholds[func] = threshold; + userDefinedThresholdNames[func] = str(number) + " " + units; # We were given an empty config file if (firstNonCommentLine): return False; - print outlierThresholdDict; return True; @@ -1063,6 +1216,7 @@ def main(): global arrowRightImg; global bucketDir; global perFuncDF; + global targetParallelism; configSupplied = False; figuresForAllFunctions = []; @@ -1074,12 +1228,27 @@ def main(): parser.add_argument('files', type=str, nargs='*', help='log files to process'); parser.add_argument('-c', '--config', dest='configFile', default=''); + parser.add_argument('-d', '--dumpCleanData', dest='dumpCleanData', + default=False, action='store_true', + help='Dump clean log data. 
Clean data will \ + not include incomplete function call records, \ + e.g., if there is a function begin record, but\ + no function end record, or vice versa.'); + parser.add_argument('-j', dest='jobParallelism', type=int, + default='0'); + args = parser.parse_args(); if (len(args.files) == 0): parser.print_help(); sys.exit(1); + # Determine the target job parallelism + if (args.jobParallelism > 0): + targetParallelism = args.jobParallelism; + else: + targetParallelism = multiprocessing.cpu_count() * 2; + # Get names of standard CSS colors that we will use for the legend initColorList(); @@ -1089,12 +1258,11 @@ def main(): if (not configSupplied): pluralSuffix = ""; - if (STDEV_MULT > 1): - pluralSuffix = "s"; + print(color.BLUE + color.BOLD + "Will deem as outliers all function instances whose runtime " + - "was " + str(STDEV_MULT) + " standard deviation" + pluralSuffix + - " greater than the average runtime for that function." + "was higher than the " + str(PERCENTILE * 100) + + "th percentile for that function." + color.END); @@ -1106,7 +1274,7 @@ def main(): # Parallelize this later, so we are working on files in parallel. for fname in args.files: - processFile(fname); + processFile(fname, args.dumpCleanData); # Normalize all intervals by subtracting the first timestamp. normalizeIntervalData(); diff --git a/src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py b/src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py index ea937f36b4a..971f3729981 100755 --- a/src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py +++ b/src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py @@ -265,7 +265,6 @@ def waitOnOneProcess(runningProcesses): def main(): runnableProcesses = {}; - returnValues = {}; spawnedProcesses = {}; successfullyProcessedFiles = []; targetParallelism = multiprocessing.cpu_count(); -- cgit v1.2.1
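As a footnote to the find-latency-spikes.py changes above: the bounded-parallelism pattern they introduce (spawn up to targetParallelism worker processes, poll for finished ones, and hand each worker's output file name back through a shared character Array) reduced to a standalone sketch. The worker body, buffer size and file names are illustrative assumptions.

import time
from multiprocessing import Array, Process

def render_bucket(i, ret_filename):
    # Stand-in for generateCrossFilePlotsForBucket(): do some work, then pass
    # the produced file name back through the shared character array.
    time.sleep(0.1)
    ret_filename.value = ('bucket-%d.html' % i).encode()

def wait_on_one_process(running):
    # Reap one finished worker if there is one; otherwise sleep briefly.
    for i, p in list(running.items()):
        if not p.is_alive():
            p.join()
            del running[i]
            return
    time.sleep(1)

if __name__ == '__main__':
    target_parallelism = 4
    num_buckets = 10
    runnable, running, results = {}, {}, {}

    for i in range(num_buckets):
        results[i] = Array('c', 256)          # room for the returned name
        runnable[i] = Process(target=render_bucket, args=(i, results[i]))

    while runnable:
        # Keep at most target_parallelism workers in flight.
        while len(running) < target_parallelism and runnable:
            i, p = runnable.popitem()
            p.start()
            running[i] = p
        wait_on_one_process(running)

    while running:
        wait_on_one_process(running)

    print([results[i].value.decode() for i in sorted(results)])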