summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2020-01-07 05:07:34 +0000
committerevergreen <evergreen@mongodb.com>2020-01-07 05:07:34 +0000
commit41c060c649959b6b270ca0e4bd546e7b3041914d (patch)
treebbd89c87eced3a817357bfec057b4f2ec17cbe7a /src
parent4d299e75428d6b0f54396354afca87fa3556d6e3 (diff)
downloadmongo-41c060c649959b6b270ca0e4bd546e7b3041914d.tar.gz
Import wiredtiger: 5bd9fdfc0bb7f8fcf791e234b6e436c79dfe1fee from branch mongodb-4.4
ref: 61bf0547c6..5bd9fdfc0b for: 4.3.3 WT-4968 Remove all_committed timestamp WT-4978 Migrate Jenkins “wiredtiger-pull-request-compilers” job to Evergreen WT-5192 Don't allow checkpoints to evict without a snapshot WT-5235 Craft a workload to show lookaside contention WT-5377 variable-length column store insert locking in format test program can stall eviction WT-5389 Add timestamps to format.sh test runs
Diffstat (limited to 'src')
-rwxr-xr-xsrc/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py168
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py5
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_all.c2
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_sync.c5
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c5
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in22
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c3
-rw-r--r--src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c2
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml62
-rw-r--r--src/third_party/wiredtiger/test/format/format.h12
-rwxr-xr-xsrc/third_party/wiredtiger/test/format/format.sh4
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c164
-rw-r--r--src/third_party/wiredtiger/test/format/t.c2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp14.py83
15 files changed, 342 insertions, 199 deletions
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py b/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py
new file mode 100755
index 00000000000..333da4b178c
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+# This benchmark is designed to stress disk access to the LAS/History Store file.
+# This is achieved through:
+# - Long running transactions consisting of read and update operations.
+# - Low cache size (~20%) for a reasonably sized WT table with large documents.
+# - Pareto distribution for operations in long running transactions. This will cause
+# skewed access for a selective set of keys in WT table.
+# - Relatively large number of read operation threads to stress cache.
+
+# Benchmark is based on a wtperf config: wiredtiger/bench/wtperf/runners/evict-btree-scan.wtperf
+# There are a number of changes made to the original configs, such as:
+# - Reduced the number of documents inserts during initial warm-up phase.
+# - Increased the sizes of key and value.
+# - Reduced the WT cache size.
+# - Added transaction based operations and repurposed some of the read threads as long-running
+# transaction threads.
+
+###################################################################################################
+'''
+# wtperf options file: evict btree configuration
+conn_config="cache_size=40G,checkpoint=(wait=60,log_size=2GB),eviction=(threads_min=12,
+threads_max=12),log=(enabled=true),session_max=600,eviction_target=60,statistics=(fast),
+statistics_log=(wait=1,json)"
+
+# 1B records * (key=12 + value=138) is about 150G total data size
+key_sz=12
+value_sz=138
+log_like_table=true
+table_config="type=file"
+icount=1000000000
+report_interval=5
+run_time=3600
+# Scans every 10 minutes for all the scan specific tables.
+# .4B records * (key=12 + value=138) is about 60G total data size for scan
+# Running on a machine with 64G physical memory, this exhausts both the
+# WT cache and the system cache.
+scan_interval=600
+scan_pct=100
+scan_table_count=20
+scan_icount=400000000
+populate_threads=5
+table_count=100
+threads=((count=400,reads=1),(count=20,inserts=1,throttle=500),(count=10,updates=1,throttle=500))
+# Add throughput/latency monitoring
+max_latency=50000
+sample_interval=5
+'''
+###################################################################################################
+
+from runner import *
+from wiredtiger import *
+from workgen import *
+
+context = Context()
+homedir = "WT_TEST"
+conn_config = "cache_size=1G,checkpoint=(wait=60,log_size=2GB),\
+ eviction=(threads_min=12,threads_max=12),log=(enabled=true),session_max=600,\
+ eviction_target=60,statistics=(fast),statistics_log=(wait=1,json)"# explicitly added
+conn = wiredtiger_open(homedir, "create," + conn_config)
+s = conn.open_session("")
+
+wtperf_table_config = "key_format=S,value_format=S," +\
+ "exclusive=true,allocation_size=4kb," +\
+ "internal_page_max=64kb,leaf_page_max=4kb,split_pct=100,"
+compress_table_config = ""
+table_config = "type=file"
+tables = []
+table_count = 1
+for i in range(0, table_count):
+ tname = "table:test" + str(i)
+ table = Table(tname)
+ s.create(tname, wtperf_table_config +\
+ compress_table_config + table_config)
+ table.options.key_size = 200
+ table.options.value_size = 5000
+ tables.append(table)
+
+populate_threads = 40
+icount = 500000
+# If there are multiple tables to be filled during populate,
+# the icount is split between them all.
+pop_ops = Operation(Operation.OP_INSERT, tables[0])
+pop_ops = op_multi_table(pop_ops, tables)
+nops_per_thread = icount // (populate_threads * table_count)
+pop_thread = Thread(pop_ops * nops_per_thread)
+pop_workload = Workload(context, populate_threads * pop_thread)
+pop_workload.run(conn)
+
+# Log like file, requires that logging be enabled in the connection config.
+log_name = "table:log"
+s.create(log_name, wtperf_table_config + "key_format=S,value_format=S," +\
+ compress_table_config + table_config + ",log=(enabled=true)")
+log_table = Table(log_name)
+
+ops = Operation(Operation.OP_SEARCH, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+thread0 = Thread(ops)
+
+ops = Operation(Operation.OP_INSERT, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+thread1 = Thread(ops)
+# These operations include log_like operations, which will increase the number
+# of insert/update operations by a factor of 2.0. This may cause the
+# actual operations performed to be above the throttle.
+thread1.options.throttle=500
+thread1.options.throttle_burst=1.0
+
+ops = Operation(Operation.OP_UPDATE, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+thread2 = Thread(ops)
+# These operations include log_like operations, which will increase the number
+# of insert/update operations by a factor of 2.0. This may cause the
+# actual operations performed to be above the throttle.
+thread2.options.throttle=500
+thread2.options.throttle_burst=1.0
+
+# Long running transactions. There is a 0.1 second sleep after a series of search and update
+# operations. The sleep op is repeated 10000 times and this will make these transactions run
+# for at least ~17 minutes.
+search_op = Operation(Operation.OP_SEARCH, tables[0], Key(Key.KEYGEN_PARETO, 0, ParetoOptions(1)))
+update_op = Operation(Operation.OP_UPDATE, tables[0], Key(Key.KEYGEN_PARETO, 0, ParetoOptions(1)))
+ops = txn(((search_op + update_op) * 1000 + sleep(0.1)) * 10000)
+thread3 = Thread(ops)
+
+ops = Operation(Operation.OP_SLEEP, "0.1") + \
+ Operation(Operation.OP_LOG_FLUSH, "")
+logging_thread = Thread(ops)
+
+workload = Workload(context, 350 * thread0 + 10 * thread1 +\
+ 50 * thread2 + 100 * thread3 + logging_thread)
+workload.options.report_interval=5
+workload.options.run_time=300
+workload.options.max_latency=50000
+workload.run(conn)
+
+latency_filename = homedir + "/latency.out"
+latency.workload_latency(workload, latency_filename)
+conn.close()
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index d6280b7f0ec..3a46d6e7ff0 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -1610,8 +1610,7 @@ methods = {
'WT_CONNECTION.query_timestamp' : Method([
Config('get', 'all_durable', r'''
- specify which timestamp to query: \c all_committed returns the largest
- timestamp such that all timestamps up to that value have committed,
+ specify which timestamp to query:
\c all_durable returns the largest timestamp such that all timestamps
up to that value have been made durable, \c last_checkpoint returns the
timestamp of the most recent stable checkpoint, \c oldest returns the
@@ -1622,7 +1621,7 @@ methods = {
timestamp of the most recent stable checkpoint taken prior to a shutdown
and \c stable returns the most recent \c stable_timestamp set with
WT_CONNECTION::set_timestamp. See @ref transaction_timestamps''',
- choices=['all_committed','all_durable','last_checkpoint',
+ choices=['all_durable','last_checkpoint',
'oldest','oldest_reader','pinned','recovery','stable']),
]),
diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c
index 53c2ac9d95d..3345cd7c708 100644
--- a/src/third_party/wiredtiger/examples/c/ex_all.c
+++ b/src/third_party/wiredtiger/examples/c/ex_all.c
@@ -912,7 +912,7 @@ transaction_ops(WT_SESSION *session_arg)
error_check(session->commit_transaction(session, NULL));
- error_check(conn->query_timestamp(conn, timestamp_buf, "get=all_committed"));
+ error_check(conn->query_timestamp(conn, timestamp_buf, "get=all_durable"));
/*! [query timestamp] */
}
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index f2d431f97fa..b2f18ef2ef2 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "61bf0547c6edfc6cd7c633c80f1c76f3e33e5aec",
+ "commit": "5bd9fdfc0bb7f8fcf791e234b6e436c79dfe1fee",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.4"
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index f7297729967..71a4c11dd44 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -291,9 +291,12 @@ __wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* if eviction fails (the page may stay in cache clean but with history that cannot be
* discarded), that is not wasted effort because checkpoint doesn't need to write the
* page again.
+ *
+ * Once the transaction has given up its snapshot it is no longer safe to reconcile
+ * pages. That happens prior to the final metadata checkpoint.
*/
if (!WT_PAGE_IS_INTERNAL(page) && page->read_gen == WT_READGEN_WONT_NEED &&
- !tried_eviction) {
+ !tried_eviction && F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT)) {
ret = __wt_page_release_evict(session, walk, 0);
walk = NULL;
WT_ERR_BUSY_OK(ret);
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index a24841fe67e..12f510728c7 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -33,9 +33,8 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_open_session[] = {
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_query_timestamp[] = {
{"get", "string", NULL,
- "choices=[\"all_committed\",\"all_durable\","
- "\"last_checkpoint\",\"oldest\",\"oldest_reader\",\"pinned\","
- "\"recovery\",\"stable\"]",
+ "choices=[\"all_durable\",\"last_checkpoint\",\"oldest\","
+ "\"oldest_reader\",\"pinned\",\"recovery\",\"stable\"]",
NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index a50e617705b..ddcdeffedf1 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -2471,18 +2471,16 @@ struct __wt_connection {
* hexadecimal encoding of the timestamp being queried. Must be large
* enough to hold a NUL terminated, hex-encoded 8B timestamp (17 bytes).
* @configstart{WT_CONNECTION.query_timestamp, see dist/api_data.py}
- * @config{get, specify which timestamp to query: \c all_committed returns the largest
- * timestamp such that all timestamps up to that value have committed\, \c all_durable
- * returns the largest timestamp such that all timestamps up to that value have been made
- * durable\, \c last_checkpoint returns the timestamp of the most recent stable checkpoint\,
- * \c oldest returns the most recent \c oldest_timestamp set with
- * WT_CONNECTION::set_timestamp\, \c oldest_reader returns the minimum of the read
- * timestamps of all active readers \c pinned returns the minimum of the \c oldest_timestamp
- * and the read timestamps of all active readers\, \c recovery returns the timestamp of the
- * most recent stable checkpoint taken prior to a shutdown and \c stable returns the most
- * recent \c stable_timestamp set with WT_CONNECTION::set_timestamp. See @ref
- * transaction_timestamps., a string\, chosen from the following options: \c
- * "all_committed"\, \c "all_durable"\, \c "last_checkpoint"\, \c "oldest"\, \c
+ * @config{get, specify which timestamp to query: \c all_durable returns the largest
+ * timestamp such that all timestamps up to that value have been made durable\, \c
+ * last_checkpoint returns the timestamp of the most recent stable checkpoint\, \c oldest
+ * returns the most recent \c oldest_timestamp set with WT_CONNECTION::set_timestamp\, \c
+ * oldest_reader returns the minimum of the read timestamps of all active readers \c pinned
+ * returns the minimum of the \c oldest_timestamp and the read timestamps of all active
+ * readers\, \c recovery returns the timestamp of the most recent stable checkpoint taken
+ * prior to a shutdown and \c stable returns the most recent \c stable_timestamp set with
+ * WT_CONNECTION::set_timestamp. See @ref transaction_timestamps., a string\, chosen from
+ * the following options: \c "all_durable"\, \c "last_checkpoint"\, \c "oldest"\, \c
* "oldest_reader"\, \c "pinned"\, \c "recovery"\, \c "stable"; default \c all_durable.}
* @configend
* @errors
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 8db13cc7048..c24191efa24 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -250,8 +250,7 @@ __txn_global_query_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, cons
WT_STAT_CONN_INCR(session, txn_query_ts);
WT_RET(__wt_config_gets(session, cfg, "get", &cval));
- if (WT_STRING_MATCH("all_committed", cval.str, cval.len) ||
- WT_STRING_MATCH("all_durable", cval.str, cval.len)) {
+ if (WT_STRING_MATCH("all_durable", cval.str, cval.len)) {
if (!txn_global->has_durable_timestamp)
return (WT_NOTFOUND);
ts = txn_global->durable_timestamp;
diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
index 1b69427d9f2..c6672dc9830 100644
--- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
@@ -141,7 +141,7 @@ thread_ts_run(void *arg)
* that requires locking out transactional ops that set or query a timestamp.
*/
testutil_check(pthread_rwlock_wrlock(&ts_lock));
- ret = td->conn->query_timestamp(td->conn, ts_string, "get=all_committed");
+ ret = td->conn->query_timestamp(td->conn, ts_string, "get=all_durable");
testutil_check(pthread_rwlock_unlock(&ts_lock));
testutil_assert(ret == 0 || ret == WT_NOTFOUND);
if (ret == 0) {
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index d728e9f4e96..5f63c4ff3a6 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -371,6 +371,55 @@ tasks:
vars:
posix_configure_flags: --enable-strict --enable-diagnostic
+ - name: compile-gcc
+ tags: ["pull_request", "pull_request_compilers"]
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-4.8 CXX=g++-4.8 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-5 CXX=g++-5 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-6 CXX=g++-6 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-6 CXX=g++-6 CFLAGS="-ggdb -fPIC" CPPFLAGS=-fvisibility=hidden
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-7 CXX=g++-7 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-8 CXX=g++-8 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-9 CXX=g++-9 CFLAGS="-ggdb -fPIC"
+
+ - name: compile-clang
+ tags: ["pull_request", "pull_request_compilers"]
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-3.9 CXX=clang++-3.9 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-4.0 CXX=clang++-4.0 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-5.0 CXX=clang++-5.0 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-6.0 CXX=clang++-6.0 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-7 CXX=clang++-7 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-8 CXX=clang++-8 CFLAGS="-ggdb -fPIC"
+
- name: make-check-test
depends_on:
- name: compile
@@ -2029,7 +2078,7 @@ buildvariants:
posix_configure_flags: --enable-silent-rules --enable-diagnostic --enable-python --enable-zlib --enable-snappy --enable-strict --enable-static --prefix=$(pwd)/LOCAL_INSTALL
make_command: PATH=/opt/mongodbtoolchain/v3/bin:$PATH make
tasks:
- - name: ".pull_request !.windows_only"
+ - name: ".pull_request !.windows_only !.pull_request_compilers"
- name: compile-msan
- name: make-check-msan-test
- name: compile-ubsan
@@ -2055,6 +2104,17 @@ buildvariants:
- name: format-stress-smoke-test
- name: checkpoint-stress-test
+- name: ubuntu1804-compilers
+ display_name: Ubuntu 18.04 Compilers
+ run_on:
+ - ubuntu1804-wt-build
+ expansions:
+ posix_configure_flags: --enable-silent-rules --enable-diagnostic --enable-strict --enable-lz4 --enable-snappy --enable-zlib --enable-zstd --enable-python
+ smp_command: -j $(grep -c ^processor /proc/cpuinfo)
+ make_command: PATH=/opt/mongodbtoolchain/v3/bin:$PATH make
+ tasks:
+ - name: ".pull_request_compilers"
+
- name: ubuntu1804-python3
display_name: Ubuntu 18.04 (Python3)
run_on:
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index a1fa5d425b9..899956c8f71 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -106,15 +106,6 @@ typedef struct {
uint64_t truncate_cnt; /* Counter for truncation */
- /*
- * We have a list of records that are appended, but not yet "resolved", that is, we haven't yet
- * incremented the g.rows value to reflect the new records.
- */
- uint64_t *append; /* Appended records */
- size_t append_max; /* Maximum unresolved records */
- size_t append_cnt; /* Current unresolved records */
- pthread_rwlock_t append_lock; /* Single-thread resolution */
-
pthread_rwlock_t death_lock; /* Single-thread failure */
char *uri; /* Object name */
@@ -315,6 +306,9 @@ typedef struct {
uint64_t commit_ts; /* commit timestamp */
SNAP_OPS *snap, *snap_first, snap_list[512];
+ uint64_t insert_list[256]; /* column-store inserted records */
+ u_int insert_list_cnt;
+
WT_ITEM *tbuf, _tbuf; /* temporary buffer */
#define TINFO_RUNNING 1 /* Running */
diff --git a/src/third_party/wiredtiger/test/format/format.sh b/src/third_party/wiredtiger/test/format/format.sh
index 37607d6b8df..f9baf495a83 100755
--- a/src/third_party/wiredtiger/test/format/format.sh
+++ b/src/third_party/wiredtiger/test/format/format.sh
@@ -361,14 +361,14 @@ format()
if [[ $smoke_test -ne 0 ]]; then
args=${smoke_list[$smoke_next]}
smoke_next=$(($smoke_next + 1))
- echo "$name: starting smoke-test job in $dir"
+ echo "$name: starting smoke-test job in $dir ($(date))"
else
args=$format_args
# If abort/recovery testing is configured, do it 5% of the time.
[[ $abort_test -ne 0 ]] && [[ $(($count_jobs % 20)) -eq 0 ]] && args="$args abort=1"
- echo "$name: starting job in $dir"
+ echo "$name: starting job in $dir ($(date))"
fi
cmd="$format_binary -c "$config" -h "$dir" -1 $args quiet=1"
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 669cb7a484e..d6fce308164 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -29,6 +29,7 @@
#include "format.h"
static int col_insert(TINFO *, WT_CURSOR *);
+static void col_insert_resolve(TINFO *);
static int col_modify(TINFO *, WT_CURSOR *, bool);
static int col_remove(TINFO *, WT_CURSOR *, bool);
static int col_reserve(TINFO *, WT_CURSOR *, bool);
@@ -43,7 +44,6 @@ static int row_remove(TINFO *, WT_CURSOR *, bool);
static int row_reserve(TINFO *, WT_CURSOR *, bool);
static int row_truncate(TINFO *, WT_CURSOR *);
static int row_update(TINFO *, WT_CURSOR *, bool);
-static void table_append_init(void);
static char modify_repl[256];
@@ -167,9 +167,6 @@ wts_ops(bool lastrun)
quit_fourths = fourths + 15 * 4 * 60;
}
- /* Initialize the table extension code. */
- table_append_init();
-
/*
* We support replay of threaded runs, but don't log random numbers after threaded operations
* start, there's no point.
@@ -789,7 +786,7 @@ ops(void *arg)
* We can only append so many new records, once we reach that limit, update a record
* instead of inserting.
*/
- if (g.append_cnt >= g.append_max)
+ if (tinfo->insert_list_cnt >= WT_ELEMENTS(tinfo->insert_list))
goto update_instead_of_chosen_op;
ret = col_insert(tinfo, cursor);
@@ -944,6 +941,10 @@ update_instead_of_chosen_op:
break;
}
+ /* If we have pending inserts, try and update the total rows. */
+ if (tinfo->insert_list_cnt > 0)
+ col_insert_resolve(tinfo);
+
/*
* The cursor is positioned if we did any operation other than insert, do a small number of
* next/prev cursor operations in a random direction.
@@ -1582,100 +1583,6 @@ col_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
}
/*
- * table_append_init --
- * Re-initialize the appended records list.
- */
-static void
-table_append_init(void)
-{
- /* Append up to 10 records per thread before waiting on resolution. */
- g.append_max = (size_t)g.c_threads * 10;
- g.append_cnt = 0;
-
- free(g.append);
- g.append = dcalloc(g.append_max, sizeof(uint64_t));
-}
-
-/*
- * table_append --
- * Resolve the appended records.
- */
-static void
-table_append(uint64_t keyno)
-{
- uint64_t *ep, *p;
- int done;
-
- ep = g.append + g.append_max;
-
- /*
- * We don't want to ignore records we append, which requires we update the "last row" as we
- * insert new records. Threads allocating record numbers can race with other threads, so the
- * thread allocating record N may return after the thread allocating N + 1. We can't update a
- * record before it's been inserted, and so we can't leave gaps when the count of records in the
- * table is incremented.
- *
- * The solution is the append table, which contains an unsorted list of appended records. Every
- * time we finish appending a record, process the table, trying to update the total records in
- * the object.
- *
- * First, enter the new key into the append list.
- *
- * It's technically possible to race: we allocated space for 10 records per thread, but the
- * check for the maximum number of records being appended doesn't lock. If a thread allocated a
- * new record and went to sleep (so the append table fills up), then N threads of control used
- * the same g.append_cnt value to decide there was an available slot in the append table and
- * both allocated new records, we could run out of space in the table. It's unfortunately not
- * even unlikely in the case of a large number of threads all inserting as fast as they can and
- * a single thread going to sleep for an unexpectedly long time. If it happens, sleep and retry
- * until earlier records are resolved and we find a slot.
- */
- for (done = 0;;) {
- testutil_check(pthread_rwlock_wrlock(&g.append_lock));
-
- /*
- * If this is the thread we've been waiting for, and its record won't fit, we'd loop
- * infinitely. If there are many append operations and a thread goes to sleep for a little
- * too long, it can happen.
- */
- if (keyno == g.rows + 1) {
- g.rows = keyno;
- done = 1;
-
- /*
- * Clean out the table, incrementing the total count of records until we don't find the
- * next key.
- */
- for (;;) {
- for (p = g.append; p < ep; ++p)
- if (*p == g.rows + 1) {
- g.rows = *p;
- *p = 0;
- --g.append_cnt;
- break;
- }
- if (p == ep)
- break;
- }
- } else
- /* Enter the key into the table. */
- for (p = g.append; p < ep; ++p)
- if (*p == 0) {
- *p = keyno;
- ++g.append_cnt;
- done = 1;
- break;
- }
-
- testutil_check(pthread_rwlock_unlock(&g.append_lock));
-
- if (done)
- break;
- __wt_sleep(1, 0);
- }
-}
-
-/*
* row_insert --
* Insert a row in a row-store file.
*/
@@ -1707,6 +1614,63 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
}
/*
+ * col_insert_resolve --
+ * Resolve newly inserted records.
+ */
+static void
+col_insert_resolve(TINFO *tinfo)
+{
+ uint64_t v, *p;
+ u_int i;
+
+ /*
+ * We don't want to ignore column-store records we insert, which requires we update the "last
+ * row" so other threads consider them. Threads allocating record numbers can race with other
+ * threads, so the thread allocating record N may return after the thread allocating N + 1. We
+ * can't update a record before it's been inserted, and so we can't leave gaps when the count of
+ * records in the table is incremented.
+ *
+ * The solution is a per-thread array which contains an unsorted list of inserted records. If
+ * there are pending inserts, we review the table after every operation, trying to update the
+ * total rows. This is wasteful, but we want to give other threads immediate access to the row,
+ * ideally they'll collide with our insert before we resolve.
+ *
+ * Process the existing records and advance the last row count until we can't go further.
+ */
+ do {
+ WT_ORDERED_READ(v, g.rows);
+ for (i = 0, p = tinfo->insert_list; i < WT_ELEMENTS(tinfo->insert_list); ++i) {
+ if (*p == v + 1) {
+ testutil_assert(__wt_atomic_casv64(&g.rows, v, v + 1));
+ *p = 0;
+ --tinfo->insert_list_cnt;
+ break;
+ }
+ testutil_assert(*p == 0 || *p > v);
+ }
+ } while (tinfo->insert_list_cnt > 0 && i < WT_ELEMENTS(tinfo->insert_list));
+}
+
+/*
+ * col_insert_add --
+ * Add newly inserted records.
+ */
+static void
+col_insert_add(TINFO *tinfo)
+{
+ u_int i;
+
+ /* Add the inserted record to the array. */
+ for (i = 0; i < WT_ELEMENTS(tinfo->insert_list); ++i)
+ if (tinfo->insert_list[i] == 0) {
+ tinfo->insert_list[i] = tinfo->keyno;
+ ++tinfo->insert_list_cnt;
+ break;
+ }
+ testutil_assert(i < WT_ELEMENTS(tinfo->insert_list));
+}
+
+/*
* col_insert --
* Insert an element in a column-store file.
*/
@@ -1726,7 +1690,7 @@ col_insert(TINFO *tinfo, WT_CURSOR *cursor)
testutil_check(cursor->get_key(cursor, &tinfo->keyno));
- table_append(tinfo->keyno); /* Extend the object. */
+ col_insert_add(tinfo); /* Extend the object. */
if (g.type == FIX)
logop(cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "insert", tinfo->keyno,
diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c
index 5665b50ab4c..85bef1a694c 100644
--- a/src/third_party/wiredtiger/test/format/t.c
+++ b/src/third_party/wiredtiger/test/format/t.c
@@ -190,7 +190,6 @@ main(int argc, char *argv[])
* Initialize locks to single-thread named checkpoints and backups, last last-record updates,
* and failures.
*/
- testutil_check(pthread_rwlock_init(&g.append_lock, NULL));
testutil_check(pthread_rwlock_init(&g.backup_lock, NULL));
testutil_check(pthread_rwlock_init(&g.death_lock, NULL));
testutil_check(pthread_rwlock_init(&g.ts_lock, NULL));
@@ -270,7 +269,6 @@ main(int argc, char *argv[])
config_print(false);
- testutil_check(pthread_rwlock_destroy(&g.append_lock));
testutil_check(pthread_rwlock_destroy(&g.backup_lock));
testutil_check(pthread_rwlock_destroy(&g.death_lock));
testutil_check(pthread_rwlock_destroy(&g.ts_lock));
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp14.py b/src/third_party/wiredtiger/test/suite/test_timestamp14.py
index 4343160f92f..1fc5cdc7b22 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp14.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp14.py
@@ -43,36 +43,32 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
uri = 'table:' + tablename
session_config = 'isolation=snapshot'
- def test_all_committed(self):
+ def test_all_durable_old(self):
# This test was originally for testing the all_committed timestamp.
# In the absence of prepared transactions, all_durable is identical to
- # all_committed so let's enforce the same values for both.
- all_committed_uri = self.uri + '_all_committed'
+ # all_committed so let's enforce the all_durable values instead.
+ all_durable_uri = self.uri + '_all_durable'
session1 = self.setUpSessionOpen(self.conn)
session2 = self.setUpSessionOpen(self.conn)
- session1.create(all_committed_uri, 'key_format=i,value_format=i')
- session2.create(all_committed_uri, 'key_format=i,value_format=i')
+ session1.create(all_durable_uri, 'key_format=i,value_format=i')
+ session2.create(all_durable_uri, 'key_format=i,value_format=i')
# Scenario 0: No commit timestamp has ever been specified therefore
- # There is no all_committed timestamp and we will get an error
+ # There is no all_durable timestamp and we will get an error
# Querying for it.
session1.begin_transaction()
- cur1 = session1.open_cursor(all_committed_uri)
+ cur1 = session1.open_cursor(all_durable_uri)
cur1[1]=1
session1.commit_transaction()
self.assertRaisesException(wiredtiger.WiredTigerError,
- lambda: self.conn.query_timestamp('get=all_committed'))
- self.assertRaisesException(wiredtiger.WiredTigerError,
lambda: self.conn.query_timestamp('get=all_durable'))
# Scenario 1: A single transaction with a commit timestamp, will
- # result in the all_committed timestamp being set.
+ # result in the all_durable timestamp being set.
session1.begin_transaction()
cur1[1]=1
session1.commit_transaction('commit_timestamp=1')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "1")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "1")
# Scenario 2: A transaction begins and specifies that it intends
@@ -82,36 +78,30 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
session1.timestamp_transaction('commit_timestamp=2')
session2.begin_transaction()
- cur2 = session2.open_cursor(all_committed_uri)
+ cur2 = session2.open_cursor(all_durable_uri)
cur2[2] = 2
session2.commit_transaction('commit_timestamp=3')
- # As the original transaction is still running the all_committed
+ # As the original transaction is still running the all_durable
# timestamp is being held at 1.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "1")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "1")
cur1[1] = 2
session1.commit_transaction()
- # Now that the original transaction has finished the all_committed
+ # Now that the original transaction has finished the all_durable
# timestamp has moved to 3, skipping 2 as there is a commit with
# a greater timestamp already existing.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "3")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "3")
# Senario 3: Commit with a commit timestamp of 5 and then begin a
- # transaction intending to commit at 4, the all_committed timestamp
+ # transaction intending to commit at 4, the all_durable timestamp
# should move back to 3. Until the transaction at 4 completes.
session1.begin_transaction()
cur1[1] = 3
session1.commit_transaction('commit_timestamp=5')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "5")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "5")
session1.begin_transaction()
@@ -120,8 +110,6 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
session1.timestamp_transaction('commit_timestamp=4')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "3")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "3")
session1.commit_transaction()
@@ -129,20 +117,16 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
# Now that the transaction at timestamp 4 has completed the
# all committed timestamp is back at 5.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "5")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "5")
# Scenario 4: Holding a transaction open without a commit timestamp
- # Will not affect the all_committed timestamp.
+ # will not affect the all_durable timestamp.
session1.begin_transaction()
session2.begin_transaction()
cur2[2] = 2
session2.commit_transaction('commit_timestamp=6')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "6")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "6")
cur1[1] = 2
session1.commit_transaction()
@@ -270,26 +254,19 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
cur1[1] = 1
session1.commit_transaction('commit_timestamp=3')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '3')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '3')
# We have a running transaction with a lower commit_timestamp than we've
- # seen before. So all_durable (like all_committed) should return (lowest
- # commit timestamp - 1).
+ # seen before. So all_durable should return (lowest commit timestamp - 1).
session1.begin_transaction()
cur1[1] = 2
session1.timestamp_transaction('commit_timestamp=2')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '1')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '1')
session1.commit_transaction()
# After committing, go back to the value we saw previously.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '3')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '3')
# For prepared transactions, we take into account the durable timestamp
@@ -302,16 +279,12 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
# don't want that to be visible in the all_durable calculation.
session1.timestamp_transaction('commit_timestamp=7')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '3')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '3')
# Now take into account the durable timestamp.
session1.timestamp_transaction('durable_timestamp=8')
session1.commit_transaction()
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '8')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '8')
# All durable moves back when we have a running prepared transaction
@@ -324,21 +297,15 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
# don't want that to be visible in the all_durable calculation.
session1.timestamp_transaction('commit_timestamp=4')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '8')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '8')
# Now take into account the durable timestamp.
session1.timestamp_transaction('durable_timestamp=5')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '4')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '4')
session1.commit_transaction()
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '8')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '8')
# Now test a scenario with multiple commit timestamps for a single txn.
@@ -346,8 +313,6 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
cur1[1] = 5
session1.timestamp_transaction('commit_timestamp=6')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '5')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '5')
# Make more changes and set a new commit timestamp.
@@ -356,15 +321,11 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
cur1[1] = 6
session1.timestamp_transaction('commit_timestamp=7')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '5')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '5')
# Once committed, we go back to 8.
session1.commit_transaction()
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '8')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '8')
def test_all(self):
@@ -386,9 +347,9 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
session1.begin_transaction()
cur1[1]=2
session1.commit_transaction('commit_timestamp=4')
- # Confirm all_committed is now 4.
+ # Confirm all_durable is now 4.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "4")
+ self.conn.query_timestamp('get=all_durable'), "4")
# Create a read session.
session1.begin_transaction('read_timestamp=2')
@@ -401,9 +362,9 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
session2.begin_transaction()
cur2[2] = 2
session2.commit_transaction('commit_timestamp=7')
- # All_committed should now be 7.
+ # All_durable should now be 7.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "7")
+ self.conn.query_timestamp('get=all_durable'), "7")
# Move oldest to 5.
self.conn.set_timestamp('oldest_timestamp=5')
@@ -414,20 +375,20 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
self.conn.query_timestamp('get=oldest_reader'))
# Begin a write transaction pointing at timestamp 6,
- # this is below our current all_committed so it should move back
+ # this is below our current all_durable so it should move back
# to the oldest timestamp.
session2.begin_transaction()
session2.timestamp_transaction('commit_timestamp=6')
cur2[2] = 3
- # Confirm all_committed is now equal to oldest.
+ # Confirm all_durable is now equal to oldest.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'),
+ self.conn.query_timestamp('get=all_durable'),
self.conn.query_timestamp('get=oldest'))
session2.commit_transaction()
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "7")
+ self.conn.query_timestamp('get=all_durable'), "7")
# End our read transaction.
session1.commit_transaction()