summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2020-01-07 05:07:34 +0000
committerevergreen <evergreen@mongodb.com>2020-01-07 05:07:34 +0000
commit41c060c649959b6b270ca0e4bd546e7b3041914d (patch)
treebbd89c87eced3a817357bfec057b4f2ec17cbe7a /src
parent4d299e75428d6b0f54396354afca87fa3556d6e3 (diff)
downloadmongo-41c060c649959b6b270ca0e4bd546e7b3041914d.tar.gz
Import wiredtiger: 5bd9fdfc0bb7f8fcf791e234b6e436c79dfe1fee from branch mongodb-4.4
ref: 61bf0547c6..5bd9fdfc0b for: 4.3.3 WT-4968 Remove all_committed timestamp WT-4978 Migrate Jenkins “wiredtiger-pull-request-compilers” job to Evergreen WT-5192 Don't allow checkpoints to evict without a snapshot WT-5235 Craft a workload to show lookaside contention WT-5377 variable-length column store insert locking in format test program can stall eviction WT-5389 Add timestamps to format.sh test runs
Diffstat (limited to 'src')
-rwxr-xr-xsrc/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py168
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py5
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_all.c2
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_sync.c5
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c5
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in22
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c3
-rw-r--r--src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c2
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml62
-rw-r--r--src/third_party/wiredtiger/test/format/format.h12
-rwxr-xr-xsrc/third_party/wiredtiger/test/format/format.sh4
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c164
-rw-r--r--src/third_party/wiredtiger/test/format/t.c2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp14.py83
15 files changed, 342 insertions, 199 deletions
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py b/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py
new file mode 100755
index 00000000000..333da4b178c
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/workgen/runner/evict-btree-lookaside.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+# This benchmark is designed to stress disk access to the LAS/History Store file.
+# This is achieved through:
+# - Long running transactions consisting of read and update operations.
+# - Low cache size (~20%) for a reasonably sized WT table with large documents.
+# - Pareto distribution for operations in long running transactions. This will cause
+# skewed access for a selective set of keys in WT table.
+# - Relatively large number of read operation threads to stress cache.
+
+# Benchmark is based on a wtperf config: wiredtiger/bench/wtperf/runners/evict-btree-scan.wtperf
+# There are a number of changes made to the original configs, such as:
+# - Reduced the number of documents inserts during initial warm-up phase.
+# - Increased the sizes of key and value.
+# - Reduced the WT cache size.
+# - Added transaction based operations and repurposed some of the read threads as long-running
+# transaction threads.
+
+###################################################################################################
+'''
+# wtperf options file: evict btree configuration
+conn_config="cache_size=40G,checkpoint=(wait=60,log_size=2GB),eviction=(threads_min=12,
+threads_max=12),log=(enabled=true),session_max=600,eviction_target=60,statistics=(fast),
+statistics_log=(wait=1,json)"
+
+# 1B records * (key=12 + value=138) is about 150G total data size
+key_sz=12
+value_sz=138
+log_like_table=true
+table_config="type=file"
+icount=1000000000
+report_interval=5
+run_time=3600
+# Scans every 10 minutes for all the scan specific tables.
+# .4B records * (key=12 + value=138) is about 60G total data size for scan
+# Running on a machine with 64G physical memory, this exhausts both the
+# WT cache and the system cache.
+scan_interval=600
+scan_pct=100
+scan_table_count=20
+scan_icount=400000000
+populate_threads=5
+table_count=100
+threads=((count=400,reads=1),(count=20,inserts=1,throttle=500),(count=10,updates=1,throttle=500))
+# Add throughput/latency monitoring
+max_latency=50000
+sample_interval=5
+'''
+###################################################################################################
+
+from runner import *
+from wiredtiger import *
+from workgen import *
+
+context = Context()
+homedir = "WT_TEST"
+conn_config = "cache_size=1G,checkpoint=(wait=60,log_size=2GB),\
+ eviction=(threads_min=12,threads_max=12),log=(enabled=true),session_max=600,\
+ eviction_target=60,statistics=(fast),statistics_log=(wait=1,json)"# explicitly added
+conn = wiredtiger_open(homedir, "create," + conn_config)
+s = conn.open_session("")
+
+wtperf_table_config = "key_format=S,value_format=S," +\
+ "exclusive=true,allocation_size=4kb," +\
+ "internal_page_max=64kb,leaf_page_max=4kb,split_pct=100,"
+compress_table_config = ""
+table_config = "type=file"
+tables = []
+table_count = 1
+for i in range(0, table_count):
+ tname = "table:test" + str(i)
+ table = Table(tname)
+ s.create(tname, wtperf_table_config +\
+ compress_table_config + table_config)
+ table.options.key_size = 200
+ table.options.value_size = 5000
+ tables.append(table)
+
+populate_threads = 40
+icount = 500000
+# If there are multiple tables to be filled during populate,
+# the icount is split between them all.
+pop_ops = Operation(Operation.OP_INSERT, tables[0])
+pop_ops = op_multi_table(pop_ops, tables)
+nops_per_thread = icount // (populate_threads * table_count)
+pop_thread = Thread(pop_ops * nops_per_thread)
+pop_workload = Workload(context, populate_threads * pop_thread)
+pop_workload.run(conn)
+
+# Log like file, requires that logging be enabled in the connection config.
+log_name = "table:log"
+s.create(log_name, wtperf_table_config + "key_format=S,value_format=S," +\
+ compress_table_config + table_config + ",log=(enabled=true)")
+log_table = Table(log_name)
+
+ops = Operation(Operation.OP_SEARCH, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+thread0 = Thread(ops)
+
+ops = Operation(Operation.OP_INSERT, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+thread1 = Thread(ops)
+# These operations include log_like operations, which will increase the number
+# of insert/update operations by a factor of 2.0. This may cause the
+# actual operations performed to be above the throttle.
+thread1.options.throttle=500
+thread1.options.throttle_burst=1.0
+
+ops = Operation(Operation.OP_UPDATE, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+thread2 = Thread(ops)
+# These operations include log_like operations, which will increase the number
+# of insert/update operations by a factor of 2.0. This may cause the
+# actual operations performed to be above the throttle.
+thread2.options.throttle=500
+thread2.options.throttle_burst=1.0
+
+# Long running transactions. There is a 0.1 second sleep after a series of search and update
+# operations. The sleep op is repeated 10000 times and this will make these transactions run
+# for at least ~17 minutes.
+search_op = Operation(Operation.OP_SEARCH, tables[0], Key(Key.KEYGEN_PARETO, 0, ParetoOptions(1)))
+update_op = Operation(Operation.OP_UPDATE, tables[0], Key(Key.KEYGEN_PARETO, 0, ParetoOptions(1)))
+ops = txn(((search_op + update_op) * 1000 + sleep(0.1)) * 10000)
+thread3 = Thread(ops)
+
+ops = Operation(Operation.OP_SLEEP, "0.1") + \
+ Operation(Operation.OP_LOG_FLUSH, "")
+logging_thread = Thread(ops)
+
+workload = Workload(context, 350 * thread0 + 10 * thread1 +\
+ 50 * thread2 + 100 * thread3 + logging_thread)
+workload.options.report_interval=5
+workload.options.run_time=300
+workload.options.max_latency=50000
+workload.run(conn)
+
+latency_filename = homedir + "/latency.out"
+latency.workload_latency(workload, latency_filename)
+conn.close()
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index d6280b7f0ec..3a46d6e7ff0 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -1610,8 +1610,7 @@ methods = {
'WT_CONNECTION.query_timestamp' : Method([
Config('get', 'all_durable', r'''
- specify which timestamp to query: \c all_committed returns the largest
- timestamp such that all timestamps up to that value have committed,
+ specify which timestamp to query:
\c all_durable returns the largest timestamp such that all timestamps
up to that value have been made durable, \c last_checkpoint returns the
timestamp of the most recent stable checkpoint, \c oldest returns the
@@ -1622,7 +1621,7 @@ methods = {
timestamp of the most recent stable checkpoint taken prior to a shutdown
and \c stable returns the most recent \c stable_timestamp set with
WT_CONNECTION::set_timestamp. See @ref transaction_timestamps''',
- choices=['all_committed','all_durable','last_checkpoint',
+ choices=['all_durable','last_checkpoint',
'oldest','oldest_reader','pinned','recovery','stable']),
]),
diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c
index 53c2ac9d95d..3345cd7c708 100644
--- a/src/third_party/wiredtiger/examples/c/ex_all.c
+++ b/src/third_party/wiredtiger/examples/c/ex_all.c
@@ -912,7 +912,7 @@ transaction_ops(WT_SESSION *session_arg)
error_check(session->commit_transaction(session, NULL));
- error_check(conn->query_timestamp(conn, timestamp_buf, "get=all_committed"));
+ error_check(conn->query_timestamp(conn, timestamp_buf, "get=all_durable"));
/*! [query timestamp] */
}
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index f2d431f97fa..b2f18ef2ef2 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "61bf0547c6edfc6cd7c633c80f1c76f3e33e5aec",
+ "commit": "5bd9fdfc0bb7f8fcf791e234b6e436c79dfe1fee",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.4"
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index f7297729967..71a4c11dd44 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -291,9 +291,12 @@ __wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* if eviction fails (the page may stay in cache clean but with history that cannot be
* discarded), that is not wasted effort because checkpoint doesn't need to write the
* page again.
+ *
+ * Once the transaction has given up its snapshot it is no longer safe to reconcile
+ * pages. That happens prior to the final metadata checkpoint.
*/
if (!WT_PAGE_IS_INTERNAL(page) && page->read_gen == WT_READGEN_WONT_NEED &&
- !tried_eviction) {
+ !tried_eviction && F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT)) {
ret = __wt_page_release_evict(session, walk, 0);
walk = NULL;
WT_ERR_BUSY_OK(ret);
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index a24841fe67e..12f510728c7 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -33,9 +33,8 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_open_session[] = {
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_query_timestamp[] = {
{"get", "string", NULL,
- "choices=[\"all_committed\",\"all_durable\","
- "\"last_checkpoint\",\"oldest\",\"oldest_reader\",\"pinned\","
- "\"recovery\",\"stable\"]",
+ "choices=[\"all_durable\",\"last_checkpoint\",\"oldest\","
+ "\"oldest_reader\",\"pinned\",\"recovery\",\"stable\"]",
NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index a50e617705b..ddcdeffedf1 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -2471,18 +2471,16 @@ struct __wt_connection {
* hexadecimal encoding of the timestamp being queried. Must be large
* enough to hold a NUL terminated, hex-encoded 8B timestamp (17 bytes).
* @configstart{WT_CONNECTION.query_timestamp, see dist/api_data.py}
- * @config{get, specify which timestamp to query: \c all_committed returns the largest
- * timestamp such that all timestamps up to that value have committed\, \c all_durable
- * returns the largest timestamp such that all timestamps up to that value have been made
- * durable\, \c last_checkpoint returns the timestamp of the most recent stable checkpoint\,
- * \c oldest returns the most recent \c oldest_timestamp set with
- * WT_CONNECTION::set_timestamp\, \c oldest_reader returns the minimum of the read
- * timestamps of all active readers \c pinned returns the minimum of the \c oldest_timestamp
- * and the read timestamps of all active readers\, \c recovery returns the timestamp of the
- * most recent stable checkpoint taken prior to a shutdown and \c stable returns the most
- * recent \c stable_timestamp set with WT_CONNECTION::set_timestamp. See @ref
- * transaction_timestamps., a string\, chosen from the following options: \c
- * "all_committed"\, \c "all_durable"\, \c "last_checkpoint"\, \c "oldest"\, \c
+ * @config{get, specify which timestamp to query: \c all_durable returns the largest
+ * timestamp such that all timestamps up to that value have been made durable\, \c
+ * last_checkpoint returns the timestamp of the most recent stable checkpoint\, \c oldest
+ * returns the most recent \c oldest_timestamp set with WT_CONNECTION::set_timestamp\, \c
+ * oldest_reader returns the minimum of the read timestamps of all active readers \c pinned
+ * returns the minimum of the \c oldest_timestamp and the read timestamps of all active
+ * readers\, \c recovery returns the timestamp of the most recent stable checkpoint taken
+ * prior to a shutdown and \c stable returns the most recent \c stable_timestamp set with
+ * WT_CONNECTION::set_timestamp. See @ref transaction_timestamps., a string\, chosen from
+ * the following options: \c "all_durable"\, \c "last_checkpoint"\, \c "oldest"\, \c
* "oldest_reader"\, \c "pinned"\, \c "recovery"\, \c "stable"; default \c all_durable.}
* @configend
* @errors
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 8db13cc7048..c24191efa24 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -250,8 +250,7 @@ __txn_global_query_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, cons
WT_STAT_CONN_INCR(session, txn_query_ts);
WT_RET(__wt_config_gets(session, cfg, "get", &cval));
- if (WT_STRING_MATCH("all_committed", cval.str, cval.len) ||
- WT_STRING_MATCH("all_durable", cval.str, cval.len)) {
+ if (WT_STRING_MATCH("all_durable", cval.str, cval.len)) {
if (!txn_global->has_durable_timestamp)
return (WT_NOTFOUND);
ts = txn_global->durable_timestamp;
diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
index 1b69427d9f2..c6672dc9830 100644
--- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
@@ -141,7 +141,7 @@ thread_ts_run(void *arg)
* that requires locking out transactional ops that set or query a timestamp.
*/
testutil_check(pthread_rwlock_wrlock(&ts_lock));
- ret = td->conn->query_timestamp(td->conn, ts_string, "get=all_committed");
+ ret = td->conn->query_timestamp(td->conn, ts_string, "get=all_durable");
testutil_check(pthread_rwlock_unlock(&ts_lock));
testutil_assert(ret == 0 || ret == WT_NOTFOUND);
if (ret == 0) {
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index d728e9f4e96..5f63c4ff3a6 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -371,6 +371,55 @@ tasks:
vars:
posix_configure_flags: --enable-strict --enable-diagnostic
+ - name: compile-gcc
+ tags: ["pull_request", "pull_request_compilers"]
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-4.8 CXX=g++-4.8 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-5 CXX=g++-5 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-6 CXX=g++-6 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-6 CXX=g++-6 CFLAGS="-ggdb -fPIC" CPPFLAGS=-fvisibility=hidden
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-7 CXX=g++-7 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-8 CXX=g++-8 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=gcc-9 CXX=g++-9 CFLAGS="-ggdb -fPIC"
+
+ - name: compile-clang
+ tags: ["pull_request", "pull_request_compilers"]
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-3.9 CXX=clang++-3.9 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-4.0 CXX=clang++-4.0 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-5.0 CXX=clang++-5.0 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-6.0 CXX=clang++-6.0 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-7 CXX=clang++-7 CFLAGS="-ggdb -fPIC"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=clang-8 CXX=clang++-8 CFLAGS="-ggdb -fPIC"
+
- name: make-check-test
depends_on:
- name: compile
@@ -2029,7 +2078,7 @@ buildvariants:
posix_configure_flags: --enable-silent-rules --enable-diagnostic --enable-python --enable-zlib --enable-snappy --enable-strict --enable-static --prefix=$(pwd)/LOCAL_INSTALL
make_command: PATH=/opt/mongodbtoolchain/v3/bin:$PATH make
tasks:
- - name: ".pull_request !.windows_only"
+ - name: ".pull_request !.windows_only !.pull_request_compilers"
- name: compile-msan
- name: make-check-msan-test
- name: compile-ubsan
@@ -2055,6 +2104,17 @@ buildvariants:
- name: format-stress-smoke-test
- name: checkpoint-stress-test
+- name: ubuntu1804-compilers
+ display_name: Ubuntu 18.04 Compilers
+ run_on:
+ - ubuntu1804-wt-build
+ expansions:
+ posix_configure_flags: --enable-silent-rules --enable-diagnostic --enable-strict --enable-lz4 --enable-snappy --enable-zlib --enable-zstd --enable-python
+ smp_command: -j $(grep -c ^processor /proc/cpuinfo)
+ make_command: PATH=/opt/mongodbtoolchain/v3/bin:$PATH make
+ tasks:
+ - name: ".pull_request_compilers"
+
- name: ubuntu1804-python3
display_name: Ubuntu 18.04 (Python3)
run_on:
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index a1fa5d425b9..899956c8f71 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -106,15 +106,6 @@ typedef struct {
uint64_t truncate_cnt; /* Counter for truncation */
- /*
- * We have a list of records that are appended, but not yet "resolved", that is, we haven't yet
- * incremented the g.rows value to reflect the new records.
- */
- uint64_t *append; /* Appended records */
- size_t append_max; /* Maximum unresolved records */
- size_t append_cnt; /* Current unresolved records */
- pthread_rwlock_t append_lock; /* Single-thread resolution */
-
pthread_rwlock_t death_lock; /* Single-thread failure */
char *uri; /* Object name */
@@ -315,6 +306,9 @@ typedef struct {
uint64_t commit_ts; /* commit timestamp */
SNAP_OPS *snap, *snap_first, snap_list[512];
+ uint64_t insert_list[256]; /* column-store inserted records */
+ u_int insert_list_cnt;
+
WT_ITEM *tbuf, _tbuf; /* temporary buffer */
#define TINFO_RUNNING 1 /* Running */
diff --git a/src/third_party/wiredtiger/test/format/format.sh b/src/third_party/wiredtiger/test/format/format.sh
index 37607d6b8df..f9baf495a83 100755
--- a/src/third_party/wiredtiger/test/format/format.sh
+++ b/src/third_party/wiredtiger/test/format/format.sh
@@ -361,14 +361,14 @@ format()
if [[ $smoke_test -ne 0 ]]; then
args=${smoke_list[$smoke_next]}
smoke_next=$(($smoke_next + 1))
- echo "$name: starting smoke-test job in $dir"
+ echo "$name: starting smoke-test job in $dir ($(date))"
else
args=$format_args
# If abort/recovery testing is configured, do it 5% of the time.
[[ $abort_test -ne 0 ]] && [[ $(($count_jobs % 20)) -eq 0 ]] && args="$args abort=1"
- echo "$name: starting job in $dir"
+ echo "$name: starting job in $dir ($(date))"
fi
cmd="$format_binary -c "$config" -h "$dir" -1 $args quiet=1"
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 669cb7a484e..d6fce308164 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -29,6 +29,7 @@
#include "format.h"
static int col_insert(TINFO *, WT_CURSOR *);
+static void col_insert_resolve(TINFO *);
static int col_modify(TINFO *, WT_CURSOR *, bool);
static int col_remove(TINFO *, WT_CURSOR *, bool);
static int col_reserve(TINFO *, WT_CURSOR *, bool);
@@ -43,7 +44,6 @@ static int row_remove(TINFO *, WT_CURSOR *, bool);
static int row_reserve(TINFO *, WT_CURSOR *, bool);
static int row_truncate(TINFO *, WT_CURSOR *);
static int row_update(TINFO *, WT_CURSOR *, bool);
-static void table_append_init(void);
static char modify_repl[256];
@@ -167,9 +167,6 @@ wts_ops(bool lastrun)
quit_fourths = fourths + 15 * 4 * 60;
}
- /* Initialize the table extension code. */
- table_append_init();
-
/*
* We support replay of threaded runs, but don't log random numbers after threaded operations
* start, there's no point.
@@ -789,7 +786,7 @@ ops(void *arg)
* We can only append so many new records, once we reach that limit, update a record
* instead of inserting.
*/
- if (g.append_cnt >= g.append_max)
+ if (tinfo->insert_list_cnt >= WT_ELEMENTS(tinfo->insert_list))
goto update_instead_of_chosen_op;
ret = col_insert(tinfo, cursor);
@@ -944,6 +941,10 @@ update_instead_of_chosen_op:
break;
}
+ /* If we have pending inserts, try and update the total rows. */
+ if (tinfo->insert_list_cnt > 0)
+ col_insert_resolve(tinfo);
+
/*
* The cursor is positioned if we did any operation other than insert, do a small number of
* next/prev cursor operations in a random direction.
@@ -1582,100 +1583,6 @@ col_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
}
/*
- * table_append_init --
- * Re-initialize the appended records list.
- */
-static void
-table_append_init(void)
-{
- /* Append up to 10 records per thread before waiting on resolution. */
- g.append_max = (size_t)g.c_threads * 10;
- g.append_cnt = 0;
-
- free(g.append);
- g.append = dcalloc(g.append_max, sizeof(uint64_t));
-}
-
-/*
- * table_append --
- * Resolve the appended records.
- */
-static void
-table_append(uint64_t keyno)
-{
- uint64_t *ep, *p;
- int done;
-
- ep = g.append + g.append_max;
-
- /*
- * We don't want to ignore records we append, which requires we update the "last row" as we
- * insert new records. Threads allocating record numbers can race with other threads, so the
- * thread allocating record N may return after the thread allocating N + 1. We can't update a
- * record before it's been inserted, and so we can't leave gaps when the count of records in the
- * table is incremented.
- *
- * The solution is the append table, which contains an unsorted list of appended records. Every
- * time we finish appending a record, process the table, trying to update the total records in
- * the object.
- *
- * First, enter the new key into the append list.
- *
- * It's technically possible to race: we allocated space for 10 records per thread, but the
- * check for the maximum number of records being appended doesn't lock. If a thread allocated a
- * new record and went to sleep (so the append table fills up), then N threads of control used
- * the same g.append_cnt value to decide there was an available slot in the append table and
- * both allocated new records, we could run out of space in the table. It's unfortunately not
- * even unlikely in the case of a large number of threads all inserting as fast as they can and
- * a single thread going to sleep for an unexpectedly long time. If it happens, sleep and retry
- * until earlier records are resolved and we find a slot.
- */
- for (done = 0;;) {
- testutil_check(pthread_rwlock_wrlock(&g.append_lock));
-
- /*
- * If this is the thread we've been waiting for, and its record won't fit, we'd loop
- * infinitely. If there are many append operations and a thread goes to sleep for a little
- * too long, it can happen.
- */
- if (keyno == g.rows + 1) {
- g.rows = keyno;
- done = 1;
-
- /*
- * Clean out the table, incrementing the total count of records until we don't find the
- * next key.
- */
- for (;;) {
- for (p = g.append; p < ep; ++p)
- if (*p == g.rows + 1) {
- g.rows = *p;
- *p = 0;
- --g.append_cnt;
- break;
- }
- if (p == ep)
- break;
- }
- } else
- /* Enter the key into the table. */
- for (p = g.append; p < ep; ++p)
- if (*p == 0) {
- *p = keyno;
- ++g.append_cnt;
- done = 1;
- break;
- }
-
- testutil_check(pthread_rwlock_unlock(&g.append_lock));
-
- if (done)
- break;
- __wt_sleep(1, 0);
- }
-}
-
-/*
* row_insert --
* Insert a row in a row-store file.
*/
@@ -1707,6 +1614,63 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
}
/*
+ * col_insert_resolve --
+ * Resolve newly inserted records.
+ */
+static void
+col_insert_resolve(TINFO *tinfo)
+{
+ uint64_t v, *p;
+ u_int i;
+
+ /*
+ * We don't want to ignore column-store records we insert, which requires we update the "last
+ * row" so other threads consider them. Threads allocating record numbers can race with other
+ * threads, so the thread allocating record N may return after the thread allocating N + 1. We
+ * can't update a record before it's been inserted, and so we can't leave gaps when the count of
+ * records in the table is incremented.
+ *
+ * The solution is a per-thread array which contains an unsorted list of inserted records. If
+ * there are pending inserts, we review the table after every operation, trying to update the
+ * total rows. This is wasteful, but we want to give other threads immediate access to the row,
+ * ideally they'll collide with our insert before we resolve.
+ *
+ * Process the existing records and advance the last row count until we can't go further.
+ */
+ do {
+ WT_ORDERED_READ(v, g.rows);
+ for (i = 0, p = tinfo->insert_list; i < WT_ELEMENTS(tinfo->insert_list); ++i) {
+ if (*p == v + 1) {
+ testutil_assert(__wt_atomic_casv64(&g.rows, v, v + 1));
+ *p = 0;
+ --tinfo->insert_list_cnt;
+ break;
+ }
+ testutil_assert(*p == 0 || *p > v);
+ }
+ } while (tinfo->insert_list_cnt > 0 && i < WT_ELEMENTS(tinfo->insert_list));
+}
+
+/*
+ * col_insert_add --
+ * Add newly inserted records.
+ */
+static void
+col_insert_add(TINFO *tinfo)
+{
+ u_int i;
+
+ /* Add the inserted record to the array. */
+ for (i = 0; i < WT_ELEMENTS(tinfo->insert_list); ++i)
+ if (tinfo->insert_list[i] == 0) {
+ tinfo->insert_list[i] = tinfo->keyno;
+ ++tinfo->insert_list_cnt;
+ break;
+ }
+ testutil_assert(i < WT_ELEMENTS(tinfo->insert_list));
+}
+
+/*
* col_insert --
* Insert an element in a column-store file.
*/
@@ -1726,7 +1690,7 @@ col_insert(TINFO *tinfo, WT_CURSOR *cursor)
testutil_check(cursor->get_key(cursor, &tinfo->keyno));
- table_append(tinfo->keyno); /* Extend the object. */
+ col_insert_add(tinfo); /* Extend the object. */
if (g.type == FIX)
logop(cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "insert", tinfo->keyno,
diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c
index 5665b50ab4c..85bef1a694c 100644
--- a/src/third_party/wiredtiger/test/format/t.c
+++ b/src/third_party/wiredtiger/test/format/t.c
@@ -190,7 +190,6 @@ main(int argc, char *argv[])
* Initialize locks to single-thread named checkpoints and backups, last last-record updates,
* and failures.
*/
- testutil_check(pthread_rwlock_init(&g.append_lock, NULL));
testutil_check(pthread_rwlock_init(&g.backup_lock, NULL));
testutil_check(pthread_rwlock_init(&g.death_lock, NULL));
testutil_check(pthread_rwlock_init(&g.ts_lock, NULL));
@@ -270,7 +269,6 @@ main(int argc, char *argv[])
config_print(false);
- testutil_check(pthread_rwlock_destroy(&g.append_lock));
testutil_check(pthread_rwlock_destroy(&g.backup_lock));
testutil_check(pthread_rwlock_destroy(&g.death_lock));
testutil_check(pthread_rwlock_destroy(&g.ts_lock));
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp14.py b/src/third_party/wiredtiger/test/suite/test_timestamp14.py
index 4343160f92f..1fc5cdc7b22 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp14.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp14.py
@@ -43,36 +43,32 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
uri = 'table:' + tablename
session_config = 'isolation=snapshot'
- def test_all_committed(self):
+ def test_all_durable_old(self):
# This test was originally for testing the all_committed timestamp.
# In the absence of prepared transactions, all_durable is identical to
- # all_committed so let's enforce the same values for both.
- all_committed_uri = self.uri + '_all_committed'
+ # all_committed so let's enforce the all_durable values instead.
+ all_durable_uri = self.uri + '_all_durable'
session1 = self.setUpSessionOpen(self.conn)
session2 = self.setUpSessionOpen(self.conn)
- session1.create(all_committed_uri, 'key_format=i,value_format=i')
- session2.create(all_committed_uri, 'key_format=i,value_format=i')
+ session1.create(all_durable_uri, 'key_format=i,value_format=i')
+ session2.create(all_durable_uri, 'key_format=i,value_format=i')
# Scenario 0: No commit timestamp has ever been specified therefore
- # There is no all_committed timestamp and we will get an error
+ # There is no all_durable timestamp and we will get an error
# Querying for it.
session1.begin_transaction()
- cur1 = session1.open_cursor(all_committed_uri)
+ cur1 = session1.open_cursor(all_durable_uri)
cur1[1]=1
session1.commit_transaction()
self.assertRaisesException(wiredtiger.WiredTigerError,
- lambda: self.conn.query_timestamp('get=all_committed'))
- self.assertRaisesException(wiredtiger.WiredTigerError,
lambda: self.conn.query_timestamp('get=all_durable'))
# Scenario 1: A single transaction with a commit timestamp, will
- # result in the all_committed timestamp being set.
+ # result in the all_durable timestamp being set.
session1.begin_transaction()
cur1[1]=1
session1.commit_transaction('commit_timestamp=1')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "1")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "1")
# Scenario 2: A transaction begins and specifies that it intends
@@ -82,36 +78,30 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
session1.timestamp_transaction('commit_timestamp=2')
session2.begin_transaction()
- cur2 = session2.open_cursor(all_committed_uri)
+ cur2 = session2.open_cursor(all_durable_uri)
cur2[2] = 2
session2.commit_transaction('commit_timestamp=3')
- # As the original transaction is still running the all_committed
+ # As the original transaction is still running the all_durable
# timestamp is being held at 1.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "1")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "1")
cur1[1] = 2
session1.commit_transaction()
- # Now that the original transaction has finished the all_committed
+ # Now that the original transaction has finished the all_durable
# timestamp has moved to 3, skipping 2 as there is a commit with
# a greater timestamp already existing.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "3")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "3")
# Senario 3: Commit with a commit timestamp of 5 and then begin a
- # transaction intending to commit at 4, the all_committed timestamp
+ # transaction intending to commit at 4, the all_durable timestamp
# should move back to 3. Until the transaction at 4 completes.
session1.begin_transaction()
cur1[1] = 3
session1.commit_transaction('commit_timestamp=5')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "5")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "5")
session1.begin_transaction()
@@ -120,8 +110,6 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
session1.timestamp_transaction('commit_timestamp=4')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "3")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "3")
session1.commit_transaction()
@@ -129,20 +117,16 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
# Now that the transaction at timestamp 4 has completed the
# all committed timestamp is back at 5.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "5")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "5")
# Scenario 4: Holding a transaction open without a commit timestamp
- # Will not affect the all_committed timestamp.
+ # will not affect the all_durable timestamp.
session1.begin_transaction()
session2.begin_transaction()
cur2[2] = 2
session2.commit_transaction('commit_timestamp=6')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "6")
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), "6")
cur1[1] = 2
session1.commit_transaction()
@@ -270,26 +254,19 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
cur1[1] = 1
session1.commit_transaction('commit_timestamp=3')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '3')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '3')
# We have a running transaction with a lower commit_timestamp than we've
- # seen before. So all_durable (like all_committed) should return (lowest
- # commit timestamp - 1).
+ # seen before. So all_durable should return (lowest commit timestamp - 1).
session1.begin_transaction()
cur1[1] = 2
session1.timestamp_transaction('commit_timestamp=2')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '1')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '1')
session1.commit_transaction()
# After committing, go back to the value we saw previously.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '3')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '3')
# For prepared transactions, we take into account the durable timestamp
@@ -302,16 +279,12 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
# don't want that to be visible in the all_durable calculation.
session1.timestamp_transaction('commit_timestamp=7')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '3')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '3')
# Now take into account the durable timestamp.
session1.timestamp_transaction('durable_timestamp=8')
session1.commit_transaction()
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '8')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '8')
# All durable moves back when we have a running prepared transaction
@@ -324,21 +297,15 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
# don't want that to be visible in the all_durable calculation.
session1.timestamp_transaction('commit_timestamp=4')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '8')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '8')
# Now take into account the durable timestamp.
session1.timestamp_transaction('durable_timestamp=5')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '4')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '4')
session1.commit_transaction()
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '8')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '8')
# Now test a scenario with multiple commit timestamps for a single txn.
@@ -346,8 +313,6 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
cur1[1] = 5
session1.timestamp_transaction('commit_timestamp=6')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '5')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '5')
# Make more changes and set a new commit timestamp.
@@ -356,15 +321,11 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
cur1[1] = 6
session1.timestamp_transaction('commit_timestamp=7')
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '5')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '5')
# Once committed, we go back to 8.
session1.commit_transaction()
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), '8')
- self.assertTimestampsEqual(
self.conn.query_timestamp('get=all_durable'), '8')
def test_all(self):
@@ -386,9 +347,9 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
session1.begin_transaction()
cur1[1]=2
session1.commit_transaction('commit_timestamp=4')
- # Confirm all_committed is now 4.
+ # Confirm all_durable is now 4.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "4")
+ self.conn.query_timestamp('get=all_durable'), "4")
# Create a read session.
session1.begin_transaction('read_timestamp=2')
@@ -401,9 +362,9 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
session2.begin_transaction()
cur2[2] = 2
session2.commit_transaction('commit_timestamp=7')
- # All_committed should now be 7.
+ # All_durable should now be 7.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "7")
+ self.conn.query_timestamp('get=all_durable'), "7")
# Move oldest to 5.
self.conn.set_timestamp('oldest_timestamp=5')
@@ -414,20 +375,20 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess):
self.conn.query_timestamp('get=oldest_reader'))
# Begin a write transaction pointing at timestamp 6,
- # this is below our current all_committed so it should move back
+ # this is below our current all_durable so it should move back
# to the oldest timestamp.
session2.begin_transaction()
session2.timestamp_transaction('commit_timestamp=6')
cur2[2] = 3
- # Confirm all_committed is now equal to oldest.
+ # Confirm all_durable is now equal to oldest.
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'),
+ self.conn.query_timestamp('get=all_durable'),
self.conn.query_timestamp('get=oldest'))
session2.commit_transaction()
self.assertTimestampsEqual(
- self.conn.query_timestamp('get=all_committed'), "7")
+ self.conn.query_timestamp('get=all_durable'), "7")
# End our read transaction.
session1.commit_transaction()