summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2020-11-09 17:32:51 +1100
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-11-09 07:02:02 +0000
commitb806ef292dad8608db70cefe97cc90a1603182ca (patch)
tree740f464a221dc65effa674e94381eb8a96f19b0c
parent2856ce80170148fe578dc05ac7300f52d115bad2 (diff)
downloadmongo-b806ef292dad8608db70cefe97cc90a1603182ca.tar.gz
Import wiredtiger: 805fe0276c367cb70321af3e1f473c881d0adc87 from branch mongodb-5.0
ref: 5f201b99d9..805fe0276c for: 4.9.0 WT-6693 Add compatibility testing for imported objects WT-6824 Fix OSX python test errors related to UTF encoding WT-6836 Handle the last key in the shadow table being removed by rollback to stable in timestamp abort WT-6867 Documentation: create model subpages for architecture guide WT-6870 Rename history store statistic to better describe its use case. WT-6872 Replace the yield instructions with an ISB WT-6875 Remove legacy import invocation
-rw-r--r--src/third_party/wiredtiger/dist/docs.py45
-rw-r--r--src/third_party/wiredtiger/dist/docs_data.py26
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_docs18
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok3
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py4
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_all.c6
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_json.c3
-rw-r--r--src/third_party/wiredtiger/src/docs/Doxyfile2
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-dhandle.dox17
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-index.dox31
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-schema-ops.dox206
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-schema.dox205
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-transaction.dox8
-rw-r--r--src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx5
-rw-r--r--src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.pngbin67012 -> 66916 bytes
-rw-r--r--src/third_party/wiredtiger/src/docs/style/wiredtiger.css44
-rwxr-xr-xsrc/third_party/wiredtiger/src/docs/tools/doxfilter.py48
-rw-r--r--src/third_party/wiredtiger/src/history/hs_rec.c8
-rw-r--r--src/third_party/wiredtiger/src/include/ctype_inline.h10
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h1
-rw-r--r--src/third_party/wiredtiger/src/include/gcc.h9
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h4
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in8
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c16
-rw-r--r--src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c10
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml12
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen/import_compatibility_test.sh151
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_jsondump02.py3
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_util01.py19
30 files changed, 665 insertions, 259 deletions
diff --git a/src/third_party/wiredtiger/dist/docs.py b/src/third_party/wiredtiger/dist/docs.py
new file mode 100644
index 00000000000..1b26c8090d3
--- /dev/null
+++ b/src/third_party/wiredtiger/dist/docs.py
@@ -0,0 +1,45 @@
+# Read and verify the documentation data to make sure path names are valid.
+
+import os, sys
+import docs_data
+
+def check_sort(got, msg, keyfunc=None):
+ if keyfunc:
+ expect = sorted(got, key=keyfunc)
+ else:
+ expect = sorted(got)
+ if got != expect:
+ print(msg)
+ print(' got: ' + str(got))
+ print(' expect: ' + str(expect))
+
+# An include filename will be sorted first.
+def inc_first(f):
+ if '/include/' in f:
+ return '_' + f
+ else:
+ return f
+
+top_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+
+pages = docs_data.arch_doc_pages
+all_names = [ page.doxygen_name for page in pages]
+check_sort(all_names, 'arch_doc_pages must be sorted by name.')
+
+for page in pages:
+ name = page.doxygen_name
+ check_sort(page.data_structures, name + ': data structures must be sorted.')
+ for partial in page.files:
+ fullpath = os.path.join(top_dir, partial)
+ if not os.path.exists(fullpath):
+ print(name + ': ' + partial + ': does not exist')
+ elif os.path.isdir(fullpath):
+ if fullpath[-1:] != '/':
+ print(name + ': ' + partial + ': is a directory, must end in /')
+ else:
+ if fullpath[-1:] == '/':
+ print(name + ': ' + partial + ': not a directory, cannot end in /')
+ check_sort(page.files,
+ name + ': sources must be sorted, with include files first.', inc_first)
+
+sys.exit(0)
diff --git a/src/third_party/wiredtiger/dist/docs_data.py b/src/third_party/wiredtiger/dist/docs_data.py
new file mode 100644
index 00000000000..47ae7c3c51a
--- /dev/null
+++ b/src/third_party/wiredtiger/dist/docs_data.py
@@ -0,0 +1,26 @@
+# Create entries used by our doxygen filter to expand the arch_page
+# macros in the documentation.
+
+class ArchDocPage:
+ def __init__(self, doxygen_name, data_structures, files):
+ self.doxygen_name = doxygen_name
+ self.data_structures = data_structures
+ self.files = files
+
+##########################################
+# List of all architecture subsections
+##########################################
+arch_doc_pages = [
+ ArchDocPage('arch-dhandle',
+ ['WT_BTREE', 'WT_DHANDLE'],
+ ['src/include/btree.h', 'src/include/dhandle.h',
+ 'src/conn/conn_dhandle.c', 'src/session/session_dhandle.c']),
+ ArchDocPage('arch-schema',
+ ['WT_COLGROUP', 'WT_INDEX', 'WT_LSM_TREE', 'WT_TABLE'],
+ ['src/include/intpack_inline.h', 'src/include/packing_inline.h',
+ 'src/include/schema.h',
+ 'src/lsm/', 'src/packing/', 'src/schema/']),
+ ArchDocPage('arch-transaction',
+ ['WT_TXN', 'WT_TXN_GLOBAL', 'WT_TXN_OP', 'WT_TXN_SHARED'],
+ ['src/include/txn.h', 'src/include/txn_inline.h', 'src/txn/']),
+]
diff --git a/src/third_party/wiredtiger/dist/s_docs b/src/third_party/wiredtiger/dist/s_docs
index 2bb134d0244..37686c3187b 100755
--- a/src/third_party/wiredtiger/dist/s_docs
+++ b/src/third_party/wiredtiger/dist/s_docs
@@ -124,6 +124,18 @@ valid_build()
done
}
+check_docs_data()
+{
+ python docs.py > $t
+ test -s $t && {
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ echo 'Documentation data errors in docs_data.py'
+ sed -e 's/^/ /' < $t
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ e=1
+ }
+}
+
build()
{
# Build from scratch on demand.
@@ -153,7 +165,7 @@ EOF
# Any cmapx files that are generated by plantuml need to be referred to
CMAPX=`find ../src/docs/ -type f -name "*.cmapx" 2>/dev/null`
if [ "$CMAPX" != '' ]; then
- cd ../docs
+ (cd ../docs
for f in $CMAPX; do
b=`echo $f | sed -e 's:.*/::' -e 's/.cmapx$//'`
for html in `grep -l $b.png *.html`; do
@@ -165,6 +177,7 @@ EOF
mv $html.NEW $html
done
done
+ )
fi
# Fixup the man pages generated by Doxygen. We want the command line
@@ -225,6 +238,9 @@ wtperf_config
spellchk
structurechk
+# Check the docs data input file.
+check_docs_data
+
# Build the documentation.
build $clean
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 58090bf42be..0e4185705d9 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -349,6 +349,7 @@ RocksDB
SIMD
SLIST
SLVG
+SMT
SOURCE's
SPINLOCK
SQL
@@ -894,6 +895,8 @@ io
ip
isalnum
isalpha
+isascii
+isb
iscntrl
isdigit
isgraph
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 2c0b5195465..807516d8eeb 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -284,8 +284,8 @@ connection_stats = [
CacheStat('cache_hs_insert', 'history store table insert calls'),
CacheStat('cache_hs_insert_restart', 'history store table insert calls that returned restart'),
CacheStat('cache_hs_key_truncate', 'history store table truncation to remove an update'),
- CacheStat('cache_hs_key_truncate_mix_ts', 'history store table truncation to remove range of updates due to mixed timestamps'),
CacheStat('cache_hs_key_truncate_onpage_removal', 'history store table truncation to remove range of updates due to key being removed from the data page during reconciliation'),
+ CacheStat('cache_hs_key_truncate_non_ts', 'history store table truncation to remove range of updates due to non timestamped update on data page'),
CacheStat('cache_hs_key_truncate_rts', 'history store table truncation by rollback to stable to remove an update'),
CacheStat('cache_hs_key_truncate_rts_unstable', 'history store table truncation by rollback to stable to remove an unstable update'),
CacheStat('cache_hs_ondisk', 'history store table on-disk size', 'no_clear,no_scale,size'),
@@ -769,8 +769,8 @@ dsrc_stats = [
CacheStat('cache_hs_insert', 'history store table insert calls'),
CacheStat('cache_hs_insert_restart', 'history store table insert calls that returned restart'),
CacheStat('cache_hs_key_truncate', 'history store table truncation to remove an update'),
- CacheStat('cache_hs_key_truncate_mix_ts', 'history store table truncation to remove range of updates due to mixed timestamps'),
CacheStat('cache_hs_key_truncate_onpage_removal', 'history store table truncation to remove range of updates due to key being removed from the data page during reconciliation'),
+ CacheStat('cache_hs_key_truncate_non_ts', 'history store table truncation to remove range of updates due to non timestamped update on data page'),
CacheStat('cache_hs_key_truncate_rts', 'history store table truncation by rollback to stable to remove an update'),
CacheStat('cache_hs_key_truncate_rts_unstable', 'history store table truncation by rollback to stable to remove an unstable update'),
CacheStat('cache_hs_order_fixup_insert', 'history store table out-of-order updates that were fixed up during insertion'),
diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c
index a6c6c5f8c43..e63ca868d64 100644
--- a/src/third_party/wiredtiger/examples/c/ex_all.c
+++ b/src/third_party/wiredtiger/examples/c/ex_all.c
@@ -686,12 +686,6 @@ session_ops(WT_SESSION *session)
error_check(session->compact(session, "table:mytable", NULL));
/*! [Compact a table] */
-#ifdef MIGHT_NOT_RUN
- /*! [Import a file] */
- error_check(session->import(session, "file:import", NULL));
-/*! [Import a file] */
-#endif
-
error_check(
session->create(session, "table:old", "key_format=r,value_format=S,cache_resident=true"));
/*! [Rename a table] */
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 283f696cc64..12b52ce60eb 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-5.0",
- "commit": "5f201b99d99f813e6b553a37e9c5d7f1cab1e136"
+ "commit": "805fe0276c367cb70321af3e1f473c881d0adc87"
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_json.c b/src/third_party/wiredtiger/src/cursor/cur_json.c
index 89e5ecba4d3..ef24017b02d 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_json.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_json.c
@@ -315,7 +315,8 @@ __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode)
u_char abbrev;
if (!force_unicode) {
- if (__wt_isprint(ch) && ch != '\\' && ch != '"') {
+ /* We treat all non-ASCII characters as non-printable. */
+ if (__wt_isascii(ch) && __wt_isprint(ch) && ch != '\\' && ch != '"') {
if (bufsz >= 1)
*buf = ch;
return (1);
diff --git a/src/third_party/wiredtiger/src/docs/Doxyfile b/src/third_party/wiredtiger/src/docs/Doxyfile
index 378b1fd5f50..3653e6cd242 100644
--- a/src/third_party/wiredtiger/src/docs/Doxyfile
+++ b/src/third_party/wiredtiger/src/docs/Doxyfile
@@ -206,6 +206,8 @@ TAB_SIZE = 8
# You can put \n's in the value part of an alias to insert newlines.
ALIASES = \
+ arch_page_table{2}="<div class="arch_head"><table><tr><th rowspan=2 style=\"width:10%;\">@htmlonly <div><a href=\"arch-index.html\"><img class=\"arch_thumbnail\" src=\"wt_diagram.png\" style=\"background-image: url(wt_diagram.png)\"></a></div>@endhtmlonly</th><th style=\"width:44%\">Data Structures</th><th style=\"width:45%\">Source Location</th></tr><tr><td><code>\1</code></td><td><code>\2</code></td></tr></table></div>" \
+ arch_page_top{2}="@page \1 \2 (Architecture Guide)" \
config{3}=" @row{<tt>\1</tt>,\2,\3}" \
configempty{2}="@param config configuration string, see @ref config_strings. No values currently permitted." \
configend=" </table>" \
diff --git a/src/third_party/wiredtiger/src/docs/arch-dhandle.dox b/src/third_party/wiredtiger/src/docs/arch-dhandle.dox
new file mode 100644
index 00000000000..d88c82f9675
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-dhandle.dox
@@ -0,0 +1,17 @@
+/*! @arch_page arch-dhandle Data Handles and Btrees
+
+An internal structure called a Data Handle (dhandle) is used to represent and
+access Btrees and other data sources in WiredTiger. A dhandle gets created
+when a table is accessed for the first time. It is kept in a global list
+and is shared across the sessions. When a dhandle is not needed anymore
+and has been idle for a while, it is closed and destroyed, releasing all the
+resources associated with it.
+
+A Btree is one kind of dhandle. It embodies both the on-disk and in-memory
+representations of the Btree.
+
+@subpage arch-dhandle-lifecycle
+
+This section describes how dhandles are created, and how and when they are destroyed.
+
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-index.dox b/src/third_party/wiredtiger/src/docs/arch-index.dox
index f5430ccb7e7..7be364d330e 100644
--- a/src/third_party/wiredtiger/src/docs/arch-index.dox
+++ b/src/third_party/wiredtiger/src/docs/arch-index.dox
@@ -5,6 +5,7 @@ use the API interface to direct WiredTiger's operations (see @ref basic_api).
Here is an overview of the software components in WiredTiger and how they are organized.
An arrow indicates the "from" component uses "to" component.
+<div class="arch_diagram">
@plantuml_start{wt_diagram.png}
@startuml{wt_diagram.png}
@@ -41,11 +42,11 @@ rectangle "**WiredTiger Engine**
rectangle " C API " as c_api
file "____" as SPACE_api2
}
- rectangle "[[#component-schema Schema]]" as schema
+ rectangle "[[arch-schema.html Schema]]" as schema
rectangle "Cursor" as cursor
- rectangle "Transactions" as txn
+ rectangle "[[arch-transaction.html Transactions]]" as txn
rectangle "Metadata" as meta
- rectangle "[[#component-dhandle dhandle/\n Btree]]" as btree
+ rectangle "[[arch-dhandle.html dhandle/\n Btree]]" as btree
rectangle " Row\n storage" as row
rectangle " Column\n storage" as column
rectangle "History\n Store" as history
@@ -116,27 +117,25 @@ wt_file -[hidden]right-> log_file
@enduml
@plantuml_end
+</div>
We go into some detail for some of the internal components.
-@subpage arch-glossary
+@subpage arch-dhandle
-WiredTiger assigns specific meanings to certain words. Here we decode them.
+An internal structure called a Data Handle (dhandle) is used to represent and
+access Btrees and other data sources in WiredTiger.
-\anchor component-schema
@subpage arch-schema
-Most applications begin to make use of WiredTiger by creating a table (or other
-data object) to store their data in. Create is one of several schema operations
-available in WiredTiger.
+A schema defines the format of the application data in WiredTiger.
+
+@subpage arch-transaction
-\anchor component-dhandle
-@subpage arch-dhandle-lifecycle
+Transactions provide a powerful abstraction for multiple threads to operate on data concurrently.
-An internal structure called Data Handle (dhandle) is used to represent and
-access a table in WiredTiger. A dhandle gets created when a table is accessed
-for the first time. It is kept in a global list and is shared across the
-sessions. When a dhandle is not needed anymore and has been idle for a while,
-it is closed and destroyed, releasing all the resources associated with it.
+@subpage arch-glossary
+
+WiredTiger assigns specific meanings to certain words. Here we decode them.
*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-schema-ops.dox b/src/third_party/wiredtiger/src/docs/arch-schema-ops.dox
new file mode 100644
index 00000000000..8428208c6d9
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-schema-ops.dox
@@ -0,0 +1,206 @@
+/*! @page arch-schema-ops Schema Operations
+
+A @ref arch-schema defines the format of the application data in WiredTiger.
+This section details the internals of various schema operations.
+
+Schema operations cause an update to the metadata and are performed under a
+schema lock to avoid concurrent operations on the same object. The following
+sequence of steps defines a generic schema operation:
+
+@plantuml_start{schema_generic.png}
+@startuml{schema_generic.png}
+:A schema operation;
+partition with-schema-lock {
+ :perform operation on underlying data-object;
+ :update metadata-file;
+ :checkpoint and sync metadata;
+}
+stop
+@enduml
+@plantuml_end
+
+@section schema_create Schema Create
+
+The create schema operation is responsible for creating the underlying data
+object on the filesystem with the right parameters and then creating an entry
+for this new object in the metadata. The sequence of operations involved in a
+create for various schema types is as follows:
+
+@plantuml_start{schema_create.png}
+@startuml{schema_create.png}
+:WT_SESSION->create(.,name,.)
+(__session_create());
+
+partition session-API-call {
+ :API session init with NULL dhandle;
+ :exit if PANIC flag set;
+ :exit if invalid configuration;
+}
+
+:validate "name" and if passed "type" config parameter;
+note right
+ "name" parameter is called "uri" internally.
+ "type" is usually not passed and generally
+ implied from the uri.
+end note
+
+partition with-schema-lock {
+ partition with-table-lock {
+ :turn on meta tracking;
+ :check uri}
+
+ split
+ :uri matches "file:"
+ ("file" is the underlying
+ type for all the objects);
+ split again
+ :uri matches "colgroup:";
+ :__create_colgroup();
+ split again
+ :uri matches "table:";
+ :__create_table();
+ split again
+ :uri matches "lsm:";
+ :__wt_lsm_tree_create();
+ split again
+ :uri matches "index:";
+ :__create_index();
+ split again
+ :matches a named data-source;
+ :__create_data_source();
+ end split
+
+ partition __create_file() {
+ :exit if file exists;
+ :validate allocation size;
+ :block manager creates the file:
+ 1.create file using __wt_open()
+ 2.write an initial descriptor to file
+ 3.fsync and close the file handle;
+ if (metadata-file?) then (yes)
+ else (no)
+ :update metadata with file
+ configuration and version;
+ endif
+ :check if file setup correctly by
+ getting btree handle with
+ WT_DHANDLE_EXCLUSIVE set;
+ if (metatracking on?) then (yes)
+ :track locked handle*;
+ else (no)
+ :release btree -
+ sync and close;
+ endif
+ }
+
+ partition turn-off-meta-tracking {
+ if (errors?) then (yes)
+ :unroll operations;
+ else (no)
+ if (logging?) then (yes)
+ :sync log;
+ else (no)
+ endif
+ :checkpoint and sync;
+ endif
+ :apply post-commit ops:
+ release tracked (handle) btree* -
+ sync and close;
+ note right
+ if meta tracking is on, this btree
+ was being tracked as locked. As part
+ of turning off meta tracking, we sync
+ and close this btree
+ end note
+ }
+ }
+}
+
+:API-end;
+
+stop
+@enduml
+@plantuml_end
+
+@section schema_rename Schema Rename
+
+The rename schema operation is responsible for renaming the underlying data
+object on the filesystem and updating the metadata accordingly. The sequence of
+operations involved in a rename for various schema types is as follows:
+
+@plantuml_start{schema_rename.png}
+@startuml{schema_rename.png}
+:WT_SESSION->rename(old-uri, new-uri, .)
+(__session_rename());
+:session-API-call;
+
+partition with-checkpoint-lock {
+ partition with-schema-lock {
+ partition with-table-write-lock {
+ :validate new uri-type to match the old type;
+ :turn on meta tracking;
+ :check uri}
+
+ split
+ :uri matches "file:"
+ ("file" is the underlying
+ type for all the objects);
+ split again
+ :uri matches "lsm:";
+ :__wt_lsm_tree_rename();
+ split again
+ :matches a named data-source;
+ :WT_DATA_SOURCE::rename();
+ split again
+ :uri matches "table:";
+ partition __rename_table() {
+ :rename colgroups and indices represented by the table:
+ 1. extract names from the uri
+ 2. create new uri with existing types and configuration
+ 3. recursively call the rename operation on individual
+ colgroup and index with the old and the new uri
+ 4. remove old entry for colgroups and indices from
+ the metadata table and add the new ones;
+ :close and remove table handle from the session;
+ :remove old table entry from the metadata table
+ and add a new one;
+ }
+ end split
+
+ partition __rename_file() {
+ :fail if backup cursor open and schema operations will conflict;
+ :close btree handles in the file;
+ :fail if file with the old name doesn't exist or with the new
+ name exists;
+ :remove old file entries and add new in the metadata;
+ :rename the underlying file;
+ if (meta-tracking?) then (yes)
+ :track filesystem op;
+ else (no)
+ endif
+ }
+
+ :bump schema generation number to ignore stale data;
+
+ partition turn-off-meta-tracking {
+ if (errors?) then (yes)
+ :unroll operations;
+ else (no)
+ if (logging?) then (yes)
+ :sync log;
+ else (no)
+ endif
+ :checkpoint and sync;
+ endif
+ }
+ }
+ }
+}
+
+:API-end;
+
+stop
+@enduml
+@plantuml_end
+
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-schema.dox b/src/third_party/wiredtiger/src/docs/arch-schema.dox
index f6ead3ac824..6494da597a2 100644
--- a/src/third_party/wiredtiger/src/docs/arch-schema.dox
+++ b/src/third_party/wiredtiger/src/docs/arch-schema.dox
@@ -1,208 +1,11 @@
-/*! @page arch-schema Schema Operations
+/*! @arch_page arch-schema Schema
A schema defines the format of the application data in WiredTiger. WiredTiger
supports various types of schemas (See @ref schema for more
-information), operated upon through a WT_SESSION reference. This section details
-the internals of these various schema operations.
+information), operated upon through a WT_SESSION reference.
-Schema operations cause an update to the metadata and are performed under a
-schema lock to avoid concurrent operations on the same object. The following
-sequence of steps define a generic schema operation:
+@subpage arch-schema-ops
-@plantuml_start{schema_generic.png}
-@startuml{schema_generic.png}
-:A schema operation;
-partition with-schema-lock {
- :perform operation on underlying data-object;
- :update metadata-file;
- :checkpoint and sync metadata;
-}
-stop
-@enduml
-@plantuml_end
-
-@section schema_create Schema Create
-
-The create schema operation is responsible for creating the underlying data
-object on the filesystem with the right parameters and then creating an entry
-for this new object into the metadata. The sequence of operations involved in a
-create for various schema types are as follows:
-
-@plantuml_start{schema_create.png}
-@startuml{schema_create.png}
-:WT_SESSION->create(.,name,.)
-(__session_create());
-
-partition session-API-call {
- :API session init with NULL dhandle;
- :exit if PANIC flag set;
- :exit if invalid configuration;
-}
-
-:validate "name" and if passed "type" config parameter;
-note right
- "name" parameter is called as "uri" internally.
- "type" is usually not passed and generally
- implied from the uri.
-end note
-
-partition with-schema-lock {
- partition with-table-lock {
- :turn on meta tracking;
- :check uri}
-
- split
- :uri matches "file:"
- ("file" is the underlying
- type for all the objects);
- split again
- :uri matches "colgroup:";
- :__create_colgroup();
- split again
- :uri matches "table:";
- :__create_table();
- split again
- :uri matches "lsm:";
- :__wt_lsm_tree_create();
- split again
- :uri matches "index:";
- :__create_index();
- split again
- :matches a named data-source;
- :__create_data_source();
- end split
-
- partition __create_file() {
- :exit if file exists;
- :validate allocation size;
- :block manager creates the file:
- 1.create file using __wt_open()
- 2.write an initial descriptor to file
- 3.fsync and close the file handle;
- if (metadata-file?) then (yes)
- else (no)
- :update metadata with file
- configuration and version;
- endif
- :check if file setup correctly by
- getting btree handle with
- WT_DHANDLE_EXCLUSIVE set;
- if (metatracking on?) then (yes)
- :track locked handle*;
- else (no)
- :release btree -
- sync and close;
- endif
- }
-
- partition turn-off-meta-tracking {
- if (errors?) then (yes)
- :unroll operations;
- else (no)
- if (logging?) then (yes)
- :sync log;
- else (no)
- endif
- :checkpoint and sync;
- endif
- :apply post-commit ops:
- release tracked (handle) btree* -
- sync and close;
- note right
- if meta tracking is on, this btree
- was being tracked as locked. As part
- of tuning off meta tracking, we sync
- and close this btree
- end note
- }
- }
-}
-
-:API-end;
-
-stop
-@enduml
-@plantuml_end
-
-@section schema_rename Schema Rename
-
-The rename schema operation is responsible for renaming the underlying data
-object on the filesystem and updating the metadata accordingly. The sequence of
-operations involved in a rename for various schema types are as follows:
-
-@plantuml_start{schema_rename.png}
-@startuml{schema_rename.png}
-:WT_SESSION->rename(old-uri, new-uri, .)
-(__session_rename());
-:session-API-call;
-
-partition with-checkpoint-lock {
- partition with-schema-lock {
- partition with-table-write-lock {
- :validate new uri-type to match the old type;
- :turn on meta tracking;
- :check uri}
-
- split
- :uri matches "file:"
- ("file" is the underlying
- type for all the objects);
- split again
- :uri matches "lsm:";
- :__wt_lsm_tree_rename();
- split again
- :matches a named data-source;
- :WT_DATA_SOURCE::rename();
- split again
- :uri matches "table:";
- partition __rename_table() {
- :rename colgroups and indices represented by the table:
- 1. extract names from the uri
- 2. create new uri with existing types and configuration
- 3. recursive call the rename operation on individual
- colgroup and index with the old and the new uri
- 4. remove old entry for colgroups and indices from
- the metadata table and add the new ones;
- :close and remove table handle from the session;
- :remove old table entry from the metadata table
- and add a new one;
- }
- end split
-
- partition __rename_file() {
- :fail if backup cursor open and schema operations will conflict;
- :close btree handles in the file;
- :fail if file with the old name doesn't exist or with the new
- name exists;
- :remove old file entries and add new in the metadata;
- :rename the underlying file;
- if (meta-tracking?) then (yes)
- :track filesystem op;
- else (no)
- endif
- }
-
- :bump schema generation number to ignore stale data;
-
- partition turn-off-meta-tracking {
- if (errors?) then (yes)
- :unroll operations;
- else (no)
- if (logging?) then (yes)
- :sync log;
- else (no)
- endif
- :checkpoint and sync;
- endif
- }
- }
- }
-}
-
-:API-end;
-
-stop
-@enduml
-@plantuml_end
+This section details the internals of various API operations that operate on schemas, such as WT_SESSION::create and WT_SESSION::rename.
*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-transaction.dox b/src/third_party/wiredtiger/src/docs/arch-transaction.dox
new file mode 100644
index 00000000000..bc3c4e59722
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-transaction.dox
@@ -0,0 +1,8 @@
+/*! @arch_page arch-transaction Transactions
+
+Transactions provide a powerful abstraction for multiple threads to operate on data concurrently.
+A caller of WiredTiger uses @ref transactions within the API to start and stop transactions within
+a session (thread of control).
+
+Internally, the current transaction state is represented by the WT_TXN structure.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx
index 411d3905b8e..2f29589a7a7 100644
--- a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx
+++ b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx
@@ -1,4 +1,5 @@
<map id="wt_diagram_map" name="wt_diagram_map">
-<area shape="rect" id="id1" href="#component-schema" title="#component-schema" alt="" coords="98,224,151,241"/>
-<area shape="rect" id="id2" href="#component-dhandle" title="#component-dhandle" alt="" coords="181,320,244,353"/>
+<area shape="rect" id="id1" href="arch-schema.html" title="arch-schema.html" alt="" coords="98,224,151,241"/>
+<area shape="rect" id="id2" href="arch-transaction.html" title="arch-transaction.html" alt="" coords="341,329,429,345"/>
+<area shape="rect" id="id3" href="arch-dhandle.html" title="arch-dhandle.html" alt="" coords="181,320,244,353"/>
</map>
diff --git a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.png b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.png
index 57480c2156f..0dcf1f285ed 100644
--- a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.png
+++ b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.png
Binary files differ
diff --git a/src/third_party/wiredtiger/src/docs/style/wiredtiger.css b/src/third_party/wiredtiger/src/docs/style/wiredtiger.css
index 984e525f42f..9757f79b7d8 100644
--- a/src/third_party/wiredtiger/src/docs/style/wiredtiger.css
+++ b/src/third_party/wiredtiger/src/docs/style/wiredtiger.css
@@ -26,6 +26,50 @@ img {
height: auto;
}
+div.arch_diagram {
+ display: block;
+ background: #FFFFFF;
+ background: radial-gradient(#FFDDAA,#FFFFFF);
+}
+
+/* match just one image */
+img[src="wt_diagram.png"] {
+ display: block;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+.arch_head table {
+ background-color: #FFDDAA;
+ border-color: #624D32;
+ /*border: solid 1px;*/
+ border-spacing: 5px;
+}
+
+.arch_head td {
+ border-color: #FFFFFF;
+ /*border: solid 1px;*/
+ vertical-align: top;
+ text-align: left;
+}
+
+.arch_head th {
+ border-color: #624D32;
+ vertical-align: top;
+ text-align: left;
+}
+
+.arch_thumbnail {
+ padding: 0px 0px 0px 0px;
+ border: 3px solid #fff;
+ outline: 1px solid #000;
+ margin: 3px;
+ width: 70px;
+ height: 70px;
+ display: block;
+ text-align: center;
+}
+
.tablist {
width: 800px;
}
diff --git a/src/third_party/wiredtiger/src/docs/tools/doxfilter.py b/src/third_party/wiredtiger/src/docs/tools/doxfilter.py
index efe5c3ca80a..42f47426578 100755
--- a/src/third_party/wiredtiger/src/docs/tools/doxfilter.py
+++ b/src/third_party/wiredtiger/src/docs/tools/doxfilter.py
@@ -32,7 +32,19 @@
# (i.e., from "/*!" to "/**"), because the latter are configured to not
# search for brief descriptions at the beginning of pages.
-import re, sys
+import os, re, sys
+
+# We want to import the docs_data.py page from the dist directory.
+# First get our (src/docs/tools) directory.
+doc_tools_dir = os.path.dirname(os.path.realpath(__file__))
+top_dir = os.path.dirname(os.path.dirname(os.path.dirname(doc_tools_dir)))
+dist_dir = os.path.join(top_dir, 'dist')
+sys.path.insert(1, dist_dir)
+import docs_data
+
+arch_doc_lookup = {}
+for page in docs_data.arch_doc_pages:
+ arch_doc_lookup[page.doxygen_name] = page
progname = 'doxfilter.py'
linenum = 0
@@ -42,8 +54,40 @@ def err(arg):
sys.stderr.write(filename + ':' + str(linenum) + ': ERROR: ' + arg + '\n')
sys.exit(1)
+# Convert @arch_page to @arch_page_top and @arch_page_table, adding in
+# information from docs_data.py.
+def process_arch(source):
+ result = ''
+ mpage_content = []
+ arch_page_pat = re.compile(r'^(.*)@arch_page *([^ ]*) *(.*)')
+ for line in source.split('\n'):
+ m = re.search(arch_page_pat, line)
+ if line.count('@arch_page') > 0 and not m:
+ err('@arch_page incorrect syntax, need identifier and title')
+ if m:
+ groups = m.groups()
+ prefix = groups[0]
+ doxy_name = groups[1]
+ title = groups[2]
+
+ page_info = arch_doc_lookup[doxy_name]
+ data_structures_str = '<code>' + '<br>'.join(page_info.data_structures) + '</code>'
+ files_str = '<code>' + '<br>'.join(page_info.files) + '</code>'
+ result += prefix + '@arch_page_top{' + \
+ doxy_name + ',' + \
+ title + '}\n'
+ result += '@arch_page_table{' + \
+ data_structures_str + ',' + \
+ files_str + '}\n'
+ else:
+ result += line + '\n'
+ return result
+
def process(source):
- return source.replace(r'/*!', r'/**')
+ source = source.replace(r'/*!', r'/**')
+ if '@arch_page' in source:
+ source = process_arch(source)
+ return source
if __name__ == '__main__':
for f in sys.argv[1:]:
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c
index c70ffa63251..39c6104ac9c 100644
--- a/src/third_party/wiredtiger/src/history/hs_rec.c
+++ b/src/third_party/wiredtiger/src/history/hs_rec.c
@@ -535,14 +535,14 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
!F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS)) {
/* We can only delete history store entries that have timestamps. */
WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1, true));
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
- WT_STAT_DATA_INCR(session, cache_hs_key_truncate_mix_ts);
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_non_ts);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_non_ts);
F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
} else if (first_non_ts_upd != NULL && !F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS) &&
(list->ins == NULL || ts_updates_in_hs)) {
WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1, true));
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
- WT_STAT_DATA_INCR(session, cache_hs_key_truncate_mix_ts);
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_non_ts);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_non_ts);
F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
}
diff --git a/src/third_party/wiredtiger/src/include/ctype_inline.h b/src/third_party/wiredtiger/src/include/ctype_inline.h
index e751eedc090..0daf748f3b2 100644
--- a/src/third_party/wiredtiger/src/include/ctype_inline.h
+++ b/src/third_party/wiredtiger/src/include/ctype_inline.h
@@ -29,6 +29,16 @@ __wt_isalpha(u_char c)
}
/*
+ * __wt_isascii --
+ * Wrap the ctype function without sign extension: the argument is a u_char, so isascii is
+ * never passed a negative value.
+ */
+static inline bool
+__wt_isascii(u_char c)
+{
+ return (isascii(c) != 0);
+}
+
+/*
* __wt_isdigit --
* Wrap the ctype function without sign extension.
*/
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 10d5a737718..0a6f3d79a9b 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -1811,6 +1811,7 @@ static inline bool __wt_eviction_updates_needed(WT_SESSION_IMPL *session, double
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isalnum(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isalpha(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_isascii(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isdigit(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isprint(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isspace(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h
index 85ba3214d3e..b620f614b07 100644
--- a/src/third_party/wiredtiger/src/include/gcc.h
+++ b/src/third_party/wiredtiger/src/include/gcc.h
@@ -217,7 +217,14 @@ WT_ATOMIC_FUNC(size, size_t, size_t *vp, size_t v)
} while (0)
#elif defined(__aarch64__)
-#define WT_PAUSE() __asm__ volatile("yield" ::: "memory")
+/*
+ * Use an isb instruction here to be closer to the original x86 pause instruction. The yield
+ * instruction that was previously here is a nop that is intended to provide a hint that a
+ * thread in a SMT system could yield. This is different from the x86 pause instruction
+ * which delays execution by O(100) cycles. The isb will typically delay execution by about
+ * 50 cycles so it's a reasonable alternative.
+ */
+#define WT_PAUSE() __asm__ volatile("isb" ::: "memory")
/*
* dmb are chosen here because they are sufficient to guarantee the ordering described above. We
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 234350bbdc3..200d63759a1 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -401,7 +401,7 @@ struct __wt_connection_stats {
int64_t cache_hs_key_truncate_rts;
int64_t cache_hs_key_truncate;
int64_t cache_hs_key_truncate_onpage_removal;
- int64_t cache_hs_key_truncate_mix_ts;
+ int64_t cache_hs_key_truncate_non_ts;
int64_t cache_hs_write_squash;
int64_t cache_inmem_splittable;
int64_t cache_inmem_split;
@@ -848,7 +848,7 @@ struct __wt_dsrc_stats {
int64_t cache_hs_key_truncate_rts;
int64_t cache_hs_key_truncate;
int64_t cache_hs_key_truncate_onpage_removal;
- int64_t cache_hs_key_truncate_mix_ts;
+ int64_t cache_hs_key_truncate_non_ts;
int64_t cache_hs_write_squash;
int64_t cache_inmem_splittable;
int64_t cache_inmem_split;
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index e17e90439a0..846bc11d603 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -4932,9 +4932,9 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1105
/*!
* cache: history store table truncation to remove range of updates due
- * to mixed timestamps
+ * to non timestamped update on data page
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_MIX_TS 1106
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_NON_TS 1106
/*! cache: history store table writes requiring squashed modifies */
#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1107
/*! cache: in-memory page passed criteria to be split */
@@ -6008,9 +6008,9 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 2074
/*!
* cache: history store table truncation to remove range of updates due
- * to mixed timestamps
+ * to non timestamped update on data page
*/
-#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_MIX_TS 2075
+#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_NON_TS 2075
/*! cache: history store table writes requiring squashed modifies */
#define WT_STAT_DSRC_CACHE_HS_WRITE_SQUASH 2076
/*! cache: in-memory page passed criteria to be split */
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 98292870d31..5700b72fd44 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -79,7 +79,8 @@ static const char *const __stats_dsrc_desc[] = {
"cache: history store table truncation to remove an update",
"cache: history store table truncation to remove range of updates due to key being removed from "
"the data page during reconciliation",
- "cache: history store table truncation to remove range of updates due to mixed timestamps",
+ "cache: history store table truncation to remove range of updates due to non timestamped update "
+ "on data page",
"cache: history store table writes requiring squashed modifies",
"cache: in-memory page passed criteria to be split",
"cache: in-memory page splits",
@@ -326,7 +327,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_hs_key_truncate_rts = 0;
stats->cache_hs_key_truncate = 0;
stats->cache_hs_key_truncate_onpage_removal = 0;
- stats->cache_hs_key_truncate_mix_ts = 0;
+ stats->cache_hs_key_truncate_non_ts = 0;
stats->cache_hs_write_squash = 0;
stats->cache_inmem_splittable = 0;
stats->cache_inmem_split = 0;
@@ -557,7 +558,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->cache_hs_key_truncate_rts += from->cache_hs_key_truncate_rts;
to->cache_hs_key_truncate += from->cache_hs_key_truncate;
to->cache_hs_key_truncate_onpage_removal += from->cache_hs_key_truncate_onpage_removal;
- to->cache_hs_key_truncate_mix_ts += from->cache_hs_key_truncate_mix_ts;
+ to->cache_hs_key_truncate_non_ts += from->cache_hs_key_truncate_non_ts;
to->cache_hs_write_squash += from->cache_hs_write_squash;
to->cache_inmem_splittable += from->cache_inmem_splittable;
to->cache_inmem_split += from->cache_inmem_split;
@@ -787,7 +788,7 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->cache_hs_key_truncate += WT_STAT_READ(from, cache_hs_key_truncate);
to->cache_hs_key_truncate_onpage_removal +=
WT_STAT_READ(from, cache_hs_key_truncate_onpage_removal);
- to->cache_hs_key_truncate_mix_ts += WT_STAT_READ(from, cache_hs_key_truncate_mix_ts);
+ to->cache_hs_key_truncate_non_ts += WT_STAT_READ(from, cache_hs_key_truncate_non_ts);
to->cache_hs_write_squash += WT_STAT_READ(from, cache_hs_write_squash);
to->cache_inmem_splittable += WT_STAT_READ(from, cache_inmem_splittable);
to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
@@ -1040,7 +1041,8 @@ static const char *const __stats_connection_desc[] = {
"cache: history store table truncation to remove an update",
"cache: history store table truncation to remove range of updates due to key being removed from "
"the data page during reconciliation",
- "cache: history store table truncation to remove range of updates due to mixed timestamps",
+ "cache: history store table truncation to remove range of updates due to non timestamped update "
+ "on data page",
"cache: history store table writes requiring squashed modifies",
"cache: in-memory page passed criteria to be split",
"cache: in-memory page splits",
@@ -1556,7 +1558,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_hs_key_truncate_rts = 0;
stats->cache_hs_key_truncate = 0;
stats->cache_hs_key_truncate_onpage_removal = 0;
- stats->cache_hs_key_truncate_mix_ts = 0;
+ stats->cache_hs_key_truncate_non_ts = 0;
stats->cache_hs_write_squash = 0;
stats->cache_inmem_splittable = 0;
stats->cache_inmem_split = 0;
@@ -2053,7 +2055,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cache_hs_key_truncate += WT_STAT_READ(from, cache_hs_key_truncate);
to->cache_hs_key_truncate_onpage_removal +=
WT_STAT_READ(from, cache_hs_key_truncate_onpage_removal);
- to->cache_hs_key_truncate_mix_ts += WT_STAT_READ(from, cache_hs_key_truncate_mix_ts);
+ to->cache_hs_key_truncate_non_ts += WT_STAT_READ(from, cache_hs_key_truncate_non_ts);
to->cache_hs_write_squash += WT_STAT_READ(from, cache_hs_write_squash);
to->cache_inmem_splittable += WT_STAT_READ(from, cache_inmem_splittable);
to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
index 0179adddf46..7c4c97cb974 100644
--- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
@@ -826,7 +826,8 @@ main(int argc, char *argv[])
cur_shadow->set_key(cur_shadow, kname);
/*
* The collection table should always only have the data as of the checkpoint. The
- * shadow table should always have the exact same data (or not) as the collection table.
+ * shadow table should always have the exact same data (or not) as the collection table,
+ * except for the last key that may be committed after the stable timestamp.
*/
if ((ret = cur_coll->search(cur_coll)) != 0) {
if (ret != WT_NOTFOUND)
@@ -850,7 +851,12 @@ main(int argc, char *argv[])
} else if ((ret = cur_shadow->search(cur_shadow)) != 0) {
if (ret != WT_NOTFOUND)
testutil_die(ret, "shadow search");
- else {
+ /*
+ * We insert the record into the collection table at timestamp t and into the shadow
+ * table at timestamp t + 1. If the checkpoint finishes at timestamp t, the last
+ * shadow table record will be removed by rollback to stable after restart.
+ */
+ if (durable_fp <= stable_val) {
printf("%s: SHADOW no record with key %" PRIu64 "\n", fname, key);
absent_shadow++;
}
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 8a627e7fac5..cf65f3efb1d 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -1658,6 +1658,17 @@ tasks:
set -o verbose
test/evergreen/compatibility_test_for_releases.sh -w
+ - name: import-compatibility-test
+ commands:
+ - func: "get project"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger"
+ script: |
+ set -o errexit
+ set -o verbose
+ test/evergreen/import_compatibility_test.sh
+
- name: generate-datafile-little-endian
depends_on:
- name: compile
@@ -2541,6 +2552,7 @@ buildvariants:
- ubuntu1804-test
tasks:
- name: compatibility-test-for-newer-releases
+ - name: import-compatibility-test
- name: windows-64
display_name: "! Windows 64-bit"
diff --git a/src/third_party/wiredtiger/test/evergreen/import_compatibility_test.sh b/src/third_party/wiredtiger/test/evergreen/import_compatibility_test.sh
new file mode 100755
index 00000000000..6f0ba25ee09
--- /dev/null
+++ b/src/third_party/wiredtiger/test/evergreen/import_compatibility_test.sh
@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+#
+# Test importing of files created in previous versions of WiredTiger.
+# Test that we can downgrade a database after importing a file.
+
+set -e
+
+# build_branch --
+#    Clone the repository into a directory named after the branch (unless that directory
+#    already exists), check the branch out and build it.
+#    1: branch
+build_branch()
+{
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+    echo "Building branch: \"$1\""
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+    # Clone if it doesn't already exist.
+    if [ ! -d "$1" ]; then
+        git clone --quiet https://github.com/wiredtiger/wiredtiger.git "$1"
+    fi
+    cd "$1"
+
+    git checkout --quiet "$1"
+
+    # Keep the configure options in an array so each one expands to its own word without
+    # relying on unquoted word splitting.
+    local config=()
+    config+=(--enable-snappy)
+
+    # Build in a subshell so a failure anywhere in the chain fails the whole command.
+    (sh build_posix/reconf &&
+        ./configure "${config[@]}" && make -j "$(grep -c ^processor /proc/cpuinfo)") > /dev/null
+    cd ..
+}
+
+# create_file --
+#    Create a file object in the branch's WT_TEST home directory and write a few key/value
+#    pairs to it using that branch's wt tool.
+#    1: branch
+#    2: file
+create_file()
+{
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+    echo "Branch \"$1\" creating and populating \"$2\""
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+    local wt_cmd="$1/wt"
+    local test_dir="$1/WT_TEST/"
+    local uri="file:$2"
+
+    # Make the home directory.
+    mkdir -p "$test_dir"
+
+    # Create the file and populate with a few key/values.
+    "$wt_cmd" -h "$test_dir" create -c "key_format=S,value_format=S" "$uri"
+    "$wt_cmd" -h "$test_dir" write "$uri" abc 123 def 456 hij 789
+}
+
+# import_file --
+#    Copy a file from the source branch's WT_TEST home directory into the destination branch's
+#    one, then import it there using the destination branch's wt tool.
+#    1: dest branch
+#    2: source branch
+#    3: file
+import_file()
+{
+    # The file travels from the source branch ($2) to the destination branch ($1).
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+    echo "Importing file \"$3\" from \"$2\" to \"$1\""
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+    local wt_cmd="$1/wt"
+    local test_dir="$1/WT_TEST/"
+    mkdir -p "$test_dir"
+
+    # Copy the file across to the destination branch's home directory.
+    local src_file="$2/WT_TEST/$3"
+    cp "$src_file" "$test_dir"
+
+    # Run import via the wt tool.
+    local uri="file:$3"
+    "$wt_cmd" -h "$test_dir" create -c "import=(enabled,repair=true)" "$uri"
+}
+
+# verify_file --
+#    Run the branch's wt tool to verify a file in that branch's WT_TEST home directory.
+#    1: branch
+#    2: file
+verify_file()
+{
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+    echo "Branch \"$1\" verifying \"$2\""
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+    local wt_cmd="$1/wt"
+    local test_dir="$1/WT_TEST/"
+    local uri="file:$2"
+
+    "$wt_cmd" -h "$test_dir" verify "$uri"
+}
+
+# cleanup_branch --
+#    Remove the branch's WT_TEST home directory if it exists.
+#    1: branch
+cleanup_branch()
+{
+    local test_dir="$1/WT_TEST/"
+    # Quote the path so it is passed to rm as a single, literal argument.
+    if [ -d "$test_dir" ]; then
+        rm -rf "$test_dir"
+    fi
+}
+
+# import_compatibility_test --
+#    Create a file with the older branch, import and verify it with the newer branch, then
+#    dump it from the newer branch's home directory using the older branch's wt tool.
+#    1: newer branch
+#    2: older branch
+import_compatibility_test()
+{
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+    echo "Testing import compatibility between \"$1\" and \"$2\""
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+    # Remove any leftover data files.
+    cleanup_branch "$1"
+    cleanup_branch "$2"
+
+    # Create a file in the older branch.
+    create_file "$2" test_import
+
+    # Now import it into the newer branch and verify.
+    import_file "$1" "$2" test_import
+    verify_file "$1" test_import
+
+    # Now downgrade by running wt from the older branch and dumping the table contents.
+    #
+    # Before trying this, we must remove the base configuration. The wt tool produces this
+    # file, however MongoDB will not, so we should emulate this.
+    rm "$1/WT_TEST/WiredTiger.basecfg"
+    "$2/wt" -h "$1/WT_TEST/" dump file:test_import
+}
+
+# Release branches, ordered from newest to oldest. Each branch is tested against the one that
+# follows it in this list.
+#
+# Go all the way back to mongodb-4.2 since that's the first release where we don't support live
+# import.
+release_branches=(develop mongodb-5.0 mongodb-4.4 mongodb-4.2)
+
+# Build each of the release branches.
+for b in "${release_branches[@]}"; do
+    build_branch "$b"
+done
+
+for i in "${!release_branches[@]}"; do
+    newer=${release_branches[$i]}
+
+    # MongoDB v4.2 doesn't support live import so it should only ever be used as the "older" branch
+    # that we're importing from.
+    if [ "$newer" = mongodb-4.2 ]; then
+        continue
+    fi
+
+    older=${release_branches[$i+1]}
+    import_compatibility_test "$newer" "$older"
+done
diff --git a/src/third_party/wiredtiger/test/suite/test_jsondump02.py b/src/third_party/wiredtiger/test/suite/test_jsondump02.py
index 9ae8dead18c..080b698d7a2 100755
--- a/src/third_party/wiredtiger/test/suite/test_jsondump02.py
+++ b/src/third_party/wiredtiger/test/suite/test_jsondump02.py
@@ -92,9 +92,6 @@ class test_jsondump02(wttest.WiredTigerTestCase, suite_subprocess):
Create JSON cursors and test them directly, also test
dump/load commands.
"""
- import platform
- if platform.system() == 'Darwin':
- self.skipTest('JSON cursor test for OSX not yet working on Python3')
extra_params = ',allocation_size=512,' +\
'internal_page_max=16384,leaf_page_max=131072'
self.session.create(self.table_uri1,
diff --git a/src/third_party/wiredtiger/test/suite/test_util01.py b/src/third_party/wiredtiger/test/suite/test_util01.py
index 9dae07f3ca6..3eb457ceb1f 100755
--- a/src/third_party/wiredtiger/test/suite/test_util01.py
+++ b/src/third_party/wiredtiger/test/suite/test_util01.py
@@ -133,7 +133,21 @@ class test_util01(wttest.WiredTigerTestCase, suite_subprocess):
def dump_kv_to_line(self, b):
# The output from dump is a 'u' format.
- return b.strip(b'\x00').decode() + '\n'
+ # Printable chars appear 'as is', unprintable chars
+ # appear as \hh where hh are hex digits.
+ # We can't decode the entire byte array, as some Unicode decoders
+ # will complain when the bytes don't represent valid UTF-8 encoded
+ # characters.
+
+ # Create byte representation of printable ascii chars
+ printable_chars = bytes(string.printable, 'ascii')
+ result = ''
+ for byte in b.strip(b'\x00'):
+ if byte in printable_chars:
+ result += bytearray([byte]).decode()
+ else:
+ result += "\\{:02x}".format(byte)
+ return result + '\n'
def write_entries(self, cursor, expectout, hexoutput, commit_timestamp, write_expected):
if commit_timestamp is not None:
@@ -217,9 +231,6 @@ class test_util01(wttest.WiredTigerTestCase, suite_subprocess):
self.dump(False, True, None, None)
def test_dump_api(self):
- import platform
- if platform.system() == 'Darwin':
- self.skipTest('dump API test for OSX not yet working on Python3')
self.dump(True, False, None, None)
def test_dump_api_hex(self):