summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2020-10-20 16:38:49 +1100
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-10-20 06:35:06 +0000
commitd0c4d5770387b73d8074e24ac0134f6e70d34078 (patch)
treeb0a447e9280f9665cc7981bc64087a29130b55e9 /src/third_party/wiredtiger
parent94da5dc43e80e969099dbd86a24c8d34e1c2d372 (diff)
downloadmongo-d0c4d5770387b73d8074e24ac0134f6e70d34078.tar.gz
Import wiredtiger: f827562f2b486e000665c97ea81674012d5a765b from branch mongodb-5.0
ref: 6614fa3dca..f827562f2b
for: 4.9.0

WT-6798 Utilize Arm LSE atomics and the correct strength barriers
WT-6808 Documentation: add top level architecture picture
WT-6812 Fix "out-of-order fixup" potentially corrupting historical values
Diffstat (limited to 'src/third_party/wiredtiger')
-rw-r--r--src/third_party/wiredtiger/build_posix/configure.ac.in6
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_docs17
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_docs_plantuml2
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok7
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-index.dox121
-rw-r--r--src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_create.pngbin124661 -> 162127 bytes
-rw-r--r--src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_generic.pngbin13799 -> 16233 bytes
-rw-r--r--src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_rename.pngbin113981 -> 148026 bytes
-rw-r--r--src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx4
-rw-r--r--src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.pngbin0 -> 67012 bytes
-rw-r--r--src/third_party/wiredtiger/src/docs/spell.ok5
-rw-r--r--src/third_party/wiredtiger/src/history/hs_rec.c12
-rw-r--r--src/third_party/wiredtiger/src/include/gcc.h29
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp20.py79
15 files changed, 269 insertions, 15 deletions
diff --git a/src/third_party/wiredtiger/build_posix/configure.ac.in b/src/third_party/wiredtiger/build_posix/configure.ac.in
index 0cbf422de08..a18834185d0 100644
--- a/src/third_party/wiredtiger/build_posix/configure.ac.in
+++ b/src/third_party/wiredtiger/build_posix/configure.ac.in
@@ -102,6 +102,12 @@ if test "$GCC" = "yes"; then
# instructions.
if test "$wt_cv_arm64" = "yes"; then
AM_CFLAGS="$AM_CFLAGS -march=armv8-a+crc"
+ # moutline-atomics preserves backwards compatibility with Arm v8.0
+ # systems but also supports using Arm v8.1 atomics. The latter can
+ # massively improve performance on larger Arm systems. The flag was
+ # back ported to gcc8, 9 and is the default in gcc10+. See if the
+ # compiler supports the flag.
+ AX_CHECK_COMPILE_FLAG([-moutline-atomics], [AM_CFLAGS="$AM_CFLAGS -moutline-atomics"])
fi
else
# The Solaris native compiler gets the additional -mt flag.
diff --git a/src/third_party/wiredtiger/dist/s_docs b/src/third_party/wiredtiger/dist/s_docs
index 095ada474d3..a8b1b2eb487 100755
--- a/src/third_party/wiredtiger/dist/s_docs
+++ b/src/third_party/wiredtiger/dist/s_docs
@@ -166,6 +166,23 @@ EOF
sed -i~ -e 's,/\.html,/,' -e 's,\.html\.html,.html,' navtree.js &&
rm -f navtree.js~)
+ # Any cmapx files that are generated by plantuml need to be referred to
+ CMAPX=`find ../src/docs/ -type f -name "*.cmapx" 2>/dev/null`
+ if [ "$CMAPX" != '' ]; then
+ cd ../docs
+ for f in $CMAPX; do
+ b=`echo $f | sed -e 's:.*/::' -e 's/.cmapx$//'`
+ for html in `grep -l $b.png *.html`; do
+ # There's an image in this HTML file that has a map file
+ # generated by plantuml. Refer to the map by its name
+ # and insert the contents of the map into the HTML source.
+ sed -e "/<img.*=\"$b[.]png\"/s/\(\"$b[.]png\"\)/\1 usemap=\"#${b}_map\"/" \
+ -e "/<img.*=\"$b[.]png\"/r $f" $html > $html.NEW
+ mv $html.NEW $html
+ done
+ done
+ fi
+
# Fixup the man pages generated by Doxygen. We want the command line
# documentation to be the main man page, but also install a man page
# for the WiredTiger header into the library section.
diff --git a/src/third_party/wiredtiger/dist/s_docs_plantuml b/src/third_party/wiredtiger/dist/s_docs_plantuml
index c646739d7ba..43c669591a7 100755
--- a/src/third_party/wiredtiger/dist/s_docs_plantuml
+++ b/src/third_party/wiredtiger/dist/s_docs_plantuml
@@ -35,7 +35,7 @@ test -f "../dist/plantuml.jar" || {
if [ $download_plantuml -eq 1 ]
then
echo 'Downloading plantuml:'
- wget $PLANTUML_URL -O ../dist/plantuml.jar
+ curl -L $PLANTUML_URL -o ../dist/plantuml.jar
else
echo 'plantuml can be downloaded from:'
echo 'https://sourceforge.net/projects/plantuml/files/plantuml.jar/download'
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index be6ae0893fe..3df400be17d 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -698,12 +698,14 @@ dll
dlopen
dlsym
dmalloc
+dmb
dmsg
doxgen
doxygen
drealloc
ds
dsb
+dsbs
dsk
dsrc
dst
@@ -894,6 +896,9 @@ isalpha
iscntrl
isdigit
isgraph
+ish
+ishld
+ishst
islocked
islower
ispo
@@ -1224,6 +1229,7 @@ setstr
setv
setvbuf
sfence
+shareability
signalled
sii
sizeof
@@ -1306,6 +1312,7 @@ timestamp
timestamped
timestamps
tinfo
+tlb
tmp
todo
tokenizer
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 99cbeecfe50..71f75ec285e 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-5.0",
- "commit": "6614fa3dcaceaa95da289bbffe3d96eecb1c972c"
+ "commit": "f827562f2b486e000665c97ea81674012d5a765b"
}
diff --git a/src/third_party/wiredtiger/src/docs/arch-index.dox b/src/third_party/wiredtiger/src/docs/arch-index.dox
index 53a49de7ed3..f5430ccb7e7 100644
--- a/src/third_party/wiredtiger/src/docs/arch-index.dox
+++ b/src/third_party/wiredtiger/src/docs/arch-index.dox
@@ -1,15 +1,136 @@
/*! @page arch-index WiredTiger Architecture Guide
+WiredTiger is built as a library that is linked into the application. Applications
+use the API interface to direct WiredTiger's operations (see @ref basic_api).
+Here is an overview of the software components in WiredTiger and how they are organized.
+An arrow indicates that the "from" component uses the "to" component.
+
+@plantuml_start{wt_diagram.png}
+@startuml{wt_diagram.png}
+
+' We add spacing to the diagram in places to influence the layout.
+' To do this, we create some invisible components with hidden arrows
+' pointing to them. Since we don't otherwise use the "file" component,
+' we set all its parts to be transparent, and any use of "file" results
+' in an invisible spacer whose width is directed by the length of its label.
+' When modifying this diagram, it's sometimes useful to comment out the
+' following lines, and any [hidden] directives used below, to see how
+' the spacers influence the layout. Note that this may be fragile;
+' the spacers give hints to the layout, such hints will not always be honored.
+
+skinparam fileBorderColor Transparent
+skinparam fileBackgroundColor Transparent
+skinparam fileFontColor Transparent
+skinparam fileShadowing false
+
+' Our diagram is simple. First, we define lots of labeled rectangles
+' with most nesting within the "engine" rectangle.
+
+together {
+ rectangle "Python API" as python_api
+ ' "storage" displays as an oval.
+ storage " C/C++ \n applications " as application
+ rectangle "wt Utility" as utility
+}
+
+' Trailing spaces for this label puts the text to the left.
+rectangle "**WiredTiger Engine** " as wt_engine {
+ ' Leading and trailing spaces make a wide rectangle.
+ together {
+ file "____" as SPACE_api
+ rectangle " C API " as c_api
+ file "____" as SPACE_api2
+ }
+ rectangle "[[#component-schema Schema]]" as schema
+ rectangle "Cursor" as cursor
+ rectangle "Transactions" as txn
+ rectangle "Metadata" as meta
+ rectangle "[[#component-dhandle dhandle/\n Btree]]" as btree
+ rectangle " Row\n storage" as row
+ rectangle " Column\n storage" as column
+ rectangle "History\n Store" as history
+ rectangle "Snapshots" as snapshot
+ rectangle "Cache" as cache
+ rectangle "Eviction" as evict
+
+ together {
+ rectangle " Block\n Manager" as block
+ file "__________" as SPACE_log
+ rectangle "Logging" as log
+ file "___" as SPACE_log2
+ }
+ rectangle " File System & OS \n interface" as os
+}
+together {
+ database "Database\n Files" as wt_file
+ database " Log \n Files" as log_file
+}
+
+' Influence the ordering at the top using (hidden) directed labels
+python_api -[hidden]right-> application
+application -[hidden]right-> utility
+
+python_api -down-> c_api
+application -down-> c_api
+utility -down-> c_api
+
+c_api -down-> schema
+c_api -down-> cursor
+c_api -down-> txn
+SPACE_api -[hidden]right-> c_api
+c_api -[hidden]right-> SPACE_api2
+
+schema -down-> meta
+schema -down-> btree
+cursor -down-> btree
+btree -down-> row
+btree -down-> column
+meta -up-> cursor
+' The hidden arrow helps our boxes to line up in a better way.
+meta -[hidden]right-> btree
+cursor -[hidden]right-> txn
+txn -down-> snapshot
+row -down-> cache
+column -down-> cache
+cache -down-> history
+evict -down-> history
+history -up-> cursor
+snapshot -down-> evict
+cache -right-> evict
+cache -down-> block
+evict -down-> block
+txn -down-> log
+
+block -[hidden]right-> SPACE_log
+cache -[hidden]down-> SPACE_log
+evict -[hidden]down-> SPACE_log
+SPACE_log -[hidden]right-> log
+log -[hidden]right-> SPACE_log2
+
+block -down-> os
+log -down-> os
+os -down-> wt_file
+os -down-> log_file
+
+wt_file -[hidden]right-> log_file
+
+@enduml
+@plantuml_end
+
+We go into some detail for some of the internal components.
+
@subpage arch-glossary
WiredTiger assigns specific meanings to certain words. Here we decode them.
+\anchor component-schema
@subpage arch-schema
Most applications begin to make use of WiredTiger by creating a table (or other
data object) to store their data in. Create is one of several schema operations
available in WiredTiger.
+\anchor component-dhandle
@subpage arch-dhandle-lifecycle
An internal structure called Data Handle (dhandle) is used to represent and
diff --git a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_create.png b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_create.png
index d2699646e9c..d2fc00448f0 100644
--- a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_create.png
+++ b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_create.png
Binary files differ
diff --git a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_generic.png b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_generic.png
index 790584b3f7b..f046734ce52 100644
--- a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_generic.png
+++ b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_generic.png
Binary files differ
diff --git a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_rename.png b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_rename.png
index b0aa560946e..65ac52d6fa1 100644
--- a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_rename.png
+++ b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/schema_rename.png
Binary files differ
diff --git a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx
new file mode 100644
index 00000000000..411d3905b8e
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx
@@ -0,0 +1,4 @@
+<map id="wt_diagram_map" name="wt_diagram_map">
+<area shape="rect" id="id1" href="#component-schema" title="#component-schema" alt="" coords="98,224,151,241"/>
+<area shape="rect" id="id2" href="#component-dhandle" title="#component-dhandle" alt="" coords="181,320,244,353"/>
+</map>
diff --git a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.png b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.png
new file mode 100644
index 00000000000..57480c2156f
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.png
Binary files differ
diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok
index 26f40b5e099..b153ba12668 100644
--- a/src/third_party/wiredtiger/src/docs/spell.ok
+++ b/src/third_party/wiredtiger/src/docs/spell.ok
@@ -246,6 +246,10 @@ fdatasync
fextend
fh
fieldname
+fileBackgroundColor
+fileBorderColor
+fileFontColor
+fileShadowing
fileID
fileformats
fileid
@@ -481,6 +485,7 @@ seqno
serializable
sess
sid
+skinparam
skiplist
spinlock
spinlocks
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c
index a03b8e31739..2c5e48e1ca6 100644
--- a/src/third_party/wiredtiger/src/history/hs_rec.c
+++ b/src/third_party/wiredtiger/src/history/hs_rec.c
@@ -847,10 +847,11 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
WT_CURSOR_BTREE *hs_cbt;
WT_DECL_RET;
WT_HS_TIME_POINT start_time_point, stop_time_point;
- WT_ITEM hs_key;
+ WT_ITEM hs_key, hs_value;
+ WT_TIME_WINDOW tw;
WT_UPDATE *tombstone;
wt_timestamp_t hs_ts;
- uint64_t hs_counter;
+ uint64_t hs_counter, hs_upd_type;
uint32_t hs_btree_id;
int cmp;
char ts_string[5][WT_TS_INT_STRING_SIZE];
@@ -859,6 +860,7 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
insert_cursor = NULL;
hs_cbt = (WT_CURSOR_BTREE *)hs_cursor;
WT_CLEAR(hs_key);
+ WT_CLEAR(hs_value);
tombstone = NULL;
/*
@@ -973,9 +975,13 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
stop_time_point.ts = stop_time_point.durable_ts = ts;
stop_time_point.txnid = hs_cbt->upd_value->tw.stop_txn;
+ /* Extract the underlying value for reinsertion. */
+ WT_ERR(hs_cursor->get_value(
+ hs_cursor, &tw.durable_stop_ts, &tw.durable_start_ts, &hs_upd_type, &hs_value));
+
/* Reinsert entry with earlier timestamp. */
while ((ret = __hs_insert_record_with_btree_int(session, insert_cursor, btree, key,
- WT_UPDATE_STANDARD, &hs_cursor->value, &start_time_point, &stop_time_point,
+ (uint8_t)hs_upd_type, &hs_value, &start_time_point, &stop_time_point,
*counter)) == WT_RESTART)
;
WT_ERR(ret);
diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h
index be4503f3492..85ba3214d3e 100644
--- a/src/third_party/wiredtiger/src/include/gcc.h
+++ b/src/third_party/wiredtiger/src/include/gcc.h
@@ -218,17 +218,28 @@ WT_ATOMIC_FUNC(size, size_t, size_t *vp, size_t v)
#elif defined(__aarch64__)
#define WT_PAUSE() __asm__ volatile("yield" ::: "memory")
-#define WT_FULL_BARRIER() \
- do { \
- __asm__ volatile("dsb sy" ::: "memory"); \
+
+/*
+ * dmb are chosen here because they are sufficient to guarantee the ordering described above. We
+ * don't want to use dsbs because they provide a much stronger guarantee of completion which isn't
+ * required. Additionally, dsbs synchronize other system activities such as tlb and cache
+ * maintenance instructions which is not required in this case.
+ *
+ * A shareability domain of inner-shareable is selected because all the entities participating in
+ * the ordering requirements are CPUs and ordering with respect to other devices or memory-types
+ * isn't required.
+ */
+#define WT_FULL_BARRIER() \
+ do { \
+ __asm__ volatile("dmb ish" ::: "memory"); \
} while (0)
-#define WT_READ_BARRIER() \
- do { \
- __asm__ volatile("dsb ld" ::: "memory"); \
+#define WT_READ_BARRIER() \
+ do { \
+ __asm__ volatile("dmb ishld" ::: "memory"); \
} while (0)
-#define WT_WRITE_BARRIER() \
- do { \
- __asm__ volatile("dsb st" ::: "memory"); \
+#define WT_WRITE_BARRIER() \
+ do { \
+ __asm__ volatile("dmb ishst" ::: "memory"); \
} while (0)
#elif defined(__s390x__)
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp20.py b/src/third_party/wiredtiger/test/suite/test_timestamp20.py
index 69aec301a47..63a2503d915 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp20.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp20.py
@@ -37,7 +37,7 @@ class test_timestamp20(wttest.WiredTigerTestCase):
conn_config = 'cache_size=50MB'
session_config = 'isolation=snapshot'
- def test_timestamp20(self):
+ def test_timestamp20_standard(self):
uri = 'table:test_timestamp20'
self.session.create(uri, 'key_format=S,value_format=S')
self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1))
@@ -86,3 +86,80 @@ class test_timestamp20(wttest.WiredTigerTestCase):
for i in range(1, 10000):
self.assertEqual(old_reader_cursor[str(i)], value3)
old_reader_session.rollback_transaction()
+
+ # In this test we're using modifies since they are more sensitive to corruptions.
+ #
+ # Corruptions to string types may go undetected since non-ASCII characters won't be included in
+ # the conversion to a Python string.
+ def test_timestamp20_modify(self):
+ uri = 'table:test_timestamp20'
+ self.session.create(uri, 'key_format=S,value_format=S')
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1))
+ cursor = self.session.open_cursor(uri)
+
+ value1 = 'a' * 500
+ value2 = 'b' * 500
+ value3 = 'c' * 500
+
+ # Apply the base value.
+ for i in range(1, 10000):
+ self.session.begin_transaction()
+ cursor[str(i)] = value1
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(10))
+
+ # Now apply a series of modifies.
+ for i in range(1, 10000):
+ self.session.begin_transaction()
+ cursor.set_key(str(i))
+ self.assertEqual(cursor.modify([wiredtiger.Modify('B', 100, 1)]), 0)
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(20))
+
+ for i in range(1, 10000):
+ self.session.begin_transaction()
+ cursor.set_key(str(i))
+ self.assertEqual(cursor.modify([wiredtiger.Modify('C', 200, 1)]), 0)
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(30))
+
+ # Open an old reader at this point.
+ #
+ # I'm trying to read the middle modify because I specifically don't want to read one that
+ # has been squashed into a full update.
+ old_reader_session = self.conn.open_session()
+ old_reader_cursor = old_reader_session.open_cursor(uri)
+ old_reader_session.begin_transaction('read_timestamp=' + timestamp_str(30))
+
+ # Now apply the last modify.
+ # This will be the end of the chain of modifies.
+ for i in range(1, 10000):
+ self.session.begin_transaction()
+ cursor.set_key(str(i))
+ self.assertEqual(cursor.modify([wiredtiger.Modify('D', 300, 1)]), 0)
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(40))
+
+ # Now put two updates out of order. 5 will go to the history store and will trigger a
+ # correction to the existing contents.
+ for i in range(1, 10000):
+ self.session.begin_transaction()
+ cursor[str(i)] = value2
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(5))
+ self.session.begin_transaction()
+ cursor[str(i)] = value3
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(50))
+
+ # Open up a new transaction and read at 30.
+ # We shouldn't be able to see past 5 due to txnid visibility.
+ self.session.begin_transaction('read_timestamp=' + timestamp_str(30))
+ for i in range(1, 10000):
+ self.assertEqual(cursor[str(i)], value2)
+ self.session.rollback_transaction()
+
+ # Put together expected value.
+ expected = list(value1)
+ expected[100] = 'B'
+ expected[200] = 'C'
+ expected = str().join(expected)
+
+ # On the other hand, this older transaction SHOULD be able to read past the 5.
+ for i in range(1, 10000):
+ self.assertEqual(old_reader_cursor[str(i)], expected)
+ old_reader_session.rollback_transaction()