Import wiredtiger: 4e90eef9aa6cab548252d137b5fd899293cba11b from branch mongodb-master

ref: 6bfcab58b3..4e90eef9aa for: 5.3.0 WT-8756 Add null check when dropping a tier, and add tiered storage tests
author: Chenhao Qu <chenhao.qu@mongodb.com> 2022-02-03 12:35:30 +1100
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-02-03 04:00:32 +0000
commit: 3b73633480220630507435a4edc3fb141739e93c (patch)
tree: f9c82681cd51d33b1384d2b77633984048906c4b /src
parent: 76e19e92235e3d8813a249e3d064c02c847a4595 (diff)
download: mongo-3b73633480220630507435a4edc3fb141739e93c.tar.gz
4 files changed, 298 insertions, 5 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 1e3ad6f130a..d698b891cd7 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
     "vendor": "wiredtiger",
     "github": "wiredtiger/wiredtiger.git",
     "branch": "mongodb-master",
-    "commit": "6bfcab58b39232366daf8448c0cba29b7f170042"
+    "commit": "4e90eef9aa6cab548252d137b5fd899293cba11b"
 }
diff --git a/src/third_party/wiredtiger/src/schema/schema_drop.c b/src/third_party/wiredtiger/src/schema/schema_drop.c
index 50e02192507..a1420755926 100644
--- a/src/third_party/wiredtiger/src/schema/schema_drop.c
+++ b/src/third_party/wiredtiger/src/schema/schema_drop.c
@@ -234,7 +234,7 @@ __drop_tiered(WT_SESSION_IMPL *session, const char *uri, bool force, const char
         WT_ERR(__wt_tiered_name(session, &tiered->iface, i, WT_TIERED_NAME_OBJECT, &name));
         __wt_verbose(session, WT_VERB_TIERED, "DROP_TIERED: remove object %s from metadata", name);
         WT_ERR_NOTFOUND_OK(__wt_metadata_remove(session, name), false);
-        if (remove_files)
+        if (remove_files && tier != NULL)
             WT_TRET(__wt_meta_track_drop(session, tier->name));
         __wt_free(session, name);
     }
diff --git a/src/third_party/wiredtiger/test/suite/test_tiered14.py b/src/third_party/wiredtiger/test/suite/test_tiered14.py
new file mode 100755
index 00000000000..6e25c0e8487
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_tiered14.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import os, random, wiredtiger, wtscenario, wttest
+from wtdataset import TrackedSimpleDataSet, TrackedComplexDataSet
+
+# test_tiered14.py
+#    Test somewhat arbitrary combinations of flush_tier, checkpoint, restarts,
+#    data additions and updates.
+class test_tiered14(wttest.WiredTigerTestCase):
+    uri = "table:test_tiered14-{}"   # format for subtests
+
+    auth_token = "test_token"
+    bucket = "mybucket"
+    cachedir = "mybucket-cache"
+    bucket_prefix = "pfx_"
+    extension_name = "local_store"
+
+    # FIXME-WT-8758: enable the commented scenarios, they all seem to
+    # hit the same WT assertion.
+
+    # The multiplier makes the size of keys and values progressively larger.
+    # A multiplier of 0 makes the keys and values a single length.
+    multiplier = [
+        ('0', dict(multiplier=0)),
+#        ('S', dict(multiplier=1)),
+#        ('M', dict(multiplier=10)),
+#        ('L', dict(multiplier=100)),
+#        ('XL', dict(multiplier=1000)),
+    ]
+    keyfmt = [
+#        ('integer', dict(keyfmt='i')),
+        ('string', dict(keyfmt='S')),
+    ]
+    dataset = [
+        ('simple', dict(dataset='simple')),
+#        ('complex', dict(dataset='complex')),
+    ]
+    scenarios = wtscenario.make_scenarios(multiplier, keyfmt, dataset)
+
+    def conn_config(self):
+        if not os.path.exists(self.bucket):  # create the bucket directory on first use
+            os.mkdir(self.bucket)
+        return \
+          'tiered_storage=(auth_token=%s,' % self.auth_token + \
+          'bucket=%s,' % self.bucket + \
+          'bucket_prefix=%s,' % self.bucket_prefix + \
+          'cache_directory=%s,' % self.cachedir + \
+          'name=%s),tiered_manager=(wait=0)' % self.extension_name
+
+    # Load the local store extension.
+    def conn_extensions(self, extlist):
+        # Windows doesn't support dynamically loaded extension libraries.
+        if os.name == 'nt':
+            extlist.skip_if_missing = True
+        extlist.extension('storage_sources', self.extension_name)
+
+    def progress(self, s):  # log s, tagged with the current test number and op position
+        outstr = "testnum {}, position {}: {}".format(self.testnum, self.position, s)
+        self.verbose(3, outstr)
+        self.pr(outstr)
+
+    # Run a sequence of operations, indicated by a string.
+    #  a = add some number of keys
+    #  u = update some number of keys
+    #  c = checkpoint
+    #  r = reopen
+    #  f = flush_tier
+    #  . = check to make sure all expected values are present
+    #
+    # We require a unique test number so we can generate a different uri from
+    # previous runs.  A different approach is to drop the uri, but then we need to
+    # remove the bucket and cache, which is specific to the storage source extension.
+    def playback(self, testnum, ops):
+        self.testnum = testnum
+        self.position = -1
+
+        uri = self.uri.format(testnum)
+        self.progress('Running ops: {} using uri {}'.format(ops, uri))
+        if self.dataset == 'simple':
+            ds = TrackedSimpleDataSet(self, uri, self.multiplier, key_format=self.keyfmt)
+        elif self.dataset == 'complex':
+            ds = TrackedComplexDataSet(self, uri, self.multiplier, key_format=self.keyfmt)
+        else:
+            raise ValueError('unknown dataset scenario: {}'.format(self.dataset))
+
+        # Populate for a tracked data set is needed to create the uri.
+        ds.populate()
+        inserted = 0
+
+        # At the end of the sequence of operations, do a final check ('.').
+        for op in ops + '.':
+            self.position += 1
+            try:
+                if op == 'f':
+                    self.progress('flush_tier')
+                    self.session.flush_tier(None)
+                elif op == 'c':
+                    self.progress('checkpoint')
+                    self.session.checkpoint()
+                elif op == 'r':
+                    self.progress('reopen')
+                    self.reopen_conn()
+                elif op == 'a':
+                    self.progress('add')
+                    n = random.randrange(1, 101)  # 1 <= n <= 100
+                    ds.store_range(inserted, n)
+                    inserted += n
+                elif op == 'u':
+                    self.progress('update')
+                    # only update the elements if enough have already been added.
+                    n = random.randrange(1, 101)  # 1 <= n <= 100
+                    if n < inserted:
+                        pos = random.randrange(0, inserted - n)
+                        ds.store_range(pos, n)
+                elif op == '.':
+                    self.progress('check')
+                    ds.check()
+            except Exception as e:
+                self.progress('Failed at position {} in {}: {}'.format(self.position, ops, str(e)))
+                raise
+
+    # Test tiered storage with checkpoints and flush_tier calls.
+    def test_tiered(self):
+        random.seed(0)
+
+        # Get started with a fixed sequence of basic operations.
+        # There's no particular reason to start with this sequence.
+        testnum = 0
+        self.playback(testnum, "aaaaacaaa.uucrauaf.aauaac.auu.aacrauafa.uruua.")
+
+        for i in range(0, 10):
+            testnum += 1
+            # Generate a sequence of 100 operations that is heavy on additions and updates.
+            s = ''.join(random.choices('aaaaauuuuufcr.', k=100))
+            self.playback(testnum, s)
+
+        # FIXME-WT-8758: test disabled for now.
+        if False:
+            for i in range(0, 10):
+                testnum += 1
+                # Generate a sequence of 100 operations that has a greater mix of 'operational' functions.
+                s = ''.join(random.choices('aufcr.', k=100))
+                self.playback(testnum, s)
+
+if __name__ == '__main__':
+    wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/wtdataset.py b/src/third_party/wiredtiger/test/suite/wtdataset.py
index c4bd993ca80..a7fd0ed63ac 100755
--- a/src/third_party/wiredtiger/test/suite/wtdataset.py
+++ b/src/third_party/wiredtiger/test/suite/wtdataset.py
@@ -49,12 +49,22 @@ class BaseDataSet(object):
                                      + ',value_format=' + self.value_format
                                      + ',' + self.config)
 
-    def fill(self):
+    def store_one_cursor(self, c, i):  # write key(i) -> value(i) through cursor c
+        c[self.key(i)] = self.value(i)
+
+    def store_range_cursor(self, c, key, count):  # store keys key..key+count-1 through cursor c
+        for i in range(key, key + count):
+            self.store_one_cursor(c, i)
+
+    def store_range(self, key, count):
         c = self.testcase.session.open_cursor(self.uri, None)
-        for i in range(1, self.rows + 1):
-            c[self.key(i)] = self.value(i)
+        for i in range(key, key + count):
+            self.store_one_cursor(c, i)
         c.close()
 
+    def fill(self):  # populate keys 1..self.rows
+        self.store_range(1, self.rows)
+
     def postfill(self):
         pass
 
@@ -82,6 +92,11 @@ class BaseDataSet(object):
             raise AssertionError(
                 'key: object has unexpected format: ' + key_format)
 
+    # Deduce the source integer for a key in a Simple or Complex data set (key_format is not consulted).
+    @staticmethod
+    def reverse_key_by_format(key, key_format):
+        return int(key)
+
     # Create a value for a Simple data set.
     @staticmethod
     def value_by_format(i, value_format):
@@ -393,6 +408,112 @@ class ProjectionIndexDataSet(BaseDataSet):
     def index_name(self, i):
         return self.indexname
 
+# A data set based on ComplexDataSet that allows large values (depending on a multiplier),
+# the ability to update keys with different values, and track the expected value for each key.
+class TrackedComplexDataSet(ComplexDataSet):
+    alphabet = 'abcdefghijklmnopqrstuvwxyz'
+
+    def __init__(self, testcase, uri, multiplier, **kwargs):
+        super(TrackedComplexDataSet, self).__init__(testcase, uri, 0, **kwargs)
+        self.multiplier = multiplier
+        self.track_values = dict()
+        self.refstr = ': ' + self.alphabet * multiplier
+
+    # Return how many times key i has been stored; 0 if it was never stored.
+    def store_count(self, i):
+        return self.track_values.get(i, 0)
+
+    # override: record the store before writing the value.
+    def store_one_cursor(self, c, i):
+        self.track_values[i] = self.store_count(i) + 1
+        c[self.key(i)] = self.value(i)
+
+    # Redefine the value stored to get bigger depending on the multiplier,
+    # and to mix up the value depending on how many times it has been updated.
+    #
+    # If multiplier is 0, use the basic value used by ComplexDataSet.
+    # In this case, since it doesn't rely on the number of stores, updates
+    # of the same key will store the same value each time.
+    def comparable_value(self, i):
+        if self.multiplier == 0:
+            return ComplexDataSet.comparable_value(self, i)
+        nstores = self.store_count(i)
+        m = self.multiplier
+        bigi = i * m + nstores
+        return [str(i) + self.refstr[0 : bigi % (26*m)],
+                i,
+                str(i) + self.refstr[0 : bigi % (23*m)],
+                str(i) + self.refstr[0 : bigi % (18*m)]]
+
+    # Walk the cursor and verify every row matches the tracked expectation,
+    # and that every tracked key was seen exactly once.
+    def check_cursor(self, cursor):
+        expect = dict(self.track_values)
+        for key, s1, i2, s3, s4 in cursor:
+            i = BaseDataSet.reverse_key_by_format(key, self.key_format)
+            v = self.value(i)
+            #self.testcase.tty('KEY: {} -> {}'.format(key, i))
+            #self.testcase.tty('GOT: {},{},{},{}'.format(s1, i2, s3, s4))
+            #self.testcase.tty('EXPECT: {}'.format(v))
+            self.testcase.assertEqual(s1, v[0])
+            self.testcase.assertEqual(i2, v[1])
+            self.testcase.assertEqual(s3, v[2])
+            self.testcase.assertEqual(s4, v[3])
+            self.testcase.assertIn(i, expect)
+            del expect[i]
+        self.testcase.assertEqual(len(expect), 0)
+
+# A data set based on SimpleDataSet that allows large values (depending on a multiplier),
+# the ability to update keys with different values, and track the expected value for each key.
+class TrackedSimpleDataSet(SimpleDataSet):
+    alphabet = 'abcdefghijklmnopqrstuvwxyz'
+
+    def __init__(self, testcase, uri, multiplier, **kwargs):
+        super(TrackedSimpleDataSet, self).__init__(testcase, uri, 0, **kwargs)
+        self.multiplier = multiplier
+        self.track_values = dict()
+        self.refstr = ': ' + self.alphabet * multiplier
+
+    # Return how many times key i has been stored; 0 if it was never stored.
+    def store_count(self, i):
+        return self.track_values.get(i, 0)
+
+    # override: record the store first, since value(i) depends on the store count.
+    def store_one_cursor(self, c, i):
+        self.track_values[i] = self.store_count(i) + 1
+        c[self.key(i)] = self.value(i)
+
+    # Redefine the value stored to get bigger depending on the multiplier,
+    # and to mix up the value depending on how many times it has been updated.
+    #
+    # If multiplier is 0, use the basic value used by SimpleDataSet.
+    # In this case, since it doesn't rely on the number of stores, updates
+    # of the same key will store the same value each time.
+    def comparable_value(self, i):
+        if self.multiplier == 0:
+            return SimpleDataSet.comparable_value(self, i)
+        nstores = self.store_count(i)
+        m = self.multiplier
+        bigi = i * m + nstores
+        return str(i) + self.refstr[0 : bigi % (26*m)]
+
+    def value(self, i):
+        return self.comparable_value(i)
+
+    # Verify each row against its tracked value and that every tracked key was seen exactly once.
+    def check_cursor(self, cursor):
+        expect = dict(self.track_values)
+        for key, s in cursor:
+            i = BaseDataSet.reverse_key_by_format(key, self.key_format)
+            v = self.value(i)
+            #self.testcase.tty('KEY: {} -> {}'.format(key, i))
+            #self.testcase.tty('GOT: {}'.format(s))
+            #self.testcase.tty('EXPECT: {}'.format(v))
+            self.testcase.assertEqual(s, v)
+            self.testcase.assertIn(i, expect)
+            del expect[i]
+        self.testcase.assertEqual(len(expect), 0)
+
 # create a key based on a cursor as a shortcut to creating a SimpleDataSet
 def simple_key(cursor, i):
     return BaseDataSet.key_by_format(i, cursor.key_format)
author	Chenhao Qu <chenhao.qu@mongodb.com>	2022-02-03 12:35:30 +1100
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-02-03 04:00:32 +0000
commit	3b73633480220630507435a4edc3fb141739e93c (patch)
tree	f9c82681cd51d33b1384d2b77633984048906c4b /src
parent	76e19e92235e3d8813a249e3d064c02c847a4595 (diff)
download	mongo-3b73633480220630507435a4edc3fb141739e93c.tar.gz