Import wiredtiger: 3ef954b29f131cb835c5b4cb81adcfc603dc072d from branch mongodb-5.0

ref: ba0c36ccbb..3ef954b29f for: 5.0.0-rc2 WT-7644 Implement python hooks for tiered storage
author: Chenhao Qu <chenhao.qu@mongodb.com> 2021-06-16 05:46:37 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-06-16 06:08:31 +0000
commit: fa5fb112aa7690954e547aa9ea994533c18c40c7 (patch)
tree: 22b594a572f10b53d0a20378acaa6b7ece1c287e
parent: ce3290347bf15009ce45a941a263b3ac42dd7e46 (diff)
download: mongo-fa5fb112aa7690954e547aa9ea994533c18c40c7.tar.gz
3 files changed, 175 insertions, 64 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index f2f615eaa9d..5f403fe7b0b 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
     "vendor": "wiredtiger",
     "github": "wiredtiger/wiredtiger.git",
     "branch": "mongodb-5.0",
-    "commit": "ba0c36ccbb5c4b00d9a83603d08020a5c9865e21"
+    "commit": "3ef954b29f131cb835c5b4cb81adcfc603dc072d"
 }
diff --git a/src/third_party/wiredtiger/test/suite/hook_tiered.py b/src/third_party/wiredtiger/test/suite/hook_tiered.py
index 5bb97ea399b..cb7a44608ab 100755
--- a/src/third_party/wiredtiger/test/suite/hook_tiered.py
+++ b/src/third_party/wiredtiger/test/suite/hook_tiered.py
@@ -34,16 +34,25 @@
 #
 # Substitute tiered tables for regular (row-store) tables in Python tests.
 #
-# These hooks can be used to run the existing cursor tests on tiered tables.
-# They identify tests that create row-store tables and create tiered tables
-# instead.  The hook takes an optional argument to specify how many tiers
-# to create. The default is 2.
+# These hooks are intended for basic testing of tiered tables.  There are several
+# pieces of functionality here.
 #
-# To run with 3 tiers per table:
-#     ../test/suite/run.py --hooks tiered=3 cursor
+# 1. We add tiered storage parameters to the config when calling wiredtiger_open()
+#
+# 2. If we create an object that is *not* a table:, we add options to its config
+#    so that it will be stored local-only.  Tiered storage isn't intended (yet?) for
+#    use with lsm or column store.
+#
+# 3. We add calls to flush_tier().  Currently we only flush after a checkpoint() call,
+#    but we should add others.
+#
+# 4. We stub out some functions that aren't supported by tiered tables.  This will
+#    break tests of those functions.  But often when they are used in other tests, we
+#    can get away with returning success without performing the operation.
+#
+# To run, for example, the cursor tests with these hooks enabled:
+#     ../test/suite/run.py --hooks tiered cursor
 #
-# The hooks work with may other tests in the python suite but also encounter
-# a variety of failures that I haven't tried to sort out.
 from __future__ import print_function
 
 import os, sys, wthooks
@@ -52,66 +61,140 @@ from wttest import WiredTigerTestCase
 
 # These are the hook functions that are run when particular APIs are called.
 
+# Add the local storage extension whenever we call wiredtiger_open
+def wiredtiger_open_tiered(ignored_self, args):
+    auth_token = "test_token"
+    bucket = "mybucket"
+    extension_name = "local_store"
+    prefix = "pfx-"
+    extension_libs = WiredTigerTestCase.findExtension('storage_sources', extension_name)
+    if len(extension_libs) == 0:
+        raise Exception(extension_name + ' storage source extension not found')
+
+    if not os.path.exists(bucket):
+        os.mkdir(bucket)
+    tier_string = ',tiered_storage=(auth_token=%s,' % auth_token + \
+      'bucket=%s,' % bucket + \
+      'bucket_prefix=%s,' % prefix + \
+      'name=%s),tiered_manager=(wait=0),' % extension_name + \
+      'extensions=[\"%s\"],' % extension_libs[0]
+
+    args = list(args)         # convert from a readonly tuple to a writeable list
+    args[-1] += tier_string   # Modify the list
+
+    WiredTigerTestCase.verbose(None, 3,
+        '    Calling wiredtiger_open with config = \'{}\''.format(args))
+
+    return args
+
+# Called to replace Session.alter
+def session_alter_replace(orig_session_alter, session_self, uri, config):
+    # Alter isn't implemented for tiered tables.  Only call it if this can't be the uri
+    # of a tiered table.  Note this isn't a precise match for when we did/didn't create
+    # a tiered table, but we don't have the create config around to check.
+    ret = 0
+    if not uri.startswith("table:"):
+        ret = orig_session_alter(session_self, uri, config)
+    return ret
+
+# Called to replace Session.checkpoint.
+# We add a call to flush_tier after the checkpoint to make sure we are exercising tiered
+# functionality.
+def session_checkpoint_replace(orig_session_checkpoint, session_self, config):
+    ret = orig_session_checkpoint(session_self, config)
+    if ret != 0:
+        return ret
+    WiredTigerTestCase.verbose(None, 3,
+        '    Calling flush_tier() after checkpoint')
+    return session_self.flush_tier(None)
+
+# Called to replace Session.compact
+def session_compact_replace(orig_session_compact, session_self, uri, config):
+    # Compact isn't implemented for tiered tables.  Only call it if this can't be the uri
+    # of a tiered table.  Note this isn't a precise match for when we did/didn't create
+    # a tiered table, but we don't have the create config around to check.
+    ret = 0
+    if not uri.startswith("table:"):
+        ret = orig_session_compact(session_self, uri, config)
+    return ret
+
 # Called to replace Session.create
-def session_create_replace(ntiers, orig_session_create, session_self, uri, config):
+def session_create_replace(orig_session_create, session_self, uri, config):
     if config == None:
-        base_config = ""
+        new_config = ""
     else:
-        base_config = config
-
-    # If the test is creating a table (not colstore or lsm), create a tiered table instead,
-    # using arg to determine number of tiers. Otherwise just do the create as normal.
-    #
-    # NOTE: The following code uses the old API for creating tiered tables.  As of WT-7173
-    # this no longer works.  It will be updated and fixed in WT-7440.
-    if (uri.startswith("table:") and "key_format=r" not in base_config and
-      "type=lsm" not in base_config):
-        tier_string = ""
-        for i in range(ntiers):
-            new_uri = uri.replace('table:', 'file:tier' + str(i) + '_')
-            orig_session_create(session_self, new_uri, config)
-            tier_string = tier_string + '"' + new_uri + '", '
-        tier_config = 'type=tiered,tiered=(tiers=(' + tier_string[0:-2] + ')),' + base_config
-        WiredTigerTestCase.verbose(None, 3,
-            'Creating tiered table {} with config = \'{}\''.format(uri, tier_config))
-        ret = orig_session_create(session_self, uri, tier_config)
-    else:
-        ret = orig_session_create(session_self, uri, config)
+        new_config = config
+
+    # If the test isn't creating a table (i.e., it's a column store or lsm) create it as a
+    # "local only" object.  Otherwise we get tiered storage from the connection defaults.
+    if not uri.startswith("table:") or "key_format=r" in new_config or "type=lsm" in new_config:
+        new_config = new_config + ',tiered_storage=(name=none)'
+
+    WiredTigerTestCase.verbose(None, 3,
+        '    Creating \'{}\' with config = \'{}\''.format(uri, new_config))
+    ret = orig_session_create(session_self, uri, new_config)
     return ret
 
 # Called to replace Session.drop
-def session_drop_replace(ntiers, orig_session_drop, session_self, uri, config):
-    # Drop isn't implemented for tiered tables.  Only do the delete if this could be a
-    # uri we created a tiered table for.  Note this isn't a precise match for when we
-    # did/didn't create a tiered table, but we don't have the create config around to check.
+def session_drop_replace(orig_session_drop, session_self, uri, config):
+    # Drop isn't implemented for tiered tables.  Only call it if this can't be the uri
+    # of a tiered table.  Note this isn't a precise match for when we did/didn't create
+    # a tiered table, but we don't have the create config around to check.
     ret = 0
     if not uri.startswith("table:"):
         ret = orig_session_drop(session_self, uri, config)
     return ret
 
+# Called to replace Session.rename
+def session_rename_replace(orig_session_rename, session_self, uri, newuri, config):
+    # Rename isn't implemented for tiered tables.  Only call it if this can't be the uri
+    # of a tiered table.  Note this isn't a precise match for when we did/didn't create
+    # a tiered table, but we don't have the create config around to check.
+    ret = 0
+    if not uri.startswith("table:"):
+        ret = orig_session_rename(session_self, uri, newuri, config)
+    return ret
+
+# Called to replace Session.salvage
+def session_salvage_replace(orig_session_salvage, session_self, uri, config):
+    # Salvage isn't implemented for tiered tables.  Only call it if this can't be the uri
+    # of a tiered table.  Note this isn't a precise match for when we did/didn't create
+    # a tiered table, but we don't have the create config around to check.
+    ret = 0
+    if not uri.startswith("table:"):
+        ret = orig_session_salvage(session_self, uri, config)
+    return ret
+
 # Called to replace Session.verify
-def session_verify_replace(ntiers, orig_session_verify, session_self, uri):
-    return 0
+def session_verify_replace(orig_session_verify, session_self, uri, config):
+    # Verify isn't implemented for tiered tables.  Only call it if this can't be the uri
+    # of a tiered table.  Note this isn't a precise match for when we did/didn't create
+    # a tiered table, but we don't have the create config around to check.
+    ret = 0
+    if not uri.startswith("table:"):
+        ret = orig_session_verify(session_self, uri, config)
+    return ret
 
 # Every hook file must have one or more classes descended from WiredTigerHook
 # This is where the hook functions are 'hooked' to API methods.
 class TieredHookCreator(wthooks.WiredTigerHookCreator):
-    def __init__(self, ntiers=0):
-        # Argument specifies the number of tiers to test. The default is 2.
-        if ntiers == None:
-            self.ntiers = 2
-        else:
-            self.ntiers = int(ntiers)
-
-    # Is this test one we should skip? We skip tests of features supported on standard
-    # tables but not tiered tables, specififically cursor caching and checkpoint cursors.
+    def __init__(self, arg=0):
+        # Caller can specify an optional command-line argument.  We're not using it
+        # now, but this is where it would show up.
+        return
+
+    # Is this test one we should skip?
     def skip_test(self, test):
-        skip = ["bulk_backup",
-                "checkpoint",
-                "test_cursor13_big",
-                "test_cursor13_drops",
-                "test_cursor13_dup",
-                "test_cursor13_reopens"]
+        # Skip any test that contains one of these strings as a substring
+        skip = ["backup",              # Can't backup a tiered table
+                "cursor13_ckpt",       # Checkpoint tests with cached cursors
+                "cursor13_drops",      # Tests that require working drop implementation
+                "cursor13_dup",        # More cursor cache tests
+                "cursor13_reopens",    # More cursor cache tests
+                "lsm",                 # If the test name tells us it uses lsm ignore it
+                "test_config_json",    # create replacement can't handle a json config string
+                "test_cursor_big",     # Cursor caching verified with stats
+                "tiered"]
         for item in skip:
             if item in str(test):
                 return True
@@ -124,17 +207,39 @@ class TieredHookCreator(wthooks.WiredTigerHookCreator):
         return new_tests
 
     def setup_hooks(self):
+        orig_session_alter = self.Session['alter']
+        self.Session['alter'] =  (wthooks.HOOK_REPLACE, lambda s, uri, config:
+          session_alter_replace(orig_session_alter, s, uri, config))
+
+        orig_session_close = self.Session['close']
+        self.Session['close'] = (wthooks.HOOK_REPLACE, lambda s, config=None:
+          session_close_replace(orig_session_close, s, config))
+
+        orig_session_compact = self.Session['compact']
+        self.Session['compact'] =  (wthooks.HOOK_REPLACE, lambda s, uri, config:
+          session_compact_replace(orig_session_compact, s, uri, config))
+
         orig_session_create = self.Session['create']
         self.Session['create'] =  (wthooks.HOOK_REPLACE, lambda s, uri, config:
-          session_create_replace(self.ntiers, orig_session_create, s, uri, config))
+          session_create_replace(orig_session_create, s, uri, config))
 
         orig_session_drop = self.Session['drop']
-        self.Session['drop'] = (wthooks.HOOK_REPLACE, lambda s, uri, config:
-          session_drop_replace(self.ntiers, orig_session_drop, s, uri, config))
+        self.Session['drop'] = (wthooks.HOOK_REPLACE, lambda s, uri, config=None:
+          session_drop_replace(orig_session_drop, s, uri, config))
+
+        orig_session_rename = self.Session['rename']
+        self.Session['rename'] = (wthooks.HOOK_REPLACE, lambda s, uri, newuri, config=None:
+          session_rename_replace(orig_session_rename, s, uri, newuri, config))
+
+        orig_session_salvage = self.Session['salvage']
+        self.Session['salvage'] = (wthooks.HOOK_REPLACE, lambda s, uri, config:
+          session_salvage_replace(orig_session_salvage, s, uri, config))
 
         orig_session_verify = self.Session['verify']
-        self.Session['verify'] = (wthooks.HOOK_REPLACE, lambda s, uri:
-          session_verify_replace(self.ntiers, orig_session_verify, s, uri))
+        self.Session['verify'] = (wthooks.HOOK_REPLACE, lambda s, uri, config:
+          session_verify_replace(orig_session_verify, s, uri, config))
+
+        self.wiredtiger['wiredtiger_open'] = (wthooks.HOOK_ARGS, wiredtiger_open_tiered)
 
 # Every hook file must have a top level initialize function,
 # returning a list of WiredTigerHook objects.
diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py
index ab4d7689d3f..92daaa62e37 100755
--- a/src/third_party/wiredtiger/test/suite/wttest.py
+++ b/src/third_party/wiredtiger/test/suite/wttest.py
@@ -284,6 +284,18 @@ class WiredTigerTestCase(unittest.TestCase):
         self.skipped = True
         super(WiredTigerTestCase, self).skipTest(reason)
 
+    # Construct the expected filename for an extension library and return
+    # the name if the file exists.
+    @staticmethod
+    def findExtension(dirname, libname):
+        pat = os.path.join(WiredTigerTestCase._builddir, 'ext',
+            dirname, libname, '.libs', 'libwiredtiger_*.so')
+        filenames = glob.glob(pat)
+        if len(filenames) > 1:
+            raise Exception(self.shortid() + ": " + ext +
+                ": multiple extensions libraries found matching: " + pat)
+        return filenames
+
     # Return the wiredtiger_open extension argument for
     # any needed shared library.
     def extensionsConfig(self):
@@ -312,9 +324,7 @@ class WiredTigerTestCase(unittest.TestCase):
                     ": extension is not named <dir>/<name>")
             libname = splits[1]
             dirname = splits[0]
-            pat = os.path.join(WiredTigerTestCase._builddir, 'ext',
-                dirname, libname, '.libs', 'libwiredtiger_*.so')
-            filenames = glob.glob(pat)
+            filenames = self.findExtension(dirname, libname)
             if len(filenames) == 0:
                 if skipIfMissing:
                     self.skipTest('extension "' + ext + '" not built')
@@ -323,10 +333,6 @@ class WiredTigerTestCase(unittest.TestCase):
                     raise Exception(self.shortid() +
                         ": " + ext +
                         ": no extensions library found matching: " + pat)
-            elif len(filenames) > 1:
-                raise Exception(self.shortid() +
-                    ": " + ext +
-                    ": multiple extensions libraries found matching: " + pat)
             complete = '"' + filenames[0] + '"' + extconf
             if ext in extfiles:
                 if extfiles[ext] != complete:
author	Chenhao Qu <chenhao.qu@mongodb.com>	2021-06-16 05:46:37 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2021-06-16 06:08:31 +0000
commit	fa5fb112aa7690954e547aa9ea994533c18c40c7 (patch)
tree	22b594a572f10b53d0a20378acaa6b7ece1c287e
parent	ce3290347bf15009ce45a941a263b3ac42dd7e46 (diff)
download	mongo-fa5fb112aa7690954e547aa9ea994533c18c40c7.tar.gz