Import wiredtiger: c5b552ac2e781e3f146ffa8ad748d9aa090e0623 from branch mongodb-4.6

ref: dc2383066e..c5b552ac2e
for: 4.8.0

WT-6690 Add support for table import when the exported configuration is provided
WT-6732 Fix post-task command noises in Evergreen task logs
author: Luke Chen <luke.chen@mongodb.com> 2020-09-29 18:09:52 +1000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-09-29 08:27:34 +0000
commit: 6260558d1f930a4bd81c02f53aae0a54e77177c4 (patch)
tree: cf20c677b6d540a56de1e15c4573abc4a34aabc6
parent: a8b665d074ca7b85d25e19a44c01cc5b86b3e40b (diff)
download: mongo-6260558d1f930a4bd81c02f53aae0a54e77177c4.tar.gz
7 files changed, 543 insertions, 186 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 6db2bb79884..6cef9ac9caf 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
     "vendor": "wiredtiger",
     "github": "wiredtiger/wiredtiger.git",
     "branch": "mongodb-4.6",
-    "commit": "dc2383066ede0925e3efa5e91d068d85a8c21c6e"
+    "commit": "c5b552ac2e781e3f146ffa8ad748d9aa090e0623"
 }
diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c
index c3d7417ff14..d6a81432596 100644
--- a/src/third_party/wiredtiger/src/schema/schema_create.c
+++ b/src/third_party/wiredtiger/src/schema/schema_create.c
@@ -104,6 +104,11 @@ __create_file(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const c
      * reconstruct the configuration metadata from the file.
      */
     if (import) {
+        /* First verify that the data to import exists on disk. */
+        WT_IGNORE_RET(__wt_fs_exist(session, filename, &exists));
+        if (!exists)
+            WT_ERR_MSG(session, ENOENT, "%s", uri);
+
         import_repair =
           __wt_config_getones(session, config, "import.repair", &cval) == 0 && cval.val != 0;
         if (!import_repair) {
@@ -131,16 +136,9 @@ __create_file(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const c
                   uri);
             }
         }
-    }
-
-    if (import) {
-        WT_IGNORE_RET(__wt_fs_exist(session, filename, &exists));
-        if (!exists)
-            WT_ERR_MSG(session, ENOENT, "%s: attempted to import file that does not exist", uri);
-    } else {
+    } else
         /* Create the file. */
         WT_ERR(__wt_block_manager_create(session, filename, allocsize));
-    }
 
     if (WT_META_TRACKING(session))
         WT_ERR(__wt_meta_track_fileop(session, NULL, uri));
@@ -591,14 +589,15 @@ static int
 __create_table(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config)
 {
     WT_CONFIG conf;
-    WT_CONFIG_ITEM cgkey, cgval, cval;
+    WT_CONFIG_ITEM cgkey, cgval, ckey, cval;
     WT_DECL_RET;
     WT_TABLE *table;
     size_t cgsize;
-    int ncolgroups;
+    int ncolgroups, nkeys;
     char *tableconf, *cgname;
     const char *cfg[4] = {WT_CONFIG_BASE(session, table_meta), config, NULL, NULL};
     const char *tablename;
+    bool import, import_repair;
 
     cgname = NULL;
     table = NULL;
@@ -608,14 +607,39 @@ __create_table(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const
 
     tablename = uri;
     WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
+    import = __wt_config_getones(session, config, "import.enabled", &cval) == 0 && cval.val != 0;
 
     /* Check if the table already exists. */
     if ((ret = __wt_metadata_search(session, uri, &tableconf)) != WT_NOTFOUND) {
-        if (exclusive)
+        /*
+         * Regardless of the 'exclusive' flag, we should raise an error if we try to import an
+         * existing URI rather than just silently returning.
+         */
+        if (exclusive || import)
             WT_TRET(EEXIST);
         goto err;
     }
 
+    if (import) {
+        import_repair =
+          __wt_config_getones(session, config, "import.repair", &cval) == 0 && cval.val != 0;
+        /*
+         * If this is an import but not a repair, check that the exported table metadata is provided
+         * in the config.
+         */
+        if (!import_repair) {
+            __wt_config_init(session, &conf, config);
+            for (nkeys = 0; (ret = __wt_config_next(&conf, &ckey, &cval)) == 0; nkeys++)
+                ;
+            if (nkeys == 1)
+                WT_ERR_MSG(session, EINVAL,
+                  "%s: import requires that the table configuration is specified or the "
+                  "'repair' option is provided",
+                  uri);
+            WT_ERR_NOTFOUND_OK(ret, false);
+        }
+    }
+
     WT_ERR(__wt_config_gets(session, cfg, "colgroups", &cval));
     __wt_config_subinit(session, &conf, &cval);
     for (ncolgroups = 0; (ret = __wt_config_next(&conf, &cgkey, &cgval)) == 0; ncolgroups++)
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index b62cbabff56..6a618ec3149 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -285,8 +285,10 @@ functions:
         set -o errexit
         set -o verbose
 
-        # Dump stderr/stdout contents generated by the C libraries onto console for Python tests
-        find "WT_TEST" -name "std*.txt" ! -empty -printf "\nContents from '%p':\n\n" -exec cat {} \;
+        if [ -d "WT_TEST" ]; then
+          # Dump stderr/stdout generated by the C libraries for Python tests; each path is passed as $1
+          find "WT_TEST" -name "std*.txt" ! -empty -exec sh -c 'echo "Contents from $1:"; cat "$1"' sh {} \;
+        fi
 
   "checkpoint test":
     command: shell.exec
diff --git a/src/third_party/wiredtiger/test/suite/test_import01.py b/src/third_party/wiredtiger/test/suite/test_import01.py
index f6c67a82c3b..59eeb06305c 100644
--- a/src/third_party/wiredtiger/test/suite/test_import01.py
+++ b/src/third_party/wiredtiger/test/suite/test_import01.py
@@ -27,34 +27,47 @@
 # OTHER DEALINGS IN THE SOFTWARE.
 #
 # test_import01.py
-# Import a file into a running database.
+# Import a file into a running database for the following scenarios:
+# - The source database and destination database are different.
+# - The source database and destination database are the same.
 
-import os, re, shutil
+import os, random, re, shutil, string
 import wiredtiger, wttest
 
-def timestamp_str(t):
-    return '%x' % t
+# Shared base class used by import tests.
+class test_import_base(wttest.WiredTigerTestCase):
 
-class test_import01(wttest.WiredTigerTestCase):
-    conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)'
-    session_config = 'isolation=snapshot'
-
-    def update(self, uri, key, value, commit_ts):
+    # Insert or update a key/value at the supplied timestamp.
+    def update(self, uri, key, value, ts):
         cursor = self.session.open_cursor(uri)
         self.session.begin_transaction()
-        cursor[key] = value
-        self.session.commit_transaction('commit_timestamp=' + timestamp_str(commit_ts))
+        if type(value) in [list, tuple]:
+            cursor.set_key(key)
+            cursor.set_value(*value)
+            cursor.insert()
+        else:
+            cursor[key] = value
+        self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(ts))
         cursor.close()
 
-    def check(self, uri, key, value, read_ts):
+    # Verify the specified key/value is visible at the supplied timestamp.
+    def check_record(self, uri, key, value, ts):
         cursor = self.session.open_cursor(uri)
-        self.session.begin_transaction('read_timestamp=' + timestamp_str(read_ts))
+        self.session.begin_transaction('read_timestamp=' + self.timestamp_str(ts))
         cursor.set_key(key)
         self.assertEqual(0, cursor.search())
         self.assertEqual(value, cursor.get_value())
         self.session.rollback_transaction()
         cursor.close()
 
+    # Verify a range of records/timestamps.
+    def check(self, uri, keys, values, ts):
+        for i in range(len(keys)):
+            if type(values[i]) in [tuple]:
+                self.check_record(uri, keys[i], list(values[i]), ts[i])
+            else:
+                self.check_record(uri, keys[i], values[i], ts[i])
+
     # We know the ID can be different between configs, so just remove it from comparison.
     # Everything else should be the same.
     def config_compare(self, aconf, bconf):
@@ -66,60 +79,68 @@ class test_import01(wttest.WiredTigerTestCase):
              re.findall('\w+=\(.*?\)+', b))
         self.assertTrue(a.sort() == b.sort())
 
-    # Helper for populating a database to simulate importing files into an existing database.
-    def populate(self):
-        # Create file:test_import01_[1-100].
-        for fileno in range(1, 100):
-            uri = 'file:test_import01_{}'.format(fileno)
+    # Populate a database with N tables, each having M rows.
+    def populate(self, ntables, nrows):
+        for table in range(0, ntables):
+            uri = 'table:test_import_{}'.format(
+                ''.join(random.choice(string.ascii_letters) for i in range(10)))
             self.session.create(uri, 'key_format=i,value_format=S')
             cursor = self.session.open_cursor(uri)
-            # Insert keys [1-100] with value 'foo'.
-            for key in range(1, 100):
-                cursor[key] = 'foo'
+            for key in range(0, nrows):
+                cursor[key] = 'value_{}_{}'.format(table, key)
             cursor.close()
 
-    def copy_file(self, file_name, old_dir, new_dir):
-        old_path = os.path.join(old_dir, file_name)
-        if os.path.isfile(old_path) and "WiredTiger.lock" not in file_name and \
+    # Copy a file from a source directory to a destination directory.
+    def copy_file(self, file_name, src_dir, dest_dir):
+        src_path = os.path.join(src_dir, file_name)
+        if os.path.isfile(src_path) and "WiredTiger.lock" not in file_name and \
             "Tmplog" not in file_name and "Preplog" not in file_name:
-            shutil.copy(old_path, new_dir)
+            shutil.copy(src_path, dest_dir)
 
-    def test_file_import(self):
-        original_db_file = 'original_db_file'
-        uri = 'file:' + original_db_file
+    # Convert a WiredTiger timestamp to a string.
+    def timestamp_str(self, t):
+        return '%x' % t
 
-        create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u'
-        self.session.create(uri, create_config)
+# test_import01
+class test_import01(test_import_base):
 
-        key1 = b'1'
-        key2 = b'2'
-        key3 = b'3'
-        key4 = b'4'
-        value1 = b'\x01\x02aaa\x03\x04'
-        value2 = b'\x01\x02bbb\x03\x04'
-        value3 = b'\x01\x02ccc\x03\x04'
-        value4 = b'\x01\x02ddd\x03\x04'
+    conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)'
+    session_config = 'isolation=snapshot'
 
-        # Add some data.
-        self.update(uri, key1, value1, 10)
-        self.update(uri, key2, value2, 20)
+    original_db_file = 'original_db_file'
+    uri = 'file:' + original_db_file
 
-        # Perform a checkpoint.
-        self.session.checkpoint()
+    nrows = 100
+    ntables = 10
+    keys = [b'1', b'2', b'3', b'4', b'5', b'6']
+    values = [b'\x01\x02aaa\x03\x04', b'\x01\x02bbb\x03\x04', b'\x01\x02ccc\x03\x04',
+              b'\x01\x02ddd\x03\x04', b'\x01\x02eee\x03\x04', b'\x01\x02fff\x03\x04']
+    ts = [10*k for k in range(1, len(keys)+1)]
+    create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u'
+
+    def test_file_import(self):
+        self.session.create(self.uri, self.create_config)
 
-        # Add more data.
-        self.update(uri, key3, value3, 30)
-        self.update(uri, key4, value4, 40)
+        # Add data and perform a checkpoint.
+        min_idx = 0
+        max_idx = len(self.keys) // 3
+        for i in range(min_idx, max_idx):
+            self.update(self.uri, self.keys[i], self.values[i], self.ts[i])
+        self.session.checkpoint()
 
-        # Perform a checkpoint.
+        # Add more data and checkpoint again.
+        min_idx = max_idx
+        max_idx = 2*len(self.keys) // 3
+        for i in range(min_idx, max_idx):
+            self.update(self.uri, self.keys[i], self.values[i], self.ts[i])
         self.session.checkpoint()
 
         # Export the metadata for the table.
         c = self.session.open_cursor('metadata:', None, None)
-        original_db_file_config = c[uri]
+        original_db_file_config = c[self.uri]
         c.close()
 
-        self.printVerbose(3, '\nFILE CONFIG\n' + original_db_file_config)
+        self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config)
 
         # Close the connection.
         self.close_conn()
@@ -132,107 +153,88 @@ class test_import01(wttest.WiredTigerTestCase):
         self.session = self.setUpSessionOpen(self.conn)
 
         # Make a bunch of files and fill them with data.
-        self.populate()
+        self.populate(self.ntables, self.nrows)
+        self.session.checkpoint()
 
         # Copy over the datafiles for the object we want to import.
-        self.copy_file(original_db_file, '.', newdir)
+        self.copy_file(self.original_db_file, '.', newdir)
 
         # Contruct the config string.
         import_config = 'import=(enabled,repair=false,file_metadata=(' + \
             original_db_file_config + '))'
 
         # Import the file.
-        self.session.create(uri, import_config)
+        self.session.create(self.uri, import_config)
 
         # Verify object.
-        self.session.verify(uri)
+        self.session.verify(self.uri)
 
         # Check that the previously inserted values survived the import.
-        self.check(uri, key1, value1, 10)
-        self.check(uri, key2, value2, 20)
-        self.check(uri, key3, value3, 30)
-        self.check(uri, key4, value4, 40)
+        self.check(self.uri, self.keys[:max_idx], self.values[:max_idx], self.ts[:max_idx])
 
         # Compare configuration metadata.
         c = self.session.open_cursor('metadata:', None, None)
-        current_db_file_config = c[uri]
+        current_db_file_config = c[self.uri]
         c.close()
         self.config_compare(original_db_file_config, current_db_file_config)
 
-        key5 = b'5'
-        key6 = b'6'
-        value5 = b'\x01\x02eee\x03\x04'
-        value6 = b'\x01\x02fff\x03\x04'
-
-        # Add some data and check that the file operates as usual after importing.
-        self.update(uri, key5, value5, 50)
-        self.update(uri, key6, value6, 60)
-
-        self.check(uri, key5, value5, 50)
-        self.check(uri, key6, value6, 60)
+        # Add some data and check that the table operates as usual after importing.
+        min_idx = max_idx
+        max_idx = len(self.keys)
+        for i in range(min_idx, max_idx):
+            self.update(self.uri, self.keys[i], self.values[i], self.ts[i])
+        self.check(self.uri, self.keys, self.values, self.ts)
 
         # Perform a checkpoint.
         self.session.checkpoint()
 
     def test_file_import_dropped_file(self):
-        original_db_file = 'original_db_file'
-        uri = 'file:' + original_db_file
-
-        create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u'
-        self.session.create(uri, create_config)
-
-        key1 = b'1'
-        key2 = b'2'
-        value1 = b'\x01\x02aaa\x03\x04'
-        value2 = b'\x01\x02bbb\x03\x04'
+        self.session.create(self.uri, self.create_config)
 
-        # Add some data.
-        self.update(uri, key1, value1, 10)
-        self.update(uri, key2, value2, 20)
-
-        # Perform a checkpoint.
+        # Add data and perform a checkpoint.
+        for i in range(0, len(self.keys)):
+            self.update(self.uri, self.keys[i], self.values[i], self.ts[i])
         self.session.checkpoint()
 
         # Export the metadata for the table.
         c = self.session.open_cursor('metadata:', None, None)
-        original_db_file_config = c[uri]
+        original_db_file_config = c[self.uri]
         c.close()
 
-        self.printVerbose(3, '\nFILE CONFIG\n' + original_db_file_config)
+        self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config)
 
         # Make a bunch of files and fill them with data.
-        self.populate()
+        self.populate(self.ntables, self.nrows)
 
         # Make a copy of the data file that we're about to drop.
         backup_dir = 'BACKUP'
         shutil.rmtree(backup_dir, ignore_errors=True)
         os.mkdir(backup_dir)
-        self.copy_file(original_db_file, '.', backup_dir)
+        self.copy_file(self.original_db_file, '.', backup_dir)
 
         # Drop the table.
         # We'll be importing it back into our database shortly.
-        self.session.drop(uri)
+        self.session.drop(self.uri)
 
         # Now copy it back to our database directory.
-        self.copy_file(original_db_file, backup_dir, '.')
+        self.copy_file(self.original_db_file, backup_dir, '.')
 
         # Contruct the config string.
         import_config = 'import=(enabled,repair=false,file_metadata=(' + \
             original_db_file_config + '))'
 
         # Import the file.
-        self.session.create(uri, import_config)
+        self.session.create(self.uri, import_config)
 
         # Verify object.
-        self.session.verify(uri)
+        self.session.verify(self.uri)
 
         # Check that the previously inserted values survived the import.
-        self.check(uri, key1, value1, 10)
-        self.check(uri, key2, value2, 20)
+        self.check(self.uri, self.keys, self.values, self.ts)
 
         # Compare configuration metadata.
         c = self.session.open_cursor('metadata:', None, None)
-        current_db_file_config = c[uri]
+        current_db_file_config = c[self.uri]
         c.close()
         self.config_compare(original_db_file_config, current_db_file_config)
 
diff --git a/src/third_party/wiredtiger/test/suite/test_import02.py b/src/third_party/wiredtiger/test/suite/test_import02.py
index 6fda30d8899..1c2ea0f6817 100644
--- a/src/third_party/wiredtiger/test/suite/test_import02.py
+++ b/src/third_party/wiredtiger/test/suite/test_import02.py
@@ -31,58 +31,31 @@
 
 import os, shutil
 import wiredtiger, wttest
+from test_import01 import test_import_base
 
-def timestamp_str(t):
-    return '%x' % t
-
-class test_import02(wttest.WiredTigerTestCase):
+class test_import02(test_import_base):
     conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)'
     session_config = 'isolation=snapshot'
 
-    def update(self, uri, key, value, commit_ts):
-        cursor = self.session.open_cursor(uri)
-        self.session.begin_transaction()
-        cursor[key] = value
-        self.session.commit_transaction('commit_timestamp=' + timestamp_str(commit_ts))
-        cursor.close()
+    original_db_file = 'original_db_file'
+    uri = 'file:' + original_db_file
 
-    # Helper for populating a database to simulate importing files into an existing database.
-    def populate(self):
-        # Create file:test_import02_[1-100].
-        for fileno in range(1, 100):
-            uri = 'file:test_import02_{}'.format(fileno)
-            self.session.create(uri, 'key_format=i,value_format=S')
-            cursor = self.session.open_cursor(uri)
-            # Insert keys [1-100] with value 'foo'.
-            for key in range(1, 100):
-                cursor[key] = 'foo'
-            cursor.close()
-
-    def copy_file(self, file_name, old_dir, new_dir):
-        old_path = os.path.join(old_dir, file_name)
-        if os.path.isfile(old_path) and "WiredTiger.lock" not in file_name and \
-            "Tmplog" not in file_name and "Preplog" not in file_name:
-            shutil.copy(old_path, new_dir)
+    nrows = 100
+    ntables = 10
+    keys = [b'1', b'2', b'3', b'4', b'5', b'6']
+    values = [b'\x01\x02aaa\x03\x04', b'\x01\x02bbb\x03\x04', b'\x01\x02ccc\x03\x04',
+              b'\x01\x02ddd\x03\x04', b'\x01\x02eee\x03\x04', b'\x01\x02fff\x03\x04']
+    ts = [10*k for k in range(1, len(keys)+1)]
+    create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u'
 
     # The cases where 'file_metadata' is empty or the config option itself is missing entirely are
     # almost identical. Let's capture this in a helper and call them from each test.
     def no_metadata_helper(self, import_config):
-        original_db_file = 'original_db_file'
-        uri = 'file:' + original_db_file
-
-        create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u'
-        self.session.create(uri, create_config)
-
-        key1 = b'1'
-        key2 = b'2'
-        value1 = b'\x01\x02aaa\x03\x04'
-        value2 = b'\x01\x02bbb\x03\x04'
+        self.session.create(self.uri, self.create_config)
 
-        # Add some data.
-        self.update(uri, key1, value1, 10)
-        self.update(uri, key2, value2, 20)
-
-        # Perform a checkpoint.
+        # Add data and perform a checkpoint.
+        for i in range(0, len(self.keys)):
+            self.update(self.uri, self.keys[i], self.values[i], self.ts[i])
         self.session.checkpoint()
 
         # Close the connection.
@@ -96,14 +69,14 @@ class test_import02(wttest.WiredTigerTestCase):
         self.session = self.setUpSessionOpen(self.conn)
 
         # Copy over the datafiles for the object we want to import.
-        self.copy_file(original_db_file, '.', newdir)
+        self.copy_file(self.original_db_file, '.', newdir)
 
         # Import the file.
         # Since we need "file_metadata" without the "repair" option, we should expect an error here.
         with self.expectedStderrPattern(
             'file:original_db_file: import requires that \'file_metadata\' is specified'):
             self.assertRaisesException(wiredtiger.WiredTigerError,
-                lambda: self.session.create(uri, import_config))
+                lambda: self.session.create(self.uri, import_config))
 
     def test_file_import_empty_metadata(self):
         self.no_metadata_helper('import=(enabled,repair=false,file_metadata="")')
@@ -112,34 +85,22 @@ class test_import02(wttest.WiredTigerTestCase):
         self.no_metadata_helper('import=(enabled,repair=false)')
 
     def test_file_import_existing_uri(self):
-        original_db_file = 'original_db_file'
-        uri = 'file:' + original_db_file
-
-        create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u'
-        self.session.create(uri, create_config)
-
-        key1 = b'1'
-        key2 = b'2'
-
-        value1 = b'\x01\x02aaa\x03\x04'
-        value2 = b'\x01\x02bbb\x03\x04'
+        self.session.create(self.uri, self.create_config)
 
-        # Add some data.
-        self.update(uri, key1, value1, 10)
-        self.update(uri, key2, value2, 20)
-
-        # Perform a checkpoint.
+        # Add data and perform a checkpoint.
+        for i in range(0, len(self.keys)):
+            self.update(self.uri, self.keys[i], self.values[i], self.ts[i])
         self.session.checkpoint()
 
         # Export the metadata for the table.
         c = self.session.open_cursor('metadata:', None, None)
-        original_db_file_config = c[uri]
+        original_db_file_config = c[self.uri]
         c.close()
 
-        self.printVerbose(3, '\nFILE CONFIG\n' + original_db_file_config)
+        self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config)
 
         # Make a bunch of files and fill them with data.
-        self.populate()
+        self.populate(self.ntables, self.nrows)
 
         # Contruct the config string.
         import_config = 'import=(enabled,repair=false,file_metadata=(' + \
@@ -148,24 +109,23 @@ class test_import02(wttest.WiredTigerTestCase):
         # Try to import the file even though it already exists in our database.
         # We should get an error back.
         self.assertRaisesException(wiredtiger.WiredTigerError,
-            lambda: self.session.create(uri, import_config))
+            lambda: self.session.create(self.uri, import_config))
 
     def test_import_file_missing_file(self):
-        original_db_file = 'original_db_file'
-        uri = 'file:' + original_db_file
-
         # Make a bunch of files and fill them with data.
-        self.populate()
-
+        self.populate(self.ntables, self.nrows)
         self.session.checkpoint()
 
         # Export the metadata for one of the files we made.
         # We just need an example of what a file configuration would typically look like.
-        c = self.session.open_cursor('metadata:', None, None)
-        example_db_file_config = c['file:test_import02_1']
-        c.close()
+        cursor = self.session.open_cursor('metadata:', None, None)
+        for k, v in cursor:
+            if k.startswith('table:'):
+                example_db_file_config = cursor[k]
+                break
+        cursor.close()
 
-        self.printVerbose(3, '\nFILE CONFIG\n' + example_db_file_config)
+        self.printVerbose(3, '\nFile configuration:\n' + example_db_file_config)
 
         # Contruct the config string.
         import_config = 'import=(enabled,repair=false,file_metadata=(' + \
@@ -173,7 +133,5 @@ class test_import02(wttest.WiredTigerTestCase):
 
         # Try to import a file that doesn't exist on disk.
         # We should get an error back.
-        with self.expectedStderrPattern(
-            'file:original_db_file: attempted to import file that does not exist'):
-            self.assertRaisesException(wiredtiger.WiredTigerError,
-                lambda: self.session.create(uri, import_config))
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.create(self.uri, import_config), '/No such file or directory/')
diff --git a/src/third_party/wiredtiger/test/suite/test_import03.py b/src/third_party/wiredtiger/test/suite/test_import03.py
new file mode 100644
index 00000000000..050e9008e57
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_import03.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_import03.py
+# Import a table into a running database.
+
+import os, random, shutil
+from wtscenario import make_scenarios
+from test_import01 import test_import_base
+
+class test_import03(test_import_base):
+    conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)'
+    session_config = 'isolation=snapshot'
+
+    ntables = 10
+    nrows = 100
+    scenarios = make_scenarios([
+        ('simple_table', dict(
+            is_simple = True,
+            keys = [k for k in range(1, nrows+1)],
+            values = random.sample(range(1000000), k=nrows),
+            config = 'key_format=r,value_format=i')),
+        ('table_with_named_columns', dict(
+            is_simple = False,
+            keys = [k for k in range(1, 7)],
+            values = [('Australia', 'Canberra', 1),('Japan', 'Tokyo', 2),('Italy', 'Rome', 3),
+              ('China', 'Beijing', 4),('Germany', 'Berlin', 5),('South Korea', 'Seoul', 6)],
+            config = 'columns=(id,country,capital,population),key_format=r,value_format=SSi')),
+    ])
+
+    # Test something table specific like a projection.
+    def check_projections(self, uri, keys, values, ts):
+        for i in range(0, len(keys)):
+            self.check_record(uri + '(country,capital)',
+                              keys[i], [values[i][0], values[i][1]], ts[i])
+            self.check_record(uri + '(country,population)',
+                              keys[i], [values[i][0], values[i][2]], ts[i])
+            self.check_record(uri + '(capital,population)',
+                              keys[i], [values[i][1], values[i][2]], ts[i])
+
+    def test_table_import(self):
+        # Add some data and checkpoint.
+        self.populate(self.ntables, self.nrows)
+        self.session.checkpoint()
+
+        original_db_table = 'original_db_table'
+        uri = 'table:' + original_db_table
+        create_config = 'allocation_size=512,log=(enabled=true),' + self.config
+        self.session.create(uri, create_config)
+
+        keys = self.keys
+        values = self.values
+        ts = [10*k for k in range(1, len(keys)+1)]
+
+        # Add data and perform a checkpoint.
+        min_idx = 0
+        max_idx = len(keys) // 3
+        for i in range(min_idx, max_idx):
+            self.update(uri, keys[i], values[i], ts[i])
+        self.session.checkpoint()
+
+        # Add more data and checkpoint again.
+        min_idx = max_idx
+        max_idx = 2*len(keys) // 3
+        for i in range(min_idx, max_idx):
+            self.update(uri, keys[i], values[i], ts[i])
+        self.session.checkpoint()
+
+        # Export the metadata for the table.
+        original_db_file_uri = 'file:' + original_db_table + '.wt'
+        c = self.session.open_cursor('metadata:', None, None)
+        original_db_table_config = c[uri]
+        original_db_file_config = c[original_db_file_uri]
+        c.close()
+
+        self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config)
+        self.printVerbose(3, '\nTable configuration:\n' + original_db_table_config)
+
+        # Construct the config string.
+        import_config = '{},import=(enabled,repair=false,file_metadata=({}))'.format(
+            original_db_table_config, original_db_file_config)
+
+        # Close the connection.
+        self.close_conn()
+
+        # Create a new database and connect to it.
+        newdir = 'IMPORT_DB'
+        shutil.rmtree(newdir, ignore_errors=True)
+        os.mkdir(newdir)
+        self.conn = self.setUpConnectionOpen(newdir)
+        self.session = self.setUpSessionOpen(self.conn)
+
+        # Make a bunch of files and fill them with data.
+        self.populate(self.ntables, self.nrows)
+        self.session.checkpoint()
+
+        # Copy over the datafiles for the object we want to import.
+        self.copy_file(original_db_table + '.wt', '.', newdir)
+
+        # Import the table.
+        self.session.create(uri, import_config)
+
+        # Verify object.
+        self.session.verify(uri)
+
+        # Check that the previously inserted values survived the import.
+        self.check(uri, keys[:max_idx], values[:max_idx], ts[:max_idx])
+
+        # Check against projections when the table is not simple.
+        if not self.is_simple:
+            self.check_projections(uri, keys[:max_idx], values[:max_idx], ts[:max_idx])
+
+        # Compare configuration metadata.
+        c = self.session.open_cursor('metadata:', None, None)
+        current_db_table_config = c[uri]
+        c.close()
+        self.config_compare(original_db_table_config, current_db_table_config)
+
+        # Add some data and check that the table operates as usual after importing.
+        min_idx = max_idx
+        max_idx = len(keys)
+        for i in range(min_idx, max_idx):
+            self.update(uri, keys[i], values[i], ts[i])
+        self.check(uri, keys, values, ts)
+        if not self.is_simple:
+            self.check_projections(uri, keys, values, ts)
+
+        # Perform a checkpoint.
+        self.session.checkpoint()
+
+if __name__ == '__main__':
+    wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_import04.py b/src/third_party/wiredtiger/test/suite/test_import04.py
new file mode 100644
index 00000000000..eb79e97ad5c
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_import04.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_import04.py
+# Test success and failure scenarios for importing a table into a running database.
+# 1. Attempt to import a table into a destination database where a table object of
+#    that name already exists.
+#    Expected outcome: FAILURE
+# 2. Drop a table from a database without removing the data files, then attempt to
+#    import that table into the same database.
+#    Expected outcome: SUCCESS
+# 3. Attempt to import a table into a destination database where the required data
+#    files do not exist in the destination database directory.
+#    Expected outcome: FAILURE
+# 4. Attempt to import a table into a destination database without specifying the
+#    exported table configuration.
+#    Expected outcome: FAILURE
+# 5. Attempt to import a table into a destination database without specifying the
+#    exported file configuration.
+#    Expected outcome: FAILURE
+# 6. Attempt to import a table into a destination database when the exported
+#    configuration strings are supplied, the required data files are present and the
+#    table object does not already exist in the destination database.
+#    Expected outcome: SUCCESS
+
+import os, random, shutil
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+from test_import01 import test_import_base
+
+class test_import04(test_import_base):  # Runs the import scenarios listed in the file header.
+    conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)'
+    session_config = 'isolation=snapshot'
+
+    ntables = 10  # Passed to populate() along with nrows.
+    nrows = 100  # Also the number of keys and values in the simple_table scenario.
+    scenarios = make_scenarios([  # Each scenario supplies is_simple, keys, values and config.
+        ('simple_table', dict(
+            is_simple = True,
+            keys=[k for k in range(1, nrows+1)],
+            values=random.sample(range(1000000), k=nrows),
+            config='key_format=r,value_format=i')),
+        ('table_with_named_columns', dict(
+            is_simple = False,
+            keys=[k for k in range(1, 7)],  # One key per tuple in values.
+            values=[('Australia', 'Canberra', 1),('Japan', 'Tokyo', 2),('Italy', 'Rome', 3),
+              ('China', 'Beijing', 4),('Germany', 'Berlin', 5),('South Korea', 'Seoul', 6)],
+            config='columns=(id,country,capital,population),key_format=r,value_format=SSi')),
+    ])
+
+    # Call check_record on each two-column projection URI, passing the matching pair of values.
+    def check_projections(self, uri, keys, values, ts):
+        for i in range(0, len(keys)):
+            self.check_record(uri + '(country,capital)',
+                              keys[i], [values[i][0], values[i][1]], ts[i])
+            self.check_record(uri + '(country,population)',
+                              keys[i], [values[i][0], values[i][2]], ts[i])
+            self.check_record(uri + '(capital,population)',
+                              keys[i], [values[i][1], values[i][2]], ts[i])
+
+    def test_table_import(self):
+        # Add some data and checkpoint.
+        self.populate(self.ntables, self.nrows)
+        self.session.checkpoint()
+
+        # Create the target table for import tests.
+        original_db_table = 'original_db_table'
+        uri = 'table:' + original_db_table
+        create_config = 'allocation_size=512,log=(enabled=true),' + self.config
+        self.session.create(uri, create_config)
+
+        keys = self.keys
+        values = self.values
+        ts = [10*k for k in range(1, len(keys)+1)]  # 10, 20, 30, ... one entry per key.
+
+        # Add the first third of the rows and perform a checkpoint.
+        min_idx = 0
+        max_idx = len(keys) // 3
+        for i in range(min_idx, max_idx):
+            self.update(uri, keys[i], values[i], ts[i])
+        self.session.checkpoint()
+
+        # Add the second third of the rows and checkpoint again.
+        min_idx = max_idx
+        max_idx = 2*len(keys) // 3
+        for i in range(min_idx, max_idx):
+            self.update(uri, keys[i], values[i], ts[i])
+        self.session.checkpoint()
+
+        # Check the inserted values are in the table.
+        self.check(uri, keys[:max_idx], values[:max_idx], ts[:max_idx])
+
+        # Check against projections when the table is not simple.
+        if not self.is_simple:
+            self.check_projections(uri, keys[:max_idx], values[:max_idx], ts[:max_idx])
+
+        # Export the table and file metadata by reading them from the metadata cursor.
+        original_db_file_uri = 'file:' + original_db_table + '.wt'
+        c = self.session.open_cursor('metadata:', None, None)
+        original_db_table_config = c[uri]
+        original_db_file_config = c[original_db_file_uri]
+        c.close()
+
+        # Close the connection.
+        self.close_conn()
+
+        # Construct the config string from the exported metadata.
+        import_config = '{},import=(enabled,file_metadata=({}))'.format(
+            original_db_table_config, original_db_file_config)
+
+        # Scenario 1: reopen the connection, add some data and attempt to import the table while
+        # it still exists in this database. We expect this to fail.
+        self.conn = self.setUpConnectionOpen('.')
+        self.session = self.setUpSessionOpen(self.conn)
+        self.populate(self.ntables, self.nrows)
+        self.session.checkpoint()
+        self.assertRaisesException(wiredtiger.WiredTigerError,
+            lambda: self.session.create(uri, import_config))
+
+        # Scenario 2: drop the table without removing the data files then attempt to import. We
+        # expect this operation to succeed.
+        self.session.drop(uri, 'remove_files=false')
+        # Verify the table is dropped: opening a cursor on it must now fail.
+        self.assertRaisesException(wiredtiger.WiredTigerError,
+            lambda: self.session.open_cursor(uri, None, None))
+        self.session.create(uri, import_config)
+
+        self.close_conn()
+
+        # Create a new database, connect and populate.
+        newdir = 'IMPORT_DB'
+        shutil.rmtree(newdir, ignore_errors=True)
+        os.mkdir(newdir)
+        self.conn = self.setUpConnectionOpen(newdir)
+        self.session = self.setUpSessionOpen(self.conn)
+        self.populate(self.ntables, self.nrows)
+        self.session.checkpoint()
+
+        # Scenario 3: attempt to import the table before copying the file. We expect this to fail.
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.create(uri, import_config), '/No such file or directory/')
+
+        # Copy over the datafiles for the object we want to import.
+        self.copy_file(original_db_table + '.wt', '.', newdir)
+
+        # Scenario 4: construct the config string incorrectly by omitting the table config.
+        no_table_config = 'import=(enabled,file_metadata=({}))'.format(original_db_file_config)
+
+        # Attempt to import the table. We expect this to fail.
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.create(uri, no_table_config), '/Invalid argument/')
+
+        # Scenario 5: construct the config string incorrectly by omitting the file_metadata and
+        # attempt to import the table. We expect this to fail.
+        no_file_config = '{},import=(enabled)'.format(original_db_table_config)
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.create(uri, no_file_config), '/Invalid argument/')
+
+        # Scenario 6: import the table with the full exported config and the data file in place.
+        self.session.create(uri, import_config)
+
+        # Verify object.
+        self.session.verify(uri)
+
+        # Check that the previously inserted values survived the import.
+        self.check(uri, keys[:max_idx], values[:max_idx], ts[:max_idx])
+        if not self.is_simple:
+            self.check_projections(uri, keys[:max_idx], values[:max_idx], ts[:max_idx])
+
+        # Compare the imported table's metadata with the configuration exported earlier.
+        c = self.session.open_cursor('metadata:', None, None)
+        current_db_table_config = c[uri]
+        c.close()
+        self.config_compare(original_db_table_config, current_db_table_config)
+
+        # Add the remaining rows and check that the table operates as usual after importing.
+        min_idx = max_idx
+        max_idx = len(keys)
+        for i in range(min_idx, max_idx):
+            self.update(uri, keys[i], values[i], ts[i])
+        self.check(uri, keys, values, ts)
+        if not self.is_simple:
+            self.check_projections(uri, keys, values, ts)
+
+        # Perform a checkpoint.
+        self.session.checkpoint()
+
+if __name__ == '__main__':
+    wttest.run()
author	Luke Chen <luke.chen@mongodb.com>	2020-09-29 18:09:52 +1000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2020-09-29 08:27:34 +0000
commit	6260558d1f930a4bd81c02f53aae0a54e77177c4 (patch)
tree	cf20c677b6d540a56de1e15c4573abc4a34aabc6
parent	a8b665d074ca7b85d25e19a44c01cc5b86b3e40b (diff)
download	mongo-6260558d1f930a4bd81c02f53aae0a54e77177c4.tar.gz