Import wiredtiger: bb2bb7d8b680fe51351d7bce8ccc3f6d66eef770 from branch mongodb-master

ref: 3d715a5b58..bb2bb7d8b6 for: 6.2.0-rc0 WT-9821 Add option to verify to report all data corruption in a file (#8312)
author: Chenhao Qu <chenhao.qu@mongodb.com> 2022-10-19 13:59:27 +1100
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-10-19 03:39:50 +0000
commit: ee099cd81fb2b3c5176f4cbeff0ab7fdc15276a7 (patch)
tree: 37740b58dd9103f62c7d8e9362cade4907afdcdc /src/third_party
parent: a2c1142b5a7e17e7e6aae1d97dd42b46383b4da1 (diff)
download: mongo-ee099cd81fb2b3c5176f4cbeff0ab7fdc15276a7.tar.gz
7 files changed, 153 insertions, 24 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 448506a4473..8314dfe6f4d 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -1589,6 +1589,10 @@ methods = {
         Display the contents of in-memory pages as they are verified, using the application's
         message handler, intended for debugging''',
         type='boolean'),
+    Config('read_corrupt', 'false', r'''
+        A mode that allows verify to continue reading after encountering a checksum error. It
+        will skip past the corrupt block and continue with the verification process''',
+        type='boolean'),
     Config('stable_timestamp', 'false', r'''
         Ensure that no data has a start timestamp after the stable timestamp, to be run after
         rollback_to_stable.''',
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index d8f144b16f2..1c1f7d4f5c6 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
     "vendor": "wiredtiger",
     "github": "wiredtiger/wiredtiger.git",
     "branch": "mongodb-master",
-    "commit": "3d715a5b58a8c0c4acf67a9aaed0de0be0cf6244"
+    "commit": "bb2bb7d8b680fe51351d7bce8ccc3f6d66eef770"
 }
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 97b1be8bffe..d5e3438e413 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -29,11 +29,14 @@ typedef struct {
     bool dump_blocks;
     bool dump_layout;
     bool dump_pages;
+    bool read_corrupt;
 
     /* Page layout information. */
     uint64_t depth, depth_internal[100], depth_leaf[100];
 
     WT_ITEM *tmp1, *tmp2, *tmp3, *tmp4; /* Temporary buffers */
+
+    int verify_err;
 } WT_VSTUFF;
 
 static void __verify_checkpoint_reset(WT_VSTUFF *);
@@ -78,6 +81,10 @@ __verify_config(WT_SESSION_IMPL *session, const char *cfg[], WT_VSTUFF *vs)
     WT_RET(__wt_config_gets(session, cfg, "dump_pages", &cval));
     vs->dump_pages = cval.val != 0;
 
+    WT_RET(__wt_config_gets(session, cfg, "read_corrupt", &cval));
+    vs->read_corrupt = cval.val != 0;
+    vs->verify_err = 0;
+
     WT_RET(__wt_config_gets(session, cfg, "stable_timestamp", &cval));
     vs->stable_timestamp = WT_TS_NONE; /* Ignored unless a value has been set */
     if (cval.val != 0) {
@@ -260,6 +267,13 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
               session, ret = __verify_tree(session, &btree->root, &addr_unpack, vs));
 
             /*
+             * If the read_corrupt mode was turned on, we may have continued traversing and
+             * verifying the pages of the tree despite encountering an error. Set the error.
+             */
+            if (vs->verify_err != 0)
+                ret = vs->verify_err;
+
+            /*
              * We have an exclusive lock on the handle, but we're swapping root pages in-and-out of
              * that handle, and there's a race with eviction entering the tree and seeing an invalid
              * root page. Eviction must work on trees being verified (else we'd have to do our own
@@ -585,7 +599,18 @@ celltype_err:
 
             /* Verify the subtree. */
             ++vs->depth;
-            WT_RET(__wt_page_in(session, child_ref, 0));
+            ret = __wt_page_in(session, child_ref, 0);
+
+            /*
+             * If configured, continue traversing through the pages of the tree even after
+             * encountering errors reading in the page.
+             */
+            if (vs->read_corrupt && ret != 0) {
+                if (vs->verify_err == 0)
+                    vs->verify_err = ret;
+                continue;
+            } else
+                WT_RET(ret);
             ret = __verify_tree(session, child_ref, unpack, vs);
             WT_TRET(__wt_page_release(session, child_ref, 0));
             --vs->depth;
@@ -615,7 +640,18 @@ celltype_err:
 
             /* Verify the subtree. */
             ++vs->depth;
-            WT_RET(__wt_page_in(session, child_ref, 0));
+            ret = __wt_page_in(session, child_ref, 0);
+
+            /*
+             * If configured, continue traversing through the pages of the tree even after
+             * encountering errors reading in the page.
+             */
+            if (vs->read_corrupt && ret != 0) {
+                if (vs->verify_err == 0)
+                    vs->verify_err = ret;
+                continue;
+            } else
+                WT_RET(ret);
             ret = __verify_tree(session, child_ref, unpack, vs);
             WT_TRET(__wt_page_release(session, child_ref, 0));
             --vs->depth;
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index bb81254db81..24bda8a8942 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -448,7 +448,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_verify[] = {
   {"do_not_clear_txn_id", "boolean", NULL, NULL, NULL, 0},
   {"dump_address", "boolean", NULL, NULL, NULL, 0}, {"dump_blocks", "boolean", NULL, NULL, NULL, 0},
   {"dump_layout", "boolean", NULL, NULL, NULL, 0}, {"dump_offsets", "list", NULL, NULL, NULL, 0},
-  {"dump_pages", "boolean", NULL, NULL, NULL, 0},
+  {"dump_pages", "boolean", NULL, NULL, NULL, 0}, {"read_corrupt", "boolean", NULL, NULL, NULL, 0},
   {"stable_timestamp", "boolean", NULL, NULL, NULL, 0}, {"strict", "boolean", NULL, NULL, NULL, 0},
   {NULL, NULL, NULL, NULL, NULL, 0}};
 
@@ -1328,8 +1328,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
   {"WT_SESSION.verify",
     "do_not_clear_txn_id=false,dump_address=false,dump_blocks=false,"
     "dump_layout=false,dump_offsets=,dump_pages=false,"
-    "stable_timestamp=false,strict=false",
-    confchk_WT_SESSION_verify, 8},
+    "read_corrupt=false,stable_timestamp=false,strict=false",
+    confchk_WT_SESSION_verify, 9},
   {"colgroup.meta",
     "app_metadata=,assert=(commit_timestamp=none,"
     "durable_timestamp=none,read_timestamp=none,write_timestamp=off),"
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index f1e66e10ad7..372fd3fd629 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -1687,6 +1687,9 @@ struct __wt_session {
 	 * @config{dump_pages, Display the contents of in-memory pages as they are verified\, using
 	 * the application's message handler\, intended for debugging., a boolean flag; default \c
 	 * false.}
+	 * @config{read_corrupt, A mode that allows verify to continue reading after encountering a
+	 * checksum error.  It will skip past the corrupt block and continue with the verification
+	 * process., a boolean flag; default \c false.}
 	 * @config{stable_timestamp, Ensure that no data has a start timestamp after the stable
 	 * timestamp\, to be run after rollback_to_stable., a boolean flag; default \c false.}
 	 * @config{strict, Treat any verification problem as an error; by default\, verify will
diff --git a/src/third_party/wiredtiger/src/utilities/util_verify.c b/src/third_party/wiredtiger/src/utilities/util_verify.c
index 4e0a0675852..1f619e4f94f 100644
--- a/src/third_party/wiredtiger/src/utilities/util_verify.c
+++ b/src/third_party/wiredtiger/src/utilities/util_verify.c
@@ -16,12 +16,13 @@ static int
 usage(void)
 {
     static const char *options[] = {"-d config",
-      "display underlying information during verification", "-s",
+      "display underlying information during verification", "-c",
+      "continue to the next page after encountering error during verification", "-s",
       "verify against the specified timestamp", "-t", "do not clear txn ids during verification",
       NULL, NULL};
 
     util_usage(
-      "verify [-s] [-t] [-d dump_address | dump_blocks | dump_layout | dump_offsets=#,# | "
+      "verify [-s] [-t] [-c] [-d dump_address | dump_blocks | dump_layout | dump_offsets=#,# | "
       "dump_pages] "
       "[uri]",
       "options:", options);
@@ -40,13 +41,17 @@ util_verify(WT_SESSION *session, int argc, char *argv[])
     size_t size;
     int ch;
     char *config, *dump_offsets, *uri;
-    bool do_not_clear_txn_id, dump_address, dump_blocks, dump_layout, dump_pages, stable_timestamp;
+    bool do_not_clear_txn_id, dump_address, dump_blocks, dump_layout, dump_pages, read_corrupt,
+      stable_timestamp;
 
-    do_not_clear_txn_id = dump_address = dump_blocks = dump_layout = dump_pages = stable_timestamp =
-      false;
+    do_not_clear_txn_id = dump_address = dump_blocks = dump_layout = dump_pages = read_corrupt =
+      stable_timestamp = false;
     config = dump_offsets = uri = NULL;
-    while ((ch = __wt_getopt(progname, argc, argv, "d:st")) != EOF)
+    while ((ch = __wt_getopt(progname, argc, argv, "cd:st")) != EOF)
         switch (ch) {
+        case 'c':
+            read_corrupt = true;
+            break;
         case 'd':
             if (strcmp(__wt_optarg, "dump_address") == 0)
                 dump_address = true;
@@ -88,20 +93,21 @@ util_verify(WT_SESSION *session, int argc, char *argv[])
     if ((uri = util_uri(session, *argv, "table")) == NULL)
         return (1);
 
-    if (do_not_clear_txn_id || dump_address || dump_blocks || dump_layout || dump_offsets != NULL ||
-      dump_pages || stable_timestamp) {
-        size = strlen("do_not_clear_txn_id,") + strlen("dump_address,") + strlen("dump_blocks,") +
-          strlen("dump_layout,") + strlen("dump_pages,") + strlen("dump_offsets[],") +
-          (dump_offsets == NULL ? 0 : strlen(dump_offsets)) + strlen("history_store") +
-          strlen("stable_timestamp,") + 20;
+    if (do_not_clear_txn_id || read_corrupt || dump_address || dump_blocks || dump_layout ||
+      dump_offsets != NULL || dump_pages || stable_timestamp) {
+        size = strlen("do_not_clear_txn_id,") + strlen("read_corrupt,") + strlen("dump_address,") +
+          strlen("dump_blocks,") + strlen("dump_layout,") + strlen("dump_pages,") +
+          strlen("dump_offsets[],") + (dump_offsets == NULL ? 0 : strlen(dump_offsets)) +
+          strlen("history_store") + strlen("stable_timestamp,") + 20;
         if ((config = malloc(size)) == NULL) {
             ret = util_err(session, errno, NULL);
             goto err;
         }
-        if ((ret = __wt_snprintf(config, size, "%s%s%s%s%s%s%s%s%s",
+        if ((ret = __wt_snprintf(config, size, "%s%s%s%s%s%s%s%s%s%s",
                do_not_clear_txn_id ? "do_not_clear_txn_id," : "",
-               dump_address ? "dump_address," : "", dump_blocks ? "dump_blocks," : "",
-               dump_layout ? "dump_layout," : "", dump_offsets != NULL ? "dump_offsets=[" : "",
+               read_corrupt ? "read_corrupt," : "", dump_address ? "dump_address," : "",
+               dump_blocks ? "dump_blocks," : "", dump_layout ? "dump_layout," : "",
+               dump_offsets != NULL ? "dump_offsets=[" : "",
                dump_offsets != NULL ? dump_offsets : "", dump_offsets != NULL ? "]," : "",
                dump_pages ? "dump_pages," : "", stable_timestamp ? "stable_timestamp," : "")) !=
           0) {
diff --git a/src/third_party/wiredtiger/test/suite/test_verify.py b/src/third_party/wiredtiger/test/suite/test_verify.py
index f48afab1854..a252cc7724f 100755
--- a/src/third_party/wiredtiger/test/suite/test_verify.py
+++ b/src/third_party/wiredtiger/test/suite/test_verify.py
@@ -70,6 +70,15 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess):
         self.assertEqual(i, self.nentries)
         cursor.close()
 
+    def count_file_contains(self, filename, content):
+        count = 0
+        with open(filename) as f:
+            for line in f:
+                if content in line:
+                    count += 1
+            f.close()
+        return count
+
     def open_and_position(self, tablename, pct):
         """
         Open the file for the table, position it at a 4K page
@@ -144,9 +153,45 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess):
         self.conn = self.setUpConnectionOpen(".")
         self.session = self.setUpSessionOpen(self.conn)
         self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
-            lambda: self.session.verify('table:' + self.tablename, None),
+            lambda: self.session.verify('table:' + self.tablename, "read_corrupt"),
+            "/WT_SESSION.verify/")
+        self.assertEqual(self.count_file_contains("stderr.txt",
+            "calculated block checksum doesn't match expected checksum"), 1)
+
+    def test_verify_api_read_corrupt_pages(self):
+        """
+        Test verify via API, on a table that is purposely corrupted in
+        multiple places. A verify operation with read_corrupt on should
+        result in multiple checksum errors being logged.
+        """
+        params = 'key_format=S,value_format=S'
+        self.session.create('table:' + self.tablename, params)
+        self.populate(self.tablename)
+        with self.open_and_position(self.tablename, 25) as f:
+            for i in range(0, 100):
+                f.write(b'\x01\xff\x80')
+        with self.open_and_position(self.tablename, 50) as f:
+            for i in range(0, 100):
+                f.write(b'\x01\xff\x80')
+        with self.open_and_position(self.tablename, 75) as f:
+            for i in range(0, 100):
+                f.write(b'\x01\xff\x80')
+
+        # open_and_position closed the session/connection, reopen them now.
+        self.conn = self.setUpConnectionOpen(".")
+        self.session = self.setUpSessionOpen(self.conn)
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.verify('table:' + self.tablename, "read_corrupt"),
             "/WT_SESSION.verify/")
 
+        # It is expected that more than one checksum error is logged given
+        # that we have corrupted the table in multiple locations, but we may
+        # not necessarily detect all three corruptions - e.g. we won't detect
+        # a corruption if we overwrite free space or overwrite a page that is
+        # a child of another page that we overwrite.
+        self.assertGreater(self.count_file_contains("stderr.txt",
+            "calculated block checksum doesn't match expected checksum"), 1)
+
     def test_verify_process_75pct_null(self):
         """
         Test verify in a 'wt' process on a table that is purposely damaged,
@@ -158,9 +203,11 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess):
         with self.open_and_position(self.tablename, 75) as f:
             for i in range(0, 4096):
                 f.write(struct.pack('B', 0))
-        self.runWt(["verify", "table:" + self.tablename],
+        self.runWt(["verify", "-c", "table:" + self.tablename],
             errfilename="verifyerr.out", failure=True)
         self.check_non_empty_file("verifyerr.out")
+        self.assertEqual(self.count_file_contains("verifyerr.out",
+            "calculated block checksum doesn't match expected checksum"), 1)
 
     def test_verify_process_25pct_junk(self):
         """
@@ -173,9 +220,42 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess):
         with self.open_and_position(self.tablename, 25) as f:
             for i in range(0, 100):
                 f.write(b'\x01\xff\x80')
-        self.runWt(["verify", "table:" + self.tablename],
+        self.runWt(["verify", "-c", "table:" + self.tablename],
             errfilename="verifyerr.out", failure=True)
         self.check_non_empty_file("verifyerr.out")
+        self.assertEqual(self.count_file_contains("verifyerr.out",
+            "calculated block checksum doesn't match expected checksum"), 1)
+
+    def test_verify_process_read_corrupt_pages(self):
+        """
+        Test verify in a 'wt' process on a table that is purposely corrupted
+        in multiple places. A verify operation with read_corrupt on should
+        result in multiple checksum errors being logged.
+        """
+        params = 'key_format=S,value_format=S'
+        self.session.create('table:' + self.tablename, params)
+        self.populate(self.tablename)
+        with self.open_and_position(self.tablename, 25) as f:
+            for i in range(0, 100):
+                f.write(b'\x01\xff\x80')
+        with self.open_and_position(self.tablename, 75) as f:
+            for i in range(0, 100):
+                f.write(b'\x01\xff\x80')
+        with self.open_and_position(self.tablename, 80) as f:
+            for i in range(0, 100):
+                f.write(b'\x01\xff\x80')
+        self.runWt(["verify", "-c", "table:" + self.tablename],
+            errfilename="verifyerr.out", failure=True)
+
+        self.check_non_empty_file("verifyerr.out")
+
+        # It is expected that more than one checksum error is logged given
+        # that we have corrupted the table in multiple locations, but we may
+        # not necessarily detect all three corruptions - e.g. we won't detect
+        # a corruption if we overwrite free space or overwrite a page that is
+        # a child of another page that we overwrite.
+        self.assertGreater(self.count_file_contains("verifyerr.out",
+            "calculated block checksum doesn't match expected checksum"), 1)
 
     def test_verify_process_truncated(self):
         """
author	Chenhao Qu <chenhao.qu@mongodb.com>	2022-10-19 13:59:27 +1100
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-10-19 03:39:50 +0000
commit	ee099cd81fb2b3c5176f4cbeff0ab7fdc15276a7 (patch)
tree	37740b58dd9103f62c7d8e9362cade4907afdcdc /src/third_party
parent	a2c1142b5a7e17e7e6aae1d97dd42b46383b4da1 (diff)
download	mongo-ee099cd81fb2b3c5176f4cbeff0ab7fdc15276a7.tar.gz