1 files changed, 352 insertions, 0 deletions
diff --git a/src/third_party/wiredtiger/test/suite/test_txn19.py b/src/third_party/wiredtiger/test/suite/test_txn19.py
new file mode 100755
index 00000000000..6805a0c9cc6
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_txn19.py
@@ -0,0 +1,352 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_txn19.py
+#   Transactions: test recovery with corrupted log files
+#
+
+import fnmatch, os, shutil, time
+from wtscenario import make_scenarios
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+
+# This test uses an artificially small log file limit, and creates
+# large records so two fit into a log file. This allows us to test
+# both the case when corruption happens at the beginning of a log file
+# (an even number of records have been created), and when corruption
+# happens in the middle of a log file (with an odd number of records).
+
+def corrupt(fname, truncate, offset, writeit):
+    with open(fname, 'r+') as log:
+        if offset:
+            if offset < 0:  # Negative offset means seek to the end
+                log.seek(0, 2)
+            else:
+                log.seek(offset)
+        if truncate:
+            log.truncate()
+        if writeit:
+            log.write(writeit)
+
+class test_txn19(wttest.WiredTigerTestCase, suite_subprocess):
+    base_config = 'log=(archive=false,enabled,file_max=100K),' + \
+                  'transaction_sync=(enabled,method=none)'
+    conn_config = base_config
+    corruption_type = [
+        ('removal', dict(kind='removal', f=lambda fname:
+            os.remove(fname))),
+        ('truncate', dict(kind='truncate', f=lambda fname:
+            corrupt(fname, True, 0, None))),
+        ('truncate-middle', dict(kind='truncate-middle', f=lambda fname:
+            corrupt(fname, True, 1024 * 25, None))),
+        ('zero-begin', dict(kind='zero', f=lambda fname:
+            corrupt(fname, False, 0, '\0' * 4096))),
+        ('zero-trunc', dict(kind='zero', f=lambda fname:
+            corrupt(fname, True, 0, '\0' * 4096))),
+        ('zero-end', dict(kind='zero-end', f=lambda fname:
+            corrupt(fname, False, -1, '\0' * 4096))),
+        ('garbage-begin', dict(kind='garbage-begin', f=lambda fname:
+            corrupt(fname, False, 0, 'Bad!' * 1024))),
+        ('garbage-middle', dict(kind='garbage-middle', f=lambda fname:
+            corrupt(fname, False, 1024 * 25, 'Bad!' * 1024))),
+        ('garbage-end', dict(kind='garbage-end', f=lambda fname:
+            corrupt(fname, False, -1, 'Bad!' * 1024))),
+    ]
+    # The list comprehension below expands each entry in the integer tuple
+    # list to a scenario.  For example, (3, 4, 2) expands to:
+    # ('corrupt=[3,4],checkpoint=2', dict(corruptpos=3, corruptpos2=4, chkpt=2))
+    #
+    # Each number corresponds to a log file, so for this example, we have
+    # corruption in log file 3 (using the style of corruption from
+    # corruption_type), there is a second corruption in log file 4,
+    # and there is a checkpoint in log file 2.  A value of 0 means no
+    # corruption or checkpoint.
+    corruption_pos = [
+        ('corrupt=[' + str(x) + ',' + str(y) + '],checkpoint=' + str(z),
+           dict(corruptpos=x,corruptpos2=y,chkpt=z)) for (x,y,z) in (
+               (0, 0, 0), (0, 0, 2), (6, 0, 0), (6, 0, 3), (3, 0, 0),
+               (3, 0, 2), (3, 4, 2), (3, 5, 2), (3, 0, 4))]
+    nrecords = [('nrecords=10', dict(nrecords=10)),
+                ('nrecords=11', dict(nrecords=11))]
+
+    # This function prunes out unnecessary or problematic test cases
+    # from the list of scenarios.
+    def includeFunc(name, dictarg):
+        kind = dictarg['kind']
+        corruptpos = dictarg['corruptpos']
+        chkpt = dictarg['chkpt']
+
+        # corruptpos == 0 indicates there is no corruption.
+        # (i.e. corrupt log file 0, which doesn't exist)
+        # We do want to test the case of no corruption, but we don't
+        # need to try it against every type of corruption, only one.
+        if corruptpos == 0:
+            return kind == 'removal'
+
+        # NOTE:
+        # The removal or truncation of a middle log file (not first or last)
+        # that would be used in recovery is not currently handled gracefully.
+        if (kind == 'removal' or kind == 'truncate') and \
+           corruptpos != 6 and corruptpos > chkpt:
+            return False
+
+        # All the other cases are valid
+        return True
+
+    scenarios = make_scenarios(
+        corruption_type, corruption_pos, nrecords,
+        include=includeFunc, prune=20, prunelong=1000)
+
+    uri = 'table:test_txn19'
+    create_params = 'key_format=i,value_format=S'
+
+    # Return the log file number that contains the given record
+    # number.  In this test, two records fit into each log file, and
+    # after each even record is written, a new log file is created
+    # (having no records initially).  The last log file is this
+    # (nrecords/2 + 1), given that we start with log 1.
+    def record_to_logfile(self, recordnum):
+        return recordnum / 2 + 1
+
+    # Returns the first record number in a log file.
+    def logfile_to_record(self, logfile):
+        return (logfile - 1) * 2
+
+    # Return true if the log file is corrupted.
+    # If not corrupted, the log file will produce no errors,
+    # and all the records originally written should be recovered.
+    def corrupted(self):
+        # Corruptpos == 0 means to do no corruption in any log file
+        if self.corruptpos == 0:
+            return False
+
+        # Adding zeroes to the end of a log file is indistinguishable
+        # from having a log file that is preallocated that has not been
+        # totally filled. One might argue that if this does not occur
+        # in the final log file, it could/should have been truncated.
+        # At any rate, we consider this particular corruption to be benign.
+        if self.kind == 'zero-end':
+            return False
+        return True
+
+    def show_logs(self, homedir, msg):
+        loglist = []
+        for i in range(0, 10):
+            basename = 'WiredTigerLog.000000000' + str(i)
+            fullname = homedir + os.sep + basename
+            if os.path.isfile(fullname):
+                loglist.append(i)
+                if os.stat(fullname).st_size == 0:
+                    self.tty('LOGS ' + msg + ': ' + str(i) + ' is empty')
+        self.tty('LOGS ' + msg + ': ' + str(loglist))
+
+    def copy_for_crash_restart(self, olddir, newdir):
+        ''' Simulate a crash from olddir and restart in newdir. '''
+        # with the connection still open, copy files to new directory
+        shutil.rmtree(newdir, ignore_errors=True)
+        os.mkdir(newdir)
+        for fname in os.listdir(olddir):
+            fullname = os.path.join(olddir, fname)
+            # Skip lock file on Windows since it is locked
+            if os.path.isfile(fullname) and \
+                "WiredTiger.lock" not in fullname and \
+                "Tmplog" not in fullname and \
+                "Preplog" not in fullname:
+                shutil.copy(fullname, newdir)
+
+    # Generate a value that is a bit over half the size of the log file.
+    def valuegen(self, i):
+        return str(i) + 'A' * (1024 * 60)   # ~60K
+
+    # Insert a list of keys
+    def inserts(self, keylist):
+        c = self.session.open_cursor(self.uri)
+        for i in keylist:
+            if self.chkpt > 0 and self.logfile_to_record(self.chkpt) == i:
+                self.session.checkpoint()
+            c[i] = self.valuegen(i)
+        c.close()
+
+    def checks(self, expectlist):
+        c = self.session.open_cursor(self.uri, None, None)
+        gotlist = []
+        for key, value in c:
+            gotlist.append(key)
+            self.assertEqual(self.valuegen(key), value)
+        self.assertEqual(expectlist, gotlist)
+        c.close()
+
+    def log_number_to_file_name(self, homedir, n):
+        self.assertLess(n, 10)  # assuming 1 digit
+        return homedir + os.sep + 'WiredTigerLog.000000000' + str(n)
+
+    def corrupt_log(self, homedir):
+        if not self.corrupted():
+            return
+        self.f(self.log_number_to_file_name(homedir, self.corruptpos))
+
+        # Corrupt a second log file if needed
+        if self.corruptpos2 != 0:
+            self.f(self.log_number_to_file_name(homedir, self.corruptpos2))
+
+    def corrupt_last_file(self):
+        return self.corruptpos == self.record_to_logfile(self.nrecords)
+
+    # Corruption past the last written record in a log file can sometimes
+    # be detected. In our test case, the last log file has zero or one large
+    # 60K record written into it, but it is presized to 100K.  Corruption
+    # at the end of this file creates a hole, and the corruption starts
+    # a new log record, where it can be detected as phony.  Similarly,
+    # corruption in the "middle" of the last file (actually the 25K point)
+    # can be detected if there aren't any of the insert records in the file.
+    def corrupt_hole_in_last_file(self):
+        return self.corrupt_last_file() and \
+            ((self.kind == 'garbage-middle' and self.nrecords % 2 == 0) or \
+             self.kind == 'garbage-end')
+
+    # Return true iff the log has been damaged in a way that is not detected
+    # as a corruption.  WiredTiger must be lenient about damage in any log
+    # file, because a partial log record written just before a crash is in
+    # most cases indistinguishable from a corruption.  If the beginning of
+    # the file is mangled, that is always an unexpected corruption. Situations
+    # where we cannot reliably detect corruption include:
+    #  - removal of the last log
+    #  - certain corruptions at the beginning of a log record (adding garbage
+    #      at the end of a log file can trigger this).
+    def log_corrupt_but_valid(self):
+        if self.corrupt_last_file() and self.kind == 'removal':
+            return True
+        if self.kind == 'truncate-middle' or \
+           self.kind == 'garbage-middle' or \
+           self.kind == 'garbage-end':
+            return True
+        return False
+
+    # In certain cases, we detect log corruption, but just issue warnings.
+    def expect_warning_corruption(self):
+        if self.kind == 'garbage-middle' and self.chkpt <= self.corruptpos:
+            return True
+        if self.corrupt_hole_in_last_file():
+            return True
+        return False
+
+    # For this test, at least, salvage identifies and fixes all
+    # recovery failures.
+    def expect_salvage_messages(self):
+        return self.expect_recovery_failure()
+
+    def expect_recovery_failure(self):
+        return self.corrupted() and \
+            self.corruptpos >= self.chkpt and \
+            not self.log_corrupt_but_valid()
+
+    def recovered_records(self):
+        if not self.corrupted() or self.chkpt > self.corruptpos:
+            return self.nrecords
+        if self.kind == 'garbage-end':
+            # All records in the corrupt file will be found.
+            found = self.logfile_to_record(self.corruptpos + 1)
+        else:
+            found = self.logfile_to_record(self.corruptpos)
+        return min(found, self.nrecords)
+
+    def test_corrupt_log(self):
+        ''' Corrupt the log and restart with different kinds of recovery '''
+
+        # This test creates some data, then simulates a crash with corruption.
+        # Then does a restart with recovery, then starts again with salvage,
+        # and finally starts again with recovery (adding new records).
+
+        self.session.create(self.uri, self.create_params)
+        self.inserts([x for x in range(0, self.nrecords)])
+        newdir = "RESTART"
+        self.copy_for_crash_restart(self.home, newdir)
+        self.close_conn()
+        #self.show_logs(newdir, 'before corruption')
+        self.corrupt_log(newdir)
+        #self.show_logs(newdir, 'after corruption')
+        salvage_config = self.base_config + ',salvage=true'
+        errfile = 'list.err'
+        outfile = 'list.out'
+        expect_fail = self.expect_recovery_failure()
+
+        # In cases of corruption, we cannot always call wiredtiger_open
+        # directly, because there may be a panic, and abort() is called
+        # in diagnostic mode which terminates the Python interpreter.
+        #
+        # Running any wt command externally to Python allows
+        # us to observe the failure or success safely.
+        # Use -R to force recover=on, which is the default for
+        # wiredtiger_open, (wt utilities normally have recover=error)
+        self.runWt(['-h', newdir, '-C', self.base_config, '-R', 'list'],
+            errfilename=errfile, outfilename=outfile, failure=expect_fail,
+            closeconn=False)
+
+        if expect_fail:
+            self.check_file_contains_one_of(errfile,
+            ['/log file.*corrupted/', 'WT_ERROR: non-specific WiredTiger error'])
+        else:
+            self.check_empty_file(errfile)
+            if self.expect_warning_corruption():
+                self.check_file_contains(outfile, '/log file .* corrupted/')
+            self.check_file_contains(outfile, self.uri)
+
+        found_records = self.recovered_records()
+        expect = [x for x in range(0, found_records)]
+
+        # If we are salvaging, expect an informational message
+        if self.expect_salvage_messages():
+            errpat = '.*'
+            # Possible messages:
+            #   salvage: log files x-y truncated at beginning
+            #   salvage: log file x truncated at beginning
+            #   salvage: log file x truncated
+            #   salvage: log file x removed
+            outpat = 'salvage: log file'
+        else:
+            errpat = '^$'
+            outpat = '^$'
+        with self.expectedStdoutPattern(outpat):
+            with self.expectedStderrPattern(errpat):
+                self.conn = self.wiredtiger_open(newdir, salvage_config)
+                self.session = self.setUpSessionOpen(self.conn)
+                self.checks(expect)
+
+        # Insert a couple more and simulate another crash.
+        newdir2 = "RESTART2"
+        self.inserts([self.nrecords, self.nrecords + 1])
+        expect.extend([self.nrecords, self.nrecords + 1])
+        self.copy_for_crash_restart(newdir, newdir2)
+        self.checks(expect)
+        self.reopen_conn(newdir)
+        self.checks(expect)
+        self.reopen_conn(newdir2, self.conn_config)
+        self.checks(expect)
+
+if __name__ == '__main__':
+    wttest.run()