src/third_party/wiredtiger/test/suite/test_import08.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178

#!/usr/bin/env python
#
# Public Domain 2014-present MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# test_import08.py
# Check that transaction ids from imported files are ignored regardless of write generation.

import os, re, shutil
from test_import01 import test_import_base
from wtscenario import make_scenarios

class test_import08(test_import_base):
    """Check that transaction ids in an imported file are ignored.

    The file is written in one database while a transaction is held open and
    a checkpoint is taken after every update, then it is imported into a
    brand-new database (either from its exported metadata or via repair) and
    every record is expected to be visible there.
    """

    conn_config = 'cache_size=50MB,log=(enabled)'
    session_config = 'isolation=snapshot'

    # Name and URI of the file that is created, exported and later imported.
    original_db_file = 'original_db_file'
    uri = 'file:' + original_db_file

    # Sizing passed to populate() for the tables created up front.
    nrows = 100
    ntables = 10

    # Records written to the file under test: keys[i] -> values[i] at ts[i].
    keys = [b'1', b'2', b'3', b'4', b'5', b'6']
    values = [b'\x01\x02aaa\x03\x04', b'\x01\x02bbb\x03\x04', b'\x01\x02ccc\x03\x04',
              b'\x01\x02ddd\x03\x04', b'\x01\x02eee\x03\x04', b'\x01\x02fff\x03\x04']
    ts = [10*k for k in range(1, len(keys)+1)]
    create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u'

    # Run once importing with the exported file metadata and once with repair.
    scenarios = make_scenarios([
        ('file_metadata', dict(repair=False)),
        ('repair', dict(repair=True)),
    ])

    def parse_write_gen(self, config):
        """Return the first 'write_gen=<num>' value in a metadata string.

        config: the metadata configuration string to search.

        Returns the matched number as an int. The test fails (assertion) if
        the string holds no 'write_gen=<num>' entry.
        """
        # A raw string keeps '\d' a regex escape rather than an (invalid)
        # string escape; the capture group hands back the digits directly.
        match = re.search(r"write_gen=(\d+)", config)
        self.assertIsNotNone(match)
        return int(match.group(1))

    def test_import_write_gen(self):
        # Make a bunch of files and fill them with data. This has the side effect of allocating a
        # lot of transaction ids which is important for our test.
        self.populate(self.ntables, self.nrows)

        # Find the URI of one of the generated tables.
        session2 = self.conn.open_session()
        cursor2 = session2.open_cursor('metadata:')
        generated_uri = None
        for k, _ in cursor2:
            if k.startswith('table:'):
                generated_uri = k
                break
        cursor2.close()

        # Now begin a transaction and remove the first entry in the table.
        #
        # This is going to allocate a transaction ID and more importantly, will ensure that pinned
        # ID is set for subsequent checkpoints in this test. If we don't do this, reconciliation
        # won't actually write transaction IDs to the disk since they will be deemed obsolete.
        session2.begin_transaction()
        self.assertIsNotNone(generated_uri)
        cursor2 = session2.open_cursor(generated_uri)
        cursor2.set_key(0)
        cursor2.remove()

        # Now create the file that we'll be importing later.
        self.session.create(self.uri, self.create_config)

        # Insert records into our newly created file.
        #
        # Since we allocated a bunch of transaction IDs before, we'll be allocating pretty high
        # IDs at this point.
        for key, value, timestamp in zip(self.keys, self.values, self.ts):
            self.update(self.uri, key, value, timestamp)

            # We want to checkpoint after each write to increase the write gen for this particular
            # btree.
            self.session.checkpoint()

        # Export the metadata for the table.
        c = self.session.open_cursor('metadata:', None, None)
        original_db_file_config = c[self.uri]
        c.close()

        self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config)

        # Now that we've finished doing our checkpoints, we can let go of the transaction ID we
        # allocated earlier, and release the helper cursor and session explicitly rather than
        # leaving them to be torn down with the connection.
        session2.rollback_transaction()
        cursor2.close()
        session2.close()

        # Close the connection.
        self.close_conn()

        # Create a new database and connect to it.
        newdir = 'IMPORT_DB'
        shutil.rmtree(newdir, ignore_errors=True)
        os.mkdir(newdir)
        self.conn = self.setUpConnectionOpen(newdir)
        self.session = self.setUpSessionOpen(self.conn)

        # Bring forward the oldest to be past or equal to the timestamps we'll be importing.
        self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(self.ts[-1]))

        # Copy over the datafiles for the object we want to import.
        self.copy_file(self.original_db_file, '.', newdir)

        # Construct the config string.
        if self.repair:
            import_config = 'import=(enabled,repair=true)'
        else:
            import_config = 'import=(enabled,repair=false,file_metadata=(' + \
                original_db_file_config + '))'

        # Import the file.
        self.session.create(self.uri, import_config)

        # Verify object.
        self.session.verify(self.uri)

        # Check the write generation of the new table.
        #
        # The important thing to check is that it is greater than 1 (the current connection-wide
        # base write gen).
        c = self.session.open_cursor('metadata:')
        imported_config = c[self.uri]
        c.close()
        write_gen = self.parse_write_gen(imported_config)
        self.printVerbose(3, 'IMPORTED WRITE GEN: {}'.format(write_gen))
        self.assertGreater(write_gen, 1)

        # Check that the values are all visible.
        #
        # These values were written in the previous database with HIGH transaction IDs. Since we're
        # on a fresh connection, our IDs are starting back from 1 so we MUST wipe the IDs otherwise
        # they won't be visible to us and this test will fail.
        #
        # Since we were checkpointing after each write, the write gen of some of these pages will
        # definitely be higher than 1.
        #
        # If we use the old scheme and compare the page's write gen with the connection-wide base
        # write gen (which will be 1 since we made a new database), then we won't see some records
        # and the test will fail.
        #
        # If we use the new scheme and compare with the btree specific base write gen that was
        # supplied in the metadata, we will realise that it was from the previous run and wipe the
        # IDs meaning that all records will be visible.
        self.check(self.uri, self.keys, self.values)

        # Perform a checkpoint.
        self.session.checkpoint()