summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
blob: 8dfafe886fa2d3c2a7bfbc083e46815b80221918 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
/*-
 * Copyright (c) 2014-present MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

#include "wt_internal.h"

/*
 * __wt_backup_load_incr --
 *     Load a tree's modified-block list from a configuration item into a bit string buffer and
 *     verify the buffer's byte length against the expected number of bits. Returns WT_ERROR if the
 *     lengths disagree.
 */
int
__wt_backup_load_incr(
  WT_SESSION_IMPL *session, WT_CONFIG_ITEM *blkcfg, WT_ITEM *bitstring, uint64_t nbits)
{
    uint64_t expected_bytes;

    /* The length check is done in whole bytes: eight bits per byte of the buffer. */
    expected_bytes = nbits >> 3;

    /* An empty configuration value is not converted; the caller's buffer is left as it was. */
    if (blkcfg->len != 0)
        WT_RET(__wt_nhex_to_raw(session, blkcfg->str, blkcfg->len, bitstring));

    if (bitstring->size != expected_bytes)
        WT_RET_MSG(session, WT_ERROR, "corrupted modified block list");
    return (0);
}

/*
 * __curbackup_incr_blkmod --
 *     Get the block modifications for a tree from its metadata and fill in the backup cursor's
 *     information with it.
 *
 *     On return the cursor may have any of these flags set: WT_CURBACKUP_CKPT_FAKE (the file's
 *     checkpoint has an empty address), WT_CURBACKUP_HAS_CB_INFO (the metadata has a non-empty
 *     "checkpoint_backup_info" value), WT_CURBACKUP_RENAME (the matching entry is marked as a
 *     rename) and WT_CURBACKUP_INCR_INIT (a "blocks" list was loaded into cb->bitstring).
 *
 *     Not finding an entry for the cursor's source identifier is not an error: WT_NOTFOUND is
 *     converted to 0 before returning.
 */
static int
__curbackup_incr_blkmod(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_CURSOR_BACKUP *cb)
{
    WT_CKPT ckpt;
    WT_CONFIG blkconf;
    WT_CONFIG_ITEM b, k, v;
    WT_DECL_RET;
    char *config;

    WT_ASSERT(session, btree != NULL);
    WT_ASSERT(session, btree->dhandle != NULL);
    WT_ASSERT(session, cb->incr_src != NULL);

    /* The metadata string is owned by this function and released at the err label. */
    WT_RET(__wt_metadata_search(session, btree->dhandle->name, &config));
    /*
     * Check if this is a file with no checkpointed content. A failure of the checkpoint lookup
     * itself is deliberately not returned: the flag is simply left unset in that case.
     */
    ret = __wt_meta_checkpoint(session, btree->dhandle->name, 0, &ckpt);
    if (ret == 0 && ckpt.addr.size == 0)
        F_SET(cb, WT_CURBACKUP_CKPT_FAKE);
    __wt_meta_checkpoint_free(session, &ckpt);

    WT_ERR(__wt_config_getones(session, config, "checkpoint_backup_info", &v));
    if (v.len)
        F_SET(cb, WT_CURBACKUP_HAS_CB_INFO);
    /* Walk the entries of checkpoint_backup_info; each key is a source identifier. */
    __wt_config_subinit(session, &blkconf, &v);
    while ((ret = __wt_config_next(&blkconf, &k, &v)) == 0) {
        /*
         * First see if we have information for this source identifier.
         */
        if (WT_STRING_MATCH(cb->incr_src->id_str, k.str, k.len) == 0)
            continue;

        /*
         * We found a match. If we have a name, then there should be granularity and nbits. The
         * granularity should be set to something. But nbits may be 0 if there are no blocks
         * currently modified.
         */
        WT_ERR(__wt_config_subgets(session, &v, "granularity", &b));
        cb->granularity = (uint64_t)b.val;
        WT_ERR(__wt_config_subgets(session, &v, "nbits", &b));
        cb->nbits = (uint64_t)b.val;
        WT_ERR(__wt_config_subgets(session, &v, "offset", &b));
        cb->offset = (uint64_t)b.val;

        __wt_verbose(session, WT_VERB_BACKUP,
          "Found modified incr block gran %" PRIu64 " nbits %" PRIu64 " offset %" PRIu64,
          cb->granularity, cb->nbits, cb->offset);
        __wt_verbose(session, WT_VERB_BACKUP, "Modified incr block config: \"%s\"", config);

        /*
         * The rename configuration string component was added later. So don't error if we don't
         * find it in the string. If we don't have it, we're not doing a rename. Otherwise rename
         * forces full copies, there is no need to traverse the blocks information.
         */
        WT_ERR_NOTFOUND_OK(__wt_config_subgets(session, &v, "rename", &b), true);
        if (ret == 0 && b.val) {
            /* Renamed: discard the range information just read so no bit is ever scanned. */
            cb->nbits = 0;
            cb->offset = 0;
            cb->bit_offset = 0;
            F_SET(cb, WT_CURBACKUP_RENAME);
        } else {
            F_CLR(cb, WT_CURBACKUP_RENAME);

            /*
             * We found a match. Load the block information into the cursor. A missing "blocks"
             * entry is tolerated; the cursor is then left without WT_CURBACKUP_INCR_INIT set.
             */
            if ((ret = __wt_config_subgets(session, &v, "blocks", &b)) == 0) {
                WT_ERR(__wt_backup_load_incr(session, &b, &cb->bitstring, cb->nbits));
                cb->bit_offset = 0;
                F_SET(cb, WT_CURBACKUP_INCR_INIT);
            }
            WT_ERR_NOTFOUND_OK(ret, false);
        }
        /* Only the first entry matching our source identifier is used. */
        break;
    }
    WT_ERR_NOTFOUND_OK(ret, false);

err:
    __wt_free(session, config);
    /* Running off the end of the list without a match is reported as success. */
    return (ret == WT_NOTFOUND ? 0 : ret);
}

/*
 * __curbackup_incr_next --
 *     WT_CURSOR->next method for the btree cursor type when configured with incremental_backup.
 *
 *     Each successful call sets a three-part key of (offset, size, type). The type is
 *     WT_BACKUP_FILE with offset 0 and the file's size when the whole file should be copied, or
 *     WT_BACKUP_RANGE with the byte offset and length of the next modified range. WT_NOTFOUND is
 *     returned when there is nothing (more) to report for the file.
 */
static int
__curbackup_incr_next(WT_CURSOR *cursor)
{
    WT_BTREE *btree;
    WT_CURSOR_BACKUP *cb;
    WT_DECL_ITEM(buf);
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    wt_off_t size;
    uint64_t start_bitoff, total_len, raw;
    const char *file;
    bool found;

    cb = (WT_CURSOR_BACKUP *)cursor;
    /* There is no underlying file cursor, and so no btree, when a full copy was forced at open. */
    btree = cb->incr_cursor == NULL ? NULL : CUR2BT(cb->incr_cursor);
    /* Remember the caller's raw setting and clear it for this call; it is put back on exit. */
    raw = F_MASK(cursor, WT_CURSTD_RAW);
    CURSOR_API_CALL(cursor, session, get_value, btree);
    F_CLR(cursor, WT_CURSTD_RAW);

    if (!F_ISSET(cb, WT_CURBACKUP_INCR_INIT) &&
      (btree == NULL || F_ISSET(cb, WT_CURBACKUP_FORCE_FULL | WT_CURBACKUP_RENAME))) {
        /*
         * We don't have this object's incremental information or it's a forced file copy. If this
         * is a log file, use the full pathname that may include the log path.
         */
        file = cb->incr_file;
        if (WT_PREFIX_MATCH(file, WT_LOG_FILENAME)) {
            WT_ERR(__wt_scr_alloc(session, 0, &buf));
            WT_ERR(__wt_log_filename(session, UINT32_MAX, file, buf));
            file = buf->data;
        }
        WT_ERR(__wt_fs_size(session, file, &size));

        cb->nbits = 0;
        cb->offset = 0;
        cb->bit_offset = 0;
        /*
         * By setting this to true, the next call will detect we're done in the code for the
         * incremental cursor below and return WT_NOTFOUND.
         */
        F_SET(cb, WT_CURBACKUP_INCR_INIT);
        __wt_verbose(session, WT_VERB_BACKUP, "Set key WT_BACKUP_FILE %s size %" PRIuMAX,
          cb->incr_file, (uintmax_t)size);
        __wt_cursor_set_key(cursor, 0, size, WT_BACKUP_FILE);
    } else {
        if (!F_ISSET(cb, WT_CURBACKUP_INCR_INIT)) {
            /*
             * We don't have this object's incremental information, and it's not a full file copy.
             * Get a list of the block modifications for the file. The block modifications are from
             * the incremental identifier starting point. Walk the list looking for one with a
             * source of our id.
             */
            WT_ERR(__curbackup_incr_blkmod(session, btree, cb));
            /*
             * There are several cases where we do not have block modification information for
             * the file. They are described and handled as follows:
             *
             * 1. Renamed file. Always return the whole file information.
             * 2. Newly created file without checkpoint information. Return the whole
             *    file information.
             * 3. File created and checkpointed before incremental backups were configured.
             *    Return no file information as it was copied in the initial full backup.
             * 4. File that has not been modified since the previous incremental backup.
             *    Return no file information as there is no new information.
             */
            if (cb->bitstring.mem == NULL || F_ISSET(cb, WT_CURBACKUP_RENAME)) {
                /* Mark the cursor initialized so the following call falls through to the scan. */
                F_SET(cb, WT_CURBACKUP_INCR_INIT);
                if (F_ISSET(cb, WT_CURBACKUP_RENAME) ||
                  (F_ISSET(cb, WT_CURBACKUP_CKPT_FAKE) && F_ISSET(cb, WT_CURBACKUP_HAS_CB_INFO))) {
                    WT_ERR(__wt_fs_size(session, cb->incr_file, &size));
                    __wt_verbose(session, WT_VERB_BACKUP,
                      "Set key WT_BACKUP_FILE %s size %" PRIuMAX, cb->incr_file, (uintmax_t)size);
                    __wt_cursor_set_key(cursor, 0, size, WT_BACKUP_FILE);
                    goto done;
                }
                WT_ERR(WT_NOTFOUND);
            }
        }
        /* We have initialized incremental information. */
        start_bitoff = cb->bit_offset;
        /* A returned range is at least one granularity unit long. */
        total_len = cb->granularity;
        found = false;
        /* The bit offset can be less than or equal to but never greater than the number of bits. */
        WT_ASSERT(session, cb->bit_offset <= cb->nbits);
        /* Look for the next chunk that had modifications. */
        while (cb->bit_offset < cb->nbits)
            if (__bit_test(cb->bitstring.mem, cb->bit_offset)) {
                found = true;
                /*
                 * Care must be taken to leave the bit_offset field set to the next offset bit so
                 * that the next call is set to the correct offset.
                 */
                start_bitoff = cb->bit_offset++;
                if (F_ISSET(cb, WT_CURBACKUP_CONSOLIDATE)) {
                    /*
                     * Extend the range over every directly following set bit. The post-increment
                     * in the condition also steps past the first clear bit that ends the run.
                     */
                    while (
                      cb->bit_offset < cb->nbits && __bit_test(cb->bitstring.mem, cb->bit_offset++))
                        total_len += cb->granularity;
                }
                break;
            } else
                ++cb->bit_offset;

        /* We either have this object's incremental information or we're done. */
        if (!found)
            WT_ERR(WT_NOTFOUND);
        WT_ASSERT(session, cb->granularity != 0);
        WT_ASSERT(session, total_len != 0);
        /* The byte offset of the range is the base offset plus whole granularity units. */
        __wt_verbose(session, WT_VERB_BACKUP,
          "Set key WT_BACKUP_RANGE %s offset %" PRIu64 " length %" PRIu64, cb->incr_file,
          cb->offset + cb->granularity * start_bitoff, total_len);
        __wt_cursor_set_key(
          cursor, cb->offset + cb->granularity * start_bitoff, total_len, WT_BACKUP_RANGE);
    }

    /* Success and failure share the same cleanup. */
done:
err:
    /* Put back the raw setting saved on entry and release the log path buffer, if any. */
    F_SET(cursor, raw);
    __wt_scr_free(session, &buf);
    API_END_RET(session, ret);
}

/*
 * __wt_curbackup_free_incr --
 *     Free the duplicate backup cursor for a file-based incremental backup: release the file name,
 *     close the underlying file cursor if one was opened, and free the modified-block bit string.
 *     Returns the result of closing the underlying cursor, or 0 if there was none.
 */
int
__wt_curbackup_free_incr(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
{
    WT_DECL_RET;

    __wt_free(session, cb->incr_file);
    if (cb->incr_cursor != NULL) {
        ret = cb->incr_cursor->close(cb->incr_cursor);
        /*
         * Drop the reference whether or not the close reported an error. The backup cursor can
         * outlive this call (the open path calls this function on failure), and both this function
         * and the next method test incr_cursor against NULL before using it.
         */
        cb->incr_cursor = NULL;
    }
    __wt_buf_free(session, &cb->bitstring);

    return (ret);
}

/*
 * __wt_curbackup_open_incr --
 *     Initialize the duplicate backup cursor for a file-based incremental backup.
 *
 *     The new cursor inherits the incremental source and the consolidate setting from the parent
 *     backup cursor "other". Unless a full file copy is forced, a file cursor is opened on the
 *     target file. On failure, the incremental resources attached to the cursor are released
 *     before returning.
 */
int
__wt_curbackup_open_incr(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other,
  WT_CURSOR *cursor, const char *cfg[], WT_CURSOR **cursorp)
{
    WT_CURSOR_BACKUP *cb, *other_cb;
    WT_DECL_ITEM(open_uri);
    WT_DECL_RET;
    uint64_t session_cache_flags;

    cb = (WT_CURSOR_BACKUP *)cursor;
    other_cb = (WT_CURSOR_BACKUP *)other;
    /* The key has three parts (offset, size, type); there is no value. */
    cursor->key_format = WT_UNCHECKED_STRING(qqq);
    cursor->value_format = "";

    WT_ASSERT(session, other_cb->incr_src != NULL);

    /*
     * Inherit from the backup cursor but reset specific functions for incremental.
     */
    cursor->next = __curbackup_incr_next;
    cursor->get_key = __wt_cursor_get_key;
    cursor->get_value = __wt_cursor_get_value_notsup;
    cb->incr_src = other_cb->incr_src;

    /* All WiredTiger owned files are full file copies. */
    if (F_ISSET(other_cb->incr_src, WT_BLKINCR_FULL) ||
      WT_PREFIX_MATCH(cb->incr_file, "WiredTiger")) {
        __wt_verbose(session, WT_VERB_BACKUP, "Forcing full file copies for %s for id %s",
          cb->incr_file, other_cb->incr_src->id_str);
        F_SET(cb, WT_CURBACKUP_FORCE_FULL);
    }
    if (F_ISSET(other_cb, WT_CURBACKUP_CONSOLIDATE))
        F_SET(cb, WT_CURBACKUP_CONSOLIDATE);
    else
        F_CLR(cb, WT_CURBACKUP_CONSOLIDATE);

    /*
     * Set up the incremental backup information, if we are not forcing a full file copy. We need an
     * open cursor on the file. Open the backup checkpoint, confirming it exists.
     */
    if (!F_ISSET(cb, WT_CURBACKUP_FORCE_FULL)) {
        WT_ERR(__wt_scr_alloc(session, 0, &open_uri));
        WT_ERR(__wt_buf_fmt(session, open_uri, "file:%s", cb->incr_file));
        /*
         * Incremental cursors use file cursors, but in a non-standard way. Turn off cursor caching
         * as we open the cursor.
         *
         * Restore the session's caching setting before checking the result of the open: jumping to
         * the error label first would leave cursor caching disabled on the session.
         */
        session_cache_flags = F_ISSET(session, WT_SESSION_CACHE_CURSORS);
        F_CLR(session, WT_SESSION_CACHE_CURSORS);
        ret = __wt_curfile_open(session, open_uri->data, NULL, cfg, &cb->incr_cursor);
        F_SET(session, session_cache_flags);
        WT_ERR(ret);
    }
    WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));

err:
    /* On failure, give back whatever incremental state was attached to the cursor. */
    if (ret != 0)
        WT_TRET(__wt_curbackup_free_incr(session, cb));
    __wt_scr_free(session, &open_uri);
    return (ret);
}