#!/usr/bin/env python
#
# Public Domain 2014-present MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

import os, glob, shutil
import wttest, wiredtiger
from suite_subprocess import suite_subprocess
from helper import compare_files

# Shared base class used by backup tests.
class backup_base(wttest.WiredTigerTestCase, suite_subprocess):
    data_cursor_config = None   # A config string for cursors.
    mult = 0                    # Counter to add variance to the data.
    nops = 100                  # Number of operations added to the uri.

    # We use a counter to produce unique backup ids for multiple iterations
    # of incremental backup.
    bkup_id = 0
    # Set to True during the initial setup of a backup test; while it is set,
    # add_data does not increment the backup id.
    initial_backup = False

    # Used by the populate function.
    rows = 100
    populate_big = None

    # Specify a logpath directory to be used to place wiredtiger log files.
    logpath = ''
    # Temporary directory used to verify consistent data between multiple incremental backups.
    home_tmp = "WT_TEST_TMP"

    #
    # Add data to the given uri.
    # Optionally perform a session checkpoint after adding the data.
    #
    def add_data(self, uri, key, val, do_checkpoint=False):
        assert(self.nops != 0)
        c = self.session.open_cursor(uri, None, self.data_cursor_config)
        for i in range(0, self.nops):
            num = i + (self.mult * self.nops)
            k = key + str(num)
            v = val + str(num)
            c[k] = v
        c.close()
        if do_checkpoint:
            self.session.checkpoint()
        # Increase the counter so that later backups have unique ids.
        if not self.initial_backup:
            self.bkup_id += 1
        # Increase the multiplier so that later calls insert unique items.
        self.mult += 1

    #
    # Populate a set of objects.
    #
    def populate(self, objs, do_checkpoint=False, skiplsm=False):
        cg_config = ''
        for i in objs:
            if len(i) > 2:
                if i[2] and skiplsm:
                    continue
                if i[2] == self.populate_big:
                    self.rows = 50000   # Big object.
                else:
                    self.rows = 1000    # Small object.
            if len(i) > 3:
                cg_config = i[3]
            i[1](self, i[0], self.rows, cgconfig = cg_config).populate()

        # Backup needs a checkpoint.
        if do_checkpoint:
            self.session.checkpoint()
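
    # A minimal usage sketch (illustrative only, kept as a comment so it is
    # not collected by the test runner): a hypothetical subclass that fills a
    # table and checkpoints it. The table name and key/value prefixes are
    # made up for the example.
    #
    #   class test_backup_example(backup_base):
    #       def test_add_data(self):
    #           self.session.create('table:example', 'key_format=S,value_format=S')
    #           # Inserts self.nops rows and checkpoints; bumps self.bkup_id
    #           # and self.mult so a later call inserts distinct keys.
    #           self.add_data('table:example', 'key', 'val', do_checkpoint=True)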

    #
    # Set up all the directories needed for the test. We have a full backup
    # directory, an incremental backup directory and a temporary directory.
    # The temporary directory holds the updated data for incremental backups,
    # and its contents overwrite the incremental directory each time this
    # function is called, to set up future backup calls. That way we can
    # compare the full and incremental backup each time through.
    #
    # Note: The log directory is a subdirectory of the home directory;
    # creating it also creates the home directory. The incremental backup
    # function copies the latest data into the temporary directory.
    #
    def setup_directories(self, home_incr, home_full):
        # Create the temp directory only if the path doesn't exist,
        # as we only want to create this directory at the start.
        if not os.path.exists(self.home_tmp):
            os.makedirs(self.home_tmp + '/' + self.logpath)

        if os.path.exists(home_full):
            shutil.rmtree(home_full)
        os.makedirs(home_full + '/' + self.logpath)

        # If the incremental directory exists, then remove the contents of the directory
        # and place all the contents of the temporary directory into the incremental directory
        # such that the test can now perform further incremental backups on the directory.
        if os.path.exists(home_incr):
            shutil.rmtree(home_incr)
            shutil.copytree(self.home_tmp, home_incr)
        else:
            os.makedirs(home_incr + '/' + self.logpath)

    #
    # Check that a URI doesn't exist, both the meta-data and the file names.
    #
    def confirmPathDoesNotExist(self, uri, dir):
        conn = self.wiredtiger_open(dir)
        session = conn.open_session()
        self.assertRaises(wiredtiger.WiredTigerError,
            lambda: session.open_cursor(uri, None, None))
        conn.close()

        self.assertEqual(
            glob.glob(dir + '*' + uri.split(":")[1] + '*'), [],
            'confirmPathDoesNotExist: URI exists, file name matching \"' +
            uri.split(":")[1] + '\" found')

    #
    # Copy a file into the given directory.
    #
    def copy_file(self, file, dir):
        copy_from = file
        # If it is a log file, prepend the log path.
        if self.logpath and "WiredTigerLog" in file:
            copy_to = dir + '/' + self.logpath
        else:
            copy_to = dir
        shutil.copy(copy_from, copy_to)

    #
    # Use a backup cursor to perform a selective backup, iterating through the
    # cursor and copying the files that are not in the remove list into the
    # given directory. When a test performs multiple incremental backups, we
    # need to perform a proper backup on each incremental directory as a
    # starting base.
    #
    def take_selective_backup(self, backup_dir, remove_list, backup_cur=None):
        self.pr('Selective backup to ' + backup_dir + ': ')
        bkup_c = backup_cur
        if backup_cur is None:
            config = None
            if self.initial_backup:
                config = 'incremental=(granularity=1M,enabled=true,this_id=ID0)'
            bkup_c = self.session.open_cursor('backup:', None, config)
        all_files = []

        # We cannot use 'for newfile in bkup_c:' usage because backup cursors don't have
        # values and adding in get_values returns ENOTSUP and causes the usage to fail.
        # If that changes then this, and the use of the duplicate below can change.
        while bkup_c.next() == 0:
            newfile = bkup_c.get_key()
            sz = os.path.getsize(newfile)
            if (newfile in remove_list):
                continue
            self.pr('Copy from: ' + newfile + ' (' + str(sz) + ') to ' + backup_dir)
            self.copy_file(newfile, backup_dir)
            all_files.append(newfile)

        if backup_cur is None:
            bkup_c.close()
        return all_files
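
    # A minimal flow sketch (illustrative only): how a hypothetical test might
    # combine setup_directories and take_selective_backup. The directory names
    # and the skipped file name are assumptions, not fixed by this class.
    #
    #   def test_selective(self):
    #       self.setup_directories('WT_INCR', 'WT_FULL')
    #       self.session.create('table:example', 'key_format=S,value_format=S')
    #       self.add_data('table:example', 'key', 'val', do_checkpoint=True)
    #       # Copy everything except the named file into WT_FULL.
    #       copied = self.take_selective_backup('WT_FULL', ['skipped.wt'])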

    #
    # Use a backup cursor to perform a full backup, iterating through the
    # cursor and copying the files into the given directory. When a test
    # performs multiple incremental backups, we initially perform a full
    # backup on each incremental directory as a starting base.
    # Optional arguments:
    # backup_cur: A backup cursor that can be given to the function, but the
    # function caller holds the responsibility of closing the cursor.
    #
    def take_full_backup(self, backup_dir, backup_cur=None):
        self.pr('Full backup to ' + backup_dir + ': ')
        bkup_c = backup_cur
        if backup_cur is None:
            config = None
            if self.initial_backup:
                config = 'incremental=(granularity=1M,enabled=true,this_id=ID0)'
            bkup_c = self.session.open_cursor('backup:', None, config)
        all_files = []

        # We cannot use 'for newfile in bkup_c:' usage because backup cursors don't have
        # values and adding in get_values returns ENOTSUP and causes the usage to fail.
        # If that changes then this, and the use of the duplicate below can change.
        while bkup_c.next() == 0:
            newfile = bkup_c.get_key()
            sz = os.path.getsize(newfile)
            self.pr('Copy from: ' + newfile + ' (' + str(sz) + ') to ' + backup_dir)
            self.copy_file(newfile, backup_dir)
            all_files.append(newfile)

        if backup_cur is None:
            bkup_c.close()
        return all_files

    #
    # Compare two directory paths using the wt dump command.
    # The suffix lets tests that run multiple comparisons write distinct output files.
    #
    def compare_backups(self, uri, base_dir, other_dir, suffix = None):
        sfx = ""
        if suffix is not None:
            sfx = "." + suffix
        base_out = "./backup_base" + sfx

        if os.path.exists(base_out):
            os.remove(base_out)

        # Run wt dump on the base backup directory.
        self.runWt(['-R', '-h', base_dir, 'dump', uri], outfilename=base_out)
        other_out = "./backup_other" + sfx
        if os.path.exists(other_out):
            os.remove(other_out)

        # Run wt dump on the incremental backup directory.
        self.runWt(['-R', '-h', other_dir, 'dump', uri], outfilename=other_out)
        self.pr("compare_files: " + base_out + ", " + other_out)
        self.assertEqual(True, compare_files(self, base_out, other_out))

    #
    # Perform a block range copy for a given offset and file.
    #
    def range_copy(self, filename, offset, size, backup_incr_dir, consolidate):
        read_from = filename
        write_to = backup_incr_dir + '/' + filename
        rfp = open(read_from, "rb")
        rfp.seek(offset, 0)
        buf = rfp.read(size)
        # Compare against the copy in the previous incremental (temporary)
        # directory to check that the old data and the new data differ. We
        # can only guarantee a difference when running in consolidate mode:
        # it's possible that we change multiple blocks in a single write and
        # some of the blocks are the same as before. If we are not running in
        # consolidate mode, those blocks are copied separately one by one and
        # would trigger this assert.
        old_to = self.home_tmp + '/' + filename
        if os.path.exists(old_to) and consolidate:
            self.pr('RANGE CHECK file ' + old_to + ' offset ' + str(offset) + ' len ' + str(size))
            old_rfp = open(old_to, "rb")
            old_rfp.seek(offset, 0)
            old_buf = old_rfp.read(size)
            old_rfp.close()
            # This assertion tests that the offset range we're given actually changed
            # from the previous backup.
            self.assertNotEqual(buf, old_buf)

        # Create the file if it doesn't exist.
        if not os.path.exists(write_to):
            wfp = open(write_to, "w+b")
        else:
            wfp = open(write_to, "r+b")
        wfp.seek(offset, 0)
        wfp.write(buf)
        rfp.close()
        wfp.close()
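
    # A minimal sketch (illustrative only) of the caller-managed-cursor form
    # of take_full_backup: the caller opens the backup cursor, may reuse it
    # (e.g. for a duplicate log cursor), and is responsible for closing it.
    # 'WT_FULL' is a hypothetical directory name.
    #
    #   bkup_c = self.session.open_cursor('backup:', None, None)
    #   self.take_full_backup('WT_FULL', bkup_c)
    #   # ... bkup_c can also be passed to take_log_backup below ...
    #   bkup_c.close()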

    #
    # With a given backup cursor, open an incremental block cursor to copy
    # the blocks of a given file. If the entry's type is WT_BACKUP_FILE,
    # perform a full copy into the given directory; if the type is
    # WT_BACKUP_RANGE, perform a partial copy of the file using range copy.
    #
    # Note: we return the sizes of WT_BACKUP_RANGE entries for tests that
    # check the consolidate config.
    #
    def take_incr_backup_block(self, bkup_c, newfile, backup_incr_dir, consolidate):
        config = 'incremental=(file=' + newfile + ')'
        self.pr('Open incremental cursor with ' + config)
        # For each file listed, open a duplicate backup cursor and copy the blocks.
        incr_c = self.session.open_cursor(None, bkup_c, config)
        # For consolidate.
        lens = []
        # We cannot use 'for newfile in incr_c:' usage because backup cursors don't have
        # values and adding in get_values returns ENOTSUP and causes the usage to fail.
        # If that changes then this, and the use of the duplicate below can change.
        while incr_c.next() == 0:
            incrlist = incr_c.get_keys()
            offset = incrlist[0]
            size = incrlist[1]
            curtype = incrlist[2]

            self.assertTrue(curtype == wiredtiger.WT_BACKUP_FILE or curtype == wiredtiger.WT_BACKUP_RANGE)
            if curtype == wiredtiger.WT_BACKUP_FILE:
                sz = os.path.getsize(newfile)
                self.pr('Copy from: ' + newfile + ' (' + str(sz) + ') to ' + backup_incr_dir)
                # Copy the whole file.
                self.copy_file(newfile, backup_incr_dir)
            else:
                # Copy the block range.
                self.pr('Range copy file ' + newfile + ' offset ' + str(offset) + ' len ' + str(size))
                self.range_copy(newfile, offset, size, backup_incr_dir, consolidate)
                lens.append(size)
        incr_c.close()
        return lens

    #
    # Given a backup cursor, open a log cursor and copy all log files that are
    # not in the given log list. Return all the log files.
    #
    def take_log_backup(self, bkup_c, backup_dir, orig_logs, log_cursor=None):
        # Now open a duplicate backup cursor.
        dupc = log_cursor
        if log_cursor is None:
            config = 'target=("log:")'
            dupc = self.session.open_cursor(None, bkup_c, config)
        dup_logs = []
        while dupc.next() == 0:
            newfile = dupc.get_key()
            self.assertTrue("WiredTigerLog" in newfile)
            sz = os.path.getsize(newfile)
            if (newfile not in orig_logs):
                self.pr('DUP: Copy from: ' + newfile + ' (' + str(sz) + ') to ' + backup_dir)
                shutil.copy(newfile, backup_dir)
            # Record all log files returned for later verification.
            dup_logs.append(newfile)
        if log_cursor is None:
            dupc.close()
        return dup_logs

    #
    # Open an incremental backup cursor with an id, iterate through all the
    # files and perform an incremental block copy for each of them. Return
    # information about the backup files.
    #
    # Optional arguments:
    # consolidate: Add the consolidate option to the cursor.
    #
    def take_incr_backup(self, backup_incr_dir, id=0, consolidate=False):
        self.assertTrue(id > 0 or self.bkup_id > 0)
        if id == 0:
            id = self.bkup_id
        # Open the backup data source for incremental backup.
        config = 'incremental=(src_id="ID' + str(id - 1) + '",this_id="ID' + str(id) + '"'
        if consolidate:
            config += ',consolidate=true'
        config += ')'
        self.pr("Incremental backup cursor with config " + config)
        bkup_c = self.session.open_cursor('backup:', None, config)

        file_sizes = []
        file_names = []
        # We cannot use 'for newfile in bkup_c:' usage because backup cursors don't have
        # values and adding in get_values returns ENOTSUP and causes the usage to fail.
        # If that changes then this, and the use of the duplicate below can change.
        while bkup_c.next() == 0:
            newfile = bkup_c.get_key()
            file_sizes += self.take_incr_backup_block(bkup_c, newfile, backup_incr_dir, consolidate)
            file_names.append(newfile)
            # Copy into the temp directory for tests that require further iterations of incremental backups.
            self.copy_file(newfile, self.home_tmp)
        bkup_c.close()
        return (file_names, file_sizes)
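
    # A minimal end-to-end sketch (illustrative only): an initial
    # incremental-enabled full backup followed by one incremental pass.
    # Directory and table names are hypothetical.
    #
    #   def test_incremental(self):
    #       self.setup_directories('WT_INCR', 'WT_FULL')
    #       self.session.create('table:example', 'key_format=S,value_format=S')
    #       self.initial_backup = True
    #       self.add_data('table:example', 'key', 'val', do_checkpoint=True)
    #       # The first pass opens the backup cursor with this_id=ID0.
    #       self.take_full_backup('WT_INCR')
    #       self.initial_backup = False
    #
    #       self.add_data('table:example', 'key', 'val', do_checkpoint=True)
    #       # Copy only the blocks changed since ID0 into the incremental
    #       # directory, then take a fresh full backup to compare against.
    #       self.take_incr_backup('WT_INCR')
    #       self.take_full_backup('WT_FULL')
    #       self.compare_backups('table:example', 'WT_FULL', 'WT_INCR')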