From 2fe5f0ee2d2545f05db11b8c227e582e6a9479d9 Mon Sep 17 00:00:00 2001 From: ben Date: Mon, 8 Apr 2002 09:14:12 +0000 Subject: Various changes to switch to new include/exclude syntax git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@39 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/rdiff_backup/header.py | 2 +- rdiff-backup/rdiff_backup/selection.py | 217 ++++++++++++++++++++++++++++----- rdiff-backup/src/globals.py | 50 +++----- rdiff-backup/src/header.py | 2 +- rdiff-backup/src/main.py | 64 ++++++---- rdiff-backup/src/selection.py | 217 ++++++++++++++++++++++++++++----- 6 files changed, 428 insertions(+), 124 deletions(-) diff --git a/rdiff-backup/rdiff_backup/header.py b/rdiff-backup/rdiff_backup/header.py index 91ef2d2..8d54052 100644 --- a/rdiff-backup/rdiff_backup/header.py +++ b/rdiff-backup/rdiff_backup/header.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # # rdiff-backup -- Mirror files while keeping incremental changes -# Version 0.7.1 released March 25, 2002 +# Version 0.7.2 released April 30, 2002 # Copyright (C) 2001, 2002 Ben Escoto # # This program is licensed under the GNU General Public License (GPL). diff --git a/rdiff-backup/rdiff_backup/selection.py b/rdiff-backup/rdiff_backup/selection.py index 75d55ce..9f41860 100644 --- a/rdiff-backup/rdiff_backup/selection.py +++ b/rdiff-backup/rdiff_backup/selection.py @@ -1,4 +1,6 @@ +from __future__ import generators execfile("destructive_stepping.py") +import re ####################################################################### # @@ -47,14 +49,23 @@ class Select: be redundant and presumably isn't what the user intends. """ - def __init__(self, dsrpath, starting_index = None): - """DSRPIterator initializer. dsrpath should be the root dir""" + # This re should not match normal filenames, but usually just globs + glob_re = re.compile(".*[\*\?\[]") + + def __init__(self, dsrpath): + """DSRPIterator initializer""" self.selection_functions = [] - if starting_index: - self.iter = self.iterate_starting_from(dsrpath, starting_index, - self.iterate_starting_from) - else: self.iter = self.Iterate(dsrpath, self.Iterate) + self.dsrpath = dsrpath + self.prefix = dsrpath.path + + def set_iter(self, starting_index = None): + """Initialize more variables. dsrpath should be the root dir""" + if starting_index is not None: + self.iter = self.iterate_starting_from(self.dsrpath, + starting_index, self.iterate_starting_from) + else: self.iter = self.Iterate(self.dsrpath, self.Iterate) self.next = self.iter.next + self.__iter__ = lambda: self def Iterate(self, dsrpath, rec_func): """Return iterator yielding dsrps in dsrpath @@ -88,7 +99,7 @@ class Select: """Like Iterate, but only yield indicies > self.starting_index""" if dsrpath.index > self.starting_index: # past starting_index for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp - elif dsrpath.index = self.starting_index[:len(dsrpath.index)]: + elif dsrpath.index == self.starting_index[:len(dsrpath.index)]: # May encounter starting index on this branch for dsrp in self.Iterate(dsrpath, self.iterate_starting_from): yield dsrp @@ -100,50 +111,99 @@ class Select: if result is not None: return result return 1 - def add_selection_func(self, sel_func): - """Add another selection function at the end""" - self.selection_functions.append(sel_func) + def ParseArgs(self, argtuples): + """Create selection functions based on list of tuples + + The tuples have the form (option string, additional argument) + and are created when the initial commandline arguments are + read. The reason for the extra level of processing is that + the filelists may only be openable by the main connection, but + the selection functions need to be on the backup reader or + writer side. When the initial arguments are parsed the right + information is sent over the link. + + """ + for opt, arg in argtuples: + if opt == "--exclude": + self.add_selection_func(self.glob_get_sf(arg, 0)) + elif opt == "--exclude-device-files": + self.add_selection_func(self.devfiles_get_sf()) + elif opt == "--exclude-filelist": + self.add_selection_func(self.filelist_get_sf(arg[1], + 0, arg[0])) + elif opt == "--exclude-regexp": + self.add_selection_func(self.regexp_get_sf(arg, 0)) + elif opt == "--include": + self.add_selection_func(self.glob_get_sf(arg, 1)) + elif opt == "--include-filelist": + self.add_selection_func(self.filelist_get_sf(arg[1], + 1, arg[0])) + elif opt == "--include-regexp": + self.add_selection_func(self.regexp_get_sf(arg, 1)) + else: assert 0, "Bad option %s" % opt + + # Exclude rdiff-backup-data directory + self.add_selection_func( + self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1) + + def add_selection_func(self, sel_func, add_to_start = None): + """Add another selection function at the end or beginning""" + if add_to_start: self.selection_functions.insert(0, sel_func) + else: self.selection_functions.append(sel_func) - def filelist_add_sf(self, filelist_fp, include, filelist_name): - """Adds selection function by reading list of files + def filelist_get_sf(self, filelist_fp, inc_default, filelist_name): + """Return selection function by reading list of files The format of the filelist is documented in the man page. filelist_fp should be an (open) file object. - include should be true if this is an include list, false for - an exclude list. + inc_default should be true if this is an include list, + false for an exclude list. filelist_name is just a string used for logging. """ Log("Reading filelist %s" % filelist_name, 4) tuple_list, something_excluded = \ - self.filelist_read(filelist_fp, include, filelist_name) + self.filelist_read(filelist_fp, inc_default, filelist_name) Log("Sorting filelist %s" % filelist_name, 4) tuple_list.sort() - current_index = 0 + i = [0] # We have to put index in list because of stupid scoping rules + def selection_function(dsrp): - - + if i[0] > len(tuple_list): return inc_default + while 1: + include, move_on = \ + self.filelist_pair_match(dsrp, tuple_list[i[0]]) + if move_on: + i[0] += 1 + if include is None: continue # later line may match + return include + + selection_function.exclude = something_excluded + selection_function.name = "Filelist: " + filelist_name + return selection_function + def filelist_read(self, filelist_fp, include, filelist_name): """Read filelist from fp, return (tuplelist, something_excluded)""" something_excluded, tuple_list = None, [] prefix_warnings = 0 - while 1: - line = filelist_fp.readline() - if not line: break + for line in filelist_fp: + if not line.strip(): continue # skip blanks try: tuple = self.filelist_parse_line(line, include) except FilePrefixError, exp: prefix_warnings += 1 if prefix_warnings < 6: Log("Warning: file specification %s in filelist %s\n" "doesn't start with correct prefix %s, ignoring." % - (exp[0], filelist_name, exp[1]), 2) + (exp, filelist_name, self.prefix), 2) if prefix_warnings == 5: Log("Future prefix errors will not be logged.", 2) tuple_list.append(tuple) if not tuple[1]: something_excluded = 1 + if filelist_fp.close(): + Log("Error closing filelist %s" % filelist_name, 2) return (tuple_list, something_excluded) - def filelist_parse_line(self, line, include, prefix): + def filelist_parse_line(self, line, include): """Parse a single line of a filelist, returning a pair pair will be of form (index, include), where index is another @@ -160,17 +220,110 @@ class Select: include = 0 line = line[2:] - if not line.startswith(prefix+"/"): - raise FilePrefixError(line, prefix+"/") - index = filter(lambda x: x, line.split("/")) # remove empties + if not line.startswith(self.prefix): raise FilePrefixError(line) + line = line[len(self.prefix):] # Discard prefix + index = tuple(filter(lambda x: x, line.split("/"))) # remove empties return (index, include) def filelist_pair_match(self, dsrp, pair): - """Return 0/1 if pair excludes/includes dsrp, None if doesn't match""" + """Matches a filelist tuple against a dsrp + + Returns a pair (include, move_on, definitive). include is + None if the tuple doesn't match either way, and 0/1 if the + tuple excludes or includes the dsrp. + + move_on is true if the tuple cannot match a later index, and + so we should move on to the next tuple in the index. + + """ index, include = pair + if include == 1: + if index < dsrp.index: return (None, 1) + if index == dsrp.index: return (1, 1) + elif index[:len(dsrp.index)] == dsrp.index: + return (1, None) # /foo/bar implicitly includes /foo + else: return (None, None) # dsrp greater, not initial sequence + elif include == 0: + if dsrp.index[:len(index)] == index: + return (0, None) # /foo implicitly excludes /foo/bar + elif index < dsrp.index: return (None, 1) + else: return (None, None) # dsrp greater, not initial sequence + else: assert 0, "Include is %s, should be 0 or 1" % (include,) + + def regexp_get_sf(self, regexp_string, include): + """Return selection function given by regexp_string""" + assert include == 0 or include == 1 + try: regexp = re.compile(regexp_string) + except: + Log("Error compiling regular expression %s" % regexp_string, 1) + raise + + def sel_func(dsrp): + match = regexp.match(dsrp.path) + if match and match.end(0) == len(dsrp.path): return include + else: return None + + sel_func.exclude = not include + sel_func.name = "Regular expression: %s" % regexp_string + return sel_func + + def devfiles_get_sf(self): + """Return a selection function to exclude all dev files""" + if self.selection_functions: + Log("Warning: exclude-device-files is not the first " + "selector.\nThis may not be what you intended", 3) + def sel_func(dsrp): + if dsrp.isdev(): return 0 + else: return None + sel_func.exclude = 1 + sel_func.name = "Exclude device files" + return sel_func + + def glob_get_sf(self, glob_str, include): + """Return selection function given by glob string""" assert include == 0 or include == 1 - if not include and dsrp.index[:len(index)] == index: - return 0 # /foo matches /foo/bar/baz - elif include and index[:len(dsrp.index)] == dsrp.index: - return 1 # /foo/bar implicitly matches /foo for includes only - else: return None + if glob_str == "**": sel_func = lambda dsrp: include + elif not self.glob_re.match(glob_str): # normal file + return self.glob_get_filename_sf(glob_str, include) + else: pass ####XXXXXXXXXXXXX + + sel_func.exclude = not include + sel_func.name = "Command-line glob: %s" % glob_str + return sel_func + + def glob_get_filename_sf(self, filename, include): + """Get a selection function given a normal filename + + Some of the parsing is better explained in + filelist_parse_line. The reason this is split from normal + globbing is so we can check the prefix and give proper + warning. + + """ + if not filename.startswith(self.prefix): + Log("Warning: file specification %s does not start with\n" + "prefix %s, ignoring" % (filename, self.prefix), 2) + return lambda x: None # dummy selection function + index = tuple(filter(lambda x: x, + filename[len(self.prefix):].split("/"))) + return self.glob_get_tuple_sf(index, include) + + def glob_get_tuple_sf(self, tuple, include): + """Add selection function based on tuple""" + def include_sel_func(dsrp): + if (dsrp.index == tuple[:len(dsrp.index)] or + dsrp.index[:len(tuple)] == tuple): + return 1 # /foo/bar implicitly matches /foo, vice-versa + else: return None + + def exclude_sel_func(dsrp): + if dsrp.index[:len(tuple)] == tuple: + return 0 # /foo excludes /foo/bar, not vice-versa + else: return None + + if include == 1: sel_func = include_sel_func + elif include == 0: sel_func = exclude_sel_func + sel_func.exclude = not include + sel_func.name = "Tuple select %s" % (tuple,) + return sel_func + diff --git a/rdiff-backup/src/globals.py b/rdiff-backup/src/globals.py index 3987f73..fceefa4 100644 --- a/rdiff-backup/src/globals.py +++ b/rdiff-backup/src/globals.py @@ -8,7 +8,7 @@ import re, os class Globals: # The current version of rdiff-backup - version = "0.7.1" + version = "0.7.2" # If this is set, use this value in seconds as the current time # instead of reading it from the clock. @@ -45,26 +45,9 @@ class Globals: # If true, try to reset the atimes of the source partition. preserve_atime = None - # This is a list of compiled regular expressions. If one of them - # matches a file in the source area, do not process that file. - exclude_regexps = [] - - # Another list of compiled regexps; this time the file is excluded - # if it matches something in the destination area. - exclude_mirror_regexps = [] - - # If this is true, rdiff-backup will exclude any dev files it - # sees, in the same way it excludes files matching the exclude - # regexps. - exclude_device_files = None - # This will be set as soon as the LocalConnection class loads local_connection = None - # If this is true, instead of processing whole directory, just - # examine files read in from standard input. - include_from_stdin = None - # All connections should be added to the following list, so # further global changes can be propagated to the remote systems. # The first element should be Globals.local_connection. For a @@ -138,12 +121,16 @@ class Globals: # Increments based on files whose names match this # case-insensitive regular expression won't be compressed (applies - # to .snapshots and .diffs). The second below is the compiled - # version of the first. + # to .snapshots and .diffs). The second below will be the + # compiled version of the first. no_compression_regexp_string = ".*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|" \ "jpg|gif|png|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$" no_compression_regexp = None + # On the reader and writer connections, the following will be + # replaced by the source and mirror Select objects respectively. + select_source, select_mirror = None, None + def get(cls, name): """Return the value of something in this class""" return cls.__dict__[name] @@ -181,19 +168,6 @@ class Globals: cls.__dict__[name][key] = val set_dict_val = classmethod(set_dict_val) - def add_regexp(cls, regstr, mirror=None): - """Add a regular expression to the exclude list""" - for conn in Globals.connections: - conn.Globals.add_regexp_local(regstr, mirror) - add_regexp = classmethod(add_regexp) - - def add_regexp_local(cls, regstr, mirror): - """Add the regex only to the local Globals class""" - compiled = re.compile(regstr) - if mirror: Globals.exclude_mirror_regexps.append(compiled) - else: Globals.exclude_regexps.append(compiled) - add_regexp_local = classmethod(add_regexp_local) - def postset_regexp(cls, name, re_string, flags = None): """Compile re_string on all existing connections, set to name""" for conn in Globals.connections: @@ -205,3 +179,13 @@ class Globals: if flags: cls.__dict__[name] = re.compile(re_string, flags) else: cls.__dict__[name] = re.compile(re_string) postset_regexp_local = classmethod(postset_regexp_local) + + def set_select(cls, source, dsrpath, tuplelist): + """Initialize select object using tuplelist""" + if source: + cls.select_source = Select(dsrpath) + cls.select_source.ParseArgs(tuplelist) + else: + cls.select_mirror = Select(dsrpath) + cls.select_mirror.ParseArgs(tuplelist) + set_select = classmethod(set_select) diff --git a/rdiff-backup/src/header.py b/rdiff-backup/src/header.py index 91ef2d2..8d54052 100644 --- a/rdiff-backup/src/header.py +++ b/rdiff-backup/src/header.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # # rdiff-backup -- Mirror files while keeping incremental changes -# Version 0.7.1 released March 25, 2002 +# Version 0.7.2 released April 30, 2002 # Copyright (C) 2001, 2002 Ben Escoto # # This program is licensed under the GNU General Public License (GPL). diff --git a/rdiff-backup/src/main.py b/rdiff-backup/src/main.py index 0558302..9a0d901 100755 --- a/rdiff-backup/src/main.py +++ b/rdiff-backup/src/main.py @@ -13,22 +13,29 @@ class Main: self.action = None self.remote_cmd, self.remote_schema = None, None self.force = None - self.exclude_regstrs = ["/proc"] - self.exclude_mirror_regstrs = [] + self.select_opts, self.select_mirror_opts = [], [] def parse_cmdlineoptions(self): """Parse argument list and set global preferences""" - try: optlist, self.args = getopt.getopt(sys.argv[1:], "blmv:Vs", - ["backup-mode", "version", "verbosity=", "exclude=", - "exclude-mirror=", "server", "test-server", - "remote-cmd=", "mirror-only", "force", - "change-source-perms", "list-increments", - "remove-older-than=", "remote-schema=", - "include-from-stdin", "terminal-verbosity=", - "exclude-device-files", "resume", "no-resume", - "resume-window=", "windows-time-format", - "checkpoint-interval=", "no-hard-links", "current-time=", - "no-compression", "no-compression-regexp="]) + def sel_fl(filename): + """Helper function for including/excluding filelists below""" + try: return open(filename, "r") + except IOError: Log.FatalError("Error opening file %s" % filename) + + try: optlist, self.args = getopt.getopt(sys.argv[1:], "blmsv:V", + ["backup-mode", "change-source-perms", + "checkpoint-interval=", "current-time=", "exclude=", + "exclude-device-files", "exclude-filelist=", + "exclude-filelist-stdin", "exclude-mirror=", + "exclude-regexp=", "force", "include=", + "include-filelist=", "include-filelist-stdin", + "include-regexp=", "list-increments", "mirror-only", + "no-compression", "no-compression-regexp=", + "no-hard-links", "no-resume", "remote-cmd=", + "remote-schema=", "remove-older-than=", "resume", + "resume-window=", "server", "terminal-verbosity=", + "test-server", "verbosity", "version", + "windows-time-format"]) except getopt.error: self.commandline_error("Error parsing commandline options") @@ -40,13 +47,24 @@ class Main: Globals.set_integer('checkpoint_interval', arg) elif opt == "--current-time": Globals.set_integer('current_time', arg) - elif opt == "--exclude": self.exclude_regstrs.append(arg) + elif opt == "--exclude": self.select_opts.append((opt, arg)) elif opt == "--exclude-device-files": - Globals.set('exclude_device_files', 1) + self.select_opts.append((opt, arg)) + elif opt == "--exclude-filelist": + self.select_opts.append((opt, (arg, sel_fl(arg)))) + elif opt == "--exclude-filelist-stdin": + self.select_opts.append((opt, ("standard input", sys.stdin))) elif opt == "--exclude-mirror": - self.exclude_mirror_regstrs.append(arg) + self.select_mirror_opts.append(("--exclude", arg)) + elif opt == "--exclude-regexp": self.select_opts.append((opt, arg)) elif opt == "--force": self.force = 1 - elif opt == "--include-from-stdin": Globals.include_from_stdin = 1 + elif opt == "--include": self.select_opts.append((opt, arg)) + elif opt == "--include-filelist": + self.select_opts.append((opt, (arg, sel_fl(arg)))) + elif opt == "--include-filelist-stdin": + self.select_opts.append((opt, ("standard input", sys.stdin))) + elif opt == "--include-regexp": + self.select_opts.append((opt, arg)) elif opt == "-l" or opt == "--list-increments": self.action = "list-increments" elif opt == "-m" or opt == "--mirror-only": self.action = "mirror" @@ -116,10 +134,10 @@ class Main: for rp in rps: rp.setdata() # Update with userinfo os.umask(077) - for regex_string in self.exclude_regstrs: - Globals.add_regexp(regex_string, None) - for regex_string in self.exclude_mirror_regstrs: - Globals.add_regexp(regex_string, 1) + rps[0].conn.Globals.set_select(1, rps[0], self.select_opts) + if len(rps) == 2: + rps[1].conn.Globals.set_select(None, rps[1], + self.select_mirror_opts) Globals.postset_regexp('no_compression_regexp', Globals.no_compression_regexp_string, re.I) @@ -217,9 +235,6 @@ rdiff-backup with the --force option.""" % rpout.path) except os.error: Log.FatalError("Unable to create directory %s" % rpout.path) if not self.datadir.lstat(): self.datadir.mkdir() - Globals.add_regexp(self.datadir.path, 1) - Globals.add_regexp(rpin.append("rdiff-backup-data").path, None) - if Log.verbosity > 0: Log.open_logfile(self.datadir.append("backup.log")) self.backup_warn_if_infinite_regress(rpin, rpout) @@ -334,7 +349,6 @@ Try restoring from an increment file (the filenames look like else: Log.FatalError("Unable to find rdiff-backup-data dir") Globals.rbdir = self.datadir = datadirrp - Globals.add_regexp(self.datadir.path, 1) rootrp = RPath(rpin.conn, "/".join(pathcomps[:i])) if not rootrp.lstat(): Log.FatalError("Root of mirror area %s does not exist" % diff --git a/rdiff-backup/src/selection.py b/rdiff-backup/src/selection.py index 75d55ce..9f41860 100644 --- a/rdiff-backup/src/selection.py +++ b/rdiff-backup/src/selection.py @@ -1,4 +1,6 @@ +from __future__ import generators execfile("destructive_stepping.py") +import re ####################################################################### # @@ -47,14 +49,23 @@ class Select: be redundant and presumably isn't what the user intends. """ - def __init__(self, dsrpath, starting_index = None): - """DSRPIterator initializer. dsrpath should be the root dir""" + # This re should not match normal filenames, but usually just globs + glob_re = re.compile(".*[\*\?\[]") + + def __init__(self, dsrpath): + """DSRPIterator initializer""" self.selection_functions = [] - if starting_index: - self.iter = self.iterate_starting_from(dsrpath, starting_index, - self.iterate_starting_from) - else: self.iter = self.Iterate(dsrpath, self.Iterate) + self.dsrpath = dsrpath + self.prefix = dsrpath.path + + def set_iter(self, starting_index = None): + """Initialize more variables. dsrpath should be the root dir""" + if starting_index is not None: + self.iter = self.iterate_starting_from(self.dsrpath, + starting_index, self.iterate_starting_from) + else: self.iter = self.Iterate(self.dsrpath, self.Iterate) self.next = self.iter.next + self.__iter__ = lambda: self def Iterate(self, dsrpath, rec_func): """Return iterator yielding dsrps in dsrpath @@ -88,7 +99,7 @@ class Select: """Like Iterate, but only yield indicies > self.starting_index""" if dsrpath.index > self.starting_index: # past starting_index for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp - elif dsrpath.index = self.starting_index[:len(dsrpath.index)]: + elif dsrpath.index == self.starting_index[:len(dsrpath.index)]: # May encounter starting index on this branch for dsrp in self.Iterate(dsrpath, self.iterate_starting_from): yield dsrp @@ -100,50 +111,99 @@ class Select: if result is not None: return result return 1 - def add_selection_func(self, sel_func): - """Add another selection function at the end""" - self.selection_functions.append(sel_func) + def ParseArgs(self, argtuples): + """Create selection functions based on list of tuples + + The tuples have the form (option string, additional argument) + and are created when the initial commandline arguments are + read. The reason for the extra level of processing is that + the filelists may only be openable by the main connection, but + the selection functions need to be on the backup reader or + writer side. When the initial arguments are parsed the right + information is sent over the link. + + """ + for opt, arg in argtuples: + if opt == "--exclude": + self.add_selection_func(self.glob_get_sf(arg, 0)) + elif opt == "--exclude-device-files": + self.add_selection_func(self.devfiles_get_sf()) + elif opt == "--exclude-filelist": + self.add_selection_func(self.filelist_get_sf(arg[1], + 0, arg[0])) + elif opt == "--exclude-regexp": + self.add_selection_func(self.regexp_get_sf(arg, 0)) + elif opt == "--include": + self.add_selection_func(self.glob_get_sf(arg, 1)) + elif opt == "--include-filelist": + self.add_selection_func(self.filelist_get_sf(arg[1], + 1, arg[0])) + elif opt == "--include-regexp": + self.add_selection_func(self.regexp_get_sf(arg, 1)) + else: assert 0, "Bad option %s" % opt + + # Exclude rdiff-backup-data directory + self.add_selection_func( + self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1) + + def add_selection_func(self, sel_func, add_to_start = None): + """Add another selection function at the end or beginning""" + if add_to_start: self.selection_functions.insert(0, sel_func) + else: self.selection_functions.append(sel_func) - def filelist_add_sf(self, filelist_fp, include, filelist_name): - """Adds selection function by reading list of files + def filelist_get_sf(self, filelist_fp, inc_default, filelist_name): + """Return selection function by reading list of files The format of the filelist is documented in the man page. filelist_fp should be an (open) file object. - include should be true if this is an include list, false for - an exclude list. + inc_default should be true if this is an include list, + false for an exclude list. filelist_name is just a string used for logging. """ Log("Reading filelist %s" % filelist_name, 4) tuple_list, something_excluded = \ - self.filelist_read(filelist_fp, include, filelist_name) + self.filelist_read(filelist_fp, inc_default, filelist_name) Log("Sorting filelist %s" % filelist_name, 4) tuple_list.sort() - current_index = 0 + i = [0] # We have to put index in list because of stupid scoping rules + def selection_function(dsrp): - - + if i[0] > len(tuple_list): return inc_default + while 1: + include, move_on = \ + self.filelist_pair_match(dsrp, tuple_list[i[0]]) + if move_on: + i[0] += 1 + if include is None: continue # later line may match + return include + + selection_function.exclude = something_excluded + selection_function.name = "Filelist: " + filelist_name + return selection_function + def filelist_read(self, filelist_fp, include, filelist_name): """Read filelist from fp, return (tuplelist, something_excluded)""" something_excluded, tuple_list = None, [] prefix_warnings = 0 - while 1: - line = filelist_fp.readline() - if not line: break + for line in filelist_fp: + if not line.strip(): continue # skip blanks try: tuple = self.filelist_parse_line(line, include) except FilePrefixError, exp: prefix_warnings += 1 if prefix_warnings < 6: Log("Warning: file specification %s in filelist %s\n" "doesn't start with correct prefix %s, ignoring." % - (exp[0], filelist_name, exp[1]), 2) + (exp, filelist_name, self.prefix), 2) if prefix_warnings == 5: Log("Future prefix errors will not be logged.", 2) tuple_list.append(tuple) if not tuple[1]: something_excluded = 1 + if filelist_fp.close(): + Log("Error closing filelist %s" % filelist_name, 2) return (tuple_list, something_excluded) - def filelist_parse_line(self, line, include, prefix): + def filelist_parse_line(self, line, include): """Parse a single line of a filelist, returning a pair pair will be of form (index, include), where index is another @@ -160,17 +220,110 @@ class Select: include = 0 line = line[2:] - if not line.startswith(prefix+"/"): - raise FilePrefixError(line, prefix+"/") - index = filter(lambda x: x, line.split("/")) # remove empties + if not line.startswith(self.prefix): raise FilePrefixError(line) + line = line[len(self.prefix):] # Discard prefix + index = tuple(filter(lambda x: x, line.split("/"))) # remove empties return (index, include) def filelist_pair_match(self, dsrp, pair): - """Return 0/1 if pair excludes/includes dsrp, None if doesn't match""" + """Matches a filelist tuple against a dsrp + + Returns a pair (include, move_on, definitive). include is + None if the tuple doesn't match either way, and 0/1 if the + tuple excludes or includes the dsrp. + + move_on is true if the tuple cannot match a later index, and + so we should move on to the next tuple in the index. + + """ index, include = pair + if include == 1: + if index < dsrp.index: return (None, 1) + if index == dsrp.index: return (1, 1) + elif index[:len(dsrp.index)] == dsrp.index: + return (1, None) # /foo/bar implicitly includes /foo + else: return (None, None) # dsrp greater, not initial sequence + elif include == 0: + if dsrp.index[:len(index)] == index: + return (0, None) # /foo implicitly excludes /foo/bar + elif index < dsrp.index: return (None, 1) + else: return (None, None) # dsrp greater, not initial sequence + else: assert 0, "Include is %s, should be 0 or 1" % (include,) + + def regexp_get_sf(self, regexp_string, include): + """Return selection function given by regexp_string""" + assert include == 0 or include == 1 + try: regexp = re.compile(regexp_string) + except: + Log("Error compiling regular expression %s" % regexp_string, 1) + raise + + def sel_func(dsrp): + match = regexp.match(dsrp.path) + if match and match.end(0) == len(dsrp.path): return include + else: return None + + sel_func.exclude = not include + sel_func.name = "Regular expression: %s" % regexp_string + return sel_func + + def devfiles_get_sf(self): + """Return a selection function to exclude all dev files""" + if self.selection_functions: + Log("Warning: exclude-device-files is not the first " + "selector.\nThis may not be what you intended", 3) + def sel_func(dsrp): + if dsrp.isdev(): return 0 + else: return None + sel_func.exclude = 1 + sel_func.name = "Exclude device files" + return sel_func + + def glob_get_sf(self, glob_str, include): + """Return selection function given by glob string""" assert include == 0 or include == 1 - if not include and dsrp.index[:len(index)] == index: - return 0 # /foo matches /foo/bar/baz - elif include and index[:len(dsrp.index)] == dsrp.index: - return 1 # /foo/bar implicitly matches /foo for includes only - else: return None + if glob_str == "**": sel_func = lambda dsrp: include + elif not self.glob_re.match(glob_str): # normal file + return self.glob_get_filename_sf(glob_str, include) + else: pass ####XXXXXXXXXXXXX + + sel_func.exclude = not include + sel_func.name = "Command-line glob: %s" % glob_str + return sel_func + + def glob_get_filename_sf(self, filename, include): + """Get a selection function given a normal filename + + Some of the parsing is better explained in + filelist_parse_line. The reason this is split from normal + globbing is so we can check the prefix and give proper + warning. + + """ + if not filename.startswith(self.prefix): + Log("Warning: file specification %s does not start with\n" + "prefix %s, ignoring" % (filename, self.prefix), 2) + return lambda x: None # dummy selection function + index = tuple(filter(lambda x: x, + filename[len(self.prefix):].split("/"))) + return self.glob_get_tuple_sf(index, include) + + def glob_get_tuple_sf(self, tuple, include): + """Add selection function based on tuple""" + def include_sel_func(dsrp): + if (dsrp.index == tuple[:len(dsrp.index)] or + dsrp.index[:len(tuple)] == tuple): + return 1 # /foo/bar implicitly matches /foo, vice-versa + else: return None + + def exclude_sel_func(dsrp): + if dsrp.index[:len(tuple)] == tuple: + return 0 # /foo excludes /foo/bar, not vice-versa + else: return None + + if include == 1: sel_func = include_sel_func + elif include == 0: sel_func = exclude_sel_func + sel_func.exclude = not include + sel_func.name = "Tuple select %s" % (tuple,) + return sel_func + -- cgit v1.2.1