From 11ec1472b7b88996c8da45a1d737e2e59e554de0 Mon Sep 17 00:00:00 2001 From: bescoto Date: Thu, 20 Oct 2005 19:34:51 +0000 Subject: Try read/write fsync for cygwin, also add --preserve-numerical-ids git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@638 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/CHANGELOG | 11 ++ rdiff-backup/rdiff-backup.1 | 58 +++++++-- rdiff-backup/rdiff_backup/Main.py | 21 +++- rdiff-backup/rdiff_backup/rpath.py | 41 +++++-- rdiff-backup/rdiff_backup/user_group.py | 202 ++++++++++++++++++-------------- rdiff-backup/testing/roottest.py | 38 +++++- rdiff-backup/testing/user_grouptest.py | 39 ++++-- 7 files changed, 284 insertions(+), 126 deletions(-) diff --git a/rdiff-backup/CHANGELOG b/rdiff-backup/CHANGELOG index 5233609..bde1767 100644 --- a/rdiff-backup/CHANGELOG +++ b/rdiff-backup/CHANGELOG @@ -1,3 +1,14 @@ +New in v1.1.0 (????/??/??) +-------------------------- + +When possible, fsync using a writable file descriptor. This may help +with cygwin. (Requested/tested by Dave Kempe.) + +Support req 104755: Added --preserve-numerical-ids option, which makes +rdiff-backup preserve uids/gids instead of unames/gnames. (Suggested +by Wiebe Cazemier) + + New in v1.0.2 (????/??/??) -------------------------- diff --git a/rdiff-backup/rdiff-backup.1 b/rdiff-backup/rdiff-backup.1 index f338777..019fa80 100644 --- a/rdiff-backup/rdiff-backup.1 +++ b/rdiff-backup/rdiff-backup.1 @@ -301,6 +301,12 @@ or .B --list-increments switches, where the time will be given in seconds since the epoch. .TP +.B --preserve-numerical-ids +If set, rdiff-backup will preserve uids/gids instead of trying to +preserve unames and gnames. See the +.B USERS and GROUPS +section for more information. +.TP .B --print-statistics If set, summary statistics will be printed after a successful backup If not set, this information will still be available from the @@ -817,27 +823,35 @@ if /home/ben/1234567 existed. 
There can be complications preserving ownership across systems. For instance the username that owns a file on the source system may not exist on the destination. Here is how rdiff-backup maps ownership on -the source to the destination: +the source to the destination (or vice-versa, in the case of restoring): .TP .B 1. -Attempt to preserve the user and group names for ownership and in -ACLs. This may result in files having different uids and gids across -systems. +If the --preserve-numerical-ids option is given, the remote files will +always have the same uid and gid, both for ownership and ACL entries. +This may cause unames and gnames to change. .TP .B 2. -If this fails (e.g. because the username does not exist), preserve the -original id, but only in cases of user and group ownership. For ACLs, -omit any entry that has a bad user or group name. +Otherwise, attempt to preserve the user and group names for ownership +and in ACLs. This may result in files having different uids and gids +across systems. .TP .B 3. -However, the +If a name cannot be preserved (e.g. because the username does not +exist), preserve the original id, but only in cases of user and group +ownership. For ACLs, omit any entry that has a bad user or group +name. +.TP +.B 4. +The .B --user-mapping-file and .B --group-mapping-file -options can override this behavior. If either of these options is -given, the policy descriped in 1 and 2 above will be followed, but -with the mapped user and group instead of the original. +options override this behavior. If either of these options is given, +the policy described in 2 and 3 above will be followed, but with the +mapped user and group instead of the original. If you specify both +.B --preserve-numerical-ids +and one of the mapping options, the behavior is undefined. .RE The user and group mapping files both have the same form: @@ -856,6 +870,28 @@ Each line should contain a name or id, followed by a colon ":", followed by another name or id. 
If a name or id is not listed, they are treated in the default way described above. +When restoring, the above behavior is also followed, but note that the +original source user/group information will be the input, not the +already mapped user/group information present in the backup +repository. For instance, suppose you have mapped all the files owned +by +.I alice +in the source so that they are owned by +.I ben +in the repository, and now you want to restore, making sure the files owned originally by +.I alice +are still owned by +.IR alice . +In this case there is no need to use any of the mapping options. +However, if you wanted to restore the files so that the files +originally owned by +.I alice +on the source are now owned by +.IR ben , +you would have to use the mapping options, even though you just want +the unames of the repository's files preserved in the restored files. + + .SH STATISTICS Every session rdiff-backup saves various statistics into two files, the session statistics file at diff --git a/rdiff-backup/rdiff_backup/Main.py b/rdiff-backup/rdiff_backup/Main.py index 62ce7fc..4a6450d 100644 --- a/rdiff-backup/rdiff_backup/Main.py +++ b/rdiff-backup/rdiff_backup/Main.py @@ -33,7 +33,9 @@ remote_cmd, remote_schema = None, None force = None select_opts = [] select_files = [] -user_mapping_filename, group_mapping_filename = None, None +user_mapping_filename, group_mapping_filename, preserve_numerical_ids = \ + None, None, None + # These are global because they are set while we are trying to figure # whether to restore or to backup restore_root, restore_index, restore_root_set = None, None, 0 @@ -43,7 +45,9 @@ def parse_cmdlineoptions(arglist): """Parse argument list and set global preferences""" global args, action, create_full_path, force, restore_timestr, remote_cmd global remote_schema, remove_older_than_string - global user_mapping_filename, group_mapping_filename + global user_mapping_filename, group_mapping_filename, \ + preserve_numerical_ids 
+ def sel_fl(filename): """Helper function for including/excluding filelists below""" try: return open(filename, "r") @@ -73,7 +77,8 @@ def parse_cmdlineoptions(arglist): "no-compression-regexp=", "no-eas", "no-file-statistics", "no-hard-links", "null-separator", "override-chars-to-quote=", "parsable-output", - "print-statistics", "remote-cmd=", "remote-schema=", + "preserve-numerical-ids", "print-statistics", + "remote-cmd=", "remote-schema=", "remove-older-than=", "restore-as-of=", "restrict=", "restrict-read-only=", "restrict-update-only=", "server", "ssh-no-compression", "terminal-verbosity=", "test-server", @@ -156,6 +161,7 @@ def parse_cmdlineoptions(arglist): elif opt == "--override-chars-to-quote": Globals.set('chars_to_quote', arg) elif opt == "--parsable-output": Globals.set('parsable_output', 1) + elif opt == "--preserve-numerical-ids": preserve_numerical_ids = 1 elif opt == "--print-statistics": Globals.set('print_statistics', 1) elif opt == "-r" or opt == "--restore-as-of": restore_timestr, action = arg, "restore-as-of" @@ -233,7 +239,8 @@ def misc_setup(rps): def init_user_group_mapping(destination_conn): """Initialize user and group mapping on destination connection""" - global user_mapping_filename, group_mapping_filename + global user_mapping_filename, group_mapping_filename, \ + preserve_numerical_ids def get_string_from_file(filename): if not filename: return None rp = rpath.RPath(Globals.local_connection, filename) @@ -242,9 +249,11 @@ def init_user_group_mapping(destination_conn): Log.FatalError("Error '%s' reading mapping file '%s'" % (str(e), filename)) user_mapping_string = get_string_from_file(user_mapping_filename) - destination_conn.user_group.init_user_mapping(user_mapping_string) + destination_conn.user_group.init_user_mapping(user_mapping_string, + preserve_numerical_ids) group_mapping_string = get_string_from_file(group_mapping_filename) - destination_conn.user_group.init_group_mapping(group_mapping_string) + 
destination_conn.user_group.init_group_mapping(group_mapping_string, + preserve_numerical_ids) def take_action(rps): """Do whatever action says""" diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py index 95dd54f..8213a23 100644 --- a/rdiff-backup/rdiff_backup/rpath.py +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -152,7 +152,8 @@ def copy_attribs(rpin, rpout): """ log.Log("Copying attributes from %s to %s" % (rpin.index, rpout.path), 7) assert rpin.lstat() == rpout.lstat() or rpin.isspecial() - if Globals.change_ownership: rpout.chown(*user_group.map_rpath(rpin)) + if Globals.change_ownership: + rpout.chown(*rpout.conn.user_group.map_rpath(rpin)) if rpin.issym(): return # symlinks don't have times or perms if Globals.resource_forks_write and rpin.isreg(): rpout.write_resource_fork(rpin.get_resource_fork()) @@ -1068,12 +1069,33 @@ class RPath(RORPath): if not fp: self.conn.rpath.RPath.fsync_local(self) else: os.fsync(fp.fileno()) - def fsync_local(self): - """fsync current file, run locally""" + def fsync_local(self, thunk = None): + """fsync current file, run locally + + If thunk is given, run it before syncing but after gathering + the file's file descriptor. + + """ assert self.conn is Globals.local_connection - fd = os.open(self.path, os.O_RDONLY) - os.fsync(fd) - os.close(fd) + try: + fd = os.open(self.path, os.O_RDONLY) + os.fsync(fd) + os.close(fd) + except OSError, e: + if e.errno != errno.EPERM or self.isdir(): raise + + # Maybe the system doesn't like read-only fsyncing. + # However, to open RDWR, we may need to alter permissions + # temporarily. + if self.hasfullperms(): oldperms = None + else: + oldperms = self.getperms() + self.chmod(0700) + fd = os.open(self.path, os.O_RDWR) + if oldperms is not None: self.chmod(oldperms) + if thunk: thunk() + os.fsync(fd) # Sync after we switch back permissions! 
+ os.close(fd) def fsync_with_dir(self, fp = None): """fsync self and directory self is under""" @@ -1087,11 +1109,7 @@ class RPath(RORPath): file and the directory to make sure. """ - if self.lstat() and not self.issym(): - fp = self.open("rb") - self.delete() - os.fsync(fp.fileno()) - assert not fp.close() + if self.lstat() and not self.issym(): self.fsync_local(self.delete) if Globals.fsync_directories: self.get_parent_rp().fsync() def get_data(self): @@ -1224,6 +1242,7 @@ def setdata_local(rpath): rpath.get_resource_fork() if Globals.carbonfile_conn and rpath.isreg(): rpath.get_carbonfile() + # These two are overwritten by the eas_acls.py module. We can't # import that module directly because of circular dependency problems. def acl_get(rp): assert 0 diff --git a/rdiff-backup/rdiff_backup/user_group.py b/rdiff-backup/rdiff_backup/user_group.py index 9209daf..186e1b6 100644 --- a/rdiff-backup/rdiff_backup/user_group.py +++ b/rdiff-backup/rdiff_backup/user_group.py @@ -25,13 +25,16 @@ this. On the destination connection only, if necessary have a separate dictionary of mappings, which specify how to map users/groups on one -connection to the users/groups on the other. +connection to the users/groups on the other. The UserMap and GroupMap +objects should only be used on the destination. """ import grp, pwd import log, Globals +############ "Private" section - don't use outside user_group ########### + # This should be set to the user UserMap class object if using # user-defined user mapping, and a Map class object otherwise. UserMap = None @@ -40,73 +43,55 @@ UserMap = None # user-defined group mapping, and a Map class object otherwise. 
GroupMap = None - +# Used to cache by uid2uname and gid2gname below uid2uname_dict = {}; gid2gname_dict = {} -def uid2uname(uid): - """Given uid, return uname or None if cannot find""" - try: return uid2uname_dict[uid] - except KeyError: - try: uname = pwd.getpwuid(uid)[0] - except (KeyError, OverflowError), e: uname = None - uid2uname_dict[uid] = uname - return uname - -def gid2gname(gid): - """Given gid, return group name or None if cannot find""" - try: return gid2gname_dict[gid] - except KeyError: - try: gname = grp.getgrgid(gid)[0] - except (KeyError, OverflowError), e: gname = None - gid2gname_dict[gid] = gname - return gname +uname2uid_dict = {} def uname2uid(uname): """Given uname, return uid or None if cannot find""" - try: uname = pwd.getpwnam(uname)[2] - except KeyError: return None + try: return uname2uid_dict[uname] + except KeyError: + try: uid = pwd.getpwnam(uname)[2] + except KeyError: uid = None + uname2uid_dict[uname] = uid + return uid +gname2gid_dict = {} def gname2gid(gname): """Given gname, return gid or None if cannot find""" - try: gname = grp.getgrnam(gname)[2] - except KeyError: return None + try: return gname2gid_dict[gname] + except KeyError: + try: gid = grp.getgrnam(gname)[2] + except KeyError: gid = None + gname2gid_dict[gname] = gid + return gid class Map: """Used for mapping names and id on source side to dest side""" - def __init__(self, name2id_func): - """Map initializer, set dictionaries""" - self.name2id_dict = {} - self.name2id_func = name2id_func + def __init__(self, is_user): + """Initialize, user is true for users, false for groups""" + self.name2id = (is_user and uname2uid) or gname2gid - def get_id(self, id, name = None): + def __call__(self, id, name = None): """Return mapped id from id and, if available, name""" - if not name: return self.get_id_from_id(id) - try: return self.name2id_dict[name] - except KeyError: - out_id = self.find_id(id, name) - self.name2id_dict[name] = out_id - return out_id - - def 
get_id_from_name(self, name): - """Return mapped id from name only, or None if cannot""" - try: return self.name2id_dict[name] - except KeyError: - out_id = self.find_id_from_name(name) - self.name2id_dict[name] = out_id - return out_id - - def get_id_from_id(self, id): return id - - def find_id(self, id, name): - """Find the proper id to use with given id and name""" - try: return self.name2id_func(name) - except KeyError: return id - - def find_id_from_name(self, name): - """Look up proper id to use with name, or None""" - try: return self.name2id_func(name) - except KeyError: return None - + if not name: return id + newid = self.name2id(name) + if newid is None: return id + else: return newid + + def map_acl(self, id, name = None): + """Like get_id, but use this for ACLs. Return id or None + + Unlike ordinary user/group ownership, ACLs are not required + and can be dropped. If we cannot map the name over, return + None. + + """ + if not name: return id + return self.name2id(name) + + class DefinedMap(Map): """Map names and ids on source side to appropriate ids on dest side @@ -114,7 +99,7 @@ class DefinedMap(Map): supersedes Map. """ - def __init__(self, name2id_func, mapping_string): + def __init__(self, is_user, mapping_string): """Initialize object with given mapping string The mapping_string should consist of a number of lines, each which @@ -122,7 +107,7 @@ class DefinedMap(Map): mapping unless user is false, then do group. """ - Map.__init__(self, name2id_func) + Map.__init__(self, is_user) self.name_mapping_dict = {}; self.id_mapping_dict = {} for line in mapping_string.split('\n'): @@ -142,44 +127,89 @@ class DefinedMap(Map): """Return id of id_or_name, failing if cannot. 
Used in __init__""" try: return int(id_or_name) except ValueError: - try: id = self.name2id_func(id_or_name) + try: return self.name2id(id_or_name) except KeyError: log.Log.FatalError("Cannot get id for user or group name " + id_or_name) - return id - def get_id_from_id(self, id): return self.id_mapping_dict.get(id, id) + def __call__(self, id, name = None): + """Return new id given old id and name""" + newid = self.map_acl(id, name) + if newid is None: return id + else: return newid + + def map_acl(self, id, name = None): + """Return new id or None given old and name (used for ACLs)""" + if name: + try: return self.name_mapping_dict[name] + except KeyError: pass + newid = self.name2id(name) + if newid is not None: return newid + try: return self.id_mapping_dict[id] + except KeyError: return None + + +class NumericalMap: + """Simple Map replacement that just keeps numerical uid or gid""" + def __call__(self, id, name = None): return id + def map_acl(self, id, name = None): return id + - def find_id(self, id, name): - """Find proper id to use when source file has give id and name""" - try: return self.name_mapping_dict[name] - except KeyError: - try: return self.id_mapping_dict[id] - except KeyError: return Map.find_id(self, id, name) +############ Public section - don't use outside user_group ########### - def find_id_from_name(self, name): - """Find id to map name to, or None if we can't""" - try: return self.name_mapping_dict[name] - except KeyError: return Map.find_id_from_name(name) -def init_user_mapping(mapping_string = None): - """Initialize user mapping with given mapping string or None""" +def uid2uname(uid): + """Given uid, return uname from passwd file, or None if cannot find""" + try: return uid2uname_dict[uid] + except KeyError: + try: uname = pwd.getpwuid(uid)[0] + except (KeyError, OverflowError), e: uname = None + uid2uname_dict[uid] = uname + return uname + +def gid2gname(gid): + """Given gid, return group name from group file or None if cannot 
find""" + try: return gid2gname_dict[gid] + except KeyError: + try: gname = grp.getgrgid(gid)[0] + except (KeyError, OverflowError), e: gname = None + gid2gname_dict[gid] = gname + return gname + + +def init_user_mapping(mapping_string = None, numerical_ids = None): + """Initialize user mapping with given mapping string + + If numerical_ids is set, just keep the same uid. If either + argument is None, default to preserving uname where possible. + + """ global UserMap - name2id_func = lambda name: pwd.getpwnam(name)[2] - if mapping_string: UserMap = DefinedMap(name2id_func, mapping_string) - else: UserMap = Map(name2id_func) + if numerical_ids: UserMap = NumericalMap() + elif mapping_string: UserMap = DefinedMap(1, mapping_string) + else: UserMap = Map(1) + +def init_group_mapping(mapping_string = None, numerical_ids = None): + """Initialize group mapping with given mapping string -def init_group_mapping(mapping_string = None): - """Initialize the group mapping dictionary with given mapping string""" + If numerical_ids is set, just keep the same gid. If either + argument is None, default to preserving gname where possible. + + """ global GroupMap - name2id_func = lambda name: grp.getgrnam(name)[2] - if mapping_string: GroupMap = DefinedMap(name2id_func, mapping_string) - else: GroupMap = Map(name2id_func) + if numerical_ids: GroupMap = NumericalMap() + elif mapping_string: GroupMap = DefinedMap(0, mapping_string) + else: GroupMap = Map(0) + - def map_rpath(rp): - """Return (uid, gid) of mapped ownership of given rpath""" - old_uid, old_gid = rp.getuidgid() - new_uid = UserMap.get_id(old_uid, rp.getuname()) - new_gid = GroupMap.get_id(old_gid, rp.getgname()) - return (new_uid, new_gid) + """Return mapped (newuid, newgid) from rpath's initial info + + This is the main function exported by the user_group module. Note + that it is connection specific. 
+ + """ + uid, gid = rp.getuidgid() + uname, gname = rp.getuname(), rp.getgname() + return (UserMap(uid, uname), GroupMap(gid, gname)) + diff --git a/rdiff-backup/testing/roottest.py b/rdiff-backup/testing/roottest.py index 4950993..d41d9d7 100644 --- a/rdiff-backup/testing/roottest.py +++ b/rdiff-backup/testing/roottest.py @@ -109,7 +109,43 @@ class RootTest(unittest.TestCase): extra_options = ("--user-mapping-file %s " "--group-mapping-file %s" % (user_map, group_map))) - assert get_ownership(out_rp) == ((userid, 1), (0, 1)), \ + assert get_ownership(out_rp) == ((userid, 0), (0, 1)), \ + get_ownership(out_rp) + + def test_numerical_mapping(self): + """Test --preserve-numerical-ids option + + This doesn't really test much, since we don't have a + convenient system with different uname/ids. + + """ + def write_ownership_dir(): + """Write the directory testfiles/root_mapping""" + rp = rpath.RPath(Globals.local_connection, + "testfiles/root_mapping") + re_init_dir(rp) + rp1 = rp.append('1') + rp1.touch() + rp2 = rp.append('2') + rp2.touch() + rp2.chown(userid, 1) # use groupid 1, usually bin + return rp + + def get_ownership(dir_rp): + """Return pair (ids of dir_rp/1, ids of dir_rp2) of ids""" + rp1, rp2 = map(dir_rp.append, ('1', '2')) + assert rp1.isreg() and rp2.isreg(), (rp1.isreg(), rp2.isreg()) + return (rp1.getuidgid(), rp2.getuidgid()) + + in_rp = write_ownership_dir() + out_rp = rpath.RPath(Globals.local_connection, 'testfiles/output') + if out_rp.lstat(): Myrm(out_rp.path) + + assert get_ownership(in_rp) == ((0,0), (userid, 1)), \ + get_ownership(in_rp) + rdiff_backup(1, 0, in_rp.path, out_rp.path, + extra_options = ("--preserve-numerical-ids")) + assert get_ownership(out_rp) == ((0,0), (userid, 1)), \ get_ownership(in_rp) diff --git a/rdiff-backup/testing/user_grouptest.py b/rdiff-backup/testing/user_grouptest.py index 73c8bcf..9321c53 100644 --- a/rdiff-backup/testing/user_grouptest.py +++ b/rdiff-backup/testing/user_grouptest.py @@ -12,6 +12,19 @@ class 
UserGroupTest(unittest.TestCase): assert user_group.uid2uname(0) == "root" assert user_group.gid2gname(0) == "root" assert user_group.gid2gname(0) == "root" + # Assume no user has uid 29378 + assert user_group.gid2gname(29378) is None + assert user_group.gid2gname(29378) is None + + def test_basic_reverse(self): + """Test basic name2id. Depends on systems users/groups""" + user_group.uname2uid_dict = {}; user_group.gname2gid_dict = {} + assert user_group.uname2uid("root") == 0 + assert user_group.uname2uid("root") == 0 + assert user_group.gname2gid("root") == 0 + assert user_group.gname2gid("root") == 0 + assert user_group.uname2uid("aoeuth3t2ug89") is None + assert user_group.uname2uid("aoeuth3t2ug89") is None def test_default_mapping(self): """Test the default user mapping""" @@ -20,9 +33,10 @@ class UserGroupTest(unittest.TestCase): binid = pwd.getpwnam('bin')[2] syncid = pwd.getpwnam('sync')[2] user_group.init_user_mapping() - assert user_group.UserMap.get_id(0) == 0 - assert user_group.UserMap.get_id(0, 'bin') == binid - assert user_group.UserMap.get_id(binid, 'sync') == syncid + assert user_group.UserMap(0) == 0 + assert user_group.UserMap(0, 'bin') == binid + assert user_group.UserMap(0, 'sync') == syncid + assert user_group.UserMap.map_acl(0, 'aoeuth3t2ug89') is None def test_user_mapping(self): """Test the user mapping file through the DefinedMap class""" @@ -39,15 +53,18 @@ sync:0""" daemonid = pwd.getpwnam('daemon')[2] user_group.init_user_mapping(mapping_string) - assert user_group.UserMap.get_id(rootid, 'root') == binid - assert user_group.UserMap.get_id(binid, 'bin') == rootid - assert user_group.UserMap.get_id(0) == syncid - assert user_group.UserMap.get_id(syncid, 'sync') == 0 - assert user_group.UserMap.get_id(500) == 501 + assert user_group.UserMap(rootid, 'root') == binid + assert user_group.UserMap(binid, 'bin') == rootid + assert user_group.UserMap(0) == syncid + assert user_group.UserMap(syncid, 'sync') == 0 + assert user_group.UserMap(500) == 
501 + + assert user_group.UserMap(501) == 501 + assert user_group.UserMap(123, 'daemon') == daemonid + + assert user_group.UserMap.map_acl(29378, 'aoeuth3t2ug89') is None + assert user_group.UserMap.map_acl(0, 'aoeuth3t2ug89') is syncid - assert user_group.UserMap.get_id(501) == 501 - assert user_group.UserMap.get_id(123, 'daemon') == daemonid - if 0: code.InteractiveConsole(globals()).interact() def test_overflow(self): -- cgit v1.2.1