From e4c108ce309ff198af46f99acfdd1ef95d90acec Mon Sep 17 00:00:00 2001 From: joshn Date: Thu, 9 Apr 2009 13:59:19 +0000 Subject: Use Unicode for paths internally to add support for Unicode on Windows. git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@1053 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/CHANGELOG | 3 ++ rdiff-backup/rdiff_backup/FilenameMapping.py | 5 +++- rdiff-backup/rdiff_backup/eas_acls.py | 39 ++++++++++++++++---------- rdiff-backup/rdiff_backup/log.py | 31 ++++++++++++++++---- rdiff-backup/rdiff_backup/metadata.py | 10 ++++--- rdiff-backup/rdiff_backup/rpath.py | 42 ++++++++++++++++++++++++++-- rdiff-backup/rdiff_backup/statistics.py | 9 +++--- rdiff-backup/rdiff_backup/win_acls.py | 4 +-- 8 files changed, 108 insertions(+), 35 deletions(-) diff --git a/rdiff-backup/CHANGELOG b/rdiff-backup/CHANGELOG index 0b746f5..a8573c8 100644 --- a/rdiff-backup/CHANGELOG +++ b/rdiff-backup/CHANGELOG @@ -1,6 +1,9 @@ New in v1.3.4 (????/??/??) --------------------------- +Start using Unicode internally for filenames. This fixes Unicode support +on Windows (Josh Nisly) + Don't print "Fatal Error" if --check-destination-dir completed successfully. Thanks to Serge Zub for the suggestion. (Andrew Ferguson) diff --git a/rdiff-backup/rdiff_backup/FilenameMapping.py b/rdiff-backup/rdiff_backup/FilenameMapping.py index 046a4be..a5f8ff8 100644 --- a/rdiff-backup/rdiff_backup/FilenameMapping.py +++ b/rdiff-backup/rdiff_backup/FilenameMapping.py @@ -158,7 +158,10 @@ class QuotedRPath(rpath.RPath): correctly and append()ed to the currect QuotedRPath. """ - return map(unquote, self.conn.os.listdir(self.path)) + path = self.path + if type(path) != unicode: + path = unicode(path, 'utf-8') + return map(unquote, self.conn.os.listdir(path)) def __str__(self): return "QuotedPath: %s\nIndex: %s\nData: %s" % \ diff --git a/rdiff-backup/rdiff_backup/eas_acls.py b/rdiff-backup/rdiff_backup/eas_acls.py index da01051..817a052 100644 --- a/rdiff-backup/rdiff_backup/eas_acls.py +++ b/rdiff-backup/rdiff_backup/eas_acls.py @@ -57,7 +57,8 @@ class ExtendedAttributes: def read_from_rp(self, rp): """Set the extended attributes from an rpath""" try: - attr_list = rp.conn.xattr.listxattr(rp.path, rp.issym()) + attr_list = rp.conn.xattr.listxattr(rp.path.encode('utf-8'), + rp.issym()) except IOError, exc: if exc[0] in (errno.EOPNOTSUPP, errno.EPERM, errno.ETXTBSY): return # if not supported, consider empty @@ -74,7 +75,8 @@ class ExtendedAttributes: continue try: self.attr_dict[attr] = \ - rp.conn.xattr.getxattr(rp.path, attr, rp.issym()) + rp.conn.xattr.getxattr(rp.path.encode('utf-8'), + attr, rp.issym()) except IOError, exc: # File probably modified while reading, just continue if exc[0] == errno.ENODATA: continue @@ -86,9 +88,11 @@ class ExtendedAttributes: def clear_rp(self, rp): """Delete all the extended attributes in rpath""" try: - for name in rp.conn.xattr.listxattr(rp.path, rp.issym()): + for name in rp.conn.xattr.listxattr(rp.path.encode('utf-8'), + rp.issym()): try: - rp.conn.xattr.removexattr(rp.path, name, rp.issym()) + rp.conn.xattr.removexattr(rp.path.encode('utf-8'), + name, rp.issym()) except IOError, exc: # SELinux attributes cannot be removed, and we don't want # to bail out or be too noisy at low log levels. @@ -111,7 +115,8 @@ class ExtendedAttributes: self.clear_rp(rp) for (name, value) in self.attr_dict.iteritems(): try: - rp.conn.xattr.setxattr(rp.path, name, value, 0, rp.issym()) + rp.conn.xattr.setxattr(rp.path.encode('utf-8'), name, + value, 0, rp.issym()) except IOError, exc: # Mac and Linux attributes have different namespaces, so # fail gracefully if can't call setxattr @@ -149,13 +154,14 @@ def ea_compare_rps(rp1, rp2): def EA2Record(ea): """Convert ExtendedAttributes object to text record""" - str_list = ['# file: %s' % C.acl_quote(ea.get_indexpath())] + str_list = ['# file: %s' % C.acl_quote(ea.get_indexpath().encode('utf-8'))] for (name, val) in ea.attr_dict.iteritems(): if not val: str_list.append(name) else: encoded_val = base64.encodestring(val).replace('\n', '') try: - str_list.append('%s=0s%s' % (C.acl_quote(name), encoded_val)) + str_list.append('%s=0s%s' % (C.acl_quote(name.encode('utf-8')), + encoded_val)) except UnicodeEncodeError: log.Log("Warning: unable to store Unicode extended attribute %s" % repr(name), 3) @@ -169,7 +175,8 @@ def Record2EA(record): raise metadata.ParsingError("Bad record beginning: " + first[:8]) filename = first[8:] if filename == '.': index = () - else: index = tuple(C.acl_unquote(filename).split('/')) + else: index = tuple(unicode(C.acl_unquote(filename.encode('utf-8')), + 'utf-8').split('/')) ea = ExtendedAttributes(index) for line in lines: @@ -194,7 +201,7 @@ class EAExtractor(metadata.FlatExtractor): def filename_to_index(self, filename): """Convert possibly quoted filename to index tuple""" if filename == '.': return () - else: return tuple(C.acl_unquote(filename).split('/')) + else: return tuple(C.acl_unquote(filename.encode('utf-8')).split('/')) class ExtendedAttributesFile(metadata.FlatFile): """Store/retrieve EAs from extended_attributes file""" @@ -379,7 +386,7 @@ def set_rp_acl(rp, entry_list = None, default_entry_list = None, else: acl = posix1e.ACL() try: - acl.applyto(rp.path) + acl.applyto(rp.path.encode('utf-8')) except IOError, exc: if exc[0] == errno.EOPNOTSUPP: log.Log("Warning: unable to set ACL on %s: %s" % @@ -391,12 +398,12 @@ def set_rp_acl(rp, entry_list = None, default_entry_list = None, if default_entry_list: def_acl = list_to_acl(default_entry_list, map_names) else: def_acl = posix1e.ACL() - def_acl.applyto(rp.path, posix1e.ACL_TYPE_DEFAULT) + def_acl.applyto(rp.path.encode('utf-8'), posix1e.ACL_TYPE_DEFAULT) def get_acl_lists_from_rp(rp): """Returns (acl_list, def_acl_list) from an rpath. Call locally""" assert rp.conn is Globals.local_connection - try: acl = posix1e.ACL(file=rp.path) + try: acl = posix1e.ACL(file=rp.path.encode('utf-8')) except IOError, exc: if exc[0] == errno.EOPNOTSUPP: acl = None @@ -406,7 +413,7 @@ def get_acl_lists_from_rp(rp): acl = None else: raise if rp.isdir(): - try: def_acl = posix1e.ACL(filedef=rp.path) + try: def_acl = posix1e.ACL(filedef=rp.path.encode('utf-8')) except IOError, exc: if exc[0] == errno.EOPNOTSUPP: def_acl = None @@ -533,7 +540,8 @@ def acl_compare_rps(rp1, rp2): def ACL2Record(acl): """Convert an AccessControlLists object into a text record""" - return '# file: %s\n%s\n' % (C.acl_quote(acl.get_indexpath()), str(acl)) + return '# file: %s\n%s\n' % \ + (C.acl_quote(acl.get_indexpath().encode('utf-8')), str(acl)) def Record2ACL(record): """Convert text record to an AccessControlLists object""" @@ -543,7 +551,8 @@ def Record2ACL(record): raise metadata.ParsingError("Bad record beginning: "+ first_line) filename = first_line[8:] if filename == '.': index = () - else: index = tuple(C.acl_unquote(filename).split('/')) + else: index = tuple(unicode(C.acl_unquote(filename.encode('utf-8')), + 'utf-8').split('/')) return AccessControlLists(index, record[newline_pos:]) class ACLExtractor(EAExtractor): diff --git a/rdiff-backup/rdiff_backup/log.py b/rdiff-backup/rdiff_backup/log.py index 57ce2dd..1097503 100644 --- a/rdiff-backup/rdiff_backup/log.py +++ b/rdiff-backup/rdiff_backup/log.py @@ -125,7 +125,11 @@ class Logger: """Write the message to the log file, if possible""" if self.log_file_open: if self.log_file_local: - self.logfp.write(self.format(message, self.verbosity)) + str = self.format(message, self.verbosity) + if type(str) != unicode: + str = unicode(str, 'utf-8') + str = str.encode('utf-8') + self.logfp.write(str) self.logfp.flush() else: self.log_file_conn.log.Log.log_to_file(message) @@ -133,7 +137,14 @@ class Logger: """Write message to stdout/stderr""" if verbosity <= 2 or Globals.server: termfp = sys.stderr else: termfp = sys.stdout - termfp.write(self.format(message, self.term_verbosity)) + str = self.format(message, self.term_verbosity) + if type(str) != unicode: + str = unicode(str, 'utf-8') + try: + # Try to log as unicode, but fall back to ascii (for Windows) + termfp.write(str.encode('utf-8')) + except UnicodeDecodeError: + termfp.write(str.encode('ascii', 'replace')) def conn(self, direction, result, req_num): """Log some data on the connection @@ -165,10 +176,17 @@ class Logger: def exception_to_string(self, arglist = []): """Return string version of current exception plus what's in arglist""" type, value, tb = sys.exc_info() - s = ("Exception '%s' raised of class '%s':\n%s" % - (value, type, "".join(traceback.format_tb(tb)))) + s = (u"Exception '%s' raised of class '%s':\n%s" % + (value, type, u"".join(traceback.format_tb(tb)))) + s = s.encode('ascii', 'replace') if arglist: - s += "__Arguments:\n" + "\n".join(map(str, arglist)) + s += "__Arguments:" + for arg in arglist: + s += "\n" + try: + s += str(arg) + except UnicodeError: + s += unicode(arg).encode('ascii', 'replace') return s def exception(self, only_terminal = 0, verbosity = 5): @@ -259,7 +277,8 @@ class ErrorLog: """Return log string to put in error log""" assert (error_type == "ListError" or error_type == "UpdateError" or error_type == "SpecialFileError"), "Unknown type "+error_type - return "%s %s %s" % (error_type, cls.get_indexpath(rp), str(exc)) + str = u"%s %s %s" % (error_type, cls.get_indexpath(rp), unicode(exc)) + return str.encode('utf-8') def close(cls): """Close the error log file""" diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py index 13d5d5d..2e7031e 100644 --- a/rdiff-backup/rdiff_backup/metadata.py +++ b/rdiff-backup/rdiff_backup/metadata.py @@ -55,7 +55,7 @@ field names and values. """ from __future__ import generators -import re, gzip, os, binascii +import re, gzip, os, binascii, codecs import log, Globals, rpath, Time, robust, increment, static, rorpiter class ParsingError(Exception): @@ -376,16 +376,18 @@ class FlatFile: compress = 1 if mode == 'r': self.rp = rp_base - self.fileobj = self.rp.open("rb", compress) + self.fileobj = rpath.UnicodeFile(self.rp.open("rb", compress)) else: assert mode == 'w' if compress and check_path and not rp_base.isinccompressed(): def callback(rp): self.rp = rp - self.fileobj = rpath.MaybeGzip(rp_base, callback) + self.fileobj = rpath.UnicodeFile(rpath.MaybeGzip(rp_base, + callback)) else: self.rp = rp_base assert not self.rp.lstat(), self.rp - self.fileobj = self.rp.open("wb", compress = compress) + self.fileobj = rpath.UnicodeFile(self.rp.open("wb", + compress = compress)) def write_record(self, record): """Write a (text) record into the file""" diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py index 5c37068..3fc432b 100644 --- a/rdiff-backup/rdiff_backup/rpath.py +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -35,7 +35,7 @@ are dealing with are local or remote. """ -import os, stat, re, sys, shutil, gzip, socket, time, errno +import os, stat, re, sys, shutil, gzip, socket, time, errno, codecs import Globals, Time, static, log, user_group, C try: @@ -284,6 +284,8 @@ def make_file_dict(filename): """ if os.name != 'nt': try: + if type(filename) == unicode: + filename = filename.encode('utf-8') return C.make_file_dict(filename) except OSError, error: # Unicode filenames should be process by the Python version @@ -333,7 +335,7 @@ def make_file_dict_python(filename): data['nlink'] = statblock[stat.ST_NLINK] if os.name == 'nt': - attribs = win32file.GetFileAttributes(filename) + attribs = win32file.GetFileAttributesW(filename) if attribs & winnt.FILE_ATTRIBUTE_REPARSE_POINT: data['type'] = 'sym' data['linkname'] = None @@ -995,7 +997,12 @@ class RPath(RORPath): def listdir(self): """Return list of string paths returned by os.listdir""" - return self.conn.os.listdir(self.path) + path = self.path + # Use pass in unicode to os.listdir, so that the returned + # entries are in unicode. + if type(path) != unicode: + path = unicode(path, 'utf-8') + return self.conn.os.listdir(path) def symlink(self, linktext): """Make symlink at self.path pointing to linktext""" @@ -1406,6 +1413,23 @@ class RPath(RORPath): write_win_acl(self, acl) self.data['win_acl'] = acl +class UnicodeFile: + """ Wraps a RPath and reads/writes unicode. """ + + def __init__(self, fileobj): + self.fileobj = fileobj + + def read(self, length = -1): + return unicode(self.fileobj.read(length), 'utf-8') + + def write(self, buf): + if type(buf) != unicode: + buf = unicode(buf, 'utf-8') + return self.fileobj.write(buf.encode('utf-8')) + + def close(self): + return self.fileobj.close() + class RPathFileHook: """Look like a file, but add closing hook""" def __init__(self, file, closing_thunk): @@ -1429,6 +1453,18 @@ class GzipFile(gzip.GzipFile): messages. Use this class instead to clean those up. """ + def __init__(self, filename=None, mode=None): + """ This is needed because we need to write an + encoded filename to the file, but use normal + unicode with the filename.""" + if mode and 'b' not in mode: + mode += 'b' + if type(filename) != unicode: + filename = unicode(filename, 'utf-8') + fileobj = open(filename, mode or 'rb') + gzip.GzipFile.__init__(self, filename.encode('utf-8'), + mode=mode, fileobj=fileobj) + def __del__(self): pass def __getattr__(self, name): if name == 'fileno': return self.fileobj.fileno diff --git a/rdiff-backup/rdiff_backup/statistics.py b/rdiff-backup/rdiff_backup/statistics.py index 0bdd439..1df602a 100644 --- a/rdiff-backup/rdiff_backup/statistics.py +++ b/rdiff-backup/rdiff_backup/statistics.py @@ -20,7 +20,7 @@ """Generate and process aggregated backup information""" import re, os, time -import Globals, Time, increment, log, static, metadata +import Globals, Time, increment, log, static, metadata, rpath class StatsException(Exception): pass @@ -219,13 +219,13 @@ class StatsObj: def write_stats_to_rp(self, rp): """Write statistics string to given rpath""" - fp = rp.open("wb") + fp = rpath.UnicodeFile(rp.open("wb")) fp.write(self.get_stats_string()) assert not fp.close() def read_stats_from_rp(self, rp): """Set statistics from rpath, return self for convenience""" - fp = rp.open("r") + fp = rpath.UnicodeFile(rp.open("r")) self.set_stats_from_string(fp.read()) fp.close() return self @@ -364,7 +364,8 @@ class FileStats: suffix = Globals.compression and 'data.gz' or 'data' cls._rp = increment.get_inc(rpbase, suffix, Time.curtime) assert not cls._rp.lstat() - cls._fileobj = cls._rp.open("wb", compress = Globals.compression) + cls._fileobj = rpath.UnicodeFile(cls._rp.open("wb", + compress = Globals.compression)) cls._line_sep = Globals.null_separator and '\0' or '\n' cls.write_docstring() diff --git a/rdiff-backup/rdiff_backup/win_acls.py b/rdiff-backup/rdiff_backup/win_acls.py index b7e322a..603a5fe 100644 --- a/rdiff-backup/rdiff_backup/win_acls.py +++ b/rdiff-backup/rdiff_backup/win_acls.py @@ -181,7 +181,7 @@ class ACL: def __str__(self): return '# file: %s\n%s\n' % \ - (C.acl_quote(self.get_indexpath()), unicode(self.__acl)) + (self.get_indexpath(), unicode(self.__acl)) def from_string(self, acl_str): lines = acl_str.splitlines() @@ -189,7 +189,7 @@ class ACL: raise metadata.ParsingError("Bad record beginning: " + lines[0][:8]) filename = lines[0][8:] if filename == '.': self.index = () - else: self.index = tuple(C.acl_unquote(filename).split('/')) + else: self.index = tuple(filename.split('/')) self.__acl = lines[1] def Record2WACL(record): -- cgit v1.2.1