From de66d93df7e024b160116409dedea240056818b5 Mon Sep 17 00:00:00 2001 From: bescoto Date: Fri, 16 Dec 2005 20:25:58 +0000 Subject: Don't gzip 0 length files git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@712 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/CHANGELOG | 4 +++ rdiff-backup/rdiff_backup/log.py | 19 ++++-------- rdiff-backup/rdiff_backup/metadata.py | 39 ++++++++++++++++--------- rdiff-backup/rdiff_backup/rpath.py | 55 +++++++++++++++++++++++++++++++++-- rdiff-backup/testing/rpathtest.py | 28 ++++++++++++++++++ 5 files changed, 117 insertions(+), 28 deletions(-) diff --git a/rdiff-backup/CHANGELOG b/rdiff-backup/CHANGELOG index 6ef64fb..597dcd7 100644 --- a/rdiff-backup/CHANGELOG +++ b/rdiff-backup/CHANGELOG @@ -6,6 +6,10 @@ Fix for restoring files in directories with really long names. rdiff-backup will now exit by default if it thinks another rdiff-backup process is currently working on the same repository. +Empty error_log, mirror_metadata, extended_attribute, and +access_control_lists files will no longer be gzipped (suggestion by +Hans F. Nordhaug). + New in v1.1.4 (2005/12/13) -------------------------- diff --git a/rdiff-backup/rdiff_backup/log.py b/rdiff-backup/rdiff_backup/log.py index 93933e6..47d31b4 100644 --- a/rdiff-backup/rdiff_backup/log.py +++ b/rdiff-backup/rdiff_backup/log.py @@ -19,7 +19,7 @@ """Manage logging, displaying and recording messages with required verbosity""" -import time, sys, traceback, types +import time, sys, traceback, types, rpath import Globals, static, re @@ -203,24 +203,17 @@ class ErrorLog: """ _log_fileobj = None - _log_inc_rp = None def open(cls, time_string, compress = 1): """Open the error log, prepare for writing""" if not Globals.isbackup_writer: return Globals.backup_writer.log.ErrorLog.open(time_string, compress) - assert not cls._log_fileobj and not cls._log_inc_rp, "log already open" + assert not cls._log_fileobj, "log already open" assert Globals.isbackup_writer - if compress: typestr = 'data.gz' - else: typestr = 'data' - cls._log_inc_rp = Globals.rbdir.append("error_log.%s.%s" % - (time_string, typestr)) - assert not cls._log_inc_rp.lstat(), ("""Error file %s already exists. -This is probably caused by your attempting to run two backups simultaneously -or within one second of each other. Wait a second and try again.""" % - (cls._log_inc_rp.path,)) - cls._log_fileobj = cls._log_inc_rp.open("wb", compress = compress) + base_rp = Globals.rbdir.append("error_log.%s.data" % (time_string,)) + if compress: cls._log_fileobj = rpath.MaybeGzip(base_rp) + else: cls._log_fileobj = cls._log_inc_rp.open("wb", compress = 0) def isopen(cls): """True if the error log file is currently open""" @@ -267,7 +260,7 @@ or within one second of each other. Wait a second and try again.""" % if not Globals.isbackup_writer: return Globals.backup_writer.log.ErrorLog.close() assert not cls._log_fileobj.close() - cls._log_fileobj = cls._log_inc_rp = None + cls._log_fileobj = None static.MakeClass(ErrorLog) diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py index bf7cb94..cb8ef41 100644 --- a/rdiff-backup/rdiff_backup/metadata.py +++ b/rdiff-backup/rdiff_backup/metadata.py @@ -355,19 +355,33 @@ class FlatFile: _extractor = FlatExtractor # Override to class that iterates objects _object_to_record = None # Set to function converting object to record _prefix = None # Set to required prefix - def __init__(self, rp, mode, check_path = 1, compress = 1): - """Open rp for reading ('r') or writing ('w')""" - self.rp = rp + def __init__(self, rp_base, mode, check_path = 1, compress = 1, + callback = None): + """Open rp (or rp+'.gz') for reading ('r') or writing ('w') + + If callback is available, it will be called on the rp upon + closing (because the rp may not be known in advance). + + """ self.mode = mode + self.callback = callback self._record_buffer = [] if check_path: - assert rp.isincfile() and rp.getincbase_str() == self._prefix, rp - compress = rp.isinccompressed() + assert (rp_base.isincfile() and + rp_base.getincbase_str() == self._prefix), rp_base + compress = 1 if mode == 'r': + self.rp = rp_base self.fileobj = self.rp.open("rb", compress) else: - assert mode == 'w' and not self.rp.lstat(), (mode, rp) - self.fileobj = self.rp.open("wb", compress) + assert mode == 'w' + if compress and not rp_base.isinccompressed(): + def callback(rp): self.rp = rp + self.fileobj = rpath.MaybeGzip(rp_base, callback) + else: + self.rp = rp_base + assert not self.rp.lstat(), self.rp + self.fileobj = self.rp.open("wb", compress = compress) def write_record(self, record): """Write a (text) record into the file""" @@ -398,12 +412,11 @@ class FlatFile: if self._buffering_on and self._record_buffer: self.fileobj.write("".join(self._record_buffer)) self._record_buffer = [] - try: fileno = self.fileobj.fileno() # will not work if GzipFile - except AttributeError: fileno = self.fileobj.fileobj.fileno() - os.fsync(fileno) result = self.fileobj.close() self.fileobj = None + self.rp.fsync_with_dir() self.rp.setdata() + if self.callback: self.callback(self.rp) return result class MetadataFile(FlatFile): @@ -449,6 +462,7 @@ class Manager: def add_incrp(self, rp): """Add rp to list of inc rps in the rbdir""" + assert rp.isincfile(), rp self.rplist.append(rp) time = rp.getinctime() if self.timerpmap.has_key(time): @@ -508,12 +522,11 @@ class Manager: """Used in the get_xx_writer functions, returns a writer class""" if time is None: timestr = Time.curtimestr else: timestr = Time.timetostring(time) - filename = '%s.%s.%s.gz' % (prefix, timestr, typestr) + filename = '%s.%s.%s' % (prefix, timestr, typestr) rp = Globals.rbdir.append(filename) assert not rp.lstat(), "File %s already exists!" % (rp.path,) assert rp.isincfile() - self.add_incrp(rp) - return flatfileclass(rp, 'w') + return flatfileclass(rp, 'w', callback = self.add_incrp) def get_meta_writer(self, typestr, time): """Return MetadataFile object opened for writing at given time""" diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py index 1f10f1a..7fed29c 100644 --- a/rdiff-backup/rdiff_backup/rpath.py +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -1168,9 +1168,9 @@ class RPath(RORPath): self.fsync(fp) if Globals.fsync_directories: self.get_parent_rp().fsync() - def get_data(self): + def get_data(self, compressed = None): """Open file as a regular file, read data, close, return data""" - fp = self.open("rb") + fp = self.open("rb", compressed) s = fp.read() assert not fp.close() return s @@ -1263,6 +1263,57 @@ class GzipFile(gzip.GzipFile): """ def __del__(self): pass + def __getattr__(self, name): + if name == 'fileno': return self.fileobj.fileno + else: raise AttributeError(name) + + +class MaybeGzip: + """Represent a file object that may or may not be compressed + + We don't want to compress 0 length files. This class lets us + delay the opening of the file until either the first write (so we + know it has data and should be compressed), or close (when there's + no data). + + """ + def __init__(self, base_rp, callback = None): + """Return file-like object with filename based on base_rp""" + assert not base_rp.lstat(), base_rp + self.base_rp = base_rp + # callback will be called with final write rp as only argument + self.callback = callback + self.fileobj = None # Will be None unless data gets written + self.closed = 0 + + def __getattr__(self, name): + if name == 'fileno': return self.fileobj.fileno + else: raise AttributeError(name) + + def get_gzipped_rp(self): + """Return gzipped rp by adding .gz to base_rp""" + if self.base_rp.index: + newind = self.base_rp.index[:-1] + (self.base_rp.index[-1]+'.gz',) + return self.base_rp.new_index(newind) + else: return self.base_rp.append_path('.gz') + + def write(self, buf): + """Write buf to fileobj""" + if self.fileobj: return self.fileobj.write(buf) + if not buf: return + + new_rp = self.get_gzipped_rp() + if self.callback: self.callback(new_rp) + self.fileobj = new_rp.open("w", compress = 1) + return self.fileobj.write(buf) + + def close(self): + """Close related fileobj, pass return value""" + if self.closed: return None + self.closed = 1 + if self.fileobj: return self.fileobj.close() + if self.callback: self.callback(self.base_rp) + self.base_rp.touch() def setdata_local(rpath): diff --git a/rdiff-backup/testing/rpathtest.py b/rdiff-backup/testing/rpathtest.py index 6d7dee2..9db7f2e 100644 --- a/rdiff-backup/testing/rpathtest.py +++ b/rdiff-backup/testing/rpathtest.py @@ -454,5 +454,33 @@ class CheckPath(unittest.TestCase): bin2 = RPath(Globals.local_connection, "/bin") assert bin.path == "/bin", bin2.path +class Gzip(RPathTest): + """Test the gzip related functions/classes""" + def test_maybe_gzip(self): + """Test MaybeGzip""" + dirrp = rpath.RPath(self.lc, "testfiles/output") + re_init_dir(dirrp) + + base_rp = dirrp.append('foo') + fileobj = rpath.MaybeGzip(base_rp) + fileobj.close() + base_rp.setdata() + assert base_rp.isreg(), base_rp + assert base_rp.getsize() == 0 + base_rp.delete() + + base_gz = dirrp.append('foo.gz') + assert not base_gz.lstat() + fileobj = rpath.MaybeGzip(base_rp) + fileobj.write("lala") + fileobj.close() + base_rp.setdata() + base_gz.setdata() + assert not base_rp.lstat() + assert base_gz.isreg(), base_gz + data = base_gz.get_data(compressed = 1) + assert data == "lala", data + + if __name__ == "__main__": unittest.main() -- cgit v1.2.1