From 6f1dde2f87241290d29f7cb6701efc3374f32838 Mon Sep 17 00:00:00 2001 From: ben Date: Fri, 24 May 2002 08:40:48 +0000 Subject: Added statistics.py for more sophisticated statistics handling git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@104 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/rdiff_backup/increment.py | 95 +++++++---------------- rdiff-backup/rdiff_backup/statistics.py | 133 ++++++++++++++++++++++++++++++++ rdiff-backup/src/Make | 5 +- rdiff-backup/src/increment.py | 95 +++++++---------------- rdiff-backup/src/statistics.py | 133 ++++++++++++++++++++++++++++++++ 5 files changed, 323 insertions(+), 138 deletions(-) create mode 100644 rdiff-backup/rdiff_backup/statistics.py create mode 100644 rdiff-backup/src/statistics.py diff --git a/rdiff-backup/rdiff_backup/increment.py b/rdiff-backup/rdiff_backup/increment.py index 4e96e59..499d709 100644 --- a/rdiff-backup/rdiff_backup/increment.py +++ b/rdiff-backup/rdiff_backup/increment.py @@ -1,4 +1,4 @@ -execfile("filename_mapping.py") +execfile("statistics.py") ####################################################################### # @@ -108,7 +108,7 @@ class Inc: MakeStatic(Inc) -class IncrementITR(IterTreeReducer): +class IncrementITR(StatsITR): """Patch and increment iterator of increment triples This has to be an ITR because directories that have files in them @@ -127,13 +127,13 @@ class IncrementITR(IterTreeReducer): Remember this object needs to be pickable. """ - directory, directory_replacement = None, None + mirror_isdirectory, directory_replacement = None, None changed = None def __init__(self, inc_rpath): """Set inc_rpath, an rpath of the base of the tree""" self.inc_rpath = inc_rpath - IterTreeReducer.__init__(self, inc_rpath) + StatsITR.__init__(self, inc_rpath) def start_process(self, index, diff_rorp, dsrp): """Initial processing of file @@ -142,34 +142,15 @@ class IncrementITR(IterTreeReducer): dsrp is the local file to be incremented """ - self.init_statistics(diff_rorp, dsrp) + self.start_stats(dsrp) incpref = self.inc_rpath.new_index(index) if Globals.quoting_enabled: incpref.quote_path() if dsrp.isdir(): self.init_dir(dsrp, diff_rorp, incpref) - self.setvals(diff_rorp, dsrp, incpref) + self.mirror_isdirectory = 1 else: self.init_non_dir(dsrp, diff_rorp, incpref) - - def init_statistics(self, diff_rorp, dsrp): - """Set initial values for various statistics - - These refer to the old mirror or to new increment files. Note - that changed_file_size could be bigger than total_file_size. - The other statistic, increment_file_size, is set later when we - have that information. - - """ - if dsrp.lstat(): - self.total_files = 1 - self.total_file_size = dsrp.getsize() - else: self.total_files = self.total_file_size = 0 - if diff_rorp: - self.changed_files = 1 - if dsrp.lstat(): self.changed_file_size = dsrp.getsize() - else: self.changed_file_size = 0 - else: self.changed_files = self.changed_file_size = 0 - self.increment_file_size = 0 - + self.setvals(diff_rorp, dsrp, incpref) + def override_changed(self): """Set changed flag to true @@ -187,7 +168,6 @@ class IncrementITR(IterTreeReducer): superclass. """ - self.directory = 1 self.diff_rorp = diff_rorp self.dsrp = dsrp self.incpref = incpref @@ -224,53 +204,32 @@ class IncrementITR(IterTreeReducer): Robust.chain([Inc.Increment_action(diff_rorp, dsrp, incpref), RORPIter.patchonce_action(None, dsrp, diff_rorp)] ).execute() - - self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat() - and Inc._inc_file.getsize()) or 0) self.changed = 1 def end_process(self): """Do final work when leaving a tree (directory)""" - if not self.directory: return diff_rorp, dsrp, incpref = self.diff_rorp, self.dsrp, self.incpref - if not diff_rorp and not self.changed: return - - if self.directory_replacement: - tf = self.directory_replacement - Inc.Increment(tf, dsrp, incpref) - RORPIter.patchonce_action(None, dsrp, tf).execute() - tf.delete() - else: - Inc.Increment(diff_rorp, dsrp, incpref) - if diff_rorp: - RORPIter.patchonce_action(None, dsrp, diff_rorp).execute() - - self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat() - and Inc._inc_file.getsize()) or 0) - self.write_statistics() - - def write_statistics(self): - """Write the accumulated totals into file in inc directory""" - if not self.incpref.isdir(): return # only write for directories - statrp = Inc.get_inc_ext(self.incpref.append("directory_statistics"), - "data") - tf = TempFileManager.new(statrp) - def init_thunk(): - fp = tf.open("w") - fp.write("TotalFiles %d\n" % self.total_files) - fp.write("TotalFileSize %d\n" % self.total_file_size) - fp.write("ChangedFiles %d\n" % self.changed_files) - fp.write("ChangedFileSize %d\n" % self.changed_file_size) - fp.write("IncrementFileSize %d\n" % self.increment_file_size) - fp.close() - Robust.make_tf_robustaction(init_thunk, (tf,), (statrp,)).execute() + if self.mirror_isdirectory: + if not diff_rorp and not self.changed: return + + if self.directory_replacement: + tf = self.directory_replacement + Inc.Increment(tf, dsrp, incpref) + RORPIter.patchonce_action(None, dsrp, tf).execute() + tf.delete() + else: + Inc.Increment(diff_rorp, dsrp, incpref) + if diff_rorp: + RORPIter.patchonce_action(None, dsrp, diff_rorp).execute() + + self.end_stats(diff_rorp, dsrp, Inc._inc_file) + if self.incpref.isdir() and (self.mirror_isdirectory or dsrp.isdir()): + self.write_stats_to_rp(Inc.get_inc_ext( + self.incpref.append("directory_statistics"), "data")) def branch_process(self, subinstance): """Update statistics, and the has_changed flag if change in branch""" if subinstance.changed: self.changed = 1 + self.add_file_stats(subinstance) + - self.total_files += subinstance.total_files - self.total_file_size += subinstance.total_file_size - self.changed_files += subinstance.changed_files - self.changed_file_size += subinstance.changed_file_size - self.increment_file_size += subinstance.increment_file_size diff --git a/rdiff-backup/rdiff_backup/statistics.py b/rdiff-backup/rdiff_backup/statistics.py new file mode 100644 index 0000000..a121591 --- /dev/null +++ b/rdiff-backup/rdiff_backup/statistics.py @@ -0,0 +1,133 @@ +execfile("filename_mapping.py") + +####################################################################### +# +# statistics - Generate and process aggregated backup information +# + +class StatsException(Exception): pass + +class StatsObj: + """Contains various statistics, provide string conversion functions""" + + stat_file_attrs = ('SourceFiles', 'SourceFileSize', + 'MirrorFiles', 'MirrorFileSize', + 'NewFiles', 'NewFileSize', + 'DeletedFiles', 'DeletedFileSize', + 'ChangedFiles', + 'ChangedSourceSize', 'ChangedMirrorSize', + 'IncrementFileSize') + stat_time_attrs = ('StartTime', 'EndTime') + stat_attrs = stat_time_attrs + stat_file_attrs + + # Set all stats to None, indicating info not available + for attr in stat_attrs: locals()[attr] = None + + def get_stat(self, attribute): + """Get a statistic""" + try: return self.__dict__[attribute] + except KeyError: + # this may be a hack, but seems no good way to get attrs in python + return eval("self.%s" % attribute) + + def set_stat(self, attr, value): + """Set attribute to given value""" + self.__dict__[attr] = value + + def get_stats_string(self): + """Return string printing out statistics""" + slist = ["%s %s" % (attr, self.get_stat(attr)) + for attr in self.stat_attrs + if self.get_stat(attr) is not None] + return "\n".join(slist) + + def init_stats_from_string(self, s): + """Initialize attributes from string, return self for convenience""" + def error(line): raise StatsException("Bad line '%s'" % line) + + for line in s.split("\n"): + if not line: continue + line_parts = line.split() + if len(line_parts) < 2: error(line) + attr, value_string = line_parts[:2] + if not attr in self.stat_attrs: error(line) + try: self.set_stat(attr, long(value_string)) + except ValueError: error(line) + return self + + def write_stats_to_rp(self, rp): + """Write statistics string to given rpath""" + tf = TempFileManager.new(rp) + def init_thunk(): + fp = tf.open("w") + fp.write(self.get_stats_string()) + fp.close() + Robust.make_tf_robustaction(init_thunk, (tf,), (rp,)).execute() + + def read_stats_from_rp(self, rp): + """Set statistics from rpath, return self for convenience""" + fp = rp.open("r") + self.init_stats_from_string(fp.read()) + fp.close() + return self + + def stats_equal(self, s): + """Return true if s has same statistics as self""" + assert isinstance(s, StatsObj) + for attr in self.stat_file_attrs: + if self.get_stat(attr) != s.get_stat(attr): return None + return 1 + + +class StatsITR(IterTreeReducer, StatsObj): + """Keep track of per directory statistics + + This is subclassed by the mirroring and incrementing ITRs. + + """ + # zero out file statistics + for attr in StatsObj.stat_file_attrs: locals()[attr] = 0 + + def start_stats(self, mirror_dsrp): + """Record status of mirror dsrp + + This is called before the mirror is processed so we remember + the old state. + + """ + if mirror_dsrp.lstat(): + self.mirror_base_exists = 1 + self.mirror_base_size = mirror_dsrp.getsize() + else: self.mirror_base_exists = None + + def end_stats(self, diff_rorp, mirror_dsrp, inc_rp = None): + """Set various statistics after mirror processed""" + if mirror_dsrp.lstat(): + self.SourceFiles += 1 + self.SourceFileSize += mirror_dsrp.getsize() + if self.mirror_base_exists: + self.MirrorFiles += 1 + self.MirrorFileSize += self.mirror_base_size + if diff_rorp: # otherwise no change + self.ChangedFiles += 1 + self.ChangedSourceSize += mirror_dsrp.getsize() + self.ChangedMirrorSize += self.mirror_base_size + self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0 + else: # new file was created + self.NewFiles += 1 + self.NewFileSize += mirror_dsrp.getsize() + self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0 + else: + if self.mirror_base_exists: # file was deleted from mirror + self.MirrorFiles += 1 + self.MirrorFileSize += self.mirror_base_size + self.DeletedFiles += 1 + self.DeletedFileSize += self.mirror_base_size + self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0 + else: assert None # One of before and after should exist + + def add_file_stats(self, subinstance): + """Add all file statistics from subinstance to current totals""" + for attr in self.stat_file_attrs: + self.set_stat(attr, + self.get_stat(attr) + subinstance.get_stat(attr)) diff --git a/rdiff-backup/src/Make b/rdiff-backup/src/Make index b78ed95..2b79ffe 100755 --- a/rdiff-backup/src/Make +++ b/rdiff-backup/src/Make @@ -24,8 +24,9 @@ files = ["globals.py", "static.py", "lazy.py", "log.py", "ttime.py", "iterfile.py", "rdiff.py", "connection.py", "rpath.py", "hardlink.py", "robust.py", "rorpiter.py", "destructive_stepping.py", "selection.py", - "filename_mapping.py", "increment.py", "restore.py", - "manage.py", "highlevel.py", "setconnections.py", "main.py"] + "filename_mapping.py", "statistics.py", "increment.py", + "restore.py", "manage.py", "highlevel.py", + "setconnections.py", "main.py"] os.system("cp header.py rdiff-backup") diff --git a/rdiff-backup/src/increment.py b/rdiff-backup/src/increment.py index 4e96e59..499d709 100644 --- a/rdiff-backup/src/increment.py +++ b/rdiff-backup/src/increment.py @@ -1,4 +1,4 @@ -execfile("filename_mapping.py") +execfile("statistics.py") ####################################################################### # @@ -108,7 +108,7 @@ class Inc: MakeStatic(Inc) -class IncrementITR(IterTreeReducer): +class IncrementITR(StatsITR): """Patch and increment iterator of increment triples This has to be an ITR because directories that have files in them @@ -127,13 +127,13 @@ class IncrementITR(IterTreeReducer): Remember this object needs to be pickable. """ - directory, directory_replacement = None, None + mirror_isdirectory, directory_replacement = None, None changed = None def __init__(self, inc_rpath): """Set inc_rpath, an rpath of the base of the tree""" self.inc_rpath = inc_rpath - IterTreeReducer.__init__(self, inc_rpath) + StatsITR.__init__(self, inc_rpath) def start_process(self, index, diff_rorp, dsrp): """Initial processing of file @@ -142,34 +142,15 @@ class IncrementITR(IterTreeReducer): dsrp is the local file to be incremented """ - self.init_statistics(diff_rorp, dsrp) + self.start_stats(dsrp) incpref = self.inc_rpath.new_index(index) if Globals.quoting_enabled: incpref.quote_path() if dsrp.isdir(): self.init_dir(dsrp, diff_rorp, incpref) - self.setvals(diff_rorp, dsrp, incpref) + self.mirror_isdirectory = 1 else: self.init_non_dir(dsrp, diff_rorp, incpref) - - def init_statistics(self, diff_rorp, dsrp): - """Set initial values for various statistics - - These refer to the old mirror or to new increment files. Note - that changed_file_size could be bigger than total_file_size. - The other statistic, increment_file_size, is set later when we - have that information. - - """ - if dsrp.lstat(): - self.total_files = 1 - self.total_file_size = dsrp.getsize() - else: self.total_files = self.total_file_size = 0 - if diff_rorp: - self.changed_files = 1 - if dsrp.lstat(): self.changed_file_size = dsrp.getsize() - else: self.changed_file_size = 0 - else: self.changed_files = self.changed_file_size = 0 - self.increment_file_size = 0 - + self.setvals(diff_rorp, dsrp, incpref) + def override_changed(self): """Set changed flag to true @@ -187,7 +168,6 @@ class IncrementITR(IterTreeReducer): superclass. """ - self.directory = 1 self.diff_rorp = diff_rorp self.dsrp = dsrp self.incpref = incpref @@ -224,53 +204,32 @@ class IncrementITR(IterTreeReducer): Robust.chain([Inc.Increment_action(diff_rorp, dsrp, incpref), RORPIter.patchonce_action(None, dsrp, diff_rorp)] ).execute() - - self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat() - and Inc._inc_file.getsize()) or 0) self.changed = 1 def end_process(self): """Do final work when leaving a tree (directory)""" - if not self.directory: return diff_rorp, dsrp, incpref = self.diff_rorp, self.dsrp, self.incpref - if not diff_rorp and not self.changed: return - - if self.directory_replacement: - tf = self.directory_replacement - Inc.Increment(tf, dsrp, incpref) - RORPIter.patchonce_action(None, dsrp, tf).execute() - tf.delete() - else: - Inc.Increment(diff_rorp, dsrp, incpref) - if diff_rorp: - RORPIter.patchonce_action(None, dsrp, diff_rorp).execute() - - self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat() - and Inc._inc_file.getsize()) or 0) - self.write_statistics() - - def write_statistics(self): - """Write the accumulated totals into file in inc directory""" - if not self.incpref.isdir(): return # only write for directories - statrp = Inc.get_inc_ext(self.incpref.append("directory_statistics"), - "data") - tf = TempFileManager.new(statrp) - def init_thunk(): - fp = tf.open("w") - fp.write("TotalFiles %d\n" % self.total_files) - fp.write("TotalFileSize %d\n" % self.total_file_size) - fp.write("ChangedFiles %d\n" % self.changed_files) - fp.write("ChangedFileSize %d\n" % self.changed_file_size) - fp.write("IncrementFileSize %d\n" % self.increment_file_size) - fp.close() - Robust.make_tf_robustaction(init_thunk, (tf,), (statrp,)).execute() + if self.mirror_isdirectory: + if not diff_rorp and not self.changed: return + + if self.directory_replacement: + tf = self.directory_replacement + Inc.Increment(tf, dsrp, incpref) + RORPIter.patchonce_action(None, dsrp, tf).execute() + tf.delete() + else: + Inc.Increment(diff_rorp, dsrp, incpref) + if diff_rorp: + RORPIter.patchonce_action(None, dsrp, diff_rorp).execute() + + self.end_stats(diff_rorp, dsrp, Inc._inc_file) + if self.incpref.isdir() and (self.mirror_isdirectory or dsrp.isdir()): + self.write_stats_to_rp(Inc.get_inc_ext( + self.incpref.append("directory_statistics"), "data")) def branch_process(self, subinstance): """Update statistics, and the has_changed flag if change in branch""" if subinstance.changed: self.changed = 1 + self.add_file_stats(subinstance) + - self.total_files += subinstance.total_files - self.total_file_size += subinstance.total_file_size - self.changed_files += subinstance.changed_files - self.changed_file_size += subinstance.changed_file_size - self.increment_file_size += subinstance.increment_file_size diff --git a/rdiff-backup/src/statistics.py b/rdiff-backup/src/statistics.py new file mode 100644 index 0000000..a121591 --- /dev/null +++ b/rdiff-backup/src/statistics.py @@ -0,0 +1,133 @@ +execfile("filename_mapping.py") + +####################################################################### +# +# statistics - Generate and process aggregated backup information +# + +class StatsException(Exception): pass + +class StatsObj: + """Contains various statistics, provide string conversion functions""" + + stat_file_attrs = ('SourceFiles', 'SourceFileSize', + 'MirrorFiles', 'MirrorFileSize', + 'NewFiles', 'NewFileSize', + 'DeletedFiles', 'DeletedFileSize', + 'ChangedFiles', + 'ChangedSourceSize', 'ChangedMirrorSize', + 'IncrementFileSize') + stat_time_attrs = ('StartTime', 'EndTime') + stat_attrs = stat_time_attrs + stat_file_attrs + + # Set all stats to None, indicating info not available + for attr in stat_attrs: locals()[attr] = None + + def get_stat(self, attribute): + """Get a statistic""" + try: return self.__dict__[attribute] + except KeyError: + # this may be a hack, but seems no good way to get attrs in python + return eval("self.%s" % attribute) + + def set_stat(self, attr, value): + """Set attribute to given value""" + self.__dict__[attr] = value + + def get_stats_string(self): + """Return string printing out statistics""" + slist = ["%s %s" % (attr, self.get_stat(attr)) + for attr in self.stat_attrs + if self.get_stat(attr) is not None] + return "\n".join(slist) + + def init_stats_from_string(self, s): + """Initialize attributes from string, return self for convenience""" + def error(line): raise StatsException("Bad line '%s'" % line) + + for line in s.split("\n"): + if not line: continue + line_parts = line.split() + if len(line_parts) < 2: error(line) + attr, value_string = line_parts[:2] + if not attr in self.stat_attrs: error(line) + try: self.set_stat(attr, long(value_string)) + except ValueError: error(line) + return self + + def write_stats_to_rp(self, rp): + """Write statistics string to given rpath""" + tf = TempFileManager.new(rp) + def init_thunk(): + fp = tf.open("w") + fp.write(self.get_stats_string()) + fp.close() + Robust.make_tf_robustaction(init_thunk, (tf,), (rp,)).execute() + + def read_stats_from_rp(self, rp): + """Set statistics from rpath, return self for convenience""" + fp = rp.open("r") + self.init_stats_from_string(fp.read()) + fp.close() + return self + + def stats_equal(self, s): + """Return true if s has same statistics as self""" + assert isinstance(s, StatsObj) + for attr in self.stat_file_attrs: + if self.get_stat(attr) != s.get_stat(attr): return None + return 1 + + +class StatsITR(IterTreeReducer, StatsObj): + """Keep track of per directory statistics + + This is subclassed by the mirroring and incrementing ITRs. + + """ + # zero out file statistics + for attr in StatsObj.stat_file_attrs: locals()[attr] = 0 + + def start_stats(self, mirror_dsrp): + """Record status of mirror dsrp + + This is called before the mirror is processed so we remember + the old state. + + """ + if mirror_dsrp.lstat(): + self.mirror_base_exists = 1 + self.mirror_base_size = mirror_dsrp.getsize() + else: self.mirror_base_exists = None + + def end_stats(self, diff_rorp, mirror_dsrp, inc_rp = None): + """Set various statistics after mirror processed""" + if mirror_dsrp.lstat(): + self.SourceFiles += 1 + self.SourceFileSize += mirror_dsrp.getsize() + if self.mirror_base_exists: + self.MirrorFiles += 1 + self.MirrorFileSize += self.mirror_base_size + if diff_rorp: # otherwise no change + self.ChangedFiles += 1 + self.ChangedSourceSize += mirror_dsrp.getsize() + self.ChangedMirrorSize += self.mirror_base_size + self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0 + else: # new file was created + self.NewFiles += 1 + self.NewFileSize += mirror_dsrp.getsize() + self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0 + else: + if self.mirror_base_exists: # file was deleted from mirror + self.MirrorFiles += 1 + self.MirrorFileSize += self.mirror_base_size + self.DeletedFiles += 1 + self.DeletedFileSize += self.mirror_base_size + self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0 + else: assert None # One of before and after should exist + + def add_file_stats(self, subinstance): + """Add all file statistics from subinstance to current totals""" + for attr in self.stat_file_attrs: + self.set_stat(attr, + self.get_stat(attr) + subinstance.get_stat(attr)) -- cgit v1.2.1