summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2003-03-15 19:30:08 +0000
committer bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> 2003-03-15 19:30:08 +0000
commit 42aa5f8cf76abd0a535f9205a8376dcb702a2a76 (patch)
tree 7d2f19e991a572141a43df025fd4d3335af06465
parent a3aff7945f8d606bc14972e6107d0fa5e262e0b4 (diff)
download rdiff-backup-42aa5f8cf76abd0a535f9205a8376dcb702a2a76.tar.gz
Final changes for 0.11.4
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@301 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
-rw-r--r-- rdiff-backup/CHANGELOG 10
-rw-r--r-- rdiff-backup/TODO 3
-rw-r--r-- rdiff-backup/rdiff-backup.1 5
-rw-r--r-- rdiff-backup/rdiff_backup/Globals.py 4
-rw-r--r-- rdiff-backup/rdiff_backup/Main.py 16
-rw-r--r-- rdiff-backup/rdiff_backup/backup.py 51
-rw-r--r-- rdiff-backup/rdiff_backup/metadata.py 34
-rw-r--r-- rdiff-backup/rdiff_backup/rpath.py 4
-rw-r--r-- rdiff-backup/rdiff_backup/statistics.py 65
-rw-r--r-- rdiff-backup/testing/metadatatest.py 2
-rw-r--r-- rdiff-backup/testing/statisticstest.py 22
-rw-r--r-- rdiff-backup/testing/test_with_profiling.py 7
12 files changed, 155 insertions, 68 deletions
diff --git a/rdiff-backup/CHANGELOG b/rdiff-backup/CHANGELOG
index 738b31c..5e7fd84 100644
--- a/rdiff-backup/CHANGELOG
+++ b/rdiff-backup/CHANGELOG
@@ -1,4 +1,4 @@
-New in v0.11.4 (2003/04/01)
+New in v0.11.4 (2003/03/15)
---------------------------
Fixed bug incrementing sockets whose filenames were pretty long, but
@@ -7,6 +7,14 @@ not super long. Reported by Olivier Mueller.
Added Albert Chin-A-Young's patch to add a few options to the setup.py
install script.
+Apparently fixed rare utime type bug. Thanks to Christian Skarby for
+report and testing.
+
+Added detailed file_statistics (in addition to session_statistics) as
+requested by Dean Gaudet. Disable with --no-file-statistics option.
+
+Minor speed enhancements.
+
New in v0.11.3 (2003/03/04)
---------------------------
diff --git a/rdiff-backup/TODO b/rdiff-backup/TODO
index ca48674..51bb553 100644
--- a/rdiff-backup/TODO
+++ b/rdiff-backup/TODO
@@ -1,3 +1,4 @@
+
---------[ Medium term ]---------------------------------------
Look at Kent Borg's suggestion for restore options and digests.
@@ -13,8 +14,6 @@ Add # of increments option to --remove-older-than
Make argument shortcut for cstream
-Make --calculate-averages work with directory_statistics file.
-
Write configuration file, to make sure settings like --quoting-char,
--windows-time-format, etc., don't change between sessions,
backup/restoring, etc.
diff --git a/rdiff-backup/rdiff-backup.1 b/rdiff-backup/rdiff-backup.1
index 0c5f2b4..8597da9 100644
--- a/rdiff-backup/rdiff-backup.1
+++ b/rdiff-backup/rdiff-backup.1
@@ -218,6 +218,11 @@ Do not compress increments based on files whose filenames match regexp.
The default is
"(?i).*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|jpg|gif|png|jp2|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$"
.TP
+.B --no-file-statistics
+This will disable writing to the file_statistics file in the
+rdiff-backup-data directory. rdiff-backup will run slightly quicker
+and take up a bit less space.
+.TP
.BI --no-hard-links
Don't replicate hard links on destination side. Note that because
metadata is written to a separate file, hard link information will not
diff --git a/rdiff-backup/rdiff_backup/Globals.py b/rdiff-backup/rdiff_backup/Globals.py
index 359e351..396ab3b 100644
--- a/rdiff-backup/rdiff_backup/Globals.py
+++ b/rdiff-backup/rdiff_backup/Globals.py
@@ -148,6 +148,10 @@ ssh_compression = 1
# If true, print statistics after successful backup
print_statistics = None
+# Controls whether file_statistics file is written in
+# rdiff-backup-data dir. These can sometimes take up a lot of space.
+file_statistics = 1
+
# On the writer connection, the following will be set to the mirror
# Select iterator.
select_mirror = None
diff --git a/rdiff-backup/rdiff_backup/Main.py b/rdiff-backup/rdiff_backup/Main.py
index 391e848..5381272 100644
--- a/rdiff-backup/rdiff_backup/Main.py
+++ b/rdiff-backup/rdiff_backup/Main.py
@@ -53,13 +53,14 @@ def parse_cmdlineoptions(arglist):
"include-globbing-filelist=", "include-regexp=",
"list-changed-since=", "list-increments",
"no-compare-inode", "no-compression",
- "no-compression-regexp=", "no-hard-links", "null-separator",
- "parsable-output", "print-statistics", "quoting-char=",
- "remote-cmd=", "remote-schema=", "remove-older-than=",
- "restore-as-of=", "restrict=", "restrict-read-only=",
- "restrict-update-only=", "server", "ssh-no-compression",
- "terminal-verbosity=", "test-server", "verbosity=",
- "version", "windows-mode", "windows-time-format"])
+ "no-compression-regexp=", "no-file-statistics",
+ "no-hard-links", "null-separator", "parsable-output",
+ "print-statistics", "quoting-char=", "remote-cmd=",
+ "remote-schema=", "remove-older-than=", "restore-as-of=",
+ "restrict=", "restrict-read-only=", "restrict-update-only=",
+ "server", "ssh-no-compression", "terminal-verbosity=",
+ "test-server", "verbosity=", "version", "windows-mode",
+ "windows-time-format"])
except getopt.error, e:
commandline_error("Bad commandline options: %s" % str(e))
@@ -108,6 +109,7 @@ def parse_cmdlineoptions(arglist):
elif opt == "--no-compression": Globals.set("compression", None)
elif opt == "--no-compression-regexp":
Globals.set("no_compression_regexp_string", arg)
+ elif opt == "--no-file-statistics": Globals.set('file_statistics', 0)
elif opt == "--no-hard-links": Globals.set('preserve_hardlinks', 0)
elif opt == "--null-separator": Globals.set("null_separator", 1)
elif opt == "--parsable-output": Globals.set('parsable_output', 1)
diff --git a/rdiff-backup/rdiff_backup/backup.py b/rdiff-backup/rdiff_backup/backup.py
index 0ff5ace..6efd8a3 100644
--- a/rdiff-backup/rdiff_backup/backup.py
+++ b/rdiff-backup/rdiff_backup/backup.py
@@ -237,14 +237,21 @@ class CacheCollatedPostProcess:
self.iter = collated_iter # generates (source_rorp, dest_rorp) pairs
self.cache_size = cache_size
self.statfileobj = statistics.init_statfileobj()
+ if Globals.file_statistics: statistics.FileStats.init()
metadata.OpenMetadata()
- # the following should map indicies to lists [source_rorp,
- # dest_rorp, changed_flag, success_flag] where changed_flag
- # should be true if the rorps are different, and success_flag
- # should be 1 if dest_rorp has been successfully updated to
- # source_rorp, and 2 if the destination file is deleted
- # entirely. They both default to false (0).
+ # the following should map indicies to lists
+ # [source_rorp, dest_rorp, changed_flag, success_flag, increment]
+
+ # changed_flag should be true if the rorps are different, and
+
+ # success_flag should be 1 if dest_rorp has been successfully
+ # updated to source_rorp, and 2 if the destination file is
+ # deleted entirely. They both default to false (0).
+
+ # increment holds the RPath of the increment file if one
+ # exists. It is used to record file statistics.
+
self.cache_dict = {}
self.cache_indicies = []
@@ -255,7 +262,7 @@ class CacheCollatedPostProcess:
source_rorp, dest_rorp = self.iter.next()
self.pre_process(source_rorp, dest_rorp)
index = source_rorp and source_rorp.index or dest_rorp.index
- self.cache_dict[index] = [source_rorp, dest_rorp, 0, 0]
+ self.cache_dict[index] = [source_rorp, dest_rorp, 0, 0, None]
self.cache_indicies.append(index)
if len(self.cache_indicies) > self.cache_size: self.shorten_cache()
@@ -276,15 +283,17 @@ class CacheCollatedPostProcess:
"""Remove one element from cache, possibly adding it to metadata"""
first_index = self.cache_indicies[0]
del self.cache_indicies[0]
- old_source_rorp, old_dest_rorp, changed_flag, success_flag = \
+ old_source_rorp, old_dest_rorp, changed_flag, success_flag, inc = \
self.cache_dict[first_index]
del self.cache_dict[first_index]
self.post_process(old_source_rorp, old_dest_rorp,
- changed_flag, success_flag)
+ changed_flag, success_flag, inc)
- def post_process(self, source_rorp, dest_rorp, changed, success):
+ def post_process(self, source_rorp, dest_rorp, changed, success, inc):
"""Post process source_rorp and dest_rorp.
+ The point of this is to write statistics and metadata.
+
changed will be true if the files have changed. success will
be true if the files have been successfully updated (this is
always false for un-changed files).
@@ -294,12 +303,14 @@ class CacheCollatedPostProcess:
if source_rorp: self.statfileobj.add_source_file(source_rorp)
if dest_rorp: self.statfileobj.add_dest_file(dest_rorp)
if success == 0: metadata_rorp = dest_rorp
- elif success == 1:
+ elif success == 1 or success == 2:
self.statfileobj.add_changed(source_rorp, dest_rorp)
metadata_rorp = source_rorp
else: metadata_rorp = None
if metadata_rorp and metadata_rorp.lstat():
metadata.WriteMetadata(metadata_rorp)
+ if Globals.file_statistics:
+ statistics.FileStats.update(source_rorp, dest_rorp, changed, inc)
def in_cache(self, index):
"""Return true if given index is cached"""
@@ -317,6 +328,10 @@ class CacheCollatedPostProcess:
"""Signal that the file with given index has changed"""
self.cache_dict[index][2] = 1
+ def set_inc(self, index, inc):
+ """Set the increment of the current file"""
+ self.cache_dict[index][4] = inc
+
def get_rorps(self, index):
"""Retrieve (source_rorp, dest_rorp) from cache"""
return self.cache_dict[index][:2]
@@ -337,6 +352,7 @@ class CacheCollatedPostProcess:
while self.cache_indicies: self.shorten_cache()
metadata.CloseMetadata()
if Globals.print_statistics: statistics.print_active_stats()
+ if Globals.file_statistics: statistics.FileStats.close()
statistics.write_active_statfileobj()
@@ -511,6 +527,7 @@ class IncrementITRB(PatchITRB):
if self.patch_to_temp(rp, diff_rorp, tf):
inc = self.inc_with_checking(tf, rp, self.get_incrp(index))
if inc is not None:
+ self.CCPP.set_inc(index, inc)
if inc.isreg():
inc.fsync_with_dir() # Write inc before rp changed
if tf.lstat():
@@ -531,10 +548,12 @@ class IncrementITRB(PatchITRB):
inc = self.inc_with_checking(diff_rorp, base_rp,
self.get_incrp(index))
if inc and inc.isreg():
- inc.fsync_with_dir() # must writte inc before rp changed
+ inc.fsync_with_dir() # must write inc before rp changed
self.prepare_dir(diff_rorp, base_rp)
- elif (self.set_dir_replacement(diff_rorp, base_rp) and
- self.inc_with_checking(self.dir_replacement, base_rp,
- self.get_incrp(index))):
- self.CCPP.flag_success(index)
+ elif self.set_dir_replacement(diff_rorp, base_rp):
+ inc = self.inc_with_checking(self.dir_replacement, base_rp,
+ self.get_incrp(index))
+ if inc:
+ self.CCPP.set_inc(index, inc)
+ self.CCPP.flag_success(index)
diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py
index a434024..f1f23ac 100644
--- a/rdiff-backup/rdiff_backup/metadata.py
+++ b/rdiff-backup/rdiff_backup/metadata.py
@@ -104,26 +104,20 @@ def RORP2Record(rorpath):
str_list.append(" Permissions %s\n" % rorpath.getperms())
return "".join(str_list)
-line_parsing_regexp = re.compile("^ *([A-Za-z0-9]+) (.+)$")
+line_parsing_regexp = re.compile("^ *([A-Za-z0-9]+) (.+)$", re.M)
def Record2RORP(record_string):
"""Given record_string, return RORPath
For speed reasons, write the RORPath data dictionary directly
- instead of calling rorpath functions. This depends on the
+ instead of calling rorpath functions. Profiling has shown this to
+ be a time critical function.
"""
data_dict = {}
- index_list = [None] # put in list so we can modify using parse_line
- def process_line(line):
- """Process given line, and modify data_dict or index_list"""
- if not line: return # skip empty lines
- match = line_parsing_regexp.search(line)
- if not match: raise ParsingError("Bad line: '%s'" % line)
- field, data = match.group(1), match.group(2)
-
+ for field, data in line_parsing_regexp.findall(record_string):
if field == "File":
- if data == ".": index_list[0] = ()
- else: index_list[0] = tuple(unquote_path(data).split("/"))
+ if data == ".": index = ()
+ else: index = tuple(unquote_path(data).split("/"))
elif field == "Type":
if data == "None": data_dict['type'] = None
else: data_dict['type'] = data
@@ -140,9 +134,7 @@ def Record2RORP(record_string):
elif field == "Gid": data_dict['gid'] = int(data)
elif field == "Permissions": data_dict['perms'] = int(data)
else: raise ParsingError("Unknown field in line '%s'" % line)
-
- map(process_line, record_string.split("\n"))
- return rpath.RORPath(index_list[0], data_dict)
+ return rpath.RORPath(index, data_dict)
chars_to_quote = re.compile("\\n|\\\\")
def quote_path(path_string):
@@ -260,6 +252,7 @@ class rorp_extractor:
metadata_rp = None
metadata_fileobj = None
+metadata_record_buffer = [] # Use this because gzip writes are slow
def OpenMetadata(rp = None, compress = 1):
"""Open the Metadata file for writing, return metadata fileobj"""
global metadata_rp, metadata_fileobj
@@ -274,13 +267,20 @@ def OpenMetadata(rp = None, compress = 1):
def WriteMetadata(rorp):
"""Write metadata of rorp to file"""
- global metadata_fileobj
- metadata_fileobj.write(RORP2Record(rorp))
+ global metadata_fileobj, metadata_record_buffer
+ metadata_record_buffer.append(RORP2Record(rorp))
+ if len(metadata_record_buffer) >= 100: write_metadata_buffer()
+
+def write_metadata_buffer():
+ global metadata_record_buffer
+ metadata_fileobj.write("".join(metadata_record_buffer))
+ metadata_record_buffer = []
def CloseMetadata():
"""Close the metadata file"""
global metadata_rp, metadata_fileobj
assert metadata_fileobj, "Metadata file not open"
+ if metadata_record_buffer: write_metadata_buffer()
try: fileno = metadata_fileobj.fileno() # will not work if GzipFile
except AttributeError: fileno = metadata_fileobj.fileobj.fileno()
os.fsync(fileno)
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index 909ac0a..f133eb6 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -440,8 +440,8 @@ class RORPath:
def getnumlinks(self):
"""Number of places inode is linked to"""
- try: return self.data['nlink']
- except KeyError: return 1
+ if self.data.has_key('nlink'): return self.data['nlink']
+ else: return 1
def readlink(self):
"""Wrapper around os.readlink()"""
diff --git a/rdiff-backup/rdiff_backup/statistics.py b/rdiff-backup/rdiff_backup/statistics.py
index 5ed6cd3..c381473 100644
--- a/rdiff-backup/rdiff_backup/statistics.py
+++ b/rdiff-backup/rdiff_backup/statistics.py
@@ -20,7 +20,7 @@
"""Generate and process aggregated backup information"""
import re, os, time
-import Globals, robust, Time, rorpiter, increment, log
+import Globals, Time, increment, log, static
class StatsException(Exception): pass
@@ -347,6 +347,69 @@ def print_active_stats():
"""Print statistics of active statobj to stdout and log"""
global _active_statfileobj
assert _active_statfileobj
+ _active_statfileobj.finish()
statmsg = _active_statfileobj.get_stats_logstring("Session statistics")
log.Log.log_to_file(statmsg)
Globals.client_conn.sys.stdout.write(statmsg)
+
+
+class FileStats:
+ """Keep track of less detailed stats on file-by-file basis"""
+ _fileobj, _rp = None, None
+ _line_sep = None
+ def init(cls):
+ """Open file stats object and prepare to write"""
+ assert not (cls._fileobj or cls._rp), (cls._fileobj, cls._rp)
+ rpbase = Globals.rbdir.append("file_statistics")
+ suffix = Globals.compression and 'data.gz' or 'data'
+ cls._rp = increment.get_inc(rpbase, suffix, Time.curtime)
+ assert not cls._rp.lstat()
+ cls._fileobj = cls._rp.open("wb", compress = Globals.compression)
+
+ cls._line_sep = Globals.null_separator and '\0' or '\n'
+ cls.write_docstring()
+ cls.line_buffer = []
+
+ def write_docstring(cls):
+ """Write the first line (a documentation string) into file"""
+ cls._fileobj.write("# Format of each line in file statistics file:")
+ cls._fileobj.write(cls._line_sep)
+ cls._fileobj.write("# Filename Changed SourceSize MirrorSize "
+ "IncrementSize" + cls._line_sep)
+
+ def update(cls, source_rorp, dest_rorp, changed, inc):
+ """Update file stats with given information"""
+ if source_rorp: filename = source_rorp.get_indexpath()
+ else: filename = dest_rorp.get_indexpath()
+
+ size_list = map(cls.get_size, [source_rorp, dest_rorp, inc])
+ line = " ".join([filename, str(changed)] + size_list)
+ cls.line_buffer.append(line)
+ if len(cls.line_buffer) >= 100: cls.write_buffer()
+
+ def get_size(cls, rorp):
+ """Return the size of rorp as string, or "NA" if not a regular file"""
+ if not rorp: return "NA"
+ if rorp.isreg(): return str(rorp.getsize())
+ else: return "0"
+
+ def write_buffer(cls):
+ """Write buffer to file because buffer is full
+
+ The buffer part is necessary because the GzipFile.write()
+ method seems fairly slow.
+
+ """
+ assert cls.line_buffer and cls._fileobj
+ cls.line_buffer.append('') # have join add _line_sep to end also
+ cls._fileobj.write(cls._line_sep.join(cls.line_buffer))
+ cls.line_buffer = []
+
+ def close(cls):
+ """Close file stats file"""
+ assert cls._fileobj, cls._fileobj
+ if cls.line_buffer: cls.write_buffer()
+ assert not cls._fileobj.close()
+ cls._fileobj = cls._rp = None
+
+static.MakeClass(FileStats)
diff --git a/rdiff-backup/testing/metadatatest.py b/rdiff-backup/testing/metadatatest.py
index 7b6a91a..ed7e07c 100644
--- a/rdiff-backup/testing/metadatatest.py
+++ b/rdiff-backup/testing/metadatatest.py
@@ -8,7 +8,7 @@ class MetadataTest(unittest.TestCase):
def make_temp(self):
"""Make temp directory testfiles/output"""
global tempdir
- tempdir.delete()
+ if tempdir.lstat(): tempdir.delete()
tempdir.mkdir()
def testQuote(self):
diff --git a/rdiff-backup/testing/statisticstest.py b/rdiff-backup/testing/statisticstest.py
index 85a1b68..b198b61 100644
--- a/rdiff-backup/testing/statisticstest.py
+++ b/rdiff-backup/testing/statisticstest.py
@@ -180,26 +180,6 @@ class IncStatTest(unittest.TestCase):
rbdir = rpath.RPath(Globals.local_connection,
"testfiles/output/rdiff-backup-data")
- #incs = Restore.get_inclist(rbdir.append("subdir").
- # append("directory_statistics"))
- #assert len(incs) == 2
- #s1 = StatsObj().read_stats_from_rp(incs[0]) # initial mirror stats
- #assert s1.SourceFiles == 2
- #assert 400000 < s1.SourceFileSize < 420000
- #self.stats_check_initial(s1)
-
- #subdir_stats = StatsObj().read_stats_from_rp(incs[1]) # increment stats
- #assert subdir_stats.SourceFiles == 2
- #assert 400000 < subdir_stats.SourceFileSize < 420000
- #assert subdir_stats.MirrorFiles == 2
- #assert 400000 < subdir_stats.MirrorFileSize < 420000
- #assert subdir_stats.NewFiles == subdir_stats.NewFileSize == 0
- #assert subdir_stats.DeletedFiles == subdir_stats.DeletedFileSize == 0
- #assert subdir_stats.ChangedFiles == 2
- #assert 400000 < subdir_stats.ChangedSourceSize < 420000
- #assert 400000 < subdir_stats.ChangedMirrorSize < 420000
- #assert 10 < subdir_stats.IncrementFileSize < 20000
-
incs = restore.get_inclist(rbdir.append("session_statistics"))
assert len(incs) == 2
s2 = statistics.StatsObj().read_stats_from_rp(incs[0])
@@ -214,7 +194,7 @@ class IncStatTest(unittest.TestCase):
assert 700000 <= root_stats.MirrorFileSize < 750000
assert root_stats.NewFiles == 1
assert root_stats.NewFileSize == 0
- assert root_stats.DeletedFiles == 1
+ assert root_stats.DeletedFiles == 1, root_stats.DeletedFiles
assert root_stats.DeletedFileSize == 200000
assert 3 <= root_stats.ChangedFiles <= 4, root_stats.ChangedFiles
assert 450000 <= root_stats.ChangedSourceSize < 470000
diff --git a/rdiff-backup/testing/test_with_profiling.py b/rdiff-backup/testing/test_with_profiling.py
new file mode 100644
index 0000000..13aefa3
--- /dev/null
+++ b/rdiff-backup/testing/test_with_profiling.py
@@ -0,0 +1,7 @@
+import profile, pstats
+from metadatatest import *
+
+profile.run("unittest.main()", "profile-output")
+p = pstats.Stats("profile-output")
+p.sort_stats('time')
+p.print_stats(40)