summaryrefslogtreecommitdiff
path: root/rdiff-backup
diff options
context:
space:
mode:
authorbescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>2002-12-08 19:49:29 +0000
committerbescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109>2002-12-08 19:49:29 +0000
commitc1a39e88df7ec66297635f519cb1f3fbd1b584f0 (patch)
treeda3784f19de5417a75880947eb3208344d5a8690 /rdiff-backup
parent390e37074558f83a2df042019910a73a03a0c1fa (diff)
downloadrdiff-backup-c1a39e88df7ec66297635f519cb1f3fbd1b584f0.tar.gz
Initial checkin of metadata code
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@243 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
Diffstat (limited to 'rdiff-backup')
-rw-r--r--rdiff-backup/rdiff_backup/metadata.py171
-rw-r--r--rdiff-backup/rdiff_backup/rpath.py9
-rw-r--r--rdiff-backup/testing/metadatatest.py31
3 files changed, 211 insertions, 0 deletions
diff --git a/rdiff-backup/rdiff_backup/metadata.py b/rdiff-backup/rdiff_backup/metadata.py
new file mode 100644
index 0000000..b596f76
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/metadata.py
@@ -0,0 +1,171 @@
+# Copyright 2002 Ben Escoto
+#
+# This file is part of rdiff-backup.
+#
+# rdiff-backup is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# rdiff-backup is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with rdiff-backup; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+
+"""Store and retrieve metadata in destination directory
+
+The plan is to store metadata information for all files in the
+destination directory in a special metadata file. There are two
+reasons for this:
+
+1) The filesystem of the mirror directory may not be able to handle
+ types of metadata that the source filesystem can. For instance,
+ rdiff-backup may not have root access on the destination side, so
+ cannot set uid/gid. Or the source side may have ACLs and the
+ destination side doesn't.
+
+ Hopefully every file system can store binary data. Storing
+ metadata separately allows us to back up anything (ok, maybe
+ strange filenames are still a problem).
+
+2) Metadata can be more quickly read from a file than it can by
+ traversing the mirror directory over and over again. In many
+ cases most of rdiff-backup's time is spent comparing metadata (like
+ file size and modtime), trying to find differences. Reading this
+ data sequentially from a file is significantly less taxing than
+ listing directories and statting files all over the mirror
+ directory.
+
+The metadata is stored in a text file, which is a bunch of records
+concatenated together. Each record has the format:
+
+File <filename>
+ <field_name1> <value>
+ <field_name2> <value>
+ ...
+
+Where the lines are separated by newlines. See the code below for the
+field names and values.
+
+"""
+
+import re, log, Globals, rpath
+
+class ParsingError(Exception):
+ """Raised when bad or unparsable data is received
+
+ Record2RORP raises this for a non-empty line that does not match
+ line_parsing_regexp and for a line whose field name is unknown.
+
+ """
+ pass
+
+
+def RORP2Record(rorpath):
+ """From RORPath, return text record of file's metadata
+
+ The record begins with a "File" line holding the quoted index path
+ and a "Type" line, followed by type-specific fields. For a type
+ of None the record ends there; otherwise Uid, Gid, and Permissions
+ lines are appended, preceded by a ModTime line unless the type is
+ "sym" or "dev". Every line ends with a newline.
+
+ NOTE(review): leading whitespace was flattened in this rendering
+ of the patch, so the exact nesting of the hard link fields below
+ cannot be confirmed from here.
+
+ """
+ str_list = ["File %s\n" % quote_path(rorpath.get_indexpath())]
+
+ # Store file type, e.g. "dev", "reg", or "sym", and type-specific data
+ # NOTE(review): the local name "type" shadows the builtin in this function
+ type = rorpath.gettype()
+ # A type of None is written out as the literal string "None"
+ if type is None: type = "None"
+ str_list.append(" Type %s\n" % type)
+ if type == "reg":
+ str_list.append(" Size %s\n" % rorpath.getsize())
+
+ # If file is hardlinked, add that information
+ # NOTE(review): presumably Inode and DeviceLoc are only written
+ # when numlinks > 1 -- TODO confirm against the unflattened source
+ if Globals.preserve_hardlinks:
+ numlinks = rorpath.getnumlinks()
+ if numlinks > 1:
+ str_list.append(" NumHardLinks %s\n" % numlinks)
+ str_list.append(" Inode %s\n" % rorpath.getinode())
+ str_list.append(" DeviceLoc %s\n" % rorpath.getdevloc())
+ # Nothing more is recorded for a type of None, so return early
+ elif type == "None": return "".join(str_list)
+ # These types have no type-specific fields
+ elif type == "dir" or type == "sock" or type == "fifo": pass
+ elif type == "sym":
+ # The link target is quoted the same way as the file name
+ str_list.append(" SymData %s\n" % quote_path(rorpath.readlink()))
+ elif type == "dev":
+ major, minor = rorpath.getdevnums()
+ # "b" marks a block device, "c" a character device
+ if rorpath.isblkdev(): devchar = "b"
+ else:
+ assert rorpath.ischardev()
+ devchar = "c"
+ str_list.append(" DeviceNum %s %s %s\n" % (devchar, major, minor))
+
+ # Store time information
+ if type != 'sym' and type != 'dev':
+ str_list.append(" ModTime %s\n" % rorpath.getmtime())
+
+ # Add user, group, and permission information
+ uid, gid = rorpath.getuidgid()
+ str_list.append(" Uid %s\n" % uid)
+ str_list.append(" Gid %s\n" % gid)
+ str_list.append(" Permissions %s\n" % rorpath.getperms())
+ return "".join(str_list)
+
+# Matches one record line: optional leading spaces, an alphanumeric
+# field name, a single space, then the value (at least one character)
+line_parsing_regexp = re.compile("^ *([A-Za-z0-9]+) (.+)$")
+def Record2RORP(record_string):
+ """Given record_string, return RORPath
+
+ For speed reasons, write the RORPath data dictionary directly
+ instead of calling rorpath functions. NOTE(review): the original
+ sentence here was left unfinished ("This depends on the");
+ presumably this depends on the key names RORPath uses in its
+ data dictionary -- TODO confirm against rpath.RORPath.
+
+ Raises ParsingError for a non-empty line that does not match
+ line_parsing_regexp or that names an unknown field. A ValueError
+ from the int()/long() conversions or from unpacking the DeviceNum
+ value is not caught here.
+
+ """
+ data_dict = {}
+ index_list = [None] # put in list so we can modify using process_line
+ def process_line(line):
+ """Process given line, and modify data_dict or index_list"""
+ if not line: return # skip empty lines
+ match = line_parsing_regexp.search(line)
+ if not match: raise ParsingError("Bad line: '%s'" % line)
+ field, data = match.group(1), match.group(2)
+
+ if field == "File":
+ # "." is what RORP2Record's get_indexpath() call yields for an
+ # empty index; any other path is unquoted and split on "/"
+ if data == ".": index_list[0] = ()
+ else: index_list[0] = tuple(unquote_path(data).split("/"))
+ elif field == "Type":
+ # The literal string "None" stands for a type of None
+ if data == "None": data_dict['type'] = None
+ else: data_dict['type'] = data
+ elif field == "Size": data_dict['size'] = long(data)
+ elif field == "NumHardLinks": data_dict['nlink'] = int(data)
+ elif field == "Inode": data_dict['inode'] = long(data)
+ elif field == "DeviceLoc": data_dict['devloc'] = long(data)
+ elif field == "SymData": data_dict['linkname'] = unquote_path(data)
+ elif field == "DeviceNum":
+ # Value is "<devchar> <major> <minor>", separated by single spaces
+ devchar, major_str, minor_str = data.split(" ")
+ data_dict['devnums'] = (devchar, int(major_str), int(minor_str))
+ elif field == "ModTime": data_dict['mtime'] = long(data)
+ elif field == "Uid": data_dict['uid'] = int(data)
+ elif field == "Gid": data_dict['gid'] = int(data)
+ elif field == "Permissions": data_dict['perms'] = int(data)
+ else: raise ParsingError("Unknown field in line '%s'" % line)
+
+ # NOTE(review): map() is used only for its side effects and must be
+ # eager for process_line to run; that holds on Python 2 (which the
+ # long() calls above require) but not with Python 3's lazy map
+ map(process_line, record_string.split("\n"))
+ return rpath.RORPath(index_list[0], data_dict)
+
+# Matches a single newline or a single backslash
+chars_to_quote = re.compile("\\n|\\\\")
+def quote_path(path_string):
+ """Return quoted version of path_string
+
+ Because newlines are used to separate fields in a record, each
+ newline is replaced with the two characters backslash and "n".
+ Each backslash becomes two backslashes, and everything else is
+ left the way it is.
+
+ """
+ def replacement_func(match_obj):
+ """This is called on the match obj of any char that needs quoting"""
+ char = match_obj.group(0)
+ if char == "\n": return "\\n"
+ elif char == "\\": return "\\\\"
+ # Guard only: chars_to_quote matches nothing but the two cases above
+ assert 0, "Bad char %s needs quoting" % char
+ return chars_to_quote.sub(replacement_func, path_string)
+
+def unquote_path(quoted_string):
+ """Reverse what was done by quote_path
+
+ Backslash followed by "n" becomes a newline and a doubled
+ backslash becomes a single backslash; everything else is left
+ the way it is.
+
+ """
+ def replacement_func(match_obj):
+ """Unquote match obj of two character sequence"""
+ two_chars = match_obj.group(0)
+ if two_chars == "\\n": return "\n"
+ elif two_chars == "\\\\": return "\\"
+ # Not reached with the pattern used below, which matches only the
+ # two sequences handled above
+ log.Log("Warning, unknown quoted sequence %s found" % two_chars, 2)
+ return two_chars
+ # NOTE(review): unlike chars_to_quote, this pattern is not precompiled
+ # at module level
+ return re.sub("\\\\n|\\\\\\\\", replacement_func, quoted_string)
diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py
index e82fbc7..cfc0688 100644
--- a/rdiff-backup/rdiff_backup/rpath.py
+++ b/rdiff-backup/rdiff_backup/rpath.py
@@ -404,6 +404,15 @@ class RORPath(RPathStatic):
self.file = RPathFileHook(file, closing_hook)
self.file_already_open = None
+ def get_indexpath(self):
+ """Return path of index portion
+
+ For instance, if the index is ("a", "b"), return "a/b". An
+ empty index is returned as ".".
+
+ """
+ if not self.index: return "."
+ return "/".join(self.index)
+
def get_attached_filetype(self):
"""If there is a file attached, say what it is
diff --git a/rdiff-backup/testing/metadatatest.py b/rdiff-backup/testing/metadatatest.py
new file mode 100644
index 0000000..bad6d27
--- /dev/null
+++ b/rdiff-backup/testing/metadatatest.py
@@ -0,0 +1,31 @@
+import unittest, os
+from rdiff_backup.metadata import *
+from rdiff_backup import rpath, Globals
+
+class MetadataTest(unittest.TestCase):
+ """Test path quoting and the RORPath <-> record round trip"""
+ def testQuote(self):
+ """Test quoting and unquoting"""
+ filenames = ["foo", ".", "hello\nthere", "\\", "\\\\\\",
+ "h\no\t\x87\n", " "]
+ for filename in filenames:
+ quoted = quote_path(filename)
+ # A quoted name must fit on a single record line
+ assert not "\n" in quoted
+ result = unquote_path(quoted)
+ assert result == filename, (quoted, result, filename)
+
+ def testRORP2Record(self):
+ """Test turning RORPs into records and back again"""
+ vft = rpath.RPath(Globals.local_connection,
+ "testfiles/various_file_types")
+ rpaths = map(lambda x: vft.append(x), vft.listdir())
+ # NOTE(review): presumably chosen to cover a regular file, a
+ # character device, a block device, and a nonexistent path; this
+ # depends on what exists on the host -- TODO confirm
+ extra_rpaths = map(lambda x: rpath.RPath(Globals.local_connection, x),
+ ['/bin/ls', '/dev/ttyS0', '/dev/hda', 'aoeuaou'])
+
+ for rp in [vft] + rpaths + extra_rpaths:
+ record = RORP2Record(rp)
+ #print record
+ new_rorp = Record2RORP(record)
+ assert new_rorp == rp, (new_rorp, rp, record)
+
+
+# Run the tests when this file is executed as a script
+if __name__ == "__main__": unittest.main()