diff options
author | bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2002-12-08 19:49:29 +0000 |
---|---|---|
committer | bescoto <bescoto@2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109> | 2002-12-08 19:49:29 +0000 |
commit | c1a39e88df7ec66297635f519cb1f3fbd1b584f0 (patch) | |
tree | da3784f19de5417a75880947eb3208344d5a8690 /rdiff-backup/rdiff_backup/metadata.py | |
parent | 390e37074558f83a2df042019910a73a03a0c1fa (diff) | |
download | rdiff-backup-c1a39e88df7ec66297635f519cb1f3fbd1b584f0.tar.gz |
Initial checkin of metadata code
git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@243 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
Diffstat (limited to 'rdiff-backup/rdiff_backup/metadata.py')
-rw-r--r-- | rdiff-backup/rdiff_backup/metadata.py | 171 |
1 files changed, 171 insertions, 0 deletions
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA

"""Store and retrieve metadata in destination directory

The plan is to store metadata information for all files in the
destination directory in a special metadata file. There are two
reasons for this:

1) The filesystem of the mirror directory may not be able to handle
   types of metadata that the source filesystem can. For instance,
   rdiff-backup may not have root access on the destination side, so
   cannot set uid/gid. Or the source side may have ACLs and the
   destination side doesn't.

   Hopefully every file system can store binary data. Storing
   metadata separately allows us to back up anything (ok, maybe
   strange filenames are still a problem).

2) Metadata can be more quickly read from a file than it can by
   traversing the mirror directory over and over again. In many
   cases most of rdiff-backup's time is spent comparing metadata (like
   file size and modtime), trying to find differences. Reading this
   data sequentially from a file is significantly less taxing than
   listing directories and statting files all over the mirror
   directory.

The metadata is stored in a text file, which is a bunch of records
concatenated together. Each record has the format:

File <filename>
  <field_name1> <value>
  <field_name2> <value>
  ...

Where the lines are separated by newlines. See the code below for the
field names and values.

"""

import re, log, Globals, rpath


class ParsingError(Exception):
    """This is raised when bad or unparsable data is received"""
    pass


def RORP2Record(rorpath):
    """From RORPath, return text record of file's metadata

    The record starts with a "File" line holding the quoted index
    path, followed by one indented "<Field> <value>" line per piece of
    metadata. A file whose type is None is written with type "None"
    and gets only the File and Type lines.

    """
    # Field lines are indented under their File line; Record2RORP
    # accepts any number of leading spaces when reading them back.
    str_list = ["File %s\n" % quote_path(rorpath.get_indexpath())]

    # Store file type, e.g. "dev", "reg", or "sym", and type-specific data
    file_type = rorpath.gettype()
    if file_type is None:
        file_type = "None"
    str_list.append(" Type %s\n" % file_type)
    if file_type == "reg":
        str_list.append(" Size %s\n" % rorpath.getsize())

        # If file is hardlinked, add that information
        if Globals.preserve_hardlinks:
            numlinks = rorpath.getnumlinks()
            if numlinks > 1:
                str_list.append(" NumHardLinks %s\n" % numlinks)
                str_list.append(" Inode %s\n" % rorpath.getinode())
                str_list.append(" DeviceLoc %s\n" % rorpath.getdevloc())
    elif file_type == "None":
        # Nothing else is recorded for this type
        return "".join(str_list)
    elif file_type == "dir" or file_type == "sock" or file_type == "fifo":
        pass
    elif file_type == "sym":
        str_list.append(" SymData %s\n" % quote_path(rorpath.readlink()))
    elif file_type == "dev":
        major, minor = rorpath.getdevnums()
        if rorpath.isblkdev():
            devchar = "b"
        else:
            assert rorpath.ischardev()
            devchar = "c"
        str_list.append(" DeviceNum %s %s %s\n" % (devchar, major, minor))

    # Store time information
    if file_type != 'sym' and file_type != 'dev':
        str_list.append(" ModTime %s\n" % rorpath.getmtime())

    # Add user, group, and permission information
    uid, gid = rorpath.getuidgid()
    str_list.append(" Uid %s\n" % uid)
    str_list.append(" Gid %s\n" % gid)
    str_list.append(" Permissions %s\n" % rorpath.getperms())
    return "".join(str_list)


line_parsing_regexp = re.compile("^ *([A-Za-z0-9]+) (.+)$")


def Record2RORP(record_string):
    """Given record_string, return RORPath

    For speed reasons, write the RORPath data dictionary directly
    instead of calling rorpath functions. This depends on the keys
    stored here ('type', 'size', 'nlink', ...) being the ones the
    RORPath class expects in its data dictionary.

    Empty lines are skipped. ParsingError is raised for a line that
    does not look like "<Field> <value>", for an unknown field name,
    and for a value that cannot be converted (a bad number, or a
    DeviceNum that does not have exactly three parts).

    """
    data_dict = {}
    index = None  # stays None if the record has no File line
    # An explicit loop: the lines are processed for their side effects
    # on data_dict and index, so map() is not appropriate here.
    for line in record_string.split("\n"):
        if not line:
            continue  # skip empty lines
        match = line_parsing_regexp.search(line)
        if not match:
            raise ParsingError("Bad line: '%s'" % line)
        field, data = match.group(1), match.group(2)

        try:
            if field == "File":
                if data == ".":
                    index = ()
                else:
                    index = tuple(unquote_path(data).split("/"))
            elif field == "Type":
                if data == "None":
                    data_dict['type'] = None
                else:
                    data_dict['type'] = data
            elif field == "Size":
                data_dict['size'] = long(data)
            elif field == "NumHardLinks":
                data_dict['nlink'] = int(data)
            elif field == "Inode":
                data_dict['inode'] = long(data)
            elif field == "DeviceLoc":
                data_dict['devloc'] = long(data)
            elif field == "SymData":
                data_dict['linkname'] = unquote_path(data)
            elif field == "DeviceNum":
                devchar, major_str, minor_str = data.split(" ")
                data_dict['devnums'] = (devchar, int(major_str),
                                        int(minor_str))
            elif field == "ModTime":
                data_dict['mtime'] = long(data)
            elif field == "Uid":
                data_dict['uid'] = int(data)
            elif field == "Gid":
                data_dict['gid'] = int(data)
            elif field == "Permissions":
                data_dict['perms'] = int(data)
            else:
                raise ParsingError("Unknown field in line '%s'" % line)
        except ValueError:
            # Failed number conversion or wrong number of DeviceNum
            # parts: report it as unparsable data like other bad lines.
            raise ParsingError("Bad value in line '%s'" % line)

    return rpath.RORPath(index, data_dict)


chars_to_quote = re.compile("\\n|\\\\")


def quote_path(path_string):
    r"""Return quoted version of path_string

    Because newlines are used to separate fields in a record, they are
    replaced with \n. Backslashes become \\ and everything else is
    left the way it is.

    """
    def replacement_func(match_obj):
        """This is called on the match obj of any char that needs quoting"""
        char = match_obj.group(0)
        if char == "\n":
            return "\\n"
        elif char == "\\":
            return "\\\\"
        assert 0, "Bad char %s needs quoting" % char
    return chars_to_quote.sub(replacement_func, path_string)


# Two-character sequences produced by quote_path: backslash-n and
# backslash-backslash. Compiled once instead of on every call.
_quoted_sequences = re.compile("\\\\n|\\\\\\\\")


def unquote_path(quoted_string):
    """Reverse what was done by quote_path"""
    def replacement_func(match_obj):
        """Unquote match obj of two character sequence"""
        two_chars = match_obj.group(0)
        if two_chars == "\\n":
            return "\n"
        elif two_chars == "\\\\":
            return "\\"
        # Defensive: only reachable if the pattern above is extended
        # without adding a matching branch here.
        log.Log("Warning, unknown quoted sequence %s found" % two_chars, 2)
        return two_chars
    return _quoted_sequences.sub(replacement_func, quoted_string)