diff options
author | Jelmer Vernooij <jelmer@samba.org> | 2010-09-03 23:09:05 +0200 |
---|---|---|
committer | Jelmer Vernooij <jelmer@samba.org> | 2010-09-03 23:09:05 +0200 |
commit | 800760bbff11de9e7b3ad8949486fc6ef11e95ce (patch) | |
tree | 8f13f492b839d5fa3fd9689c3ce674b9c5770c62 /fastimport | |
parent | 9bf51243a371c086581367864d7f3997171fee2c (diff) | |
download | python-fastimport-git-800760bbff11de9e7b3ad8949486fc6ef11e95ce.tar.gz |
Move pure-fastimport code into its own directory, in preparation of splitting it into a separate package.
Diffstat (limited to 'fastimport')
-rw-r--r-- | fastimport/__init__.py | 0 | ||||
-rw-r--r-- | fastimport/commands.py | 444 | ||||
-rw-r--r-- | fastimport/dates.py | 79 | ||||
-rw-r--r-- | fastimport/errors.py | 176 | ||||
-rw-r--r-- | fastimport/helpers.py | 95 | ||||
-rw-r--r-- | fastimport/idmapfile.py | 65 | ||||
-rw-r--r-- | fastimport/parser.py | 626 | ||||
-rw-r--r-- | fastimport/processor.py | 257 | ||||
-rw-r--r-- | fastimport/tests/__init__.py | 0 | ||||
-rw-r--r-- | fastimport/tests/test_commands.py | 341 | ||||
-rw-r--r-- | fastimport/tests/test_errors.py | 78 |
11 files changed, 2161 insertions, 0 deletions
diff --git a/fastimport/__init__.py b/fastimport/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/fastimport/__init__.py diff --git a/fastimport/commands.py b/fastimport/commands.py new file mode 100644 index 0000000..7368070 --- /dev/null +++ b/fastimport/commands.py @@ -0,0 +1,444 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Import command classes.""" + + +# There is a bug in git 1.5.4.3 and older by which unquoting a string consumes +# one extra character. Set this variable to True to work-around it. It only +# happens when renaming a file whose name contains spaces and/or quotes, and +# the symptom is: +# % git-fast-import +# fatal: Missing space after source: R "file 1.txt" file 2.txt +# http://git.kernel.org/?p=git/git.git;a=commit;h=c8744d6a8b27115503565041566d97c21e722584 +GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE = False + + +# Lists of command names +COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'feature', 'progress', + 'reset', 'tag'] +FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename', + 'filedeleteall'] + +# Bazaar file kinds +FILE_KIND = 'file' +DIRECTORY_KIND = 'directory' +SYMLINK_KIND = 'symlink' +TREE_REFERENCE_KIND = 'tree-reference' + +# Feature names +MULTIPLE_AUTHORS_FEATURE = "multiple-authors" +COMMIT_PROPERTIES_FEATURE = "commit-properties" +EMPTY_DIRS_FEATURE = "empty-directories" +FEATURE_NAMES = [ + MULTIPLE_AUTHORS_FEATURE, + COMMIT_PROPERTIES_FEATURE, + EMPTY_DIRS_FEATURE, + ] + + +class ImportCommand(object): + """Base class for import commands.""" + + def __init__(self, name): + self.name = name + # List of field names not to display + self._binary = [] + + def __str__(self): + return repr(self) + + def dump_str(self, names=None, child_lists=None, verbose=False): + """Dump fields as a string. + + :param names: the list of fields to include or + None for all public fields + :param child_lists: dictionary of child command names to + fields for that child command to include + :param verbose: if True, prefix each line with the command class and + display fields as a dictionary; if False, dump just the field + values with tabs between them + """ + interesting = {} + if names is None: + fields = [k for k in self.__dict__.keys() if not k.startswith('_')] + else: + fields = names + for field in fields: + value = self.__dict__.get(field) + if field in self._binary and value is not None: + value = '(...)' + interesting[field] = value + if verbose: + return "%s: %s" % (self.__class__.__name__, interesting) + else: + return "\t".join([repr(interesting[k]) for k in fields]) + + +class BlobCommand(ImportCommand): + + def __init__(self, mark, data, lineno=0): + ImportCommand.__init__(self, 'blob') + self.mark = mark + self.data = data + self.lineno = lineno + # Provide a unique id in case the mark is missing + if mark is None: + self.id = '@%d' % lineno + else: + self.id = ':' + mark + self._binary = ['data'] + + def __repr__(self): + if self.mark is None: + mark_line = "" + else: + mark_line = "\nmark :%s" % self.mark + return "blob%s\ndata %d\n%s" % (mark_line, len(self.data), self.data) + + +class CheckpointCommand(ImportCommand): + + def __init__(self): + ImportCommand.__init__(self, 'checkpoint') + + def __repr__(self): + return "checkpoint" + + +class CommitCommand(ImportCommand): + + def __init__(self, ref, mark, author, committer, message, from_, + merges, file_iter, lineno=0, more_authors=None, properties=None): + ImportCommand.__init__(self, 'commit') + self.ref = ref + self.mark = mark + self.author = author + self.committer = committer + self.message = message + self.from_ = from_ + self.merges = merges + self.file_iter = file_iter + self.more_authors = more_authors + self.properties = properties + self.lineno = lineno + self._binary = ['file_iter'] + # Provide a unique id in case the mark is missing + if mark is None: + self.id = '@%d' % lineno + else: + self.id = ':%s' % mark + + def __repr__(self): + return self.to_string(include_file_contents=True) + + def __str__(self): + return self.to_string(include_file_contents=False) + + def to_string(self, use_features=True, include_file_contents=False): + if self.mark is None: + mark_line = "" + else: + mark_line = "\nmark :%s" % self.mark + if self.author is None: + author_section = "" + else: + author_section = "\nauthor %s" % format_who_when(self.author) + if use_features and self.more_authors: + for author in self.more_authors: + author_section += "\nauthor %s" % format_who_when(author) + committer = "committer %s" % format_who_when(self.committer) + if self.message is None: + msg_section = "" + else: + msg = self.message.encode('utf8') + msg_section = "\ndata %d\n%s" % (len(msg), msg) + if self.from_ is None: + from_line = "" + else: + from_line = "\nfrom %s" % self.from_ + if self.merges is None: + merge_lines = "" + else: + merge_lines = "".join(["\nmerge %s" % (m,) + for m in self.merges]) + if use_features and self.properties: + property_lines = [] + for name in sorted(self.properties): + value = self.properties[name] + property_lines.append("\n" + format_property(name, value)) + properties_section = "".join(property_lines) + else: + properties_section = "" + if self.file_iter is None: + filecommands = "" + else: + if include_file_contents: + format_str = "\n%r" + else: + format_str = "\n%s" + filecommands = "".join([format_str % (c,) + for c in self.iter_files()]) + return "commit %s%s%s\n%s%s%s%s%s%s" % (self.ref, mark_line, + author_section, committer, msg_section, from_line, merge_lines, + properties_section, filecommands) + + def dump_str(self, names=None, child_lists=None, verbose=False): + result = [ImportCommand.dump_str(self, names, verbose=verbose)] + for f in self.iter_files(): + if child_lists is None: + continue + try: + child_names = child_lists[f.name] + except KeyError: + continue + result.append("\t%s" % f.dump_str(child_names, verbose=verbose)) + return '\n'.join(result) + + def iter_files(self): + """Iterate over files.""" + # file_iter may be a callable or an iterator + if callable(self.file_iter): + return self.file_iter() + elif self.file_iter: + return iter(self.file_iter) + + +class FeatureCommand(ImportCommand): + + def __init__(self, feature_name, value=None, lineno=0): + ImportCommand.__init__(self, 'feature') + self.feature_name = feature_name + self.value = value + self.lineno = lineno + + def __repr__(self): + if self.value is None: + value_text = "" + else: + value_text = "=%s" % self.value + return "feature %s%s" % (self.feature_name, value_text) + + +class ProgressCommand(ImportCommand): + + def __init__(self, message): + ImportCommand.__init__(self, 'progress') + self.message = message + + def __repr__(self): + return "progress %s" % (self.message,) + + +class ResetCommand(ImportCommand): + + def __init__(self, ref, from_): + ImportCommand.__init__(self, 'reset') + self.ref = ref + self.from_ = from_ + + def __repr__(self): + if self.from_ is None: + from_line = "" + else: + # According to git-fast-import(1), the extra LF is optional here; + # however, versions of git up to 1.5.4.3 had a bug by which the LF + # was needed. Always emit it, since it doesn't hurt and maintains + # compatibility with older versions. + # http://git.kernel.org/?p=git/git.git;a=commit;h=655e8515f279c01f525745d443f509f97cd805ab + from_line = "\nfrom %s\n" % self.from_ + return "reset %s%s" % (self.ref, from_line) + + +class TagCommand(ImportCommand): + + def __init__(self, id, from_, tagger, message): + ImportCommand.__init__(self, 'tag') + self.id = id + self.from_ = from_ + self.tagger = tagger + self.message = message + + def __repr__(self): + if self.from_ is None: + from_line = "" + else: + from_line = "\nfrom %s" % self.from_ + if self.tagger is None: + tagger_line = "" + else: + tagger_line = "\ntagger %s" % format_who_when(self.tagger) + if self.message is None: + msg_section = "" + else: + msg = self.message.encode('utf8') + msg_section = "\ndata %d\n%s" % (len(msg), msg) + return "tag %s%s%s%s" % (self.id, from_line, tagger_line, msg_section) + + +class FileCommand(ImportCommand): + """Base class for file commands.""" + pass + + +class FileModifyCommand(FileCommand): + + def __init__(self, path, kind, is_executable, dataref, data): + # Either dataref or data should be null + FileCommand.__init__(self, 'filemodify') + self.path = check_path(path) + self.kind = kind + self.is_executable = is_executable + self.dataref = dataref + self.data = data + self._binary = ['data'] + + def __repr__(self): + return self.to_string(include_file_contents=True) + + def __str__(self): + return self.to_string(include_file_contents=False) + + def to_string(self, include_file_contents=False): + if self.is_executable: + mode = "755" + elif self.kind == 'file': + mode = "644" + elif self.kind == 'directory': + mode = "040000" + elif self.kind == 'symlink': + mode = "120000" + elif self.kind == 'tree-reference': + mode = "160000" + else: + raise AssertionError("unknown kind %s" % (self.kind,)) + datastr = "" + if self.kind == 'directory': + dataref = '-' + elif self.dataref is None: + dataref = "inline" + if include_file_contents: + datastr = "\ndata %d\n%s" % (len(self.data), self.data) + else: + dataref = "%s" % (self.dataref,) + path = format_path(self.path) + return "M %s %s %s%s" % (mode, dataref, path, datastr) + + +class FileDeleteCommand(FileCommand): + + def __init__(self, path): + FileCommand.__init__(self, 'filedelete') + self.path = check_path(path) + + def __repr__(self): + return "D %s" % (format_path(self.path),) + + +class FileCopyCommand(FileCommand): + + def __init__(self, src_path, dest_path): + FileCommand.__init__(self, 'filecopy') + self.src_path = check_path(src_path) + self.dest_path = check_path(dest_path) + + def __repr__(self): + return "C %s %s" % ( + format_path(self.src_path, quote_spaces=True), + format_path(self.dest_path)) + + +class FileRenameCommand(FileCommand): + + def __init__(self, old_path, new_path): + FileCommand.__init__(self, 'filerename') + self.old_path = check_path(old_path) + self.new_path = check_path(new_path) + + def __repr__(self): + return "R %s %s" % ( + format_path(self.old_path, quote_spaces=True), + format_path(self.new_path)) + + +class FileDeleteAllCommand(FileCommand): + + def __init__(self): + FileCommand.__init__(self, 'filedeleteall') + + def __repr__(self): + return "deleteall" + + +def check_path(path): + """Check that a path is legal. + + :return: the path if all is OK + :raise ValueError: if the path is illegal + """ + if path is None or path == '': + raise ValueError("illegal path '%s'" % path) + return path + + +def format_path(p, quote_spaces=False): + """Format a path in utf8, quoting it if necessary.""" + if '\n' in p: + import re + p = re.sub('\n', '\\n', p) + quote = True + else: + quote = p[0] == '"' or (quote_spaces and ' ' in p) + if quote: + extra = GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE and ' ' or '' + p = '"%s"%s' % (p, extra) + return p.encode('utf8') + + +def format_who_when(fields): + """Format a tuple of name,email,secs-since-epoch,utc-offset-secs as a string.""" + offset = fields[3] + if offset < 0: + offset_sign = '-' + offset = abs(offset) + else: + offset_sign = '+' + offset_hours = offset / 3600 + offset_minutes = offset / 60 - offset_hours * 60 + offset_str = "%s%02d%02d" % (offset_sign, offset_hours, offset_minutes) + name = fields[0] + if name == '': + sep = '' + else: + sep = ' ' + if isinstance(name, unicode): + name = name.encode('utf8') + email = fields[1] + if isinstance(email, unicode): + email = email.encode('utf8') + result = "%s%s<%s> %d %s" % (name, sep, email, fields[2], offset_str) + return result + + +def format_property(name, value): + """Format the name and value (both unicode) of a property as a string.""" + utf8_name = name.encode('utf8') + if value is not None: + utf8_value = value.encode('utf8') + result = "property %s %d %s" % (utf8_name, len(utf8_value), utf8_value) + else: + result = "property %s" % (utf8_name,) + return result diff --git a/fastimport/dates.py b/fastimport/dates.py new file mode 100644 index 0000000..510ab85 --- /dev/null +++ b/fastimport/dates.py @@ -0,0 +1,79 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Date parsing routines. + +Each routine returns timestamp,timezone where + +* timestamp is seconds since epoch +* timezone is the offset from UTC in seconds. +""" + + +import time + +from bzrlib.plugins.fastimport.fastimport import errors + + +def parse_raw(s, lineno=0): + """Parse a date from a raw string. + + The format must be exactly "seconds-since-epoch offset-utc". + See the spec for details. + """ + timestamp_str, timezone_str = s.split(' ', 1) + timestamp = float(timestamp_str) + timezone = _parse_tz(timezone_str, lineno) + return timestamp, timezone + + +def _parse_tz(tz, lineno): + """Parse a timezone specification in the [+|-]HHMM format. + + :return: the timezone offset in seconds. + """ + # from git_repository.py in bzr-git + if len(tz) != 5: + raise errors.InvalidTimezone(lineno, tz) + sign = {'+': +1, '-': -1}[tz[0]] + hours = int(tz[1:3]) + minutes = int(tz[3:]) + return sign * 60 * (60 * hours + minutes) + + +def parse_rfc2822(s, lineno=0): + """Parse a date from a rfc2822 string. + + See the spec for details. + """ + raise NotImplementedError(parse_rfc2822) + + +def parse_now(s, lineno=0): + """Parse a date from a string. + + The format must be exactly "now". + See the spec for details. + """ + return time.time(), 0 + + +# Lookup tabel of date parsing routines +DATE_PARSERS_BY_NAME = { + 'raw': parse_raw, + 'rfc2822': parse_rfc2822, + 'now': parse_now, + } diff --git a/fastimport/errors.py b/fastimport/errors.py new file mode 100644 index 0000000..9a71d77 --- /dev/null +++ b/fastimport/errors.py @@ -0,0 +1,176 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Exception classes for fastimport""" + +from bzrlib import errors as bzr_errors + + +# Prefix to messages to show location information +_LOCATION_FMT = "line %(lineno)d: " + + +class ImportError(bzr_errors.BzrError): + """The base exception class for all import processing exceptions.""" + + _fmt = "Unknown Import Error" + + +class ParsingError(ImportError): + """The base exception class for all import processing exceptions.""" + + _fmt = _LOCATION_FMT + "Unknown Import Parsing Error" + + def __init__(self, lineno): + ImportError.__init__(self) + self.lineno = lineno + + +class MissingBytes(ParsingError): + """Raised when EOF encountered while expecting to find more bytes.""" + + _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes," + " found %(found)d") + + def __init__(self, lineno, expected, found): + ParsingError.__init__(self, lineno) + self.expected = expected + self.found = found + + +class MissingTerminator(ParsingError): + """Raised when EOF encountered while expecting to find a terminator.""" + + _fmt = (_LOCATION_FMT + + "Unexpected EOF - expected '%(terminator)s' terminator") + + def __init__(self, lineno, terminator): + ParsingError.__init__(self, lineno) + self.terminator = terminator + + +class InvalidCommand(ParsingError): + """Raised when an unknown command found.""" + + _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'") + + def __init__(self, lineno, cmd): + ParsingError.__init__(self, lineno) + self.cmd = cmd + + +class MissingSection(ParsingError): + """Raised when a section is required in a command but not present.""" + + _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s") + + def __init__(self, lineno, cmd, section): + ParsingError.__init__(self, lineno) + self.cmd = cmd + self.section = section + + +class BadFormat(ParsingError): + """Raised when a section is formatted incorrectly.""" + + _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in " + "command %(cmd)s: found '%(text)s'") + + def __init__(self, lineno, cmd, section, text): + ParsingError.__init__(self, lineno) + self.cmd = cmd + self.section = section + self.text = text + + +class InvalidTimezone(ParsingError): + """Raised when converting a string timezone to a seconds offset.""" + + _fmt = (_LOCATION_FMT + + "Timezone %(timezone)r could not be converted.%(reason)s") + + def __init__(self, lineno, timezone, reason=None): + ParsingError.__init__(self, lineno) + self.timezone = timezone + if reason: + self.reason = ' ' + reason + else: + self.reason = '' + + +class UnknownDateFormat(ImportError): + """Raised when an unknown date format is given.""" + + _fmt = ("Unknown date format '%(format)s'") + + def __init__(self, format): + ImportError.__init__(self) + self.format = format + + +class MissingHandler(ImportError): + """Raised when a processor can't handle a command.""" + + _fmt = ("Missing handler for command %(cmd)s") + + def __init__(self, cmd): + ImportError.__init__(self) + self.cmd = cmd + + +class UnknownParameter(ImportError): + """Raised when an unknown parameter is passed to a processor.""" + + _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s") + + def __init__(self, param, knowns): + ImportError.__init__(self) + self.param = param + self.knowns = knowns + + +class BadRepositorySize(ImportError): + """Raised when the repository has an incorrect number of revisions.""" + + _fmt = ("Bad repository size - %(found)d revisions found, " + "%(expected)d expected") + + def __init__(self, expected, found): + ImportError.__init__(self) + self.expected = expected + self.found = found + + +class BadRestart(ImportError): + """Raised when the import stream and id-map do not match up.""" + + _fmt = ("Bad restart - attempted to skip commit %(commit_id)s " + "but matching revision-id is unknown") + + def __init__(self, commit_id): + ImportError.__init__(self) + self.commit_id = commit_id + + +class UnknownFeature(ImportError): + """Raised when an unknown feature is given in the input stream.""" + + _fmt = ("Unknown feature '%(feature)s' - try a later importer or " + "an earlier data format") + + def __init__(self, feature): + ImportError.__init__(self) + self.feature = feature diff --git a/fastimport/helpers.py b/fastimport/helpers.py new file mode 100644 index 0000000..05cce6f --- /dev/null +++ b/fastimport/helpers.py @@ -0,0 +1,95 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Miscellaneous useful stuff.""" + + +def single_plural(n, single, plural): + """Return a single or plural form of a noun based on number.""" + if n == 1: + return single + else: + return plural + + +def defines_to_dict(defines): + """Convert a list of definition strings to a dictionary.""" + if defines is None: + return None + result = {} + for define in defines: + kv = define.split('=', 1) + if len(kv) == 1: + result[define.strip()] = 1 + else: + result[kv[0].strip()] = kv[1].strip() + return result + + +def invert_dict(d): + """Invert a dictionary with keys matching each value turned into a list.""" + # Based on recipe from ASPN + result = {} + for k, v in d.iteritems(): + keys = result.setdefault(v, []) + keys.append(k) + return result + + +def invert_dictset(d): + """Invert a dictionary with keys matching a set of values, turned into lists.""" + # Based on recipe from ASPN + result = {} + for k, c in d.iteritems(): + for v in c: + keys = result.setdefault(v, []) + keys.append(k) + return result + + +def _common_path_and_rest(l1, l2, common=[]): + # From http://code.activestate.com/recipes/208993/ + if len(l1) < 1: return (common, l1, l2) + if len(l2) < 1: return (common, l1, l2) + if l1[0] != l2[0]: return (common, l1, l2) + return _common_path_and_rest(l1[1:], l2[1:], common+[l1[0]]) + + +def common_path(path1, path2): + """Find the common bit of 2 paths.""" + return ''.join(_common_path_and_rest(path1, path2)[0]) + + +def binary_stream(stream): + """Ensure a stream is binary on Windows. + + :return: the stream + """ + try: + import os + if os.name == 'nt': + fileno = getattr(stream, 'fileno', None) + if fileno: + no = fileno() + if no >= 0: # -1 means we're working as subprocess + import msvcrt + msvcrt.setmode(no, os.O_BINARY) + except ImportError: + pass + return stream + + + diff --git a/fastimport/idmapfile.py b/fastimport/idmapfile.py new file mode 100644 index 0000000..7b4ccf4 --- /dev/null +++ b/fastimport/idmapfile.py @@ -0,0 +1,65 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Routines for saving and loading the id-map file.""" + +import os + + +def save_id_map(filename, revision_ids): + """Save the mapping of commit ids to revision ids to a file. + + Throws the usual exceptions if the file cannot be opened, + written to or closed. + + :param filename: name of the file to save the data to + :param revision_ids: a dictionary of commit ids to revision ids. + """ + f = open(filename, 'wb') + try: + for commit_id, rev_id in revision_ids.iteritems(): + f.write("%s %s\n" % (commit_id, rev_id)) + f.flush() + finally: + f.close() + + +def load_id_map(filename): + """Load the mapping of commit ids to revision ids from a file. + + If the file does not exist, an empty result is returned. + If the file does exists but cannot be opened, read or closed, + the normal exceptions are thrown. + + NOTE: It is assumed that commit-ids do not have embedded spaces. + + :param filename: name of the file to save the data to + :result: map, count where: + map = a dictionary of commit ids to revision ids; + count = the number of keys in map + """ + result = {} + count = 0 + if os.path.exists(filename): + f = open(filename) + try: + for line in f: + parts = line[:-1].split(' ', 1) + result[parts[0]] = parts[1] + count += 1 + finally: + f.close() + return result, count diff --git a/fastimport/parser.py b/fastimport/parser.py new file mode 100644 index 0000000..ab6efb6 --- /dev/null +++ b/fastimport/parser.py @@ -0,0 +1,626 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Parser of import data into command objects. + +In order to reuse existing front-ends, the stream format is a subset of +the one used by git-fast-import (as of the 1.5.4 release of git at least). +The grammar is: + + stream ::= cmd*; + + cmd ::= new_blob + | new_commit + | new_tag + | reset_branch + | checkpoint + | progress + ; + + new_blob ::= 'blob' lf + mark? + file_content; + file_content ::= data; + + new_commit ::= 'commit' sp ref_str lf + mark? + ('author' sp name '<' email '>' when lf)? + 'committer' sp name '<' email '>' when lf + commit_msg + ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? + ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)* + file_change* + lf?; + commit_msg ::= data; + + file_change ::= file_clr + | file_del + | file_rnm + | file_cpy + | file_obm + | file_inm; + file_clr ::= 'deleteall' lf; + file_del ::= 'D' sp path_str lf; + file_rnm ::= 'R' sp path_str sp path_str lf; + file_cpy ::= 'C' sp path_str sp path_str lf; + file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf; + file_inm ::= 'M' sp mode sp 'inline' sp path_str lf + data; + + new_tag ::= 'tag' sp tag_str lf + 'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf + 'tagger' sp name '<' email '>' when lf + tag_msg; + tag_msg ::= data; + + reset_branch ::= 'reset' sp ref_str lf + ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? + lf?; + + checkpoint ::= 'checkpoint' lf + lf?; + + progress ::= 'progress' sp not_lf* lf + lf?; + + # note: the first idnum in a stream should be 1 and subsequent + # idnums should not have gaps between values as this will cause + # the stream parser to reserve space for the gapped values. An + # idnum can be updated in the future to a new object by issuing + # a new mark directive with the old idnum. + # + mark ::= 'mark' sp idnum lf; + data ::= (delimited_data | exact_data) + lf?; + + # note: delim may be any string but must not contain lf. + # data_line may contain any data but must not be exactly + # delim. The lf after the final data_line is included in + # the data. + delimited_data ::= 'data' sp '<<' delim lf + (data_line lf)* + delim lf; + + # note: declen indicates the length of binary_data in bytes. + # declen does not include the lf preceeding the binary data. + # + exact_data ::= 'data' sp declen lf + binary_data; + + # note: quoted strings are C-style quoting supporting \c for + # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn + # is the signed byte value in octal. Note that the only + # characters which must actually be escaped to protect the + # stream formatting is: \, " and LF. Otherwise these values + # are UTF8. + # + ref_str ::= ref; + sha1exp_str ::= sha1exp; + tag_str ::= tag; + path_str ::= path | '"' quoted(path) '"' ; + mode ::= '100644' | '644' + | '100755' | '755' + | '120000' + ; + + declen ::= # unsigned 32 bit value, ascii base10 notation; + bigint ::= # unsigned integer value, ascii base10 notation; + binary_data ::= # file content, not interpreted; + + when ::= raw_when | rfc2822_when; + raw_when ::= ts sp tz; + rfc2822_when ::= # Valid RFC 2822 date and time; + + sp ::= # ASCII space character; + lf ::= # ASCII newline (LF) character; + + # note: a colon (':') must precede the numerical value assigned to + # an idnum. This is to distinguish it from a ref or tag name as + # GIT does not permit ':' in ref or tag strings. + # + idnum ::= ':' bigint; + path ::= # GIT style file path, e.g. "a/b/c"; + ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT"; + tag ::= # GIT tag name, e.g. "FIREFOX_1_5"; + sha1exp ::= # Any valid GIT SHA1 expression; + hexsha1 ::= # SHA1 in hexadecimal format; + + # note: name and email are UTF8 strings, however name must not + # contain '<' or lf and email must not contain any of the + # following: '<', '>', lf. + # + name ::= # valid GIT author/committer name; + email ::= # valid GIT author/committer email; + ts ::= # time since the epoch in seconds, ascii base10 notation; + tz ::= # GIT style timezone; + + # note: comments may appear anywhere in the input, except + # within a data command. Any form of the data command + # always escapes the related input from comment processing. + # + # In case it is not clear, the '#' that starts the comment + # must be the first character on that the line (an lf have + # preceeded it). + # + comment ::= '#' not_lf* lf; + not_lf ::= # Any byte that is not ASCII newline (LF); +""" + + +import re +import sys + +import commands +import dates +import errors + + +## Stream parsing ## + +class LineBasedParser(object): + + def __init__(self, input): + """A Parser that keeps track of line numbers. + + :param input: the file-like object to read from + """ + self.input = input + self.lineno = 0 + # Lines pushed back onto the input stream + self._buffer = [] + + def abort(self, exception, *args): + """Raise an exception providing line number information.""" + raise exception(self.lineno, *args) + + def readline(self): + """Get the next line including the newline or '' on EOF.""" + self.lineno += 1 + if self._buffer: + return self._buffer.pop() + else: + return self.input.readline() + + def next_line(self): + """Get the next line without the newline or None on EOF.""" + line = self.readline() + if line: + return line[:-1] + else: + return None + + def push_line(self, line): + """Push line back onto the line buffer. + + :param line: the line with no trailing newline + """ + self.lineno -= 1 + self._buffer.append(line + "\n") + + def read_bytes(self, count): + """Read a given number of bytes from the input stream. + + Throws MissingBytes if the bytes are not found. + + Note: This method does not read from the line buffer. + + :return: a string + """ + result = self.input.read(count) + found = len(result) + self.lineno += result.count("\n") + if found != count: + self.abort(errors.MissingBytes, count, found) + return result + + def read_until(self, terminator): + """Read the input stream until the terminator is found. + + Throws MissingTerminator if the terminator is not found. + + Note: This method does not read from the line buffer. + + :return: the bytes read up to but excluding the terminator. + """ + + lines = [] + term = terminator + '\n' + while True: + line = self.input.readline() + if line == term: + break + else: + lines.append(line) + return ''.join(lines) + + +# Regular expression used for parsing. (Note: The spec states that the name +# part should be non-empty but git-fast-export doesn't always do that so +# the first bit is \w*, not \w+.) Also git-fast-import code says the +# space before the email is optional. +_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)') +_WHO_RE = re.compile(r'([^<]*)<(.*)>') + + +class ImportParser(LineBasedParser): + + def __init__(self, input, verbose=False, output=sys.stdout, + user_mapper=None): + """A Parser of import commands. + + :param input: the file-like object to read from + :param verbose: display extra information of not + :param output: the file-like object to write messages to (YAGNI?) + :param user_mapper: if not None, the UserMapper used to adjust + user-ids for authors, committers and taggers. + """ + LineBasedParser.__init__(self, input) + self.verbose = verbose + self.output = output + self.user_mapper = user_mapper + # We auto-detect the date format when a date is first encountered + self.date_parser = None + + def warning(self, msg): + sys.stderr.write("warning line %d: %s\n" % (self.lineno, msg)) + + def iter_commands(self): + """Iterator returning ImportCommand objects.""" + while True: + line = self.next_line() + if line is None: + break + elif len(line) == 0 or line.startswith('#'): + continue + # Search for commands in order of likelihood + elif line.startswith('commit '): + yield self._parse_commit(line[len('commit '):]) + elif line.startswith('blob'): + yield self._parse_blob() + elif line.startswith('progress '): + yield commands.ProgressCommand(line[len('progress '):]) + elif line.startswith('reset '): + yield self._parse_reset(line[len('reset '):]) + elif line.startswith('tag '): + yield self._parse_tag(line[len('tag '):]) + elif line.startswith('checkpoint'): + yield commands.CheckpointCommand() + elif line.startswith('feature'): + yield self._parse_feature(line[len('feature '):]) + else: + self.abort(errors.InvalidCommand, line) + + def iter_file_commands(self): + """Iterator returning FileCommand objects. + + If an invalid file command is found, the line is silently + pushed back and iteration ends. + """ + while True: + line = self.next_line() + if line is None: + break + elif len(line) == 0 or line.startswith('#'): + continue + # Search for file commands in order of likelihood + elif line.startswith('M '): + yield self._parse_file_modify(line[2:]) + elif line.startswith('D '): + path = self._path(line[2:]) + yield commands.FileDeleteCommand(path) + elif line.startswith('R '): + old, new = self._path_pair(line[2:]) + yield commands.FileRenameCommand(old, new) + elif line.startswith('C '): + src, dest = self._path_pair(line[2:]) + yield commands.FileCopyCommand(src, dest) + elif line.startswith('deleteall'): + yield commands.FileDeleteAllCommand() + else: + self.push_line(line) + break + + def _parse_blob(self): + """Parse a blob command.""" + lineno = self.lineno + mark = self._get_mark_if_any() + data = self._get_data('blob') + return commands.BlobCommand(mark, data, lineno) + + def _parse_commit(self, ref): + """Parse a commit command.""" + lineno = self.lineno + mark = self._get_mark_if_any() + author = self._get_user_info('commit', 'author', False) + more_authors = [] + while True: + another_author = self._get_user_info('commit', 'author', False) + if another_author is not None: + more_authors.append(another_author) + else: + break + committer = self._get_user_info('commit', 'committer') + message = self._get_data('commit', 'message') + try: + message = message.decode('utf_8') + except UnicodeDecodeError: + self.warning( + "commit message not in utf8 - replacing unknown characters") + message = message.decode('utf_8', 'replace') + from_ = self._get_from() + merges = [] + while True: + merge = self._get_merge() + if merge is not None: + # while the spec suggests it's illegal, git-fast-export + # outputs multiple merges on the one line, e.g. + # merge :x :y :z + these_merges = merge.split(" ") + merges.extend(these_merges) + else: + break + properties = {} + while True: + name_value = self._get_property() + if name_value is not None: + name, value = name_value + properties[name] = value + else: + break + return commands.CommitCommand(ref, mark, author, committer, message, + from_, merges, self.iter_file_commands, lineno=lineno, + more_authors=more_authors, properties=properties) + + def _parse_feature(self, info): + """Parse a feature command.""" + parts = info.split("=", 1) + name = parts[0] + if len(parts) > 1: + value = self._path(parts[1]) + else: + value = None + return commands.FeatureCommand(name, value, lineno=self.lineno) + + def _parse_file_modify(self, info): + """Parse a filemodify command within a commit. + + :param info: a string in the format "mode dataref path" + (where dataref might be the hard-coded literal 'inline'). + """ + params = info.split(' ', 2) + path = self._path(params[2]) + is_executable, kind = self._mode(params[0]) + if params[1] == 'inline': + dataref = None + data = self._get_data('filemodify') + else: + dataref = params[1] + data = None + return commands.FileModifyCommand(path, kind, is_executable, dataref, + data) + + def _parse_reset(self, ref): + """Parse a reset command.""" + from_ = self._get_from() + return commands.ResetCommand(ref, from_) + + def _parse_tag(self, name): + """Parse a tag command.""" + from_ = self._get_from('tag') + tagger = self._get_user_info('tag', 'tagger', accept_just_who=True) + message = self._get_data('tag', 'message').decode('utf_8') + return commands.TagCommand(name, from_, tagger, message) + + def _get_mark_if_any(self): + """Parse a mark section.""" + line = self.next_line() + if line.startswith('mark :'): + return line[len('mark :'):] + else: + self.push_line(line) + return None + + def _get_from(self, required_for=None): + """Parse a from section.""" + line = self.next_line() + if line is None: + return None + elif line.startswith('from '): + return line[len('from '):] + elif required_for: + self.abort(errors.MissingSection, required_for, 'from') + else: + self.push_line(line) + return None + + def _get_merge(self): + """Parse a merge section.""" + line = self.next_line() + if line is None: + return None + elif line.startswith('merge '): + return line[len('merge '):] + else: + self.push_line(line) + return None + + def _get_property(self): + """Parse a property section.""" + line = self.next_line() + if line is None: + return None + elif line.startswith('property '): + return self._name_value(line[len('property '):]) + else: + self.push_line(line) + return None + + def _get_user_info(self, cmd, section, required=True, + accept_just_who=False): + """Parse a user section.""" + line = self.next_line() + if line.startswith(section + ' '): + return self._who_when(line[len(section + ' '):], cmd, section, + accept_just_who=accept_just_who) + elif required: + self.abort(errors.MissingSection, cmd, section) + else: + self.push_line(line) + return None + + def _get_data(self, required_for, section='data'): + """Parse a data section.""" + line = self.next_line() + if line.startswith('data '): + rest = line[len('data '):] + if rest.startswith('<<'): + return self.read_until(rest[2:]) + else: + size = int(rest) + read_bytes = self.read_bytes(size) + # optional LF after data. + next = self.input.readline() + self.lineno += 1 + if len(next) > 1 or next != "\n": + self.push_line(next[:-1]) + return read_bytes + else: + self.abort(errors.MissingSection, required_for, section) + + def _who_when(self, s, cmd, section, accept_just_who=False): + """Parse who and when information from a string. + + :return: a tuple of (name,email,timestamp,timezone). name may be + the empty string if only an email address was given. + """ + match = _WHO_AND_WHEN_RE.search(s) + if match: + datestr = match.group(3).lstrip() + if self.date_parser is None: + # auto-detect the date format + if len(datestr.split(' ')) == 2: + format = 'raw' + elif datestr == 'now': + format = 'now' + else: + format = 'rfc2822' + self.date_parser = dates.DATE_PARSERS_BY_NAME[format] + try: + when = self.date_parser(datestr, self.lineno) + except ValueError: + print "failed to parse datestr '%s'" % (datestr,) + raise + else: + match = _WHO_RE.search(s) + if accept_just_who and match: + # HACK around missing time + # TODO: output a warning here + when = dates.DATE_PARSERS_BY_NAME['now']('now') + else: + self.abort(errors.BadFormat, cmd, section, s) + name = match.group(1) + if len(name) > 0: + if name[-1] == " ": + try: + name = name[:-1].decode('utf_8') + except UnicodeDecodeError: + # The spec says names are *typically* utf8 encoded + # but that isn't enforced by git-fast-export (at least) + self.warning("%s name not in utf8 - replacing unknown " + "characters" % (section,)) + name = name[:-1].decode('utf_8', 'replace') + email = match.group(2) + # While it shouldn't happen, some datasets have email addresses + # which contain unicode characters. See bug 338186. We sanitize + # the data at this level just in case. + try: + email = email.decode('utf_8') + except UnicodeDecodeError: + self.warning("%s email not in utf8 - replacing unknown characters" + % (section,)) + email = email.decode('utf_8', 'replace') + if self.user_mapper: + name, email = self.user_mapper.map_name_and_email(name, email) + return (name, email, when[0], when[1]) + + def _name_value(self, s): + """Parse a (name,value) tuple from 'name value-length value'.""" + parts = s.split(' ', 2) + name = parts[0] + if len(parts) == 1: + value = None + else: + size = int(parts[1]) + value = parts[2] + still_to_read = size - len(value) + if still_to_read > 0: + read_bytes = self.read_bytes(still_to_read) + value += "\n" + read_bytes[:still_to_read - 1] + value = value.decode('utf8') + return (name, value) + + def _path(self, s): + """Parse a path.""" + if s.startswith('"'): + if s[-1] != '"': + self.abort(errors.BadFormat, '?', '?', s) + else: + return _unquote_c_string(s[1:-1]) + try: + return s.decode('utf_8') + except UnicodeDecodeError: + # The spec recommends utf8 encoding but that isn't enforced + return s + + def _path_pair(self, s): + """Parse two paths separated by a space.""" + # TODO: handle a space in the first path + if s.startswith('"'): + parts = s[1:].split('" ', 1) + else: + parts = s.split(' ', 1) + if len(parts) != 2: + self.abort(errors.BadFormat, '?', '?', s) + elif parts[1].startswith('"') and parts[1].endswith('"'): + parts[1] = parts[1][1:-1] + elif parts[1].startswith('"') or parts[1].endswith('"'): + self.abort(errors.BadFormat, '?', '?', s) + return map(_unquote_c_string, parts) + + def _mode(self, s): + """Parse a file mode into executable and kind. + + :return (is_executable, kind) + """ + # Note: Output from git-fast-export slightly different to spec + if s in ['644', '100644', '0100644']: + return False, commands.FILE_KIND + elif s in ['755', '100755', '0100755']: + return True, commands.FILE_KIND + elif s in ['040000', '0040000']: + return False, commands.DIRECTORY_KIND + elif s in ['120000', '0120000']: + return False, commands.SYMLINK_KIND + elif s in ['160000', '0160000']: + return False, commands.TREE_REFERENCE_KIND + else: + self.abort(errors.BadFormat, 'filemodify', 'mode', s) + + +def _unquote_c_string(s): + """replace C-style escape sequences (\n, \", etc.) with real chars.""" + # HACK: Python strings are close enough + return s.decode('string_escape', 'replace') diff --git a/fastimport/processor.py b/fastimport/processor.py new file mode 100644 index 0000000..74f7183 --- /dev/null +++ b/fastimport/processor.py @@ -0,0 +1,257 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Processor of import commands. + +This module provides core processing functionality including an abstract class +for basing real processors on. See the processors package for examples. +""" + +import sys +import time + +from bzrlib import debug +from bzrlib.errors import NotBranchError +from bzrlib.trace import ( + mutter, + note, + warning, + ) +import errors + + +class ImportProcessor(object): + """Base class for import processors. + + Subclasses should override the pre_*, post_* and *_handler + methods as appropriate. + """ + + known_params = [] + + def __init__(self, bzrdir, params=None, verbose=False, outf=None): + if outf is None: + self.outf = sys.stdout + else: + self.outf = outf + self.verbose = verbose + if params is None: + self.params = {} + else: + self.params = params + self.validate_parameters() + self.bzrdir = bzrdir + if bzrdir is None: + # Some 'importers' don't need a repository to write to + self.working_tree = None + self.branch = None + self.repo = None + else: + try: + # Might be inside a branch + (self.working_tree, self.branch) = bzrdir._get_tree_branch() + self.repo = self.branch.repository + except NotBranchError: + # Must be inside a repository + self.working_tree = None + self.branch = None + self.repo = bzrdir.open_repository() + + # Handlers can set this to request exiting cleanly without + # iterating through the remaining commands + self.finished = False + + def validate_parameters(self): + """Validate that the parameters are correctly specified.""" + for p in self.params: + if p not in self.known_params: + raise errors.UnknownParameter(p, self.known_params) + + def process(self, command_iter): + """Import data into Bazaar by processing a stream of commands. + + :param command_iter: an iterator providing commands + """ + if self.working_tree is not None: + self.working_tree.lock_write() + elif self.branch is not None: + self.branch.lock_write() + elif self.repo is not None: + self.repo.lock_write() + try: + self._process(command_iter) + finally: + # If an unhandled exception occurred, abort the write group + if self.repo is not None and self.repo.is_in_write_group(): + self.repo.abort_write_group() + # Release the locks + if self.working_tree is not None: + self.working_tree.unlock() + elif self.branch is not None: + self.branch.unlock() + elif self.repo is not None: + self.repo.unlock() + + def _process(self, command_iter): + self.pre_process() + for cmd in command_iter(): + try: + handler = self.__class__.__dict__[cmd.name + "_handler"] + except KeyError: + raise errors.MissingHandler(cmd.name) + else: + self.pre_handler(cmd) + handler(self, cmd) + self.post_handler(cmd) + if self.finished: + break + self.post_process() + + def note(self, msg, *args): + """Output a note but timestamp it.""" + msg = "%s %s" % (self._time_of_day(), msg) + note(msg, *args) + + def warning(self, msg, *args): + """Output a warning but timestamp it.""" + msg = "%s WARNING: %s" % (self._time_of_day(), msg) + warning(msg, *args) + + def debug(self, mgs, *args): + """Output a debug message if the appropriate -D option was given.""" + if "fast-import" in debug.debug_flags: + msg = "%s DEBUG: %s" % (self._time_of_day(), msg) + mutter(msg, *args) + + def _time_of_day(self): + """Time of day as a string.""" + # Note: this is a separate method so tests can patch in a fixed value + return time.strftime("%H:%M:%S") + + def pre_process(self): + """Hook for logic at start of processing.""" + pass + + def post_process(self): + """Hook for logic at end of processing.""" + pass + + def pre_handler(self, cmd): + """Hook for logic before each handler starts.""" + pass + + def post_handler(self, cmd): + """Hook for logic after each handler finishes.""" + pass + + def progress_handler(self, cmd): + """Process a ProgressCommand.""" + raise NotImplementedError(self.progress_handler) + + def blob_handler(self, cmd): + """Process a BlobCommand.""" + raise NotImplementedError(self.blob_handler) + + def checkpoint_handler(self, cmd): + """Process a CheckpointCommand.""" + raise NotImplementedError(self.checkpoint_handler) + + def commit_handler(self, cmd): + """Process a CommitCommand.""" + raise NotImplementedError(self.commit_handler) + + def reset_handler(self, cmd): + """Process a ResetCommand.""" + raise NotImplementedError(self.reset_handler) + + def tag_handler(self, cmd): + """Process a TagCommand.""" + raise NotImplementedError(self.tag_handler) + + def feature_handler(self, cmd): + """Process a FeatureCommand.""" + raise NotImplementedError(self.feature_handler) + + +class CommitHandler(object): + """Base class for commit handling. + + Subclasses should override the pre_*, post_* and *_handler + methods as appropriate. + """ + + def __init__(self, command): + self.command = command + + def process(self): + self.pre_process_files() + for fc in self.command.file_iter(): + try: + handler = self.__class__.__dict__[fc.name[4:] + "_handler"] + except KeyError: + raise errors.MissingHandler(fc.name) + else: + handler(self, fc) + self.post_process_files() + + def note(self, msg, *args): + """Output a note but add context.""" + msg = "%s (%s)" % (msg, self.command.id) + note(msg, *args) + + def warning(self, msg, *args): + """Output a warning but add context.""" + msg = "WARNING: %s (%s)" % (msg, self.command.id) + warning(msg, *args) + + def mutter(self, msg, *args): + """Output a mutter but add context.""" + msg = "%s (%s)" % (msg, self.command.id) + mutter(msg, *args) + + def debug(self, msg, *args): + """Output a mutter if the appropriate -D option was given.""" + if "fast-import" in debug.debug_flags: + msg = "%s (%s)" % (msg, self.command.id) + mutter(msg, *args) + + def pre_process_files(self): + """Prepare for committing.""" + pass + + def post_process_files(self): + """Save the revision.""" + pass + + def modify_handler(self, filecmd): + """Handle a filemodify command.""" + raise NotImplementedError(self.modify_handler) + + def delete_handler(self, filecmd): + """Handle a filedelete command.""" + raise NotImplementedError(self.delete_handler) + + def copy_handler(self, filecmd): + """Handle a filecopy command.""" + raise NotImplementedError(self.copy_handler) + + def rename_handler(self, filecmd): + """Handle a filerename command.""" + raise NotImplementedError(self.rename_handler) + + def deleteall_handler(self, filecmd): + """Handle a filedeleteall command.""" + raise NotImplementedError(self.deleteall_handler) diff --git a/fastimport/tests/__init__.py b/fastimport/tests/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/fastimport/tests/__init__.py diff --git a/fastimport/tests/test_commands.py b/fastimport/tests/test_commands.py new file mode 100644 index 0000000..6efa4ce --- /dev/null +++ b/fastimport/tests/test_commands.py @@ -0,0 +1,341 @@ +# Copyright (C) 2009 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Test how Commands are displayed""" + +from testtools import TestCase + +from bzrlib.plugins.fastimport.fastimport import ( + commands, + ) + + +class TestBlobDisplay(TestCase): + + def test_blob(self): + c = commands.BlobCommand("1", "hello world") + self.assertEqual("blob\nmark :1\ndata 11\nhello world", repr(c)) + + def test_blob_no_mark(self): + c = commands.BlobCommand(None, "hello world") + self.assertEqual("blob\ndata 11\nhello world", repr(c)) + + +class TestCheckpointDisplay(TestCase): + + def test_checkpoint(self): + c = commands.CheckpointCommand() + self.assertEqual("checkpoint", repr(c)) + + +class TestCommitDisplay(TestCase): + + def test_commit(self): + # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) + committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) + c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, + "release v1.0", ":aaa", None, None) + self.assertEqual( + "commit refs/heads/master\n" + "mark :bbb\n" + "committer Joe Wong <joe@example.com> 1234567890 -0600\n" + "data 12\n" + "release v1.0\n" + "from :aaa", + repr(c)) + + def test_commit_unicode_committer(self): + # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) + name = u'\u013d\xf3r\xe9m \xcdp\u0161\xfam' + name_utf8 = name.encode('utf8') + committer = (name, 'test@example.com', 1234567890, -6 * 3600) + c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, + "release v1.0", ":aaa", None, None) + self.assertEqual( + "commit refs/heads/master\n" + "mark :bbb\n" + "committer %s <test@example.com> 1234567890 -0600\n" + "data 12\n" + "release v1.0\n" + "from :aaa" % (name_utf8,), + repr(c)) + + def test_commit_no_mark(self): + # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) + committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) + c = commands.CommitCommand("refs/heads/master", None, None, committer, + "release v1.0", ":aaa", None, None) + self.assertEqual( + "commit refs/heads/master\n" + "committer Joe Wong <joe@example.com> 1234567890 -0600\n" + "data 12\n" + "release v1.0\n" + "from :aaa", + repr(c)) + + def test_commit_no_from(self): + # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) + committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) + c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, + "release v1.0", None, None, None) + self.assertEqual( + "commit refs/heads/master\n" + "mark :bbb\n" + "committer Joe Wong <joe@example.com> 1234567890 -0600\n" + "data 12\n" + "release v1.0", + repr(c)) + + def test_commit_with_author(self): + # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) + author = ('Sue Wong', 'sue@example.com', 1234565432, -6 * 3600) + committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) + c = commands.CommitCommand("refs/heads/master", "bbb", author, + committer, "release v1.0", ":aaa", None, None) + self.assertEqual( + "commit refs/heads/master\n" + "mark :bbb\n" + "author Sue Wong <sue@example.com> 1234565432 -0600\n" + "committer Joe Wong <joe@example.com> 1234567890 -0600\n" + "data 12\n" + "release v1.0\n" + "from :aaa", + repr(c)) + + def test_commit_with_merges(self): + # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) + committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) + c = commands.CommitCommand("refs/heads/master", "ddd", None, committer, + "release v1.0", ":aaa", [':bbb', ':ccc'], None) + self.assertEqual( + "commit refs/heads/master\n" + "mark :ddd\n" + "committer Joe Wong <joe@example.com> 1234567890 -0600\n" + "data 12\n" + "release v1.0\n" + "from :aaa\n" + "merge :bbb\n" + "merge :ccc", + repr(c)) + + def test_commit_with_filecommands(self): + file_cmds = iter([ + commands.FileDeleteCommand('readme.txt'), + commands.FileModifyCommand('NEWS', 'file', False, None, + 'blah blah blah'), + ]) + # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) + committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) + c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, + "release v1.0", ":aaa", None, file_cmds) + self.assertEqual( + "commit refs/heads/master\n" + "mark :bbb\n" + "committer Joe Wong <joe@example.com> 1234567890 -0600\n" + "data 12\n" + "release v1.0\n" + "from :aaa\n" + "D readme.txt\n" + "M 644 inline NEWS\n" + "data 14\n" + "blah blah blah", + repr(c)) + + def test_commit_with_more_authors(self): + # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) + author = ('Sue Wong', 'sue@example.com', 1234565432, -6 * 3600) + committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) + more_authors = [ + ('Al Smith', 'al@example.com', 1234565432, -6 * 3600), + ('Bill Jones', 'bill@example.com', 1234565432, -6 * 3600), + ] + c = commands.CommitCommand("refs/heads/master", "bbb", author, + committer, "release v1.0", ":aaa", None, None, + more_authors=more_authors) + self.assertEqual( + "commit refs/heads/master\n" + "mark :bbb\n" + "author Sue Wong <sue@example.com> 1234565432 -0600\n" + "author Al Smith <al@example.com> 1234565432 -0600\n" + "author Bill Jones <bill@example.com> 1234565432 -0600\n" + "committer Joe Wong <joe@example.com> 1234567890 -0600\n" + "data 12\n" + "release v1.0\n" + "from :aaa", + repr(c)) + + def test_commit_with_properties(self): + # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) + committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) + properties = { + u'greeting': u'hello', + u'planet': u'world', + } + c = commands.CommitCommand("refs/heads/master", "bbb", None, + committer, "release v1.0", ":aaa", None, None, + properties=properties) + self.assertEqual( + "commit refs/heads/master\n" + "mark :bbb\n" + "committer Joe Wong <joe@example.com> 1234567890 -0600\n" + "data 12\n" + "release v1.0\n" + "from :aaa\n" + "property greeting 5 hello\n" + "property planet 5 world", + repr(c)) + + +class TestFeatureDisplay(TestCase): + + def test_feature(self): + c = commands.FeatureCommand("dwim") + self.assertEqual("feature dwim", repr(c)) + + def test_feature_with_value(self): + c = commands.FeatureCommand("dwim", "please") + self.assertEqual("feature dwim=please", repr(c)) + + +class TestProgressDisplay(TestCase): + + def test_progress(self): + c = commands.ProgressCommand("doing foo") + self.assertEqual("progress doing foo", repr(c)) + + +class TestResetDisplay(TestCase): + + def test_reset(self): + c = commands.ResetCommand("refs/tags/v1.0", ":xxx") + self.assertEqual("reset refs/tags/v1.0\nfrom :xxx\n", repr(c)) + + def test_reset_no_from(self): + c = commands.ResetCommand("refs/remotes/origin/master", None) + self.assertEqual("reset refs/remotes/origin/master", repr(c)) + + +class TestTagDisplay(TestCase): + + def test_tag(self): + # tagger tuple is (name, email, secs-since-epoch, secs-offset-from-utc) + tagger = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) + c = commands.TagCommand("refs/tags/v1.0", ":xxx", tagger, "create v1.0") + self.assertEqual( + "tag refs/tags/v1.0\n" + "from :xxx\n" + "tagger Joe Wong <joe@example.com> 1234567890 -0600\n" + "data 11\n" + "create v1.0", + repr(c)) + + def test_tag_no_from(self): + tagger = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) + c = commands.TagCommand("refs/tags/v1.0", None, tagger, "create v1.0") + self.assertEqual( + "tag refs/tags/v1.0\n" + "tagger Joe Wong <joe@example.com> 1234567890 -0600\n" + "data 11\n" + "create v1.0", + repr(c)) + + +class TestFileModifyDisplay(TestCase): + + def test_filemodify_file(self): + c = commands.FileModifyCommand("foo/bar", "file", False, ":23", None) + self.assertEqual("M 644 :23 foo/bar", repr(c)) + + def test_filemodify_file_executable(self): + c = commands.FileModifyCommand("foo/bar", "file", True, ":23", None) + self.assertEqual("M 755 :23 foo/bar", repr(c)) + + def test_filemodify_file_internal(self): + c = commands.FileModifyCommand("foo/bar", "file", False, None, + "hello world") + self.assertEqual("M 644 inline foo/bar\ndata 11\nhello world", repr(c)) + + def test_filemodify_symlink(self): + c = commands.FileModifyCommand("foo/bar", "symlink", False, None, "baz") + self.assertEqual("M 120000 inline foo/bar\ndata 3\nbaz", repr(c)) + + def test_filemodify_treeref(self): + c = commands.FileModifyCommand("tree-info", "tree-reference", False, + "revision-id-info", None) + self.assertEqual("M 160000 revision-id-info tree-info", repr(c)) + + +class TestFileDeleteDisplay(TestCase): + + def test_filedelete(self): + c = commands.FileDeleteCommand("foo/bar") + self.assertEqual("D foo/bar", repr(c)) + + +class TestFileCopyDisplay(TestCase): + + def test_filecopy(self): + c = commands.FileCopyCommand("foo/bar", "foo/baz") + self.assertEqual("C foo/bar foo/baz", repr(c)) + + def test_filecopy_quoted(self): + # Check the first path is quoted if it contains spaces + c = commands.FileCopyCommand("foo/b a r", "foo/b a z") + self.assertEqual('C "foo/b a r" foo/b a z', repr(c)) + + +class TestFileRenameDisplay(TestCase): + + def test_filerename(self): + c = commands.FileRenameCommand("foo/bar", "foo/baz") + self.assertEqual("R foo/bar foo/baz", repr(c)) + + def test_filerename_quoted(self): + # Check the first path is quoted if it contains spaces + c = commands.FileRenameCommand("foo/b a r", "foo/b a z") + self.assertEqual('R "foo/b a r" foo/b a z', repr(c)) + + +class TestFileDeleteAllDisplay(TestCase): + + def test_filedeleteall(self): + c = commands.FileDeleteAllCommand() + self.assertEqual("deleteall", repr(c)) + + +class TestPathChecking(TestCase): + + def test_filemodify_path_checking(self): + self.assertRaises(ValueError, commands.FileModifyCommand, "", + "file", False, None, "text") + self.assertRaises(ValueError, commands.FileModifyCommand, None, + "file", False, None, "text") + + def test_filedelete_path_checking(self): + self.assertRaises(ValueError, commands.FileDeleteCommand, "") + self.assertRaises(ValueError, commands.FileDeleteCommand, None) + + def test_filerename_path_checking(self): + self.assertRaises(ValueError, commands.FileRenameCommand, "", "foo") + self.assertRaises(ValueError, commands.FileRenameCommand, None, "foo") + self.assertRaises(ValueError, commands.FileRenameCommand, "foo", "") + self.assertRaises(ValueError, commands.FileRenameCommand, "foo", None) + + def test_filecopy_path_checking(self): + self.assertRaises(ValueError, commands.FileCopyCommand, "", "foo") + self.assertRaises(ValueError, commands.FileCopyCommand, None, "foo") + self.assertRaises(ValueError, commands.FileCopyCommand, "foo", "") + self.assertRaises(ValueError, commands.FileCopyCommand, "foo", None) diff --git a/fastimport/tests/test_errors.py b/fastimport/tests/test_errors.py new file mode 100644 index 0000000..e3b807c --- /dev/null +++ b/fastimport/tests/test_errors.py @@ -0,0 +1,78 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Test the Import errors""" + +from testtools import TestCase + +from bzrlib.plugins.fastimport.fastimport import ( + errors, + ) + + +class TestErrors(TestCase): + + def test_MissingBytes(self): + e = errors.MissingBytes(99, 10, 8) + self.assertEqual("line 99: Unexpected EOF - expected 10 bytes, found 8", + str(e)) + + def test_MissingTerminator(self): + e = errors.MissingTerminator(99, '---') + self.assertEqual("line 99: Unexpected EOF - expected '---' terminator", + str(e)) + + def test_InvalidCommand(self): + e = errors.InvalidCommand(99, 'foo') + self.assertEqual("line 99: Invalid command 'foo'", + str(e)) + + def test_MissingSection(self): + e = errors.MissingSection(99, 'foo', 'bar') + self.assertEqual("line 99: Command foo is missing section bar", + str(e)) + + def test_BadFormat(self): + e = errors.BadFormat(99, 'foo', 'bar', 'xyz') + self.assertEqual("line 99: Bad format for section bar in " + "command foo: found 'xyz'", + str(e)) + + def test_InvalidTimezone(self): + e = errors.InvalidTimezone(99, 'aa:bb') + self.assertEqual('aa:bb', e.timezone) + self.assertEqual('', e.reason) + self.assertEqual("line 99: Timezone 'aa:bb' could not be converted.", + str(e)) + e = errors.InvalidTimezone(99, 'aa:bb', 'Non-numeric hours') + self.assertEqual('aa:bb', e.timezone) + self.assertEqual(' Non-numeric hours', e.reason) + self.assertEqual("line 99: Timezone 'aa:bb' could not be converted." + " Non-numeric hours", + str(e)) + + def test_UnknownDateFormat(self): + e = errors.UnknownDateFormat('aaa') + self.assertEqual("Unknown date format 'aaa'", str(e)) + + def test_MissingHandler(self): + e = errors.MissingHandler('foo') + self.assertEqual("Missing handler for command foo", str(e)) + + def test_UnknownFeature(self): + e = errors.UnknownFeature('aaa') + self.assertEqual("Unknown feature 'aaa' - try a later importer or " + "an earlier data format", str(e)) |