66 files changed, 4356 insertions, 5574 deletions
diff --git a/.bzrignore b/.bzrignore
index 378eac2..c317774 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -1 +1,5 @@
 build
+# executables
+exporters/svn-archive
+exporters/svn-fast-export
+.testrepository
diff --git a/.testr.conf b/.testr.conf
new file mode 100644
index 0000000..cc509ce
--- /dev/null
+++ b/.testr.conf
@@ -0,0 +1,4 @@
+[DEFAULT]
+test_command=BZR_PLUGINS_AT=fastimport@. bzr selftest "^bzrlib.plugins.fastimport" --subunit $IDOPTION $LISTOPT
+test_id_option=--load-list $IDFILE
+test_list_option=--list
diff --git a/NEWS b/NEWS
index d0028c3..7d2cb12 100644
--- a/NEWS
+++ b/NEWS
@@ -4,11 +4,32 @@ bzr-fastimport Release Notes
 
 .. contents::
 
-In Development
-==============
+0.10 UNRELEASED
 
-Compatability Breaks
---------------------
+Changes
+-------
+
+* bzr-fastimport's file parsing and generation functionality has been exported into
+  a separate upstream project called python-fastimport, which it now depends on.
+  python-fastimport can be retrieved from http://launchpad.net/python-fastimport.
+
+Bug fixes
+---------
+
+* Cope with non-ascii characters in tag names. (Jelmer Vernooij, #598060)
+
+* Cope with non-ascii characters in symbolic links. (Daniel Clemente,
+  Jelmer Vernooij, #238365)
+
+* In plain mode, don't export multiple authors. (David Kilzer, #496677)
+
+* Fix indentation when starting p4 fails. (Jelmer Vernooij)
+
+* SOURCE argument to bzr fast-import-filter is now optional, consistent with
+  examples. (Jelmer Vernooij, #477861)
+
+0.9 28-Feb-2010
+===============
 
 New Features
 ------------
@@ -35,9 +56,30 @@ New Features
   merged into this one for the purposes of ongoing bug fixing
   and development. (Miklos Vajna)
 
+* fast-export now supports a --no-plain parameter which causes
+  richer metadata to be included in the output using the
+  recently agreed 'feature' extension capability. The features
+  used are called multiple-authors, commit-properties and
+  empty-directories. (Ian Clatworthy)
+
+* fast-import and fast-import-filter now support user mapping
+  via the new --user-map option. The argument is a file specifying
+  how user-ids should be mapped to preferred user-ids.
+  (Ian Clatworthy)
+
+* svn-fast-export now supports an address option (to control the
+  default email domain) and a first-rev option (to select revisions
+  since a given one). (Ted Gould)
+
 Improvements
 ------------
 
+* Large repositories now compress better thanks to a change in
+  how file-ids are assigned. (Ian Clatworthy, John Arbash Meinel)
+
+* Memory usage is improved by flushing blobs to a disk cache
+  when appropriate. (John Arbash Meinel)
+
 * If a fast-import source ends in ".gz", it is assumed to be in
   gzip format and the stream is implicitly uncompressed. This
   means fast-import dump files generated by fast-export-from-xxx
@@ -50,7 +92,7 @@ Improvements
 * Directories that become empty following a delete or rename of
   one of their children are now implicitly pruned. If required,
   this will be made optional at a later date.
-  (Ian Clatworthy)
+  (Tom Widmer, Ian Clatworthy)
 
 * Blob tracking is now more intelligently done by an implicit
   first pass to collect blob usage statistics. This eliminates
@@ -79,6 +121,9 @@ Bug Fixes
 
 * Gracefully handle an empty input stream. (Gonéri Le Bouder)
 
+* Gracefully handle git submodules by ignoring them.
+  (Ian Clatworthy)
+
 * Get git-bzr working again. (Gonéri Le Bouder)
 
 Documentation
@@ -87,12 +132,6 @@ Documentation
 * Improved documentation has been published in the Bazaar Data Migration
   Guide: http://doc.bazaar-vcs.org/migration/en/data-migration/.
 
-Testing
--------
-
-Internals
----------
-
 
 0.8 22-Jul-2009
 ===============
diff --git a/README.txt b/README.txt
index 30b0e95..122b0e6 100644
--- a/README.txt
+++ b/README.txt
@@ -8,7 +8,9 @@ Required and recommended packages are:
 
 * Python 2.4 or later
 
-* Bazaar 1.1 or later.
+* Python-Fastimport 0.9.0 or later.
+
+* Bazaar 1.18 or later.
 
 
 Installation
diff --git a/__init__.py b/__init__.py
index 8ba91fc..61e14c6 100644
--- a/__init__.py
+++ b/__init__.py
@@ -55,7 +55,7 @@ online help for the individual commands for details::
   bzr help fast-export-from-darcs
   bzr help fast-export-from-hg
   bzr help fast-export-from-git
-  bzr help fast-export-from-mnt
+  bzr help fast-export-from-mtn
   bzr help fast-export-from-p4
   bzr help fast-export-from-svn
 
@@ -79,11 +79,18 @@ To report bugs or publish enhancements, visit the bzr-fastimport project
 page on Launchpad, https://launchpad.net/bzr-fastimport.
 """
 
-version_info = (0, 9, 0, 'dev', 0)
+version_info = (0, 10, 0, 'dev', 0)
 
-from bzrlib import bzrdir
-from bzrlib.commands import Command, register_command
-from bzrlib.option import Option, ListOption, RegistryOption
+from bzrlib.commands import plugin_cmds
+
+
+def load_fastimport():
+    """Load the fastimport module or raise an appropriate exception."""
+    try:
+        import fastimport
+    except ImportError, e:
+        from bzrlib.errors import DependencyNotPresent
+        raise DependencyNotPresent("fastimport", e)
 
 
 def test_suite():
@@ -91,726 +98,18 @@ def test_suite():
     return tests.test_suite()
 
 
-def _run(source, processor_factory, control, params, verbose):
-    """Create and run a processor.
-    
-    :param source: a filename or '-' for standard input. If the
-      filename ends in .gz, it will be opened as a gzip file and
-      the stream will be implicitly uncompressed
-    :param processor_factory: a callable for creating a processor
-    :param control: the BzrDir of the destination or None if no
-      destination is expected
-    """
-    import parser
-    stream = _get_source_stream(source)
-    proc = processor_factory(control, params=params, verbose=verbose)
-    p = parser.ImportParser(stream, verbose=verbose)
-    return proc.process(p.iter_commands)
-
-
-def _get_source_stream(source):
-    if source == '-':
-        import sys
-        stream = helpers.binary_stream(sys.stdin)
-    elif source.endswith('.gz'):
-        import gzip
-        stream = gzip.open(source, "rb")
-    else:
-        stream = open(source, "rb")
-    return stream
-
-
-class cmd_fast_import(Command):
-    """Backend for fast Bazaar data importers.
-
-    This command reads a mixed command/data stream and creates
-    branches in a Bazaar repository accordingly. The preferred
-    recipe is::
-
-      bzr fast-import project.fi project.bzr
-
-    Numerous commands are provided for generating a fast-import file
-    to use as input. These are named fast-export-from-xxx where xxx
-    is one of cvs, darcs, git, hg, mnt, p4 or svn.
-    To specify standard input as the input stream, use a
-    source name of '-' (instead of project.fi). If the source name
-    ends in '.gz', it is assumed to be compressed in gzip format.
-    
-    project.bzr will be created if it doesn't exist. If it exists
-    already, it should be empty or be an existing Bazaar repository
-    or branch. If not specified, the current directory is assumed.
- 
-    fast-import will intelligently select the format to use when
-    creating a repository or branch. If you are running Bazaar 1.17
-    up to Bazaar 2.0, the default format for Bazaar 2.x ("2a") is used.
-    Otherwise, the current default format ("pack-0.92" for Bazaar 1.x)
-    is used. If you wish to specify a custom format, use the `--format`
-    option.
-
-     .. note::
-     
-        To maintain backwards compatibility, fast-import lets you
-        create the target repository or standalone branch yourself.
-        It is recommended though that you let fast-import create
-        these for you instead.
-
-    :Branch mapping rules:
-
-     Git reference names are mapped to Bazaar branch names as follows:
-      
-     * refs/heads/foo is mapped to foo
-     * refs/remotes/origin/foo is mapped to foo.remote
-     * refs/tags/foo is mapped to foo.tag
-     * */master is mapped to trunk, trunk.remote, etc.
-     * */trunk is mapped to git-trunk, git-trunk.remote, etc.
-
-    :Branch creation rules:
-
-     When a shared repository is created or found at the destination,
-     branches are created inside it. In the simple case of a single
-     branch (refs/heads/master) inside the input file, the branch is
-     project.bzr/trunk.
-
-     When a standalone branch is found at the destination, the trunk
-     is imported there and warnings are output about any other branches
-     found in the input file.
-
-     When a branch in a shared repository is found at the destination,
-     that branch is made the trunk and other branches, if any, are
-     created in sister directories.
-
-    :Working tree updates:
-
-     The working tree is generated for the trunk branch. If multiple
-     branches are created, a message is output on completion explaining
-     how to create the working trees for other branches.
-
-    :Custom exporters:
-
-     The fast-export-from-xxx commands typically call more advanced
-     xxx-fast-export scripts. You are welcome to use the advanced
-     scripts if you prefer.
-
-     If you wish to write a custom exporter for your project, see
-     http://bazaar-vcs.org/BzrFastImport for the detailed protocol
-     specification. In many cases, exporters can be written quite
-     quickly using whatever scripting/programming language you like.
-
-    :Blob tracking:
-
-     As some exporters (like git-fast-export) reuse blob data across
-     commits, fast-import makes two passes over the input file by
-     default. In the first pass, it collects data about what blobs are
-     used when, along with some other statistics (e.g. total number of
-     commits). In the second pass, it generates the repository and
-     branches.
-     
-     .. note::
-     
-        The initial pass isn't done if the --info option is used
-        to explicitly pass in information about the input stream.
-        It also isn't done if the source is standard input. In the
-        latter case, memory consumption may be higher than otherwise
-        because some blobs may be kept in memory longer than necessary.
-
-    :Restarting an import:
-
-     At checkpoints and on completion, the commit-id -> revision-id
-     map is saved to a file called 'fastimport-id-map' in the control
-     directory for the repository (e.g. .bzr/repository). If the import
-     is interrupted or unexpectedly crashes, it can be started again
-     and this file will be used to skip over already loaded revisions.
-     As long as subsequent exports from the original source begin
-     with exactly the same revisions, you can use this feature to
-     maintain a mirror of a repository managed by a foreign tool.
-     If and when Bazaar is used to manage the repository, this file
-     can be safely deleted.
-
-    :Examples:
-
-     Import a Subversion repository into Bazaar::
-
-       bzr fast-export-from-svn /svn/repo/path project.fi
-       bzr fast-import project.fi project.bzr
-
-     Import a CVS repository into Bazaar::
-
-       bzr fast-export-from-cvs /cvs/repo/path project.fi
-       bzr fast-import project.fi project.bzr
-
-     Import a Git repository into Bazaar::
-
-       bzr fast-export-from-git /git/repo/path project.fi
-       bzr fast-import project.fi project.bzr
-
-     Import a Mercurial repository into Bazaar::
-
-       bzr fast-export-from-hg /hg/repo/path project.fi
-       bzr fast-import project.fi project.bzr
-
-     Import a Darcs repository into Bazaar::
-
-       bzr fast-export-from-darcs /darcs/repo/path project.fi
-       bzr fast-import project.fi project.bzr
-    """
-    hidden = False
-    _see_also = ['fast-export', 'fast-import-filter', 'fast-import-info']
-    takes_args = ['source', 'destination?']
-    takes_options = ['verbose',
-                    Option('info', type=str,
-                        help="Path to file containing caching hints.",
-                        ),
-                    Option('trees',
-                        help="Update all working trees, not just trunk's.",
-                        ),
-                    Option('count', type=int,
-                        help="Import this many revisions then exit.",
-                        ),
-                    Option('checkpoint', type=int,
-                        help="Checkpoint automatically every N revisions."
-                             " The default is 10000.",
-                        ),
-                    Option('autopack', type=int,
-                        help="Pack every N checkpoints. The default is 4.",
-                        ),
-                    Option('inv-cache', type=int,
-                        help="Number of inventories to cache.",
-                        ),
-                    RegistryOption.from_kwargs('mode',
-                        'The import algorithm to use.',
-                        title='Import Algorithm',
-                        default='Use the preferred algorithm (inventory deltas).',
-                        classic="Use the original algorithm (mutable inventories).",
-                        experimental="Enable experimental features.",
-                        value_switches=True, enum_switch=False,
-                        ),
-                    Option('import-marks', type=str,
-                        help="Import marks from file."
-                        ),
-                    Option('export-marks', type=str,
-                        help="Export marks to file."
-                        ),
-                    RegistryOption('format',
-                            help='Specify a format for the created repository. See'
-                                 ' "bzr help formats" for details.',
-                            lazy_registry=('bzrlib.bzrdir', 'format_registry'),
-                            converter=lambda name: bzrdir.format_registry.make_bzrdir(name),
-                            value_switches=False, title='Repository format'),
-                     ]
-    aliases = []
-    def run(self, source, destination='.', verbose=False, info=None,
-        trees=False, count=-1, checkpoint=10000, autopack=4, inv_cache=-1,
-        mode=None, import_marks=None, export_marks=None, format=None):
-        from bzrlib.errors import BzrCommandError, NotBranchError
-        from bzrlib.plugins.fastimport.processors import generic_processor
-        from bzrlib.plugins.fastimport.helpers import (
-            open_destination_directory,
-            )
-        # If no format is given and the user is running a release
-        # leading up to 2.0, select 2a for them. Otherwise, use
-        # the default format.
-        if format is None:
-            import bzrlib
-            bzr_version = bzrlib.version_info[0:2]
-            if bzr_version in [(1,17), (1,18), (2,0)]:
-                format = bzrdir.format_registry.make_bzrdir('2a')
-        control = open_destination_directory(destination, format=format)
-
-        # If an information file was given and the source isn't stdin,
-        # generate the information by reading the source file as a first pass
-        if info is None and source != '-':
-            info = self._generate_info(source)
-
-        # Do the work
-        if mode is None:
-            mode = 'default'
-        params = {
-            'info': info,
-            'trees': trees,
-            'count': count,
-            'checkpoint': checkpoint,
-            'autopack': autopack,
-            'inv-cache': inv_cache,
-            'mode': mode,
-            'import-marks': import_marks,
-            'export-marks': export_marks,
-            }
-        return _run(source, generic_processor.GenericProcessor, control,
-            params, verbose)
-
-    def _generate_info(self, source):
-        from cStringIO import StringIO
-        import parser
-        from bzrlib.plugins.fastimport.processors import info_processor
-        stream = _get_source_stream(source)
-        output = StringIO()
-        try:
-            proc = info_processor.InfoProcessor(verbose=True, outf=output)
-            p = parser.ImportParser(stream)
-            return_code = proc.process(p.iter_commands)
-            lines = output.getvalue().splitlines()
-        finally:
-            output.close()
-            stream.seek(0)
-        return lines
-
-
-class cmd_fast_import_filter(Command):
-    """Filter a fast-import stream to include/exclude files & directories.
-
-    This command is useful for splitting a subdirectory or bunch of
-    files out from a project to create a new project complete with history
-    for just those files. It can also be used to create a new project
-    repository that removes all references to files that should not have
-    been committed, e.g. security-related information (like passwords),
-    commercially sensitive material, files with an incompatible license or
-    large binary files like CD images.
-
-    When filtering out a subdirectory (or file), the new stream uses the
-    subdirectory (or subdirectory containing the file) as the root. As
-    fast-import doesn't know in advance whether a path is a file or
-    directory in the stream, you need to specify a trailing '/' on
-    directories passed to the `--includes option`. If multiple files or
-    directories are given, the new root is the deepest common directory.
-
-    To specify standard input as the input stream, use a source name
-    of '-'. If the source name ends in '.gz', it is assumed to be
-    compressed in gzip format.
-
-    Note: If a path has been renamed, take care to specify the *original*
-    path name, not the final name that it ends up with.
-
-    :Examples:
-
-     Create a new project from a library (note the trailing / on the
-     directory name of the library)::
-
-       front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi
-       bzr fast-import xxx.fi mylibrary.bzr
-       (lib/xxx/foo is now foo)
-
-     Create a new repository without a sensitive file::
-
-       front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi
-       bzr fast-import clean.fi clean.bzr
-    """
-    hidden = False
-    _see_also = ['fast-import']
-    takes_args = ['source']
-    takes_options = ['verbose',
-                    ListOption('include_paths', short_name='i', type=str,
-                        help="Only include commits affecting these paths."
-                             " Directories should have a trailing /."
-                        ),
-                    ListOption('exclude_paths', short_name='x', type=str,
-                        help="Exclude these paths from commits."
-                        ),
-                     ]
-    aliases = []
-    encoding_type = 'exact'
-    def run(self, source, verbose=False, include_paths=None,
-        exclude_paths=None):
-        from bzrlib.plugins.fastimport.processors import filter_processor
-        params = {
-            'include_paths': include_paths,
-            'exclude_paths': exclude_paths,
-            }
-        return _run(source, filter_processor.FilterProcessor, None, params,
-            verbose)
-
-
-class cmd_fast_import_info(Command):
-    """Output information about a fast-import stream.
-
-    This command reads a fast-import stream and outputs
-    statistics and interesting properties about what it finds.
-    When run in verbose mode, the information is output as a
-    configuration file that can be passed to fast-import to
-    assist it in intelligently caching objects.
-
-    To specify standard input as the input stream, use a source name
-    of '-'. If the source name ends in '.gz', it is assumed to be
-    compressed in gzip format.
-
-    :Examples:
-
-     Display statistics about the import stream produced by front-end::
-
-      front-end | bzr fast-import-info -
-
-     Create a hints file for running fast-import on a large repository::
-
-       front-end | bzr fast-import-info -v - > front-end.cfg
-    """
-    hidden = False
-    _see_also = ['fast-import']
-    takes_args = ['source']
-    takes_options = ['verbose']
-    aliases = []
-    def run(self, source, verbose=False):
-        from bzrlib.plugins.fastimport.processors import info_processor
-        return _run(source, info_processor.InfoProcessor, None, {}, verbose)
-
-
-class cmd_fast_import_query(Command):
-    """Query a fast-import stream displaying selected commands.
-
-    To specify standard input as the input stream, use a source name
-    of '-'. If the source name ends in '.gz', it is assumed to be
-    compressed in gzip format.
-
-    To specify the commands to display, use the -C option one or
-    more times. To specify just some fields for a command, use the
-    syntax::
-
-      command=field1,...
-
-    By default, the nominated fields for the nominated commands
-    are displayed tab separated. To see the information in
-    a name:value format, use verbose mode.
-
-    Note: Binary fields (e.g. data for blobs) are masked out
-    so it is generally safe to view the output in a terminal.
-
-    :Examples:
-
-     Show all the fields of the reset and tag commands::
-
-      front-end > xxx.fi
-      bzr fast-import-query xxx.fi -Creset -Ctag
-
-     Show the mark and merge fields of the commit commands::
-
-      bzr fast-import-query xxx.fi -Ccommit=mark,merge
-    """
-    hidden = True
-    _see_also = ['fast-import', 'fast-import-filter']
-    takes_args = ['source']
-    takes_options = ['verbose',
-                    ListOption('commands', short_name='C', type=str,
-                        help="Display fields for these commands."
-                        ),
-                     ]
-    aliases = []
-    def run(self, source, verbose=False, commands=None):
-        from bzrlib.plugins.fastimport.processors import query_processor
-        from bzrlib.plugins.fastimport import helpers
-        params = helpers.defines_to_dict(commands)
-        return _run(source, query_processor.QueryProcessor, None, params,
-            verbose)
-
-
-class cmd_fast_export(Command):
-    """Generate a fast-import stream from a Bazaar branch.
-
-    This program generates a stream from a bzr branch in the format
-    required by git-fast-import(1). It preserves merges correctly,
-    even merged branches with no common history (`bzr merge -r 0..-1`).
-
-    If no destination is given or the destination is '-', standard output
-    is used. Otherwise, the destination is the name of a file. If the
-    destination ends in '.gz', the output will be compressed into gzip
-    format.
-
-    :Examples:
-
-     To import several unmerged but related branches into the same repository,
-     use the --{export,import}-marks options, and specify a name for the git
-     branch like this::
-    
-      bzr fast-export --export-marks=marks.bzr project.dev |
-              GIT_DIR=project/.git git-fast-import --export-marks=marks.git
-
-      bzr fast-export --import-marks=marks.bzr -b other project.other |
-              GIT_DIR=project/.git git-fast-import --import-marks=marks.git
-
-     If you get a "Missing space after source" error from git-fast-import,
-     see the top of the commands.py module for a work-around.
-    """
-    hidden = False
-    _see_also = ['fast-import', 'fast-import-filter']
-    takes_args = ['source', 'destination?']
-    takes_options = ['verbose', 'revision',
-                    Option('git-branch', short_name='b', type=str,
-                        argname='FILE',
-                        help='Name of the git branch to create (default=master).'
-                        ),
-                    Option('checkpoint', type=int, argname='N',
-                        help="Checkpoint every N revisions (default=10000)."
-                        ),
-                    Option('marks', type=str, argname='FILE',
-                        help="Import marks from and export marks to file."
-                        ),
-                    Option('import-marks', type=str, argname='FILE',
-                        help="Import marks from file."
-                        ),
-                    Option('export-marks', type=str, argname='FILE',
-                        help="Export marks to file."
-                        ),
-                     ]
-    aliases = []
-    encoding_type = 'exact'
-    def run(self, source, destination=None, verbose=False,
-        git_branch="master", checkpoint=10000, marks=None,
-        import_marks=None, export_marks=None, revision=None):
-        from bzrlib.plugins.fastimport import bzr_exporter
-
-        if marks:                                              
-            import_marks = export_marks = marks
-        exporter = bzr_exporter.BzrFastExporter(source,
-            destination=destination,
-            git_branch=git_branch, checkpoint=checkpoint,
-            import_marks_file=import_marks, export_marks_file=export_marks,
-            revision=revision, verbose=verbose)
-        return exporter.run()
-
-
-class cmd_fast_export_from_cvs(Command):
-    """Generate a fast-import file from a CVS repository.
-
-    Destination is a dump file, typically named xxx.fi where xxx is
-    the name of the project. If '-' is given, standard output is used.
-
-    cvs2svn 2.3 or later must be installed as its cvs2bzr script is used
-    under the covers to do the export.
-    
-    The source must be the path on your filesystem to the part of the
-    repository you wish to convert. i.e. either that path or a parent
-    directory must contain a CVSROOT subdirectory. The path may point to
-    either the top of a repository or to a path within it. In the latter
-    case, only that project within the repository will be converted.
-
-    .. note::
-       Remote access to the repository is not sufficient - the path
-       must point into a copy of the repository itself. See
-       http://cvs2svn.tigris.org/faq.html#repoaccess for instructions
-       on how to clone a remote CVS repository locally.
-
-    By default, the trunk, branches and tags are all exported. If you
-    only want the trunk, use the `--trunk-only` option.
-
-    By default, filenames, log messages and author names are expected
-    to be encoded in ascii. Use the `--encoding` option to specify an
-    alternative. If multiple encodings are used, specify the option
-    multiple times. For a list of valid encoding names, see
-    http://docs.python.org/lib/standard-encodings.html.
-
-    Windows users need to install GNU sort and use the `--sort`
-    option to specify its location. GNU sort can be downloaded from
-    http://unxutils.sourceforge.net/.
-    """
-    hidden = False
-    _see_also = ['fast-import', 'fast-import-filter']
-    takes_args = ['source', 'destination']
-    takes_options = ['verbose',
-                    Option('trunk-only',
-                        help="Export just the trunk, ignoring tags and branches."
-                        ),
-                    ListOption('encoding', type=str, argname='CODEC',
-                        help="Encoding used for filenames, commit messages "
-                             "and author names if not ascii."
-                        ),
-                    Option('sort', type=str, argname='PATH',
-                        help="GNU sort program location if not on the path."
-                        ),
-                    ]
-    aliases = []
-    encoding_type = 'exact'
-    def run(self, source, destination, verbose=False, trunk_only=False,
-        encoding=None, sort=None):
-        from bzrlib.plugins.fastimport.exporters import fast_export_from
-        custom = []
-        if trunk_only:
-            custom.append("--trunk-only")
-        if encoding:
-            for enc in encoding:
-                custom.extend(['--encoding', enc])
-        if sort:
-            custom.extend(['--sort', sort])
-        fast_export_from(source, destination, 'cvs', verbose, custom)
-
-
-class cmd_fast_export_from_darcs(Command):
-    """Generate a fast-import file from a Darcs repository.
-
-    Destination is a dump file, typically named xxx.fi where xxx is
-    the name of the project. If '-' is given, standard output is used.
-
-    Darcs 2.2 or later must be installed as various subcommands are
-    used to access the source repository. The source may be a network
-    URL but using a local URL is recommended for performance reasons.
-    """
-    hidden = False
-    _see_also = ['fast-import', 'fast-import-filter']
-    takes_args = ['source', 'destination']
-    takes_options = ['verbose',
-                    Option('encoding', type=str, argname='CODEC',
-                        help="Encoding used for commit messages if not utf-8."
-                        ),
-                    ]
-    aliases = []
-    encoding_type = 'exact'
-    def run(self, source, destination, verbose=False, encoding=None):
-        from bzrlib.plugins.fastimport.exporters import fast_export_from
-        custom = None
-        if encoding is not None:
-            custom = ['--encoding', encoding]
-        fast_export_from(source, destination, 'darcs', verbose, custom)
-
-
-class cmd_fast_export_from_hg(Command):
-    """Generate a fast-import file from a Mercurial repository.
-
-    Destination is a dump file, typically named xxx.fi where xxx is
-    the name of the project. If '-' is given, standard output is used.
-
-    Mercurial 1.2 or later must be installed as its libraries are used
-    to access the source repository. Given the APIs currently used,
-    the source repository must be a local file, not a network URL.
-    """
-    hidden = False
-    _see_also = ['fast-import', 'fast-import-filter']
-    takes_args = ['source', 'destination']
-    takes_options = ['verbose']
-    aliases = []
-    encoding_type = 'exact'
-    def run(self, source, destination, verbose=False):
-        from bzrlib.plugins.fastimport.exporters import fast_export_from
-        fast_export_from(source, destination, 'hg', verbose)
-
-
-class cmd_fast_export_from_git(Command):
-    """Generate a fast-import file from a Git repository.
-
-    Destination is a dump file, typically named xxx.fi where xxx is
-    the name of the project. If '-' is given, standard output is used.
-
-    Git 1.6 or later must be installed as the git fast-export
-    subcommand is used under the covers to generate the stream.
-    The source must be a local directory.
-
-    .. note::
-    
-       Earlier versions of Git may also work fine but are
-       likely to receive less active support if problems arise.
-    """
-    hidden = False
-    _see_also = ['fast-import', 'fast-import-filter']
-    takes_args = ['source', 'destination']
-    takes_options = ['verbose']
-    aliases = []
-    encoding_type = 'exact'
-    def run(self, source, destination, verbose=False):
-        from bzrlib.plugins.fastimport.exporters import fast_export_from
-        fast_export_from(source, destination, 'git', verbose)
-
-
-class cmd_fast_export_from_mnt(Command):
-    """Generate a fast-import file from a Monotone repository.
-
-    Destination is a dump file, typically named xxx.fi where xxx is
-    the name of the project. If '-' is given, standard output is used.
-
-    Monotone 0.43 or later must be installed as the mnt git_export
-    subcommand is used under the covers to generate the stream.
-    The source must be a local directory.
-    """
-    hidden = False
-    _see_also = ['fast-import', 'fast-import-filter']
-    takes_args = ['source', 'destination']
-    takes_options = ['verbose']
-    aliases = []
-    encoding_type = 'exact'
-    def run(self, source, destination, verbose=False):
-        from bzrlib.plugins.fastimport.exporters import fast_export_from
-        fast_export_from(source, destination, 'mnt', verbose)
-
-
-class cmd_fast_export_from_p4(Command):
-    """Generate a fast-import file from a Perforce repository.
-
-    Source is a Perforce depot path, e.g., //depot/project
-
-    Destination is a dump file, typically named xxx.fi where xxx is
-    the name of the project. If '-' is given, standard output is used.
-
-    bzrp4 must be installed as its p4_fast_export.py module is used under
-    the covers to do the export.  bzrp4 can be downloaded from
-    https://launchpad.net/bzrp4/.
-    
-    The P4PORT environment variable must be set, and you must be logged
-    into the Perforce server.
-
-    By default, only the HEAD changelist is exported.  To export all
-    changelists, append '@all' to the source.  To export a revision range,
-    append a comma-delimited pair of changelist numbers to the source,
-    e.g., '100,200'.
-    """
-    hidden = False
-    _see_also = ['fast-import', 'fast-import-filter']
-    takes_args = ['source', 'destination']
-    takes_options = []
-    aliases = []
-    encoding_type = 'exact'
-    def run(self, source, destination, verbose=False):
-        from bzrlib.plugins.fastimport.exporters import fast_export_from
-        custom = []
-        fast_export_from(source, destination, 'p4', verbose, custom)
-
-
-class cmd_fast_export_from_svn(Command):
-    """Generate a fast-import file from a Subversion repository.
-
-    Destination is a dump file, typically named xxx.fi where xxx is
-    the name of the project. If '-' is given, standard output is used.
-
-    Python-Subversion (Python bindings to the Subversion APIs)
-    1.4 or later must be installed as this library is used to
-    access the source repository. The source may be a network URL
-    but using a local URL is recommended for performance reasons.
-    """
-    hidden = False
-    _see_also = ['fast-import', 'fast-import-filter']
-    takes_args = ['source', 'destination']
-    takes_options = ['verbose',
-                    Option('trunk-path', type=str, argname="STR",
-                        help="Path in repo to /trunk.\n"
-                              "May be `regex:/cvs/(trunk)/proj1/(.*)` in "
-                              "which case the first group is used as the "
-                              "branch name and the second group is used "
-                              "to match files.",
-                        ),
-                    Option('branches-path', type=str, argname="STR",
-                        help="Path in repo to /branches."
-                        ),
-                    Option('tags-path', type=str, argname="STR",
-                        help="Path in repo to /tags."
-                        ),
-                    ]
-    aliases = []
-    encoding_type = 'exact'
-    def run(self, source, destination, verbose=False, trunk_path=None,
-        branches_path=None, tags_path=None):
-        from bzrlib.plugins.fastimport.exporters import fast_export_from
-        custom = []
-        if trunk_path is not None:
-            custom.extend(['--trunk-path', trunk_path])
-        if branches_path is not None:
-            custom.extend(['--branches-path', branches_path])
-        if tags_path is not None:
-            custom.extend(['--tags-path', tags_path])
-        fast_export_from(source, destination, 'svn', verbose, custom)
-
-
-register_command(cmd_fast_import)
-register_command(cmd_fast_import_filter)
-register_command(cmd_fast_import_info)
-register_command(cmd_fast_import_query)
-register_command(cmd_fast_export)
-register_command(cmd_fast_export_from_cvs)
-register_command(cmd_fast_export_from_darcs)
-register_command(cmd_fast_export_from_hg)
-register_command(cmd_fast_export_from_git)
-register_command(cmd_fast_export_from_mnt)
-register_command(cmd_fast_export_from_p4)
-register_command(cmd_fast_export_from_svn)
+for name in [
+        "fast_import",
+        "fast_import_filter",
+        "fast_import_info",
+        "fast_import_query",
+        "fast_export",
+        "fast_export_from_cvs",
+        "fast_export_from_darcs",
+        "fast_export_from_hg",
+        "fast_export_from_git",
+        "fast_export_from_mtn",
+        "fast_export_from_p4",
+        "fast_export_from_svn"
+        ]:
+    plugin_cmds.register_lazy("cmd_%s" % name, [], "bzrlib.plugins.fastimport.cmds")
diff --git a/branch_mapper.py b/branch_mapper.py
index f6d0670..acc37c9 100644
--- a/branch_mapper.py
+++ b/branch_mapper.py
@@ -14,46 +14,45 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-"""An object that maps bzr branch names <-> git ref names."""
+"""An object that maps git ref names to bzr branch names.  Note that it is not
+used to map git ref names to bzr tag names."""
+
+
+import re
 
 
 class BranchMapper(object):
+    _GIT_TRUNK_RE = re.compile('(?:git-)*trunk')
 
-    def git_to_bzr(self, ref_names):
-        """Get the mapping from git reference names to Bazaar branch names.
-        
-        :return: a dictionary with git reference names as keys and
-          the Bazaar branch names as values.
+    def git_to_bzr(self, ref_name):
+        """Map a git reference name to a Bazaar branch name.
         """
-        bazaar_names = {}
-        for ref_name in sorted(ref_names):
-            parts = ref_name.split('/')
-            if parts[0] == 'refs':
+        parts = ref_name.split('/')
+        if parts[0] == 'refs':
+            parts.pop(0)
+        category = parts.pop(0)
+        if category == 'heads':
+            git_name = '/'.join(parts)
+            bazaar_name = self._git_to_bzr_name(git_name)
+        else:
+            if category == 'remotes' and parts[0] == 'origin':
                 parts.pop(0)
-            category = parts.pop(0)
-            if category == 'heads':
-                bazaar_name = self._git_to_bzr_name(parts[-1])
-            else:
-                if category.endswith('s'):
-                    category = category[:-1]
-                name_no_ext = self._git_to_bzr_name(parts[-1])
-                bazaar_name = "%s.%s" % (name_no_ext, category)
-            bazaar_names[ref_name] = bazaar_name
-        return bazaar_names
+            git_name = '/'.join(parts)
+            if category.endswith('s'):
+                category = category[:-1]
+            name_no_ext = self._git_to_bzr_name(git_name)
+            bazaar_name = "%s.%s" % (name_no_ext, category)
+        return bazaar_name
 
     def _git_to_bzr_name(self, git_name):
+        # Make a simple name more bzr-like, by mapping git 'master' to bzr 'trunk'.
+        # To avoid collision, map git 'trunk' to bzr 'git-trunk'.  Likewise
+        # 'git-trunk' to 'git-git-trunk' and so on, such that the mapping is
+        # one-to-one in both directions.
         if git_name == 'master':
             bazaar_name = 'trunk'
-        elif git_name.endswith('trunk'):
+        elif self._GIT_TRUNK_RE.match(git_name):
             bazaar_name = 'git-%s' % (git_name,)
         else:
             bazaar_name = git_name
         return bazaar_name
-
-    def bzr_to_git(self, branch_names):
-        """Get the mapping from Bazaar branch names to git reference names.
-        
-        :return: a dictionary with Bazaar branch names as keys and
-          the git reference names as values.
-        """
-        raise NotImplementedError(self.bzr_to_git)
diff --git a/branch_updater.py b/branch_updater.py
index b97f887..039171f 100644
--- a/branch_updater.py
+++ b/branch_updater.py
@@ -18,11 +18,12 @@
 
 from operator import itemgetter
 
-from bzrlib import bzrdir, errors, osutils
+from bzrlib import bzrdir, errors, osutils, transport
 from bzrlib.trace import error, note
 
-import branch_mapper
-import helpers
+from bzrlib.plugins.fastimport.helpers import (
+    best_format_for_objects_in_a_repository,
+    )
 
 
 class BranchUpdater(object):
@@ -40,9 +41,8 @@ class BranchUpdater(object):
         self.heads_by_ref = heads_by_ref
         self.last_ref = last_ref
         self.tags = tags
-        self.name_mapper = branch_mapper.BranchMapper()
         self._branch_format = \
-            helpers.best_format_for_objects_in_a_repository(repo)
+            best_format_for_objects_in_a_repository(repo)
 
     def update(self):
         """Update the Bazaar branches and tips matching the heads.
@@ -84,7 +84,9 @@ class BranchUpdater(object):
 
         # Convert the reference names into Bazaar speak. If we haven't
         # already put the 'trunk' first, do it now.
-        git_to_bzr_map = self.name_mapper.git_to_bzr(ref_names)
+        git_to_bzr_map = {}
+        for ref_name in ref_names:
+            git_to_bzr_map[ref_name] = self.cache_mgr.branch_mapper.git_to_bzr(ref_name)
         if ref_names and self.branch is None:
             trunk = self.select_trunk(ref_names)
             git_bzr_items = [(trunk, git_to_bzr_map[trunk])]
@@ -134,17 +136,21 @@ class BranchUpdater(object):
 
     def make_branch(self, location):
         """Make a branch in the repository if not already there."""
+        to_transport = transport.get_transport(location)
+        to_transport.create_prefix()
         try:
             return bzrdir.BzrDir.open(location).open_branch()
         except errors.NotBranchError, ex:
             return bzrdir.BzrDir.create_branch_convenience(location,
-                format=self._branch_format)
+                format=self._branch_format,
+                possible_transports=[to_transport])
 
     def _update_branch(self, br, last_mark):
         """Update a branch with last revision and tag information.
         
         :return: whether the branch was changed or not
         """
+        from fastimport.helpers import single_plural
         last_rev_id = self.cache_mgr.revision_ids[last_mark]
         revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
         revno = len(revs)
@@ -156,8 +162,9 @@ class BranchUpdater(object):
         # apply tags known in this branch
         my_tags = {}
         if self.tags:
+            ancestry = self.repo.get_ancestry(last_rev_id)
             for tag,rev in self.tags.items():
-                if rev in revs:
+                if rev in ancestry:
                     my_tags[tag] = rev
             if my_tags:
                 br.tags._set_tag_dict(my_tags)
@@ -165,6 +172,6 @@ class BranchUpdater(object):
         if changed:
             tagno = len(my_tags)
             note("\t branch %s now has %d %s and %d %s", br.nick,
-                revno, helpers.single_plural(revno, "revision", "revisions"),
-                tagno, helpers.single_plural(tagno, "tag", "tags"))
+                revno, single_plural(revno, "revision", "revisions"),
+                tagno, single_plural(tagno, "tag", "tags"))
         return changed
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py
index 5652251..c47a39d 100644
--- a/bzr_commit_handler.py
+++ b/bzr_commit_handler.py
@@ -18,6 +18,7 @@
 
 
 from bzrlib import (
+    debug,
     errors,
     generate_ids,
     inventory,
@@ -25,13 +26,24 @@ from bzrlib import (
     revision,
     serializer,
     )
-from bzrlib.plugins.fastimport import helpers, processor
+from bzrlib.trace import (
+    mutter,
+    note,
+    warning,
+    )
+from fastimport import (
+    helpers,
+    processor,
+    )
+
+from bzrlib.plugins.fastimport.helpers import (
+    mode_to_kind,
+    )
 
 
 _serializer_handles_escaping = hasattr(serializer.Serializer,
     'squashes_xml_invalid_characters')
 
-
 def copy_inventory(inv):
     # This currently breaks revision-id matching
     #if hasattr(inv, "_get_mutable_inventory"):
@@ -53,17 +65,51 @@ class GenericCommitHandler(processor.CommitHandler):
         self.verbose = verbose
         self.branch_ref = command.ref
         self.prune_empty_dirs = prune_empty_dirs
+        # This tracks path->file-id for things we're creating this commit.
+        # If the same path is created multiple times, we need to warn the
+        # user and add it just once.
+        # If a path is added then renamed or copied, we need to handle that.
+        self._new_file_ids = {}
+        # This tracks path->file-id for things we're modifying this commit.
+        # If a path is modified then renamed or copied, we need to make
+        # sure we grab the new content.
+        self._modified_file_ids = {}
+        # This tracks the paths for things we're deleting this commit.
+        # If the same path is added or the destination of a rename say,
+        # then a fresh file-id is required.
+        self._paths_deleted_this_commit = set()
+
+    def mutter(self, msg, *args):
+        """Output a mutter but add context."""
+        msg = "%s (%s)" % (msg, self.command.id)
+        mutter(msg, *args)
+
+    def debug(self, msg, *args):
+        """Output a mutter if the appropriate -D option was given."""
+        if "fast-import" in debug.debug_flags:
+            msg = "%s (%s)" % (msg, self.command.id)
+            mutter(msg, *args)
+
+    def note(self, msg, *args):
+        """Output a note but add context."""
+        msg = "%s (%s)" % (msg, self.command.id)
+        note(msg, *args)
+
+    def warning(self, msg, *args):
+        """Output a warning but add context."""
+        msg = "%s (%s)" % (msg, self.command.id)
+        warning(msg, *args)
 
     def pre_process_files(self):
         """Prepare for committing."""
         self.revision_id = self.gen_revision_id()
         # cache of texts for this commit, indexed by file-id
-        self.lines_for_commit = {}
+        self.data_for_commit = {}
         #if self.rev_store.expects_rich_root():
-        self.lines_for_commit[inventory.ROOT_ID] = []
+        self.data_for_commit[inventory.ROOT_ID] = []
 
         # Track the heads and get the real parent list
-        parents = self.cache_mgr.track_heads(self.command)
+        parents = self.cache_mgr.reftracker.track_heads(self.command)
 
         # Convert the parent commit-ids to bzr revision-ids
         if parents:
@@ -76,9 +122,9 @@ class GenericCommitHandler(processor.CommitHandler):
 
         # Tell the RevisionStore we're starting a new commit
         self.revision = self.build_revision()
-        parent_invs = [self.get_inventory(p) for p in self.parents]
+        self.parent_invs = [self.get_inventory(p) for p in self.parents]
         self.rev_store.start_new_revision(self.revision, self.parents,
-            parent_invs)
+            self.parent_invs)
 
         # cache of per-file parents for this commit, indexed by file-id
         self.per_file_parents_for_commit = {}
@@ -113,9 +159,13 @@ class GenericCommitHandler(processor.CommitHandler):
             self.cache_mgr.inventories[revision_id] = inv
         return inv
 
+    def _get_data(self, file_id):
+        """Get the data bytes for a file-id."""
+        return self.data_for_commit[file_id]
+
     def _get_lines(self, file_id):
         """Get the lines for a file-id."""
-        return self.lines_for_commit[file_id]
+        return osutils.split_lines(self._get_data(file_id))
 
     def _get_per_file_parents(self, file_id):
         """Get the lines for a file-id."""
@@ -154,19 +204,31 @@ class GenericCommitHandler(processor.CommitHandler):
         :return: file_id, is_new where
           is_new = True if the file_id is newly created
         """
-        try:
-            id = self.cache_mgr.fetch_file_id(self.branch_ref, path)
-            return id, False
-        except KeyError:
-            # Not in the cache, try the inventory
+        if path not in self._paths_deleted_this_commit:
+            # Try file-ids renamed in this commit
+            id = self._modified_file_ids.get(path)
+            if id is not None:
+                return id, False
+
+            # Try the basis inventory
             id = self.basis_inventory.path2id(path)
-            if id is None:
-                # Doesn't exist yet so create it
-                id = generate_ids.gen_file_id(path)
-                self.debug("Generated new file id %s for '%s' in '%s'",
-                    id, path, self.branch_ref)
-            self.cache_mgr.store_file_id(self.branch_ref, path, id)
-            return id, True
+            if id is not None:
+                return id, False
+            
+            # Try the remaining parent inventories (parent_invs[1:])
+            if len(self.parents) > 1:
+                for inv in self.parent_invs[1:]:
+                    id = inv.path2id(path)
+                    if id is not None:
+                        return id, False
+
+        # Doesn't exist yet so create it
+        dirname, basename = osutils.split(path)
+        id = generate_ids.gen_file_id(basename)
+        self.debug("Generated new file id %s for '%s' in revision-id '%s'",
+            id, path, self.revision_id)
+        self._new_file_ids[path] = id
+        return id, True
 
     def bzr_file_id(self, path):
         """Get a Bazaar file identifier for a path."""
@@ -192,14 +254,13 @@ class GenericCommitHandler(processor.CommitHandler):
         return generate_ids.gen_revision_id(who, timestamp)
 
     def build_revision(self):
-        rev_props = {}
+        rev_props = self._legal_revision_properties(self.command.properties)
+        if 'branch-nick' not in rev_props:
+            rev_props['branch-nick'] = self.cache_mgr.branch_mapper.git_to_bzr(
+                    self.branch_ref)
+        self._save_author_info(rev_props)
         committer = self.command.committer
         who = self._format_name_email(committer[0], committer[1])
-        author = self.command.author
-        if author is not None:
-            author_id = self._format_name_email(author[0], author[1])
-            if author_id != who:
-                rev_props['author'] = author_id
         message = self.command.message
         if not _serializer_handles_escaping:
             # We need to assume the bad ol' days
@@ -213,8 +274,50 @@ class GenericCommitHandler(processor.CommitHandler):
            properties=rev_props,
            parent_ids=self.parents)
 
+    def _legal_revision_properties(self, props):
+        """Clean-up any revision properties we can't handle."""
+        # For now, we just check for None because that's not allowed in 2.0rc1
+        result = {}
+        if props is not None:
+            for name, value in props.items():
+                if value is None:
+                    self.warning(
+                        "converting None to empty string for property %s"
+                        % (name,))
+                    result[name] = ''
+                else:
+                    result[name] = value
+        return result
+
+    def _save_author_info(self, rev_props):
+        author = self.command.author
+        if author is None:
+            return
+        if self.command.more_authors:
+            authors = [author] + self.command.more_authors
+            author_ids = [self._format_name_email(a[0], a[1]) for a in authors]
+        elif author != self.command.committer:
+            author_ids = [self._format_name_email(author[0], author[1])]
+        else:
+            return
+        # If we reach here, there are authors worth storing
+        rev_props['authors'] = "\n".join(author_ids)
+
     def _modify_item(self, path, kind, is_executable, data, inv):
         """Add to or change an item in the inventory."""
+        # If we've already added this, warn the user that we're ignoring it.
+        # In the future, it might be nice to double check that the new data
+        # is the same as the old but, frankly, exporters should be fixed
+        # not to produce bad data streams in the first place ...
+        existing = self._new_file_ids.get(path)
+        if existing:
+            # We don't warn about directories because it's fine for them
+            # to be created already by a previous rename
+            if kind != 'directory':
+                self.warning("%s already added in this commit - ignoring" %
+                    (path,))
+            return
+
         # Create the new InventoryEntry
         basename, parent_id = self._ensure_directory(path, inv)
         file_id = self.bzr_file_id(path)
@@ -222,18 +325,24 @@ class GenericCommitHandler(processor.CommitHandler):
         ie.revision = self.revision_id
         if kind == 'file':
             ie.executable = is_executable
-            lines = osutils.split_lines(data)
-            ie.text_sha1 = osutils.sha_strings(lines)
-            ie.text_size = sum(map(len, lines))
-            self.lines_for_commit[file_id] = lines
+            # lines = osutils.split_lines(data)
+            ie.text_sha1 = osutils.sha_string(data)
+            ie.text_size = len(data)
+            self.data_for_commit[file_id] = data
+        elif kind == 'directory':
+            self.directory_entries[path] = ie
+            # There are no lines stored for a directory so
+            # make sure the cache used by get_lines knows that
+            self.data_for_commit[file_id] = ''
         elif kind == 'symlink':
-            ie.symlink_target = data.encode('utf8')
+            ie.symlink_target = data.decode('utf8')
             # There are no lines stored for a symlink so
             # make sure the cache used by get_lines knows that
-            self.lines_for_commit[file_id] = []
+            self.data_for_commit[file_id] = ''
         else:
-            raise errors.BzrError("Cannot import items of kind '%s' yet" %
-                (kind,))
+            self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
+                % (kind, path))
+            return
         # Record it
         if file_id in inv:
             old_ie = inv[file_id]
@@ -273,7 +382,7 @@ class GenericCommitHandler(processor.CommitHandler):
         self.directory_entries[dirname] = ie
         # There are no lines stored for a directory so
         # make sure the cache used by get_lines knows that
-        self.lines_for_commit[dir_file_id] = []
+        self.data_for_commit[dir_file_id] = ''
 
         # It's possible that a file or symlink with that file-id
         # already exists. If it does, we need to delete it.
@@ -289,6 +398,8 @@ class GenericCommitHandler(processor.CommitHandler):
         """
         result = self.directory_entries.get(dirname)
         if result is None:
+            if dirname in self._paths_deleted_this_commit:
+                raise KeyError
             try:
                 file_id = inv.path2id(dirname)
             except errors.NoSuchId:
@@ -305,39 +416,61 @@ class GenericCommitHandler(processor.CommitHandler):
         return result
 
     def _delete_item(self, path, inv):
-        file_id = inv.path2id(path)
-        if file_id is None:
-            self.mutter("ignoring delete of %s as not in inventory", path)
-            return
-        try:
-            ie = inv[file_id]
-        except errors.NoSuchId:
-            self.mutter("ignoring delete of %s as not in inventory", path)
+        newly_added = self._new_file_ids.get(path)
+        if newly_added:
+            # We've only just added this path earlier in this commit.
+            file_id = newly_added
+            # note: delta entries look like (old, new, file-id, ie)
+            ie = self._delta_entries_by_fileid[file_id][3]
         else:
-            self.record_delete(path, ie)
+            file_id = inv.path2id(path)
+            if file_id is None:
+                self.mutter("ignoring delete of %s as not in inventory", path)
+                return
+            try:
+                ie = inv[file_id]
+            except errors.NoSuchId:
+                self.mutter("ignoring delete of %s as not in inventory", path)
+                return
+        self.record_delete(path, ie)
 
     def _copy_item(self, src_path, dest_path, inv):
-        if not self.parents:
-            self.warning("ignoring copy of %s to %s - no parent revisions",
-                src_path, dest_path)
-            return
-        file_id = inv.path2id(src_path)
-        if file_id is None:
-            self.warning("ignoring copy of %s to %s - source does not exist",
-                src_path, dest_path)
-            return
-        ie = inv[file_id]
+        newly_changed = self._new_file_ids.get(src_path) or \
+            self._modified_file_ids.get(src_path)
+        if newly_changed:
+            # We've only just added/changed this path earlier in this commit.
+            file_id = newly_changed
+            # note: delta entries look like (old, new, file-id, ie)
+            ie = self._delta_entries_by_fileid[file_id][3]
+        else:
+            file_id = inv.path2id(src_path)
+            if file_id is None:
+                self.warning("ignoring copy of %s to %s - source does not exist",
+                    src_path, dest_path)
+                return
+            ie = inv[file_id]
         kind = ie.kind
         if kind == 'file':
-            content = self.rev_store.get_file_text(self.parents[0], file_id)
+            if newly_changed:
+                content = self.data_for_commit[file_id]
+            else:
+                content = self.rev_store.get_file_text(self.parents[0], file_id)
             self._modify_item(dest_path, kind, ie.executable, content, inv)
         elif kind == 'symlink':
-            self._modify_item(dest_path, kind, False, ie.symlink_target, inv)
+            self._modify_item(dest_path, kind, False, ie.symlink_target.encode("utf-8"), inv)
         else:
             self.warning("ignoring copy of %s %s - feature not yet supported",
-                kind, path)
+                kind, dest_path)
 
     def _rename_item(self, old_path, new_path, inv):
+        existing = self._new_file_ids.get(old_path) or \
+            self._modified_file_ids.get(old_path)
+        if existing:
+            # We've only just added/modified this path earlier in this commit.
+            # Change the add/modify of old_path to an add of new_path
+            self._rename_pending_change(old_path, new_path, existing)
+            return
+
         file_id = inv.path2id(old_path)
         if file_id is None:
             self.warning(
@@ -350,13 +483,12 @@ class GenericCommitHandler(processor.CommitHandler):
         if new_file_id is not None:
             self.record_delete(new_path, inv[new_file_id])
         self.record_rename(old_path, new_path, file_id, ie)
-        self.cache_mgr.rename_path(self.branch_ref, old_path, new_path)
 
         # The revision-id for this entry will be/has been updated and
         # that means the loader then needs to know what the "new" text is.
         # We therefore must go back to the revision store to get it.
         lines = self.rev_store.get_file_lines(rev_id, file_id)
-        self.lines_for_commit[file_id] = lines
+        self.data_for_commit[file_id] = ''.join(lines)
 
     def _delete_all_items(self, inv):
         for name, root_item in inv.root.children.iteritems():
@@ -404,7 +536,7 @@ class InventoryCommitHandler(GenericCommitHandler):
         """Save the revision."""
         self.cache_mgr.inventories[self.revision_id] = self.inventory
         self.rev_store.load(self.revision, self.inventory, None,
-            lambda file_id: self._get_lines(file_id),
+            lambda file_id: self._get_data(file_id),
             lambda file_id: self._get_per_file_parents(file_id),
             lambda revision_ids: self._get_inventories(revision_ids))
 
@@ -446,59 +578,15 @@ class InventoryCommitHandler(GenericCommitHandler):
             self.inventory)
         self.inventory.rename(file_id, new_parent_id, new_basename)
 
-    def _delete_item(self, path, inv):
-        # NOTE: I'm retaining this method for now, instead of using the
-        # one in the superclass, because it's taken quite a lot of tweaking
-        # to cover all the edge cases seen in the wild. Long term, it can
-        # probably go once the higher level method does "warn_unless_in_merges"
-        # and handles all the various special cases ...
-        fileid = self.bzr_file_id(path)
-        dirname, basename = osutils.split(path)
-        if (fileid in inv and
-            isinstance(inv[fileid], inventory.InventoryDirectory)):
-            for child_path in inv[fileid].children.keys():
-                self._delete_item(osutils.pathjoin(path, child_path), inv)
-            # We need to clean this out of the directory entries as well
-            try:
-                del self.directory_entries[path]
-            except KeyError:
-                pass
-        try:
-            if self.inventory.id2path(fileid) == path:
-                del inv[fileid]
-            else:
-                # already added by some other name?
-                try:
-                    parent_id = self.cache_mgr.fetch_file_id(self.branch_ref,
-                        dirname)
-                except KeyError:
-                    pass
-                else:
-                    del inv[parent_id].children[basename]
-        except KeyError:
-            self._warn_unless_in_merges(fileid, path)
-        except errors.NoSuchId:
-            self._warn_unless_in_merges(fileid, path)
-        except AttributeError, ex:
-            if ex.args[0] == 'children':
-                # A directory has changed into a file and then one
-                # of it's children is being deleted!
-                self._warn_unless_in_merges(fileid, path)
-            else:
-                raise
-        try:
-            self.cache_mgr.delete_path(self.branch_ref, path)
-        except KeyError:
-            pass
-
     def modify_handler(self, filecmd):
         if filecmd.dataref is not None:
             data = self.cache_mgr.fetch_blob(filecmd.dataref)
         else:
             data = filecmd.data
         self.debug("modifying %s", filecmd.path)
-        self._modify_item(filecmd.path, filecmd.kind,
-            filecmd.is_executable, data, self.inventory)
+        (kind, is_executable) = mode_to_kind(filecmd.mode)
+        self._modify_item(filecmd.path, kind,
+            is_executable, data, self.inventory)
 
     def delete_handler(self, filecmd):
         self.debug("deleting %s", filecmd.path)
@@ -548,9 +636,9 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
         delta = self._get_final_delta()
         inv = self.rev_store.load_using_delta(self.revision,
             self.basis_inventory, delta, None,
-            lambda file_id: self._get_lines(file_id),
-            lambda file_id: self._get_per_file_parents(file_id),
-            lambda revision_ids: self._get_inventories(revision_ids))
+            self._get_data,
+            self._get_per_file_parents,
+            self._get_inventories)
         self.cache_mgr.inventories[self.revision_id] = inv
         #print "committed %s" % self.revision_id
 
@@ -562,44 +650,63 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
         """
         delta = list(self._delta_entries_by_fileid.values())
         if self.prune_empty_dirs and self._dirs_that_might_become_empty:
-            candidates = osutils.minimum_path_selection(
-                self._dirs_that_might_become_empty)
-            for path, file_id in self._empty_after_delta(delta, candidates):
-                delta.append((path, None, file_id, None))
-        #print "delta:\n%s\n\n" % "\n".join([str(de) for de in delta])
+            candidates = self._dirs_that_might_become_empty
+            while candidates:
+                never_born = set()
+                parent_dirs_that_might_become_empty = set()
+                for path, file_id in self._empty_after_delta(delta, candidates):
+                    newly_added = self._new_file_ids.get(path)
+                    if newly_added:
+                        never_born.add(newly_added)
+                    else:
+                        delta.append((path, None, file_id, None))
+                    parent_dir = osutils.dirname(path)
+                    if parent_dir:
+                        parent_dirs_that_might_become_empty.add(parent_dir)
+                candidates = parent_dirs_that_might_become_empty
+                # Clean up entries that got deleted before they were ever added
+                if never_born:
+                    delta = [de for de in delta if de[2] not in never_born]
         return delta
 
     def _empty_after_delta(self, delta, candidates):
-        new_inv = self.basis_inventory._get_mutable_inventory()
-        new_inv.apply_delta(delta)
+        #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta]))
+        #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates]))
+        new_inv = self._get_proposed_inventory(delta)
         result = []
         for dir in candidates:
             file_id = new_inv.path2id(dir)
             if file_id is None:
                 continue
             ie = new_inv[file_id]
+            if ie.kind != 'directory':
+                continue
             if len(ie.children) == 0:
                 result.append((dir, file_id))
                 if self.verbose:
                     self.note("pruning empty directory %s" % (dir,))
-                # Check parents in case deleting this dir makes *them* empty
-                while True:
-                    file_id = ie.parent_id
-                    if file_id == inventory.ROOT_ID:
-                        # We've reach the root
-                        break
-                    try:
-                        ie = new_inv[file_id]
-                    except errors.NoSuchId:
-                        break
-                    if len(ie.children) > 1:
-                        break
-                    dir = new_inv.id2path(file_id)
-                    result.append((dir, file_id))
-                    if self.verbose:
-                        self.note("pruning empty directory parent %s" % (dir,))
         return result
 
+    def _get_proposed_inventory(self, delta):
+        if len(self.parents):
+            # new_inv = self.basis_inventory._get_mutable_inventory()
+            # Note that this will create unreferenced chk pages if we end up
+            # deleting entries, because this 'test' inventory won't end up
+            # used. However, it is cheaper than having to create a full copy of
+            # the inventory for every commit.
+            new_inv = self.basis_inventory.create_by_apply_delta(delta,
+                'not-a-valid-revision-id:')
+        else:
+            new_inv = inventory.Inventory(revision_id=self.revision_id)
+            # This is set in the delta so remove it to prevent a duplicate
+            del new_inv[inventory.ROOT_ID]
+            try:
+                new_inv.apply_delta(delta)
+            except errors.InconsistentDelta:
+                self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta]))
+                raise
+        return new_inv
+
     def _add_entry(self, entry):
         # We need to combine the data if multiple entries have the same file-id.
         # For example, a rename followed by a modification looks like:
@@ -626,9 +733,18 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
         if existing is not None:
             old_path = existing[0]
             entry = (old_path, new_path, file_id, ie)
-        self._delta_entries_by_fileid[file_id] = entry
+        if new_path is None and old_path is None:
+            # This is a delete cancelling a previous add
+            del self._delta_entries_by_fileid[file_id]
+            parent_dir = osutils.dirname(existing[1])
+            self.mutter("cancelling add of %s with parent %s" % (existing[1], parent_dir))
+            if parent_dir:
+                self._dirs_that_might_become_empty.add(parent_dir)
+            return
+        else:
+            self._delta_entries_by_fileid[file_id] = entry
 
-        # Collect parent direcctories that might become empty
+        # Collect parent directories that might become empty
         if new_path is None:
             # delete
             parent_dir = osutils.dirname(old_path)
@@ -672,14 +788,26 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
 
     def record_changed(self, path, ie, parent_id=None):
         self._add_entry((path, path, ie.file_id, ie))
+        self._modified_file_ids[path] = ie.file_id
 
     def record_delete(self, path, ie):
         self._add_entry((path, None, ie.file_id, None))
+        self._paths_deleted_this_commit.add(path)
         if ie.kind == 'directory':
+            try:
+                del self.directory_entries[path]
+            except KeyError:
+                pass
             for child_relpath, entry in \
                 self.basis_inventory.iter_entries_by_dir(from_dir=ie):
                 child_path = osutils.pathjoin(path, child_relpath)
                 self._add_entry((child_path, None, entry.file_id, None))
+                self._paths_deleted_this_commit.add(child_path)
+                if entry.kind == 'directory':
+                    try:
+                        del self.directory_entries[child_path]
+                    except KeyError:
+                        pass
 
     def record_rename(self, old_path, new_path, file_id, old_ie):
         new_ie = old_ie.copy()
@@ -689,29 +817,71 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
         new_ie.parent_id = new_parent_id
         new_ie.revision = self.revision_id
         self._add_entry((old_path, new_path, file_id, new_ie))
+        self._modified_file_ids[new_path] = file_id
+        self._paths_deleted_this_commit.discard(new_path)
+        if new_ie.kind == 'directory':
+            self.directory_entries[new_path] = new_ie
+
+    def _rename_pending_change(self, old_path, new_path, file_id):
+        """Instead of adding/modifying old-path, add new-path instead."""
+        # note: delta entries look like (old, new, file-id, ie)
+        old_ie = self._delta_entries_by_fileid[file_id][3]
+
+        # Delete the old path. Note that this might trigger implicit
+        # deletion of newly created parents that could now become empty.
+        self.record_delete(old_path, old_ie)
+
+        # Update the dictionaries used for tracking new file-ids
+        if old_path in self._new_file_ids:
+            del self._new_file_ids[old_path]
+        else:
+            del self._modified_file_ids[old_path]
+        self._new_file_ids[new_path] = file_id
+
+        # Create the new InventoryEntry
+        kind = old_ie.kind
+        basename, parent_id = self._ensure_directory(new_path,
+            self.basis_inventory)
+        ie = inventory.make_entry(kind, basename, parent_id, file_id)
+        ie.revision = self.revision_id
+        if kind == 'file':
+            ie.executable = old_ie.executable
+            ie.text_sha1 = old_ie.text_sha1
+            ie.text_size = old_ie.text_size
+        elif kind == 'symlink':
+            ie.symlink_target = old_ie.symlink_target
+
+        # Record it
+        self.record_new(new_path, ie)
 
     def modify_handler(self, filecmd):
+        (kind, executable) = mode_to_kind(filecmd.mode)
         if filecmd.dataref is not None:
-            data = self.cache_mgr.fetch_blob(filecmd.dataref)
+            if kind == "directory":
+                data = None
+            elif kind == "tree-reference":
+                data = filecmd.dataref
+            else:
+                data = self.cache_mgr.fetch_blob(filecmd.dataref)
         else:
             data = filecmd.data
         self.debug("modifying %s", filecmd.path)
-        self._modify_item(filecmd.path, filecmd.kind,
-            filecmd.is_executable, data, self.basis_inventory)
+        self._modify_item(filecmd.path, kind,
+            executable, data, self.basis_inventory)
 
     def delete_handler(self, filecmd):
         self.debug("deleting %s", filecmd.path)
         self._delete_item(filecmd.path, self.basis_inventory)
 
     def copy_handler(self, filecmd):
-        src_path = filecmd.src_path
-        dest_path = filecmd.dest_path
+        src_path = filecmd.src_path.decode("utf8")
+        dest_path = filecmd.dest_path.decode("utf8")
         self.debug("copying %s to %s", src_path, dest_path)
         self._copy_item(src_path, dest_path, self.basis_inventory)
 
     def rename_handler(self, filecmd):
-        old_path = filecmd.old_path
-        new_path = filecmd.new_path
+        old_path = filecmd.old_path.decode("utf8")
+        new_path = filecmd.new_path.decode("utf8")
         self.debug("renaming %s to %s", old_path, new_path)
         self._rename_item(old_path, new_path, self.basis_inventory)
 
diff --git a/cache_manager.py b/cache_manager.py
index af57534..6d8ef05 100644
--- a/cache_manager.py
+++ b/cache_manager.py
@@ -16,12 +16,84 @@
 
 """A manager of caches."""
 
+import atexit
+import os
+import shutil
+import tempfile
+import weakref
 
 from bzrlib import lru_cache, trace
-from bzrlib.plugins.fastimport import helpers
+from bzrlib.plugins.fastimport import (
+    branch_mapper,
+    )
+from fastimport.helpers import (
+    single_plural,
+    )
+from fastimport.reftracker import (
+    RefTracker,
+    )
+
+
+class _Cleanup(object):
+    """This class makes sure we clean up when CacheManager goes away.
+
+    We use a helper class to ensure that we are never in a refcycle.
+    """
+
+    def __init__(self, disk_blobs):
+        self.disk_blobs = disk_blobs
+        self.tempdir = None
+        self.small_blobs = None
+
+    def __del__(self):
+        self.finalize()
+
+    def finalize(self):
+        if self.disk_blobs is not None:
+            for info in self.disk_blobs.itervalues():
+                if info[-1] is not None:
+                    os.unlink(info[-1])
+            self.disk_blobs = None
+        if self.small_blobs is not None:
+            self.small_blobs.close()
+            self.small_blobs = None
+        if self.tempdir is not None:
+            shutil.rmtree(self.tempdir)
+
+
+class _Cleanup(object):
+    """This class makes sure we clean up when CacheManager goes away.
+
+    We use a helper class to ensure that we are never in a refcycle.
+    """
+
+    def __init__(self, disk_blobs):
+        self.disk_blobs = disk_blobs
+        self.tempdir = None
+        self.small_blobs = None
+
+    def __del__(self):
+        self.finalize()
+
+    def finalize(self):
+        if self.disk_blobs is not None:
+            for info in self.disk_blobs.itervalues():
+                if info[-1] is not None:
+                    os.unlink(info[-1])
+            self.disk_blobs = None
+        if self.small_blobs is not None:
+            self.small_blobs.close()
+            self.small_blobs = None
+        if self.tempdir is not None:
+            shutil.rmtree(self.tempdir)
+
 
 class CacheManager(object):
 
+    _small_blob_threshold = 25*1024
+    _sticky_cache_size = 300*1024*1024
+    _sticky_flushed_size = 100*1024*1024
+
     def __init__(self, info=None, verbose=False, inventory_cache_size=10):
         """Create a manager of caches.
 
@@ -31,9 +103,18 @@ class CacheManager(object):
         self.verbose = verbose
 
         # dataref -> data. datref is either :mark or the sha-1.
-        # Sticky blobs aren't removed after being referenced.
+        # Sticky blobs are referenced more than once, and are saved until their
+        # refcount goes to 0
         self._blobs = {}
         self._sticky_blobs = {}
+        self._sticky_memory_bytes = 0
+        # if we overflow our memory cache, then we will dump large blobs to
+        # disk in this directory
+        self._tempdir = None
+        # id => (offset, n_bytes, fname)
+        #   if fname is None, then the content is stored in the small file
+        self._disk_blobs = {}
+        self._cleanup = _Cleanup(self._disk_blobs)
 
         # revision-id -> Inventory cache
         # these are large and we probably don't need too many as
@@ -46,12 +127,6 @@ class CacheManager(object):
 
         # (path, branch_ref) -> file-ids - as generated.
         # (Use store_file_id/fetch_fileid methods rather than direct access.)
-        self._file_ids = {}
-
-        # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
-        self.last_ref = None
-        self.last_ids = {}
-        self.heads = {}
 
         # Work out the blobs to make sticky - None means all
         self._blob_ref_counts = {}
@@ -67,18 +142,21 @@ class CacheManager(object):
                 # info not in file - possible when no blobs used
                 pass
 
+        # BranchMapper has no state (for now?), but we keep it around rather
+        # than reinstantiate on every usage
+        self.branch_mapper = branch_mapper.BranchMapper()
+
+        self.reftracker = RefTracker()
+
     def dump_stats(self, note=trace.note):
         """Dump some statistics about what we cached."""
         # TODO: add in inventory stastistics
         note("Cache statistics:")
         self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
         self._show_stats_for(self.revision_ids, "revision-ids", note=note)
-        self._show_stats_for(self._file_ids, "file-ids", note=note,
-            tuple_key=True)
         # These aren't interesting so omit from the output, at least for now
         #self._show_stats_for(self._blobs, "other blobs", note=note)
-        #self._show_stats_for(self.last_ids, "last-ids", note=note)
-        #self._show_stats_for(self.heads, "heads", note=note)
+        #self.reftracker.dump_stats(note=note)
 
     def _show_stats_for(self, dict, label, note=trace.note, tuple_key=False):
         """Dump statistics about a given dictionary.
@@ -100,109 +178,115 @@ class CacheManager(object):
                 size = size / 1024
                 unit = 'G'
         note("    %-12s: %8.1f %s (%d %s)" % (label, size, unit, count,
-            helpers.single_plural(count, "item", "items")))
+            single_plural(count, "item", "items")))
 
     def clear_all(self):
         """Free up any memory used by the caches."""
         self._blobs.clear()
         self._sticky_blobs.clear()
         self.revision_ids.clear()
-        self._file_ids.clear()
-        self.last_ids.clear()
-        self.heads.clear()
+        self.reftracker.clear()
         self.inventories.clear()
 
+    def _flush_blobs_to_disk(self):
+        blobs = self._sticky_blobs.keys()
+        sticky_blobs = self._sticky_blobs
+        total_blobs = len(sticky_blobs)
+        blobs.sort(key=lambda k:len(sticky_blobs[k]))
+        if self._tempdir is None:
+            tempdir = tempfile.mkdtemp(prefix='fastimport_blobs-')
+            self._tempdir = tempdir
+            self._cleanup.tempdir = self._tempdir
+            self._cleanup.small_blobs = tempfile.TemporaryFile(
+                prefix='small-blobs-', dir=self._tempdir)
+            small_blob_ref = weakref.ref(self._cleanup.small_blobs)
+            # Even though we add it to _Cleanup it seems that the object can be
+            # destroyed 'too late' for cleanup to actually occur. Probably a
+            # combination of bzr's "die directly, don't clean up" and how
+            # exceptions close the running stack.
+            def exit_cleanup():
+                small_blob = small_blob_ref()
+                if small_blob is not None:
+                    small_blob.close()
+                shutil.rmtree(tempdir, ignore_errors=True)
+            atexit.register(exit_cleanup)
+        count = 0
+        bytes = 0
+        n_small_bytes = 0
+        while self._sticky_memory_bytes > self._sticky_flushed_size:
+            id = blobs.pop()
+            blob = self._sticky_blobs.pop(id)
+            n_bytes = len(blob)
+            self._sticky_memory_bytes -= n_bytes
+            if n_bytes < self._small_blob_threshold:
+                f = self._cleanup.small_blobs
+                f.seek(0, os.SEEK_END)
+                self._disk_blobs[id] = (f.tell(), n_bytes, None)
+                f.write(blob)
+                n_small_bytes += n_bytes
+            else:
+                fd, name = tempfile.mkstemp(prefix='blob-', dir=self._tempdir)
+                os.write(fd, blob)
+                os.close(fd)
+                self._disk_blobs[id] = (0, n_bytes, name)
+            bytes += n_bytes
+            del blob
+            count += 1
+        trace.note('flushed %d/%d blobs w/ %.1fMB (%.1fMB small) to disk'
+                   % (count, total_blobs, bytes / 1024. / 1024,
+                      n_small_bytes / 1024. / 1024))
+
     def store_blob(self, id, data):
         """Store a blob of data."""
         # Note: If we're not reference counting, everything has to be sticky
         if not self._blob_ref_counts or id in self._blob_ref_counts:
             self._sticky_blobs[id] = data
+            self._sticky_memory_bytes += len(data)
+            if self._sticky_memory_bytes > self._sticky_cache_size:
+                self._flush_blobs_to_disk()
         elif data == '':
             # Empty data is always sticky
             self._sticky_blobs[id] = data
         else:
             self._blobs[id] = data
 
+    def _decref(self, id, cache, fn):
+        if not self._blob_ref_counts:
+            return False
+        count = self._blob_ref_counts.get(id, None)
+        if count is not None:
+            count -= 1
+            if count <= 0:
+                del cache[id]
+                if fn is not None:
+                    os.unlink(fn)
+                del self._blob_ref_counts[id]
+                return True
+            else:
+                self._blob_ref_counts[id] = count
+        return False
+
     def fetch_blob(self, id):
         """Fetch a blob of data."""
-        try:
-            b = self._sticky_blobs[id]
-            if self._blob_ref_counts and b != '':
-                self._blob_ref_counts[id] -= 1
-                if self._blob_ref_counts[id] == 0:
-                    del self._sticky_blobs[id]
-            return b
-        except KeyError:
+        if id in self._blobs:
             return self._blobs.pop(id)
+        if id in self._disk_blobs:
+            (offset, n_bytes, fn) = self._disk_blobs[id]
+            if fn is None:
+                f = self._cleanup.small_blobs
+                f.seek(offset)
+                content = f.read(n_bytes)
+            else:
+                fp = open(fn, 'rb')
+                try:
+                    content = fp.read()
+                finally:
+                    fp.close()
+            self._decref(id, self._disk_blobs, fn)
+            return content
+        content = self._sticky_blobs[id]
+        if self._decref(id, self._sticky_blobs, None):
+            self._sticky_memory_bytes -= len(content)
+        return content
 
-    def store_file_id(self, branch_ref, path, id):
-        """Store the path to file-id mapping for a branch."""
-        key = self._fileid_key(path, branch_ref)
-        self._file_ids[key] = id
 
-    def fetch_file_id(self, branch_ref, path):
-        """Lookup the file-id for a path in a branch.
-        
-        Raises KeyError if unsuccessful.
-        """
-        key = self._fileid_key(path, branch_ref)
-        return self._file_ids[key]
-
-    def _fileid_key(self, path, branch_ref):
-        return (path, branch_ref)
-
-    def delete_path(self, branch_ref, path):
-        """Remove a path from caches."""
-        # We actually want to remember what file-id we gave a path,
-        # even when that file is deleted, so doing nothing is correct.
-        # It's quite possible for a path to be deleted twice where
-        # the first time is in a merge branch (but the same branch_ref)
-        # and the second time is when that branch is merged to mainline.
-        pass
-
-    def rename_path(self, branch_ref, old_path, new_path):
-        """Rename a path in the caches."""
-        # In this case, we need to forget the file-id we gave a path,
-        # otherwise, we'll get duplicate file-ids in the repository
-        # if a new file is created at the old path.
-        old_key = self._fileid_key(old_path, branch_ref)
-        new_key = self._fileid_key(new_path, branch_ref)
-        try:
-            old_file_id = self._file_ids[old_key]
-        except KeyError:
-            # The old_key has already been removed, most likely
-            # in a merge branch.
-            pass
-        else:
-            self._file_ids[new_key] = old_file_id
-            del self._file_ids[old_key]
-
-    def track_heads(self, cmd):
-        """Track the repository heads given a CommitCommand.
-        
-        :param cmd: the CommitCommand
-        :return: the list of parents in terms of commit-ids
-        """
-        # Get the true set of parents
-        if cmd.from_ is not None:
-            parents = [cmd.from_]
-        else:
-            last_id = self.last_ids.get(cmd.ref)
-            if last_id is not None:
-                parents = [last_id]
-            else:
-                parents = []
-        parents.extend(cmd.merges)
-
-        # Track the heads
-        self.track_heads_for_ref(cmd.ref, cmd.id, parents)
-        return parents
-
-    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
-        if parents is not None:
-            for parent in parents:
-                if parent in self.heads:
-                    del self.heads[parent]
-        self.heads.setdefault(cmd_id, set()).add(cmd_ref)
-        self.last_ids[cmd_ref] = cmd_id
-        self.last_ref = cmd_ref
diff --git a/cmds.py b/cmds.py
new file mode 100644
index 0000000..52170f7
--- /dev/null
+++ b/cmds.py
@@ -0,0 +1,882 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Fastimport/fastexport commands."""
+
+from bzrlib import bzrdir
+from bzrlib.commands import Command
+from bzrlib.option import Option, ListOption, RegistryOption
+
+from bzrlib.plugins.fastimport import load_fastimport
+
+
+def _run(source, processor_factory, control, params, verbose,
+    user_map=None):
+    """Create and run a processor.
+
+    :param source: a filename or '-' for standard input. If the
+      filename ends in .gz, it will be opened as a gzip file and
+      the stream will be implicitly uncompressed
+    :param processor_factory: a callable for creating a processor
+    :param control: the BzrDir of the destination or None if no
+      destination is expected
+    :param user_map: if not None, the file containing the user map.
+    """
+    from fastimport import parser
+    stream = _get_source_stream(source)
+    user_mapper = _get_user_mapper(user_map)
+    proc = processor_factory(control, params=params, verbose=verbose)
+    p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper)
+    return proc.process(p.iter_commands)
+
+
+def _get_source_stream(source):
+    if source == '-':
+        import sys
+        from fastimport import helpers
+        stream = helpers.binary_stream(sys.stdin)
+    elif source.endswith('.gz'):
+        import gzip
+        stream = gzip.open(source, "rb")
+    else:
+        stream = open(source, "rb")
+    return stream
+
+
+def _get_user_mapper(filename):
+    import user_mapper
+    if filename is None:
+        return None
+    f = open(filename)
+    lines = f.readlines()
+    f.close()
+    return user_mapper.UserMapper(lines)
+
+
+class cmd_fast_import(Command):
+    """Backend for fast Bazaar data importers.
+
+    This command reads a mixed command/data stream and creates
+    branches in a Bazaar repository accordingly. The preferred
+    recipe is::
+
+      bzr fast-import project.fi project.bzr
+
+    Numerous commands are provided for generating a fast-import file
+    to use as input. These are named fast-export-from-xxx where xxx
+    is one of cvs, darcs, git, hg, mtn, p4 or svn.
+    To specify standard input as the input stream, use a
+    source name of '-' (instead of project.fi). If the source name
+    ends in '.gz', it is assumed to be compressed in gzip format.
+    
+    project.bzr will be created if it doesn't exist. If it exists
+    already, it should be empty or be an existing Bazaar repository
+    or branch. If not specified, the current directory is assumed.
+ 
+    fast-import will intelligently select the format to use when
+    creating a repository or branch. If you are running Bazaar 1.17
+    up to Bazaar 2.0, the default format for Bazaar 2.x ("2a") is used.
+    Otherwise, the current default format ("pack-0.92" for Bazaar 1.x)
+    is used. If you wish to specify a custom format, use the `--format`
+    option.
+
+     .. note::
+     
+        To maintain backwards compatibility, fast-import lets you
+        create the target repository or standalone branch yourself.
+        It is recommended though that you let fast-import create
+        these for you instead.
+
+    :Branch mapping rules:
+
+     Git reference names are mapped to Bazaar branch names as follows:
+      
+     * refs/heads/foo is mapped to foo
+     * refs/remotes/origin/foo is mapped to foo.remote
+     * refs/tags/foo is mapped to foo.tag
+     * */master is mapped to trunk, trunk.remote, etc.
+     * */trunk is mapped to git-trunk, git-trunk.remote, etc.
+
+    :Branch creation rules:
+
+     When a shared repository is created or found at the destination,
+     branches are created inside it. In the simple case of a single
+     branch (refs/heads/master) inside the input file, the branch is
+     project.bzr/trunk.
+
+     When a standalone branch is found at the destination, the trunk
+     is imported there and warnings are output about any other branches
+     found in the input file.
+
+     When a branch in a shared repository is found at the destination,
+     that branch is made the trunk and other branches, if any, are
+     created in sister directories.
+
+    :Working tree updates:
+
+     The working tree is generated for the trunk branch. If multiple
+     branches are created, a message is output on completion explaining
+     how to create the working trees for other branches.
+
+    :Custom exporters:
+
+     The fast-export-from-xxx commands typically call more advanced
+     xxx-fast-export scripts. You are welcome to use the advanced
+     scripts if you prefer.
+
+     If you wish to write a custom exporter for your project, see
+     http://bazaar-vcs.org/BzrFastImport for the detailed protocol
+     specification. In many cases, exporters can be written quite
+     quickly using whatever scripting/programming language you like.
+
+    :User mapping:
+
+     Some source repositories store just the user name while Bazaar
+     prefers a full email address. You can adjust user-ids while
+     importing by using the --user-map option. The argument is a
+     text file with lines in the format::
+
+       old-id = new-id
+
+     Blank lines and lines beginning with # are ignored.
+     If old-id has the special value '@', then users without an
+     email address will get one created by using the matching new-id
+     as the domain, unless a more explicit address is given for them.
+     For example, given the user-map of::
+
+       @ = example.com
+       bill = William Jones <bill@example.com>
+
+     then user-ids are mapped as follows::
+     
+      maria => maria <maria@example.com>
+      bill => William Jones <bill@example.com>
+
+     .. note::
+     
+        User mapping is supported by both the fast-import and
+        fast-import-filter commands.
+
+    :Blob tracking:
+
+     As some exporters (like git-fast-export) reuse blob data across
+     commits, fast-import makes two passes over the input file by
+     default. In the first pass, it collects data about what blobs are
+     used when, along with some other statistics (e.g. total number of
+     commits). In the second pass, it generates the repository and
+     branches.
+     
+     .. note::
+     
+        The initial pass isn't done if the --info option is used
+        to explicitly pass in information about the input stream.
+        It also isn't done if the source is standard input. In the
+        latter case, memory consumption may be higher than otherwise
+        because some blobs may be kept in memory longer than necessary.
+
+    :Restarting an import:
+
+     At checkpoints and on completion, the commit-id -> revision-id
+     map is saved to a file called 'fastimport-id-map' in the control
+     directory for the repository (e.g. .bzr/repository). If the import
+     is interrupted or unexpectedly crashes, it can be started again
+     and this file will be used to skip over already loaded revisions.
+     As long as subsequent exports from the original source begin
+     with exactly the same revisions, you can use this feature to
+     maintain a mirror of a repository managed by a foreign tool.
+     If and when Bazaar is used to manage the repository, this file
+     can be safely deleted.
+
+    :Examples:
+
+     Import a Subversion repository into Bazaar::
+
+       bzr fast-export-from-svn /svn/repo/path project.fi
+       bzr fast-import project.fi project.bzr
+
+     Import a CVS repository into Bazaar::
+
+       bzr fast-export-from-cvs /cvs/repo/path project.fi
+       bzr fast-import project.fi project.bzr
+
+     Import a Git repository into Bazaar::
+
+       bzr fast-export-from-git /git/repo/path project.fi
+       bzr fast-import project.fi project.bzr
+
+     Import a Mercurial repository into Bazaar::
+
+       bzr fast-export-from-hg /hg/repo/path project.fi
+       bzr fast-import project.fi project.bzr
+
+     Import a Darcs repository into Bazaar::
+
+       bzr fast-export-from-darcs /darcs/repo/path project.fi
+       bzr fast-import project.fi project.bzr
+    """
+    hidden = False
+    _see_also = ['fast-export', 'fast-import-filter', 'fast-import-info']
+    takes_args = ['source', 'destination?']
+    takes_options = ['verbose',
+                    Option('user-map', type=str,
+                        help="Path to file containing a map of user-ids.",
+                        ),
+                    Option('info', type=str,
+                        help="Path to file containing caching hints.",
+                        ),
+                    Option('trees',
+                        help="Update all working trees, not just trunk's.",
+                        ),
+                    Option('count', type=int,
+                        help="Import this many revisions then exit.",
+                        ),
+                    Option('checkpoint', type=int,
+                        help="Checkpoint automatically every N revisions."
+                             " The default is 10000.",
+                        ),
+                    Option('autopack', type=int,
+                        help="Pack every N checkpoints. The default is 4.",
+                        ),
+                    Option('inv-cache', type=int,
+                        help="Number of inventories to cache.",
+                        ),
+                    RegistryOption.from_kwargs('mode',
+                        'The import algorithm to use.',
+                        title='Import Algorithm',
+                        default='Use the preferred algorithm (inventory deltas).',
+                        classic="Use the original algorithm (mutable inventories).",
+                        experimental="Enable experimental features.",
+                        value_switches=True, enum_switch=False,
+                        ),
+                    Option('import-marks', type=str,
+                        help="Import marks from file."
+                        ),
+                    Option('export-marks', type=str,
+                        help="Export marks to file."
+                        ),
+                    RegistryOption('format',
+                            help='Specify a format for the created repository. See'
+                                 ' "bzr help formats" for details.',
+                            lazy_registry=('bzrlib.bzrdir', 'format_registry'),
+                            converter=lambda name: bzrdir.format_registry.make_bzrdir(name),
+                            value_switches=False, title='Repository format'),
+                     ]
+    def run(self, source, destination='.', verbose=False, info=None,
+        trees=False, count=-1, checkpoint=10000, autopack=4, inv_cache=-1,
+        mode=None, import_marks=None, export_marks=None, format=None,
+        user_map=None):
+        load_fastimport()
+        from bzrlib.plugins.fastimport.processors import generic_processor
+        from bzrlib.plugins.fastimport.helpers import (
+            open_destination_directory,
+            )
+        # If no format is given and the user is running bzr 1.17, 1.18
+        # or 2.0, select 2a for them. Otherwise, use
+        # the default format.
+        if format is None:
+            import bzrlib
+            bzr_version = bzrlib.version_info[0:2]
+            if bzr_version in [(1,17), (1,18), (2,0)]:
+                format = bzrdir.format_registry.make_bzrdir('2a')
+        control = open_destination_directory(destination, format=format)
+
+        # If no information file was given and the source isn't stdin,
+        # generate the information by reading the source file as a first pass
+        if info is None and source != '-':
+            info = self._generate_info(source)
+
+        # Do the work
+        if mode is None:
+            mode = 'default'
+        params = {
+            'info': info,
+            'trees': trees,
+            'count': count,
+            'checkpoint': checkpoint,
+            'autopack': autopack,
+            'inv-cache': inv_cache,
+            'mode': mode,
+            'import-marks': import_marks,
+            'export-marks': export_marks,
+            }
+        return _run(source, generic_processor.GenericProcessor, control,
+                params, verbose, user_map=user_map)
+
+    def _generate_info(self, source):
+        from cStringIO import StringIO
+        from fastimport import parser
+        from fastimport.processors import info_processor
+        stream = _get_source_stream(source)
+        output = StringIO()
+        try:
+            proc = info_processor.InfoProcessor(verbose=True, outf=output)
+            p = parser.ImportParser(stream)
+            return_code = proc.process(p.iter_commands)
+            lines = output.getvalue().splitlines()
+        finally:
+            output.close()
+            stream.seek(0)
+        return lines
+
+
+class cmd_fast_import_filter(Command):
+    """Filter a fast-import stream to include/exclude files & directories.
+
+    This command is useful for splitting a subdirectory or bunch of
+    files out from a project to create a new project complete with history
+    for just those files. It can also be used to create a new project
+    repository that removes all references to files that should not have
+    been committed, e.g. security-related information (like passwords),
+    commercially sensitive material, files with an incompatible license or
+    large binary files like CD images.
+
+    To specify standard input as the input stream, use a source name
+    of '-'. If the source name ends in '.gz', it is assumed to be
+    compressed in gzip format.
+
+    :File/directory filtering:
+
+     This is supported by the -i and -x options. Excludes take precedence
+     over includes.
+
+     When filtering out a subdirectory (or file), the new stream uses the
+     subdirectory (or subdirectory containing the file) as the root. As
+     fast-import doesn't know in advance whether a path is a file or
+     directory in the stream, you need to specify a trailing '/' on
+     directories passed to the `-i` option. If multiple files or
+     directories are given, the new root is the deepest common directory.
+
+     Note: If a path has been renamed, take care to specify the *original*
+     path name, not the final name that it ends up with.
+
+    :User mapping:
+
+     Some source repositories store just the user name while Bazaar
+     prefers a full email address. You can adjust user-ids
+     by using the --user-map option. The argument is a
+     text file with lines in the format::
+
+       old-id = new-id
+
+     Blank lines and lines beginning with # are ignored.
+     If old-id has the special value '@', then users without an
+     email address will get one created by using the matching new-id
+     as the domain, unless a more explicit address is given for them.
+     For example, given the user-map of::
+
+       @ = example.com
+       bill = William Jones <bill@example.com>
+
+     then user-ids are mapped as follows::
+     
+      maria => maria <maria@example.com>
+      bill => William Jones <bill@example.com>
+
+     .. note::
+     
+        User mapping is supported by both the fast-import and
+        fast-import-filter commands.
+
+    :Examples:
+
+     Create a new project from a library (note the trailing / on the
+     directory name of the library)::
+
+       front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi
+       bzr fast-import xxx.fi mylibrary.bzr
+       (lib/xxx/foo is now foo)
+
+     Create a new repository without a sensitive file::
+
+       front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi
+       bzr fast-import clean.fi clean.bzr
+    """
+    hidden = False
+    _see_also = ['fast-import']
+    takes_args = ['source?']
+    takes_options = ['verbose',
+                    ListOption('include_paths', short_name='i', type=str,
+                        help="Only include commits affecting these paths."
+                             " Directories should have a trailing /."
+                        ),
+                    ListOption('exclude_paths', short_name='x', type=str,
+                        help="Exclude these paths from commits."
+                        ),
+                    Option('user-map', type=str,
+                        help="Path to file containing a map of user-ids.",
+                        ),
+                     ]
+    encoding_type = 'exact'
+    def run(self, source=None, verbose=False, include_paths=None,
+        exclude_paths=None, user_map=None):
+        load_fastimport()
+        from fastimport.processors import filter_processor
+        params = {
+            'include_paths': include_paths,
+            'exclude_paths': exclude_paths,
+            }
+        from fastimport import parser
+        stream = _get_source_stream(source)
+        user_mapper = _get_user_mapper(user_map)
+        proc = filter_processor.FilterProcessor(params=params, verbose=verbose)
+        p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper)
+        return proc.process(p.iter_commands)
+
+
+class cmd_fast_import_info(Command):
+    """Output information about a fast-import stream.
+
+    This command reads a fast-import stream and outputs
+    statistics and interesting properties about what it finds.
+    When run in verbose mode, the information is output as a
+    configuration file that can be passed to fast-import to
+    assist it in intelligently caching objects.
+
+    To specify standard input as the input stream, use a source name
+    of '-'. If the source name ends in '.gz', it is assumed to be
+    compressed in gzip format.
+
+    :Examples:
+
+     Display statistics about the import stream produced by front-end::
+
+      front-end | bzr fast-import-info -
+
+     Create a hints file for running fast-import on a large repository::
+
+       front-end | bzr fast-import-info -v - > front-end.cfg
+    """
+    hidden = False
+    _see_also = ['fast-import']
+    takes_args = ['source']
+    takes_options = ['verbose']
+    def run(self, source, verbose=False):
+        load_fastimport()
+        from fastimport.processors import info_processor
+        return _run(source, info_processor.InfoProcessor, {}, verbose)
+
+
+class cmd_fast_import_query(Command):
+    """Query a fast-import stream displaying selected commands.
+
+    To specify standard input as the input stream, use a source name
+    of '-'. If the source name ends in '.gz', it is assumed to be
+    compressed in gzip format.
+
+    To specify a commit to display, give its mark using the
+    --commit-mark option. The commit will be displayed with
+    file-commands included but with inline blobs hidden.
+
+    To specify the commands to display, use the -C option one or
+    more times. To specify just some fields for a command, use the
+    syntax::
+
+      command=field1,...
+
+    By default, the nominated fields for the nominated commands
+    are displayed tab separated. To see the information in
+    a name:value format, use verbose mode.
+
+    Note: Binary fields (e.g. data for blobs) are masked out
+    so it is generally safe to view the output in a terminal.
+
+    :Examples:
+
+     Show the commit with mark 429::
+
+      bzr fast-import-query xxx.fi -m429
+
+     Show all the fields of the reset and tag commands::
+
+      bzr fast-import-query xxx.fi -Creset -Ctag
+
+     Show the mark and merge fields of the commit commands::
+
+      bzr fast-import-query xxx.fi -Ccommit=mark,merge
+    """
+    hidden = True
+    _see_also = ['fast-import', 'fast-import-filter']
+    takes_args = ['source']
+    takes_options = ['verbose',
+                    Option('commit-mark', short_name='m', type=str,
+                        help="Mark of the commit to display."
+                        ),
+                    ListOption('commands', short_name='C', type=str,
+                        help="Display fields for these commands."
+                        ),
+                     ]
+    def run(self, source, verbose=False, commands=None, commit_mark=None):
+        load_fastimport()
+        from fastimport.processors import query_processor
+        from bzrlib.plugins.fastimport import helpers
+        params = helpers.defines_to_dict(commands) or {}
+        if commit_mark:
+            params['commit-mark'] = commit_mark
+        return _run(source, query_processor.QueryProcessor, params,
+            verbose)
+
+
+class cmd_fast_export(Command):
+    """Generate a fast-import stream from a Bazaar branch.
+
+    This program generates a stream from a Bazaar branch in fast-import
+    format used by tools such as bzr fast-import, git-fast-import and
+    hg-fast-import.
+
+    If no destination is given or the destination is '-', standard output
+    is used. Otherwise, the destination is the name of a file. If the
+    destination ends in '.gz', the output will be compressed into gzip
+    format.
+ 
+    :Round-tripping:
+
+     Recent versions of the fast-import specification support features
+     that allow effective round-tripping of many Bazaar branches. As
+     such, fast-exporting a branch and fast-importing the data produced
+     will create a new repository with equivalent history, i.e.
+     "bzr log -v -p --include-merges --forward" on the old branch and
+     new branch should produce similar, if not identical, results.
+
+     .. note::
+    
+        Be aware that the new repository may appear to have similar history
+        but internally it is quite different with new revision-ids and
+        file-ids assigned. As a consequence, the ability to easily merge
+        with branches based on the old repository is lost. Depending on your
+        reasons for producing a new repository, this may or may not be an
+        issue.
+
+    :Interoperability:
+
+     fast-export can use the following "extended features" to
+     produce a richer data stream:
+
+     * *multiple-authors* - if a commit has multiple authors (as commonly
+       occurs in pair-programming), all authors will be included in the
+       output, not just the first author
+
+     * *commit-properties* - custom metadata per commit that Bazaar stores
+       in revision properties (e.g. branch-nick and bugs fixed by this
+       change) will be included in the output.
+
+     * *empty-directories* - directories, even the empty ones, will be
+       included in the output.
+
+     To disable these features and produce output acceptable to git 1.6,
+     use the --plain option. To enable these features, use --no-plain.
+     Currently, --plain is the default but that will change in the near
+     future once the feature names and definitions are formally agreed
+     to by the broader fast-import developer community.
+
+    :Examples:
+
+     To produce data destined for import into Bazaar::
+
+       bzr fast-export --no-plain my-bzr-branch my.fi.gz
+
+     To produce data destined for Git 1.6::
+
+       bzr fast-export --plain my-bzr-branch my.fi
+
+     To import several unmerged but related branches into the same repository,
+     use the --{export,import}-marks options, and specify a name for the git
+     branch like this::
+    
+       bzr fast-export --export-marks=marks.bzr project.dev |
+              GIT_DIR=project/.git git-fast-import --export-marks=marks.git
+
+       bzr fast-export --import-marks=marks.bzr -b other project.other |
+              GIT_DIR=project/.git git-fast-import --import-marks=marks.git
+
+     If you get a "Missing space after source" error from git-fast-import,
+     see the top of python-fastimport's commands.py module for a work-around.
+    """
+    hidden = False
+    _see_also = ['fast-import', 'fast-import-filter']
+    takes_args = ['source', 'destination?']
+    takes_options = ['verbose', 'revision',
+                    Option('git-branch', short_name='b', type=str,
+                        argname='FILE',
+                        help='Name of the git branch to create (default=master).'
+                        ),
+                    Option('checkpoint', type=int, argname='N',
+                        help="Checkpoint every N revisions (default=10000)."
+                        ),
+                    Option('marks', type=str, argname='FILE',
+                        help="Import marks from and export marks to file."
+                        ),
+                    Option('import-marks', type=str, argname='FILE',
+                        help="Import marks from file."
+                        ),
+                    Option('export-marks', type=str, argname='FILE',
+                        help="Export marks to file."
+                        ),
+                    Option('plain',
+                        help="Exclude metadata to maximise interoperability."
+                        ),
+                     ]
+    encoding_type = 'exact'
+    def run(self, source, destination=None, verbose=False,
+        git_branch="master", checkpoint=10000, marks=None,
+        import_marks=None, export_marks=None, revision=None,
+        plain=True):
+        load_fastimport()
+        from bzrlib.plugins.fastimport import exporter
+
+        if marks:
+            import_marks = export_marks = marks
+        exporter = exporter.BzrFastExporter(source,
+            destination=destination,
+            git_branch=git_branch, checkpoint=checkpoint,
+            import_marks_file=import_marks, export_marks_file=export_marks,
+            revision=revision, verbose=verbose, plain_format=plain)
+        return exporter.run()
+
+
+class cmd_fast_export_from_cvs(Command):
+    """Generate a fast-import file from a CVS repository.
+
+    Destination is a dump file, typically named xxx.fi where xxx is
+    the name of the project. If '-' is given, standard output is used.
+
+    cvs2svn 2.3 or later must be installed as its cvs2bzr script is used
+    under the covers to do the export.
+    
+    The source must be the path on your filesystem to the part of the
+    repository you wish to convert. i.e. either that path or a parent
+    directory must contain a CVSROOT subdirectory. The path may point to
+    either the top of a repository or to a path within it. In the latter
+    case, only that project within the repository will be converted.
+
+    .. note::
+       Remote access to the repository is not sufficient - the path
+       must point into a copy of the repository itself. See
+       http://cvs2svn.tigris.org/faq.html#repoaccess for instructions
+       on how to clone a remote CVS repository locally.
+
+    By default, the trunk, branches and tags are all exported. If you
+    only want the trunk, use the `--trunk-only` option.
+
+    By default, filenames, log messages and author names are expected
+    to be encoded in ascii. Use the `--encoding` option to specify an
+    alternative. If multiple encodings are used, specify the option
+    multiple times. For a list of valid encoding names, see
+    http://docs.python.org/lib/standard-encodings.html.
+
+    Windows users need to install GNU sort and use the `--sort`
+    option to specify its location. GNU sort can be downloaded from
+    http://unxutils.sourceforge.net/.
+    """
+    hidden = False
+    _see_also = ['fast-import', 'fast-import-filter']
+    takes_args = ['source', 'destination']
+    takes_options = ['verbose',
+                    Option('trunk-only',
+                        help="Export just the trunk, ignoring tags and branches."
+                        ),
+                    ListOption('encoding', type=str, argname='CODEC',
+                        help="Encoding used for filenames, commit messages "
+                             "and author names if not ascii."
+                        ),
+                    Option('sort', type=str, argname='PATH',
+                        help="GNU sort program location if not on the path."
+                        ),
+                    ]
+    encoding_type = 'exact'
+    def run(self, source, destination, verbose=False, trunk_only=False,
+        encoding=None, sort=None):
+        load_fastimport()
+        from bzrlib.plugins.fastimport.exporters import fast_export_from
+        custom = []
+        if trunk_only:
+            custom.append("--trunk-only")
+        if encoding:
+            for enc in encoding:
+                custom.extend(['--encoding', enc])
+        if sort:
+            custom.extend(['--sort', sort])
+        fast_export_from(source, destination, 'cvs', verbose, custom)
+
+
+class cmd_fast_export_from_darcs(Command):
+    """Generate a fast-import file from a Darcs repository.
+
+    Destination is a dump file, typically named xxx.fi where xxx is
+    the name of the project. If '-' is given, standard output is used.
+
+    Darcs 2.2 or later must be installed as various subcommands are
+    used to access the source repository. The source may be a network
+    URL but using a local URL is recommended for performance reasons.
+    """
+    hidden = False
+    _see_also = ['fast-import', 'fast-import-filter']
+    takes_args = ['source', 'destination']
+    takes_options = ['verbose',
+                    Option('encoding', type=str, argname='CODEC',
+                        help="Encoding used for commit messages if not utf-8."
+                        ),
+                    ]
+    encoding_type = 'exact'
+    def run(self, source, destination, verbose=False, encoding=None):
+        from bzrlib.plugins.fastimport.exporters import fast_export_from
+        custom = None
+        if encoding is not None:
+            custom = ['--encoding', encoding]
+        fast_export_from(source, destination, 'darcs', verbose, custom)
+
+
+class cmd_fast_export_from_hg(Command):
+    """Generate a fast-import file from a Mercurial repository.
+
+    Destination is a dump file, typically named xxx.fi where xxx is
+    the name of the project. If '-' is given, standard output is used.
+
+    Mercurial 1.2 or later must be installed as its libraries are used
+    to access the source repository. Given the APIs currently used,
+    the source repository must be a local file, not a network URL.
+    """
+    hidden = False
+    _see_also = ['fast-import', 'fast-import-filter']
+    takes_args = ['source', 'destination']
+    takes_options = ['verbose']
+    encoding_type = 'exact'
+    def run(self, source, destination, verbose=False):
+        load_fastimport()
+        from bzrlib.plugins.fastimport.exporters import fast_export_from
+        fast_export_from(source, destination, 'hg', verbose)
+
+
+class cmd_fast_export_from_git(Command):
+    """Generate a fast-import file from a Git repository.
+
+    Destination is a dump file, typically named xxx.fi where xxx is
+    the name of the project. If '-' is given, standard output is used.
+
+    Git 1.6 or later must be installed as the git fast-export
+    subcommand is used under the covers to generate the stream.
+    The source must be a local directory.
+
+    .. note::
+    
+       Earlier versions of Git may also work fine but are
+       likely to receive less active support if problems arise.
+    """
+    hidden = False
+    _see_also = ['fast-import', 'fast-import-filter']
+    takes_args = ['source', 'destination']
+    takes_options = ['verbose']
+    encoding_type = 'exact'
+    def run(self, source, destination, verbose=False):
+        load_fastimport()
+        from bzrlib.plugins.fastimport.exporters import fast_export_from
+        fast_export_from(source, destination, 'git', verbose)
+
+
+class cmd_fast_export_from_mtn(Command):
+    """Generate a fast-import file from a Monotone repository.
+
+    Destination is a dump file, typically named xxx.fi where xxx is
+    the name of the project. If '-' is given, standard output is used.
+
+    Monotone 0.43 or later must be installed as the mtn git_export
+    subcommand is used under the covers to generate the stream.
+    The source must be a local directory.
+    """
+    hidden = False
+    _see_also = ['fast-import', 'fast-import-filter']
+    takes_args = ['source', 'destination']
+    takes_options = ['verbose']
+    encoding_type = 'exact'
+    def run(self, source, destination, verbose=False):
+        load_fastimport()
+        from bzrlib.plugins.fastimport.exporters import fast_export_from
+        fast_export_from(source, destination, 'mtn', verbose)
+
+
+class cmd_fast_export_from_p4(Command):
+    """Generate a fast-import file from a Perforce repository.
+
+    Source is a Perforce depot path, e.g., //depot/project
+
+    Destination is a dump file, typically named xxx.fi where xxx is
+    the name of the project. If '-' is given, standard output is used.
+
+    bzrp4 must be installed as its p4_fast_export.py module is used under
+    the covers to do the export.  bzrp4 can be downloaded from
+    https://launchpad.net/bzrp4/.
+
+    The P4PORT environment variable must be set, and you must be logged
+    into the Perforce server.
+
+    By default, only the HEAD changelist is exported.  To export all
+    changelists, append '@all' to the source.  To export a revision range,
+    append a comma-delimited pair of changelist numbers to the source,
+    e.g., '100,200'.
+    """
+    hidden = False
+    _see_also = ['fast-import', 'fast-import-filter']
+    takes_args = ['source', 'destination']
+    takes_options = []
+    encoding_type = 'exact'
+    def run(self, source, destination, verbose=False):
+        load_fastimport()
+        from bzrlib.plugins.fastimport.exporters import fast_export_from
+        custom = []
+        fast_export_from(source, destination, 'p4', verbose, custom)
+
+
+class cmd_fast_export_from_svn(Command):
+    """Generate a fast-import file from a Subversion repository.
+
+    Destination is a dump file, typically named xxx.fi where xxx is
+    the name of the project. If '-' is given, standard output is used.
+
+    Python-Subversion (Python bindings to the Subversion APIs)
+    1.4 or later must be installed as this library is used to
+    access the source repository. The source may be a network URL
+    but using a local URL is recommended for performance reasons.
+    """
+    hidden = False
+    _see_also = ['fast-import', 'fast-import-filter']
+    takes_args = ['source', 'destination']
+    takes_options = ['verbose',
+                    Option('trunk-path', type=str, argname="STR",
+                        help="Path in repo to /trunk.\n"
+                              "May be `regex:/cvs/(trunk)/proj1/(.*)` in "
+                              "which case the first group is used as the "
+                              "branch name and the second group is used "
+                              "to match files.",
+                        ),
+                    Option('branches-path', type=str, argname="STR",
+                        help="Path in repo to /branches."
+                        ),
+                    Option('tags-path', type=str, argname="STR",
+                        help="Path in repo to /tags."
+                        ),
+                    ]
+    encoding_type = 'exact'
+    def run(self, source, destination, verbose=False, trunk_path=None,
+        branches_path=None, tags_path=None):
+        load_fastimport()
+        from bzrlib.plugins.fastimport.exporters import fast_export_from
+        custom = []
+        if trunk_path is not None:
+            custom.extend(['--trunk-path', trunk_path])
+        if branches_path is not None:
+            custom.extend(['--branches-path', branches_path])
+        if tags_path is not None:
+            custom.extend(['--tags-path', tags_path])
+        fast_export_from(source, destination, 'svn', verbose, custom)
diff --git a/commands.py b/commands.py
deleted file mode 100644
index 7ae2f54..0000000
--- a/commands.py
+++ /dev/null
@@ -1,349 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Import command classes."""
-
-
-# There is a bug in git 1.5.4.3 and older by which unquoting a string consumes
-# one extra character. Set this variable to True to work-around it. It only
-# happens when renaming a file whose name contains spaces and/or quotes, and
-# the symptom is:
-#   % git-fast-import
-#   fatal: Missing space after source: R "file 1.txt" file 2.txt
-# http://git.kernel.org/?p=git/git.git;a=commit;h=c8744d6a8b27115503565041566d97c21e722584
-GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE = False
-
-
-# Lists of command names
-COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'progress', 'reset', 'tag']
-FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename',
-    'filedeleteall']
-
-# Bazaar file kinds
-FILE_KIND = 'file'
-SYMLINK_KIND = 'symlink'
-
-
-class ImportCommand(object):
-    """Base class for import commands."""
-
-    def __init__(self, name):
-        self.name = name
-        # List of field names not to display
-        self._binary = []
-
-    def dump_str(self, names=None, child_lists=None, verbose=False):
-        """Dump fields as a string.
-
-        :param names: the list of fields to include or
-            None for all public fields
-        :param child_lists: dictionary of child command names to
-            fields for that child command to include
-        :param verbose: if True, prefix each line with the command class and
-            display fields as a dictionary; if False, dump just the field
-            values with tabs between them
-        """
-        interesting = {}
-        if names is None:
-            fields = [k for k in self.__dict__.keys() if not k.startswith('_')]
-        else:
-            fields = names
-        for field in fields:
-            value = self.__dict__.get(field)
-            if field in self._binary and value is not None:
-                value = '(...)'
-            interesting[field] = value
-        if verbose:
-            return "%s: %s" % (self.__class__.__name__, interesting)
-        else:
-            return "\t".join([repr(interesting[k]) for k in fields])
-
-
-class BlobCommand(ImportCommand):
-
-    def __init__(self, mark, data, lineno=0):
-        ImportCommand.__init__(self, 'blob')
-        self.mark = mark
-        self.data = data
-        self.lineno = lineno
-        # Provide a unique id in case the mark is missing
-        if mark is None:
-            self.id = '@%d' % lineno
-        else:
-            self.id = ':' + mark
-        self._binary = ['data']
-
-    def __repr__(self):
-        if self.mark is None:
-            mark_line = ""
-        else:
-            mark_line = "\nmark :%s" % self.mark
-        return "blob%s\ndata %d\n%s" % (mark_line, len(self.data), self.data)
-
-
-class CheckpointCommand(ImportCommand):
-
-    def __init__(self):
-        ImportCommand.__init__(self, 'checkpoint')
-
-    def __repr__(self):
-        return "checkpoint"
-
-
-class CommitCommand(ImportCommand):
-
-    def __init__(self, ref, mark, author, committer, message, from_,
-        merges, file_iter, lineno=0):
-        ImportCommand.__init__(self, 'commit')
-        self.ref = ref
-        self.mark = mark
-        self.author = author
-        self.committer = committer
-        self.message = message
-        self.from_ = from_
-        self.merges = merges
-        self.file_iter = file_iter
-        self.lineno = lineno
-        self._binary = ['file_iter']
-        # Provide a unique id in case the mark is missing
-        if mark is None:
-            self.id = '@%d' % lineno
-        else:
-            self.id = ':%s' % mark
-
-    def __repr__(self):
-        if self.mark is None:
-            mark_line = ""
-        else:
-            mark_line = "\nmark :%s" % self.mark
-        if self.author is None:
-            author_line = ""
-        else:
-            author_line = "\nauthor %s" % format_who_when(self.author)
-        committer = "committer %s" % format_who_when(self.committer)
-        if self.message is None:
-            msg_section = ""
-        else:
-            msg = self.message.encode('utf8')
-            msg_section = "\ndata %d\n%s" % (len(msg), msg)
-        if self.from_ is None:
-            from_line = ""
-        else:
-            from_line = "\nfrom %s" % self.from_
-        if self.merges is None:
-            merge_lines = ""
-        else:
-            merge_lines = "".join(["\nmerge %s" % (m,)
-                for m in self.merges])
-        if self.file_iter is None:
-            filecommands = ""
-        else:
-            filecommands = "".join(["\n%r" % (c,)
-                for c in iter(self.file_iter)])
-        return "commit %s%s%s\n%s%s%s%s%s" % (self.ref, mark_line, author_line,
-            committer, msg_section, from_line, merge_lines, filecommands)
-
-    def dump_str(self, names=None, child_lists=None, verbose=False):
-        result = [ImportCommand.dump_str(self, names, verbose=verbose)]
-        for f in iter(self.file_iter):
-            if child_lists is None:
-                continue
-            try:
-                child_names = child_lists[f.name]
-            except KeyError:
-                continue
-            result.append("\t%s" % f.dump_str(child_names, verbose=verbose))
-        return '\n'.join(result)
-
-
-class ProgressCommand(ImportCommand):
-
-    def __init__(self, message):
-        ImportCommand.__init__(self, 'progress')
-        self.message = message
-
-    def __repr__(self):
-        return "progress %s" % (self.message,)
-
-
-class ResetCommand(ImportCommand):
-
-    def __init__(self, ref, from_):
-        ImportCommand.__init__(self, 'reset')
-        self.ref = ref
-        self.from_ = from_
-
-    def __repr__(self):
-        if self.from_ is None:
-            from_line = ""
-        else:
-            # According to git-fast-import(1), the extra LF is optional here;
-            # however, versions of git up to 1.5.4.3 had a bug by which the LF
-            # was needed. Always emit it, since it doesn't hurt and maintains
-            # compatibility with older versions.
-            # http://git.kernel.org/?p=git/git.git;a=commit;h=655e8515f279c01f525745d443f509f97cd805ab
-            from_line = "\nfrom %s\n" % self.from_
-        return "reset %s%s" % (self.ref, from_line)
-
-
-class TagCommand(ImportCommand):
-
-    def __init__(self, id, from_, tagger, message):
-        ImportCommand.__init__(self, 'tag')
-        self.id = id
-        self.from_ = from_
-        self.tagger = tagger
-        self.message = message
-
-    def __repr__(self):
-        if self.from_ is None:
-            from_line = ""
-        else:
-            from_line = "\nfrom %s" % self.from_
-        if self.tagger is None:
-            tagger_line = ""
-        else:
-            tagger_line = "\ntagger %s" % format_who_when(self.tagger)
-        if self.message is None:
-            msg_section = ""
-        else:
-            msg = self.message.encode('utf8')
-            msg_section = "\ndata %d\n%s" % (len(msg), msg)
-        return "tag %s%s%s%s" % (self.id, from_line, tagger_line, msg_section)
-
-
-class FileCommand(ImportCommand):
-    """Base class for file commands."""
-    pass
-
-
-class FileModifyCommand(FileCommand):
-
-    def __init__(self, path, kind, is_executable, dataref, data):
-        # Either dataref or data should be null
-        FileCommand.__init__(self, 'filemodify')
-        self.path = check_path(path)
-        self.kind = kind
-        self.is_executable = is_executable
-        self.dataref = dataref
-        self.data = data
-        self._binary = ['data']
-
-    def __repr__(self):
-        if self.kind == 'symlink':
-            mode = "120000"
-        elif self.is_executable:
-            mode = "755"
-        else:
-            mode = "644"
-        if self.dataref is None:
-            dataref = "inline"
-            datastr = "\ndata %d\n%s" % (len(self.data), self.data)
-        else:
-            dataref = "%s" % (self.dataref,)
-            datastr = ""
-        path = format_path(self.path)
-        return "M %s %s %s%s" % (mode, dataref, path, datastr)
-
-
-class FileDeleteCommand(FileCommand):
-
-    def __init__(self, path):
-        FileCommand.__init__(self, 'filedelete')
-        self.path = check_path(path)
-
-    def __repr__(self):
-        return "D %s" % (format_path(self.path),)
-
-
-class FileCopyCommand(FileCommand):
-
-    def __init__(self, src_path, dest_path):
-        FileCommand.__init__(self, 'filecopy')
-        self.src_path = check_path(src_path)
-        self.dest_path = check_path(dest_path)
-
-    def __repr__(self):
-        return "C %s %s" % (
-            format_path(self.src_path, quote_spaces=True),
-            format_path(self.dest_path))
-
-
-class FileRenameCommand(FileCommand):
-
-    def __init__(self, old_path, new_path):
-        FileCommand.__init__(self, 'filerename')
-        self.old_path = check_path(old_path)
-        self.new_path = check_path(new_path)
-
-    def __repr__(self):
-        return "R %s %s" % (
-            format_path(self.old_path, quote_spaces=True),
-            format_path(self.new_path))
-
-
-class FileDeleteAllCommand(FileCommand):
-
-    def __init__(self):
-        FileCommand.__init__(self, 'filedeleteall')
-
-    def __repr__(self):
-        return "deleteall"
-
-
-def check_path(path):
-    """Check that a path is legal.
-
-    :return: the path if all is OK
-    :raise ValueError: if the path is illegal
-    """
-    if path is None or path == '':
-        raise ValueError("illegal path '%s'" % path)
-    return path
-
-
-def format_path(p, quote_spaces=False):
-    """Format a path in utf8, quoting it if necessary."""
-    if '\n' in p:
-        import re
-        p = re.sub('\n', '\\n', p)
-        quote = True
-    else:
-        quote = p[0] == '"' or (quote_spaces and ' ' in p)
-    if quote:
-        extra = GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE and ' ' or ''
-        p = '"%s"%s' % (p, extra)
-    return p.encode('utf8')
-
-
-def format_who_when(fields):
-    """Format a tuple of name,email,secs-since-epoch,utc-offset-secs as a string."""
-    offset = fields[3]
-    if offset < 0:
-        offset_sign = '-'
-        offset = abs(offset)
-    else:
-        offset_sign = '+'
-    offset_hours = offset / 3600
-    offset_minutes = offset / 60 - offset_hours * 60
-    offset_str = "%s%02d%02d" % (offset_sign, offset_hours, offset_minutes)
-    name = fields[0]
-    if name == '':
-        sep = ''
-    else:
-        sep = ' '
-    result = "%s%s<%s> %d %s" % (name, sep, fields[1], fields[2], offset_str)
-    return result.encode('utf8')
diff --git a/dates.py b/dates.py
deleted file mode 100644
index 209d069..0000000
--- a/dates.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Date parsing routines.
-
-Each routine returns timestamp,timezone where
-
-* timestamp is seconds since epoch
-* timezone is the offset from UTC in seconds.
-"""
-
-
-import time
-
-from bzrlib.plugins.fastimport import errors
-
-
-def parse_raw(s, lineno=0):
-    """Parse a date from a raw string.
-    
-    The format must be exactly "seconds-since-epoch offset-utc".
-    See the spec for details.
-    """
-    timestamp_str, timezone_str = s.split(' ', 1)
-    timestamp = float(timestamp_str)
-    timezone = _parse_tz(timezone_str, lineno)
-    return timestamp, timezone
-
-
-def _parse_tz(tz, lineno):
-    """Parse a timezone specification in the [+|-]HHMM format.
-
-    :return: the timezone offset in seconds.
-    """
-    # from git_repository.py in bzr-git
-    if len(tz) != 5:
-        raise errors.InvalidTimezone(lineno, tz)
-    sign = {'+': +1, '-': -1}[tz[0]]
-    hours = int(tz[1:3])
-    minutes = int(tz[3:])
-    return sign * 60 * (60 * hours + minutes)
-
-
-def parse_rfc2822(s, lineno=0):
-    """Parse a date from a rfc2822 string.
-    
-    See the spec for details.
-    """
-    raise NotImplementedError(parse_rfc2822)
-
-
-def parse_now(s, lineno=0):
-    """Parse a date from a string.
-
-    The format must be exactly "now".
-    See the spec for details.
-    """
-    return time.time(), 0
-
-
-# Lookup tabel of date parsing routines
-DATE_PARSERS_BY_NAME = {
-    'raw':      parse_raw,
-    'rfc2822':  parse_rfc2822,
-    'now':      parse_now,
-    }
diff --git a/errors.py b/errors.py
deleted file mode 100644
index 02cc690..0000000
--- a/errors.py
+++ /dev/null
@@ -1,165 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Exception classes for fastimport"""
-
-from bzrlib import errors as bzr_errors
-
-
-# Prefix to messages to show location information
-_LOCATION_FMT = "line %(lineno)d: "
-
-
-class ImportError(bzr_errors.BzrError):
-    """The base exception class for all import processing exceptions."""
-
-    _fmt = "Unknown Import Error"
-
-
-class ParsingError(ImportError):
-    """The base exception class for all import processing exceptions."""
-
-    _fmt = _LOCATION_FMT + "Unknown Import Parsing Error"
-
-    def __init__(self, lineno):
-        ImportError.__init__(self)
-        self.lineno = lineno
-
-
-class MissingBytes(ParsingError):
-    """Raised when EOF encountered while expecting to find more bytes."""
-
-    _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes,"
-        " found %(found)d")
-
-    def __init__(self, lineno, expected, found):
-        ParsingError.__init__(self, lineno)
-        self.expected = expected
-        self.found = found
-
-
-class MissingTerminator(ParsingError):
-    """Raised when EOF encountered while expecting to find a terminator."""
-
-    _fmt = (_LOCATION_FMT +
-        "Unexpected EOF - expected '%(terminator)s' terminator")
-
-    def __init__(self, lineno, terminator):
-        ParsingError.__init__(self, lineno)
-        self.terminator = terminator
-
-
-class InvalidCommand(ParsingError):
-    """Raised when an unknown command found."""
-
-    _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'")
-
-    def __init__(self, lineno, cmd):
-        ParsingError.__init__(self, lineno)
-        self.cmd = cmd
-
-
-class MissingSection(ParsingError):
-    """Raised when a section is required in a command but not present."""
-
-    _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s")
-
-    def __init__(self, lineno, cmd, section):
-        ParsingError.__init__(self, lineno)
-        self.cmd = cmd
-        self.section = section
-
-
-class BadFormat(ParsingError):
-    """Raised when a section is formatted incorrectly."""
-
-    _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in "
-        "command %(cmd)s: found '%(text)s'")
-
-    def __init__(self, lineno, cmd, section, text):
-        ParsingError.__init__(self, lineno)
-        self.cmd = cmd
-        self.section = section
-        self.text = text
-
-
-class InvalidTimezone(ParsingError):
-    """Raised when converting a string timezone to a seconds offset."""
-
-    _fmt = (_LOCATION_FMT +
-        "Timezone %(timezone)r could not be converted.%(reason)s")
-
-    def __init__(self, lineno, timezone, reason=None):
-        ParsingError.__init__(self, lineno)
-        self.timezone = timezone
-        if reason:
-            self.reason = ' ' + reason
-        else:
-            self.reason = ''
-
-
-class UnknownDateFormat(ImportError):
-    """Raised when an unknown date format is given."""
-
-    _fmt = ("Unknown date format '%(format)s'")
-
-    def __init__(self, format):
-        ImportError.__init__(self)
-        self.format = format
-
-
-class MissingHandler(ImportError):
-    """Raised when a processor can't handle a command."""
-
-    _fmt = ("Missing handler for command %(cmd)s")
-
-    def __init__(self, cmd):
-        ImportError.__init__(self)
-        self.cmd = cmd
-
-
-class UnknownParameter(ImportError):
-    """Raised when an unknown parameter is passed to a processor."""
-
-    _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s")
-
-    def __init__(self, param, knowns):
-        ImportError.__init__(self)
-        self.param = param
-        self.knowns = knowns
-
-
-class BadRepositorySize(ImportError):
-    """Raised when the repository has an incorrect number of revisions."""
-
-    _fmt = ("Bad repository size - %(found)d revisions found, "
-        "%(expected)d expected")
-
-    def __init__(self, expected, found):
-        ImportError.__init__(self)
-        self.expected = expected
-        self.found = found
-
-
-class BadRestart(ImportError):
-    """Raised when the import stream and id-map do not match up."""
-
-    _fmt = ("Bad restart - attempted to skip commit %(commit_id)s "
-        "but matching revision-id is unknown")
-
-    def __init__(self, commit_id):
-        ImportError.__init__(self)
-        self.commit_id = commit_id
diff --git a/explorer/logos/cvs.png b/explorer/logos/cvs.png
new file mode 100644
index 0000000..e279bdf
--- /dev/null
+++ b/explorer/logos/cvs.png
diff --git a/explorer/logos/darcs.png b/explorer/logos/darcs.png
new file mode 100644
index 0000000..ca9365f
--- /dev/null
+++ b/explorer/logos/darcs.png
diff --git a/explorer/logos/git.png b/explorer/logos/git.png
new file mode 100644
index 0000000..aae35a7
--- /dev/null
+++ b/explorer/logos/git.png
diff --git a/explorer/logos/mercurial.png b/explorer/logos/mercurial.png
new file mode 100644
index 0000000..60effbc
--- /dev/null
+++ b/explorer/logos/mercurial.png
diff --git a/explorer/logos/monotone.png b/explorer/logos/monotone.png
new file mode 100644
index 0000000..16f1908
--- /dev/null
+++ b/explorer/logos/monotone.png
diff --git a/explorer/logos/perforce.png b/explorer/logos/perforce.png
new file mode 100644
index 0000000..e62897c
--- /dev/null
+++ b/explorer/logos/perforce.png
diff --git a/explorer/logos/subversion.png b/explorer/logos/subversion.png
new file mode 100644
index 0000000..d28702a
--- /dev/null
+++ b/explorer/logos/subversion.png
diff --git a/explorer/tools.xml b/explorer/tools.xml
new file mode 100644
index 0000000..2386737
--- /dev/null
+++ b/explorer/tools.xml
@@ -0,0 +1,20 @@
+<folder title="Tools">
+  <folder title="Migration Tools">
+    <folder title="Export From" icon="actions/edit-redo">
+      <tool action="qrun fast-export" icon="logos/bazaar" title="Bazaar" type="bzr" />
+      <tool action="qrun fast-export-from-cvs" icon="logos/cvs" title="CVS" type="bzr" />
+      <tool action="qrun fast-export-from-darcs" icon="logos/darcs" title="Darcs" type="bzr" />
+      <tool action="qrun fast-export-from-git" icon="logos/git" title="Git" type="bzr" />
+      <tool action="qrun fast-export-from-hg" icon="logos/mercurial" title="Mercurial" type="bzr" />
+      <tool action="qrun fast-export-from-mtn" icon="logos/monotone" title="Monotone" type="bzr" />
+      <tool action="qrun fast-export-from-p4" icon="logos/perforce" title="Perforce" type="bzr" />
+      <tool action="qrun fast-export-from-svn" icon="logos/subversion" title="Subversion" type="bzr" />
+    </folder>
+    <folder title="Import From" icon="actions/go-jump">
+      <tool action="qrun fast-import" icon="mimetypes/text-x-generic-template" title="Fast Import Stream" type="bzr" />
+    </folder>
+    <separator/>
+    <tool action="qrun fast-import-filter" icon="actions/media-playback-pause" title="Fast Import Filter" type="bzr" />
+  </folder>
+</folder>
+
diff --git a/bzr_exporter.py b/exporter.py
index 16d942a..3f477d1 100755..100644
--- a/bzr_exporter.py
+++ b/exporter.py
@@ -35,65 +35,57 @@ import bzrlib.revision
 from bzrlib import (
     builtins,
     errors as bazErrors,
+    osutils,
     progress,
     trace,
     )
 
-from bzrlib.plugins.fastimport import commands, helpers, marks_file
+from bzrlib.plugins.fastimport import (
+    helpers,
+    marks_file,
+    )
 
+from fastimport import commands
+from fastimport.helpers import (
+    binary_stream,
+    single_plural,
+    )
 
-# This is adapted from _linear_view_verisons in log.py in bzr 1.12.
-def _iter_linear_revisions(branch, start_rev_id, end_rev_id):
-    """Calculate a sequence of revisions, newest to oldest.
 
-    :param start_rev_id: the lower revision-id
-    :param end_rev_id: the upper revision-id
-    :return: An iterator of revision_ids
-    :raises ValueError: if a start_rev_id is specified but
-      is not found walking the left-hand history
-    """
-    br_revno, br_rev_id = branch.last_revision_info()
-    repo = branch.repository
-    if start_rev_id is None and end_rev_id is None:
-        for revision_id in repo.iter_reverse_revision_history(br_rev_id):
-            yield revision_id
+def _get_output_stream(destination):
+    if destination is None or destination == '-':
+        return binary_stream(sys.stdout)
+    elif destination.endswith('gz'):
+        import gzip
+        return gzip.open(destination, 'wb')
     else:
-        if end_rev_id is None:
-            end_rev_id = br_rev_id
-        found_start = start_rev_id is None
-        for revision_id in repo.iter_reverse_revision_history(end_rev_id):
-            if not found_start and revision_id == start_rev_id:
-                yield revision_id
-                found_start = True
-                break
-            else:
-                yield revision_id
-        else:
-            if not found_start:
-                raise ValueError()
+        return open(destination, 'wb')
 
 
 class BzrFastExporter(object):
 
     def __init__(self, source, destination, git_branch=None, checkpoint=-1,
         import_marks_file=None, export_marks_file=None, revision=None,
-        verbose=False):
+        verbose=False, plain_format=False):
+        """Export branch data in fast import format.
+
+        :param plain_format: if True, 'classic' fast-import format is
+          used without any extended features; if False, the generated
+          data is richer and includes information like multiple
+          authors, revision properties, etc.
+        """
         self.source = source
-        if destination is None or destination == '-':
-            self.outf = helpers.binary_stream(sys.stdout)
-        elif destination.endswith('gz'):
-            import gzip
-            self.outf = gzip.open(destination, 'wb')
-        else:
-            self.outf = open(destination, 'wb')
+        self.outf = _get_output_stream(destination)
         self.git_branch = git_branch
         self.checkpoint = checkpoint
         self.import_marks_file = import_marks_file
         self.export_marks_file = export_marks_file
         self.revision = revision
         self.excluded_revisions = set()
+        self.plain_format = plain_format
         self._multi_author_api_available = hasattr(bzrlib.revision.Revision,
             'get_apparent_authors')
+        self.properties_to_exclude = ['authors', 'author']
 
         # Progress reporting stuff
         self.verbose = verbose
@@ -102,6 +94,7 @@ class BzrFastExporter(object):
         else:
             self.progress_every = 1000
         self._start_time = time.time()
+        self._commit_total = 0
 
         # Load the marks and initialise things accordingly
         self.revid_to_mark = {}
@@ -124,17 +117,15 @@ class BzrFastExporter(object):
             start_rev_id = None
             end_rev_id = None
         self.note("Calculating the revisions to include ...")
-        view_revisions = reversed(list(_iter_linear_revisions(self.branch,
-            start_rev_id, end_rev_id)))
+        view_revisions = reversed([rev_id for rev_id, _, _, _ in
+            self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)])
         # If a starting point was given, we need to later check that we don't
         # start emitting revisions from before that point. Collect the
         # revisions to exclude now ...
         if start_rev_id is not None:
-            # The result is inclusive so skip the first (the oldest) one
             self.note("Calculating the revisions to exclude ...")
-            uninteresting = list(_iter_linear_revisions(self.branch, None,
-                start_rev_id))[1:]
-            self.excluded_revisions = set(uninteresting)
+            self.excluded_revisions = set([rev_id for rev_id, _, _, _ in
+                self.branch.iter_merge_sorted_revisions(start_rev_id)])
         return list(view_revisions)
 
     def run(self):
@@ -144,7 +135,13 @@ class BzrFastExporter(object):
         # Export the data
         self.branch.repository.lock_read()
         try:
-            for revid in self.interesting_history():
+            interesting = self.interesting_history()
+            self._commit_total = len(interesting)
+            self.note("Starting export of %d revisions ..." %
+                self._commit_total)
+            if not self.plain_format:
+                self.emit_features()
+            for revid in interesting:
                 self.emit_commit(revid, self.git_branch)
             if self.branch.supports_tags():
                 self.emit_tags()
@@ -171,10 +168,11 @@ class BzrFastExporter(object):
         return time.strftime("%H:%M:%S")
 
     def report_progress(self, commit_count, details=''):
-        # Note: we can't easily give a total count here because we
-        # don't know how many merged revisions will need to be output
         if commit_count and commit_count % self.progress_every == 0:
-            counts = "%d" % (commit_count,)
+            if self._commit_total:
+                counts = "%d/%d" % (commit_count, self._commit_total)
+            else:
+                counts = "%d" % (commit_count,)
             minutes = (time.time() - self._start_time) / 60
             rate = commit_count * 1.0 / minutes
             if rate > 10:
@@ -187,7 +185,7 @@ class BzrFastExporter(object):
         time_required = progress.str_tdelta(time.time() - self._start_time)
         rc = len(self.revid_to_mark)
         self.note("Exported %d %s in %s",
-            rc, helpers.single_plural(rc, "revision", "revisions"),
+            rc, single_plural(rc, "revision", "revisions"),
             time_required)
 
     def print_cmd(self, cmd):
@@ -200,7 +198,7 @@ class BzrFastExporter(object):
  
     def is_empty_dir(self, tree, path):
         path_id = tree.path2id(path)
-        if path_id == None:
+        if path_id is None:
             self.warning("Skipping empty_dir detection - no file_id for %s" %
                 (path,))
             return False
@@ -216,6 +214,10 @@ class BzrFastExporter(object):
         else:
             return False
 
+    def emit_features(self):
+        for feature in sorted(commands.FEATURE_NAMES):  # sorted: deterministic output order
+            self.print_cmd(commands.FeatureCommand(feature))  # one FeatureCommand per name
+
     def emit_commit(self, revid, git_branch):
         if revid in self.revid_to_mark or revid in self.excluded_revisions:
             return
@@ -228,14 +230,13 @@ class BzrFastExporter(object):
             self.revid_to_mark[revid] = -1
             return
  
-        # Emit parents
-        nparents = len(revobj.parent_ids)
-        if nparents:
-            for parent in revobj.parent_ids:
-                self.emit_commit(parent, git_branch)
-
         # Get the primary parent
+        # TODO: Consider the excluded revisions when deciding the parents.
+        # Currently, a commit with parents that are excluded ought to be
+        # triggering the git_branch calculation below (and it is not).
+        # IGC 20090824
         ncommits = len(self.revid_to_mark)
+        nparents = len(revobj.parent_ids)
         if nparents == 0:
             if ncommits:
                 # This is a parentless commit but it's not the first one
@@ -264,36 +265,55 @@ class BzrFastExporter(object):
             self._save_marks()
             self.print_cmd(commands.CheckpointCommand())
 
-    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
-        # Get the committer and author info
-        committer = revobj.committer
-        if committer.find('<') == -1:
+    def _get_name_email(self, user):
+        if user.find('<') == -1:
             # If the email isn't inside <>, we need to use it as the name
             # in order for things to round-trip correctly.
             # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
-            name = committer
+            name = user
             email = ''
         else:
-            name, email = parseaddr(committer)
+            name, email = parseaddr(user)
+        return name, email
+
+    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
+        # Get the committer and author info
+        committer = revobj.committer
+        name, email = self._get_name_email(committer)
         committer_info = (name, email, revobj.timestamp, revobj.timezone)
         if self._multi_author_api_available:
-            author = revobj.get_apparent_authors()[0]
+            more_authors = revobj.get_apparent_authors()
+            author = more_authors.pop(0)
         else:
+            more_authors = []
             author = revobj.get_apparent_author()
-        if author != committer:
-            name, email = parseaddr(author)
+        if not self.plain_format and more_authors:
+            name, email = self._get_name_email(author)
             author_info = (name, email, revobj.timestamp, revobj.timezone)
+            more_author_info = []
+            for a in more_authors:
+                name, email = self._get_name_email(a)
+                more_author_info.append(
+                    (name, email, revobj.timestamp, revobj.timezone))
+        elif author != committer:
+            name, email = self._get_name_email(author)
+            author_info = (name, email, revobj.timestamp, revobj.timezone)
+            more_author_info = None
         else:
             author_info = None
+            more_author_info = None
 
         # Get the parents in terms of marks
         non_ghost_parents = []
         for p in revobj.parent_ids:
             if p in self.excluded_revisions:
                 continue
-            parent_mark = self.revid_to_mark[p]
-            if parent_mark != -1:
+            try:
+                parent_mark = self.revid_to_mark[p]
                 non_ghost_parents.append(":%s" % parent_mark)
+            except KeyError:
+                # ghost - ignore
+                continue
         if non_ghost_parents:
             from_ = non_ghost_parents[0]
             merges = non_ghost_parents[1:]
@@ -301,9 +321,23 @@ class BzrFastExporter(object):
             from_ = None
             merges = None
 
+        # Filter the revision properties. Some metadata (like the
+        # author information) is already exposed in other ways so
+        # don't repeat it here.
+        if self.plain_format:
+            properties = None
+        else:
+            properties = revobj.properties
+            for prop in self.properties_to_exclude:
+                try:
+                    del properties[prop]
+                except KeyError:
+                    pass
+
         # Build and return the result
         return commands.CommitCommand(git_ref, mark, author_info,
-            committer_info, revobj.message, from_, merges, iter(file_cmds))
+            committer_info, revobj.message, from_, merges, iter(file_cmds),
+            more_authors=more_author_info, properties=properties)
 
     def _get_revision_trees(self, parent, revision_id):
         try:
@@ -351,15 +385,21 @@ class BzrFastExporter(object):
         for path, id_, kind in changes.added + my_modified + rd_modifies:
             if kind == 'file':
                 text = tree_new.get_file_text(id_)
-                file_cmds.append(commands.FileModifyCommand(path, 'file',
-                    tree_new.is_executable(id_), None, text))
+                file_cmds.append(commands.FileModifyCommand(path,
+                    helpers.kind_to_mode('file', tree_new.is_executable(id_)),
+                    None, text))
             elif kind == 'symlink':
-                file_cmds.append(commands.FileModifyCommand(path, 'symlink',
-                    False, None, tree_new.get_symlink_target(id_)))
+                file_cmds.append(commands.FileModifyCommand(path,
+                    helpers.kind_to_mode('symlink', False),
+                    None, tree_new.get_symlink_target(id_)))
+            elif kind == 'directory':
+                if not self.plain_format:
+                    file_cmds.append(commands.FileModifyCommand(path,
+                        helpers.kind_to_mode('directory', False),
+                        None, None))
             else:
-                # Should we do something here for importers that
-                # can handle directory and tree-reference changes?
-                continue
+                self.warning("cannot export '%s' of kind %s yet - ignoring" %
+                    (path, kind))
         return file_cmds
 
     def _process_renames_and_deletes(self, renames, deletes,
@@ -388,11 +428,15 @@ class BzrFastExporter(object):
         # Instead, we need to make multiple passes over the various lists to
         # get the ordering right.
 
+        must_be_renamed = {}
+        old_to_new = {}
         deleted_paths = set([p for p, _, _ in deletes])
         for (oldpath, newpath, id_, kind,
                 text_modified, meta_modified) in renames:
+            emit = kind != 'directory' or not self.plain_format
             if newpath in deleted_paths:
-                file_cmds.append(commands.FileDeleteCommand(newpath))
+                if emit:
+                    file_cmds.append(commands.FileDeleteCommand(newpath))
                 deleted_paths.remove(newpath)
             if (self.is_empty_dir(tree_old, oldpath)):
                 self.note("Skipping empty dir %s in rev %s" % (oldpath,
@@ -401,14 +445,40 @@ class BzrFastExporter(object):
             #oldpath = self._adjust_path_for_renames(oldpath, renamed,
             #    revision_id)
             renamed.append([oldpath, newpath])
-            file_cmds.append(commands.FileRenameCommand(oldpath, newpath))
+            old_to_new[oldpath] = newpath
+            if emit:
+                file_cmds.append(commands.FileRenameCommand(oldpath, newpath))
             if text_modified or meta_modified:
                 modifies.append((newpath, id_, kind))
 
+            # Renaming a directory implies all children must be renamed.
+            # Note: changes_from() doesn't handle this
+            if kind == 'directory':
+                for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_):
+                    if e.kind == 'directory' and self.plain_format:
+                        continue
+                    old_child_path = osutils.pathjoin(oldpath, p)
+                    new_child_path = osutils.pathjoin(newpath, p)
+                    must_be_renamed[old_child_path] = new_child_path
+
+        # Add children not already renamed
+        if must_be_renamed:
+            renamed_already = set(old_to_new.keys())
+            still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already
+            for old_child_path in sorted(still_to_be_renamed):
+                new_child_path = must_be_renamed[old_child_path]
+                if self.verbose:
+                    self.note("implicitly renaming %s => %s" % (old_child_path,
+                        new_child_path))
+                file_cmds.append(commands.FileRenameCommand(old_child_path,
+                    new_child_path))
+
         # Record remaining deletes
         for path, id_, kind in deletes:
             if path not in deleted_paths:
                 continue
+            if kind == 'directory' and self.plain_format:
+                continue
             #path = self._adjust_path_for_renames(path, renamed, revision_id)
             file_cmds.append(commands.FileDeleteCommand(path))
         return file_cmds, modifies, renamed
@@ -435,7 +505,7 @@ class BzrFastExporter(object):
                 self.warning('not creating tag %r pointing to non-existent '
                     'revision %s' % (tag, revid))
             else:
-                git_ref = 'refs/tags/%s' % tag
+                git_ref = 'refs/tags/%s' % tag.encode("utf-8")
                 self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark)))
 
     def _next_tmp_branch_name(self):
diff --git a/exporters/Makefile b/exporters/Makefile
index 8a4efd9..2b71211 100644
--- a/exporters/Makefile
+++ b/exporters/Makefile
@@ -1,7 +1,8 @@
-SVN ?= /usr/local/svn
-APR_INCLUDES ?= /usr/include/apr-1.0
-CFLAGS += -I${APR_INCLUDES} -I${SVN}/include/subversion-1 -pipe -O2 -std=c99
+SVN ?= /usr
+CFLAGS += -I${SVN}/include/subversion-1 -pipe -O2 -std=c99
+CFLAGS += `pkg-config --cflags apr-1`
 LDFLAGS += -L${SVN}/lib -lsvn_fs-1 -lsvn_repos-1
+LDFLAGS += `pkg-config --libs apr-1`
 
 all: svn-fast-export svn-archive
 
diff --git a/exporters/__init__.py b/exporters/__init__.py
index 2d7b135..6d282d6 100644
--- a/exporters/__init__.py
+++ b/exporters/__init__.py
@@ -218,11 +218,14 @@ class MercurialExporter(_Exporter):
 class GitExporter(_Exporter):
 
     def __init__(self):
-        self.check_install('Git', '1.6', ['git'])
+        self.cmd_name = "git"
+        if sys.platform == 'win32':
+            self.cmd_name = "git.cmd"
+        self.check_install('Git', '1.6', [self.cmd_name])
 
     def generate(self, source, destination, verbose=False, custom=None):
         """Generate a fast import stream. See _Exporter.generate() for details."""
-        args = ["git", "fast-export", "--all", "--signed-tags=warn"]
+        args = [self.cmd_name, "fast-export", "--all", "--signed-tags=warn"]
         outf, base, marks = self.get_output_info(destination)
         if marks:
             marks = os.path.abspath(marks)
@@ -241,11 +244,11 @@ class GitExporter(_Exporter):
 class MonotoneExporter(_Exporter):
 
     def __init__(self):
-        self.check_install('Monotone', '0.43', ['mnt'])
+        self.check_install('Monotone', '0.43', ['mtn'])
 
     def generate(self, source, destination, verbose=False, custom=None):
         """Generate a fast import stream. See _Exporter.generate() for details."""
-        args = ["mnt", "git_export"]
+        args = ["mtn", "git_export"]
         outf, base, marks = self.get_output_info(destination)
         if marks:
             marks = os.path.abspath(marks)
@@ -277,7 +280,7 @@ class PerforceExporter(_Exporter):
             retcode = p4_fast_export.main([source])
         finally:
             sys.stdout = original_stdout
-            self.report_results(retcode, destination)
+        self.report_results(retcode, destination)
 
 
 class SubversionExporter(_Exporter):
@@ -308,7 +311,7 @@ def fast_export_from(source, destination, tool, verbose=False, custom=None):
         factory = MercurialExporter
     elif tool == 'git':
         factory = GitExporter
-    elif tool == 'mnt':
+    elif tool == 'mtn':
         factory = MonotoneExporter
     elif tool == 'p4':
         factory = PerforceExporter
diff --git a/exporters/darcs/README b/exporters/darcs/README
index 4b13e3b..3fc9449 100644
--- a/exporters/darcs/README
+++ b/exporters/darcs/README
@@ -23,13 +23,6 @@ Independent::
 	Ideally it should work with any fast importer, but actually it has been
 	tested with git fast-import, bzr fast-import and hg fastimport. (These
 	are the three fast-import implementations available ATM.)
-	+
-	hg fastimport needs three patches. While they are not in the upstream,
-	you can get it from my repository using
-+
-----
-$ hg clone static-http://frugalware.org/~vmiklos/hg/hg-fastimport
-----
 
 Formats::
 	It supports the 'darcs-2', 'hashed', and 'old-fashioned-inventory' darcs
@@ -127,16 +120,18 @@ supported by fastimport-0.6 is hg-1.0.x.
 Mercurial (Hg) version:
 ----
 $ hg version
-Mercurial Distributed SCM (version 1.2.1)
+Mercurial Distributed SCM (version 1.3)
 ----
 
-Strictly speaking this document is a wrong place to talk about enabling
-hg plugins. However...
+Strictly speaking, this document is the wrong place to talk about
+configuring hg fastimport. However, you will need something like:
 
 ----
-$ cat ~/.hgrc
-[extensions]
-hgext.fastimport=
+$ hg clone http://vc.gerg.ca/hg/pyfastimport
+$ hg clone http://vc.gerg.ca/hg/hg-fastimport
+$ sudo ln -s /path/to/pyfastimport/fastimport /usr/lib/python2.6/site-packages/fastimport
+$ sudo ln -s /path/to/hg-fastimport/hgfastimport /usr/lib/python2.6/site-packages/hgfastimport
+$ echo -e "[extensions]\nfastimport = /usr/lib/python2.6/site-packages/hgfastimport" > ~/.hgrc
 ----
 
 and once you installed the plugin correctly, you should have something like:
diff --git a/exporters/darcs/TODO b/exporters/darcs/TODO
index 2f199d1..c6892c8 100644
--- a/exporters/darcs/TODO
+++ b/exporters/darcs/TODO
@@ -4,3 +4,5 @@ not enabled, etc.
 parse the patches manually so we can avoid re-adding existing files manually.
 
 avoid darcs apply.
+
+import: handle evil merges (git-subtree), maybe using git log --first-parent
diff --git a/exporters/darcs/d2x b/exporters/darcs/d2x
index 79e18a3..959cc00 100755
--- a/exporters/darcs/d2x
+++ b/exporters/darcs/d2x
@@ -93,7 +93,7 @@ if [ ! -f $dmark ]; then
 		hg)
 			hg init
 			darcs-fast-export $* $origin | \
-				hg fastimport /dev/stdin
+				hg fastimport -
 	esac
 else
 	case $format in
diff --git a/exporters/darcs/darcs-fast-export b/exporters/darcs/darcs-fast-export
index d94618e..fa850de 100755
--- a/exporters/darcs/darcs-fast-export
+++ b/exporters/darcs/darcs-fast-export
@@ -4,7 +4,7 @@
 
     darcs-fast-export - darcs backend for fast data importers
 
-    Copyright (c) 2008 Miklos Vajna <vmiklos@frugalware.org>
+    Copyright (c) 2008, 2009 Miklos Vajna <vmiklos@frugalware.org>
     Copyright (c) 2008 Matthias Andree <matthias.andree@gmx.de>
 
     This program is free software; you can redistribute it and/or modify
@@ -29,314 +29,352 @@ import os
 import sys
 import gzip
 import time
+import calendar
 import shutil
 import subprocess
 import optparse
 import re
+import urllib
+import urllib2
+import StringIO
 
 sys = reload(sys)
 sys.setdefaultencoding("utf-8")
 
-def __get_zone():
-	now = time.localtime()
-	if time.daylight and now[-1]:
-		offset = time.altzone
-	else:
-		offset = time.timezone
-	hours, minutes = divmod(abs(offset), 3600)
-	if offset > 0:
-		sign = "-"
-	else:
-		sign = "+"
-	return sign, hours, minutes
-
-def get_zone_str():
-	sign, hours, minutes = __get_zone()
-	return "%s%02d%02d" % (sign, hours, minutes // 60)
-
-def get_zone_int():
-	sign, hours, minutes = __get_zone()
-	ret = hours*3600+minutes*60
-	if sign == "-":
-		ret *= -1
-	return ret
-
-def get_patchname(patch):
-	ret = []
-	s = ""
-	if patch.attributes['inverted'].value == 'True':
-		s = "UNDO: "
-	ret.append(s + patch.getElementsByTagName("name")[0].childNodes[0].data)
-	lines = patch.getElementsByTagName("comment")
-	if lines:
-		for i in lines[0].childNodes[0].data.split('\n'):
-			if not i.startswith("Ignore-this: "):
-				ret.append(i)
-	return "\n".join(ret).encode('utf-8')
-
-def get_author(patch):
-	"""darcs allows any freeform string, but fast-import has a more
-	strict format, so fix up broken author names here."""
-
-	author = patch.attributes['author'].value
-	if author in authormap:
-		author = authormap[author]
-	if not len(author):
-		author = "darcs-fast-export <darcs-fast-export>"
-	# add missing name
-	elif not ">" in author:
-		author = "%s <%s>" % (author.split('@')[0], author)
-	# avoid double quoting
-	elif author[0] == '"' and author[-1] == '"':
-		author = author[1:-1]
-	# name after email
-	elif author[-1] != '>':
-		author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1]
-	return author.encode('utf-8')
-
-def get_date(patch):
-	try:
-		date = time.strptime(patch, "%Y%m%d%H%M%S")
-	except ValueError:
-		date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y')
-	return int(time.mktime(date)) + get_zone_int()
-
-def progress(s):
-	print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)
-	sys.stdout.flush()
-
-def log(s):
-	logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s))
-	logsock.flush()
-
-hashes = []
-def parse_inventory(sock=None):
-	prev = None
-	nextprev = False
-	buf = []
-	if not sock:
-		sock = open(os.path.join("_darcs", "hashed_inventory"))
-	for i in sock.readlines():
-		if i.startswith("hash"):
-			buf.insert(0, i[6:-1])
-		if i.startswith("Starting with inventory:"):
-			nextprev = True
-		elif nextprev:
-			prev = i[:-1]
-			nextprev = False
-	sock.close()
-	for i in buf:
-		hashes.insert(0, i)
-	if prev:
-		sock = gzip.open(os.path.join("_darcs", "inventories", prev))
-		parse_inventory(sock)
-
-# Option Parser
-usage="%prog [options] darcsrepo"
-opp = optparse.OptionParser(usage=usage)
-opp.add_option("--import-marks", metavar="IFILE",
-	help="read state for incremental imports from IFILE")
-opp.add_option("--export-marks", metavar="OFILE",
-	help="write state for incremental imports from OFILE")
-opp.add_option("--encoding",
-	help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess")
-opp.add_option("--authors-file", metavar="F",
-	help="read author transformations in old=new format from F")
-opp.add_option("--working", metavar="W",
-	help="working directory which is removed at the end of non-incremental conversions")
-opp.add_option("--logfile", metavar="L",
-	help="log file which contains the output of external programs invoked during the conversion")
-opp.add_option("--git-branch", metavar="B",
-	help="git branch [default: refs/heads/master]")
-opp.add_option("--progress", metavar="P",
-	help="insert progress statements after every n commit [default: 100]")
-(options, args) = opp.parse_args()
-if len(args) < 1:
-	opp.error("darcsrepo required")
-
-export_marks = []
-import_marks = []
-if options.import_marks:
-	sock = open(options.import_marks)
-	for i in sock.readlines():
-		line = i.strip()
-		if not len(line):
-			continue
-		import_marks.append(line.split(' ')[1])
-		export_marks.append(line)
-	sock.close()
-
-# read author mapping file in gitauthors format,
-# i. e. in=out (one per # line)
-authormap = {}
-if options.authors_file:
-	sock = open(options.authors_file)
-	authormap = dict([i.strip().split('=',1) for i in sock])
-	sock.close()
-
-origin = os.path.abspath(args[0])
-if options.working:
-	working = os.path.abspath(options.working)
-else:
-	working = "%s.darcs" % origin
-patchfile = "%s.patch" % origin
-if options.logfile:
-	logfile = os.path.abspath(options.logfile)
-else:
-	logfile = "%s.log" % origin
-logsock = open(logfile, "a")
-if options.git_branch:
-	git_branch = options.git_branch
-else:
-	git_branch = "refs/heads/master"
-
-if options.progress:
-	prognum = int(options.progress)
-else:
-	prognum = 100
-
-progress("getting list of patches")
-if not len(import_marks):
-	sock = os.popen("darcs changes --xml --reverse --repo %s" % origin)
-else:
-	sock = os.popen("darcs changes --xml --reverse  --repo %s --from-match 'hash %s'" % (origin, import_marks[-1]))
-buf = sock.read()
-sock.close()
-# this is hackish. we need to escape some bad chars, otherwise the xml
-# will not be valid
-buf = buf.replace('\x1b', '^[')
-if options.encoding:
-	xmldoc = xml.dom.minidom.parseString(unicode(buf, options.encoding).encode('utf-8'))
-else:
-	try:
-		xmldoc = xml.dom.minidom.parseString(buf)
-	except xml.parsers.expat.ExpatError:
+class Handler:
+	def __init__(self):
+		self.hashes = []
+		self.authormap = {}
+		self.export_marks = []
+		self.import_marks = []
+
+	def get_patchname(self, patch):
+		ret = []
+		s = ""
+		if patch.attributes['inverted'].value == 'True':
+			s = "UNDO: "
+		cs = patch.getElementsByTagName("name")[0].childNodes
+		if cs.length > 0:
+			ret.append(s + cs[0].data)
+		lines = patch.getElementsByTagName("comment")
+		if lines:
+			for i in lines[0].childNodes[0].data.split('\n'):
+				if not i.startswith("Ignore-this: "):
+					ret.append(i)
+		return "\n".join(ret).encode('utf-8')
+
+	def get_author(self, patch):
+		"""darcs allows any freeform string, but fast-import has a more
+		strict format, so fix up broken author names here."""
+
+		author = patch.attributes['author'].value
+		if author in self.authormap:
+			author = self.authormap[author]
+		if not len(author):
+			author = "darcs-fast-export <darcs-fast-export>"
+		# add missing name
+		elif not ">" in author:
+			author = "%s <%s>" % (author.split('@')[0], author)
+		# avoid double quoting
+		elif author[0] == '"' and author[-1] == '"':
+			author = author[1:-1]
+		# name after email
+		elif author[-1] != '>':
+			author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1]
+		return author.encode('utf-8')
+
+	def get_date(self, patch):
 		try:
-			import chardet
-		except ImportError:
-			sys.exit("Error, encoding is not utf-8. Please " +
-				"either specify it with the --encoding " +
-				"option or install chardet.")
-		progress("encoding is not utf8, guessing charset")
-		encoding = chardet.detect(buf)['encoding']
-		progress("detected encoding is %s" % encoding)
-		xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8'))
-sys.stdout.flush()
-
-darcs2 = False
-oldfashionedpatch = True
-cwd = os.getcwd()
-if os.path.exists(os.path.join(origin, "_darcs", "format")):
-	sock = open(os.path.join(origin, "_darcs", "format"))
-	format = [x.strip() for x in sock]
-	sock.close()
-	darcs2 = 'darcs-2' in format
-	oldfashionedpatch = not 'hashed' in format
-if not oldfashionedpatch:
-	progress("parsing the inventory")
-	os.chdir(origin)
-	parse_inventory()
-if not options.import_marks or not os.path.exists(working):
-	# init the tmp darcs repo
-	os.mkdir(working)
-	os.chdir(working)
-	if darcs2:
-		os.system("darcs init --darcs-2")
-	else:
-		os.system("darcs init --old-fashioned-inventory")
-else:
-	os.chdir(working)
-if options.import_marks:
-	sock = os.popen("darcs pull -a --match 'hash %s' %s" % (import_marks[-1], origin))
-	log("Building/updating working directory:\n%s" % sock.read())
-	sock.close()
-
-# this is the number of the NEXT patch
-count = 1
-patches = xmldoc.getElementsByTagName('patch')
-if len(import_marks):
-	patches = patches[1:]
-	count = len(import_marks) + 1
-if len(export_marks):
-	# this is the mark number of the NEXT patch
-	markcount = int(export_marks[-1].split(' ')[0][1:]) + 1
-else:
-	markcount = count
-# this may be huge and we need it many times
-patchnum = len(patches)
-
-if not len(import_marks):
-	progress("starting export, repo has %d patches" % patchnum)
-else:
-	progress("continuing export, %d patches to convert" % patchnum)
-paths = []
-for i in patches:
-	# apply the patch
-	hash = i.attributes['hash'].value
-	buf = ["\nNew patches:\n"]
-	if oldfashionedpatch:
-		sock = gzip.open(os.path.join(origin, "_darcs", "patches", hash))
-	else:
-		sock = gzip.open(os.path.join(origin, "_darcs", "patches", hashes[count-1]))
-	buf.append(sock.read())
-	sock.close()
-	sock = os.popen("darcs changes --context")
-	buf.append(sock.read())
-	sock.close()
-	sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-	sock.stdin.write("".join(buf))
-	sock.stdin.close()
-	log("Applying %s:\n%s" % (hash, sock.stdout.read()))
-	sock.stdout.close()
-	message = get_patchname(i)
-	# export the commit
-	print "commit %s" % git_branch
-	print "mark :%s" % markcount
-	if options.export_marks:
-		export_marks.append(":%s %s" % (markcount, hash))
-	date = get_date(i.attributes['date'].value)
-	print "committer %s %s %s" % (get_author(i), date, get_zone_str())
-	print "data %d\n%s" % (len(message), message)
-	if markcount > 1:
-		print "from :%s" % (markcount-1)
-	# export the files
-	for j in paths:
-		print "D %s" % j
-	paths = []
-	for (root, dirs, files) in os.walk ("."):
-		for f in files:
-			j = os.path.normpath(os.path.join(root, f))
-			if j.startswith("_darcs") or "-darcs-backup" in j:
-				continue
-			paths.append(j)
-			sock = open(j)
-			buf = sock.read()
+			date = time.strptime(patch, "%Y%m%d%H%M%S")
+		except ValueError:
+			date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y')
+		return calendar.timegm(date)
+
+	def progress(self, s):
+		print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)
+		sys.stdout.flush()
+
+	def log(self, s):
+		self.logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s))
+		self.logsock.flush()
+
+	def parse_inventory(self, sock=None):
+		prev = None
+		nextprev = False
+		buf = []
+		if not sock:
+			sock = self.open(os.path.join(self.origin, "_darcs", "hashed_inventory"))
+		for i in sock.readlines():
+			if i.startswith("hash"):
+				buf.insert(0, i[6:-1])
+			if i.startswith("Starting with inventory:"):
+				nextprev = True
+			elif nextprev:
+				prev = i[:-1]
+				nextprev = False
+		sock.close()
+		for i in buf:
+			self.hashes.insert(0, i)
+		if prev:
+			sock = self.gzip_open(os.path.join(self.origin, "_darcs", "inventories", prev))
+			self.parse_inventory(sock)
+
+	# this is like gzip.open but supports urls as well
+	def gzip_open(self, path):
+		if os.path.exists(path):
+			return gzip.open(path)
+		buf = urllib.urlopen(path).read()
+		sock = StringIO.StringIO(buf)
+		return gzip.GzipFile(fileobj=sock)
+
+	# like os.path.exists, but supports urls as well; HTTP errors, unreachable
+	# hosts and scheme-less (i.e. missing local) paths all count as not existing
+	def path_exists(self, path):
+		if os.path.exists(path):
+			return True
+		try:
+			urllib2.urlopen(urllib2.Request(path)).close()
+			return True
+		except (urllib2.URLError, ValueError):
+			return False
+
+	# this is like open, but supports urls as well
+	def open(self, path):
+		if os.path.exists(path):
+			return open(path)
+		else:
+			return urllib.urlopen(path)
+	
+	def handle_opts(self):
+		# Option Parser
+		usage="%prog [options] darcsrepo"
+		opp = optparse.OptionParser(usage=usage)
+		opp.add_option("--import-marks", metavar="IFILE",
+			help="read state for incremental imports from IFILE")
+		opp.add_option("--export-marks", metavar="OFILE",
+			help="write state for incremental imports from OFILE")
+		opp.add_option("--encoding",
+			help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess")
+		opp.add_option("--authors-file", metavar="F",
+			help="read author transformations in old=new format from F")
+		opp.add_option("--working", metavar="W",
+			help="working directory which is removed at the end of non-incremental conversions")
+		opp.add_option("--logfile", metavar="L",
+			help="log file which contains the output of external programs invoked during the conversion")
+		opp.add_option("--git-branch", metavar="B",
+			help="git branch [default: refs/heads/master]")
+		opp.add_option("--progress", metavar="P",
+			help="insert progress statements after every n commit [default: 100]")
+		(self.options, self.args) = opp.parse_args()
+		if len(self.args) < 1:
+			opp.error("darcsrepo required")
+
+		# read author mapping file in gitauthors format,
+		# i.e. in=out (one mapping per line)
+		if self.options.authors_file:
+			sock = open(self.options.authors_file)
+			self.authormap = dict([i.strip().split('=',1) for i in sock])
 			sock.close()
-			# darcs does not track the executable bit :/
-			print "M 644 inline %s" % j
-			print "data %s\n%s" % (len(buf), buf)
-	if message[:4] == "TAG ":
-		tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_')
-		print "tag %s" % tag
-		print "from :%s" % markcount
-		print "tagger %s %s %s" % (get_author(i), date, get_zone_str())
-		print "data %d\n%s" % (len(message), message)
-	if count % prognum == 0:
-		progress("%d/%d patches" % (count, patchnum))
-	count += 1
-	markcount += 1
 
-os.chdir(cwd)
+		if "://" not in self.args[0]:
+			self.origin = os.path.abspath(self.args[0])
+		else:
+			self.origin = self.args[0].strip('/')
+		if self.options.working:
+			self.working = os.path.abspath(self.options.working)
+		else:
+			if "://" not in self.origin:
+				self.working = "%s.darcs" % self.origin
+			else:
+				self.working = "%s.darcs" % os.path.split(self.origin)[-1]
+		if self.options.logfile:
+			logfile = os.path.abspath(self.options.logfile)
+		else:
+			if "://" not in self.origin:
+				logfile = "%s.log" % self.origin
+			else:
+				logfile = "%s.log" % os.path.split(self.origin)[-1]
+		self.logsock = open(logfile, "a")
+		if self.options.git_branch:
+			self.git_branch = self.options.git_branch
+		else:
+			self.git_branch = "refs/heads/master"
+
+		if self.options.progress:
+			self.prognum = int(self.options.progress)
+		else:
+			self.prognum = 100
+
+	def handle_import_marks(self):
+		if self.options.import_marks:
+			sock = open(self.options.import_marks)
+			for i in sock.readlines():
+				line = i.strip()
+				if not len(line):
+					continue
+				self.import_marks.append(line.split(' ')[1])
+				self.export_marks.append(line)
+			sock.close()
+	
+	def get_patches(self):
+		self.progress("getting list of patches")
+		if not len(self.import_marks):
+			sock = os.popen("darcs changes --xml --reverse --repo %s" % self.origin)
+		else:
+			sock = os.popen("darcs changes --xml --reverse  --repo %s --from-match 'hash %s'" % (self.origin, self.import_marks[-1]))
+		buf = sock.read()
+		sock.close()
+		# this is hackish. we need to escape some bad chars, otherwise the xml
+		# will not be valid
+		buf = buf.replace('\x1b', '^[')
+		if self.options.encoding:
+			xmldoc = xml.dom.minidom.parseString(unicode(buf, self.options.encoding).encode('utf-8'))
+		else:
+			try:
+				xmldoc = xml.dom.minidom.parseString(buf)
+			except xml.parsers.expat.ExpatError:
+				try:
+					import chardet
+				except ImportError:
+					sys.exit("Error, encoding is not utf-8. Please " +
+						"either specify it with the --encoding " +
+						"option or install chardet.")
+				self.progress("encoding is not utf8, guessing charset")
+				encoding = chardet.detect(buf)['encoding']
+				self.progress("detected encoding is %s" % encoding)
+				xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8'))
+		sys.stdout.flush()
+		return xmldoc.getElementsByTagName('patch')
+
+	def setup_workdir(self):
+		darcs2 = False
+		self.oldfashionedpatch = True
+		self.cwd = os.getcwd()
+		if self.path_exists(os.path.join(self.origin, "_darcs", "format")):
+			sock = self.open(os.path.join(self.origin, "_darcs", "format"))
+			format = [x.strip() for x in sock]
+			sock.close()
+			darcs2 = 'darcs-2' in format
+			self.oldfashionedpatch = not 'hashed' in format
+		if not self.oldfashionedpatch:
+			self.progress("parsing the inventory")
+			if "://" not in self.origin:
+				os.chdir(self.origin)
+			self.parse_inventory()
+		if not self.options.import_marks or not os.path.exists(self.working):
+			# init the tmp darcs repo
+			os.mkdir(self.working)
+			os.chdir(self.working)
+			if darcs2:
+				os.system("darcs init --darcs-2")
+			else:
+				os.system("darcs init --old-fashioned-inventory")
+		else:
+			os.chdir(self.working)
+		if self.options.import_marks:
+			sock = os.popen("darcs pull -a --match 'hash %s' %s" % (self.import_marks[-1], self.origin))
+			self.log("Building/updating working directory:\n%s" % sock.read())
+			sock.close()
+	
+	def export_patches(self):
+		patches = self.get_patches()
+		# this is the number of the NEXT patch
+		count = 1
+		if len(self.import_marks):
+			patches = patches[1:]
+			count = len(self.import_marks) + 1
+		if len(self.export_marks):
+			# this is the mark number of the NEXT patch
+			markcount = int(self.export_marks[-1].split(' ')[0][1:]) + 1
+		else:
+			markcount = count
+		# this may be huge and we need it many times
+		patchnum = len(patches)
+
+		if not len(self.import_marks):
+			self.progress("starting export, repo has %d patches" % patchnum)
+		else:
+			self.progress("continuing export, %d patches to convert" % patchnum)
+		paths = []
+		for i in patches:
+			# apply the patch
+			hash = i.attributes['hash'].value
+			buf = ["\nNew patches:\n"]
+			if self.oldfashionedpatch:
+				sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", hash))
+			else:
+				sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", self.hashes[count-1]))
+			buf.append(sock.read())
+			sock.close()
+			sock = os.popen("darcs changes --context")
+			buf.append(sock.read())
+			sock.close()
+			sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+			sock.stdin.write("".join(buf))
+			sock.stdin.close()
+			self.log("Applying %s:\n%s" % (hash, sock.stdout.read()))
+			sock.stdout.close()
+			message = self.get_patchname(i)
+			# export the commit
+			print "commit %s" % self.git_branch
+			print "mark :%s" % markcount
+			if self.options.export_marks:
+				self.export_marks.append(":%s %s" % (markcount, hash))
+			date = self.get_date(i.attributes['date'].value)
+			print "committer %s %s +0000" % (self.get_author(i), date)
+			print "data %d\n%s" % (len(message), message)
+			if markcount > 1:
+				print "from :%s" % (markcount-1)
+			# export the files
+			for j in paths:
+				print "D %s" % j
+			paths = []
+			for (root, dirs, files) in os.walk ("."):
+				for f in files:
+					j = os.path.normpath(os.path.join(root, f))
+					if j.startswith("_darcs") or "-darcs-backup" in j:
+						continue
+					paths.append(j)
+					sock = open(j)
+					buf = sock.read()
+					sock.close()
+					# darcs does not track the executable bit :/
+					print "M 644 inline %s" % j
+					print "data %s\n%s" % (len(buf), buf)
+			if message[:4] == "TAG ":
+				tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_')
+				print "tag %s" % tag
+				print "from :%s" % markcount
+				print "tagger %s %s +0000" % (self.get_author(i), date)
+				print "data %d\n%s" % (len(message), message)
+			if count % self.prognum == 0:
+				self.progress("%d/%d patches" % (count, patchnum))
+			count += 1
+			markcount += 1
+
+		os.chdir(self.cwd)
+
+		if not self.options.export_marks:
+			shutil.rmtree(self.working)
+		self.logsock.close()
+	
+	def handle_export_marks(self):
+		if self.options.export_marks:
+			self.progress("writing export marks")
+			sock = open(self.options.export_marks, 'w')
+			sock.write("\n".join(self.export_marks))
+			sock.write("\n")
+			sock.close()
 
-if not options.export_marks:
-	shutil.rmtree(working)
-logsock.close()
+		self.progress("finished")
 
-if options.export_marks:
-	progress("writing export marks")
-	sock = open(options.export_marks, 'w')
-	sock.write("\n".join(export_marks))
-	sock.write("\n")
-	sock.close()
+	def handle(self):
+		self.handle_opts()
+		self.handle_import_marks()
+		self.setup_workdir()
+		self.export_patches()
+		self.handle_export_marks()
 
-progress("finished")
+if __name__ == "__main__":
+	h = Handler()
+	h.handle()
diff --git a/exporters/darcs/darcs-fast-export.txt b/exporters/darcs/darcs-fast-export.txt
index 3ddd02e..d404ecf 100644
--- a/exporters/darcs/darcs-fast-export.txt
+++ b/exporters/darcs/darcs-fast-export.txt
@@ -18,6 +18,10 @@ The script can produce the fast-import stream format from the darcs
 repository. It supports incremental conversion as well, via the
 --import-marks / --export-marks switches.
 
+Optionally the darcsrepo string may be an HTTP repository; in that case
+only the patches are downloaded, not the pristine tree, speeding up a
+one-time import.
+
 == OPTIONS
 
 -h, --help::
diff --git a/exporters/darcs/darcs-fast-import b/exporters/darcs/darcs-fast-import
index 2955164..69ec7bb 100755
--- a/exporters/darcs/darcs-fast-import
+++ b/exporters/darcs/darcs-fast-import
@@ -4,7 +4,7 @@
 
     darcs-fast-export - darcs backend for fast data exporters
 
-    Copyright (c) 2008 Miklos Vajna <vmiklos@frugalware.org>
+    Copyright (c) 2008, 2009, 2010 Miklos Vajna <vmiklos@frugalware.org>
     Copyright (c) 2008 Matthias Andree <matthias.andree@gmx.de>
 
     This program is free software; you can redistribute it and/or modify
@@ -81,6 +81,11 @@ class Handler:
 		raise Exception(s)
 
 	def get_date(self, ts, tz):
+		# first fix the case when tz is higher than +1200, as
+		# darcs won't accept it
+		if int(tz[:3]) > 12:
+			ts = str(int(ts) + 60*60*24)
+			tz = str(int(tz[:3])-24) + tz[3:]
                 # int(ts) is seconds since epoch. Since we're trying to
                 # capture both the absolute time of the commit and the
                 # localtime in the timezone of the committer, we need to turn
@@ -99,6 +104,13 @@ class Handler:
 		items = s.split(' ')
 		return " ".join(items[:-1]) + " " + tz + " " + items[-1]
 
+	def invoke_darcs(self, cmdline):
+		if os.system("darcs %s" % cmdline) != 0:
+			self.bug("darcs failed")
+
+	def invoke_add(self, path):
+		self.invoke_darcs("add --boring --case-ok %s" % path)
+
 	def handle_mark(self):
 		if self.line.startswith("mark :"):
 			self.mark_num = int(self.line[6:-1])
@@ -143,6 +155,8 @@ class Handler:
 		sock.stdin.close()
 		self.log("Tagging %s:\n%s" % (version, sock.stdout.read()))
 		sock.stdout.close()
+		if sock.wait() != 0:
+			self.bug("darcs tag failed: '%s'" % sock.returncode)
 
 	def handle_commit(self):
 		if not self.prevfiles and self.options.import_marks:
@@ -156,6 +170,7 @@ class Handler:
 					self.files.append(path)
 		self.prevfiles = self.files[:]
 		adds = []
+		symlinks = []
 
 		self.read_next_line()
 		self.handle_mark()
@@ -173,32 +188,47 @@ class Handler:
 			self.read_next_line()
 		while self.line.startswith("merge "):
 			self.read_next_line()
+		change = False
 		while len(self.line) > 0:
 			if self.line.startswith("deleteall"):
 				path = self.line[2:-1]
 				for path in self.files:
 					os.unlink(path)
 				self.files = []
+				change = True
 			elif self.line.startswith("D "):
 				path = self.line[2:-1]
 				if os.path.exists(path):
 					os.unlink(path)
 				if path in self.files:
 					self.files.remove(path)
+				change = True
 			elif self.line.startswith("R "):
-				os.system("darcs mv %s" % self.line[2:])
+				self.invoke_darcs("mv %s" % self.line[2:])
+				change = True
 			elif self.line.startswith("C "):
 				src, dest = self.line[:-1].split(' ')[1:]
 				shutil.copy(src.strip('"'), dest.strip('"'))
-				os.system("darcs add %s" % dest)
+				self.invoke_add(dest)
+				change = True
 			elif self.line.startswith("M "):
 				items = self.line.split(' ')
 				path = items[3][:-1]
+				dir = os.path.split(path)[0]
+				if len(dir) and not os.path.exists(dir):
+					os.makedirs(dir)
+				if items[1] == "120000":
+					if not self.options.symhack:
+						print "Adding symbolic links (symlinks) is not supported by Darcs."
+						sys.exit(2)
+					idx = int(items[2][1:]) # TODO: handle inline symlinks
+					symlinks.append((self.marks[idx], path))
+					self.read_next_line()
+					continue
 				sock = open(path, "w")
 				if items[2] != "inline":
 					idx = int(items[2][1:])
 					sock.write(self.marks[idx])
-					del self.marks[idx]
 				else:
 					self.read_next_line()
 					self.handle_data()
@@ -208,6 +238,7 @@ class Handler:
 					adds.append(path)
 				if path not in self.files:
 					self.files.append(path)
+				change = True
 			else:
 				self.unread_line = True
 				break
@@ -215,15 +246,35 @@ class Handler:
 			if not len(self.line):
 				break
 
+		if not change:
+			# darcs does not support empty commits
+			return
 		for i in adds:
-			os.system("darcs add %s" % i)
-		sock = subprocess.Popen(["darcs", "record", "--ignore-times", "-a", "--pipe"], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-		buf = [self.date, self.ident, self.short, self.long]
-		sock.stdin.write("\n".join(buf))
+			self.invoke_add(i)
+		args = ["darcs", "record", "--ignore-times", "-a", "--pipe"]
+		buf = [self.date, self.ident]
+		if not len(self.short):
+			args.extend(['-m', ''])
+		else:
+			buf.extend([self.short, self.long])
+		sock = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+		sock.stdin.write("\n".join(buf)+"\n")
 		sock.stdin.close()
 		self.log("Recording :%s:\n%s" % (self.mark_num, sock.stdout.read()))
 		sock.stdout.close()
-
+		if sock.wait() != 0:
+			self.bug("darcs record failed: '%s'" % sock.returncode)
+
+		for src, path in symlinks:
+			# symlink does not do what we want if path is
+			# already there
+			if os.path.exists(path):
+				# rmtree() does not work on symlinks
+				if os.path.islink(path):
+					os.remove(path)
+				else:
+					shutil.rmtree(path)
+			os.symlink(src, path)
 		if self.options.export_marks:
 			# yeah, an xml parser would be better, but
 			# should we mess with encodings just because of
@@ -235,19 +286,24 @@ class Handler:
 			self.export_marks.append(":%s %s" % (self.mark_num, hash))
 
 	def handle_progress(self, s):
-		print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s.strip())
+		print "import progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s.strip())
 		sys.stdout.flush()
 
 	def handle_opts(self):
 		# Option Parser
 		usage="%prog [options]"
 		opp = optparse.OptionParser(usage=usage)
+		opp.set_defaults(symhack=False)
 		opp.add_option("--import-marks", metavar="IFILE",
 			help="read state for incremental imports from IFILE")
 		opp.add_option("--export-marks", metavar="OFILE",
 			help="write state for incremental imports to OFILE")
 		opp.add_option("--logfile", metavar="L",
 			help="log file which contains the output of external programs invoked during the conversion")
+		opp.add_option("--symhack", action="store_true", dest="symhack",
+			help="Do not error out when a symlink would be created, just create it in the workdir")
+		opp.add_option("--progress", metavar="P",
+			help="insert progress statements after every n commit [default: 100]")
 		(self.options, args) = opp.parse_args()
 
 		if self.options.logfile:
@@ -255,6 +311,11 @@ class Handler:
 		else:
 			logfile = "_darcs/import.log"
 		self.logsock = open(os.path.abspath(logfile), "a")
+
+		if self.options.progress:
+			self.prognum = int(self.options.progress)
+		else:
+			self.prognum = 0
 	
 	def log(self, s):
 		self.logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s))
@@ -282,6 +343,7 @@ class Handler:
 		self.handle_opts()
 		self.handle_import_marks()
 
+		commitcount = 0
 		while not self.eof:
 			self.read_next_line()
 			if not len(self.line[:-1]):
@@ -290,6 +352,9 @@ class Handler:
 				self.handle_blob()
 			elif self.line.startswith("commit"):
 				self.handle_commit()
+				commitcount += 1
+				if self.prognum != 0 and commitcount % self.prognum == 0:
+					self.handle_progress("%d patches" % commitcount)
 			elif self.line.startswith("tag"):
 				self.handle_tag()
 			elif self.line.startswith("reset"):
diff --git a/exporters/darcs/darcs-fast-import.txt b/exporters/darcs/darcs-fast-import.txt
index 09c7b1e..a7f2a12 100644
--- a/exporters/darcs/darcs-fast-import.txt
+++ b/exporters/darcs/darcs-fast-import.txt
@@ -33,3 +33,25 @@ as well, via the --import-marks / --export-marks switches.
 --logfile::
 	The output of external commands are redirected to a log file. You can
 	specify the path of that file with this parameter.
+
+--symhack::
+	Enable hack for symbolic links. darcs add does not handle them
+	but in case they are just added, we can create them in the working
+	directory. This can be handy in case for example the symbolic link is in
+	a subdirectory of the project and you don't even care about that
+	subdirectory. So the hack can be useful, but be extremely careful when
+	you use it.
+
+--progress=<n>::
+	Insert progress statements after every <n> created patches. The
+	default is not to print anything as progress info is usually provided by
+	the exporter. Use this option in case the exporter does not have such a
+	switch but you still want to get some feedback.
+
+== EXIT CODES
+
+The exit code is:
+
+* 0 on success
+* 1 on unhandled exception
+* 2 in case the stream would try to let the importer create a symlink
diff --git a/exporters/darcs/git-darcs b/exporters/darcs/git-darcs
index eb70338..18455a2 100755
--- a/exporters/darcs/git-darcs
+++ b/exporters/darcs/git-darcs
@@ -2,7 +2,7 @@
 #
 #   git-darcs - bidirectional operation between a darcs repo and git
 #
-#   Copyright (c) 2008 by Miklos Vajna <vmiklos@frugalware.org>
+#   Copyright (c) 2008, 2010 by Miklos Vajna <vmiklos@frugalware.org>
 #
 #   Based on git-bzr, which is
 #
@@ -32,24 +32,25 @@ add()
 	shift
 	if ! [ -n "$name" -a -n "$location" ]; then
 		echo "Usage: git darcs add name location [darcs-fast-export options]"
-		exit
+		return 1
 	fi
 	if git remote show |grep -q $name; then
 		echo "There is already a remote with that name"
-		exit
+		return 1
 	fi
 	if [ -n "$(git config git-darcs.$name.location)" ]; then
 		echo "There is already a darcs repo with that name"
-		exit
+		return 1
 	fi
-	if [ ! -d $location/_darcs ]; then
+	repo=$location/_darcs
+	if [ ! -d $repo ] && ! wget --quiet --spider $repo; then
 		echo "Remote is not a darcs repository"
-		exit
+		return 1
 	fi
 	git config git-darcs.$name.location $location
-	git config git-darcs.$name.darcs-fast-export-options "$*"
 	echo "Darcs repo $name added. You can fetch it with 'git darcs fetch $name'"
 	if ! [ -z "$*" ]; then
+		git config git-darcs.$name.darcs-fast-export-options "$*"
 		echo "darcs-fast-export will get options: $*"
 	fi
 }
@@ -59,7 +60,7 @@ get_location()
 	l=$(git config git-darcs.$remote.location)
 	if [ -z "$l" ]; then
 		echo "Cannot find darcs remote with name '$remote'." >&2
-		exit
+		return 1
 	fi
 	echo $l
 }
@@ -70,13 +71,17 @@ fetch()
 	shift
 	if ! [ -n "$remote" -a -z "$*" ]; then
 		echo "Usage: git darcs fetch reponame"
-		exit
+		return 1
 	fi
-	location=$(get_location $remote)
+	location=$(get_location $remote) || return $?
 	git_map=$git_dir/darcs-git/$remote-git-map
 	darcs_map=$git_dir/darcs-git/$remote-darcs-map
-	common_opts="--working $git_dir/darcs-git/repo --logfile $git_dir/darcs-git/fetch.log --git-branch=darcs/$remote"
+	common_opts="--working $git_dir/darcs-git/repo --logfile $git_dir/darcs-git/fetch.log --git-branch=refs/remotes/darcs/$remote"
         dfe_opts=$(git config git-darcs.$remote.darcs-fast-export-options)
+	pre_fetch="$(git config git-darcs.$remote.pre-fetch)"
+	if [ -n "$pre_fetch" ]; then
+		$pre_fetch
+	fi
 	if [ ! -f $git_map -a ! -f $darcs_map ]; then
 		echo "There doesn't seem to be an existing refmap."
 		echo "Doing an initial import"
@@ -85,20 +90,24 @@ fetch()
 			git fast-import --export-marks=$git_map
 	elif [ -f $git_map -a -f $darcs_map ]; then
 		echo "Updating remote $remote"
-		old_rev=$(git rev-parse darcs/$remote)
+		old_rev=$(git rev-parse refs/remotes/darcs/$remote)
 		darcs-fast-export --import-marks=$darcs_map --export-marks=$darcs_map $common_opts $dfe_opts $location | \
 			git fast-import --quiet --import-marks=$git_map --export-marks=$git_map
-		new_rev=$(git rev-parse darcs/$remote)
+		new_rev=$(git rev-parse refs/remotes/darcs/$remote)
 		if [ "$old_rev" != "$new_rev" ]; then
 			echo "Fetched the following updates:"
 			git shortlog $old_rev..$new_rev
 		else
 			echo "Nothing fetched."
-			exit
+			return 0
 		fi
 	else
 		echo "One of the mapfiles is missing! Something went wrong!"
-		exit
+		return 1
+	fi
+	post_fetch="$(git config git-darcs.$remote.post-fetch)"
+	if [ -n "$post_fetch" ]; then
+		$post_fetch
 	fi
 }
 
@@ -108,15 +117,15 @@ pull()
 	shift
 	if ! [ -n "$remote" -a -z "$*" ]; then
 		echo "Usage: git darcs pull reponame"
-		exit
+		return 1
 	fi
-	fetch $remote
+	fetch $remote || return $?
 	# see if we need to merge or rebase
 	branch=$(git symbolic-ref HEAD|sed 's|.*/||')
 	if [ "$(git config branch.$branch.rebase)" = "true" ]; then
-		git rebase darcs/$remote
+		git rebase refs/remotes/darcs/$remote
 	else
-		git merge darcs/$remote
+		git merge refs/remotes/darcs/$remote
 	fi
 }
 
@@ -126,30 +135,38 @@ push()
 	shift
 	if ! [ -n "$remote" -a -z "$*" ]; then
 		echo "Usage: git darcs push reponame"
-		exit
+		return 1
 	fi
-	location=$(get_location $remote)
-	if [ -n "$(git rev-list --left-right HEAD...darcs/$remote | sed -n '/^>/ p')" ]; then
+	location=$(get_location $remote) || return $?
+	if [ -n "$(git rev-list --left-right HEAD...refs/remotes/darcs/$remote | sed -n '/^>/ p')" ]; then
 		echo "HEAD is not a strict child of $remote, cannot push. Merge first"
-		exit
+		return 1
 	fi
-	if [ -z "$(git rev-list --left-right HEAD...darcs/$remote | sed -n '/^</ p')" ]; then
+	if [ -z "$(git rev-list --left-right HEAD...refs/remotes/darcs/$remote | sed -n '/^</ p')" ]; then
 		echo "Nothing to push. Commit something first"
-		exit
+		return 1
 	fi
 	git_map=$git_dir/darcs-git/$remote-git-map
 	darcs_map=$git_dir/darcs-git/$remote-darcs-map
 	if [ ! -f $git_map -o ! -f $darcs_map ]; then
 		echo "We do not have refmapping yet. Then how can I push?"
-		exit
+		return 1
+	fi
+	pre_push="$(git config git-darcs.$remote.pre-push)"
+	if [ -n "$pre_push" ]; then
+		$pre_push
 	fi
 	echo "Pushing the following updates:"
-	git shortlog darcs/$remote..
+	git shortlog refs/remotes/darcs/$remote..
 	git fast-export --import-marks=$git_map --export-marks=$git_map HEAD | \
 		(cd $location; darcs-fast-import --import-marks=$darcs_map --export-marks=$darcs_map \
 		--logfile $git_dir/darcs-git/push.log)
 	if [ $? == 0 ]; then
-		git update-ref darcs/$remote HEAD
+		git update-ref refs/remotes/darcs/$remote HEAD
+		post_push="$(git config git-darcs.$remote.post-push)"
+		if [ -n "$post_push" ]; then
+			$post_push
+		fi
 	fi
 }
 
@@ -159,18 +176,18 @@ list()
 	if [ -z "$*" ]
 	then
 		git config -l | sed -n -e '/git-darcs\..*/ {s/git-darcs\.//; s/\.location=.*//p}'
-		exit
+		return 0
 	elif [ "$#" -eq 1 ]
 	then
 		case $1 in
 			-v|--verbose)
 				git config -l | sed -n -e '/git-darcs\..*/ {s/git-darcs\.//; s/\.location=/\t/p}'
-				exit
+				return 0
 				;;
 		esac
 	fi
 	echo "Usage: git darcs list [-v|--verbose]"
-	exit 1
+	return 1
 }
 
 # Find the darcs commit(s) supporting a git SHA1 prefix
@@ -181,9 +198,9 @@ find_darcs()
 	if [ -z "$sha1" -o -n "$*" ]
 	then
 		echo "Usage: git darcs find-darcs <sha1-prefix>"
-		exit 1
+		return 1
 	fi
-	for remote in $git_dir/darcs/*
+	for remote in $(git for-each-ref --format='%(refname)' refs/remotes/darcs)
 	do
 		remote=`basename $remote`
 		git_map=$git_dir/darcs-git/$remote-git-map
@@ -191,7 +208,7 @@ find_darcs()
 		if [ ! -f $git_map -o ! -f $darcs_map ]
 		then
 			echo "Missing mappings for remote $remote"
-			exit 1
+			return 1
 		fi
 		for row in `sed -n -e "/:.* $sha1.*/ s/[^ ]*/&/p" $git_map`
 		do
@@ -208,9 +225,9 @@ find_git()
 	if [ -z "$patch" -o -n "$*" ]
 	then
 		echo "Usage: git darcs find-git <patch-prefix>"
-		exit 1
+		return 1
 	fi
-	for remote in $git_dir/darcs/*
+	for remote in $(git for-each-ref --format='%(refname)' refs/remotes/darcs)
 	do
 		remote=`basename $remote`
 		git_map=$git_dir/darcs-git/$remote-git-map
@@ -218,7 +235,7 @@ find_git()
 		if [ ! -f $git_map -o ! -f $darcs_map ]
 		then
 			echo "Missing mappings for remote $remote"
-			exit 1
+			return 1
 		fi
 		for row in `sed -n -e "/:.* $patch.*/ s/[^ ]*/&/p" $darcs_map`
 		do
@@ -230,7 +247,7 @@ find_git()
 git rev-parse 2> /dev/null
 if [ $? != 0 ]; then
 	echo "Must be inside a git repository to work"
-	exit
+	exit 1
 fi
 
 git_dir=$(git rev-parse --git-dir)
@@ -253,7 +270,7 @@ case $command in
 	*)
 		echo "Usage: git darcs [COMMAND] [OPTIONS]"
 		echo "Commands: add, push, fetch, pull, list, find-darcs, find-git"
-		exit
+		exit 1
 		;;
 esac
 
diff --git a/exporters/darcs/git-darcs.txt b/exporters/darcs/git-darcs.txt
index 7558329..8bf5b33 100644
--- a/exporters/darcs/git-darcs.txt
+++ b/exporters/darcs/git-darcs.txt
@@ -20,7 +20,7 @@ A typical workflow is:
 $ mkdir git-repo
 $ cd git-repo
 $ git init
-$ git darcs add upstream ../darcs-repo
+$ git darcs add upstream /path/to/darcs-repo
 $ git darcs pull upstream
 
 ... hack, hack, hack ...
@@ -70,3 +70,23 @@ find-darcs::
 find-git::
 	Searches for git commits matching a darcs patch prefix.
 	The syntax is `find-git <patch-prefix>`.
+
+== HOOKS
+
+It's possible to automatically run commands before and after the fetch and
+push subcommands. For example, if you want to automatically run `darcs
+pull -a` before a `git darcs fetch upstream`:
+
+----
+git config git-darcs.upstream.pre-fetch "darcs pull -a --repodir=/path/to/darcs-repo"
+----
+
+Or in case you want to automatically `darcs send` all patches after a
+`git darcs push upstream`:
+
+----
+git config git-darcs.upstream.post-push "darcs send -a --repodir=/path/to/darcs-repo"
+----
+
+== SEE-ALSO
+*git*(1), *darcs*(1)
diff --git a/exporters/darcs/t/lib-httpd.sh b/exporters/darcs/t/lib-httpd.sh
new file mode 100644
index 0000000..fad953e
--- /dev/null
+++ b/exporters/darcs/t/lib-httpd.sh
@@ -0,0 +1,67 @@
+#!/bin/sh
+#
+# This is based on git's t/lib-httpd.sh, which is
+# Copyright (c) 2008 Clemens Buchacher <drizzd@aon.at>
+#
+
+if test -n "$DFE_TEST_SKIP_HTTPD"
+then
+	echo "skipping test (undef DFE_TEST_SKIP_HTTPD to enable)"
+	exit
+fi
+
+LIB_HTTPD_PATH=${LIB_HTTPD_PATH-'/usr/sbin/httpd'}
+LIB_HTTPD_PORT=${LIB_HTTPD_PORT-'8111'}
+
+HTTPD_ROOT_PATH="$PWD"/httpd
+HTTPD_DOCUMENT_ROOT_PATH=$HTTPD_ROOT_PATH/www
+
+if ! test -x "$LIB_HTTPD_PATH"
+then
+        echo "skipping test, no web server found at '$LIB_HTTPD_PATH'"
+        exit
+fi
+
+HTTPD_VERSION=`$LIB_HTTPD_PATH -v | \
+	sed -n 's/^Server version: Apache\/\([0-9]*\)\..*$/\1/p; q'`
+
+if test -n "$HTTPD_VERSION"
+then
+	if test -z "$LIB_HTTPD_MODULE_PATH"
+	then
+		if ! test $HTTPD_VERSION -ge 2
+		then
+			echo "skipping test, at least Apache version 2 is required"
+			exit
+		fi
+
+		LIB_HTTPD_MODULE_PATH='/usr/lib/apache'
+	fi
+else
+	error "Could not identify web server at '$LIB_HTTPD_PATH'"
+fi
+
+HTTPD_PARA="-d $HTTPD_ROOT_PATH -f $HTTPD_ROOT_PATH/apache.conf"
+
+prepare_httpd() {
+	mkdir -p $HTTPD_DOCUMENT_ROOT_PATH
+
+	ln -s $LIB_HTTPD_MODULE_PATH $HTTPD_ROOT_PATH/modules
+
+	echo "PidFile httpd.pid" > $HTTPD_ROOT_PATH/apache.conf
+	echo "DocumentRoot www" >> $HTTPD_ROOT_PATH/apache.conf
+	echo "ErrorLog error.log" >> $HTTPD_ROOT_PATH/apache.conf
+
+	HTTPD_URL=http://127.0.0.1:$LIB_HTTPD_PORT
+}
+
+start_httpd() {
+	prepare_httpd
+
+	"$LIB_HTTPD_PATH" $HTTPD_PARA \
+		-c "Listen 127.0.0.1:$LIB_HTTPD_PORT" -k start
+}
+
+stop_httpd() {
+	"$LIB_HTTPD_PATH" $HTTPD_PARA -k stop
+}
diff --git a/exporters/darcs/t/lib.sh b/exporters/darcs/t/lib.sh
index 3df0a8a..7d2218a 100644
--- a/exporters/darcs/t/lib.sh
+++ b/exporters/darcs/t/lib.sh
@@ -78,6 +78,10 @@ third line"
 	_drrec -a -m "remove and rename"
 	darcs mv a b
 	_drrecamend
+	echo c > c
+	darcs add c
+	# empty commit message
+	_drrec -a -m ""
 	cd ..
 }
 
@@ -150,6 +154,7 @@ create_hg()
 	hg pull ../$1.tmp
 	hg merge
 	echo D > file
+	hg resolve -m file
 	echo "first line
 second line
 third line" | hg commit -l /dev/stdin
@@ -172,6 +177,12 @@ third line" | hg commit -l /dev/stdin
 	hg commit -m "add empty file"
 	hg rm file3
 	hg commit -m "remove file"
+	mkdir subdir
+	echo test > subdir/file
+	hg add subdir/file
+	hg commit -m "add subdir file"
+	echo test2 > subdir/file
+	hg commit -m "commit with weird date" -d "Fri Apr 03 12:38:26 2009 +1300"
 	cd ..
 }
 create_git()
@@ -180,6 +191,7 @@ create_git()
 	mkdir -p $1
 	cd $1
 	git init $2
+	git commit --allow-empty -m 'root commit'
 	echo A > file
 	git add file
 	git commit -a -m A
@@ -213,6 +225,23 @@ third line" | git commit -a -F -
 	git commit -a -m "add empty file"
 	rm file3
 	git commit -a -m "remove file"
+	# now add back 'file' with its old contents, so the mark gets
+	# reused
+	echo f > file
+	git add file
+	git commit -a -m "file: other -> f"
+	# this is a boring file for Darcs
+	touch foo.pyc
+	git add foo.pyc
+	git commit -a -m "boring file"
+	# replace an uppercase file with a lowercase one
+	echo SPAM > SPAM
+	git add SPAM
+	git commit -a -m SPAM
+	rm SPAM
+	echo spam > spam
+	git add spam
+	git commit -a -m "SPAM -> spam"
 	cd ..
 }
 
@@ -226,12 +255,14 @@ diff_git()
 
 diff_importgit()
 {
+	test -z "`(cd $1.darcs; darcs diff)`" &&
 	diff --exclude _darcs --exclude .git --exclude '*-darcs-backup*' -Nur $1 $1.darcs
 	return $?
 }
 
 diff_importhg()
 {
+	cd $1.darcs && test -z "`darcs diff 2>&1`" && cd .. &&
 	diff --exclude _darcs --exclude .hg --exclude '*-darcs-backup*' --exclude 'hg-export.*' \
 		--exclude '.hgtags' --exclude '*.orig' -Nur $1 $1.darcs
 	return $?
@@ -239,12 +270,14 @@ diff_importhg()
 
 diff_importdarcs()
 {
+	cd $1.importdarcs && test -z "`darcs diff 2>&1`" && cd .. &&
 	diff --exclude _darcs --exclude '*-darcs-backup*' -Nur $1 $2
 	return $?
 }
 
 diff_importbzr()
 {
+	cd $1.darcs && test -z "`darcs diff 2>&1`" && cd .. &&
 	diff --exclude _darcs --exclude .bzr --exclude '*-darcs-backup*' -Nur $1 $1.darcs
 	return $?
 }
@@ -260,6 +293,7 @@ diff_bzr()
 
 diff_hg()
 {
+	hg -R $1.hg update
 	diff --exclude _darcs --exclude .hg --exclude '*-darcs-backup*' -Nur $1.hg $1
 	return $?
 }
diff --git a/exporters/darcs/t/test-hg.sh b/exporters/darcs/t/test-hg.sh
index ddde37a..95bfc4b 100644
--- a/exporters/darcs/t/test-hg.sh
+++ b/exporters/darcs/t/test-hg.sh
@@ -8,7 +8,7 @@ cd test.hg
 hg init
 cd ..
 if [ "$1" != "--stdout" ]; then
-	darcs-fast-export test |(cd test.hg; hg fastimport /dev/stdin)
+	darcs-fast-export test |(cd test.hg; hg fastimport -)
 	diff_hg test
 	exit $?
 else
diff --git a/exporters/darcs/t/test2-git-http.sh b/exporters/darcs/t/test2-git-http.sh
new file mode 100644
index 0000000..02549e4
--- /dev/null
+++ b/exporters/darcs/t/test2-git-http.sh
@@ -0,0 +1,22 @@
+. ./lib.sh
+. ./lib-httpd.sh
+
+rm -rf test2.darcs test2.git httpd
+create_darcs test2 --darcs-2
+mkdir -p $HTTPD_DOCUMENT_ROOT_PATH
+mv -v test2 $HTTPD_DOCUMENT_ROOT_PATH
+ln -s $HTTPD_DOCUMENT_ROOT_PATH/test2 .
+
+mkdir test2.git
+cd test2.git
+git --bare init
+cd ..
+start_httpd
+darcs-fast-export $HTTPD_URL/test2 |(cd test2.git; git fast-import)
+ret=$?
+stop_httpd
+if [ $ret != 0 ]; then
+	exit $ret
+fi
+diff_git test2
+exit $?
diff --git a/exporters/darcs/t/testimport-gitsymlink.sh b/exporters/darcs/t/testimport-gitsymlink.sh
new file mode 100644
index 0000000..100c583
--- /dev/null
+++ b/exporters/darcs/t/testimport-gitsymlink.sh
@@ -0,0 +1,45 @@
+. ./lib.sh
+
+create_git test
+cd test
+# add two dirs with the same contents, then remove the second
+# and make it a symlink to the first
+mkdir dira
+echo blabla > dira/file
+echo blablabla > dira/file2
+mkdir dirb
+touch dirb/file
+touch dirb/file2
+git add dira dirb
+git commit -a -m "add dira/dirb"
+rm -rf dirb
+ln -s dira dirb
+git add dirb
+git commit -a -m "change a dir to a symlink"
+cd ..
+
+rm -rf test.darcs
+mkdir test.darcs
+cd test.darcs
+darcs init
+cd ..
+(cd test; git fast-export --progress=2 HEAD) | (cd test.darcs; darcs-fast-import)
+# we *do* want this to fail, but with error code 2. that means that we
+# detected that symlinks are not supported and the user does not get a
+# meaningless exception
+if [ $? != 2 ]; then
+	exit 1
+fi
+
+# now try with the symhack option
+rm -rf test.darcs
+mkdir test.darcs
+cd test.darcs
+darcs init
+cd ..
+(cd test; git fast-export --progress=2 HEAD) | (cd test.darcs; darcs-fast-import --symhack)
+if [ $? != 0 ]; then
+	exit 1
+fi
+diff_importgit test
+exit $?
diff --git a/exporters/darcs/t/testimport-hg.sh b/exporters/darcs/t/testimport-hg.sh
index 76df76d..7f6d215 100644
--- a/exporters/darcs/t/testimport-hg.sh
+++ b/exporters/darcs/t/testimport-hg.sh
@@ -8,7 +8,6 @@ cd test.darcs
 darcs init
 cd ..
 (cd test; $pypath/bzrlib/plugins/fastimport/exporters/hg-fast-export.py -r .) | (cd test.darcs; darcs-fast-import)
-rm test/{*.orig,hg-export.status}
 if [ $? != 0 ]; then
 	exit 1
 fi
diff --git a/exporters/darcs/x2d b/exporters/darcs/x2d
index 61c66ef..398103d 100755
--- a/exporters/darcs/x2d
+++ b/exporters/darcs/x2d
@@ -62,6 +62,12 @@ case $format in
 		;;
 esac
 
+common_opts=""
+while [ -n "$2" ]
+do
+	common_opts="$common_opts $1"
+	shift 1
+done
 origin="$1"
 shift 1
 
@@ -77,7 +83,7 @@ fmark="$origin.darcs/_darcs/fast-import/ffi-marks"
 mkdir -p $origin.darcs
 cd $origin.darcs
 
-common_opts="--logfile $origin.darcs/_darcs/fast-import/log"
+common_opts="$common_opts --logfile $origin.darcs/_darcs/fast-import/log"
 pypath="/$(python -c 'from distutils import sysconfig; print sysconfig.get_python_lib()[1:]')/"
 
 if [ ! -f $dmark ]; then
diff --git a/exporters/darcs/x2d.txt b/exporters/darcs/x2d.txt
index eb2ec34..25ed6bb 100644
--- a/exporters/darcs/x2d.txt
+++ b/exporters/darcs/x2d.txt
@@ -6,7 +6,7 @@ x2d - convert git, bzr or hg repos to a darcs one using fast-export
 
 == SYNOPSIS
 
-x2d -f <format> <otherrepo>
+x2d -f <format> [<importoptions>] <otherrepo>
 
 == DESCRIPTION
 
@@ -24,3 +24,5 @@ importer's standard input.
 -f <format>::
 	Specify the format of the source repo. Currently supported sources are
 	git, bzr and hg. Incremental conversion is supported for all of them.
+
+The rest of the options are passed directly to darcs-fast-import.
diff --git a/exporters/svn-fast-export.README b/exporters/svn-fast-export.README
new file mode 100644
index 0000000..e08277e
--- /dev/null
+++ b/exporters/svn-fast-export.README
@@ -0,0 +1,12 @@
+To compile svn-fast-export.c, use make. You'll need to install
+some packages first using the package manager on your OS:
+
+* libsvn-dev - the Subversion libraries
+* libapr1-dev - the Apache Portable Runtime libraries
+
+Note: If someone with good knowledge of the Subversion
+Python bindings could rewrite svn-fast-export.py so that
+https://bugs.launchpad.net/bzr-fastimport/+bug/273361
+went away, then there would be much rejoicing throughout
+the land and the need for svn-fast-export.c would largely
+disappear.
diff --git a/exporters/svn-fast-export.py b/exporters/svn-fast-export.py
index e44c6cb..fd88094 100755
--- a/exporters/svn-fast-export.py
+++ b/exporters/svn-fast-export.py
@@ -11,6 +11,7 @@
 trunk_path = '/trunk/'
 branches_path = '/branches/'
 tags_path = '/tags/'
+address = 'localhost'
 
 first_rev = 1
 final_rev = 0
@@ -123,9 +124,9 @@ def export_revision(rev, repo, fs, pool):
 
     # Do the recursive crawl.
     if props.has_key('svn:author'):
-        author = "%s <%s@localhost>" % (props['svn:author'], props['svn:author'])
+        author = "%s <%s@%s>" % (props['svn:author'], props['svn:author'], address)
     else:
-        author = 'nobody <nobody@localhost>'
+        author = 'nobody <nobody@users.sourceforge.net>'
 
     if len(file_changes) == 0:
         svn_pool_destroy(revpool)
@@ -165,7 +166,6 @@ def crawl_revisions(pool, repos_path):
     youngest_rev = svn_fs_youngest_rev(fs_obj, pool)
 
 
-    first_rev = 1
     if final_rev == 0:
         final_rev = youngest_rev
     for rev in xrange(first_rev, final_rev + 1):
@@ -178,12 +178,16 @@ if __name__ == '__main__':
     parser.set_usage(usage)
     parser.add_option('-f', '--final-rev', help='Final revision to import', 
                       dest='final_rev', metavar='FINAL_REV', type='int')
+    parser.add_option('-r', '--first-rev', help='First revision to import', 
+                      dest='first_rev', metavar='FIRST_REV', type='int')
     parser.add_option('-t', '--trunk-path', help="Path in repo to /trunk, may be `regex:/cvs/(trunk)/proj1/(.*)`\nFirst group is used as branchname, second to match files",
                       dest='trunk_path', metavar='TRUNK_PATH')
     parser.add_option('-b', '--branches-path', help='Path in repo to /branches',
                       dest='branches_path', metavar='BRANCHES_PATH')
     parser.add_option('-T', '--tags-path', help='Path in repo to /tags',
                       dest='tags_path', metavar='TAGS_PATH')
+    parser.add_option('-a', '--address', help='Domain to put on users for their mail address', 
+                      dest='address', metavar='hostname', type='string')
     (options, args) = parser.parse_args()
 
     if options.trunk_path != None:
@@ -194,6 +198,10 @@ if __name__ == '__main__':
         tags_path = options.tags_path
     if options.final_rev != None:
         final_rev = options.final_rev
+    if options.first_rev != None:
+        first_rev = options.first_rev
+    if options.address != None:
+        address = options.address
 
     MATCHER = Matcher.getMatcher(trunk_path)
     sys.stderr.write("%s\n" % MATCHER)
diff --git a/helpers.py b/helpers.py
index 34d4688..afc867d 100644
--- a/helpers.py
+++ b/helpers.py
@@ -16,97 +16,14 @@
 
 """Miscellaneous useful stuff."""
 
-
-def single_plural(n, single, plural):
-    """Return a single or plural form of a noun based on number."""
-    if n == 1:
-        return single
-    else:
-        return plural
-
-
-def defines_to_dict(defines):
-    """Convert a list of definition strings to a dictionary."""
-    if defines is None:
-        return None
-    result = {}
-    for define in defines:
-        kv = define.split('=', 1)
-        if len(kv) == 1:
-            result[define.strip()] = 1
-        else:
-            result[kv[0].strip()] = kv[1].strip()
-    return result
-
-
-def invert_dict(d):
-    """Invert a dictionary with keys matching each value turned into a list."""
-    # Based on recipe from ASPN
-    result = {}
-    for k, v in d.iteritems():
-        keys = result.setdefault(v, [])
-        keys.append(k)
-    return result
-
-
-def invert_dictset(d):
-    """Invert a dictionary with keys matching a set of values, turned into lists."""
-    # Based on recipe from ASPN
-    result = {}
-    for k, c in d.iteritems():
-        for v in c:
-            keys = result.setdefault(v, [])
-            keys.append(k)
-    return result
-
-
-def _common_path_and_rest(l1, l2, common=[]):
-    # From http://code.activestate.com/recipes/208993/
-    if len(l1) < 1: return (common, l1, l2)
-    if len(l2) < 1: return (common, l1, l2)
-    if l1[0] != l2[0]: return (common, l1, l2)
-    return _common_path_and_rest(l1[1:], l2[1:], common+[l1[0]])
-
-
-def common_path(path1, path2):
-    """Find the common bit of 2 paths."""
-    return ''.join(_common_path_and_rest(path1, path2)[0])
-
-
-def common_directory(paths):
-    """Find the deepest common directory of a list of paths.
-    
-    :return: if no paths are provided, None is returned;
-      if there is no common directory, '' is returned;
-      otherwise the common directory with a trailing / is returned.
-    """
-    from bzrlib import osutils
-    def get_dir_with_slash(path):
-        if path == '' or path.endswith('/'):
-            return path
-        else:
-            dirname, basename = osutils.split(path)
-            if dirname == '':
-                return dirname
-            else:
-                return dirname + '/'
-
-    if not paths:
-        return None
-    elif len(paths) == 1:
-        return get_dir_with_slash(paths[0])
-    else:
-        common = common_path(paths[0], paths[1])
-        for path in paths[2:]:
-            common = common_path(common, path)
-        return get_dir_with_slash(common)
+import stat
 
 
 def escape_commit_message(message):
     """Replace xml-incompatible control characters."""
     # This really ought to be provided by bzrlib.
     # Code copied from bzrlib.commit.
-    
+
     # Python strings can include characters that can't be
     # represented in well-formed XML; escape characters that
     # aren't listed in the XML specification
@@ -119,25 +36,6 @@ def escape_commit_message(message):
     return message
 
 
-def binary_stream(stream):
-    """Ensure a stream is binary on Windows.
-
-    :return: the stream
-    """
-    try:
-        import os
-        if os.name == 'nt':
-            fileno = getattr(stream, 'fileno', None)
-            if fileno:
-                no = fileno()
-                if no >= 0:     # -1 means we're working as subprocess
-                    import msvcrt
-                    msvcrt.setmode(no, os.O_BINARY)
-    except ImportError:
-        pass
-    return stream
-
-
 def best_format_for_objects_in_a_repository(repo):
     """Find the high-level format for branches and trees given a repository.
 
@@ -215,3 +113,37 @@ def open_destination_directory(location, format=None, verbose=True):
         from bzrlib.info import show_bzrdir_info
         show_bzrdir_info(repo.bzrdir, verbose=0)
     return control
+
+
+def kind_to_mode(kind, executable):
+    """Return the numeric file mode for a kind and executable flag."""
+    if kind == "symlink":
+        return stat.S_IFLNK
+    if kind == "directory":
+        return stat.S_IFDIR
+    if kind == "tree-reference":
+        return 0160000
+    if kind != "file":
+        raise AssertionError("Unknown file kind '%s'" % kind)
+    # Only files get permission bits; the flag must equal True or False.
+    if executable == True:
+        return stat.S_IFREG | 0755
+    if executable == False:
+        return stat.S_IFREG | 0644
+    raise AssertionError("Executable %r invalid" % executable)
+
+
+def mode_to_kind(mode):
+    """Return the (kind, executable) pair for a numeric file mode."""
+    # Note: Output from git-fast-export slightly different to spec
+    if mode in (0755, 0100755):
+        return 'file', True
+    if mode in (0644, 0100644):
+        return 'file', False
+    # The remaining kinds are never reported as executable.
+    for known, kind in ((0040000, 'directory'),
+                        (0120000, 'symlink'),
+                        (0160000, 'tree-reference')):
+        if mode == known:
+            return kind, False
+    raise AssertionError("invalid mode %o" % mode)
diff --git a/idmapfile.py b/idmapfile.py
deleted file mode 100644
index 7b4ccf4..0000000
--- a/idmapfile.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Routines for saving and loading the id-map file."""
-
-import os
-
-
-def save_id_map(filename, revision_ids):
-    """Save the mapping of commit ids to revision ids to a file.
-
-    Throws the usual exceptions if the file cannot be opened,
-    written to or closed.
-
-    :param filename: name of the file to save the data to
-    :param revision_ids: a dictionary of commit ids to revision ids.
-    """
-    f = open(filename, 'wb')
-    try:
-        for commit_id, rev_id in revision_ids.iteritems():
-            f.write("%s %s\n" % (commit_id, rev_id))
-        f.flush()
-    finally:
-        f.close()
-
-
-def load_id_map(filename):
-    """Load the mapping of commit ids to revision ids from a file.
-
-    If the file does not exist, an empty result is returned.
-    If the file does exists but cannot be opened, read or closed,
-    the normal exceptions are thrown.
-
-    NOTE: It is assumed that commit-ids do not have embedded spaces.
-
-    :param filename: name of the file to save the data to
-    :result: map, count where:
-      map = a dictionary of commit ids to revision ids;
-      count = the number of keys in map
-    """
-    result = {}
-    count = 0
-    if os.path.exists(filename):
-        f = open(filename)
-        try:
-            for line in f:
-                parts = line[:-1].split(' ', 1)
-                result[parts[0]] = parts[1]
-                count += 1
-        finally:
-            f.close()
-    return result, count
diff --git a/marks_file.py b/marks_file.py
index ab24be2..c05f8c6 100644
--- a/marks_file.py
+++ b/marks_file.py
@@ -17,7 +17,6 @@
 """Routines for reading/writing a marks file."""
 
 
-import re
 from bzrlib.trace import warning
 
 
@@ -38,12 +37,26 @@ def import_marks(filename):
 
     # Read the revision info
     revision_ids = {}
-    for line in f:
+
+    line = f.readline()
+    if line == 'format=1\n':
+        # Cope with old-style marks files
+        # Read the branch info
+        branch_names = {}
+        for string in f.readline().rstrip('\n').split('\0'):
+            if not string:
+                continue
+            name, integer = string.rsplit('.', 1)
+            branch_names[name] = int(integer)
+        line = f.readline()
+
+    while line:
         line = line.rstrip('\n')
         mark, revid = line.split(' ', 1)
         if mark.startswith(':'):
             mark = mark[1:]
         revision_ids[mark] = revid
+        line = f.readline()
     f.close()
     return revision_ids
 
diff --git a/parser.py b/parser.py
deleted file mode 100644
index c133c01..0000000
--- a/parser.py
+++ /dev/null
@@ -1,557 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Parser of import data into command objects.
-
-In order to reuse existing front-ends, the stream format is a subset of
-the one used by git-fast-import (as of the 1.5.4 release of git at least).
-The grammar is:
-
-  stream ::= cmd*;
-
-  cmd ::= new_blob
-        | new_commit
-        | new_tag
-        | reset_branch
-        | checkpoint
-        | progress
-        ;
-
-  new_blob ::= 'blob' lf
-    mark?
-    file_content;
-  file_content ::= data;
-
-  new_commit ::= 'commit' sp ref_str lf
-    mark?
-    ('author' sp name '<' email '>' when lf)?
-    'committer' sp name '<' email '>' when lf
-    commit_msg
-    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
-    ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
-    file_change*
-    lf?;
-  commit_msg ::= data;
-
-  file_change ::= file_clr
-    | file_del
-    | file_rnm
-    | file_cpy
-    | file_obm
-    | file_inm;
-  file_clr ::= 'deleteall' lf;
-  file_del ::= 'D' sp path_str lf;
-  file_rnm ::= 'R' sp path_str sp path_str lf;
-  file_cpy ::= 'C' sp path_str sp path_str lf;
-  file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
-  file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
-    data;
-
-  new_tag ::= 'tag' sp tag_str lf
-    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
-    'tagger' sp name '<' email '>' when lf
-    tag_msg;
-  tag_msg ::= data;
-
-  reset_branch ::= 'reset' sp ref_str lf
-    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
-    lf?;
-
-  checkpoint ::= 'checkpoint' lf
-    lf?;
-
-  progress ::= 'progress' sp not_lf* lf
-    lf?;
-
-     # note: the first idnum in a stream should be 1 and subsequent
-     # idnums should not have gaps between values as this will cause
-     # the stream parser to reserve space for the gapped values.  An
-     # idnum can be updated in the future to a new object by issuing
-     # a new mark directive with the old idnum.
-     #
-  mark ::= 'mark' sp idnum lf;
-  data ::= (delimited_data | exact_data)
-    lf?;
-
-    # note: delim may be any string but must not contain lf.
-    # data_line may contain any data but must not be exactly
-    # delim. The lf after the final data_line is included in
-    # the data.
-  delimited_data ::= 'data' sp '<<' delim lf
-    (data_line lf)*
-    delim lf;
-
-     # note: declen indicates the length of binary_data in bytes.
-     # declen does not include the lf preceeding the binary data.
-     #
-  exact_data ::= 'data' sp declen lf
-    binary_data;
-
-     # note: quoted strings are C-style quoting supporting \c for
-     # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn
-     # is the signed byte value in octal.  Note that the only
-     # characters which must actually be escaped to protect the
-     # stream formatting is: \, " and LF.  Otherwise these values
-     # are UTF8.
-     #
-  ref_str     ::= ref;
-  sha1exp_str ::= sha1exp;
-  tag_str     ::= tag;
-  path_str    ::= path    | '"' quoted(path)    '"' ;
-  mode        ::= '100644' | '644'
-                | '100755' | '755'
-                | '120000'
-                ;
-
-  declen ::= # unsigned 32 bit value, ascii base10 notation;
-  bigint ::= # unsigned integer value, ascii base10 notation;
-  binary_data ::= # file content, not interpreted;
-
-  when         ::= raw_when | rfc2822_when;
-  raw_when     ::= ts sp tz;
-  rfc2822_when ::= # Valid RFC 2822 date and time;
-
-  sp ::= # ASCII space character;
-  lf ::= # ASCII newline (LF) character;
-
-     # note: a colon (':') must precede the numerical value assigned to
-     # an idnum.  This is to distinguish it from a ref or tag name as
-     # GIT does not permit ':' in ref or tag strings.
-     #
-  idnum   ::= ':' bigint;
-  path    ::= # GIT style file path, e.g. "a/b/c";
-  ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
-  tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
-  sha1exp ::= # Any valid GIT SHA1 expression;
-  hexsha1 ::= # SHA1 in hexadecimal format;
-
-     # note: name and email are UTF8 strings, however name must not
-     # contain '<' or lf and email must not contain any of the
-     # following: '<', '>', lf.
-     #
-  name  ::= # valid GIT author/committer name;
-  email ::= # valid GIT author/committer email;
-  ts    ::= # time since the epoch in seconds, ascii base10 notation;
-  tz    ::= # GIT style timezone;
-
-     # note: comments may appear anywhere in the input, except
-     # within a data command.  Any form of the data command
-     # always escapes the related input from comment processing.
-     #
-     # In case it is not clear, the '#' that starts the comment
-     # must be the first character on that the line (an lf have
-     # preceeded it).
-     #
-  comment ::= '#' not_lf* lf;
-  not_lf  ::= # Any byte that is not ASCII newline (LF);
-"""
-
-
-import re
-import sys
-
-import commands
-import dates
-import errors
-
-
-## Stream parsing ##
-
-class LineBasedParser(object):
-
-    def __init__(self, input):
-        """A Parser that keeps track of line numbers.
-
-        :param input: the file-like object to read from
-        """
-        self.input = input
-        self.lineno = 0
-        # Lines pushed back onto the input stream
-        self._buffer = []
-
-    def abort(self, exception, *args):
-        """Raise an exception providing line number information."""
-        raise exception(self.lineno, *args)
-
-    def readline(self):
-        """Get the next line including the newline or '' on EOF."""
-        self.lineno += 1
-        if self._buffer:
-            return self._buffer.pop()
-        else:
-            return self.input.readline()
-
-    def next_line(self):
-        """Get the next line without the newline or None on EOF."""
-        line = self.readline()
-        if line:
-            return line[:-1]
-        else:
-            return None
-
-    def push_line(self, line):
-        """Push line back onto the line buffer.
-        
-        :param line: the line with no trailing newline
-        """
-        self.lineno -= 1
-        self._buffer.append(line + "\n")
-
-    def read_bytes(self, count):
-        """Read a given number of bytes from the input stream.
-        
-        Throws MissingBytes if the bytes are not found.
-
-        Note: This method does not read from the line buffer.
-
-        :return: a string
-        """
-        result = self.input.read(count)
-        found = len(result)
-        self.lineno += result.count("\n")
-        if found != count:
-            self.abort(errors.MissingBytes, count, found)
-        return result
-
-    def read_until(self, terminator):
-        """Read the input stream until the terminator is found.
-        
-        Throws MissingTerminator if the terminator is not found.
-
-        Note: This method does not read from the line buffer.
-
-        :return: the bytes read up to but excluding the terminator.
-        """
-        
-        lines = []
-        term = terminator + '\n'
-        while True:
-            line = self.input.readline()
-            if line == term:
-                break
-            else:
-                lines.append(line)
-        return ''.join(lines)
-
-
-# Regular expression used for parsing. (Note: The spec states that the name
-# part should be non-empty but git-fast-export doesn't always do that so
-# the first bit is \w*, not \w+.) Also git-fast-import code says the
-# space before the email is optional.
-_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)')
-_WHO_RE = re.compile(r'([^<]*)<(.*)>')
-
-
-class ImportParser(LineBasedParser):
-
-    def __init__(self, input, verbose=False, output=sys.stdout):
-        """A Parser of import commands.
-
-        :param input: the file-like object to read from
-        :param verbose: display extra information of not
-        :param output: the file-like object to write messages to (YAGNI?)
-        """
-        LineBasedParser.__init__(self, input)
-        self.verbose = verbose
-        self.output = output
-        # We auto-detect the date format when a date is first encountered
-        self.date_parser = None
-
-    def _warning(self, msg):
-        sys.stderr.write("warning line %d: %s\n" % (self.lineno, msg))
-
-    def iter_commands(self):
-        """Iterator returning ImportCommand objects."""
-        while True:
-            line = self.next_line()
-            if line is None:
-                break
-            elif len(line) == 0 or line.startswith('#'):
-                continue
-            # Search for commands in order of likelihood
-            elif line.startswith('commit '):
-                yield self._parse_commit(line[len('commit '):])
-            elif line.startswith('blob'):
-                yield self._parse_blob()
-            elif line.startswith('progress '):
-                yield commands.ProgressCommand(line[len('progress '):])
-            elif line.startswith('reset '):
-                yield self._parse_reset(line[len('reset '):])
-            elif line.startswith('tag '):
-                yield self._parse_tag(line[len('tag '):])
-            elif line.startswith('checkpoint'):
-                yield commands.CheckpointCommand()
-            else:
-                self.abort(errors.InvalidCommand, line)
-
-    def iter_file_commands(self):
-        """Iterator returning FileCommand objects.
-        
-        If an invalid file command is found, the line is silently
-        pushed back and iteration ends.
-        """
-        while True:
-            line = self.next_line()
-            if line is None:
-                break
-            elif len(line) == 0 or line.startswith('#'):
-                continue
-            # Search for file commands in order of likelihood
-            elif line.startswith('M '):
-                yield self._parse_file_modify(line[2:])
-            elif line.startswith('D '):
-                path = self._path(line[2:])
-                yield commands.FileDeleteCommand(path)
-            elif line.startswith('R '):
-                old, new = self._path_pair(line[2:])
-                yield commands.FileRenameCommand(old, new)
-            elif line.startswith('C '):
-                src, dest = self._path_pair(line[2:])
-                yield commands.FileCopyCommand(src, dest)
-            elif line.startswith('deleteall'):
-                yield commands.FileDeleteAllCommand()
-            else:
-                self.push_line(line)
-                break
-
-    def _parse_blob(self):
-        """Parse a blob command."""
-        lineno = self.lineno
-        mark = self._get_mark_if_any()
-        data = self._get_data('blob')
-        return commands.BlobCommand(mark, data, lineno)
-
-    def _parse_commit(self, ref):
-        """Parse a commit command."""
-        lineno  = self.lineno
-        mark = self._get_mark_if_any()
-        author = self._get_user_info('commit', 'author', False)
-        committer = self._get_user_info('commit', 'committer')
-        message = self._get_data('commit', 'message')
-        try:
-            message = message.decode('utf_8')
-        except UnicodeDecodeError:
-            self._warning(
-                "commit message not in utf8 - replacing unknown characters")
-            message = message.decode('utf_8', 'replace')
-        from_ = self._get_from()
-        merges = []
-        while True:
-            merge = self._get_merge()
-            if merge is not None:
-                # while the spec suggests it's illegal, git-fast-export
-                # outputs multiple merges on the one line, e.g.
-                # merge :x :y :z
-                these_merges = merge.split(" ")
-                merges.extend(these_merges)
-            else:
-                break
-        return commands.CommitCommand(ref, mark, author, committer, message,
-            from_, merges, self.iter_file_commands, lineno)
-
-    def _parse_file_modify(self, info):
-        """Parse a filemodify command within a commit.
-
-        :param info: a string in the format "mode dataref path"
-          (where dataref might be the hard-coded literal 'inline').
-        """
-        params = info.split(' ', 2)
-        path = self._path(params[2])
-        is_executable, is_symlink = self._mode(params[0])
-        if is_symlink:
-            kind = commands.SYMLINK_KIND
-        else:
-            kind = commands.FILE_KIND
-        if params[1] == 'inline':
-            dataref = None
-            data = self._get_data('filemodify')
-        else:
-            dataref = params[1]
-            data = None
-        return commands.FileModifyCommand(path, kind, is_executable, dataref,
-            data)
-
-    def _parse_reset(self, ref):
-        """Parse a reset command."""
-        from_ = self._get_from()
-        return commands.ResetCommand(ref, from_)
-
-    def _parse_tag(self, name):
-        """Parse a tag command."""
-        from_ = self._get_from('tag')
-        tagger = self._get_user_info('tag', 'tagger', accept_just_who=True)
-        message = self._get_data('tag', 'message').decode('utf_8')
-        return commands.TagCommand(name, from_, tagger, message)
-
-    def _get_mark_if_any(self):
-        """Parse a mark section."""
-        line = self.next_line()
-        if line.startswith('mark :'):
-            return line[len('mark :'):]
-        else:
-            self.push_line(line)
-            return None
-
-    def _get_from(self, required_for=None):
-        """Parse a from section."""
-        line = self.next_line()
-        if line is None:
-            return None
-        elif line.startswith('from '):
-            return line[len('from '):]
-        elif required_for:
-            self.abort(errors.MissingSection, required_for, 'from')
-        else:
-            self.push_line(line)
-            return None
-
-    def _get_merge(self):
-        """Parse a merge section."""
-        line = self.next_line()
-        if line is None:
-            return None
-        elif line.startswith('merge '):
-            return line[len('merge '):]
-        else:
-            self.push_line(line)
-            return None
-
-    def _get_user_info(self, cmd, section, required=True,
-        accept_just_who=False):
-        """Parse a user section."""
-        line = self.next_line()
-        if line.startswith(section + ' '):
-            return self._who_when(line[len(section + ' '):], cmd, section,
-                accept_just_who=accept_just_who)
-        elif required:
-            self.abort(errors.MissingSection, cmd, section)
-        else:
-            self.push_line(line)
-            return None
-
-    def _get_data(self, required_for, section='data'):
-        """Parse a data section."""
-        line = self.next_line()
-        if line.startswith('data '):
-            rest = line[len('data '):]
-            if rest.startswith('<<'):
-                return self.read_until(rest[2:])
-            else:
-                size = int(rest)
-                read_bytes = self.read_bytes(size)
-                # optional LF after data.
-                next = self.input.readline()
-                self.lineno += 1
-                if len(next) > 1 or next != "\n":
-                    self.push_line(next[:-1])
-                return read_bytes
-        else:
-            self.abort(errors.MissingSection, required_for, section)
-
-    def _who_when(self, s, cmd, section, accept_just_who=False):
-        """Parse who and when information from a string.
-        
-        :return: a tuple of (name,email,timestamp,timezone). name may be
-            the empty string if only an email address was given.
-        """
-        match = _WHO_AND_WHEN_RE.search(s)
-        if match:
-            datestr = match.group(3)
-            if self.date_parser is None:
-                # auto-detect the date format
-                if len(datestr.split(' ')) == 2:
-                    format = 'raw'
-                elif datestr == 'now':
-                    format = 'now'
-                else:
-                    format = 'rfc2822'
-                self.date_parser = dates.DATE_PARSERS_BY_NAME[format]
-            when = self.date_parser(datestr, self.lineno)
-        else:
-            match = _WHO_RE.search(s)
-            if accept_just_who and match:
-                # HACK around missing time
-                # TODO: output a warning here
-                when = dates.DATE_PARSERS_BY_NAME['now']('now')
-            else:
-                self.abort(errors.BadFormat, cmd, section, s)
-        name = match.group(1)
-        if len(name) > 0:
-            if name[-1] == " ":
-                try:
-                    name = name[:-1].decode('utf_8')
-                except UnicodeDecodeError:
-                    # The spec says names are *typically* utf8 encoded
-                    # but that isn't enforced by git-fast-export (at least)
-                    name = name[:-1]
-        email = match.group(2)
-        # While it shouldn't happen, some datasets have email addresses
-        # which contain unicode characters. See bug 338186. We sanitize
-        # the data at this level just in case.
-        try:
-            email = "%s" % (email,)
-        except UnicodeDecodeError:
-            email = "%s" % (email.decode('utf_8'),)
-        return (name, email, when[0], when[1])
-
-    def _path(self, s):
-        """Parse a path."""
-        if s.startswith('"'):
-            if s[-1] != '"':
-                self.abort(errors.BadFormat, '?', '?', s)
-            else:
-                return _unquote_c_string(s[1:-1])
-        try:
-            return s.decode('utf_8')
-        except UnicodeDecodeError:
-            # The spec recommends utf8 encoding but that isn't enforced
-            return s
-
-    def _path_pair(self, s):
-        """Parse two paths separated by a space."""
-        # TODO: handle a space in the first path
-        if s.startswith('"'):
-            parts = s[1:].split('" ', 1)
-        else:
-            parts = s.split(' ', 1)
-        if len(parts) != 2:
-            self.abort(errors.BadFormat, '?', '?', s)
-        elif parts[1].startswith('"') and parts[1].endswith('"'):
-            parts[1] = parts[1][1:-1]
-        elif parts[1].startswith('"') or parts[1].endswith('"'):
-            self.abort(errors.BadFormat, '?', '?', s)
-        return map(_unquote_c_string, parts)
-
-    def _mode(self, s):
-        """Parse a file mode into executable and symlink flags.
-        
-        :return (is_executable, is_symlink)
-        """
-        # Note: Output from git-fast-export slightly different to spec
-        if s in ['644', '100644', '0100644']:
-            return False, False
-        elif s in ['755', '100755', '0100755']:
-            return True, False
-        elif s in ['120000', '0120000']:
-            return False, True
-        else:
-            self.abort(errors.BadFormat, 'filemodify', 'mode', s)
-
-
-def _unquote_c_string(s):
-    """replace C-style escape sequences (\n, \", etc.) with real chars."""
-    # HACK: Python strings are close enough
-    return s.decode('string_escape', 'replace')
diff --git a/processor.py b/processor.py
deleted file mode 100644
index 06b4871..0000000
--- a/processor.py
+++ /dev/null
@@ -1,253 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Processor of import commands.
-
-This module provides core processing functionality including an abstract class
-for basing real processors on. See the processors package for examples.
-"""
-
-import sys
-import time
-
-from bzrlib import debug
-from bzrlib.errors import NotBranchError
-from bzrlib.trace import (
-    mutter,
-    note,
-    warning,
-    )
-import errors
-
-
-class ImportProcessor(object):
-    """Base class for import processors.
-    
-    Subclasses should override the pre_*, post_* and *_handler
-    methods as appropriate.
-    """
-
-    known_params = []
-
-    def __init__(self, bzrdir, params=None, verbose=False, outf=None):
-        if outf is None:
-            self.outf = sys.stdout
-        else:
-            self.outf = outf
-        self.verbose = verbose
-        if params is None:
-            self.params = {}
-        else:
-            self.params = params
-            self.validate_parameters()
-        self.bzrdir = bzrdir
-        if bzrdir is None:
-            # Some 'importers' don't need a repository to write to
-            self.working_tree = None
-            self.branch = None
-            self.repo = None
-        else:
-            try:
-                # Might be inside a branch
-                (self.working_tree, self.branch) = bzrdir._get_tree_branch()
-                self.repo = self.branch.repository
-            except NotBranchError:
-                # Must be inside a repository
-                self.working_tree = None
-                self.branch = None
-                self.repo = bzrdir.open_repository()
-
-        # Handlers can set this to request exiting cleanly without
-        # iterating through the remaining commands
-        self.finished = False
-
-    def validate_parameters(self):
-        """Validate that the parameters are correctly specified."""
-        for p in self.params:
-            if p not in self.known_params:
-                raise errors.UnknownParameter(p, self.known_params)
-
-    def process(self, command_iter):
-        """Import data into Bazaar by processing a stream of commands.
-
-        :param command_iter: an iterator providing commands
-        """
-        if self.working_tree is not None:
-            self.working_tree.lock_write()
-        elif self.branch is not None:
-            self.branch.lock_write()
-        elif self.repo is not None:
-            self.repo.lock_write()
-        try:
-            self._process(command_iter)
-        finally:
-            # If an unhandled exception occurred, abort the write group
-            if self.repo is not None and self.repo.is_in_write_group():
-                self.repo.abort_write_group()
-            # Release the locks
-            if self.working_tree is not None:
-                self.working_tree.unlock()
-            elif self.branch is not None:
-                self.branch.unlock()
-            elif self.repo is not None:
-                self.repo.unlock()
-
-    def _process(self, command_iter):
-        self.pre_process()
-        for cmd in command_iter():
-            try:
-                handler = self.__class__.__dict__[cmd.name + "_handler"]
-            except KeyError:
-                raise errors.MissingHandler(cmd.name)
-            else:
-                self.pre_handler(cmd)
-                handler(self, cmd)
-                self.post_handler(cmd)
-            if self.finished:
-                break
-        self.post_process()
-
-    def note(self, msg, *args):
-        """Output a note but timestamp it."""
-        msg = "%s %s" % (self._time_of_day(), msg)
-        note(msg, *args)
-
-    def warning(self, msg, *args):
-        """Output a warning but timestamp it."""
-        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
-        warning(msg, *args)
-
-    def debug(self, mgs, *args):
-        """Output a debug message if the appropriate -D option was given."""
-        if "fast-import" in debug.debug_flags:
-            msg = "%s DEBUG: %s" % (self._time_of_day(), msg)
-            mutter(msg, *args)
-
-    def _time_of_day(self):
-        """Time of day as a string."""
-        # Note: this is a separate method so tests can patch in a fixed value
-        return time.strftime("%H:%M:%S")
-
-    def pre_process(self):
-        """Hook for logic at start of processing."""
-        pass
-
-    def post_process(self):
-        """Hook for logic at end of processing."""
-        pass
-
-    def pre_handler(self, cmd):
-        """Hook for logic before each handler starts."""
-        pass
-
-    def post_handler(self, cmd):
-        """Hook for logic after each handler finishes."""
-        pass
-
-    def progress_handler(self, cmd):
-        """Process a ProgressCommand."""
-        raise NotImplementedError(self.progress_handler)
-
-    def blob_handler(self, cmd):
-        """Process a BlobCommand."""
-        raise NotImplementedError(self.blob_handler)
-
-    def checkpoint_handler(self, cmd):
-        """Process a CheckpointCommand."""
-        raise NotImplementedError(self.checkpoint_handler)
-
-    def commit_handler(self, cmd):
-        """Process a CommitCommand."""
-        raise NotImplementedError(self.commit_handler)
-
-    def reset_handler(self, cmd):
-        """Process a ResetCommand."""
-        raise NotImplementedError(self.reset_handler)
-
-    def tag_handler(self, cmd):
-        """Process a TagCommand."""
-        raise NotImplementedError(self.tag_handler)
-
-
-class CommitHandler(object):
-    """Base class for commit handling.
-    
-    Subclasses should override the pre_*, post_* and *_handler
-    methods as appropriate.
-    """
-
-    def __init__(self, command):
-        self.command = command
-
-    def process(self):
-        self.pre_process_files()
-        for fc in self.command.file_iter():
-            try:
-                handler = self.__class__.__dict__[fc.name[4:] + "_handler"]
-            except KeyError:
-                raise errors.MissingHandler(fc.name)
-            else:
-                handler(self, fc)
-        self.post_process_files()
-
-    def note(self, msg, *args):
-        """Output a note but add context."""
-        msg = "%s (%s)" % (msg, self.command.id)
-        note(msg, *args)
-
-    def warning(self, msg, *args):
-        """Output a warning but add context."""
-        msg = "WARNING: %s (%s)" % (msg, self.command.id)
-        warning(msg, *args)
-
-    def mutter(self, msg, *args):
-        """Output a mutter but add context."""
-        msg = "%s (%s)" % (msg, self.command.id)
-        mutter(msg, *args)
-
-    def debug(self, msg, *args):
-        """Output a mutter if the appropriate -D option was given."""
-        if "fast-import" in debug.debug_flags:
-            msg = "%s (%s)" % (msg, self.command.id)
-            mutter(msg, *args)
-
-    def pre_process_files(self):
-        """Prepare for committing."""
-        pass
-
-    def post_process_files(self):
-        """Save the revision."""
-        pass
-
-    def modify_handler(self, filecmd):
-        """Handle a filemodify command."""
-        raise NotImplementedError(self.modify_handler)
-
-    def delete_handler(self, filecmd):
-        """Handle a filedelete command."""
-        raise NotImplementedError(self.delete_handler)
-
-    def copy_handler(self, filecmd):
-        """Handle a filecopy command."""
-        raise NotImplementedError(self.copy_handler)
-
-    def rename_handler(self, filecmd):
-        """Handle a filerename command."""
-        raise NotImplementedError(self.rename_handler)
-
-    def deleteall_handler(self, filecmd):
-        """Handle a filedeleteall command."""
-        raise NotImplementedError(self.deleteall_handler)
diff --git a/processors/filter_processor.py b/processors/filter_processor.py
deleted file mode 100644
index 8284cb5..0000000
--- a/processors/filter_processor.py
+++ /dev/null
@@ -1,288 +0,0 @@
-# Copyright (C) 2009 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Import processor that filters the input (and doesn't import)."""
-
-
-from bzrlib import osutils
-from bzrlib.trace import (
-    warning,
-    )
-from bzrlib.plugins.fastimport import (
-    commands,
-    helpers,
-    processor,
-    )
-
-
-class FilterProcessor(processor.ImportProcessor):
-    """An import processor that filters the input to include/exclude objects.
-
-    No changes to the current repository are made.
-
-    Here are the supported parameters:
-
-    * include_paths - a list of paths that commits must change in order to
-      be kept in the output stream
-
-    * exclude_paths - a list of paths that should not appear in the output
-      stream
-    """
-
-    known_params = [
-        'include_paths',
-        'exclude_paths',
-        ]
-
-    def pre_process(self):
-        self.includes = self.params.get('include_paths')
-        self.excludes = self.params.get('exclude_paths')
-        # What's the new root, if any
-        self.new_root = helpers.common_directory(self.includes)
-        # Buffer of blobs until we know we need them: mark -> cmd
-        self.blobs = {}
-        # These are the commits we've output so far
-        self.interesting_commits = set()
-        # Map of commit-id to list of parents
-        self.parents = {}
-
-    def pre_handler(self, cmd):
-        self.command = cmd
-        # Should this command be included in the output or not?
-        self.keep = False
-        # Blobs to dump into the output before dumping the command itself
-        self.referenced_blobs = []
-
-    def post_handler(self, cmd):
-        if not self.keep:
-            return
-        # print referenced blobs and the command
-        for blob_id in self.referenced_blobs:
-            self._print_command(self.blobs[blob_id])
-        self._print_command(self.command)
-
-    def progress_handler(self, cmd):
-        """Process a ProgressCommand."""
-        # These always pass through
-        self.keep = True
-
-    def blob_handler(self, cmd):
-        """Process a BlobCommand."""
-        # These never pass through directly. We buffer them and only
-        # output them if referenced by an interesting command.
-        self.blobs[cmd.id] = cmd
-        self.keep = False
-
-    def checkpoint_handler(self, cmd):
-        """Process a CheckpointCommand."""
-        # These always pass through
-        self.keep = True
-
-    def commit_handler(self, cmd):
-        """Process a CommitCommand."""
-        # These pass through if they meet the filtering conditions
-        interesting_filecmds = self._filter_filecommands(cmd.file_iter)
-        if interesting_filecmds:
-            # If all we have is a single deleteall, skip this commit
-            if len(interesting_filecmds) == 1 and isinstance(
-                interesting_filecmds[0], commands.FileDeleteAllCommand):
-                pass
-            else:
-                # Remember just the interesting file commands
-                self.keep = True
-                cmd.file_iter = iter(interesting_filecmds)
-
-                # Record the referenced blobs
-                for fc in interesting_filecmds:
-                    if isinstance(fc, commands.FileModifyCommand):
-                        if fc.dataref is not None:
-                            self.referenced_blobs.append(fc.dataref)
-
-                # Update from and merges to refer to commits in the output
-                cmd.from_ = self._find_interesting_from(cmd.from_)
-                cmd.merges = self._find_interesting_merges(cmd.merges)
-                self.interesting_commits.add(cmd.id)
-
-        # Keep track of the parents
-        if cmd.from_ and cmd.merges:
-            parents = [cmd.from_] + cmd.merges
-        elif cmd.from_:
-            parents = [cmd.from_]
-        else:
-            parents = None
-        self.parents[":" + cmd.mark] = parents
-
-    def reset_handler(self, cmd):
-        """Process a ResetCommand."""
-        if cmd.from_ is None:
-            # We pass through resets that init a branch because we have to
-            # assume the branch might be interesting.
-            self.keep = True
-        else:
-            # Keep resets if they indirectly reference something we kept
-            cmd.from_ = self._find_interesting_from(cmd.from_)
-            self.keep = cmd.from_ is not None
-
-    def tag_handler(self, cmd):
-        """Process a TagCommand."""
-        # Keep tags if they indirectly reference something we kept
-        cmd.from_ = self._find_interesting_from(cmd.from_)
-        self.keep = cmd.from_ is not None
-
-    def _print_command(self, cmd):
-        """Wrapper to avoid adding unnecessary blank lines."""
-        text = repr(cmd)
-        self.outf.write(text)
-        if not text.endswith("\n"):
-            self.outf.write("\n")
-
-    def _filter_filecommands(self, filecmd_iter):
-        """Return the filecommands filtered by includes & excludes.
-        
-        :return: a list of FileCommand objects
-        """
-        if self.includes is None and self.excludes is None:
-            return list(filecmd_iter())
-
-        # Do the filtering, adjusting for the new_root
-        result = []
-        for fc in filecmd_iter():
-            if (isinstance(fc, commands.FileModifyCommand) or
-                isinstance(fc, commands.FileDeleteCommand)):
-                if self._path_to_be_kept(fc.path):
-                    fc.path = self._adjust_for_new_root(fc.path)
-                else:
-                    continue
-            elif isinstance(fc, commands.FileDeleteAllCommand):
-                pass
-            elif isinstance(fc, commands.FileRenameCommand):
-                fc = self._convert_rename(fc)
-            elif isinstance(fc, commands.FileCopyCommand):
-                fc = self._convert_copy(fc)
-            else:
-                warning("cannot handle FileCommands of class %s - ignoring",
-                        fc.__class__)
-                continue
-            if fc is not None:
-                result.append(fc)
-        return result
-
-    def _path_to_be_kept(self, path):
-        """Does the given path pass the filtering criteria?"""
-        if self.excludes and (path in self.excludes
-                or osutils.is_inside_any(self.excludes, path)):
-            return False
-        if self.includes:
-            return (path in self.includes
-                or osutils.is_inside_any(self.includes, path))
-        return True
-
-    def _adjust_for_new_root(self, path):
-        """Adjust a path given the new root directory of the output."""
-        if self.new_root is None:
-            return path
-        elif path.startswith(self.new_root):
-            return path[len(self.new_root):]
-        else:
-            return path
-
-    def _find_interesting_parent(self, commit_ref):
-        while True:
-            if commit_ref in self.interesting_commits:
-                return commit_ref
-            parents = self.parents.get(commit_ref)
-            if not parents:
-                return None
-            commit_ref = parents[0]
-
-    def _find_interesting_from(self, commit_ref):
-        if commit_ref is None:
-            return None
-        return self._find_interesting_parent(commit_ref)
-
-    def _find_interesting_merges(self, commit_refs):
-        if commit_refs is None:
-            return None
-        merges = []
-        for commit_ref in commit_refs:
-            parent = self._find_interesting_parent(commit_ref)
-            if parent is not None:
-                merges.append(parent)
-        if merges:
-            return merges
-        else:
-            return None
-
-    def _convert_rename(self, fc):
-        """Convert a FileRenameCommand into a new FileCommand.
-        
-        :return: None if the rename is being ignored, otherwise a
-          new FileCommand based on the whether the old and new paths
-          are inside or outside of the interesting locations.
-          """
-        old = fc.old_path
-        new = fc.new_path
-        keep_old = self._path_to_be_kept(old)
-        keep_new = self._path_to_be_kept(new)
-        if keep_old and keep_new:
-            fc.old_path = self._adjust_for_new_root(old)
-            fc.new_path = self._adjust_for_new_root(new)
-            return fc
-        elif keep_old:
-            # The file has been renamed to a non-interesting location.
-            # Delete it!
-            old = self._adjust_for_new_root(old)
-            return commands.FileDeleteCommand(old)
-        elif keep_new:
-            # The file has been renamed into an interesting location
-            # We really ought to add it but we don't currently buffer
-            # the contents of all previous files and probably never want
-            # to. Maybe fast-import-info needs to be extended to
-            # remember all renames and a config file can be passed
-            # into here ala fast-import?
-            warning("cannot turn rename of %s into an add of %s yet" %
-                (old, new))
-        return None
-
-    def _convert_copy(self, fc):
-        """Convert a FileCopyCommand into a new FileCommand.
-        
-        :return: None if the copy is being ignored, otherwise a
-          new FileCommand based on the whether the source and destination
-          paths are inside or outside of the interesting locations.
-          """
-        src = fc.src_path
-        dest = fc.dest_path
-        keep_src = self._path_to_be_kept(src)
-        keep_dest = self._path_to_be_kept(dest)
-        if keep_src and keep_dest:
-            fc.src_path = self._adjust_for_new_root(src)
-            fc.dest_path = self._adjust_for_new_root(dest)
-            return fc
-        elif keep_src:
-            # The file has been copied to a non-interesting location.
-            # Ignore it!
-            return None
-        elif keep_dest:
-            # The file has been copied into an interesting location
-            # We really ought to add it but we don't currently buffer
-            # the contents of all previous files and probably never want
-            # to. Maybe fast-import-info needs to be extended to
-            # remember all copies and a config file can be passed
-            # into here ala fast-import?
-            warning("cannot turn copy of %s into an add of %s yet" %
-                (src, dest))
-        return None
diff --git a/processors/generic_processor.py b/processors/generic_processor.py
index 3f23c8b..43c933b 100644
--- a/processors/generic_processor.py
+++ b/processors/generic_processor.py
@@ -19,25 +19,34 @@
 
 import time
 from bzrlib import (
-    bzrdir,
+    debug,
     delta,
     errors,
     osutils,
     progress,
     )
 from bzrlib.repofmt import pack_repo
-from bzrlib.trace import note, mutter
-import bzrlib.util.configobj.configobj as configobj
+from bzrlib.trace import (
+    mutter,
+    note,
+    warning,
+    )
+try:
+    import bzrlib.util.configobj.configobj as configobj
+except ImportError:
+    import configobj
 from bzrlib.plugins.fastimport import (
     branch_updater,
-    bzr_commit_handler,
     cache_manager,
+    marks_file,
+    revision_store,
+    )
+from fastimport import (
+    commands,
     errors as plugin_errors,
     helpers,
     idmapfile,
-    marks_file,
     processor,
-    revision_store,
     )
 
 
@@ -51,8 +60,8 @@ _DEFAULT_AUTO_CHECKPOINT = 10000
 _DEFAULT_AUTO_PACK = 4
 
 # How many inventories to cache
-_DEFAULT_INV_CACHE_SIZE = 10
-_DEFAULT_CHK_INV_CACHE_SIZE = 100
+_DEFAULT_INV_CACHE_SIZE = 1
+_DEFAULT_CHK_INV_CACHE_SIZE = 1
 
 
 class GenericProcessor(processor.ImportProcessor):
@@ -95,7 +104,7 @@ class GenericProcessor(processor.ImportProcessor):
     * autopack - pack every n checkpoints. The default is 4.
 
     * inv-cache - number of inventories to cache.
-      If not set, the default is 100 for CHK formats and 10 otherwise.
+      If not set, the default is 1.
 
     * mode - import algorithm to use: default, experimental or classic.
 
@@ -118,13 +127,27 @@ class GenericProcessor(processor.ImportProcessor):
 
     def __init__(self, bzrdir, params=None, verbose=False, outf=None,
             prune_empty_dirs=True):
-        processor.ImportProcessor.__init__(self, bzrdir, params, verbose)
+        processor.ImportProcessor.__init__(self, params, verbose)
         self.prune_empty_dirs = prune_empty_dirs
+        self.bzrdir = bzrdir
+        try:
+            # Might be inside a branch
+            (self.working_tree, self.branch) = bzrdir._get_tree_branch()
+            self.repo = self.branch.repository
+        except errors.NotBranchError:
+            # Must be inside a repository
+            self.working_tree = None
+            self.branch = None
+            self.repo = bzrdir.open_repository()
 
     def pre_process(self):
-        self.note("Starting import ...")
         self._start_time = time.time()
         self._load_info_and_params()
+        if self.total_commits:
+            self.note("Starting import of %d commits ..." %
+                (self.total_commits,))
+        else:
+            self.note("Starting import ...")
         self.cache_mgr = cache_manager.CacheManager(self.info, self.verbose,
             self.inventory_cache_size)
         
@@ -174,6 +197,7 @@ class GenericProcessor(processor.ImportProcessor):
         self.repo.start_write_group()
 
     def _load_info_and_params(self):
+        from bzrlib.plugins.fastimport import bzr_commit_handler
         self._mode = bool(self.params.get('mode', 'default'))
         self._experimental = self._mode == 'experimental'
 
@@ -269,6 +293,31 @@ class GenericProcessor(processor.ImportProcessor):
                 self.repo, self.inventory_cache_size,
                 fulltext_when=fulltext_when)
 
+    def process(self, command_iter):
+        """Import data into Bazaar by processing a stream of commands.
+
+        :param command_iter: an iterator providing commands
+        """
+        if self.working_tree is not None:
+            self.working_tree.lock_write()
+        elif self.branch is not None:
+            self.branch.lock_write()
+        elif self.repo is not None:
+            self.repo.lock_write()
+        try:
+            super(GenericProcessor, self)._process(command_iter)
+        finally:
+            # If an unhandled exception occurred, abort the write group
+            if self.repo is not None and self.repo.is_in_write_group():
+                self.repo.abort_write_group()
+            # Release the locks
+            if self.working_tree is not None:
+                self.working_tree.unlock()
+            elif self.branch is not None:
+                self.branch.unlock()
+            elif self.repo is not None:
+                self.repo.unlock()
+
     def _process(self, command_iter):
         # if anything goes wrong, abort the write group if any
         try:
@@ -287,15 +336,16 @@ class GenericProcessor(processor.ImportProcessor):
             marks_file.export_marks(self.params.get("export-marks"),
                 self.cache_mgr.revision_ids)
 
-        if self.cache_mgr.last_ref == None:
+        if self.cache_mgr.reftracker.last_ref == None:
             """Nothing to refresh"""
             return
 
         # Update the branches
         self.note("Updating branch information ...")
         updater = branch_updater.BranchUpdater(self.repo, self.branch,
-            self.cache_mgr, helpers.invert_dictset(self.cache_mgr.heads),
-            self.cache_mgr.last_ref, self.tags)
+            self.cache_mgr, helpers.invert_dictset(
+                self.cache_mgr.reftracker.heads),
+            self.cache_mgr.reftracker.last_ref, self.tags)
         branches_updated, branches_lost = updater.update()
         self._branch_count = len(branches_updated)
 
@@ -460,19 +510,19 @@ class GenericProcessor(processor.ImportProcessor):
     def commit_handler(self, cmd):
         """Process a CommitCommand."""
         if self.skip_total and self._revision_count < self.skip_total:
-            self.cache_mgr.track_heads(cmd)
+            self.cache_mgr.reftracker.track_heads(cmd)
             # Check that we really do know about this commit-id
             if not self.cache_mgr.revision_ids.has_key(cmd.id):
                 raise plugin_errors.BadRestart(cmd.id)
-            # Consume the file commands and free any non-sticky blobs
-            for fc in cmd.file_iter():
-                pass
             self.cache_mgr._blobs = {}
             self._revision_count += 1
+            if cmd.ref.startswith('refs/tags/'):
+                tag_name = cmd.ref[len('refs/tags/'):]
+                self._set_tag(tag_name, cmd.id)
             return
         if self.first_incremental_commit:
             self.first_incremental_commit = None
-            parents = self.cache_mgr.track_heads(cmd)
+            parents = self.cache_mgr.reftracker.track_heads(cmd)
 
         # 'Commit' the revision and report progress
         handler = self.commit_handler_factory(cmd, self.cache_mgr,
@@ -487,6 +537,10 @@ class GenericProcessor(processor.ImportProcessor):
         self._revision_count += 1
         self.report_progress("(%s)" % cmd.id)
 
+        if cmd.ref.startswith('refs/tags/'):
+            tag_name = cmd.ref[len('refs/tags/'):]
+            self._set_tag(tag_name, cmd.id)
+
         # Check if we should finish up or automatically checkpoint
         if (self.max_commits is not None and
             self._revision_count >= self.max_commits):
@@ -514,8 +568,10 @@ class GenericProcessor(processor.ImportProcessor):
 
     def progress_handler(self, cmd):
         """Process a ProgressCommand."""
-        # We could use a progress bar here instead
-        self.note("progress %s" % (cmd.message,))
+        # Most progress messages embedded in streams are annoying.
+        # Ignore them unless in verbose mode.
+        if self.verbose:
+            self.note("progress %s" % (cmd.message,))
 
     def reset_handler(self, cmd):
         """Process a ResetCommand."""
@@ -529,7 +585,7 @@ class GenericProcessor(processor.ImportProcessor):
             return
 
         if cmd.from_ is not None:
-            self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)
+            self.cache_mgr.reftracker.track_heads_for_ref(cmd.ref, cmd.from_)
 
     def tag_handler(self, cmd):
         """Process a TagCommand."""
@@ -543,3 +599,25 @@ class GenericProcessor(processor.ImportProcessor):
         bzr_tag_name = name.decode('utf-8', 'replace')
         bzr_rev_id = self.cache_mgr.revision_ids[from_]
         self.tags[bzr_tag_name] = bzr_rev_id
+
+    def feature_handler(self, cmd):
+        """Process a FeatureCommand."""
+        feature = cmd.feature_name
+        if feature not in commands.FEATURE_NAMES:
+            raise plugin_errors.UnknownFeature(feature)
+
+    def debug(self, msg, *args):
+        """Output a debug message if the appropriate -D option was given."""
+        if "fast-import" in debug.debug_flags:
+            msg = "%s DEBUG: %s" % (self._time_of_day(), msg)
+            mutter(msg, *args)
+
+    def note(self, msg, *args):
+        """Output a note but timestamp it."""
+        msg = "%s %s" % (self._time_of_day(), msg)
+        note(msg, *args)
+
+    def warning(self, msg, *args):
+        """Output a warning but timestamp it."""
+        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
+        warning(msg, *args)
diff --git a/processors/info_processor.py b/processors/info_processor.py
deleted file mode 100644
index e90418c..0000000
--- a/processors/info_processor.py
+++ /dev/null
@@ -1,281 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Import processor that dump stats about the input (and doesn't import)."""
-
-
-from bzrlib.trace import (
-    note,
-    warning,
-    )
-from bzrlib.plugins.fastimport import (
-    cache_manager,
-    commands,
-    helpers,
-    processor,
-    )
-
-
-class InfoProcessor(processor.ImportProcessor):
-    """An import processor that dumps statistics about the input.
-
-    No changes to the current repository are made.
-
-    As well as providing useful information about an import
-    stream before importing it, this processor is useful for
-    benchmarking the speed at which data can be extracted from
-    the source.
-    """
-
-    def __init__(self, target=None, params=None, verbose=0, outf=None):
-        # Allow creation without a target
-        processor.ImportProcessor.__init__(self, target, params, verbose,
-            outf=outf)
-
-    def pre_process(self):
-        self.note("Collecting statistics ...")
-        # Init statistics
-        self.cmd_counts = {}
-        for cmd in commands.COMMAND_NAMES:
-            self.cmd_counts[cmd] = 0
-        self.file_cmd_counts = {}
-        for fc in commands.FILE_COMMAND_NAMES:
-            self.file_cmd_counts[fc] = 0
-        self.parent_counts = {}
-        self.max_parent_count = 0
-        self.committers = set()
-        self.separate_authors_found = False
-        self.symlinks_found = False
-        self.executables_found = False
-        self.sha_blob_references = False
-        self.lightweight_tags = 0
-        # Blob usage tracking
-        self.blobs = {}
-        for usage in ['new', 'used', 'unknown', 'unmarked']:
-            self.blobs[usage] = set()
-        self.blob_ref_counts = {}
-        # Head tracking - delegate to the cache manager
-        self.cache_mgr = cache_manager.CacheManager(inventory_cache_size=0)
-        # Stuff to cache: a map from mark to # of times that mark is merged
-        self.merges = {}
-        # Stuff to cache: these are maps from mark to sets
-        self.rename_old_paths = {}
-        self.copy_source_paths = {}
-
-    def post_process(self):
-        # Dump statistics
-        cmd_names = commands.COMMAND_NAMES
-        fc_names = commands.FILE_COMMAND_NAMES
-        cmd_values = [self.cmd_counts[c] for c in cmd_names]
-        fc_values = [self.file_cmd_counts[c] for c in fc_names]
-        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
-        self._dump_stats_group("File command counts", fc_names, fc_values, str)
-
-        # Commit stats
-        if self.cmd_counts['commit']:
-            p_names = []
-            p_values = []
-            for i in xrange(0, self.max_parent_count + 1):
-                if i in self.parent_counts:
-                    count = self.parent_counts[i]
-                    p_names.append("parents-%d" % i)
-                    p_values.append(count)
-            merges_count = len(self.merges.keys())
-            p_names.append('total revisions merged')
-            p_values.append(merges_count)
-            flags = {
-                'separate authors found': self.separate_authors_found,
-                'executables': self.executables_found,
-                'symlinks': self.symlinks_found,
-                'blobs referenced by SHA': self.sha_blob_references,
-                }
-            self._dump_stats_group("Parent counts", p_names, p_values, str)
-            self._dump_stats_group("Commit analysis", flags.keys(),
-                flags.values(), _found)
-            heads = helpers.invert_dictset(self.cache_mgr.heads)
-            self._dump_stats_group("Head analysis", heads.keys(),
-                heads.values(), None, _iterable_as_config_list)
-            # note("\t%d\t%s" % (len(self.committers), 'unique committers'))
-            self._dump_stats_group("Merges", self.merges.keys(),
-                self.merges.values(), None)
-            # We only show the rename old path and copy source paths when -vv
-            # (verbose=2) is specified. The output here for mysql's data can't
-            # be parsed currently so this bit of code needs more work anyhow ..
-            if self.verbose >= 2:
-                self._dump_stats_group("Rename old paths",
-                    self.rename_old_paths.keys(),
-                    self.rename_old_paths.values(), len,
-                    _iterable_as_config_list)
-                self._dump_stats_group("Copy source paths",
-                    self.copy_source_paths.keys(),
-                    self.copy_source_paths.values(), len,
-                    _iterable_as_config_list)
-
-        # Blob stats
-        if self.cmd_counts['blob']:
-            # In verbose mode, don't list every blob used
-            if self.verbose:
-                del self.blobs['used']
-            self._dump_stats_group("Blob usage tracking", self.blobs.keys(),
-                self.blobs.values(), len, _iterable_as_config_list)
-        if self.blob_ref_counts:
-            blobs_by_count = helpers.invert_dict(self.blob_ref_counts)
-            self._dump_stats_group("Blob reference counts",
-                blobs_by_count.keys(),
-                blobs_by_count.values(), len, _iterable_as_config_list)
-
-        # Other stats
-        if self.cmd_counts['reset']:
-            reset_stats = {
-                'lightweight tags': self.lightweight_tags,
-                }
-            self._dump_stats_group("Reset analysis", reset_stats.keys(),
-                reset_stats.values())
-
-    def _dump_stats_group(self, title, names, values, normal_formatter=None,
-        verbose_formatter=None):
-        """Dump a statistics group.
-        
-        In verbose mode, do so as a config file so
-        that other processors can load the information if they want to.
-        :param normal_formatter: the callable to apply to the value
-          before displaying it in normal mode
-        :param verbose_formatter: the callable to apply to the value
-          before displaying it in verbose mode
-        """
-        if self.verbose:
-            self.outf.write("[%s]\n" % (title,))
-            for name, value in zip(names, values):
-                if verbose_formatter is not None:
-                    value = verbose_formatter(value)
-                if type(name) == str:
-                    name = name.replace(' ', '-')
-                self.outf.write("%s = %s\n" % (name, value))
-            self.outf.write("\n")
-        else:
-            self.outf.write("%s:\n" % (title,))
-            for name, value in zip(names, values):
-                if normal_formatter is not None:
-                    value = normal_formatter(value)
-                self.outf.write("\t%s\t%s\n" % (value, name))
-
-    def progress_handler(self, cmd):
-        """Process a ProgressCommand."""
-        self.cmd_counts[cmd.name] += 1
-
-    def blob_handler(self, cmd):
-        """Process a BlobCommand."""
-        self.cmd_counts[cmd.name] += 1
-        if cmd.mark is None:
-            self.blobs['unmarked'].add(cmd.id)
-        else:
-            self.blobs['new'].add(cmd.id)
-            # Marks can be re-used so remove it from used if already there.
-            # Note: we definitely do NOT want to remove it from multi if
-            # it's already in that set.
-            try:
-                self.blobs['used'].remove(cmd.id)
-            except KeyError:
-                pass
-
-    def checkpoint_handler(self, cmd):
-        """Process a CheckpointCommand."""
-        self.cmd_counts[cmd.name] += 1
-
-    def commit_handler(self, cmd):
-        """Process a CommitCommand."""
-        self.cmd_counts[cmd.name] += 1
-        self.committers.add(cmd.committer)
-        if cmd.author is not None:
-            self.separate_authors_found = True
-        for fc in cmd.file_iter():
-            self.file_cmd_counts[fc.name] += 1
-            if isinstance(fc, commands.FileModifyCommand):
-                if fc.is_executable:
-                    self.executables_found = True
-                if fc.kind == commands.SYMLINK_KIND:
-                    self.symlinks_found = True
-                if fc.dataref is not None:
-                    if fc.dataref[0] == ':':
-                        self._track_blob(fc.dataref)
-                    else:
-                        self.sha_blob_references = True
-            elif isinstance(fc, commands.FileRenameCommand):
-                self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path)
-            elif isinstance(fc, commands.FileCopyCommand):
-                self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)
-
-        # Track the heads
-        parents = self.cache_mgr.track_heads(cmd)
-
-        # Track the parent counts
-        parent_count = len(parents)
-        if self.parent_counts.has_key(parent_count):
-            self.parent_counts[parent_count] += 1
-        else:
-            self.parent_counts[parent_count] = 1
-            if parent_count > self.max_parent_count:
-                self.max_parent_count = parent_count
-
-        # Remember the merges
-        if cmd.merges:
-            #self.merges.setdefault(cmd.ref, set()).update(cmd.merges)
-            for merge in cmd.merges:
-                if merge in self.merges:
-                    self.merges[merge] += 1
-                else:
-                    self.merges[merge] = 1
-
-    def reset_handler(self, cmd):
-        """Process a ResetCommand."""
-        self.cmd_counts[cmd.name] += 1
-        if cmd.ref.startswith('refs/tags/'):
-            self.lightweight_tags += 1
-        else:
-            if cmd.from_ is not None:
-                self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)
-
-    def tag_handler(self, cmd):
-        """Process a TagCommand."""
-        self.cmd_counts[cmd.name] += 1
-
-    def _track_blob(self, mark):
-        if mark in self.blob_ref_counts:
-            self.blob_ref_counts[mark] += 1
-            pass
-        elif mark in self.blobs['used']:
-            self.blob_ref_counts[mark] = 2
-            self.blobs['used'].remove(mark)
-        elif mark in self.blobs['new']:
-            self.blobs['used'].add(mark)
-            self.blobs['new'].remove(mark)
-        else:
-            self.blobs['unknown'].add(mark)
-
-def _found(b):
-    """Format a found boolean as a string."""
-    return ['no', 'found'][b]
-
-def _iterable_as_config_list(s):
-    """Format an iterable as a sequence of comma-separated strings.
-    
-    To match what ConfigObj expects, a single item list has a trailing comma.
-    """
-    items = sorted(s)
-    if len(items) == 1:
-        return "%s," % (items[0],)
-    else:
-        return ", ".join(items)
diff --git a/processors/query_processor.py b/processors/query_processor.py
deleted file mode 100644
index dfee745..0000000
--- a/processors/query_processor.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Import processor that queries the input (and doesn't import)."""
-
-
-from bzrlib.plugins.fastimport import (
-    commands,
-    processor,
-    )
-
-
-class QueryProcessor(processor.ImportProcessor):
-    """An import processor that queries the input.
-
-    No changes to the current repository are made.
-    """
-
-    known_params = commands.COMMAND_NAMES + commands.FILE_COMMAND_NAMES
-
-    def __init__(self, target=None, params=None, verbose=False):
-        # Allow creation without a target
-        processor.ImportProcessor.__init__(self, target, params, verbose)
-        self.parsed_params = {}
-        if params:
-            for name, value in params.iteritems():
-                if value == 1:
-                    # All fields
-                    fields = None
-                else:
-                    fields = value.split(',')
-                self.parsed_params[name] = fields
-
-    def pre_handler(self, cmd):
-        """Hook for logic before each handler starts."""
-        if self.parsed_params.has_key(cmd.name):
-            fields = self.parsed_params[cmd.name]
-            str = cmd.dump_str(fields, self.parsed_params, self.verbose)
-            print "%s" % (str,)
-
-    def progress_handler(self, cmd):
-        """Process a ProgressCommand."""
-        pass
-
-    def blob_handler(self, cmd):
-        """Process a BlobCommand."""
-        pass
-
-    def checkpoint_handler(self, cmd):
-        """Process a CheckpointCommand."""
-        pass
-
-    def commit_handler(self, cmd):
-        """Process a CommitCommand."""
-        for fc in cmd.file_iter():
-            pass
-
-    def reset_handler(self, cmd):
-        """Process a ResetCommand."""
-        pass
-
-    def tag_handler(self, cmd):
-        """Process a TagCommand."""
-        pass
diff --git a/revision_store.py b/revision_store.py
index d2ab2d3..4ec4ba3 100644
--- a/revision_store.py
+++ b/revision_store.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -16,9 +16,146 @@
 
 """An abstraction of a repository providing just the bits importing needs."""
 
+import cStringIO
 
-from bzrlib import errors, inventory, knit, lru_cache, osutils
-from bzrlib import revision as _mod_revision
+from bzrlib import (
+    errors,
+    graph as _mod_graph,
+    inventory,
+    knit,
+    lru_cache,
+    osutils,
+    revision as _mod_revision,
+    trace,
+    )
+
+
+class _TreeShim(object):
+    """Fake a Tree implementation.
+
+    This implements just enough of the tree api to make commit builder happy.
+    """
+
+    def __init__(self, repo, basis_inv, inv_delta, content_provider):
+        self._repo = repo
+        self._content_provider = content_provider
+        self._basis_inv = basis_inv
+        self._inv_delta = inv_delta
+        self._new_info_by_id = dict([(file_id, (new_path, ie))
+                                    for _, new_path, file_id, ie in inv_delta])
+
+    def id2path(self, file_id):
+        if file_id in self._new_info_by_id:
+            new_path = self._new_info_by_id[file_id][0]
+            if new_path is None:
+                raise errors.NoSuchId(self, file_id)
+            return new_path
+        return self._basis_inv.id2path(file_id)
+
+    def path2id(self, path):
+        # CommitBuilder currently only requires access to the root id. We don't
+        # build a map of renamed files, etc. One possibility if we ever *do*
+        # need more than just root, is to defer to basis_inv.path2id() and then
+        # check if the file_id is in our _new_info_by_id dict. And in that
+        # case, return _new_info_by_id[file_id][0]
+        if path != '':
+            raise NotImplementedError(_TreeShim.path2id)
+        # TODO: Handle root renames?
+        return self._basis_inv.root.file_id
+
+    def get_file_with_stat(self, file_id, path=None):
+        content = self.get_file_text(file_id, path)
+        sio = cStringIO.StringIO(content)
+        return sio, None
+
+    def get_file_text(self, file_id, path=None):
+        try:
+            return self._content_provider(file_id)
+        except KeyError:
+            # The content wasn't shown as 'new'. Just validate this fact
+            assert file_id not in self._new_info_by_id
+            old_ie = self._basis_inv[file_id]
+            old_text_key = (file_id, old_ie.revision)
+            stream = self._repo.texts.get_record_stream([old_text_key],
+                                                        'unordered', True)
+            return stream.next().get_bytes_as('fulltext')
+
+    def get_symlink_target(self, file_id):
+        if file_id in self._new_info_by_id:
+            ie = self._new_info_by_id[file_id][1]
+            return ie.symlink_target
+        return self._basis_inv[file_id].symlink_target
+
+    def get_reference_revision(self, file_id, path=None):
+        raise NotImplementedError(_TreeShim.get_reference_revision)
+
+    def _delta_to_iter_changes(self):
+        """Convert the inv_delta into an iter_changes repr.
+
+        Yields one change tuple per entry of the inventory delta, in order.
+
+        :raises AssertionError: if a delta entry has no new inventory entry
+            and its file id is not present in the basis inventory either.
+        """
+        # iter_changes is:
+        #   (file_id,
+        #    (old_path, new_path),
+        #    content_changed,
+        #    (old_versioned, new_versioned),
+        #    (old_parent_id, new_parent_id),
+        #    (old_name, new_name),
+        #    (old_kind, new_kind),
+        #    (old_exec, new_exec),
+        #   )
+        basis_inv = self._basis_inv
+        for old_path, new_path, file_id, ie in self._inv_delta:
+            # Perf: Would this be faster if we did 'if file_id in basis_inv'?
+            # Since the *very* common case is that the file already exists, it
+            # probably is better to optimize for that
+            try:
+                old_ie = basis_inv[file_id]
+            except errors.NoSuchId:
+                if ie is None:
+                    # Absent from the basis and from the result: the delta
+                    # entry describes nothing, so refuse to continue.
+                    raise AssertionError('How is both old and new None?')
+                # Addition: the file id only exists on the new side.
+                change = (file_id,
+                    (old_path, new_path),
+                    True,
+                    (False, True),
+                    (None, ie.parent_id),
+                    (None, ie.name),
+                    (None, ie.kind),
+                    (None, ie.executable),
+                    )
+            else:
+                if ie is None:
+                    # Deletion: the file id only exists on the basis side.
+                    change = (file_id,
+                        (old_path, new_path),
+                        True,
+                        (True, False),
+                        (old_ie.parent_id, None),
+                        (old_ie.name, None),
+                        (old_ie.kind, None),
+                        (old_ie.executable, None),
+                        )
+                else:
+                    content_modified = (ie.text_sha1 != old_ie.text_sha1
+                                        or ie.text_size != old_ie.text_size)
+                    # TODO: ie.kind != old_ie.kind
+                    # TODO: symlinks changing targets, content_modified?
+                    change = (file_id,
+                        (old_path, new_path),
+                        content_modified,
+                        (True, True),
+                        (old_ie.parent_id, ie.parent_id),
+                        (old_ie.name, ie.name),
+                        (old_ie.kind, ie.kind),
+                        (old_ie.executable, ie.executable),
+                        )
+            yield change
 
 
 class AbstractRevisionStore(object):
@@ -33,6 +170,8 @@ class AbstractRevisionStore(object):
         :param repository: the target repository
         """
         self.repo = repo
+        self._graph = None
+        self._use_known_graph = True
         self._supports_chks = getattr(repo._format, 'supports_chks', False)
 
     def expects_rich_root(self):
@@ -224,29 +363,66 @@ class AbstractRevisionStore(object):
                 including an empty inventory for the missing revisions
             If None, a default implementation is provided.
         """
-        # Get the non-ghost parents and their inventories
-        if inventories_provider is None:
-            inventories_provider = self._default_inventories_provider
-        present_parents, parent_invs = inventories_provider(rev.parent_ids)
-
-        # Load the inventory
-        try:
-            rev_id = rev.revision_id
-            rev.inventory_sha1, inv = self._add_inventory_by_delta(
-                rev_id, basis_inv, inv_delta, present_parents, parent_invs)
-        except errors.RevisionAlreadyPresent:
+        # TODO: set revision_id = rev.revision_id
+        builder = self.repo._commit_builder_class(self.repo,
+            parents=rev.parent_ids, config=None, timestamp=rev.timestamp,
+            timezone=rev.timezone, committer=rev.committer,
+            revprops=rev.properties, revision_id=rev.revision_id)
+        if self._graph is None and self._use_known_graph:
+            if (getattr(_mod_graph, 'GraphThunkIdsToKeys', None) and
+                getattr(_mod_graph.GraphThunkIdsToKeys, "add_node", None) and
+                getattr(self.repo, "get_known_graph_ancestry", None)):
+                self._graph = self.repo.get_known_graph_ancestry(
+                    rev.parent_ids)
+            else:
+                self._use_known_graph = False
+        if self._graph is not None:
+            orig_heads = builder._heads
+            def thunked_heads(file_id, revision_ids):
+                # self._graph thinks in terms of keys, not ids, so translate
+                # them
+                # old_res = orig_heads(file_id, revision_ids)
+                if len(revision_ids) < 2:
+                    res = set(revision_ids)
+                else:
+                    res = set(self._graph.heads(revision_ids))
+                # if old_res != res:
+                #     import pdb; pdb.set_trace()
+                return res
+            builder._heads = thunked_heads
+
+        if rev.parent_ids:
+            basis_rev_id = rev.parent_ids[0]
+        else:
+            basis_rev_id = _mod_revision.NULL_REVISION
+        tree = _TreeShim(self.repo, basis_inv, inv_delta, text_provider)
+        changes = tree._delta_to_iter_changes()
+        for (file_id, path, fs_hash) in builder.record_iter_changes(
+                tree, basis_rev_id, changes):
+            # So far, we don't *do* anything with the result
             pass
+        builder.finish_inventory()
+        # TODO: This is working around a bug in the bzrlib code base.
+        # 'builder.finish_inventory()' ends up doing:
+        # self.inv_sha1 = self.repository.add_inventory_by_delta(...)
+        # However, add_inventory_by_delta returns (sha1, inv)
+        # And we *want* to keep a handle on both of those objects
+        if isinstance(builder.inv_sha1, tuple):
+            builder.inv_sha1, builder.new_inventory = builder.inv_sha1
+        # This is a duplicate of Builder.commit() since we already have the
+        # Revision object, and we *don't* want to call commit_write_group()
+        rev.inv_sha1 = builder.inv_sha1
+        builder.repository.add_revision(builder._new_revision_id, rev,
+            builder.new_inventory, builder._config)
+        if self._graph is not None:
+            # TODO: Use StaticTuple and .intern() for these things
+            self._graph.add_node(builder._new_revision_id, rev.parent_ids)
 
-        # Load the texts, signature and revision
-        file_rev_ids_needing_texts = [(id, ie.revision)
-            for _, n, id, ie in inv_delta
-            if n is not None and ie.revision == rev_id]
-        self._load_texts_for_file_rev_ids(file_rev_ids_needing_texts,
-            text_provider, parents_provider)
         if signature is not None:
-            self.repo.add_signature_text(rev_id, signature)
-        self._add_revision(rev, inv)
-        return inv
+            raise AssertionError('signatures not guaranteed yet')
+            self.repo.add_signature_text(rev.revision_id, signature)
+        # self._add_revision(rev, inv)
+        return builder.revision_tree().inventory
 
     def _non_root_entries_iter(self, inv, revision_id):
         if hasattr(inv, 'iter_non_root_entries'):
@@ -305,14 +481,19 @@ class AbstractRevisionStore(object):
         """
         if len(parents):
             if self._supports_chks:
-                validator, new_inv = self.repo.add_inventory_by_delta(parents[0],
-                    inv_delta, revision_id, parents, basis_inv=basis_inv,
-                    propagate_caches=False)
+                try:
+                    validator, new_inv = self.repo.add_inventory_by_delta(parents[0],
+                        inv_delta, revision_id, parents, basis_inv=basis_inv,
+                        propagate_caches=False)
+                except errors.InconsistentDelta:
+                    #print "BASIS INV IS\n%s\n" % "\n".join([str(i) for i in basis_inv.iter_entries_by_dir()])
+                    trace.mutter("INCONSISTENT DELTA IS:\n%s\n" % "\n".join([str(i) for i in inv_delta]))
+                    raise
             else:
                 validator, new_inv = self.repo.add_inventory_by_delta(parents[0],
                     inv_delta, revision_id, parents)
         else:
-            if hasattr(basis_inv, 'create_by_apply_delta'):
+            if isinstance(basis_inv, inventory.CHKInventory):
                 new_inv = basis_inv.create_by_apply_delta(inv_delta, revision_id)
             else:
                 new_inv = inventory.Inventory(revision_id=revision_id)
diff --git a/setup.py b/setup.py
index f4d5d0c..23a7a93 100755
--- a/setup.py
+++ b/setup.py
@@ -3,12 +3,12 @@ from distutils.core import setup
 
 bzr_plugin_name = 'fastimport'
 
-bzr_plugin_version = (0, 9, 0, 'dev', 0)
+bzr_plugin_version = (0, 10, 0, 'dev', 0)
 bzr_minimum_version = (1, 1, 0)
 bzr_maximum_version = None
 
 if __name__ == '__main__':
-    setup(name="fastimport",
+    setup(name="bzr-fastimport",
           version="0.9.0dev0",
           description="stream-based import into and export from Bazaar.",
           author="Canonical Ltd",
@@ -17,6 +17,7 @@ if __name__ == '__main__':
           url="https://launchpad.net/bzr-fastimport",
           scripts=[],
           packages=['bzrlib.plugins.fastimport',
+                    'bzrlib.plugins.fastimport.exporters',
                     'bzrlib.plugins.fastimport.processors',
                     'bzrlib.plugins.fastimport.tests',
                     ],
diff --git a/tests/__init__.py b/tests/__init__.py
index 711b605..47441e6 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -17,19 +17,35 @@
 """Tests for bzr-fastimport."""
 
 
-from bzrlib.tests.TestUtil import TestLoader, TestSuite
+from bzrlib import errors as bzr_errors
+from bzrlib.tests import Feature, TestLoader
+from bzrlib.plugins.fastimport import load_fastimport
+
+
+class _FastimportFeature(Feature):
+
+    def _probe(self):
+        try:
+            load_fastimport()
+        except bzr_errors.DependencyNotPresent:
+            return False
+        return True
+
+    def feature_name(self):
+        return 'fastimport'
+
+
+FastimportFeature = _FastimportFeature()
+
 
 
 def test_suite():
-    module_names = [
-        'bzrlib.plugins.fastimport.tests.test_branch_mapper',
-        'bzrlib.plugins.fastimport.tests.test_commands',
-        'bzrlib.plugins.fastimport.tests.test_errors',
-        'bzrlib.plugins.fastimport.tests.test_filter_processor',
-        'bzrlib.plugins.fastimport.tests.test_generic_processor',
-        'bzrlib.plugins.fastimport.tests.test_head_tracking',
-        'bzrlib.plugins.fastimport.tests.test_helpers',
-        'bzrlib.plugins.fastimport.tests.test_parser',
-        ]
+    module_names = [__name__ + '.' + x for x in [
+        'test_commands',
+        'test_exporter',
+        'test_branch_mapper',
+        'test_generic_processor',
+        'test_revision_store',
+        ]]
     loader = TestLoader()
     return loader.loadTestsFromModuleNames(module_names)
diff --git a/tests/test_branch_mapper.py b/tests/test_branch_mapper.py
index fe1b533..6d6f170 100644
--- a/tests/test_branch_mapper.py
+++ b/tests/test_branch_mapper.py
@@ -22,47 +22,49 @@ from bzrlib.plugins.fastimport import (
     branch_mapper,
     )
 
+from bzrlib.plugins.fastimport.tests import (
+    FastimportFeature,
+    )
+
 
 class TestBranchMapper(tests.TestCase):
 
+    _test_needs_features = [FastimportFeature]
+
     def test_git_to_bzr(self):
         m = branch_mapper.BranchMapper()
-        git_refs = [
-            'refs/heads/master',
-            'refs/heads/foo',
-            'refs/tags/master',
-            'refs/tags/foo',
-            'refs/remotes/origin/master',
-            'refs/remotes/origin/foo',
-            ]
-        git_to_bzr_map = m.git_to_bzr(git_refs)
-        self.assertEqual(git_to_bzr_map, {
+        for git, bzr in {
             'refs/heads/master':                'trunk',
             'refs/heads/foo':                   'foo',
             'refs/tags/master':                 'trunk.tag',
             'refs/tags/foo':                    'foo.tag',
             'refs/remotes/origin/master':       'trunk.remote',
             'refs/remotes/origin/foo':          'foo.remote',
-            })
+            }.items():
+            self.assertEqual(m.git_to_bzr(git), bzr)
+
+    def test_git_to_bzr_with_slashes(self):
+        m = branch_mapper.BranchMapper()
+        for git, bzr in {
+            'refs/heads/master/slave':              'master/slave',
+            'refs/heads/foo/bar':                   'foo/bar',
+            'refs/tags/master/slave':               'master/slave.tag',
+            'refs/tags/foo/bar':                    'foo/bar.tag',
+            'refs/remotes/origin/master/slave':     'master/slave.remote',
+            'refs/remotes/origin/foo/bar':          'foo/bar.remote',
+            }.items():
+            self.assertEqual(m.git_to_bzr(git), bzr)
 
     def test_git_to_bzr_for_trunk(self):
         # As 'master' in git is mapped to trunk in bzr, we need to handle
         # 'trunk' in git in a sensible way.
         m = branch_mapper.BranchMapper()
-        git_refs = [
-            'refs/heads/trunk',
-            'refs/tags/trunk',
-            'refs/remotes/origin/trunk',
-            'refs/heads/git-trunk',
-            'refs/tags/git-trunk',
-            'refs/remotes/origin/git-trunk',
-            ]
-        git_to_bzr_map = m.git_to_bzr(git_refs)
-        self.assertEqual(git_to_bzr_map, {
+        for git, bzr in {
             'refs/heads/trunk':             'git-trunk',
             'refs/tags/trunk':              'git-trunk.tag',
             'refs/remotes/origin/trunk':    'git-trunk.remote',
             'refs/heads/git-trunk':         'git-git-trunk',
             'refs/tags/git-trunk':          'git-git-trunk.tag',
             'refs/remotes/origin/git-trunk':'git-git-trunk.remote',
-            })
+            }.items():
+            self.assertEqual(m.git_to_bzr(git), bzr)
diff --git a/tests/test_commands.py b/tests/test_commands.py
index 5eb9418..81a43c8 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2009 Canonical Ltd
+# Copyright (C) 2010 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -14,268 +14,45 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-"""Test how Commands are displayed"""
+"""Test the command implementations."""
+
+import os
+import tempfile
+import gzip
 
 from bzrlib import tests
 
-from bzrlib.plugins.fastimport import (
-    commands,
+from bzrlib.plugins.fastimport.cmds import (
+    _get_source_stream,
     )
 
-
-class TestBlobDisplay(tests.TestCase):
-
-    def test_blob(self):
-        c = commands.BlobCommand("1", "hello world")
-        self.assertEqual("blob\nmark :1\ndata 11\nhello world", repr(c))
-
-    def test_blob_no_mark(self):
-        c = commands.BlobCommand(None, "hello world")
-        self.assertEqual("blob\ndata 11\nhello world", repr(c))
-
-
-class TestCheckpointDisplay(tests.TestCase):
-
-    def test_checkpoint(self):
-        c = commands.CheckpointCommand()
-        self.assertEqual("checkpoint", repr(c))
-
-
-class TestCommitDisplay(tests.TestCase):
-
-    def test_commit(self):
-        # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
-        committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
-        c = commands.CommitCommand("refs/heads/master", "bbb", None, committer,
-            "release v1.0", ":aaa", None, None)
-        self.assertEqualDiff(
-            "commit refs/heads/master\n"
-            "mark :bbb\n"
-            "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
-            "data 12\n"
-            "release v1.0\n"
-            "from :aaa",
-            repr(c))
-
-    def test_commit_unicode_committer(self):
-        # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
-        name = u'\u013d\xf3r\xe9m \xcdp\u0161\xfam'
-        name_utf8 = name.encode('utf8')
-        committer = (name, 'test@example.com', 1234567890, -6 * 3600)
-        c = commands.CommitCommand("refs/heads/master", "bbb", None, committer,
-            "release v1.0", ":aaa", None, None)
-        self.assertEqualDiff(
-            "commit refs/heads/master\n"
-            "mark :bbb\n"
-            "committer %s <test@example.com> 1234567890 -0600\n"
-            "data 12\n"
-            "release v1.0\n"
-            "from :aaa" % (name_utf8,),
-            repr(c))
-
-    def test_commit_no_mark(self):
-        # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
-        committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
-        c = commands.CommitCommand("refs/heads/master", None, None, committer,
-            "release v1.0", ":aaa", None, None)
-        self.assertEqualDiff(
-            "commit refs/heads/master\n"
-            "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
-            "data 12\n"
-            "release v1.0\n"
-            "from :aaa",
-            repr(c))
-
-    def test_commit_no_from(self):
-        # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
-        committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
-        c = commands.CommitCommand("refs/heads/master", "bbb", None, committer,
-            "release v1.0", None, None, None)
-        self.assertEqualDiff(
-            "commit refs/heads/master\n"
-            "mark :bbb\n"
-            "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
-            "data 12\n"
-            "release v1.0",
-            repr(c))
-
-    def test_commit_with_author(self):
-        # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
-        author = ('Sue Wong', 'sue@example.com', 1234565432, -6 * 3600)
-        committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
-        c = commands.CommitCommand("refs/heads/master", "bbb", author,
-            committer, "release v1.0", ":aaa", None, None)
-        self.assertEqualDiff(
-            "commit refs/heads/master\n"
-            "mark :bbb\n"
-            "author Sue Wong <sue@example.com> 1234565432 -0600\n"
-            "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
-            "data 12\n"
-            "release v1.0\n"
-            "from :aaa",
-            repr(c))
-
-    def test_commit_with_merges(self):
-        # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
-        committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
-        c = commands.CommitCommand("refs/heads/master", "ddd", None, committer,
-                "release v1.0", ":aaa", [':bbb', ':ccc'], None)
-        self.assertEqualDiff(
-            "commit refs/heads/master\n"
-            "mark :ddd\n"
-            "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
-            "data 12\n"
-            "release v1.0\n"
-            "from :aaa\n"
-            "merge :bbb\n"
-            "merge :ccc",
-            repr(c))
-
-    def test_commit_with_filecommands(self):
-        file_cmds = iter([
-            commands.FileDeleteCommand('readme.txt'),
-            commands.FileModifyCommand('NEWS', 'file', False, None,
-                'blah blah blah'),
-            ])
-        # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
-        committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
-        c = commands.CommitCommand("refs/heads/master", "bbb", None, committer,
-            "release v1.0", ":aaa", None, file_cmds)
-        self.assertEqualDiff(
-            "commit refs/heads/master\n"
-            "mark :bbb\n"
-            "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
-            "data 12\n"
-            "release v1.0\n"
-            "from :aaa\n"
-            "D readme.txt\n"
-            "M 644 inline NEWS\n"
-            "data 14\n"
-            "blah blah blah",
-            repr(c))
-
-
-class TestProgressDisplay(tests.TestCase):
-
-    def test_progress(self):
-        c = commands.ProgressCommand("doing foo")
-        self.assertEqual("progress doing foo", repr(c))
-
-
-class TestResetDisplay(tests.TestCase):
-
-    def test_reset(self):
-        c = commands.ResetCommand("refs/tags/v1.0", ":xxx")
-        self.assertEqual("reset refs/tags/v1.0\nfrom :xxx\n", repr(c))
-
-    def test_reset_no_from(self):
-        c = commands.ResetCommand("refs/remotes/origin/master", None)
-        self.assertEqual("reset refs/remotes/origin/master", repr(c))
-
-
-class TestTagDisplay(tests.TestCase):
-
-    def test_tag(self):
-        # tagger tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
-        tagger = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
-        c = commands.TagCommand("refs/tags/v1.0", ":xxx", tagger, "create v1.0")
-        self.assertEqual(
-            "tag refs/tags/v1.0\n"
-            "from :xxx\n"
-            "tagger Joe Wong <joe@example.com> 1234567890 -0600\n"
-            "data 11\n"
-            "create v1.0",
-            repr(c))
-
-    def test_tag_no_from(self):
-        tagger = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
-        c = commands.TagCommand("refs/tags/v1.0", None, tagger, "create v1.0")
-        self.assertEqualDiff(
-            "tag refs/tags/v1.0\n"
-            "tagger Joe Wong <joe@example.com> 1234567890 -0600\n"
-            "data 11\n"
-            "create v1.0",
-            repr(c))
-
-
-class TestFileModifyDisplay(tests.TestCase):
-
-    def test_filemodify_file(self):
-        c = commands.FileModifyCommand("foo/bar", "file", False, ":23", None)
-        self.assertEqual("M 644 :23 foo/bar", repr(c))
-
-    def test_filemodify_file_executable(self):
-        c = commands.FileModifyCommand("foo/bar", "file", True, ":23", None)
-        self.assertEqual("M 755 :23 foo/bar", repr(c))
-
-    def test_filemodify_file_internal(self):
-        c = commands.FileModifyCommand("foo/bar", "file", False, None,
-            "hello world")
-        self.assertEqual("M 644 inline foo/bar\ndata 11\nhello world", repr(c))
-
-    def test_filemodify_symlink(self):
-        c = commands.FileModifyCommand("foo/bar", "symlink", False, None, "baz")
-        self.assertEqual("M 120000 inline foo/bar\ndata 3\nbaz", repr(c))
-
-
-class TestFileDeleteDisplay(tests.TestCase):
-
-    def test_filedelete(self):
-        c = commands.FileDeleteCommand("foo/bar")
-        self.assertEqual("D foo/bar", repr(c))
-
-
-class TestFileCopyDisplay(tests.TestCase):
-
-    def test_filecopy(self):
-        c = commands.FileCopyCommand("foo/bar", "foo/baz")
-        self.assertEqual("C foo/bar foo/baz", repr(c))
-
-    def test_filecopy_quoted(self):
-        # Check the first path is quoted if it contains spaces
-        c = commands.FileCopyCommand("foo/b a r", "foo/b a z")
-        self.assertEqual('C "foo/b a r" foo/b a z', repr(c))
-
-
-class TestFileRenameDisplay(tests.TestCase):
-
-    def test_filerename(self):
-        c = commands.FileRenameCommand("foo/bar", "foo/baz")
-        self.assertEqual("R foo/bar foo/baz", repr(c))
-
-    def test_filerename_quoted(self):
-        # Check the first path is quoted if it contains spaces
-        c = commands.FileRenameCommand("foo/b a r", "foo/b a z")
-        self.assertEqual('R "foo/b a r" foo/b a z', repr(c))
-
-
-class TestFileDeleteAllDisplay(tests.TestCase):
-
-    def test_filedeleteall(self):
-        c = commands.FileDeleteAllCommand()
-        self.assertEqual("deleteall", repr(c))
+from bzrlib.plugins.fastimport.tests import (
+    FastimportFeature,
+    )
 
 
-class TestPathChecking(tests.TestCase):
+class TestSourceStream(tests.TestCase):
 
-    def test_filemodify_path_checking(self):
-        self.assertRaises(ValueError, commands.FileModifyCommand, "",
-            "file", False, None, "text")
-        self.assertRaises(ValueError, commands.FileModifyCommand, None,
-            "file", False, None, "text")
+    _test_needs_features = [FastimportFeature]
 
-    def test_filedelete_path_checking(self):
-        self.assertRaises(ValueError, commands.FileDeleteCommand, "")
-        self.assertRaises(ValueError, commands.FileDeleteCommand, None)
+    def test_get_source_stream_stdin(self):
+        # - returns standard in
+        self.assertIsNot(None, _get_source_stream("-"))
 
-    def test_filerename_path_checking(self):
-        self.assertRaises(ValueError, commands.FileRenameCommand, "", "foo")
-        self.assertRaises(ValueError, commands.FileRenameCommand, None, "foo")
-        self.assertRaises(ValueError, commands.FileRenameCommand, "foo", "")
-        self.assertRaises(ValueError, commands.FileRenameCommand, "foo", None)
+    def test_get_source_gz(self):
+        # files ending in .gz are automatically decompressed.
+        fd, filename = tempfile.mkstemp(suffix=".gz")
+        f = gzip.GzipFile(fileobj=os.fdopen(fd, "w"), mode='w')
+        f.write("bla")
+        f.close()
+        stream = _get_source_stream(filename)
+        self.assertEqual("bla", stream.read())
 
-    def test_filecopy_path_checking(self):
-        self.assertRaises(ValueError, commands.FileCopyCommand, "", "foo")
-        self.assertRaises(ValueError, commands.FileCopyCommand, None, "foo")
-        self.assertRaises(ValueError, commands.FileCopyCommand, "foo", "")
-        self.assertRaises(ValueError, commands.FileCopyCommand, "foo", None)
+    def test_get_source_file(self):
+        # other files are opened as regular files.
+        fd, filename = tempfile.mkstemp()
+        f = os.fdopen(fd, 'w')
+        f.write("bla")
+        f.close()
+        stream = _get_source_stream(filename)
+        self.assertEqual("bla", stream.read())
diff --git a/tests/test_errors.py b/tests/test_errors.py
deleted file mode 100644
index ac63b29..0000000
--- a/tests/test_errors.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Test the Import errors"""
-
-from bzrlib import tests
-
-from bzrlib.plugins.fastimport import (
-    errors,
-    )
-
-
-class TestErrors(tests.TestCase):
-
-    def test_MissingBytes(self):
-        e = errors.MissingBytes(99, 10, 8)
-        self.assertEqual("line 99: Unexpected EOF - expected 10 bytes, found 8",
-            str(e))
-
-    def test_MissingTerminator(self):
-        e = errors.MissingTerminator(99, '---')
-        self.assertEqual("line 99: Unexpected EOF - expected '---' terminator",
-            str(e))
-
-    def test_InvalidCommand(self):
-        e = errors.InvalidCommand(99, 'foo')
-        self.assertEqual("line 99: Invalid command 'foo'",
-            str(e))
-
-    def test_MissingSection(self):
-        e = errors.MissingSection(99, 'foo', 'bar')
-        self.assertEqual("line 99: Command foo is missing section bar",
-            str(e))
-
-    def test_BadFormat(self):
-        e = errors.BadFormat(99, 'foo', 'bar', 'xyz')
-        self.assertEqual("line 99: Bad format for section bar in "
-            "command foo: found 'xyz'",
-            str(e))
-
-    def test_InvalidTimezone(self):
-        e = errors.InvalidTimezone(99, 'aa:bb')
-        self.assertEqual('aa:bb', e.timezone)
-        self.assertEqual('', e.reason)
-        self.assertEqual("line 99: Timezone 'aa:bb' could not be converted.",
-            str(e))
-        e = errors.InvalidTimezone(99, 'aa:bb', 'Non-numeric hours')
-        self.assertEqual('aa:bb', e.timezone)
-        self.assertEqual(' Non-numeric hours', e.reason)
-        self.assertEqual("line 99: Timezone 'aa:bb' could not be converted."
-             " Non-numeric hours",
-             str(e))
-
-    def test_UnknownDateFormat(self):
-        e = errors.UnknownDateFormat('aaa')
-        self.assertEqual("Unknown date format 'aaa'", str(e))
-
-    def test_MissingHandler(self):
-        e = errors.MissingHandler('foo')
-        self.assertEqual("Missing handler for command foo", str(e))
diff --git a/tests/test_exporter.py b/tests/test_exporter.py
new file mode 100644
index 0000000..fe50e3b
--- /dev/null
+++ b/tests/test_exporter.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Test the exporter."""
+
+import os
+import tempfile
+import gzip
+
+from bzrlib import tests
+
+from bzrlib.plugins.fastimport.exporter import (
+    _get_output_stream,
+    )
+
+from bzrlib.plugins.fastimport.tests import (
+    FastimportFeature,
+    )
+
+
+class TestOutputStream(tests.TestCase):
+    """Check the stream _get_output_stream opens for each kind of target."""
+    _test_needs_features = [FastimportFeature]
+
+    def test_get_output_stream_stdout(self):
+        # "-" is the name for standard out; some stream must come back.
+        self.assertIsNot(None, _get_output_stream("-"))
+
+    def test_get_source_gz(self):
+        fd, filename = tempfile.mkstemp(suffix=".gz")
+        os.close(fd)
+        stream = _get_output_stream(filename)
+        stream.write("bla")
+        stream.close()
+        # A .gz target is written compressed: read it back through gzip.
+        f = gzip.GzipFile(filename)
+        self.addCleanup(f.close)  # closed even if the assertion fails
+        self.assertEqual("bla", f.read())
+
+    def test_get_source_file(self):
+        # Any other target is written as a plain, uncompressed file.
+        fd, filename = tempfile.mkstemp()
+        os.close(fd)
+        stream = _get_output_stream(filename)
+        stream.write("foo")
+        stream.close()
+        f = open(filename, 'r')
+        self.addCleanup(f.close)  # closed even if the assertion fails
+        self.assertEqual("foo", f.read())
diff --git a/tests/test_filter_processor.py b/tests/test_filter_processor.py
deleted file mode 100644
index ff8a09f..0000000
--- a/tests/test_filter_processor.py
+++ /dev/null
@@ -1,877 +0,0 @@
-# Copyright (C) 2009 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Test FilterProcessor"""
-
-from cStringIO import StringIO
-
-from bzrlib import tests
-
-from bzrlib.plugins.fastimport import (
-    parser,
-    )
-from bzrlib.plugins.fastimport.processors.filter_processor import (
-    FilterProcessor,
-    )
-
-
-# A sample input stream containing all (top level) import commands
-_SAMPLE_ALL = \
-"""blob
-mark :1
-data 4
-foo
-commit refs/heads/master
-mark :2
-committer Joe <joe@example.com> 1234567890 +1000
-data 14
-Initial import
-M 644 :1 COPYING
-checkpoint
-progress first import done
-reset refs/remote/origin/master
-from :2
-tag v0.1
-from :2
-tagger Joe <joe@example.com> 1234567890 +1000
-data 12
-release v0.1
-"""
-
-
-# A sample input stream creating the following tree:
-#
-#  NEWS
-#  doc/README.txt
-#  doc/index.txt
-_SAMPLE_WITH_DIR = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :2 NEWS
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :101
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-"""
-
-
-class TestCaseWithFiltering(tests.TestCase):
-
-    def assertFiltering(self, input, params, expected):
-        outf = StringIO()
-        proc = FilterProcessor(None, params=params)
-        proc.outf = outf
-        s = StringIO(input)
-        p = parser.ImportParser(s)
-        proc.process(p.iter_commands)
-        out = outf.getvalue()
-        self.assertEqualDiff(expected, out)
-
-
-class TestNoFiltering(TestCaseWithFiltering):
-
-    def test_params_not_given(self):
-        self.assertFiltering(_SAMPLE_ALL, None, _SAMPLE_ALL)
-
-    def test_params_are_none(self):
-        params = {'include_paths': None, 'exclude_paths': None}
-        self.assertFiltering(_SAMPLE_ALL, params, _SAMPLE_ALL)
-
-
-class TestIncludePaths(TestCaseWithFiltering):
-
-    def test_file_in_root(self):
-        # Things to note:
-        # * only referenced blobs are retained
-        # * from clause is dropped from the first command
-        params = {'include_paths': ['NEWS']}
-        self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :2 NEWS
-""")
-
-    def test_file_in_subdir(self):
-        #  Additional things to note:
-        # * new root: path is now index.txt, not doc/index.txt
-        # * other files changed in matching commits are excluded
-        params = {'include_paths': ['doc/index.txt']}
-        self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :4 index.txt
-""")
-
-    def test_file_with_changes(self):
-        #  Additional things to note:
-        # * from updated to reference parents in the output
-        params = {'include_paths': ['doc/README.txt']}
-        self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-""")
-
-    def test_subdir(self):
-        params = {'include_paths': ['doc/']}
-        self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-""")
-
-    def test_multiple_files_in_subdir(self):
-        # The new root should be the subdrectory
-        params = {'include_paths': ['doc/README.txt', 'doc/index.txt']}
-        self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-""")
-
-
-class TestExcludePaths(TestCaseWithFiltering):
-
-    def test_file_in_root(self):
-        params = {'exclude_paths': ['NEWS']}
-        self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-""")
-
-    def test_file_in_subdir(self):
-        params = {'exclude_paths': ['doc/README.txt']}
-        self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :2 NEWS
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :101
-M 644 :4 doc/index.txt
-""")
-
-    def test_subdir(self):
-        params = {'exclude_paths': ['doc/']}
-        self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :2 NEWS
-""")
-
-    def test_multple_files(self):
-        params = {'exclude_paths': ['doc/index.txt', 'NEWS']}
-        self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 doc/README.txt
-""")
-
-
-class TestIncludeAndExcludePaths(TestCaseWithFiltering):
-
-    def test_included_dir_and_excluded_file(self):
-        params = {'include_paths': ['doc/'], 'exclude_paths': ['doc/index.txt']}
-        self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-""")
-
-
-# A sample input stream creating the following tree:
-#
-#  NEWS
-#  doc/README.txt
-#  doc/index.txt
-#
-# It then renames doc/README.txt => doc/README
-_SAMPLE_WITH_RENAME_INSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-R doc/README.txt doc/README
-"""
-
-# A sample input stream creating the following tree:
-#
-#  NEWS
-#  doc/README.txt
-#  doc/index.txt
-#
-# It then renames doc/README.txt => README
-_SAMPLE_WITH_RENAME_TO_OUTSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-R doc/README.txt README
-"""
-
-# A sample input stream creating the following tree:
-#
-#  NEWS
-#  doc/README.txt
-#  doc/index.txt
-#
-# It then renames NEWS => doc/NEWS
-_SAMPLE_WITH_RENAME_TO_INSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-R NEWS doc/NEWS
-"""
-
-class TestIncludePathsWithRenames(TestCaseWithFiltering):
-
-    def test_rename_all_inside(self):
-        # These rename commands ought to be kept but adjusted for the new root
-        params = {'include_paths': ['doc/']}
-        self.assertFiltering(_SAMPLE_WITH_RENAME_INSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-R README.txt README
-""")
-
-    def test_rename_to_outside(self):
-        # These rename commands become deletes
-        params = {'include_paths': ['doc/']}
-        self.assertFiltering(_SAMPLE_WITH_RENAME_TO_OUTSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-D README.txt
-""")
-
-    def test_rename_to_inside(self):
-        # This ought to create a new file but doesn't yet
-        params = {'include_paths': ['doc/']}
-        self.assertFiltering(_SAMPLE_WITH_RENAME_TO_INSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-""")
-
-
-# A sample input stream creating the following tree:
-#
-#  NEWS
-#  doc/README.txt
-#  doc/index.txt
-#
-# It then copies doc/README.txt => doc/README
-_SAMPLE_WITH_COPY_INSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-C doc/README.txt doc/README
-"""
-
-# A sample input stream creating the following tree:
-#
-#  NEWS
-#  doc/README.txt
-#  doc/index.txt
-#
-# It then copies doc/README.txt => README
-_SAMPLE_WITH_COPY_TO_OUTSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-C doc/README.txt README
-"""
-
-# A sample input stream creating the following tree:
-#
-#  NEWS
-#  doc/README.txt
-#  doc/index.txt
-#
-# It then copies NEWS => doc/NEWS
-_SAMPLE_WITH_COPY_TO_INSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-C NEWS doc/NEWS
-"""
-
-
-class TestIncludePathsWithCopies(TestCaseWithFiltering):
-
-    def test_copy_all_inside(self):
-        # These copy commands ought to be kept but adjusted for the new root
-        params = {'include_paths': ['doc/']}
-        self.assertFiltering(_SAMPLE_WITH_COPY_INSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-C README.txt README
-""")
-
-    def test_copy_to_outside(self):
-        # This can be ignored
-        params = {'include_paths': ['doc/']}
-        self.assertFiltering(_SAMPLE_WITH_COPY_TO_OUTSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-""")
-
-    def test_copy_to_inside(self):
-        # This ought to create a new file but doesn't yet
-        params = {'include_paths': ['doc/']}
-        self.assertFiltering(_SAMPLE_WITH_COPY_TO_INSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-""")
-
-
-# A sample input stream with deleteall's creating the following tree:
-#
-#  NEWS
-#  doc/README.txt
-#  doc/index.txt
-_SAMPLE_WITH_DELETEALL = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-deleteall
-M 644 :1 doc/README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-deleteall
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-"""
-
-
-class TestIncludePathsWithDeleteAll(TestCaseWithFiltering):
-
-    def test_deleteall(self):
-        params = {'include_paths': ['doc/index.txt']}
-        self.assertFiltering(_SAMPLE_WITH_DELETEALL, params, \
-"""blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-deleteall
-M 644 :4 index.txt
-""")
-
-
-_SAMPLE_WITH_TAGS = _SAMPLE_WITH_DIR + \
-"""tag v0.1
-from :100
-tagger d <b@c> 1234798653 +0000
-data 12
-release v0.1
-tag v0.2
-from :102
-tagger d <b@c> 1234798653 +0000
-data 12
-release v0.2
-"""
-
-class TestIncludePathsWithTags(TestCaseWithFiltering):
-
-    def test_tag_retention(self):
-        # If a tag references a commit with a parent we kept,
-        # keep the tag but adjust 'from' accordingly.
-        # Otherwise, delete the tag command.
-        params = {'include_paths': ['NEWS']}
-        self.assertFiltering(_SAMPLE_WITH_TAGS, params, \
-"""blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :2 NEWS
-tag v0.2
-from :101
-tagger d <b@c> 1234798653 +0000
-data 12
-release v0.2
-""")
-
-
-_SAMPLE_WITH_RESETS = _SAMPLE_WITH_DIR + \
-"""reset refs/heads/foo
-reset refs/heads/bar
-from :102
-"""
-
-class TestIncludePathsWithResets(TestCaseWithFiltering):
-
-    def test_reset_retention(self):
-        # Resets init'ing a branch (without a from) are passed through.
-        # If a reset references a commit with a parent we kept,
-        # keep the reset but adjust 'from' accordingly.
-        params = {'include_paths': ['NEWS']}
-        self.assertFiltering(_SAMPLE_WITH_RESETS, params, \
-"""blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :2 NEWS
-reset refs/heads/foo
-reset refs/heads/bar
-from :101
-""")
diff --git a/tests/test_generic_processor.py b/tests/test_generic_processor.py
index d4f789b..41f846e 100644
--- a/tests/test_generic_processor.py
+++ b/tests/test_generic_processor.py
@@ -17,25 +17,47 @@
 import time
 
 from bzrlib import (
-    branch,
     tests,
     )
-
-from bzrlib.plugins.fastimport import (
-    commands,
-    errors,
+from bzrlib.plugins.fastimport.helpers import (
+    kind_to_mode,
     )
-
-from bzrlib.plugins.fastimport.processors import (
-    generic_processor,
+from bzrlib.plugins.fastimport.tests import (
+    FastimportFeature,
     )
 
+try:
+    from fastimport import commands
+except ImportError:
+    commands = object()
+
+
+def load_tests(standard_tests, module, loader):
+    """Multiply standard_tests by one scenario per branch format."""
+    # These two formats are always offered.
+    scenarios = [(name, {'branch_format': name})
+                 for name in ('pack-0.92', '1.9-rich-root')]
+    try:
+        # Imported purely as a probe: 2a is only added when it is importable.
+        from bzrlib.repofmt.groupcompress_repo import RepositoryFormat2a
+    except ImportError:
+        pass
+    else:
+        scenarios.append(('2a', {'branch_format': '2a'}))
+    suite = loader.suiteClass()
+    return tests.multiply_tests(standard_tests, scenarios, suite)
+
 
 class TestCaseForGenericProcessor(tests.TestCaseWithTransport):
 
+    _test_needs_features = [FastimportFeature]
+
     branch_format = "pack-0.92"
 
     def get_handler(self):
+        from bzrlib.plugins.fastimport.processors import (
+            generic_processor,
+            )
         branch = self.make_branch('.', format=self.branch_format)
         handler = generic_processor.GenericProcessor(branch.bzrdir)
         return handler, branch
@@ -176,23 +198,24 @@ class TestImportToPackModify(TestCaseForGenericProcessor):
 
     def file_command_iter(self, path, kind='file', content='aaa',
         executable=False, to_kind=None, to_content='bbb', to_executable=None):
+
         # Revno 1: create a file or symlink
         # Revno 2: modify it
         if to_kind is None:
             to_kind = kind
         if to_executable is None:
             to_executable = executable
+        mode = kind_to_mode(kind, executable)
+        to_mode = kind_to_mode(to_kind, to_executable)
         def command_list():
             author = ['', 'bugs@a.com', time.time(), time.timezone]
             committer = ['', 'elmer@a.com', time.time(), time.timezone]
             def files_one():
-                yield commands.FileModifyCommand(path, kind, executable,
-                        None, content)
+                yield commands.FileModifyCommand(path, mode, None, content)
             yield commands.CommitCommand('head', '1', author,
                 committer, "commit 1", None, [], files_one)
             def files_two():
-                yield commands.FileModifyCommand(path, to_kind, to_executable,
-                        None, to_content)
+                yield commands.FileModifyCommand(path, to_mode, None, to_content)
             yield commands.CommitCommand('head', '2', author,
                 committer, "commit 2", ":1", [], files_two)
         return command_list
@@ -292,9 +315,46 @@ class TestImportToPackModify(TestCaseForGenericProcessor):
         self.assertExecutable(branch, revtree2, path, False)
 
 
+class TestImportToPackModifyTwice(TestCaseForGenericProcessor):
+    """Import a single commit that modifies the same path twice.
+
+    Note: hg-fast-export sometimes produces streams like this.
+    """
+
+    def file_command_iter(self, path, kind='file', content='aaa',
+        executable=False, to_kind=None, to_content='bbb', to_executable=None):
+        # Revno 1: the same path is written twice within one commit.
+        if to_kind is None:
+            to_kind = kind
+        if to_executable is None:
+            to_executable = executable
+        def command_list():
+            def files_one():
+                writes = ((kind, executable, content),
+                          (to_kind, to_executable, to_content))
+                for knd, exe, text in writes:
+                    yield commands.FileModifyCommand(path,
+                        kind_to_mode(knd, exe), None, text)
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+        return command_list
+
+    def test_modify_file_twice_in_root(self):
+        path = 'a'
+        handler, branch = self.get_handler()
+        handler.process(self.file_command_iter(path))
+        tree0, tree1 = self.assertChanges(branch, 1, expected_added=[(path,)])
+        self.assertContent(branch, tree1, path, "aaa")
+        self.assertRevisionRoot(tree1, path)
+        self.assertRevisionRoot(revtree1, path)
+
+
 class TestImportToPackModifyTricky(TestCaseForGenericProcessor):
 
     def file_command_iter(self, path1, path2, kind='file'):
+
         # Revno 1: create a file or symlink in a directory
         # Revno 2: create a second file that implicitly deletes the
         # first one because either:
@@ -304,12 +364,12 @@ class TestImportToPackModifyTricky(TestCaseForGenericProcessor):
             author = ['', 'bugs@a.com', time.time(), time.timezone]
             committer = ['', 'elmer@a.com', time.time(), time.timezone]
             def files_one():
-                yield commands.FileModifyCommand(path1, kind, False,
+                yield commands.FileModifyCommand(path1, kind_to_mode(kind, False),
                         None, "aaa")
             yield commands.CommitCommand('head', '1', author,
                 committer, "commit 1", None, [], files_one)
             def files_two():
-                yield commands.FileModifyCommand(path2, kind, False,
+                yield commands.FileModifyCommand(path2, kind_to_mode(kind, False),
                         None, "bbb")
             yield commands.CommitCommand('head', '2', author,
                 committer, "commit 2", ":1", [], files_two)
@@ -372,13 +432,14 @@ class TestImportToPackModifyTricky(TestCaseForGenericProcessor):
 class TestImportToPackDelete(TestCaseForGenericProcessor):
 
     def file_command_iter(self, path, kind='file'):
+
         # Revno 1: create a file or symlink
         # Revno 2: delete it
         def command_list():
             author = ['', 'bugs@a.com', time.time(), time.timezone]
             committer = ['', 'elmer@a.com', time.time(), time.timezone]
             def files_one():
-                yield commands.FileModifyCommand(path, kind, False,
+                yield commands.FileModifyCommand(path, kind_to_mode(kind, False),
                         None, "aaa")
             yield commands.CommitCommand('head', '1', author,
                 committer, "commit 1", None, [], files_one)
@@ -439,9 +500,211 @@ class TestImportToPackDelete(TestCaseForGenericProcessor):
         self.assertContent(branch, revtree1, path, "aaa")
 
 
+class TestImportToPackDeleteNew(TestCaseForGenericProcessor):
+    """Test deletion of a newly added file."""
+
+    def file_command_iter(self, path, kind='file'):
+
+        # Revno 1: create a file or symlink then delete it
+        def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                yield commands.FileModifyCommand(path, kind_to_mode(kind, False),
+                        None, "aaa")
+                yield commands.FileDeleteCommand(path)
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+        return command_list
+
+    def test_delete_new_file_in_root(self):
+        handler, branch = self.get_handler()
+        path = 'a'
+        handler.process(self.file_command_iter(path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,)
+
+    def test_delete_new_file_in_subdir(self):
+        handler, branch = self.get_handler()
+        path = 'a/a'
+        handler.process(self.file_command_iter(path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,)
+
+    def test_delete_new_symlink_in_root(self):
+        handler, branch = self.get_handler()
+        path = 'a'
+        handler.process(self.file_command_iter(path, kind='symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,)
+
+    def test_delete_new_symlink_in_subdir(self):
+        handler, branch = self.get_handler()
+        path = 'a/a'
+        handler.process(self.file_command_iter(path, kind='symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,)
+
+    def test_delete_new_file_in_deep_subdir(self):
+        handler, branch = self.get_handler()
+        path = 'a/b/c/d'
+        handler.process(self.file_command_iter(path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,)
+
+
+class TestImportToPackDeleteMultiLevel(TestCaseForGenericProcessor):
+
+    def file_command_iter(self, paths, paths_to_delete):
+
+        # Revno 1: create multiple files
+        # Revno 2: delete multiple files
+        def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                for i, path in enumerate(paths):
+                    yield commands.FileModifyCommand(path, kind_to_mode('file', False),
+                            None, "aaa%d" % i)
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+            def files_two():
+                for path in paths_to_delete:
+                    yield commands.FileDeleteCommand(path)
+            yield commands.CommitCommand('head', '2', author,
+                committer, "commit 2", ":1", [], files_two)
+        return command_list
+
+    def test_delete_files_in_multiple_levels(self):
+        handler, branch = self.get_handler()
+        paths = ['a/b/c', 'a/b/d/e']
+        paths_to_delete = ['a/b/c', 'a/b/d/e']
+        handler.process(self.file_command_iter(paths, paths_to_delete))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[
+                ('a',), ('a/b',), ('a/b/c',),
+                ('a/b/d',), ('a/b/d/e',),
+                ])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[
+                ('a',), ('a/b',), ('a/b/c',),
+                ('a/b/d',), ('a/b/d/e',),
+                ])
+
+    def test_delete_file_single_level(self):
+        handler, branch = self.get_handler()
+        paths = ['a/b/c', 'a/b/d/e']
+        paths_to_delete = ['a/b/d/e']
+        handler.process(self.file_command_iter(paths, paths_to_delete))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[
+                ('a',), ('a/b',), ('a/b/c',),
+                ('a/b/d',), ('a/b/d/e',),
+                ])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[
+                ('a/b/d',), ('a/b/d/e',),
+                ])
+
+    def test_delete_file_complex_level(self):
+        handler, branch = self.get_handler()
+        paths = ['a/b/c', 'a/b/d/e', 'a/f/g', 'a/h', 'a/b/d/i/j']
+        paths_to_delete = ['a/b/c', 'a/b/d/e', 'a/f/g', 'a/b/d/i/j']
+        handler.process(self.file_command_iter(paths, paths_to_delete))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[
+                ('a',), ('a/b',), ('a/b/c',),
+                ('a/b/d',), ('a/b/d/e',),
+                ('a/f',), ('a/f/g',),
+                ('a/h',),
+                ('a/b/d/i',), ('a/b/d/i/j',),
+                ])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[
+                ('a/b',), ('a/b/c',),
+                ('a/b/d',), ('a/b/d/e',),
+                ('a/f',), ('a/f/g',),
+                ('a/b/d/i',), ('a/b/d/i/j',),
+                ])
+
+class TestImportToPackDeleteThenAdd(TestCaseForGenericProcessor):
+    """Test delete followed by an add. Merges can cause this."""
+
+    def file_command_iter(self, path, kind='file', content='aaa',
+        executable=False, to_kind=None, to_content='bbb', to_executable=None):
+
+        # Revno 1: create a file or symlink
+        # Revno 2: delete it and add it
+        if to_kind is None:
+            to_kind = kind
+        if to_executable is None:
+            to_executable = executable
+        def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                yield commands.FileModifyCommand(path, kind_to_mode(kind, executable),
+                        None, content)
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+            def files_two():
+                yield commands.FileDeleteCommand(path)
+                yield commands.FileModifyCommand(path, kind_to_mode(to_kind, to_executable),
+                        None, to_content)
+            yield commands.CommitCommand('head', '2', author,
+                committer, "commit 2", ":1", [], files_two)
+        return command_list
+
+    def test_delete_then_add_file_in_root(self):
+        handler, branch = self.get_handler()
+        path = 'a'
+        handler.process(self.file_command_iter(path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(path,)],
+            expected_added=[(path,)])
+        self.assertContent(branch, revtree1, path, "aaa")
+        self.assertContent(branch, revtree2, path, "bbb")
+        self.assertRevisionRoot(revtree1, path)
+        self.assertRevisionRoot(revtree2, path)
+
+    def test_delete_then_add_file_in_subdir(self):
+        handler, branch = self.get_handler()
+        path = 'a/a'
+        handler.process(self.file_command_iter(path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('a',), (path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(path,)],
+            expected_added=[(path,)])
+        self.assertContent(branch, revtree1, path, "aaa")
+        self.assertContent(branch, revtree2, path, "bbb")
+
+    def test_delete_then_add_symlink_in_root(self):
+        handler, branch = self.get_handler()
+        path = 'a'
+        handler.process(self.file_command_iter(path, kind='symlink'))
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(path,)],
+            expected_added=[(path,)])
+        self.assertSymlinkTarget(branch, revtree1, path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, path, "bbb")
+        self.assertRevisionRoot(revtree1, path)
+        self.assertRevisionRoot(revtree2, path)
+
+    def test_delete_then_add_symlink_in_subdir(self):
+        handler, branch = self.get_handler()
+        path = 'a/a'
+        handler.process(self.file_command_iter(path, kind='symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('a',), (path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(path,)],
+            expected_added=[(path,)])
+        self.assertSymlinkTarget(branch, revtree1, path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, path, "bbb")
+
+
 class TestImportToPackDeleteDirectory(TestCaseForGenericProcessor):
 
     def file_command_iter(self, paths, dir):
+
         # Revno 1: create multiple files
         # Revno 2: delete a directory holding those files
         def command_list():
@@ -449,7 +712,7 @@ class TestImportToPackDeleteDirectory(TestCaseForGenericProcessor):
             committer = ['', 'elmer@a.com', time.time(), time.timezone]
             def files_one():
                 for i, path in enumerate(paths):
-                    yield commands.FileModifyCommand(path, 'file', False,
+                    yield commands.FileModifyCommand(path, kind_to_mode('file', False),
                             None, "aaa%d" % i)
             yield commands.CommitCommand('head', '1', author,
                 committer, "commit 1", None, [], files_one)
@@ -479,16 +742,68 @@ class TestImportToPackDeleteDirectory(TestCaseForGenericProcessor):
                 ])
 
 
+class TestImportToPackDeleteDirectoryThenAddFile(TestCaseForGenericProcessor):
+    """Test deleting a directory then adding a file in the same commit."""
+
+    def file_command_iter(self, paths, dir, new_path, kind='file'):
+
+        # Revno 1: create files or symlinks in a directory
+        # Revno 2: delete the directory then add a file or symlink into it
+        def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                for i, path in enumerate(paths):
+                    yield commands.FileModifyCommand(path, kind_to_mode(kind, False),
+                            None, "aaa%d" % i)
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+            def files_two():
+                yield commands.FileDeleteCommand(dir)
+                yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False),
+                        None, "bbb")
+            yield commands.CommitCommand('head', '2', author,
+                committer, "commit 2", ":1", [], files_two)
+        return command_list
+
+    def test_delete_dir_then_add_file(self):
+        handler, branch = self.get_handler()
+        paths = ['a/b/c', 'a/b/d']
+        dir = 'a/b'
+        new_path = 'a/b/z'
+        handler.process(self.file_command_iter(paths, dir, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('a',), ('a/b',), ('a/b/c',), ('a/b/d',),])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[('a/b',), ('a/b/c',), ('a/b/d',)],
+            expected_added=[('a/b',), ('a/b/z',)])
+        self.assertContent(branch, revtree2, new_path, "bbb")
+
+    def test_delete_dir_then_add_symlink(self):
+        handler, branch = self.get_handler()
+        paths = ['a/b/c', 'a/b/d']
+        dir = 'a/b'
+        new_path = 'a/b/z'
+        handler.process(self.file_command_iter(paths, dir, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('a',), ('a/b',), ('a/b/c',), ('a/b/d',),])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[('a/b',), ('a/b/c',), ('a/b/d',)],
+            expected_added=[('a/b',), ('a/b/z',)])
+        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+
 class TestImportToPackRename(TestCaseForGenericProcessor):
 
-    def get_command_iter(self, old_path, new_path):
+    def get_command_iter(self, old_path, new_path, kind='file'):
+
         # Revno 1: create a file or symlink
         # Revno 2: rename it
         def command_list():
             author = ['', 'bugs@a.com', time.time(), time.timezone]
             committer = ['', 'elmer@a.com', time.time(), time.timezone]
             def files_one():
-                yield commands.FileModifyCommand(old_path, 'file', False,
+                yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
                         None, "aaa")
             yield commands.CommitCommand('head', '1', author,
                 committer, "commit 1", None, [], files_one)
@@ -498,7 +813,7 @@ class TestImportToPackRename(TestCaseForGenericProcessor):
                 committer, "commit 2", ":1", [], files_two)
         return command_list
 
-    def test_rename_in_root(self):
+    def test_rename_file_in_root(self):
         handler, branch = self.get_handler()
         old_path = 'a'
         new_path = 'b'
@@ -508,14 +823,31 @@ class TestImportToPackRename(TestCaseForGenericProcessor):
         self.assertRevisionRoot(revtree1, old_path)
         self.assertRevisionRoot(revtree2, new_path)
 
-    def test_rename_in_subdir(self):
+    def test_rename_symlink_in_root(self):
+        handler, branch = self.get_handler()
+        old_path = 'a'
+        new_path = 'b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)])
+        self.assertRevisionRoot(revtree1, old_path)
+        self.assertRevisionRoot(revtree2, new_path)
+
+    def test_rename_file_in_subdir(self):
         handler, branch = self.get_handler()
         old_path = 'a/a'
         new_path = 'a/b'
         handler.process(self.get_command_iter(old_path, new_path))
         self.assertChanges(branch, 2, expected_renamed=[(old_path, new_path)])
 
-    def test_move_to_new_dir(self):
+    def test_rename_symlink_in_subdir(self):
+        handler, branch = self.get_handler()
+        old_path = 'a/a'
+        new_path = 'a/b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        self.assertChanges(branch, 2, expected_renamed=[(old_path, new_path)])
+
+    def test_rename_file_to_new_dir(self):
         handler, branch = self.get_handler()
         old_path = 'a/a'
         new_path = 'b/a'
@@ -525,10 +857,547 @@ class TestImportToPackRename(TestCaseForGenericProcessor):
             expected_added=[('b',)],
             expected_removed=[('a',)])
 
+    def test_rename_symlink_to_new_dir(self):
+        handler, branch = self.get_handler()
+        old_path = 'a/a'
+        new_path = 'b/a'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)],
+            expected_added=[('b',)],
+            expected_removed=[('a',)])
+
+
+class TestImportToPackRenameNew(TestCaseForGenericProcessor):
+    """Test rename of a newly added file."""
+
+    def get_command_iter(self, old_path, new_path, kind='file'):
+
+        # Revno 1: create a file or symlink and rename it
+        def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+                        None, "aaa")
+                yield commands.FileRenameCommand(old_path, new_path)
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+        return command_list
+
+    def test_rename_new_file_in_root(self):
+        handler, branch = self.get_handler()
+        old_path = 'a'
+        new_path = 'b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(new_path,)])
+        self.assertRevisionRoot(revtree1, new_path)
+
+    def test_rename_new_symlink_in_root(self):
+        handler, branch = self.get_handler()
+        old_path = 'a'
+        new_path = 'b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(new_path,)])
+        self.assertRevisionRoot(revtree1, new_path)
+
+    def test_rename_new_file_in_subdir(self):
+        handler, branch = self.get_handler()
+        old_path = 'a/a'
+        new_path = 'a/b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('a',), (new_path,)])
+
+    def test_rename_new_symlink_in_subdir(self):
+        handler, branch = self.get_handler()
+        old_path = 'a/a'
+        new_path = 'a/b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('a',), (new_path,)])
+
+
+class TestImportToPackRenameToDeleted(TestCaseForGenericProcessor):
+    """Test rename to a destination path deleted in this commit."""
+
+    def get_command_iter(self, old_path, new_path, kind='file'):
+
+        # Revno 1: create two files or symlinks
+        # Revno 2: delete one, rename the other one to that path
+        def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+                        None, "aaa")
+                yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False),
+                        None, "bbb")
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+            def files_two():
+                yield commands.FileDeleteCommand(new_path)
+                yield commands.FileRenameCommand(old_path, new_path)
+            yield commands.CommitCommand('head', '2', author,
+                committer, "commit 2", ":1", [], files_two)
+        return command_list
+
+    def test_rename_to_deleted_file_in_root(self):
+        handler, branch = self.get_handler()
+        old_path = 'a'
+        new_path = 'b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(old_path,), (new_path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree1, new_path, "bbb")
+        self.assertContent(branch, revtree2, new_path, "aaa")
+        self.assertRevisionRoot(revtree1, old_path)
+        self.assertRevisionRoot(revtree1, new_path)
+
+    def test_rename_to_deleted_symlink_in_root(self):
+        handler, branch = self.get_handler()
+        old_path = 'a'
+        new_path = 'b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(old_path,), (new_path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree1, new_path, "bbb")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "aaa")
+        self.assertRevisionRoot(revtree1, old_path)
+        self.assertRevisionRoot(revtree1, new_path)
+
+    def test_rename_to_deleted_file_in_subdir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd/a'
+        new_path = 'd/b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d',), (old_path,), (new_path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree1, new_path, "bbb")
+        self.assertContent(branch, revtree2, new_path, "aaa")
+
+    def test_rename_to_deleted_symlink_in_subdir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd/a'
+        new_path = 'd/b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d',), (old_path,), (new_path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree1, new_path, "bbb")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "aaa")
+
+    def test_rename_to_deleted_file_in_new_dir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd1/a'
+        new_path = 'd2/b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d1',), (old_path,), ('d2',), (new_path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[('d1',), (new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree1, new_path, "bbb")
+        self.assertContent(branch, revtree2, new_path, "aaa")
+
+    def test_rename_to_deleted_symlink_in_new_dir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd1/a'
+        new_path = 'd2/b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d1',), (old_path,), ('d2',), (new_path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[('d1',), (new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree1, new_path, "bbb")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "aaa")
+
+
+class TestImportToPackRenameModified(TestCaseForGenericProcessor):
+    """Test rename of a path previously modified in this commit."""
+
+    def get_command_iter(self, old_path, new_path, kind='file'):
+
+        # Revno 1: create a file or symlink
+        # Revno 2: modify then rename it
+        def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+                        None, "aaa")
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+            def files_two():
+                yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+                        None, "bbb")
+                yield commands.FileRenameCommand(old_path, new_path)
+            yield commands.CommitCommand('head', '2', author,
+                committer, "commit 2", ":1", [], files_two)
+        return command_list
+
+    def test_rename_of_modified_file_in_root(self):
+        handler, branch = self.get_handler()
+        old_path = 'a'
+        new_path = 'b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree2, new_path, "bbb")
+        self.assertRevisionRoot(revtree1, old_path)
+        self.assertRevisionRoot(revtree2, new_path)
+
+    def test_rename_of_modified_symlink_in_root(self):
+        handler, branch = self.get_handler()
+        old_path = 'a'
+        new_path = 'b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+        self.assertRevisionRoot(revtree1, old_path)
+        self.assertRevisionRoot(revtree2, new_path)
+
+    def test_rename_of_modified_file_in_subdir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd/a'
+        new_path = 'd/b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d',), (old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree2, new_path, "bbb")
+
+    def test_rename_of_modified_symlink_in_subdir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd/a'
+        new_path = 'd/b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d',), (old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+    def test_rename_of_modified_file_to_new_dir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd1/a'
+        new_path = 'd2/b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d1',), (old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)],
+            expected_added=[('d2',)],
+            expected_removed=[('d1',)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree2, new_path, "bbb")
+
+    def test_rename_of_modified_symlink_to_new_dir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd1/a'
+        new_path = 'd2/b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d1',), (old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)],
+            expected_added=[('d2',)],
+            expected_removed=[('d1',)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+
+class TestImportToPackRenameThenModify(TestCaseForGenericProcessor):
+    """Test rename of a path then modify the new path in the same commit."""
+
+    def get_command_iter(self, old_path, new_path, kind='file'):
+
+        # Revno 1: create a file or symlink
+        # Revno 2: rename it then modify the newly created path
+        def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+                        None, "aaa")
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+            def files_two():
+                yield commands.FileRenameCommand(old_path, new_path)
+                yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False),
+                        None, "bbb")
+            yield commands.CommitCommand('head', '2', author,
+                committer, "commit 2", ":1", [], files_two)
+        return command_list
+
+    def test_rename_then_modify_file_in_root(self):
+        handler, branch = self.get_handler()
+        old_path = 'a'
+        new_path = 'b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree2, new_path, "bbb")
+        self.assertRevisionRoot(revtree1, old_path)
+        self.assertRevisionRoot(revtree2, new_path)
+
+    def test_rename_then_modify_file_in_subdir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd/a'
+        new_path = 'd/b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d',), (old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree2, new_path, "bbb")
+
+    def test_rename_then_modify_file_in_new_dir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd1/a'
+        new_path = 'd2/b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d1',), (old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)],
+            expected_added=[('d2',)],
+            expected_removed=[('d1',)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree2, new_path, "bbb")
+
+    def test_rename_then_modify_symlink_in_root(self):
+        handler, branch = self.get_handler()
+        old_path = 'a'
+        new_path = 'b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+        self.assertRevisionRoot(revtree1, old_path)
+        self.assertRevisionRoot(revtree2, new_path)
+
+    def test_rename_then_modify_symlink_in_subdir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd/a'
+        new_path = 'd/b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d',), (old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+    def test_rename_then_modify_symlink_in_new_dir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd1/a'
+        new_path = 'd2/b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d1',), (old_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_renamed=[(old_path, new_path)],
+            expected_added=[('d2',)],
+            expected_removed=[('d1',)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+
+class TestImportToPackDeleteRenameThenModify(TestCaseForGenericProcessor):
+    """Test rename to a deleted path, then modify the new path in the same commit."""
+
+    def get_command_iter(self, old_path, new_path, kind='file'):
+
+        # Revno 1: create two files or symlinks
+        # Revno 2: delete one, rename the other onto its path, then modify the renamed path
+        def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+                        None, "aaa")
+                yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False),
+                        None, "zzz")
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+            def files_two():
+                yield commands.FileDeleteCommand(new_path)
+                yield commands.FileRenameCommand(old_path, new_path)
+                yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False),
+                        None, "bbb")
+            yield commands.CommitCommand('head', '2', author,
+                committer, "commit 2", ":1", [], files_two)
+        return command_list
+
+    def test_delete_rename_then_modify_file_in_root(self):
+        handler, branch = self.get_handler()
+        old_path = 'a'
+        new_path = 'b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree1, new_path, "zzz")
+        self.assertContent(branch, revtree2, new_path, "bbb")
+        self.assertRevisionRoot(revtree1, old_path)
+        self.assertRevisionRoot(revtree1, new_path)
+        self.assertRevisionRoot(revtree2, new_path)
+
+    def test_delete_rename_then_modify_file_in_subdir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd/a'
+        new_path = 'd/b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d',), (old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree1, new_path, "zzz")
+        self.assertContent(branch, revtree2, new_path, "bbb")
+
+    def test_delete_rename_then_modify_file_in_new_dir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd1/a'
+        new_path = 'd2/b'
+        handler.process(self.get_command_iter(old_path, new_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d1',), ('d2',), (old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[('d1',), (new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertContent(branch, revtree1, old_path, "aaa")
+        self.assertContent(branch, revtree1, new_path, "zzz")
+        self.assertContent(branch, revtree2, new_path, "bbb")
+
+    def test_delete_rename_then_modify_symlink_in_root(self):
+        handler, branch = self.get_handler()
+        old_path = 'a'
+        new_path = 'b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree1, new_path, "zzz")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+        self.assertRevisionRoot(revtree1, old_path)
+        self.assertRevisionRoot(revtree1, new_path)
+        self.assertRevisionRoot(revtree2, new_path)
+
+    def test_delete_rename_then_modify_symlink_in_subdir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd/a'
+        new_path = 'd/b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d',), (old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree1, new_path, "zzz")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+    def test_delete_rename_then_modify_symlink_in_new_dir(self):
+        handler, branch = self.get_handler()
+        old_path = 'd1/a'
+        new_path = 'd2/b'
+        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d1',), ('d2',), (old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification?
+        # The actual new content is validated in the assertions following.
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[('d1',), (new_path,)],
+            expected_renamed=[(old_path, new_path)])
+        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree1, new_path, "zzz")
+        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
 
 class TestImportToPackRenameTricky(TestCaseForGenericProcessor):
 
     def file_command_iter(self, path1, old_path2, new_path2, kind='file'):
+
         # Revno 1: create two files or symlinks in a directory
         # Revno 2: rename the second file so that it implicitly deletes the
         # first one because either:
@@ -538,9 +1407,9 @@ class TestImportToPackRenameTricky(TestCaseForGenericProcessor):
             author = ['', 'bugs@a.com', time.time(), time.timezone]
             committer = ['', 'elmer@a.com', time.time(), time.timezone]
             def files_one():
-                yield commands.FileModifyCommand(path1, kind, False,
+                yield commands.FileModifyCommand(path1, kind_to_mode(kind, False),
                         None, "aaa")
-                yield commands.FileModifyCommand(old_path2, kind, False,
+                yield commands.FileModifyCommand(old_path2, kind_to_mode(kind, False),
                         None, "bbb")
             yield commands.CommitCommand('head', '1', author,
                 committer, "commit 1", None, [], files_one)
@@ -550,7 +1419,6 @@ class TestImportToPackRenameTricky(TestCaseForGenericProcessor):
                 committer, "commit 2", ":1", [], files_two)
         return command_list
 
-
     def test_rename_file_becomes_directory(self):
         handler, branch = self.get_handler()
         old_path2 = 'foo'
@@ -613,13 +1481,14 @@ class TestImportToPackRenameTricky(TestCaseForGenericProcessor):
 class TestImportToPackCopy(TestCaseForGenericProcessor):
 
     def file_command_iter(self, src_path, dest_path, kind='file'):
+
         # Revno 1: create a file or symlink
         # Revno 2: copy it
         def command_list():
             author = ['', 'bugs@a.com', time.time(), time.timezone]
             committer = ['', 'elmer@a.com', time.time(), time.timezone]
             def files_one():
-                yield commands.FileModifyCommand(src_path, kind, False,
+                yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False),
                         None, "aaa")
             yield commands.CommitCommand('head', '1', author,
                 committer, "commit 1", None, [], files_one)
@@ -700,79 +1569,344 @@ class TestImportToPackCopy(TestCaseForGenericProcessor):
         self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa")
 
 
-class TestImportToPackFileKinds(TestCaseForGenericProcessor):
+class TestImportToPackCopyNew(TestCaseForGenericProcessor):
+    """Test copy of a newly added file."""
 
-    def get_command_iter(self, path, kind, content):
+    def file_command_iter(self, src_path, dest_path, kind='file'):
+
+        # Revno 1: create a file or symlink and copy it
         def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
             committer = ['', 'elmer@a.com', time.time(), time.timezone]
             def files_one():
-                yield commands.FileModifyCommand(path, kind, False,
-                        None, content)
-            yield commands.CommitCommand('head', '1', None,
+                yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False),
+                        None, "aaa")
+                yield commands.FileCopyCommand(src_path, dest_path)
+            yield commands.CommitCommand('head', '1', author,
                 committer, "commit 1", None, [], files_one)
         return command_list
 
-    def test_import_plainfile(self):
+    def test_copy_new_file_in_root(self):
         handler, branch = self.get_handler()
-        handler.process(self.get_command_iter('foo', 'file', 'aaa'))
+        src_path = 'a'
+        dest_path = 'b'
+        handler.process(self.file_command_iter(src_path, dest_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(src_path,), (dest_path,)])
+        self.assertContent(branch, revtree1, src_path, "aaa")
+        self.assertContent(branch, revtree1, dest_path, "aaa")
+        self.assertRevisionRoot(revtree1, src_path)
+        self.assertRevisionRoot(revtree1, dest_path)
 
-    def test_import_symlink(self):
+    def test_copy_new_file_in_subdir(self):
         handler, branch = self.get_handler()
-        handler.process(self.get_command_iter('foo', 'symlink', 'bar'))
+        src_path = 'a/a'
+        dest_path = 'a/b'
+        handler.process(self.file_command_iter(src_path, dest_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('a',), (src_path,), (dest_path,)])
+        self.assertContent(branch, revtree1, src_path, "aaa")
+        self.assertContent(branch, revtree1, dest_path, "aaa")
 
+    def test_copy_new_file_to_new_dir(self):
+        handler, branch = self.get_handler()
+        src_path = 'a/a'
+        dest_path = 'b/a'
+        handler.process(self.file_command_iter(src_path, dest_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('a',), (src_path,), ('b',), (dest_path,)])
+        self.assertContent(branch, revtree1, src_path, "aaa")
+        self.assertContent(branch, revtree1, dest_path, "aaa")
 
-### TODO: Parameterise tests rather than below hack
+    def test_copy_new_symlink_in_root(self):
+        handler, branch = self.get_handler()
+        src_path = 'a'
+        dest_path = 'b'
+        handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(src_path,), (dest_path,)])
+        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa")
+        self.assertRevisionRoot(revtree1, src_path)
+        self.assertRevisionRoot(revtree1, dest_path)
 
-class TestImportToRichRootModify(TestImportToPackModify):
-    branch_format = "1.9-rich-root"
+    def test_copy_new_symlink_in_subdir(self):
+        handler, branch = self.get_handler()
+        src_path = 'a/a'
+        dest_path = 'a/b'
+        handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('a',), (src_path,), (dest_path,)])
+        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa")
 
-class TestImportToRichRootModifyTricky(TestImportToPackModifyTricky):
-    branch_format = "1.9-rich-root"
+    def test_copy_new_symlink_to_new_dir(self):
+        handler, branch = self.get_handler()
+        src_path = 'a/a'
+        dest_path = 'b/a'
+        handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('a',), (src_path,), ('b',), (dest_path,)])
+        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa")
 
-class TestImportToRichRootDelete(TestImportToPackDelete):
-    branch_format = "1.9-rich-root"
 
-class TestImportToRichRootDeleteDirectory(TestImportToPackDeleteDirectory):
-    branch_format = "1.9-rich-root"
+class TestImportToPackCopyToDeleted(TestCaseForGenericProcessor):
 
-class TestImportToRichRootRename(TestImportToPackRename):
-    branch_format = "1.9-rich-root"
+    def file_command_iter(self, src_path, dest_path, kind='file'):
 
-class TestImportToRichRootRenameTricky(TestImportToPackRenameTricky):
-    branch_format = "1.9-rich-root"
+        # Revno 1: create two files or symlinks
+        # Revno 2: delete one and copy the other one to its path
+        def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False),
+                        None, "aaa")
+                yield commands.FileModifyCommand(dest_path, kind_to_mode(kind, False),
+                        None, "bbb")
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+            def files_two():
+                yield commands.FileDeleteCommand(dest_path)
+                yield commands.FileCopyCommand(src_path, dest_path)
+            yield commands.CommitCommand('head', '2', author,
+                committer, "commit 2", ":1", [], files_two)
+        return command_list
 
-class TestImportToRichRootCopy(TestImportToPackCopy):
-    branch_format = "1.9-rich-root"
+    def test_copy_to_deleted_file_in_root(self):
+        handler, branch = self.get_handler()
+        src_path = 'a'
+        dest_path = 'b'
+        handler.process(self.file_command_iter(src_path, dest_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(src_path,), (dest_path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(dest_path,)],
+            expected_added=[(dest_path,)])
+        self.assertContent(branch, revtree1, src_path, "aaa")
+        self.assertContent(branch, revtree1, dest_path, "bbb")
+        self.assertContent(branch, revtree2, src_path, "aaa")
+        self.assertContent(branch, revtree2, dest_path, "aaa")
+        self.assertRevisionRoot(revtree1, src_path)
+        self.assertRevisionRoot(revtree1, dest_path)
 
-class TestImportToRichRootFileKinds(TestImportToPackFileKinds):
-    branch_format = "1.9-rich-root"
+    def test_copy_to_deleted_symlink_in_root(self):
+        handler, branch = self.get_handler()
+        src_path = 'a'
+        dest_path = 'b'
+        handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[(src_path,), (dest_path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(dest_path,)],
+            expected_added=[(dest_path,)])
+        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree1, dest_path, "bbb")
+        self.assertSymlinkTarget(branch, revtree2, src_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa")
+        self.assertRevisionRoot(revtree1, src_path)
+        self.assertRevisionRoot(revtree1, dest_path)
 
-try:
-    from bzrlib.repofmt.groupcompress_repo import RepositoryFormatCHK1
+    def test_copy_to_deleted_file_in_subdir(self):
+        handler, branch = self.get_handler()
+        src_path = 'd/a'
+        dest_path = 'd/b'
+        handler.process(self.file_command_iter(src_path, dest_path))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d',), (src_path,), (dest_path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(dest_path,)],
+            expected_added=[(dest_path,)])
+        self.assertContent(branch, revtree1, src_path, "aaa")
+        self.assertContent(branch, revtree1, dest_path, "bbb")
+        self.assertContent(branch, revtree2, src_path, "aaa")
+        self.assertContent(branch, revtree2, dest_path, "aaa")
+
+    def test_copy_to_deleted_symlink_in_subdir(self):
+        handler, branch = self.get_handler()
+        src_path = 'd/a'
+        dest_path = 'd/b'
+        handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+        revtree0, revtree1 = self.assertChanges(branch, 1,
+            expected_added=[('d',), (src_path,), (dest_path,)])
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_removed=[(dest_path,)],
+            expected_added=[(dest_path,)])
+        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree1, dest_path, "bbb")
+        self.assertSymlinkTarget(branch, revtree2, src_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa")
 
-    class TestImportToChkModify(TestImportToPackModify):
-        branch_format = "development6-rich-root"
 
-    class TestImportToChkModifyTricky(TestImportToPackModifyTricky):
-        branch_format = "development6-rich-root"
+class TestImportToPackCopyModified(TestCaseForGenericProcessor):
+    """Test copy of file/symlink already modified in this commit."""
 
-    class TestImportToChkDelete(TestImportToPackDelete):
-        branch_format = "development6-rich-root"
+    def file_command_iter(self, src_path, dest_path, kind='file'):
 
-    class TestImportToChkDeleteDirectory(TestImportToPackDeleteDirectory):
-        branch_format = "development6-rich-root"
+        # Revno 1: create a file or symlink
+        # Revno 2: modify and copy it
+        def command_list():
+            author = ['', 'bugs@a.com', time.time(), time.timezone]
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False),
+                        None, "aaa")
+            yield commands.CommitCommand('head', '1', author,
+                committer, "commit 1", None, [], files_one)
+            def files_two():
+                yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False),
+                        None, "bbb")
+                yield commands.FileCopyCommand(src_path, dest_path)
+            yield commands.CommitCommand('head', '2', author,
+                committer, "commit 2", ":1", [], files_two)
+        return command_list
 
-    class TestImportToChkRename(TestImportToPackRename):
-        branch_format = "development6-rich-root"
+    def test_copy_of_modified_file_in_root(self):
+        handler, branch = self.get_handler()
+        src_path = 'a'
+        dest_path = 'b'
+        handler.process(self.file_command_iter(src_path, dest_path))
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_modified=[(src_path,)],
+            expected_added=[(dest_path,)])
+        self.assertContent(branch, revtree1, src_path, "aaa")
+        self.assertContent(branch, revtree2, src_path, "bbb")
+        self.assertContent(branch, revtree2, dest_path, "bbb")
+        self.assertRevisionRoot(revtree1, src_path)
+        self.assertRevisionRoot(revtree2, dest_path)
 
-    class TestImportToChkRenameTricky(TestImportToPackRenameTricky):
-        branch_format = "development6-rich-root"
+    def test_copy_of_modified_file_in_subdir(self):
+        handler, branch = self.get_handler()
+        src_path = 'd/a'
+        dest_path = 'd/b'
+        handler.process(self.file_command_iter(src_path, dest_path))
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_modified=[(src_path,)],
+            expected_added=[(dest_path,)])
+        self.assertContent(branch, revtree1, src_path, "aaa")
+        self.assertContent(branch, revtree2, src_path, "bbb")
+        self.assertContent(branch, revtree2, dest_path, "bbb")
 
-    class TestImportToChkCopy(TestImportToPackCopy):
-        branch_format = "development6-rich-root"
+    def test_copy_of_modified_file_to_new_dir(self):
+        handler, branch = self.get_handler()
+        src_path = 'd1/a'
+        dest_path = 'd2/a'
+        handler.process(self.file_command_iter(src_path, dest_path))
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_modified=[(src_path,)],
+            expected_added=[('d2',), (dest_path,)])
+        self.assertContent(branch, revtree1, src_path, "aaa")
+        self.assertContent(branch, revtree2, src_path, "bbb")
+        self.assertContent(branch, revtree2, dest_path, "bbb")
 
-    class TestImportToChkFileKinds(TestImportToPackFileKinds):
-        branch_format = "development6-rich-root"
+    def test_copy_of_modified_symlink_in_root(self):
+        handler, branch = self.get_handler()
+        src_path = 'a'
+        dest_path = 'b'
+        handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_modified=[(src_path,)],
+            expected_added=[(dest_path,)])
+        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, src_path, "bbb")
+        self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb")
+        self.assertRevisionRoot(revtree1, src_path)
+        self.assertRevisionRoot(revtree2, dest_path)
 
-except ImportError:
-    pass
+    def test_copy_of_modified_symlink_in_subdir(self):
+        handler, branch = self.get_handler()
+        src_path = 'd/a'
+        dest_path = 'd/b'
+        handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_modified=[(src_path,)],
+            expected_added=[(dest_path,)])
+        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, src_path, "bbb")
+        self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb")
+
+    def test_copy_of_modified_symlink_to_new_dir(self):
+        handler, branch = self.get_handler()
+        src_path = 'd1/a'
+        dest_path = 'd2/a'
+        handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+        revtree1, revtree2 = self.assertChanges(branch, 2,
+            expected_modified=[(src_path,)],
+            expected_added=[('d2',), (dest_path,)])
+        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+        self.assertSymlinkTarget(branch, revtree2, src_path, "bbb")
+        self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb")
+
+
+class TestImportToPackFileKinds(TestCaseForGenericProcessor):
+
+    def get_command_iter(self, path, kind, content):
+
+        def command_list():
+            committer = ['', 'elmer@a.com', time.time(), time.timezone]
+            def files_one():
+                yield commands.FileModifyCommand(path, kind_to_mode(kind, False),
+                        None, content)
+            yield commands.CommitCommand('head', '1', None,
+                committer, "commit 1", None, [], files_one)
+        return command_list
+
+    def test_import_plainfile(self):
+        handler, branch = self.get_handler()
+        handler.process(self.get_command_iter('foo', 'file', 'aaa'))
+
+    def test_import_symlink(self):
+        handler, branch = self.get_handler()
+        handler.process(self.get_command_iter('foo', 'symlink', 'bar'))
+
+
+class TestModifyRevertInBranch(TestCaseForGenericProcessor):
+
+    def file_command_iter(self):
+        # A     add 'foo'
+        # |\
+        # | B   modify 'foo'
+        # | |
+        # | C   revert 'foo' back to A
+        # |/
+        # D     merge 'foo'
+        def command_list():
+            committer_a = ['', 'a@elmer.com', time.time(), time.timezone]
+            committer_b = ['', 'b@elmer.com', time.time(), time.timezone]
+            committer_c = ['', 'c@elmer.com', time.time(), time.timezone]
+            committer_d = ['', 'd@elmer.com', time.time(), time.timezone]
+            def files_one():
+                yield commands.FileModifyCommand('foo', kind_to_mode('file', False),
+                        None, "content A\n")
+            yield commands.CommitCommand('head', '1', None,
+                committer_a, "commit 1", None, [], files_one)
+            def files_two():
+                yield commands.FileModifyCommand('foo', kind_to_mode('file', False),
+                        None, "content B\n")
+            yield commands.CommitCommand('head', '2', None,
+                committer_b, "commit 2", ":1", [], files_two)
+            def files_three():
+                yield commands.FileModifyCommand('foo', kind_to_mode('file', False),
+                        None, "content A\n")
+            yield commands.CommitCommand('head', '3', None,
+                committer_c, "commit 3", ":2", [], files_three)
+            yield commands.CommitCommand('head', '4', None,
+                committer_d, "commit 4", ":1", [':3'], lambda: [])
+        return command_list
+
+    def test_modify_revert(self):
+        handler, branch = self.get_handler()
+        handler.process(self.file_command_iter())
+        branch.lock_read()
+        self.addCleanup(branch.unlock)
+        rev_d = branch.last_revision()
+        rev_a, rev_c = branch.repository.get_parent_map([rev_d])[rev_d]
+        rev_b = branch.repository.get_parent_map([rev_c])[rev_c][0]
+        rtree_a, rtree_b, rtree_c, rtree_d = branch.repository.revision_trees([
+            rev_a, rev_b, rev_c, rev_d])
+        foo_id = rtree_a.path2id('foo')
+        self.assertEqual(rev_a, rtree_a.inventory[foo_id].revision)
+        self.assertEqual(rev_b, rtree_b.inventory[foo_id].revision)
+        self.assertEqual(rev_c, rtree_c.inventory[foo_id].revision)
+        self.assertEqual(rev_c, rtree_d.inventory[foo_id].revision)
diff --git a/tests/test_head_tracking.py b/tests/test_head_tracking.py
deleted file mode 100644
index 63712e0..0000000
--- a/tests/test_head_tracking.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright (C) 2009 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Test tracking of heads"""
-
-from cStringIO import StringIO
-
-from bzrlib import tests
-
-from bzrlib.plugins.fastimport import (
-    commands,
-    parser,
-    )
-from bzrlib.plugins.fastimport.cache_manager import CacheManager
-
-
-# A sample input stream that only adds files to a branch
-_SAMPLE_MAINLINE = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :2 NEWS
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :101
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-"""
-
-# A sample input stream that adds files to two branches
-_SAMPLE_TWO_HEADS = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/mybranch
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :2 NEWS
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-"""
-
-# A sample input stream that adds files to two branches
-_SAMPLE_TWO_BRANCHES_MERGED = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/mybranch
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :2 NEWS
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :102
-merge :101
-D doc/index.txt
-"""
-
-# A sample input stream that contains a reset
-_SAMPLE_RESET = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-reset refs/remotes/origin/master
-from :100
-"""
-
-# A sample input stream that contains a reset and more commits
-_SAMPLE_RESET_WITH_MORE_COMMITS = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-reset refs/remotes/origin/master
-from :100
-commit refs/remotes/origin/master
-mark :101
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-D doc/README.txt
-"""
-
-class TestHeadTracking(tests.TestCase):
-
-    def assertHeads(self, input, expected):
-        s = StringIO(input)
-        p = parser.ImportParser(s)
-        cm = CacheManager()
-        for cmd in p.iter_commands():
-            if isinstance(cmd, commands.CommitCommand):
-                cm.track_heads(cmd)
-                # eat the file commands
-                list(cmd.file_iter())
-            elif isinstance(cmd, commands.ResetCommand):
-                if cmd.from_ is not None:
-                    cm.track_heads_for_ref(cmd.ref, cmd.from_)
-        self.assertEqual(cm.heads, expected)
-
-    def test_mainline(self):
-        self.assertHeads(_SAMPLE_MAINLINE, {
-            ':102': set(['refs/heads/master']),
-            })
-
-    def test_two_heads(self):
-        self.assertHeads(_SAMPLE_TWO_HEADS, {
-            ':101': set(['refs/heads/mybranch']),
-            ':102': set(['refs/heads/master']),
-            })
-
-    def test_two_branches_merged(self):
-        self.assertHeads(_SAMPLE_TWO_BRANCHES_MERGED, {
-            ':103': set(['refs/heads/master']),
-            })
-
-    def test_reset(self):
-        self.assertHeads(_SAMPLE_RESET, {
-            ':100': set(['refs/heads/master', 'refs/remotes/origin/master']),
-            })
-
-    def test_reset_with_more_commits(self):
-        self.assertHeads(_SAMPLE_RESET_WITH_MORE_COMMITS, {
-            ':101': set(['refs/remotes/origin/master']),
-            })
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
deleted file mode 100644
index 89009d1..0000000
--- a/tests/test_helpers.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (C) 2009 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Test the helper functions."""
-
-from bzrlib import tests
-
-from bzrlib.plugins.fastimport import (
-    helpers,
-    )
-
-
-class TestCommonDirectory(tests.TestCase):
-
-    def test_no_paths(self):
-        c = helpers.common_directory(None)
-        self.assertEqual(c, None)
-        c = helpers.common_directory([])
-        self.assertEqual(c, None)
-
-    def test_one_path(self):
-        c = helpers.common_directory(['foo'])
-        self.assertEqual(c, '')
-        c = helpers.common_directory(['foo/'])
-        self.assertEqual(c, 'foo/')
-        c = helpers.common_directory(['foo/bar'])
-        self.assertEqual(c, 'foo/')
-
-    def test_two_paths(self):
-        c = helpers.common_directory(['foo', 'bar'])
-        self.assertEqual(c, '')
-        c = helpers.common_directory(['foo/', 'bar'])
-        self.assertEqual(c, '')
-        c = helpers.common_directory(['foo/', 'foo/bar'])
-        self.assertEqual(c, 'foo/')
-        c = helpers.common_directory(['foo/bar/x', 'foo/bar/y'])
-        self.assertEqual(c, 'foo/bar/')
-        c = helpers.common_directory(['foo/bar/aa_x', 'foo/bar/aa_y'])
-        self.assertEqual(c, 'foo/bar/')
-
-    def test_lots_of_paths(self):
-        c = helpers.common_directory(['foo/bar/x', 'foo/bar/y', 'foo/bar/z'])
-        self.assertEqual(c, 'foo/bar/')
diff --git a/tests/test_parser.py b/tests/test_parser.py
deleted file mode 100644
index 91e27f0..0000000
--- a/tests/test_parser.py
+++ /dev/null
@@ -1,212 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Test the Import parsing"""
-
-import StringIO
-
-from bzrlib import tests
-
-from bzrlib.plugins.fastimport import (
-    errors,
-    parser,
-    )
-
-
-class TestLineBasedParser(tests.TestCase):
-
-    def test_push_line(self):
-        s = StringIO.StringIO("foo\nbar\nbaz\n")
-        p = parser.LineBasedParser(s)
-        self.assertEqual('foo', p.next_line())
-        self.assertEqual('bar', p.next_line())
-        p.push_line('bar')
-        self.assertEqual('bar', p.next_line())
-        self.assertEqual('baz', p.next_line())
-        self.assertEqual(None, p.next_line())
-
-    def test_read_bytes(self):
-        s = StringIO.StringIO("foo\nbar\nbaz\n")
-        p = parser.LineBasedParser(s)
-        self.assertEqual('fo', p.read_bytes(2))
-        self.assertEqual('o\nb', p.read_bytes(3))
-        self.assertEqual('ar', p.next_line())
-        # Test that the line buffer is ignored
-        p.push_line('bar')
-        self.assertEqual('baz', p.read_bytes(3))
-        # Test missing bytes
-        self.assertRaises(errors.MissingBytes, p.read_bytes, 10)
-
-    def test_read_until(self):
-        # TODO
-        return
-        s = StringIO.StringIO("foo\nbar\nbaz\nabc\ndef\nghi\n")
-        p = parser.LineBasedParser(s)
-        self.assertEqual('foo\nbar', p.read_until('baz'))
-        self.assertEqual('abc', p.next_line())
-        # Test that the line buffer is ignored
-        p.push_line('abc')
-        self.assertEqual('def', p.read_until('ghi'))
-        # Test missing terminator
-        self.assertRaises(errors.MissingTerminator, p.read_until('>>>'))
-
-
-# Sample text
-_sample_import_text = """
-progress completed
-# Test blob formats
-blob
-mark :1
-data 4
-aaaablob
-data 5
-bbbbb
-# Commit formats
-commit refs/heads/master
-mark :2
-committer bugs bunny <bugs@bunny.org> now
-data 14
-initial import
-M 644 inline README
-data 18
-Welcome from bugs
-commit refs/heads/master
-committer <bugs@bunny.org> now
-data 13
-second commit
-from :2
-M 644 inline README
-data 23
-Welcome from bugs, etc.
-# Miscellaneous
-checkpoint
-progress completed
-# Test a commit without sub-commands (bug #351717)
-commit refs/heads/master
-mark :3
-author <bugs@bunny.org> now
-committer <bugs@bunny.org> now
-data 20
-first commit, empty
-# Test a commit with a heredoc-style (delimited_data) messsage (bug #400960)
-commit refs/heads/master
-mark :4
-author <bugs@bunny.org> now
-committer <bugs@bunny.org> now
-data <<EOF
-Commit with heredoc-style message
-EOF
-"""
-
-
-class TestImportParser(tests.TestCase):
-
-    def test_iter_commands(self):
-        s = StringIO.StringIO(_sample_import_text)
-        p = parser.ImportParser(s)
-        result = []
-        for cmd in p.iter_commands():
-            result.append(cmd)
-            if cmd.name == 'commit':
-                for fc in cmd.file_iter():
-                    result.append(fc)
-        self.assertEqual(len(result), 11)
-        cmd1 = result.pop(0)
-        self.assertEqual('progress', cmd1.name)
-        self.assertEqual('completed', cmd1.message)
-        cmd2 = result.pop(0)
-        self.assertEqual('blob', cmd2.name)
-        self.assertEqual('1', cmd2.mark)
-        self.assertEqual(':1', cmd2.id)
-        self.assertEqual('aaaa', cmd2.data)
-        self.assertEqual(4, cmd2.lineno)
-        cmd3 = result.pop(0)
-        self.assertEqual('blob', cmd3.name)
-        self.assertEqual('@7', cmd3.id)
-        self.assertEqual(None, cmd3.mark)
-        self.assertEqual('bbbbb', cmd3.data)
-        self.assertEqual(7, cmd3.lineno)
-        cmd4 = result.pop(0)
-        self.assertEqual('commit', cmd4.name)
-        self.assertEqual('2', cmd4.mark)
-        self.assertEqual(':2', cmd4.id)
-        self.assertEqual('initial import', cmd4.message)
-        self.assertEqual('bugs bunny', cmd4.committer[0])
-        self.assertEqual('bugs@bunny.org', cmd4.committer[1])
-        # FIXME: check timestamp and timezone as well
-        self.assertEqual(None, cmd4.author)
-        self.assertEqual(11, cmd4.lineno)
-        self.assertEqual('refs/heads/master', cmd4.ref)
-        self.assertEqual(None, cmd4.from_)
-        self.assertEqual([], cmd4.merges)
-        file_cmd1 = result.pop(0)
-        self.assertEqual('filemodify', file_cmd1.name)
-        self.assertEqual('README', file_cmd1.path)
-        self.assertEqual('file', file_cmd1.kind)
-        self.assertEqual(False, file_cmd1.is_executable)
-        self.assertEqual('Welcome from bugs\n', file_cmd1.data)
-        cmd5 = result.pop(0)
-        self.assertEqual('commit', cmd5.name)
-        self.assertEqual(None, cmd5.mark)
-        self.assertEqual('@19', cmd5.id)
-        self.assertEqual('second commit', cmd5.message)
-        self.assertEqual('', cmd5.committer[0])
-        self.assertEqual('bugs@bunny.org', cmd5.committer[1])
-        # FIXME: check timestamp and timezone as well
-        self.assertEqual(None, cmd5.author)
-        self.assertEqual(19, cmd5.lineno)
-        self.assertEqual('refs/heads/master', cmd5.ref)
-        self.assertEqual(':2', cmd5.from_)
-        self.assertEqual([], cmd5.merges)
-        file_cmd2 = result.pop(0)
-        self.assertEqual('filemodify', file_cmd2.name)
-        self.assertEqual('README', file_cmd2.path)
-        self.assertEqual('file', file_cmd2.kind)
-        self.assertEqual(False, file_cmd2.is_executable)
-        self.assertEqual('Welcome from bugs, etc.', file_cmd2.data)
-        cmd6 = result.pop(0)
-        self.assertEqual(cmd6.name, 'checkpoint')
-        cmd7 = result.pop(0)
-        self.assertEqual('progress', cmd7.name)
-        self.assertEqual('completed', cmd7.message)
-        cmd = result.pop(0)
-        self.assertEqual('commit', cmd.name)
-        self.assertEqual('3', cmd.mark)
-        self.assertEqual(None, cmd.from_)
-        cmd = result.pop(0)
-        self.assertEqual('commit', cmd.name)
-        self.assertEqual('4', cmd.mark)
-        self.assertEqual('Commit with heredoc-style message\n', cmd.message)
-
-
-class TestStringParsing(tests.TestCase):
-
-    def test_unquote(self):
-        s = r'hello \"sweet\" wo\\r\tld'
-        self.assertEquals(r'hello "sweet" wo\r' + "\tld",
-            parser._unquote_c_string(s))
-
-
-class TestPathPairParsing(tests.TestCase):
-
-    def test_path_pair_simple(self):
-        p = parser.ImportParser("")
-        self.assertEqual(['foo', 'bar'], p._path_pair("foo bar"))
-
-    def test_path_pair_spaces_in_first(self):
-        p = parser.ImportParser("")
-        self.assertEqual(['foo bar', 'baz'],
-            p._path_pair('"foo bar" baz'))
diff --git a/tests/test_revision_store.py b/tests/test_revision_store.py
new file mode 100644
index 0000000..9e39254
--- /dev/null
+++ b/tests/test_revision_store.py
@@ -0,0 +1,152 @@
+# Copyright (C) 2008, 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Direct tests of the revision_store classes."""
+
+from bzrlib import (
+    branch,
+    errors,
+    inventory,
+    osutils,
+    tests,
+    )
+
+from bzrlib.plugins.fastimport import (
+    revision_store,
+    )
+from bzrlib.plugins.fastimport.tests import (
+    FastimportFeature,
+    )
+
+
+class Test_TreeShim(tests.TestCase):
+    """Direct tests of revision_store._TreeShim without a repository."""
+
+    _test_needs_features = [FastimportFeature]
+
+    def invAddEntry(self, inv, path, file_id=None):
+        """Add path to inv; a trailing '/' marks a directory entry."""
+        kind = 'file'
+        if path.endswith('/'):
+            kind = 'directory'
+            path = path[:-1]
+        parent_path, basename = osutils.split(path)
+        parent_id = inv.path2id(parent_path)
+        inv.add(inventory.make_entry(kind, basename, parent_id, file_id))
+
+    def make_trivial_basis_inv(self):
+        """Return an inventory containing foo, bar/ and bar/baz."""
+        basis_inv = inventory.Inventory('TREE_ROOT')
+        for path, file_id in (('foo', 'foo-id'), ('bar/', 'bar-id'),
+                              ('bar/baz', 'baz-id')):
+            self.invAddEntry(basis_inv, path, file_id)
+        return basis_inv
+
+    def _make_link_entry(self):
+        """Return a symlink entry 'link' under TREE_ROOT with a target."""
+        ie = inventory.make_entry('symlink', 'link', 'TREE_ROOT', 'link-id')
+        ie.symlink_target = u'link-target'
+        return ie
+
+    def _make_shim(self, basis_inv, inv_delta, content_provider=None):
+        """Build a _TreeShim over basis_inv and inv_delta with repo=None."""
+        return revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+                                        inv_delta=inv_delta,
+                                        content_provider=content_provider)
+
+    def test_id2path_no_delta(self):
+        """With an empty delta, id2path answers from the basis inventory."""
+        shim = self._make_shim(self.make_trivial_basis_inv(), [])
+        for file_id, path in (('TREE_ROOT', ''), ('foo-id', 'foo'),
+                              ('bar-id', 'bar'), ('baz-id', 'bar/baz')):
+            self.assertEqual(path, shim.id2path(file_id))
+        self.assertRaises(errors.NoSuchId, shim.id2path, 'qux-id')
+
+    def test_id2path_with_delta(self):
+        """id2path reflects a rename and a removal carried by the delta."""
+        foo_entry = inventory.make_entry('file', 'foo2', 'TREE_ROOT', 'foo-id')
+        # foo becomes foo2; bar/baz has no new path, i.e. it is removed.
+        inv_delta = [
+            ('foo', 'foo2', 'foo-id', foo_entry),
+            ('bar/baz', None, 'baz-id', None),
+            ]
+        shim = self._make_shim(self.make_trivial_basis_inv(), inv_delta)
+        self.assertEqual('', shim.id2path('TREE_ROOT'))
+        self.assertEqual('foo2', shim.id2path('foo-id'))
+        self.assertEqual('bar', shim.id2path('bar-id'))
+        self.assertRaises(errors.NoSuchId, shim.id2path, 'baz-id')
+
+    def test_path2id(self):
+        """path2id resolves '' and raises NotImplementedError for 'bar'."""
+        shim = self._make_shim(self.make_trivial_basis_inv(), [])
+        self.assertEqual('TREE_ROOT', shim.path2id(''))
+        # We don't want to ever give a wrong value, so for now we just raise
+        # NotImplementedError
+        self.assertRaises(NotImplementedError, shim.path2id, 'bar')
+
+    def test_get_file_with_stat_content_in_stream(self):
+        """get_file_with_stat serves content_provider text with no stat."""
+        def content_provider(file_id):
+            return 'content of\n' + file_id + '\n'
+
+        shim = self._make_shim(self.make_trivial_basis_inv(), [],
+                               content_provider=content_provider)
+        f_obj, stat_val = shim.get_file_with_stat('baz-id')
+        self.assertIs(None, stat_val)
+        self.assertEqualDiff('content of\nbaz-id\n', f_obj.read())
+
+    # TODO: Test when the content isn't in the stream, and we fall back to the
+    #       repository that was passed in
+
+    def test_get_symlink_target(self):
+        """A symlink already in the basis inventory reports its target."""
+        basis_inv = self.make_trivial_basis_inv()
+        basis_inv.add(self._make_link_entry())
+        shim = self._make_shim(basis_inv, [])
+        self.assertEqual(u'link-target', shim.get_symlink_target('link-id'))
+
+    def test_get_symlink_target_from_delta(self):
+        """A symlink added only by the delta reports its target."""
+        inv_delta = [(None, 'link', 'link-id', self._make_link_entry())]
+        shim = self._make_shim(self.make_trivial_basis_inv(), inv_delta)
+        self.assertEqual(u'link-target', shim.get_symlink_target('link-id'))
+
+    def test__delta_to_iter_changes(self):
+        """A move, a removal and an addition each yield one change tuple."""
+        foo_entry = inventory.make_entry('file', 'foo2', 'bar-id', 'foo-id')
+        inv_delta = [
+            ('foo', 'bar/foo2', 'foo-id', foo_entry),
+            ('bar/baz', None, 'baz-id', None),
+            (None, 'link', 'link-id', self._make_link_entry()),
+            ]
+        shim = self._make_shim(self.make_trivial_basis_inv(), inv_delta)
+        expected = [
+            ('foo-id', ('foo', 'bar/foo2'), False, (True, True),
+             ('TREE_ROOT', 'bar-id'), ('foo', 'foo2'),
+             ('file', 'file'), (False, False)),
+            ('baz-id', ('bar/baz', None), True, (True, False),
+             ('bar-id', None), ('baz', None),
+             ('file', None), (False, None)),
+            ('link-id', (None, 'link'), True, (False, True),
+             (None, 'TREE_ROOT'), (None, 'link'),
+             (None, 'symlink'), (None, False)),
+            ]
+        changes = list(shim._delta_to_iter_changes())
+        # For a line-by-line diff when this fails, temporarily use:
+        # from pprint import pformat
+        # self.assertEqualDiff(pformat(expected), pformat(changes))
+        self.assertEqual(expected, changes)
+
diff --git a/user_mapper.py b/user_mapper.py
new file mode 100644
index 0000000..4fcf4a4
--- /dev/null
+++ b/user_mapper.py
@@ -0,0 +1,81 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+from email import Utils
+
+
+class UserMapper(object):
+    """Translate (name, email) identities as directed by user-map lines."""
+
+    def __init__(self, lines):
+        """Create a user-mapper from a list of lines.
+
+        Blank lines and comment lines (starting with #) are ignored.
+        Otherwise lines are of the form:
+
+          old-id = new-id
+
+        Each id may be in the following forms:
+
+          name <email>
+          name
+
+        If old-id has the value '@', then new-id is the domain to use
+        when generating an email from a user-id.
+
+        :raises ValueError: if a mapping line contains no '='
+        """
+        self._parse(lines)
+
+    def _parse(self, lines):
+        """Fill the id map and the default domain from lines."""
+        self._user_map = {}
+        self._default_domain = None
+        for line in lines:
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue
+            old, sep, new = line.partition('=')
+            if not sep:
+                # Name the bad line instead of failing on tuple unpacking.
+                raise ValueError("invalid user map line: %r" % (line,))
+            old, new = old.strip(), new.strip()
+            if old == '@':
+                self._default_domain = new
+                continue
+            # Both sides are stored as (name, email) pairs.
+            self._user_map[self._parse_id(old)] = self._parse_id(new)
+
+    def _parse_id(self, id):
+        """Split id into (name, email); email is None without a '<'."""
+        if '<' not in id:
+            return id, None
+        return Utils.parseaddr(id)
+
+    def map_name_and_email(self, name, email):
+        """Map a name and an email to the preferred name and email.
+
+        :param name: the current name
+        :param email: the current email
+        :result: the preferred name and email
+        """
+        mapped = self._user_map.get((name, email))
+        if mapped is not None:
+            return mapped
+        # Unmapped: keep the name, synthesising an email only if none given.
+        if self._default_domain and not email:
+            return name, "%s@%s" % (name, self._default_domain)
+        return name, email