-rw-r--r--  .bzrignore                                    4
-rw-r--r--  .testr.conf                                   4
-rw-r--r--  NEWS                                          61
-rw-r--r--  README.txt                                    4
-rw-r--r--  __init__.py                                   755
-rw-r--r--  branch_mapper.py                              57
-rw-r--r--  branch_updater.py                             27
-rw-r--r--  bzr_commit_handler.py                         454
-rw-r--r--  cache_manager.py                              272
-rw-r--r--  cmds.py                                       882
-rw-r--r--  commands.py                                   349
-rw-r--r--  dates.py                                      79
-rw-r--r--  errors.py                                     165
-rw-r--r--  explorer/logos/cvs.png                        bin 0 -> 927 bytes
-rw-r--r--  explorer/logos/darcs.png                      bin 0 -> 907 bytes
-rw-r--r--  explorer/logos/git.png                        bin 0 -> 115 bytes
-rw-r--r--  explorer/logos/mercurial.png                  bin 0 -> 792 bytes
-rw-r--r--  explorer/logos/monotone.png                   bin 0 -> 685 bytes
-rw-r--r--  explorer/logos/perforce.png                   bin 0 -> 924 bytes
-rw-r--r--  explorer/logos/subversion.png                 bin 0 -> 1054 bytes
-rw-r--r--  explorer/tools.xml                            20
-rw-r--r-- [-rwxr-xr-x]  exporter.py (renamed from bzr_exporter.py)  222
-rw-r--r--  exporters/Makefile                            7
-rw-r--r--  exporters/__init__.py                         15
-rw-r--r--  exporters/darcs/README                        21
-rw-r--r--  exporters/darcs/TODO                          2
-rwxr-xr-x  exporters/darcs/d2x                           2
-rwxr-xr-x  exporters/darcs/darcs-fast-export             634
-rw-r--r--  exporters/darcs/darcs-fast-export.txt         4
-rwxr-xr-x  exporters/darcs/darcs-fast-import             85
-rw-r--r--  exporters/darcs/darcs-fast-import.txt         22
-rwxr-xr-x  exporters/darcs/git-darcs                     95
-rw-r--r--  exporters/darcs/git-darcs.txt                 22
-rw-r--r--  exporters/darcs/t/lib-httpd.sh                67
-rw-r--r--  exporters/darcs/t/lib.sh                      34
-rw-r--r--  exporters/darcs/t/test-hg.sh                  2
-rw-r--r--  exporters/darcs/t/test2-git-http.sh           22
-rw-r--r--  exporters/darcs/t/testimport-gitsymlink.sh    45
-rw-r--r--  exporters/darcs/t/testimport-hg.sh            1
-rwxr-xr-x  exporters/darcs/x2d                           8
-rw-r--r--  exporters/darcs/x2d.txt                       4
-rw-r--r--  exporters/svn-fast-export.README              12
-rwxr-xr-x  exporters/svn-fast-export.py                  14
-rw-r--r--  helpers.py                                    140
-rw-r--r--  idmapfile.py                                  65
-rw-r--r--  marks_file.py                                 17
-rw-r--r--  parser.py                                     557
-rw-r--r--  processor.py                                  253
-rw-r--r--  processors/filter_processor.py                288
-rw-r--r--  processors/generic_processor.py               122
-rw-r--r--  processors/info_processor.py                  281
-rw-r--r--  processors/query_processor.py                 77
-rw-r--r--  revision_store.py                             235
-rwxr-xr-x  setup.py                                      5
-rw-r--r--  tests/__init__.py                             38
-rw-r--r--  tests/test_branch_mapper.py                   46
-rw-r--r--  tests/test_commands.py                        287
-rw-r--r--  tests/test_errors.py                          73
-rw-r--r--  tests/test_exporter.py                        62
-rw-r--r--  tests/test_filter_processor.py                877
-rw-r--r--  tests/test_generic_processor.py               1276
-rw-r--r--  tests/test_head_tracking.py                   257
-rw-r--r--  tests/test_helpers.py                         56
-rw-r--r--  tests/test_parser.py                          212
-rw-r--r--  tests/test_revision_store.py                  152
-rw-r--r--  user_mapper.py                                81
66 files changed, 4356 insertions, 5574 deletions
diff --git a/.bzrignore b/.bzrignore
index 378eac2..c317774 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -1 +1,5 @@
build
+# executables
+exporters/svn-archive
+exporters/svn-fast-export
+.testrepository
diff --git a/.testr.conf b/.testr.conf
new file mode 100644
index 0000000..cc509ce
--- /dev/null
+++ b/.testr.conf
@@ -0,0 +1,4 @@
+[DEFAULT]
+test_command=BZR_PLUGINS_AT=fastimport@. bzr selftest "^bzrlib.plugins.fastimport" --subunit $IDOPTION $LISTOPT
+test_id_option=--load-list $IDFILE
+test_list_option=--list
diff --git a/NEWS b/NEWS
index d0028c3..7d2cb12 100644
--- a/NEWS
+++ b/NEWS
@@ -4,11 +4,32 @@ bzr-fastimport Release Notes
.. contents::
-In Development
-==============
+0.10 UNRELEASED
+===============
-Compatability Breaks
---------------------
+Changes
+-------
+
+* bzr-fastimport's file parsing and generation functionality has been split
+  out into a separate upstream project called python-fastimport, which it
+  now depends on.
+ python-fastimport can be retrieved from http://launchpad.net/python-fastimport.
+
+Bug fixes
+---------
+
+* Cope with non-ascii characters in tag names. (Jelmer Vernooij, #598060)
+
+* Cope with non-ascii characters in symbolic links. (Daniel Clemente,
+ Jelmer Vernooij, #238365)
+
+* In plain mode, don't export multiple authors. (David Kilzer, #496677)
+
+* Fix indentation when starting p4 fails. (Jelmer Vernooij)
+
+* SOURCE argument to bzr fast-import-filter is now optional, consistent with
+ examples. (Jelmer Vernooij, #477861)
+
+0.9 28-Feb-2010
+===============
New Features
------------
@@ -35,9 +56,30 @@ New Features
merged into this one for the purposes of ongoing bug fixing
and development. (Miklos Vajna)
+* fast-export now supports a --no-plain parameter which causes
+ richer metadata to be included in the output using the
+ recently agreed 'feature' extension capability. The features
+ used are called multiple-authors, commit-properties and
+ empty-directories. (Ian Clatworthy)
+
+* fast-import and fast-import-filter now support user mapping
+ via the new --user-map option. The argument is a file specifying
+ how user-ids should be mapped to preferred user-ids.
+ (Ian Clatworthy)
+
+* svn-fast-export now supports an address option (to control the
+ default email domain) and a first-rev option (to select revisions
+ since a given one). (Ted Gould)
+
Improvements
------------
+* Large repositories now compress better thanks to a change in
+ how file-ids are assigned. (Ian Clatworthy, John Arbash Meinel)
+
+* Memory usage is improved by flushing blobs to a disk cache
+ when appropriate. (John Arbash Meinel)
+
* If a fast-import source ends in ".gz", it is assumed to be in
gzip format and the stream is implicitly uncompressed. This
means fast-import dump files generated by fast-export-from-xxx
@@ -50,7 +92,7 @@ Improvements
* Directories that become empty following a delete or rename of
one of their children are now implicitly pruned. If required,
this will be made optional at a later date.
- (Ian Clatworthy)
+ (Tom Widmer, Ian Clatworthy)
* Blob tracking is now more intelligently done by an implicit
first pass to collect blob usage statistics. This eliminates
@@ -79,6 +121,9 @@ Bug Fixes
* Gracefully handle an empty input stream. (Gonéri Le Bouder)
+* Gracefully handle git submodules by ignoring them.
+ (Ian Clatworthy)
+
* Get git-bzr working again. (Gonéri Le Bouder)
Documentation
@@ -87,12 +132,6 @@ Documentation
* Improved documentation has been published in the Bazaar Data Migration
Guide: http://doc.bazaar-vcs.org/migration/en/data-migration/.
-Testing
--------
-
-Internals
----------
-
0.8 22-Jul-2009
===============
diff --git a/README.txt b/README.txt
index 30b0e95..122b0e6 100644
--- a/README.txt
+++ b/README.txt
@@ -8,7 +8,9 @@ Required and recommended packages are:
* Python 2.4 or later
-* Bazaar 1.1 or later.
+* Python-Fastimport 0.9.0 or later.
+
+* Bazaar 1.18 or later.
Installation
diff --git a/__init__.py b/__init__.py
index 8ba91fc..61e14c6 100644
--- a/__init__.py
+++ b/__init__.py
@@ -55,7 +55,7 @@ online help for the individual commands for details::
bzr help fast-export-from-darcs
bzr help fast-export-from-hg
bzr help fast-export-from-git
- bzr help fast-export-from-mnt
+ bzr help fast-export-from-mtn
bzr help fast-export-from-p4
bzr help fast-export-from-svn
@@ -79,11 +79,18 @@ To report bugs or publish enhancements, visit the bzr-fastimport project
page on Launchpad, https://launchpad.net/bzr-fastimport.
"""
-version_info = (0, 9, 0, 'dev', 0)
+version_info = (0, 10, 0, 'dev', 0)
-from bzrlib import bzrdir
-from bzrlib.commands import Command, register_command
-from bzrlib.option import Option, ListOption, RegistryOption
+from bzrlib.commands import plugin_cmds
+
+
+def load_fastimport():
+ """Load the fastimport module or raise an appropriate exception."""
+ try:
+ import fastimport
+ except ImportError, e:
+ from bzrlib.errors import DependencyNotPresent
+ raise DependencyNotPresent("fastimport", e)
def test_suite():
@@ -91,726 +98,18 @@ def test_suite():
return tests.test_suite()
-def _run(source, processor_factory, control, params, verbose):
- """Create and run a processor.
-
- :param source: a filename or '-' for standard input. If the
- filename ends in .gz, it will be opened as a gzip file and
- the stream will be implicitly uncompressed
- :param processor_factory: a callable for creating a processor
- :param control: the BzrDir of the destination or None if no
- destination is expected
- """
- import parser
- stream = _get_source_stream(source)
- proc = processor_factory(control, params=params, verbose=verbose)
- p = parser.ImportParser(stream, verbose=verbose)
- return proc.process(p.iter_commands)
-
-
-def _get_source_stream(source):
- if source == '-':
- import sys
- stream = helpers.binary_stream(sys.stdin)
- elif source.endswith('.gz'):
- import gzip
- stream = gzip.open(source, "rb")
- else:
- stream = open(source, "rb")
- return stream
-
-
-class cmd_fast_import(Command):
- """Backend for fast Bazaar data importers.
-
- This command reads a mixed command/data stream and creates
- branches in a Bazaar repository accordingly. The preferred
- recipe is::
-
- bzr fast-import project.fi project.bzr
-
- Numerous commands are provided for generating a fast-import file
- to use as input. These are named fast-export-from-xxx where xxx
- is one of cvs, darcs, git, hg, mnt, p4 or svn.
- To specify standard input as the input stream, use a
- source name of '-' (instead of project.fi). If the source name
- ends in '.gz', it is assumed to be compressed in gzip format.
-
- project.bzr will be created if it doesn't exist. If it exists
- already, it should be empty or be an existing Bazaar repository
- or branch. If not specified, the current directory is assumed.
-
- fast-import will intelligently select the format to use when
- creating a repository or branch. If you are running Bazaar 1.17
- up to Bazaar 2.0, the default format for Bazaar 2.x ("2a") is used.
- Otherwise, the current default format ("pack-0.92" for Bazaar 1.x)
- is used. If you wish to specify a custom format, use the `--format`
- option.
-
- .. note::
-
- To maintain backwards compatibility, fast-import lets you
- create the target repository or standalone branch yourself.
- It is recommended though that you let fast-import create
- these for you instead.
-
- :Branch mapping rules:
-
- Git reference names are mapped to Bazaar branch names as follows:
-
- * refs/heads/foo is mapped to foo
- * refs/remotes/origin/foo is mapped to foo.remote
- * refs/tags/foo is mapped to foo.tag
- * */master is mapped to trunk, trunk.remote, etc.
- * */trunk is mapped to git-trunk, git-trunk.remote, etc.
-
- :Branch creation rules:
-
- When a shared repository is created or found at the destination,
- branches are created inside it. In the simple case of a single
- branch (refs/heads/master) inside the input file, the branch is
- project.bzr/trunk.
-
- When a standalone branch is found at the destination, the trunk
- is imported there and warnings are output about any other branches
- found in the input file.
-
- When a branch in a shared repository is found at the destination,
- that branch is made the trunk and other branches, if any, are
- created in sister directories.
-
- :Working tree updates:
-
- The working tree is generated for the trunk branch. If multiple
- branches are created, a message is output on completion explaining
- how to create the working trees for other branches.
-
- :Custom exporters:
-
- The fast-export-from-xxx commands typically call more advanced
- xxx-fast-export scripts. You are welcome to use the advanced
- scripts if you prefer.
-
- If you wish to write a custom exporter for your project, see
- http://bazaar-vcs.org/BzrFastImport for the detailed protocol
- specification. In many cases, exporters can be written quite
- quickly using whatever scripting/programming language you like.
-
- :Blob tracking:
-
- As some exporters (like git-fast-export) reuse blob data across
- commits, fast-import makes two passes over the input file by
- default. In the first pass, it collects data about what blobs are
- used when, along with some other statistics (e.g. total number of
- commits). In the second pass, it generates the repository and
- branches.
-
- .. note::
-
- The initial pass isn't done if the --info option is used
- to explicitly pass in information about the input stream.
- It also isn't done if the source is standard input. In the
- latter case, memory consumption may be higher than otherwise
- because some blobs may be kept in memory longer than necessary.
-
- :Restarting an import:
-
- At checkpoints and on completion, the commit-id -> revision-id
- map is saved to a file called 'fastimport-id-map' in the control
- directory for the repository (e.g. .bzr/repository). If the import
- is interrupted or unexpectedly crashes, it can be started again
- and this file will be used to skip over already loaded revisions.
- As long as subsequent exports from the original source begin
- with exactly the same revisions, you can use this feature to
- maintain a mirror of a repository managed by a foreign tool.
- If and when Bazaar is used to manage the repository, this file
- can be safely deleted.
-
- :Examples:
-
- Import a Subversion repository into Bazaar::
-
- bzr fast-export-from-svn /svn/repo/path project.fi
- bzr fast-import project.fi project.bzr
-
- Import a CVS repository into Bazaar::
-
- bzr fast-export-from-cvs /cvs/repo/path project.fi
- bzr fast-import project.fi project.bzr
-
- Import a Git repository into Bazaar::
-
- bzr fast-export-from-git /git/repo/path project.fi
- bzr fast-import project.fi project.bzr
-
- Import a Mercurial repository into Bazaar::
-
- bzr fast-export-from-hg /hg/repo/path project.fi
- bzr fast-import project.fi project.bzr
-
- Import a Darcs repository into Bazaar::
-
- bzr fast-export-from-darcs /darcs/repo/path project.fi
- bzr fast-import project.fi project.bzr
- """
- hidden = False
- _see_also = ['fast-export', 'fast-import-filter', 'fast-import-info']
- takes_args = ['source', 'destination?']
- takes_options = ['verbose',
- Option('info', type=str,
- help="Path to file containing caching hints.",
- ),
- Option('trees',
- help="Update all working trees, not just trunk's.",
- ),
- Option('count', type=int,
- help="Import this many revisions then exit.",
- ),
- Option('checkpoint', type=int,
- help="Checkpoint automatically every N revisions."
- " The default is 10000.",
- ),
- Option('autopack', type=int,
- help="Pack every N checkpoints. The default is 4.",
- ),
- Option('inv-cache', type=int,
- help="Number of inventories to cache.",
- ),
- RegistryOption.from_kwargs('mode',
- 'The import algorithm to use.',
- title='Import Algorithm',
- default='Use the preferred algorithm (inventory deltas).',
- classic="Use the original algorithm (mutable inventories).",
- experimental="Enable experimental features.",
- value_switches=True, enum_switch=False,
- ),
- Option('import-marks', type=str,
- help="Import marks from file."
- ),
- Option('export-marks', type=str,
- help="Export marks to file."
- ),
- RegistryOption('format',
- help='Specify a format for the created repository. See'
- ' "bzr help formats" for details.',
- lazy_registry=('bzrlib.bzrdir', 'format_registry'),
- converter=lambda name: bzrdir.format_registry.make_bzrdir(name),
- value_switches=False, title='Repository format'),
- ]
- aliases = []
- def run(self, source, destination='.', verbose=False, info=None,
- trees=False, count=-1, checkpoint=10000, autopack=4, inv_cache=-1,
- mode=None, import_marks=None, export_marks=None, format=None):
- from bzrlib.errors import BzrCommandError, NotBranchError
- from bzrlib.plugins.fastimport.processors import generic_processor
- from bzrlib.plugins.fastimport.helpers import (
- open_destination_directory,
- )
- # If no format is given and the user is running a release
- # leading up to 2.0, select 2a for them. Otherwise, use
- # the default format.
- if format is None:
- import bzrlib
- bzr_version = bzrlib.version_info[0:2]
- if bzr_version in [(1,17), (1,18), (2,0)]:
- format = bzrdir.format_registry.make_bzrdir('2a')
- control = open_destination_directory(destination, format=format)
-
- # If an information file was given and the source isn't stdin,
- # generate the information by reading the source file as a first pass
- if info is None and source != '-':
- info = self._generate_info(source)
-
- # Do the work
- if mode is None:
- mode = 'default'
- params = {
- 'info': info,
- 'trees': trees,
- 'count': count,
- 'checkpoint': checkpoint,
- 'autopack': autopack,
- 'inv-cache': inv_cache,
- 'mode': mode,
- 'import-marks': import_marks,
- 'export-marks': export_marks,
- }
- return _run(source, generic_processor.GenericProcessor, control,
- params, verbose)
-
- def _generate_info(self, source):
- from cStringIO import StringIO
- import parser
- from bzrlib.plugins.fastimport.processors import info_processor
- stream = _get_source_stream(source)
- output = StringIO()
- try:
- proc = info_processor.InfoProcessor(verbose=True, outf=output)
- p = parser.ImportParser(stream)
- return_code = proc.process(p.iter_commands)
- lines = output.getvalue().splitlines()
- finally:
- output.close()
- stream.seek(0)
- return lines
-
-
-class cmd_fast_import_filter(Command):
- """Filter a fast-import stream to include/exclude files & directories.
-
- This command is useful for splitting a subdirectory or bunch of
- files out from a project to create a new project complete with history
- for just those files. It can also be used to create a new project
- repository that removes all references to files that should not have
- been committed, e.g. security-related information (like passwords),
- commercially sensitive material, files with an incompatible license or
- large binary files like CD images.
-
- When filtering out a subdirectory (or file), the new stream uses the
- subdirectory (or subdirectory containing the file) as the root. As
- fast-import doesn't know in advance whether a path is a file or
- directory in the stream, you need to specify a trailing '/' on
- directories passed to the `--includes option`. If multiple files or
- directories are given, the new root is the deepest common directory.
-
- To specify standard input as the input stream, use a source name
- of '-'. If the source name ends in '.gz', it is assumed to be
- compressed in gzip format.
-
- Note: If a path has been renamed, take care to specify the *original*
- path name, not the final name that it ends up with.
-
- :Examples:
-
- Create a new project from a library (note the trailing / on the
- directory name of the library)::
-
- front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi
- bzr fast-import xxx.fi mylibrary.bzr
- (lib/xxx/foo is now foo)
-
- Create a new repository without a sensitive file::
-
- front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi
- bzr fast-import clean.fi clean.bzr
- """
- hidden = False
- _see_also = ['fast-import']
- takes_args = ['source']
- takes_options = ['verbose',
- ListOption('include_paths', short_name='i', type=str,
- help="Only include commits affecting these paths."
- " Directories should have a trailing /."
- ),
- ListOption('exclude_paths', short_name='x', type=str,
- help="Exclude these paths from commits."
- ),
- ]
- aliases = []
- encoding_type = 'exact'
- def run(self, source, verbose=False, include_paths=None,
- exclude_paths=None):
- from bzrlib.plugins.fastimport.processors import filter_processor
- params = {
- 'include_paths': include_paths,
- 'exclude_paths': exclude_paths,
- }
- return _run(source, filter_processor.FilterProcessor, None, params,
- verbose)
-
-
-class cmd_fast_import_info(Command):
- """Output information about a fast-import stream.
-
- This command reads a fast-import stream and outputs
- statistics and interesting properties about what it finds.
- When run in verbose mode, the information is output as a
- configuration file that can be passed to fast-import to
- assist it in intelligently caching objects.
-
- To specify standard input as the input stream, use a source name
- of '-'. If the source name ends in '.gz', it is assumed to be
- compressed in gzip format.
-
- :Examples:
-
- Display statistics about the import stream produced by front-end::
-
- front-end | bzr fast-import-info -
-
- Create a hints file for running fast-import on a large repository::
-
- front-end | bzr fast-import-info -v - > front-end.cfg
- """
- hidden = False
- _see_also = ['fast-import']
- takes_args = ['source']
- takes_options = ['verbose']
- aliases = []
- def run(self, source, verbose=False):
- from bzrlib.plugins.fastimport.processors import info_processor
- return _run(source, info_processor.InfoProcessor, None, {}, verbose)
-
-
-class cmd_fast_import_query(Command):
- """Query a fast-import stream displaying selected commands.
-
- To specify standard input as the input stream, use a source name
- of '-'. If the source name ends in '.gz', it is assumed to be
- compressed in gzip format.
-
- To specify the commands to display, use the -C option one or
- more times. To specify just some fields for a command, use the
- syntax::
-
- command=field1,...
-
- By default, the nominated fields for the nominated commands
- are displayed tab separated. To see the information in
- a name:value format, use verbose mode.
-
- Note: Binary fields (e.g. data for blobs) are masked out
- so it is generally safe to view the output in a terminal.
-
- :Examples:
-
- Show all the fields of the reset and tag commands::
-
- front-end > xxx.fi
- bzr fast-import-query xxx.fi -Creset -Ctag
-
- Show the mark and merge fields of the commit commands::
-
- bzr fast-import-query xxx.fi -Ccommit=mark,merge
- """
- hidden = True
- _see_also = ['fast-import', 'fast-import-filter']
- takes_args = ['source']
- takes_options = ['verbose',
- ListOption('commands', short_name='C', type=str,
- help="Display fields for these commands."
- ),
- ]
- aliases = []
- def run(self, source, verbose=False, commands=None):
- from bzrlib.plugins.fastimport.processors import query_processor
- from bzrlib.plugins.fastimport import helpers
- params = helpers.defines_to_dict(commands)
- return _run(source, query_processor.QueryProcessor, None, params,
- verbose)
-
-
-class cmd_fast_export(Command):
- """Generate a fast-import stream from a Bazaar branch.
-
- This program generates a stream from a bzr branch in the format
- required by git-fast-import(1). It preserves merges correctly,
- even merged branches with no common history (`bzr merge -r 0..-1`).
-
- If no destination is given or the destination is '-', standard output
- is used. Otherwise, the destination is the name of a file. If the
- destination ends in '.gz', the output will be compressed into gzip
- format.
-
- :Examples:
-
- To import several unmerged but related branches into the same repository,
- use the --{export,import}-marks options, and specify a name for the git
- branch like this::
-
- bzr fast-export --export-marks=marks.bzr project.dev |
- GIT_DIR=project/.git git-fast-import --export-marks=marks.git
-
- bzr fast-export --import-marks=marks.bzr -b other project.other |
- GIT_DIR=project/.git git-fast-import --import-marks=marks.git
-
- If you get a "Missing space after source" error from git-fast-import,
- see the top of the commands.py module for a work-around.
- """
- hidden = False
- _see_also = ['fast-import', 'fast-import-filter']
- takes_args = ['source', 'destination?']
- takes_options = ['verbose', 'revision',
- Option('git-branch', short_name='b', type=str,
- argname='FILE',
- help='Name of the git branch to create (default=master).'
- ),
- Option('checkpoint', type=int, argname='N',
- help="Checkpoint every N revisions (default=10000)."
- ),
- Option('marks', type=str, argname='FILE',
- help="Import marks from and export marks to file."
- ),
- Option('import-marks', type=str, argname='FILE',
- help="Import marks from file."
- ),
- Option('export-marks', type=str, argname='FILE',
- help="Export marks to file."
- ),
- ]
- aliases = []
- encoding_type = 'exact'
- def run(self, source, destination=None, verbose=False,
- git_branch="master", checkpoint=10000, marks=None,
- import_marks=None, export_marks=None, revision=None):
- from bzrlib.plugins.fastimport import bzr_exporter
-
- if marks:
- import_marks = export_marks = marks
- exporter = bzr_exporter.BzrFastExporter(source,
- destination=destination,
- git_branch=git_branch, checkpoint=checkpoint,
- import_marks_file=import_marks, export_marks_file=export_marks,
- revision=revision, verbose=verbose)
- return exporter.run()
-
-
-class cmd_fast_export_from_cvs(Command):
- """Generate a fast-import file from a CVS repository.
-
- Destination is a dump file, typically named xxx.fi where xxx is
- the name of the project. If '-' is given, standard output is used.
-
- cvs2svn 2.3 or later must be installed as its cvs2bzr script is used
- under the covers to do the export.
-
- The source must be the path on your filesystem to the part of the
- repository you wish to convert. i.e. either that path or a parent
- directory must contain a CVSROOT subdirectory. The path may point to
- either the top of a repository or to a path within it. In the latter
- case, only that project within the repository will be converted.
-
- .. note::
- Remote access to the repository is not sufficient - the path
- must point into a copy of the repository itself. See
- http://cvs2svn.tigris.org/faq.html#repoaccess for instructions
- on how to clone a remote CVS repository locally.
-
- By default, the trunk, branches and tags are all exported. If you
- only want the trunk, use the `--trunk-only` option.
-
- By default, filenames, log messages and author names are expected
- to be encoded in ascii. Use the `--encoding` option to specify an
- alternative. If multiple encodings are used, specify the option
- multiple times. For a list of valid encoding names, see
- http://docs.python.org/lib/standard-encodings.html.
-
- Windows users need to install GNU sort and use the `--sort`
- option to specify its location. GNU sort can be downloaded from
- http://unxutils.sourceforge.net/.
- """
- hidden = False
- _see_also = ['fast-import', 'fast-import-filter']
- takes_args = ['source', 'destination']
- takes_options = ['verbose',
- Option('trunk-only',
- help="Export just the trunk, ignoring tags and branches."
- ),
- ListOption('encoding', type=str, argname='CODEC',
- help="Encoding used for filenames, commit messages "
- "and author names if not ascii."
- ),
- Option('sort', type=str, argname='PATH',
- help="GNU sort program location if not on the path."
- ),
- ]
- aliases = []
- encoding_type = 'exact'
- def run(self, source, destination, verbose=False, trunk_only=False,
- encoding=None, sort=None):
- from bzrlib.plugins.fastimport.exporters import fast_export_from
- custom = []
- if trunk_only:
- custom.append("--trunk-only")
- if encoding:
- for enc in encoding:
- custom.extend(['--encoding', enc])
- if sort:
- custom.extend(['--sort', sort])
- fast_export_from(source, destination, 'cvs', verbose, custom)
-
-
-class cmd_fast_export_from_darcs(Command):
- """Generate a fast-import file from a Darcs repository.
-
- Destination is a dump file, typically named xxx.fi where xxx is
- the name of the project. If '-' is given, standard output is used.
-
- Darcs 2.2 or later must be installed as various subcommands are
- used to access the source repository. The source may be a network
- URL but using a local URL is recommended for performance reasons.
- """
- hidden = False
- _see_also = ['fast-import', 'fast-import-filter']
- takes_args = ['source', 'destination']
- takes_options = ['verbose',
- Option('encoding', type=str, argname='CODEC',
- help="Encoding used for commit messages if not utf-8."
- ),
- ]
- aliases = []
- encoding_type = 'exact'
- def run(self, source, destination, verbose=False, encoding=None):
- from bzrlib.plugins.fastimport.exporters import fast_export_from
- custom = None
- if encoding is not None:
- custom = ['--encoding', encoding]
- fast_export_from(source, destination, 'darcs', verbose, custom)
-
-
-class cmd_fast_export_from_hg(Command):
- """Generate a fast-import file from a Mercurial repository.
-
- Destination is a dump file, typically named xxx.fi where xxx is
- the name of the project. If '-' is given, standard output is used.
-
- Mercurial 1.2 or later must be installed as its libraries are used
- to access the source repository. Given the APIs currently used,
- the source repository must be a local file, not a network URL.
- """
- hidden = False
- _see_also = ['fast-import', 'fast-import-filter']
- takes_args = ['source', 'destination']
- takes_options = ['verbose']
- aliases = []
- encoding_type = 'exact'
- def run(self, source, destination, verbose=False):
- from bzrlib.plugins.fastimport.exporters import fast_export_from
- fast_export_from(source, destination, 'hg', verbose)
-
-
-class cmd_fast_export_from_git(Command):
- """Generate a fast-import file from a Git repository.
-
- Destination is a dump file, typically named xxx.fi where xxx is
- the name of the project. If '-' is given, standard output is used.
-
- Git 1.6 or later must be installed as the git fast-export
- subcommand is used under the covers to generate the stream.
- The source must be a local directory.
-
- .. note::
-
- Earlier versions of Git may also work fine but are
- likely to receive less active support if problems arise.
- """
- hidden = False
- _see_also = ['fast-import', 'fast-import-filter']
- takes_args = ['source', 'destination']
- takes_options = ['verbose']
- aliases = []
- encoding_type = 'exact'
- def run(self, source, destination, verbose=False):
- from bzrlib.plugins.fastimport.exporters import fast_export_from
- fast_export_from(source, destination, 'git', verbose)
-
-
-class cmd_fast_export_from_mnt(Command):
- """Generate a fast-import file from a Monotone repository.
-
- Destination is a dump file, typically named xxx.fi where xxx is
- the name of the project. If '-' is given, standard output is used.
-
- Monotone 0.43 or later must be installed as the mnt git_export
- subcommand is used under the covers to generate the stream.
- The source must be a local directory.
- """
- hidden = False
- _see_also = ['fast-import', 'fast-import-filter']
- takes_args = ['source', 'destination']
- takes_options = ['verbose']
- aliases = []
- encoding_type = 'exact'
- def run(self, source, destination, verbose=False):
- from bzrlib.plugins.fastimport.exporters import fast_export_from
- fast_export_from(source, destination, 'mnt', verbose)
-
-
-class cmd_fast_export_from_p4(Command):
- """Generate a fast-import file from a Perforce repository.
-
- Source is a Perforce depot path, e.g., //depot/project
-
- Destination is a dump file, typically named xxx.fi where xxx is
- the name of the project. If '-' is given, standard output is used.
-
- bzrp4 must be installed as its p4_fast_export.py module is used under
- the covers to do the export. bzrp4 can be downloaded from
- https://launchpad.net/bzrp4/.
-
- The P4PORT environment variable must be set, and you must be logged
- into the Perforce server.
-
- By default, only the HEAD changelist is exported. To export all
- changelists, append '@all' to the source. To export a revision range,
- append a comma-delimited pair of changelist numbers to the source,
- e.g., '100,200'.
- """
- hidden = False
- _see_also = ['fast-import', 'fast-import-filter']
- takes_args = ['source', 'destination']
- takes_options = []
- aliases = []
- encoding_type = 'exact'
- def run(self, source, destination, verbose=False):
- from bzrlib.plugins.fastimport.exporters import fast_export_from
- custom = []
- fast_export_from(source, destination, 'p4', verbose, custom)
-
-
-class cmd_fast_export_from_svn(Command):
- """Generate a fast-import file from a Subversion repository.
-
- Destination is a dump file, typically named xxx.fi where xxx is
- the name of the project. If '-' is given, standard output is used.
-
- Python-Subversion (Python bindings to the Subversion APIs)
- 1.4 or later must be installed as this library is used to
- access the source repository. The source may be a network URL
- but using a local URL is recommended for performance reasons.
- """
- hidden = False
- _see_also = ['fast-import', 'fast-import-filter']
- takes_args = ['source', 'destination']
- takes_options = ['verbose',
- Option('trunk-path', type=str, argname="STR",
- help="Path in repo to /trunk.\n"
- "May be `regex:/cvs/(trunk)/proj1/(.*)` in "
- "which case the first group is used as the "
- "branch name and the second group is used "
- "to match files.",
- ),
- Option('branches-path', type=str, argname="STR",
- help="Path in repo to /branches."
- ),
- Option('tags-path', type=str, argname="STR",
- help="Path in repo to /tags."
- ),
- ]
- aliases = []
- encoding_type = 'exact'
- def run(self, source, destination, verbose=False, trunk_path=None,
- branches_path=None, tags_path=None):
- from bzrlib.plugins.fastimport.exporters import fast_export_from
- custom = []
- if trunk_path is not None:
- custom.extend(['--trunk-path', trunk_path])
- if branches_path is not None:
- custom.extend(['--branches-path', branches_path])
- if tags_path is not None:
- custom.extend(['--tags-path', tags_path])
- fast_export_from(source, destination, 'svn', verbose, custom)
-
-
-register_command(cmd_fast_import)
-register_command(cmd_fast_import_filter)
-register_command(cmd_fast_import_info)
-register_command(cmd_fast_import_query)
-register_command(cmd_fast_export)
-register_command(cmd_fast_export_from_cvs)
-register_command(cmd_fast_export_from_darcs)
-register_command(cmd_fast_export_from_hg)
-register_command(cmd_fast_export_from_git)
-register_command(cmd_fast_export_from_mnt)
-register_command(cmd_fast_export_from_p4)
-register_command(cmd_fast_export_from_svn)
+for name in [
+ "fast_import",
+ "fast_import_filter",
+ "fast_import_info",
+ "fast_import_query",
+ "fast_export",
+ "fast_export_from_cvs",
+ "fast_export_from_darcs",
+ "fast_export_from_hg",
+ "fast_export_from_git",
+ "fast_export_from_mtn",
+ "fast_export_from_p4",
+ "fast_export_from_svn"
+ ]:
+ plugin_cmds.register_lazy("cmd_%s" % name, [], "bzrlib.plugins.fastimport.cmds")
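A note on the lazy registration above: bzrlib.plugins.fastimport.cmds is now
only imported when one of these commands is first invoked, which keeps plugin
load time down. A minimal sketch of how a command class in cmds.py might pair
with the load_fastimport() guard defined earlier (the run() body here is
illustrative only, not the actual implementation):

    from bzrlib.commands import Command

    class cmd_fast_import(Command):
        """Backend for fast Bazaar data importers."""
        takes_args = ['source', 'destination?']

        def run(self, source, destination='.'):
            # Raise a clean DependencyNotPresent rather than a bare
            # ImportError if python-fastimport is not installed.
            from bzrlib.plugins.fastimport import load_fastimport
            load_fastimport()
            from fastimport import parser
            # ... open the destination, build a processor, parse the stream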
diff --git a/branch_mapper.py b/branch_mapper.py
index f6d0670..acc37c9 100644
--- a/branch_mapper.py
+++ b/branch_mapper.py
@@ -14,46 +14,45 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""An object that maps bzr branch names <-> git ref names."""
+"""An object that maps git ref names to bzr branch names. Note that it is not
+used to map git ref names to bzr tag names."""
+
+
+import re
class BranchMapper(object):
+ _GIT_TRUNK_RE = re.compile('(?:git-)*trunk')
- def git_to_bzr(self, ref_names):
- """Get the mapping from git reference names to Bazaar branch names.
-
- :return: a dictionary with git reference names as keys and
- the Bazaar branch names as values.
+ def git_to_bzr(self, ref_name):
+ """Map a git reference name to a Bazaar branch name.
"""
- bazaar_names = {}
- for ref_name in sorted(ref_names):
- parts = ref_name.split('/')
- if parts[0] == 'refs':
+ parts = ref_name.split('/')
+ if parts[0] == 'refs':
+ parts.pop(0)
+ category = parts.pop(0)
+ if category == 'heads':
+ git_name = '/'.join(parts)
+ bazaar_name = self._git_to_bzr_name(git_name)
+ else:
+ if category == 'remotes' and parts[0] == 'origin':
parts.pop(0)
- category = parts.pop(0)
- if category == 'heads':
- bazaar_name = self._git_to_bzr_name(parts[-1])
- else:
- if category.endswith('s'):
- category = category[:-1]
- name_no_ext = self._git_to_bzr_name(parts[-1])
- bazaar_name = "%s.%s" % (name_no_ext, category)
- bazaar_names[ref_name] = bazaar_name
- return bazaar_names
+ git_name = '/'.join(parts)
+ if category.endswith('s'):
+ category = category[:-1]
+ name_no_ext = self._git_to_bzr_name(git_name)
+ bazaar_name = "%s.%s" % (name_no_ext, category)
+ return bazaar_name
def _git_to_bzr_name(self, git_name):
+ # Make a simple name more bzr-like, by mapping git 'master' to bzr 'trunk'.
+ # To avoid collision, map git 'trunk' to bzr 'git-trunk'. Likewise
+ # 'git-trunk' to 'git-git-trunk' and so on, such that the mapping is
+ # one-to-one in both directions.
if git_name == 'master':
bazaar_name = 'trunk'
- elif git_name.endswith('trunk'):
+ elif self._GIT_TRUNK_RE.match(git_name):
bazaar_name = 'git-%s' % (git_name,)
else:
bazaar_name = git_name
return bazaar_name
-
- def bzr_to_git(self, branch_names):
- """Get the mapping from Bazaar branch names to git reference names.
-
- :return: a dictionary with Bazaar branch names as keys and
- the git reference names as values.
- """
- raise NotImplementedError(self.bzr_to_git)
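For reference, the new per-ref mapping above implies behaviour like the
following (an illustrative session; the assertions are derived from
git_to_bzr and _git_to_bzr_name as written, not taken from the test suite):

    mapper = BranchMapper()
    assert mapper.git_to_bzr('refs/heads/master') == 'trunk'
    assert mapper.git_to_bzr('refs/heads/foo') == 'foo'
    assert mapper.git_to_bzr('refs/remotes/origin/foo') == 'foo.remote'
    assert mapper.git_to_bzr('refs/tags/v1.0') == 'v1.0.tag'
    # 'trunk' and 'git-trunk' are shifted to keep the mapping one-to-one:
    assert mapper.git_to_bzr('refs/heads/trunk') == 'git-trunk'
    assert mapper.git_to_bzr('refs/heads/git-trunk') == 'git-git-trunk'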
diff --git a/branch_updater.py b/branch_updater.py
index b97f887..039171f 100644
--- a/branch_updater.py
+++ b/branch_updater.py
@@ -18,11 +18,12 @@
from operator import itemgetter
-from bzrlib import bzrdir, errors, osutils
+from bzrlib import bzrdir, errors, osutils, transport
from bzrlib.trace import error, note
-import branch_mapper
-import helpers
+from bzrlib.plugins.fastimport.helpers import (
+ best_format_for_objects_in_a_repository,
+ )
class BranchUpdater(object):
@@ -40,9 +41,8 @@ class BranchUpdater(object):
self.heads_by_ref = heads_by_ref
self.last_ref = last_ref
self.tags = tags
- self.name_mapper = branch_mapper.BranchMapper()
self._branch_format = \
- helpers.best_format_for_objects_in_a_repository(repo)
+ best_format_for_objects_in_a_repository(repo)
def update(self):
"""Update the Bazaar branches and tips matching the heads.
@@ -84,7 +84,9 @@ class BranchUpdater(object):
# Convert the reference names into Bazaar speak. If we haven't
# already put the 'trunk' first, do it now.
- git_to_bzr_map = self.name_mapper.git_to_bzr(ref_names)
+ git_to_bzr_map = {}
+ for ref_name in ref_names:
+ git_to_bzr_map[ref_name] = self.cache_mgr.branch_mapper.git_to_bzr(ref_name)
if ref_names and self.branch is None:
trunk = self.select_trunk(ref_names)
git_bzr_items = [(trunk, git_to_bzr_map[trunk])]
@@ -134,17 +136,21 @@ class BranchUpdater(object):
def make_branch(self, location):
"""Make a branch in the repository if not already there."""
+ to_transport = transport.get_transport(location)
+ to_transport.create_prefix()
try:
return bzrdir.BzrDir.open(location).open_branch()
except errors.NotBranchError, ex:
return bzrdir.BzrDir.create_branch_convenience(location,
- format=self._branch_format)
+ format=self._branch_format,
+ possible_transports=[to_transport])
def _update_branch(self, br, last_mark):
"""Update a branch with last revision and tag information.
:return: whether the branch was changed or not
"""
+ from fastimport.helpers import single_plural
last_rev_id = self.cache_mgr.revision_ids[last_mark]
revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
revno = len(revs)
@@ -156,8 +162,9 @@ class BranchUpdater(object):
# apply tags known in this branch
my_tags = {}
if self.tags:
+ ancestry = self.repo.get_ancestry(last_rev_id)
for tag,rev in self.tags.items():
- if rev in revs:
+ if rev in ancestry:
my_tags[tag] = rev
if my_tags:
br.tags._set_tag_dict(my_tags)
@@ -165,6 +172,6 @@ class BranchUpdater(object):
if changed:
tagno = len(my_tags)
note("\t branch %s now has %d %s and %d %s", br.nick,
- revno, helpers.single_plural(revno, "revision", "revisions"),
- tagno, helpers.single_plural(tagno, "tag", "tags"))
+ revno, single_plural(revno, "revision", "revisions"),
+ tagno, single_plural(tagno, "tag", "tags"))
return changed
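The tag change above is worth spelling out: tags were previously applied only
when the tagged revision appeared in the mainline returned by
iter_reverse_revision_history(); using get_ancestry() also keeps tags that
point at merged revisions. A sketch of the distinction (hypothetical
repository and revision ids):

    mainline = list(repo.iter_reverse_revision_history(last_rev_id))
    ancestry = repo.get_ancestry(last_rev_id)  # mainline plus merged revs
    # A tag on a revision that was merged, rather than committed directly
    # on this branch, is in ancestry but not mainline, so the old
    # "rev in revs" test dropped it while "rev in ancestry" keeps it.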
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py
index 5652251..c47a39d 100644
--- a/bzr_commit_handler.py
+++ b/bzr_commit_handler.py
@@ -18,6 +18,7 @@
from bzrlib import (
+ debug,
errors,
generate_ids,
inventory,
@@ -25,13 +26,24 @@ from bzrlib import (
revision,
serializer,
)
-from bzrlib.plugins.fastimport import helpers, processor
+from bzrlib.trace import (
+ mutter,
+ note,
+ warning,
+ )
+from fastimport import (
+ helpers,
+ processor,
+ )
+
+from bzrlib.plugins.fastimport.helpers import (
+ mode_to_kind,
+ )
_serializer_handles_escaping = hasattr(serializer.Serializer,
'squashes_xml_invalid_characters')
-
def copy_inventory(inv):
# This currently breaks revision-id matching
#if hasattr(inv, "_get_mutable_inventory"):
@@ -53,17 +65,51 @@ class GenericCommitHandler(processor.CommitHandler):
self.verbose = verbose
self.branch_ref = command.ref
self.prune_empty_dirs = prune_empty_dirs
+ # This tracks path->file-id for things we're creating this commit.
+ # If the same path is created multiple times, we need to warn the
+ # user and add it just once.
+ # If a path is added then renamed or copied, we need to handle that.
+ self._new_file_ids = {}
+ # This tracks path->file-id for things we're modifying this commit.
+        # If a path is modified then renamed or copied, we need to make
+        # sure we grab the new content.
+ self._modified_file_ids = {}
+ # This tracks the paths for things we're deleting this commit.
+        # If the same path is later added, or becomes the destination of
+        # a rename, then a fresh file-id is required.
+ self._paths_deleted_this_commit = set()
+
+ def mutter(self, msg, *args):
+ """Output a mutter but add context."""
+ msg = "%s (%s)" % (msg, self.command.id)
+ mutter(msg, *args)
+
+ def debug(self, msg, *args):
+ """Output a mutter if the appropriate -D option was given."""
+ if "fast-import" in debug.debug_flags:
+ msg = "%s (%s)" % (msg, self.command.id)
+ mutter(msg, *args)
+
+ def note(self, msg, *args):
+ """Output a note but add context."""
+ msg = "%s (%s)" % (msg, self.command.id)
+ note(msg, *args)
+
+ def warning(self, msg, *args):
+ """Output a warning but add context."""
+ msg = "%s (%s)" % (msg, self.command.id)
+ warning(msg, *args)
def pre_process_files(self):
"""Prepare for committing."""
self.revision_id = self.gen_revision_id()
# cache of texts for this commit, indexed by file-id
- self.lines_for_commit = {}
+ self.data_for_commit = {}
#if self.rev_store.expects_rich_root():
- self.lines_for_commit[inventory.ROOT_ID] = []
+ self.data_for_commit[inventory.ROOT_ID] = []
# Track the heads and get the real parent list
- parents = self.cache_mgr.track_heads(self.command)
+ parents = self.cache_mgr.reftracker.track_heads(self.command)
# Convert the parent commit-ids to bzr revision-ids
if parents:
@@ -76,9 +122,9 @@ class GenericCommitHandler(processor.CommitHandler):
# Tell the RevisionStore we're starting a new commit
self.revision = self.build_revision()
- parent_invs = [self.get_inventory(p) for p in self.parents]
+ self.parent_invs = [self.get_inventory(p) for p in self.parents]
self.rev_store.start_new_revision(self.revision, self.parents,
- parent_invs)
+ self.parent_invs)
# cache of per-file parents for this commit, indexed by file-id
self.per_file_parents_for_commit = {}
@@ -113,9 +159,13 @@ class GenericCommitHandler(processor.CommitHandler):
self.cache_mgr.inventories[revision_id] = inv
return inv
+ def _get_data(self, file_id):
+ """Get the data bytes for a file-id."""
+ return self.data_for_commit[file_id]
+
def _get_lines(self, file_id):
"""Get the lines for a file-id."""
- return self.lines_for_commit[file_id]
+ return osutils.split_lines(self._get_data(file_id))
def _get_per_file_parents(self, file_id):
"""Get the lines for a file-id."""
@@ -154,19 +204,31 @@ class GenericCommitHandler(processor.CommitHandler):
:return: file_id, is_new where
is_new = True if the file_id is newly created
"""
- try:
- id = self.cache_mgr.fetch_file_id(self.branch_ref, path)
- return id, False
- except KeyError:
- # Not in the cache, try the inventory
+ if path not in self._paths_deleted_this_commit:
+ # Try file-ids renamed in this commit
+ id = self._modified_file_ids.get(path)
+ if id is not None:
+ return id, False
+
+ # Try the basis inventory
id = self.basis_inventory.path2id(path)
- if id is None:
- # Doesn't exist yet so create it
- id = generate_ids.gen_file_id(path)
- self.debug("Generated new file id %s for '%s' in '%s'",
- id, path, self.branch_ref)
- self.cache_mgr.store_file_id(self.branch_ref, path, id)
- return id, True
+ if id is not None:
+ return id, False
+
+ # Try the other inventories
+ if len(self.parents) > 1:
+ for inv in self.parent_invs[1:]:
+                    id = inv.path2id(path)
+ if id is not None:
+ return id, False
+
+ # Doesn't exist yet so create it
+ dirname, basename = osutils.split(path)
+ id = generate_ids.gen_file_id(basename)
+ self.debug("Generated new file id %s for '%s' in revision-id '%s'",
+ id, path, self.revision_id)
+ self._new_file_ids[path] = id
+ return id, True
def bzr_file_id(self, path):
"""Get a Bazaar file identifier for a path."""
@@ -192,14 +254,13 @@ class GenericCommitHandler(processor.CommitHandler):
return generate_ids.gen_revision_id(who, timestamp)
def build_revision(self):
- rev_props = {}
+ rev_props = self._legal_revision_properties(self.command.properties)
+ if 'branch-nick' not in rev_props:
+ rev_props['branch-nick'] = self.cache_mgr.branch_mapper.git_to_bzr(
+ self.branch_ref)
+ self._save_author_info(rev_props)
committer = self.command.committer
who = self._format_name_email(committer[0], committer[1])
- author = self.command.author
- if author is not None:
- author_id = self._format_name_email(author[0], author[1])
- if author_id != who:
- rev_props['author'] = author_id
message = self.command.message
if not _serializer_handles_escaping:
# We need to assume the bad ol' days
@@ -213,8 +274,50 @@ class GenericCommitHandler(processor.CommitHandler):
properties=rev_props,
parent_ids=self.parents)
+ def _legal_revision_properties(self, props):
+ """Clean-up any revision properties we can't handle."""
+ # For now, we just check for None because that's not allowed in 2.0rc1
+ result = {}
+ if props is not None:
+ for name, value in props.items():
+ if value is None:
+ self.warning(
+ "converting None to empty string for property %s"
+ % (name,))
+ result[name] = ''
+ else:
+ result[name] = value
+ return result
+
+ def _save_author_info(self, rev_props):
+ author = self.command.author
+ if author is None:
+ return
+ if self.command.more_authors:
+ authors = [author] + self.command.more_authors
+ author_ids = [self._format_name_email(a[0], a[1]) for a in authors]
+ elif author != self.command.committer:
+ author_ids = [self._format_name_email(author[0], author[1])]
+ else:
+ return
+ # If we reach here, there are authors worth storing
+ rev_props['authors'] = "\n".join(author_ids)
+
def _modify_item(self, path, kind, is_executable, data, inv):
"""Add to or change an item in the inventory."""
+ # If we've already added this, warn the user that we're ignoring it.
+ # In the future, it might be nice to double check that the new data
+ # is the same as the old but, frankly, exporters should be fixed
+ # not to produce bad data streams in the first place ...
+ existing = self._new_file_ids.get(path)
+ if existing:
+ # We don't warn about directories because it's fine for them
+ # to be created already by a previous rename
+ if kind != 'directory':
+ self.warning("%s already added in this commit - ignoring" %
+ (path,))
+ return
+
# Create the new InventoryEntry
basename, parent_id = self._ensure_directory(path, inv)
file_id = self.bzr_file_id(path)
@@ -222,18 +325,24 @@ class GenericCommitHandler(processor.CommitHandler):
ie.revision = self.revision_id
if kind == 'file':
ie.executable = is_executable
- lines = osutils.split_lines(data)
- ie.text_sha1 = osutils.sha_strings(lines)
- ie.text_size = sum(map(len, lines))
- self.lines_for_commit[file_id] = lines
+ ie.text_sha1 = osutils.sha_string(data)
+ ie.text_size = len(data)
+ self.data_for_commit[file_id] = data
+ elif kind == 'directory':
+ self.directory_entries[path] = ie
+ # There are no lines stored for a directory so
+ # make sure the cache used by get_lines knows that
+ self.data_for_commit[file_id] = ''
elif kind == 'symlink':
- ie.symlink_target = data.encode('utf8')
+ ie.symlink_target = data.decode('utf8')
# There are no lines stored for a symlink so
# make sure the cache used by get_lines knows that
- self.lines_for_commit[file_id] = []
+ self.data_for_commit[file_id] = ''
else:
- raise errors.BzrError("Cannot import items of kind '%s' yet" %
- (kind,))
+ self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
+ % (kind, path))
+ return
# Record it
if file_id in inv:
old_ie = inv[file_id]
@@ -273,7 +382,7 @@ class GenericCommitHandler(processor.CommitHandler):
self.directory_entries[dirname] = ie
# There are no lines stored for a directory so
# make sure the cache used by get_lines knows that
- self.lines_for_commit[dir_file_id] = []
+ self.data_for_commit[dir_file_id] = ''
# It's possible that a file or symlink with that file-id
# already exists. If it does, we need to delete it.
@@ -289,6 +398,8 @@ class GenericCommitHandler(processor.CommitHandler):
"""
result = self.directory_entries.get(dirname)
if result is None:
+ if dirname in self._paths_deleted_this_commit:
+ raise KeyError
try:
file_id = inv.path2id(dirname)
except errors.NoSuchId:
@@ -305,39 +416,61 @@ class GenericCommitHandler(processor.CommitHandler):
return result
def _delete_item(self, path, inv):
- file_id = inv.path2id(path)
- if file_id is None:
- self.mutter("ignoring delete of %s as not in inventory", path)
- return
- try:
- ie = inv[file_id]
- except errors.NoSuchId:
- self.mutter("ignoring delete of %s as not in inventory", path)
+ newly_added = self._new_file_ids.get(path)
+ if newly_added:
+ # We've only just added this path earlier in this commit.
+ file_id = newly_added
+ # note: delta entries look like (old, new, file-id, ie)
+ ie = self._delta_entries_by_fileid[file_id][3]
else:
- self.record_delete(path, ie)
+ file_id = inv.path2id(path)
+ if file_id is None:
+ self.mutter("ignoring delete of %s as not in inventory", path)
+ return
+ try:
+ ie = inv[file_id]
+ except errors.NoSuchId:
+ self.mutter("ignoring delete of %s as not in inventory", path)
+ return
+ self.record_delete(path, ie)
def _copy_item(self, src_path, dest_path, inv):
- if not self.parents:
- self.warning("ignoring copy of %s to %s - no parent revisions",
- src_path, dest_path)
- return
- file_id = inv.path2id(src_path)
- if file_id is None:
- self.warning("ignoring copy of %s to %s - source does not exist",
- src_path, dest_path)
- return
- ie = inv[file_id]
+ newly_changed = self._new_file_ids.get(src_path) or \
+ self._modified_file_ids.get(src_path)
+ if newly_changed:
+ # We've only just added/changed this path earlier in this commit.
+ file_id = newly_changed
+ # note: delta entries look like (old, new, file-id, ie)
+ ie = self._delta_entries_by_fileid[file_id][3]
+ else:
+ file_id = inv.path2id(src_path)
+ if file_id is None:
+ self.warning("ignoring copy of %s to %s - source does not exist",
+ src_path, dest_path)
+ return
+ ie = inv[file_id]
kind = ie.kind
if kind == 'file':
- content = self.rev_store.get_file_text(self.parents[0], file_id)
+ if newly_changed:
+ content = self.data_for_commit[file_id]
+ else:
+ content = self.rev_store.get_file_text(self.parents[0], file_id)
self._modify_item(dest_path, kind, ie.executable, content, inv)
elif kind == 'symlink':
- self._modify_item(dest_path, kind, False, ie.symlink_target, inv)
+ self._modify_item(dest_path, kind, False, ie.symlink_target.encode("utf-8"), inv)
else:
self.warning("ignoring copy of %s %s - feature not yet supported",
- kind, path)
+ kind, dest_path)
def _rename_item(self, old_path, new_path, inv):
+ existing = self._new_file_ids.get(old_path) or \
+ self._modified_file_ids.get(old_path)
+ if existing:
+ # We've only just added/modified this path earlier in this commit.
+ # Change the add/modify of old_path to an add of new_path
+ self._rename_pending_change(old_path, new_path, existing)
+ return
+
file_id = inv.path2id(old_path)
if file_id is None:
self.warning(
@@ -350,13 +483,12 @@ class GenericCommitHandler(processor.CommitHandler):
if new_file_id is not None:
self.record_delete(new_path, inv[new_file_id])
self.record_rename(old_path, new_path, file_id, ie)
- self.cache_mgr.rename_path(self.branch_ref, old_path, new_path)
# The revision-id for this entry will be/has been updated and
# that means the loader then needs to know what the "new" text is.
# We therefore must go back to the revision store to get it.
lines = self.rev_store.get_file_lines(rev_id, file_id)
- self.lines_for_commit[file_id] = lines
+ self.data_for_commit[file_id] = ''.join(lines)
def _delete_all_items(self, inv):
for name, root_item in inv.root.children.iteritems():
@@ -404,7 +536,7 @@ class InventoryCommitHandler(GenericCommitHandler):
"""Save the revision."""
self.cache_mgr.inventories[self.revision_id] = self.inventory
self.rev_store.load(self.revision, self.inventory, None,
- lambda file_id: self._get_lines(file_id),
+ lambda file_id: self._get_data(file_id),
lambda file_id: self._get_per_file_parents(file_id),
lambda revision_ids: self._get_inventories(revision_ids))
@@ -446,59 +578,15 @@ class InventoryCommitHandler(GenericCommitHandler):
self.inventory)
self.inventory.rename(file_id, new_parent_id, new_basename)
- def _delete_item(self, path, inv):
- # NOTE: I'm retaining this method for now, instead of using the
- # one in the superclass, because it's taken quite a lot of tweaking
- # to cover all the edge cases seen in the wild. Long term, it can
- # probably go once the higher level method does "warn_unless_in_merges"
- # and handles all the various special cases ...
- fileid = self.bzr_file_id(path)
- dirname, basename = osutils.split(path)
- if (fileid in inv and
- isinstance(inv[fileid], inventory.InventoryDirectory)):
- for child_path in inv[fileid].children.keys():
- self._delete_item(osutils.pathjoin(path, child_path), inv)
- # We need to clean this out of the directory entries as well
- try:
- del self.directory_entries[path]
- except KeyError:
- pass
- try:
- if self.inventory.id2path(fileid) == path:
- del inv[fileid]
- else:
- # already added by some other name?
- try:
- parent_id = self.cache_mgr.fetch_file_id(self.branch_ref,
- dirname)
- except KeyError:
- pass
- else:
- del inv[parent_id].children[basename]
- except KeyError:
- self._warn_unless_in_merges(fileid, path)
- except errors.NoSuchId:
- self._warn_unless_in_merges(fileid, path)
- except AttributeError, ex:
- if ex.args[0] == 'children':
- # A directory has changed into a file and then one
- # of it's children is being deleted!
- self._warn_unless_in_merges(fileid, path)
- else:
- raise
- try:
- self.cache_mgr.delete_path(self.branch_ref, path)
- except KeyError:
- pass
-
def modify_handler(self, filecmd):
if filecmd.dataref is not None:
data = self.cache_mgr.fetch_blob(filecmd.dataref)
else:
data = filecmd.data
self.debug("modifying %s", filecmd.path)
- self._modify_item(filecmd.path, filecmd.kind,
- filecmd.is_executable, data, self.inventory)
+ (kind, is_executable) = mode_to_kind(filecmd.mode)
+ self._modify_item(filecmd.path, kind,
+ is_executable, data, self.inventory)
def delete_handler(self, filecmd):
self.debug("deleting %s", filecmd.path)
@@ -548,9 +636,9 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
delta = self._get_final_delta()
inv = self.rev_store.load_using_delta(self.revision,
self.basis_inventory, delta, None,
- lambda file_id: self._get_lines(file_id),
- lambda file_id: self._get_per_file_parents(file_id),
- lambda revision_ids: self._get_inventories(revision_ids))
+ self._get_data,
+ self._get_per_file_parents,
+ self._get_inventories)
self.cache_mgr.inventories[self.revision_id] = inv
#print "committed %s" % self.revision_id
@@ -562,44 +650,63 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
"""
delta = list(self._delta_entries_by_fileid.values())
if self.prune_empty_dirs and self._dirs_that_might_become_empty:
- candidates = osutils.minimum_path_selection(
- self._dirs_that_might_become_empty)
- for path, file_id in self._empty_after_delta(delta, candidates):
- delta.append((path, None, file_id, None))
- #print "delta:\n%s\n\n" % "\n".join([str(de) for de in delta])
+ candidates = self._dirs_that_might_become_empty
+ while candidates:
+ never_born = set()
+ parent_dirs_that_might_become_empty = set()
+ for path, file_id in self._empty_after_delta(delta, candidates):
+ newly_added = self._new_file_ids.get(path)
+ if newly_added:
+ never_born.add(newly_added)
+ else:
+ delta.append((path, None, file_id, None))
+ parent_dir = osutils.dirname(path)
+ if parent_dir:
+ parent_dirs_that_might_become_empty.add(parent_dir)
+ candidates = parent_dirs_that_might_become_empty
+ # Clean up entries that got deleted before they were ever added
+ if never_born:
+ delta = [de for de in delta if de[2] not in never_born]
return delta
def _empty_after_delta(self, delta, candidates):
- new_inv = self.basis_inventory._get_mutable_inventory()
- new_inv.apply_delta(delta)
+ #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta]))
+ #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates]))
+ new_inv = self._get_proposed_inventory(delta)
result = []
for dir in candidates:
file_id = new_inv.path2id(dir)
if file_id is None:
continue
ie = new_inv[file_id]
+ if ie.kind != 'directory':
+ continue
if len(ie.children) == 0:
result.append((dir, file_id))
if self.verbose:
self.note("pruning empty directory %s" % (dir,))
- # Check parents in case deleting this dir makes *them* empty
- while True:
- file_id = ie.parent_id
- if file_id == inventory.ROOT_ID:
- # We've reach the root
- break
- try:
- ie = new_inv[file_id]
- except errors.NoSuchId:
- break
- if len(ie.children) > 1:
- break
- dir = new_inv.id2path(file_id)
- result.append((dir, file_id))
- if self.verbose:
- self.note("pruning empty directory parent %s" % (dir,))
return result

+    def _get_proposed_inventory(self, delta):
+ if len(self.parents):
+ # new_inv = self.basis_inventory._get_mutable_inventory()
+ # Note that this will create unreferenced chk pages if we end up
+ # deleting entries, because this 'test' inventory won't end up
+ # used. However, it is cheaper than having to create a full copy of
+ # the inventory for every commit.
+ new_inv = self.basis_inventory.create_by_apply_delta(delta,
+ 'not-a-valid-revision-id:')
+ else:
+ new_inv = inventory.Inventory(revision_id=self.revision_id)
+ # This is set in the delta so remove it to prevent a duplicate
+ del new_inv[inventory.ROOT_ID]
+ try:
+ new_inv.apply_delta(delta)
+ except errors.InconsistentDelta:
+ self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta]))
+ raise
+ return new_inv
+
def _add_entry(self, entry):
# We need to combine the data if multiple entries have the same file-id.
# For example, a rename followed by a modification looks like:
@@ -626,9 +733,18 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
if existing is not None:
old_path = existing[0]
entry = (old_path, new_path, file_id, ie)
- self._delta_entries_by_fileid[file_id] = entry
+ if new_path is None and old_path is None:
+ # This is a delete cancelling a previous add
+ del self._delta_entries_by_fileid[file_id]
+ parent_dir = osutils.dirname(existing[1])
+ self.mutter("cancelling add of %s with parent %s" % (existing[1], parent_dir))
+ if parent_dir:
+ self._dirs_that_might_become_empty.add(parent_dir)
+ return
+ else:
+ self._delta_entries_by_fileid[file_id] = entry
- # Collect parent direcctories that might become empty
+ # Collect parent directories that might become empty
if new_path is None:
# delete
parent_dir = osutils.dirname(old_path)
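
For instance, the combining rule above turns a rename followed by a
modify into a single delta entry (made-up values; entries are
(old-path, new-path, file-id, inventory-entry) tuples):

    rename = ('foo.txt', 'bar.txt', 'id-1', 'ie-after-rename')
    modify = ('bar.txt', 'bar.txt', 'id-1', 'ie-after-modify')
    # keep the oldest old-path and the newest new-path/entry
    combined = (rename[0], modify[1], modify[2], modify[3])
    assert combined == ('foo.txt', 'bar.txt', 'id-1', 'ie-after-modify')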
@@ -672,14 +788,26 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
def record_changed(self, path, ie, parent_id=None):
self._add_entry((path, path, ie.file_id, ie))
+ self._modified_file_ids[path] = ie.file_id

    def record_delete(self, path, ie):
self._add_entry((path, None, ie.file_id, None))
+ self._paths_deleted_this_commit.add(path)
if ie.kind == 'directory':
+ try:
+ del self.directory_entries[path]
+ except KeyError:
+ pass
for child_relpath, entry in \
self.basis_inventory.iter_entries_by_dir(from_dir=ie):
child_path = osutils.pathjoin(path, child_relpath)
self._add_entry((child_path, None, entry.file_id, None))
+ self._paths_deleted_this_commit.add(child_path)
+ if entry.kind == 'directory':
+ try:
+ del self.directory_entries[child_path]
+ except KeyError:
+ pass

    def record_rename(self, old_path, new_path, file_id, old_ie):
new_ie = old_ie.copy()
@@ -689,29 +817,71 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
new_ie.parent_id = new_parent_id
new_ie.revision = self.revision_id
self._add_entry((old_path, new_path, file_id, new_ie))
+ self._modified_file_ids[new_path] = file_id
+ self._paths_deleted_this_commit.discard(new_path)
+ if new_ie.kind == 'directory':
+ self.directory_entries[new_path] = new_ie
+
+ def _rename_pending_change(self, old_path, new_path, file_id):
+ """Instead of adding/modifying old-path, add new-path instead."""
+ # note: delta entries look like (old, new, file-id, ie)
+ old_ie = self._delta_entries_by_fileid[file_id][3]
+
+ # Delete the old path. Note that this might trigger implicit
+ # deletion of newly created parents that could now become empty.
+ self.record_delete(old_path, old_ie)
+
+ # Update the dictionaries used for tracking new file-ids
+ if old_path in self._new_file_ids:
+ del self._new_file_ids[old_path]
+ else:
+ del self._modified_file_ids[old_path]
+ self._new_file_ids[new_path] = file_id
+
+ # Create the new InventoryEntry
+ kind = old_ie.kind
+ basename, parent_id = self._ensure_directory(new_path,
+ self.basis_inventory)
+ ie = inventory.make_entry(kind, basename, parent_id, file_id)
+ ie.revision = self.revision_id
+ if kind == 'file':
+ ie.executable = old_ie.executable
+ ie.text_sha1 = old_ie.text_sha1
+ ie.text_size = old_ie.text_size
+ elif kind == 'symlink':
+ ie.symlink_target = old_ie.symlink_target
+
+ # Record it
+ self.record_new(new_path, ie)

    def modify_handler(self, filecmd):
+ (kind, executable) = mode_to_kind(filecmd.mode)
if filecmd.dataref is not None:
- data = self.cache_mgr.fetch_blob(filecmd.dataref)
+ if kind == "directory":
+ data = None
+ elif kind == "tree-reference":
+ data = filecmd.dataref
+ else:
+ data = self.cache_mgr.fetch_blob(filecmd.dataref)
else:
data = filecmd.data
self.debug("modifying %s", filecmd.path)
- self._modify_item(filecmd.path, filecmd.kind,
- filecmd.is_executable, data, self.basis_inventory)
+ self._modify_item(filecmd.path, kind,
+ executable, data, self.basis_inventory)

    def delete_handler(self, filecmd):
self.debug("deleting %s", filecmd.path)
self._delete_item(filecmd.path, self.basis_inventory)

    def copy_handler(self, filecmd):
- src_path = filecmd.src_path
- dest_path = filecmd.dest_path
+ src_path = filecmd.src_path.decode("utf8")
+ dest_path = filecmd.dest_path.decode("utf8")
self.debug("copying %s to %s", src_path, dest_path)
self._copy_item(src_path, dest_path, self.basis_inventory)

    def rename_handler(self, filecmd):
- old_path = filecmd.old_path
- new_path = filecmd.new_path
+ old_path = filecmd.old_path.decode("utf8")
+ new_path = filecmd.new_path.decode("utf8")
self.debug("renaming %s to %s", old_path, new_path)
self._rename_item(old_path, new_path, self.basis_inventory)
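
The decode("utf8") calls above matter because the fast-import stream
hands paths over as raw byte strings, while bzr inventories expect
unicode. For example (Python 2 semantics):

    old_path = 'caf\xc3\xa9/old.txt'    # bytes as read from the stream
    assert old_path.decode('utf8') == u'caf\xe9/old.txt'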
diff --git a/cache_manager.py b/cache_manager.py
index af57534..6d8ef05 100644
--- a/cache_manager.py
+++ b/cache_manager.py
@@ -16,12 +16,84 @@
"""A manager of caches."""
+import atexit
+import os
+import shutil
+import tempfile
+import weakref
from bzrlib import lru_cache, trace
-from bzrlib.plugins.fastimport import helpers
+from bzrlib.plugins.fastimport import (
+ branch_mapper,
+ )
+from fastimport.helpers import (
+ single_plural,
+ )
+from fastimport.reftracker import (
+ RefTracker,
+ )
+
+
+class _Cleanup(object):
+ """This class makes sure we clean up when CacheManager goes away.
+
+ We use a helper class to ensure that we are never in a refcycle.
+ """
+
+ def __init__(self, disk_blobs):
+ self.disk_blobs = disk_blobs
+ self.tempdir = None
+ self.small_blobs = None
+
+ def __del__(self):
+ self.finalize()
+
+ def finalize(self):
+ if self.disk_blobs is not None:
+ for info in self.disk_blobs.itervalues():
+ if info[-1] is not None:
+ os.unlink(info[-1])
+ self.disk_blobs = None
+ if self.small_blobs is not None:
+ self.small_blobs.close()
+ self.small_blobs = None
+ if self.tempdir is not None:
+ shutil.rmtree(self.tempdir)
+
+
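The reason for a separate helper: in Python 2, objects that both define
__del__ and sit inside a reference cycle are never freed by the cycle
collector, so their cleanup would silently never run. _Cleanup holds no
reference back to its owner, so it stays out of any cycle the owner is
in. A toy illustration (hypothetical owner class):

    class _Owner(object):
        def __init__(self):
            # The helper never references _Owner, so even if _Owner
            # ends up in a cycle, _Cleanup is still freed (and thus
            # finalized) once the cycle is collected.
            self.cleanup = _Cleanup({})
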
class CacheManager(object):
+ _small_blob_threshold = 25*1024
+ _sticky_cache_size = 300*1024*1024
+ _sticky_flushed_size = 100*1024*1024
+
def __init__(self, info=None, verbose=False, inventory_cache_size=10):
"""Create a manager of caches.
@@ -31,9 +103,18 @@ class CacheManager(object):
self.verbose = verbose
# dataref -> data. datref is either :mark or the sha-1.
- # Sticky blobs aren't removed after being referenced.
+ # Sticky blobs are referenced more than once, and are saved until their
+ # refcount goes to 0
self._blobs = {}
self._sticky_blobs = {}
+ self._sticky_memory_bytes = 0
+ # if we overflow our memory cache, then we will dump large blobs to
+ # disk in this directory
+ self._tempdir = None
+ # id => (offset, n_bytes, fname)
+ # if fname is None, then the content is stored in the small file
+ self._disk_blobs = {}
+ self._cleanup = _Cleanup(self._disk_blobs)
# revision-id -> Inventory cache
# these are large and we probably don't need too many as
@@ -46,12 +127,6 @@ class CacheManager(object):
# (path, branch_ref) -> file-ids - as generated.
# (Use store_file_id/fetch_fileid methods rather than direct access.)
- self._file_ids = {}
-
- # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
- self.last_ref = None
- self.last_ids = {}
- self.heads = {}
# Work out the blobs to make sticky - None means all
self._blob_ref_counts = {}
@@ -67,18 +142,21 @@ class CacheManager(object):
# info not in file - possible when no blobs used
pass
+        # BranchMapper has no state (for now?), but we keep it around rather
+        # than reinstantiate it on every use
+ self.branch_mapper = branch_mapper.BranchMapper()
+
+ self.reftracker = RefTracker()
+
def dump_stats(self, note=trace.note):
"""Dump some statistics about what we cached."""
        # TODO: add in inventory statistics
note("Cache statistics:")
self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
self._show_stats_for(self.revision_ids, "revision-ids", note=note)
- self._show_stats_for(self._file_ids, "file-ids", note=note,
- tuple_key=True)
# These aren't interesting so omit from the output, at least for now
#self._show_stats_for(self._blobs, "other blobs", note=note)
- #self._show_stats_for(self.last_ids, "last-ids", note=note)
- #self._show_stats_for(self.heads, "heads", note=note)
+ #self.reftracker.dump_stats(note=note)

    def _show_stats_for(self, dict, label, note=trace.note, tuple_key=False):
"""Dump statistics about a given dictionary.
@@ -100,109 +178,115 @@ class CacheManager(object):
size = size / 1024
unit = 'G'
note(" %-12s: %8.1f %s (%d %s)" % (label, size, unit, count,
- helpers.single_plural(count, "item", "items")))
+ single_plural(count, "item", "items")))

    def clear_all(self):
"""Free up any memory used by the caches."""
self._blobs.clear()
self._sticky_blobs.clear()
self.revision_ids.clear()
- self._file_ids.clear()
- self.last_ids.clear()
- self.heads.clear()
+ self.reftracker.clear()
self.inventories.clear()

+    def _flush_blobs_to_disk(self):
+ blobs = self._sticky_blobs.keys()
+ sticky_blobs = self._sticky_blobs
+ total_blobs = len(sticky_blobs)
+ blobs.sort(key=lambda k:len(sticky_blobs[k]))
+ if self._tempdir is None:
+ tempdir = tempfile.mkdtemp(prefix='fastimport_blobs-')
+ self._tempdir = tempdir
+ self._cleanup.tempdir = self._tempdir
+ self._cleanup.small_blobs = tempfile.TemporaryFile(
+ prefix='small-blobs-', dir=self._tempdir)
+ small_blob_ref = weakref.ref(self._cleanup.small_blobs)
+        # Even though we add it to _Cleanup, it seems the object can be
+        # destroyed 'too late' for cleanup to actually occur - probably a
+        # combination of bzr's "die directly, don't clean up" behaviour and
+        # the way exceptions unwind the running stack.
+ def exit_cleanup():
+ small_blob = small_blob_ref()
+ if small_blob is not None:
+ small_blob.close()
+ shutil.rmtree(tempdir, ignore_errors=True)
+ atexit.register(exit_cleanup)
+ count = 0
+ bytes = 0
+ n_small_bytes = 0
+ while self._sticky_memory_bytes > self._sticky_flushed_size:
+ id = blobs.pop()
+ blob = self._sticky_blobs.pop(id)
+ n_bytes = len(blob)
+ self._sticky_memory_bytes -= n_bytes
+ if n_bytes < self._small_blob_threshold:
+ f = self._cleanup.small_blobs
+ f.seek(0, os.SEEK_END)
+ self._disk_blobs[id] = (f.tell(), n_bytes, None)
+ f.write(blob)
+ n_small_bytes += n_bytes
+ else:
+ fd, name = tempfile.mkstemp(prefix='blob-', dir=self._tempdir)
+ os.write(fd, blob)
+ os.close(fd)
+ self._disk_blobs[id] = (0, n_bytes, name)
+ bytes += n_bytes
+ del blob
+ count += 1
+ trace.note('flushed %d/%d blobs w/ %.1fMB (%.1fMB small) to disk'
+ % (count, total_blobs, bytes / 1024. / 1024,
+ n_small_bytes / 1024. / 1024))
+
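The weakref/atexit dance above exists because __del__ may fire too late
(or not at all) on abrupt exits, while an atexit hook reliably runs at
interpreter shutdown. The same pattern in isolation (hypothetical names,
assuming Python 2):

    import atexit, shutil, tempfile, weakref

    class _SpillDir(object):
        def __init__(self):
            self.tempdir = tempfile.mkdtemp(prefix='sketch-')
            tempdir = self.tempdir
            self_ref = weakref.ref(self)    # must not keep self alive
            def exit_cleanup():
                # If the owner is still alive at exit, its own cleanup
                # never ran, so remove the tree here instead.
                if self_ref() is not None:
                    shutil.rmtree(tempdir, ignore_errors=True)
            atexit.register(exit_cleanup)
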
def store_blob(self, id, data):
"""Store a blob of data."""
# Note: If we're not reference counting, everything has to be sticky
if not self._blob_ref_counts or id in self._blob_ref_counts:
self._sticky_blobs[id] = data
+ self._sticky_memory_bytes += len(data)
+ if self._sticky_memory_bytes > self._sticky_cache_size:
+ self._flush_blobs_to_disk()
elif data == '':
# Empty data is always sticky
self._sticky_blobs[id] = data
else:
self._blobs[id] = data

+    def _decref(self, id, cache, fn):
+ if not self._blob_ref_counts:
+ return False
+ count = self._blob_ref_counts.get(id, None)
+ if count is not None:
+ count -= 1
+ if count <= 0:
+ del cache[id]
+ if fn is not None:
+ os.unlink(fn)
+ del self._blob_ref_counts[id]
+ return True
+ else:
+ self._blob_ref_counts[id] = count
+ return False
+
def fetch_blob(self, id):
"""Fetch a blob of data."""
- try:
- b = self._sticky_blobs[id]
- if self._blob_ref_counts and b != '':
- self._blob_ref_counts[id] -= 1
- if self._blob_ref_counts[id] == 0:
- del self._sticky_blobs[id]
- return b
- except KeyError:
+ if id in self._blobs:
return self._blobs.pop(id)
+ if id in self._disk_blobs:
+ (offset, n_bytes, fn) = self._disk_blobs[id]
+ if fn is None:
+ f = self._cleanup.small_blobs
+ f.seek(offset)
+ content = f.read(n_bytes)
+ else:
+ fp = open(fn, 'rb')
+ try:
+ content = fp.read()
+ finally:
+ fp.close()
+ self._decref(id, self._disk_blobs, fn)
+ return content
+ content = self._sticky_blobs[id]
+ if self._decref(id, self._sticky_blobs, None):
+ self._sticky_memory_bytes -= len(content)
+ return content
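
A sketch of the resulting refcounting contract (the mark and counts are
made up, and _blob_ref_counts is poked directly purely to illustrate):

    mgr = CacheManager()
    mgr._blob_ref_counts = {':1': 2}
    mgr.store_blob(':1', 'payload')             # sticky: it is listed
    assert mgr.fetch_blob(':1') == 'payload'    # refcount 2 -> 1, kept
    assert mgr.fetch_blob(':1') == 'payload'    # refcount 1 -> 0, freed
    assert ':1' not in mgr._sticky_blobs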
- def store_file_id(self, branch_ref, path, id):
- """Store the path to file-id mapping for a branch."""
- key = self._fileid_key(path, branch_ref)
- self._file_ids[key] = id
- def fetch_file_id(self, branch_ref, path):
- """Lookup the file-id for a path in a branch.
-
- Raises KeyError if unsuccessful.
- """
- key = self._fileid_key(path, branch_ref)
- return self._file_ids[key]
-
- def _fileid_key(self, path, branch_ref):
- return (path, branch_ref)
-
- def delete_path(self, branch_ref, path):
- """Remove a path from caches."""
- # We actually want to remember what file-id we gave a path,
- # even when that file is deleted, so doing nothing is correct.
- # It's quite possible for a path to be deleted twice where
- # the first time is in a merge branch (but the same branch_ref)
- # and the second time is when that branch is merged to mainline.
- pass
-
- def rename_path(self, branch_ref, old_path, new_path):
- """Rename a path in the caches."""
- # In this case, we need to forget the file-id we gave a path,
- # otherwise, we'll get duplicate file-ids in the repository
- # if a new file is created at the old path.
- old_key = self._fileid_key(old_path, branch_ref)
- new_key = self._fileid_key(new_path, branch_ref)
- try:
- old_file_id = self._file_ids[old_key]
- except KeyError:
- # The old_key has already been removed, most likely
- # in a merge branch.
- pass
- else:
- self._file_ids[new_key] = old_file_id
- del self._file_ids[old_key]
-
- def track_heads(self, cmd):
- """Track the repository heads given a CommitCommand.
-
- :param cmd: the CommitCommand
- :return: the list of parents in terms of commit-ids
- """
- # Get the true set of parents
- if cmd.from_ is not None:
- parents = [cmd.from_]
- else:
- last_id = self.last_ids.get(cmd.ref)
- if last_id is not None:
- parents = [last_id]
- else:
- parents = []
- parents.extend(cmd.merges)
-
- # Track the heads
- self.track_heads_for_ref(cmd.ref, cmd.id, parents)
- return parents
-
- def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
- if parents is not None:
- for parent in parents:
- if parent in self.heads:
- del self.heads[parent]
- self.heads.setdefault(cmd_id, set()).add(cmd_ref)
- self.last_ids[cmd_ref] = cmd_id
- self.last_ref = cmd_ref
diff --git a/cmds.py b/cmds.py
new file mode 100644
index 0000000..52170f7
--- /dev/null
+++ b/cmds.py
@@ -0,0 +1,882 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Fastimport/fastexport commands."""
+
+from bzrlib import bzrdir
+from bzrlib.commands import Command
+from bzrlib.option import Option, ListOption, RegistryOption
+
+from bzrlib.plugins.fastimport import load_fastimport
+
+
+def _run(source, processor_factory, control, params, verbose,
+ user_map=None):
+ """Create and run a processor.
+
+ :param source: a filename or '-' for standard input. If the
+ filename ends in .gz, it will be opened as a gzip file and
+ the stream will be implicitly uncompressed
+ :param processor_factory: a callable for creating a processor
+ :param control: the BzrDir of the destination or None if no
+ destination is expected
+    :param params: a dictionary of processor-specific parameters
+    :param user_map: if not None, the file containing the user map.
+ """
+ from fastimport import parser
+ stream = _get_source_stream(source)
+ user_mapper = _get_user_mapper(user_map)
+ proc = processor_factory(control, params=params, verbose=verbose)
+ p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper)
+ return proc.process(p.iter_commands)
+
+
+def _get_source_stream(source):
+ if source == '-':
+ import sys
+ from fastimport import helpers
+ stream = helpers.binary_stream(sys.stdin)
+ elif source.endswith('.gz'):
+ import gzip
+ stream = gzip.open(source, "rb")
+ else:
+ stream = open(source, "rb")
+ return stream
+
+
+def _get_user_mapper(filename):
+ import user_mapper
+ if filename is None:
+ return None
+ f = open(filename)
+ lines = f.readlines()
+ f.close()
+ return user_mapper.UserMapper(lines)
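
The map file that _get_user_mapper() reads is made up of plain
"old-id = new-id" lines. A minimal parse of just that surface syntax
(illustrative only; the real rules, including the special '@' domain
entry, live in user_mapper.UserMapper):

    def parse_user_map(lines):
        mapping = {}
        for line in lines:
            line = line.strip()
            if not line or line.startswith('#'):
                continue                  # skip blanks and comments
            old, new = line.split('=', 1)
            mapping[old.strip()] = new.strip()
        return mapping

    assert parse_user_map([
        '@ = example.com',
        'bill = William Jones <bill@example.com>',
    ]) == {'@': 'example.com',
           'bill': 'William Jones <bill@example.com>'}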
+
+
+class cmd_fast_import(Command):
+ """Backend for fast Bazaar data importers.
+
+ This command reads a mixed command/data stream and creates
+ branches in a Bazaar repository accordingly. The preferred
+ recipe is::
+
+ bzr fast-import project.fi project.bzr
+
+ Numerous commands are provided for generating a fast-import file
+ to use as input. These are named fast-export-from-xxx where xxx
+    is one of cvs, darcs, git, hg, mtn, p4 or svn.
+
+    To specify standard input as the input stream, use a
+ source name of '-' (instead of project.fi). If the source name
+ ends in '.gz', it is assumed to be compressed in gzip format.
+
+ project.bzr will be created if it doesn't exist. If it exists
+ already, it should be empty or be an existing Bazaar repository
+ or branch. If not specified, the current directory is assumed.
+
+    fast-import will intelligently select the format to use when
+    creating a repository or branch. If you are running a release from
+    Bazaar 1.17 through 2.0, the Bazaar 2.x default format ("2a") is
+    selected for you. Otherwise, the current default format (e.g.
+    "pack-0.92" on Bazaar 1.x) is used. If you wish to specify a custom
+    format, use the `--format` option.
+
+ .. note::
+
+ To maintain backwards compatibility, fast-import lets you
+ create the target repository or standalone branch yourself.
+ It is recommended though that you let fast-import create
+ these for you instead.
+
+ :Branch mapping rules:
+
+ Git reference names are mapped to Bazaar branch names as follows:
+
+ * refs/heads/foo is mapped to foo
+ * refs/remotes/origin/foo is mapped to foo.remote
+ * refs/tags/foo is mapped to foo.tag
+ * */master is mapped to trunk, trunk.remote, etc.
+ * */trunk is mapped to git-trunk, git-trunk.remote, etc.
+
+ :Branch creation rules:
+
+ When a shared repository is created or found at the destination,
+ branches are created inside it. In the simple case of a single
+ branch (refs/heads/master) inside the input file, the branch is
+ project.bzr/trunk.
+
+ When a standalone branch is found at the destination, the trunk
+ is imported there and warnings are output about any other branches
+ found in the input file.
+
+ When a branch in a shared repository is found at the destination,
+ that branch is made the trunk and other branches, if any, are
+ created in sister directories.
+
+ :Working tree updates:
+
+ The working tree is generated for the trunk branch. If multiple
+ branches are created, a message is output on completion explaining
+ how to create the working trees for other branches.
+
+ :Custom exporters:
+
+ The fast-export-from-xxx commands typically call more advanced
+ xxx-fast-export scripts. You are welcome to use the advanced
+ scripts if you prefer.
+
+ If you wish to write a custom exporter for your project, see
+ http://bazaar-vcs.org/BzrFastImport for the detailed protocol
+ specification. In many cases, exporters can be written quite
+ quickly using whatever scripting/programming language you like.
+
+ :User mapping:
+
+ Some source repositories store just the user name while Bazaar
+ prefers a full email address. You can adjust user-ids while
+ importing by using the --user-map option. The argument is a
+ text file with lines in the format::
+
+ old-id = new-id
+
+ Blank lines and lines beginning with # are ignored.
+ If old-id has the special value '@', then users without an
+ email address will get one created by using the matching new-id
+ as the domain, unless a more explicit address is given for them.
+ For example, given the user-map of::
+
+ @ = example.com
+ bill = William Jones <bill@example.com>
+
+ then user-ids are mapped as follows::
+
+ maria => maria <maria@example.com>
+ bill => William Jones <bill@example.com>
+
+ .. note::
+
+ User mapping is supported by both the fast-import and
+ fast-import-filter commands.
+
+ :Blob tracking:
+
+ As some exporters (like git-fast-export) reuse blob data across
+ commits, fast-import makes two passes over the input file by
+ default. In the first pass, it collects data about what blobs are
+ used when, along with some other statistics (e.g. total number of
+ commits). In the second pass, it generates the repository and
+ branches.
+
+ .. note::
+
+ The initial pass isn't done if the --info option is used
+ to explicitly pass in information about the input stream.
+ It also isn't done if the source is standard input. In the
+ latter case, memory consumption may be higher than otherwise
+ because some blobs may be kept in memory longer than necessary.
+
+ :Restarting an import:
+
+ At checkpoints and on completion, the commit-id -> revision-id
+ map is saved to a file called 'fastimport-id-map' in the control
+ directory for the repository (e.g. .bzr/repository). If the import
+ is interrupted or unexpectedly crashes, it can be started again
+ and this file will be used to skip over already loaded revisions.
+ As long as subsequent exports from the original source begin
+ with exactly the same revisions, you can use this feature to
+ maintain a mirror of a repository managed by a foreign tool.
+ If and when Bazaar is used to manage the repository, this file
+ can be safely deleted.
+
+ :Examples:
+
+ Import a Subversion repository into Bazaar::
+
+ bzr fast-export-from-svn /svn/repo/path project.fi
+ bzr fast-import project.fi project.bzr
+
+ Import a CVS repository into Bazaar::
+
+ bzr fast-export-from-cvs /cvs/repo/path project.fi
+ bzr fast-import project.fi project.bzr
+
+ Import a Git repository into Bazaar::
+
+ bzr fast-export-from-git /git/repo/path project.fi
+ bzr fast-import project.fi project.bzr
+
+ Import a Mercurial repository into Bazaar::
+
+ bzr fast-export-from-hg /hg/repo/path project.fi
+ bzr fast-import project.fi project.bzr
+
+ Import a Darcs repository into Bazaar::
+
+ bzr fast-export-from-darcs /darcs/repo/path project.fi
+ bzr fast-import project.fi project.bzr
+ """
+ hidden = False
+ _see_also = ['fast-export', 'fast-import-filter', 'fast-import-info']
+ takes_args = ['source', 'destination?']
+ takes_options = ['verbose',
+ Option('user-map', type=str,
+ help="Path to file containing a map of user-ids.",
+ ),
+ Option('info', type=str,
+ help="Path to file containing caching hints.",
+ ),
+ Option('trees',
+ help="Update all working trees, not just trunk's.",
+ ),
+ Option('count', type=int,
+ help="Import this many revisions then exit.",
+ ),
+ Option('checkpoint', type=int,
+ help="Checkpoint automatically every N revisions."
+ " The default is 10000.",
+ ),
+ Option('autopack', type=int,
+ help="Pack every N checkpoints. The default is 4.",
+ ),
+ Option('inv-cache', type=int,
+ help="Number of inventories to cache.",
+ ),
+ RegistryOption.from_kwargs('mode',
+ 'The import algorithm to use.',
+ title='Import Algorithm',
+ default='Use the preferred algorithm (inventory deltas).',
+ classic="Use the original algorithm (mutable inventories).",
+ experimental="Enable experimental features.",
+ value_switches=True, enum_switch=False,
+ ),
+ Option('import-marks', type=str,
+ help="Import marks from file."
+ ),
+ Option('export-marks', type=str,
+ help="Export marks to file."
+ ),
+ RegistryOption('format',
+ help='Specify a format for the created repository. See'
+ ' "bzr help formats" for details.',
+ lazy_registry=('bzrlib.bzrdir', 'format_registry'),
+ converter=lambda name: bzrdir.format_registry.make_bzrdir(name),
+ value_switches=False, title='Repository format'),
+ ]
+
+    def run(self, source, destination='.', verbose=False, info=None,
+ trees=False, count=-1, checkpoint=10000, autopack=4, inv_cache=-1,
+ mode=None, import_marks=None, export_marks=None, format=None,
+ user_map=None):
+ load_fastimport()
+ from bzrlib.plugins.fastimport.processors import generic_processor
+ from bzrlib.plugins.fastimport.helpers import (
+ open_destination_directory,
+ )
+ # If no format is given and the user is running a release
+ # leading up to 2.0, select 2a for them. Otherwise, use
+ # the default format.
+ if format is None:
+ import bzrlib
+ bzr_version = bzrlib.version_info[0:2]
+ if bzr_version in [(1,17), (1,18), (2,0)]:
+ format = bzrdir.format_registry.make_bzrdir('2a')
+ control = open_destination_directory(destination, format=format)
+
+ # If an information file was given and the source isn't stdin,
+ # generate the information by reading the source file as a first pass
+ if info is None and source != '-':
+ info = self._generate_info(source)
+
+ # Do the work
+ if mode is None:
+ mode = 'default'
+ params = {
+ 'info': info,
+ 'trees': trees,
+ 'count': count,
+ 'checkpoint': checkpoint,
+ 'autopack': autopack,
+ 'inv-cache': inv_cache,
+ 'mode': mode,
+ 'import-marks': import_marks,
+ 'export-marks': export_marks,
+ }
+ return _run(source, generic_processor.GenericProcessor, control,
+ params, verbose, user_map=user_map)
+
+ def _generate_info(self, source):
+ from cStringIO import StringIO
+ from fastimport import parser
+ from fastimport.processors import info_processor
+ stream = _get_source_stream(source)
+ output = StringIO()
+ try:
+ proc = info_processor.InfoProcessor(verbose=True, outf=output)
+ p = parser.ImportParser(stream)
+ return_code = proc.process(p.iter_commands)
+ lines = output.getvalue().splitlines()
+ finally:
+ output.close()
+ stream.seek(0)
+ return lines
+
+
+class cmd_fast_import_filter(Command):
+ """Filter a fast-import stream to include/exclude files & directories.
+
+ This command is useful for splitting a subdirectory or bunch of
+ files out from a project to create a new project complete with history
+ for just those files. It can also be used to create a new project
+ repository that removes all references to files that should not have
+ been committed, e.g. security-related information (like passwords),
+ commercially sensitive material, files with an incompatible license or
+ large binary files like CD images.
+
+ To specify standard input as the input stream, use a source name
+ of '-'. If the source name ends in '.gz', it is assumed to be
+ compressed in gzip format.
+
+ :File/directory filtering:
+
+ This is supported by the -i and -x options. Excludes take precedence
+ over includes.
+
+    When splitting out a subdirectory (or file), the new stream uses the
+    subdirectory (or the subdirectory containing the file) as its root. As
+    fast-import-filter doesn't know in advance whether a path is a file or
+    a directory in the stream, you need to specify a trailing '/' on
+    directories passed to the `-i` option. If multiple files or
+    directories are given, the new root is the deepest common directory.
+
+ Note: If a path has been renamed, take care to specify the *original*
+ path name, not the final name that it ends up with.
+
+ :User mapping:
+
+ Some source repositories store just the user name while Bazaar
+ prefers a full email address. You can adjust user-ids
+ by using the --user-map option. The argument is a
+ text file with lines in the format::
+
+ old-id = new-id
+
+ Blank lines and lines beginning with # are ignored.
+ If old-id has the special value '@', then users without an
+ email address will get one created by using the matching new-id
+ as the domain, unless a more explicit address is given for them.
+ For example, given the user-map of::
+
+ @ = example.com
+ bill = William Jones <bill@example.com>
+
+ then user-ids are mapped as follows::
+
+ maria => maria <maria@example.com>
+ bill => William Jones <bill@example.com>
+
+ .. note::
+
+ User mapping is supported by both the fast-import and
+ fast-import-filter commands.
+
+ :Examples:
+
+ Create a new project from a library (note the trailing / on the
+ directory name of the library)::
+
+ front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi
+ bzr fast-import xxx.fi mylibrary.bzr
+ (lib/xxx/foo is now foo)
+
+ Create a new repository without a sensitive file::
+
+ front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi
+ bzr fast-import clean.fi clean.bzr
+ """
+ hidden = False
+ _see_also = ['fast-import']
+ takes_args = ['source?']
+ takes_options = ['verbose',
+ ListOption('include_paths', short_name='i', type=str,
+ help="Only include commits affecting these paths."
+ " Directories should have a trailing /."
+ ),
+ ListOption('exclude_paths', short_name='x', type=str,
+ help="Exclude these paths from commits."
+ ),
+ Option('user-map', type=str,
+ help="Path to file containing a map of user-ids.",
+ ),
+ ]
+ encoding_type = 'exact'
+
+    def run(self, source=None, verbose=False, include_paths=None,
+ exclude_paths=None, user_map=None):
+ load_fastimport()
+ from fastimport.processors import filter_processor
+ params = {
+ 'include_paths': include_paths,
+ 'exclude_paths': exclude_paths,
+ }
+ from fastimport import parser
+ stream = _get_source_stream(source)
+ user_mapper = _get_user_mapper(user_map)
+ proc = filter_processor.FilterProcessor(params=params, verbose=verbose)
+ p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper)
+ return proc.process(p.iter_commands)
+
+
+class cmd_fast_import_info(Command):
+ """Output information about a fast-import stream.
+
+ This command reads a fast-import stream and outputs
+ statistics and interesting properties about what it finds.
+ When run in verbose mode, the information is output as a
+ configuration file that can be passed to fast-import to
+ assist it in intelligently caching objects.
+
+ To specify standard input as the input stream, use a source name
+ of '-'. If the source name ends in '.gz', it is assumed to be
+ compressed in gzip format.
+
+ :Examples:
+
+ Display statistics about the import stream produced by front-end::
+
+ front-end | bzr fast-import-info -
+
+ Create a hints file for running fast-import on a large repository::
+
+ front-end | bzr fast-import-info -v - > front-end.cfg
+ """
+ hidden = False
+ _see_also = ['fast-import']
+ takes_args = ['source']
+ takes_options = ['verbose']
+
+    def run(self, source, verbose=False):
+ load_fastimport()
+ from fastimport.processors import info_processor
+ return _run(source, info_processor.InfoProcessor, {}, verbose)
+
+
+class cmd_fast_import_query(Command):
+ """Query a fast-import stream displaying selected commands.
+
+ To specify standard input as the input stream, use a source name
+ of '-'. If the source name ends in '.gz', it is assumed to be
+ compressed in gzip format.
+
+ To specify a commit to display, give its mark using the
+ --commit-mark option. The commit will be displayed with
+ file-commands included but with inline blobs hidden.
+
+ To specify the commands to display, use the -C option one or
+ more times. To specify just some fields for a command, use the
+ syntax::
+
+ command=field1,...
+
+ By default, the nominated fields for the nominated commands
+ are displayed tab separated. To see the information in
+ a name:value format, use verbose mode.
+
+ Note: Binary fields (e.g. data for blobs) are masked out
+ so it is generally safe to view the output in a terminal.
+
+ :Examples:
+
+ Show the commit with mark 429::
+
+ bzr fast-import-query xxx.fi -m429
+
+ Show all the fields of the reset and tag commands::
+
+ bzr fast-import-query xxx.fi -Creset -Ctag
+
+ Show the mark and merge fields of the commit commands::
+
+ bzr fast-import-query xxx.fi -Ccommit=mark,merge
+ """
+ hidden = True
+ _see_also = ['fast-import', 'fast-import-filter']
+ takes_args = ['source']
+ takes_options = ['verbose',
+ Option('commit-mark', short_name='m', type=str,
+ help="Mark of the commit to display."
+ ),
+ ListOption('commands', short_name='C', type=str,
+ help="Display fields for these commands."
+ ),
+ ]
+
+    def run(self, source, verbose=False, commands=None, commit_mark=None):
+ load_fastimport()
+ from fastimport.processors import query_processor
+ from bzrlib.plugins.fastimport import helpers
+ params = helpers.defines_to_dict(commands) or {}
+ if commit_mark:
+ params['commit-mark'] = commit_mark
+ return _run(source, query_processor.QueryProcessor, params,
+ verbose)
+
+
+class cmd_fast_export(Command):
+ """Generate a fast-import stream from a Bazaar branch.
+
+ This program generates a stream from a Bazaar branch in fast-import
+ format used by tools such as bzr fast-import, git-fast-import and
+ hg-fast-import.
+
+ If no destination is given or the destination is '-', standard output
+ is used. Otherwise, the destination is the name of a file. If the
+ destination ends in '.gz', the output will be compressed into gzip
+ format.
+
+ :Round-tripping:
+
+ Recent versions of the fast-import specification support features
+ that allow effective round-tripping of many Bazaar branches. As
+ such, fast-exporting a branch and fast-importing the data produced
+ will create a new repository with equivalent history, i.e.
+ "bzr log -v -p --include-merges --forward" on the old branch and
+ new branch should produce similar, if not identical, results.
+
+ .. note::
+
+ Be aware that the new repository may appear to have similar history
+ but internally it is quite different with new revision-ids and
+ file-ids assigned. As a consequence, the ability to easily merge
+ with branches based on the old repository is lost. Depending on your
+ reasons for producing a new repository, this may or may not be an
+ issue.
+
+ :Interoperability:
+
+ fast-export can use the following "extended features" to
+ produce a richer data stream:
+
+ * *multiple-authors* - if a commit has multiple authors (as commonly
+ occurs in pair-programming), all authors will be included in the
+ output, not just the first author
+
+ * *commit-properties* - custom metadata per commit that Bazaar stores
+ in revision properties (e.g. branch-nick and bugs fixed by this
+ change) will be included in the output.
+
+ * *empty-directories* - directories, even the empty ones, will be
+ included in the output.
+
+ To disable these features and produce output acceptable to git 1.6,
+ use the --plain option. To enable these features, use --no-plain.
+ Currently, --plain is the default but that will change in the near
+ future once the feature names and definitions are formally agreed
+ to by the broader fast-import developer community.
+
+ :Examples:
+
+ To produce data destined for import into Bazaar::
+
+ bzr fast-export --no-plain my-bzr-branch my.fi.gz
+
+ To produce data destined for Git 1.6::
+
+ bzr fast-export --plain my-bzr-branch my.fi
+
+ To import several unmerged but related branches into the same repository,
+ use the --{export,import}-marks options, and specify a name for the git
+ branch like this::
+
+ bzr fast-export --export-marks=marks.bzr project.dev |
+ GIT_DIR=project/.git git-fast-import --export-marks=marks.git
+
+ bzr fast-export --import-marks=marks.bzr -b other project.other |
+ GIT_DIR=project/.git git-fast-import --import-marks=marks.git
+
+    If you get a "Missing space after source" error from git-fast-import,
+    see the top of the commands.py module in python-fastimport for a
+    work-around.
+ """
+ hidden = False
+ _see_also = ['fast-import', 'fast-import-filter']
+ takes_args = ['source', 'destination?']
+ takes_options = ['verbose', 'revision',
+ Option('git-branch', short_name='b', type=str,
+ argname='FILE',
+ help='Name of the git branch to create (default=master).'
+ ),
+ Option('checkpoint', type=int, argname='N',
+ help="Checkpoint every N revisions (default=10000)."
+ ),
+ Option('marks', type=str, argname='FILE',
+ help="Import marks from and export marks to file."
+ ),
+ Option('import-marks', type=str, argname='FILE',
+ help="Import marks from file."
+ ),
+ Option('export-marks', type=str, argname='FILE',
+ help="Export marks to file."
+ ),
+ Option('plain',
+ help="Exclude metadata to maximise interoperability."
+ ),
+ ]
+ encoding_type = 'exact'
+
+    def run(self, source, destination=None, verbose=False,
+ git_branch="master", checkpoint=10000, marks=None,
+ import_marks=None, export_marks=None, revision=None,
+ plain=True):
+ load_fastimport()
+ from bzrlib.plugins.fastimport import exporter
+
+ if marks:
+ import_marks = export_marks = marks
+ exporter = exporter.BzrFastExporter(source,
+ destination=destination,
+ git_branch=git_branch, checkpoint=checkpoint,
+ import_marks_file=import_marks, export_marks_file=export_marks,
+ revision=revision, verbose=verbose, plain_format=plain)
+ return exporter.run()
+
+
+class cmd_fast_export_from_cvs(Command):
+ """Generate a fast-import file from a CVS repository.
+
+ Destination is a dump file, typically named xxx.fi where xxx is
+ the name of the project. If '-' is given, standard output is used.
+
+ cvs2svn 2.3 or later must be installed as its cvs2bzr script is used
+ under the covers to do the export.
+
+ The source must be the path on your filesystem to the part of the
+ repository you wish to convert. i.e. either that path or a parent
+ directory must contain a CVSROOT subdirectory. The path may point to
+ either the top of a repository or to a path within it. In the latter
+ case, only that project within the repository will be converted.
+
+ .. note::
+ Remote access to the repository is not sufficient - the path
+ must point into a copy of the repository itself. See
+ http://cvs2svn.tigris.org/faq.html#repoaccess for instructions
+ on how to clone a remote CVS repository locally.
+
+ By default, the trunk, branches and tags are all exported. If you
+ only want the trunk, use the `--trunk-only` option.
+
+ By default, filenames, log messages and author names are expected
+ to be encoded in ascii. Use the `--encoding` option to specify an
+ alternative. If multiple encodings are used, specify the option
+ multiple times. For a list of valid encoding names, see
+ http://docs.python.org/lib/standard-encodings.html.
+
+ Windows users need to install GNU sort and use the `--sort`
+ option to specify its location. GNU sort can be downloaded from
+ http://unxutils.sourceforge.net/.
+ """
+ hidden = False
+ _see_also = ['fast-import', 'fast-import-filter']
+ takes_args = ['source', 'destination']
+ takes_options = ['verbose',
+ Option('trunk-only',
+ help="Export just the trunk, ignoring tags and branches."
+ ),
+ ListOption('encoding', type=str, argname='CODEC',
+ help="Encoding used for filenames, commit messages "
+ "and author names if not ascii."
+ ),
+ Option('sort', type=str, argname='PATH',
+ help="GNU sort program location if not on the path."
+ ),
+ ]
+ encoding_type = 'exact'
+
+    def run(self, source, destination, verbose=False, trunk_only=False,
+ encoding=None, sort=None):
+ load_fastimport()
+ from bzrlib.plugins.fastimport.exporters import fast_export_from
+ custom = []
+ if trunk_only:
+ custom.append("--trunk-only")
+ if encoding:
+ for enc in encoding:
+ custom.extend(['--encoding', enc])
+ if sort:
+ custom.extend(['--sort', sort])
+ fast_export_from(source, destination, 'cvs', verbose, custom)
+
+
+class cmd_fast_export_from_darcs(Command):
+ """Generate a fast-import file from a Darcs repository.
+
+ Destination is a dump file, typically named xxx.fi where xxx is
+ the name of the project. If '-' is given, standard output is used.
+
+ Darcs 2.2 or later must be installed as various subcommands are
+ used to access the source repository. The source may be a network
+ URL but using a local URL is recommended for performance reasons.
+ """
+ hidden = False
+ _see_also = ['fast-import', 'fast-import-filter']
+ takes_args = ['source', 'destination']
+ takes_options = ['verbose',
+ Option('encoding', type=str, argname='CODEC',
+ help="Encoding used for commit messages if not utf-8."
+ ),
+ ]
+ encoding_type = 'exact'
+
+    def run(self, source, destination, verbose=False, encoding=None):
+ from bzrlib.plugins.fastimport.exporters import fast_export_from
+ custom = None
+ if encoding is not None:
+ custom = ['--encoding', encoding]
+ fast_export_from(source, destination, 'darcs', verbose, custom)
+
+
+class cmd_fast_export_from_hg(Command):
+ """Generate a fast-import file from a Mercurial repository.
+
+ Destination is a dump file, typically named xxx.fi where xxx is
+ the name of the project. If '-' is given, standard output is used.
+
+ Mercurial 1.2 or later must be installed as its libraries are used
+ to access the source repository. Given the APIs currently used,
+ the source repository must be a local file, not a network URL.
+ """
+ hidden = False
+ _see_also = ['fast-import', 'fast-import-filter']
+ takes_args = ['source', 'destination']
+ takes_options = ['verbose']
+ encoding_type = 'exact'
+
+    def run(self, source, destination, verbose=False):
+ load_fastimport()
+ from bzrlib.plugins.fastimport.exporters import fast_export_from
+ fast_export_from(source, destination, 'hg', verbose)
+
+
+class cmd_fast_export_from_git(Command):
+ """Generate a fast-import file from a Git repository.
+
+ Destination is a dump file, typically named xxx.fi where xxx is
+ the name of the project. If '-' is given, standard output is used.
+
+ Git 1.6 or later must be installed as the git fast-export
+ subcommand is used under the covers to generate the stream.
+ The source must be a local directory.
+
+ .. note::
+
+ Earlier versions of Git may also work fine but are
+ likely to receive less active support if problems arise.
+ """
+ hidden = False
+ _see_also = ['fast-import', 'fast-import-filter']
+ takes_args = ['source', 'destination']
+ takes_options = ['verbose']
+ encoding_type = 'exact'
+
+    def run(self, source, destination, verbose=False):
+ load_fastimport()
+ from bzrlib.plugins.fastimport.exporters import fast_export_from
+ fast_export_from(source, destination, 'git', verbose)
+
+
+class cmd_fast_export_from_mtn(Command):
+ """Generate a fast-import file from a Monotone repository.
+
+ Destination is a dump file, typically named xxx.fi where xxx is
+ the name of the project. If '-' is given, standard output is used.
+
+ Monotone 0.43 or later must be installed as the mtn git_export
+ subcommand is used under the covers to generate the stream.
+ The source must be a local directory.
+ """
+ hidden = False
+ _see_also = ['fast-import', 'fast-import-filter']
+ takes_args = ['source', 'destination']
+ takes_options = ['verbose']
+ encoding_type = 'exact'
+
+    def run(self, source, destination, verbose=False):
+ load_fastimport()
+ from bzrlib.plugins.fastimport.exporters import fast_export_from
+ fast_export_from(source, destination, 'mtn', verbose)
+
+
+class cmd_fast_export_from_p4(Command):
+ """Generate a fast-import file from a Perforce repository.
+
+ Source is a Perforce depot path, e.g., //depot/project
+
+ Destination is a dump file, typically named xxx.fi where xxx is
+ the name of the project. If '-' is given, standard output is used.
+
+ bzrp4 must be installed as its p4_fast_export.py module is used under
+ the covers to do the export. bzrp4 can be downloaded from
+ https://launchpad.net/bzrp4/.
+
+ The P4PORT environment variable must be set, and you must be logged
+ into the Perforce server.
+
+ By default, only the HEAD changelist is exported. To export all
+ changelists, append '@all' to the source. To export a revision range,
+ append a comma-delimited pair of changelist numbers to the source,
+ e.g., '100,200'.
+ """
+ hidden = False
+ _see_also = ['fast-import', 'fast-import-filter']
+ takes_args = ['source', 'destination']
+ takes_options = []
+ encoding_type = 'exact'
+
+    def run(self, source, destination, verbose=False):
+ load_fastimport()
+ from bzrlib.plugins.fastimport.exporters import fast_export_from
+ custom = []
+ fast_export_from(source, destination, 'p4', verbose, custom)
+
+
+class cmd_fast_export_from_svn(Command):
+ """Generate a fast-import file from a Subversion repository.
+
+ Destination is a dump file, typically named xxx.fi where xxx is
+ the name of the project. If '-' is given, standard output is used.
+
+ Python-Subversion (Python bindings to the Subversion APIs)
+ 1.4 or later must be installed as this library is used to
+ access the source repository. The source may be a network URL
+ but using a local URL is recommended for performance reasons.
+ """
+ hidden = False
+ _see_also = ['fast-import', 'fast-import-filter']
+ takes_args = ['source', 'destination']
+ takes_options = ['verbose',
+ Option('trunk-path', type=str, argname="STR",
+ help="Path in repo to /trunk.\n"
+ "May be `regex:/cvs/(trunk)/proj1/(.*)` in "
+ "which case the first group is used as the "
+ "branch name and the second group is used "
+ "to match files.",
+ ),
+ Option('branches-path', type=str, argname="STR",
+ help="Path in repo to /branches."
+ ),
+ Option('tags-path', type=str, argname="STR",
+ help="Path in repo to /tags."
+ ),
+ ]
+ encoding_type = 'exact'
+
+    def run(self, source, destination, verbose=False, trunk_path=None,
+ branches_path=None, tags_path=None):
+ load_fastimport()
+ from bzrlib.plugins.fastimport.exporters import fast_export_from
+ custom = []
+ if trunk_path is not None:
+ custom.extend(['--trunk-path', trunk_path])
+ if branches_path is not None:
+ custom.extend(['--branches-path', branches_path])
+ if tags_path is not None:
+ custom.extend(['--tags-path', tags_path])
+ fast_export_from(source, destination, 'svn', verbose, custom)
diff --git a/commands.py b/commands.py
deleted file mode 100644
index 7ae2f54..0000000
--- a/commands.py
+++ /dev/null
@@ -1,349 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Import command classes."""
-
-
-# There is a bug in git 1.5.4.3 and older by which unquoting a string consumes
-# one extra character. Set this variable to True to work-around it. It only
-# happens when renaming a file whose name contains spaces and/or quotes, and
-# the symptom is:
-# % git-fast-import
-# fatal: Missing space after source: R "file 1.txt" file 2.txt
-# http://git.kernel.org/?p=git/git.git;a=commit;h=c8744d6a8b27115503565041566d97c21e722584
-GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE = False
-
-
-# Lists of command names
-COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'progress', 'reset', 'tag']
-FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename',
- 'filedeleteall']
-
-# Bazaar file kinds
-FILE_KIND = 'file'
-SYMLINK_KIND = 'symlink'
-
-
-class ImportCommand(object):
- """Base class for import commands."""
-
- def __init__(self, name):
- self.name = name
- # List of field names not to display
- self._binary = []
-
- def dump_str(self, names=None, child_lists=None, verbose=False):
- """Dump fields as a string.
-
- :param names: the list of fields to include or
- None for all public fields
- :param child_lists: dictionary of child command names to
- fields for that child command to include
- :param verbose: if True, prefix each line with the command class and
- display fields as a dictionary; if False, dump just the field
- values with tabs between them
- """
- interesting = {}
- if names is None:
- fields = [k for k in self.__dict__.keys() if not k.startswith('_')]
- else:
- fields = names
- for field in fields:
- value = self.__dict__.get(field)
- if field in self._binary and value is not None:
- value = '(...)'
- interesting[field] = value
- if verbose:
- return "%s: %s" % (self.__class__.__name__, interesting)
- else:
- return "\t".join([repr(interesting[k]) for k in fields])
-
-
-class BlobCommand(ImportCommand):
-
- def __init__(self, mark, data, lineno=0):
- ImportCommand.__init__(self, 'blob')
- self.mark = mark
- self.data = data
- self.lineno = lineno
- # Provide a unique id in case the mark is missing
- if mark is None:
- self.id = '@%d' % lineno
- else:
- self.id = ':' + mark
- self._binary = ['data']
-
- def __repr__(self):
- if self.mark is None:
- mark_line = ""
- else:
- mark_line = "\nmark :%s" % self.mark
- return "blob%s\ndata %d\n%s" % (mark_line, len(self.data), self.data)
-
-
-class CheckpointCommand(ImportCommand):
-
- def __init__(self):
- ImportCommand.__init__(self, 'checkpoint')
-
- def __repr__(self):
- return "checkpoint"
-
-
-class CommitCommand(ImportCommand):
-
- def __init__(self, ref, mark, author, committer, message, from_,
- merges, file_iter, lineno=0):
- ImportCommand.__init__(self, 'commit')
- self.ref = ref
- self.mark = mark
- self.author = author
- self.committer = committer
- self.message = message
- self.from_ = from_
- self.merges = merges
- self.file_iter = file_iter
- self.lineno = lineno
- self._binary = ['file_iter']
- # Provide a unique id in case the mark is missing
- if mark is None:
- self.id = '@%d' % lineno
- else:
- self.id = ':%s' % mark
-
- def __repr__(self):
- if self.mark is None:
- mark_line = ""
- else:
- mark_line = "\nmark :%s" % self.mark
- if self.author is None:
- author_line = ""
- else:
- author_line = "\nauthor %s" % format_who_when(self.author)
- committer = "committer %s" % format_who_when(self.committer)
- if self.message is None:
- msg_section = ""
- else:
- msg = self.message.encode('utf8')
- msg_section = "\ndata %d\n%s" % (len(msg), msg)
- if self.from_ is None:
- from_line = ""
- else:
- from_line = "\nfrom %s" % self.from_
- if self.merges is None:
- merge_lines = ""
- else:
- merge_lines = "".join(["\nmerge %s" % (m,)
- for m in self.merges])
- if self.file_iter is None:
- filecommands = ""
- else:
- filecommands = "".join(["\n%r" % (c,)
- for c in iter(self.file_iter)])
- return "commit %s%s%s\n%s%s%s%s%s" % (self.ref, mark_line, author_line,
- committer, msg_section, from_line, merge_lines, filecommands)
-
- def dump_str(self, names=None, child_lists=None, verbose=False):
- result = [ImportCommand.dump_str(self, names, verbose=verbose)]
- for f in iter(self.file_iter):
- if child_lists is None:
- continue
- try:
- child_names = child_lists[f.name]
- except KeyError:
- continue
- result.append("\t%s" % f.dump_str(child_names, verbose=verbose))
- return '\n'.join(result)
-
-
-class ProgressCommand(ImportCommand):
-
- def __init__(self, message):
- ImportCommand.__init__(self, 'progress')
- self.message = message
-
- def __repr__(self):
- return "progress %s" % (self.message,)
-
-
-class ResetCommand(ImportCommand):
-
- def __init__(self, ref, from_):
- ImportCommand.__init__(self, 'reset')
- self.ref = ref
- self.from_ = from_
-
- def __repr__(self):
- if self.from_ is None:
- from_line = ""
- else:
- # According to git-fast-import(1), the extra LF is optional here;
- # however, versions of git up to 1.5.4.3 had a bug by which the LF
- # was needed. Always emit it, since it doesn't hurt and maintains
- # compatibility with older versions.
- # http://git.kernel.org/?p=git/git.git;a=commit;h=655e8515f279c01f525745d443f509f97cd805ab
- from_line = "\nfrom %s\n" % self.from_
- return "reset %s%s" % (self.ref, from_line)
-
-
-class TagCommand(ImportCommand):
-
- def __init__(self, id, from_, tagger, message):
- ImportCommand.__init__(self, 'tag')
- self.id = id
- self.from_ = from_
- self.tagger = tagger
- self.message = message
-
- def __repr__(self):
- if self.from_ is None:
- from_line = ""
- else:
- from_line = "\nfrom %s" % self.from_
- if self.tagger is None:
- tagger_line = ""
- else:
- tagger_line = "\ntagger %s" % format_who_when(self.tagger)
- if self.message is None:
- msg_section = ""
- else:
- msg = self.message.encode('utf8')
- msg_section = "\ndata %d\n%s" % (len(msg), msg)
- return "tag %s%s%s%s" % (self.id, from_line, tagger_line, msg_section)
-
-
-class FileCommand(ImportCommand):
- """Base class for file commands."""
- pass
-
-
-class FileModifyCommand(FileCommand):
-
- def __init__(self, path, kind, is_executable, dataref, data):
- # Either dataref or data should be null
- FileCommand.__init__(self, 'filemodify')
- self.path = check_path(path)
- self.kind = kind
- self.is_executable = is_executable
- self.dataref = dataref
- self.data = data
- self._binary = ['data']
-
- def __repr__(self):
- if self.kind == 'symlink':
- mode = "120000"
- elif self.is_executable:
- mode = "755"
- else:
- mode = "644"
- if self.dataref is None:
- dataref = "inline"
- datastr = "\ndata %d\n%s" % (len(self.data), self.data)
- else:
- dataref = "%s" % (self.dataref,)
- datastr = ""
- path = format_path(self.path)
- return "M %s %s %s%s" % (mode, dataref, path, datastr)
-
-
-class FileDeleteCommand(FileCommand):
-
- def __init__(self, path):
- FileCommand.__init__(self, 'filedelete')
- self.path = check_path(path)
-
- def __repr__(self):
- return "D %s" % (format_path(self.path),)
-
-
-class FileCopyCommand(FileCommand):
-
- def __init__(self, src_path, dest_path):
- FileCommand.__init__(self, 'filecopy')
- self.src_path = check_path(src_path)
- self.dest_path = check_path(dest_path)
-
- def __repr__(self):
- return "C %s %s" % (
- format_path(self.src_path, quote_spaces=True),
- format_path(self.dest_path))
-
-
-class FileRenameCommand(FileCommand):
-
- def __init__(self, old_path, new_path):
- FileCommand.__init__(self, 'filerename')
- self.old_path = check_path(old_path)
- self.new_path = check_path(new_path)
-
- def __repr__(self):
- return "R %s %s" % (
- format_path(self.old_path, quote_spaces=True),
- format_path(self.new_path))
-
-
-class FileDeleteAllCommand(FileCommand):
-
- def __init__(self):
- FileCommand.__init__(self, 'filedeleteall')
-
- def __repr__(self):
- return "deleteall"
-
-
-def check_path(path):
- """Check that a path is legal.
-
- :return: the path if all is OK
- :raise ValueError: if the path is illegal
- """
- if path is None or path == '':
- raise ValueError("illegal path '%s'" % path)
- return path
-
-
-def format_path(p, quote_spaces=False):
- """Format a path in utf8, quoting it if necessary."""
- if '\n' in p:
- import re
- p = re.sub('\n', '\\n', p)
- quote = True
- else:
- quote = p[0] == '"' or (quote_spaces and ' ' in p)
- if quote:
- extra = GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE and ' ' or ''
- p = '"%s"%s' % (p, extra)
- return p.encode('utf8')
-
-
-def format_who_when(fields):
- """Format a tuple of name,email,secs-since-epoch,utc-offset-secs as a string."""
- offset = fields[3]
- if offset < 0:
- offset_sign = '-'
- offset = abs(offset)
- else:
- offset_sign = '+'
- offset_hours = offset / 3600
- offset_minutes = offset / 60 - offset_hours * 60
- offset_str = "%s%02d%02d" % (offset_sign, offset_hours, offset_minutes)
- name = fields[0]
- if name == '':
- sep = ''
- else:
- sep = ' '
- result = "%s%s<%s> %d %s" % (name, sep, fields[1], fields[2], offset_str)
- return result.encode('utf8')
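A quick doctest-style sketch of the helper above, with illustrative
values (the fourth field is the UTC offset in seconds):

    >>> format_who_when(('Jane Doe', 'jane@example.com', 1234567890, -3600))
    'Jane Doe <jane@example.com> 1234567890 -0100'
    >>> format_who_when(('', 'jane@example.com', 1234567890, 5400))
    '<jane@example.com> 1234567890 +0130'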
diff --git a/dates.py b/dates.py
deleted file mode 100644
index 209d069..0000000
--- a/dates.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Date parsing routines.
-
-Each routine returns timestamp,timezone where
-
-* timestamp is seconds since epoch
-* timezone is the offset from UTC in seconds.
-"""
-
-
-import time
-
-from bzrlib.plugins.fastimport import errors
-
-
-def parse_raw(s, lineno=0):
- """Parse a date from a raw string.
-
- The format must be exactly "seconds-since-epoch offset-utc".
- See the spec for details.
- """
- timestamp_str, timezone_str = s.split(' ', 1)
- timestamp = float(timestamp_str)
- timezone = _parse_tz(timezone_str, lineno)
- return timestamp, timezone
-
-
-def _parse_tz(tz, lineno):
- """Parse a timezone specification in the [+|-]HHMM format.
-
- :return: the timezone offset in seconds.
- """
- # from git_repository.py in bzr-git
- if len(tz) != 5:
- raise errors.InvalidTimezone(lineno, tz)
- sign = {'+': +1, '-': -1}[tz[0]]
- hours = int(tz[1:3])
- minutes = int(tz[3:])
- return sign * 60 * (60 * hours + minutes)
-
-
-def parse_rfc2822(s, lineno=0):
- """Parse a date from a rfc2822 string.
-
- See the spec for details.
- """
- raise NotImplementedError(parse_rfc2822)
-
-
-def parse_now(s, lineno=0):
- """Parse a date from a string.
-
- The format must be exactly "now".
- See the spec for details.
- """
- return time.time(), 0
-
-
-# Lookup table of date parsing routines
-DATE_PARSERS_BY_NAME = {
- 'raw': parse_raw,
- 'rfc2822': parse_rfc2822,
- 'now': parse_now,
- }
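A doctest-style sketch of the raw date parser above, with illustrative
values (the returned timezone is the offset in seconds):

    >>> parse_raw('1234567890 +0530')
    (1234567890.0, 19800)
    >>> parse_raw('1234567890 -0800')
    (1234567890.0, -28800)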
diff --git a/errors.py b/errors.py
deleted file mode 100644
index 02cc690..0000000
--- a/errors.py
+++ /dev/null
@@ -1,165 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Exception classes for fastimport"""
-
-from bzrlib import errors as bzr_errors
-
-
-# Prefix to messages to show location information
-_LOCATION_FMT = "line %(lineno)d: "
-
-
-class ImportError(bzr_errors.BzrError):
- """The base exception class for all import processing exceptions."""
-
- _fmt = "Unknown Import Error"
-
-
-class ParsingError(ImportError):
- """The base exception class for all import processing exceptions."""
-
- _fmt = _LOCATION_FMT + "Unknown Import Parsing Error"
-
- def __init__(self, lineno):
- ImportError.__init__(self)
- self.lineno = lineno
-
-
-class MissingBytes(ParsingError):
- """Raised when EOF encountered while expecting to find more bytes."""
-
- _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes,"
- " found %(found)d")
-
- def __init__(self, lineno, expected, found):
- ParsingError.__init__(self, lineno)
- self.expected = expected
- self.found = found
-
-
-class MissingTerminator(ParsingError):
- """Raised when EOF encountered while expecting to find a terminator."""
-
- _fmt = (_LOCATION_FMT +
- "Unexpected EOF - expected '%(terminator)s' terminator")
-
- def __init__(self, lineno, terminator):
- ParsingError.__init__(self, lineno)
- self.terminator = terminator
-
-
-class InvalidCommand(ParsingError):
- """Raised when an unknown command found."""
-
- _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'")
-
- def __init__(self, lineno, cmd):
- ParsingError.__init__(self, lineno)
- self.cmd = cmd
-
-
-class MissingSection(ParsingError):
- """Raised when a section is required in a command but not present."""
-
- _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s")
-
- def __init__(self, lineno, cmd, section):
- ParsingError.__init__(self, lineno)
- self.cmd = cmd
- self.section = section
-
-
-class BadFormat(ParsingError):
- """Raised when a section is formatted incorrectly."""
-
- _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in "
- "command %(cmd)s: found '%(text)s'")
-
- def __init__(self, lineno, cmd, section, text):
- ParsingError.__init__(self, lineno)
- self.cmd = cmd
- self.section = section
- self.text = text
-
-
-class InvalidTimezone(ParsingError):
- """Raised when converting a string timezone to a seconds offset."""
-
- _fmt = (_LOCATION_FMT +
- "Timezone %(timezone)r could not be converted.%(reason)s")
-
- def __init__(self, lineno, timezone, reason=None):
- ParsingError.__init__(self, lineno)
- self.timezone = timezone
- if reason:
- self.reason = ' ' + reason
- else:
- self.reason = ''
-
-
-class UnknownDateFormat(ImportError):
- """Raised when an unknown date format is given."""
-
- _fmt = ("Unknown date format '%(format)s'")
-
- def __init__(self, format):
- ImportError.__init__(self)
- self.format = format
-
-
-class MissingHandler(ImportError):
- """Raised when a processor can't handle a command."""
-
- _fmt = ("Missing handler for command %(cmd)s")
-
- def __init__(self, cmd):
- ImportError.__init__(self)
- self.cmd = cmd
-
-
-class UnknownParameter(ImportError):
- """Raised when an unknown parameter is passed to a processor."""
-
- _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s")
-
- def __init__(self, param, knowns):
- ImportError.__init__(self)
- self.param = param
- self.knowns = knowns
-
-
-class BadRepositorySize(ImportError):
- """Raised when the repository has an incorrect number of revisions."""
-
- _fmt = ("Bad repository size - %(found)d revisions found, "
- "%(expected)d expected")
-
- def __init__(self, expected, found):
- ImportError.__init__(self)
- self.expected = expected
- self.found = found
-
-
-class BadRestart(ImportError):
- """Raised when the import stream and id-map do not match up."""
-
- _fmt = ("Bad restart - attempted to skip commit %(commit_id)s "
- "but matching revision-id is unknown")
-
- def __init__(self, commit_id):
- ImportError.__init__(self)
- self.commit_id = commit_id
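A sketch of how these exceptions render, assuming bzrlib's BzrError
interpolates _fmt from the instance attributes in the usual way:

    >>> str(InvalidCommand(7, 'comit'))
    "line 7: Invalid command 'comit'"
    >>> str(MissingBytes(12, 100, 42))
    'line 12: Unexpected EOF - expected 100 bytes, found 42'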
diff --git a/explorer/logos/cvs.png b/explorer/logos/cvs.png
new file mode 100644
index 0000000..e279bdf
--- /dev/null
+++ b/explorer/logos/cvs.png
Binary files differ
diff --git a/explorer/logos/darcs.png b/explorer/logos/darcs.png
new file mode 100644
index 0000000..ca9365f
--- /dev/null
+++ b/explorer/logos/darcs.png
Binary files differ
diff --git a/explorer/logos/git.png b/explorer/logos/git.png
new file mode 100644
index 0000000..aae35a7
--- /dev/null
+++ b/explorer/logos/git.png
Binary files differ
diff --git a/explorer/logos/mercurial.png b/explorer/logos/mercurial.png
new file mode 100644
index 0000000..60effbc
--- /dev/null
+++ b/explorer/logos/mercurial.png
Binary files differ
diff --git a/explorer/logos/monotone.png b/explorer/logos/monotone.png
new file mode 100644
index 0000000..16f1908
--- /dev/null
+++ b/explorer/logos/monotone.png
Binary files differ
diff --git a/explorer/logos/perforce.png b/explorer/logos/perforce.png
new file mode 100644
index 0000000..e62897c
--- /dev/null
+++ b/explorer/logos/perforce.png
Binary files differ
diff --git a/explorer/logos/subversion.png b/explorer/logos/subversion.png
new file mode 100644
index 0000000..d28702a
--- /dev/null
+++ b/explorer/logos/subversion.png
Binary files differ
diff --git a/explorer/tools.xml b/explorer/tools.xml
new file mode 100644
index 0000000..2386737
--- /dev/null
+++ b/explorer/tools.xml
@@ -0,0 +1,20 @@
+<folder title="Tools">
+ <folder title="Migration Tools">
+ <folder title="Export From" icon="actions/edit-redo">
+ <tool action="qrun fast-export" icon="logos/bazaar" title="Bazaar" type="bzr" />
+ <tool action="qrun fast-export-from-cvs" icon="logos/cvs" title="CVS" type="bzr" />
+ <tool action="qrun fast-export-from-darcs" icon="logos/darcs" title="Darcs" type="bzr" />
+ <tool action="qrun fast-export-from-git" icon="logos/git" title="Git" type="bzr" />
+ <tool action="qrun fast-export-from-hg" icon="logos/mercurial" title="Mercurial" type="bzr" />
+ <tool action="qrun fast-export-from-mtn" icon="logos/monotone" title="Monotone" type="bzr" />
+ <tool action="qrun fast-export-from-p4" icon="logos/perforce" title="Perforce" type="bzr" />
+ <tool action="qrun fast-export-from-svn" icon="logos/subversion" title="Subversion" type="bzr" />
+ </folder>
+ <folder title="Import From" icon="actions/go-jump">
+ <tool action="qrun fast-import" icon="mimetypes/text-x-generic-template" title="Fast Import Stream" type="bzr" />
+ </folder>
+ <separator/>
+ <tool action="qrun fast-import-filter" icon="actions/media-playback-pause" title="Fast Import Filter" type="bzr" />
+ </folder>
+</folder>
+
diff --git a/bzr_exporter.py b/exporter.py
index 16d942a..3f477d1 100755..100644
--- a/bzr_exporter.py
+++ b/exporter.py
@@ -35,65 +35,57 @@ import bzrlib.revision
from bzrlib import (
builtins,
errors as bazErrors,
+ osutils,
progress,
trace,
)
-from bzrlib.plugins.fastimport import commands, helpers, marks_file
+from bzrlib.plugins.fastimport import (
+ helpers,
+ marks_file,
+ )
+from fastimport import commands
+from fastimport.helpers import (
+ binary_stream,
+ single_plural,
+ )
-# This is adapted from _linear_view_revisions in log.py in bzr 1.12.
-def _iter_linear_revisions(branch, start_rev_id, end_rev_id):
- """Calculate a sequence of revisions, newest to oldest.
- :param start_rev_id: the lower revision-id
- :param end_rev_id: the upper revision-id
- :return: An iterator of revision_ids
- :raises ValueError: if a start_rev_id is specified but
- is not found walking the left-hand history
- """
- br_revno, br_rev_id = branch.last_revision_info()
- repo = branch.repository
- if start_rev_id is None and end_rev_id is None:
- for revision_id in repo.iter_reverse_revision_history(br_rev_id):
- yield revision_id
+def _get_output_stream(destination):
+ if destination is None or destination == '-':
+ return binary_stream(sys.stdout)
+ elif destination.endswith('gz'):
+ import gzip
+ return gzip.open(destination, 'wb')
else:
- if end_rev_id is None:
- end_rev_id = br_rev_id
- found_start = start_rev_id is None
- for revision_id in repo.iter_reverse_revision_history(end_rev_id):
- if not found_start and revision_id == start_rev_id:
- yield revision_id
- found_start = True
- break
- else:
- yield revision_id
- else:
- if not found_start:
- raise ValueError()
+ return open(destination, 'wb')
class BzrFastExporter(object):
def __init__(self, source, destination, git_branch=None, checkpoint=-1,
import_marks_file=None, export_marks_file=None, revision=None,
- verbose=False):
+ verbose=False, plain_format=False):
+ """Export branch data in fast import format.
+
+ :param plain_format: if True, 'classic' fast-import format is
+ used without any extended features; if False, the generated
+ data is richer and includes information like multiple
+ authors, revision properties, etc.
+ """
self.source = source
- if destination is None or destination == '-':
- self.outf = helpers.binary_stream(sys.stdout)
- elif destination.endswith('gz'):
- import gzip
- self.outf = gzip.open(destination, 'wb')
- else:
- self.outf = open(destination, 'wb')
+ self.outf = _get_output_stream(destination)
self.git_branch = git_branch
self.checkpoint = checkpoint
self.import_marks_file = import_marks_file
self.export_marks_file = export_marks_file
self.revision = revision
self.excluded_revisions = set()
+ self.plain_format = plain_format
self._multi_author_api_available = hasattr(bzrlib.revision.Revision,
'get_apparent_authors')
+ self.properties_to_exclude = ['authors', 'author']
# Progress reporting stuff
self.verbose = verbose
@@ -102,6 +94,7 @@ class BzrFastExporter(object):
else:
self.progress_every = 1000
self._start_time = time.time()
+ self._commit_total = 0
# Load the marks and initialise things accordingly
self.revid_to_mark = {}
@@ -124,17 +117,15 @@ class BzrFastExporter(object):
start_rev_id = None
end_rev_id = None
self.note("Calculating the revisions to include ...")
- view_revisions = reversed(list(_iter_linear_revisions(self.branch,
- start_rev_id, end_rev_id)))
+ view_revisions = reversed([rev_id for rev_id, _, _, _ in
+ self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)])
# If a starting point was given, we need to later check that we don't
# start emitting revisions from before that point. Collect the
# revisions to exclude now ...
if start_rev_id is not None:
- # The result is inclusive so skip the first (the oldest) one
self.note("Calculating the revisions to exclude ...")
- uninteresting = list(_iter_linear_revisions(self.branch, None,
- start_rev_id))[1:]
- self.excluded_revisions = set(uninteresting)
+ self.excluded_revisions = set([rev_id for rev_id, _, _, _ in
+ self.branch.iter_merge_sorted_revisions(start_rev_id)])
return list(view_revisions)
def run(self):
@@ -144,7 +135,13 @@ class BzrFastExporter(object):
# Export the data
self.branch.repository.lock_read()
try:
- for revid in self.interesting_history():
+ interesting = self.interesting_history()
+ self._commit_total = len(interesting)
+ self.note("Starting export of %d revisions ..." %
+ self._commit_total)
+ if not self.plain_format:
+ self.emit_features()
+ for revid in interesting:
self.emit_commit(revid, self.git_branch)
if self.branch.supports_tags():
self.emit_tags()
@@ -171,10 +168,11 @@ class BzrFastExporter(object):
return time.strftime("%H:%M:%S")
def report_progress(self, commit_count, details=''):
- # Note: we can't easily give a total count here because we
- # don't know how many merged revisions will need to be output
if commit_count and commit_count % self.progress_every == 0:
- counts = "%d" % (commit_count,)
+ if self._commit_total:
+ counts = "%d/%d" % (commit_count, self._commit_total)
+ else:
+ counts = "%d" % (commit_count,)
minutes = (time.time() - self._start_time) / 60
rate = commit_count * 1.0 / minutes
if rate > 10:
@@ -187,7 +185,7 @@ class BzrFastExporter(object):
time_required = progress.str_tdelta(time.time() - self._start_time)
rc = len(self.revid_to_mark)
self.note("Exported %d %s in %s",
- rc, helpers.single_plural(rc, "revision", "revisions"),
+ rc, single_plural(rc, "revision", "revisions"),
time_required)
def print_cmd(self, cmd):
@@ -200,7 +198,7 @@ class BzrFastExporter(object):
def is_empty_dir(self, tree, path):
path_id = tree.path2id(path)
- if path_id == None:
+ if path_id is None:
self.warning("Skipping empty_dir detection - no file_id for %s" %
(path,))
return False
@@ -216,6 +214,10 @@ class BzrFastExporter(object):
else:
return False
+ def emit_features(self):
+ for feature in sorted(commands.FEATURE_NAMES):
+ self.print_cmd(commands.FeatureCommand(feature))
+
def emit_commit(self, revid, git_branch):
if revid in self.revid_to_mark or revid in self.excluded_revisions:
return
@@ -228,14 +230,13 @@ class BzrFastExporter(object):
self.revid_to_mark[revid] = -1
return
- # Emit parents
- nparents = len(revobj.parent_ids)
- if nparents:
- for parent in revobj.parent_ids:
- self.emit_commit(parent, git_branch)
-
# Get the primary parent
+ # TODO: Consider the excluded revisions when deciding the parents.
+ # Currently, a commit with parents that are excluded ought to be
+ # triggering the git_branch calculation below (and it is not).
+ # IGC 20090824
ncommits = len(self.revid_to_mark)
+ nparents = len(revobj.parent_ids)
if nparents == 0:
if ncommits:
# This is a parentless commit but it's not the first one
@@ -264,36 +265,55 @@ class BzrFastExporter(object):
self._save_marks()
self.print_cmd(commands.CheckpointCommand())
- def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
- # Get the committer and author info
- committer = revobj.committer
- if committer.find('<') == -1:
+ def _get_name_email(self, user):
+ if user.find('<') == -1:
# If the email isn't inside <>, we need to use it as the name
# in order for things to round-trip correctly.
# (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
- name = committer
+ name = user
email = ''
else:
- name, email = parseaddr(committer)
+ name, email = parseaddr(user)
+ return name, email
+
+ def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
+ # Get the committer and author info
+ committer = revobj.committer
+ name, email = self._get_name_email(committer)
committer_info = (name, email, revobj.timestamp, revobj.timezone)
if self._multi_author_api_available:
- author = revobj.get_apparent_authors()[0]
+ more_authors = revobj.get_apparent_authors()
+ author = more_authors.pop(0)
else:
+ more_authors = []
author = revobj.get_apparent_author()
- if author != committer:
- name, email = parseaddr(author)
+ if not self.plain_format and more_authors:
+ name, email = self._get_name_email(author)
author_info = (name, email, revobj.timestamp, revobj.timezone)
+ more_author_info = []
+ for a in more_authors:
+ name, email = self._get_name_email(a)
+ more_author_info.append(
+ (name, email, revobj.timestamp, revobj.timezone))
+ elif author != committer:
+ name, email = self._get_name_email(author)
+ author_info = (name, email, revobj.timestamp, revobj.timezone)
+ more_author_info = None
else:
author_info = None
+ more_author_info = None
# Get the parents in terms of marks
non_ghost_parents = []
for p in revobj.parent_ids:
if p in self.excluded_revisions:
continue
- parent_mark = self.revid_to_mark[p]
- if parent_mark != -1:
+ try:
+ parent_mark = self.revid_to_mark[p]
non_ghost_parents.append(":%s" % parent_mark)
+ except KeyError:
+ # ghost - ignore
+ continue
if non_ghost_parents:
from_ = non_ghost_parents[0]
merges = non_ghost_parents[1:]
@@ -301,9 +321,23 @@ class BzrFastExporter(object):
from_ = None
merges = None
+ # Filter the revision properties. Some metadata (like the
+ # author information) is already exposed in other ways so
+ # don't repeat it here.
+ if self.plain_format:
+ properties = None
+ else:
+ properties = revobj.properties
+ for prop in self.properties_to_exclude:
+ try:
+ del properties[prop]
+ except KeyError:
+ pass
+
# Build and return the result
return commands.CommitCommand(git_ref, mark, author_info,
- committer_info, revobj.message, from_, merges, iter(file_cmds))
+ committer_info, revobj.message, from_, merges, iter(file_cmds),
+ more_authors=more_author_info, properties=properties)
def _get_revision_trees(self, parent, revision_id):
try:
@@ -351,15 +385,21 @@ class BzrFastExporter(object):
for path, id_, kind in changes.added + my_modified + rd_modifies:
if kind == 'file':
text = tree_new.get_file_text(id_)
- file_cmds.append(commands.FileModifyCommand(path, 'file',
- tree_new.is_executable(id_), None, text))
+ file_cmds.append(commands.FileModifyCommand(path,
+ helpers.kind_to_mode('file', tree_new.is_executable(id_)),
+ None, text))
elif kind == 'symlink':
- file_cmds.append(commands.FileModifyCommand(path, 'symlink',
- False, None, tree_new.get_symlink_target(id_)))
+ file_cmds.append(commands.FileModifyCommand(path,
+ helpers.kind_to_mode('symlink', False),
+ None, tree_new.get_symlink_target(id_)))
+ elif kind == 'directory':
+ if not self.plain_format:
+ file_cmds.append(commands.FileModifyCommand(path,
+ helpers.kind_to_mode('directory', False),
+ None, None))
else:
- # Should we do something here for importers that
- # can handle directory and tree-reference changes?
- continue
+ self.warning("cannot export '%s' of kind %s yet - ignoring" %
+ (path, kind))
return file_cmds
def _process_renames_and_deletes(self, renames, deletes,
@@ -388,11 +428,15 @@ class BzrFastExporter(object):
# Instead, we need to make multiple passes over the various lists to
# get the ordering right.
+ must_be_renamed = {}
+ old_to_new = {}
deleted_paths = set([p for p, _, _ in deletes])
for (oldpath, newpath, id_, kind,
text_modified, meta_modified) in renames:
+ emit = kind != 'directory' or not self.plain_format
if newpath in deleted_paths:
- file_cmds.append(commands.FileDeleteCommand(newpath))
+ if emit:
+ file_cmds.append(commands.FileDeleteCommand(newpath))
deleted_paths.remove(newpath)
if (self.is_empty_dir(tree_old, oldpath)):
self.note("Skipping empty dir %s in rev %s" % (oldpath,
@@ -401,14 +445,40 @@ class BzrFastExporter(object):
#oldpath = self._adjust_path_for_renames(oldpath, renamed,
# revision_id)
renamed.append([oldpath, newpath])
- file_cmds.append(commands.FileRenameCommand(oldpath, newpath))
+ old_to_new[oldpath] = newpath
+ if emit:
+ file_cmds.append(commands.FileRenameCommand(oldpath, newpath))
if text_modified or meta_modified:
modifies.append((newpath, id_, kind))
+ # Renaming a directory implies all children must be renamed.
+ # Note: changes_from() doesn't handle this
+ if kind == 'directory':
+ for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_):
+ if e.kind == 'directory' and self.plain_format:
+ continue
+ old_child_path = osutils.pathjoin(oldpath, p)
+ new_child_path = osutils.pathjoin(newpath, p)
+ must_be_renamed[old_child_path] = new_child_path
+
+ # Add children not already renamed
+ if must_be_renamed:
+ renamed_already = set(old_to_new.keys())
+ still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already
+ for old_child_path in sorted(still_to_be_renamed):
+ new_child_path = must_be_renamed[old_child_path]
+ if self.verbose:
+ self.note("implicitly renaming %s => %s" % (old_child_path,
+ new_child_path))
+ file_cmds.append(commands.FileRenameCommand(old_child_path,
+ new_child_path))
+
# Record remaining deletes
for path, id_, kind in deletes:
if path not in deleted_paths:
continue
+ if kind == 'directory' and self.plain_format:
+ continue
#path = self._adjust_path_for_renames(path, renamed, revision_id)
file_cmds.append(commands.FileDeleteCommand(path))
return file_cmds, modifies, renamed
@@ -435,7 +505,7 @@ class BzrFastExporter(object):
self.warning('not creating tag %r pointing to non-existent '
'revision %s' % (tag, revid))
else:
- git_ref = 'refs/tags/%s' % tag
+ git_ref = 'refs/tags/%s' % tag.encode("utf-8")
self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark)))
def _next_tmp_branch_name(self):
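The hunks above replace the old (kind, is_executable) pair passed to
FileModifyCommand with a single git-style mode obtained from
helpers.kind_to_mode. A rough sketch of that mapping, assuming it
mirrors the tree-entry modes git fast-import expects:

    def kind_to_mode(kind, executable):
        # tree entry modes as understood by git fast-import
        if kind == 'file':
            if executable:
                return 0100755
            return 0100644
        elif kind == 'symlink':
            return 0120000
        elif kind == 'directory':
            return 040000
        else:
            raise AssertionError("unknown kind %r" % (kind,))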
diff --git a/exporters/Makefile b/exporters/Makefile
index 8a4efd9..2b71211 100644
--- a/exporters/Makefile
+++ b/exporters/Makefile
@@ -1,7 +1,8 @@
-SVN ?= /usr/local/svn
-APR_INCLUDES ?= /usr/include/apr-1.0
-CFLAGS += -I${APR_INCLUDES} -I${SVN}/include/subversion-1 -pipe -O2 -std=c99
+SVN ?= /usr
+CFLAGS += -I${SVN}/include/subversion-1 -pipe -O2 -std=c99
+CFLAGS += `pkg-config --cflags apr-1`
LDFLAGS += -L${SVN}/lib -lsvn_fs-1 -lsvn_repos-1
+LDFLAGS += `pkg-config --libs apr-1`
all: svn-fast-export svn-archive
diff --git a/exporters/__init__.py b/exporters/__init__.py
index 2d7b135..6d282d6 100644
--- a/exporters/__init__.py
+++ b/exporters/__init__.py
@@ -218,11 +218,14 @@ class MercurialExporter(_Exporter):
class GitExporter(_Exporter):
def __init__(self):
- self.check_install('Git', '1.6', ['git'])
+ self.cmd_name = "git"
+ if sys.platform == 'win32':
+ self.cmd_name = "git.cmd"
+ self.check_install('Git', '1.6', [self.cmd_name])
def generate(self, source, destination, verbose=False, custom=None):
"""Generate a fast import stream. See _Exporter.generate() for details."""
- args = ["git", "fast-export", "--all", "--signed-tags=warn"]
+ args = [self.cmd_name, "fast-export", "--all", "--signed-tags=warn"]
outf, base, marks = self.get_output_info(destination)
if marks:
marks = os.path.abspath(marks)
@@ -241,11 +244,11 @@ class GitExporter(_Exporter):
class MonotoneExporter(_Exporter):
def __init__(self):
- self.check_install('Monotone', '0.43', ['mnt'])
+ self.check_install('Monotone', '0.43', ['mtn'])
def generate(self, source, destination, verbose=False, custom=None):
"""Generate a fast import stream. See _Exporter.generate() for details."""
- args = ["mnt", "git_export"]
+ args = ["mtn", "git_export"]
outf, base, marks = self.get_output_info(destination)
if marks:
marks = os.path.abspath(marks)
@@ -277,7 +280,7 @@ class PerforceExporter(_Exporter):
retcode = p4_fast_export.main([source])
finally:
sys.stdout = original_stdout
- self.report_results(retcode, destination)
+ self.report_results(retcode, destination)
class SubversionExporter(_Exporter):
@@ -308,7 +311,7 @@ def fast_export_from(source, destination, tool, verbose=False, custom=None):
factory = MercurialExporter
elif tool == 'git':
factory = GitExporter
- elif tool == 'mnt':
+ elif tool == 'mtn':
factory = MonotoneExporter
elif tool == 'p4':
factory = PerforceExporter
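The 'mnt' -> 'mtn' fixes above touch both the binary probed for and the
fast_export_from dispatch table, so the Monotone path is only reachable
under the corrected name. A usage sketch (the paths are hypothetical):

    # fast_export_from(source, destination, tool, verbose=False, custom=None)
    fast_export_from('/path/to/mtn-repo', 'repo.fi', 'mtn', verbose=True)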
diff --git a/exporters/darcs/README b/exporters/darcs/README
index 4b13e3b..3fc9449 100644
--- a/exporters/darcs/README
+++ b/exporters/darcs/README
@@ -23,13 +23,6 @@ Independent::
Ideally it should work with any fast importer, but actually it has been
tested with git fast-import, bzr fast-import and hg fastimport. (These
are the three fast-import implementations available ATM.)
- +
- hg fastimport needs three patches. While they are not in the upstream,
- you can get it from my repository using
-+
-----
-$ hg clone static-http://frugalware.org/~vmiklos/hg/hg-fastimport
-----
Formats::
It supports the 'darcs-2', 'hashed', and 'old-fashioned-inventory' darcs
@@ -127,16 +120,18 @@ supported by fastimport-0.6 is hg-1.0.x.
Mercurial (Hg) version:
----
$ hg version
-Mercurial Distributed SCM (version 1.2.1)
+Mercurial Distributed SCM (version 1.3)
----
-Strictly speaking this document is a wrong place to talk about enabling
-hg plugins. However...
+Strictly speaking this document is the wrong place to talk about
+configuring hg fastimport. However... you will need something like:
----
-$ cat ~/.hgrc
-[extensions]
-hgext.fastimport=
+$ hg clone http://vc.gerg.ca/hg/pyfastimport
+$ hg clone http://vc.gerg.ca/hg/hg-fastimport
+$ sudo ln -s /path/to/pyfastimport/fastimport /usr/lib/python2.6/site-packages/fastimport
+$ sudo ln -s /path/to/hg-fastimport/hgfastimport /usr/lib/python2.6/site-packages/hgfastimport
+$ echo -e "[extensions]\nfastimport = /usr/lib/python2.6/site-packages/hgfastimport" >> ~/.hgrc
----
and once you have installed the plugin correctly, you should have something like:
diff --git a/exporters/darcs/TODO b/exporters/darcs/TODO
index 2f199d1..c6892c8 100644
--- a/exporters/darcs/TODO
+++ b/exporters/darcs/TODO
@@ -4,3 +4,5 @@ not enabled, etc.
parse the patches manually so we can avoid re-adding existing files manually.
avoid darcs apply.
+
+import: handle evil merges (git-subtree), maybe using git log --first-parent
diff --git a/exporters/darcs/d2x b/exporters/darcs/d2x
index 79e18a3..959cc00 100755
--- a/exporters/darcs/d2x
+++ b/exporters/darcs/d2x
@@ -93,7 +93,7 @@ if [ ! -f $dmark ]; then
hg)
hg init
darcs-fast-export $* $origin | \
- hg fastimport /dev/stdin
+ hg fastimport -
esac
else
case $format in
diff --git a/exporters/darcs/darcs-fast-export b/exporters/darcs/darcs-fast-export
index d94618e..fa850de 100755
--- a/exporters/darcs/darcs-fast-export
+++ b/exporters/darcs/darcs-fast-export
@@ -4,7 +4,7 @@
darcs-fast-export - darcs backend for fast data importers
- Copyright (c) 2008 Miklos Vajna <vmiklos@frugalware.org>
+ Copyright (c) 2008, 2009 Miklos Vajna <vmiklos@frugalware.org>
Copyright (c) 2008 Matthias Andree <matthias.andree@gmx.de>
This program is free software; you can redistribute it and/or modify
@@ -29,314 +29,352 @@ import os
import sys
import gzip
import time
+import calendar
import shutil
import subprocess
import optparse
import re
+import urllib
+import urllib2
+import StringIO
sys = reload(sys)
sys.setdefaultencoding("utf-8")
-def __get_zone():
- now = time.localtime()
- if time.daylight and now[-1]:
- offset = time.altzone
- else:
- offset = time.timezone
- hours, minutes = divmod(abs(offset), 3600)
- if offset > 0:
- sign = "-"
- else:
- sign = "+"
- return sign, hours, minutes
-
-def get_zone_str():
- sign, hours, minutes = __get_zone()
- return "%s%02d%02d" % (sign, hours, minutes // 60)
-
-def get_zone_int():
- sign, hours, minutes = __get_zone()
- ret = hours*3600+minutes*60
- if sign == "-":
- ret *= -1
- return ret
-
-def get_patchname(patch):
- ret = []
- s = ""
- if patch.attributes['inverted'].value == 'True':
- s = "UNDO: "
- ret.append(s + patch.getElementsByTagName("name")[0].childNodes[0].data)
- lines = patch.getElementsByTagName("comment")
- if lines:
- for i in lines[0].childNodes[0].data.split('\n'):
- if not i.startswith("Ignore-this: "):
- ret.append(i)
- return "\n".join(ret).encode('utf-8')
-
-def get_author(patch):
- """darcs allows any freeform string, but fast-import has a more
- strict format, so fix up broken author names here."""
-
- author = patch.attributes['author'].value
- if author in authormap:
- author = authormap[author]
- if not len(author):
- author = "darcs-fast-export <darcs-fast-export>"
- # add missing name
- elif not ">" in author:
- author = "%s <%s>" % (author.split('@')[0], author)
- # avoid double quoting
- elif author[0] == '"' and author[-1] == '"':
- author = author[1:-1]
- # name after email
- elif author[-1] != '>':
- author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1]
- return author.encode('utf-8')
-
-def get_date(patch):
- try:
- date = time.strptime(patch, "%Y%m%d%H%M%S")
- except ValueError:
- date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y')
- return int(time.mktime(date)) + get_zone_int()
-
-def progress(s):
- print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)
- sys.stdout.flush()
-
-def log(s):
- logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s))
- logsock.flush()
-
-hashes = []
-def parse_inventory(sock=None):
- prev = None
- nextprev = False
- buf = []
- if not sock:
- sock = open(os.path.join("_darcs", "hashed_inventory"))
- for i in sock.readlines():
- if i.startswith("hash"):
- buf.insert(0, i[6:-1])
- if i.startswith("Starting with inventory:"):
- nextprev = True
- elif nextprev:
- prev = i[:-1]
- nextprev = False
- sock.close()
- for i in buf:
- hashes.insert(0, i)
- if prev:
- sock = gzip.open(os.path.join("_darcs", "inventories", prev))
- parse_inventory(sock)
-
-# Option Parser
-usage="%prog [options] darcsrepo"
-opp = optparse.OptionParser(usage=usage)
-opp.add_option("--import-marks", metavar="IFILE",
- help="read state for incremental imports from IFILE")
-opp.add_option("--export-marks", metavar="OFILE",
- help="write state for incremental imports from OFILE")
-opp.add_option("--encoding",
- help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess")
-opp.add_option("--authors-file", metavar="F",
- help="read author transformations in old=new format from F")
-opp.add_option("--working", metavar="W",
- help="working directory which is removed at the end of non-incremental conversions")
-opp.add_option("--logfile", metavar="L",
- help="log file which contains the output of external programs invoked during the conversion")
-opp.add_option("--git-branch", metavar="B",
- help="git branch [default: refs/heads/master]")
-opp.add_option("--progress", metavar="P",
- help="insert progress statements after every n commit [default: 100]")
-(options, args) = opp.parse_args()
-if len(args) < 1:
- opp.error("darcsrepo required")
-
-export_marks = []
-import_marks = []
-if options.import_marks:
- sock = open(options.import_marks)
- for i in sock.readlines():
- line = i.strip()
- if not len(line):
- continue
- import_marks.append(line.split(' ')[1])
- export_marks.append(line)
- sock.close()
-
-# read author mapping file in gitauthors format,
-# i.e. in=out (one per line)
-authormap = {}
-if options.authors_file:
- sock = open(options.authors_file)
- authormap = dict([i.strip().split('=',1) for i in sock])
- sock.close()
-
-origin = os.path.abspath(args[0])
-if options.working:
- working = os.path.abspath(options.working)
-else:
- working = "%s.darcs" % origin
-patchfile = "%s.patch" % origin
-if options.logfile:
- logfile = os.path.abspath(options.logfile)
-else:
- logfile = "%s.log" % origin
-logsock = open(logfile, "a")
-if options.git_branch:
- git_branch = options.git_branch
-else:
- git_branch = "refs/heads/master"
-
-if options.progress:
- prognum = int(options.progress)
-else:
- prognum = 100
-
-progress("getting list of patches")
-if not len(import_marks):
- sock = os.popen("darcs changes --xml --reverse --repo %s" % origin)
-else:
- sock = os.popen("darcs changes --xml --reverse --repo %s --from-match 'hash %s'" % (origin, import_marks[-1]))
-buf = sock.read()
-sock.close()
-# this is hackish. we need to escape some bad chars, otherwise the xml
-# will not be valid
-buf = buf.replace('\x1b', '^[')
-if options.encoding:
- xmldoc = xml.dom.minidom.parseString(unicode(buf, options.encoding).encode('utf-8'))
-else:
- try:
- xmldoc = xml.dom.minidom.parseString(buf)
- except xml.parsers.expat.ExpatError:
+class Handler:
+ def __init__(self):
+ self.hashes = []
+ self.authormap = {}
+ self.export_marks = []
+ self.import_marks = []
+
+ def get_patchname(self, patch):
+ ret = []
+ s = ""
+ if patch.attributes['inverted'].value == 'True':
+ s = "UNDO: "
+ cs = patch.getElementsByTagName("name")[0].childNodes
+ if cs.length > 0:
+ ret.append(s + cs[0].data)
+ lines = patch.getElementsByTagName("comment")
+ if lines:
+ for i in lines[0].childNodes[0].data.split('\n'):
+ if not i.startswith("Ignore-this: "):
+ ret.append(i)
+ return "\n".join(ret).encode('utf-8')
+
+ def get_author(self, patch):
+ """darcs allows any freeform string, but fast-import has a more
+ strict format, so fix up broken author names here."""
+
+ author = patch.attributes['author'].value
+ if author in self.authormap:
+ author = self.authormap[author]
+ if not len(author):
+ author = "darcs-fast-export <darcs-fast-export>"
+ # add missing name
+ elif not ">" in author:
+ author = "%s <%s>" % (author.split('@')[0], author)
+ # avoid double quoting
+ elif author[0] == '"' and author[-1] == '"':
+ author = author[1:-1]
+ # name after email
+ elif author[-1] != '>':
+ author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1]
+ return author.encode('utf-8')
+
+ def get_date(self, patch):
try:
- import chardet
- except ImportError:
- sys.exit("Error, encoding is not utf-8. Please " +
- "either specify it with the --encoding " +
- "option or install chardet.")
- progress("encoding is not utf8, guessing charset")
- encoding = chardet.detect(buf)['encoding']
- progress("detected encoding is %s" % encoding)
- xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8'))
-sys.stdout.flush()
-
-darcs2 = False
-oldfashionedpatch = True
-cwd = os.getcwd()
-if os.path.exists(os.path.join(origin, "_darcs", "format")):
- sock = open(os.path.join(origin, "_darcs", "format"))
- format = [x.strip() for x in sock]
- sock.close()
- darcs2 = 'darcs-2' in format
- oldfashionedpatch = not 'hashed' in format
-if not oldfashionedpatch:
- progress("parsing the inventory")
- os.chdir(origin)
- parse_inventory()
-if not options.import_marks or not os.path.exists(working):
- # init the tmp darcs repo
- os.mkdir(working)
- os.chdir(working)
- if darcs2:
- os.system("darcs init --darcs-2")
- else:
- os.system("darcs init --old-fashioned-inventory")
-else:
- os.chdir(working)
-if options.import_marks:
- sock = os.popen("darcs pull -a --match 'hash %s' %s" % (import_marks[-1], origin))
- log("Building/updating working directory:\n%s" % sock.read())
- sock.close()
-
-# this is the number of the NEXT patch
-count = 1
-patches = xmldoc.getElementsByTagName('patch')
-if len(import_marks):
- patches = patches[1:]
- count = len(import_marks) + 1
-if len(export_marks):
- # this is the mark number of the NEXT patch
- markcount = int(export_marks[-1].split(' ')[0][1:]) + 1
-else:
- markcount = count
-# this may be huge and we need it many times
-patchnum = len(patches)
-
-if not len(import_marks):
- progress("starting export, repo has %d patches" % patchnum)
-else:
- progress("continuing export, %d patches to convert" % patchnum)
-paths = []
-for i in patches:
- # apply the patch
- hash = i.attributes['hash'].value
- buf = ["\nNew patches:\n"]
- if oldfashionedpatch:
- sock = gzip.open(os.path.join(origin, "_darcs", "patches", hash))
- else:
- sock = gzip.open(os.path.join(origin, "_darcs", "patches", hashes[count-1]))
- buf.append(sock.read())
- sock.close()
- sock = os.popen("darcs changes --context")
- buf.append(sock.read())
- sock.close()
- sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
- sock.stdin.write("".join(buf))
- sock.stdin.close()
- log("Applying %s:\n%s" % (hash, sock.stdout.read()))
- sock.stdout.close()
- message = get_patchname(i)
- # export the commit
- print "commit %s" % git_branch
- print "mark :%s" % markcount
- if options.export_marks:
- export_marks.append(":%s %s" % (markcount, hash))
- date = get_date(i.attributes['date'].value)
- print "committer %s %s %s" % (get_author(i), date, get_zone_str())
- print "data %d\n%s" % (len(message), message)
- if markcount > 1:
- print "from :%s" % (markcount-1)
- # export the files
- for j in paths:
- print "D %s" % j
- paths = []
- for (root, dirs, files) in os.walk ("."):
- for f in files:
- j = os.path.normpath(os.path.join(root, f))
- if j.startswith("_darcs") or "-darcs-backup" in j:
- continue
- paths.append(j)
- sock = open(j)
- buf = sock.read()
+ date = time.strptime(patch, "%Y%m%d%H%M%S")
+ except ValueError:
+ date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y')
+ return calendar.timegm(date)
+
+ def progress(self, s):
+ print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)
+ sys.stdout.flush()
+
+ def log(self, s):
+ self.logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s))
+ self.logsock.flush()
+
+ def parse_inventory(self, sock=None):
+ prev = None
+ nextprev = False
+ buf = []
+ if not sock:
+ sock = self.open(os.path.join(self.origin, "_darcs", "hashed_inventory"))
+ for i in sock.readlines():
+ if i.startswith("hash"):
+ buf.insert(0, i[6:-1])
+ if i.startswith("Starting with inventory:"):
+ nextprev = True
+ elif nextprev:
+ prev = i[:-1]
+ nextprev = False
+ sock.close()
+ for i in buf:
+ self.hashes.insert(0, i)
+ if prev:
+ sock = self.gzip_open(os.path.join(self.origin, "_darcs", "inventories", prev))
+ self.parse_inventory(sock)
+
+ # this is like gzip.open but supports urls as well
+ def gzip_open(self, path):
+ if os.path.exists(path):
+ return gzip.open(path)
+ buf = urllib.urlopen(path).read()
+ sock = StringIO.StringIO(buf)
+ return gzip.GzipFile(fileobj=sock)
+
+ # this is like os.path.exists but supports urls as well
+ def path_exists(self, path):
+ if os.path.exists(path):
+ return True
+ else:
+ try:
+ urllib2.urlopen(urllib2.Request(path))
+ return True
+ except urllib2.HTTPError, e:
+ return False
+
+ # this is like open, but supports urls as well
+ def open(self, path):
+ if os.path.exists(path):
+ return open(path)
+ else:
+ return urllib.urlopen(path)
+
+ def handle_opts(self):
+ # Option Parser
+ usage="%prog [options] darcsrepo"
+ opp = optparse.OptionParser(usage=usage)
+ opp.add_option("--import-marks", metavar="IFILE",
+ help="read state for incremental imports from IFILE")
+ opp.add_option("--export-marks", metavar="OFILE",
+ help="write state for incremental imports from OFILE")
+ opp.add_option("--encoding",
+ help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess")
+ opp.add_option("--authors-file", metavar="F",
+ help="read author transformations in old=new format from F")
+ opp.add_option("--working", metavar="W",
+ help="working directory which is removed at the end of non-incremental conversions")
+ opp.add_option("--logfile", metavar="L",
+ help="log file which contains the output of external programs invoked during the conversion")
+ opp.add_option("--git-branch", metavar="B",
+ help="git branch [default: refs/heads/master]")
+ opp.add_option("--progress", metavar="P",
+ help="insert progress statements after every n commit [default: 100]")
+ (self.options, self.args) = opp.parse_args()
+ if len(self.args) < 1:
+ opp.error("darcsrepo required")
+
+ # read author mapping file in gitauthors format,
+		# i.e. in=out (one per line)
+ if self.options.authors_file:
+ sock = open(self.options.authors_file)
+ self.authormap = dict([i.strip().split('=',1) for i in sock])
sock.close()
- # darcs does not track the executable bit :/
- print "M 644 inline %s" % j
- print "data %s\n%s" % (len(buf), buf)
- if message[:4] == "TAG ":
- tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_')
- print "tag %s" % tag
- print "from :%s" % markcount
- print "tagger %s %s %s" % (get_author(i), date, get_zone_str())
- print "data %d\n%s" % (len(message), message)
- if count % prognum == 0:
- progress("%d/%d patches" % (count, patchnum))
- count += 1
- markcount += 1
-os.chdir(cwd)
+ if "://" not in self.args[0]:
+ self.origin = os.path.abspath(self.args[0])
+ else:
+ self.origin = self.args[0].strip('/')
+ if self.options.working:
+ self.working = os.path.abspath(self.options.working)
+ else:
+ if "://" not in self.origin:
+ self.working = "%s.darcs" % self.origin
+ else:
+ self.working = "%s.darcs" % os.path.split(self.origin)[-1]
+ if self.options.logfile:
+ logfile = os.path.abspath(self.options.logfile)
+ else:
+ if "://" not in self.origin:
+ logfile = "%s.log" % self.origin
+ else:
+ logfile = "%s.log" % os.path.split(self.origin)[-1]
+ self.logsock = open(logfile, "a")
+ if self.options.git_branch:
+ self.git_branch = self.options.git_branch
+ else:
+ self.git_branch = "refs/heads/master"
+
+ if self.options.progress:
+ self.prognum = int(self.options.progress)
+ else:
+ self.prognum = 100
+
+ def handle_import_marks(self):
+ if self.options.import_marks:
+ sock = open(self.options.import_marks)
+ for i in sock.readlines():
+ line = i.strip()
+ if not len(line):
+ continue
+ self.import_marks.append(line.split(' ')[1])
+ self.export_marks.append(line)
+ sock.close()
+
+ def get_patches(self):
+ self.progress("getting list of patches")
+ if not len(self.import_marks):
+ sock = os.popen("darcs changes --xml --reverse --repo %s" % self.origin)
+ else:
+ sock = os.popen("darcs changes --xml --reverse --repo %s --from-match 'hash %s'" % (self.origin, self.import_marks[-1]))
+ buf = sock.read()
+ sock.close()
+ # this is hackish. we need to escape some bad chars, otherwise the xml
+ # will not be valid
+ buf = buf.replace('\x1b', '^[')
+ if self.options.encoding:
+ xmldoc = xml.dom.minidom.parseString(unicode(buf, self.options.encoding).encode('utf-8'))
+ else:
+ try:
+ xmldoc = xml.dom.minidom.parseString(buf)
+ except xml.parsers.expat.ExpatError:
+ try:
+ import chardet
+ except ImportError:
+ sys.exit("Error, encoding is not utf-8. Please " +
+ "either specify it with the --encoding " +
+ "option or install chardet.")
+ self.progress("encoding is not utf8, guessing charset")
+ encoding = chardet.detect(buf)['encoding']
+ self.progress("detected encoding is %s" % encoding)
+ xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8'))
+ sys.stdout.flush()
+ return xmldoc.getElementsByTagName('patch')
+
+ def setup_workdir(self):
+ darcs2 = False
+ self.oldfashionedpatch = True
+ self.cwd = os.getcwd()
+ if self.path_exists(os.path.join(self.origin, "_darcs", "format")):
+ sock = self.open(os.path.join(self.origin, "_darcs", "format"))
+ format = [x.strip() for x in sock]
+ sock.close()
+ darcs2 = 'darcs-2' in format
+ self.oldfashionedpatch = not 'hashed' in format
+ if not self.oldfashionedpatch:
+ self.progress("parsing the inventory")
+ if "://" not in self.origin:
+ os.chdir(self.origin)
+ self.parse_inventory()
+ if not self.options.import_marks or not os.path.exists(self.working):
+ # init the tmp darcs repo
+ os.mkdir(self.working)
+ os.chdir(self.working)
+ if darcs2:
+ os.system("darcs init --darcs-2")
+ else:
+ os.system("darcs init --old-fashioned-inventory")
+ else:
+ os.chdir(self.working)
+ if self.options.import_marks:
+ sock = os.popen("darcs pull -a --match 'hash %s' %s" % (self.import_marks[-1], self.origin))
+ self.log("Building/updating working directory:\n%s" % sock.read())
+ sock.close()
+
+ def export_patches(self):
+ patches = self.get_patches()
+ # this is the number of the NEXT patch
+ count = 1
+ if len(self.import_marks):
+ patches = patches[1:]
+ count = len(self.import_marks) + 1
+ if len(self.export_marks):
+ # this is the mark number of the NEXT patch
+ markcount = int(self.export_marks[-1].split(' ')[0][1:]) + 1
+ else:
+ markcount = count
+ # this may be huge and we need it many times
+ patchnum = len(patches)
+
+ if not len(self.import_marks):
+ self.progress("starting export, repo has %d patches" % patchnum)
+ else:
+ self.progress("continuing export, %d patches to convert" % patchnum)
+ paths = []
+ for i in patches:
+ # apply the patch
+ hash = i.attributes['hash'].value
+ buf = ["\nNew patches:\n"]
+ if self.oldfashionedpatch:
+ sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", hash))
+ else:
+ sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", self.hashes[count-1]))
+ buf.append(sock.read())
+ sock.close()
+ sock = os.popen("darcs changes --context")
+ buf.append(sock.read())
+ sock.close()
+ sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ sock.stdin.write("".join(buf))
+ sock.stdin.close()
+ self.log("Applying %s:\n%s" % (hash, sock.stdout.read()))
+ sock.stdout.close()
+ message = self.get_patchname(i)
+ # export the commit
+ print "commit %s" % self.git_branch
+ print "mark :%s" % markcount
+ if self.options.export_marks:
+ self.export_marks.append(":%s %s" % (markcount, hash))
+ date = self.get_date(i.attributes['date'].value)
+ print "committer %s %s +0000" % (self.get_author(i), date)
+ print "data %d\n%s" % (len(message), message)
+ if markcount > 1:
+ print "from :%s" % (markcount-1)
+ # export the files
+ for j in paths:
+ print "D %s" % j
+ paths = []
+ for (root, dirs, files) in os.walk ("."):
+ for f in files:
+ j = os.path.normpath(os.path.join(root, f))
+ if j.startswith("_darcs") or "-darcs-backup" in j:
+ continue
+ paths.append(j)
+ sock = open(j)
+ buf = sock.read()
+ sock.close()
+ # darcs does not track the executable bit :/
+ print "M 644 inline %s" % j
+ print "data %s\n%s" % (len(buf), buf)
+ if message[:4] == "TAG ":
+ tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_')
+ print "tag %s" % tag
+ print "from :%s" % markcount
+ print "tagger %s %s +0000" % (self.get_author(i), date)
+ print "data %d\n%s" % (len(message), message)
+ if count % self.prognum == 0:
+ self.progress("%d/%d patches" % (count, patchnum))
+ count += 1
+ markcount += 1
+
+ os.chdir(self.cwd)
+
+ if not self.options.export_marks:
+ shutil.rmtree(self.working)
+ self.logsock.close()
+
+ def handle_export_marks(self):
+ if self.options.export_marks:
+ self.progress("writing export marks")
+ sock = open(self.options.export_marks, 'w')
+ sock.write("\n".join(self.export_marks))
+ sock.write("\n")
+ sock.close()
-if not options.export_marks:
- shutil.rmtree(working)
-logsock.close()
+ self.progress("finished")
-if options.export_marks:
- progress("writing export marks")
- sock = open(options.export_marks, 'w')
- sock.write("\n".join(export_marks))
- sock.write("\n")
- sock.close()
+ def handle(self):
+ self.handle_opts()
+ self.handle_import_marks()
+ self.setup_workdir()
+ self.export_patches()
+ self.handle_export_marks()
-progress("finished")
+if __name__ == "__main__":
+ h = Handler()
+ h.handle()
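A behavioural fix buried in the refactor above: get_date now uses
calendar.timegm, which treats the parsed struct_time as UTC (matching
the +0000 offsets now printed for committer and tagger), instead of the
old local-time mktime arithmetic. A doctest-style sketch with an
illustrative darcs timestamp:

    >>> import time, calendar
    >>> calendar.timegm(time.strptime('20090824153012', '%Y%m%d%H%M%S'))
    1251127812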
diff --git a/exporters/darcs/darcs-fast-export.txt b/exporters/darcs/darcs-fast-export.txt
index 3ddd02e..d404ecf 100644
--- a/exporters/darcs/darcs-fast-export.txt
+++ b/exporters/darcs/darcs-fast-export.txt
@@ -18,6 +18,10 @@ The script can produce the fast-import stream format from the darcs
repository. It supports incremental conversion as well, via the
--import-marks / --export-marks switches.
+Optionally the darcsrepo string may be an HTTP repository; in that
+case only the patches are downloaded, not the pristine tree, speeding
+up a one-time import.
+
== OPTIONS
-h, --help::
diff --git a/exporters/darcs/darcs-fast-import b/exporters/darcs/darcs-fast-import
index 2955164..69ec7bb 100755
--- a/exporters/darcs/darcs-fast-import
+++ b/exporters/darcs/darcs-fast-import
@@ -4,7 +4,7 @@
darcs-fast-import - darcs backend for fast data exporters
- Copyright (c) 2008 Miklos Vajna <vmiklos@frugalware.org>
+ Copyright (c) 2008, 2009, 2010 Miklos Vajna <vmiklos@frugalware.org>
Copyright (c) 2008 Matthias Andree <matthias.andree@gmx.de>
This program is free software; you can redistribute it and/or modify
@@ -81,6 +81,11 @@ class Handler:
raise Exception(s)
def get_date(self, ts, tz):
+ # first fix the case when tz is higher than +1200, as
+ # darcs won't accept it
+ if int(tz[:3]) > 12:
+ ts = str(int(ts) + 60*60*24)
+ tz = str(int(tz[:3])-24) + tz[3:]
# int(ts) is seconds since epoch. Since we're trying to
# capture both the absolute time of the commit and the
# localtime in the timezone of the committer, we need to turn
@@ -99,6 +104,13 @@ class Handler:
items = s.split(' ')
return " ".join(items[:-1]) + " " + tz + " " + items[-1]
+ def invoke_darcs(self, cmdline):
+ if os.system("darcs %s" % cmdline) != 0:
+ self.bug("darcs failed")
+
+ def invoke_add(self, path):
+ self.invoke_darcs("add --boring --case-ok %s" % path)
+
def handle_mark(self):
if self.line.startswith("mark :"):
self.mark_num = int(self.line[6:-1])
@@ -143,6 +155,8 @@ class Handler:
sock.stdin.close()
self.log("Tagging %s:\n%s" % (version, sock.stdout.read()))
sock.stdout.close()
+ if sock.wait() != 0:
+ self.bug("darcs tag failed: '%s'" % sock.returncode)
def handle_commit(self):
if not self.prevfiles and self.options.import_marks:
@@ -156,6 +170,7 @@ class Handler:
self.files.append(path)
self.prevfiles = self.files[:]
adds = []
+ symlinks = []
self.read_next_line()
self.handle_mark()
@@ -173,32 +188,47 @@ class Handler:
self.read_next_line()
while self.line.startswith("merge "):
self.read_next_line()
+ change = False
while len(self.line) > 0:
if self.line.startswith("deleteall"):
path = self.line[2:-1]
for path in self.files:
os.unlink(path)
self.files = []
+ change = True
elif self.line.startswith("D "):
path = self.line[2:-1]
if os.path.exists(path):
os.unlink(path)
if path in self.files:
self.files.remove(path)
+ change = True
elif self.line.startswith("R "):
- os.system("darcs mv %s" % self.line[2:])
+ self.invoke_darcs("mv %s" % self.line[2:])
+ change = True
elif self.line.startswith("C "):
src, dest = self.line[:-1].split(' ')[1:]
shutil.copy(src.strip('"'), dest.strip('"'))
- os.system("darcs add %s" % dest)
+ self.invoke_add(dest)
+ change = True
elif self.line.startswith("M "):
items = self.line.split(' ')
path = items[3][:-1]
+ dir = os.path.split(path)[0]
+ if len(dir) and not os.path.exists(dir):
+ os.makedirs(dir)
+ if items[1] == "120000":
+ if not self.options.symhack:
+ print "Adding symbolic links (symlinks) is not supported by Darcs."
+ sys.exit(2)
+ idx = int(items[2][1:]) # TODO: handle inline symlinks
+ symlinks.append((self.marks[idx], path))
+ self.read_next_line()
+ continue
sock = open(path, "w")
if items[2] != "inline":
idx = int(items[2][1:])
sock.write(self.marks[idx])
- del self.marks[idx]
else:
self.read_next_line()
self.handle_data()
@@ -208,6 +238,7 @@ class Handler:
adds.append(path)
if path not in self.files:
self.files.append(path)
+ change = True
else:
self.unread_line = True
break
@@ -215,15 +246,35 @@ class Handler:
if not len(self.line):
break
+ if not change:
+ # darcs does not support empty commits
+ return
for i in adds:
- os.system("darcs add %s" % i)
- sock = subprocess.Popen(["darcs", "record", "--ignore-times", "-a", "--pipe"], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
- buf = [self.date, self.ident, self.short, self.long]
- sock.stdin.write("\n".join(buf))
+ self.invoke_add(i)
+ args = ["darcs", "record", "--ignore-times", "-a", "--pipe"]
+ buf = [self.date, self.ident]
+ if not len(self.short):
+ args.extend(['-m', ''])
+ else:
+ buf.extend([self.short, self.long])
+ sock = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ sock.stdin.write("\n".join(buf)+"\n")
sock.stdin.close()
self.log("Recording :%s:\n%s" % (self.mark_num, sock.stdout.read()))
sock.stdout.close()
-
+ if sock.wait() != 0:
+ self.bug("darcs record failed: '%s'" % sock.returncode)
+
+ for src, path in symlinks:
+ # symlink does not do what we want if path is
+ # already there
+ if os.path.exists(path):
+ # rmtree() does not work on symlinks
+ if os.path.islink(path):
+ os.remove(path)
+ else:
+ shutil.rmtree(path)
+ os.symlink(src, path)
if self.options.export_marks:
# yeah, an xml parser would be better, but
# should we mess with encodings just because of
@@ -235,19 +286,24 @@ class Handler:
self.export_marks.append(":%s %s" % (self.mark_num, hash))
def handle_progress(self, s):
- print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s.strip())
+ print "import progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s.strip())
sys.stdout.flush()
def handle_opts(self):
# Option Parser
usage="%prog [options]"
opp = optparse.OptionParser(usage=usage)
+ opp.set_defaults(symhack=False)
opp.add_option("--import-marks", metavar="IFILE",
help="read state for incremental imports from IFILE")
opp.add_option("--export-marks", metavar="OFILE",
help="write state for incremental imports to OFILE")
opp.add_option("--logfile", metavar="L",
help="log file which contains the output of external programs invoked during the conversion")
+ opp.add_option("--symhack", action="store_true", dest="symhack",
+ help="Do not error out when a symlink would be created, just create it in the workdir")
+ opp.add_option("--progress", metavar="P",
+ help="insert progress statements after every n commit [default: 100]")
(self.options, args) = opp.parse_args()
if self.options.logfile:
@@ -255,6 +311,11 @@ class Handler:
else:
logfile = "_darcs/import.log"
self.logsock = open(os.path.abspath(logfile), "a")
+
+ if self.options.progress:
+ self.prognum = int(self.options.progress)
+ else:
+ self.prognum = 0
def log(self, s):
self.logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s))
@@ -282,6 +343,7 @@ class Handler:
self.handle_opts()
self.handle_import_marks()
+ commitcount = 0
while not self.eof:
self.read_next_line()
if not len(self.line[:-1]):
@@ -290,6 +352,9 @@ class Handler:
self.handle_blob()
elif self.line.startswith("commit"):
self.handle_commit()
+ commitcount += 1
+ if self.prognum != 0 and commitcount % self.prognum == 0:
+ self.handle_progress("%d patches" % commitcount)
elif self.line.startswith("tag"):
self.handle_tag()
elif self.line.startswith("reset"):
diff --git a/exporters/darcs/darcs-fast-import.txt b/exporters/darcs/darcs-fast-import.txt
index 09c7b1e..a7f2a12 100644
--- a/exporters/darcs/darcs-fast-import.txt
+++ b/exporters/darcs/darcs-fast-import.txt
@@ -33,3 +33,25 @@ as well, via the --import-marks / --export-marks switches.
--logfile::
The output of external commands are redirected to a log file. You can
specify the path of that file with this parameter.
+
+--symhack::
+	Enable a hack for symbolic links. darcs add does not handle them,
+	but when they are merely added we can still create them in the
+	working directory. This can be handy when, for example, the symbolic
+	link is in a subdirectory of the project and you don't even care
+	about that subdirectory. The hack can be useful, but be extremely
+	careful when you use it.
+
+--progress=<n>::
+	Insert progress statements after every <n> created patches. By
+	default nothing is printed, since progress info is usually provided
+	by the exporter. Use this option if the exporter lacks such a
+	switch but you still want some feedback.
+
+== EXIT CODES
+
+The exit codes are:
+
+* 0 on success
+* 1 on an unhandled exception
+* 2 when the stream would require the importer to create a symlink
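
A minimal end-to-end invocation exercising both new switches, modelled on the
test scripts added by this patch (the gitrepo/darcsrepo paths are
illustrative):

----
(cd gitrepo; git fast-export HEAD) | \
    (cd darcsrepo; darcs-fast-import --symhack --progress=100)
echo $?  # 0 on success; without --symhack a stream with symlinks exits 2
----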
diff --git a/exporters/darcs/git-darcs b/exporters/darcs/git-darcs
index eb70338..18455a2 100755
--- a/exporters/darcs/git-darcs
+++ b/exporters/darcs/git-darcs
@@ -2,7 +2,7 @@
#
# git-darcs - bidirectional operation between a darcs repo and git
#
-# Copyright (c) 2008 by Miklos Vajna <vmiklos@frugalware.org>
+# Copyright (c) 2008, 2010 by Miklos Vajna <vmiklos@frugalware.org>
#
# Based on git-bzr, which is
#
@@ -32,24 +32,25 @@ add()
shift
if ! [ -n "$name" -a -n "$location" ]; then
echo "Usage: git darcs add name location [darcs-fast-export options]"
- exit
+ return 1
fi
if git remote show |grep -q $name; then
echo "There is already a remote with that name"
- exit
+ return 1
fi
if [ -n "$(git config git-darcs.$name.location)" ]; then
echo "There is already a darcs repo with that name"
- exit
+ return 1
fi
- if [ ! -d $location/_darcs ]; then
+ repo=$location/_darcs
+ if [ ! -d $repo ] && ! wget --quiet --spider $repo; then
echo "Remote is not a darcs repository"
- exit
+ return 1
fi
git config git-darcs.$name.location $location
- git config git-darcs.$name.darcs-fast-export-options "$*"
echo "Darcs repo $name added. You can fetch it with 'git darcs fetch $name'"
if ! [ -z "$*" ]; then
+ git config git-darcs.$name.darcs-fast-export-options "$*"
echo "darcs-fast-export will get options: $*"
fi
}
@@ -59,7 +60,7 @@ get_location()
l=$(git config git-darcs.$remote.location)
if [ -z "$l" ]; then
echo "Cannot find darcs remote with name '$remote'." >&2
- exit
+ return 1
fi
echo $l
}
@@ -70,13 +71,17 @@ fetch()
shift
if ! [ -n "$remote" -a -z "$*" ]; then
echo "Usage: git darcs fetch reponame"
- exit
+ return 1
fi
- location=$(get_location $remote)
+ location=$(get_location $remote) || return $?
git_map=$git_dir/darcs-git/$remote-git-map
darcs_map=$git_dir/darcs-git/$remote-darcs-map
- common_opts="--working $git_dir/darcs-git/repo --logfile $git_dir/darcs-git/fetch.log --git-branch=darcs/$remote"
+ common_opts="--working $git_dir/darcs-git/repo --logfile $git_dir/darcs-git/fetch.log --git-branch=refs/remotes/darcs/$remote"
dfe_opts=$(git config git-darcs.$remote.darcs-fast-export-options)
+ pre_fetch="$(git config git-darcs.$remote.pre-fetch)"
+ if [ -n "$pre_fetch" ]; then
+ $pre_fetch
+ fi
if [ ! -f $git_map -a ! -f $darcs_map ]; then
echo "There doesn't seem to be an existing refmap."
echo "Doing an initial import"
@@ -85,20 +90,24 @@ fetch()
git fast-import --export-marks=$git_map
elif [ -f $git_map -a -f $darcs_map ]; then
echo "Updating remote $remote"
- old_rev=$(git rev-parse darcs/$remote)
+ old_rev=$(git rev-parse refs/remotes/darcs/$remote)
darcs-fast-export --import-marks=$darcs_map --export-marks=$darcs_map $common_opts $dfe_opts $location | \
git fast-import --quiet --import-marks=$git_map --export-marks=$git_map
- new_rev=$(git rev-parse darcs/$remote)
+ new_rev=$(git rev-parse refs/remotes/darcs/$remote)
if [ "$old_rev" != "$new_rev" ]; then
echo "Fetched the following updates:"
git shortlog $old_rev..$new_rev
else
echo "Nothing fetched."
- exit
+ return 0
fi
else
echo "One of the mapfiles is missing! Something went wrong!"
- exit
+ return 1
+ fi
+ post_fetch="$(git config git-darcs.$remote.post-fetch)"
+ if [ -n "$post_fetch" ]; then
+ $post_fetch
fi
}
@@ -108,15 +117,15 @@ pull()
shift
if ! [ -n "$remote" -a -z "$*" ]; then
echo "Usage: git darcs pull reponame"
- exit
+ return 1
fi
- fetch $remote
+ fetch $remote || return $?
# see if we need to merge or rebase
branch=$(git symbolic-ref HEAD|sed 's|.*/||')
if [ "$(git config branch.$branch.rebase)" = "true" ]; then
- git rebase darcs/$remote
+ git rebase refs/remotes/darcs/$remote
else
- git merge darcs/$remote
+ git merge refs/remotes/darcs/$remote
fi
}
@@ -126,30 +135,38 @@ push()
shift
if ! [ -n "$remote" -a -z "$*" ]; then
echo "Usage: git darcs push reponame"
- exit
+ return 1
fi
- location=$(get_location $remote)
- if [ -n "$(git rev-list --left-right HEAD...darcs/$remote | sed -n '/^>/ p')" ]; then
+ location=$(get_location $remote) || return $?
+ if [ -n "$(git rev-list --left-right HEAD...refs/remotes/darcs/$remote | sed -n '/^>/ p')" ]; then
echo "HEAD is not a strict child of $remote, cannot push. Merge first"
- exit
+ return 1
fi
- if [ -z "$(git rev-list --left-right HEAD...darcs/$remote | sed -n '/^</ p')" ]; then
+ if [ -z "$(git rev-list --left-right HEAD...refs/remotes/darcs/$remote | sed -n '/^</ p')" ]; then
echo "Nothing to push. Commit something first"
- exit
+ return 1
fi
git_map=$git_dir/darcs-git/$remote-git-map
darcs_map=$git_dir/darcs-git/$remote-darcs-map
if [ ! -f $git_map -o ! -f $darcs_map ]; then
echo "We do not have refmapping yet. Then how can I push?"
- exit
+ return 1
+ fi
+ pre_push="$(git config git-darcs.$remote.pre-push)"
+ if [ -n "$pre_push" ]; then
+ $pre_push
fi
echo "Pushing the following updates:"
- git shortlog darcs/$remote..
+ git shortlog refs/remotes/darcs/$remote..
git fast-export --import-marks=$git_map --export-marks=$git_map HEAD | \
(cd $location; darcs-fast-import --import-marks=$darcs_map --export-marks=$darcs_map \
--logfile $git_dir/darcs-git/push.log)
if [ $? == 0 ]; then
- git update-ref darcs/$remote HEAD
+ git update-ref refs/remotes/darcs/$remote HEAD
+ post_push="$(git config git-darcs.$remote.post-push)"
+ if [ -n "$post_push" ]; then
+ $post_push
+ fi
fi
}
@@ -159,18 +176,18 @@ list()
if [ -z "$*" ]
then
git config -l | sed -n -e '/git-darcs\..*/ {s/git-darcs\.//; s/\.location=.*//p}'
- exit
+ return 0
elif [ "$#" -eq 1 ]
then
case $1 in
-v|--verbose)
git config -l | sed -n -e '/git-darcs\..*/ {s/git-darcs\.//; s/\.location=/\t/p}'
- exit
+ return 0
;;
esac
fi
echo "Usage: git darcs list [-v|--verbose]"
- exit 1
+ return 1
}
# Find the darcs commit(s) supporting a git SHA1 prefix
@@ -181,9 +198,9 @@ find_darcs()
if [ -z "$sha1" -o -n "$*" ]
then
echo "Usage: git darcs find-darcs <sha1-prefix>"
- exit 1
+ return 1
fi
- for remote in $git_dir/darcs/*
+ for remote in $(git for-each-ref --format='%(refname)' refs/remotes/darcs)
do
remote=`basename $remote`
git_map=$git_dir/darcs-git/$remote-git-map
@@ -191,7 +208,7 @@ find_darcs()
if [ ! -f $git_map -o ! -f $darcs_map ]
then
echo "Missing mappings for remote $remote"
- exit 1
+ return 1
fi
for row in `sed -n -e "/:.* $sha1.*/ s/[^ ]*/&/p" $git_map`
do
@@ -208,9 +225,9 @@ find_git()
if [ -z "$patch" -o -n "$*" ]
then
echo "Usage: git darcs find-git <patch-prefix>"
- exit 1
+ return 1
fi
- for remote in $git_dir/darcs/*
+ for remote in $(git for-each-ref --format='%(refname)' refs/remotes/darcs)
do
remote=`basename $remote`
git_map=$git_dir/darcs-git/$remote-git-map
@@ -218,7 +235,7 @@ find_git()
if [ ! -f $git_map -o ! -f $darcs_map ]
then
echo "Missing mappings for remote $remote"
- exit 1
+ return 1
fi
for row in `sed -n -e "/:.* $patch.*/ s/[^ ]*/&/p" $darcs_map`
do
@@ -230,7 +247,7 @@ find_git()
git rev-parse 2> /dev/null
if [ $? != 0 ]; then
echo "Must be inside a git repository to work"
- exit
+ exit 1
fi
git_dir=$(git rev-parse --git-dir)
@@ -253,7 +270,7 @@ case $command in
*)
echo "Usage: git darcs [COMMAND] [OPTIONS]"
echo "Commands: add, push, fetch, pull, list, find-darcs, find-git"
- exit
+ exit 1
;;
esac
diff --git a/exporters/darcs/git-darcs.txt b/exporters/darcs/git-darcs.txt
index 7558329..8bf5b33 100644
--- a/exporters/darcs/git-darcs.txt
+++ b/exporters/darcs/git-darcs.txt
@@ -20,7 +20,7 @@ A typical workflow is:
$ mkdir git-repo
$ cd git-repo
$ git init
-$ git darcs add upstream ../darcs-repo
+$ git darcs add upstream /path/to/darcs-repo
$ git darcs pull upstream
... hack, hack, hack ...
@@ -70,3 +70,23 @@ find-darcs::
find-git::
Searches for git commits matching a darcs patch prefix.
The syntax is `find-git <patch-prefix>`.
+
+== HOOKS
+
+It's possible to automatically run commands before and after the fetch
+and push subcommands. For example, if you want to automatically run
+`darcs pull -a` before a `git darcs fetch upstream`:
+
+----
+git config git-darcs.upstream.pre-fetch "darcs pull -a --repodir=/path/to/darcs-repo"
+----
+
+Or, if you want to automatically `darcs send` all patches after a
+`git darcs push upstream`:
+
+----
+git config git-darcs.upstream.post-push "darcs send -a --repodir=/path/to/darcs-repo"
+----
+
+== SEE ALSO
+*git*(1), *darcs*(1)
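
Note that the script wires up four hook points in total: pre-fetch,
post-fetch, pre-push and post-push. A sketch configuring all of them for a
remote named upstream (the darcs commands themselves are illustrative):

----
git config git-darcs.upstream.pre-fetch  "darcs pull -a --repodir=/path/to/darcs-repo"
git config git-darcs.upstream.post-fetch "darcs changes --last=1 --repodir=/path/to/darcs-repo"
git config git-darcs.upstream.pre-push   "darcs pull -a --repodir=/path/to/darcs-repo"
git config git-darcs.upstream.post-push  "darcs send -a --repodir=/path/to/darcs-repo"
----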
diff --git a/exporters/darcs/t/lib-httpd.sh b/exporters/darcs/t/lib-httpd.sh
new file mode 100644
index 0000000..fad953e
--- /dev/null
+++ b/exporters/darcs/t/lib-httpd.sh
@@ -0,0 +1,67 @@
+#!/bin/sh
+#
+# This is based on git's t/lib-httpd.sh, which is
+# Copyright (c) 2008 Clemens Buchacher <drizzd@aon.at>
+#
+
+if test -n "$DFE_TEST_SKIP_HTTPD"
+then
+	echo "skipping test (unset DFE_TEST_SKIP_HTTPD to enable)"
+ exit
+fi
+
+LIB_HTTPD_PATH=${LIB_HTTPD_PATH-'/usr/sbin/httpd'}
+LIB_HTTPD_PORT=${LIB_HTTPD_PORT-'8111'}
+
+HTTPD_ROOT_PATH="$PWD"/httpd
+HTTPD_DOCUMENT_ROOT_PATH=$HTTPD_ROOT_PATH/www
+
+if ! test -x "$LIB_HTTPD_PATH"
+then
+ echo "skipping test, no web server found at '$LIB_HTTPD_PATH'"
+ exit
+fi
+
+HTTPD_VERSION=`$LIB_HTTPD_PATH -v | \
+ sed -n 's/^Server version: Apache\/\([0-9]*\)\..*$/\1/p; q'`
+
+if test -n "$HTTPD_VERSION"
+then
+ if test -z "$LIB_HTTPD_MODULE_PATH"
+ then
+ if ! test $HTTPD_VERSION -ge 2
+ then
+ echo "skipping test, at least Apache version 2 is required"
+ exit
+ fi
+
+ LIB_HTTPD_MODULE_PATH='/usr/lib/apache'
+ fi
+else
+ error "Could not identify web server at '$LIB_HTTPD_PATH'"
+fi
+
+HTTPD_PARA="-d $HTTPD_ROOT_PATH -f $HTTPD_ROOT_PATH/apache.conf"
+
+prepare_httpd() {
+ mkdir -p $HTTPD_DOCUMENT_ROOT_PATH
+
+ ln -s $LIB_HTTPD_MODULE_PATH $HTTPD_ROOT_PATH/modules
+
+ echo "PidFile httpd.pid" > $HTTPD_ROOT_PATH/apache.conf
+ echo "DocumentRoot www" >> $HTTPD_ROOT_PATH/apache.conf
+ echo "ErrorLog error.log" >> $HTTPD_ROOT_PATH/apache.conf
+
+ HTTPD_URL=http://127.0.0.1:$LIB_HTTPD_PORT
+}
+
+start_httpd() {
+ prepare_httpd
+
+ "$LIB_HTTPD_PATH" $HTTPD_PARA \
+ -c "Listen 127.0.0.1:$LIB_HTTPD_PORT" -k start
+}
+
+stop_httpd() {
+ "$LIB_HTTPD_PATH" $HTTPD_PARA -k stop
+}
diff --git a/exporters/darcs/t/lib.sh b/exporters/darcs/t/lib.sh
index 3df0a8a..7d2218a 100644
--- a/exporters/darcs/t/lib.sh
+++ b/exporters/darcs/t/lib.sh
@@ -78,6 +78,10 @@ third line"
_drrec -a -m "remove and rename"
darcs mv a b
_drrecamend
+ echo c > c
+ darcs add c
+ # empty commit message
+ _drrec -a -m ""
cd ..
}
@@ -150,6 +154,7 @@ create_hg()
hg pull ../$1.tmp
hg merge
echo D > file
+ hg resolve -m file
echo "first line
second line
third line" | hg commit -l /dev/stdin
@@ -172,6 +177,12 @@ third line" | hg commit -l /dev/stdin
hg commit -m "add empty file"
hg rm file3
hg commit -m "remove file"
+ mkdir subdir
+ echo test > subdir/file
+ hg add subdir/file
+ hg commit -m "add subdir file"
+ echo test2 > subdir/file
+ hg commit -m "commit with weird date" -d "Fri Apr 03 12:38:26 2009 +1300"
cd ..
}
create_git()
@@ -180,6 +191,7 @@ create_git()
mkdir -p $1
cd $1
git init $2
+ git commit --allow-empty -m 'root commit'
echo A > file
git add file
git commit -a -m A
@@ -213,6 +225,23 @@ third line" | git commit -a -F -
git commit -a -m "add empty file"
rm file3
git commit -a -m "remove file"
+	# now add back 'file' with its old contents, so the mark gets
+ # reused
+ echo f > file
+ git add file
+ git commit -a -m "file: other -> f"
+ # this is a boring file for Darcs
+ touch foo.pyc
+ git add foo.pyc
+ git commit -a -m "boring file"
+	# replace an uppercase file with a lowercase one
+ echo SPAM > SPAM
+ git add SPAM
+ git commit -a -m SPAM
+ rm SPAM
+ echo spam > spam
+ git add spam
+ git commit -a -m "SPAM -> spam"
cd ..
}
@@ -226,12 +255,14 @@ diff_git()
diff_importgit()
{
+ test -z "`(cd $1.darcs; darcs diff)`" &&
diff --exclude _darcs --exclude .git --exclude '*-darcs-backup*' -Nur $1 $1.darcs
return $?
}
diff_importhg()
{
+ cd $1.darcs && test -z "`darcs diff 2>&1`" && cd .. &&
diff --exclude _darcs --exclude .hg --exclude '*-darcs-backup*' --exclude 'hg-export.*' \
--exclude '.hgtags' --exclude '*.orig' -Nur $1 $1.darcs
return $?
@@ -239,12 +270,14 @@ diff_importhg()
diff_importdarcs()
{
+ cd $1.importdarcs && test -z "`darcs diff 2>&1`" && cd .. &&
diff --exclude _darcs --exclude '*-darcs-backup*' -Nur $1 $2
return $?
}
diff_importbzr()
{
+ cd $1.darcs && test -z "`darcs diff 2>&1`" && cd .. &&
diff --exclude _darcs --exclude .bzr --exclude '*-darcs-backup*' -Nur $1 $1.darcs
return $?
}
@@ -260,6 +293,7 @@ diff_bzr()
diff_hg()
{
+ hg -R $1.hg update
diff --exclude _darcs --exclude .hg --exclude '*-darcs-backup*' -Nur $1.hg $1
return $?
}
diff --git a/exporters/darcs/t/test-hg.sh b/exporters/darcs/t/test-hg.sh
index ddde37a..95bfc4b 100644
--- a/exporters/darcs/t/test-hg.sh
+++ b/exporters/darcs/t/test-hg.sh
@@ -8,7 +8,7 @@ cd test.hg
hg init
cd ..
if [ "$1" != "--stdout" ]; then
- darcs-fast-export test |(cd test.hg; hg fastimport /dev/stdin)
+ darcs-fast-export test |(cd test.hg; hg fastimport -)
diff_hg test
exit $?
else
diff --git a/exporters/darcs/t/test2-git-http.sh b/exporters/darcs/t/test2-git-http.sh
new file mode 100644
index 0000000..02549e4
--- /dev/null
+++ b/exporters/darcs/t/test2-git-http.sh
@@ -0,0 +1,22 @@
+. ./lib.sh
+. ./lib-httpd.sh
+
+rm -rf test2.darcs test2.git httpd
+create_darcs test2 --darcs-2
+mkdir -p $HTTPD_DOCUMENT_ROOT_PATH
+mv -v test2 $HTTPD_DOCUMENT_ROOT_PATH
+ln -s $HTTPD_DOCUMENT_ROOT_PATH/test2 .
+
+mkdir test2.git
+cd test2.git
+git --bare init
+cd ..
+start_httpd
+darcs-fast-export $HTTPD_URL/test2 |(cd test2.git; git fast-import)
+ret=$?
+stop_httpd
+if [ $ret != 0 ]; then
+ exit $ret
+fi
+diff_git test2
+exit $?
diff --git a/exporters/darcs/t/testimport-gitsymlink.sh b/exporters/darcs/t/testimport-gitsymlink.sh
new file mode 100644
index 0000000..100c583
--- /dev/null
+++ b/exporters/darcs/t/testimport-gitsymlink.sh
@@ -0,0 +1,45 @@
+. ./lib.sh
+
+create_git test
+cd test
+# add two dirs with the same contents, then remove the second
+# and make it a symlink to the first
+mkdir dira
+echo blabla > dira/file
+echo blablabla > dira/file2
+mkdir dirb
+touch dirb/file
+touch dirb/file2
+git add dira dirb
+git commit -a -m "add dira/dirb"
+rm -rf dirb
+ln -s dira dirb
+git add dirb
+git commit -a -m "change a dir to a symlink"
+cd ..
+
+rm -rf test.darcs
+mkdir test.darcs
+cd test.darcs
+darcs init
+cd ..
+(cd test; git fast-export --progress=2 HEAD) | (cd test.darcs; darcs-fast-import)
+# We *do* want this to fail, but with exit code 2: that means we
+# detected that symlinks are not supported and the user does not get a
+# meaningless exception.
+if [ $? != 2 ]; then
+ exit 1
+fi
+
+# now try with the symhack option
+rm -rf test.darcs
+mkdir test.darcs
+cd test.darcs
+darcs init
+cd ..
+(cd test; git fast-export --progress=2 HEAD) | (cd test.darcs; darcs-fast-import --symhack)
+if [ $? != 0 ]; then
+ exit 1
+fi
+diff_importgit test
+exit $?
diff --git a/exporters/darcs/t/testimport-hg.sh b/exporters/darcs/t/testimport-hg.sh
index 76df76d..7f6d215 100644
--- a/exporters/darcs/t/testimport-hg.sh
+++ b/exporters/darcs/t/testimport-hg.sh
@@ -8,7 +8,6 @@ cd test.darcs
darcs init
cd ..
(cd test; $pypath/bzrlib/plugins/fastimport/exporters/hg-fast-export.py -r .) | (cd test.darcs; darcs-fast-import)
-rm test/{*.orig,hg-export.status}
if [ $? != 0 ]; then
exit 1
fi
diff --git a/exporters/darcs/x2d b/exporters/darcs/x2d
index 61c66ef..398103d 100755
--- a/exporters/darcs/x2d
+++ b/exporters/darcs/x2d
@@ -62,6 +62,12 @@ case $format in
;;
esac
+common_opts=""
+while [ -n "$2" ]
+do
+ common_opts="$common_opts $1"
+ shift 1
+done
origin="$1"
shift 1
@@ -77,7 +83,7 @@ fmark="$origin.darcs/_darcs/fast-import/ffi-marks"
mkdir -p $origin.darcs
cd $origin.darcs
-common_opts="--logfile $origin.darcs/_darcs/fast-import/log"
+common_opts="$common_opts --logfile $origin.darcs/_darcs/fast-import/log"
pypath="/$(python -c 'from distutils import sysconfig; print sysconfig.get_python_lib()[1:]')/"
if [ ! -f $dmark ]; then
diff --git a/exporters/darcs/x2d.txt b/exporters/darcs/x2d.txt
index eb2ec34..25ed6bb 100644
--- a/exporters/darcs/x2d.txt
+++ b/exporters/darcs/x2d.txt
@@ -6,7 +6,7 @@ x2d - convert git, bzr or hg repos to a darcs one using fast-export
== SYNOPSIS
-x2d -f <format> <otherrepo>
+x2d -f <format> [<importoptions>] <otherrepo>
== DESCRIPTION
@@ -24,3 +24,5 @@ importer's standard input.
-f <format>::
Specify the format of the source repo. Currently supported sources are
git, bzr and hg. Incremental conversion is supported for all of them.
+
+The rest of the options are passed directly to darcs-fast-import.
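
For example, to convert a git repository while passing --symhack through to
darcs-fast-import (the repository path is illustrative):

----
x2d -f git --symhack /path/to/gitrepo
----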
diff --git a/exporters/svn-fast-export.README b/exporters/svn-fast-export.README
new file mode 100644
index 0000000..e08277e
--- /dev/null
+++ b/exporters/svn-fast-export.README
@@ -0,0 +1,12 @@
+To compile svn-fast-export.c, use make. You'll need to install
+some packages first using the package manager on your OS:
+
+* libsvn-dev - the Subversion libraries
+* libapr1-dev - the Apache Portable Runtime libraries
+
+Note: If someone with good knowledge of the Subversion
+Python bindings could rewrite svn-fast-export.py so that
+https://bugs.launchpad.net/bzr-fastimport/+bug/273361
+went away, then there would be much rejoicing throughout
+the land and the need for svn-fast-export.c would largely
+disappear.
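
On a Debian-style system the build then boils down to the following (package
names as listed above; the Makefile lives in the exporters directory):

----
sudo apt-get install libsvn-dev libapr1-dev
cd exporters
make
----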
diff --git a/exporters/svn-fast-export.py b/exporters/svn-fast-export.py
index e44c6cb..fd88094 100755
--- a/exporters/svn-fast-export.py
+++ b/exporters/svn-fast-export.py
@@ -11,6 +11,7 @@
trunk_path = '/trunk/'
branches_path = '/branches/'
tags_path = '/tags/'
+address = 'localhost'
first_rev = 1
final_rev = 0
@@ -123,9 +124,9 @@ def export_revision(rev, repo, fs, pool):
# Do the recursive crawl.
if props.has_key('svn:author'):
- author = "%s <%s@localhost>" % (props['svn:author'], props['svn:author'])
+ author = "%s <%s@%s>" % (props['svn:author'], props['svn:author'], address)
else:
- author = 'nobody <nobody@localhost>'
+ author = 'nobody <nobody@users.sourceforge.net>'
if len(file_changes) == 0:
svn_pool_destroy(revpool)
@@ -165,7 +166,6 @@ def crawl_revisions(pool, repos_path):
youngest_rev = svn_fs_youngest_rev(fs_obj, pool)
- first_rev = 1
if final_rev == 0:
final_rev = youngest_rev
for rev in xrange(first_rev, final_rev + 1):
@@ -178,12 +178,16 @@ if __name__ == '__main__':
parser.set_usage(usage)
parser.add_option('-f', '--final-rev', help='Final revision to import',
dest='final_rev', metavar='FINAL_REV', type='int')
+ parser.add_option('-r', '--first-rev', help='First revision to import',
+ dest='first_rev', metavar='FIRST_REV', type='int')
parser.add_option('-t', '--trunk-path', help="Path in repo to /trunk, may be `regex:/cvs/(trunk)/proj1/(.*)`\nFirst group is used as branchname, second to match files",
dest='trunk_path', metavar='TRUNK_PATH')
parser.add_option('-b', '--branches-path', help='Path in repo to /branches',
dest='branches_path', metavar='BRANCHES_PATH')
parser.add_option('-T', '--tags-path', help='Path in repo to /tags',
dest='tags_path', metavar='TAGS_PATH')
+    parser.add_option('-a', '--address', help='Domain to use in user email addresses',
+            dest='address', metavar='HOSTNAME', type='string')
(options, args) = parser.parse_args()
if options.trunk_path != None:
@@ -194,6 +198,10 @@ if __name__ == '__main__':
tags_path = options.tags_path
if options.final_rev != None:
final_rev = options.final_rev
+ if options.first_rev != None:
+ first_rev = options.first_rev
+ if options.address != None:
+ address = options.address
MATCHER = Matcher.getMatcher(trunk_path)
sys.stderr.write("%s\n" % MATCHER)
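
Taken together, the new switches let you resume a partial conversion with a
custom mail domain, along these lines (the repository paths are
illustrative):

----
svn-fast-export.py -r 101 -f 200 -a example.org /path/to/svn-repo | \
    (cd git-repo; git fast-import)
----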
diff --git a/helpers.py b/helpers.py
index 34d4688..afc867d 100644
--- a/helpers.py
+++ b/helpers.py
@@ -16,97 +16,14 @@
"""Miscellaneous useful stuff."""
-
-def single_plural(n, single, plural):
- """Return a single or plural form of a noun based on number."""
- if n == 1:
- return single
- else:
- return plural
-
-
-def defines_to_dict(defines):
- """Convert a list of definition strings to a dictionary."""
- if defines is None:
- return None
- result = {}
- for define in defines:
- kv = define.split('=', 1)
- if len(kv) == 1:
- result[define.strip()] = 1
- else:
- result[kv[0].strip()] = kv[1].strip()
- return result
-
-
-def invert_dict(d):
- """Invert a dictionary with keys matching each value turned into a list."""
- # Based on recipe from ASPN
- result = {}
- for k, v in d.iteritems():
- keys = result.setdefault(v, [])
- keys.append(k)
- return result
-
-
-def invert_dictset(d):
- """Invert a dictionary with keys matching a set of values, turned into lists."""
- # Based on recipe from ASPN
- result = {}
- for k, c in d.iteritems():
- for v in c:
- keys = result.setdefault(v, [])
- keys.append(k)
- return result
-
-
-def _common_path_and_rest(l1, l2, common=[]):
- # From http://code.activestate.com/recipes/208993/
- if len(l1) < 1: return (common, l1, l2)
- if len(l2) < 1: return (common, l1, l2)
- if l1[0] != l2[0]: return (common, l1, l2)
- return _common_path_and_rest(l1[1:], l2[1:], common+[l1[0]])
-
-
-def common_path(path1, path2):
- """Find the common bit of 2 paths."""
- return ''.join(_common_path_and_rest(path1, path2)[0])
-
-
-def common_directory(paths):
- """Find the deepest common directory of a list of paths.
-
- :return: if no paths are provided, None is returned;
- if there is no common directory, '' is returned;
- otherwise the common directory with a trailing / is returned.
- """
- from bzrlib import osutils
- def get_dir_with_slash(path):
- if path == '' or path.endswith('/'):
- return path
- else:
- dirname, basename = osutils.split(path)
- if dirname == '':
- return dirname
- else:
- return dirname + '/'
-
- if not paths:
- return None
- elif len(paths) == 1:
- return get_dir_with_slash(paths[0])
- else:
- common = common_path(paths[0], paths[1])
- for path in paths[2:]:
- common = common_path(common, path)
- return get_dir_with_slash(common)
+import stat
def escape_commit_message(message):
"""Replace xml-incompatible control characters."""
# This really ought to be provided by bzrlib.
# Code copied from bzrlib.commit.
-
+
# Python strings can include characters that can't be
# represented in well-formed XML; escape characters that
# aren't listed in the XML specification
@@ -119,25 +36,6 @@ def escape_commit_message(message):
return message
-def binary_stream(stream):
- """Ensure a stream is binary on Windows.
-
- :return: the stream
- """
- try:
- import os
- if os.name == 'nt':
- fileno = getattr(stream, 'fileno', None)
- if fileno:
- no = fileno()
- if no >= 0: # -1 means we're working as subprocess
- import msvcrt
- msvcrt.setmode(no, os.O_BINARY)
- except ImportError:
- pass
- return stream
-
-
def best_format_for_objects_in_a_repository(repo):
"""Find the high-level format for branches and trees given a repository.
@@ -215,3 +113,37 @@ def open_destination_directory(location, format=None, verbose=True):
from bzrlib.info import show_bzrdir_info
show_bzrdir_info(repo.bzrdir, verbose=0)
return control
+
+
+def kind_to_mode(kind, executable):
+ if kind == "file":
+ if executable == True:
+ return stat.S_IFREG | 0755
+ elif executable == False:
+ return stat.S_IFREG | 0644
+ else:
+ raise AssertionError("Executable %r invalid" % executable)
+ elif kind == "symlink":
+ return stat.S_IFLNK
+ elif kind == "directory":
+ return stat.S_IFDIR
+ elif kind == "tree-reference":
+ return 0160000
+ else:
+ raise AssertionError("Unknown file kind '%s'" % kind)
+
+
+def mode_to_kind(mode):
+    # Note: output from git-fast-export differs slightly from the spec
+ if mode in (0644, 0100644):
+ return 'file', False
+ elif mode in (0755, 0100755):
+ return 'file', True
+ elif mode == 0040000:
+ return 'directory', False
+ elif mode == 0120000:
+ return 'symlink', False
+ elif mode == 0160000:
+ return 'tree-reference', False
+ else:
+ raise AssertionError("invalid mode %o" % mode)
diff --git a/idmapfile.py b/idmapfile.py
deleted file mode 100644
index 7b4ccf4..0000000
--- a/idmapfile.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Routines for saving and loading the id-map file."""
-
-import os
-
-
-def save_id_map(filename, revision_ids):
- """Save the mapping of commit ids to revision ids to a file.
-
- Throws the usual exceptions if the file cannot be opened,
- written to or closed.
-
- :param filename: name of the file to save the data to
- :param revision_ids: a dictionary of commit ids to revision ids.
- """
- f = open(filename, 'wb')
- try:
- for commit_id, rev_id in revision_ids.iteritems():
- f.write("%s %s\n" % (commit_id, rev_id))
- f.flush()
- finally:
- f.close()
-
-
-def load_id_map(filename):
- """Load the mapping of commit ids to revision ids from a file.
-
- If the file does not exist, an empty result is returned.
- If the file does exists but cannot be opened, read or closed,
- the normal exceptions are thrown.
-
- NOTE: It is assumed that commit-ids do not have embedded spaces.
-
- :param filename: name of the file to save the data to
- :result: map, count where:
- map = a dictionary of commit ids to revision ids;
- count = the number of keys in map
- """
- result = {}
- count = 0
- if os.path.exists(filename):
- f = open(filename)
- try:
- for line in f:
- parts = line[:-1].split(' ', 1)
- result[parts[0]] = parts[1]
- count += 1
- finally:
- f.close()
- return result, count
diff --git a/marks_file.py b/marks_file.py
index ab24be2..c05f8c6 100644
--- a/marks_file.py
+++ b/marks_file.py
@@ -17,7 +17,6 @@
"""Routines for reading/writing a marks file."""
-import re
from bzrlib.trace import warning
@@ -38,12 +37,26 @@ def import_marks(filename):
# Read the revision info
revision_ids = {}
- for line in f:
+
+ line = f.readline()
+ if line == 'format=1\n':
+ # Cope with old-style marks files
+ # Read the branch info
+ branch_names = {}
+ for string in f.readline().rstrip('\n').split('\0'):
+ if not string:
+ continue
+ name, integer = string.rsplit('.', 1)
+ branch_names[name] = int(integer)
+ line = f.readline()
+
+ while line:
line = line.rstrip('\n')
mark, revid = line.split(' ', 1)
if mark.startswith(':'):
mark = mark[1:]
revision_ids[mark] = revid
+ line = f.readline()
f.close()
return revision_ids
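
To illustrate the layout import_marks() now copes with: an old-style file
carries a "format=1" header and one NUL-separated branch line ahead of the
mark/revision-id pairs, while a new-style file contains only the pairs. A
made-up old-style sample (<NUL> stands for a NUL byte; the revision ids are
invented):

----
format=1
trunk.0<NUL>feature.3
:1 joe@example.com-20100101000000-abcdef0123456789
:2 joe@example.com-20100102000000-0123456789abcdef
----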
diff --git a/parser.py b/parser.py
deleted file mode 100644
index c133c01..0000000
--- a/parser.py
+++ /dev/null
@@ -1,557 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Parser of import data into command objects.
-
-In order to reuse existing front-ends, the stream format is a subset of
-the one used by git-fast-import (as of the 1.5.4 release of git at least).
-The grammar is:
-
- stream ::= cmd*;
-
- cmd ::= new_blob
- | new_commit
- | new_tag
- | reset_branch
- | checkpoint
- | progress
- ;
-
- new_blob ::= 'blob' lf
- mark?
- file_content;
- file_content ::= data;
-
- new_commit ::= 'commit' sp ref_str lf
- mark?
- ('author' sp name '<' email '>' when lf)?
- 'committer' sp name '<' email '>' when lf
- commit_msg
- ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
- ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
- file_change*
- lf?;
- commit_msg ::= data;
-
- file_change ::= file_clr
- | file_del
- | file_rnm
- | file_cpy
- | file_obm
- | file_inm;
- file_clr ::= 'deleteall' lf;
- file_del ::= 'D' sp path_str lf;
- file_rnm ::= 'R' sp path_str sp path_str lf;
- file_cpy ::= 'C' sp path_str sp path_str lf;
- file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
- file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
- data;
-
- new_tag ::= 'tag' sp tag_str lf
- 'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
- 'tagger' sp name '<' email '>' when lf
- tag_msg;
- tag_msg ::= data;
-
- reset_branch ::= 'reset' sp ref_str lf
- ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
- lf?;
-
- checkpoint ::= 'checkpoint' lf
- lf?;
-
- progress ::= 'progress' sp not_lf* lf
- lf?;
-
- # note: the first idnum in a stream should be 1 and subsequent
- # idnums should not have gaps between values as this will cause
- # the stream parser to reserve space for the gapped values. An
- # idnum can be updated in the future to a new object by issuing
- # a new mark directive with the old idnum.
- #
- mark ::= 'mark' sp idnum lf;
- data ::= (delimited_data | exact_data)
- lf?;
-
- # note: delim may be any string but must not contain lf.
- # data_line may contain any data but must not be exactly
- # delim. The lf after the final data_line is included in
- # the data.
- delimited_data ::= 'data' sp '<<' delim lf
- (data_line lf)*
- delim lf;
-
- # note: declen indicates the length of binary_data in bytes.
- # declen does not include the lf preceeding the binary data.
- #
- exact_data ::= 'data' sp declen lf
- binary_data;
-
- # note: quoted strings are C-style quoting supporting \c for
- # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn
- # is the signed byte value in octal. Note that the only
- # characters which must actually be escaped to protect the
- # stream formatting is: \, " and LF. Otherwise these values
- # are UTF8.
- #
- ref_str ::= ref;
- sha1exp_str ::= sha1exp;
- tag_str ::= tag;
- path_str ::= path | '"' quoted(path) '"' ;
- mode ::= '100644' | '644'
- | '100755' | '755'
- | '120000'
- ;
-
- declen ::= # unsigned 32 bit value, ascii base10 notation;
- bigint ::= # unsigned integer value, ascii base10 notation;
- binary_data ::= # file content, not interpreted;
-
- when ::= raw_when | rfc2822_when;
- raw_when ::= ts sp tz;
- rfc2822_when ::= # Valid RFC 2822 date and time;
-
- sp ::= # ASCII space character;
- lf ::= # ASCII newline (LF) character;
-
- # note: a colon (':') must precede the numerical value assigned to
- # an idnum. This is to distinguish it from a ref or tag name as
- # GIT does not permit ':' in ref or tag strings.
- #
- idnum ::= ':' bigint;
- path ::= # GIT style file path, e.g. "a/b/c";
- ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
- tag ::= # GIT tag name, e.g. "FIREFOX_1_5";
- sha1exp ::= # Any valid GIT SHA1 expression;
- hexsha1 ::= # SHA1 in hexadecimal format;
-
- # note: name and email are UTF8 strings, however name must not
- # contain '<' or lf and email must not contain any of the
- # following: '<', '>', lf.
- #
- name ::= # valid GIT author/committer name;
- email ::= # valid GIT author/committer email;
- ts ::= # time since the epoch in seconds, ascii base10 notation;
- tz ::= # GIT style timezone;
-
- # note: comments may appear anywhere in the input, except
- # within a data command. Any form of the data command
- # always escapes the related input from comment processing.
- #
- # In case it is not clear, the '#' that starts the comment
- # must be the first character on that the line (an lf have
- # preceeded it).
- #
- comment ::= '#' not_lf* lf;
- not_lf ::= # Any byte that is not ASCII newline (LF);
-"""
-
-
-import re
-import sys
-
-import commands
-import dates
-import errors
-
-
-## Stream parsing ##
-
-class LineBasedParser(object):
-
- def __init__(self, input):
- """A Parser that keeps track of line numbers.
-
- :param input: the file-like object to read from
- """
- self.input = input
- self.lineno = 0
- # Lines pushed back onto the input stream
- self._buffer = []
-
- def abort(self, exception, *args):
- """Raise an exception providing line number information."""
- raise exception(self.lineno, *args)
-
- def readline(self):
- """Get the next line including the newline or '' on EOF."""
- self.lineno += 1
- if self._buffer:
- return self._buffer.pop()
- else:
- return self.input.readline()
-
- def next_line(self):
- """Get the next line without the newline or None on EOF."""
- line = self.readline()
- if line:
- return line[:-1]
- else:
- return None
-
- def push_line(self, line):
- """Push line back onto the line buffer.
-
- :param line: the line with no trailing newline
- """
- self.lineno -= 1
- self._buffer.append(line + "\n")
-
- def read_bytes(self, count):
- """Read a given number of bytes from the input stream.
-
- Throws MissingBytes if the bytes are not found.
-
- Note: This method does not read from the line buffer.
-
- :return: a string
- """
- result = self.input.read(count)
- found = len(result)
- self.lineno += result.count("\n")
- if found != count:
- self.abort(errors.MissingBytes, count, found)
- return result
-
- def read_until(self, terminator):
- """Read the input stream until the terminator is found.
-
- Throws MissingTerminator if the terminator is not found.
-
- Note: This method does not read from the line buffer.
-
- :return: the bytes read up to but excluding the terminator.
- """
-
- lines = []
- term = terminator + '\n'
- while True:
- line = self.input.readline()
- if line == term:
- break
- else:
- lines.append(line)
- return ''.join(lines)
-
-
-# Regular expression used for parsing. (Note: The spec states that the name
-# part should be non-empty but git-fast-export doesn't always do that so
-# the first bit is \w*, not \w+.) Also git-fast-import code says the
-# space before the email is optional.
-_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)')
-_WHO_RE = re.compile(r'([^<]*)<(.*)>')
-
-
-class ImportParser(LineBasedParser):
-
- def __init__(self, input, verbose=False, output=sys.stdout):
- """A Parser of import commands.
-
- :param input: the file-like object to read from
- :param verbose: display extra information of not
- :param output: the file-like object to write messages to (YAGNI?)
- """
- LineBasedParser.__init__(self, input)
- self.verbose = verbose
- self.output = output
- # We auto-detect the date format when a date is first encountered
- self.date_parser = None
-
- def _warning(self, msg):
- sys.stderr.write("warning line %d: %s\n" % (self.lineno, msg))
-
- def iter_commands(self):
- """Iterator returning ImportCommand objects."""
- while True:
- line = self.next_line()
- if line is None:
- break
- elif len(line) == 0 or line.startswith('#'):
- continue
- # Search for commands in order of likelihood
- elif line.startswith('commit '):
- yield self._parse_commit(line[len('commit '):])
- elif line.startswith('blob'):
- yield self._parse_blob()
- elif line.startswith('progress '):
- yield commands.ProgressCommand(line[len('progress '):])
- elif line.startswith('reset '):
- yield self._parse_reset(line[len('reset '):])
- elif line.startswith('tag '):
- yield self._parse_tag(line[len('tag '):])
- elif line.startswith('checkpoint'):
- yield commands.CheckpointCommand()
- else:
- self.abort(errors.InvalidCommand, line)
-
- def iter_file_commands(self):
- """Iterator returning FileCommand objects.
-
- If an invalid file command is found, the line is silently
- pushed back and iteration ends.
- """
- while True:
- line = self.next_line()
- if line is None:
- break
- elif len(line) == 0 or line.startswith('#'):
- continue
- # Search for file commands in order of likelihood
- elif line.startswith('M '):
- yield self._parse_file_modify(line[2:])
- elif line.startswith('D '):
- path = self._path(line[2:])
- yield commands.FileDeleteCommand(path)
- elif line.startswith('R '):
- old, new = self._path_pair(line[2:])
- yield commands.FileRenameCommand(old, new)
- elif line.startswith('C '):
- src, dest = self._path_pair(line[2:])
- yield commands.FileCopyCommand(src, dest)
- elif line.startswith('deleteall'):
- yield commands.FileDeleteAllCommand()
- else:
- self.push_line(line)
- break
-
- def _parse_blob(self):
- """Parse a blob command."""
- lineno = self.lineno
- mark = self._get_mark_if_any()
- data = self._get_data('blob')
- return commands.BlobCommand(mark, data, lineno)
-
- def _parse_commit(self, ref):
- """Parse a commit command."""
- lineno = self.lineno
- mark = self._get_mark_if_any()
- author = self._get_user_info('commit', 'author', False)
- committer = self._get_user_info('commit', 'committer')
- message = self._get_data('commit', 'message')
- try:
- message = message.decode('utf_8')
- except UnicodeDecodeError:
- self._warning(
- "commit message not in utf8 - replacing unknown characters")
- message = message.decode('utf_8', 'replace')
- from_ = self._get_from()
- merges = []
- while True:
- merge = self._get_merge()
- if merge is not None:
- # while the spec suggests it's illegal, git-fast-export
- # outputs multiple merges on the one line, e.g.
- # merge :x :y :z
- these_merges = merge.split(" ")
- merges.extend(these_merges)
- else:
- break
- return commands.CommitCommand(ref, mark, author, committer, message,
- from_, merges, self.iter_file_commands, lineno)
-
- def _parse_file_modify(self, info):
- """Parse a filemodify command within a commit.
-
- :param info: a string in the format "mode dataref path"
- (where dataref might be the hard-coded literal 'inline').
- """
- params = info.split(' ', 2)
- path = self._path(params[2])
- is_executable, is_symlink = self._mode(params[0])
- if is_symlink:
- kind = commands.SYMLINK_KIND
- else:
- kind = commands.FILE_KIND
- if params[1] == 'inline':
- dataref = None
- data = self._get_data('filemodify')
- else:
- dataref = params[1]
- data = None
- return commands.FileModifyCommand(path, kind, is_executable, dataref,
- data)
-
- def _parse_reset(self, ref):
- """Parse a reset command."""
- from_ = self._get_from()
- return commands.ResetCommand(ref, from_)
-
- def _parse_tag(self, name):
- """Parse a tag command."""
- from_ = self._get_from('tag')
- tagger = self._get_user_info('tag', 'tagger', accept_just_who=True)
- message = self._get_data('tag', 'message').decode('utf_8')
- return commands.TagCommand(name, from_, tagger, message)
-
- def _get_mark_if_any(self):
- """Parse a mark section."""
- line = self.next_line()
- if line.startswith('mark :'):
- return line[len('mark :'):]
- else:
- self.push_line(line)
- return None
-
- def _get_from(self, required_for=None):
- """Parse a from section."""
- line = self.next_line()
- if line is None:
- return None
- elif line.startswith('from '):
- return line[len('from '):]
- elif required_for:
- self.abort(errors.MissingSection, required_for, 'from')
- else:
- self.push_line(line)
- return None
-
- def _get_merge(self):
- """Parse a merge section."""
- line = self.next_line()
- if line is None:
- return None
- elif line.startswith('merge '):
- return line[len('merge '):]
- else:
- self.push_line(line)
- return None
-
- def _get_user_info(self, cmd, section, required=True,
- accept_just_who=False):
- """Parse a user section."""
- line = self.next_line()
- if line.startswith(section + ' '):
- return self._who_when(line[len(section + ' '):], cmd, section,
- accept_just_who=accept_just_who)
- elif required:
- self.abort(errors.MissingSection, cmd, section)
- else:
- self.push_line(line)
- return None
-
- def _get_data(self, required_for, section='data'):
- """Parse a data section."""
- line = self.next_line()
- if line.startswith('data '):
- rest = line[len('data '):]
- if rest.startswith('<<'):
- return self.read_until(rest[2:])
- else:
- size = int(rest)
- read_bytes = self.read_bytes(size)
- # optional LF after data.
- next = self.input.readline()
- self.lineno += 1
- if len(next) > 1 or next != "\n":
- self.push_line(next[:-1])
- return read_bytes
- else:
- self.abort(errors.MissingSection, required_for, section)
-
- def _who_when(self, s, cmd, section, accept_just_who=False):
- """Parse who and when information from a string.
-
- :return: a tuple of (name,email,timestamp,timezone). name may be
- the empty string if only an email address was given.
- """
- match = _WHO_AND_WHEN_RE.search(s)
- if match:
- datestr = match.group(3)
- if self.date_parser is None:
- # auto-detect the date format
- if len(datestr.split(' ')) == 2:
- format = 'raw'
- elif datestr == 'now':
- format = 'now'
- else:
- format = 'rfc2822'
- self.date_parser = dates.DATE_PARSERS_BY_NAME[format]
- when = self.date_parser(datestr, self.lineno)
- else:
- match = _WHO_RE.search(s)
- if accept_just_who and match:
- # HACK around missing time
- # TODO: output a warning here
- when = dates.DATE_PARSERS_BY_NAME['now']('now')
- else:
- self.abort(errors.BadFormat, cmd, section, s)
- name = match.group(1)
- if len(name) > 0:
- if name[-1] == " ":
- try:
- name = name[:-1].decode('utf_8')
- except UnicodeDecodeError:
- # The spec says names are *typically* utf8 encoded
- # but that isn't enforced by git-fast-export (at least)
- name = name[:-1]
- email = match.group(2)
- # While it shouldn't happen, some datasets have email addresses
- # which contain unicode characters. See bug 338186. We sanitize
- # the data at this level just in case.
- try:
- email = "%s" % (email,)
- except UnicodeDecodeError:
- email = "%s" % (email.decode('utf_8'),)
- return (name, email, when[0], when[1])
-
- def _path(self, s):
- """Parse a path."""
- if s.startswith('"'):
- if s[-1] != '"':
- self.abort(errors.BadFormat, '?', '?', s)
- else:
- return _unquote_c_string(s[1:-1])
- try:
- return s.decode('utf_8')
- except UnicodeDecodeError:
- # The spec recommends utf8 encoding but that isn't enforced
- return s
-
- def _path_pair(self, s):
- """Parse two paths separated by a space."""
- # TODO: handle a space in the first path
- if s.startswith('"'):
- parts = s[1:].split('" ', 1)
- else:
- parts = s.split(' ', 1)
- if len(parts) != 2:
- self.abort(errors.BadFormat, '?', '?', s)
- elif parts[1].startswith('"') and parts[1].endswith('"'):
- parts[1] = parts[1][1:-1]
- elif parts[1].startswith('"') or parts[1].endswith('"'):
- self.abort(errors.BadFormat, '?', '?', s)
- return map(_unquote_c_string, parts)
-
- def _mode(self, s):
- """Parse a file mode into executable and symlink flags.
-
- :return (is_executable, is_symlink)
- """
- # Note: Output from git-fast-export slightly different to spec
- if s in ['644', '100644', '0100644']:
- return False, False
- elif s in ['755', '100755', '0100755']:
- return True, False
- elif s in ['120000', '0120000']:
- return False, True
- else:
- self.abort(errors.BadFormat, 'filemodify', 'mode', s)
-
-
-def _unquote_c_string(s):
- """replace C-style escape sequences (\n, \", etc.) with real chars."""
- # HACK: Python strings are close enough
- return s.decode('string_escape', 'replace')
diff --git a/processor.py b/processor.py
deleted file mode 100644
index 06b4871..0000000
--- a/processor.py
+++ /dev/null
@@ -1,253 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Processor of import commands.
-
-This module provides core processing functionality including an abstract class
-for basing real processors on. See the processors package for examples.
-"""
-
-import sys
-import time
-
-from bzrlib import debug
-from bzrlib.errors import NotBranchError
-from bzrlib.trace import (
- mutter,
- note,
- warning,
- )
-import errors
-
-
-class ImportProcessor(object):
- """Base class for import processors.
-
- Subclasses should override the pre_*, post_* and *_handler
- methods as appropriate.
- """
-
- known_params = []
-
- def __init__(self, bzrdir, params=None, verbose=False, outf=None):
- if outf is None:
- self.outf = sys.stdout
- else:
- self.outf = outf
- self.verbose = verbose
- if params is None:
- self.params = {}
- else:
- self.params = params
- self.validate_parameters()
- self.bzrdir = bzrdir
- if bzrdir is None:
- # Some 'importers' don't need a repository to write to
- self.working_tree = None
- self.branch = None
- self.repo = None
- else:
- try:
- # Might be inside a branch
- (self.working_tree, self.branch) = bzrdir._get_tree_branch()
- self.repo = self.branch.repository
- except NotBranchError:
- # Must be inside a repository
- self.working_tree = None
- self.branch = None
- self.repo = bzrdir.open_repository()
-
- # Handlers can set this to request exiting cleanly without
- # iterating through the remaining commands
- self.finished = False
-
- def validate_parameters(self):
- """Validate that the parameters are correctly specified."""
- for p in self.params:
- if p not in self.known_params:
- raise errors.UnknownParameter(p, self.known_params)
-
- def process(self, command_iter):
- """Import data into Bazaar by processing a stream of commands.
-
- :param command_iter: an iterator providing commands
- """
- if self.working_tree is not None:
- self.working_tree.lock_write()
- elif self.branch is not None:
- self.branch.lock_write()
- elif self.repo is not None:
- self.repo.lock_write()
- try:
- self._process(command_iter)
- finally:
- # If an unhandled exception occurred, abort the write group
- if self.repo is not None and self.repo.is_in_write_group():
- self.repo.abort_write_group()
- # Release the locks
- if self.working_tree is not None:
- self.working_tree.unlock()
- elif self.branch is not None:
- self.branch.unlock()
- elif self.repo is not None:
- self.repo.unlock()
-
- def _process(self, command_iter):
- self.pre_process()
- for cmd in command_iter():
- try:
- handler = self.__class__.__dict__[cmd.name + "_handler"]
- except KeyError:
- raise errors.MissingHandler(cmd.name)
- else:
- self.pre_handler(cmd)
- handler(self, cmd)
- self.post_handler(cmd)
- if self.finished:
- break
- self.post_process()
-
- def note(self, msg, *args):
- """Output a note but timestamp it."""
- msg = "%s %s" % (self._time_of_day(), msg)
- note(msg, *args)
-
- def warning(self, msg, *args):
- """Output a warning but timestamp it."""
- msg = "%s WARNING: %s" % (self._time_of_day(), msg)
- warning(msg, *args)
-
- def debug(self, mgs, *args):
- """Output a debug message if the appropriate -D option was given."""
- if "fast-import" in debug.debug_flags:
- msg = "%s DEBUG: %s" % (self._time_of_day(), msg)
- mutter(msg, *args)
-
- def _time_of_day(self):
- """Time of day as a string."""
- # Note: this is a separate method so tests can patch in a fixed value
- return time.strftime("%H:%M:%S")
-
- def pre_process(self):
- """Hook for logic at start of processing."""
- pass
-
- def post_process(self):
- """Hook for logic at end of processing."""
- pass
-
- def pre_handler(self, cmd):
- """Hook for logic before each handler starts."""
- pass
-
- def post_handler(self, cmd):
- """Hook for logic after each handler finishes."""
- pass
-
- def progress_handler(self, cmd):
- """Process a ProgressCommand."""
- raise NotImplementedError(self.progress_handler)
-
- def blob_handler(self, cmd):
- """Process a BlobCommand."""
- raise NotImplementedError(self.blob_handler)
-
- def checkpoint_handler(self, cmd):
- """Process a CheckpointCommand."""
- raise NotImplementedError(self.checkpoint_handler)
-
- def commit_handler(self, cmd):
- """Process a CommitCommand."""
- raise NotImplementedError(self.commit_handler)
-
- def reset_handler(self, cmd):
- """Process a ResetCommand."""
- raise NotImplementedError(self.reset_handler)
-
- def tag_handler(self, cmd):
- """Process a TagCommand."""
- raise NotImplementedError(self.tag_handler)
-
-
-class CommitHandler(object):
- """Base class for commit handling.
-
- Subclasses should override the pre_*, post_* and *_handler
- methods as appropriate.
- """
-
- def __init__(self, command):
- self.command = command
-
- def process(self):
- self.pre_process_files()
- for fc in self.command.file_iter():
- try:
- handler = self.__class__.__dict__[fc.name[4:] + "_handler"]
- except KeyError:
- raise errors.MissingHandler(fc.name)
- else:
- handler(self, fc)
- self.post_process_files()
-
- def note(self, msg, *args):
- """Output a note but add context."""
- msg = "%s (%s)" % (msg, self.command.id)
- note(msg, *args)
-
- def warning(self, msg, *args):
- """Output a warning but add context."""
- msg = "WARNING: %s (%s)" % (msg, self.command.id)
- warning(msg, *args)
-
- def mutter(self, msg, *args):
- """Output a mutter but add context."""
- msg = "%s (%s)" % (msg, self.command.id)
- mutter(msg, *args)
-
- def debug(self, msg, *args):
- """Output a mutter if the appropriate -D option was given."""
- if "fast-import" in debug.debug_flags:
- msg = "%s (%s)" % (msg, self.command.id)
- mutter(msg, *args)
-
- def pre_process_files(self):
- """Prepare for committing."""
- pass
-
- def post_process_files(self):
- """Save the revision."""
- pass
-
- def modify_handler(self, filecmd):
- """Handle a filemodify command."""
- raise NotImplementedError(self.modify_handler)
-
- def delete_handler(self, filecmd):
- """Handle a filedelete command."""
- raise NotImplementedError(self.delete_handler)
-
- def copy_handler(self, filecmd):
- """Handle a filecopy command."""
- raise NotImplementedError(self.copy_handler)
-
- def rename_handler(self, filecmd):
- """Handle a filerename command."""
- raise NotImplementedError(self.rename_handler)
-
- def deleteall_handler(self, filecmd):
- """Handle a filedeleteall command."""
- raise NotImplementedError(self.deleteall_handler)
diff --git a/processors/filter_processor.py b/processors/filter_processor.py
deleted file mode 100644
index 8284cb5..0000000
--- a/processors/filter_processor.py
+++ /dev/null
@@ -1,288 +0,0 @@
-# Copyright (C) 2009 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Import processor that filters the input (and doesn't import)."""
-
-
-from bzrlib import osutils
-from bzrlib.trace import (
- warning,
- )
-from bzrlib.plugins.fastimport import (
- commands,
- helpers,
- processor,
- )
-
-
-class FilterProcessor(processor.ImportProcessor):
- """An import processor that filters the input to include/exclude objects.
-
- No changes to the current repository are made.
-
- Here are the supported parameters:
-
- * include_paths - a list of paths that commits must change in order to
- be kept in the output stream
-
- * exclude_paths - a list of paths that should not appear in the output
- stream
- """
-
- known_params = [
- 'include_paths',
- 'exclude_paths',
- ]
-
- def pre_process(self):
- self.includes = self.params.get('include_paths')
- self.excludes = self.params.get('exclude_paths')
- # What's the new root, if any
- self.new_root = helpers.common_directory(self.includes)
- # Buffer of blobs until we know we need them: mark -> cmd
- self.blobs = {}
- # These are the commits we've output so far
- self.interesting_commits = set()
- # Map of commit-id to list of parents
- self.parents = {}
-
- def pre_handler(self, cmd):
- self.command = cmd
- # Should this command be included in the output or not?
- self.keep = False
- # Blobs to dump into the output before dumping the command itself
- self.referenced_blobs = []
-
- def post_handler(self, cmd):
- if not self.keep:
- return
- # print referenced blobs and the command
- for blob_id in self.referenced_blobs:
- self._print_command(self.blobs[blob_id])
- self._print_command(self.command)
-
- def progress_handler(self, cmd):
- """Process a ProgressCommand."""
- # These always pass through
- self.keep = True
-
- def blob_handler(self, cmd):
- """Process a BlobCommand."""
- # These never pass through directly. We buffer them and only
- # output them if referenced by an interesting command.
- self.blobs[cmd.id] = cmd
- self.keep = False
-
- def checkpoint_handler(self, cmd):
- """Process a CheckpointCommand."""
- # These always pass through
- self.keep = True
-
- def commit_handler(self, cmd):
- """Process a CommitCommand."""
- # These pass through if they meet the filtering conditions
- interesting_filecmds = self._filter_filecommands(cmd.file_iter)
- if interesting_filecmds:
- # If all we have is a single deleteall, skip this commit
- if len(interesting_filecmds) == 1 and isinstance(
- interesting_filecmds[0], commands.FileDeleteAllCommand):
- pass
- else:
- # Remember just the interesting file commands
- self.keep = True
- cmd.file_iter = iter(interesting_filecmds)
-
- # Record the referenced blobs
- for fc in interesting_filecmds:
- if isinstance(fc, commands.FileModifyCommand):
- if fc.dataref is not None:
- self.referenced_blobs.append(fc.dataref)
-
- # Update from and merges to refer to commits in the output
- cmd.from_ = self._find_interesting_from(cmd.from_)
- cmd.merges = self._find_interesting_merges(cmd.merges)
- self.interesting_commits.add(cmd.id)
-
- # Keep track of the parents
- if cmd.from_ and cmd.merges:
- parents = [cmd.from_] + cmd.merges
- elif cmd.from_:
- parents = [cmd.from_]
- else:
- parents = None
- self.parents[":" + cmd.mark] = parents
-
- def reset_handler(self, cmd):
- """Process a ResetCommand."""
- if cmd.from_ is None:
- # We pass through resets that init a branch because we have to
- # assume the branch might be interesting.
- self.keep = True
- else:
- # Keep resets if they indirectly reference something we kept
- cmd.from_ = self._find_interesting_from(cmd.from_)
- self.keep = cmd.from_ is not None
-
- def tag_handler(self, cmd):
- """Process a TagCommand."""
- # Keep tags if they indirectly reference something we kept
- cmd.from_ = self._find_interesting_from(cmd.from_)
- self.keep = cmd.from_ is not None
-
- def _print_command(self, cmd):
- """Wrapper to avoid adding unnecessary blank lines."""
- text = repr(cmd)
- self.outf.write(text)
- if not text.endswith("\n"):
- self.outf.write("\n")
-
- def _filter_filecommands(self, filecmd_iter):
- """Return the filecommands filtered by includes & excludes.
-
- :return: a list of FileCommand objects
- """
- if self.includes is None and self.excludes is None:
- return list(filecmd_iter())
-
- # Do the filtering, adjusting for the new_root
- result = []
- for fc in filecmd_iter():
- if (isinstance(fc, commands.FileModifyCommand) or
- isinstance(fc, commands.FileDeleteCommand)):
- if self._path_to_be_kept(fc.path):
- fc.path = self._adjust_for_new_root(fc.path)
- else:
- continue
- elif isinstance(fc, commands.FileDeleteAllCommand):
- pass
- elif isinstance(fc, commands.FileRenameCommand):
- fc = self._convert_rename(fc)
- elif isinstance(fc, commands.FileCopyCommand):
- fc = self._convert_copy(fc)
- else:
- warning("cannot handle FileCommands of class %s - ignoring",
- fc.__class__)
- continue
- if fc is not None:
- result.append(fc)
- return result
-
- def _path_to_be_kept(self, path):
- """Does the given path pass the filtering criteria?"""
- if self.excludes and (path in self.excludes
- or osutils.is_inside_any(self.excludes, path)):
- return False
- if self.includes:
- return (path in self.includes
- or osutils.is_inside_any(self.includes, path))
- return True
-
- def _adjust_for_new_root(self, path):
- """Adjust a path given the new root directory of the output."""
- if self.new_root is None:
- return path
- elif path.startswith(self.new_root):
- return path[len(self.new_root):]
- else:
- return path
-
- def _find_interesting_parent(self, commit_ref):
- while True:
- if commit_ref in self.interesting_commits:
- return commit_ref
- parents = self.parents.get(commit_ref)
- if not parents:
- return None
- commit_ref = parents[0]
-
- def _find_interesting_from(self, commit_ref):
- if commit_ref is None:
- return None
- return self._find_interesting_parent(commit_ref)
-
- def _find_interesting_merges(self, commit_refs):
- if commit_refs is None:
- return None
- merges = []
- for commit_ref in commit_refs:
- parent = self._find_interesting_parent(commit_ref)
- if parent is not None:
- merges.append(parent)
- if merges:
- return merges
- else:
- return None
-
- def _convert_rename(self, fc):
- """Convert a FileRenameCommand into a new FileCommand.
-
- :return: None if the rename is being ignored, otherwise a
-          new FileCommand based on whether the old and new paths
- are inside or outside of the interesting locations.
- """
- old = fc.old_path
- new = fc.new_path
- keep_old = self._path_to_be_kept(old)
- keep_new = self._path_to_be_kept(new)
- if keep_old and keep_new:
- fc.old_path = self._adjust_for_new_root(old)
- fc.new_path = self._adjust_for_new_root(new)
- return fc
- elif keep_old:
- # The file has been renamed to a non-interesting location.
- # Delete it!
- old = self._adjust_for_new_root(old)
- return commands.FileDeleteCommand(old)
- elif keep_new:
- # The file has been renamed into an interesting location
- # We really ought to add it but we don't currently buffer
- # the contents of all previous files and probably never want
- # to. Maybe fast-import-info needs to be extended to
- # remember all renames and a config file can be passed
- # into here ala fast-import?
- warning("cannot turn rename of %s into an add of %s yet" %
- (old, new))
- return None
-
- def _convert_copy(self, fc):
- """Convert a FileCopyCommand into a new FileCommand.
-
- :return: None if the copy is being ignored, otherwise a
-          new FileCommand based on whether the source and destination
- paths are inside or outside of the interesting locations.
- """
- src = fc.src_path
- dest = fc.dest_path
- keep_src = self._path_to_be_kept(src)
- keep_dest = self._path_to_be_kept(dest)
- if keep_src and keep_dest:
- fc.src_path = self._adjust_for_new_root(src)
- fc.dest_path = self._adjust_for_new_root(dest)
- return fc
- elif keep_src:
- # The file has been copied to a non-interesting location.
- # Ignore it!
- return None
- elif keep_dest:
- # The file has been copied into an interesting location
- # We really ought to add it but we don't currently buffer
- # the contents of all previous files and probably never want
- # to. Maybe fast-import-info needs to be extended to
- # remember all copies and a config file can be passed
- # into here ala fast-import?
- warning("cannot turn copy of %s into an add of %s yet" %
- (src, dest))
- return None
diff --git a/processors/generic_processor.py b/processors/generic_processor.py
index 3f23c8b..43c933b 100644
--- a/processors/generic_processor.py
+++ b/processors/generic_processor.py
@@ -19,25 +19,34 @@
import time
from bzrlib import (
- bzrdir,
+ debug,
delta,
errors,
osutils,
progress,
)
from bzrlib.repofmt import pack_repo
-from bzrlib.trace import note, mutter
-import bzrlib.util.configobj.configobj as configobj
+from bzrlib.trace import (
+ mutter,
+ note,
+ warning,
+ )
+try:
+ import bzrlib.util.configobj.configobj as configobj
+except ImportError:
+ import configobj
from bzrlib.plugins.fastimport import (
branch_updater,
- bzr_commit_handler,
cache_manager,
+ marks_file,
+ revision_store,
+ )
+from fastimport import (
+ commands,
errors as plugin_errors,
helpers,
idmapfile,
- marks_file,
processor,
- revision_store,
)
@@ -51,8 +60,8 @@ _DEFAULT_AUTO_CHECKPOINT = 10000
_DEFAULT_AUTO_PACK = 4
# How many inventories to cache
-_DEFAULT_INV_CACHE_SIZE = 10
-_DEFAULT_CHK_INV_CACHE_SIZE = 100
+_DEFAULT_INV_CACHE_SIZE = 1
+_DEFAULT_CHK_INV_CACHE_SIZE = 1
class GenericProcessor(processor.ImportProcessor):
@@ -95,7 +104,7 @@ class GenericProcessor(processor.ImportProcessor):
* autopack - pack every n checkpoints. The default is 4.
* inv-cache - number of inventories to cache.
- If not set, the default is 100 for CHK formats and 10 otherwise.
+ If not set, the default is 1.
* mode - import algorithm to use: default, experimental or classic.
@@ -118,13 +127,27 @@ class GenericProcessor(processor.ImportProcessor):
def __init__(self, bzrdir, params=None, verbose=False, outf=None,
prune_empty_dirs=True):
- processor.ImportProcessor.__init__(self, bzrdir, params, verbose)
+ processor.ImportProcessor.__init__(self, params, verbose)
self.prune_empty_dirs = prune_empty_dirs
+ self.bzrdir = bzrdir
+ try:
+ # Might be inside a branch
+ (self.working_tree, self.branch) = bzrdir._get_tree_branch()
+ self.repo = self.branch.repository
+ except errors.NotBranchError:
+ # Must be inside a repository
+ self.working_tree = None
+ self.branch = None
+ self.repo = bzrdir.open_repository()
def pre_process(self):
- self.note("Starting import ...")
self._start_time = time.time()
self._load_info_and_params()
+ if self.total_commits:
+ self.note("Starting import of %d commits ..." %
+ (self.total_commits,))
+ else:
+ self.note("Starting import ...")
self.cache_mgr = cache_manager.CacheManager(self.info, self.verbose,
self.inventory_cache_size)
@@ -174,6 +197,7 @@ class GenericProcessor(processor.ImportProcessor):
self.repo.start_write_group()
def _load_info_and_params(self):
+ from bzrlib.plugins.fastimport import bzr_commit_handler
     self._mode = self.params.get('mode', 'default')
self._experimental = self._mode == 'experimental'
@@ -269,6 +293,31 @@ class GenericProcessor(processor.ImportProcessor):
self.repo, self.inventory_cache_size,
fulltext_when=fulltext_when)
+ def process(self, command_iter):
+ """Import data into Bazaar by processing a stream of commands.
+
+ :param command_iter: an iterator providing commands
+ """
+ if self.working_tree is not None:
+ self.working_tree.lock_write()
+ elif self.branch is not None:
+ self.branch.lock_write()
+ elif self.repo is not None:
+ self.repo.lock_write()
+ try:
+ super(GenericProcessor, self)._process(command_iter)
+ finally:
+ # If an unhandled exception occurred, abort the write group
+ if self.repo is not None and self.repo.is_in_write_group():
+ self.repo.abort_write_group()
+ # Release the locks
+ if self.working_tree is not None:
+ self.working_tree.unlock()
+ elif self.branch is not None:
+ self.branch.unlock()
+ elif self.repo is not None:
+ self.repo.unlock()
+
def _process(self, command_iter):
# if anything goes wrong, abort the write group if any
try:
@@ -287,15 +336,16 @@ class GenericProcessor(processor.ImportProcessor):
marks_file.export_marks(self.params.get("export-marks"),
self.cache_mgr.revision_ids)
- if self.cache_mgr.last_ref == None:
+        if self.cache_mgr.reftracker.last_ref is None:
             # Nothing to refresh
return
# Update the branches
self.note("Updating branch information ...")
updater = branch_updater.BranchUpdater(self.repo, self.branch,
- self.cache_mgr, helpers.invert_dictset(self.cache_mgr.heads),
- self.cache_mgr.last_ref, self.tags)
+ self.cache_mgr, helpers.invert_dictset(
+ self.cache_mgr.reftracker.heads),
+ self.cache_mgr.reftracker.last_ref, self.tags)
branches_updated, branches_lost = updater.update()
self._branch_count = len(branches_updated)
@@ -460,19 +510,19 @@ class GenericProcessor(processor.ImportProcessor):
def commit_handler(self, cmd):
"""Process a CommitCommand."""
if self.skip_total and self._revision_count < self.skip_total:
- self.cache_mgr.track_heads(cmd)
+ self.cache_mgr.reftracker.track_heads(cmd)
# Check that we really do know about this commit-id
if not self.cache_mgr.revision_ids.has_key(cmd.id):
raise plugin_errors.BadRestart(cmd.id)
- # Consume the file commands and free any non-sticky blobs
- for fc in cmd.file_iter():
- pass
self.cache_mgr._blobs = {}
self._revision_count += 1
+ if cmd.ref.startswith('refs/tags/'):
+ tag_name = cmd.ref[len('refs/tags/'):]
+ self._set_tag(tag_name, cmd.id)
return
if self.first_incremental_commit:
self.first_incremental_commit = None
- parents = self.cache_mgr.track_heads(cmd)
+ parents = self.cache_mgr.reftracker.track_heads(cmd)
# 'Commit' the revision and report progress
handler = self.commit_handler_factory(cmd, self.cache_mgr,
@@ -487,6 +537,10 @@ class GenericProcessor(processor.ImportProcessor):
self._revision_count += 1
self.report_progress("(%s)" % cmd.id)
+ if cmd.ref.startswith('refs/tags/'):
+ tag_name = cmd.ref[len('refs/tags/'):]
+ self._set_tag(tag_name, cmd.id)
+
# Check if we should finish up or automatically checkpoint
if (self.max_commits is not None and
self._revision_count >= self.max_commits):
@@ -514,8 +568,10 @@ class GenericProcessor(processor.ImportProcessor):
def progress_handler(self, cmd):
"""Process a ProgressCommand."""
- # We could use a progress bar here instead
- self.note("progress %s" % (cmd.message,))
+ # Most progress messages embedded in streams are annoying.
+ # Ignore them unless in verbose mode.
+ if self.verbose:
+ self.note("progress %s" % (cmd.message,))
def reset_handler(self, cmd):
"""Process a ResetCommand."""
@@ -529,7 +585,7 @@ class GenericProcessor(processor.ImportProcessor):
return
if cmd.from_ is not None:
- self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)
+ self.cache_mgr.reftracker.track_heads_for_ref(cmd.ref, cmd.from_)
def tag_handler(self, cmd):
"""Process a TagCommand."""
@@ -543,3 +599,25 @@ class GenericProcessor(processor.ImportProcessor):
bzr_tag_name = name.decode('utf-8', 'replace')
bzr_rev_id = self.cache_mgr.revision_ids[from_]
self.tags[bzr_tag_name] = bzr_rev_id
+
+ def feature_handler(self, cmd):
+ """Process a FeatureCommand."""
+ feature = cmd.feature_name
+ if feature not in commands.FEATURE_NAMES:
+ raise plugin_errors.UnknownFeature(feature)
+
+    def debug(self, msg, *args):
+ """Output a debug message if the appropriate -D option was given."""
+ if "fast-import" in debug.debug_flags:
+ msg = "%s DEBUG: %s" % (self._time_of_day(), msg)
+ mutter(msg, *args)
+
+ def note(self, msg, *args):
+ """Output a note but timestamp it."""
+ msg = "%s %s" % (self._time_of_day(), msg)
+ note(msg, *args)
+
+ def warning(self, msg, *args):
+ """Output a warning but timestamp it."""
+ msg = "%s WARNING: %s" % (self._time_of_day(), msg)
+ warning(msg, *args)
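
With the new process() wrapper, callers no longer manage locks or write groups themselves. A minimal driving sketch (assuming python-fastimport is installed and that location already contains a branch or shared repository; the helper name is made up):

    from fastimport import parser
    from bzrlib import bzrdir as _mod_bzrdir
    from bzrlib.plugins.fastimport.processors import generic_processor

    def import_stream(stream, location='.'):
        # process() locks the working tree, branch or repository itself
        # and aborts any pending write group on failure.
        control = _mod_bzrdir.BzrDir.open(location)
        proc = generic_processor.GenericProcessor(control, params={})
        proc.process(parser.ImportParser(stream).iter_commands)
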
diff --git a/processors/info_processor.py b/processors/info_processor.py
deleted file mode 100644
index e90418c..0000000
--- a/processors/info_processor.py
+++ /dev/null
@@ -1,281 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Import processor that dump stats about the input (and doesn't import)."""
-
-
-from bzrlib.trace import (
- note,
- warning,
- )
-from bzrlib.plugins.fastimport import (
- cache_manager,
- commands,
- helpers,
- processor,
- )
-
-
-class InfoProcessor(processor.ImportProcessor):
- """An import processor that dumps statistics about the input.
-
- No changes to the current repository are made.
-
- As well as providing useful information about an import
- stream before importing it, this processor is useful for
- benchmarking the speed at which data can be extracted from
- the source.
- """
-
- def __init__(self, target=None, params=None, verbose=0, outf=None):
- # Allow creation without a target
- processor.ImportProcessor.__init__(self, target, params, verbose,
- outf=outf)
-
- def pre_process(self):
- self.note("Collecting statistics ...")
- # Init statistics
- self.cmd_counts = {}
- for cmd in commands.COMMAND_NAMES:
- self.cmd_counts[cmd] = 0
- self.file_cmd_counts = {}
- for fc in commands.FILE_COMMAND_NAMES:
- self.file_cmd_counts[fc] = 0
- self.parent_counts = {}
- self.max_parent_count = 0
- self.committers = set()
- self.separate_authors_found = False
- self.symlinks_found = False
- self.executables_found = False
- self.sha_blob_references = False
- self.lightweight_tags = 0
- # Blob usage tracking
- self.blobs = {}
- for usage in ['new', 'used', 'unknown', 'unmarked']:
- self.blobs[usage] = set()
- self.blob_ref_counts = {}
- # Head tracking - delegate to the cache manager
- self.cache_mgr = cache_manager.CacheManager(inventory_cache_size=0)
- # Stuff to cache: a map from mark to # of times that mark is merged
- self.merges = {}
- # Stuff to cache: these are maps from mark to sets
- self.rename_old_paths = {}
- self.copy_source_paths = {}
-
- def post_process(self):
- # Dump statistics
- cmd_names = commands.COMMAND_NAMES
- fc_names = commands.FILE_COMMAND_NAMES
- cmd_values = [self.cmd_counts[c] for c in cmd_names]
- fc_values = [self.file_cmd_counts[c] for c in fc_names]
- self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
- self._dump_stats_group("File command counts", fc_names, fc_values, str)
-
- # Commit stats
- if self.cmd_counts['commit']:
- p_names = []
- p_values = []
- for i in xrange(0, self.max_parent_count + 1):
- if i in self.parent_counts:
- count = self.parent_counts[i]
- p_names.append("parents-%d" % i)
- p_values.append(count)
- merges_count = len(self.merges.keys())
- p_names.append('total revisions merged')
- p_values.append(merges_count)
- flags = {
- 'separate authors found': self.separate_authors_found,
- 'executables': self.executables_found,
- 'symlinks': self.symlinks_found,
- 'blobs referenced by SHA': self.sha_blob_references,
- }
- self._dump_stats_group("Parent counts", p_names, p_values, str)
- self._dump_stats_group("Commit analysis", flags.keys(),
- flags.values(), _found)
- heads = helpers.invert_dictset(self.cache_mgr.heads)
- self._dump_stats_group("Head analysis", heads.keys(),
- heads.values(), None, _iterable_as_config_list)
- # note("\t%d\t%s" % (len(self.committers), 'unique committers'))
- self._dump_stats_group("Merges", self.merges.keys(),
- self.merges.values(), None)
- # We only show the rename old path and copy source paths when -vv
- # (verbose=2) is specified. The output here for mysql's data can't
- # be parsed currently so this bit of code needs more work anyhow ..
- if self.verbose >= 2:
- self._dump_stats_group("Rename old paths",
- self.rename_old_paths.keys(),
- self.rename_old_paths.values(), len,
- _iterable_as_config_list)
- self._dump_stats_group("Copy source paths",
- self.copy_source_paths.keys(),
- self.copy_source_paths.values(), len,
- _iterable_as_config_list)
-
- # Blob stats
- if self.cmd_counts['blob']:
- # In verbose mode, don't list every blob used
- if self.verbose:
- del self.blobs['used']
- self._dump_stats_group("Blob usage tracking", self.blobs.keys(),
- self.blobs.values(), len, _iterable_as_config_list)
- if self.blob_ref_counts:
- blobs_by_count = helpers.invert_dict(self.blob_ref_counts)
- self._dump_stats_group("Blob reference counts",
- blobs_by_count.keys(),
- blobs_by_count.values(), len, _iterable_as_config_list)
-
- # Other stats
- if self.cmd_counts['reset']:
- reset_stats = {
- 'lightweight tags': self.lightweight_tags,
- }
- self._dump_stats_group("Reset analysis", reset_stats.keys(),
- reset_stats.values())
-
- def _dump_stats_group(self, title, names, values, normal_formatter=None,
- verbose_formatter=None):
- """Dump a statistics group.
-
- In verbose mode, do so as a config file so
- that other processors can load the information if they want to.
- :param normal_formatter: the callable to apply to the value
- before displaying it in normal mode
- :param verbose_formatter: the callable to apply to the value
- before displaying it in verbose mode
- """
- if self.verbose:
- self.outf.write("[%s]\n" % (title,))
- for name, value in zip(names, values):
- if verbose_formatter is not None:
- value = verbose_formatter(value)
- if type(name) == str:
- name = name.replace(' ', '-')
- self.outf.write("%s = %s\n" % (name, value))
- self.outf.write("\n")
- else:
- self.outf.write("%s:\n" % (title,))
- for name, value in zip(names, values):
- if normal_formatter is not None:
- value = normal_formatter(value)
- self.outf.write("\t%s\t%s\n" % (value, name))
-
- def progress_handler(self, cmd):
- """Process a ProgressCommand."""
- self.cmd_counts[cmd.name] += 1
-
- def blob_handler(self, cmd):
- """Process a BlobCommand."""
- self.cmd_counts[cmd.name] += 1
- if cmd.mark is None:
- self.blobs['unmarked'].add(cmd.id)
- else:
- self.blobs['new'].add(cmd.id)
- # Marks can be re-used so remove it from used if already there.
- # Note: we definitely do NOT want to remove it from multi if
- # it's already in that set.
- try:
- self.blobs['used'].remove(cmd.id)
- except KeyError:
- pass
-
- def checkpoint_handler(self, cmd):
- """Process a CheckpointCommand."""
- self.cmd_counts[cmd.name] += 1
-
- def commit_handler(self, cmd):
- """Process a CommitCommand."""
- self.cmd_counts[cmd.name] += 1
- self.committers.add(cmd.committer)
- if cmd.author is not None:
- self.separate_authors_found = True
- for fc in cmd.file_iter():
- self.file_cmd_counts[fc.name] += 1
- if isinstance(fc, commands.FileModifyCommand):
- if fc.is_executable:
- self.executables_found = True
- if fc.kind == commands.SYMLINK_KIND:
- self.symlinks_found = True
- if fc.dataref is not None:
- if fc.dataref[0] == ':':
- self._track_blob(fc.dataref)
- else:
- self.sha_blob_references = True
- elif isinstance(fc, commands.FileRenameCommand):
- self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path)
- elif isinstance(fc, commands.FileCopyCommand):
- self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)
-
- # Track the heads
- parents = self.cache_mgr.track_heads(cmd)
-
- # Track the parent counts
- parent_count = len(parents)
- if self.parent_counts.has_key(parent_count):
- self.parent_counts[parent_count] += 1
- else:
- self.parent_counts[parent_count] = 1
- if parent_count > self.max_parent_count:
- self.max_parent_count = parent_count
-
- # Remember the merges
- if cmd.merges:
- #self.merges.setdefault(cmd.ref, set()).update(cmd.merges)
- for merge in cmd.merges:
- if merge in self.merges:
- self.merges[merge] += 1
- else:
- self.merges[merge] = 1
-
- def reset_handler(self, cmd):
- """Process a ResetCommand."""
- self.cmd_counts[cmd.name] += 1
- if cmd.ref.startswith('refs/tags/'):
- self.lightweight_tags += 1
- else:
- if cmd.from_ is not None:
- self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)
-
- def tag_handler(self, cmd):
- """Process a TagCommand."""
- self.cmd_counts[cmd.name] += 1
-
- def _track_blob(self, mark):
- if mark in self.blob_ref_counts:
- self.blob_ref_counts[mark] += 1
- pass
- elif mark in self.blobs['used']:
- self.blob_ref_counts[mark] = 2
- self.blobs['used'].remove(mark)
- elif mark in self.blobs['new']:
- self.blobs['used'].add(mark)
- self.blobs['new'].remove(mark)
- else:
- self.blobs['unknown'].add(mark)
-
-def _found(b):
- """Format a found boolean as a string."""
- return ['no', 'found'][b]
-
-def _iterable_as_config_list(s):
- """Format an iterable as a sequence of comma-separated strings.
-
- To match what ConfigObj expects, a single item list has a trailing comma.
- """
- items = sorted(s)
- if len(items) == 1:
- return "%s," % (items[0],)
- else:
- return ", ".join(items)
diff --git a/processors/query_processor.py b/processors/query_processor.py
deleted file mode 100644
index dfee745..0000000
--- a/processors/query_processor.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Import processor that queries the input (and doesn't import)."""
-
-
-from bzrlib.plugins.fastimport import (
- commands,
- processor,
- )
-
-
-class QueryProcessor(processor.ImportProcessor):
- """An import processor that queries the input.
-
- No changes to the current repository are made.
- """
-
- known_params = commands.COMMAND_NAMES + commands.FILE_COMMAND_NAMES
-
- def __init__(self, target=None, params=None, verbose=False):
- # Allow creation without a target
- processor.ImportProcessor.__init__(self, target, params, verbose)
- self.parsed_params = {}
- if params:
- for name, value in params.iteritems():
- if value == 1:
- # All fields
- fields = None
- else:
- fields = value.split(',')
- self.parsed_params[name] = fields
-
- def pre_handler(self, cmd):
- """Hook for logic before each handler starts."""
- if self.parsed_params.has_key(cmd.name):
- fields = self.parsed_params[cmd.name]
- str = cmd.dump_str(fields, self.parsed_params, self.verbose)
- print "%s" % (str,)
-
- def progress_handler(self, cmd):
- """Process a ProgressCommand."""
- pass
-
- def blob_handler(self, cmd):
- """Process a BlobCommand."""
- pass
-
- def checkpoint_handler(self, cmd):
- """Process a CheckpointCommand."""
- pass
-
- def commit_handler(self, cmd):
- """Process a CommitCommand."""
- for fc in cmd.file_iter():
- pass
-
- def reset_handler(self, cmd):
- """Process a ResetCommand."""
- pass
-
- def tag_handler(self, cmd):
- """Process a TagCommand."""
- pass
diff --git a/revision_store.py b/revision_store.py
index d2ab2d3..4ec4ba3 100644
--- a/revision_store.py
+++ b/revision_store.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -16,9 +16,146 @@
"""An abstraction of a repository providing just the bits importing needs."""
+import cStringIO
-from bzrlib import errors, inventory, knit, lru_cache, osutils
-from bzrlib import revision as _mod_revision
+from bzrlib import (
+ errors,
+ graph as _mod_graph,
+ inventory,
+ knit,
+ lru_cache,
+ osutils,
+ revision as _mod_revision,
+ trace,
+ )
+
+
+class _TreeShim(object):
+ """Fake a Tree implementation.
+
+ This implements just enough of the tree api to make commit builder happy.
+ """
+
+ def __init__(self, repo, basis_inv, inv_delta, content_provider):
+ self._repo = repo
+ self._content_provider = content_provider
+ self._basis_inv = basis_inv
+ self._inv_delta = inv_delta
+ self._new_info_by_id = dict([(file_id, (new_path, ie))
+ for _, new_path, file_id, ie in inv_delta])
+
+ def id2path(self, file_id):
+ if file_id in self._new_info_by_id:
+ new_path = self._new_info_by_id[file_id][0]
+ if new_path is None:
+ raise errors.NoSuchId(self, file_id)
+ return new_path
+ return self._basis_inv.id2path(file_id)
+
+ def path2id(self, path):
+ # CommitBuilder currently only requires access to the root id. We don't
+ # build a map of renamed files, etc. One possibility if we ever *do*
+ # need more than just root, is to defer to basis_inv.path2id() and then
+ # check if the file_id is in our _new_info_by_id dict. And in that
+ # case, return _new_info_by_id[file_id][0]
+ if path != '':
+ raise NotImplementedError(_TreeShim.path2id)
+ # TODO: Handle root renames?
+ return self._basis_inv.root.file_id
+
+ def get_file_with_stat(self, file_id, path=None):
+ content = self.get_file_text(file_id, path)
+ sio = cStringIO.StringIO(content)
+ return sio, None
+
+ def get_file_text(self, file_id, path=None):
+ try:
+ return self._content_provider(file_id)
+ except KeyError:
+ # The content wasn't shown as 'new'. Just validate this fact
+ assert file_id not in self._new_info_by_id
+ old_ie = self._basis_inv[file_id]
+ old_text_key = (file_id, old_ie.revision)
+ stream = self._repo.texts.get_record_stream([old_text_key],
+ 'unordered', True)
+ return stream.next().get_bytes_as('fulltext')
+
+ def get_symlink_target(self, file_id):
+ if file_id in self._new_info_by_id:
+ ie = self._new_info_by_id[file_id][1]
+ return ie.symlink_target
+ return self._basis_inv[file_id].symlink_target
+
+ def get_reference_revision(self, file_id, path=None):
+ raise NotImplementedError(_TreeShim.get_reference_revision)
+
+ def _delta_to_iter_changes(self):
+ """Convert the inv_delta into an iter_changes repr."""
+ # iter_changes is:
+ # (file_id,
+ # (old_path, new_path),
+ # content_changed,
+ # (old_versioned, new_versioned),
+ # (old_parent_id, new_parent_id),
+ # (old_name, new_name),
+ # (old_kind, new_kind),
+ # (old_exec, new_exec),
+ # )
+ basis_inv = self._basis_inv
+ for old_path, new_path, file_id, ie in self._inv_delta:
+ # Perf: Would this be faster if we did 'if file_id in basis_inv'?
+ # Since the *very* common case is that the file already exists, it
+ # probably is better to optimize for that
+ try:
+ old_ie = basis_inv[file_id]
+ except errors.NoSuchId:
+ old_ie = None
+                if ie is None:
+                    raise AssertionError('How is both old and new None?')
+                    change = (file_id,
+                              (old_path, new_path),
+                              False,
+                              (False, False),
+                              (None, None),
+                              (None, None),
+                              (None, None),
+                              (None, None),
+                              )
+                change = (file_id,
+                          (old_path, new_path),
+                          True,
+                          (False, True),
+                          (None, ie.parent_id),
+                          (None, ie.name),
+                          (None, ie.kind),
+                          (None, ie.executable),
+                          )
+ else:
+ if ie is None:
+ change = (file_id,
+ (old_path, new_path),
+ True,
+ (True, False),
+ (old_ie.parent_id, None),
+ (old_ie.name, None),
+ (old_ie.kind, None),
+ (old_ie.executable, None),
+ )
+ else:
+ content_modified = (ie.text_sha1 != old_ie.text_sha1
+ or ie.text_size != old_ie.text_size)
+ # TODO: ie.kind != old_ie.kind
+ # TODO: symlinks changing targets, content_modified?
+ change = (file_id,
+ (old_path, new_path),
+ content_modified,
+ (True, True),
+ (old_ie.parent_id, ie.parent_id),
+ (old_ie.name, ie.name),
+ (old_ie.kind, ie.kind),
+ (old_ie.executable, ie.executable),
+ )
+ yield change
class AbstractRevisionStore(object):
@@ -33,6 +170,8 @@ class AbstractRevisionStore(object):
:param repository: the target repository
"""
self.repo = repo
+ self._graph = None
+ self._use_known_graph = True
self._supports_chks = getattr(repo._format, 'supports_chks', False)
def expects_rich_root(self):
@@ -224,29 +363,66 @@ class AbstractRevisionStore(object):
including an empty inventory for the missing revisions
If None, a default implementation is provided.
"""
- # Get the non-ghost parents and their inventories
- if inventories_provider is None:
- inventories_provider = self._default_inventories_provider
- present_parents, parent_invs = inventories_provider(rev.parent_ids)
-
- # Load the inventory
- try:
- rev_id = rev.revision_id
- rev.inventory_sha1, inv = self._add_inventory_by_delta(
- rev_id, basis_inv, inv_delta, present_parents, parent_invs)
- except errors.RevisionAlreadyPresent:
+ # TODO: set revision_id = rev.revision_id
+ builder = self.repo._commit_builder_class(self.repo,
+ parents=rev.parent_ids, config=None, timestamp=rev.timestamp,
+ timezone=rev.timezone, committer=rev.committer,
+ revprops=rev.properties, revision_id=rev.revision_id)
+ if self._graph is None and self._use_known_graph:
+ if (getattr(_mod_graph, 'GraphThunkIdsToKeys', None) and
+ getattr(_mod_graph.GraphThunkIdsToKeys, "add_node", None) and
+ getattr(self.repo, "get_known_graph_ancestry", None)):
+ self._graph = self.repo.get_known_graph_ancestry(
+ rev.parent_ids)
+ else:
+ self._use_known_graph = False
+ if self._graph is not None:
+ orig_heads = builder._heads
+ def thunked_heads(file_id, revision_ids):
+ # self._graph thinks in terms of keys, not ids, so translate
+ # them
+ # old_res = orig_heads(file_id, revision_ids)
+ if len(revision_ids) < 2:
+ res = set(revision_ids)
+ else:
+ res = set(self._graph.heads(revision_ids))
+ # if old_res != res:
+ # import pdb; pdb.set_trace()
+ return res
+ builder._heads = thunked_heads
+
+ if rev.parent_ids:
+ basis_rev_id = rev.parent_ids[0]
+ else:
+ basis_rev_id = _mod_revision.NULL_REVISION
+ tree = _TreeShim(self.repo, basis_inv, inv_delta, text_provider)
+ changes = tree._delta_to_iter_changes()
+ for (file_id, path, fs_hash) in builder.record_iter_changes(
+ tree, basis_rev_id, changes):
+ # So far, we don't *do* anything with the result
pass
+ builder.finish_inventory()
+ # TODO: This is working around a bug in the bzrlib code base.
+ # 'builder.finish_inventory()' ends up doing:
+ # self.inv_sha1 = self.repository.add_inventory_by_delta(...)
+ # However, add_inventory_by_delta returns (sha1, inv)
+ # And we *want* to keep a handle on both of those objects
+ if isinstance(builder.inv_sha1, tuple):
+ builder.inv_sha1, builder.new_inventory = builder.inv_sha1
+ # This is a duplicate of Builder.commit() since we already have the
+ # Revision object, and we *don't* want to call commit_write_group()
+ rev.inv_sha1 = builder.inv_sha1
+ builder.repository.add_revision(builder._new_revision_id, rev,
+ builder.new_inventory, builder._config)
+ if self._graph is not None:
+ # TODO: Use StaticTuple and .intern() for these things
+ self._graph.add_node(builder._new_revision_id, rev.parent_ids)
- # Load the texts, signature and revision
- file_rev_ids_needing_texts = [(id, ie.revision)
- for _, n, id, ie in inv_delta
- if n is not None and ie.revision == rev_id]
- self._load_texts_for_file_rev_ids(file_rev_ids_needing_texts,
- text_provider, parents_provider)
if signature is not None:
- self.repo.add_signature_text(rev_id, signature)
- self._add_revision(rev, inv)
- return inv
+ raise AssertionError('signatures not guaranteed yet')
+ self.repo.add_signature_text(rev.revision_id, signature)
+ # self._add_revision(rev, inv)
+ return builder.revision_tree().inventory
def _non_root_entries_iter(self, inv, revision_id):
if hasattr(inv, 'iter_non_root_entries'):
@@ -305,14 +481,19 @@ class AbstractRevisionStore(object):
"""
if len(parents):
if self._supports_chks:
- validator, new_inv = self.repo.add_inventory_by_delta(parents[0],
- inv_delta, revision_id, parents, basis_inv=basis_inv,
- propagate_caches=False)
+ try:
+ validator, new_inv = self.repo.add_inventory_by_delta(parents[0],
+ inv_delta, revision_id, parents, basis_inv=basis_inv,
+ propagate_caches=False)
+ except errors.InconsistentDelta:
+ #print "BASIS INV IS\n%s\n" % "\n".join([str(i) for i in basis_inv.iter_entries_by_dir()])
+ trace.mutter("INCONSISTENT DELTA IS:\n%s\n" % "\n".join([str(i) for i in inv_delta]))
+ raise
else:
validator, new_inv = self.repo.add_inventory_by_delta(parents[0],
inv_delta, revision_id, parents)
else:
- if hasattr(basis_inv, 'create_by_apply_delta'):
+ if isinstance(basis_inv, inventory.CHKInventory):
new_inv = basis_inv.create_by_apply_delta(inv_delta, revision_id)
else:
new_inv = inventory.Inventory(revision_id=revision_id)
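
As a worked example of the change-tuple layout documented in _delta_to_iter_changes() above: a file newly added at 'doc/NEWS' (no entry in the basis inventory) yields a record shaped like the following, where 'news-id' and 'doc-id' are made-up ids:

    ('news-id',            # file_id
     (None, 'doc/NEWS'),   # old_path, new_path
     True,                 # content changed
     (False, True),        # old_versioned, new_versioned
     (None, 'doc-id'),     # old_parent_id, new_parent_id
     (None, 'NEWS'),       # old_name, new_name
     (None, 'file'),       # old_kind, new_kind
     (None, False))        # old_exec, new_exec
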
diff --git a/setup.py b/setup.py
index f4d5d0c..23a7a93 100755
--- a/setup.py
+++ b/setup.py
@@ -3,12 +3,12 @@ from distutils.core import setup
bzr_plugin_name = 'fastimport'
-bzr_plugin_version = (0, 9, 0, 'dev', 0)
+bzr_plugin_version = (0, 10, 0, 'dev', 0)
bzr_minimum_version = (1, 1, 0)
bzr_maximum_version = None
if __name__ == '__main__':
- setup(name="fastimport",
+ setup(name="bzr-fastimport",
version="0.9.0dev0",
description="stream-based import into and export from Bazaar.",
author="Canonical Ltd",
@@ -17,6 +17,7 @@ if __name__ == '__main__':
url="https://launchpad.net/bzr-fastimport",
scripts=[],
packages=['bzrlib.plugins.fastimport',
+ 'bzrlib.plugins.fastimport.exporters',
'bzrlib.plugins.fastimport.processors',
'bzrlib.plugins.fastimport.tests',
],
diff --git a/tests/__init__.py b/tests/__init__.py
index 711b605..47441e6 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -17,19 +17,35 @@
"""Tests for bzr-fastimport."""
-from bzrlib.tests.TestUtil import TestLoader, TestSuite
+from bzrlib import errors as bzr_errors
+from bzrlib.tests import Feature, TestLoader
+from bzrlib.plugins.fastimport import load_fastimport
+
+
+class _FastimportFeature(Feature):
+
+ def _probe(self):
+ try:
+ load_fastimport()
+ except bzr_errors.DependencyNotPresent:
+ return False
+ return True
+
+ def feature_name(self):
+ return 'fastimport'
+
+
+FastimportFeature = _FastimportFeature()
+
def test_suite():
- module_names = [
- 'bzrlib.plugins.fastimport.tests.test_branch_mapper',
- 'bzrlib.plugins.fastimport.tests.test_commands',
- 'bzrlib.plugins.fastimport.tests.test_errors',
- 'bzrlib.plugins.fastimport.tests.test_filter_processor',
- 'bzrlib.plugins.fastimport.tests.test_generic_processor',
- 'bzrlib.plugins.fastimport.tests.test_head_tracking',
- 'bzrlib.plugins.fastimport.tests.test_helpers',
- 'bzrlib.plugins.fastimport.tests.test_parser',
- ]
+ module_names = [__name__ + '.' + x for x in [
+ 'test_commands',
+ 'test_exporter',
+ 'test_branch_mapper',
+ 'test_generic_processor',
+ 'test_revision_store',
+ ]]
loader = TestLoader()
return loader.loadTestsFromModuleNames(module_names)
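
Tests that exercise the plugin declare the feature on the test class and are skipped cleanly when python-fastimport is missing; a minimal sketch (the class name is hypothetical):

    from bzrlib import tests
    from bzrlib.plugins.fastimport.tests import FastimportFeature

    class TestNeedsFastimport(tests.TestCase):
        # Skipped automatically when python-fastimport is not importable.
        _test_needs_features = [FastimportFeature]
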
diff --git a/tests/test_branch_mapper.py b/tests/test_branch_mapper.py
index fe1b533..6d6f170 100644
--- a/tests/test_branch_mapper.py
+++ b/tests/test_branch_mapper.py
@@ -22,47 +22,49 @@ from bzrlib.plugins.fastimport import (
branch_mapper,
)
+from bzrlib.plugins.fastimport.tests import (
+ FastimportFeature,
+ )
+
class TestBranchMapper(tests.TestCase):
+ _test_needs_features = [FastimportFeature]
+
def test_git_to_bzr(self):
m = branch_mapper.BranchMapper()
- git_refs = [
- 'refs/heads/master',
- 'refs/heads/foo',
- 'refs/tags/master',
- 'refs/tags/foo',
- 'refs/remotes/origin/master',
- 'refs/remotes/origin/foo',
- ]
- git_to_bzr_map = m.git_to_bzr(git_refs)
- self.assertEqual(git_to_bzr_map, {
+ for git, bzr in {
'refs/heads/master': 'trunk',
'refs/heads/foo': 'foo',
'refs/tags/master': 'trunk.tag',
'refs/tags/foo': 'foo.tag',
'refs/remotes/origin/master': 'trunk.remote',
'refs/remotes/origin/foo': 'foo.remote',
- })
+ }.items():
+ self.assertEqual(m.git_to_bzr(git), bzr)
+
+ def test_git_to_bzr_with_slashes(self):
+ m = branch_mapper.BranchMapper()
+ for git, bzr in {
+ 'refs/heads/master/slave': 'master/slave',
+ 'refs/heads/foo/bar': 'foo/bar',
+ 'refs/tags/master/slave': 'master/slave.tag',
+ 'refs/tags/foo/bar': 'foo/bar.tag',
+ 'refs/remotes/origin/master/slave': 'master/slave.remote',
+ 'refs/remotes/origin/foo/bar': 'foo/bar.remote',
+ }.items():
+ self.assertEqual(m.git_to_bzr(git), bzr)
def test_git_to_bzr_for_trunk(self):
# As 'master' in git is mapped to trunk in bzr, we need to handle
# 'trunk' in git in a sensible way.
m = branch_mapper.BranchMapper()
- git_refs = [
- 'refs/heads/trunk',
- 'refs/tags/trunk',
- 'refs/remotes/origin/trunk',
- 'refs/heads/git-trunk',
- 'refs/tags/git-trunk',
- 'refs/remotes/origin/git-trunk',
- ]
- git_to_bzr_map = m.git_to_bzr(git_refs)
- self.assertEqual(git_to_bzr_map, {
+ for git, bzr in {
'refs/heads/trunk': 'git-trunk',
'refs/tags/trunk': 'git-trunk.tag',
'refs/remotes/origin/trunk': 'git-trunk.remote',
'refs/heads/git-trunk': 'git-git-trunk',
'refs/tags/git-trunk': 'git-git-trunk.tag',
'refs/remotes/origin/git-trunk':'git-git-trunk.remote',
- })
+ }.items():
+ self.assertEqual(m.git_to_bzr(git), bzr)
diff --git a/tests/test_commands.py b/tests/test_commands.py
index 5eb9418..81a43c8 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2009 Canonical Ltd
+# Copyright (C) 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -14,268 +14,45 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""Test how Commands are displayed"""
+"""Test the command implementations."""
+
+import os
+import tempfile
+import gzip
from bzrlib import tests
-from bzrlib.plugins.fastimport import (
- commands,
+from bzrlib.plugins.fastimport.cmds import (
+ _get_source_stream,
)
-
-class TestBlobDisplay(tests.TestCase):
-
- def test_blob(self):
- c = commands.BlobCommand("1", "hello world")
- self.assertEqual("blob\nmark :1\ndata 11\nhello world", repr(c))
-
- def test_blob_no_mark(self):
- c = commands.BlobCommand(None, "hello world")
- self.assertEqual("blob\ndata 11\nhello world", repr(c))
-
-
-class TestCheckpointDisplay(tests.TestCase):
-
- def test_checkpoint(self):
- c = commands.CheckpointCommand()
- self.assertEqual("checkpoint", repr(c))
-
-
-class TestCommitDisplay(tests.TestCase):
-
- def test_commit(self):
- # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
- committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
- c = commands.CommitCommand("refs/heads/master", "bbb", None, committer,
- "release v1.0", ":aaa", None, None)
- self.assertEqualDiff(
- "commit refs/heads/master\n"
- "mark :bbb\n"
- "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
- "data 12\n"
- "release v1.0\n"
- "from :aaa",
- repr(c))
-
- def test_commit_unicode_committer(self):
- # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
- name = u'\u013d\xf3r\xe9m \xcdp\u0161\xfam'
- name_utf8 = name.encode('utf8')
- committer = (name, 'test@example.com', 1234567890, -6 * 3600)
- c = commands.CommitCommand("refs/heads/master", "bbb", None, committer,
- "release v1.0", ":aaa", None, None)
- self.assertEqualDiff(
- "commit refs/heads/master\n"
- "mark :bbb\n"
- "committer %s <test@example.com> 1234567890 -0600\n"
- "data 12\n"
- "release v1.0\n"
- "from :aaa" % (name_utf8,),
- repr(c))
-
- def test_commit_no_mark(self):
- # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
- committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
- c = commands.CommitCommand("refs/heads/master", None, None, committer,
- "release v1.0", ":aaa", None, None)
- self.assertEqualDiff(
- "commit refs/heads/master\n"
- "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
- "data 12\n"
- "release v1.0\n"
- "from :aaa",
- repr(c))
-
- def test_commit_no_from(self):
- # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
- committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
- c = commands.CommitCommand("refs/heads/master", "bbb", None, committer,
- "release v1.0", None, None, None)
- self.assertEqualDiff(
- "commit refs/heads/master\n"
- "mark :bbb\n"
- "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
- "data 12\n"
- "release v1.0",
- repr(c))
-
- def test_commit_with_author(self):
- # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
- author = ('Sue Wong', 'sue@example.com', 1234565432, -6 * 3600)
- committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
- c = commands.CommitCommand("refs/heads/master", "bbb", author,
- committer, "release v1.0", ":aaa", None, None)
- self.assertEqualDiff(
- "commit refs/heads/master\n"
- "mark :bbb\n"
- "author Sue Wong <sue@example.com> 1234565432 -0600\n"
- "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
- "data 12\n"
- "release v1.0\n"
- "from :aaa",
- repr(c))
-
- def test_commit_with_merges(self):
- # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
- committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
- c = commands.CommitCommand("refs/heads/master", "ddd", None, committer,
- "release v1.0", ":aaa", [':bbb', ':ccc'], None)
- self.assertEqualDiff(
- "commit refs/heads/master\n"
- "mark :ddd\n"
- "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
- "data 12\n"
- "release v1.0\n"
- "from :aaa\n"
- "merge :bbb\n"
- "merge :ccc",
- repr(c))
-
- def test_commit_with_filecommands(self):
- file_cmds = iter([
- commands.FileDeleteCommand('readme.txt'),
- commands.FileModifyCommand('NEWS', 'file', False, None,
- 'blah blah blah'),
- ])
- # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
- committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
- c = commands.CommitCommand("refs/heads/master", "bbb", None, committer,
- "release v1.0", ":aaa", None, file_cmds)
- self.assertEqualDiff(
- "commit refs/heads/master\n"
- "mark :bbb\n"
- "committer Joe Wong <joe@example.com> 1234567890 -0600\n"
- "data 12\n"
- "release v1.0\n"
- "from :aaa\n"
- "D readme.txt\n"
- "M 644 inline NEWS\n"
- "data 14\n"
- "blah blah blah",
- repr(c))
-
-
-class TestProgressDisplay(tests.TestCase):
-
- def test_progress(self):
- c = commands.ProgressCommand("doing foo")
- self.assertEqual("progress doing foo", repr(c))
-
-
-class TestResetDisplay(tests.TestCase):
-
- def test_reset(self):
- c = commands.ResetCommand("refs/tags/v1.0", ":xxx")
- self.assertEqual("reset refs/tags/v1.0\nfrom :xxx\n", repr(c))
-
- def test_reset_no_from(self):
- c = commands.ResetCommand("refs/remotes/origin/master", None)
- self.assertEqual("reset refs/remotes/origin/master", repr(c))
-
-
-class TestTagDisplay(tests.TestCase):
-
- def test_tag(self):
- # tagger tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
- tagger = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
- c = commands.TagCommand("refs/tags/v1.0", ":xxx", tagger, "create v1.0")
- self.assertEqual(
- "tag refs/tags/v1.0\n"
- "from :xxx\n"
- "tagger Joe Wong <joe@example.com> 1234567890 -0600\n"
- "data 11\n"
- "create v1.0",
- repr(c))
-
- def test_tag_no_from(self):
- tagger = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600)
- c = commands.TagCommand("refs/tags/v1.0", None, tagger, "create v1.0")
- self.assertEqualDiff(
- "tag refs/tags/v1.0\n"
- "tagger Joe Wong <joe@example.com> 1234567890 -0600\n"
- "data 11\n"
- "create v1.0",
- repr(c))
-
-
-class TestFileModifyDisplay(tests.TestCase):
-
- def test_filemodify_file(self):
- c = commands.FileModifyCommand("foo/bar", "file", False, ":23", None)
- self.assertEqual("M 644 :23 foo/bar", repr(c))
-
- def test_filemodify_file_executable(self):
- c = commands.FileModifyCommand("foo/bar", "file", True, ":23", None)
- self.assertEqual("M 755 :23 foo/bar", repr(c))
-
- def test_filemodify_file_internal(self):
- c = commands.FileModifyCommand("foo/bar", "file", False, None,
- "hello world")
- self.assertEqual("M 644 inline foo/bar\ndata 11\nhello world", repr(c))
-
- def test_filemodify_symlink(self):
- c = commands.FileModifyCommand("foo/bar", "symlink", False, None, "baz")
- self.assertEqual("M 120000 inline foo/bar\ndata 3\nbaz", repr(c))
-
-
-class TestFileDeleteDisplay(tests.TestCase):
-
- def test_filedelete(self):
- c = commands.FileDeleteCommand("foo/bar")
- self.assertEqual("D foo/bar", repr(c))
-
-
-class TestFileCopyDisplay(tests.TestCase):
-
- def test_filecopy(self):
- c = commands.FileCopyCommand("foo/bar", "foo/baz")
- self.assertEqual("C foo/bar foo/baz", repr(c))
-
- def test_filecopy_quoted(self):
- # Check the first path is quoted if it contains spaces
- c = commands.FileCopyCommand("foo/b a r", "foo/b a z")
- self.assertEqual('C "foo/b a r" foo/b a z', repr(c))
-
-
-class TestFileRenameDisplay(tests.TestCase):
-
- def test_filerename(self):
- c = commands.FileRenameCommand("foo/bar", "foo/baz")
- self.assertEqual("R foo/bar foo/baz", repr(c))
-
- def test_filerename_quoted(self):
- # Check the first path is quoted if it contains spaces
- c = commands.FileRenameCommand("foo/b a r", "foo/b a z")
- self.assertEqual('R "foo/b a r" foo/b a z', repr(c))
-
-
-class TestFileDeleteAllDisplay(tests.TestCase):
-
- def test_filedeleteall(self):
- c = commands.FileDeleteAllCommand()
- self.assertEqual("deleteall", repr(c))
+from bzrlib.plugins.fastimport.tests import (
+ FastimportFeature,
+ )
-class TestPathChecking(tests.TestCase):
+class TestSourceStream(tests.TestCase):
- def test_filemodify_path_checking(self):
- self.assertRaises(ValueError, commands.FileModifyCommand, "",
- "file", False, None, "text")
- self.assertRaises(ValueError, commands.FileModifyCommand, None,
- "file", False, None, "text")
+ _test_needs_features = [FastimportFeature]
- def test_filedelete_path_checking(self):
- self.assertRaises(ValueError, commands.FileDeleteCommand, "")
- self.assertRaises(ValueError, commands.FileDeleteCommand, None)
+ def test_get_source_stream_stdin(self):
+ # - returns standard in
+ self.assertIsNot(None, _get_source_stream("-"))
- def test_filerename_path_checking(self):
- self.assertRaises(ValueError, commands.FileRenameCommand, "", "foo")
- self.assertRaises(ValueError, commands.FileRenameCommand, None, "foo")
- self.assertRaises(ValueError, commands.FileRenameCommand, "foo", "")
- self.assertRaises(ValueError, commands.FileRenameCommand, "foo", None)
+ def test_get_source_gz(self):
+ # files ending in .gz are automatically decompressed.
+ fd, filename = tempfile.mkstemp(suffix=".gz")
+ f = gzip.GzipFile(fileobj=os.fdopen(fd, "w"), mode='w')
+ f.write("bla")
+ f.close()
+ stream = _get_source_stream(filename)
+ self.assertIsNot("bla", stream.read())
- def test_filecopy_path_checking(self):
- self.assertRaises(ValueError, commands.FileCopyCommand, "", "foo")
- self.assertRaises(ValueError, commands.FileCopyCommand, None, "foo")
- self.assertRaises(ValueError, commands.FileCopyCommand, "foo", "")
- self.assertRaises(ValueError, commands.FileCopyCommand, "foo", None)
+ def test_get_source_file(self):
+ # other files are opened as regular files.
+ fd, filename = tempfile.mkstemp()
+ f = os.fdopen(fd, 'w')
+ f.write("bla")
+ f.close()
+ stream = _get_source_stream(filename)
+ self.assertIsNot("bla", stream.read())
diff --git a/tests/test_errors.py b/tests/test_errors.py
deleted file mode 100644
index ac63b29..0000000
--- a/tests/test_errors.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Test the Import errors"""
-
-from bzrlib import tests
-
-from bzrlib.plugins.fastimport import (
- errors,
- )
-
-
-class TestErrors(tests.TestCase):
-
- def test_MissingBytes(self):
- e = errors.MissingBytes(99, 10, 8)
- self.assertEqual("line 99: Unexpected EOF - expected 10 bytes, found 8",
- str(e))
-
- def test_MissingTerminator(self):
- e = errors.MissingTerminator(99, '---')
- self.assertEqual("line 99: Unexpected EOF - expected '---' terminator",
- str(e))
-
- def test_InvalidCommand(self):
- e = errors.InvalidCommand(99, 'foo')
- self.assertEqual("line 99: Invalid command 'foo'",
- str(e))
-
- def test_MissingSection(self):
- e = errors.MissingSection(99, 'foo', 'bar')
- self.assertEqual("line 99: Command foo is missing section bar",
- str(e))
-
- def test_BadFormat(self):
- e = errors.BadFormat(99, 'foo', 'bar', 'xyz')
- self.assertEqual("line 99: Bad format for section bar in "
- "command foo: found 'xyz'",
- str(e))
-
- def test_InvalidTimezone(self):
- e = errors.InvalidTimezone(99, 'aa:bb')
- self.assertEqual('aa:bb', e.timezone)
- self.assertEqual('', e.reason)
- self.assertEqual("line 99: Timezone 'aa:bb' could not be converted.",
- str(e))
- e = errors.InvalidTimezone(99, 'aa:bb', 'Non-numeric hours')
- self.assertEqual('aa:bb', e.timezone)
- self.assertEqual(' Non-numeric hours', e.reason)
- self.assertEqual("line 99: Timezone 'aa:bb' could not be converted."
- " Non-numeric hours",
- str(e))
-
- def test_UnknownDateFormat(self):
- e = errors.UnknownDateFormat('aaa')
- self.assertEqual("Unknown date format 'aaa'", str(e))
-
- def test_MissingHandler(self):
- e = errors.MissingHandler('foo')
- self.assertEqual("Missing handler for command foo", str(e))
diff --git a/tests/test_exporter.py b/tests/test_exporter.py
new file mode 100644
index 0000000..fe50e3b
--- /dev/null
+++ b/tests/test_exporter.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Test the exporter."""
+
+import os
+import tempfile
+import gzip
+
+from bzrlib import tests
+
+from bzrlib.plugins.fastimport.exporter import (
+ _get_output_stream,
+ )
+
+from bzrlib.plugins.fastimport.tests import (
+ FastimportFeature,
+ )
+
+
+class TestOutputStream(tests.TestCase):
+
+ _test_needs_features = [FastimportFeature]
+
+ def test_get_output_stream_stdout(self):
+ # - returns standard out
+ self.assertIsNot(None, _get_output_stream("-"))
+
+    def test_get_output_gz(self):
+ fd, filename = tempfile.mkstemp(suffix=".gz")
+ os.close(fd)
+ stream = _get_output_stream(filename)
+ stream.write("bla")
+ stream.close()
+        # files ending in .gz are automatically compressed on write.
+ f = gzip.GzipFile(filename)
+ self.assertEquals("bla", f.read())
+ f.close()
+
+    def test_get_output_file(self):
+ # other files are opened as regular files.
+ fd, filename = tempfile.mkstemp()
+ os.close(fd)
+ stream = _get_output_stream(filename)
+ stream.write("foo")
+ stream.close()
+ f = open(filename, 'r')
+ self.assertEquals("foo", f.read())
+ f.close()
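
Taken together, the two tests above pin down _get_output_stream's contract; a short usage sketch (the filename is hypothetical):

    from bzrlib.plugins.fastimport.exporter import _get_output_stream

    # A '.gz' suffix means the stream is gzip-compressed as it is written.
    out = _get_output_stream('export.fi.gz')
    out.write('progress export started\n')
    out.close()
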
diff --git a/tests/test_filter_processor.py b/tests/test_filter_processor.py
deleted file mode 100644
index ff8a09f..0000000
--- a/tests/test_filter_processor.py
+++ /dev/null
@@ -1,877 +0,0 @@
-# Copyright (C) 2009 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Test FilterProcessor"""
-
-from cStringIO import StringIO
-
-from bzrlib import tests
-
-from bzrlib.plugins.fastimport import (
- parser,
- )
-from bzrlib.plugins.fastimport.processors.filter_processor import (
- FilterProcessor,
- )
-
-
-# A sample input stream containing all (top level) import commands
-_SAMPLE_ALL = \
-"""blob
-mark :1
-data 4
-foo
-commit refs/heads/master
-mark :2
-committer Joe <joe@example.com> 1234567890 +1000
-data 14
-Initial import
-M 644 :1 COPYING
-checkpoint
-progress first import done
-reset refs/remote/origin/master
-from :2
-tag v0.1
-from :2
-tagger Joe <joe@example.com> 1234567890 +1000
-data 12
-release v0.1
-"""
-
-
-# A sample input stream creating the following tree:
-#
-# NEWS
-# doc/README.txt
-# doc/index.txt
-_SAMPLE_WITH_DIR = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :2 NEWS
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :101
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-"""
-
-
-class TestCaseWithFiltering(tests.TestCase):
-
- def assertFiltering(self, input, params, expected):
- outf = StringIO()
- proc = FilterProcessor(None, params=params)
- proc.outf = outf
- s = StringIO(input)
- p = parser.ImportParser(s)
- proc.process(p.iter_commands)
- out = outf.getvalue()
- self.assertEqualDiff(expected, out)
-
-
-class TestNoFiltering(TestCaseWithFiltering):
-
- def test_params_not_given(self):
- self.assertFiltering(_SAMPLE_ALL, None, _SAMPLE_ALL)
-
- def test_params_are_none(self):
- params = {'include_paths': None, 'exclude_paths': None}
- self.assertFiltering(_SAMPLE_ALL, params, _SAMPLE_ALL)
-
-
-class TestIncludePaths(TestCaseWithFiltering):
-
- def test_file_in_root(self):
- # Things to note:
- # * only referenced blobs are retained
- # * from clause is dropped from the first command
- params = {'include_paths': ['NEWS']}
- self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :2 NEWS
-""")
-
- def test_file_in_subdir(self):
- # Additional things to note:
- # * new root: path is now index.txt, not doc/index.txt
- # * other files changed in matching commits are excluded
- params = {'include_paths': ['doc/index.txt']}
- self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :4 index.txt
-""")
-
- def test_file_with_changes(self):
- # Additional things to note:
- # * from updated to reference parents in the output
- params = {'include_paths': ['doc/README.txt']}
- self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-""")
-
- def test_subdir(self):
- params = {'include_paths': ['doc/']}
- self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-""")
-
- def test_multiple_files_in_subdir(self):
-        # The new root should be the subdirectory
- params = {'include_paths': ['doc/README.txt', 'doc/index.txt']}
- self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-""")
-
-
-class TestExcludePaths(TestCaseWithFiltering):
-
- def test_file_in_root(self):
- params = {'exclude_paths': ['NEWS']}
- self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-""")
-
- def test_file_in_subdir(self):
- params = {'exclude_paths': ['doc/README.txt']}
- self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :2 NEWS
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :101
-M 644 :4 doc/index.txt
-""")
-
- def test_subdir(self):
- params = {'exclude_paths': ['doc/']}
- self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :2 NEWS
-""")
-
- def test_multple_files(self):
- params = {'exclude_paths': ['doc/index.txt', 'NEWS']}
- self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 doc/README.txt
-""")
-
-
-class TestIncludeAndExcludePaths(TestCaseWithFiltering):
-
- def test_included_dir_and_excluded_file(self):
- params = {'include_paths': ['doc/'], 'exclude_paths': ['doc/index.txt']}
- self.assertFiltering(_SAMPLE_WITH_DIR, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-""")
-
-
-# A sample input stream creating the following tree:
-#
-# NEWS
-# doc/README.txt
-# doc/index.txt
-#
-# It then renames doc/README.txt => doc/README
-_SAMPLE_WITH_RENAME_INSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-R doc/README.txt doc/README
-"""
-
-# A sample input stream creating the following tree:
-#
-# NEWS
-# doc/README.txt
-# doc/index.txt
-#
-# It then renames doc/README.txt => README
-_SAMPLE_WITH_RENAME_TO_OUTSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-R doc/README.txt README
-"""
-
-# A sample input stream creating the following tree:
-#
-# NEWS
-# doc/README.txt
-# doc/index.txt
-#
-# It then renames NEWS => doc/NEWS
-_SAMPLE_WITH_RENAME_TO_INSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-R NEWS doc/NEWS
-"""
-
-class TestIncludePathsWithRenames(TestCaseWithFiltering):
-
- def test_rename_all_inside(self):
- # These rename commands ought to be kept but adjusted for the new root
- params = {'include_paths': ['doc/']}
- self.assertFiltering(_SAMPLE_WITH_RENAME_INSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-R README.txt README
-""")
-
- def test_rename_to_outside(self):
- # These rename commands become deletes
- params = {'include_paths': ['doc/']}
- self.assertFiltering(_SAMPLE_WITH_RENAME_TO_OUTSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-D README.txt
-""")
-
- def test_rename_to_inside(self):
- # This ought to create a new file but doesn't yet
- params = {'include_paths': ['doc/']}
- self.assertFiltering(_SAMPLE_WITH_RENAME_TO_INSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-""")
-
-
-# A sample input stream creating the following tree:
-#
-# NEWS
-# doc/README.txt
-# doc/index.txt
-#
-# It then copies doc/README.txt => doc/README
-_SAMPLE_WITH_COPY_INSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-C doc/README.txt doc/README
-"""
-
-# A sample input stream creating the following tree:
-#
-# NEWS
-# doc/README.txt
-# doc/index.txt
-#
-# It then copies doc/README.txt => README
-_SAMPLE_WITH_COPY_TO_OUTSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-C doc/README.txt README
-"""
-
-# A sample input stream creating the following tree:
-#
-# NEWS
-# doc/README.txt
-# doc/index.txt
-#
-# It then copies NEWS => doc/NEWS
-_SAMPLE_WITH_COPY_TO_INSIDE = _SAMPLE_WITH_DIR + \
-"""commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-C NEWS doc/NEWS
-"""
-
-
-class TestIncludePathsWithCopies(TestCaseWithFiltering):
-
- def test_copy_all_inside(self):
- # These copy commands ought to be kept but adjusted for the new root
- params = {'include_paths': ['doc/']}
- self.assertFiltering(_SAMPLE_WITH_COPY_INSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 10
-move intro
-from :102
-C README.txt README
-""")
-
- def test_copy_to_outside(self):
- # This can be ignored
- params = {'include_paths': ['doc/']}
- self.assertFiltering(_SAMPLE_WITH_COPY_TO_OUTSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-""")
-
- def test_copy_to_inside(self):
- # This ought to create a new file but doesn't yet
- params = {'include_paths': ['doc/']}
- self.assertFiltering(_SAMPLE_WITH_COPY_TO_INSIDE, params, \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 README.txt
-M 644 :4 index.txt
-""")
-
-
-# A sample input stream using deleteall commands, creating the following tree:
-#
-# NEWS
-# doc/README.txt
-# doc/index.txt
-_SAMPLE_WITH_DELETEALL = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-deleteall
-M 644 :1 doc/README.txt
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-deleteall
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-"""
-
-
-class TestIncludePathsWithDeleteAll(TestCaseWithFiltering):
-
- def test_deleteall(self):
- params = {'include_paths': ['doc/index.txt']}
- self.assertFiltering(_SAMPLE_WITH_DELETEALL, params, \
-"""blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-deleteall
-M 644 :4 index.txt
-""")
-
-
-_SAMPLE_WITH_TAGS = _SAMPLE_WITH_DIR + \
-"""tag v0.1
-from :100
-tagger d <b@c> 1234798653 +0000
-data 12
-release v0.1
-tag v0.2
-from :102
-tagger d <b@c> 1234798653 +0000
-data 12
-release v0.2
-"""
-
-class TestIncludePathsWithTags(TestCaseWithFiltering):
-
- def test_tag_retention(self):
- # If a tag references a commit with a parent we kept,
- # keep the tag but adjust 'from' accordingly.
- # Otherwise, delete the tag command.
- params = {'include_paths': ['NEWS']}
- self.assertFiltering(_SAMPLE_WITH_TAGS, params, \
-"""blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :2 NEWS
-tag v0.2
-from :101
-tagger d <b@c> 1234798653 +0000
-data 12
-release v0.2
-""")
-
-
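The retention rule the tag test above encodes can be sketched independently; kept_map and remap_tag_from are hypothetical names for illustration, not plugin code:

def remap_tag_from(tag_from, kept_map):
    # kept_map maps each mark to its nearest kept ancestor, or None;
    # for the test above: {':100': None, ':101': ':101', ':102': ':101'}
    return kept_map.get(tag_from)

# remap_tag_from(':102', kept_map) -> ':101'  => keep tag v0.2, rewrite 'from'
# remap_tag_from(':100', kept_map) -> None    => drop tag v0.1 entirely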
-_SAMPLE_WITH_RESETS = _SAMPLE_WITH_DIR + \
-"""reset refs/heads/foo
-reset refs/heads/bar
-from :102
-"""
-
-class TestIncludePathsWithResets(TestCaseWithFiltering):
-
- def test_reset_retention(self):
-        # Resets initialising a branch (without a from clause) are passed through.
- # If a reset references a commit with a parent we kept,
- # keep the reset but adjust 'from' accordingly.
- params = {'include_paths': ['NEWS']}
- self.assertFiltering(_SAMPLE_WITH_RESETS, params, \
-"""blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-M 644 :2 NEWS
-reset refs/heads/foo
-reset refs/heads/bar
-from :101
-""")
diff --git a/tests/test_generic_processor.py b/tests/test_generic_processor.py
index d4f789b..41f846e 100644
--- a/tests/test_generic_processor.py
+++ b/tests/test_generic_processor.py
@@ -17,25 +17,47 @@
import time
from bzrlib import (
- branch,
tests,
)
-
-from bzrlib.plugins.fastimport import (
- commands,
- errors,
+from bzrlib.plugins.fastimport.helpers import (
+ kind_to_mode,
)
-
-from bzrlib.plugins.fastimport.processors import (
- generic_processor,
+from bzrlib.plugins.fastimport.tests import (
+ FastimportFeature,
)
+try:
+ from fastimport import commands
+except ImportError:
+ commands = object()
+
+
+def load_tests(standard_tests, module, loader):
+    """Parameterize tests for all supported branch formats."""
+ scenarios = [
+ ('pack-0.92', {'branch_format': 'pack-0.92'}),
+ ('1.9-rich-root', {'branch_format': '1.9-rich-root'}),
+ ]
+ try:
+ from bzrlib.repofmt.groupcompress_repo import RepositoryFormat2a
+ scenarios.append(('2a', {'branch_format': '2a'}))
+ except ImportError:
+ pass
+ suite = loader.suiteClass()
+ result = tests.multiply_tests(standard_tests, scenarios, suite)
+ return result
+
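The load_tests hook above supersedes the per-format subclass hack removed further down this diff. Roughly what the multiplication amounts to, as an illustrative sketch rather than bzrlib's actual implementation: each test is cloned once per scenario and the scenario's dict is applied as instance attributes:

import copy

def apply_scenario(test, scenario):
    name, attrs = scenario               # e.g. ('2a', {'branch_format': '2a'})
    clone = copy.copy(test)
    for key, value in attrs.items():
        setattr(clone, key, value)       # overrides the class default below
    return clone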
class TestCaseForGenericProcessor(tests.TestCaseWithTransport):
+ _test_needs_features = [FastimportFeature]
+
branch_format = "pack-0.92"
def get_handler(self):
+ from bzrlib.plugins.fastimport.processors import (
+ generic_processor,
+ )
branch = self.make_branch('.', format=self.branch_format)
handler = generic_processor.GenericProcessor(branch.bzrdir)
return handler, branch
@@ -176,23 +198,24 @@ class TestImportToPackModify(TestCaseForGenericProcessor):
def file_command_iter(self, path, kind='file', content='aaa',
executable=False, to_kind=None, to_content='bbb', to_executable=None):
+
# Revno 1: create a file or symlink
# Revno 2: modify it
if to_kind is None:
to_kind = kind
if to_executable is None:
to_executable = executable
+ mode = kind_to_mode(kind, executable)
+ to_mode = kind_to_mode(to_kind, to_executable)
def command_list():
author = ['', 'bugs@a.com', time.time(), time.timezone]
committer = ['', 'elmer@a.com', time.time(), time.timezone]
def files_one():
- yield commands.FileModifyCommand(path, kind, executable,
- None, content)
+ yield commands.FileModifyCommand(path, mode, None, content)
yield commands.CommitCommand('head', '1', author,
committer, "commit 1", None, [], files_one)
def files_two():
- yield commands.FileModifyCommand(path, to_kind, to_executable,
- None, to_content)
+ yield commands.FileModifyCommand(path, to_mode, None, to_content)
yield commands.CommitCommand('head', '2', author,
committer, "commit 2", ":1", [], files_two)
return command_list
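kind_to_mode, imported from the plugin's helpers above, collapses the old (kind, executable) argument pair into a single git-style mode. A sketch of the expected mapping; the real helper is not shown in this diff, so treat the exact values as assumptions:

def kind_to_mode_sketch(kind, executable):
    if kind == 'file':
        return executable and 0100755 or 0100644
    if kind == 'symlink':
        return 0120000
    if kind == 'directory':
        return 040000
    raise AssertionError('unknown kind %r' % kind)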
@@ -292,9 +315,46 @@ class TestImportToPackModify(TestCaseForGenericProcessor):
self.assertExecutable(branch, revtree2, path, False)
+class TestImportToPackModifyTwice(TestCaseForGenericProcessor):
+    """This tests the same file being modified twice in one commit.
+
+    Note: hg-fast-export produces data like this on occasion.
+ """
+
+ def file_command_iter(self, path, kind='file', content='aaa',
+ executable=False, to_kind=None, to_content='bbb', to_executable=None):
+
+        # Revno 1: modify the same file twice within a single commit
+ if to_kind is None:
+ to_kind = kind
+ if to_executable is None:
+ to_executable = executable
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(path, kind_to_mode(kind, executable),
+ None, content)
+ yield commands.FileModifyCommand(path, kind_to_mode(to_kind, to_executable),
+ None, to_content)
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ return command_list
+
+ def test_modify_file_twice_in_root(self):
+ handler, branch = self.get_handler()
+ path = 'a'
+ handler.process(self.file_command_iter(path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(path,)])
+ self.assertContent(branch, revtree1, path, "aaa")
+ self.assertRevisionRoot(revtree1, path)
+
+
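The duplicate-modify pattern this class pins down looks like the following stream fragment when written out (illustrative only; the tests drive command objects directly, and the assertions above fix how the processor resolves the duplicate):

_TWICE_FRAGMENT = """\
M 644 inline a
data 3
aaa
M 644 inline a
data 3
bbb
"""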
class TestImportToPackModifyTricky(TestCaseForGenericProcessor):
def file_command_iter(self, path1, path2, kind='file'):
+
# Revno 1: create a file or symlink in a directory
# Revno 2: create a second file that implicitly deletes the
# first one because either:
@@ -304,12 +364,12 @@ class TestImportToPackModifyTricky(TestCaseForGenericProcessor):
author = ['', 'bugs@a.com', time.time(), time.timezone]
committer = ['', 'elmer@a.com', time.time(), time.timezone]
def files_one():
- yield commands.FileModifyCommand(path1, kind, False,
+ yield commands.FileModifyCommand(path1, kind_to_mode(kind, False),
None, "aaa")
yield commands.CommitCommand('head', '1', author,
committer, "commit 1", None, [], files_one)
def files_two():
- yield commands.FileModifyCommand(path2, kind, False,
+ yield commands.FileModifyCommand(path2, kind_to_mode(kind, False),
None, "bbb")
yield commands.CommitCommand('head', '2', author,
committer, "commit 2", ":1", [], files_two)
@@ -372,13 +432,14 @@ class TestImportToPackModifyTricky(TestCaseForGenericProcessor):
class TestImportToPackDelete(TestCaseForGenericProcessor):
def file_command_iter(self, path, kind='file'):
+
# Revno 1: create a file or symlink
# Revno 2: delete it
def command_list():
author = ['', 'bugs@a.com', time.time(), time.timezone]
committer = ['', 'elmer@a.com', time.time(), time.timezone]
def files_one():
- yield commands.FileModifyCommand(path, kind, False,
+ yield commands.FileModifyCommand(path, kind_to_mode(kind, False),
None, "aaa")
yield commands.CommitCommand('head', '1', author,
committer, "commit 1", None, [], files_one)
@@ -439,9 +500,211 @@ class TestImportToPackDelete(TestCaseForGenericProcessor):
self.assertContent(branch, revtree1, path, "aaa")
+class TestImportToPackDeleteNew(TestCaseForGenericProcessor):
+ """Test deletion of a newly added file."""
+
+ def file_command_iter(self, path, kind='file'):
+
+ # Revno 1: create a file or symlink then delete it
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(path, kind_to_mode(kind, False),
+ None, "aaa")
+ yield commands.FileDeleteCommand(path)
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ return command_list
+
+ def test_delete_new_file_in_root(self):
+ handler, branch = self.get_handler()
+ path = 'a'
+ handler.process(self.file_command_iter(path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,)
+
+ def test_delete_new_file_in_subdir(self):
+ handler, branch = self.get_handler()
+ path = 'a/a'
+ handler.process(self.file_command_iter(path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,)
+
+ def test_delete_new_symlink_in_root(self):
+ handler, branch = self.get_handler()
+ path = 'a'
+ handler.process(self.file_command_iter(path, kind='symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,)
+
+ def test_delete_new_symlink_in_subdir(self):
+ handler, branch = self.get_handler()
+ path = 'a/a'
+ handler.process(self.file_command_iter(path, kind='symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,)
+
+ def test_delete_new_file_in_deep_subdir(self):
+ handler, branch = self.get_handler()
+ path = 'a/b/c/d'
+ handler.process(self.file_command_iter(path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,)
+
+
+class TestImportToPackDeleteMultiLevel(TestCaseForGenericProcessor):
+
+ def file_command_iter(self, paths, paths_to_delete):
+
+ # Revno 1: create multiple files
+ # Revno 2: delete multiple files
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ for i, path in enumerate(paths):
+ yield commands.FileModifyCommand(path, kind_to_mode('file', False),
+ None, "aaa%d" % i)
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ def files_two():
+ for path in paths_to_delete:
+ yield commands.FileDeleteCommand(path)
+ yield commands.CommitCommand('head', '2', author,
+ committer, "commit 2", ":1", [], files_two)
+ return command_list
+
+ def test_delete_files_in_multiple_levels(self):
+ handler, branch = self.get_handler()
+ paths = ['a/b/c', 'a/b/d/e']
+ paths_to_delete = ['a/b/c', 'a/b/d/e']
+ handler.process(self.file_command_iter(paths, paths_to_delete))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[
+ ('a',), ('a/b',), ('a/b/c',),
+ ('a/b/d',), ('a/b/d/e',),
+ ])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[
+ ('a',), ('a/b',), ('a/b/c',),
+ ('a/b/d',), ('a/b/d/e',),
+ ])
+
+ def test_delete_file_single_level(self):
+ handler, branch = self.get_handler()
+ paths = ['a/b/c', 'a/b/d/e']
+ paths_to_delete = ['a/b/d/e']
+ handler.process(self.file_command_iter(paths, paths_to_delete))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[
+ ('a',), ('a/b',), ('a/b/c',),
+ ('a/b/d',), ('a/b/d/e',),
+ ])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[
+ ('a/b/d',), ('a/b/d/e',),
+ ])
+
+ def test_delete_file_complex_level(self):
+ handler, branch = self.get_handler()
+ paths = ['a/b/c', 'a/b/d/e', 'a/f/g', 'a/h', 'a/b/d/i/j']
+ paths_to_delete = ['a/b/c', 'a/b/d/e', 'a/f/g', 'a/b/d/i/j']
+ handler.process(self.file_command_iter(paths, paths_to_delete))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[
+ ('a',), ('a/b',), ('a/b/c',),
+ ('a/b/d',), ('a/b/d/e',),
+ ('a/f',), ('a/f/g',),
+ ('a/h',),
+ ('a/b/d/i',), ('a/b/d/i/j',),
+ ])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[
+ ('a/b',), ('a/b/c',),
+ ('a/b/d',), ('a/b/d/e',),
+ ('a/f',), ('a/f/g',),
+ ('a/b/d/i',), ('a/b/d/i/j',),
+ ])
+
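The assertions above encode an implicit rule: deleting files also removes any ancestor directories left empty, while directories that still hold something survive ('a' stays in the complex case because 'a/h' remains). A standalone sketch of that rule; prune_empty_dirs is a hypothetical helper, not plugin code:

import posixpath

def prune_empty_dirs(all_paths, deleted):
    remaining = set(all_paths) - set(deleted)
    removed = set()
    for path in deleted:
        parent = posixpath.dirname(path)
        while parent:
            if any(p.startswith(parent + '/') for p in remaining):
                break                    # still occupied, so are its ancestors
            removed.add(parent)
            parent = posixpath.dirname(parent)
    return removed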
+class TestImportToPackDeleteThenAdd(TestCaseForGenericProcessor):
+ """Test delete followed by an add. Merges can cause this."""
+
+ def file_command_iter(self, path, kind='file', content='aaa',
+ executable=False, to_kind=None, to_content='bbb', to_executable=None):
+
+ # Revno 1: create a file or symlink
+ # Revno 2: delete it and add it
+ if to_kind is None:
+ to_kind = kind
+ if to_executable is None:
+ to_executable = executable
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(path, kind_to_mode(kind, executable),
+ None, content)
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ def files_two():
+ yield commands.FileDeleteCommand(path)
+ yield commands.FileModifyCommand(path, kind_to_mode(to_kind, to_executable),
+ None, to_content)
+ yield commands.CommitCommand('head', '2', author,
+ committer, "commit 2", ":1", [], files_two)
+ return command_list
+
+ def test_delete_then_add_file_in_root(self):
+ handler, branch = self.get_handler()
+ path = 'a'
+ handler.process(self.file_command_iter(path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(path,)],
+ expected_added=[(path,)])
+ self.assertContent(branch, revtree1, path, "aaa")
+ self.assertContent(branch, revtree2, path, "bbb")
+ self.assertRevisionRoot(revtree1, path)
+ self.assertRevisionRoot(revtree2, path)
+
+ def test_delete_then_add_file_in_subdir(self):
+ handler, branch = self.get_handler()
+ path = 'a/a'
+ handler.process(self.file_command_iter(path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('a',), (path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(path,)],
+ expected_added=[(path,)])
+ self.assertContent(branch, revtree1, path, "aaa")
+ self.assertContent(branch, revtree2, path, "bbb")
+
+ def test_delete_then_add_symlink_in_root(self):
+ handler, branch = self.get_handler()
+ path = 'a'
+ handler.process(self.file_command_iter(path, kind='symlink'))
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(path,)],
+ expected_added=[(path,)])
+ self.assertSymlinkTarget(branch, revtree1, path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, path, "bbb")
+ self.assertRevisionRoot(revtree1, path)
+ self.assertRevisionRoot(revtree2, path)
+
+ def test_delete_then_add_symlink_in_subdir(self):
+ handler, branch = self.get_handler()
+ path = 'a/a'
+ handler.process(self.file_command_iter(path, kind='symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('a',), (path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(path,)],
+ expected_added=[(path,)])
+ self.assertSymlinkTarget(branch, revtree1, path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, path, "bbb")
+
+
class TestImportToPackDeleteDirectory(TestCaseForGenericProcessor):
def file_command_iter(self, paths, dir):
+
# Revno 1: create multiple files
# Revno 2: delete a directory holding those files
def command_list():
@@ -449,7 +712,7 @@ class TestImportToPackDeleteDirectory(TestCaseForGenericProcessor):
committer = ['', 'elmer@a.com', time.time(), time.timezone]
def files_one():
for i, path in enumerate(paths):
- yield commands.FileModifyCommand(path, 'file', False,
+ yield commands.FileModifyCommand(path, kind_to_mode('file', False),
None, "aaa%d" % i)
yield commands.CommitCommand('head', '1', author,
committer, "commit 1", None, [], files_one)
@@ -479,16 +742,68 @@ class TestImportToPackDeleteDirectory(TestCaseForGenericProcessor):
])
+class TestImportToPackDeleteDirectoryThenAddFile(TestCaseForGenericProcessor):
+ """Test deleting a directory then adding a file in the same commit."""
+
+ def file_command_iter(self, paths, dir, new_path, kind='file'):
+
+ # Revno 1: create files in a directory
+ # Revno 2: delete the directory then add a file into it
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ for i, path in enumerate(paths):
+ yield commands.FileModifyCommand(path, kind_to_mode(kind, False),
+ None, "aaa%d" % i)
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ def files_two():
+ yield commands.FileDeleteCommand(dir)
+ yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False),
+ None, "bbb")
+ yield commands.CommitCommand('head', '2', author,
+ committer, "commit 2", ":1", [], files_two)
+ return command_list
+
+ def test_delete_dir_then_add_file(self):
+ handler, branch = self.get_handler()
+ paths = ['a/b/c', 'a/b/d']
+ dir = 'a/b'
+ new_path = 'a/b/z'
+ handler.process(self.file_command_iter(paths, dir, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('a',), ('a/b',), ('a/b/c',), ('a/b/d',),])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[('a/b',), ('a/b/c',), ('a/b/d',)],
+ expected_added=[('a/b',), ('a/b/z',)])
+ self.assertContent(branch, revtree2, new_path, "bbb")
+
+ def test_delete_dir_then_add_symlink(self):
+ handler, branch = self.get_handler()
+ paths = ['a/b/c', 'a/b/d']
+ dir = 'a/b'
+ new_path = 'a/b/z'
+ handler.process(self.file_command_iter(paths, dir, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('a',), ('a/b',), ('a/b/c',), ('a/b/d',),])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[('a/b',), ('a/b/c',), ('a/b/d',)],
+ expected_added=[('a/b',), ('a/b/z',)])
+ self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+
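Ordering inside files_two above is significant: the directory delete comes first and the subsequent modify recreates 'a/b' implicitly by adding 'a/b/z'. As an equivalent stream fragment (illustrative only):

_DELETE_THEN_ADD_FRAGMENT = """\
D a/b
M 644 inline a/b/z
data 3
bbb
"""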
class TestImportToPackRename(TestCaseForGenericProcessor):
- def get_command_iter(self, old_path, new_path):
+ def get_command_iter(self, old_path, new_path, kind='file'):
+
# Revno 1: create a file or symlink
# Revno 2: rename it
def command_list():
author = ['', 'bugs@a.com', time.time(), time.timezone]
committer = ['', 'elmer@a.com', time.time(), time.timezone]
def files_one():
- yield commands.FileModifyCommand(old_path, 'file', False,
+ yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
None, "aaa")
yield commands.CommitCommand('head', '1', author,
committer, "commit 1", None, [], files_one)
@@ -498,7 +813,7 @@ class TestImportToPackRename(TestCaseForGenericProcessor):
committer, "commit 2", ":1", [], files_two)
return command_list
- def test_rename_in_root(self):
+ def test_rename_file_in_root(self):
handler, branch = self.get_handler()
old_path = 'a'
new_path = 'b'
@@ -508,14 +823,31 @@ class TestImportToPackRename(TestCaseForGenericProcessor):
self.assertRevisionRoot(revtree1, old_path)
self.assertRevisionRoot(revtree2, new_path)
- def test_rename_in_subdir(self):
+ def test_rename_symlink_in_root(self):
+ handler, branch = self.get_handler()
+ old_path = 'a'
+ new_path = 'b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)])
+ self.assertRevisionRoot(revtree1, old_path)
+ self.assertRevisionRoot(revtree2, new_path)
+
+ def test_rename_file_in_subdir(self):
handler, branch = self.get_handler()
old_path = 'a/a'
new_path = 'a/b'
handler.process(self.get_command_iter(old_path, new_path))
self.assertChanges(branch, 2, expected_renamed=[(old_path, new_path)])
- def test_move_to_new_dir(self):
+ def test_rename_symlink_in_subdir(self):
+ handler, branch = self.get_handler()
+ old_path = 'a/a'
+ new_path = 'a/b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ self.assertChanges(branch, 2, expected_renamed=[(old_path, new_path)])
+
+ def test_rename_file_to_new_dir(self):
handler, branch = self.get_handler()
old_path = 'a/a'
new_path = 'b/a'
@@ -525,10 +857,547 @@ class TestImportToPackRename(TestCaseForGenericProcessor):
expected_added=[('b',)],
expected_removed=[('a',)])
+ def test_rename_symlink_to_new_dir(self):
+ handler, branch = self.get_handler()
+ old_path = 'a/a'
+ new_path = 'b/a'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)],
+ expected_added=[('b',)],
+ expected_removed=[('a',)])
+
+
+class TestImportToPackRenameNew(TestCaseForGenericProcessor):
+ """Test rename of a newly added file."""
+
+ def get_command_iter(self, old_path, new_path, kind='file'):
+
+ # Revno 1: create a file and rename it
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+ None, "aaa")
+ yield commands.FileRenameCommand(old_path, new_path)
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ return command_list
+
+ def test_rename_new_file_in_root(self):
+ handler, branch = self.get_handler()
+ old_path = 'a'
+ new_path = 'b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(new_path,)])
+ self.assertRevisionRoot(revtree1, new_path)
+
+ def test_rename_new_symlink_in_root(self):
+ handler, branch = self.get_handler()
+ old_path = 'a'
+ new_path = 'b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(new_path,)])
+ self.assertRevisionRoot(revtree1, new_path)
+
+ def test_rename_new_file_in_subdir(self):
+ handler, branch = self.get_handler()
+ old_path = 'a/a'
+ new_path = 'a/b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('a',), (new_path,)])
+
+ def test_rename_new_symlink_in_subdir(self):
+ handler, branch = self.get_handler()
+ old_path = 'a/a'
+ new_path = 'a/b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('a',), (new_path,)])
+
+
+class TestImportToPackRenameToDeleted(TestCaseForGenericProcessor):
+ """Test rename to a destination path deleted in this commit."""
+
+ def get_command_iter(self, old_path, new_path, kind='file'):
+
+ # Revno 1: create two files
+ # Revno 2: delete one, rename the other one to that path
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+ None, "aaa")
+ yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False),
+ None, "bbb")
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ def files_two():
+ yield commands.FileDeleteCommand(new_path)
+ yield commands.FileRenameCommand(old_path, new_path)
+ yield commands.CommitCommand('head', '2', author,
+ committer, "commit 2", ":1", [], files_two)
+ return command_list
+
+ def test_rename_to_deleted_file_in_root(self):
+ handler, branch = self.get_handler()
+ old_path = 'a'
+ new_path = 'b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(old_path,), (new_path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree1, new_path, "bbb")
+ self.assertContent(branch, revtree2, new_path, "aaa")
+ self.assertRevisionRoot(revtree1, old_path)
+ self.assertRevisionRoot(revtree1, new_path)
+
+ def test_rename_to_deleted_symlink_in_root(self):
+ handler, branch = self.get_handler()
+ old_path = 'a'
+ new_path = 'b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(old_path,), (new_path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree1, new_path, "bbb")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "aaa")
+ self.assertRevisionRoot(revtree1, old_path)
+ self.assertRevisionRoot(revtree1, new_path)
+
+ def test_rename_to_deleted_file_in_subdir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd/a'
+ new_path = 'd/b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d',), (old_path,), (new_path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree1, new_path, "bbb")
+ self.assertContent(branch, revtree2, new_path, "aaa")
+
+ def test_rename_to_deleted_symlink_in_subdir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd/a'
+ new_path = 'd/b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d',), (old_path,), (new_path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree1, new_path, "bbb")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "aaa")
+
+ def test_rename_to_deleted_file_in_new_dir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd1/a'
+ new_path = 'd2/b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d1',), (old_path,), ('d2',), (new_path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[('d1',), (new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree1, new_path, "bbb")
+ self.assertContent(branch, revtree2, new_path, "aaa")
+
+ def test_rename_to_deleted_symlink_in_new_dir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd1/a'
+ new_path = 'd2/b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d1',), (old_path,), ('d2',), (new_path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[('d1',), (new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree1, new_path, "bbb")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "aaa")
+
+
+class TestImportToPackRenameModified(TestCaseForGenericProcessor):
+ """Test rename of a path previously modified in this commit."""
+
+ def get_command_iter(self, old_path, new_path, kind='file'):
+
+ # Revno 1: create a file or symlink
+ # Revno 2: modify then rename it
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+ None, "aaa")
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ def files_two():
+ yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+ None, "bbb")
+ yield commands.FileRenameCommand(old_path, new_path)
+ yield commands.CommitCommand('head', '2', author,
+ committer, "commit 2", ":1", [], files_two)
+ return command_list
+
+ def test_rename_of_modified_file_in_root(self):
+ handler, branch = self.get_handler()
+ old_path = 'a'
+ new_path = 'b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree2, new_path, "bbb")
+ self.assertRevisionRoot(revtree1, old_path)
+ self.assertRevisionRoot(revtree2, new_path)
+
+ def test_rename_of_modified_symlink_in_root(self):
+ handler, branch = self.get_handler()
+ old_path = 'a'
+ new_path = 'b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+ self.assertRevisionRoot(revtree1, old_path)
+ self.assertRevisionRoot(revtree2, new_path)
+
+ def test_rename_of_modified_file_in_subdir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd/a'
+ new_path = 'd/b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d',), (old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree2, new_path, "bbb")
+
+ def test_rename_of_modified_symlink_in_subdir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd/a'
+ new_path = 'd/b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d',), (old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+ def test_rename_of_modified_file_to_new_dir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd1/a'
+ new_path = 'd2/b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d1',), (old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)],
+ expected_added=[('d2',)],
+ expected_removed=[('d1',)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree2, new_path, "bbb")
+
+ def test_rename_of_modified_symlink_to_new_dir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd1/a'
+ new_path = 'd2/b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d1',), (old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)],
+ expected_added=[('d2',)],
+ expected_removed=[('d1',)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+
+class TestImportToPackRenameThenModify(TestCaseForGenericProcessor):
+    """Test renaming a path then modifying the new path in the same commit."""
+
+ def get_command_iter(self, old_path, new_path, kind='file'):
+
+ # Revno 1: create a file or symlink
+ # Revno 2: rename it then modify the newly created path
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+ None, "aaa")
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ def files_two():
+ yield commands.FileRenameCommand(old_path, new_path)
+ yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False),
+ None, "bbb")
+ yield commands.CommitCommand('head', '2', author,
+ committer, "commit 2", ":1", [], files_two)
+ return command_list
+
+ def test_rename_then_modify_file_in_root(self):
+ handler, branch = self.get_handler()
+ old_path = 'a'
+ new_path = 'b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree2, new_path, "bbb")
+ self.assertRevisionRoot(revtree1, old_path)
+ self.assertRevisionRoot(revtree2, new_path)
+
+ def test_rename_then_modify_file_in_subdir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd/a'
+ new_path = 'd/b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d',), (old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree2, new_path, "bbb")
+
+ def test_rename_then_modify_file_in_new_dir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd1/a'
+ new_path = 'd2/b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d1',), (old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)],
+ expected_added=[('d2',)],
+ expected_removed=[('d1',)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree2, new_path, "bbb")
+
+ def test_rename_then_modify_symlink_in_root(self):
+ handler, branch = self.get_handler()
+ old_path = 'a'
+ new_path = 'b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+ self.assertRevisionRoot(revtree1, old_path)
+ self.assertRevisionRoot(revtree2, new_path)
+
+ def test_rename_then_modify_symlink_in_subdir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd/a'
+ new_path = 'd/b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d',), (old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+ def test_rename_then_modify_symlink_in_new_dir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd1/a'
+ new_path = 'd2/b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d1',), (old_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_renamed=[(old_path, new_path)],
+ expected_added=[('d2',)],
+ expected_removed=[('d1',)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+
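Taken together, TestImportToPackRenameModified and TestImportToPackRenameThenModify above show that modify-then-rename and rename-then-modify converge on the same tree. A sketch over a dict-based tree model; apply_cmds is hypothetical, not plugin code:

def apply_cmds(tree, cmds):
    for cmd in cmds:
        if cmd[0] == 'M':
            tree[cmd[1]] = cmd[2]            # modify: set content at path
        elif cmd[0] == 'R':
            tree[cmd[2]] = tree.pop(cmd[1])  # rename: move content
    return tree

a = apply_cmds({'a': 'aaa'}, [('M', 'a', 'bbb'), ('R', 'a', 'b')])
b = apply_cmds({'a': 'aaa'}, [('R', 'a', 'b'), ('M', 'b', 'bbb')])
assert a == b == {'b': 'bbb'}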
+class TestImportToPackDeleteRenameThenModify(TestCaseForGenericProcessor):
+    """Test renaming onto a deleted path then modifying the new path in the same commit."""
+
+ def get_command_iter(self, old_path, new_path, kind='file'):
+
+ # Revno 1: create two files or symlinks
+ # Revno 2: delete one, rename the other to it then modify the newly created path
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False),
+ None, "aaa")
+ yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False),
+ None, "zzz")
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ def files_two():
+ yield commands.FileDeleteCommand(new_path)
+ yield commands.FileRenameCommand(old_path, new_path)
+ yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False),
+ None, "bbb")
+ yield commands.CommitCommand('head', '2', author,
+ committer, "commit 2", ":1", [], files_two)
+ return command_list
+
+ def test_delete_rename_then_modify_file_in_root(self):
+ handler, branch = self.get_handler()
+ old_path = 'a'
+ new_path = 'b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree1, new_path, "zzz")
+ self.assertContent(branch, revtree2, new_path, "bbb")
+ self.assertRevisionRoot(revtree1, old_path)
+ self.assertRevisionRoot(revtree1, new_path)
+ self.assertRevisionRoot(revtree2, new_path)
+
+ def test_delete_rename_then_modify_file_in_subdir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd/a'
+ new_path = 'd/b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d',), (old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree1, new_path, "zzz")
+ self.assertContent(branch, revtree2, new_path, "bbb")
+
+ def test_delete_rename_then_modify_file_in_new_dir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd1/a'
+ new_path = 'd2/b'
+ handler.process(self.get_command_iter(old_path, new_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d1',), ('d2',), (old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[('d1',), (new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertContent(branch, revtree1, old_path, "aaa")
+ self.assertContent(branch, revtree1, new_path, "zzz")
+ self.assertContent(branch, revtree2, new_path, "bbb")
+
+ def test_delete_rename_then_modify_symlink_in_root(self):
+ handler, branch = self.get_handler()
+ old_path = 'a'
+ new_path = 'b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree1, new_path, "zzz")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+ self.assertRevisionRoot(revtree1, old_path)
+ self.assertRevisionRoot(revtree1, new_path)
+ self.assertRevisionRoot(revtree2, new_path)
+
+ def test_delete_rename_then_modify_symlink_in_subdir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd/a'
+ new_path = 'd/b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d',), (old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree1, new_path, "zzz")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
+ def test_delete_rename_then_modify_symlink_in_new_dir(self):
+ handler, branch = self.get_handler()
+ old_path = 'd1/a'
+ new_path = 'd2/b'
+ handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d1',), ('d2',), (old_path,), (new_path,)])
+        # Note: the delta doesn't show the modification;
+        # the new content is verified by the assertions that follow.
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[('d1',), (new_path,)],
+ expected_renamed=[(old_path, new_path)])
+ self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree1, new_path, "zzz")
+ self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
+
class TestImportToPackRenameTricky(TestCaseForGenericProcessor):
def file_command_iter(self, path1, old_path2, new_path2, kind='file'):
+
# Revno 1: create two files or symlinks in a directory
# Revno 2: rename the second file so that it implicitly deletes the
# first one because either:
@@ -538,9 +1407,9 @@ class TestImportToPackRenameTricky(TestCaseForGenericProcessor):
author = ['', 'bugs@a.com', time.time(), time.timezone]
committer = ['', 'elmer@a.com', time.time(), time.timezone]
def files_one():
- yield commands.FileModifyCommand(path1, kind, False,
+ yield commands.FileModifyCommand(path1, kind_to_mode(kind, False),
None, "aaa")
- yield commands.FileModifyCommand(old_path2, kind, False,
+ yield commands.FileModifyCommand(old_path2, kind_to_mode(kind, False),
None, "bbb")
yield commands.CommitCommand('head', '1', author,
committer, "commit 1", None, [], files_one)
@@ -550,7 +1419,6 @@ class TestImportToPackRenameTricky(TestCaseForGenericProcessor):
committer, "commit 2", ":1", [], files_two)
return command_list
-
def test_rename_file_becomes_directory(self):
handler, branch = self.get_handler()
old_path2 = 'foo'
@@ -613,13 +1481,14 @@ class TestImportToPackRenameTricky(TestCaseForGenericProcessor):
class TestImportToPackCopy(TestCaseForGenericProcessor):
def file_command_iter(self, src_path, dest_path, kind='file'):
+
# Revno 1: create a file or symlink
# Revno 2: copy it
def command_list():
author = ['', 'bugs@a.com', time.time(), time.timezone]
committer = ['', 'elmer@a.com', time.time(), time.timezone]
def files_one():
- yield commands.FileModifyCommand(src_path, kind, False,
+ yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False),
None, "aaa")
yield commands.CommitCommand('head', '1', author,
committer, "commit 1", None, [], files_one)
@@ -700,79 +1569,344 @@ class TestImportToPackCopy(TestCaseForGenericProcessor):
self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa")
-class TestImportToPackFileKinds(TestCaseForGenericProcessor):
+class TestImportToPackCopyNew(TestCaseForGenericProcessor):
+ """Test copy of a newly added file."""
- def get_command_iter(self, path, kind, content):
+ def file_command_iter(self, src_path, dest_path, kind='file'):
+
+ # Revno 1: create a file or symlink and copy it
def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
committer = ['', 'elmer@a.com', time.time(), time.timezone]
def files_one():
- yield commands.FileModifyCommand(path, kind, False,
- None, content)
- yield commands.CommitCommand('head', '1', None,
+ yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False),
+ None, "aaa")
+ yield commands.FileCopyCommand(src_path, dest_path)
+ yield commands.CommitCommand('head', '1', author,
committer, "commit 1", None, [], files_one)
return command_list
- def test_import_plainfile(self):
+ def test_copy_new_file_in_root(self):
handler, branch = self.get_handler()
- handler.process(self.get_command_iter('foo', 'file', 'aaa'))
+ src_path = 'a'
+ dest_path = 'b'
+ handler.process(self.file_command_iter(src_path, dest_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(src_path,), (dest_path,)])
+ self.assertContent(branch, revtree1, src_path, "aaa")
+ self.assertContent(branch, revtree1, dest_path, "aaa")
+ self.assertRevisionRoot(revtree1, src_path)
+ self.assertRevisionRoot(revtree1, dest_path)
- def test_import_symlink(self):
+ def test_copy_new_file_in_subdir(self):
handler, branch = self.get_handler()
- handler.process(self.get_command_iter('foo', 'symlink', 'bar'))
+ src_path = 'a/a'
+ dest_path = 'a/b'
+ handler.process(self.file_command_iter(src_path, dest_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('a',), (src_path,), (dest_path,)])
+ self.assertContent(branch, revtree1, src_path, "aaa")
+ self.assertContent(branch, revtree1, dest_path, "aaa")
+ def test_copy_new_file_to_new_dir(self):
+ handler, branch = self.get_handler()
+ src_path = 'a/a'
+ dest_path = 'b/a'
+ handler.process(self.file_command_iter(src_path, dest_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('a',), (src_path,), ('b',), (dest_path,)])
+ self.assertContent(branch, revtree1, src_path, "aaa")
+ self.assertContent(branch, revtree1, dest_path, "aaa")
-### TODO: Parameterise tests rather than below hack
+ def test_copy_new_symlink_in_root(self):
+ handler, branch = self.get_handler()
+ src_path = 'a'
+ dest_path = 'b'
+ handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(src_path,), (dest_path,)])
+ self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa")
+ self.assertRevisionRoot(revtree1, src_path)
+ self.assertRevisionRoot(revtree1, dest_path)
-class TestImportToRichRootModify(TestImportToPackModify):
- branch_format = "1.9-rich-root"
+ def test_copy_new_symlink_in_subdir(self):
+ handler, branch = self.get_handler()
+ src_path = 'a/a'
+ dest_path = 'a/b'
+ handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('a',), (src_path,), (dest_path,)])
+ self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa")
-class TestImportToRichRootModifyTricky(TestImportToPackModifyTricky):
- branch_format = "1.9-rich-root"
+ def test_copy_new_symlink_to_new_dir(self):
+ handler, branch = self.get_handler()
+ src_path = 'a/a'
+ dest_path = 'b/a'
+ handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('a',), (src_path,), ('b',), (dest_path,)])
+ self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa")
-class TestImportToRichRootDelete(TestImportToPackDelete):
- branch_format = "1.9-rich-root"
-class TestImportToRichRootDeleteDirectory(TestImportToPackDeleteDirectory):
- branch_format = "1.9-rich-root"
+class TestImportToPackCopyToDeleted(TestCaseForGenericProcessor):
-class TestImportToRichRootRename(TestImportToPackRename):
- branch_format = "1.9-rich-root"
+ def file_command_iter(self, src_path, dest_path, kind='file'):
-class TestImportToRichRootRenameTricky(TestImportToPackRenameTricky):
- branch_format = "1.9-rich-root"
+ # Revno 1: create two files or symlinks
+    # Revno 2: delete the destination and copy the source over its path
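+    # A copy to a path deleted earlier in the same commit should
+    # succeed, leaving the source content at the destination.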
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False),
+ None, "aaa")
+ yield commands.FileModifyCommand(dest_path, kind_to_mode(kind, False),
+ None, "bbb")
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ def files_two():
+ yield commands.FileDeleteCommand(dest_path)
+ yield commands.FileCopyCommand(src_path, dest_path)
+ yield commands.CommitCommand('head', '2', author,
+ committer, "commit 2", ":1", [], files_two)
+ return command_list
-class TestImportToRichRootCopy(TestImportToPackCopy):
- branch_format = "1.9-rich-root"
+ def test_copy_to_deleted_file_in_root(self):
+ handler, branch = self.get_handler()
+ src_path = 'a'
+ dest_path = 'b'
+ handler.process(self.file_command_iter(src_path, dest_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(src_path,), (dest_path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(dest_path,)],
+ expected_added=[(dest_path,)])
+ self.assertContent(branch, revtree1, src_path, "aaa")
+ self.assertContent(branch, revtree1, dest_path, "bbb")
+ self.assertContent(branch, revtree2, src_path, "aaa")
+ self.assertContent(branch, revtree2, dest_path, "aaa")
+ self.assertRevisionRoot(revtree1, src_path)
+ self.assertRevisionRoot(revtree1, dest_path)
-class TestImportToRichRootFileKinds(TestImportToPackFileKinds):
- branch_format = "1.9-rich-root"
+ def test_copy_to_deleted_symlink_in_root(self):
+ handler, branch = self.get_handler()
+ src_path = 'a'
+ dest_path = 'b'
+ handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[(src_path,), (dest_path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(dest_path,)],
+ expected_added=[(dest_path,)])
+ self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree1, dest_path, "bbb")
+ self.assertSymlinkTarget(branch, revtree2, src_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa")
+ self.assertRevisionRoot(revtree1, src_path)
+ self.assertRevisionRoot(revtree1, dest_path)
-try:
- from bzrlib.repofmt.groupcompress_repo import RepositoryFormatCHK1
+ def test_copy_to_deleted_file_in_subdir(self):
+ handler, branch = self.get_handler()
+ src_path = 'd/a'
+ dest_path = 'd/b'
+ handler.process(self.file_command_iter(src_path, dest_path))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d',), (src_path,), (dest_path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(dest_path,)],
+ expected_added=[(dest_path,)])
+ self.assertContent(branch, revtree1, src_path, "aaa")
+ self.assertContent(branch, revtree1, dest_path, "bbb")
+ self.assertContent(branch, revtree2, src_path, "aaa")
+ self.assertContent(branch, revtree2, dest_path, "aaa")
+
+ def test_copy_to_deleted_symlink_in_subdir(self):
+ handler, branch = self.get_handler()
+ src_path = 'd/a'
+ dest_path = 'd/b'
+ handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+ revtree0, revtree1 = self.assertChanges(branch, 1,
+ expected_added=[('d',), (src_path,), (dest_path,)])
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_removed=[(dest_path,)],
+ expected_added=[(dest_path,)])
+ self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree1, dest_path, "bbb")
+ self.assertSymlinkTarget(branch, revtree2, src_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa")
- class TestImportToChkModify(TestImportToPackModify):
- branch_format = "development6-rich-root"
- class TestImportToChkModifyTricky(TestImportToPackModifyTricky):
- branch_format = "development6-rich-root"
+class TestImportToPackCopyModified(TestCaseForGenericProcessor):
+ """Test copy of file/symlink already modified in this commit."""
- class TestImportToChkDelete(TestImportToPackDelete):
- branch_format = "development6-rich-root"
+ def file_command_iter(self, src_path, dest_path, kind='file'):
- class TestImportToChkDeleteDirectory(TestImportToPackDeleteDirectory):
- branch_format = "development6-rich-root"
+ # Revno 1: create a file or symlink
+ # Revno 2: modify and copy it
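+    # The copy should pick up the modified content, since the
+    # modification precedes the copy within the same commit.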
+ def command_list():
+ author = ['', 'bugs@a.com', time.time(), time.timezone]
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False),
+ None, "aaa")
+ yield commands.CommitCommand('head', '1', author,
+ committer, "commit 1", None, [], files_one)
+ def files_two():
+ yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False),
+ None, "bbb")
+ yield commands.FileCopyCommand(src_path, dest_path)
+ yield commands.CommitCommand('head', '2', author,
+ committer, "commit 2", ":1", [], files_two)
+ return command_list
- class TestImportToChkRename(TestImportToPackRename):
- branch_format = "development6-rich-root"
+ def test_copy_of_modified_file_in_root(self):
+ handler, branch = self.get_handler()
+ src_path = 'a'
+ dest_path = 'b'
+ handler.process(self.file_command_iter(src_path, dest_path))
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_modified=[(src_path,)],
+ expected_added=[(dest_path,)])
+ self.assertContent(branch, revtree1, src_path, "aaa")
+ self.assertContent(branch, revtree2, src_path, "bbb")
+ self.assertContent(branch, revtree2, dest_path, "bbb")
+ self.assertRevisionRoot(revtree1, src_path)
+ self.assertRevisionRoot(revtree2, dest_path)
- class TestImportToChkRenameTricky(TestImportToPackRenameTricky):
- branch_format = "development6-rich-root"
+ def test_copy_of_modified_file_in_subdir(self):
+ handler, branch = self.get_handler()
+ src_path = 'd/a'
+ dest_path = 'd/b'
+ handler.process(self.file_command_iter(src_path, dest_path))
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_modified=[(src_path,)],
+ expected_added=[(dest_path,)])
+ self.assertContent(branch, revtree1, src_path, "aaa")
+ self.assertContent(branch, revtree2, src_path, "bbb")
+ self.assertContent(branch, revtree2, dest_path, "bbb")
- class TestImportToChkCopy(TestImportToPackCopy):
- branch_format = "development6-rich-root"
+ def test_copy_of_modified_file_to_new_dir(self):
+ handler, branch = self.get_handler()
+ src_path = 'd1/a'
+ dest_path = 'd2/a'
+ handler.process(self.file_command_iter(src_path, dest_path))
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_modified=[(src_path,)],
+ expected_added=[('d2',), (dest_path,)])
+ self.assertContent(branch, revtree1, src_path, "aaa")
+ self.assertContent(branch, revtree2, src_path, "bbb")
+ self.assertContent(branch, revtree2, dest_path, "bbb")
- class TestImportToChkFileKinds(TestImportToPackFileKinds):
- branch_format = "development6-rich-root"
+ def test_copy_of_modified_symlink_in_root(self):
+ handler, branch = self.get_handler()
+ src_path = 'a'
+ dest_path = 'b'
+ handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_modified=[(src_path,)],
+ expected_added=[(dest_path,)])
+ self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, src_path, "bbb")
+ self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb")
+ self.assertRevisionRoot(revtree1, src_path)
+ self.assertRevisionRoot(revtree2, dest_path)
-except ImportError:
- pass
+ def test_copy_of_modified_symlink_in_subdir(self):
+ handler, branch = self.get_handler()
+ src_path = 'd/a'
+ dest_path = 'd/b'
+ handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_modified=[(src_path,)],
+ expected_added=[(dest_path,)])
+ self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, src_path, "bbb")
+ self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb")
+
+ def test_copy_of_modified_symlink_to_new_dir(self):
+ handler, branch = self.get_handler()
+ src_path = 'd1/a'
+ dest_path = 'd2/a'
+ handler.process(self.file_command_iter(src_path, dest_path, 'symlink'))
+ revtree1, revtree2 = self.assertChanges(branch, 2,
+ expected_modified=[(src_path,)],
+ expected_added=[('d2',), (dest_path,)])
+ self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
+ self.assertSymlinkTarget(branch, revtree2, src_path, "bbb")
+ self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb")
+
+
+class TestImportToPackFileKinds(TestCaseForGenericProcessor):
+
+ def get_command_iter(self, path, kind, content):
+
+ def command_list():
+ committer = ['', 'elmer@a.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(path, kind_to_mode(kind, False),
+ None, content)
+ yield commands.CommitCommand('head', '1', None,
+ committer, "commit 1", None, [], files_one)
+ return command_list
+
+ def test_import_plainfile(self):
+ handler, branch = self.get_handler()
+ handler.process(self.get_command_iter('foo', 'file', 'aaa'))
+
+ def test_import_symlink(self):
+ handler, branch = self.get_handler()
+ handler.process(self.get_command_iter('foo', 'symlink', 'bar'))
+
+
+class TestModifyRevertInBranch(TestCaseForGenericProcessor):
+
+ def file_command_iter(self):
+ # A add 'foo'
+ # |\
+ # | B modify 'foo'
+ # | |
+ # | C revert 'foo' back to A
+ # |/
+ # D merge 'foo'
+ def command_list():
+ committer_a = ['', 'a@elmer.com', time.time(), time.timezone]
+ committer_b = ['', 'b@elmer.com', time.time(), time.timezone]
+ committer_c = ['', 'c@elmer.com', time.time(), time.timezone]
+ committer_d = ['', 'd@elmer.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand('foo', kind_to_mode('file', False),
+ None, "content A\n")
+ yield commands.CommitCommand('head', '1', None,
+ committer_a, "commit 1", None, [], files_one)
+ def files_two():
+ yield commands.FileModifyCommand('foo', kind_to_mode('file', False),
+ None, "content B\n")
+ yield commands.CommitCommand('head', '2', None,
+ committer_b, "commit 2", ":1", [], files_two)
+ def files_three():
+ yield commands.FileModifyCommand('foo', kind_to_mode('file', False),
+ None, "content A\n")
+ yield commands.CommitCommand('head', '3', None,
+ committer_c, "commit 3", ":2", [], files_three)
+ yield commands.CommitCommand('head', '4', None,
+ committer_d, "commit 4", ":1", [':3'], lambda: [])
+ return command_list
+
+ def test_modify_revert(self):
+ handler, branch = self.get_handler()
+ handler.process(self.file_command_iter())
+ branch.lock_read()
+ self.addCleanup(branch.unlock)
+ rev_d = branch.last_revision()
+ rev_a, rev_c = branch.repository.get_parent_map([rev_d])[rev_d]
+ rev_b = branch.repository.get_parent_map([rev_c])[rev_c][0]
+ rtree_a, rtree_b, rtree_c, rtree_d = branch.repository.revision_trees([
+ rev_a, rev_b, rev_c, rev_d])
+ foo_id = rtree_a.path2id('foo')
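+        # 'foo' was reverted in C, so in the merge D its per-file
+        # last-modified revision should be C rather than A or B.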
+ self.assertEqual(rev_a, rtree_a.inventory[foo_id].revision)
+ self.assertEqual(rev_b, rtree_b.inventory[foo_id].revision)
+ self.assertEqual(rev_c, rtree_c.inventory[foo_id].revision)
+ self.assertEqual(rev_c, rtree_d.inventory[foo_id].revision)
diff --git a/tests/test_head_tracking.py b/tests/test_head_tracking.py
deleted file mode 100644
index 63712e0..0000000
--- a/tests/test_head_tracking.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright (C) 2009 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Test tracking of heads"""
-
-from cStringIO import StringIO
-
-from bzrlib import tests
-
-from bzrlib.plugins.fastimport import (
- commands,
- parser,
- )
-from bzrlib.plugins.fastimport.cache_manager import CacheManager
-
-
-# A sample input stream that only adds files to a branch
-_SAMPLE_MAINLINE = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/master
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :2 NEWS
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :101
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-"""
-
-# A sample input stream that adds files to two branches
-_SAMPLE_TWO_HEADS = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/mybranch
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :2 NEWS
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-"""
-
-# A sample input stream that adds files to two branches and merges them
-_SAMPLE_TWO_BRANCHES_MERGED = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-blob
-mark :2
-data 17
-Life
-is
-good ...
-commit refs/heads/mybranch
-mark :101
-committer a <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :2 NEWS
-blob
-mark :3
-data 19
-Welcome!
-my friend
-blob
-mark :4
-data 11
-== Docs ==
-commit refs/heads/master
-mark :102
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-M 644 :3 doc/README.txt
-M 644 :4 doc/index.txt
-commit refs/heads/master
-mark :103
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :102
-merge :101
-D doc/index.txt
-"""
-
-# A sample input stream that contains a reset
-_SAMPLE_RESET = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-reset refs/remotes/origin/master
-from :100
-"""
-
-# A sample input stream that contains a reset and more commits
-_SAMPLE_RESET_WITH_MORE_COMMITS = \
-"""blob
-mark :1
-data 9
-Welcome!
-commit refs/heads/master
-mark :100
-committer a <b@c> 1234798653 +0000
-data 4
-test
-M 644 :1 doc/README.txt
-reset refs/remotes/origin/master
-from :100
-commit refs/remotes/origin/master
-mark :101
-committer d <b@c> 1234798653 +0000
-data 8
-test
-ing
-from :100
-D doc/README.txt
-"""
-
-class TestHeadTracking(tests.TestCase):
-
- def assertHeads(self, input, expected):
- s = StringIO(input)
- p = parser.ImportParser(s)
- cm = CacheManager()
- for cmd in p.iter_commands():
- if isinstance(cmd, commands.CommitCommand):
- cm.track_heads(cmd)
- # eat the file commands
- list(cmd.file_iter())
- elif isinstance(cmd, commands.ResetCommand):
- if cmd.from_ is not None:
- cm.track_heads_for_ref(cmd.ref, cmd.from_)
- self.assertEqual(cm.heads, expected)
-
- def test_mainline(self):
- self.assertHeads(_SAMPLE_MAINLINE, {
- ':102': set(['refs/heads/master']),
- })
-
- def test_two_heads(self):
- self.assertHeads(_SAMPLE_TWO_HEADS, {
- ':101': set(['refs/heads/mybranch']),
- ':102': set(['refs/heads/master']),
- })
-
- def test_two_branches_merged(self):
- self.assertHeads(_SAMPLE_TWO_BRANCHES_MERGED, {
- ':103': set(['refs/heads/master']),
- })
-
- def test_reset(self):
- self.assertHeads(_SAMPLE_RESET, {
- ':100': set(['refs/heads/master', 'refs/remotes/origin/master']),
- })
-
- def test_reset_with_more_commits(self):
- self.assertHeads(_SAMPLE_RESET_WITH_MORE_COMMITS, {
- ':101': set(['refs/remotes/origin/master']),
- })
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
deleted file mode 100644
index 89009d1..0000000
--- a/tests/test_helpers.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (C) 2009 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Test the helper functions."""
-
-from bzrlib import tests
-
-from bzrlib.plugins.fastimport import (
- helpers,
- )
-
-
-class TestCommonDirectory(tests.TestCase):
-
- def test_no_paths(self):
- c = helpers.common_directory(None)
- self.assertEqual(c, None)
- c = helpers.common_directory([])
- self.assertEqual(c, None)
-
- def test_one_path(self):
- c = helpers.common_directory(['foo'])
- self.assertEqual(c, '')
- c = helpers.common_directory(['foo/'])
- self.assertEqual(c, 'foo/')
- c = helpers.common_directory(['foo/bar'])
- self.assertEqual(c, 'foo/')
-
- def test_two_paths(self):
- c = helpers.common_directory(['foo', 'bar'])
- self.assertEqual(c, '')
- c = helpers.common_directory(['foo/', 'bar'])
- self.assertEqual(c, '')
- c = helpers.common_directory(['foo/', 'foo/bar'])
- self.assertEqual(c, 'foo/')
- c = helpers.common_directory(['foo/bar/x', 'foo/bar/y'])
- self.assertEqual(c, 'foo/bar/')
- c = helpers.common_directory(['foo/bar/aa_x', 'foo/bar/aa_y'])
- self.assertEqual(c, 'foo/bar/')
-
- def test_lots_of_paths(self):
- c = helpers.common_directory(['foo/bar/x', 'foo/bar/y', 'foo/bar/z'])
- self.assertEqual(c, 'foo/bar/')
diff --git a/tests/test_parser.py b/tests/test_parser.py
deleted file mode 100644
index 91e27f0..0000000
--- a/tests/test_parser.py
+++ /dev/null
@@ -1,212 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Test the Import parsing"""
-
-import StringIO
-
-from bzrlib import tests
-
-from bzrlib.plugins.fastimport import (
- errors,
- parser,
- )
-
-
-class TestLineBasedParser(tests.TestCase):
-
- def test_push_line(self):
- s = StringIO.StringIO("foo\nbar\nbaz\n")
- p = parser.LineBasedParser(s)
- self.assertEqual('foo', p.next_line())
- self.assertEqual('bar', p.next_line())
- p.push_line('bar')
- self.assertEqual('bar', p.next_line())
- self.assertEqual('baz', p.next_line())
- self.assertEqual(None, p.next_line())
-
- def test_read_bytes(self):
- s = StringIO.StringIO("foo\nbar\nbaz\n")
- p = parser.LineBasedParser(s)
- self.assertEqual('fo', p.read_bytes(2))
- self.assertEqual('o\nb', p.read_bytes(3))
- self.assertEqual('ar', p.next_line())
- # Test that the line buffer is ignored
- p.push_line('bar')
- self.assertEqual('baz', p.read_bytes(3))
- # Test missing bytes
- self.assertRaises(errors.MissingBytes, p.read_bytes, 10)
-
- def test_read_until(self):
- # TODO
- return
- s = StringIO.StringIO("foo\nbar\nbaz\nabc\ndef\nghi\n")
- p = parser.LineBasedParser(s)
- self.assertEqual('foo\nbar', p.read_until('baz'))
- self.assertEqual('abc', p.next_line())
- # Test that the line buffer is ignored
- p.push_line('abc')
- self.assertEqual('def', p.read_until('ghi'))
- # Test missing terminator
-        self.assertRaises(errors.MissingTerminator, p.read_until, '>>>')
-
-
-# Sample text
-_sample_import_text = """
-progress completed
-# Test blob formats
-blob
-mark :1
-data 4
-aaaablob
-data 5
-bbbbb
-# Commit formats
-commit refs/heads/master
-mark :2
-committer bugs bunny <bugs@bunny.org> now
-data 14
-initial import
-M 644 inline README
-data 18
-Welcome from bugs
-commit refs/heads/master
-committer <bugs@bunny.org> now
-data 13
-second commit
-from :2
-M 644 inline README
-data 23
-Welcome from bugs, etc.
-# Miscellaneous
-checkpoint
-progress completed
-# Test a commit without sub-commands (bug #351717)
-commit refs/heads/master
-mark :3
-author <bugs@bunny.org> now
-committer <bugs@bunny.org> now
-data 20
-first commit, empty
-# Test a commit with a heredoc-style (delimited_data) message (bug #400960)
-commit refs/heads/master
-mark :4
-author <bugs@bunny.org> now
-committer <bugs@bunny.org> now
-data <<EOF
-Commit with heredoc-style message
-EOF
-"""
-
-
-class TestImportParser(tests.TestCase):
-
- def test_iter_commands(self):
- s = StringIO.StringIO(_sample_import_text)
- p = parser.ImportParser(s)
- result = []
- for cmd in p.iter_commands():
- result.append(cmd)
- if cmd.name == 'commit':
- for fc in cmd.file_iter():
- result.append(fc)
- self.assertEqual(len(result), 11)
- cmd1 = result.pop(0)
- self.assertEqual('progress', cmd1.name)
- self.assertEqual('completed', cmd1.message)
- cmd2 = result.pop(0)
- self.assertEqual('blob', cmd2.name)
- self.assertEqual('1', cmd2.mark)
- self.assertEqual(':1', cmd2.id)
- self.assertEqual('aaaa', cmd2.data)
- self.assertEqual(4, cmd2.lineno)
- cmd3 = result.pop(0)
- self.assertEqual('blob', cmd3.name)
- self.assertEqual('@7', cmd3.id)
- self.assertEqual(None, cmd3.mark)
- self.assertEqual('bbbbb', cmd3.data)
- self.assertEqual(7, cmd3.lineno)
- cmd4 = result.pop(0)
- self.assertEqual('commit', cmd4.name)
- self.assertEqual('2', cmd4.mark)
- self.assertEqual(':2', cmd4.id)
- self.assertEqual('initial import', cmd4.message)
- self.assertEqual('bugs bunny', cmd4.committer[0])
- self.assertEqual('bugs@bunny.org', cmd4.committer[1])
- # FIXME: check timestamp and timezone as well
- self.assertEqual(None, cmd4.author)
- self.assertEqual(11, cmd4.lineno)
- self.assertEqual('refs/heads/master', cmd4.ref)
- self.assertEqual(None, cmd4.from_)
- self.assertEqual([], cmd4.merges)
- file_cmd1 = result.pop(0)
- self.assertEqual('filemodify', file_cmd1.name)
- self.assertEqual('README', file_cmd1.path)
- self.assertEqual('file', file_cmd1.kind)
- self.assertEqual(False, file_cmd1.is_executable)
- self.assertEqual('Welcome from bugs\n', file_cmd1.data)
- cmd5 = result.pop(0)
- self.assertEqual('commit', cmd5.name)
- self.assertEqual(None, cmd5.mark)
- self.assertEqual('@19', cmd5.id)
- self.assertEqual('second commit', cmd5.message)
- self.assertEqual('', cmd5.committer[0])
- self.assertEqual('bugs@bunny.org', cmd5.committer[1])
- # FIXME: check timestamp and timezone as well
- self.assertEqual(None, cmd5.author)
- self.assertEqual(19, cmd5.lineno)
- self.assertEqual('refs/heads/master', cmd5.ref)
- self.assertEqual(':2', cmd5.from_)
- self.assertEqual([], cmd5.merges)
- file_cmd2 = result.pop(0)
- self.assertEqual('filemodify', file_cmd2.name)
- self.assertEqual('README', file_cmd2.path)
- self.assertEqual('file', file_cmd2.kind)
- self.assertEqual(False, file_cmd2.is_executable)
- self.assertEqual('Welcome from bugs, etc.', file_cmd2.data)
- cmd6 = result.pop(0)
- self.assertEqual(cmd6.name, 'checkpoint')
- cmd7 = result.pop(0)
- self.assertEqual('progress', cmd7.name)
- self.assertEqual('completed', cmd7.message)
- cmd = result.pop(0)
- self.assertEqual('commit', cmd.name)
- self.assertEqual('3', cmd.mark)
- self.assertEqual(None, cmd.from_)
- cmd = result.pop(0)
- self.assertEqual('commit', cmd.name)
- self.assertEqual('4', cmd.mark)
- self.assertEqual('Commit with heredoc-style message\n', cmd.message)
-
-
-class TestStringParsing(tests.TestCase):
-
- def test_unquote(self):
- s = r'hello \"sweet\" wo\\r\tld'
- self.assertEquals(r'hello "sweet" wo\r' + "\tld",
- parser._unquote_c_string(s))
-
-
-class TestPathPairParsing(tests.TestCase):
-
- def test_path_pair_simple(self):
- p = parser.ImportParser("")
- self.assertEqual(['foo', 'bar'], p._path_pair("foo bar"))
-
- def test_path_pair_spaces_in_first(self):
- p = parser.ImportParser("")
- self.assertEqual(['foo bar', 'baz'],
- p._path_pair('"foo bar" baz'))
diff --git a/tests/test_revision_store.py b/tests/test_revision_store.py
new file mode 100644
index 0000000..9e39254
--- /dev/null
+++ b/tests/test_revision_store.py
@@ -0,0 +1,152 @@
+# Copyright (C) 2008, 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Direct tests of the revision_store classes."""
+
+from bzrlib import (
+ branch,
+ errors,
+ inventory,
+ osutils,
+ tests,
+ )
+
+from bzrlib.plugins.fastimport import (
+ revision_store,
+ )
+from bzrlib.plugins.fastimport.tests import (
+ FastimportFeature,
+ )
+
+
+class Test_TreeShim(tests.TestCase):
+
+ _test_needs_features = [FastimportFeature]
+
+ def invAddEntry(self, inv, path, file_id=None):
+ if path.endswith('/'):
+ path = path[:-1]
+ kind = 'directory'
+ else:
+ kind = 'file'
+ parent_path, basename = osutils.split(path)
+ parent_id = inv.path2id(parent_path)
+ inv.add(inventory.make_entry(kind, basename, parent_id, file_id))
+
+ def make_trivial_basis_inv(self):
+ basis_inv = inventory.Inventory('TREE_ROOT')
+ self.invAddEntry(basis_inv, 'foo', 'foo-id')
+ self.invAddEntry(basis_inv, 'bar/', 'bar-id')
+ self.invAddEntry(basis_inv, 'bar/baz', 'baz-id')
+ return basis_inv
+
+ def test_id2path_no_delta(self):
+ basis_inv = self.make_trivial_basis_inv()
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=[], content_provider=None)
+ self.assertEqual('', shim.id2path('TREE_ROOT'))
+ self.assertEqual('foo', shim.id2path('foo-id'))
+ self.assertEqual('bar', shim.id2path('bar-id'))
+ self.assertEqual('bar/baz', shim.id2path('baz-id'))
+ self.assertRaises(errors.NoSuchId, shim.id2path, 'qux-id')
+
+ def test_id2path_with_delta(self):
+ basis_inv = self.make_trivial_basis_inv()
+ foo_entry = inventory.make_entry('file', 'foo2', 'TREE_ROOT', 'foo-id')
+ inv_delta = [('foo', 'foo2', 'foo-id', foo_entry),
+ ('bar/baz', None, 'baz-id', None),
+ ]
+
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=inv_delta,
+ content_provider=None)
+ self.assertEqual('', shim.id2path('TREE_ROOT'))
+ self.assertEqual('foo2', shim.id2path('foo-id'))
+ self.assertEqual('bar', shim.id2path('bar-id'))
+ self.assertRaises(errors.NoSuchId, shim.id2path, 'baz-id')
+
+ def test_path2id(self):
+ basis_inv = self.make_trivial_basis_inv()
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=[], content_provider=None)
+ self.assertEqual('TREE_ROOT', shim.path2id(''))
+ # We don't want to ever give a wrong value, so for now we just raise
+ # NotImplementedError
+ self.assertRaises(NotImplementedError, shim.path2id, 'bar')
+
+ def test_get_file_with_stat_content_in_stream(self):
+ basis_inv = self.make_trivial_basis_inv()
+
+ def content_provider(file_id):
+ return 'content of\n' + file_id + '\n'
+
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=[],
+ content_provider=content_provider)
+ f_obj, stat_val = shim.get_file_with_stat('baz-id')
+ self.assertIs(None, stat_val)
+ self.assertEqualDiff('content of\nbaz-id\n', f_obj.read())
+
+ # TODO: Test when the content isn't in the stream, and we fall back to the
+ # repository that was passed in
+
+ def test_get_symlink_target(self):
+ basis_inv = self.make_trivial_basis_inv()
+ ie = inventory.make_entry('symlink', 'link', 'TREE_ROOT', 'link-id')
+ ie.symlink_target = u'link-target'
+ basis_inv.add(ie)
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=[], content_provider=None)
+ self.assertEqual(u'link-target', shim.get_symlink_target('link-id'))
+
+ def test_get_symlink_target_from_delta(self):
+ basis_inv = self.make_trivial_basis_inv()
+ ie = inventory.make_entry('symlink', 'link', 'TREE_ROOT', 'link-id')
+ ie.symlink_target = u'link-target'
+ inv_delta = [(None, 'link', 'link-id', ie)]
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=inv_delta,
+ content_provider=None)
+ self.assertEqual(u'link-target', shim.get_symlink_target('link-id'))
+
+ def test__delta_to_iter_changes(self):
+ basis_inv = self.make_trivial_basis_inv()
+ foo_entry = inventory.make_entry('file', 'foo2', 'bar-id', 'foo-id')
+ link_entry = inventory.make_entry('symlink', 'link', 'TREE_ROOT',
+ 'link-id')
+ link_entry.symlink_target = u'link-target'
+ inv_delta = [('foo', 'bar/foo2', 'foo-id', foo_entry),
+ ('bar/baz', None, 'baz-id', None),
+ (None, 'link', 'link-id', link_entry),
+ ]
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=inv_delta,
+ content_provider=None)
+ changes = list(shim._delta_to_iter_changes())
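+        # Each tuple follows bzrlib's iter_changes contract:
+        # (file_id, (old_path, new_path), changed_content,
+        #  (old_versioned, new_versioned), (old_parent_id, new_parent_id),
+        #  (old_name, new_name), (old_kind, new_kind),
+        #  (old_executable, new_executable)).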
+ expected = [('foo-id', ('foo', 'bar/foo2'), False, (True, True),
+ ('TREE_ROOT', 'bar-id'), ('foo', 'foo2'),
+ ('file', 'file'), (False, False)),
+ ('baz-id', ('bar/baz', None), True, (True, False),
+ ('bar-id', None), ('baz', None),
+ ('file', None), (False, None)),
+ ('link-id', (None, 'link'), True, (False, True),
+ (None, 'TREE_ROOT'), (None, 'link'),
+ (None, 'symlink'), (None, False)),
+ ]
+ # from pprint import pformat
+ # self.assertEqualDiff(pformat(expected), pformat(changes))
+ self.assertEqual(expected, changes)
+
diff --git a/user_mapper.py b/user_mapper.py
new file mode 100644
index 0000000..4fcf4a4
--- /dev/null
+++ b/user_mapper.py
@@ -0,0 +1,81 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+from email import Utils
+
+
+class UserMapper(object):
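+    """Map (name, email) identities to their preferred equivalents.
+
+    The mapping rules are supplied as lines of text; see __init__ for
+    the accepted format.
+    """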
+
+ def __init__(self, lines):
+ """Create a user-mapper from a list of lines.
+
+ Blank lines and comment lines (starting with #) are ignored.
+ Otherwise lines are of the form:
+
+ old-id = new-id
+
+ Each id may be in the following forms:
+
+ name <email>
+ name
+
+ If old-id has the value '@', then new-id is the domain to use
+ when generating an email from a user-id.
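+
+        For example, a hypothetical map might contain:
+
+          # map a login name to a full identity
+          bugs = Bugs Bunny <bugs@bunny.org>
+          @ = example.com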
+ """
+ self._parse(lines)
+
+ def _parse(self, lines):
+ self._user_map = {}
+ self._default_domain = None
+ for line in lines:
+ line = line.strip()
+ if len(line) == 0 or line.startswith('#'):
+ continue
+ old, new = line.split('=', 1)
+ old = old.strip()
+ new = new.strip()
+ if old == '@':
+ self._default_domain = new
+ continue
+ # Parse each id into a name and email address
+ old_name, old_email = self._parse_id(old)
+ new_name, new_email = self._parse_id(new)
+ #print "found user map: %s => %s" % ((old_name, old_email), (new_name, new_email))
+ self._user_map[(old_name, old_email)] = (new_name, new_email)
+
+ def _parse_id(self, id):
+ if id.find('<') == -1:
+ return id, None
+ else:
+ return Utils.parseaddr(id)
+
+ def map_name_and_email(self, name, email):
+ """Map a name and an email to the preferred name and email.
+
+ :param name: the current name
+ :param email: the current email
+ :result: the preferred name and email
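+
+        For example, with the hypothetical map above, ('bugs', None)
+        would map to ('Bugs Bunny', 'bugs@bunny.org'). An unmapped
+        identity is returned unchanged, except that the default domain,
+        when set, is used to synthesise an email from the name.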
+ """
+ try:
+ new_name, new_email = self._user_map[(name, email)]
+ except KeyError:
+ new_name = name
+ if self._default_domain and not email:
+ new_email = "%s@%s" % (name, self._default_domain)
+ else:
+ new_email = email
+ #print "converted '%s <%s>' to '%s <%s>'" % (name, email, new_name, new_email)
+ return new_name, new_email