diff options
66 files changed, 4356 insertions, 5574 deletions
@@ -1 +1,5 @@ build +# executables +exporters/svn-archive +exporters/svn-fast-export +.testrepository diff --git a/.testr.conf b/.testr.conf new file mode 100644 index 0000000..cc509ce --- /dev/null +++ b/.testr.conf @@ -0,0 +1,4 @@ +[DEFAULT] +test_command=BZR_PLUGINS_AT=fastimport@. bzr selftest "^bzrlib.plugins.fastimport" --subunit $IDOPTION $LISTOPT +test_id_option=--load-list $IDFILE +test_list_option=--list @@ -4,11 +4,32 @@ bzr-fastimport Release Notes .. contents:: -In Development -============== +0.10 UNRELEASED -Compatability Breaks --------------------- +Changes +------- + +* bzr-fastimport's file parsing and generation functionality has been exported into + separate upstream project called python-fastimport, that it now depends on. + python-fastimport can be retrieved from http://launchpad.net/python-fastimport. + +Bug fixes +--------- + +* Cope with non-ascii characters in tag names. (Jelmer Vernooij, #598060) + +* Cope with non-ascii characters in symbolic links. (Daniel Clemente, + Jelmer Vernooij, #238365) + +* In plain mode, don't export multiple authors. (David Kilzer, #496677) + +* Fix indentation when starting p4 fails. (Jelmer Vernooij) + +* SOURCE argument to bzr fast-import-filter is now optional, consistent with + examples. (Jelmer Vernooij, #477861) + +0.9 28-Feb-2010 +=============== New Features ------------ @@ -35,9 +56,30 @@ New Features merged into this one for the purposes of ongoing bug fixing and development. (Miklos Vajna) +* fast-export now supports a --no-plain parameter which causes + richer metadata to be included in the output using the + recently agreed 'feature' extension capability. The features + used are called multiple-authors, commit-properties and + empty-directories. (Ian Clatworthy) + +* fast-import and fast-import-filter now support user mapping + via the new --user-map option. The argument is a file specifying + how user-ids should be mapped to preferred user-ids. + (Ian Clatworthy) + +* svn-fast-export now supports an address option (to control the + default email domain) and a first-rev option (to select revisions + since a given one). (Ted Gould) + Improvements ------------ +* Large repositories now compress better thanks to a change in + how file-ids are assigned. (Ian Clatworthy, John Arbash Meinel) + +* Memory usage is improved by flushing blobs to a disk cache + when appropriate. (John Arbash Meinel) + * If a fast-import source ends in ".gz", it is assumed to be in gzip format and the stream is implicitly uncompressed. This means fast-import dump files generated by fast-export-from-xxx @@ -50,7 +92,7 @@ Improvements * Directories that become empty following a delete or rename of one of their children are now implicitly pruned. If required, this will be made optional at a later date. - (Ian Clatworthy) + (Tom Widmer, Ian Clatworthy) * Blob tracking is now more intelligently done by an implicit first pass to collect blob usage statistics. This eliminates @@ -79,6 +121,9 @@ Bug Fixes * Gracefully handle an empty input stream. (Gonéri Le Bouder) +* Gracefully handle git submodules by ignoring them. + (Ian Clatworthy) + * Get git-bzr working again. (Gonéri Le Bouder) Documentation @@ -87,12 +132,6 @@ Documentation * Improved documentation has been published in the Bazaar Data Migration Guide: http://doc.bazaar-vcs.org/migration/en/data-migration/. -Testing -------- - -Internals ---------- - 0.8 22-Jul-2009 =============== @@ -8,7 +8,9 @@ Required and recommended packages are: * Python 2.4 or later -* Bazaar 1.1 or later. +* Python-Fastimport 0.9.0 or later. + +* Bazaar 1.18 or later. Installation diff --git a/__init__.py b/__init__.py index 8ba91fc..61e14c6 100644 --- a/__init__.py +++ b/__init__.py @@ -55,7 +55,7 @@ online help for the individual commands for details:: bzr help fast-export-from-darcs bzr help fast-export-from-hg bzr help fast-export-from-git - bzr help fast-export-from-mnt + bzr help fast-export-from-mtn bzr help fast-export-from-p4 bzr help fast-export-from-svn @@ -79,11 +79,18 @@ To report bugs or publish enhancements, visit the bzr-fastimport project page on Launchpad, https://launchpad.net/bzr-fastimport. """ -version_info = (0, 9, 0, 'dev', 0) +version_info = (0, 10, 0, 'dev', 0) -from bzrlib import bzrdir -from bzrlib.commands import Command, register_command -from bzrlib.option import Option, ListOption, RegistryOption +from bzrlib.commands import plugin_cmds + + +def load_fastimport(): + """Load the fastimport module or raise an appropriate exception.""" + try: + import fastimport + except ImportError, e: + from bzrlib.errors import DependencyNotPresent + raise DependencyNotPresent("fastimport", e) def test_suite(): @@ -91,726 +98,18 @@ def test_suite(): return tests.test_suite() -def _run(source, processor_factory, control, params, verbose): - """Create and run a processor. - - :param source: a filename or '-' for standard input. If the - filename ends in .gz, it will be opened as a gzip file and - the stream will be implicitly uncompressed - :param processor_factory: a callable for creating a processor - :param control: the BzrDir of the destination or None if no - destination is expected - """ - import parser - stream = _get_source_stream(source) - proc = processor_factory(control, params=params, verbose=verbose) - p = parser.ImportParser(stream, verbose=verbose) - return proc.process(p.iter_commands) - - -def _get_source_stream(source): - if source == '-': - import sys - stream = helpers.binary_stream(sys.stdin) - elif source.endswith('.gz'): - import gzip - stream = gzip.open(source, "rb") - else: - stream = open(source, "rb") - return stream - - -class cmd_fast_import(Command): - """Backend for fast Bazaar data importers. - - This command reads a mixed command/data stream and creates - branches in a Bazaar repository accordingly. The preferred - recipe is:: - - bzr fast-import project.fi project.bzr - - Numerous commands are provided for generating a fast-import file - to use as input. These are named fast-export-from-xxx where xxx - is one of cvs, darcs, git, hg, mnt, p4 or svn. - To specify standard input as the input stream, use a - source name of '-' (instead of project.fi). If the source name - ends in '.gz', it is assumed to be compressed in gzip format. - - project.bzr will be created if it doesn't exist. If it exists - already, it should be empty or be an existing Bazaar repository - or branch. If not specified, the current directory is assumed. - - fast-import will intelligently select the format to use when - creating a repository or branch. If you are running Bazaar 1.17 - up to Bazaar 2.0, the default format for Bazaar 2.x ("2a") is used. - Otherwise, the current default format ("pack-0.92" for Bazaar 1.x) - is used. If you wish to specify a custom format, use the `--format` - option. - - .. note:: - - To maintain backwards compatibility, fast-import lets you - create the target repository or standalone branch yourself. - It is recommended though that you let fast-import create - these for you instead. - - :Branch mapping rules: - - Git reference names are mapped to Bazaar branch names as follows: - - * refs/heads/foo is mapped to foo - * refs/remotes/origin/foo is mapped to foo.remote - * refs/tags/foo is mapped to foo.tag - * */master is mapped to trunk, trunk.remote, etc. - * */trunk is mapped to git-trunk, git-trunk.remote, etc. - - :Branch creation rules: - - When a shared repository is created or found at the destination, - branches are created inside it. In the simple case of a single - branch (refs/heads/master) inside the input file, the branch is - project.bzr/trunk. - - When a standalone branch is found at the destination, the trunk - is imported there and warnings are output about any other branches - found in the input file. - - When a branch in a shared repository is found at the destination, - that branch is made the trunk and other branches, if any, are - created in sister directories. - - :Working tree updates: - - The working tree is generated for the trunk branch. If multiple - branches are created, a message is output on completion explaining - how to create the working trees for other branches. - - :Custom exporters: - - The fast-export-from-xxx commands typically call more advanced - xxx-fast-export scripts. You are welcome to use the advanced - scripts if you prefer. - - If you wish to write a custom exporter for your project, see - http://bazaar-vcs.org/BzrFastImport for the detailed protocol - specification. In many cases, exporters can be written quite - quickly using whatever scripting/programming language you like. - - :Blob tracking: - - As some exporters (like git-fast-export) reuse blob data across - commits, fast-import makes two passes over the input file by - default. In the first pass, it collects data about what blobs are - used when, along with some other statistics (e.g. total number of - commits). In the second pass, it generates the repository and - branches. - - .. note:: - - The initial pass isn't done if the --info option is used - to explicitly pass in information about the input stream. - It also isn't done if the source is standard input. In the - latter case, memory consumption may be higher than otherwise - because some blobs may be kept in memory longer than necessary. - - :Restarting an import: - - At checkpoints and on completion, the commit-id -> revision-id - map is saved to a file called 'fastimport-id-map' in the control - directory for the repository (e.g. .bzr/repository). If the import - is interrupted or unexpectedly crashes, it can be started again - and this file will be used to skip over already loaded revisions. - As long as subsequent exports from the original source begin - with exactly the same revisions, you can use this feature to - maintain a mirror of a repository managed by a foreign tool. - If and when Bazaar is used to manage the repository, this file - can be safely deleted. - - :Examples: - - Import a Subversion repository into Bazaar:: - - bzr fast-export-from-svn /svn/repo/path project.fi - bzr fast-import project.fi project.bzr - - Import a CVS repository into Bazaar:: - - bzr fast-export-from-cvs /cvs/repo/path project.fi - bzr fast-import project.fi project.bzr - - Import a Git repository into Bazaar:: - - bzr fast-export-from-git /git/repo/path project.fi - bzr fast-import project.fi project.bzr - - Import a Mercurial repository into Bazaar:: - - bzr fast-export-from-hg /hg/repo/path project.fi - bzr fast-import project.fi project.bzr - - Import a Darcs repository into Bazaar:: - - bzr fast-export-from-darcs /darcs/repo/path project.fi - bzr fast-import project.fi project.bzr - """ - hidden = False - _see_also = ['fast-export', 'fast-import-filter', 'fast-import-info'] - takes_args = ['source', 'destination?'] - takes_options = ['verbose', - Option('info', type=str, - help="Path to file containing caching hints.", - ), - Option('trees', - help="Update all working trees, not just trunk's.", - ), - Option('count', type=int, - help="Import this many revisions then exit.", - ), - Option('checkpoint', type=int, - help="Checkpoint automatically every N revisions." - " The default is 10000.", - ), - Option('autopack', type=int, - help="Pack every N checkpoints. The default is 4.", - ), - Option('inv-cache', type=int, - help="Number of inventories to cache.", - ), - RegistryOption.from_kwargs('mode', - 'The import algorithm to use.', - title='Import Algorithm', - default='Use the preferred algorithm (inventory deltas).', - classic="Use the original algorithm (mutable inventories).", - experimental="Enable experimental features.", - value_switches=True, enum_switch=False, - ), - Option('import-marks', type=str, - help="Import marks from file." - ), - Option('export-marks', type=str, - help="Export marks to file." - ), - RegistryOption('format', - help='Specify a format for the created repository. See' - ' "bzr help formats" for details.', - lazy_registry=('bzrlib.bzrdir', 'format_registry'), - converter=lambda name: bzrdir.format_registry.make_bzrdir(name), - value_switches=False, title='Repository format'), - ] - aliases = [] - def run(self, source, destination='.', verbose=False, info=None, - trees=False, count=-1, checkpoint=10000, autopack=4, inv_cache=-1, - mode=None, import_marks=None, export_marks=None, format=None): - from bzrlib.errors import BzrCommandError, NotBranchError - from bzrlib.plugins.fastimport.processors import generic_processor - from bzrlib.plugins.fastimport.helpers import ( - open_destination_directory, - ) - # If no format is given and the user is running a release - # leading up to 2.0, select 2a for them. Otherwise, use - # the default format. - if format is None: - import bzrlib - bzr_version = bzrlib.version_info[0:2] - if bzr_version in [(1,17), (1,18), (2,0)]: - format = bzrdir.format_registry.make_bzrdir('2a') - control = open_destination_directory(destination, format=format) - - # If an information file was given and the source isn't stdin, - # generate the information by reading the source file as a first pass - if info is None and source != '-': - info = self._generate_info(source) - - # Do the work - if mode is None: - mode = 'default' - params = { - 'info': info, - 'trees': trees, - 'count': count, - 'checkpoint': checkpoint, - 'autopack': autopack, - 'inv-cache': inv_cache, - 'mode': mode, - 'import-marks': import_marks, - 'export-marks': export_marks, - } - return _run(source, generic_processor.GenericProcessor, control, - params, verbose) - - def _generate_info(self, source): - from cStringIO import StringIO - import parser - from bzrlib.plugins.fastimport.processors import info_processor - stream = _get_source_stream(source) - output = StringIO() - try: - proc = info_processor.InfoProcessor(verbose=True, outf=output) - p = parser.ImportParser(stream) - return_code = proc.process(p.iter_commands) - lines = output.getvalue().splitlines() - finally: - output.close() - stream.seek(0) - return lines - - -class cmd_fast_import_filter(Command): - """Filter a fast-import stream to include/exclude files & directories. - - This command is useful for splitting a subdirectory or bunch of - files out from a project to create a new project complete with history - for just those files. It can also be used to create a new project - repository that removes all references to files that should not have - been committed, e.g. security-related information (like passwords), - commercially sensitive material, files with an incompatible license or - large binary files like CD images. - - When filtering out a subdirectory (or file), the new stream uses the - subdirectory (or subdirectory containing the file) as the root. As - fast-import doesn't know in advance whether a path is a file or - directory in the stream, you need to specify a trailing '/' on - directories passed to the `--includes option`. If multiple files or - directories are given, the new root is the deepest common directory. - - To specify standard input as the input stream, use a source name - of '-'. If the source name ends in '.gz', it is assumed to be - compressed in gzip format. - - Note: If a path has been renamed, take care to specify the *original* - path name, not the final name that it ends up with. - - :Examples: - - Create a new project from a library (note the trailing / on the - directory name of the library):: - - front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi - bzr fast-import xxx.fi mylibrary.bzr - (lib/xxx/foo is now foo) - - Create a new repository without a sensitive file:: - - front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi - bzr fast-import clean.fi clean.bzr - """ - hidden = False - _see_also = ['fast-import'] - takes_args = ['source'] - takes_options = ['verbose', - ListOption('include_paths', short_name='i', type=str, - help="Only include commits affecting these paths." - " Directories should have a trailing /." - ), - ListOption('exclude_paths', short_name='x', type=str, - help="Exclude these paths from commits." - ), - ] - aliases = [] - encoding_type = 'exact' - def run(self, source, verbose=False, include_paths=None, - exclude_paths=None): - from bzrlib.plugins.fastimport.processors import filter_processor - params = { - 'include_paths': include_paths, - 'exclude_paths': exclude_paths, - } - return _run(source, filter_processor.FilterProcessor, None, params, - verbose) - - -class cmd_fast_import_info(Command): - """Output information about a fast-import stream. - - This command reads a fast-import stream and outputs - statistics and interesting properties about what it finds. - When run in verbose mode, the information is output as a - configuration file that can be passed to fast-import to - assist it in intelligently caching objects. - - To specify standard input as the input stream, use a source name - of '-'. If the source name ends in '.gz', it is assumed to be - compressed in gzip format. - - :Examples: - - Display statistics about the import stream produced by front-end:: - - front-end | bzr fast-import-info - - - Create a hints file for running fast-import on a large repository:: - - front-end | bzr fast-import-info -v - > front-end.cfg - """ - hidden = False - _see_also = ['fast-import'] - takes_args = ['source'] - takes_options = ['verbose'] - aliases = [] - def run(self, source, verbose=False): - from bzrlib.plugins.fastimport.processors import info_processor - return _run(source, info_processor.InfoProcessor, None, {}, verbose) - - -class cmd_fast_import_query(Command): - """Query a fast-import stream displaying selected commands. - - To specify standard input as the input stream, use a source name - of '-'. If the source name ends in '.gz', it is assumed to be - compressed in gzip format. - - To specify the commands to display, use the -C option one or - more times. To specify just some fields for a command, use the - syntax:: - - command=field1,... - - By default, the nominated fields for the nominated commands - are displayed tab separated. To see the information in - a name:value format, use verbose mode. - - Note: Binary fields (e.g. data for blobs) are masked out - so it is generally safe to view the output in a terminal. - - :Examples: - - Show all the fields of the reset and tag commands:: - - front-end > xxx.fi - bzr fast-import-query xxx.fi -Creset -Ctag - - Show the mark and merge fields of the commit commands:: - - bzr fast-import-query xxx.fi -Ccommit=mark,merge - """ - hidden = True - _see_also = ['fast-import', 'fast-import-filter'] - takes_args = ['source'] - takes_options = ['verbose', - ListOption('commands', short_name='C', type=str, - help="Display fields for these commands." - ), - ] - aliases = [] - def run(self, source, verbose=False, commands=None): - from bzrlib.plugins.fastimport.processors import query_processor - from bzrlib.plugins.fastimport import helpers - params = helpers.defines_to_dict(commands) - return _run(source, query_processor.QueryProcessor, None, params, - verbose) - - -class cmd_fast_export(Command): - """Generate a fast-import stream from a Bazaar branch. - - This program generates a stream from a bzr branch in the format - required by git-fast-import(1). It preserves merges correctly, - even merged branches with no common history (`bzr merge -r 0..-1`). - - If no destination is given or the destination is '-', standard output - is used. Otherwise, the destination is the name of a file. If the - destination ends in '.gz', the output will be compressed into gzip - format. - - :Examples: - - To import several unmerged but related branches into the same repository, - use the --{export,import}-marks options, and specify a name for the git - branch like this:: - - bzr fast-export --export-marks=marks.bzr project.dev | - GIT_DIR=project/.git git-fast-import --export-marks=marks.git - - bzr fast-export --import-marks=marks.bzr -b other project.other | - GIT_DIR=project/.git git-fast-import --import-marks=marks.git - - If you get a "Missing space after source" error from git-fast-import, - see the top of the commands.py module for a work-around. - """ - hidden = False - _see_also = ['fast-import', 'fast-import-filter'] - takes_args = ['source', 'destination?'] - takes_options = ['verbose', 'revision', - Option('git-branch', short_name='b', type=str, - argname='FILE', - help='Name of the git branch to create (default=master).' - ), - Option('checkpoint', type=int, argname='N', - help="Checkpoint every N revisions (default=10000)." - ), - Option('marks', type=str, argname='FILE', - help="Import marks from and export marks to file." - ), - Option('import-marks', type=str, argname='FILE', - help="Import marks from file." - ), - Option('export-marks', type=str, argname='FILE', - help="Export marks to file." - ), - ] - aliases = [] - encoding_type = 'exact' - def run(self, source, destination=None, verbose=False, - git_branch="master", checkpoint=10000, marks=None, - import_marks=None, export_marks=None, revision=None): - from bzrlib.plugins.fastimport import bzr_exporter - - if marks: - import_marks = export_marks = marks - exporter = bzr_exporter.BzrFastExporter(source, - destination=destination, - git_branch=git_branch, checkpoint=checkpoint, - import_marks_file=import_marks, export_marks_file=export_marks, - revision=revision, verbose=verbose) - return exporter.run() - - -class cmd_fast_export_from_cvs(Command): - """Generate a fast-import file from a CVS repository. - - Destination is a dump file, typically named xxx.fi where xxx is - the name of the project. If '-' is given, standard output is used. - - cvs2svn 2.3 or later must be installed as its cvs2bzr script is used - under the covers to do the export. - - The source must be the path on your filesystem to the part of the - repository you wish to convert. i.e. either that path or a parent - directory must contain a CVSROOT subdirectory. The path may point to - either the top of a repository or to a path within it. In the latter - case, only that project within the repository will be converted. - - .. note:: - Remote access to the repository is not sufficient - the path - must point into a copy of the repository itself. See - http://cvs2svn.tigris.org/faq.html#repoaccess for instructions - on how to clone a remote CVS repository locally. - - By default, the trunk, branches and tags are all exported. If you - only want the trunk, use the `--trunk-only` option. - - By default, filenames, log messages and author names are expected - to be encoded in ascii. Use the `--encoding` option to specify an - alternative. If multiple encodings are used, specify the option - multiple times. For a list of valid encoding names, see - http://docs.python.org/lib/standard-encodings.html. - - Windows users need to install GNU sort and use the `--sort` - option to specify its location. GNU sort can be downloaded from - http://unxutils.sourceforge.net/. - """ - hidden = False - _see_also = ['fast-import', 'fast-import-filter'] - takes_args = ['source', 'destination'] - takes_options = ['verbose', - Option('trunk-only', - help="Export just the trunk, ignoring tags and branches." - ), - ListOption('encoding', type=str, argname='CODEC', - help="Encoding used for filenames, commit messages " - "and author names if not ascii." - ), - Option('sort', type=str, argname='PATH', - help="GNU sort program location if not on the path." - ), - ] - aliases = [] - encoding_type = 'exact' - def run(self, source, destination, verbose=False, trunk_only=False, - encoding=None, sort=None): - from bzrlib.plugins.fastimport.exporters import fast_export_from - custom = [] - if trunk_only: - custom.append("--trunk-only") - if encoding: - for enc in encoding: - custom.extend(['--encoding', enc]) - if sort: - custom.extend(['--sort', sort]) - fast_export_from(source, destination, 'cvs', verbose, custom) - - -class cmd_fast_export_from_darcs(Command): - """Generate a fast-import file from a Darcs repository. - - Destination is a dump file, typically named xxx.fi where xxx is - the name of the project. If '-' is given, standard output is used. - - Darcs 2.2 or later must be installed as various subcommands are - used to access the source repository. The source may be a network - URL but using a local URL is recommended for performance reasons. - """ - hidden = False - _see_also = ['fast-import', 'fast-import-filter'] - takes_args = ['source', 'destination'] - takes_options = ['verbose', - Option('encoding', type=str, argname='CODEC', - help="Encoding used for commit messages if not utf-8." - ), - ] - aliases = [] - encoding_type = 'exact' - def run(self, source, destination, verbose=False, encoding=None): - from bzrlib.plugins.fastimport.exporters import fast_export_from - custom = None - if encoding is not None: - custom = ['--encoding', encoding] - fast_export_from(source, destination, 'darcs', verbose, custom) - - -class cmd_fast_export_from_hg(Command): - """Generate a fast-import file from a Mercurial repository. - - Destination is a dump file, typically named xxx.fi where xxx is - the name of the project. If '-' is given, standard output is used. - - Mercurial 1.2 or later must be installed as its libraries are used - to access the source repository. Given the APIs currently used, - the source repository must be a local file, not a network URL. - """ - hidden = False - _see_also = ['fast-import', 'fast-import-filter'] - takes_args = ['source', 'destination'] - takes_options = ['verbose'] - aliases = [] - encoding_type = 'exact' - def run(self, source, destination, verbose=False): - from bzrlib.plugins.fastimport.exporters import fast_export_from - fast_export_from(source, destination, 'hg', verbose) - - -class cmd_fast_export_from_git(Command): - """Generate a fast-import file from a Git repository. - - Destination is a dump file, typically named xxx.fi where xxx is - the name of the project. If '-' is given, standard output is used. - - Git 1.6 or later must be installed as the git fast-export - subcommand is used under the covers to generate the stream. - The source must be a local directory. - - .. note:: - - Earlier versions of Git may also work fine but are - likely to receive less active support if problems arise. - """ - hidden = False - _see_also = ['fast-import', 'fast-import-filter'] - takes_args = ['source', 'destination'] - takes_options = ['verbose'] - aliases = [] - encoding_type = 'exact' - def run(self, source, destination, verbose=False): - from bzrlib.plugins.fastimport.exporters import fast_export_from - fast_export_from(source, destination, 'git', verbose) - - -class cmd_fast_export_from_mnt(Command): - """Generate a fast-import file from a Monotone repository. - - Destination is a dump file, typically named xxx.fi where xxx is - the name of the project. If '-' is given, standard output is used. - - Monotone 0.43 or later must be installed as the mnt git_export - subcommand is used under the covers to generate the stream. - The source must be a local directory. - """ - hidden = False - _see_also = ['fast-import', 'fast-import-filter'] - takes_args = ['source', 'destination'] - takes_options = ['verbose'] - aliases = [] - encoding_type = 'exact' - def run(self, source, destination, verbose=False): - from bzrlib.plugins.fastimport.exporters import fast_export_from - fast_export_from(source, destination, 'mnt', verbose) - - -class cmd_fast_export_from_p4(Command): - """Generate a fast-import file from a Perforce repository. - - Source is a Perforce depot path, e.g., //depot/project - - Destination is a dump file, typically named xxx.fi where xxx is - the name of the project. If '-' is given, standard output is used. - - bzrp4 must be installed as its p4_fast_export.py module is used under - the covers to do the export. bzrp4 can be downloaded from - https://launchpad.net/bzrp4/. - - The P4PORT environment variable must be set, and you must be logged - into the Perforce server. - - By default, only the HEAD changelist is exported. To export all - changelists, append '@all' to the source. To export a revision range, - append a comma-delimited pair of changelist numbers to the source, - e.g., '100,200'. - """ - hidden = False - _see_also = ['fast-import', 'fast-import-filter'] - takes_args = ['source', 'destination'] - takes_options = [] - aliases = [] - encoding_type = 'exact' - def run(self, source, destination, verbose=False): - from bzrlib.plugins.fastimport.exporters import fast_export_from - custom = [] - fast_export_from(source, destination, 'p4', verbose, custom) - - -class cmd_fast_export_from_svn(Command): - """Generate a fast-import file from a Subversion repository. - - Destination is a dump file, typically named xxx.fi where xxx is - the name of the project. If '-' is given, standard output is used. - - Python-Subversion (Python bindings to the Subversion APIs) - 1.4 or later must be installed as this library is used to - access the source repository. The source may be a network URL - but using a local URL is recommended for performance reasons. - """ - hidden = False - _see_also = ['fast-import', 'fast-import-filter'] - takes_args = ['source', 'destination'] - takes_options = ['verbose', - Option('trunk-path', type=str, argname="STR", - help="Path in repo to /trunk.\n" - "May be `regex:/cvs/(trunk)/proj1/(.*)` in " - "which case the first group is used as the " - "branch name and the second group is used " - "to match files.", - ), - Option('branches-path', type=str, argname="STR", - help="Path in repo to /branches." - ), - Option('tags-path', type=str, argname="STR", - help="Path in repo to /tags." - ), - ] - aliases = [] - encoding_type = 'exact' - def run(self, source, destination, verbose=False, trunk_path=None, - branches_path=None, tags_path=None): - from bzrlib.plugins.fastimport.exporters import fast_export_from - custom = [] - if trunk_path is not None: - custom.extend(['--trunk-path', trunk_path]) - if branches_path is not None: - custom.extend(['--branches-path', branches_path]) - if tags_path is not None: - custom.extend(['--tags-path', tags_path]) - fast_export_from(source, destination, 'svn', verbose, custom) - - -register_command(cmd_fast_import) -register_command(cmd_fast_import_filter) -register_command(cmd_fast_import_info) -register_command(cmd_fast_import_query) -register_command(cmd_fast_export) -register_command(cmd_fast_export_from_cvs) -register_command(cmd_fast_export_from_darcs) -register_command(cmd_fast_export_from_hg) -register_command(cmd_fast_export_from_git) -register_command(cmd_fast_export_from_mnt) -register_command(cmd_fast_export_from_p4) -register_command(cmd_fast_export_from_svn) +for name in [ + "fast_import", + "fast_import_filter", + "fast_import_info", + "fast_import_query", + "fast_export", + "fast_export_from_cvs", + "fast_export_from_darcs", + "fast_export_from_hg", + "fast_export_from_git", + "fast_export_from_mtn", + "fast_export_from_p4", + "fast_export_from_svn" + ]: + plugin_cmds.register_lazy("cmd_%s" % name, [], "bzrlib.plugins.fastimport.cmds") diff --git a/branch_mapper.py b/branch_mapper.py index f6d0670..acc37c9 100644 --- a/branch_mapper.py +++ b/branch_mapper.py @@ -14,46 +14,45 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -"""An object that maps bzr branch names <-> git ref names.""" +"""An object that maps git ref names to bzr branch names. Note that it is not +used to map git ref names to bzr tag names.""" + + +import re class BranchMapper(object): + _GIT_TRUNK_RE = re.compile('(?:git-)*trunk') - def git_to_bzr(self, ref_names): - """Get the mapping from git reference names to Bazaar branch names. - - :return: a dictionary with git reference names as keys and - the Bazaar branch names as values. + def git_to_bzr(self, ref_name): + """Map a git reference name to a Bazaar branch name. """ - bazaar_names = {} - for ref_name in sorted(ref_names): - parts = ref_name.split('/') - if parts[0] == 'refs': + parts = ref_name.split('/') + if parts[0] == 'refs': + parts.pop(0) + category = parts.pop(0) + if category == 'heads': + git_name = '/'.join(parts) + bazaar_name = self._git_to_bzr_name(git_name) + else: + if category == 'remotes' and parts[0] == 'origin': parts.pop(0) - category = parts.pop(0) - if category == 'heads': - bazaar_name = self._git_to_bzr_name(parts[-1]) - else: - if category.endswith('s'): - category = category[:-1] - name_no_ext = self._git_to_bzr_name(parts[-1]) - bazaar_name = "%s.%s" % (name_no_ext, category) - bazaar_names[ref_name] = bazaar_name - return bazaar_names + git_name = '/'.join(parts) + if category.endswith('s'): + category = category[:-1] + name_no_ext = self._git_to_bzr_name(git_name) + bazaar_name = "%s.%s" % (name_no_ext, category) + return bazaar_name def _git_to_bzr_name(self, git_name): + # Make a simple name more bzr-like, by mapping git 'master' to bzr 'trunk'. + # To avoid collision, map git 'trunk' to bzr 'git-trunk'. Likewise + # 'git-trunk' to 'git-git-trunk' and so on, such that the mapping is + # one-to-one in both directions. if git_name == 'master': bazaar_name = 'trunk' - elif git_name.endswith('trunk'): + elif self._GIT_TRUNK_RE.match(git_name): bazaar_name = 'git-%s' % (git_name,) else: bazaar_name = git_name return bazaar_name - - def bzr_to_git(self, branch_names): - """Get the mapping from Bazaar branch names to git reference names. - - :return: a dictionary with Bazaar branch names as keys and - the git reference names as values. - """ - raise NotImplementedError(self.bzr_to_git) diff --git a/branch_updater.py b/branch_updater.py index b97f887..039171f 100644 --- a/branch_updater.py +++ b/branch_updater.py @@ -18,11 +18,12 @@ from operator import itemgetter -from bzrlib import bzrdir, errors, osutils +from bzrlib import bzrdir, errors, osutils, transport from bzrlib.trace import error, note -import branch_mapper -import helpers +from bzrlib.plugins.fastimport.helpers import ( + best_format_for_objects_in_a_repository, + ) class BranchUpdater(object): @@ -40,9 +41,8 @@ class BranchUpdater(object): self.heads_by_ref = heads_by_ref self.last_ref = last_ref self.tags = tags - self.name_mapper = branch_mapper.BranchMapper() self._branch_format = \ - helpers.best_format_for_objects_in_a_repository(repo) + best_format_for_objects_in_a_repository(repo) def update(self): """Update the Bazaar branches and tips matching the heads. @@ -84,7 +84,9 @@ class BranchUpdater(object): # Convert the reference names into Bazaar speak. If we haven't # already put the 'trunk' first, do it now. - git_to_bzr_map = self.name_mapper.git_to_bzr(ref_names) + git_to_bzr_map = {} + for ref_name in ref_names: + git_to_bzr_map[ref_name] = self.cache_mgr.branch_mapper.git_to_bzr(ref_name) if ref_names and self.branch is None: trunk = self.select_trunk(ref_names) git_bzr_items = [(trunk, git_to_bzr_map[trunk])] @@ -134,17 +136,21 @@ class BranchUpdater(object): def make_branch(self, location): """Make a branch in the repository if not already there.""" + to_transport = transport.get_transport(location) + to_transport.create_prefix() try: return bzrdir.BzrDir.open(location).open_branch() except errors.NotBranchError, ex: return bzrdir.BzrDir.create_branch_convenience(location, - format=self._branch_format) + format=self._branch_format, + possible_transports=[to_transport]) def _update_branch(self, br, last_mark): """Update a branch with last revision and tag information. :return: whether the branch was changed or not """ + from fastimport.helpers import single_plural last_rev_id = self.cache_mgr.revision_ids[last_mark] revs = list(self.repo.iter_reverse_revision_history(last_rev_id)) revno = len(revs) @@ -156,8 +162,9 @@ class BranchUpdater(object): # apply tags known in this branch my_tags = {} if self.tags: + ancestry = self.repo.get_ancestry(last_rev_id) for tag,rev in self.tags.items(): - if rev in revs: + if rev in ancestry: my_tags[tag] = rev if my_tags: br.tags._set_tag_dict(my_tags) @@ -165,6 +172,6 @@ class BranchUpdater(object): if changed: tagno = len(my_tags) note("\t branch %s now has %d %s and %d %s", br.nick, - revno, helpers.single_plural(revno, "revision", "revisions"), - tagno, helpers.single_plural(tagno, "tag", "tags")) + revno, single_plural(revno, "revision", "revisions"), + tagno, single_plural(tagno, "tag", "tags")) return changed diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py index 5652251..c47a39d 100644 --- a/bzr_commit_handler.py +++ b/bzr_commit_handler.py @@ -18,6 +18,7 @@ from bzrlib import ( + debug, errors, generate_ids, inventory, @@ -25,13 +26,24 @@ from bzrlib import ( revision, serializer, ) -from bzrlib.plugins.fastimport import helpers, processor +from bzrlib.trace import ( + mutter, + note, + warning, + ) +from fastimport import ( + helpers, + processor, + ) + +from bzrlib.plugins.fastimport.helpers import ( + mode_to_kind, + ) _serializer_handles_escaping = hasattr(serializer.Serializer, 'squashes_xml_invalid_characters') - def copy_inventory(inv): # This currently breaks revision-id matching #if hasattr(inv, "_get_mutable_inventory"): @@ -53,17 +65,51 @@ class GenericCommitHandler(processor.CommitHandler): self.verbose = verbose self.branch_ref = command.ref self.prune_empty_dirs = prune_empty_dirs + # This tracks path->file-id for things we're creating this commit. + # If the same path is created multiple times, we need to warn the + # user and add it just once. + # If a path is added then renamed or copied, we need to handle that. + self._new_file_ids = {} + # This tracks path->file-id for things we're modifying this commit. + # If a path is modified then renamed or copied, we need the make + # sure we grab the new content. + self._modified_file_ids = {} + # This tracks the paths for things we're deleting this commit. + # If the same path is added or the destination of a rename say, + # then a fresh file-id is required. + self._paths_deleted_this_commit = set() + + def mutter(self, msg, *args): + """Output a mutter but add context.""" + msg = "%s (%s)" % (msg, self.command.id) + mutter(msg, *args) + + def debug(self, msg, *args): + """Output a mutter if the appropriate -D option was given.""" + if "fast-import" in debug.debug_flags: + msg = "%s (%s)" % (msg, self.command.id) + mutter(msg, *args) + + def note(self, msg, *args): + """Output a note but add context.""" + msg = "%s (%s)" % (msg, self.command.id) + note(msg, *args) + + def warning(self, msg, *args): + """Output a warning but add context.""" + msg = "%s (%s)" % (msg, self.command.id) + warning(msg, *args) def pre_process_files(self): """Prepare for committing.""" self.revision_id = self.gen_revision_id() # cache of texts for this commit, indexed by file-id - self.lines_for_commit = {} + self.data_for_commit = {} #if self.rev_store.expects_rich_root(): - self.lines_for_commit[inventory.ROOT_ID] = [] + self.data_for_commit[inventory.ROOT_ID] = [] # Track the heads and get the real parent list - parents = self.cache_mgr.track_heads(self.command) + parents = self.cache_mgr.reftracker.track_heads(self.command) # Convert the parent commit-ids to bzr revision-ids if parents: @@ -76,9 +122,9 @@ class GenericCommitHandler(processor.CommitHandler): # Tell the RevisionStore we're starting a new commit self.revision = self.build_revision() - parent_invs = [self.get_inventory(p) for p in self.parents] + self.parent_invs = [self.get_inventory(p) for p in self.parents] self.rev_store.start_new_revision(self.revision, self.parents, - parent_invs) + self.parent_invs) # cache of per-file parents for this commit, indexed by file-id self.per_file_parents_for_commit = {} @@ -113,9 +159,13 @@ class GenericCommitHandler(processor.CommitHandler): self.cache_mgr.inventories[revision_id] = inv return inv + def _get_data(self, file_id): + """Get the data bytes for a file-id.""" + return self.data_for_commit[file_id] + def _get_lines(self, file_id): """Get the lines for a file-id.""" - return self.lines_for_commit[file_id] + return osutils.split_lines(self._get_data(file_id)) def _get_per_file_parents(self, file_id): """Get the lines for a file-id.""" @@ -154,19 +204,31 @@ class GenericCommitHandler(processor.CommitHandler): :return: file_id, is_new where is_new = True if the file_id is newly created """ - try: - id = self.cache_mgr.fetch_file_id(self.branch_ref, path) - return id, False - except KeyError: - # Not in the cache, try the inventory + if path not in self._paths_deleted_this_commit: + # Try file-ids renamed in this commit + id = self._modified_file_ids.get(path) + if id is not None: + return id, False + + # Try the basis inventory id = self.basis_inventory.path2id(path) - if id is None: - # Doesn't exist yet so create it - id = generate_ids.gen_file_id(path) - self.debug("Generated new file id %s for '%s' in '%s'", - id, path, self.branch_ref) - self.cache_mgr.store_file_id(self.branch_ref, path, id) - return id, True + if id is not None: + return id, False + + # Try the other inventories + if len(self.parents) > 1: + for inv in self.parent_invs[1:]: + id = self.basis_inventory.path2id(path) + if id is not None: + return id, False + + # Doesn't exist yet so create it + dirname, basename = osutils.split(path) + id = generate_ids.gen_file_id(basename) + self.debug("Generated new file id %s for '%s' in revision-id '%s'", + id, path, self.revision_id) + self._new_file_ids[path] = id + return id, True def bzr_file_id(self, path): """Get a Bazaar file identifier for a path.""" @@ -192,14 +254,13 @@ class GenericCommitHandler(processor.CommitHandler): return generate_ids.gen_revision_id(who, timestamp) def build_revision(self): - rev_props = {} + rev_props = self._legal_revision_properties(self.command.properties) + if 'branch-nick' not in rev_props: + rev_props['branch-nick'] = self.cache_mgr.branch_mapper.git_to_bzr( + self.branch_ref) + self._save_author_info(rev_props) committer = self.command.committer who = self._format_name_email(committer[0], committer[1]) - author = self.command.author - if author is not None: - author_id = self._format_name_email(author[0], author[1]) - if author_id != who: - rev_props['author'] = author_id message = self.command.message if not _serializer_handles_escaping: # We need to assume the bad ol' days @@ -213,8 +274,50 @@ class GenericCommitHandler(processor.CommitHandler): properties=rev_props, parent_ids=self.parents) + def _legal_revision_properties(self, props): + """Clean-up any revision properties we can't handle.""" + # For now, we just check for None because that's not allowed in 2.0rc1 + result = {} + if props is not None: + for name, value in props.items(): + if value is None: + self.warning( + "converting None to empty string for property %s" + % (name,)) + result[name] = '' + else: + result[name] = value + return result + + def _save_author_info(self, rev_props): + author = self.command.author + if author is None: + return + if self.command.more_authors: + authors = [author] + self.command.more_authors + author_ids = [self._format_name_email(a[0], a[1]) for a in authors] + elif author != self.command.committer: + author_ids = [self._format_name_email(author[0], author[1])] + else: + return + # If we reach here, there are authors worth storing + rev_props['authors'] = "\n".join(author_ids) + def _modify_item(self, path, kind, is_executable, data, inv): """Add to or change an item in the inventory.""" + # If we've already added this, warn the user that we're ignoring it. + # In the future, it might be nice to double check that the new data + # is the same as the old but, frankly, exporters should be fixed + # not to produce bad data streams in the first place ... + existing = self._new_file_ids.get(path) + if existing: + # We don't warn about directories because it's fine for them + # to be created already by a previous rename + if kind != 'directory': + self.warning("%s already added in this commit - ignoring" % + (path,)) + return + # Create the new InventoryEntry basename, parent_id = self._ensure_directory(path, inv) file_id = self.bzr_file_id(path) @@ -222,18 +325,24 @@ class GenericCommitHandler(processor.CommitHandler): ie.revision = self.revision_id if kind == 'file': ie.executable = is_executable - lines = osutils.split_lines(data) - ie.text_sha1 = osutils.sha_strings(lines) - ie.text_size = sum(map(len, lines)) - self.lines_for_commit[file_id] = lines + # lines = osutils.split_lines(data) + ie.text_sha1 = osutils.sha_string(data) + ie.text_size = len(data) + self.data_for_commit[file_id] = data + elif kind == 'directory': + self.directory_entries[path] = ie + # There are no lines stored for a directory so + # make sure the cache used by get_lines knows that + self.data_for_commit[file_id] = '' elif kind == 'symlink': - ie.symlink_target = data.encode('utf8') + ie.symlink_target = data.decode('utf8') # There are no lines stored for a symlink so # make sure the cache used by get_lines knows that - self.lines_for_commit[file_id] = [] + self.data_for_commit[file_id] = '' else: - raise errors.BzrError("Cannot import items of kind '%s' yet" % - (kind,)) + self.warning("Cannot import items of kind '%s' yet - ignoring '%s'" + % (kind, path)) + return # Record it if file_id in inv: old_ie = inv[file_id] @@ -273,7 +382,7 @@ class GenericCommitHandler(processor.CommitHandler): self.directory_entries[dirname] = ie # There are no lines stored for a directory so # make sure the cache used by get_lines knows that - self.lines_for_commit[dir_file_id] = [] + self.data_for_commit[dir_file_id] = '' # It's possible that a file or symlink with that file-id # already exists. If it does, we need to delete it. @@ -289,6 +398,8 @@ class GenericCommitHandler(processor.CommitHandler): """ result = self.directory_entries.get(dirname) if result is None: + if dirname in self._paths_deleted_this_commit: + raise KeyError try: file_id = inv.path2id(dirname) except errors.NoSuchId: @@ -305,39 +416,61 @@ class GenericCommitHandler(processor.CommitHandler): return result def _delete_item(self, path, inv): - file_id = inv.path2id(path) - if file_id is None: - self.mutter("ignoring delete of %s as not in inventory", path) - return - try: - ie = inv[file_id] - except errors.NoSuchId: - self.mutter("ignoring delete of %s as not in inventory", path) + newly_added = self._new_file_ids.get(path) + if newly_added: + # We've only just added this path earlier in this commit. + file_id = newly_added + # note: delta entries look like (old, new, file-id, ie) + ie = self._delta_entries_by_fileid[file_id][3] else: - self.record_delete(path, ie) + file_id = inv.path2id(path) + if file_id is None: + self.mutter("ignoring delete of %s as not in inventory", path) + return + try: + ie = inv[file_id] + except errors.NoSuchId: + self.mutter("ignoring delete of %s as not in inventory", path) + return + self.record_delete(path, ie) def _copy_item(self, src_path, dest_path, inv): - if not self.parents: - self.warning("ignoring copy of %s to %s - no parent revisions", - src_path, dest_path) - return - file_id = inv.path2id(src_path) - if file_id is None: - self.warning("ignoring copy of %s to %s - source does not exist", - src_path, dest_path) - return - ie = inv[file_id] + newly_changed = self._new_file_ids.get(src_path) or \ + self._modified_file_ids.get(src_path) + if newly_changed: + # We've only just added/changed this path earlier in this commit. + file_id = newly_changed + # note: delta entries look like (old, new, file-id, ie) + ie = self._delta_entries_by_fileid[file_id][3] + else: + file_id = inv.path2id(src_path) + if file_id is None: + self.warning("ignoring copy of %s to %s - source does not exist", + src_path, dest_path) + return + ie = inv[file_id] kind = ie.kind if kind == 'file': - content = self.rev_store.get_file_text(self.parents[0], file_id) + if newly_changed: + content = self.data_for_commit[file_id] + else: + content = self.rev_store.get_file_text(self.parents[0], file_id) self._modify_item(dest_path, kind, ie.executable, content, inv) elif kind == 'symlink': - self._modify_item(dest_path, kind, False, ie.symlink_target, inv) + self._modify_item(dest_path, kind, False, ie.symlink_target.encode("utf-8"), inv) else: self.warning("ignoring copy of %s %s - feature not yet supported", - kind, path) + kind, dest_path) def _rename_item(self, old_path, new_path, inv): + existing = self._new_file_ids.get(old_path) or \ + self._modified_file_ids.get(old_path) + if existing: + # We've only just added/modified this path earlier in this commit. + # Change the add/modify of old_path to an add of new_path + self._rename_pending_change(old_path, new_path, existing) + return + file_id = inv.path2id(old_path) if file_id is None: self.warning( @@ -350,13 +483,12 @@ class GenericCommitHandler(processor.CommitHandler): if new_file_id is not None: self.record_delete(new_path, inv[new_file_id]) self.record_rename(old_path, new_path, file_id, ie) - self.cache_mgr.rename_path(self.branch_ref, old_path, new_path) # The revision-id for this entry will be/has been updated and # that means the loader then needs to know what the "new" text is. # We therefore must go back to the revision store to get it. lines = self.rev_store.get_file_lines(rev_id, file_id) - self.lines_for_commit[file_id] = lines + self.data_for_commit[file_id] = ''.join(lines) def _delete_all_items(self, inv): for name, root_item in inv.root.children.iteritems(): @@ -404,7 +536,7 @@ class InventoryCommitHandler(GenericCommitHandler): """Save the revision.""" self.cache_mgr.inventories[self.revision_id] = self.inventory self.rev_store.load(self.revision, self.inventory, None, - lambda file_id: self._get_lines(file_id), + lambda file_id: self._get_data(file_id), lambda file_id: self._get_per_file_parents(file_id), lambda revision_ids: self._get_inventories(revision_ids)) @@ -446,59 +578,15 @@ class InventoryCommitHandler(GenericCommitHandler): self.inventory) self.inventory.rename(file_id, new_parent_id, new_basename) - def _delete_item(self, path, inv): - # NOTE: I'm retaining this method for now, instead of using the - # one in the superclass, because it's taken quite a lot of tweaking - # to cover all the edge cases seen in the wild. Long term, it can - # probably go once the higher level method does "warn_unless_in_merges" - # and handles all the various special cases ... - fileid = self.bzr_file_id(path) - dirname, basename = osutils.split(path) - if (fileid in inv and - isinstance(inv[fileid], inventory.InventoryDirectory)): - for child_path in inv[fileid].children.keys(): - self._delete_item(osutils.pathjoin(path, child_path), inv) - # We need to clean this out of the directory entries as well - try: - del self.directory_entries[path] - except KeyError: - pass - try: - if self.inventory.id2path(fileid) == path: - del inv[fileid] - else: - # already added by some other name? - try: - parent_id = self.cache_mgr.fetch_file_id(self.branch_ref, - dirname) - except KeyError: - pass - else: - del inv[parent_id].children[basename] - except KeyError: - self._warn_unless_in_merges(fileid, path) - except errors.NoSuchId: - self._warn_unless_in_merges(fileid, path) - except AttributeError, ex: - if ex.args[0] == 'children': - # A directory has changed into a file and then one - # of it's children is being deleted! - self._warn_unless_in_merges(fileid, path) - else: - raise - try: - self.cache_mgr.delete_path(self.branch_ref, path) - except KeyError: - pass - def modify_handler(self, filecmd): if filecmd.dataref is not None: data = self.cache_mgr.fetch_blob(filecmd.dataref) else: data = filecmd.data self.debug("modifying %s", filecmd.path) - self._modify_item(filecmd.path, filecmd.kind, - filecmd.is_executable, data, self.inventory) + (kind, is_executable) = mode_to_kind(filecmd.mode) + self._modify_item(filecmd.path, kind, + is_executable, data, self.inventory) def delete_handler(self, filecmd): self.debug("deleting %s", filecmd.path) @@ -548,9 +636,9 @@ class InventoryDeltaCommitHandler(GenericCommitHandler): delta = self._get_final_delta() inv = self.rev_store.load_using_delta(self.revision, self.basis_inventory, delta, None, - lambda file_id: self._get_lines(file_id), - lambda file_id: self._get_per_file_parents(file_id), - lambda revision_ids: self._get_inventories(revision_ids)) + self._get_data, + self._get_per_file_parents, + self._get_inventories) self.cache_mgr.inventories[self.revision_id] = inv #print "committed %s" % self.revision_id @@ -562,44 +650,63 @@ class InventoryDeltaCommitHandler(GenericCommitHandler): """ delta = list(self._delta_entries_by_fileid.values()) if self.prune_empty_dirs and self._dirs_that_might_become_empty: - candidates = osutils.minimum_path_selection( - self._dirs_that_might_become_empty) - for path, file_id in self._empty_after_delta(delta, candidates): - delta.append((path, None, file_id, None)) - #print "delta:\n%s\n\n" % "\n".join([str(de) for de in delta]) + candidates = self._dirs_that_might_become_empty + while candidates: + never_born = set() + parent_dirs_that_might_become_empty = set() + for path, file_id in self._empty_after_delta(delta, candidates): + newly_added = self._new_file_ids.get(path) + if newly_added: + never_born.add(newly_added) + else: + delta.append((path, None, file_id, None)) + parent_dir = osutils.dirname(path) + if parent_dir: + parent_dirs_that_might_become_empty.add(parent_dir) + candidates = parent_dirs_that_might_become_empty + # Clean up entries that got deleted before they were ever added + if never_born: + delta = [de for de in delta if de[2] not in never_born] return delta def _empty_after_delta(self, delta, candidates): - new_inv = self.basis_inventory._get_mutable_inventory() - new_inv.apply_delta(delta) + #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta])) + #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates])) + new_inv = self._get_proposed_inventory(delta) result = [] for dir in candidates: file_id = new_inv.path2id(dir) if file_id is None: continue ie = new_inv[file_id] + if ie.kind != 'directory': + continue if len(ie.children) == 0: result.append((dir, file_id)) if self.verbose: self.note("pruning empty directory %s" % (dir,)) - # Check parents in case deleting this dir makes *them* empty - while True: - file_id = ie.parent_id - if file_id == inventory.ROOT_ID: - # We've reach the root - break - try: - ie = new_inv[file_id] - except errors.NoSuchId: - break - if len(ie.children) > 1: - break - dir = new_inv.id2path(file_id) - result.append((dir, file_id)) - if self.verbose: - self.note("pruning empty directory parent %s" % (dir,)) return result + def _get_proposed_inventory(self, delta): + if len(self.parents): + # new_inv = self.basis_inventory._get_mutable_inventory() + # Note that this will create unreferenced chk pages if we end up + # deleting entries, because this 'test' inventory won't end up + # used. However, it is cheaper than having to create a full copy of + # the inventory for every commit. + new_inv = self.basis_inventory.create_by_apply_delta(delta, + 'not-a-valid-revision-id:') + else: + new_inv = inventory.Inventory(revision_id=self.revision_id) + # This is set in the delta so remove it to prevent a duplicate + del new_inv[inventory.ROOT_ID] + try: + new_inv.apply_delta(delta) + except errors.InconsistentDelta: + self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta])) + raise + return new_inv + def _add_entry(self, entry): # We need to combine the data if multiple entries have the same file-id. # For example, a rename followed by a modification looks like: @@ -626,9 +733,18 @@ class InventoryDeltaCommitHandler(GenericCommitHandler): if existing is not None: old_path = existing[0] entry = (old_path, new_path, file_id, ie) - self._delta_entries_by_fileid[file_id] = entry + if new_path is None and old_path is None: + # This is a delete cancelling a previous add + del self._delta_entries_by_fileid[file_id] + parent_dir = osutils.dirname(existing[1]) + self.mutter("cancelling add of %s with parent %s" % (existing[1], parent_dir)) + if parent_dir: + self._dirs_that_might_become_empty.add(parent_dir) + return + else: + self._delta_entries_by_fileid[file_id] = entry - # Collect parent direcctories that might become empty + # Collect parent directories that might become empty if new_path is None: # delete parent_dir = osutils.dirname(old_path) @@ -672,14 +788,26 @@ class InventoryDeltaCommitHandler(GenericCommitHandler): def record_changed(self, path, ie, parent_id=None): self._add_entry((path, path, ie.file_id, ie)) + self._modified_file_ids[path] = ie.file_id def record_delete(self, path, ie): self._add_entry((path, None, ie.file_id, None)) + self._paths_deleted_this_commit.add(path) if ie.kind == 'directory': + try: + del self.directory_entries[path] + except KeyError: + pass for child_relpath, entry in \ self.basis_inventory.iter_entries_by_dir(from_dir=ie): child_path = osutils.pathjoin(path, child_relpath) self._add_entry((child_path, None, entry.file_id, None)) + self._paths_deleted_this_commit.add(child_path) + if entry.kind == 'directory': + try: + del self.directory_entries[child_path] + except KeyError: + pass def record_rename(self, old_path, new_path, file_id, old_ie): new_ie = old_ie.copy() @@ -689,29 +817,71 @@ class InventoryDeltaCommitHandler(GenericCommitHandler): new_ie.parent_id = new_parent_id new_ie.revision = self.revision_id self._add_entry((old_path, new_path, file_id, new_ie)) + self._modified_file_ids[new_path] = file_id + self._paths_deleted_this_commit.discard(new_path) + if new_ie.kind == 'directory': + self.directory_entries[new_path] = new_ie + + def _rename_pending_change(self, old_path, new_path, file_id): + """Instead of adding/modifying old-path, add new-path instead.""" + # note: delta entries look like (old, new, file-id, ie) + old_ie = self._delta_entries_by_fileid[file_id][3] + + # Delete the old path. Note that this might trigger implicit + # deletion of newly created parents that could now become empty. + self.record_delete(old_path, old_ie) + + # Update the dictionaries used for tracking new file-ids + if old_path in self._new_file_ids: + del self._new_file_ids[old_path] + else: + del self._modified_file_ids[old_path] + self._new_file_ids[new_path] = file_id + + # Create the new InventoryEntry + kind = old_ie.kind + basename, parent_id = self._ensure_directory(new_path, + self.basis_inventory) + ie = inventory.make_entry(kind, basename, parent_id, file_id) + ie.revision = self.revision_id + if kind == 'file': + ie.executable = old_ie.executable + ie.text_sha1 = old_ie.text_sha1 + ie.text_size = old_ie.text_size + elif kind == 'symlink': + ie.symlink_target = old_ie.symlink_target + + # Record it + self.record_new(new_path, ie) def modify_handler(self, filecmd): + (kind, executable) = mode_to_kind(filecmd.mode) if filecmd.dataref is not None: - data = self.cache_mgr.fetch_blob(filecmd.dataref) + if kind == "directory": + data = None + elif kind == "tree-reference": + data = filecmd.dataref + else: + data = self.cache_mgr.fetch_blob(filecmd.dataref) else: data = filecmd.data self.debug("modifying %s", filecmd.path) - self._modify_item(filecmd.path, filecmd.kind, - filecmd.is_executable, data, self.basis_inventory) + self._modify_item(filecmd.path, kind, + executable, data, self.basis_inventory) def delete_handler(self, filecmd): self.debug("deleting %s", filecmd.path) self._delete_item(filecmd.path, self.basis_inventory) def copy_handler(self, filecmd): - src_path = filecmd.src_path - dest_path = filecmd.dest_path + src_path = filecmd.src_path.decode("utf8") + dest_path = filecmd.dest_path.decode("utf8") self.debug("copying %s to %s", src_path, dest_path) self._copy_item(src_path, dest_path, self.basis_inventory) def rename_handler(self, filecmd): - old_path = filecmd.old_path - new_path = filecmd.new_path + old_path = filecmd.old_path.decode("utf8") + new_path = filecmd.new_path.decode("utf8") self.debug("renaming %s to %s", old_path, new_path) self._rename_item(old_path, new_path, self.basis_inventory) diff --git a/cache_manager.py b/cache_manager.py index af57534..6d8ef05 100644 --- a/cache_manager.py +++ b/cache_manager.py @@ -16,12 +16,84 @@ """A manager of caches.""" +import atexit +import os +import shutil +import tempfile +import weakref from bzrlib import lru_cache, trace -from bzrlib.plugins.fastimport import helpers +from bzrlib.plugins.fastimport import ( + branch_mapper, + ) +from fastimport.helpers import ( + single_plural, + ) +from fastimport.reftracker import ( + RefTracker, + ) + + +class _Cleanup(object): + """This class makes sure we clean up when CacheManager goes away. + + We use a helper class to ensure that we are never in a refcycle. + """ + + def __init__(self, disk_blobs): + self.disk_blobs = disk_blobs + self.tempdir = None + self.small_blobs = None + + def __del__(self): + self.finalize() + + def finalize(self): + if self.disk_blobs is not None: + for info in self.disk_blobs.itervalues(): + if info[-1] is not None: + os.unlink(info[-1]) + self.disk_blobs = None + if self.small_blobs is not None: + self.small_blobs.close() + self.small_blobs = None + if self.tempdir is not None: + shutil.rmtree(self.tempdir) + + +class _Cleanup(object): + """This class makes sure we clean up when CacheManager goes away. + + We use a helper class to ensure that we are never in a refcycle. + """ + + def __init__(self, disk_blobs): + self.disk_blobs = disk_blobs + self.tempdir = None + self.small_blobs = None + + def __del__(self): + self.finalize() + + def finalize(self): + if self.disk_blobs is not None: + for info in self.disk_blobs.itervalues(): + if info[-1] is not None: + os.unlink(info[-1]) + self.disk_blobs = None + if self.small_blobs is not None: + self.small_blobs.close() + self.small_blobs = None + if self.tempdir is not None: + shutil.rmtree(self.tempdir) + class CacheManager(object): + _small_blob_threshold = 25*1024 + _sticky_cache_size = 300*1024*1024 + _sticky_flushed_size = 100*1024*1024 + def __init__(self, info=None, verbose=False, inventory_cache_size=10): """Create a manager of caches. @@ -31,9 +103,18 @@ class CacheManager(object): self.verbose = verbose # dataref -> data. datref is either :mark or the sha-1. - # Sticky blobs aren't removed after being referenced. + # Sticky blobs are referenced more than once, and are saved until their + # refcount goes to 0 self._blobs = {} self._sticky_blobs = {} + self._sticky_memory_bytes = 0 + # if we overflow our memory cache, then we will dump large blobs to + # disk in this directory + self._tempdir = None + # id => (offset, n_bytes, fname) + # if fname is None, then the content is stored in the small file + self._disk_blobs = {} + self._cleanup = _Cleanup(self._disk_blobs) # revision-id -> Inventory cache # these are large and we probably don't need too many as @@ -46,12 +127,6 @@ class CacheManager(object): # (path, branch_ref) -> file-ids - as generated. # (Use store_file_id/fetch_fileid methods rather than direct access.) - self._file_ids = {} - - # Head tracking: last ref, last id per ref & map of commit ids to ref*s* - self.last_ref = None - self.last_ids = {} - self.heads = {} # Work out the blobs to make sticky - None means all self._blob_ref_counts = {} @@ -67,18 +142,21 @@ class CacheManager(object): # info not in file - possible when no blobs used pass + # BranchMapper has no state (for now?), but we keep it around rather + # than reinstantiate on every usage + self.branch_mapper = branch_mapper.BranchMapper() + + self.reftracker = RefTracker() + def dump_stats(self, note=trace.note): """Dump some statistics about what we cached.""" # TODO: add in inventory stastistics note("Cache statistics:") self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note) self._show_stats_for(self.revision_ids, "revision-ids", note=note) - self._show_stats_for(self._file_ids, "file-ids", note=note, - tuple_key=True) # These aren't interesting so omit from the output, at least for now #self._show_stats_for(self._blobs, "other blobs", note=note) - #self._show_stats_for(self.last_ids, "last-ids", note=note) - #self._show_stats_for(self.heads, "heads", note=note) + #self.reftracker.dump_stats(note=note) def _show_stats_for(self, dict, label, note=trace.note, tuple_key=False): """Dump statistics about a given dictionary. @@ -100,109 +178,115 @@ class CacheManager(object): size = size / 1024 unit = 'G' note(" %-12s: %8.1f %s (%d %s)" % (label, size, unit, count, - helpers.single_plural(count, "item", "items"))) + single_plural(count, "item", "items"))) def clear_all(self): """Free up any memory used by the caches.""" self._blobs.clear() self._sticky_blobs.clear() self.revision_ids.clear() - self._file_ids.clear() - self.last_ids.clear() - self.heads.clear() + self.reftracker.clear() self.inventories.clear() + def _flush_blobs_to_disk(self): + blobs = self._sticky_blobs.keys() + sticky_blobs = self._sticky_blobs + total_blobs = len(sticky_blobs) + blobs.sort(key=lambda k:len(sticky_blobs[k])) + if self._tempdir is None: + tempdir = tempfile.mkdtemp(prefix='fastimport_blobs-') + self._tempdir = tempdir + self._cleanup.tempdir = self._tempdir + self._cleanup.small_blobs = tempfile.TemporaryFile( + prefix='small-blobs-', dir=self._tempdir) + small_blob_ref = weakref.ref(self._cleanup.small_blobs) + # Even though we add it to _Cleanup it seems that the object can be + # destroyed 'too late' for cleanup to actually occur. Probably a + # combination of bzr's "die directly, don't clean up" and how + # exceptions close the running stack. + def exit_cleanup(): + small_blob = small_blob_ref() + if small_blob is not None: + small_blob.close() + shutil.rmtree(tempdir, ignore_errors=True) + atexit.register(exit_cleanup) + count = 0 + bytes = 0 + n_small_bytes = 0 + while self._sticky_memory_bytes > self._sticky_flushed_size: + id = blobs.pop() + blob = self._sticky_blobs.pop(id) + n_bytes = len(blob) + self._sticky_memory_bytes -= n_bytes + if n_bytes < self._small_blob_threshold: + f = self._cleanup.small_blobs + f.seek(0, os.SEEK_END) + self._disk_blobs[id] = (f.tell(), n_bytes, None) + f.write(blob) + n_small_bytes += n_bytes + else: + fd, name = tempfile.mkstemp(prefix='blob-', dir=self._tempdir) + os.write(fd, blob) + os.close(fd) + self._disk_blobs[id] = (0, n_bytes, name) + bytes += n_bytes + del blob + count += 1 + trace.note('flushed %d/%d blobs w/ %.1fMB (%.1fMB small) to disk' + % (count, total_blobs, bytes / 1024. / 1024, + n_small_bytes / 1024. / 1024)) + def store_blob(self, id, data): """Store a blob of data.""" # Note: If we're not reference counting, everything has to be sticky if not self._blob_ref_counts or id in self._blob_ref_counts: self._sticky_blobs[id] = data + self._sticky_memory_bytes += len(data) + if self._sticky_memory_bytes > self._sticky_cache_size: + self._flush_blobs_to_disk() elif data == '': # Empty data is always sticky self._sticky_blobs[id] = data else: self._blobs[id] = data + def _decref(self, id, cache, fn): + if not self._blob_ref_counts: + return False + count = self._blob_ref_counts.get(id, None) + if count is not None: + count -= 1 + if count <= 0: + del cache[id] + if fn is not None: + os.unlink(fn) + del self._blob_ref_counts[id] + return True + else: + self._blob_ref_counts[id] = count + return False + def fetch_blob(self, id): """Fetch a blob of data.""" - try: - b = self._sticky_blobs[id] - if self._blob_ref_counts and b != '': - self._blob_ref_counts[id] -= 1 - if self._blob_ref_counts[id] == 0: - del self._sticky_blobs[id] - return b - except KeyError: + if id in self._blobs: return self._blobs.pop(id) + if id in self._disk_blobs: + (offset, n_bytes, fn) = self._disk_blobs[id] + if fn is None: + f = self._cleanup.small_blobs + f.seek(offset) + content = f.read(n_bytes) + else: + fp = open(fn, 'rb') + try: + content = fp.read() + finally: + fp.close() + self._decref(id, self._disk_blobs, fn) + return content + content = self._sticky_blobs[id] + if self._decref(id, self._sticky_blobs, None): + self._sticky_memory_bytes -= len(content) + return content - def store_file_id(self, branch_ref, path, id): - """Store the path to file-id mapping for a branch.""" - key = self._fileid_key(path, branch_ref) - self._file_ids[key] = id - def fetch_file_id(self, branch_ref, path): - """Lookup the file-id for a path in a branch. - - Raises KeyError if unsuccessful. - """ - key = self._fileid_key(path, branch_ref) - return self._file_ids[key] - - def _fileid_key(self, path, branch_ref): - return (path, branch_ref) - - def delete_path(self, branch_ref, path): - """Remove a path from caches.""" - # We actually want to remember what file-id we gave a path, - # even when that file is deleted, so doing nothing is correct. - # It's quite possible for a path to be deleted twice where - # the first time is in a merge branch (but the same branch_ref) - # and the second time is when that branch is merged to mainline. - pass - - def rename_path(self, branch_ref, old_path, new_path): - """Rename a path in the caches.""" - # In this case, we need to forget the file-id we gave a path, - # otherwise, we'll get duplicate file-ids in the repository - # if a new file is created at the old path. - old_key = self._fileid_key(old_path, branch_ref) - new_key = self._fileid_key(new_path, branch_ref) - try: - old_file_id = self._file_ids[old_key] - except KeyError: - # The old_key has already been removed, most likely - # in a merge branch. - pass - else: - self._file_ids[new_key] = old_file_id - del self._file_ids[old_key] - - def track_heads(self, cmd): - """Track the repository heads given a CommitCommand. - - :param cmd: the CommitCommand - :return: the list of parents in terms of commit-ids - """ - # Get the true set of parents - if cmd.from_ is not None: - parents = [cmd.from_] - else: - last_id = self.last_ids.get(cmd.ref) - if last_id is not None: - parents = [last_id] - else: - parents = [] - parents.extend(cmd.merges) - - # Track the heads - self.track_heads_for_ref(cmd.ref, cmd.id, parents) - return parents - - def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None): - if parents is not None: - for parent in parents: - if parent in self.heads: - del self.heads[parent] - self.heads.setdefault(cmd_id, set()).add(cmd_ref) - self.last_ids[cmd_ref] = cmd_id - self.last_ref = cmd_ref @@ -0,0 +1,882 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Fastimport/fastexport commands.""" + +from bzrlib import bzrdir +from bzrlib.commands import Command +from bzrlib.option import Option, ListOption, RegistryOption + +from bzrlib.plugins.fastimport import load_fastimport + + +def _run(source, processor_factory, control, params, verbose, + user_map=None): + """Create and run a processor. + + :param source: a filename or '-' for standard input. If the + filename ends in .gz, it will be opened as a gzip file and + the stream will be implicitly uncompressed + :param processor_factory: a callable for creating a processor + :param control: the BzrDir of the destination or None if no + destination is expected + :param user_map: if not None, the file containing the user map. + """ + from fastimport import parser + stream = _get_source_stream(source) + user_mapper = _get_user_mapper(user_map) + proc = processor_factory(control, params=params, verbose=verbose) + p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper) + return proc.process(p.iter_commands) + + +def _get_source_stream(source): + if source == '-': + import sys + from fastimport import helpers + stream = helpers.binary_stream(sys.stdin) + elif source.endswith('.gz'): + import gzip + stream = gzip.open(source, "rb") + else: + stream = open(source, "rb") + return stream + + +def _get_user_mapper(filename): + import user_mapper + if filename is None: + return None + f = open(filename) + lines = f.readlines() + f.close() + return user_mapper.UserMapper(lines) + + +class cmd_fast_import(Command): + """Backend for fast Bazaar data importers. + + This command reads a mixed command/data stream and creates + branches in a Bazaar repository accordingly. The preferred + recipe is:: + + bzr fast-import project.fi project.bzr + + Numerous commands are provided for generating a fast-import file + to use as input. These are named fast-export-from-xxx where xxx + is one of cvs, darcs, git, hg, mtn, p4 or svn. + To specify standard input as the input stream, use a + source name of '-' (instead of project.fi). If the source name + ends in '.gz', it is assumed to be compressed in gzip format. + + project.bzr will be created if it doesn't exist. If it exists + already, it should be empty or be an existing Bazaar repository + or branch. If not specified, the current directory is assumed. + + fast-import will intelligently select the format to use when + creating a repository or branch. If you are running Bazaar 1.17 + up to Bazaar 2.0, the default format for Bazaar 2.x ("2a") is used. + Otherwise, the current default format ("pack-0.92" for Bazaar 1.x) + is used. If you wish to specify a custom format, use the `--format` + option. + + .. note:: + + To maintain backwards compatibility, fast-import lets you + create the target repository or standalone branch yourself. + It is recommended though that you let fast-import create + these for you instead. + + :Branch mapping rules: + + Git reference names are mapped to Bazaar branch names as follows: + + * refs/heads/foo is mapped to foo + * refs/remotes/origin/foo is mapped to foo.remote + * refs/tags/foo is mapped to foo.tag + * */master is mapped to trunk, trunk.remote, etc. + * */trunk is mapped to git-trunk, git-trunk.remote, etc. + + :Branch creation rules: + + When a shared repository is created or found at the destination, + branches are created inside it. In the simple case of a single + branch (refs/heads/master) inside the input file, the branch is + project.bzr/trunk. + + When a standalone branch is found at the destination, the trunk + is imported there and warnings are output about any other branches + found in the input file. + + When a branch in a shared repository is found at the destination, + that branch is made the trunk and other branches, if any, are + created in sister directories. + + :Working tree updates: + + The working tree is generated for the trunk branch. If multiple + branches are created, a message is output on completion explaining + how to create the working trees for other branches. + + :Custom exporters: + + The fast-export-from-xxx commands typically call more advanced + xxx-fast-export scripts. You are welcome to use the advanced + scripts if you prefer. + + If you wish to write a custom exporter for your project, see + http://bazaar-vcs.org/BzrFastImport for the detailed protocol + specification. In many cases, exporters can be written quite + quickly using whatever scripting/programming language you like. + + :User mapping: + + Some source repositories store just the user name while Bazaar + prefers a full email address. You can adjust user-ids while + importing by using the --user-map option. The argument is a + text file with lines in the format:: + + old-id = new-id + + Blank lines and lines beginning with # are ignored. + If old-id has the special value '@', then users without an + email address will get one created by using the matching new-id + as the domain, unless a more explicit address is given for them. + For example, given the user-map of:: + + @ = example.com + bill = William Jones <bill@example.com> + + then user-ids are mapped as follows:: + + maria => maria <maria@example.com> + bill => William Jones <bill@example.com> + + .. note:: + + User mapping is supported by both the fast-import and + fast-import-filter commands. + + :Blob tracking: + + As some exporters (like git-fast-export) reuse blob data across + commits, fast-import makes two passes over the input file by + default. In the first pass, it collects data about what blobs are + used when, along with some other statistics (e.g. total number of + commits). In the second pass, it generates the repository and + branches. + + .. note:: + + The initial pass isn't done if the --info option is used + to explicitly pass in information about the input stream. + It also isn't done if the source is standard input. In the + latter case, memory consumption may be higher than otherwise + because some blobs may be kept in memory longer than necessary. + + :Restarting an import: + + At checkpoints and on completion, the commit-id -> revision-id + map is saved to a file called 'fastimport-id-map' in the control + directory for the repository (e.g. .bzr/repository). If the import + is interrupted or unexpectedly crashes, it can be started again + and this file will be used to skip over already loaded revisions. + As long as subsequent exports from the original source begin + with exactly the same revisions, you can use this feature to + maintain a mirror of a repository managed by a foreign tool. + If and when Bazaar is used to manage the repository, this file + can be safely deleted. + + :Examples: + + Import a Subversion repository into Bazaar:: + + bzr fast-export-from-svn /svn/repo/path project.fi + bzr fast-import project.fi project.bzr + + Import a CVS repository into Bazaar:: + + bzr fast-export-from-cvs /cvs/repo/path project.fi + bzr fast-import project.fi project.bzr + + Import a Git repository into Bazaar:: + + bzr fast-export-from-git /git/repo/path project.fi + bzr fast-import project.fi project.bzr + + Import a Mercurial repository into Bazaar:: + + bzr fast-export-from-hg /hg/repo/path project.fi + bzr fast-import project.fi project.bzr + + Import a Darcs repository into Bazaar:: + + bzr fast-export-from-darcs /darcs/repo/path project.fi + bzr fast-import project.fi project.bzr + """ + hidden = False + _see_also = ['fast-export', 'fast-import-filter', 'fast-import-info'] + takes_args = ['source', 'destination?'] + takes_options = ['verbose', + Option('user-map', type=str, + help="Path to file containing a map of user-ids.", + ), + Option('info', type=str, + help="Path to file containing caching hints.", + ), + Option('trees', + help="Update all working trees, not just trunk's.", + ), + Option('count', type=int, + help="Import this many revisions then exit.", + ), + Option('checkpoint', type=int, + help="Checkpoint automatically every N revisions." + " The default is 10000.", + ), + Option('autopack', type=int, + help="Pack every N checkpoints. The default is 4.", + ), + Option('inv-cache', type=int, + help="Number of inventories to cache.", + ), + RegistryOption.from_kwargs('mode', + 'The import algorithm to use.', + title='Import Algorithm', + default='Use the preferred algorithm (inventory deltas).', + classic="Use the original algorithm (mutable inventories).", + experimental="Enable experimental features.", + value_switches=True, enum_switch=False, + ), + Option('import-marks', type=str, + help="Import marks from file." + ), + Option('export-marks', type=str, + help="Export marks to file." + ), + RegistryOption('format', + help='Specify a format for the created repository. See' + ' "bzr help formats" for details.', + lazy_registry=('bzrlib.bzrdir', 'format_registry'), + converter=lambda name: bzrdir.format_registry.make_bzrdir(name), + value_switches=False, title='Repository format'), + ] + def run(self, source, destination='.', verbose=False, info=None, + trees=False, count=-1, checkpoint=10000, autopack=4, inv_cache=-1, + mode=None, import_marks=None, export_marks=None, format=None, + user_map=None): + load_fastimport() + from bzrlib.plugins.fastimport.processors import generic_processor + from bzrlib.plugins.fastimport.helpers import ( + open_destination_directory, + ) + # If no format is given and the user is running a release + # leading up to 2.0, select 2a for them. Otherwise, use + # the default format. + if format is None: + import bzrlib + bzr_version = bzrlib.version_info[0:2] + if bzr_version in [(1,17), (1,18), (2,0)]: + format = bzrdir.format_registry.make_bzrdir('2a') + control = open_destination_directory(destination, format=format) + + # If an information file was given and the source isn't stdin, + # generate the information by reading the source file as a first pass + if info is None and source != '-': + info = self._generate_info(source) + + # Do the work + if mode is None: + mode = 'default' + params = { + 'info': info, + 'trees': trees, + 'count': count, + 'checkpoint': checkpoint, + 'autopack': autopack, + 'inv-cache': inv_cache, + 'mode': mode, + 'import-marks': import_marks, + 'export-marks': export_marks, + } + return _run(source, generic_processor.GenericProcessor, control, + params, verbose, user_map=user_map) + + def _generate_info(self, source): + from cStringIO import StringIO + from fastimport import parser + from fastimport.processors import info_processor + stream = _get_source_stream(source) + output = StringIO() + try: + proc = info_processor.InfoProcessor(verbose=True, outf=output) + p = parser.ImportParser(stream) + return_code = proc.process(p.iter_commands) + lines = output.getvalue().splitlines() + finally: + output.close() + stream.seek(0) + return lines + + +class cmd_fast_import_filter(Command): + """Filter a fast-import stream to include/exclude files & directories. + + This command is useful for splitting a subdirectory or bunch of + files out from a project to create a new project complete with history + for just those files. It can also be used to create a new project + repository that removes all references to files that should not have + been committed, e.g. security-related information (like passwords), + commercially sensitive material, files with an incompatible license or + large binary files like CD images. + + To specify standard input as the input stream, use a source name + of '-'. If the source name ends in '.gz', it is assumed to be + compressed in gzip format. + + :File/directory filtering: + + This is supported by the -i and -x options. Excludes take precedence + over includes. + + When filtering out a subdirectory (or file), the new stream uses the + subdirectory (or subdirectory containing the file) as the root. As + fast-import doesn't know in advance whether a path is a file or + directory in the stream, you need to specify a trailing '/' on + directories passed to the `--includes option`. If multiple files or + directories are given, the new root is the deepest common directory. + + Note: If a path has been renamed, take care to specify the *original* + path name, not the final name that it ends up with. + + :User mapping: + + Some source repositories store just the user name while Bazaar + prefers a full email address. You can adjust user-ids + by using the --user-map option. The argument is a + text file with lines in the format:: + + old-id = new-id + + Blank lines and lines beginning with # are ignored. + If old-id has the special value '@', then users without an + email address will get one created by using the matching new-id + as the domain, unless a more explicit address is given for them. + For example, given the user-map of:: + + @ = example.com + bill = William Jones <bill@example.com> + + then user-ids are mapped as follows:: + + maria => maria <maria@example.com> + bill => William Jones <bill@example.com> + + .. note:: + + User mapping is supported by both the fast-import and + fast-import-filter commands. + + :Examples: + + Create a new project from a library (note the trailing / on the + directory name of the library):: + + front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi + bzr fast-import xxx.fi mylibrary.bzr + (lib/xxx/foo is now foo) + + Create a new repository without a sensitive file:: + + front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi + bzr fast-import clean.fi clean.bzr + """ + hidden = False + _see_also = ['fast-import'] + takes_args = ['source?'] + takes_options = ['verbose', + ListOption('include_paths', short_name='i', type=str, + help="Only include commits affecting these paths." + " Directories should have a trailing /." + ), + ListOption('exclude_paths', short_name='x', type=str, + help="Exclude these paths from commits." + ), + Option('user-map', type=str, + help="Path to file containing a map of user-ids.", + ), + ] + encoding_type = 'exact' + def run(self, source=None, verbose=False, include_paths=None, + exclude_paths=None, user_map=None): + load_fastimport() + from fastimport.processors import filter_processor + params = { + 'include_paths': include_paths, + 'exclude_paths': exclude_paths, + } + from fastimport import parser + stream = _get_source_stream(source) + user_mapper = _get_user_mapper(user_map) + proc = filter_processor.FilterProcessor(params=params, verbose=verbose) + p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper) + return proc.process(p.iter_commands) + + +class cmd_fast_import_info(Command): + """Output information about a fast-import stream. + + This command reads a fast-import stream and outputs + statistics and interesting properties about what it finds. + When run in verbose mode, the information is output as a + configuration file that can be passed to fast-import to + assist it in intelligently caching objects. + + To specify standard input as the input stream, use a source name + of '-'. If the source name ends in '.gz', it is assumed to be + compressed in gzip format. + + :Examples: + + Display statistics about the import stream produced by front-end:: + + front-end | bzr fast-import-info - + + Create a hints file for running fast-import on a large repository:: + + front-end | bzr fast-import-info -v - > front-end.cfg + """ + hidden = False + _see_also = ['fast-import'] + takes_args = ['source'] + takes_options = ['verbose'] + def run(self, source, verbose=False): + load_fastimport() + from fastimport.processors import info_processor + return _run(source, info_processor.InfoProcessor, {}, verbose) + + +class cmd_fast_import_query(Command): + """Query a fast-import stream displaying selected commands. + + To specify standard input as the input stream, use a source name + of '-'. If the source name ends in '.gz', it is assumed to be + compressed in gzip format. + + To specify a commit to display, give its mark using the + --commit-mark option. The commit will be displayed with + file-commands included but with inline blobs hidden. + + To specify the commands to display, use the -C option one or + more times. To specify just some fields for a command, use the + syntax:: + + command=field1,... + + By default, the nominated fields for the nominated commands + are displayed tab separated. To see the information in + a name:value format, use verbose mode. + + Note: Binary fields (e.g. data for blobs) are masked out + so it is generally safe to view the output in a terminal. + + :Examples: + + Show the commit with mark 429:: + + bzr fast-import-query xxx.fi -m429 + + Show all the fields of the reset and tag commands:: + + bzr fast-import-query xxx.fi -Creset -Ctag + + Show the mark and merge fields of the commit commands:: + + bzr fast-import-query xxx.fi -Ccommit=mark,merge + """ + hidden = True + _see_also = ['fast-import', 'fast-import-filter'] + takes_args = ['source'] + takes_options = ['verbose', + Option('commit-mark', short_name='m', type=str, + help="Mark of the commit to display." + ), + ListOption('commands', short_name='C', type=str, + help="Display fields for these commands." + ), + ] + def run(self, source, verbose=False, commands=None, commit_mark=None): + load_fastimport() + from fastimport.processors import query_processor + from bzrlib.plugins.fastimport import helpers + params = helpers.defines_to_dict(commands) or {} + if commit_mark: + params['commit-mark'] = commit_mark + return _run(source, query_processor.QueryProcessor, params, + verbose) + + +class cmd_fast_export(Command): + """Generate a fast-import stream from a Bazaar branch. + + This program generates a stream from a Bazaar branch in fast-import + format used by tools such as bzr fast-import, git-fast-import and + hg-fast-import. + + If no destination is given or the destination is '-', standard output + is used. Otherwise, the destination is the name of a file. If the + destination ends in '.gz', the output will be compressed into gzip + format. + + :Round-tripping: + + Recent versions of the fast-import specification support features + that allow effective round-tripping of many Bazaar branches. As + such, fast-exporting a branch and fast-importing the data produced + will create a new repository with equivalent history, i.e. + "bzr log -v -p --include-merges --forward" on the old branch and + new branch should produce similar, if not identical, results. + + .. note:: + + Be aware that the new repository may appear to have similar history + but internally it is quite different with new revision-ids and + file-ids assigned. As a consequence, the ability to easily merge + with branches based on the old repository is lost. Depending on your + reasons for producing a new repository, this may or may not be an + issue. + + :Interoperability: + + fast-export can use the following "extended features" to + produce a richer data stream: + + * *multiple-authors* - if a commit has multiple authors (as commonly + occurs in pair-programming), all authors will be included in the + output, not just the first author + + * *commit-properties* - custom metadata per commit that Bazaar stores + in revision properties (e.g. branch-nick and bugs fixed by this + change) will be included in the output. + + * *empty-directories* - directories, even the empty ones, will be + included in the output. + + To disable these features and produce output acceptable to git 1.6, + use the --plain option. To enable these features, use --no-plain. + Currently, --plain is the default but that will change in the near + future once the feature names and definitions are formally agreed + to by the broader fast-import developer community. + + :Examples: + + To produce data destined for import into Bazaar:: + + bzr fast-export --no-plain my-bzr-branch my.fi.gz + + To produce data destined for Git 1.6:: + + bzr fast-export --plain my-bzr-branch my.fi + + To import several unmerged but related branches into the same repository, + use the --{export,import}-marks options, and specify a name for the git + branch like this:: + + bzr fast-export --export-marks=marks.bzr project.dev | + GIT_DIR=project/.git git-fast-import --export-marks=marks.git + + bzr fast-export --import-marks=marks.bzr -b other project.other | + GIT_DIR=project/.git git-fast-import --import-marks=marks.git + + If you get a "Missing space after source" error from git-fast-import, + see the top of the commands.py module for a work-around. + """ + hidden = False + _see_also = ['fast-import', 'fast-import-filter'] + takes_args = ['source', 'destination?'] + takes_options = ['verbose', 'revision', + Option('git-branch', short_name='b', type=str, + argname='FILE', + help='Name of the git branch to create (default=master).' + ), + Option('checkpoint', type=int, argname='N', + help="Checkpoint every N revisions (default=10000)." + ), + Option('marks', type=str, argname='FILE', + help="Import marks from and export marks to file." + ), + Option('import-marks', type=str, argname='FILE', + help="Import marks from file." + ), + Option('export-marks', type=str, argname='FILE', + help="Export marks to file." + ), + Option('plain', + help="Exclude metadata to maximise interoperability." + ), + ] + encoding_type = 'exact' + def run(self, source, destination=None, verbose=False, + git_branch="master", checkpoint=10000, marks=None, + import_marks=None, export_marks=None, revision=None, + plain=True): + load_fastimport() + from bzrlib.plugins.fastimport import exporter + + if marks: + import_marks = export_marks = marks + exporter = exporter.BzrFastExporter(source, + destination=destination, + git_branch=git_branch, checkpoint=checkpoint, + import_marks_file=import_marks, export_marks_file=export_marks, + revision=revision, verbose=verbose, plain_format=plain) + return exporter.run() + + +class cmd_fast_export_from_cvs(Command): + """Generate a fast-import file from a CVS repository. + + Destination is a dump file, typically named xxx.fi where xxx is + the name of the project. If '-' is given, standard output is used. + + cvs2svn 2.3 or later must be installed as its cvs2bzr script is used + under the covers to do the export. + + The source must be the path on your filesystem to the part of the + repository you wish to convert. i.e. either that path or a parent + directory must contain a CVSROOT subdirectory. The path may point to + either the top of a repository or to a path within it. In the latter + case, only that project within the repository will be converted. + + .. note:: + Remote access to the repository is not sufficient - the path + must point into a copy of the repository itself. See + http://cvs2svn.tigris.org/faq.html#repoaccess for instructions + on how to clone a remote CVS repository locally. + + By default, the trunk, branches and tags are all exported. If you + only want the trunk, use the `--trunk-only` option. + + By default, filenames, log messages and author names are expected + to be encoded in ascii. Use the `--encoding` option to specify an + alternative. If multiple encodings are used, specify the option + multiple times. For a list of valid encoding names, see + http://docs.python.org/lib/standard-encodings.html. + + Windows users need to install GNU sort and use the `--sort` + option to specify its location. GNU sort can be downloaded from + http://unxutils.sourceforge.net/. + """ + hidden = False + _see_also = ['fast-import', 'fast-import-filter'] + takes_args = ['source', 'destination'] + takes_options = ['verbose', + Option('trunk-only', + help="Export just the trunk, ignoring tags and branches." + ), + ListOption('encoding', type=str, argname='CODEC', + help="Encoding used for filenames, commit messages " + "and author names if not ascii." + ), + Option('sort', type=str, argname='PATH', + help="GNU sort program location if not on the path." + ), + ] + encoding_type = 'exact' + def run(self, source, destination, verbose=False, trunk_only=False, + encoding=None, sort=None): + load_fastimport() + from bzrlib.plugins.fastimport.exporters import fast_export_from + custom = [] + if trunk_only: + custom.append("--trunk-only") + if encoding: + for enc in encoding: + custom.extend(['--encoding', enc]) + if sort: + custom.extend(['--sort', sort]) + fast_export_from(source, destination, 'cvs', verbose, custom) + + +class cmd_fast_export_from_darcs(Command): + """Generate a fast-import file from a Darcs repository. + + Destination is a dump file, typically named xxx.fi where xxx is + the name of the project. If '-' is given, standard output is used. + + Darcs 2.2 or later must be installed as various subcommands are + used to access the source repository. The source may be a network + URL but using a local URL is recommended for performance reasons. + """ + hidden = False + _see_also = ['fast-import', 'fast-import-filter'] + takes_args = ['source', 'destination'] + takes_options = ['verbose', + Option('encoding', type=str, argname='CODEC', + help="Encoding used for commit messages if not utf-8." + ), + ] + encoding_type = 'exact' + def run(self, source, destination, verbose=False, encoding=None): + from bzrlib.plugins.fastimport.exporters import fast_export_from + custom = None + if encoding is not None: + custom = ['--encoding', encoding] + fast_export_from(source, destination, 'darcs', verbose, custom) + + +class cmd_fast_export_from_hg(Command): + """Generate a fast-import file from a Mercurial repository. + + Destination is a dump file, typically named xxx.fi where xxx is + the name of the project. If '-' is given, standard output is used. + + Mercurial 1.2 or later must be installed as its libraries are used + to access the source repository. Given the APIs currently used, + the source repository must be a local file, not a network URL. + """ + hidden = False + _see_also = ['fast-import', 'fast-import-filter'] + takes_args = ['source', 'destination'] + takes_options = ['verbose'] + encoding_type = 'exact' + def run(self, source, destination, verbose=False): + load_fastimport() + from bzrlib.plugins.fastimport.exporters import fast_export_from + fast_export_from(source, destination, 'hg', verbose) + + +class cmd_fast_export_from_git(Command): + """Generate a fast-import file from a Git repository. + + Destination is a dump file, typically named xxx.fi where xxx is + the name of the project. If '-' is given, standard output is used. + + Git 1.6 or later must be installed as the git fast-export + subcommand is used under the covers to generate the stream. + The source must be a local directory. + + .. note:: + + Earlier versions of Git may also work fine but are + likely to receive less active support if problems arise. + """ + hidden = False + _see_also = ['fast-import', 'fast-import-filter'] + takes_args = ['source', 'destination'] + takes_options = ['verbose'] + encoding_type = 'exact' + def run(self, source, destination, verbose=False): + load_fastimport() + from bzrlib.plugins.fastimport.exporters import fast_export_from + fast_export_from(source, destination, 'git', verbose) + + +class cmd_fast_export_from_mtn(Command): + """Generate a fast-import file from a Monotone repository. + + Destination is a dump file, typically named xxx.fi where xxx is + the name of the project. If '-' is given, standard output is used. + + Monotone 0.43 or later must be installed as the mtn git_export + subcommand is used under the covers to generate the stream. + The source must be a local directory. + """ + hidden = False + _see_also = ['fast-import', 'fast-import-filter'] + takes_args = ['source', 'destination'] + takes_options = ['verbose'] + encoding_type = 'exact' + def run(self, source, destination, verbose=False): + load_fastimport() + from bzrlib.plugins.fastimport.exporters import fast_export_from + fast_export_from(source, destination, 'mtn', verbose) + + +class cmd_fast_export_from_p4(Command): + """Generate a fast-import file from a Perforce repository. + + Source is a Perforce depot path, e.g., //depot/project + + Destination is a dump file, typically named xxx.fi where xxx is + the name of the project. If '-' is given, standard output is used. + + bzrp4 must be installed as its p4_fast_export.py module is used under + the covers to do the export. bzrp4 can be downloaded from + https://launchpad.net/bzrp4/. + + The P4PORT environment variable must be set, and you must be logged + into the Perforce server. + + By default, only the HEAD changelist is exported. To export all + changelists, append '@all' to the source. To export a revision range, + append a comma-delimited pair of changelist numbers to the source, + e.g., '100,200'. + """ + hidden = False + _see_also = ['fast-import', 'fast-import-filter'] + takes_args = ['source', 'destination'] + takes_options = [] + encoding_type = 'exact' + def run(self, source, destination, verbose=False): + load_fastimport() + from bzrlib.plugins.fastimport.exporters import fast_export_from + custom = [] + fast_export_from(source, destination, 'p4', verbose, custom) + + +class cmd_fast_export_from_svn(Command): + """Generate a fast-import file from a Subversion repository. + + Destination is a dump file, typically named xxx.fi where xxx is + the name of the project. If '-' is given, standard output is used. + + Python-Subversion (Python bindings to the Subversion APIs) + 1.4 or later must be installed as this library is used to + access the source repository. The source may be a network URL + but using a local URL is recommended for performance reasons. + """ + hidden = False + _see_also = ['fast-import', 'fast-import-filter'] + takes_args = ['source', 'destination'] + takes_options = ['verbose', + Option('trunk-path', type=str, argname="STR", + help="Path in repo to /trunk.\n" + "May be `regex:/cvs/(trunk)/proj1/(.*)` in " + "which case the first group is used as the " + "branch name and the second group is used " + "to match files.", + ), + Option('branches-path', type=str, argname="STR", + help="Path in repo to /branches." + ), + Option('tags-path', type=str, argname="STR", + help="Path in repo to /tags." + ), + ] + encoding_type = 'exact' + def run(self, source, destination, verbose=False, trunk_path=None, + branches_path=None, tags_path=None): + load_fastimport() + from bzrlib.plugins.fastimport.exporters import fast_export_from + custom = [] + if trunk_path is not None: + custom.extend(['--trunk-path', trunk_path]) + if branches_path is not None: + custom.extend(['--branches-path', branches_path]) + if tags_path is not None: + custom.extend(['--tags-path', tags_path]) + fast_export_from(source, destination, 'svn', verbose, custom) diff --git a/commands.py b/commands.py deleted file mode 100644 index 7ae2f54..0000000 --- a/commands.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Import command classes.""" - - -# There is a bug in git 1.5.4.3 and older by which unquoting a string consumes -# one extra character. Set this variable to True to work-around it. It only -# happens when renaming a file whose name contains spaces and/or quotes, and -# the symptom is: -# % git-fast-import -# fatal: Missing space after source: R "file 1.txt" file 2.txt -# http://git.kernel.org/?p=git/git.git;a=commit;h=c8744d6a8b27115503565041566d97c21e722584 -GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE = False - - -# Lists of command names -COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'progress', 'reset', 'tag'] -FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename', - 'filedeleteall'] - -# Bazaar file kinds -FILE_KIND = 'file' -SYMLINK_KIND = 'symlink' - - -class ImportCommand(object): - """Base class for import commands.""" - - def __init__(self, name): - self.name = name - # List of field names not to display - self._binary = [] - - def dump_str(self, names=None, child_lists=None, verbose=False): - """Dump fields as a string. - - :param names: the list of fields to include or - None for all public fields - :param child_lists: dictionary of child command names to - fields for that child command to include - :param verbose: if True, prefix each line with the command class and - display fields as a dictionary; if False, dump just the field - values with tabs between them - """ - interesting = {} - if names is None: - fields = [k for k in self.__dict__.keys() if not k.startswith('_')] - else: - fields = names - for field in fields: - value = self.__dict__.get(field) - if field in self._binary and value is not None: - value = '(...)' - interesting[field] = value - if verbose: - return "%s: %s" % (self.__class__.__name__, interesting) - else: - return "\t".join([repr(interesting[k]) for k in fields]) - - -class BlobCommand(ImportCommand): - - def __init__(self, mark, data, lineno=0): - ImportCommand.__init__(self, 'blob') - self.mark = mark - self.data = data - self.lineno = lineno - # Provide a unique id in case the mark is missing - if mark is None: - self.id = '@%d' % lineno - else: - self.id = ':' + mark - self._binary = ['data'] - - def __repr__(self): - if self.mark is None: - mark_line = "" - else: - mark_line = "\nmark :%s" % self.mark - return "blob%s\ndata %d\n%s" % (mark_line, len(self.data), self.data) - - -class CheckpointCommand(ImportCommand): - - def __init__(self): - ImportCommand.__init__(self, 'checkpoint') - - def __repr__(self): - return "checkpoint" - - -class CommitCommand(ImportCommand): - - def __init__(self, ref, mark, author, committer, message, from_, - merges, file_iter, lineno=0): - ImportCommand.__init__(self, 'commit') - self.ref = ref - self.mark = mark - self.author = author - self.committer = committer - self.message = message - self.from_ = from_ - self.merges = merges - self.file_iter = file_iter - self.lineno = lineno - self._binary = ['file_iter'] - # Provide a unique id in case the mark is missing - if mark is None: - self.id = '@%d' % lineno - else: - self.id = ':%s' % mark - - def __repr__(self): - if self.mark is None: - mark_line = "" - else: - mark_line = "\nmark :%s" % self.mark - if self.author is None: - author_line = "" - else: - author_line = "\nauthor %s" % format_who_when(self.author) - committer = "committer %s" % format_who_when(self.committer) - if self.message is None: - msg_section = "" - else: - msg = self.message.encode('utf8') - msg_section = "\ndata %d\n%s" % (len(msg), msg) - if self.from_ is None: - from_line = "" - else: - from_line = "\nfrom %s" % self.from_ - if self.merges is None: - merge_lines = "" - else: - merge_lines = "".join(["\nmerge %s" % (m,) - for m in self.merges]) - if self.file_iter is None: - filecommands = "" - else: - filecommands = "".join(["\n%r" % (c,) - for c in iter(self.file_iter)]) - return "commit %s%s%s\n%s%s%s%s%s" % (self.ref, mark_line, author_line, - committer, msg_section, from_line, merge_lines, filecommands) - - def dump_str(self, names=None, child_lists=None, verbose=False): - result = [ImportCommand.dump_str(self, names, verbose=verbose)] - for f in iter(self.file_iter): - if child_lists is None: - continue - try: - child_names = child_lists[f.name] - except KeyError: - continue - result.append("\t%s" % f.dump_str(child_names, verbose=verbose)) - return '\n'.join(result) - - -class ProgressCommand(ImportCommand): - - def __init__(self, message): - ImportCommand.__init__(self, 'progress') - self.message = message - - def __repr__(self): - return "progress %s" % (self.message,) - - -class ResetCommand(ImportCommand): - - def __init__(self, ref, from_): - ImportCommand.__init__(self, 'reset') - self.ref = ref - self.from_ = from_ - - def __repr__(self): - if self.from_ is None: - from_line = "" - else: - # According to git-fast-import(1), the extra LF is optional here; - # however, versions of git up to 1.5.4.3 had a bug by which the LF - # was needed. Always emit it, since it doesn't hurt and maintains - # compatibility with older versions. - # http://git.kernel.org/?p=git/git.git;a=commit;h=655e8515f279c01f525745d443f509f97cd805ab - from_line = "\nfrom %s\n" % self.from_ - return "reset %s%s" % (self.ref, from_line) - - -class TagCommand(ImportCommand): - - def __init__(self, id, from_, tagger, message): - ImportCommand.__init__(self, 'tag') - self.id = id - self.from_ = from_ - self.tagger = tagger - self.message = message - - def __repr__(self): - if self.from_ is None: - from_line = "" - else: - from_line = "\nfrom %s" % self.from_ - if self.tagger is None: - tagger_line = "" - else: - tagger_line = "\ntagger %s" % format_who_when(self.tagger) - if self.message is None: - msg_section = "" - else: - msg = self.message.encode('utf8') - msg_section = "\ndata %d\n%s" % (len(msg), msg) - return "tag %s%s%s%s" % (self.id, from_line, tagger_line, msg_section) - - -class FileCommand(ImportCommand): - """Base class for file commands.""" - pass - - -class FileModifyCommand(FileCommand): - - def __init__(self, path, kind, is_executable, dataref, data): - # Either dataref or data should be null - FileCommand.__init__(self, 'filemodify') - self.path = check_path(path) - self.kind = kind - self.is_executable = is_executable - self.dataref = dataref - self.data = data - self._binary = ['data'] - - def __repr__(self): - if self.kind == 'symlink': - mode = "120000" - elif self.is_executable: - mode = "755" - else: - mode = "644" - if self.dataref is None: - dataref = "inline" - datastr = "\ndata %d\n%s" % (len(self.data), self.data) - else: - dataref = "%s" % (self.dataref,) - datastr = "" - path = format_path(self.path) - return "M %s %s %s%s" % (mode, dataref, path, datastr) - - -class FileDeleteCommand(FileCommand): - - def __init__(self, path): - FileCommand.__init__(self, 'filedelete') - self.path = check_path(path) - - def __repr__(self): - return "D %s" % (format_path(self.path),) - - -class FileCopyCommand(FileCommand): - - def __init__(self, src_path, dest_path): - FileCommand.__init__(self, 'filecopy') - self.src_path = check_path(src_path) - self.dest_path = check_path(dest_path) - - def __repr__(self): - return "C %s %s" % ( - format_path(self.src_path, quote_spaces=True), - format_path(self.dest_path)) - - -class FileRenameCommand(FileCommand): - - def __init__(self, old_path, new_path): - FileCommand.__init__(self, 'filerename') - self.old_path = check_path(old_path) - self.new_path = check_path(new_path) - - def __repr__(self): - return "R %s %s" % ( - format_path(self.old_path, quote_spaces=True), - format_path(self.new_path)) - - -class FileDeleteAllCommand(FileCommand): - - def __init__(self): - FileCommand.__init__(self, 'filedeleteall') - - def __repr__(self): - return "deleteall" - - -def check_path(path): - """Check that a path is legal. - - :return: the path if all is OK - :raise ValueError: if the path is illegal - """ - if path is None or path == '': - raise ValueError("illegal path '%s'" % path) - return path - - -def format_path(p, quote_spaces=False): - """Format a path in utf8, quoting it if necessary.""" - if '\n' in p: - import re - p = re.sub('\n', '\\n', p) - quote = True - else: - quote = p[0] == '"' or (quote_spaces and ' ' in p) - if quote: - extra = GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE and ' ' or '' - p = '"%s"%s' % (p, extra) - return p.encode('utf8') - - -def format_who_when(fields): - """Format a tuple of name,email,secs-since-epoch,utc-offset-secs as a string.""" - offset = fields[3] - if offset < 0: - offset_sign = '-' - offset = abs(offset) - else: - offset_sign = '+' - offset_hours = offset / 3600 - offset_minutes = offset / 60 - offset_hours * 60 - offset_str = "%s%02d%02d" % (offset_sign, offset_hours, offset_minutes) - name = fields[0] - if name == '': - sep = '' - else: - sep = ' ' - result = "%s%s<%s> %d %s" % (name, sep, fields[1], fields[2], offset_str) - return result.encode('utf8') diff --git a/dates.py b/dates.py deleted file mode 100644 index 209d069..0000000 --- a/dates.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Date parsing routines. - -Each routine returns timestamp,timezone where - -* timestamp is seconds since epoch -* timezone is the offset from UTC in seconds. -""" - - -import time - -from bzrlib.plugins.fastimport import errors - - -def parse_raw(s, lineno=0): - """Parse a date from a raw string. - - The format must be exactly "seconds-since-epoch offset-utc". - See the spec for details. - """ - timestamp_str, timezone_str = s.split(' ', 1) - timestamp = float(timestamp_str) - timezone = _parse_tz(timezone_str, lineno) - return timestamp, timezone - - -def _parse_tz(tz, lineno): - """Parse a timezone specification in the [+|-]HHMM format. - - :return: the timezone offset in seconds. - """ - # from git_repository.py in bzr-git - if len(tz) != 5: - raise errors.InvalidTimezone(lineno, tz) - sign = {'+': +1, '-': -1}[tz[0]] - hours = int(tz[1:3]) - minutes = int(tz[3:]) - return sign * 60 * (60 * hours + minutes) - - -def parse_rfc2822(s, lineno=0): - """Parse a date from a rfc2822 string. - - See the spec for details. - """ - raise NotImplementedError(parse_rfc2822) - - -def parse_now(s, lineno=0): - """Parse a date from a string. - - The format must be exactly "now". - See the spec for details. - """ - return time.time(), 0 - - -# Lookup tabel of date parsing routines -DATE_PARSERS_BY_NAME = { - 'raw': parse_raw, - 'rfc2822': parse_rfc2822, - 'now': parse_now, - } diff --git a/errors.py b/errors.py deleted file mode 100644 index 02cc690..0000000 --- a/errors.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Exception classes for fastimport""" - -from bzrlib import errors as bzr_errors - - -# Prefix to messages to show location information -_LOCATION_FMT = "line %(lineno)d: " - - -class ImportError(bzr_errors.BzrError): - """The base exception class for all import processing exceptions.""" - - _fmt = "Unknown Import Error" - - -class ParsingError(ImportError): - """The base exception class for all import processing exceptions.""" - - _fmt = _LOCATION_FMT + "Unknown Import Parsing Error" - - def __init__(self, lineno): - ImportError.__init__(self) - self.lineno = lineno - - -class MissingBytes(ParsingError): - """Raised when EOF encountered while expecting to find more bytes.""" - - _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes," - " found %(found)d") - - def __init__(self, lineno, expected, found): - ParsingError.__init__(self, lineno) - self.expected = expected - self.found = found - - -class MissingTerminator(ParsingError): - """Raised when EOF encountered while expecting to find a terminator.""" - - _fmt = (_LOCATION_FMT + - "Unexpected EOF - expected '%(terminator)s' terminator") - - def __init__(self, lineno, terminator): - ParsingError.__init__(self, lineno) - self.terminator = terminator - - -class InvalidCommand(ParsingError): - """Raised when an unknown command found.""" - - _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'") - - def __init__(self, lineno, cmd): - ParsingError.__init__(self, lineno) - self.cmd = cmd - - -class MissingSection(ParsingError): - """Raised when a section is required in a command but not present.""" - - _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s") - - def __init__(self, lineno, cmd, section): - ParsingError.__init__(self, lineno) - self.cmd = cmd - self.section = section - - -class BadFormat(ParsingError): - """Raised when a section is formatted incorrectly.""" - - _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in " - "command %(cmd)s: found '%(text)s'") - - def __init__(self, lineno, cmd, section, text): - ParsingError.__init__(self, lineno) - self.cmd = cmd - self.section = section - self.text = text - - -class InvalidTimezone(ParsingError): - """Raised when converting a string timezone to a seconds offset.""" - - _fmt = (_LOCATION_FMT + - "Timezone %(timezone)r could not be converted.%(reason)s") - - def __init__(self, lineno, timezone, reason=None): - ParsingError.__init__(self, lineno) - self.timezone = timezone - if reason: - self.reason = ' ' + reason - else: - self.reason = '' - - -class UnknownDateFormat(ImportError): - """Raised when an unknown date format is given.""" - - _fmt = ("Unknown date format '%(format)s'") - - def __init__(self, format): - ImportError.__init__(self) - self.format = format - - -class MissingHandler(ImportError): - """Raised when a processor can't handle a command.""" - - _fmt = ("Missing handler for command %(cmd)s") - - def __init__(self, cmd): - ImportError.__init__(self) - self.cmd = cmd - - -class UnknownParameter(ImportError): - """Raised when an unknown parameter is passed to a processor.""" - - _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s") - - def __init__(self, param, knowns): - ImportError.__init__(self) - self.param = param - self.knowns = knowns - - -class BadRepositorySize(ImportError): - """Raised when the repository has an incorrect number of revisions.""" - - _fmt = ("Bad repository size - %(found)d revisions found, " - "%(expected)d expected") - - def __init__(self, expected, found): - ImportError.__init__(self) - self.expected = expected - self.found = found - - -class BadRestart(ImportError): - """Raised when the import stream and id-map do not match up.""" - - _fmt = ("Bad restart - attempted to skip commit %(commit_id)s " - "but matching revision-id is unknown") - - def __init__(self, commit_id): - ImportError.__init__(self) - self.commit_id = commit_id diff --git a/explorer/logos/cvs.png b/explorer/logos/cvs.png Binary files differnew file mode 100644 index 0000000..e279bdf --- /dev/null +++ b/explorer/logos/cvs.png diff --git a/explorer/logos/darcs.png b/explorer/logos/darcs.png Binary files differnew file mode 100644 index 0000000..ca9365f --- /dev/null +++ b/explorer/logos/darcs.png diff --git a/explorer/logos/git.png b/explorer/logos/git.png Binary files differnew file mode 100644 index 0000000..aae35a7 --- /dev/null +++ b/explorer/logos/git.png diff --git a/explorer/logos/mercurial.png b/explorer/logos/mercurial.png Binary files differnew file mode 100644 index 0000000..60effbc --- /dev/null +++ b/explorer/logos/mercurial.png diff --git a/explorer/logos/monotone.png b/explorer/logos/monotone.png Binary files differnew file mode 100644 index 0000000..16f1908 --- /dev/null +++ b/explorer/logos/monotone.png diff --git a/explorer/logos/perforce.png b/explorer/logos/perforce.png Binary files differnew file mode 100644 index 0000000..e62897c --- /dev/null +++ b/explorer/logos/perforce.png diff --git a/explorer/logos/subversion.png b/explorer/logos/subversion.png Binary files differnew file mode 100644 index 0000000..d28702a --- /dev/null +++ b/explorer/logos/subversion.png diff --git a/explorer/tools.xml b/explorer/tools.xml new file mode 100644 index 0000000..2386737 --- /dev/null +++ b/explorer/tools.xml @@ -0,0 +1,20 @@ +<folder title="Tools"> + <folder title="Migration Tools"> + <folder title="Export From" icon="actions/edit-redo"> + <tool action="qrun fast-export" icon="logos/bazaar" title="Bazaar" type="bzr" /> + <tool action="qrun fast-export-from-cvs" icon="logos/cvs" title="CVS" type="bzr" /> + <tool action="qrun fast-export-from-darcs" icon="logos/darcs" title="Darcs" type="bzr" /> + <tool action="qrun fast-export-from-git" icon="logos/git" title="Git" type="bzr" /> + <tool action="qrun fast-export-from-hg" icon="logos/mercurial" title="Mercurial" type="bzr" /> + <tool action="qrun fast-export-from-mtn" icon="logos/monotone" title="Monotone" type="bzr" /> + <tool action="qrun fast-export-from-p4" icon="logos/perforce" title="Perforce" type="bzr" /> + <tool action="qrun fast-export-from-svn" icon="logos/subversion" title="Subversion" type="bzr" /> + </folder> + <folder title="Import From" icon="actions/go-jump"> + <tool action="qrun fast-import" icon="mimetypes/text-x-generic-template" title="Fast Import Stream" type="bzr" /> + </folder> + <separator/> + <tool action="qrun fast-import-filter" icon="actions/media-playback-pause" title="Fast Import Filter" type="bzr" /> + </folder> +</folder> + diff --git a/bzr_exporter.py b/exporter.py index 16d942a..3f477d1 100755..100644 --- a/bzr_exporter.py +++ b/exporter.py @@ -35,65 +35,57 @@ import bzrlib.revision from bzrlib import ( builtins, errors as bazErrors, + osutils, progress, trace, ) -from bzrlib.plugins.fastimport import commands, helpers, marks_file +from bzrlib.plugins.fastimport import ( + helpers, + marks_file, + ) +from fastimport import commands +from fastimport.helpers import ( + binary_stream, + single_plural, + ) -# This is adapted from _linear_view_verisons in log.py in bzr 1.12. -def _iter_linear_revisions(branch, start_rev_id, end_rev_id): - """Calculate a sequence of revisions, newest to oldest. - :param start_rev_id: the lower revision-id - :param end_rev_id: the upper revision-id - :return: An iterator of revision_ids - :raises ValueError: if a start_rev_id is specified but - is not found walking the left-hand history - """ - br_revno, br_rev_id = branch.last_revision_info() - repo = branch.repository - if start_rev_id is None and end_rev_id is None: - for revision_id in repo.iter_reverse_revision_history(br_rev_id): - yield revision_id +def _get_output_stream(destination): + if destination is None or destination == '-': + return binary_stream(sys.stdout) + elif destination.endswith('gz'): + import gzip + return gzip.open(destination, 'wb') else: - if end_rev_id is None: - end_rev_id = br_rev_id - found_start = start_rev_id is None - for revision_id in repo.iter_reverse_revision_history(end_rev_id): - if not found_start and revision_id == start_rev_id: - yield revision_id - found_start = True - break - else: - yield revision_id - else: - if not found_start: - raise ValueError() + return open(destination, 'wb') class BzrFastExporter(object): def __init__(self, source, destination, git_branch=None, checkpoint=-1, import_marks_file=None, export_marks_file=None, revision=None, - verbose=False): + verbose=False, plain_format=False): + """Export branch data in fast import format. + + :param plain_format: if True, 'classic' fast-import format is + used without any extended features; if False, the generated + data is richer and includes information like multiple + authors, revision properties, etc. + """ self.source = source - if destination is None or destination == '-': - self.outf = helpers.binary_stream(sys.stdout) - elif destination.endswith('gz'): - import gzip - self.outf = gzip.open(destination, 'wb') - else: - self.outf = open(destination, 'wb') + self.outf = _get_output_stream(destination) self.git_branch = git_branch self.checkpoint = checkpoint self.import_marks_file = import_marks_file self.export_marks_file = export_marks_file self.revision = revision self.excluded_revisions = set() + self.plain_format = plain_format self._multi_author_api_available = hasattr(bzrlib.revision.Revision, 'get_apparent_authors') + self.properties_to_exclude = ['authors', 'author'] # Progress reporting stuff self.verbose = verbose @@ -102,6 +94,7 @@ class BzrFastExporter(object): else: self.progress_every = 1000 self._start_time = time.time() + self._commit_total = 0 # Load the marks and initialise things accordingly self.revid_to_mark = {} @@ -124,17 +117,15 @@ class BzrFastExporter(object): start_rev_id = None end_rev_id = None self.note("Calculating the revisions to include ...") - view_revisions = reversed(list(_iter_linear_revisions(self.branch, - start_rev_id, end_rev_id))) + view_revisions = reversed([rev_id for rev_id, _, _, _ in + self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)]) # If a starting point was given, we need to later check that we don't # start emitting revisions from before that point. Collect the # revisions to exclude now ... if start_rev_id is not None: - # The result is inclusive so skip the first (the oldest) one self.note("Calculating the revisions to exclude ...") - uninteresting = list(_iter_linear_revisions(self.branch, None, - start_rev_id))[1:] - self.excluded_revisions = set(uninteresting) + self.excluded_revisions = set([rev_id for rev_id, _, _, _ in + self.branch.iter_merge_sorted_revisions(start_rev_id)]) return list(view_revisions) def run(self): @@ -144,7 +135,13 @@ class BzrFastExporter(object): # Export the data self.branch.repository.lock_read() try: - for revid in self.interesting_history(): + interesting = self.interesting_history() + self._commit_total = len(interesting) + self.note("Starting export of %d revisions ..." % + self._commit_total) + if not self.plain_format: + self.emit_features() + for revid in interesting: self.emit_commit(revid, self.git_branch) if self.branch.supports_tags(): self.emit_tags() @@ -171,10 +168,11 @@ class BzrFastExporter(object): return time.strftime("%H:%M:%S") def report_progress(self, commit_count, details=''): - # Note: we can't easily give a total count here because we - # don't know how many merged revisions will need to be output if commit_count and commit_count % self.progress_every == 0: - counts = "%d" % (commit_count,) + if self._commit_total: + counts = "%d/%d" % (commit_count, self._commit_total) + else: + counts = "%d" % (commit_count,) minutes = (time.time() - self._start_time) / 60 rate = commit_count * 1.0 / minutes if rate > 10: @@ -187,7 +185,7 @@ class BzrFastExporter(object): time_required = progress.str_tdelta(time.time() - self._start_time) rc = len(self.revid_to_mark) self.note("Exported %d %s in %s", - rc, helpers.single_plural(rc, "revision", "revisions"), + rc, single_plural(rc, "revision", "revisions"), time_required) def print_cmd(self, cmd): @@ -200,7 +198,7 @@ class BzrFastExporter(object): def is_empty_dir(self, tree, path): path_id = tree.path2id(path) - if path_id == None: + if path_id is None: self.warning("Skipping empty_dir detection - no file_id for %s" % (path,)) return False @@ -216,6 +214,10 @@ class BzrFastExporter(object): else: return False + def emit_features(self): + for feature in sorted(commands.FEATURE_NAMES): + self.print_cmd(commands.FeatureCommand(feature)) + def emit_commit(self, revid, git_branch): if revid in self.revid_to_mark or revid in self.excluded_revisions: return @@ -228,14 +230,13 @@ class BzrFastExporter(object): self.revid_to_mark[revid] = -1 return - # Emit parents - nparents = len(revobj.parent_ids) - if nparents: - for parent in revobj.parent_ids: - self.emit_commit(parent, git_branch) - # Get the primary parent + # TODO: Consider the excluded revisions when deciding the parents. + # Currently, a commit with parents that are excluded ought to be + # triggering the git_branch calculation below (and it is not). + # IGC 20090824 ncommits = len(self.revid_to_mark) + nparents = len(revobj.parent_ids) if nparents == 0: if ncommits: # This is a parentless commit but it's not the first one @@ -264,36 +265,55 @@ class BzrFastExporter(object): self._save_marks() self.print_cmd(commands.CheckpointCommand()) - def _get_commit_command(self, git_ref, mark, revobj, file_cmds): - # Get the committer and author info - committer = revobj.committer - if committer.find('<') == -1: + def _get_name_email(self, user): + if user.find('<') == -1: # If the email isn't inside <>, we need to use it as the name # in order for things to round-trip correctly. # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com') - name = committer + name = user email = '' else: - name, email = parseaddr(committer) + name, email = parseaddr(user) + return name, email + + def _get_commit_command(self, git_ref, mark, revobj, file_cmds): + # Get the committer and author info + committer = revobj.committer + name, email = self._get_name_email(committer) committer_info = (name, email, revobj.timestamp, revobj.timezone) if self._multi_author_api_available: - author = revobj.get_apparent_authors()[0] + more_authors = revobj.get_apparent_authors() + author = more_authors.pop(0) else: + more_authors = [] author = revobj.get_apparent_author() - if author != committer: - name, email = parseaddr(author) + if not self.plain_format and more_authors: + name, email = self._get_name_email(author) author_info = (name, email, revobj.timestamp, revobj.timezone) + more_author_info = [] + for a in more_authors: + name, email = self._get_name_email(a) + more_author_info.append( + (name, email, revobj.timestamp, revobj.timezone)) + elif author != committer: + name, email = self._get_name_email(author) + author_info = (name, email, revobj.timestamp, revobj.timezone) + more_author_info = None else: author_info = None + more_author_info = None # Get the parents in terms of marks non_ghost_parents = [] for p in revobj.parent_ids: if p in self.excluded_revisions: continue - parent_mark = self.revid_to_mark[p] - if parent_mark != -1: + try: + parent_mark = self.revid_to_mark[p] non_ghost_parents.append(":%s" % parent_mark) + except KeyError: + # ghost - ignore + continue if non_ghost_parents: from_ = non_ghost_parents[0] merges = non_ghost_parents[1:] @@ -301,9 +321,23 @@ class BzrFastExporter(object): from_ = None merges = None + # Filter the revision properties. Some metadata (like the + # author information) is already exposed in other ways so + # don't repeat it here. + if self.plain_format: + properties = None + else: + properties = revobj.properties + for prop in self.properties_to_exclude: + try: + del properties[prop] + except KeyError: + pass + # Build and return the result return commands.CommitCommand(git_ref, mark, author_info, - committer_info, revobj.message, from_, merges, iter(file_cmds)) + committer_info, revobj.message, from_, merges, iter(file_cmds), + more_authors=more_author_info, properties=properties) def _get_revision_trees(self, parent, revision_id): try: @@ -351,15 +385,21 @@ class BzrFastExporter(object): for path, id_, kind in changes.added + my_modified + rd_modifies: if kind == 'file': text = tree_new.get_file_text(id_) - file_cmds.append(commands.FileModifyCommand(path, 'file', - tree_new.is_executable(id_), None, text)) + file_cmds.append(commands.FileModifyCommand(path, + helpers.kind_to_mode('file', tree_new.is_executable(id_)), + None, text)) elif kind == 'symlink': - file_cmds.append(commands.FileModifyCommand(path, 'symlink', - False, None, tree_new.get_symlink_target(id_))) + file_cmds.append(commands.FileModifyCommand(path, + helpers.kind_to_mode('symlink', False), + None, tree_new.get_symlink_target(id_))) + elif kind == 'directory': + if not self.plain_format: + file_cmds.append(commands.FileModifyCommand(path, + helpers.kind_to_mode('directory', False), + None, None)) else: - # Should we do something here for importers that - # can handle directory and tree-reference changes? - continue + self.warning("cannot export '%s' of kind %s yet - ignoring" % + (path, kind)) return file_cmds def _process_renames_and_deletes(self, renames, deletes, @@ -388,11 +428,15 @@ class BzrFastExporter(object): # Instead, we need to make multiple passes over the various lists to # get the ordering right. + must_be_renamed = {} + old_to_new = {} deleted_paths = set([p for p, _, _ in deletes]) for (oldpath, newpath, id_, kind, text_modified, meta_modified) in renames: + emit = kind != 'directory' or not self.plain_format if newpath in deleted_paths: - file_cmds.append(commands.FileDeleteCommand(newpath)) + if emit: + file_cmds.append(commands.FileDeleteCommand(newpath)) deleted_paths.remove(newpath) if (self.is_empty_dir(tree_old, oldpath)): self.note("Skipping empty dir %s in rev %s" % (oldpath, @@ -401,14 +445,40 @@ class BzrFastExporter(object): #oldpath = self._adjust_path_for_renames(oldpath, renamed, # revision_id) renamed.append([oldpath, newpath]) - file_cmds.append(commands.FileRenameCommand(oldpath, newpath)) + old_to_new[oldpath] = newpath + if emit: + file_cmds.append(commands.FileRenameCommand(oldpath, newpath)) if text_modified or meta_modified: modifies.append((newpath, id_, kind)) + # Renaming a directory implies all children must be renamed. + # Note: changes_from() doesn't handle this + if kind == 'directory': + for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_): + if e.kind == 'directory' and self.plain_format: + continue + old_child_path = osutils.pathjoin(oldpath, p) + new_child_path = osutils.pathjoin(newpath, p) + must_be_renamed[old_child_path] = new_child_path + + # Add children not already renamed + if must_be_renamed: + renamed_already = set(old_to_new.keys()) + still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already + for old_child_path in sorted(still_to_be_renamed): + new_child_path = must_be_renamed[old_child_path] + if self.verbose: + self.note("implicitly renaming %s => %s" % (old_child_path, + new_child_path)) + file_cmds.append(commands.FileRenameCommand(old_child_path, + new_child_path)) + # Record remaining deletes for path, id_, kind in deletes: if path not in deleted_paths: continue + if kind == 'directory' and self.plain_format: + continue #path = self._adjust_path_for_renames(path, renamed, revision_id) file_cmds.append(commands.FileDeleteCommand(path)) return file_cmds, modifies, renamed @@ -435,7 +505,7 @@ class BzrFastExporter(object): self.warning('not creating tag %r pointing to non-existent ' 'revision %s' % (tag, revid)) else: - git_ref = 'refs/tags/%s' % tag + git_ref = 'refs/tags/%s' % tag.encode("utf-8") self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark))) def _next_tmp_branch_name(self): diff --git a/exporters/Makefile b/exporters/Makefile index 8a4efd9..2b71211 100644 --- a/exporters/Makefile +++ b/exporters/Makefile @@ -1,7 +1,8 @@ -SVN ?= /usr/local/svn -APR_INCLUDES ?= /usr/include/apr-1.0 -CFLAGS += -I${APR_INCLUDES} -I${SVN}/include/subversion-1 -pipe -O2 -std=c99 +SVN ?= /usr +CFLAGS += -I${SVN}/include/subversion-1 -pipe -O2 -std=c99 +CFLAGS += `pkg-config --cflags apr-1` LDFLAGS += -L${SVN}/lib -lsvn_fs-1 -lsvn_repos-1 +LDFLAGS += `pkg-config --libs apr-1` all: svn-fast-export svn-archive diff --git a/exporters/__init__.py b/exporters/__init__.py index 2d7b135..6d282d6 100644 --- a/exporters/__init__.py +++ b/exporters/__init__.py @@ -218,11 +218,14 @@ class MercurialExporter(_Exporter): class GitExporter(_Exporter): def __init__(self): - self.check_install('Git', '1.6', ['git']) + self.cmd_name = "git" + if sys.platform == 'win32': + self.cmd_name = "git.cmd" + self.check_install('Git', '1.6', [self.cmd_name]) def generate(self, source, destination, verbose=False, custom=None): """Generate a fast import stream. See _Exporter.generate() for details.""" - args = ["git", "fast-export", "--all", "--signed-tags=warn"] + args = [self.cmd_name, "fast-export", "--all", "--signed-tags=warn"] outf, base, marks = self.get_output_info(destination) if marks: marks = os.path.abspath(marks) @@ -241,11 +244,11 @@ class GitExporter(_Exporter): class MonotoneExporter(_Exporter): def __init__(self): - self.check_install('Monotone', '0.43', ['mnt']) + self.check_install('Monotone', '0.43', ['mtn']) def generate(self, source, destination, verbose=False, custom=None): """Generate a fast import stream. See _Exporter.generate() for details.""" - args = ["mnt", "git_export"] + args = ["mtn", "git_export"] outf, base, marks = self.get_output_info(destination) if marks: marks = os.path.abspath(marks) @@ -277,7 +280,7 @@ class PerforceExporter(_Exporter): retcode = p4_fast_export.main([source]) finally: sys.stdout = original_stdout - self.report_results(retcode, destination) + self.report_results(retcode, destination) class SubversionExporter(_Exporter): @@ -308,7 +311,7 @@ def fast_export_from(source, destination, tool, verbose=False, custom=None): factory = MercurialExporter elif tool == 'git': factory = GitExporter - elif tool == 'mnt': + elif tool == 'mtn': factory = MonotoneExporter elif tool == 'p4': factory = PerforceExporter diff --git a/exporters/darcs/README b/exporters/darcs/README index 4b13e3b..3fc9449 100644 --- a/exporters/darcs/README +++ b/exporters/darcs/README @@ -23,13 +23,6 @@ Independent:: Ideally it should work with any fast importer, but actually it has been tested with git fast-import, bzr fast-import and hg fastimport. (These are the three fast-import implementations available ATM.) - + - hg fastimport needs three patches. While they are not in the upstream, - you can get it from my repository using -+ ----- -$ hg clone static-http://frugalware.org/~vmiklos/hg/hg-fastimport ----- Formats:: It supports the 'darcs-2', 'hashed', and 'old-fashioned-inventory' darcs @@ -127,16 +120,18 @@ supported by fastimport-0.6 is hg-1.0.x. Mercurial (Hg) version: ---- $ hg version -Mercurial Distributed SCM (version 1.2.1) +Mercurial Distributed SCM (version 1.3) ---- -Strictly speaking this document is a wrong place to talk about enabling -hg plugins. However... +Strictly speaking this document is a wrong place to talk about +configuring hg fastimport. However... you will need something like: ---- -$ cat ~/.hgrc -[extensions] -hgext.fastimport= +$ hg clone http://vc.gerg.ca/hg/pyfastimport +$ hg clone http://vc.gerg.ca/hg/hg-fastimport +$ sudo ln -s /path/to/pyfastimport/fastimport /usr/lib/python2.6/site-packages/fastimport +$ sudo ln -s /path/to/hg-fastimport/hgfastimport /usr/lib/python2.6/site-packages/hgfastimport +echo -e "[extensions]\nfastimport = /usr/lib/python2.6/site-packages/hgfastimport" > ~/.hgrc ---- and once you installed the plugin correctly, you should have something like: diff --git a/exporters/darcs/TODO b/exporters/darcs/TODO index 2f199d1..c6892c8 100644 --- a/exporters/darcs/TODO +++ b/exporters/darcs/TODO @@ -4,3 +4,5 @@ not enabled, etc. parse the patches manually so we can avoid re-adding existing files manually. avoid darcs apply. + +import: handle evil merges (git-subtree), maybe using git log --first-parent diff --git a/exporters/darcs/d2x b/exporters/darcs/d2x index 79e18a3..959cc00 100755 --- a/exporters/darcs/d2x +++ b/exporters/darcs/d2x @@ -93,7 +93,7 @@ if [ ! -f $dmark ]; then hg) hg init darcs-fast-export $* $origin | \ - hg fastimport /dev/stdin + hg fastimport - esac else case $format in diff --git a/exporters/darcs/darcs-fast-export b/exporters/darcs/darcs-fast-export index d94618e..fa850de 100755 --- a/exporters/darcs/darcs-fast-export +++ b/exporters/darcs/darcs-fast-export @@ -4,7 +4,7 @@ darcs-fast-export - darcs backend for fast data importers - Copyright (c) 2008 Miklos Vajna <vmiklos@frugalware.org> + Copyright (c) 2008, 2009 Miklos Vajna <vmiklos@frugalware.org> Copyright (c) 2008 Matthias Andree <matthias.andree@gmx.de> This program is free software; you can redistribute it and/or modify @@ -29,314 +29,352 @@ import os import sys import gzip import time +import calendar import shutil import subprocess import optparse import re +import urllib +import urllib2 +import StringIO sys = reload(sys) sys.setdefaultencoding("utf-8") -def __get_zone(): - now = time.localtime() - if time.daylight and now[-1]: - offset = time.altzone - else: - offset = time.timezone - hours, minutes = divmod(abs(offset), 3600) - if offset > 0: - sign = "-" - else: - sign = "+" - return sign, hours, minutes - -def get_zone_str(): - sign, hours, minutes = __get_zone() - return "%s%02d%02d" % (sign, hours, minutes // 60) - -def get_zone_int(): - sign, hours, minutes = __get_zone() - ret = hours*3600+minutes*60 - if sign == "-": - ret *= -1 - return ret - -def get_patchname(patch): - ret = [] - s = "" - if patch.attributes['inverted'].value == 'True': - s = "UNDO: " - ret.append(s + patch.getElementsByTagName("name")[0].childNodes[0].data) - lines = patch.getElementsByTagName("comment") - if lines: - for i in lines[0].childNodes[0].data.split('\n'): - if not i.startswith("Ignore-this: "): - ret.append(i) - return "\n".join(ret).encode('utf-8') - -def get_author(patch): - """darcs allows any freeform string, but fast-import has a more - strict format, so fix up broken author names here.""" - - author = patch.attributes['author'].value - if author in authormap: - author = authormap[author] - if not len(author): - author = "darcs-fast-export <darcs-fast-export>" - # add missing name - elif not ">" in author: - author = "%s <%s>" % (author.split('@')[0], author) - # avoid double quoting - elif author[0] == '"' and author[-1] == '"': - author = author[1:-1] - # name after email - elif author[-1] != '>': - author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1] - return author.encode('utf-8') - -def get_date(patch): - try: - date = time.strptime(patch, "%Y%m%d%H%M%S") - except ValueError: - date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y') - return int(time.mktime(date)) + get_zone_int() - -def progress(s): - print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s) - sys.stdout.flush() - -def log(s): - logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)) - logsock.flush() - -hashes = [] -def parse_inventory(sock=None): - prev = None - nextprev = False - buf = [] - if not sock: - sock = open(os.path.join("_darcs", "hashed_inventory")) - for i in sock.readlines(): - if i.startswith("hash"): - buf.insert(0, i[6:-1]) - if i.startswith("Starting with inventory:"): - nextprev = True - elif nextprev: - prev = i[:-1] - nextprev = False - sock.close() - for i in buf: - hashes.insert(0, i) - if prev: - sock = gzip.open(os.path.join("_darcs", "inventories", prev)) - parse_inventory(sock) - -# Option Parser -usage="%prog [options] darcsrepo" -opp = optparse.OptionParser(usage=usage) -opp.add_option("--import-marks", metavar="IFILE", - help="read state for incremental imports from IFILE") -opp.add_option("--export-marks", metavar="OFILE", - help="write state for incremental imports from OFILE") -opp.add_option("--encoding", - help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess") -opp.add_option("--authors-file", metavar="F", - help="read author transformations in old=new format from F") -opp.add_option("--working", metavar="W", - help="working directory which is removed at the end of non-incremental conversions") -opp.add_option("--logfile", metavar="L", - help="log file which contains the output of external programs invoked during the conversion") -opp.add_option("--git-branch", metavar="B", - help="git branch [default: refs/heads/master]") -opp.add_option("--progress", metavar="P", - help="insert progress statements after every n commit [default: 100]") -(options, args) = opp.parse_args() -if len(args) < 1: - opp.error("darcsrepo required") - -export_marks = [] -import_marks = [] -if options.import_marks: - sock = open(options.import_marks) - for i in sock.readlines(): - line = i.strip() - if not len(line): - continue - import_marks.append(line.split(' ')[1]) - export_marks.append(line) - sock.close() - -# read author mapping file in gitauthors format, -# i. e. in=out (one per # line) -authormap = {} -if options.authors_file: - sock = open(options.authors_file) - authormap = dict([i.strip().split('=',1) for i in sock]) - sock.close() - -origin = os.path.abspath(args[0]) -if options.working: - working = os.path.abspath(options.working) -else: - working = "%s.darcs" % origin -patchfile = "%s.patch" % origin -if options.logfile: - logfile = os.path.abspath(options.logfile) -else: - logfile = "%s.log" % origin -logsock = open(logfile, "a") -if options.git_branch: - git_branch = options.git_branch -else: - git_branch = "refs/heads/master" - -if options.progress: - prognum = int(options.progress) -else: - prognum = 100 - -progress("getting list of patches") -if not len(import_marks): - sock = os.popen("darcs changes --xml --reverse --repo %s" % origin) -else: - sock = os.popen("darcs changes --xml --reverse --repo %s --from-match 'hash %s'" % (origin, import_marks[-1])) -buf = sock.read() -sock.close() -# this is hackish. we need to escape some bad chars, otherwise the xml -# will not be valid -buf = buf.replace('\x1b', '^[') -if options.encoding: - xmldoc = xml.dom.minidom.parseString(unicode(buf, options.encoding).encode('utf-8')) -else: - try: - xmldoc = xml.dom.minidom.parseString(buf) - except xml.parsers.expat.ExpatError: +class Handler: + def __init__(self): + self.hashes = [] + self.authormap = {} + self.export_marks = [] + self.import_marks = [] + + def get_patchname(self, patch): + ret = [] + s = "" + if patch.attributes['inverted'].value == 'True': + s = "UNDO: " + cs = patch.getElementsByTagName("name")[0].childNodes + if cs.length > 0: + ret.append(s + cs[0].data) + lines = patch.getElementsByTagName("comment") + if lines: + for i in lines[0].childNodes[0].data.split('\n'): + if not i.startswith("Ignore-this: "): + ret.append(i) + return "\n".join(ret).encode('utf-8') + + def get_author(self, patch): + """darcs allows any freeform string, but fast-import has a more + strict format, so fix up broken author names here.""" + + author = patch.attributes['author'].value + if author in self.authormap: + author = self.authormap[author] + if not len(author): + author = "darcs-fast-export <darcs-fast-export>" + # add missing name + elif not ">" in author: + author = "%s <%s>" % (author.split('@')[0], author) + # avoid double quoting + elif author[0] == '"' and author[-1] == '"': + author = author[1:-1] + # name after email + elif author[-1] != '>': + author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1] + return author.encode('utf-8') + + def get_date(self, patch): try: - import chardet - except ImportError: - sys.exit("Error, encoding is not utf-8. Please " + - "either specify it with the --encoding " + - "option or install chardet.") - progress("encoding is not utf8, guessing charset") - encoding = chardet.detect(buf)['encoding'] - progress("detected encoding is %s" % encoding) - xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8')) -sys.stdout.flush() - -darcs2 = False -oldfashionedpatch = True -cwd = os.getcwd() -if os.path.exists(os.path.join(origin, "_darcs", "format")): - sock = open(os.path.join(origin, "_darcs", "format")) - format = [x.strip() for x in sock] - sock.close() - darcs2 = 'darcs-2' in format - oldfashionedpatch = not 'hashed' in format -if not oldfashionedpatch: - progress("parsing the inventory") - os.chdir(origin) - parse_inventory() -if not options.import_marks or not os.path.exists(working): - # init the tmp darcs repo - os.mkdir(working) - os.chdir(working) - if darcs2: - os.system("darcs init --darcs-2") - else: - os.system("darcs init --old-fashioned-inventory") -else: - os.chdir(working) -if options.import_marks: - sock = os.popen("darcs pull -a --match 'hash %s' %s" % (import_marks[-1], origin)) - log("Building/updating working directory:\n%s" % sock.read()) - sock.close() - -# this is the number of the NEXT patch -count = 1 -patches = xmldoc.getElementsByTagName('patch') -if len(import_marks): - patches = patches[1:] - count = len(import_marks) + 1 -if len(export_marks): - # this is the mark number of the NEXT patch - markcount = int(export_marks[-1].split(' ')[0][1:]) + 1 -else: - markcount = count -# this may be huge and we need it many times -patchnum = len(patches) - -if not len(import_marks): - progress("starting export, repo has %d patches" % patchnum) -else: - progress("continuing export, %d patches to convert" % patchnum) -paths = [] -for i in patches: - # apply the patch - hash = i.attributes['hash'].value - buf = ["\nNew patches:\n"] - if oldfashionedpatch: - sock = gzip.open(os.path.join(origin, "_darcs", "patches", hash)) - else: - sock = gzip.open(os.path.join(origin, "_darcs", "patches", hashes[count-1])) - buf.append(sock.read()) - sock.close() - sock = os.popen("darcs changes --context") - buf.append(sock.read()) - sock.close() - sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - sock.stdin.write("".join(buf)) - sock.stdin.close() - log("Applying %s:\n%s" % (hash, sock.stdout.read())) - sock.stdout.close() - message = get_patchname(i) - # export the commit - print "commit %s" % git_branch - print "mark :%s" % markcount - if options.export_marks: - export_marks.append(":%s %s" % (markcount, hash)) - date = get_date(i.attributes['date'].value) - print "committer %s %s %s" % (get_author(i), date, get_zone_str()) - print "data %d\n%s" % (len(message), message) - if markcount > 1: - print "from :%s" % (markcount-1) - # export the files - for j in paths: - print "D %s" % j - paths = [] - for (root, dirs, files) in os.walk ("."): - for f in files: - j = os.path.normpath(os.path.join(root, f)) - if j.startswith("_darcs") or "-darcs-backup" in j: - continue - paths.append(j) - sock = open(j) - buf = sock.read() + date = time.strptime(patch, "%Y%m%d%H%M%S") + except ValueError: + date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y') + return calendar.timegm(date) + + def progress(self, s): + print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s) + sys.stdout.flush() + + def log(self, s): + self.logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)) + self.logsock.flush() + + def parse_inventory(self, sock=None): + prev = None + nextprev = False + buf = [] + if not sock: + sock = self.open(os.path.join(self.origin, "_darcs", "hashed_inventory")) + for i in sock.readlines(): + if i.startswith("hash"): + buf.insert(0, i[6:-1]) + if i.startswith("Starting with inventory:"): + nextprev = True + elif nextprev: + prev = i[:-1] + nextprev = False + sock.close() + for i in buf: + self.hashes.insert(0, i) + if prev: + sock = self.gzip_open(os.path.join(self.origin, "_darcs", "inventories", prev)) + self.parse_inventory(sock) + + # this is like gzip.open but supports urls as well + def gzip_open(self, path): + if os.path.exists(path): + return gzip.open(path) + buf = urllib.urlopen(path).read() + sock = StringIO.StringIO(buf) + return gzip.GzipFile(fileobj=sock) + + # this is like os.path.exists but supports urls as well + def path_exists(self, path): + if os.path.exists(path): + return True + else: + try: + urllib2.urlopen(urllib2.Request(path)) + return True + except urllib2.HTTPError, e: + return False + + # this is like open, but supports urls as well + def open(self, path): + if os.path.exists(path): + return open(path) + else: + return urllib.urlopen(path) + + def handle_opts(self): + # Option Parser + usage="%prog [options] darcsrepo" + opp = optparse.OptionParser(usage=usage) + opp.add_option("--import-marks", metavar="IFILE", + help="read state for incremental imports from IFILE") + opp.add_option("--export-marks", metavar="OFILE", + help="write state for incremental imports from OFILE") + opp.add_option("--encoding", + help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess") + opp.add_option("--authors-file", metavar="F", + help="read author transformations in old=new format from F") + opp.add_option("--working", metavar="W", + help="working directory which is removed at the end of non-incremental conversions") + opp.add_option("--logfile", metavar="L", + help="log file which contains the output of external programs invoked during the conversion") + opp.add_option("--git-branch", metavar="B", + help="git branch [default: refs/heads/master]") + opp.add_option("--progress", metavar="P", + help="insert progress statements after every n commit [default: 100]") + (self.options, self.args) = opp.parse_args() + if len(self.args) < 1: + opp.error("darcsrepo required") + + # read author mapping file in gitauthors format, + # i. e. in=out (one per # line) + if self.options.authors_file: + sock = open(self.options.authors_file) + self.authormap = dict([i.strip().split('=',1) for i in sock]) sock.close() - # darcs does not track the executable bit :/ - print "M 644 inline %s" % j - print "data %s\n%s" % (len(buf), buf) - if message[:4] == "TAG ": - tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_') - print "tag %s" % tag - print "from :%s" % markcount - print "tagger %s %s %s" % (get_author(i), date, get_zone_str()) - print "data %d\n%s" % (len(message), message) - if count % prognum == 0: - progress("%d/%d patches" % (count, patchnum)) - count += 1 - markcount += 1 -os.chdir(cwd) + if "://" not in self.args[0]: + self.origin = os.path.abspath(self.args[0]) + else: + self.origin = self.args[0].strip('/') + if self.options.working: + self.working = os.path.abspath(self.options.working) + else: + if "://" not in self.origin: + self.working = "%s.darcs" % self.origin + else: + self.working = "%s.darcs" % os.path.split(self.origin)[-1] + if self.options.logfile: + logfile = os.path.abspath(self.options.logfile) + else: + if "://" not in self.origin: + logfile = "%s.log" % self.origin + else: + logfile = "%s.log" % os.path.split(self.origin)[-1] + self.logsock = open(logfile, "a") + if self.options.git_branch: + self.git_branch = self.options.git_branch + else: + self.git_branch = "refs/heads/master" + + if self.options.progress: + self.prognum = int(self.options.progress) + else: + self.prognum = 100 + + def handle_import_marks(self): + if self.options.import_marks: + sock = open(self.options.import_marks) + for i in sock.readlines(): + line = i.strip() + if not len(line): + continue + self.import_marks.append(line.split(' ')[1]) + self.export_marks.append(line) + sock.close() + + def get_patches(self): + self.progress("getting list of patches") + if not len(self.import_marks): + sock = os.popen("darcs changes --xml --reverse --repo %s" % self.origin) + else: + sock = os.popen("darcs changes --xml --reverse --repo %s --from-match 'hash %s'" % (self.origin, self.import_marks[-1])) + buf = sock.read() + sock.close() + # this is hackish. we need to escape some bad chars, otherwise the xml + # will not be valid + buf = buf.replace('\x1b', '^[') + if self.options.encoding: + xmldoc = xml.dom.minidom.parseString(unicode(buf, self.options.encoding).encode('utf-8')) + else: + try: + xmldoc = xml.dom.minidom.parseString(buf) + except xml.parsers.expat.ExpatError: + try: + import chardet + except ImportError: + sys.exit("Error, encoding is not utf-8. Please " + + "either specify it with the --encoding " + + "option or install chardet.") + self.progress("encoding is not utf8, guessing charset") + encoding = chardet.detect(buf)['encoding'] + self.progress("detected encoding is %s" % encoding) + xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8')) + sys.stdout.flush() + return xmldoc.getElementsByTagName('patch') + + def setup_workdir(self): + darcs2 = False + self.oldfashionedpatch = True + self.cwd = os.getcwd() + if self.path_exists(os.path.join(self.origin, "_darcs", "format")): + sock = self.open(os.path.join(self.origin, "_darcs", "format")) + format = [x.strip() for x in sock] + sock.close() + darcs2 = 'darcs-2' in format + self.oldfashionedpatch = not 'hashed' in format + if not self.oldfashionedpatch: + self.progress("parsing the inventory") + if "://" not in self.origin: + os.chdir(self.origin) + self.parse_inventory() + if not self.options.import_marks or not os.path.exists(self.working): + # init the tmp darcs repo + os.mkdir(self.working) + os.chdir(self.working) + if darcs2: + os.system("darcs init --darcs-2") + else: + os.system("darcs init --old-fashioned-inventory") + else: + os.chdir(self.working) + if self.options.import_marks: + sock = os.popen("darcs pull -a --match 'hash %s' %s" % (self.import_marks[-1], self.origin)) + self.log("Building/updating working directory:\n%s" % sock.read()) + sock.close() + + def export_patches(self): + patches = self.get_patches() + # this is the number of the NEXT patch + count = 1 + if len(self.import_marks): + patches = patches[1:] + count = len(self.import_marks) + 1 + if len(self.export_marks): + # this is the mark number of the NEXT patch + markcount = int(self.export_marks[-1].split(' ')[0][1:]) + 1 + else: + markcount = count + # this may be huge and we need it many times + patchnum = len(patches) + + if not len(self.import_marks): + self.progress("starting export, repo has %d patches" % patchnum) + else: + self.progress("continuing export, %d patches to convert" % patchnum) + paths = [] + for i in patches: + # apply the patch + hash = i.attributes['hash'].value + buf = ["\nNew patches:\n"] + if self.oldfashionedpatch: + sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", hash)) + else: + sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", self.hashes[count-1])) + buf.append(sock.read()) + sock.close() + sock = os.popen("darcs changes --context") + buf.append(sock.read()) + sock.close() + sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) + sock.stdin.write("".join(buf)) + sock.stdin.close() + self.log("Applying %s:\n%s" % (hash, sock.stdout.read())) + sock.stdout.close() + message = self.get_patchname(i) + # export the commit + print "commit %s" % self.git_branch + print "mark :%s" % markcount + if self.options.export_marks: + self.export_marks.append(":%s %s" % (markcount, hash)) + date = self.get_date(i.attributes['date'].value) + print "committer %s %s +0000" % (self.get_author(i), date) + print "data %d\n%s" % (len(message), message) + if markcount > 1: + print "from :%s" % (markcount-1) + # export the files + for j in paths: + print "D %s" % j + paths = [] + for (root, dirs, files) in os.walk ("."): + for f in files: + j = os.path.normpath(os.path.join(root, f)) + if j.startswith("_darcs") or "-darcs-backup" in j: + continue + paths.append(j) + sock = open(j) + buf = sock.read() + sock.close() + # darcs does not track the executable bit :/ + print "M 644 inline %s" % j + print "data %s\n%s" % (len(buf), buf) + if message[:4] == "TAG ": + tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_') + print "tag %s" % tag + print "from :%s" % markcount + print "tagger %s %s +0000" % (self.get_author(i), date) + print "data %d\n%s" % (len(message), message) + if count % self.prognum == 0: + self.progress("%d/%d patches" % (count, patchnum)) + count += 1 + markcount += 1 + + os.chdir(self.cwd) + + if not self.options.export_marks: + shutil.rmtree(self.working) + self.logsock.close() + + def handle_export_marks(self): + if self.options.export_marks: + self.progress("writing export marks") + sock = open(self.options.export_marks, 'w') + sock.write("\n".join(self.export_marks)) + sock.write("\n") + sock.close() -if not options.export_marks: - shutil.rmtree(working) -logsock.close() + self.progress("finished") -if options.export_marks: - progress("writing export marks") - sock = open(options.export_marks, 'w') - sock.write("\n".join(export_marks)) - sock.write("\n") - sock.close() + def handle(self): + self.handle_opts() + self.handle_import_marks() + self.setup_workdir() + self.export_patches() + self.handle_export_marks() -progress("finished") +if __name__ == "__main__": + h = Handler() + h.handle() diff --git a/exporters/darcs/darcs-fast-export.txt b/exporters/darcs/darcs-fast-export.txt index 3ddd02e..d404ecf 100644 --- a/exporters/darcs/darcs-fast-export.txt +++ b/exporters/darcs/darcs-fast-export.txt @@ -18,6 +18,10 @@ The script can produce the fast-import stream format from the darcs repository. It supports incremental conversion as well, via the --import-marks / --export-marks switches. +Optionally the darcsrepo string may be a HTTP repository, in that case +only the patches are downloaded, not the pristine, speeding up a +one-time import. + == OPTIONS -h, --help:: diff --git a/exporters/darcs/darcs-fast-import b/exporters/darcs/darcs-fast-import index 2955164..69ec7bb 100755 --- a/exporters/darcs/darcs-fast-import +++ b/exporters/darcs/darcs-fast-import @@ -4,7 +4,7 @@ darcs-fast-export - darcs backend for fast data exporters - Copyright (c) 2008 Miklos Vajna <vmiklos@frugalware.org> + Copyright (c) 2008, 2009, 2010 Miklos Vajna <vmiklos@frugalware.org> Copyright (c) 2008 Matthias Andree <matthias.andree@gmx.de> This program is free software; you can redistribute it and/or modify @@ -81,6 +81,11 @@ class Handler: raise Exception(s) def get_date(self, ts, tz): + # first fix the case when tz is higher than +1200, as + # darcs won't accept it + if int(tz[:3]) > 12: + ts = str(int(ts) + 60*60*24) + tz = str(int(tz[:3])-24) + tz[3:] # int(ts) is seconds since epoch. Since we're trying to # capture both the absolute time of the commit and the # localtime in the timezone of the committer, we need to turn @@ -99,6 +104,13 @@ class Handler: items = s.split(' ') return " ".join(items[:-1]) + " " + tz + " " + items[-1] + def invoke_darcs(self, cmdline): + if os.system("darcs %s" % cmdline) != 0: + self.bug("darcs failed") + + def invoke_add(self, path): + self.invoke_darcs("add --boring --case-ok %s" % path) + def handle_mark(self): if self.line.startswith("mark :"): self.mark_num = int(self.line[6:-1]) @@ -143,6 +155,8 @@ class Handler: sock.stdin.close() self.log("Tagging %s:\n%s" % (version, sock.stdout.read())) sock.stdout.close() + if sock.wait() != 0: + self.bug("darcs tag failed: '%s'" % sock.returncode) def handle_commit(self): if not self.prevfiles and self.options.import_marks: @@ -156,6 +170,7 @@ class Handler: self.files.append(path) self.prevfiles = self.files[:] adds = [] + symlinks = [] self.read_next_line() self.handle_mark() @@ -173,32 +188,47 @@ class Handler: self.read_next_line() while self.line.startswith("merge "): self.read_next_line() + change = False while len(self.line) > 0: if self.line.startswith("deleteall"): path = self.line[2:-1] for path in self.files: os.unlink(path) self.files = [] + change = True elif self.line.startswith("D "): path = self.line[2:-1] if os.path.exists(path): os.unlink(path) if path in self.files: self.files.remove(path) + change = True elif self.line.startswith("R "): - os.system("darcs mv %s" % self.line[2:]) + self.invoke_darcs("mv %s" % self.line[2:]) + change = True elif self.line.startswith("C "): src, dest = self.line[:-1].split(' ')[1:] shutil.copy(src.strip('"'), dest.strip('"')) - os.system("darcs add %s" % dest) + self.invoke_add(dest) + change = True elif self.line.startswith("M "): items = self.line.split(' ') path = items[3][:-1] + dir = os.path.split(path)[0] + if len(dir) and not os.path.exists(dir): + os.makedirs(dir) + if items[1] == "120000": + if not self.options.symhack: + print "Adding symbolic links (symlinks) is not supported by Darcs." + sys.exit(2) + idx = int(items[2][1:]) # TODO: handle inline symlinks + symlinks.append((self.marks[idx], path)) + self.read_next_line() + continue sock = open(path, "w") if items[2] != "inline": idx = int(items[2][1:]) sock.write(self.marks[idx]) - del self.marks[idx] else: self.read_next_line() self.handle_data() @@ -208,6 +238,7 @@ class Handler: adds.append(path) if path not in self.files: self.files.append(path) + change = True else: self.unread_line = True break @@ -215,15 +246,35 @@ class Handler: if not len(self.line): break + if not change: + # darcs does not support empty commits + return for i in adds: - os.system("darcs add %s" % i) - sock = subprocess.Popen(["darcs", "record", "--ignore-times", "-a", "--pipe"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - buf = [self.date, self.ident, self.short, self.long] - sock.stdin.write("\n".join(buf)) + self.invoke_add(i) + args = ["darcs", "record", "--ignore-times", "-a", "--pipe"] + buf = [self.date, self.ident] + if not len(self.short): + args.extend(['-m', '']) + else: + buf.extend([self.short, self.long]) + sock = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + sock.stdin.write("\n".join(buf)+"\n") sock.stdin.close() self.log("Recording :%s:\n%s" % (self.mark_num, sock.stdout.read())) sock.stdout.close() - + if sock.wait() != 0: + self.bug("darcs record failed: '%s'" % sock.returncode) + + for src, path in symlinks: + # symlink does not do what we want if path is + # already there + if os.path.exists(path): + # rmtree() does not work on symlinks + if os.path.islink(path): + os.remove(path) + else: + shutil.rmtree(path) + os.symlink(src, path) if self.options.export_marks: # yeah, an xml parser would be better, but # should we mess with encodings just because of @@ -235,19 +286,24 @@ class Handler: self.export_marks.append(":%s %s" % (self.mark_num, hash)) def handle_progress(self, s): - print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s.strip()) + print "import progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s.strip()) sys.stdout.flush() def handle_opts(self): # Option Parser usage="%prog [options]" opp = optparse.OptionParser(usage=usage) + opp.set_defaults(symhack=False) opp.add_option("--import-marks", metavar="IFILE", help="read state for incremental imports from IFILE") opp.add_option("--export-marks", metavar="OFILE", help="write state for incremental imports to OFILE") opp.add_option("--logfile", metavar="L", help="log file which contains the output of external programs invoked during the conversion") + opp.add_option("--symhack", action="store_true", dest="symhack", + help="Do not error out when a symlink would be created, just create it in the workdir") + opp.add_option("--progress", metavar="P", + help="insert progress statements after every n commit [default: 100]") (self.options, args) = opp.parse_args() if self.options.logfile: @@ -255,6 +311,11 @@ class Handler: else: logfile = "_darcs/import.log" self.logsock = open(os.path.abspath(logfile), "a") + + if self.options.progress: + self.prognum = int(self.options.progress) + else: + self.prognum = 0 def log(self, s): self.logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)) @@ -282,6 +343,7 @@ class Handler: self.handle_opts() self.handle_import_marks() + commitcount = 0 while not self.eof: self.read_next_line() if not len(self.line[:-1]): @@ -290,6 +352,9 @@ class Handler: self.handle_blob() elif self.line.startswith("commit"): self.handle_commit() + commitcount += 1 + if self.prognum != 0 and commitcount % self.prognum == 0: + self.handle_progress("%d patches" % commitcount) elif self.line.startswith("tag"): self.handle_tag() elif self.line.startswith("reset"): diff --git a/exporters/darcs/darcs-fast-import.txt b/exporters/darcs/darcs-fast-import.txt index 09c7b1e..a7f2a12 100644 --- a/exporters/darcs/darcs-fast-import.txt +++ b/exporters/darcs/darcs-fast-import.txt @@ -33,3 +33,25 @@ as well, via the --import-marks / --export-marks switches. --logfile:: The output of external commands are redirected to a log file. You can specify the path of that file with this parameter. + +--symhack:: + Enable hack for symbolic links. darcs add does not handle them + but in case they are just added, we can create them in the working + directory. This can be handy in case for example the symbolic link is in + a subdirectory of the project and you don't even care about that + subdirectory. So the hack can be useful, but be extremely careful when + you use it. + +--progress=<n>:: + Insert progress statements after every <n> created patches. The + default is not to print anything as progress info is usually provided by + the exporter. Use this option in case the exporter does not have such a + switch but you still want to get some feedback. + +== EXIT CODES + +The exit code is: + +* 0 on success +* 1 on unhandled exception +* 2 in case the stream would try to let the importer create a symlink diff --git a/exporters/darcs/git-darcs b/exporters/darcs/git-darcs index eb70338..18455a2 100755 --- a/exporters/darcs/git-darcs +++ b/exporters/darcs/git-darcs @@ -2,7 +2,7 @@ # # git-darcs - bidirectional operation between a darcs repo and git # -# Copyright (c) 2008 by Miklos Vajna <vmiklos@frugalware.org> +# Copyright (c) 2008, 2010 by Miklos Vajna <vmiklos@frugalware.org> # # Based on git-bzr, which is # @@ -32,24 +32,25 @@ add() shift if ! [ -n "$name" -a -n "$location" ]; then echo "Usage: git darcs add name location [darcs-fast-export options]" - exit + return 1 fi if git remote show |grep -q $name; then echo "There is already a remote with that name" - exit + return 1 fi if [ -n "$(git config git-darcs.$name.location)" ]; then echo "There is already a darcs repo with that name" - exit + return 1 fi - if [ ! -d $location/_darcs ]; then + repo=$location/_darcs + if [ ! -d $repo ] && ! wget --quiet --spider $repo; then echo "Remote is not a darcs repository" - exit + return 1 fi git config git-darcs.$name.location $location - git config git-darcs.$name.darcs-fast-export-options "$*" echo "Darcs repo $name added. You can fetch it with 'git darcs fetch $name'" if ! [ -z "$*" ]; then + git config git-darcs.$name.darcs-fast-export-options "$*" echo "darcs-fast-export will get options: $*" fi } @@ -59,7 +60,7 @@ get_location() l=$(git config git-darcs.$remote.location) if [ -z "$l" ]; then echo "Cannot find darcs remote with name '$remote'." >&2 - exit + return 1 fi echo $l } @@ -70,13 +71,17 @@ fetch() shift if ! [ -n "$remote" -a -z "$*" ]; then echo "Usage: git darcs fetch reponame" - exit + return 1 fi - location=$(get_location $remote) + location=$(get_location $remote) || return $? git_map=$git_dir/darcs-git/$remote-git-map darcs_map=$git_dir/darcs-git/$remote-darcs-map - common_opts="--working $git_dir/darcs-git/repo --logfile $git_dir/darcs-git/fetch.log --git-branch=darcs/$remote" + common_opts="--working $git_dir/darcs-git/repo --logfile $git_dir/darcs-git/fetch.log --git-branch=refs/remotes/darcs/$remote" dfe_opts=$(git config git-darcs.$remote.darcs-fast-export-options) + pre_fetch="$(git config git-darcs.$remote.pre-fetch)" + if [ -n "$pre_fetch" ]; then + $pre_fetch + fi if [ ! -f $git_map -a ! -f $darcs_map ]; then echo "There doesn't seem to be an existing refmap." echo "Doing an initial import" @@ -85,20 +90,24 @@ fetch() git fast-import --export-marks=$git_map elif [ -f $git_map -a -f $darcs_map ]; then echo "Updating remote $remote" - old_rev=$(git rev-parse darcs/$remote) + old_rev=$(git rev-parse refs/remotes/darcs/$remote) darcs-fast-export --import-marks=$darcs_map --export-marks=$darcs_map $common_opts $dfe_opts $location | \ git fast-import --quiet --import-marks=$git_map --export-marks=$git_map - new_rev=$(git rev-parse darcs/$remote) + new_rev=$(git rev-parse refs/remotes/darcs/$remote) if [ "$old_rev" != "$new_rev" ]; then echo "Fetched the following updates:" git shortlog $old_rev..$new_rev else echo "Nothing fetched." - exit + return 0 fi else echo "One of the mapfiles is missing! Something went wrong!" - exit + return 1 + fi + post_fetch="$(git config git-darcs.$remote.post-fetch)" + if [ -n "$post_fetch" ]; then + $post_fetch fi } @@ -108,15 +117,15 @@ pull() shift if ! [ -n "$remote" -a -z "$*" ]; then echo "Usage: git darcs pull reponame" - exit + return 1 fi - fetch $remote + fetch $remote || return $? # see if we need to merge or rebase branch=$(git symbolic-ref HEAD|sed 's|.*/||') if [ "$(git config branch.$branch.rebase)" = "true" ]; then - git rebase darcs/$remote + git rebase refs/remotes/darcs/$remote else - git merge darcs/$remote + git merge refs/remotes/darcs/$remote fi } @@ -126,30 +135,38 @@ push() shift if ! [ -n "$remote" -a -z "$*" ]; then echo "Usage: git darcs push reponame" - exit + return 1 fi - location=$(get_location $remote) - if [ -n "$(git rev-list --left-right HEAD...darcs/$remote | sed -n '/^>/ p')" ]; then + location=$(get_location $remote) || return $? + if [ -n "$(git rev-list --left-right HEAD...refs/remotes/darcs/$remote | sed -n '/^>/ p')" ]; then echo "HEAD is not a strict child of $remote, cannot push. Merge first" - exit + return 1 fi - if [ -z "$(git rev-list --left-right HEAD...darcs/$remote | sed -n '/^</ p')" ]; then + if [ -z "$(git rev-list --left-right HEAD...refs/remotes/darcs/$remote | sed -n '/^</ p')" ]; then echo "Nothing to push. Commit something first" - exit + return 1 fi git_map=$git_dir/darcs-git/$remote-git-map darcs_map=$git_dir/darcs-git/$remote-darcs-map if [ ! -f $git_map -o ! -f $darcs_map ]; then echo "We do not have refmapping yet. Then how can I push?" - exit + return 1 + fi + pre_push="$(git config git-darcs.$remote.pre-push)" + if [ -n "$pre_push" ]; then + $pre_push fi echo "Pushing the following updates:" - git shortlog darcs/$remote.. + git shortlog refs/remotes/darcs/$remote.. git fast-export --import-marks=$git_map --export-marks=$git_map HEAD | \ (cd $location; darcs-fast-import --import-marks=$darcs_map --export-marks=$darcs_map \ --logfile $git_dir/darcs-git/push.log) if [ $? == 0 ]; then - git update-ref darcs/$remote HEAD + git update-ref refs/remotes/darcs/$remote HEAD + post_push="$(git config git-darcs.$remote.post-push)" + if [ -n "$post_push" ]; then + $post_push + fi fi } @@ -159,18 +176,18 @@ list() if [ -z "$*" ] then git config -l | sed -n -e '/git-darcs\..*/ {s/git-darcs\.//; s/\.location=.*//p}' - exit + return 0 elif [ "$#" -eq 1 ] then case $1 in -v|--verbose) git config -l | sed -n -e '/git-darcs\..*/ {s/git-darcs\.//; s/\.location=/\t/p}' - exit + return 0 ;; esac fi echo "Usage: git darcs list [-v|--verbose]" - exit 1 + return 1 } # Find the darcs commit(s) supporting a git SHA1 prefix @@ -181,9 +198,9 @@ find_darcs() if [ -z "$sha1" -o -n "$*" ] then echo "Usage: git darcs find-darcs <sha1-prefix>" - exit 1 + return 1 fi - for remote in $git_dir/darcs/* + for remote in $(git for-each-ref --format='%(refname)' refs/remotes/darcs) do remote=`basename $remote` git_map=$git_dir/darcs-git/$remote-git-map @@ -191,7 +208,7 @@ find_darcs() if [ ! -f $git_map -o ! -f $darcs_map ] then echo "Missing mappings for remote $remote" - exit 1 + return 1 fi for row in `sed -n -e "/:.* $sha1.*/ s/[^ ]*/&/p" $git_map` do @@ -208,9 +225,9 @@ find_git() if [ -z "$patch" -o -n "$*" ] then echo "Usage: git darcs find-git <patch-prefix>" - exit 1 + return 1 fi - for remote in $git_dir/darcs/* + for remote in $(git for-each-ref --format='%(refname)' refs/remotes/darcs) do remote=`basename $remote` git_map=$git_dir/darcs-git/$remote-git-map @@ -218,7 +235,7 @@ find_git() if [ ! -f $git_map -o ! -f $darcs_map ] then echo "Missing mappings for remote $remote" - exit 1 + return 1 fi for row in `sed -n -e "/:.* $patch.*/ s/[^ ]*/&/p" $darcs_map` do @@ -230,7 +247,7 @@ find_git() git rev-parse 2> /dev/null if [ $? != 0 ]; then echo "Must be inside a git repository to work" - exit + exit 1 fi git_dir=$(git rev-parse --git-dir) @@ -253,7 +270,7 @@ case $command in *) echo "Usage: git darcs [COMMAND] [OPTIONS]" echo "Commands: add, push, fetch, pull, list, find-darcs, find-git" - exit + exit 1 ;; esac diff --git a/exporters/darcs/git-darcs.txt b/exporters/darcs/git-darcs.txt index 7558329..8bf5b33 100644 --- a/exporters/darcs/git-darcs.txt +++ b/exporters/darcs/git-darcs.txt @@ -20,7 +20,7 @@ A typical workflow is: $ mkdir git-repo $ cd git-repo $ git init -$ git darcs add upstream ../darcs-repo +$ git darcs add upstream /path/to/darcs-repo $ git darcs pull upstream ... hack, hack, hack ... @@ -70,3 +70,23 @@ find-darcs:: find-git:: Searches for git commits matching a darcs patch prefix. The syntax is `find-git <patch-prefix>`. + +== HOOKS + +It's possible to automatically run before and after the fetch and the +push subcommand. For example if you want to automatically run `darcs +pull -a` before a `git darcs fetch upstream`: + +---- +git config git-darcs.upstream.pre-fetch "darcs pull -a --repodir=/path/to/darcs-repo" +---- + +Or in case you want to automatically `darcs send` all patches after a +`git darcs push upstream`: + +---- +git config git-darcs.upstream.post-push "darcs send -a --repodir=/path/to/darcs-repo" +---- + +== SEE-ALSO +*git*(1), *darcs*(1) diff --git a/exporters/darcs/t/lib-httpd.sh b/exporters/darcs/t/lib-httpd.sh new file mode 100644 index 0000000..fad953e --- /dev/null +++ b/exporters/darcs/t/lib-httpd.sh @@ -0,0 +1,67 @@ +#!/bin/sh +# +# This is based on git's t/lib-httpd.sh, which is +# Copyright (c) 2008 Clemens Buchacher <drizzd@aon.at> +# + +if test -n "$DFE_TEST_SKIP_HTTPD" +then + echo "skipping test (undef DFE_TEST_SKIP_HTTPD to enable)" + exit +fi + +LIB_HTTPD_PATH=${LIB_HTTPD_PATH-'/usr/sbin/httpd'} +LIB_HTTPD_PORT=${LIB_HTTPD_PORT-'8111'} + +HTTPD_ROOT_PATH="$PWD"/httpd +HTTPD_DOCUMENT_ROOT_PATH=$HTTPD_ROOT_PATH/www + +if ! test -x "$LIB_HTTPD_PATH" +then + echo "skipping test, no web server found at '$LIB_HTTPD_PATH'" + exit +fi + +HTTPD_VERSION=`$LIB_HTTPD_PATH -v | \ + sed -n 's/^Server version: Apache\/\([0-9]*\)\..*$/\1/p; q'` + +if test -n "$HTTPD_VERSION" +then + if test -z "$LIB_HTTPD_MODULE_PATH" + then + if ! test $HTTPD_VERSION -ge 2 + then + echo "skipping test, at least Apache version 2 is required" + exit + fi + + LIB_HTTPD_MODULE_PATH='/usr/lib/apache' + fi +else + error "Could not identify web server at '$LIB_HTTPD_PATH'" +fi + +HTTPD_PARA="-d $HTTPD_ROOT_PATH -f $HTTPD_ROOT_PATH/apache.conf" + +prepare_httpd() { + mkdir -p $HTTPD_DOCUMENT_ROOT_PATH + + ln -s $LIB_HTTPD_MODULE_PATH $HTTPD_ROOT_PATH/modules + + echo "PidFile httpd.pid" > $HTTPD_ROOT_PATH/apache.conf + echo "DocumentRoot www" >> $HTTPD_ROOT_PATH/apache.conf + echo "ErrorLog error.log" >> $HTTPD_ROOT_PATH/apache.conf + + HTTPD_URL=http://127.0.0.1:$LIB_HTTPD_PORT +} + +start_httpd() { + prepare_httpd + + "$LIB_HTTPD_PATH" $HTTPD_PARA \ + -c "Listen 127.0.0.1:$LIB_HTTPD_PORT" -k start +} + +stop_httpd() { + "$LIB_HTTPD_PATH" $HTTPD_PARA -k stop +} diff --git a/exporters/darcs/t/lib.sh b/exporters/darcs/t/lib.sh index 3df0a8a..7d2218a 100644 --- a/exporters/darcs/t/lib.sh +++ b/exporters/darcs/t/lib.sh @@ -78,6 +78,10 @@ third line" _drrec -a -m "remove and rename" darcs mv a b _drrecamend + echo c > c + darcs add c + # empty commit message + _drrec -a -m "" cd .. } @@ -150,6 +154,7 @@ create_hg() hg pull ../$1.tmp hg merge echo D > file + hg resolve -m file echo "first line second line third line" | hg commit -l /dev/stdin @@ -172,6 +177,12 @@ third line" | hg commit -l /dev/stdin hg commit -m "add empty file" hg rm file3 hg commit -m "remove file" + mkdir subdir + echo test > subdir/file + hg add subdir/file + hg commit -m "add subdir file" + echo test2 > subdir/file + hg commit -m "commit with weird date" -d "Fri Apr 03 12:38:26 2009 +1300" cd .. } create_git() @@ -180,6 +191,7 @@ create_git() mkdir -p $1 cd $1 git init $2 + git commit --allow-empty -m 'root commit' echo A > file git add file git commit -a -m A @@ -213,6 +225,23 @@ third line" | git commit -a -F - git commit -a -m "add empty file" rm file3 git commit -a -m "remove file" + # now add back 'file' with its old conents, so the mark gets + # reused + echo f > file + git add file + git commit -a -m "file: other -> f" + # this is a boring file for Darcs + touch foo.pyc + git add foo.pyc + git commit -a -m "boring file" + # replace an uppercase file to a lowercase one + echo SPAM > SPAM + git add SPAM + git commit -a -m SPAM + rm SPAM + echo spam > spam + git add spam + git commit -a -m "SPAM -> spam" cd .. } @@ -226,12 +255,14 @@ diff_git() diff_importgit() { + test -z "`(cd $1.darcs; darcs diff)`" && diff --exclude _darcs --exclude .git --exclude '*-darcs-backup*' -Nur $1 $1.darcs return $? } diff_importhg() { + cd $1.darcs && test -z "`darcs diff 2>&1`" && cd .. && diff --exclude _darcs --exclude .hg --exclude '*-darcs-backup*' --exclude 'hg-export.*' \ --exclude '.hgtags' --exclude '*.orig' -Nur $1 $1.darcs return $? @@ -239,12 +270,14 @@ diff_importhg() diff_importdarcs() { + cd $1.importdarcs && test -z "`darcs diff 2>&1`" && cd .. && diff --exclude _darcs --exclude '*-darcs-backup*' -Nur $1 $2 return $? } diff_importbzr() { + cd $1.darcs && test -z "`darcs diff 2>&1`" && cd .. && diff --exclude _darcs --exclude .bzr --exclude '*-darcs-backup*' -Nur $1 $1.darcs return $? } @@ -260,6 +293,7 @@ diff_bzr() diff_hg() { + hg -R $1.hg update diff --exclude _darcs --exclude .hg --exclude '*-darcs-backup*' -Nur $1.hg $1 return $? } diff --git a/exporters/darcs/t/test-hg.sh b/exporters/darcs/t/test-hg.sh index ddde37a..95bfc4b 100644 --- a/exporters/darcs/t/test-hg.sh +++ b/exporters/darcs/t/test-hg.sh @@ -8,7 +8,7 @@ cd test.hg hg init cd .. if [ "$1" != "--stdout" ]; then - darcs-fast-export test |(cd test.hg; hg fastimport /dev/stdin) + darcs-fast-export test |(cd test.hg; hg fastimport -) diff_hg test exit $? else diff --git a/exporters/darcs/t/test2-git-http.sh b/exporters/darcs/t/test2-git-http.sh new file mode 100644 index 0000000..02549e4 --- /dev/null +++ b/exporters/darcs/t/test2-git-http.sh @@ -0,0 +1,22 @@ +. ./lib.sh +. ./lib-httpd.sh + +rm -rf test2.darcs test2.git httpd +create_darcs test2 --darcs-2 +mkdir -p $HTTPD_DOCUMENT_ROOT_PATH +mv -v test2 $HTTPD_DOCUMENT_ROOT_PATH +ln -s $HTTPD_DOCUMENT_ROOT_PATH/test2 . + +mkdir test2.git +cd test2.git +git --bare init +cd .. +start_httpd +darcs-fast-export $HTTPD_URL/test2 |(cd test2.git; git fast-import) +ret=$? +stop_httpd +if [ $ret != 0 ]; then + exit $ret +fi +diff_git test2 +exit $? diff --git a/exporters/darcs/t/testimport-gitsymlink.sh b/exporters/darcs/t/testimport-gitsymlink.sh new file mode 100644 index 0000000..100c583 --- /dev/null +++ b/exporters/darcs/t/testimport-gitsymlink.sh @@ -0,0 +1,45 @@ +. ./lib.sh + +create_git test +cd test +# add two dirs with the some contents, then remove the second +# and make it a symlink to the first +mkdir dira +echo blabla > dira/file +echo blablabla > dira/file2 +mkdir dirb +touch dirb/file +touch dirb/file2 +git add dira dirb +git commit -a -m "add dira/dirb" +rm -rf dirb +ln -s dira dirb +git add dirb +git commit -a -m "change a dir to a symlink" +cd .. + +rm -rf test.darcs +mkdir test.darcs +cd test.darcs +darcs init +cd .. +(cd test; git fast-export --progress=2 HEAD) | (cd test.darcs; darcs-fast-import) +# we *do* want this to fail, but with error code 2. that means that we +# detected that symlinks are not supported and the user does not get a +# meaningless exception +if [ $? != 2 ]; then + exit 1 +fi + +# now try with the symhack option +rm -rf test.darcs +mkdir test.darcs +cd test.darcs +darcs init +cd .. +(cd test; git fast-export --progress=2 HEAD) | (cd test.darcs; darcs-fast-import --symhack) +if [ $? != 0 ]; then + exit 1 +fi +diff_importgit test +exit $? diff --git a/exporters/darcs/t/testimport-hg.sh b/exporters/darcs/t/testimport-hg.sh index 76df76d..7f6d215 100644 --- a/exporters/darcs/t/testimport-hg.sh +++ b/exporters/darcs/t/testimport-hg.sh @@ -8,7 +8,6 @@ cd test.darcs darcs init cd .. (cd test; $pypath/bzrlib/plugins/fastimport/exporters/hg-fast-export.py -r .) | (cd test.darcs; darcs-fast-import) -rm test/{*.orig,hg-export.status} if [ $? != 0 ]; then exit 1 fi diff --git a/exporters/darcs/x2d b/exporters/darcs/x2d index 61c66ef..398103d 100755 --- a/exporters/darcs/x2d +++ b/exporters/darcs/x2d @@ -62,6 +62,12 @@ case $format in ;; esac +common_opts="" +while [ -n "$2" ] +do + common_opts="$common_opts $1" + shift 1 +done origin="$1" shift 1 @@ -77,7 +83,7 @@ fmark="$origin.darcs/_darcs/fast-import/ffi-marks" mkdir -p $origin.darcs cd $origin.darcs -common_opts="--logfile $origin.darcs/_darcs/fast-import/log" +common_opts="$common_opts --logfile $origin.darcs/_darcs/fast-import/log" pypath="/$(python -c 'from distutils import sysconfig; print sysconfig.get_python_lib()[1:]')/" if [ ! -f $dmark ]; then diff --git a/exporters/darcs/x2d.txt b/exporters/darcs/x2d.txt index eb2ec34..25ed6bb 100644 --- a/exporters/darcs/x2d.txt +++ b/exporters/darcs/x2d.txt @@ -6,7 +6,7 @@ x2d - convert git, bzr or hg repos to a darcs one using fast-export == SYNOPSIS -x2d -f <format> <otherrepo> +x2d -f <format> [<importoptions>] <otherrepo> == DESCRIPTION @@ -24,3 +24,5 @@ importer's standard input. -f <format>:: Specify the format of the source repo. Currently supported sources are git, bzr and hg. Incremental conversion is supported for all of them. + +The rest of the options is directly passed to darcs-fast-import. diff --git a/exporters/svn-fast-export.README b/exporters/svn-fast-export.README new file mode 100644 index 0000000..e08277e --- /dev/null +++ b/exporters/svn-fast-export.README @@ -0,0 +1,12 @@ +To compile svn-fast-export.c, use make. You'll need to install +some packages first using the package manager on your OS: + +* libsvn-dev - the Subversion libraries +* libapr1-dev - the Apache Portable Runtime libraries + +Note: If someone with good knowledge of the Subversion +Python bindings could rewrite svn-fast-export.py so that +https://bugs.launchpad.net/bzr-fastimport/+bug/273361 +went away, then there would be much rejoicing throughout +the land and the need for svn-fast-export.c would largely +disappear. diff --git a/exporters/svn-fast-export.py b/exporters/svn-fast-export.py index e44c6cb..fd88094 100755 --- a/exporters/svn-fast-export.py +++ b/exporters/svn-fast-export.py @@ -11,6 +11,7 @@ trunk_path = '/trunk/' branches_path = '/branches/' tags_path = '/tags/' +address = 'localhost' first_rev = 1 final_rev = 0 @@ -123,9 +124,9 @@ def export_revision(rev, repo, fs, pool): # Do the recursive crawl. if props.has_key('svn:author'): - author = "%s <%s@localhost>" % (props['svn:author'], props['svn:author']) + author = "%s <%s@%s>" % (props['svn:author'], props['svn:author'], address) else: - author = 'nobody <nobody@localhost>' + author = 'nobody <nobody@users.sourceforge.net>' if len(file_changes) == 0: svn_pool_destroy(revpool) @@ -165,7 +166,6 @@ def crawl_revisions(pool, repos_path): youngest_rev = svn_fs_youngest_rev(fs_obj, pool) - first_rev = 1 if final_rev == 0: final_rev = youngest_rev for rev in xrange(first_rev, final_rev + 1): @@ -178,12 +178,16 @@ if __name__ == '__main__': parser.set_usage(usage) parser.add_option('-f', '--final-rev', help='Final revision to import', dest='final_rev', metavar='FINAL_REV', type='int') + parser.add_option('-r', '--first-rev', help='First revision to import', + dest='first_rev', metavar='FIRST_REV', type='int') parser.add_option('-t', '--trunk-path', help="Path in repo to /trunk, may be `regex:/cvs/(trunk)/proj1/(.*)`\nFirst group is used as branchname, second to match files", dest='trunk_path', metavar='TRUNK_PATH') parser.add_option('-b', '--branches-path', help='Path in repo to /branches', dest='branches_path', metavar='BRANCHES_PATH') parser.add_option('-T', '--tags-path', help='Path in repo to /tags', dest='tags_path', metavar='TAGS_PATH') + parser.add_option('-a', '--address', help='Domain to put on users for their mail address', + dest='address', metavar='hostname', type='string') (options, args) = parser.parse_args() if options.trunk_path != None: @@ -194,6 +198,10 @@ if __name__ == '__main__': tags_path = options.tags_path if options.final_rev != None: final_rev = options.final_rev + if options.first_rev != None: + first_rev = options.first_rev + if options.address != None: + address = options.address MATCHER = Matcher.getMatcher(trunk_path) sys.stderr.write("%s\n" % MATCHER) @@ -16,97 +16,14 @@ """Miscellaneous useful stuff.""" - -def single_plural(n, single, plural): - """Return a single or plural form of a noun based on number.""" - if n == 1: - return single - else: - return plural - - -def defines_to_dict(defines): - """Convert a list of definition strings to a dictionary.""" - if defines is None: - return None - result = {} - for define in defines: - kv = define.split('=', 1) - if len(kv) == 1: - result[define.strip()] = 1 - else: - result[kv[0].strip()] = kv[1].strip() - return result - - -def invert_dict(d): - """Invert a dictionary with keys matching each value turned into a list.""" - # Based on recipe from ASPN - result = {} - for k, v in d.iteritems(): - keys = result.setdefault(v, []) - keys.append(k) - return result - - -def invert_dictset(d): - """Invert a dictionary with keys matching a set of values, turned into lists.""" - # Based on recipe from ASPN - result = {} - for k, c in d.iteritems(): - for v in c: - keys = result.setdefault(v, []) - keys.append(k) - return result - - -def _common_path_and_rest(l1, l2, common=[]): - # From http://code.activestate.com/recipes/208993/ - if len(l1) < 1: return (common, l1, l2) - if len(l2) < 1: return (common, l1, l2) - if l1[0] != l2[0]: return (common, l1, l2) - return _common_path_and_rest(l1[1:], l2[1:], common+[l1[0]]) - - -def common_path(path1, path2): - """Find the common bit of 2 paths.""" - return ''.join(_common_path_and_rest(path1, path2)[0]) - - -def common_directory(paths): - """Find the deepest common directory of a list of paths. - - :return: if no paths are provided, None is returned; - if there is no common directory, '' is returned; - otherwise the common directory with a trailing / is returned. - """ - from bzrlib import osutils - def get_dir_with_slash(path): - if path == '' or path.endswith('/'): - return path - else: - dirname, basename = osutils.split(path) - if dirname == '': - return dirname - else: - return dirname + '/' - - if not paths: - return None - elif len(paths) == 1: - return get_dir_with_slash(paths[0]) - else: - common = common_path(paths[0], paths[1]) - for path in paths[2:]: - common = common_path(common, path) - return get_dir_with_slash(common) +import stat def escape_commit_message(message): """Replace xml-incompatible control characters.""" # This really ought to be provided by bzrlib. # Code copied from bzrlib.commit. - + # Python strings can include characters that can't be # represented in well-formed XML; escape characters that # aren't listed in the XML specification @@ -119,25 +36,6 @@ def escape_commit_message(message): return message -def binary_stream(stream): - """Ensure a stream is binary on Windows. - - :return: the stream - """ - try: - import os - if os.name == 'nt': - fileno = getattr(stream, 'fileno', None) - if fileno: - no = fileno() - if no >= 0: # -1 means we're working as subprocess - import msvcrt - msvcrt.setmode(no, os.O_BINARY) - except ImportError: - pass - return stream - - def best_format_for_objects_in_a_repository(repo): """Find the high-level format for branches and trees given a repository. @@ -215,3 +113,37 @@ def open_destination_directory(location, format=None, verbose=True): from bzrlib.info import show_bzrdir_info show_bzrdir_info(repo.bzrdir, verbose=0) return control + + +def kind_to_mode(kind, executable): + if kind == "file": + if executable == True: + return stat.S_IFREG | 0755 + elif executable == False: + return stat.S_IFREG | 0644 + else: + raise AssertionError("Executable %r invalid" % executable) + elif kind == "symlink": + return stat.S_IFLNK + elif kind == "directory": + return stat.S_IFDIR + elif kind == "tree-reference": + return 0160000 + else: + raise AssertionError("Unknown file kind '%s'" % kind) + + +def mode_to_kind(mode): + # Note: Output from git-fast-export slightly different to spec + if mode in (0644, 0100644): + return 'file', False + elif mode in (0755, 0100755): + return 'file', True + elif mode == 0040000: + return 'directory', False + elif mode == 0120000: + return 'symlink', False + elif mode == 0160000: + return 'tree-reference', False + else: + raise AssertionError("invalid mode %o" % mode) diff --git a/idmapfile.py b/idmapfile.py deleted file mode 100644 index 7b4ccf4..0000000 --- a/idmapfile.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Routines for saving and loading the id-map file.""" - -import os - - -def save_id_map(filename, revision_ids): - """Save the mapping of commit ids to revision ids to a file. - - Throws the usual exceptions if the file cannot be opened, - written to or closed. - - :param filename: name of the file to save the data to - :param revision_ids: a dictionary of commit ids to revision ids. - """ - f = open(filename, 'wb') - try: - for commit_id, rev_id in revision_ids.iteritems(): - f.write("%s %s\n" % (commit_id, rev_id)) - f.flush() - finally: - f.close() - - -def load_id_map(filename): - """Load the mapping of commit ids to revision ids from a file. - - If the file does not exist, an empty result is returned. - If the file does exists but cannot be opened, read or closed, - the normal exceptions are thrown. - - NOTE: It is assumed that commit-ids do not have embedded spaces. - - :param filename: name of the file to save the data to - :result: map, count where: - map = a dictionary of commit ids to revision ids; - count = the number of keys in map - """ - result = {} - count = 0 - if os.path.exists(filename): - f = open(filename) - try: - for line in f: - parts = line[:-1].split(' ', 1) - result[parts[0]] = parts[1] - count += 1 - finally: - f.close() - return result, count diff --git a/marks_file.py b/marks_file.py index ab24be2..c05f8c6 100644 --- a/marks_file.py +++ b/marks_file.py @@ -17,7 +17,6 @@ """Routines for reading/writing a marks file.""" -import re from bzrlib.trace import warning @@ -38,12 +37,26 @@ def import_marks(filename): # Read the revision info revision_ids = {} - for line in f: + + line = f.readline() + if line == 'format=1\n': + # Cope with old-style marks files + # Read the branch info + branch_names = {} + for string in f.readline().rstrip('\n').split('\0'): + if not string: + continue + name, integer = string.rsplit('.', 1) + branch_names[name] = int(integer) + line = f.readline() + + while line: line = line.rstrip('\n') mark, revid = line.split(' ', 1) if mark.startswith(':'): mark = mark[1:] revision_ids[mark] = revid + line = f.readline() f.close() return revision_ids diff --git a/parser.py b/parser.py deleted file mode 100644 index c133c01..0000000 --- a/parser.py +++ /dev/null @@ -1,557 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Parser of import data into command objects. - -In order to reuse existing front-ends, the stream format is a subset of -the one used by git-fast-import (as of the 1.5.4 release of git at least). -The grammar is: - - stream ::= cmd*; - - cmd ::= new_blob - | new_commit - | new_tag - | reset_branch - | checkpoint - | progress - ; - - new_blob ::= 'blob' lf - mark? - file_content; - file_content ::= data; - - new_commit ::= 'commit' sp ref_str lf - mark? - ('author' sp name '<' email '>' when lf)? - 'committer' sp name '<' email '>' when lf - commit_msg - ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? - ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)* - file_change* - lf?; - commit_msg ::= data; - - file_change ::= file_clr - | file_del - | file_rnm - | file_cpy - | file_obm - | file_inm; - file_clr ::= 'deleteall' lf; - file_del ::= 'D' sp path_str lf; - file_rnm ::= 'R' sp path_str sp path_str lf; - file_cpy ::= 'C' sp path_str sp path_str lf; - file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf; - file_inm ::= 'M' sp mode sp 'inline' sp path_str lf - data; - - new_tag ::= 'tag' sp tag_str lf - 'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf - 'tagger' sp name '<' email '>' when lf - tag_msg; - tag_msg ::= data; - - reset_branch ::= 'reset' sp ref_str lf - ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? - lf?; - - checkpoint ::= 'checkpoint' lf - lf?; - - progress ::= 'progress' sp not_lf* lf - lf?; - - # note: the first idnum in a stream should be 1 and subsequent - # idnums should not have gaps between values as this will cause - # the stream parser to reserve space for the gapped values. An - # idnum can be updated in the future to a new object by issuing - # a new mark directive with the old idnum. - # - mark ::= 'mark' sp idnum lf; - data ::= (delimited_data | exact_data) - lf?; - - # note: delim may be any string but must not contain lf. - # data_line may contain any data but must not be exactly - # delim. The lf after the final data_line is included in - # the data. - delimited_data ::= 'data' sp '<<' delim lf - (data_line lf)* - delim lf; - - # note: declen indicates the length of binary_data in bytes. - # declen does not include the lf preceeding the binary data. - # - exact_data ::= 'data' sp declen lf - binary_data; - - # note: quoted strings are C-style quoting supporting \c for - # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn - # is the signed byte value in octal. Note that the only - # characters which must actually be escaped to protect the - # stream formatting is: \, " and LF. Otherwise these values - # are UTF8. - # - ref_str ::= ref; - sha1exp_str ::= sha1exp; - tag_str ::= tag; - path_str ::= path | '"' quoted(path) '"' ; - mode ::= '100644' | '644' - | '100755' | '755' - | '120000' - ; - - declen ::= # unsigned 32 bit value, ascii base10 notation; - bigint ::= # unsigned integer value, ascii base10 notation; - binary_data ::= # file content, not interpreted; - - when ::= raw_when | rfc2822_when; - raw_when ::= ts sp tz; - rfc2822_when ::= # Valid RFC 2822 date and time; - - sp ::= # ASCII space character; - lf ::= # ASCII newline (LF) character; - - # note: a colon (':') must precede the numerical value assigned to - # an idnum. This is to distinguish it from a ref or tag name as - # GIT does not permit ':' in ref or tag strings. - # - idnum ::= ':' bigint; - path ::= # GIT style file path, e.g. "a/b/c"; - ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT"; - tag ::= # GIT tag name, e.g. "FIREFOX_1_5"; - sha1exp ::= # Any valid GIT SHA1 expression; - hexsha1 ::= # SHA1 in hexadecimal format; - - # note: name and email are UTF8 strings, however name must not - # contain '<' or lf and email must not contain any of the - # following: '<', '>', lf. - # - name ::= # valid GIT author/committer name; - email ::= # valid GIT author/committer email; - ts ::= # time since the epoch in seconds, ascii base10 notation; - tz ::= # GIT style timezone; - - # note: comments may appear anywhere in the input, except - # within a data command. Any form of the data command - # always escapes the related input from comment processing. - # - # In case it is not clear, the '#' that starts the comment - # must be the first character on that the line (an lf have - # preceeded it). - # - comment ::= '#' not_lf* lf; - not_lf ::= # Any byte that is not ASCII newline (LF); -""" - - -import re -import sys - -import commands -import dates -import errors - - -## Stream parsing ## - -class LineBasedParser(object): - - def __init__(self, input): - """A Parser that keeps track of line numbers. - - :param input: the file-like object to read from - """ - self.input = input - self.lineno = 0 - # Lines pushed back onto the input stream - self._buffer = [] - - def abort(self, exception, *args): - """Raise an exception providing line number information.""" - raise exception(self.lineno, *args) - - def readline(self): - """Get the next line including the newline or '' on EOF.""" - self.lineno += 1 - if self._buffer: - return self._buffer.pop() - else: - return self.input.readline() - - def next_line(self): - """Get the next line without the newline or None on EOF.""" - line = self.readline() - if line: - return line[:-1] - else: - return None - - def push_line(self, line): - """Push line back onto the line buffer. - - :param line: the line with no trailing newline - """ - self.lineno -= 1 - self._buffer.append(line + "\n") - - def read_bytes(self, count): - """Read a given number of bytes from the input stream. - - Throws MissingBytes if the bytes are not found. - - Note: This method does not read from the line buffer. - - :return: a string - """ - result = self.input.read(count) - found = len(result) - self.lineno += result.count("\n") - if found != count: - self.abort(errors.MissingBytes, count, found) - return result - - def read_until(self, terminator): - """Read the input stream until the terminator is found. - - Throws MissingTerminator if the terminator is not found. - - Note: This method does not read from the line buffer. - - :return: the bytes read up to but excluding the terminator. - """ - - lines = [] - term = terminator + '\n' - while True: - line = self.input.readline() - if line == term: - break - else: - lines.append(line) - return ''.join(lines) - - -# Regular expression used for parsing. (Note: The spec states that the name -# part should be non-empty but git-fast-export doesn't always do that so -# the first bit is \w*, not \w+.) Also git-fast-import code says the -# space before the email is optional. -_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)') -_WHO_RE = re.compile(r'([^<]*)<(.*)>') - - -class ImportParser(LineBasedParser): - - def __init__(self, input, verbose=False, output=sys.stdout): - """A Parser of import commands. - - :param input: the file-like object to read from - :param verbose: display extra information of not - :param output: the file-like object to write messages to (YAGNI?) - """ - LineBasedParser.__init__(self, input) - self.verbose = verbose - self.output = output - # We auto-detect the date format when a date is first encountered - self.date_parser = None - - def _warning(self, msg): - sys.stderr.write("warning line %d: %s\n" % (self.lineno, msg)) - - def iter_commands(self): - """Iterator returning ImportCommand objects.""" - while True: - line = self.next_line() - if line is None: - break - elif len(line) == 0 or line.startswith('#'): - continue - # Search for commands in order of likelihood - elif line.startswith('commit '): - yield self._parse_commit(line[len('commit '):]) - elif line.startswith('blob'): - yield self._parse_blob() - elif line.startswith('progress '): - yield commands.ProgressCommand(line[len('progress '):]) - elif line.startswith('reset '): - yield self._parse_reset(line[len('reset '):]) - elif line.startswith('tag '): - yield self._parse_tag(line[len('tag '):]) - elif line.startswith('checkpoint'): - yield commands.CheckpointCommand() - else: - self.abort(errors.InvalidCommand, line) - - def iter_file_commands(self): - """Iterator returning FileCommand objects. - - If an invalid file command is found, the line is silently - pushed back and iteration ends. - """ - while True: - line = self.next_line() - if line is None: - break - elif len(line) == 0 or line.startswith('#'): - continue - # Search for file commands in order of likelihood - elif line.startswith('M '): - yield self._parse_file_modify(line[2:]) - elif line.startswith('D '): - path = self._path(line[2:]) - yield commands.FileDeleteCommand(path) - elif line.startswith('R '): - old, new = self._path_pair(line[2:]) - yield commands.FileRenameCommand(old, new) - elif line.startswith('C '): - src, dest = self._path_pair(line[2:]) - yield commands.FileCopyCommand(src, dest) - elif line.startswith('deleteall'): - yield commands.FileDeleteAllCommand() - else: - self.push_line(line) - break - - def _parse_blob(self): - """Parse a blob command.""" - lineno = self.lineno - mark = self._get_mark_if_any() - data = self._get_data('blob') - return commands.BlobCommand(mark, data, lineno) - - def _parse_commit(self, ref): - """Parse a commit command.""" - lineno = self.lineno - mark = self._get_mark_if_any() - author = self._get_user_info('commit', 'author', False) - committer = self._get_user_info('commit', 'committer') - message = self._get_data('commit', 'message') - try: - message = message.decode('utf_8') - except UnicodeDecodeError: - self._warning( - "commit message not in utf8 - replacing unknown characters") - message = message.decode('utf_8', 'replace') - from_ = self._get_from() - merges = [] - while True: - merge = self._get_merge() - if merge is not None: - # while the spec suggests it's illegal, git-fast-export - # outputs multiple merges on the one line, e.g. - # merge :x :y :z - these_merges = merge.split(" ") - merges.extend(these_merges) - else: - break - return commands.CommitCommand(ref, mark, author, committer, message, - from_, merges, self.iter_file_commands, lineno) - - def _parse_file_modify(self, info): - """Parse a filemodify command within a commit. - - :param info: a string in the format "mode dataref path" - (where dataref might be the hard-coded literal 'inline'). - """ - params = info.split(' ', 2) - path = self._path(params[2]) - is_executable, is_symlink = self._mode(params[0]) - if is_symlink: - kind = commands.SYMLINK_KIND - else: - kind = commands.FILE_KIND - if params[1] == 'inline': - dataref = None - data = self._get_data('filemodify') - else: - dataref = params[1] - data = None - return commands.FileModifyCommand(path, kind, is_executable, dataref, - data) - - def _parse_reset(self, ref): - """Parse a reset command.""" - from_ = self._get_from() - return commands.ResetCommand(ref, from_) - - def _parse_tag(self, name): - """Parse a tag command.""" - from_ = self._get_from('tag') - tagger = self._get_user_info('tag', 'tagger', accept_just_who=True) - message = self._get_data('tag', 'message').decode('utf_8') - return commands.TagCommand(name, from_, tagger, message) - - def _get_mark_if_any(self): - """Parse a mark section.""" - line = self.next_line() - if line.startswith('mark :'): - return line[len('mark :'):] - else: - self.push_line(line) - return None - - def _get_from(self, required_for=None): - """Parse a from section.""" - line = self.next_line() - if line is None: - return None - elif line.startswith('from '): - return line[len('from '):] - elif required_for: - self.abort(errors.MissingSection, required_for, 'from') - else: - self.push_line(line) - return None - - def _get_merge(self): - """Parse a merge section.""" - line = self.next_line() - if line is None: - return None - elif line.startswith('merge '): - return line[len('merge '):] - else: - self.push_line(line) - return None - - def _get_user_info(self, cmd, section, required=True, - accept_just_who=False): - """Parse a user section.""" - line = self.next_line() - if line.startswith(section + ' '): - return self._who_when(line[len(section + ' '):], cmd, section, - accept_just_who=accept_just_who) - elif required: - self.abort(errors.MissingSection, cmd, section) - else: - self.push_line(line) - return None - - def _get_data(self, required_for, section='data'): - """Parse a data section.""" - line = self.next_line() - if line.startswith('data '): - rest = line[len('data '):] - if rest.startswith('<<'): - return self.read_until(rest[2:]) - else: - size = int(rest) - read_bytes = self.read_bytes(size) - # optional LF after data. - next = self.input.readline() - self.lineno += 1 - if len(next) > 1 or next != "\n": - self.push_line(next[:-1]) - return read_bytes - else: - self.abort(errors.MissingSection, required_for, section) - - def _who_when(self, s, cmd, section, accept_just_who=False): - """Parse who and when information from a string. - - :return: a tuple of (name,email,timestamp,timezone). name may be - the empty string if only an email address was given. - """ - match = _WHO_AND_WHEN_RE.search(s) - if match: - datestr = match.group(3) - if self.date_parser is None: - # auto-detect the date format - if len(datestr.split(' ')) == 2: - format = 'raw' - elif datestr == 'now': - format = 'now' - else: - format = 'rfc2822' - self.date_parser = dates.DATE_PARSERS_BY_NAME[format] - when = self.date_parser(datestr, self.lineno) - else: - match = _WHO_RE.search(s) - if accept_just_who and match: - # HACK around missing time - # TODO: output a warning here - when = dates.DATE_PARSERS_BY_NAME['now']('now') - else: - self.abort(errors.BadFormat, cmd, section, s) - name = match.group(1) - if len(name) > 0: - if name[-1] == " ": - try: - name = name[:-1].decode('utf_8') - except UnicodeDecodeError: - # The spec says names are *typically* utf8 encoded - # but that isn't enforced by git-fast-export (at least) - name = name[:-1] - email = match.group(2) - # While it shouldn't happen, some datasets have email addresses - # which contain unicode characters. See bug 338186. We sanitize - # the data at this level just in case. - try: - email = "%s" % (email,) - except UnicodeDecodeError: - email = "%s" % (email.decode('utf_8'),) - return (name, email, when[0], when[1]) - - def _path(self, s): - """Parse a path.""" - if s.startswith('"'): - if s[-1] != '"': - self.abort(errors.BadFormat, '?', '?', s) - else: - return _unquote_c_string(s[1:-1]) - try: - return s.decode('utf_8') - except UnicodeDecodeError: - # The spec recommends utf8 encoding but that isn't enforced - return s - - def _path_pair(self, s): - """Parse two paths separated by a space.""" - # TODO: handle a space in the first path - if s.startswith('"'): - parts = s[1:].split('" ', 1) - else: - parts = s.split(' ', 1) - if len(parts) != 2: - self.abort(errors.BadFormat, '?', '?', s) - elif parts[1].startswith('"') and parts[1].endswith('"'): - parts[1] = parts[1][1:-1] - elif parts[1].startswith('"') or parts[1].endswith('"'): - self.abort(errors.BadFormat, '?', '?', s) - return map(_unquote_c_string, parts) - - def _mode(self, s): - """Parse a file mode into executable and symlink flags. - - :return (is_executable, is_symlink) - """ - # Note: Output from git-fast-export slightly different to spec - if s in ['644', '100644', '0100644']: - return False, False - elif s in ['755', '100755', '0100755']: - return True, False - elif s in ['120000', '0120000']: - return False, True - else: - self.abort(errors.BadFormat, 'filemodify', 'mode', s) - - -def _unquote_c_string(s): - """replace C-style escape sequences (\n, \", etc.) with real chars.""" - # HACK: Python strings are close enough - return s.decode('string_escape', 'replace') diff --git a/processor.py b/processor.py deleted file mode 100644 index 06b4871..0000000 --- a/processor.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Processor of import commands. - -This module provides core processing functionality including an abstract class -for basing real processors on. See the processors package for examples. -""" - -import sys -import time - -from bzrlib import debug -from bzrlib.errors import NotBranchError -from bzrlib.trace import ( - mutter, - note, - warning, - ) -import errors - - -class ImportProcessor(object): - """Base class for import processors. - - Subclasses should override the pre_*, post_* and *_handler - methods as appropriate. - """ - - known_params = [] - - def __init__(self, bzrdir, params=None, verbose=False, outf=None): - if outf is None: - self.outf = sys.stdout - else: - self.outf = outf - self.verbose = verbose - if params is None: - self.params = {} - else: - self.params = params - self.validate_parameters() - self.bzrdir = bzrdir - if bzrdir is None: - # Some 'importers' don't need a repository to write to - self.working_tree = None - self.branch = None - self.repo = None - else: - try: - # Might be inside a branch - (self.working_tree, self.branch) = bzrdir._get_tree_branch() - self.repo = self.branch.repository - except NotBranchError: - # Must be inside a repository - self.working_tree = None - self.branch = None - self.repo = bzrdir.open_repository() - - # Handlers can set this to request exiting cleanly without - # iterating through the remaining commands - self.finished = False - - def validate_parameters(self): - """Validate that the parameters are correctly specified.""" - for p in self.params: - if p not in self.known_params: - raise errors.UnknownParameter(p, self.known_params) - - def process(self, command_iter): - """Import data into Bazaar by processing a stream of commands. - - :param command_iter: an iterator providing commands - """ - if self.working_tree is not None: - self.working_tree.lock_write() - elif self.branch is not None: - self.branch.lock_write() - elif self.repo is not None: - self.repo.lock_write() - try: - self._process(command_iter) - finally: - # If an unhandled exception occurred, abort the write group - if self.repo is not None and self.repo.is_in_write_group(): - self.repo.abort_write_group() - # Release the locks - if self.working_tree is not None: - self.working_tree.unlock() - elif self.branch is not None: - self.branch.unlock() - elif self.repo is not None: - self.repo.unlock() - - def _process(self, command_iter): - self.pre_process() - for cmd in command_iter(): - try: - handler = self.__class__.__dict__[cmd.name + "_handler"] - except KeyError: - raise errors.MissingHandler(cmd.name) - else: - self.pre_handler(cmd) - handler(self, cmd) - self.post_handler(cmd) - if self.finished: - break - self.post_process() - - def note(self, msg, *args): - """Output a note but timestamp it.""" - msg = "%s %s" % (self._time_of_day(), msg) - note(msg, *args) - - def warning(self, msg, *args): - """Output a warning but timestamp it.""" - msg = "%s WARNING: %s" % (self._time_of_day(), msg) - warning(msg, *args) - - def debug(self, mgs, *args): - """Output a debug message if the appropriate -D option was given.""" - if "fast-import" in debug.debug_flags: - msg = "%s DEBUG: %s" % (self._time_of_day(), msg) - mutter(msg, *args) - - def _time_of_day(self): - """Time of day as a string.""" - # Note: this is a separate method so tests can patch in a fixed value - return time.strftime("%H:%M:%S") - - def pre_process(self): - """Hook for logic at start of processing.""" - pass - - def post_process(self): - """Hook for logic at end of processing.""" - pass - - def pre_handler(self, cmd): - """Hook for logic before each handler starts.""" - pass - - def post_handler(self, cmd): - """Hook for logic after each handler finishes.""" - pass - - def progress_handler(self, cmd): - """Process a ProgressCommand.""" - raise NotImplementedError(self.progress_handler) - - def blob_handler(self, cmd): - """Process a BlobCommand.""" - raise NotImplementedError(self.blob_handler) - - def checkpoint_handler(self, cmd): - """Process a CheckpointCommand.""" - raise NotImplementedError(self.checkpoint_handler) - - def commit_handler(self, cmd): - """Process a CommitCommand.""" - raise NotImplementedError(self.commit_handler) - - def reset_handler(self, cmd): - """Process a ResetCommand.""" - raise NotImplementedError(self.reset_handler) - - def tag_handler(self, cmd): - """Process a TagCommand.""" - raise NotImplementedError(self.tag_handler) - - -class CommitHandler(object): - """Base class for commit handling. - - Subclasses should override the pre_*, post_* and *_handler - methods as appropriate. - """ - - def __init__(self, command): - self.command = command - - def process(self): - self.pre_process_files() - for fc in self.command.file_iter(): - try: - handler = self.__class__.__dict__[fc.name[4:] + "_handler"] - except KeyError: - raise errors.MissingHandler(fc.name) - else: - handler(self, fc) - self.post_process_files() - - def note(self, msg, *args): - """Output a note but add context.""" - msg = "%s (%s)" % (msg, self.command.id) - note(msg, *args) - - def warning(self, msg, *args): - """Output a warning but add context.""" - msg = "WARNING: %s (%s)" % (msg, self.command.id) - warning(msg, *args) - - def mutter(self, msg, *args): - """Output a mutter but add context.""" - msg = "%s (%s)" % (msg, self.command.id) - mutter(msg, *args) - - def debug(self, msg, *args): - """Output a mutter if the appropriate -D option was given.""" - if "fast-import" in debug.debug_flags: - msg = "%s (%s)" % (msg, self.command.id) - mutter(msg, *args) - - def pre_process_files(self): - """Prepare for committing.""" - pass - - def post_process_files(self): - """Save the revision.""" - pass - - def modify_handler(self, filecmd): - """Handle a filemodify command.""" - raise NotImplementedError(self.modify_handler) - - def delete_handler(self, filecmd): - """Handle a filedelete command.""" - raise NotImplementedError(self.delete_handler) - - def copy_handler(self, filecmd): - """Handle a filecopy command.""" - raise NotImplementedError(self.copy_handler) - - def rename_handler(self, filecmd): - """Handle a filerename command.""" - raise NotImplementedError(self.rename_handler) - - def deleteall_handler(self, filecmd): - """Handle a filedeleteall command.""" - raise NotImplementedError(self.deleteall_handler) diff --git a/processors/filter_processor.py b/processors/filter_processor.py deleted file mode 100644 index 8284cb5..0000000 --- a/processors/filter_processor.py +++ /dev/null @@ -1,288 +0,0 @@ -# Copyright (C) 2009 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Import processor that filters the input (and doesn't import).""" - - -from bzrlib import osutils -from bzrlib.trace import ( - warning, - ) -from bzrlib.plugins.fastimport import ( - commands, - helpers, - processor, - ) - - -class FilterProcessor(processor.ImportProcessor): - """An import processor that filters the input to include/exclude objects. - - No changes to the current repository are made. - - Here are the supported parameters: - - * include_paths - a list of paths that commits must change in order to - be kept in the output stream - - * exclude_paths - a list of paths that should not appear in the output - stream - """ - - known_params = [ - 'include_paths', - 'exclude_paths', - ] - - def pre_process(self): - self.includes = self.params.get('include_paths') - self.excludes = self.params.get('exclude_paths') - # What's the new root, if any - self.new_root = helpers.common_directory(self.includes) - # Buffer of blobs until we know we need them: mark -> cmd - self.blobs = {} - # These are the commits we've output so far - self.interesting_commits = set() - # Map of commit-id to list of parents - self.parents = {} - - def pre_handler(self, cmd): - self.command = cmd - # Should this command be included in the output or not? - self.keep = False - # Blobs to dump into the output before dumping the command itself - self.referenced_blobs = [] - - def post_handler(self, cmd): - if not self.keep: - return - # print referenced blobs and the command - for blob_id in self.referenced_blobs: - self._print_command(self.blobs[blob_id]) - self._print_command(self.command) - - def progress_handler(self, cmd): - """Process a ProgressCommand.""" - # These always pass through - self.keep = True - - def blob_handler(self, cmd): - """Process a BlobCommand.""" - # These never pass through directly. We buffer them and only - # output them if referenced by an interesting command. - self.blobs[cmd.id] = cmd - self.keep = False - - def checkpoint_handler(self, cmd): - """Process a CheckpointCommand.""" - # These always pass through - self.keep = True - - def commit_handler(self, cmd): - """Process a CommitCommand.""" - # These pass through if they meet the filtering conditions - interesting_filecmds = self._filter_filecommands(cmd.file_iter) - if interesting_filecmds: - # If all we have is a single deleteall, skip this commit - if len(interesting_filecmds) == 1 and isinstance( - interesting_filecmds[0], commands.FileDeleteAllCommand): - pass - else: - # Remember just the interesting file commands - self.keep = True - cmd.file_iter = iter(interesting_filecmds) - - # Record the referenced blobs - for fc in interesting_filecmds: - if isinstance(fc, commands.FileModifyCommand): - if fc.dataref is not None: - self.referenced_blobs.append(fc.dataref) - - # Update from and merges to refer to commits in the output - cmd.from_ = self._find_interesting_from(cmd.from_) - cmd.merges = self._find_interesting_merges(cmd.merges) - self.interesting_commits.add(cmd.id) - - # Keep track of the parents - if cmd.from_ and cmd.merges: - parents = [cmd.from_] + cmd.merges - elif cmd.from_: - parents = [cmd.from_] - else: - parents = None - self.parents[":" + cmd.mark] = parents - - def reset_handler(self, cmd): - """Process a ResetCommand.""" - if cmd.from_ is None: - # We pass through resets that init a branch because we have to - # assume the branch might be interesting. - self.keep = True - else: - # Keep resets if they indirectly reference something we kept - cmd.from_ = self._find_interesting_from(cmd.from_) - self.keep = cmd.from_ is not None - - def tag_handler(self, cmd): - """Process a TagCommand.""" - # Keep tags if they indirectly reference something we kept - cmd.from_ = self._find_interesting_from(cmd.from_) - self.keep = cmd.from_ is not None - - def _print_command(self, cmd): - """Wrapper to avoid adding unnecessary blank lines.""" - text = repr(cmd) - self.outf.write(text) - if not text.endswith("\n"): - self.outf.write("\n") - - def _filter_filecommands(self, filecmd_iter): - """Return the filecommands filtered by includes & excludes. - - :return: a list of FileCommand objects - """ - if self.includes is None and self.excludes is None: - return list(filecmd_iter()) - - # Do the filtering, adjusting for the new_root - result = [] - for fc in filecmd_iter(): - if (isinstance(fc, commands.FileModifyCommand) or - isinstance(fc, commands.FileDeleteCommand)): - if self._path_to_be_kept(fc.path): - fc.path = self._adjust_for_new_root(fc.path) - else: - continue - elif isinstance(fc, commands.FileDeleteAllCommand): - pass - elif isinstance(fc, commands.FileRenameCommand): - fc = self._convert_rename(fc) - elif isinstance(fc, commands.FileCopyCommand): - fc = self._convert_copy(fc) - else: - warning("cannot handle FileCommands of class %s - ignoring", - fc.__class__) - continue - if fc is not None: - result.append(fc) - return result - - def _path_to_be_kept(self, path): - """Does the given path pass the filtering criteria?""" - if self.excludes and (path in self.excludes - or osutils.is_inside_any(self.excludes, path)): - return False - if self.includes: - return (path in self.includes - or osutils.is_inside_any(self.includes, path)) - return True - - def _adjust_for_new_root(self, path): - """Adjust a path given the new root directory of the output.""" - if self.new_root is None: - return path - elif path.startswith(self.new_root): - return path[len(self.new_root):] - else: - return path - - def _find_interesting_parent(self, commit_ref): - while True: - if commit_ref in self.interesting_commits: - return commit_ref - parents = self.parents.get(commit_ref) - if not parents: - return None - commit_ref = parents[0] - - def _find_interesting_from(self, commit_ref): - if commit_ref is None: - return None - return self._find_interesting_parent(commit_ref) - - def _find_interesting_merges(self, commit_refs): - if commit_refs is None: - return None - merges = [] - for commit_ref in commit_refs: - parent = self._find_interesting_parent(commit_ref) - if parent is not None: - merges.append(parent) - if merges: - return merges - else: - return None - - def _convert_rename(self, fc): - """Convert a FileRenameCommand into a new FileCommand. - - :return: None if the rename is being ignored, otherwise a - new FileCommand based on the whether the old and new paths - are inside or outside of the interesting locations. - """ - old = fc.old_path - new = fc.new_path - keep_old = self._path_to_be_kept(old) - keep_new = self._path_to_be_kept(new) - if keep_old and keep_new: - fc.old_path = self._adjust_for_new_root(old) - fc.new_path = self._adjust_for_new_root(new) - return fc - elif keep_old: - # The file has been renamed to a non-interesting location. - # Delete it! - old = self._adjust_for_new_root(old) - return commands.FileDeleteCommand(old) - elif keep_new: - # The file has been renamed into an interesting location - # We really ought to add it but we don't currently buffer - # the contents of all previous files and probably never want - # to. Maybe fast-import-info needs to be extended to - # remember all renames and a config file can be passed - # into here ala fast-import? - warning("cannot turn rename of %s into an add of %s yet" % - (old, new)) - return None - - def _convert_copy(self, fc): - """Convert a FileCopyCommand into a new FileCommand. - - :return: None if the copy is being ignored, otherwise a - new FileCommand based on the whether the source and destination - paths are inside or outside of the interesting locations. - """ - src = fc.src_path - dest = fc.dest_path - keep_src = self._path_to_be_kept(src) - keep_dest = self._path_to_be_kept(dest) - if keep_src and keep_dest: - fc.src_path = self._adjust_for_new_root(src) - fc.dest_path = self._adjust_for_new_root(dest) - return fc - elif keep_src: - # The file has been copied to a non-interesting location. - # Ignore it! - return None - elif keep_dest: - # The file has been copied into an interesting location - # We really ought to add it but we don't currently buffer - # the contents of all previous files and probably never want - # to. Maybe fast-import-info needs to be extended to - # remember all copies and a config file can be passed - # into here ala fast-import? - warning("cannot turn copy of %s into an add of %s yet" % - (src, dest)) - return None diff --git a/processors/generic_processor.py b/processors/generic_processor.py index 3f23c8b..43c933b 100644 --- a/processors/generic_processor.py +++ b/processors/generic_processor.py @@ -19,25 +19,34 @@ import time from bzrlib import ( - bzrdir, + debug, delta, errors, osutils, progress, ) from bzrlib.repofmt import pack_repo -from bzrlib.trace import note, mutter -import bzrlib.util.configobj.configobj as configobj +from bzrlib.trace import ( + mutter, + note, + warning, + ) +try: + import bzrlib.util.configobj.configobj as configobj +except ImportError: + import configobj from bzrlib.plugins.fastimport import ( branch_updater, - bzr_commit_handler, cache_manager, + marks_file, + revision_store, + ) +from fastimport import ( + commands, errors as plugin_errors, helpers, idmapfile, - marks_file, processor, - revision_store, ) @@ -51,8 +60,8 @@ _DEFAULT_AUTO_CHECKPOINT = 10000 _DEFAULT_AUTO_PACK = 4 # How many inventories to cache -_DEFAULT_INV_CACHE_SIZE = 10 -_DEFAULT_CHK_INV_CACHE_SIZE = 100 +_DEFAULT_INV_CACHE_SIZE = 1 +_DEFAULT_CHK_INV_CACHE_SIZE = 1 class GenericProcessor(processor.ImportProcessor): @@ -95,7 +104,7 @@ class GenericProcessor(processor.ImportProcessor): * autopack - pack every n checkpoints. The default is 4. * inv-cache - number of inventories to cache. - If not set, the default is 100 for CHK formats and 10 otherwise. + If not set, the default is 1. * mode - import algorithm to use: default, experimental or classic. @@ -118,13 +127,27 @@ class GenericProcessor(processor.ImportProcessor): def __init__(self, bzrdir, params=None, verbose=False, outf=None, prune_empty_dirs=True): - processor.ImportProcessor.__init__(self, bzrdir, params, verbose) + processor.ImportProcessor.__init__(self, params, verbose) self.prune_empty_dirs = prune_empty_dirs + self.bzrdir = bzrdir + try: + # Might be inside a branch + (self.working_tree, self.branch) = bzrdir._get_tree_branch() + self.repo = self.branch.repository + except errors.NotBranchError: + # Must be inside a repository + self.working_tree = None + self.branch = None + self.repo = bzrdir.open_repository() def pre_process(self): - self.note("Starting import ...") self._start_time = time.time() self._load_info_and_params() + if self.total_commits: + self.note("Starting import of %d commits ..." % + (self.total_commits,)) + else: + self.note("Starting import ...") self.cache_mgr = cache_manager.CacheManager(self.info, self.verbose, self.inventory_cache_size) @@ -174,6 +197,7 @@ class GenericProcessor(processor.ImportProcessor): self.repo.start_write_group() def _load_info_and_params(self): + from bzrlib.plugins.fastimport import bzr_commit_handler self._mode = bool(self.params.get('mode', 'default')) self._experimental = self._mode == 'experimental' @@ -269,6 +293,31 @@ class GenericProcessor(processor.ImportProcessor): self.repo, self.inventory_cache_size, fulltext_when=fulltext_when) + def process(self, command_iter): + """Import data into Bazaar by processing a stream of commands. + + :param command_iter: an iterator providing commands + """ + if self.working_tree is not None: + self.working_tree.lock_write() + elif self.branch is not None: + self.branch.lock_write() + elif self.repo is not None: + self.repo.lock_write() + try: + super(GenericProcessor, self)._process(command_iter) + finally: + # If an unhandled exception occurred, abort the write group + if self.repo is not None and self.repo.is_in_write_group(): + self.repo.abort_write_group() + # Release the locks + if self.working_tree is not None: + self.working_tree.unlock() + elif self.branch is not None: + self.branch.unlock() + elif self.repo is not None: + self.repo.unlock() + def _process(self, command_iter): # if anything goes wrong, abort the write group if any try: @@ -287,15 +336,16 @@ class GenericProcessor(processor.ImportProcessor): marks_file.export_marks(self.params.get("export-marks"), self.cache_mgr.revision_ids) - if self.cache_mgr.last_ref == None: + if self.cache_mgr.reftracker.last_ref == None: """Nothing to refresh""" return # Update the branches self.note("Updating branch information ...") updater = branch_updater.BranchUpdater(self.repo, self.branch, - self.cache_mgr, helpers.invert_dictset(self.cache_mgr.heads), - self.cache_mgr.last_ref, self.tags) + self.cache_mgr, helpers.invert_dictset( + self.cache_mgr.reftracker.heads), + self.cache_mgr.reftracker.last_ref, self.tags) branches_updated, branches_lost = updater.update() self._branch_count = len(branches_updated) @@ -460,19 +510,19 @@ class GenericProcessor(processor.ImportProcessor): def commit_handler(self, cmd): """Process a CommitCommand.""" if self.skip_total and self._revision_count < self.skip_total: - self.cache_mgr.track_heads(cmd) + self.cache_mgr.reftracker.track_heads(cmd) # Check that we really do know about this commit-id if not self.cache_mgr.revision_ids.has_key(cmd.id): raise plugin_errors.BadRestart(cmd.id) - # Consume the file commands and free any non-sticky blobs - for fc in cmd.file_iter(): - pass self.cache_mgr._blobs = {} self._revision_count += 1 + if cmd.ref.startswith('refs/tags/'): + tag_name = cmd.ref[len('refs/tags/'):] + self._set_tag(tag_name, cmd.id) return if self.first_incremental_commit: self.first_incremental_commit = None - parents = self.cache_mgr.track_heads(cmd) + parents = self.cache_mgr.reftracker.track_heads(cmd) # 'Commit' the revision and report progress handler = self.commit_handler_factory(cmd, self.cache_mgr, @@ -487,6 +537,10 @@ class GenericProcessor(processor.ImportProcessor): self._revision_count += 1 self.report_progress("(%s)" % cmd.id) + if cmd.ref.startswith('refs/tags/'): + tag_name = cmd.ref[len('refs/tags/'):] + self._set_tag(tag_name, cmd.id) + # Check if we should finish up or automatically checkpoint if (self.max_commits is not None and self._revision_count >= self.max_commits): @@ -514,8 +568,10 @@ class GenericProcessor(processor.ImportProcessor): def progress_handler(self, cmd): """Process a ProgressCommand.""" - # We could use a progress bar here instead - self.note("progress %s" % (cmd.message,)) + # Most progress messages embedded in streams are annoying. + # Ignore them unless in verbose mode. + if self.verbose: + self.note("progress %s" % (cmd.message,)) def reset_handler(self, cmd): """Process a ResetCommand.""" @@ -529,7 +585,7 @@ class GenericProcessor(processor.ImportProcessor): return if cmd.from_ is not None: - self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_) + self.cache_mgr.reftracker.track_heads_for_ref(cmd.ref, cmd.from_) def tag_handler(self, cmd): """Process a TagCommand.""" @@ -543,3 +599,25 @@ class GenericProcessor(processor.ImportProcessor): bzr_tag_name = name.decode('utf-8', 'replace') bzr_rev_id = self.cache_mgr.revision_ids[from_] self.tags[bzr_tag_name] = bzr_rev_id + + def feature_handler(self, cmd): + """Process a FeatureCommand.""" + feature = cmd.feature_name + if feature not in commands.FEATURE_NAMES: + raise plugin_errors.UnknownFeature(feature) + + def debug(self, mgs, *args): + """Output a debug message if the appropriate -D option was given.""" + if "fast-import" in debug.debug_flags: + msg = "%s DEBUG: %s" % (self._time_of_day(), msg) + mutter(msg, *args) + + def note(self, msg, *args): + """Output a note but timestamp it.""" + msg = "%s %s" % (self._time_of_day(), msg) + note(msg, *args) + + def warning(self, msg, *args): + """Output a warning but timestamp it.""" + msg = "%s WARNING: %s" % (self._time_of_day(), msg) + warning(msg, *args) diff --git a/processors/info_processor.py b/processors/info_processor.py deleted file mode 100644 index e90418c..0000000 --- a/processors/info_processor.py +++ /dev/null @@ -1,281 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Import processor that dump stats about the input (and doesn't import).""" - - -from bzrlib.trace import ( - note, - warning, - ) -from bzrlib.plugins.fastimport import ( - cache_manager, - commands, - helpers, - processor, - ) - - -class InfoProcessor(processor.ImportProcessor): - """An import processor that dumps statistics about the input. - - No changes to the current repository are made. - - As well as providing useful information about an import - stream before importing it, this processor is useful for - benchmarking the speed at which data can be extracted from - the source. - """ - - def __init__(self, target=None, params=None, verbose=0, outf=None): - # Allow creation without a target - processor.ImportProcessor.__init__(self, target, params, verbose, - outf=outf) - - def pre_process(self): - self.note("Collecting statistics ...") - # Init statistics - self.cmd_counts = {} - for cmd in commands.COMMAND_NAMES: - self.cmd_counts[cmd] = 0 - self.file_cmd_counts = {} - for fc in commands.FILE_COMMAND_NAMES: - self.file_cmd_counts[fc] = 0 - self.parent_counts = {} - self.max_parent_count = 0 - self.committers = set() - self.separate_authors_found = False - self.symlinks_found = False - self.executables_found = False - self.sha_blob_references = False - self.lightweight_tags = 0 - # Blob usage tracking - self.blobs = {} - for usage in ['new', 'used', 'unknown', 'unmarked']: - self.blobs[usage] = set() - self.blob_ref_counts = {} - # Head tracking - delegate to the cache manager - self.cache_mgr = cache_manager.CacheManager(inventory_cache_size=0) - # Stuff to cache: a map from mark to # of times that mark is merged - self.merges = {} - # Stuff to cache: these are maps from mark to sets - self.rename_old_paths = {} - self.copy_source_paths = {} - - def post_process(self): - # Dump statistics - cmd_names = commands.COMMAND_NAMES - fc_names = commands.FILE_COMMAND_NAMES - cmd_values = [self.cmd_counts[c] for c in cmd_names] - fc_values = [self.file_cmd_counts[c] for c in fc_names] - self._dump_stats_group("Command counts", cmd_names, cmd_values, str) - self._dump_stats_group("File command counts", fc_names, fc_values, str) - - # Commit stats - if self.cmd_counts['commit']: - p_names = [] - p_values = [] - for i in xrange(0, self.max_parent_count + 1): - if i in self.parent_counts: - count = self.parent_counts[i] - p_names.append("parents-%d" % i) - p_values.append(count) - merges_count = len(self.merges.keys()) - p_names.append('total revisions merged') - p_values.append(merges_count) - flags = { - 'separate authors found': self.separate_authors_found, - 'executables': self.executables_found, - 'symlinks': self.symlinks_found, - 'blobs referenced by SHA': self.sha_blob_references, - } - self._dump_stats_group("Parent counts", p_names, p_values, str) - self._dump_stats_group("Commit analysis", flags.keys(), - flags.values(), _found) - heads = helpers.invert_dictset(self.cache_mgr.heads) - self._dump_stats_group("Head analysis", heads.keys(), - heads.values(), None, _iterable_as_config_list) - # note("\t%d\t%s" % (len(self.committers), 'unique committers')) - self._dump_stats_group("Merges", self.merges.keys(), - self.merges.values(), None) - # We only show the rename old path and copy source paths when -vv - # (verbose=2) is specified. The output here for mysql's data can't - # be parsed currently so this bit of code needs more work anyhow .. - if self.verbose >= 2: - self._dump_stats_group("Rename old paths", - self.rename_old_paths.keys(), - self.rename_old_paths.values(), len, - _iterable_as_config_list) - self._dump_stats_group("Copy source paths", - self.copy_source_paths.keys(), - self.copy_source_paths.values(), len, - _iterable_as_config_list) - - # Blob stats - if self.cmd_counts['blob']: - # In verbose mode, don't list every blob used - if self.verbose: - del self.blobs['used'] - self._dump_stats_group("Blob usage tracking", self.blobs.keys(), - self.blobs.values(), len, _iterable_as_config_list) - if self.blob_ref_counts: - blobs_by_count = helpers.invert_dict(self.blob_ref_counts) - self._dump_stats_group("Blob reference counts", - blobs_by_count.keys(), - blobs_by_count.values(), len, _iterable_as_config_list) - - # Other stats - if self.cmd_counts['reset']: - reset_stats = { - 'lightweight tags': self.lightweight_tags, - } - self._dump_stats_group("Reset analysis", reset_stats.keys(), - reset_stats.values()) - - def _dump_stats_group(self, title, names, values, normal_formatter=None, - verbose_formatter=None): - """Dump a statistics group. - - In verbose mode, do so as a config file so - that other processors can load the information if they want to. - :param normal_formatter: the callable to apply to the value - before displaying it in normal mode - :param verbose_formatter: the callable to apply to the value - before displaying it in verbose mode - """ - if self.verbose: - self.outf.write("[%s]\n" % (title,)) - for name, value in zip(names, values): - if verbose_formatter is not None: - value = verbose_formatter(value) - if type(name) == str: - name = name.replace(' ', '-') - self.outf.write("%s = %s\n" % (name, value)) - self.outf.write("\n") - else: - self.outf.write("%s:\n" % (title,)) - for name, value in zip(names, values): - if normal_formatter is not None: - value = normal_formatter(value) - self.outf.write("\t%s\t%s\n" % (value, name)) - - def progress_handler(self, cmd): - """Process a ProgressCommand.""" - self.cmd_counts[cmd.name] += 1 - - def blob_handler(self, cmd): - """Process a BlobCommand.""" - self.cmd_counts[cmd.name] += 1 - if cmd.mark is None: - self.blobs['unmarked'].add(cmd.id) - else: - self.blobs['new'].add(cmd.id) - # Marks can be re-used so remove it from used if already there. - # Note: we definitely do NOT want to remove it from multi if - # it's already in that set. - try: - self.blobs['used'].remove(cmd.id) - except KeyError: - pass - - def checkpoint_handler(self, cmd): - """Process a CheckpointCommand.""" - self.cmd_counts[cmd.name] += 1 - - def commit_handler(self, cmd): - """Process a CommitCommand.""" - self.cmd_counts[cmd.name] += 1 - self.committers.add(cmd.committer) - if cmd.author is not None: - self.separate_authors_found = True - for fc in cmd.file_iter(): - self.file_cmd_counts[fc.name] += 1 - if isinstance(fc, commands.FileModifyCommand): - if fc.is_executable: - self.executables_found = True - if fc.kind == commands.SYMLINK_KIND: - self.symlinks_found = True - if fc.dataref is not None: - if fc.dataref[0] == ':': - self._track_blob(fc.dataref) - else: - self.sha_blob_references = True - elif isinstance(fc, commands.FileRenameCommand): - self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path) - elif isinstance(fc, commands.FileCopyCommand): - self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path) - - # Track the heads - parents = self.cache_mgr.track_heads(cmd) - - # Track the parent counts - parent_count = len(parents) - if self.parent_counts.has_key(parent_count): - self.parent_counts[parent_count] += 1 - else: - self.parent_counts[parent_count] = 1 - if parent_count > self.max_parent_count: - self.max_parent_count = parent_count - - # Remember the merges - if cmd.merges: - #self.merges.setdefault(cmd.ref, set()).update(cmd.merges) - for merge in cmd.merges: - if merge in self.merges: - self.merges[merge] += 1 - else: - self.merges[merge] = 1 - - def reset_handler(self, cmd): - """Process a ResetCommand.""" - self.cmd_counts[cmd.name] += 1 - if cmd.ref.startswith('refs/tags/'): - self.lightweight_tags += 1 - else: - if cmd.from_ is not None: - self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_) - - def tag_handler(self, cmd): - """Process a TagCommand.""" - self.cmd_counts[cmd.name] += 1 - - def _track_blob(self, mark): - if mark in self.blob_ref_counts: - self.blob_ref_counts[mark] += 1 - pass - elif mark in self.blobs['used']: - self.blob_ref_counts[mark] = 2 - self.blobs['used'].remove(mark) - elif mark in self.blobs['new']: - self.blobs['used'].add(mark) - self.blobs['new'].remove(mark) - else: - self.blobs['unknown'].add(mark) - -def _found(b): - """Format a found boolean as a string.""" - return ['no', 'found'][b] - -def _iterable_as_config_list(s): - """Format an iterable as a sequence of comma-separated strings. - - To match what ConfigObj expects, a single item list has a trailing comma. - """ - items = sorted(s) - if len(items) == 1: - return "%s," % (items[0],) - else: - return ", ".join(items) diff --git a/processors/query_processor.py b/processors/query_processor.py deleted file mode 100644 index dfee745..0000000 --- a/processors/query_processor.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Import processor that queries the input (and doesn't import).""" - - -from bzrlib.plugins.fastimport import ( - commands, - processor, - ) - - -class QueryProcessor(processor.ImportProcessor): - """An import processor that queries the input. - - No changes to the current repository are made. - """ - - known_params = commands.COMMAND_NAMES + commands.FILE_COMMAND_NAMES - - def __init__(self, target=None, params=None, verbose=False): - # Allow creation without a target - processor.ImportProcessor.__init__(self, target, params, verbose) - self.parsed_params = {} - if params: - for name, value in params.iteritems(): - if value == 1: - # All fields - fields = None - else: - fields = value.split(',') - self.parsed_params[name] = fields - - def pre_handler(self, cmd): - """Hook for logic before each handler starts.""" - if self.parsed_params.has_key(cmd.name): - fields = self.parsed_params[cmd.name] - str = cmd.dump_str(fields, self.parsed_params, self.verbose) - print "%s" % (str,) - - def progress_handler(self, cmd): - """Process a ProgressCommand.""" - pass - - def blob_handler(self, cmd): - """Process a BlobCommand.""" - pass - - def checkpoint_handler(self, cmd): - """Process a CheckpointCommand.""" - pass - - def commit_handler(self, cmd): - """Process a CommitCommand.""" - for fc in cmd.file_iter(): - pass - - def reset_handler(self, cmd): - """Process a ResetCommand.""" - pass - - def tag_handler(self, cmd): - """Process a TagCommand.""" - pass diff --git a/revision_store.py b/revision_store.py index d2ab2d3..4ec4ba3 100644 --- a/revision_store.py +++ b/revision_store.py @@ -1,4 +1,4 @@ -# Copyright (C) 2008 Canonical Ltd +# Copyright (C) 2008, 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,9 +16,146 @@ """An abstraction of a repository providing just the bits importing needs.""" +import cStringIO -from bzrlib import errors, inventory, knit, lru_cache, osutils -from bzrlib import revision as _mod_revision +from bzrlib import ( + errors, + graph as _mod_graph, + inventory, + knit, + lru_cache, + osutils, + revision as _mod_revision, + trace, + ) + + +class _TreeShim(object): + """Fake a Tree implementation. + + This implements just enough of the tree api to make commit builder happy. + """ + + def __init__(self, repo, basis_inv, inv_delta, content_provider): + self._repo = repo + self._content_provider = content_provider + self._basis_inv = basis_inv + self._inv_delta = inv_delta + self._new_info_by_id = dict([(file_id, (new_path, ie)) + for _, new_path, file_id, ie in inv_delta]) + + def id2path(self, file_id): + if file_id in self._new_info_by_id: + new_path = self._new_info_by_id[file_id][0] + if new_path is None: + raise errors.NoSuchId(self, file_id) + return new_path + return self._basis_inv.id2path(file_id) + + def path2id(self, path): + # CommitBuilder currently only requires access to the root id. We don't + # build a map of renamed files, etc. One possibility if we ever *do* + # need more than just root, is to defer to basis_inv.path2id() and then + # check if the file_id is in our _new_info_by_id dict. And in that + # case, return _new_info_by_id[file_id][0] + if path != '': + raise NotImplementedError(_TreeShim.path2id) + # TODO: Handle root renames? + return self._basis_inv.root.file_id + + def get_file_with_stat(self, file_id, path=None): + content = self.get_file_text(file_id, path) + sio = cStringIO.StringIO(content) + return sio, None + + def get_file_text(self, file_id, path=None): + try: + return self._content_provider(file_id) + except KeyError: + # The content wasn't shown as 'new'. Just validate this fact + assert file_id not in self._new_info_by_id + old_ie = self._basis_inv[file_id] + old_text_key = (file_id, old_ie.revision) + stream = self._repo.texts.get_record_stream([old_text_key], + 'unordered', True) + return stream.next().get_bytes_as('fulltext') + + def get_symlink_target(self, file_id): + if file_id in self._new_info_by_id: + ie = self._new_info_by_id[file_id][1] + return ie.symlink_target + return self._basis_inv[file_id].symlink_target + + def get_reference_revision(self, file_id, path=None): + raise NotImplementedError(_TreeShim.get_reference_revision) + + def _delta_to_iter_changes(self): + """Convert the inv_delta into an iter_changes repr.""" + # iter_changes is: + # (file_id, + # (old_path, new_path), + # content_changed, + # (old_versioned, new_versioned), + # (old_parent_id, new_parent_id), + # (old_name, new_name), + # (old_kind, new_kind), + # (old_exec, new_exec), + # ) + basis_inv = self._basis_inv + for old_path, new_path, file_id, ie in self._inv_delta: + # Perf: Would this be faster if we did 'if file_id in basis_inv'? + # Since the *very* common case is that the file already exists, it + # probably is better to optimize for that + try: + old_ie = basis_inv[file_id] + except errors.NoSuchId: + old_ie = None + if ie is None: + raise AssertionError('How is both old and new None?') + change = (file_id, + (old_path, new_path), + False, + (False, False), + (None, None), + (None, None), + (None, None), + (None, None), + ) + change = (file_id, + (old_path, new_path), + True, + (False, True), + (None, ie.parent_id), + (None, ie.name), + (None, ie.kind), + (None, ie.executable), + ) + else: + if ie is None: + change = (file_id, + (old_path, new_path), + True, + (True, False), + (old_ie.parent_id, None), + (old_ie.name, None), + (old_ie.kind, None), + (old_ie.executable, None), + ) + else: + content_modified = (ie.text_sha1 != old_ie.text_sha1 + or ie.text_size != old_ie.text_size) + # TODO: ie.kind != old_ie.kind + # TODO: symlinks changing targets, content_modified? + change = (file_id, + (old_path, new_path), + content_modified, + (True, True), + (old_ie.parent_id, ie.parent_id), + (old_ie.name, ie.name), + (old_ie.kind, ie.kind), + (old_ie.executable, ie.executable), + ) + yield change class AbstractRevisionStore(object): @@ -33,6 +170,8 @@ class AbstractRevisionStore(object): :param repository: the target repository """ self.repo = repo + self._graph = None + self._use_known_graph = True self._supports_chks = getattr(repo._format, 'supports_chks', False) def expects_rich_root(self): @@ -224,29 +363,66 @@ class AbstractRevisionStore(object): including an empty inventory for the missing revisions If None, a default implementation is provided. """ - # Get the non-ghost parents and their inventories - if inventories_provider is None: - inventories_provider = self._default_inventories_provider - present_parents, parent_invs = inventories_provider(rev.parent_ids) - - # Load the inventory - try: - rev_id = rev.revision_id - rev.inventory_sha1, inv = self._add_inventory_by_delta( - rev_id, basis_inv, inv_delta, present_parents, parent_invs) - except errors.RevisionAlreadyPresent: + # TODO: set revision_id = rev.revision_id + builder = self.repo._commit_builder_class(self.repo, + parents=rev.parent_ids, config=None, timestamp=rev.timestamp, + timezone=rev.timezone, committer=rev.committer, + revprops=rev.properties, revision_id=rev.revision_id) + if self._graph is None and self._use_known_graph: + if (getattr(_mod_graph, 'GraphThunkIdsToKeys', None) and + getattr(_mod_graph.GraphThunkIdsToKeys, "add_node", None) and + getattr(self.repo, "get_known_graph_ancestry", None)): + self._graph = self.repo.get_known_graph_ancestry( + rev.parent_ids) + else: + self._use_known_graph = False + if self._graph is not None: + orig_heads = builder._heads + def thunked_heads(file_id, revision_ids): + # self._graph thinks in terms of keys, not ids, so translate + # them + # old_res = orig_heads(file_id, revision_ids) + if len(revision_ids) < 2: + res = set(revision_ids) + else: + res = set(self._graph.heads(revision_ids)) + # if old_res != res: + # import pdb; pdb.set_trace() + return res + builder._heads = thunked_heads + + if rev.parent_ids: + basis_rev_id = rev.parent_ids[0] + else: + basis_rev_id = _mod_revision.NULL_REVISION + tree = _TreeShim(self.repo, basis_inv, inv_delta, text_provider) + changes = tree._delta_to_iter_changes() + for (file_id, path, fs_hash) in builder.record_iter_changes( + tree, basis_rev_id, changes): + # So far, we don't *do* anything with the result pass + builder.finish_inventory() + # TODO: This is working around a bug in the bzrlib code base. + # 'builder.finish_inventory()' ends up doing: + # self.inv_sha1 = self.repository.add_inventory_by_delta(...) + # However, add_inventory_by_delta returns (sha1, inv) + # And we *want* to keep a handle on both of those objects + if isinstance(builder.inv_sha1, tuple): + builder.inv_sha1, builder.new_inventory = builder.inv_sha1 + # This is a duplicate of Builder.commit() since we already have the + # Revision object, and we *don't* want to call commit_write_group() + rev.inv_sha1 = builder.inv_sha1 + builder.repository.add_revision(builder._new_revision_id, rev, + builder.new_inventory, builder._config) + if self._graph is not None: + # TODO: Use StaticTuple and .intern() for these things + self._graph.add_node(builder._new_revision_id, rev.parent_ids) - # Load the texts, signature and revision - file_rev_ids_needing_texts = [(id, ie.revision) - for _, n, id, ie in inv_delta - if n is not None and ie.revision == rev_id] - self._load_texts_for_file_rev_ids(file_rev_ids_needing_texts, - text_provider, parents_provider) if signature is not None: - self.repo.add_signature_text(rev_id, signature) - self._add_revision(rev, inv) - return inv + raise AssertionError('signatures not guaranteed yet') + self.repo.add_signature_text(rev.revision_id, signature) + # self._add_revision(rev, inv) + return builder.revision_tree().inventory def _non_root_entries_iter(self, inv, revision_id): if hasattr(inv, 'iter_non_root_entries'): @@ -305,14 +481,19 @@ class AbstractRevisionStore(object): """ if len(parents): if self._supports_chks: - validator, new_inv = self.repo.add_inventory_by_delta(parents[0], - inv_delta, revision_id, parents, basis_inv=basis_inv, - propagate_caches=False) + try: + validator, new_inv = self.repo.add_inventory_by_delta(parents[0], + inv_delta, revision_id, parents, basis_inv=basis_inv, + propagate_caches=False) + except errors.InconsistentDelta: + #print "BASIS INV IS\n%s\n" % "\n".join([str(i) for i in basis_inv.iter_entries_by_dir()]) + trace.mutter("INCONSISTENT DELTA IS:\n%s\n" % "\n".join([str(i) for i in inv_delta])) + raise else: validator, new_inv = self.repo.add_inventory_by_delta(parents[0], inv_delta, revision_id, parents) else: - if hasattr(basis_inv, 'create_by_apply_delta'): + if isinstance(basis_inv, inventory.CHKInventory): new_inv = basis_inv.create_by_apply_delta(inv_delta, revision_id) else: new_inv = inventory.Inventory(revision_id=revision_id) @@ -3,12 +3,12 @@ from distutils.core import setup bzr_plugin_name = 'fastimport' -bzr_plugin_version = (0, 9, 0, 'dev', 0) +bzr_plugin_version = (0, 10, 0, 'dev', 0) bzr_minimum_version = (1, 1, 0) bzr_maximum_version = None if __name__ == '__main__': - setup(name="fastimport", + setup(name="bzr-fastimport", version="0.9.0dev0", description="stream-based import into and export from Bazaar.", author="Canonical Ltd", @@ -17,6 +17,7 @@ if __name__ == '__main__': url="https://launchpad.net/bzr-fastimport", scripts=[], packages=['bzrlib.plugins.fastimport', + 'bzrlib.plugins.fastimport.exporters', 'bzrlib.plugins.fastimport.processors', 'bzrlib.plugins.fastimport.tests', ], diff --git a/tests/__init__.py b/tests/__init__.py index 711b605..47441e6 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -17,19 +17,35 @@ """Tests for bzr-fastimport.""" -from bzrlib.tests.TestUtil import TestLoader, TestSuite +from bzrlib import errors as bzr_errors +from bzrlib.tests import Feature, TestLoader +from bzrlib.plugins.fastimport import load_fastimport + + +class _FastimportFeature(Feature): + + def _probe(self): + try: + load_fastimport() + except bzr_errors.DependencyNotPresent: + return False + return True + + def feature_name(self): + return 'fastimport' + + +FastimportFeature = _FastimportFeature() + def test_suite(): - module_names = [ - 'bzrlib.plugins.fastimport.tests.test_branch_mapper', - 'bzrlib.plugins.fastimport.tests.test_commands', - 'bzrlib.plugins.fastimport.tests.test_errors', - 'bzrlib.plugins.fastimport.tests.test_filter_processor', - 'bzrlib.plugins.fastimport.tests.test_generic_processor', - 'bzrlib.plugins.fastimport.tests.test_head_tracking', - 'bzrlib.plugins.fastimport.tests.test_helpers', - 'bzrlib.plugins.fastimport.tests.test_parser', - ] + module_names = [__name__ + '.' + x for x in [ + 'test_commands', + 'test_exporter', + 'test_branch_mapper', + 'test_generic_processor', + 'test_revision_store', + ]] loader = TestLoader() return loader.loadTestsFromModuleNames(module_names) diff --git a/tests/test_branch_mapper.py b/tests/test_branch_mapper.py index fe1b533..6d6f170 100644 --- a/tests/test_branch_mapper.py +++ b/tests/test_branch_mapper.py @@ -22,47 +22,49 @@ from bzrlib.plugins.fastimport import ( branch_mapper, ) +from bzrlib.plugins.fastimport.tests import ( + FastimportFeature, + ) + class TestBranchMapper(tests.TestCase): + _test_needs_features = [FastimportFeature] + def test_git_to_bzr(self): m = branch_mapper.BranchMapper() - git_refs = [ - 'refs/heads/master', - 'refs/heads/foo', - 'refs/tags/master', - 'refs/tags/foo', - 'refs/remotes/origin/master', - 'refs/remotes/origin/foo', - ] - git_to_bzr_map = m.git_to_bzr(git_refs) - self.assertEqual(git_to_bzr_map, { + for git, bzr in { 'refs/heads/master': 'trunk', 'refs/heads/foo': 'foo', 'refs/tags/master': 'trunk.tag', 'refs/tags/foo': 'foo.tag', 'refs/remotes/origin/master': 'trunk.remote', 'refs/remotes/origin/foo': 'foo.remote', - }) + }.items(): + self.assertEqual(m.git_to_bzr(git), bzr) + + def test_git_to_bzr_with_slashes(self): + m = branch_mapper.BranchMapper() + for git, bzr in { + 'refs/heads/master/slave': 'master/slave', + 'refs/heads/foo/bar': 'foo/bar', + 'refs/tags/master/slave': 'master/slave.tag', + 'refs/tags/foo/bar': 'foo/bar.tag', + 'refs/remotes/origin/master/slave': 'master/slave.remote', + 'refs/remotes/origin/foo/bar': 'foo/bar.remote', + }.items(): + self.assertEqual(m.git_to_bzr(git), bzr) def test_git_to_bzr_for_trunk(self): # As 'master' in git is mapped to trunk in bzr, we need to handle # 'trunk' in git in a sensible way. m = branch_mapper.BranchMapper() - git_refs = [ - 'refs/heads/trunk', - 'refs/tags/trunk', - 'refs/remotes/origin/trunk', - 'refs/heads/git-trunk', - 'refs/tags/git-trunk', - 'refs/remotes/origin/git-trunk', - ] - git_to_bzr_map = m.git_to_bzr(git_refs) - self.assertEqual(git_to_bzr_map, { + for git, bzr in { 'refs/heads/trunk': 'git-trunk', 'refs/tags/trunk': 'git-trunk.tag', 'refs/remotes/origin/trunk': 'git-trunk.remote', 'refs/heads/git-trunk': 'git-git-trunk', 'refs/tags/git-trunk': 'git-git-trunk.tag', 'refs/remotes/origin/git-trunk':'git-git-trunk.remote', - }) + }.items(): + self.assertEqual(m.git_to_bzr(git), bzr) diff --git a/tests/test_commands.py b/tests/test_commands.py index 5eb9418..81a43c8 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -1,4 +1,4 @@ -# Copyright (C) 2009 Canonical Ltd +# Copyright (C) 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,268 +14,45 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -"""Test how Commands are displayed""" +"""Test the command implementations.""" + +import os +import tempfile +import gzip from bzrlib import tests -from bzrlib.plugins.fastimport import ( - commands, +from bzrlib.plugins.fastimport.cmds import ( + _get_source_stream, ) - -class TestBlobDisplay(tests.TestCase): - - def test_blob(self): - c = commands.BlobCommand("1", "hello world") - self.assertEqual("blob\nmark :1\ndata 11\nhello world", repr(c)) - - def test_blob_no_mark(self): - c = commands.BlobCommand(None, "hello world") - self.assertEqual("blob\ndata 11\nhello world", repr(c)) - - -class TestCheckpointDisplay(tests.TestCase): - - def test_checkpoint(self): - c = commands.CheckpointCommand() - self.assertEqual("checkpoint", repr(c)) - - -class TestCommitDisplay(tests.TestCase): - - def test_commit(self): - # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) - committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) - c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, - "release v1.0", ":aaa", None, None) - self.assertEqualDiff( - "commit refs/heads/master\n" - "mark :bbb\n" - "committer Joe Wong <joe@example.com> 1234567890 -0600\n" - "data 12\n" - "release v1.0\n" - "from :aaa", - repr(c)) - - def test_commit_unicode_committer(self): - # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) - name = u'\u013d\xf3r\xe9m \xcdp\u0161\xfam' - name_utf8 = name.encode('utf8') - committer = (name, 'test@example.com', 1234567890, -6 * 3600) - c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, - "release v1.0", ":aaa", None, None) - self.assertEqualDiff( - "commit refs/heads/master\n" - "mark :bbb\n" - "committer %s <test@example.com> 1234567890 -0600\n" - "data 12\n" - "release v1.0\n" - "from :aaa" % (name_utf8,), - repr(c)) - - def test_commit_no_mark(self): - # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) - committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) - c = commands.CommitCommand("refs/heads/master", None, None, committer, - "release v1.0", ":aaa", None, None) - self.assertEqualDiff( - "commit refs/heads/master\n" - "committer Joe Wong <joe@example.com> 1234567890 -0600\n" - "data 12\n" - "release v1.0\n" - "from :aaa", - repr(c)) - - def test_commit_no_from(self): - # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) - committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) - c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, - "release v1.0", None, None, None) - self.assertEqualDiff( - "commit refs/heads/master\n" - "mark :bbb\n" - "committer Joe Wong <joe@example.com> 1234567890 -0600\n" - "data 12\n" - "release v1.0", - repr(c)) - - def test_commit_with_author(self): - # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) - author = ('Sue Wong', 'sue@example.com', 1234565432, -6 * 3600) - committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) - c = commands.CommitCommand("refs/heads/master", "bbb", author, - committer, "release v1.0", ":aaa", None, None) - self.assertEqualDiff( - "commit refs/heads/master\n" - "mark :bbb\n" - "author Sue Wong <sue@example.com> 1234565432 -0600\n" - "committer Joe Wong <joe@example.com> 1234567890 -0600\n" - "data 12\n" - "release v1.0\n" - "from :aaa", - repr(c)) - - def test_commit_with_merges(self): - # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) - committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) - c = commands.CommitCommand("refs/heads/master", "ddd", None, committer, - "release v1.0", ":aaa", [':bbb', ':ccc'], None) - self.assertEqualDiff( - "commit refs/heads/master\n" - "mark :ddd\n" - "committer Joe Wong <joe@example.com> 1234567890 -0600\n" - "data 12\n" - "release v1.0\n" - "from :aaa\n" - "merge :bbb\n" - "merge :ccc", - repr(c)) - - def test_commit_with_filecommands(self): - file_cmds = iter([ - commands.FileDeleteCommand('readme.txt'), - commands.FileModifyCommand('NEWS', 'file', False, None, - 'blah blah blah'), - ]) - # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) - committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) - c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, - "release v1.0", ":aaa", None, file_cmds) - self.assertEqualDiff( - "commit refs/heads/master\n" - "mark :bbb\n" - "committer Joe Wong <joe@example.com> 1234567890 -0600\n" - "data 12\n" - "release v1.0\n" - "from :aaa\n" - "D readme.txt\n" - "M 644 inline NEWS\n" - "data 14\n" - "blah blah blah", - repr(c)) - - -class TestProgressDisplay(tests.TestCase): - - def test_progress(self): - c = commands.ProgressCommand("doing foo") - self.assertEqual("progress doing foo", repr(c)) - - -class TestResetDisplay(tests.TestCase): - - def test_reset(self): - c = commands.ResetCommand("refs/tags/v1.0", ":xxx") - self.assertEqual("reset refs/tags/v1.0\nfrom :xxx\n", repr(c)) - - def test_reset_no_from(self): - c = commands.ResetCommand("refs/remotes/origin/master", None) - self.assertEqual("reset refs/remotes/origin/master", repr(c)) - - -class TestTagDisplay(tests.TestCase): - - def test_tag(self): - # tagger tuple is (name, email, secs-since-epoch, secs-offset-from-utc) - tagger = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) - c = commands.TagCommand("refs/tags/v1.0", ":xxx", tagger, "create v1.0") - self.assertEqual( - "tag refs/tags/v1.0\n" - "from :xxx\n" - "tagger Joe Wong <joe@example.com> 1234567890 -0600\n" - "data 11\n" - "create v1.0", - repr(c)) - - def test_tag_no_from(self): - tagger = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) - c = commands.TagCommand("refs/tags/v1.0", None, tagger, "create v1.0") - self.assertEqualDiff( - "tag refs/tags/v1.0\n" - "tagger Joe Wong <joe@example.com> 1234567890 -0600\n" - "data 11\n" - "create v1.0", - repr(c)) - - -class TestFileModifyDisplay(tests.TestCase): - - def test_filemodify_file(self): - c = commands.FileModifyCommand("foo/bar", "file", False, ":23", None) - self.assertEqual("M 644 :23 foo/bar", repr(c)) - - def test_filemodify_file_executable(self): - c = commands.FileModifyCommand("foo/bar", "file", True, ":23", None) - self.assertEqual("M 755 :23 foo/bar", repr(c)) - - def test_filemodify_file_internal(self): - c = commands.FileModifyCommand("foo/bar", "file", False, None, - "hello world") - self.assertEqual("M 644 inline foo/bar\ndata 11\nhello world", repr(c)) - - def test_filemodify_symlink(self): - c = commands.FileModifyCommand("foo/bar", "symlink", False, None, "baz") - self.assertEqual("M 120000 inline foo/bar\ndata 3\nbaz", repr(c)) - - -class TestFileDeleteDisplay(tests.TestCase): - - def test_filedelete(self): - c = commands.FileDeleteCommand("foo/bar") - self.assertEqual("D foo/bar", repr(c)) - - -class TestFileCopyDisplay(tests.TestCase): - - def test_filecopy(self): - c = commands.FileCopyCommand("foo/bar", "foo/baz") - self.assertEqual("C foo/bar foo/baz", repr(c)) - - def test_filecopy_quoted(self): - # Check the first path is quoted if it contains spaces - c = commands.FileCopyCommand("foo/b a r", "foo/b a z") - self.assertEqual('C "foo/b a r" foo/b a z', repr(c)) - - -class TestFileRenameDisplay(tests.TestCase): - - def test_filerename(self): - c = commands.FileRenameCommand("foo/bar", "foo/baz") - self.assertEqual("R foo/bar foo/baz", repr(c)) - - def test_filerename_quoted(self): - # Check the first path is quoted if it contains spaces - c = commands.FileRenameCommand("foo/b a r", "foo/b a z") - self.assertEqual('R "foo/b a r" foo/b a z', repr(c)) - - -class TestFileDeleteAllDisplay(tests.TestCase): - - def test_filedeleteall(self): - c = commands.FileDeleteAllCommand() - self.assertEqual("deleteall", repr(c)) +from bzrlib.plugins.fastimport.tests import ( + FastimportFeature, + ) -class TestPathChecking(tests.TestCase): +class TestSourceStream(tests.TestCase): - def test_filemodify_path_checking(self): - self.assertRaises(ValueError, commands.FileModifyCommand, "", - "file", False, None, "text") - self.assertRaises(ValueError, commands.FileModifyCommand, None, - "file", False, None, "text") + _test_needs_features = [FastimportFeature] - def test_filedelete_path_checking(self): - self.assertRaises(ValueError, commands.FileDeleteCommand, "") - self.assertRaises(ValueError, commands.FileDeleteCommand, None) + def test_get_source_stream_stdin(self): + # - returns standard in + self.assertIsNot(None, _get_source_stream("-")) - def test_filerename_path_checking(self): - self.assertRaises(ValueError, commands.FileRenameCommand, "", "foo") - self.assertRaises(ValueError, commands.FileRenameCommand, None, "foo") - self.assertRaises(ValueError, commands.FileRenameCommand, "foo", "") - self.assertRaises(ValueError, commands.FileRenameCommand, "foo", None) + def test_get_source_gz(self): + # files ending in .gz are automatically decompressed. + fd, filename = tempfile.mkstemp(suffix=".gz") + f = gzip.GzipFile(fileobj=os.fdopen(fd, "w"), mode='w') + f.write("bla") + f.close() + stream = _get_source_stream(filename) + self.assertIsNot("bla", stream.read()) - def test_filecopy_path_checking(self): - self.assertRaises(ValueError, commands.FileCopyCommand, "", "foo") - self.assertRaises(ValueError, commands.FileCopyCommand, None, "foo") - self.assertRaises(ValueError, commands.FileCopyCommand, "foo", "") - self.assertRaises(ValueError, commands.FileCopyCommand, "foo", None) + def test_get_source_file(self): + # other files are opened as regular files. + fd, filename = tempfile.mkstemp() + f = os.fdopen(fd, 'w') + f.write("bla") + f.close() + stream = _get_source_stream(filename) + self.assertIsNot("bla", stream.read()) diff --git a/tests/test_errors.py b/tests/test_errors.py deleted file mode 100644 index ac63b29..0000000 --- a/tests/test_errors.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Test the Import errors""" - -from bzrlib import tests - -from bzrlib.plugins.fastimport import ( - errors, - ) - - -class TestErrors(tests.TestCase): - - def test_MissingBytes(self): - e = errors.MissingBytes(99, 10, 8) - self.assertEqual("line 99: Unexpected EOF - expected 10 bytes, found 8", - str(e)) - - def test_MissingTerminator(self): - e = errors.MissingTerminator(99, '---') - self.assertEqual("line 99: Unexpected EOF - expected '---' terminator", - str(e)) - - def test_InvalidCommand(self): - e = errors.InvalidCommand(99, 'foo') - self.assertEqual("line 99: Invalid command 'foo'", - str(e)) - - def test_MissingSection(self): - e = errors.MissingSection(99, 'foo', 'bar') - self.assertEqual("line 99: Command foo is missing section bar", - str(e)) - - def test_BadFormat(self): - e = errors.BadFormat(99, 'foo', 'bar', 'xyz') - self.assertEqual("line 99: Bad format for section bar in " - "command foo: found 'xyz'", - str(e)) - - def test_InvalidTimezone(self): - e = errors.InvalidTimezone(99, 'aa:bb') - self.assertEqual('aa:bb', e.timezone) - self.assertEqual('', e.reason) - self.assertEqual("line 99: Timezone 'aa:bb' could not be converted.", - str(e)) - e = errors.InvalidTimezone(99, 'aa:bb', 'Non-numeric hours') - self.assertEqual('aa:bb', e.timezone) - self.assertEqual(' Non-numeric hours', e.reason) - self.assertEqual("line 99: Timezone 'aa:bb' could not be converted." - " Non-numeric hours", - str(e)) - - def test_UnknownDateFormat(self): - e = errors.UnknownDateFormat('aaa') - self.assertEqual("Unknown date format 'aaa'", str(e)) - - def test_MissingHandler(self): - e = errors.MissingHandler('foo') - self.assertEqual("Missing handler for command foo", str(e)) diff --git a/tests/test_exporter.py b/tests/test_exporter.py new file mode 100644 index 0000000..fe50e3b --- /dev/null +++ b/tests/test_exporter.py @@ -0,0 +1,62 @@ +# Copyright (C) 2010 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Test the exporter.""" + +import os +import tempfile +import gzip + +from bzrlib import tests + +from bzrlib.plugins.fastimport.exporter import ( + _get_output_stream, + ) + +from bzrlib.plugins.fastimport.tests import ( + FastimportFeature, + ) + + +class TestOutputStream(tests.TestCase): + + _test_needs_features = [FastimportFeature] + + def test_get_output_stream_stdout(self): + # - returns standard out + self.assertIsNot(None, _get_output_stream("-")) + + def test_get_source_gz(self): + fd, filename = tempfile.mkstemp(suffix=".gz") + os.close(fd) + stream = _get_output_stream(filename) + stream.write("bla") + stream.close() + # files ending in .gz are automatically decompressed. + f = gzip.GzipFile(filename) + self.assertEquals("bla", f.read()) + f.close() + + def test_get_source_file(self): + # other files are opened as regular files. + fd, filename = tempfile.mkstemp() + os.close(fd) + stream = _get_output_stream(filename) + stream.write("foo") + stream.close() + f = open(filename, 'r') + self.assertEquals("foo", f.read()) + f.close() diff --git a/tests/test_filter_processor.py b/tests/test_filter_processor.py deleted file mode 100644 index ff8a09f..0000000 --- a/tests/test_filter_processor.py +++ /dev/null @@ -1,877 +0,0 @@ -# Copyright (C) 2009 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Test FilterProcessor""" - -from cStringIO import StringIO - -from bzrlib import tests - -from bzrlib.plugins.fastimport import ( - parser, - ) -from bzrlib.plugins.fastimport.processors.filter_processor import ( - FilterProcessor, - ) - - -# A sample input stream containing all (top level) import commands -_SAMPLE_ALL = \ -"""blob -mark :1 -data 4 -foo -commit refs/heads/master -mark :2 -committer Joe <joe@example.com> 1234567890 +1000 -data 14 -Initial import -M 644 :1 COPYING -checkpoint -progress first import done -reset refs/remote/origin/master -from :2 -tag v0.1 -from :2 -tagger Joe <joe@example.com> 1234567890 +1000 -data 12 -release v0.1 -""" - - -# A sample input stream creating the following tree: -# -# NEWS -# doc/README.txt -# doc/index.txt -_SAMPLE_WITH_DIR = \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 doc/README.txt -blob -mark :2 -data 17 -Life -is -good ... -commit refs/heads/master -mark :101 -committer a <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :2 NEWS -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :101 -M 644 :3 doc/README.txt -M 644 :4 doc/index.txt -""" - - -class TestCaseWithFiltering(tests.TestCase): - - def assertFiltering(self, input, params, expected): - outf = StringIO() - proc = FilterProcessor(None, params=params) - proc.outf = outf - s = StringIO(input) - p = parser.ImportParser(s) - proc.process(p.iter_commands) - out = outf.getvalue() - self.assertEqualDiff(expected, out) - - -class TestNoFiltering(TestCaseWithFiltering): - - def test_params_not_given(self): - self.assertFiltering(_SAMPLE_ALL, None, _SAMPLE_ALL) - - def test_params_are_none(self): - params = {'include_paths': None, 'exclude_paths': None} - self.assertFiltering(_SAMPLE_ALL, params, _SAMPLE_ALL) - - -class TestIncludePaths(TestCaseWithFiltering): - - def test_file_in_root(self): - # Things to note: - # * only referenced blobs are retained - # * from clause is dropped from the first command - params = {'include_paths': ['NEWS']} - self.assertFiltering(_SAMPLE_WITH_DIR, params, \ -"""blob -mark :2 -data 17 -Life -is -good ... -commit refs/heads/master -mark :101 -committer a <b@c> 1234798653 +0000 -data 8 -test -ing -M 644 :2 NEWS -""") - - def test_file_in_subdir(self): - # Additional things to note: - # * new root: path is now index.txt, not doc/index.txt - # * other files changed in matching commits are excluded - params = {'include_paths': ['doc/index.txt']} - self.assertFiltering(_SAMPLE_WITH_DIR, params, \ -"""blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -M 644 :4 index.txt -""") - - def test_file_with_changes(self): - # Additional things to note: - # * from updated to reference parents in the output - params = {'include_paths': ['doc/README.txt']} - self.assertFiltering(_SAMPLE_WITH_DIR, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 README.txt -blob -mark :3 -data 19 -Welcome! -my friend -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 README.txt -""") - - def test_subdir(self): - params = {'include_paths': ['doc/']} - self.assertFiltering(_SAMPLE_WITH_DIR, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 README.txt -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 README.txt -M 644 :4 index.txt -""") - - def test_multiple_files_in_subdir(self): - # The new root should be the subdrectory - params = {'include_paths': ['doc/README.txt', 'doc/index.txt']} - self.assertFiltering(_SAMPLE_WITH_DIR, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 README.txt -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 README.txt -M 644 :4 index.txt -""") - - -class TestExcludePaths(TestCaseWithFiltering): - - def test_file_in_root(self): - params = {'exclude_paths': ['NEWS']} - self.assertFiltering(_SAMPLE_WITH_DIR, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 doc/README.txt -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 doc/README.txt -M 644 :4 doc/index.txt -""") - - def test_file_in_subdir(self): - params = {'exclude_paths': ['doc/README.txt']} - self.assertFiltering(_SAMPLE_WITH_DIR, params, \ -"""blob -mark :2 -data 17 -Life -is -good ... -commit refs/heads/master -mark :101 -committer a <b@c> 1234798653 +0000 -data 8 -test -ing -M 644 :2 NEWS -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :101 -M 644 :4 doc/index.txt -""") - - def test_subdir(self): - params = {'exclude_paths': ['doc/']} - self.assertFiltering(_SAMPLE_WITH_DIR, params, \ -"""blob -mark :2 -data 17 -Life -is -good ... -commit refs/heads/master -mark :101 -committer a <b@c> 1234798653 +0000 -data 8 -test -ing -M 644 :2 NEWS -""") - - def test_multple_files(self): - params = {'exclude_paths': ['doc/index.txt', 'NEWS']} - self.assertFiltering(_SAMPLE_WITH_DIR, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 doc/README.txt -blob -mark :3 -data 19 -Welcome! -my friend -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 doc/README.txt -""") - - -class TestIncludeAndExcludePaths(TestCaseWithFiltering): - - def test_included_dir_and_excluded_file(self): - params = {'include_paths': ['doc/'], 'exclude_paths': ['doc/index.txt']} - self.assertFiltering(_SAMPLE_WITH_DIR, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 README.txt -blob -mark :3 -data 19 -Welcome! -my friend -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 README.txt -""") - - -# A sample input stream creating the following tree: -# -# NEWS -# doc/README.txt -# doc/index.txt -# -# It then renames doc/README.txt => doc/README -_SAMPLE_WITH_RENAME_INSIDE = _SAMPLE_WITH_DIR + \ -"""commit refs/heads/master -mark :103 -committer d <b@c> 1234798653 +0000 -data 10 -move intro -from :102 -R doc/README.txt doc/README -""" - -# A sample input stream creating the following tree: -# -# NEWS -# doc/README.txt -# doc/index.txt -# -# It then renames doc/README.txt => README -_SAMPLE_WITH_RENAME_TO_OUTSIDE = _SAMPLE_WITH_DIR + \ -"""commit refs/heads/master -mark :103 -committer d <b@c> 1234798653 +0000 -data 10 -move intro -from :102 -R doc/README.txt README -""" - -# A sample input stream creating the following tree: -# -# NEWS -# doc/README.txt -# doc/index.txt -# -# It then renames NEWS => doc/NEWS -_SAMPLE_WITH_RENAME_TO_INSIDE = _SAMPLE_WITH_DIR + \ -"""commit refs/heads/master -mark :103 -committer d <b@c> 1234798653 +0000 -data 10 -move intro -from :102 -R NEWS doc/NEWS -""" - -class TestIncludePathsWithRenames(TestCaseWithFiltering): - - def test_rename_all_inside(self): - # These rename commands ought to be kept but adjusted for the new root - params = {'include_paths': ['doc/']} - self.assertFiltering(_SAMPLE_WITH_RENAME_INSIDE, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 README.txt -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 README.txt -M 644 :4 index.txt -commit refs/heads/master -mark :103 -committer d <b@c> 1234798653 +0000 -data 10 -move intro -from :102 -R README.txt README -""") - - def test_rename_to_outside(self): - # These rename commands become deletes - params = {'include_paths': ['doc/']} - self.assertFiltering(_SAMPLE_WITH_RENAME_TO_OUTSIDE, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 README.txt -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 README.txt -M 644 :4 index.txt -commit refs/heads/master -mark :103 -committer d <b@c> 1234798653 +0000 -data 10 -move intro -from :102 -D README.txt -""") - - def test_rename_to_inside(self): - # This ought to create a new file but doesn't yet - params = {'include_paths': ['doc/']} - self.assertFiltering(_SAMPLE_WITH_RENAME_TO_INSIDE, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 README.txt -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 README.txt -M 644 :4 index.txt -""") - - -# A sample input stream creating the following tree: -# -# NEWS -# doc/README.txt -# doc/index.txt -# -# It then copies doc/README.txt => doc/README -_SAMPLE_WITH_COPY_INSIDE = _SAMPLE_WITH_DIR + \ -"""commit refs/heads/master -mark :103 -committer d <b@c> 1234798653 +0000 -data 10 -move intro -from :102 -C doc/README.txt doc/README -""" - -# A sample input stream creating the following tree: -# -# NEWS -# doc/README.txt -# doc/index.txt -# -# It then copies doc/README.txt => README -_SAMPLE_WITH_COPY_TO_OUTSIDE = _SAMPLE_WITH_DIR + \ -"""commit refs/heads/master -mark :103 -committer d <b@c> 1234798653 +0000 -data 10 -move intro -from :102 -C doc/README.txt README -""" - -# A sample input stream creating the following tree: -# -# NEWS -# doc/README.txt -# doc/index.txt -# -# It then copies NEWS => doc/NEWS -_SAMPLE_WITH_COPY_TO_INSIDE = _SAMPLE_WITH_DIR + \ -"""commit refs/heads/master -mark :103 -committer d <b@c> 1234798653 +0000 -data 10 -move intro -from :102 -C NEWS doc/NEWS -""" - - -class TestIncludePathsWithCopies(TestCaseWithFiltering): - - def test_copy_all_inside(self): - # These copy commands ought to be kept but adjusted for the new root - params = {'include_paths': ['doc/']} - self.assertFiltering(_SAMPLE_WITH_COPY_INSIDE, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 README.txt -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 README.txt -M 644 :4 index.txt -commit refs/heads/master -mark :103 -committer d <b@c> 1234798653 +0000 -data 10 -move intro -from :102 -C README.txt README -""") - - def test_copy_to_outside(self): - # This can be ignored - params = {'include_paths': ['doc/']} - self.assertFiltering(_SAMPLE_WITH_COPY_TO_OUTSIDE, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 README.txt -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 README.txt -M 644 :4 index.txt -""") - - def test_copy_to_inside(self): - # This ought to create a new file but doesn't yet - params = {'include_paths': ['doc/']} - self.assertFiltering(_SAMPLE_WITH_COPY_TO_INSIDE, params, \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 README.txt -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 README.txt -M 644 :4 index.txt -""") - - -# A sample input stream with deleteall's creating the following tree: -# -# NEWS -# doc/README.txt -# doc/index.txt -_SAMPLE_WITH_DELETEALL = \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -deleteall -M 644 :1 doc/README.txt -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -deleteall -M 644 :3 doc/README.txt -M 644 :4 doc/index.txt -""" - - -class TestIncludePathsWithDeleteAll(TestCaseWithFiltering): - - def test_deleteall(self): - params = {'include_paths': ['doc/index.txt']} - self.assertFiltering(_SAMPLE_WITH_DELETEALL, params, \ -"""blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -deleteall -M 644 :4 index.txt -""") - - -_SAMPLE_WITH_TAGS = _SAMPLE_WITH_DIR + \ -"""tag v0.1 -from :100 -tagger d <b@c> 1234798653 +0000 -data 12 -release v0.1 -tag v0.2 -from :102 -tagger d <b@c> 1234798653 +0000 -data 12 -release v0.2 -""" - -class TestIncludePathsWithTags(TestCaseWithFiltering): - - def test_tag_retention(self): - # If a tag references a commit with a parent we kept, - # keep the tag but adjust 'from' accordingly. - # Otherwise, delete the tag command. - params = {'include_paths': ['NEWS']} - self.assertFiltering(_SAMPLE_WITH_TAGS, params, \ -"""blob -mark :2 -data 17 -Life -is -good ... -commit refs/heads/master -mark :101 -committer a <b@c> 1234798653 +0000 -data 8 -test -ing -M 644 :2 NEWS -tag v0.2 -from :101 -tagger d <b@c> 1234798653 +0000 -data 12 -release v0.2 -""") - - -_SAMPLE_WITH_RESETS = _SAMPLE_WITH_DIR + \ -"""reset refs/heads/foo -reset refs/heads/bar -from :102 -""" - -class TestIncludePathsWithResets(TestCaseWithFiltering): - - def test_reset_retention(self): - # Resets init'ing a branch (without a from) are passed through. - # If a reset references a commit with a parent we kept, - # keep the reset but adjust 'from' accordingly. - params = {'include_paths': ['NEWS']} - self.assertFiltering(_SAMPLE_WITH_RESETS, params, \ -"""blob -mark :2 -data 17 -Life -is -good ... -commit refs/heads/master -mark :101 -committer a <b@c> 1234798653 +0000 -data 8 -test -ing -M 644 :2 NEWS -reset refs/heads/foo -reset refs/heads/bar -from :101 -""") diff --git a/tests/test_generic_processor.py b/tests/test_generic_processor.py index d4f789b..41f846e 100644 --- a/tests/test_generic_processor.py +++ b/tests/test_generic_processor.py @@ -17,25 +17,47 @@ import time from bzrlib import ( - branch, tests, ) - -from bzrlib.plugins.fastimport import ( - commands, - errors, +from bzrlib.plugins.fastimport.helpers import ( + kind_to_mode, ) - -from bzrlib.plugins.fastimport.processors import ( - generic_processor, +from bzrlib.plugins.fastimport.tests import ( + FastimportFeature, ) +try: + from fastimport import commands +except ImportError: + commands = object() + + +def load_tests(standard_tests, module, loader): + """Parameterize tests for all versions of groupcompress.""" + scenarios = [ + ('pack-0.92', {'branch_format': 'pack-0.92'}), + ('1.9-rich-root', {'branch_format': '1.9-rich-root'}), + ] + try: + from bzrlib.repofmt.groupcompress_repo import RepositoryFormat2a + scenarios.append(('2a', {'branch_format': '2a'})) + except ImportError: + pass + suite = loader.suiteClass() + result = tests.multiply_tests(standard_tests, scenarios, suite) + return result + class TestCaseForGenericProcessor(tests.TestCaseWithTransport): + _test_needs_features = [FastimportFeature] + branch_format = "pack-0.92" def get_handler(self): + from bzrlib.plugins.fastimport.processors import ( + generic_processor, + ) branch = self.make_branch('.', format=self.branch_format) handler = generic_processor.GenericProcessor(branch.bzrdir) return handler, branch @@ -176,23 +198,24 @@ class TestImportToPackModify(TestCaseForGenericProcessor): def file_command_iter(self, path, kind='file', content='aaa', executable=False, to_kind=None, to_content='bbb', to_executable=None): + # Revno 1: create a file or symlink # Revno 2: modify it if to_kind is None: to_kind = kind if to_executable is None: to_executable = executable + mode = kind_to_mode(kind, executable) + to_mode = kind_to_mode(to_kind, to_executable) def command_list(): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): - yield commands.FileModifyCommand(path, kind, executable, - None, content) + yield commands.FileModifyCommand(path, mode, None, content) yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) def files_two(): - yield commands.FileModifyCommand(path, to_kind, to_executable, - None, to_content) + yield commands.FileModifyCommand(path, to_mode, None, to_content) yield commands.CommitCommand('head', '2', author, committer, "commit 2", ":1", [], files_two) return command_list @@ -292,9 +315,46 @@ class TestImportToPackModify(TestCaseForGenericProcessor): self.assertExecutable(branch, revtree2, path, False) +class TestImportToPackModifyTwice(TestCaseForGenericProcessor): + """This tests when the same file is modified twice in the one commit. + + Note: hg-fast-export produces data like this on occasions. + """ + + def file_command_iter(self, path, kind='file', content='aaa', + executable=False, to_kind=None, to_content='bbb', to_executable=None): + + # Revno 1: create a file twice + if to_kind is None: + to_kind = kind + if to_executable is None: + to_executable = executable + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand(path, kind_to_mode(kind, executable), + None, content) + yield commands.FileModifyCommand(path, kind_to_mode(to_kind, to_executable), + None, to_content) + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + return command_list + + def test_modify_file_twice_in_root(self): + handler, branch = self.get_handler() + path = 'a' + handler.process(self.file_command_iter(path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(path,)]) + self.assertContent(branch, revtree1, path, "aaa") + self.assertRevisionRoot(revtree1, path) + + class TestImportToPackModifyTricky(TestCaseForGenericProcessor): def file_command_iter(self, path1, path2, kind='file'): + # Revno 1: create a file or symlink in a directory # Revno 2: create a second file that implicitly deletes the # first one because either: @@ -304,12 +364,12 @@ class TestImportToPackModifyTricky(TestCaseForGenericProcessor): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): - yield commands.FileModifyCommand(path1, kind, False, + yield commands.FileModifyCommand(path1, kind_to_mode(kind, False), None, "aaa") yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) def files_two(): - yield commands.FileModifyCommand(path2, kind, False, + yield commands.FileModifyCommand(path2, kind_to_mode(kind, False), None, "bbb") yield commands.CommitCommand('head', '2', author, committer, "commit 2", ":1", [], files_two) @@ -372,13 +432,14 @@ class TestImportToPackModifyTricky(TestCaseForGenericProcessor): class TestImportToPackDelete(TestCaseForGenericProcessor): def file_command_iter(self, path, kind='file'): + # Revno 1: create a file or symlink # Revno 2: delete it def command_list(): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): - yield commands.FileModifyCommand(path, kind, False, + yield commands.FileModifyCommand(path, kind_to_mode(kind, False), None, "aaa") yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) @@ -439,9 +500,211 @@ class TestImportToPackDelete(TestCaseForGenericProcessor): self.assertContent(branch, revtree1, path, "aaa") +class TestImportToPackDeleteNew(TestCaseForGenericProcessor): + """Test deletion of a newly added file.""" + + def file_command_iter(self, path, kind='file'): + + # Revno 1: create a file or symlink then delete it + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand(path, kind_to_mode(kind, False), + None, "aaa") + yield commands.FileDeleteCommand(path) + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + return command_list + + def test_delete_new_file_in_root(self): + handler, branch = self.get_handler() + path = 'a' + handler.process(self.file_command_iter(path)) + revtree0, revtree1 = self.assertChanges(branch, 1,) + + def test_delete_new_file_in_subdir(self): + handler, branch = self.get_handler() + path = 'a/a' + handler.process(self.file_command_iter(path)) + revtree0, revtree1 = self.assertChanges(branch, 1,) + + def test_delete_new_symlink_in_root(self): + handler, branch = self.get_handler() + path = 'a' + handler.process(self.file_command_iter(path, kind='symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1,) + + def test_delete_new_symlink_in_subdir(self): + handler, branch = self.get_handler() + path = 'a/a' + handler.process(self.file_command_iter(path, kind='symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1,) + + def test_delete_new_file_in_deep_subdir(self): + handler, branch = self.get_handler() + path = 'a/b/c/d' + handler.process(self.file_command_iter(path)) + revtree0, revtree1 = self.assertChanges(branch, 1,) + + +class TestImportToPackDeleteMultiLevel(TestCaseForGenericProcessor): + + def file_command_iter(self, paths, paths_to_delete): + + # Revno 1: create multiple files + # Revno 2: delete multiple files + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + for i, path in enumerate(paths): + yield commands.FileModifyCommand(path, kind_to_mode('file', False), + None, "aaa%d" % i) + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + def files_two(): + for path in paths_to_delete: + yield commands.FileDeleteCommand(path) + yield commands.CommitCommand('head', '2', author, + committer, "commit 2", ":1", [], files_two) + return command_list + + def test_delete_files_in_multiple_levels(self): + handler, branch = self.get_handler() + paths = ['a/b/c', 'a/b/d/e'] + paths_to_delete = ['a/b/c', 'a/b/d/e'] + handler.process(self.file_command_iter(paths, paths_to_delete)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[ + ('a',), ('a/b',), ('a/b/c',), + ('a/b/d',), ('a/b/d/e',), + ]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[ + ('a',), ('a/b',), ('a/b/c',), + ('a/b/d',), ('a/b/d/e',), + ]) + + def test_delete_file_single_level(self): + handler, branch = self.get_handler() + paths = ['a/b/c', 'a/b/d/e'] + paths_to_delete = ['a/b/d/e'] + handler.process(self.file_command_iter(paths, paths_to_delete)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[ + ('a',), ('a/b',), ('a/b/c',), + ('a/b/d',), ('a/b/d/e',), + ]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[ + ('a/b/d',), ('a/b/d/e',), + ]) + + def test_delete_file_complex_level(self): + handler, branch = self.get_handler() + paths = ['a/b/c', 'a/b/d/e', 'a/f/g', 'a/h', 'a/b/d/i/j'] + paths_to_delete = ['a/b/c', 'a/b/d/e', 'a/f/g', 'a/b/d/i/j'] + handler.process(self.file_command_iter(paths, paths_to_delete)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[ + ('a',), ('a/b',), ('a/b/c',), + ('a/b/d',), ('a/b/d/e',), + ('a/f',), ('a/f/g',), + ('a/h',), + ('a/b/d/i',), ('a/b/d/i/j',), + ]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[ + ('a/b',), ('a/b/c',), + ('a/b/d',), ('a/b/d/e',), + ('a/f',), ('a/f/g',), + ('a/b/d/i',), ('a/b/d/i/j',), + ]) + +class TestImportToPackDeleteThenAdd(TestCaseForGenericProcessor): + """Test delete followed by an add. Merges can cause this.""" + + def file_command_iter(self, path, kind='file', content='aaa', + executable=False, to_kind=None, to_content='bbb', to_executable=None): + + # Revno 1: create a file or symlink + # Revno 2: delete it and add it + if to_kind is None: + to_kind = kind + if to_executable is None: + to_executable = executable + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand(path, kind_to_mode(kind, executable), + None, content) + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + def files_two(): + yield commands.FileDeleteCommand(path) + yield commands.FileModifyCommand(path, kind_to_mode(to_kind, to_executable), + None, to_content) + yield commands.CommitCommand('head', '2', author, + committer, "commit 2", ":1", [], files_two) + return command_list + + def test_delete_then_add_file_in_root(self): + handler, branch = self.get_handler() + path = 'a' + handler.process(self.file_command_iter(path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(path,)], + expected_added=[(path,)]) + self.assertContent(branch, revtree1, path, "aaa") + self.assertContent(branch, revtree2, path, "bbb") + self.assertRevisionRoot(revtree1, path) + self.assertRevisionRoot(revtree2, path) + + def test_delete_then_add_file_in_subdir(self): + handler, branch = self.get_handler() + path = 'a/a' + handler.process(self.file_command_iter(path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('a',), (path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(path,)], + expected_added=[(path,)]) + self.assertContent(branch, revtree1, path, "aaa") + self.assertContent(branch, revtree2, path, "bbb") + + def test_delete_then_add_symlink_in_root(self): + handler, branch = self.get_handler() + path = 'a' + handler.process(self.file_command_iter(path, kind='symlink')) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(path,)], + expected_added=[(path,)]) + self.assertSymlinkTarget(branch, revtree1, path, "aaa") + self.assertSymlinkTarget(branch, revtree2, path, "bbb") + self.assertRevisionRoot(revtree1, path) + self.assertRevisionRoot(revtree2, path) + + def test_delete_then_add_symlink_in_subdir(self): + handler, branch = self.get_handler() + path = 'a/a' + handler.process(self.file_command_iter(path, kind='symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('a',), (path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(path,)], + expected_added=[(path,)]) + self.assertSymlinkTarget(branch, revtree1, path, "aaa") + self.assertSymlinkTarget(branch, revtree2, path, "bbb") + + class TestImportToPackDeleteDirectory(TestCaseForGenericProcessor): def file_command_iter(self, paths, dir): + # Revno 1: create multiple files # Revno 2: delete a directory holding those files def command_list(): @@ -449,7 +712,7 @@ class TestImportToPackDeleteDirectory(TestCaseForGenericProcessor): committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): for i, path in enumerate(paths): - yield commands.FileModifyCommand(path, 'file', False, + yield commands.FileModifyCommand(path, kind_to_mode('file', False), None, "aaa%d" % i) yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) @@ -479,16 +742,68 @@ class TestImportToPackDeleteDirectory(TestCaseForGenericProcessor): ]) +class TestImportToPackDeleteDirectoryThenAddFile(TestCaseForGenericProcessor): + """Test deleting a directory then adding a file in the same commit.""" + + def file_command_iter(self, paths, dir, new_path, kind='file'): + + # Revno 1: create files in a directory + # Revno 2: delete the directory then add a file into it + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + for i, path in enumerate(paths): + yield commands.FileModifyCommand(path, kind_to_mode(kind, False), + None, "aaa%d" % i) + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + def files_two(): + yield commands.FileDeleteCommand(dir) + yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False), + None, "bbb") + yield commands.CommitCommand('head', '2', author, + committer, "commit 2", ":1", [], files_two) + return command_list + + def test_delete_dir_then_add_file(self): + handler, branch = self.get_handler() + paths = ['a/b/c', 'a/b/d'] + dir = 'a/b' + new_path = 'a/b/z' + handler.process(self.file_command_iter(paths, dir, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('a',), ('a/b',), ('a/b/c',), ('a/b/d',),]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[('a/b',), ('a/b/c',), ('a/b/d',)], + expected_added=[('a/b',), ('a/b/z',)]) + self.assertContent(branch, revtree2, new_path, "bbb") + + def test_delete_dir_then_add_symlink(self): + handler, branch = self.get_handler() + paths = ['a/b/c', 'a/b/d'] + dir = 'a/b' + new_path = 'a/b/z' + handler.process(self.file_command_iter(paths, dir, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('a',), ('a/b',), ('a/b/c',), ('a/b/d',),]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[('a/b',), ('a/b/c',), ('a/b/d',)], + expected_added=[('a/b',), ('a/b/z',)]) + self.assertSymlinkTarget(branch, revtree2, new_path, "bbb") + + class TestImportToPackRename(TestCaseForGenericProcessor): - def get_command_iter(self, old_path, new_path): + def get_command_iter(self, old_path, new_path, kind='file'): + # Revno 1: create a file or symlink # Revno 2: rename it def command_list(): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): - yield commands.FileModifyCommand(old_path, 'file', False, + yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False), None, "aaa") yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) @@ -498,7 +813,7 @@ class TestImportToPackRename(TestCaseForGenericProcessor): committer, "commit 2", ":1", [], files_two) return command_list - def test_rename_in_root(self): + def test_rename_file_in_root(self): handler, branch = self.get_handler() old_path = 'a' new_path = 'b' @@ -508,14 +823,31 @@ class TestImportToPackRename(TestCaseForGenericProcessor): self.assertRevisionRoot(revtree1, old_path) self.assertRevisionRoot(revtree2, new_path) - def test_rename_in_subdir(self): + def test_rename_symlink_in_root(self): + handler, branch = self.get_handler() + old_path = 'a' + new_path = 'b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)]) + self.assertRevisionRoot(revtree1, old_path) + self.assertRevisionRoot(revtree2, new_path) + + def test_rename_file_in_subdir(self): handler, branch = self.get_handler() old_path = 'a/a' new_path = 'a/b' handler.process(self.get_command_iter(old_path, new_path)) self.assertChanges(branch, 2, expected_renamed=[(old_path, new_path)]) - def test_move_to_new_dir(self): + def test_rename_symlink_in_subdir(self): + handler, branch = self.get_handler() + old_path = 'a/a' + new_path = 'a/b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + self.assertChanges(branch, 2, expected_renamed=[(old_path, new_path)]) + + def test_rename_file_to_new_dir(self): handler, branch = self.get_handler() old_path = 'a/a' new_path = 'b/a' @@ -525,10 +857,547 @@ class TestImportToPackRename(TestCaseForGenericProcessor): expected_added=[('b',)], expected_removed=[('a',)]) + def test_rename_symlink_to_new_dir(self): + handler, branch = self.get_handler() + old_path = 'a/a' + new_path = 'b/a' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)], + expected_added=[('b',)], + expected_removed=[('a',)]) + + +class TestImportToPackRenameNew(TestCaseForGenericProcessor): + """Test rename of a newly added file.""" + + def get_command_iter(self, old_path, new_path, kind='file'): + + # Revno 1: create a file and rename it + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False), + None, "aaa") + yield commands.FileRenameCommand(old_path, new_path) + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + return command_list + + def test_rename_new_file_in_root(self): + handler, branch = self.get_handler() + old_path = 'a' + new_path = 'b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(new_path,)]) + self.assertRevisionRoot(revtree1, new_path) + + def test_rename_new_symlink_in_root(self): + handler, branch = self.get_handler() + old_path = 'a' + new_path = 'b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(new_path,)]) + self.assertRevisionRoot(revtree1, new_path) + + def test_rename_new_file_in_subdir(self): + handler, branch = self.get_handler() + old_path = 'a/a' + new_path = 'a/b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('a',), (new_path,)]) + + def test_rename_new_symlink_in_subdir(self): + handler, branch = self.get_handler() + old_path = 'a/a' + new_path = 'a/b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('a',), (new_path,)]) + + +class TestImportToPackRenameToDeleted(TestCaseForGenericProcessor): + """Test rename to a destination path deleted in this commit.""" + + def get_command_iter(self, old_path, new_path, kind='file'): + + # Revno 1: create two files + # Revno 2: delete one, rename the other one to that path + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False), + None, "aaa") + yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False), + None, "bbb") + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + def files_two(): + yield commands.FileDeleteCommand(new_path) + yield commands.FileRenameCommand(old_path, new_path) + yield commands.CommitCommand('head', '2', author, + committer, "commit 2", ":1", [], files_two) + return command_list + + def test_rename_to_deleted_file_in_root(self): + handler, branch = self.get_handler() + old_path = 'a' + new_path = 'b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(old_path,), (new_path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree1, new_path, "bbb") + self.assertContent(branch, revtree2, new_path, "aaa") + self.assertRevisionRoot(revtree1, old_path) + self.assertRevisionRoot(revtree1, new_path) + + def test_rename_to_deleted_symlink_in_root(self): + handler, branch = self.get_handler() + old_path = 'a' + new_path = 'b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(old_path,), (new_path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree1, new_path, "bbb") + self.assertSymlinkTarget(branch, revtree2, new_path, "aaa") + self.assertRevisionRoot(revtree1, old_path) + self.assertRevisionRoot(revtree1, new_path) + + def test_rename_to_deleted_file_in_subdir(self): + handler, branch = self.get_handler() + old_path = 'd/a' + new_path = 'd/b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d',), (old_path,), (new_path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree1, new_path, "bbb") + self.assertContent(branch, revtree2, new_path, "aaa") + + def test_rename_to_deleted_symlink_in_subdir(self): + handler, branch = self.get_handler() + old_path = 'd/a' + new_path = 'd/b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d',), (old_path,), (new_path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree1, new_path, "bbb") + self.assertSymlinkTarget(branch, revtree2, new_path, "aaa") + + def test_rename_to_deleted_file_in_new_dir(self): + handler, branch = self.get_handler() + old_path = 'd1/a' + new_path = 'd2/b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d1',), (old_path,), ('d2',), (new_path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[('d1',), (new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree1, new_path, "bbb") + self.assertContent(branch, revtree2, new_path, "aaa") + + def test_rename_to_deleted_symlink_in_new_dir(self): + handler, branch = self.get_handler() + old_path = 'd1/a' + new_path = 'd2/b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d1',), (old_path,), ('d2',), (new_path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[('d1',), (new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree1, new_path, "bbb") + self.assertSymlinkTarget(branch, revtree2, new_path, "aaa") + + +class TestImportToPackRenameModified(TestCaseForGenericProcessor): + """Test rename of a path previously modified in this commit.""" + + def get_command_iter(self, old_path, new_path, kind='file'): + + # Revno 1: create a file or symlink + # Revno 2: modify then rename it + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False), + None, "aaa") + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + def files_two(): + yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False), + None, "bbb") + yield commands.FileRenameCommand(old_path, new_path) + yield commands.CommitCommand('head', '2', author, + committer, "commit 2", ":1", [], files_two) + return command_list + + def test_rename_of_modified_file_in_root(self): + handler, branch = self.get_handler() + old_path = 'a' + new_path = 'b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree2, new_path, "bbb") + self.assertRevisionRoot(revtree1, old_path) + self.assertRevisionRoot(revtree2, new_path) + + def test_rename_of_modified_symlink_in_root(self): + handler, branch = self.get_handler() + old_path = 'a' + new_path = 'b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree2, new_path, "bbb") + self.assertRevisionRoot(revtree1, old_path) + self.assertRevisionRoot(revtree2, new_path) + + def test_rename_of_modified_file_in_subdir(self): + handler, branch = self.get_handler() + old_path = 'd/a' + new_path = 'd/b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d',), (old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree2, new_path, "bbb") + + def test_rename_of_modified_symlink_in_subdir(self): + handler, branch = self.get_handler() + old_path = 'd/a' + new_path = 'd/b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d',), (old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree2, new_path, "bbb") + + def test_rename_of_modified_file_to_new_dir(self): + handler, branch = self.get_handler() + old_path = 'd1/a' + new_path = 'd2/b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d1',), (old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)], + expected_added=[('d2',)], + expected_removed=[('d1',)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree2, new_path, "bbb") + + def test_rename_of_modified_symlink_to_new_dir(self): + handler, branch = self.get_handler() + old_path = 'd1/a' + new_path = 'd2/b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d1',), (old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)], + expected_added=[('d2',)], + expected_removed=[('d1',)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree2, new_path, "bbb") + + +class TestImportToPackRenameThenModify(TestCaseForGenericProcessor): + """Test rename of a path then modfy the new-path in the same commit.""" + + def get_command_iter(self, old_path, new_path, kind='file'): + + # Revno 1: create a file or symlink + # Revno 2: rename it then modify the newly created path + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False), + None, "aaa") + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + def files_two(): + yield commands.FileRenameCommand(old_path, new_path) + yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False), + None, "bbb") + yield commands.CommitCommand('head', '2', author, + committer, "commit 2", ":1", [], files_two) + return command_list + + def test_rename_then_modify_file_in_root(self): + handler, branch = self.get_handler() + old_path = 'a' + new_path = 'b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree2, new_path, "bbb") + self.assertRevisionRoot(revtree1, old_path) + self.assertRevisionRoot(revtree2, new_path) + + def test_rename_then_modify_file_in_subdir(self): + handler, branch = self.get_handler() + old_path = 'd/a' + new_path = 'd/b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d',), (old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree2, new_path, "bbb") + + def test_rename_then_modify_file_in_new_dir(self): + handler, branch = self.get_handler() + old_path = 'd1/a' + new_path = 'd2/b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d1',), (old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)], + expected_added=[('d2',)], + expected_removed=[('d1',)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree2, new_path, "bbb") + + def test_rename_then_modify_symlink_in_root(self): + handler, branch = self.get_handler() + old_path = 'a' + new_path = 'b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree2, new_path, "bbb") + self.assertRevisionRoot(revtree1, old_path) + self.assertRevisionRoot(revtree2, new_path) + + def test_rename_then_modify_symlink_in_subdir(self): + handler, branch = self.get_handler() + old_path = 'd/a' + new_path = 'd/b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d',), (old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree2, new_path, "bbb") + + def test_rename_then_modify_symlink_in_new_dir(self): + handler, branch = self.get_handler() + old_path = 'd1/a' + new_path = 'd2/b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d1',), (old_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_renamed=[(old_path, new_path)], + expected_added=[('d2',)], + expected_removed=[('d1',)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree2, new_path, "bbb") + + +class TestImportToPackDeleteRenameThenModify(TestCaseForGenericProcessor): + """Test rename of to a deleted path then modfy the new-path in the same commit.""" + + def get_command_iter(self, old_path, new_path, kind='file'): + + # Revno 1: create two files or symlinks + # Revno 2: delete one, rename the other to it then modify the newly created path + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand(old_path, kind_to_mode(kind, False), + None, "aaa") + yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False), + None, "zzz") + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + def files_two(): + yield commands.FileDeleteCommand(new_path) + yield commands.FileRenameCommand(old_path, new_path) + yield commands.FileModifyCommand(new_path, kind_to_mode(kind, False), + None, "bbb") + yield commands.CommitCommand('head', '2', author, + committer, "commit 2", ":1", [], files_two) + return command_list + + def test_delete_rename_then_modify_file_in_root(self): + handler, branch = self.get_handler() + old_path = 'a' + new_path = 'b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(old_path,), (new_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree1, new_path, "zzz") + self.assertContent(branch, revtree2, new_path, "bbb") + self.assertRevisionRoot(revtree1, old_path) + self.assertRevisionRoot(revtree1, new_path) + self.assertRevisionRoot(revtree2, new_path) + + def test_delete_rename_then_modify_file_in_subdir(self): + handler, branch = self.get_handler() + old_path = 'd/a' + new_path = 'd/b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d',), (old_path,), (new_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree1, new_path, "zzz") + self.assertContent(branch, revtree2, new_path, "bbb") + + def test_delete_rename_then_modify_file_in_new_dir(self): + handler, branch = self.get_handler() + old_path = 'd1/a' + new_path = 'd2/b' + handler.process(self.get_command_iter(old_path, new_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d1',), ('d2',), (old_path,), (new_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[('d1',), (new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertContent(branch, revtree1, old_path, "aaa") + self.assertContent(branch, revtree1, new_path, "zzz") + self.assertContent(branch, revtree2, new_path, "bbb") + + def test_delete_rename_then_modify_symlink_in_root(self): + handler, branch = self.get_handler() + old_path = 'a' + new_path = 'b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(old_path,), (new_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree1, new_path, "zzz") + self.assertSymlinkTarget(branch, revtree2, new_path, "bbb") + self.assertRevisionRoot(revtree1, old_path) + self.assertRevisionRoot(revtree1, new_path) + self.assertRevisionRoot(revtree2, new_path) + + def test_delete_rename_then_modify_symlink_in_subdir(self): + handler, branch = self.get_handler() + old_path = 'd/a' + new_path = 'd/b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d',), (old_path,), (new_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree1, new_path, "zzz") + self.assertSymlinkTarget(branch, revtree2, new_path, "bbb") + + def test_delete_rename_then_modify_symlink_in_new_dir(self): + handler, branch = self.get_handler() + old_path = 'd1/a' + new_path = 'd2/b' + handler.process(self.get_command_iter(old_path, new_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d1',), ('d2',), (old_path,), (new_path,)]) + # Note: the delta doesn't show the modification? + # The actual new content is validated in the assertions following. + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[('d1',), (new_path,)], + expected_renamed=[(old_path, new_path)]) + self.assertSymlinkTarget(branch, revtree1, old_path, "aaa") + self.assertSymlinkTarget(branch, revtree1, new_path, "zzz") + self.assertSymlinkTarget(branch, revtree2, new_path, "bbb") + class TestImportToPackRenameTricky(TestCaseForGenericProcessor): def file_command_iter(self, path1, old_path2, new_path2, kind='file'): + # Revno 1: create two files or symlinks in a directory # Revno 2: rename the second file so that it implicitly deletes the # first one because either: @@ -538,9 +1407,9 @@ class TestImportToPackRenameTricky(TestCaseForGenericProcessor): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): - yield commands.FileModifyCommand(path1, kind, False, + yield commands.FileModifyCommand(path1, kind_to_mode(kind, False), None, "aaa") - yield commands.FileModifyCommand(old_path2, kind, False, + yield commands.FileModifyCommand(old_path2, kind_to_mode(kind, False), None, "bbb") yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) @@ -550,7 +1419,6 @@ class TestImportToPackRenameTricky(TestCaseForGenericProcessor): committer, "commit 2", ":1", [], files_two) return command_list - def test_rename_file_becomes_directory(self): handler, branch = self.get_handler() old_path2 = 'foo' @@ -613,13 +1481,14 @@ class TestImportToPackRenameTricky(TestCaseForGenericProcessor): class TestImportToPackCopy(TestCaseForGenericProcessor): def file_command_iter(self, src_path, dest_path, kind='file'): + # Revno 1: create a file or symlink # Revno 2: copy it def command_list(): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): - yield commands.FileModifyCommand(src_path, kind, False, + yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False), None, "aaa") yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) @@ -700,79 +1569,344 @@ class TestImportToPackCopy(TestCaseForGenericProcessor): self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa") -class TestImportToPackFileKinds(TestCaseForGenericProcessor): +class TestImportToPackCopyNew(TestCaseForGenericProcessor): + """Test copy of a newly added file.""" - def get_command_iter(self, path, kind, content): + def file_command_iter(self, src_path, dest_path, kind='file'): + + # Revno 1: create a file or symlink and copy it def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): - yield commands.FileModifyCommand(path, kind, False, - None, content) - yield commands.CommitCommand('head', '1', None, + yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False), + None, "aaa") + yield commands.FileCopyCommand(src_path, dest_path) + yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) return command_list - def test_import_plainfile(self): + def test_copy_new_file_in_root(self): handler, branch = self.get_handler() - handler.process(self.get_command_iter('foo', 'file', 'aaa')) + src_path = 'a' + dest_path = 'b' + handler.process(self.file_command_iter(src_path, dest_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(src_path,), (dest_path,)]) + self.assertContent(branch, revtree1, src_path, "aaa") + self.assertContent(branch, revtree1, dest_path, "aaa") + self.assertRevisionRoot(revtree1, src_path) + self.assertRevisionRoot(revtree1, dest_path) - def test_import_symlink(self): + def test_copy_new_file_in_subdir(self): handler, branch = self.get_handler() - handler.process(self.get_command_iter('foo', 'symlink', 'bar')) + src_path = 'a/a' + dest_path = 'a/b' + handler.process(self.file_command_iter(src_path, dest_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('a',), (src_path,), (dest_path,)]) + self.assertContent(branch, revtree1, src_path, "aaa") + self.assertContent(branch, revtree1, dest_path, "aaa") + def test_copy_new_file_to_new_dir(self): + handler, branch = self.get_handler() + src_path = 'a/a' + dest_path = 'b/a' + handler.process(self.file_command_iter(src_path, dest_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('a',), (src_path,), ('b',), (dest_path,)]) + self.assertContent(branch, revtree1, src_path, "aaa") + self.assertContent(branch, revtree1, dest_path, "aaa") -### TODO: Parameterise tests rather than below hack + def test_copy_new_symlink_in_root(self): + handler, branch = self.get_handler() + src_path = 'a' + dest_path = 'b' + handler.process(self.file_command_iter(src_path, dest_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(src_path,), (dest_path,)]) + self.assertSymlinkTarget(branch, revtree1, src_path, "aaa") + self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa") + self.assertRevisionRoot(revtree1, src_path) + self.assertRevisionRoot(revtree1, dest_path) -class TestImportToRichRootModify(TestImportToPackModify): - branch_format = "1.9-rich-root" + def test_copy_new_symlink_in_subdir(self): + handler, branch = self.get_handler() + src_path = 'a/a' + dest_path = 'a/b' + handler.process(self.file_command_iter(src_path, dest_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('a',), (src_path,), (dest_path,)]) + self.assertSymlinkTarget(branch, revtree1, src_path, "aaa") + self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa") -class TestImportToRichRootModifyTricky(TestImportToPackModifyTricky): - branch_format = "1.9-rich-root" + def test_copy_new_symlink_to_new_dir(self): + handler, branch = self.get_handler() + src_path = 'a/a' + dest_path = 'b/a' + handler.process(self.file_command_iter(src_path, dest_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('a',), (src_path,), ('b',), (dest_path,)]) + self.assertSymlinkTarget(branch, revtree1, src_path, "aaa") + self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa") -class TestImportToRichRootDelete(TestImportToPackDelete): - branch_format = "1.9-rich-root" -class TestImportToRichRootDeleteDirectory(TestImportToPackDeleteDirectory): - branch_format = "1.9-rich-root" +class TestImportToPackCopyToDeleted(TestCaseForGenericProcessor): -class TestImportToRichRootRename(TestImportToPackRename): - branch_format = "1.9-rich-root" + def file_command_iter(self, src_path, dest_path, kind='file'): -class TestImportToRichRootRenameTricky(TestImportToPackRenameTricky): - branch_format = "1.9-rich-root" + # Revno 1: create two files or symlinks + # Revno 2: delete one and copy the other one to its path + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False), + None, "aaa") + yield commands.FileModifyCommand(dest_path, kind_to_mode(kind, False), + None, "bbb") + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + def files_two(): + yield commands.FileDeleteCommand(dest_path) + yield commands.FileCopyCommand(src_path, dest_path) + yield commands.CommitCommand('head', '2', author, + committer, "commit 2", ":1", [], files_two) + return command_list -class TestImportToRichRootCopy(TestImportToPackCopy): - branch_format = "1.9-rich-root" + def test_copy_to_deleted_file_in_root(self): + handler, branch = self.get_handler() + src_path = 'a' + dest_path = 'b' + handler.process(self.file_command_iter(src_path, dest_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(src_path,), (dest_path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(dest_path,)], + expected_added=[(dest_path,)]) + self.assertContent(branch, revtree1, src_path, "aaa") + self.assertContent(branch, revtree1, dest_path, "bbb") + self.assertContent(branch, revtree2, src_path, "aaa") + self.assertContent(branch, revtree2, dest_path, "aaa") + self.assertRevisionRoot(revtree1, src_path) + self.assertRevisionRoot(revtree1, dest_path) -class TestImportToRichRootFileKinds(TestImportToPackFileKinds): - branch_format = "1.9-rich-root" + def test_copy_to_deleted_symlink_in_root(self): + handler, branch = self.get_handler() + src_path = 'a' + dest_path = 'b' + handler.process(self.file_command_iter(src_path, dest_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[(src_path,), (dest_path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(dest_path,)], + expected_added=[(dest_path,)]) + self.assertSymlinkTarget(branch, revtree1, src_path, "aaa") + self.assertSymlinkTarget(branch, revtree1, dest_path, "bbb") + self.assertSymlinkTarget(branch, revtree2, src_path, "aaa") + self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa") + self.assertRevisionRoot(revtree1, src_path) + self.assertRevisionRoot(revtree1, dest_path) -try: - from bzrlib.repofmt.groupcompress_repo import RepositoryFormatCHK1 + def test_copy_to_deleted_file_in_subdir(self): + handler, branch = self.get_handler() + src_path = 'd/a' + dest_path = 'd/b' + handler.process(self.file_command_iter(src_path, dest_path)) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d',), (src_path,), (dest_path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(dest_path,)], + expected_added=[(dest_path,)]) + self.assertContent(branch, revtree1, src_path, "aaa") + self.assertContent(branch, revtree1, dest_path, "bbb") + self.assertContent(branch, revtree2, src_path, "aaa") + self.assertContent(branch, revtree2, dest_path, "aaa") + + def test_copy_to_deleted_symlink_in_subdir(self): + handler, branch = self.get_handler() + src_path = 'd/a' + dest_path = 'd/b' + handler.process(self.file_command_iter(src_path, dest_path, 'symlink')) + revtree0, revtree1 = self.assertChanges(branch, 1, + expected_added=[('d',), (src_path,), (dest_path,)]) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_removed=[(dest_path,)], + expected_added=[(dest_path,)]) + self.assertSymlinkTarget(branch, revtree1, src_path, "aaa") + self.assertSymlinkTarget(branch, revtree1, dest_path, "bbb") + self.assertSymlinkTarget(branch, revtree2, src_path, "aaa") + self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa") - class TestImportToChkModify(TestImportToPackModify): - branch_format = "development6-rich-root" - class TestImportToChkModifyTricky(TestImportToPackModifyTricky): - branch_format = "development6-rich-root" +class TestImportToPackCopyModified(TestCaseForGenericProcessor): + """Test copy of file/symlink already modified in this commit.""" - class TestImportToChkDelete(TestImportToPackDelete): - branch_format = "development6-rich-root" + def file_command_iter(self, src_path, dest_path, kind='file'): - class TestImportToChkDeleteDirectory(TestImportToPackDeleteDirectory): - branch_format = "development6-rich-root" + # Revno 1: create a file or symlink + # Revno 2: modify and copy it + def command_list(): + author = ['', 'bugs@a.com', time.time(), time.timezone] + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False), + None, "aaa") + yield commands.CommitCommand('head', '1', author, + committer, "commit 1", None, [], files_one) + def files_two(): + yield commands.FileModifyCommand(src_path, kind_to_mode(kind, False), + None, "bbb") + yield commands.FileCopyCommand(src_path, dest_path) + yield commands.CommitCommand('head', '2', author, + committer, "commit 2", ":1", [], files_two) + return command_list - class TestImportToChkRename(TestImportToPackRename): - branch_format = "development6-rich-root" + def test_copy_of_modified_file_in_root(self): + handler, branch = self.get_handler() + src_path = 'a' + dest_path = 'b' + handler.process(self.file_command_iter(src_path, dest_path)) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_modified=[(src_path,)], + expected_added=[(dest_path,)]) + self.assertContent(branch, revtree1, src_path, "aaa") + self.assertContent(branch, revtree2, src_path, "bbb") + self.assertContent(branch, revtree2, dest_path, "bbb") + self.assertRevisionRoot(revtree1, src_path) + self.assertRevisionRoot(revtree2, dest_path) - class TestImportToChkRenameTricky(TestImportToPackRenameTricky): - branch_format = "development6-rich-root" + def test_copy_of_modified_file_in_subdir(self): + handler, branch = self.get_handler() + src_path = 'd/a' + dest_path = 'd/b' + handler.process(self.file_command_iter(src_path, dest_path)) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_modified=[(src_path,)], + expected_added=[(dest_path,)]) + self.assertContent(branch, revtree1, src_path, "aaa") + self.assertContent(branch, revtree2, src_path, "bbb") + self.assertContent(branch, revtree2, dest_path, "bbb") - class TestImportToChkCopy(TestImportToPackCopy): - branch_format = "development6-rich-root" + def test_copy_of_modified_file_to_new_dir(self): + handler, branch = self.get_handler() + src_path = 'd1/a' + dest_path = 'd2/a' + handler.process(self.file_command_iter(src_path, dest_path)) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_modified=[(src_path,)], + expected_added=[('d2',), (dest_path,)]) + self.assertContent(branch, revtree1, src_path, "aaa") + self.assertContent(branch, revtree2, src_path, "bbb") + self.assertContent(branch, revtree2, dest_path, "bbb") - class TestImportToChkFileKinds(TestImportToPackFileKinds): - branch_format = "development6-rich-root" + def test_copy_of_modified_symlink_in_root(self): + handler, branch = self.get_handler() + src_path = 'a' + dest_path = 'b' + handler.process(self.file_command_iter(src_path, dest_path, 'symlink')) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_modified=[(src_path,)], + expected_added=[(dest_path,)]) + self.assertSymlinkTarget(branch, revtree1, src_path, "aaa") + self.assertSymlinkTarget(branch, revtree2, src_path, "bbb") + self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb") + self.assertRevisionRoot(revtree1, src_path) + self.assertRevisionRoot(revtree2, dest_path) -except ImportError: - pass + def test_copy_of_modified_symlink_in_subdir(self): + handler, branch = self.get_handler() + src_path = 'd/a' + dest_path = 'd/b' + handler.process(self.file_command_iter(src_path, dest_path, 'symlink')) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_modified=[(src_path,)], + expected_added=[(dest_path,)]) + self.assertSymlinkTarget(branch, revtree1, src_path, "aaa") + self.assertSymlinkTarget(branch, revtree2, src_path, "bbb") + self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb") + + def test_copy_of_modified_symlink_to_new_dir(self): + handler, branch = self.get_handler() + src_path = 'd1/a' + dest_path = 'd2/a' + handler.process(self.file_command_iter(src_path, dest_path, 'symlink')) + revtree1, revtree2 = self.assertChanges(branch, 2, + expected_modified=[(src_path,)], + expected_added=[('d2',), (dest_path,)]) + self.assertSymlinkTarget(branch, revtree1, src_path, "aaa") + self.assertSymlinkTarget(branch, revtree2, src_path, "bbb") + self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb") + + +class TestImportToPackFileKinds(TestCaseForGenericProcessor): + + def get_command_iter(self, path, kind, content): + + def command_list(): + committer = ['', 'elmer@a.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand(path, kind_to_mode(kind, False), + None, content) + yield commands.CommitCommand('head', '1', None, + committer, "commit 1", None, [], files_one) + return command_list + + def test_import_plainfile(self): + handler, branch = self.get_handler() + handler.process(self.get_command_iter('foo', 'file', 'aaa')) + + def test_import_symlink(self): + handler, branch = self.get_handler() + handler.process(self.get_command_iter('foo', 'symlink', 'bar')) + + +class TestModifyRevertInBranch(TestCaseForGenericProcessor): + + def file_command_iter(self): + # A add 'foo' + # |\ + # | B modify 'foo' + # | | + # | C revert 'foo' back to A + # |/ + # D merge 'foo' + def command_list(): + committer_a = ['', 'a@elmer.com', time.time(), time.timezone] + committer_b = ['', 'b@elmer.com', time.time(), time.timezone] + committer_c = ['', 'c@elmer.com', time.time(), time.timezone] + committer_d = ['', 'd@elmer.com', time.time(), time.timezone] + def files_one(): + yield commands.FileModifyCommand('foo', kind_to_mode('file', False), + None, "content A\n") + yield commands.CommitCommand('head', '1', None, + committer_a, "commit 1", None, [], files_one) + def files_two(): + yield commands.FileModifyCommand('foo', kind_to_mode('file', False), + None, "content B\n") + yield commands.CommitCommand('head', '2', None, + committer_b, "commit 2", ":1", [], files_two) + def files_three(): + yield commands.FileModifyCommand('foo', kind_to_mode('file', False), + None, "content A\n") + yield commands.CommitCommand('head', '3', None, + committer_c, "commit 3", ":2", [], files_three) + yield commands.CommitCommand('head', '4', None, + committer_d, "commit 4", ":1", [':3'], lambda: []) + return command_list + + def test_modify_revert(self): + handler, branch = self.get_handler() + handler.process(self.file_command_iter()) + branch.lock_read() + self.addCleanup(branch.unlock) + rev_d = branch.last_revision() + rev_a, rev_c = branch.repository.get_parent_map([rev_d])[rev_d] + rev_b = branch.repository.get_parent_map([rev_c])[rev_c][0] + rtree_a, rtree_b, rtree_c, rtree_d = branch.repository.revision_trees([ + rev_a, rev_b, rev_c, rev_d]) + foo_id = rtree_a.path2id('foo') + self.assertEqual(rev_a, rtree_a.inventory[foo_id].revision) + self.assertEqual(rev_b, rtree_b.inventory[foo_id].revision) + self.assertEqual(rev_c, rtree_c.inventory[foo_id].revision) + self.assertEqual(rev_c, rtree_d.inventory[foo_id].revision) diff --git a/tests/test_head_tracking.py b/tests/test_head_tracking.py deleted file mode 100644 index 63712e0..0000000 --- a/tests/test_head_tracking.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (C) 2009 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Test tracking of heads""" - -from cStringIO import StringIO - -from bzrlib import tests - -from bzrlib.plugins.fastimport import ( - commands, - parser, - ) -from bzrlib.plugins.fastimport.cache_manager import CacheManager - - -# A sample input stream that only adds files to a branch -_SAMPLE_MAINLINE = \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 doc/README.txt -blob -mark :2 -data 17 -Life -is -good ... -commit refs/heads/master -mark :101 -committer a <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :2 NEWS -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :101 -M 644 :3 doc/README.txt -M 644 :4 doc/index.txt -""" - -# A sample input stream that adds files to two branches -_SAMPLE_TWO_HEADS = \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 doc/README.txt -blob -mark :2 -data 17 -Life -is -good ... -commit refs/heads/mybranch -mark :101 -committer a <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :2 NEWS -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 doc/README.txt -M 644 :4 doc/index.txt -""" - -# A sample input stream that adds files to two branches -_SAMPLE_TWO_BRANCHES_MERGED = \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 doc/README.txt -blob -mark :2 -data 17 -Life -is -good ... -commit refs/heads/mybranch -mark :101 -committer a <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :2 NEWS -blob -mark :3 -data 19 -Welcome! -my friend -blob -mark :4 -data 11 -== Docs == -commit refs/heads/master -mark :102 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -M 644 :3 doc/README.txt -M 644 :4 doc/index.txt -commit refs/heads/master -mark :103 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :102 -merge :101 -D doc/index.txt -""" - -# A sample input stream that contains a reset -_SAMPLE_RESET = \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 doc/README.txt -reset refs/remotes/origin/master -from :100 -""" - -# A sample input stream that contains a reset and more commits -_SAMPLE_RESET_WITH_MORE_COMMITS = \ -"""blob -mark :1 -data 9 -Welcome! -commit refs/heads/master -mark :100 -committer a <b@c> 1234798653 +0000 -data 4 -test -M 644 :1 doc/README.txt -reset refs/remotes/origin/master -from :100 -commit refs/remotes/origin/master -mark :101 -committer d <b@c> 1234798653 +0000 -data 8 -test -ing -from :100 -D doc/README.txt -""" - -class TestHeadTracking(tests.TestCase): - - def assertHeads(self, input, expected): - s = StringIO(input) - p = parser.ImportParser(s) - cm = CacheManager() - for cmd in p.iter_commands(): - if isinstance(cmd, commands.CommitCommand): - cm.track_heads(cmd) - # eat the file commands - list(cmd.file_iter()) - elif isinstance(cmd, commands.ResetCommand): - if cmd.from_ is not None: - cm.track_heads_for_ref(cmd.ref, cmd.from_) - self.assertEqual(cm.heads, expected) - - def test_mainline(self): - self.assertHeads(_SAMPLE_MAINLINE, { - ':102': set(['refs/heads/master']), - }) - - def test_two_heads(self): - self.assertHeads(_SAMPLE_TWO_HEADS, { - ':101': set(['refs/heads/mybranch']), - ':102': set(['refs/heads/master']), - }) - - def test_two_branches_merged(self): - self.assertHeads(_SAMPLE_TWO_BRANCHES_MERGED, { - ':103': set(['refs/heads/master']), - }) - - def test_reset(self): - self.assertHeads(_SAMPLE_RESET, { - ':100': set(['refs/heads/master', 'refs/remotes/origin/master']), - }) - - def test_reset_with_more_commits(self): - self.assertHeads(_SAMPLE_RESET_WITH_MORE_COMMITS, { - ':101': set(['refs/remotes/origin/master']), - }) diff --git a/tests/test_helpers.py b/tests/test_helpers.py deleted file mode 100644 index 89009d1..0000000 --- a/tests/test_helpers.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (C) 2009 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Test the helper functions.""" - -from bzrlib import tests - -from bzrlib.plugins.fastimport import ( - helpers, - ) - - -class TestCommonDirectory(tests.TestCase): - - def test_no_paths(self): - c = helpers.common_directory(None) - self.assertEqual(c, None) - c = helpers.common_directory([]) - self.assertEqual(c, None) - - def test_one_path(self): - c = helpers.common_directory(['foo']) - self.assertEqual(c, '') - c = helpers.common_directory(['foo/']) - self.assertEqual(c, 'foo/') - c = helpers.common_directory(['foo/bar']) - self.assertEqual(c, 'foo/') - - def test_two_paths(self): - c = helpers.common_directory(['foo', 'bar']) - self.assertEqual(c, '') - c = helpers.common_directory(['foo/', 'bar']) - self.assertEqual(c, '') - c = helpers.common_directory(['foo/', 'foo/bar']) - self.assertEqual(c, 'foo/') - c = helpers.common_directory(['foo/bar/x', 'foo/bar/y']) - self.assertEqual(c, 'foo/bar/') - c = helpers.common_directory(['foo/bar/aa_x', 'foo/bar/aa_y']) - self.assertEqual(c, 'foo/bar/') - - def test_lots_of_paths(self): - c = helpers.common_directory(['foo/bar/x', 'foo/bar/y', 'foo/bar/z']) - self.assertEqual(c, 'foo/bar/') diff --git a/tests/test_parser.py b/tests/test_parser.py deleted file mode 100644 index 91e27f0..0000000 --- a/tests/test_parser.py +++ /dev/null @@ -1,212 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Test the Import parsing""" - -import StringIO - -from bzrlib import tests - -from bzrlib.plugins.fastimport import ( - errors, - parser, - ) - - -class TestLineBasedParser(tests.TestCase): - - def test_push_line(self): - s = StringIO.StringIO("foo\nbar\nbaz\n") - p = parser.LineBasedParser(s) - self.assertEqual('foo', p.next_line()) - self.assertEqual('bar', p.next_line()) - p.push_line('bar') - self.assertEqual('bar', p.next_line()) - self.assertEqual('baz', p.next_line()) - self.assertEqual(None, p.next_line()) - - def test_read_bytes(self): - s = StringIO.StringIO("foo\nbar\nbaz\n") - p = parser.LineBasedParser(s) - self.assertEqual('fo', p.read_bytes(2)) - self.assertEqual('o\nb', p.read_bytes(3)) - self.assertEqual('ar', p.next_line()) - # Test that the line buffer is ignored - p.push_line('bar') - self.assertEqual('baz', p.read_bytes(3)) - # Test missing bytes - self.assertRaises(errors.MissingBytes, p.read_bytes, 10) - - def test_read_until(self): - # TODO - return - s = StringIO.StringIO("foo\nbar\nbaz\nabc\ndef\nghi\n") - p = parser.LineBasedParser(s) - self.assertEqual('foo\nbar', p.read_until('baz')) - self.assertEqual('abc', p.next_line()) - # Test that the line buffer is ignored - p.push_line('abc') - self.assertEqual('def', p.read_until('ghi')) - # Test missing terminator - self.assertRaises(errors.MissingTerminator, p.read_until('>>>')) - - -# Sample text -_sample_import_text = """ -progress completed -# Test blob formats -blob -mark :1 -data 4 -aaaablob -data 5 -bbbbb -# Commit formats -commit refs/heads/master -mark :2 -committer bugs bunny <bugs@bunny.org> now -data 14 -initial import -M 644 inline README -data 18 -Welcome from bugs -commit refs/heads/master -committer <bugs@bunny.org> now -data 13 -second commit -from :2 -M 644 inline README -data 23 -Welcome from bugs, etc. -# Miscellaneous -checkpoint -progress completed -# Test a commit without sub-commands (bug #351717) -commit refs/heads/master -mark :3 -author <bugs@bunny.org> now -committer <bugs@bunny.org> now -data 20 -first commit, empty -# Test a commit with a heredoc-style (delimited_data) messsage (bug #400960) -commit refs/heads/master -mark :4 -author <bugs@bunny.org> now -committer <bugs@bunny.org> now -data <<EOF -Commit with heredoc-style message -EOF -""" - - -class TestImportParser(tests.TestCase): - - def test_iter_commands(self): - s = StringIO.StringIO(_sample_import_text) - p = parser.ImportParser(s) - result = [] - for cmd in p.iter_commands(): - result.append(cmd) - if cmd.name == 'commit': - for fc in cmd.file_iter(): - result.append(fc) - self.assertEqual(len(result), 11) - cmd1 = result.pop(0) - self.assertEqual('progress', cmd1.name) - self.assertEqual('completed', cmd1.message) - cmd2 = result.pop(0) - self.assertEqual('blob', cmd2.name) - self.assertEqual('1', cmd2.mark) - self.assertEqual(':1', cmd2.id) - self.assertEqual('aaaa', cmd2.data) - self.assertEqual(4, cmd2.lineno) - cmd3 = result.pop(0) - self.assertEqual('blob', cmd3.name) - self.assertEqual('@7', cmd3.id) - self.assertEqual(None, cmd3.mark) - self.assertEqual('bbbbb', cmd3.data) - self.assertEqual(7, cmd3.lineno) - cmd4 = result.pop(0) - self.assertEqual('commit', cmd4.name) - self.assertEqual('2', cmd4.mark) - self.assertEqual(':2', cmd4.id) - self.assertEqual('initial import', cmd4.message) - self.assertEqual('bugs bunny', cmd4.committer[0]) - self.assertEqual('bugs@bunny.org', cmd4.committer[1]) - # FIXME: check timestamp and timezone as well - self.assertEqual(None, cmd4.author) - self.assertEqual(11, cmd4.lineno) - self.assertEqual('refs/heads/master', cmd4.ref) - self.assertEqual(None, cmd4.from_) - self.assertEqual([], cmd4.merges) - file_cmd1 = result.pop(0) - self.assertEqual('filemodify', file_cmd1.name) - self.assertEqual('README', file_cmd1.path) - self.assertEqual('file', file_cmd1.kind) - self.assertEqual(False, file_cmd1.is_executable) - self.assertEqual('Welcome from bugs\n', file_cmd1.data) - cmd5 = result.pop(0) - self.assertEqual('commit', cmd5.name) - self.assertEqual(None, cmd5.mark) - self.assertEqual('@19', cmd5.id) - self.assertEqual('second commit', cmd5.message) - self.assertEqual('', cmd5.committer[0]) - self.assertEqual('bugs@bunny.org', cmd5.committer[1]) - # FIXME: check timestamp and timezone as well - self.assertEqual(None, cmd5.author) - self.assertEqual(19, cmd5.lineno) - self.assertEqual('refs/heads/master', cmd5.ref) - self.assertEqual(':2', cmd5.from_) - self.assertEqual([], cmd5.merges) - file_cmd2 = result.pop(0) - self.assertEqual('filemodify', file_cmd2.name) - self.assertEqual('README', file_cmd2.path) - self.assertEqual('file', file_cmd2.kind) - self.assertEqual(False, file_cmd2.is_executable) - self.assertEqual('Welcome from bugs, etc.', file_cmd2.data) - cmd6 = result.pop(0) - self.assertEqual(cmd6.name, 'checkpoint') - cmd7 = result.pop(0) - self.assertEqual('progress', cmd7.name) - self.assertEqual('completed', cmd7.message) - cmd = result.pop(0) - self.assertEqual('commit', cmd.name) - self.assertEqual('3', cmd.mark) - self.assertEqual(None, cmd.from_) - cmd = result.pop(0) - self.assertEqual('commit', cmd.name) - self.assertEqual('4', cmd.mark) - self.assertEqual('Commit with heredoc-style message\n', cmd.message) - - -class TestStringParsing(tests.TestCase): - - def test_unquote(self): - s = r'hello \"sweet\" wo\\r\tld' - self.assertEquals(r'hello "sweet" wo\r' + "\tld", - parser._unquote_c_string(s)) - - -class TestPathPairParsing(tests.TestCase): - - def test_path_pair_simple(self): - p = parser.ImportParser("") - self.assertEqual(['foo', 'bar'], p._path_pair("foo bar")) - - def test_path_pair_spaces_in_first(self): - p = parser.ImportParser("") - self.assertEqual(['foo bar', 'baz'], - p._path_pair('"foo bar" baz')) diff --git a/tests/test_revision_store.py b/tests/test_revision_store.py new file mode 100644 index 0000000..9e39254 --- /dev/null +++ b/tests/test_revision_store.py @@ -0,0 +1,152 @@ +# Copyright (C) 2008, 2009 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +"""Direct tests of the revision_store classes.""" + +from bzrlib import ( + branch, + errors, + inventory, + osutils, + tests, + ) + +from bzrlib.plugins.fastimport import ( + revision_store, + ) +from bzrlib.plugins.fastimport.tests import ( + FastimportFeature, + ) + + +class Test_TreeShim(tests.TestCase): + + _test_needs_features = [FastimportFeature] + + def invAddEntry(self, inv, path, file_id=None): + if path.endswith('/'): + path = path[:-1] + kind = 'directory' + else: + kind = 'file' + parent_path, basename = osutils.split(path) + parent_id = inv.path2id(parent_path) + inv.add(inventory.make_entry(kind, basename, parent_id, file_id)) + + def make_trivial_basis_inv(self): + basis_inv = inventory.Inventory('TREE_ROOT') + self.invAddEntry(basis_inv, 'foo', 'foo-id') + self.invAddEntry(basis_inv, 'bar/', 'bar-id') + self.invAddEntry(basis_inv, 'bar/baz', 'baz-id') + return basis_inv + + def test_id2path_no_delta(self): + basis_inv = self.make_trivial_basis_inv() + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=[], content_provider=None) + self.assertEqual('', shim.id2path('TREE_ROOT')) + self.assertEqual('foo', shim.id2path('foo-id')) + self.assertEqual('bar', shim.id2path('bar-id')) + self.assertEqual('bar/baz', shim.id2path('baz-id')) + self.assertRaises(errors.NoSuchId, shim.id2path, 'qux-id') + + def test_id2path_with_delta(self): + basis_inv = self.make_trivial_basis_inv() + foo_entry = inventory.make_entry('file', 'foo2', 'TREE_ROOT', 'foo-id') + inv_delta = [('foo', 'foo2', 'foo-id', foo_entry), + ('bar/baz', None, 'baz-id', None), + ] + + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=inv_delta, + content_provider=None) + self.assertEqual('', shim.id2path('TREE_ROOT')) + self.assertEqual('foo2', shim.id2path('foo-id')) + self.assertEqual('bar', shim.id2path('bar-id')) + self.assertRaises(errors.NoSuchId, shim.id2path, 'baz-id') + + def test_path2id(self): + basis_inv = self.make_trivial_basis_inv() + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=[], content_provider=None) + self.assertEqual('TREE_ROOT', shim.path2id('')) + # We don't want to ever give a wrong value, so for now we just raise + # NotImplementedError + self.assertRaises(NotImplementedError, shim.path2id, 'bar') + + def test_get_file_with_stat_content_in_stream(self): + basis_inv = self.make_trivial_basis_inv() + + def content_provider(file_id): + return 'content of\n' + file_id + '\n' + + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=[], + content_provider=content_provider) + f_obj, stat_val = shim.get_file_with_stat('baz-id') + self.assertIs(None, stat_val) + self.assertEqualDiff('content of\nbaz-id\n', f_obj.read()) + + # TODO: Test when the content isn't in the stream, and we fall back to the + # repository that was passed in + + def test_get_symlink_target(self): + basis_inv = self.make_trivial_basis_inv() + ie = inventory.make_entry('symlink', 'link', 'TREE_ROOT', 'link-id') + ie.symlink_target = u'link-target' + basis_inv.add(ie) + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=[], content_provider=None) + self.assertEqual(u'link-target', shim.get_symlink_target('link-id')) + + def test_get_symlink_target_from_delta(self): + basis_inv = self.make_trivial_basis_inv() + ie = inventory.make_entry('symlink', 'link', 'TREE_ROOT', 'link-id') + ie.symlink_target = u'link-target' + inv_delta = [(None, 'link', 'link-id', ie)] + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=inv_delta, + content_provider=None) + self.assertEqual(u'link-target', shim.get_symlink_target('link-id')) + + def test__delta_to_iter_changes(self): + basis_inv = self.make_trivial_basis_inv() + foo_entry = inventory.make_entry('file', 'foo2', 'bar-id', 'foo-id') + link_entry = inventory.make_entry('symlink', 'link', 'TREE_ROOT', + 'link-id') + link_entry.symlink_target = u'link-target' + inv_delta = [('foo', 'bar/foo2', 'foo-id', foo_entry), + ('bar/baz', None, 'baz-id', None), + (None, 'link', 'link-id', link_entry), + ] + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=inv_delta, + content_provider=None) + changes = list(shim._delta_to_iter_changes()) + expected = [('foo-id', ('foo', 'bar/foo2'), False, (True, True), + ('TREE_ROOT', 'bar-id'), ('foo', 'foo2'), + ('file', 'file'), (False, False)), + ('baz-id', ('bar/baz', None), True, (True, False), + ('bar-id', None), ('baz', None), + ('file', None), (False, None)), + ('link-id', (None, 'link'), True, (False, True), + (None, 'TREE_ROOT'), (None, 'link'), + (None, 'symlink'), (None, False)), + ] + # from pprint import pformat + # self.assertEqualDiff(pformat(expected), pformat(changes)) + self.assertEqual(expected, changes) + diff --git a/user_mapper.py b/user_mapper.py new file mode 100644 index 0000000..4fcf4a4 --- /dev/null +++ b/user_mapper.py @@ -0,0 +1,81 @@ +# Copyright (C) 2009 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +from email import Utils + + +class UserMapper(object): + + def __init__(self, lines): + """Create a user-mapper from a list of lines. + + Blank lines and comment lines (starting with #) are ignored. + Otherwise lines are of the form: + + old-id = new-id + + Each id may be in the following forms: + + name <email> + name + + If old-id has the value '@', then new-id is the domain to use + when generating an email from a user-id. + """ + self._parse(lines) + + def _parse(self, lines): + self._user_map = {} + self._default_domain = None + for line in lines: + line = line.strip() + if len(line) == 0 or line.startswith('#'): + continue + old, new = line.split('=', 1) + old = old.strip() + new = new.strip() + if old == '@': + self._default_domain = new + continue + # Parse each id into a name and email address + old_name, old_email = self._parse_id(old) + new_name, new_email = self._parse_id(new) + #print "found user map: %s => %s" % ((old_name, old_email), (new_name, new_email)) + self._user_map[(old_name, old_email)] = (new_name, new_email) + + def _parse_id(self, id): + if id.find('<') == -1: + return id, None + else: + return Utils.parseaddr(id) + + def map_name_and_email(self, name, email): + """Map a name and an email to the preferred name and email. + + :param name: the current name + :param email: the current email + :result: the preferred name and email + """ + try: + new_name, new_email = self._user_map[(name, email)] + except KeyError: + new_name = name + if self._default_domain and not email: + new_email = "%s@%s" % (name, self._default_domain) + else: + new_email = email + #print "converted '%s <%s>' to '%s <%s>'" % (name, email, new_name, new_email) + return new_name, new_email |