diff options
author | Jelmer Vernooij <jelmer@jelmer.uk> | 2018-04-15 13:33:37 +0100 |
---|---|---|
committer | Jelmer Vernooij <jelmer@jelmer.uk> | 2018-04-15 13:33:37 +0100 |
commit | 088bc0459e50bbcb948f3317772825859347b014 (patch) | |
tree | e6184ba87d25c109e46781fdfdf6b6a1ce737ddf | |
parent | 7a12c836ba6be9c90077ab1b1f378ca2f40c26f9 (diff) | |
parent | fd59e7d835b63d1c9181c4c84dd311d9d6da6d6b (diff) | |
download | python-fastimport-git-088bc0459e50bbcb948f3317772825859347b014.tar.gz |
New upstream version 0.9.8 (ref: upstream/0.9.8)
-rw-r--r-- | NEWS | 12 | ||||
-rw-r--r-- | PKG-INFO | 15 | ||||
-rw-r--r-- | README.md | 2 | ||||
-rwxr-xr-x | bin/fast-import-filter | 100 | ||||
-rwxr-xr-x | bin/fast-import-info | 53 | ||||
-rwxr-xr-x | bin/fast-import-query | 77 | ||||
-rw-r--r-- | fastimport/__init__.py | 2 | ||||
-rw-r--r-- | fastimport/commands.py | 13 | ||||
-rw-r--r-- | fastimport/helpers.py | 76 | ||||
-rw-r--r-- | fastimport/processors/info_processor.py | 286 | ||||
-rw-r--r-- | fastimport/reftracker.py | 68 | ||||
-rw-r--r-- | fastimport/tests/__init__.py | 1 | ||||
-rw-r--r-- | fastimport/tests/test_commands.py | 22 | ||||
-rw-r--r-- | fastimport/tests/test_info_processor.py | 77 | ||||
-rwxr-xr-x | setup.py | 23 |
15 files changed, 814 insertions, 13 deletions
@@ -1,3 +1,15 @@ +0.9.8 2018-04-15 + + * Fix version number. (Jelmer Vernooij) + +0.9.7 2018-04-15 + + * Don't attempt to encode bytestrings in utf8_bytes_helper(). + (Jelmer Vernooij, #1647101) + + * Add fast-import-filter, fast-import-query and fast-import-info + script. (Jelmer Vernooij) + 0.9.6 2016-04-19 * Add python3.4 support (Jelmer Vernooij) @@ -1,10 +1,19 @@ -Metadata-Version: 1.0 +Metadata-Version: 1.1 Name: fastimport -Version: 0.9.6 +Version: 0.9.8 Summary: VCS fastimport/fastexport parser -Home-page: htps://github.com/jelmer/python-fastimport +Home-page: https://github.com/jelmer/python-fastimport Author: Jelmer Vernooij Author-email: jelmer@jelmer.uk License: GNU GPL v2 or later Description: UNKNOWN Platform: UNKNOWN +Classifier: Development Status :: 4 - Beta +Classifier: License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+) +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Operating System :: POSIX +Classifier: Topic :: Software Development :: Version Control @@ -1,3 +1,5 @@ +[![Build Status](https://travis-ci.org/jelmer/python-fastimport.png?branch=master)](https://travis-ci.org/jelmer/python-fastimport) + python-fastimport ================= diff --git a/bin/fast-import-filter b/bin/fast-import-filter new file mode 100755 index 0000000..03dbd01 --- /dev/null +++ b/bin/fast-import-filter @@ -0,0 +1,100 @@ +#!/usr/bin/python + +__doc__ = """Filter a fast-import stream to include/exclude files & directories. + +This command is useful for splitting a subdirectory or bunch of +files out from a project to create a new project complete with history +for just those files. 
It can also be used to create a new project +repository that removes all references to files that should not have +been committed, e.g. security-related information (like passwords), +commercially sensitive material, files with an incompatible license or +large binary files like CD images. + +To specify standard input as the input stream, use a source name +of '-'. If the source name ends in '.gz', it is assumed to be +compressed in gzip format. + +:File/directory filtering: + + This is supported by the -i and -x options. Excludes take precedence + over includes. + + When filtering out a subdirectory (or file), the new stream uses the + subdirectory (or subdirectory containing the file) as the root. As + fast-import doesn't know in advance whether a path is a file or + directory in the stream, you need to specify a trailing '/' on + directories passed to the `--includes option`. If multiple files or + directories are given, the new root is the deepest common directory. + + Note: If a path has been renamed, take care to specify the *original* + path name, not the final name that it ends up with. + +:History rewriting: + + By default fast-import-filter does quite aggressive history rewriting. + Empty commits (or commits which had all their content filtered out) will + be removed, and so are the references to commits not included in the stream. + + Flag --dont-squash-empty-commits reverses this behavior and makes it possible to + use fast-import-filter on incremental streams. 
+ +:Examples: + + Create a new project from a library (note the trailing / on the + directory name of the library):: + + front-end | fast-import-filter -i lib/xxx/ > xxx.fi + fast-import xxx.fi mylibrary.bzr + (lib/xxx/foo is now foo) + + Create a new repository without a sensitive file:: + + front-end | fast-import-filter -x missile-codes.txt > clean.fi + fast-import clean.fi clean.bzr +""" + +import optparse +import sys + +parser = optparse.OptionParser('fast-import-filter [options] SOURCE?') + +parser.add_option('-v', '--verbose', dest="verbose", action="store_true", + help="Be verbose.", default=False) +parser.add_option('-i', '--include-paths', dest="include_paths", + action="append", type=str, + help="Only include commits affecting these paths." + " Directories should have a trailing /.") +parser.add_option('-x', '--exclude-paths', dest="exclude_paths", + type=str, help="Exclude these paths from commits.") +parser.add_option('--dont-squash-empty-commits', + dest="dont_squash_empty_commits", action="store_true", + help="Preserve all commits and links between them", + default=False) + +(opts, args) = parser.parse_args() + +if len(args) == 0: + source_path = "-" +elif len(args) == 1: + source_path = args[0] +else: + parser.print_usage() + +from fastimport.processors import filter_processor +params = { + 'include_paths': opts.include_paths, + 'exclude_paths': opts.exclude_paths, + } +params['squash_empty_commits'] = (not opts.dont_squash_empty_commits) + +from fastimport.errors import ParsingError +from fastimport import parser +from fastimport.helpers import get_source_stream +stream = get_source_stream(source_path) +proc = filter_processor.FilterProcessor(params=params, verbose=opts.verbose) +p = parser.ImportParser(stream, verbose=opts.verbose) +try: + sys.exit(proc.process(p.iter_commands)) +except ParsingError as e: + sys.stderr.write("%d: Parse error: %s\n" % (e.lineno, e)) + sys.exit(1) diff --git a/bin/fast-import-info b/bin/fast-import-info new file mode 
100755 index 0000000..6f67443 --- /dev/null +++ b/bin/fast-import-info @@ -0,0 +1,53 @@ +#!/usr/bin/python +__doc__ = """Output information about a fast-import stream. + +This command reads a fast-import stream and outputs +statistics and interesting properties about what it finds. +When run in verbose mode, the information is output as a +configuration file that can be passed to fast-import to +assist it in intelligently caching objects. + +To specify standard input as the input stream, use a source name +of '-'. If the source name ends in '.gz', it is assumed to be +compressed in gzip format. + +:Examples: + + Display statistics about the import stream produced by front-end:: + + front-end | fast-import-info - + + Create a hints file for running fast-import on a large repository:: + + front-end | fast-import-info -v - > front-end.cfg +""" + +import optparse +import sys + +parser = optparse.OptionParser('fast-import-info [options] SOURCE') + +parser.add_option('-v', '--verbose', dest="verbose", + help="Be verbose.") + +(options, args) = parser.parse_args() + +if len(args) == 0: + source_path = "-" +elif len(args) == 1: + source_path = args[0] +else: + parser.print_usage() + +from fastimport.processors import info_processor +from fastimport.errors import ParsingError +from fastimport.helpers import get_source_stream +from fastimport import parser +stream = get_source_stream(source_path) +proc = info_processor.InfoProcessor(verbose=options.verbose) +p = parser.ImportParser(stream, verbose=options.verbose) +try: + sys.exit(proc.process(p.iter_commands)) +except ParsingError as e: + sys.stderr.write("%d: Parse error: %s\n" % (e.lineno, e)) + sys.exit(1) diff --git a/bin/fast-import-query b/bin/fast-import-query new file mode 100755 index 0000000..6be68c4 --- /dev/null +++ b/bin/fast-import-query @@ -0,0 +1,77 @@ +#!/usr/bin/python +__doc__ = """Query a fast-import stream displaying selected commands. 
+ +To specify standard input as the input stream, use a source name +of '-'. If the source name ends in '.gz', it is assumed to be +compressed in gzip format. + +To specify a commit to display, give its mark using the +--commit-mark option. The commit will be displayed with +file-commands included but with inline blobs hidden. + +To specify the commands to display, use the -C option one or +more times. To specify just some fields for a command, use the +syntax:: + + command=field1,... + +By default, the nominated fields for the nominated commands +are displayed tab separated. To see the information in +a name:value format, use verbose mode. + +Note: Binary fields (e.g. data for blobs) are masked out +so it is generally safe to view the output in a terminal. + +:Examples: + + Show the commit with mark 429:: + + fast-import-query xxx.fi -m429 + + Show all the fields of the reset and tag commands:: + + fast-import-query xxx.fi -Creset -Ctag + + Show the mark and merge fields of the commit commands:: + + fast-import-query xxx.fi -Ccommit=mark,merge +""" + +import optparse +import sys + +parser = optparse.OptionParser('fast-import-query [options] SOURCE?') + +parser.add_option('-v', '--verbose', dest="verbose", + action="store_true", help="Be verbose") +parser.add_option('-m', '--commit-mark', dest="commit_mark", + type=str, help="Mark of the commit to display.") +parser.add_option('-C', '--commands', type=str, + help="Display fields for these commands.") + +(opts, args) = parser.parse_args() + +if len(args) == 0: + source_path = "-" +elif len(args) == 1: + source_path = args[0] +else: + parser.print_usage() + +from fastimport.processors import query_processor +from fastimport.helpers import defines_to_dict, get_source_stream +from fastimport.errors import ParsingError +from fastimport import parser + +params = defines_to_dict(opts.commands) or {} +if opts.commit_mark: + params['commit-mark'] = opts.commit_mark + +stream = get_source_stream(source_path) +proc = 
query_processor.QueryProcessor(verbose=opts.verbose, params=params) +p = parser.ImportParser(stream, verbose=opts.verbose) +try: + sys.exit(proc.process(p.iter_commands)) +except ParsingError as e: + sys.stderr.write("%d: Parse error: %s\n" % (e.lineno, e)) + sys.exit(1) diff --git a/fastimport/__init__.py b/fastimport/__init__.py index 0d17622..0b37616 100644 --- a/fastimport/__init__.py +++ b/fastimport/__init__.py @@ -30,4 +30,4 @@ it can be used by other projects. Use it like so: processor.process(parser.parse()) """ -__version__ = (0, 9, 6) +__version__ = (0, 9, 8) diff --git a/fastimport/commands.py b/fastimport/commands.py index b344911..7f29599 100644 --- a/fastimport/commands.py +++ b/fastimport/commands.py @@ -162,10 +162,13 @@ class CommitCommand(ImportCommand): self.lineno = lineno self._binary = [b'file_iter'] # Provide a unique id in case the mark is missing - if mark is None: + if self.mark is None: self.id = b'@' + ('%d' % lineno).encode('utf-8') else: - self.id = b':' + mark + if isinstance(self.mark, (int)): + self.id = b':' + str(self.mark).encode('utf-8') + else: + self.id = b':' + self.mark def copy(self, **kwargs): if not isinstance(self.file_iter, list): @@ -194,7 +197,11 @@ class CommitCommand(ImportCommand): if self.mark is None: mark_line = b'' else: - mark_line = b'\nmark :' + self.mark + if isinstance(self.mark, (int)): + mark_line = b'\nmark :' + str(self.mark).encode('utf-8') + else: + mark_line = b'\nmark :' + self.mark + if self.author is None: author_section = b'' else: diff --git a/fastimport/helpers.py b/fastimport/helpers.py index c27c436..67072be 100644 --- a/fastimport/helpers.py +++ b/fastimport/helpers.py @@ -99,9 +99,12 @@ def is_inside_any(dir_list, fname): def utf8_bytes_string(s): - """Convert a string to a bytes string encoded in utf8""" + """Convert a string to a bytes string (if necessary, encode in utf8)""" if sys.version_info[0] == 2: - return s.encode('utf8') + if isinstance(s, str): + return s + else: + return 
s.encode('utf8') else: if isinstance(s, str): return bytes(s, encoding='utf8') @@ -191,3 +194,72 @@ class newobject(object): Hook for the future.utils.native() function """ return object(self) + + +def binary_stream(stream): + """Ensure a stream is binary on Windows. + + :return: the stream + """ + try: + import os + if os.name == 'nt': + fileno = getattr(stream, 'fileno', None) + if fileno: + no = fileno() + if no >= 0: # -1 means we're working as subprocess + import msvcrt + msvcrt.setmode(no, os.O_BINARY) + except ImportError: + pass + return stream + + +def invert_dictset(d): + """Invert a dictionary with keys matching a set of values, turned into lists.""" + # Based on recipe from ASPN + result = {} + for k, c in d.items(): + for v in c: + keys = result.setdefault(v, []) + keys.append(k) + return result + + +def invert_dict(d): + """Invert a dictionary with keys matching each value turned into a list.""" + # Based on recipe from ASPN + result = {} + for k, v in d.items(): + keys = result.setdefault(v, []) + keys.append(k) + return result + + +def defines_to_dict(defines): + """Convert a list of definition strings to a dictionary.""" + if defines is None: + return None + result = {} + for define in defines: + kv = define.split('=', 1) + if len(kv) == 1: + result[define.strip()] = 1 + else: + result[kv[0].strip()] = kv[1].strip() + return result + + +def get_source_stream(source): + if source == '-' or source is None: + import sys + stream = binary_stream(sys.stdin) + elif source.endswith('.gz'): + import gzip + stream = gzip.open(source, "rb") + else: + stream = open(source, "rb") + return stream + + + diff --git a/fastimport/processors/info_processor.py b/fastimport/processors/info_processor.py new file mode 100644 index 0000000..28c7300 --- /dev/null +++ b/fastimport/processors/info_processor.py @@ -0,0 +1,286 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU 
General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +"""Import processor that dump stats about the input (and doesn't import).""" + +from __future__ import absolute_import + +from .. import ( + reftracker, + ) +from ..helpers import ( + invert_dict, + invert_dictset, + ) +from fastimport import ( + commands, + processor, + ) +import stat + + +class InfoProcessor(processor.ImportProcessor): + """An import processor that dumps statistics about the input. + + No changes to the current repository are made. + + As well as providing useful information about an import + stream before importing it, this processor is useful for + benchmarking the speed at which data can be extracted from + the source. 
+ """ + + def __init__(self, params=None, verbose=0, outf=None): + processor.ImportProcessor.__init__(self, params, verbose, + outf=outf) + + def pre_process(self): + # Init statistics + self.cmd_counts = {} + for cmd in commands.COMMAND_NAMES: + self.cmd_counts[cmd] = 0 + self.file_cmd_counts = {} + for fc in commands.FILE_COMMAND_NAMES: + self.file_cmd_counts[fc] = 0 + self.parent_counts = {} + self.max_parent_count = 0 + self.committers = set() + self.separate_authors_found = False + self.symlinks_found = False + self.executables_found = False + self.sha_blob_references = False + self.lightweight_tags = 0 + # Blob usage tracking + self.blobs = {} + for usage in ['new', 'used', 'unknown', 'unmarked']: + self.blobs[usage] = set() + self.blob_ref_counts = {} + # Head tracking + self.reftracker = reftracker.RefTracker() + # Stuff to cache: a map from mark to # of times that mark is merged + self.merges = {} + # Stuff to cache: these are maps from mark to sets + self.rename_old_paths = {} + self.copy_source_paths = {} + + def post_process(self): + # Dump statistics + cmd_names = commands.COMMAND_NAMES + fc_names = commands.FILE_COMMAND_NAMES + self._dump_stats_group("Command counts", + [(c.decode('utf-8'), self.cmd_counts[c]) for c in cmd_names], str) + self._dump_stats_group("File command counts", + [(c.decode('utf-8'), self.file_cmd_counts[c]) for c in fc_names], str) + + # Commit stats + if self.cmd_counts[b'commit']: + p_items = [] + for i in range(self.max_parent_count + 1): + if i in self.parent_counts: + count = self.parent_counts[i] + p_items.append(("parents-%d" % i, count)) + merges_count = len(self.merges) + p_items.append(('total revisions merged', merges_count)) + flags = { + 'separate authors found': self.separate_authors_found, + 'executables': self.executables_found, + 'symlinks': self.symlinks_found, + 'blobs referenced by SHA': self.sha_blob_references, + } + self._dump_stats_group("Parent counts", p_items, str) + self._dump_stats_group("Commit 
analysis", sorted(flags.items()), _found) + heads = invert_dictset(self.reftracker.heads) + self._dump_stats_group( + "Head analysis", + [(k.decode('utf-8'), + ', '.join([m.decode('utf-8') for m in v])) + for (k, v) in heads.items()], None, + _iterable_as_config_list) + # note("\t%d\t%s" % (len(self.committers), 'unique committers')) + self._dump_stats_group("Merges", self.merges.items(), None) + # We only show the rename old path and copy source paths when -vv + # (verbose=2) is specified. The output here for mysql's data can't + # be parsed currently so this bit of code needs more work anyhow .. + if self.verbose >= 2: + self._dump_stats_group("Rename old paths", + self.rename_old_paths.items(), len, + _iterable_as_config_list) + self._dump_stats_group("Copy source paths", + self.copy_source_paths.items(), len, + _iterable_as_config_list) + + # Blob stats + if self.cmd_counts[b'blob']: + # In verbose mode, don't list every blob used + if self.verbose: + del self.blobs['used'] + self._dump_stats_group("Blob usage tracking", + self.blobs.items(), len, _iterable_as_config_list) + if self.blob_ref_counts: + blobs_by_count = invert_dict(self.blob_ref_counts) + blob_items = sorted(blobs_by_count.items()) + self._dump_stats_group("Blob reference counts", + blob_items, len, _iterable_as_config_list) + + # Other stats + if self.cmd_counts[b'reset']: + reset_stats = { + 'lightweight tags': self.lightweight_tags, + } + self._dump_stats_group("Reset analysis", reset_stats.items()) + + def _dump_stats_group(self, title, items, normal_formatter=None, + verbose_formatter=None): + """Dump a statistics group. + + In verbose mode, do so as a config file so + that other processors can load the information if they want to. 
+ :param normal_formatter: the callable to apply to the value + before displaying it in normal mode + :param verbose_formatter: the callable to apply to the value + before displaying it in verbose mode + """ + if self.verbose: + self.outf.write("[%s]\n" % (title,)) + for name, value in items: + if verbose_formatter is not None: + value = verbose_formatter(value) + if type(name) == str: + name = name.replace(' ', '-') + self.outf.write("%s = %s\n" % (name, value)) + self.outf.write("\n") + else: + self.outf.write("%s:\n" % (title,)) + for name, value in items: + if normal_formatter is not None: + value = normal_formatter(value) + self.outf.write("\t%s\t%s\n" % (value, name)) + + def progress_handler(self, cmd): + """Process a ProgressCommand.""" + self.cmd_counts[cmd.name] += 1 + + def blob_handler(self, cmd): + """Process a BlobCommand.""" + self.cmd_counts[cmd.name] += 1 + if cmd.mark is None: + self.blobs['unmarked'].add(cmd.id) + else: + self.blobs['new'].add(cmd.id) + # Marks can be re-used so remove it from used if already there. + # Note: we definitely do NOT want to remove it from multi if + # it's already in that set. 
+ try: + self.blobs['used'].remove(cmd.id) + except KeyError: + pass + + def checkpoint_handler(self, cmd): + """Process a CheckpointCommand.""" + self.cmd_counts[cmd.name] += 1 + + def commit_handler(self, cmd): + """Process a CommitCommand.""" + self.cmd_counts[cmd.name] += 1 + self.committers.add(cmd.committer) + if cmd.author is not None: + self.separate_authors_found = True + for fc in cmd.iter_files(): + self.file_cmd_counts[fc.name] += 1 + if isinstance(fc, commands.FileModifyCommand): + if fc.mode & 0o111: + self.executables_found = True + if stat.S_ISLNK(fc.mode): + self.symlinks_found = True + if fc.dataref is not None: + if fc.dataref[0] == ':': + self._track_blob(fc.dataref) + else: + self.sha_blob_references = True + elif isinstance(fc, commands.FileRenameCommand): + self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path) + elif isinstance(fc, commands.FileCopyCommand): + self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path) + + # Track the heads + parents = self.reftracker.track_heads(cmd) + + # Track the parent counts + parent_count = len(parents) + try: + self.parent_counts[parent_count] += 1 + except KeyError: + self.parent_counts[parent_count] = 1 + if parent_count > self.max_parent_count: + self.max_parent_count = parent_count + + # Remember the merges + if cmd.merges: + #self.merges.setdefault(cmd.ref, set()).update(cmd.merges) + for merge in cmd.merges: + if merge in self.merges: + self.merges[merge] += 1 + else: + self.merges[merge] = 1 + + def reset_handler(self, cmd): + """Process a ResetCommand.""" + self.cmd_counts[cmd.name] += 1 + if cmd.ref.startswith('refs/tags/'): + self.lightweight_tags += 1 + else: + if cmd.from_ is not None: + self.reftracker.track_heads_for_ref( + cmd.ref, cmd.from_) + + def tag_handler(self, cmd): + """Process a TagCommand.""" + self.cmd_counts[cmd.name] += 1 + + def feature_handler(self, cmd): + """Process a FeatureCommand.""" + self.cmd_counts[cmd.name] += 1 + feature = cmd.feature_name + if 
feature not in commands.FEATURE_NAMES: + self.warning("feature %s is not supported - parsing may fail" + % (feature,)) + + def _track_blob(self, mark): + if mark in self.blob_ref_counts: + self.blob_ref_counts[mark] += 1 + pass + elif mark in self.blobs['used']: + self.blob_ref_counts[mark] = 2 + self.blobs['used'].remove(mark) + elif mark in self.blobs['new']: + self.blobs['used'].add(mark) + self.blobs['new'].remove(mark) + else: + self.blobs['unknown'].add(mark) + +def _found(b): + """Format a found boolean as a string.""" + return ['no', 'found'][b] + +def _iterable_as_config_list(s): + """Format an iterable as a sequence of comma-separated strings. + + To match what ConfigObj expects, a single item list has a trailing comma. + """ + items = sorted(s) + if len(items) == 1: + return "%s," % (items[0],) + else: + return ", ".join(items) diff --git a/fastimport/reftracker.py b/fastimport/reftracker.py new file mode 100644 index 0000000..16a5e45 --- /dev/null +++ b/fastimport/reftracker.py @@ -0,0 +1,68 @@ +# Copyright (C) 2009 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ + +"""Tracker of refs.""" + +from __future__ import absolute_import + + +class RefTracker(object): + + def __init__(self): + # Head tracking: last ref, last id per ref & map of commit ids to ref*s* + self.last_ref = None + self.last_ids = {} + self.heads = {} + + def dump_stats(self, note): + self._show_stats_for(self.last_ids, "last-ids", note=note) + self._show_stats_for(self.heads, "heads", note=note) + + def clear(self): + self.last_ids.clear() + self.heads.clear() + + def track_heads(self, cmd): + """Track the repository heads given a CommitCommand. + + :param cmd: the CommitCommand + :return: the list of parents in terms of commit-ids + """ + # Get the true set of parents + if cmd.from_ is not None: + parents = [cmd.from_] + else: + last_id = self.last_ids.get(cmd.ref) + if last_id is not None: + parents = [last_id] + else: + parents = [] + parents.extend(cmd.merges) + + # Track the heads + self.track_heads_for_ref(cmd.ref, cmd.id, parents) + return parents + + def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None): + if parents is not None: + for parent in parents: + if parent in self.heads: + del self.heads[parent] + self.heads.setdefault(cmd_id, set()).add(cmd_ref) + self.last_ids[cmd_ref] = cmd_id + self.last_ref = cmd_ref + + diff --git a/fastimport/tests/__init__.py b/fastimport/tests/__init__.py index ae5acb7..01a681b 100644 --- a/fastimport/tests/__init__.py +++ b/fastimport/tests/__init__.py @@ -26,6 +26,7 @@ def test_suite(): 'test_dates', 'test_errors', 'test_filter_processor', + 'test_info_processor', 'test_helpers', 'test_parser', ] diff --git a/fastimport/tests/test_commands.py b/fastimport/tests/test_commands.py index 16485eb..ccae34a 100644 --- a/fastimport/tests/test_commands.py +++ b/fastimport/tests/test_commands.py @@ -205,6 +205,27 @@ class TestCommitDisplay(TestCase): b"property planet 5 world", repr_bytes(c)) + def test_commit_with_int_mark(self): + # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) + 
committer = (b'Joe Wong', b'joe@example.com', 1234567890, -6 * 3600) + properties = { + u'greeting': u'hello', + u'planet': u'world', + } + c = commands.CommitCommand(b'refs/heads/master', 123, None, + committer, b'release v1.0', b':aaa', None, None, + properties=properties) + self.assertEqual( + b"commit refs/heads/master\n" + b"mark :123\n" + b"committer Joe Wong <joe@example.com> 1234567890 -0600\n" + b"data 12\n" + b"release v1.0\n" + b"from :aaa\n" + b"property greeting 5 hello\n" + b"property planet 5 world", + repr_bytes(c)) + class TestCommitCopy(TestCase): def setUp(self): @@ -227,7 +248,6 @@ class TestCommitCopy(TestCase): def test_replace_attr(self): c2 = self.c.copy(mark=b'ccc') - self.assertEqual( repr_bytes(self.c).replace(b'mark :bbb', b'mark :ccc'), repr_bytes(c2) diff --git a/fastimport/tests/test_info_processor.py b/fastimport/tests/test_info_processor.py new file mode 100644 index 0000000..6904b50 --- /dev/null +++ b/fastimport/tests/test_info_processor.py @@ -0,0 +1,77 @@ +# Copyright (C) 2018 Jelmer Vernooij +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +"""Test InfoProcessor""" +from io import BytesIO + +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +from unittest import TestCase + +from fastimport import ( + parser, + ) + +from fastimport.processors import ( + info_processor, + ) + +simple_fast_import_stream = b"""commit refs/heads/master +mark :1 +committer Jelmer Vernooij <jelmer@samba.org> 1299718135 +0100 +data 7 +initial + +""" + +class TestFastImportInfo(TestCase): + + def test_simple(self): + stream = BytesIO(simple_fast_import_stream) + outf = StringIO() + proc = info_processor.InfoProcessor(outf=outf) + p = parser.ImportParser(stream) + proc.process(p.iter_commands) + + self.maxDiff = None + self.assertEqual(outf.getvalue(), """Command counts: +\t0\tblob +\t0\tcheckpoint +\t1\tcommit +\t0\tfeature +\t0\tprogress +\t0\treset +\t0\ttag +File command counts: +\t0\tfilemodify +\t0\tfiledelete +\t0\tfilecopy +\t0\tfilerename +\t0\tfiledeleteall +Parent counts: +\t1\tparents-0 +\t0\ttotal revisions merged +Commit analysis: +\tno\tblobs referenced by SHA +\tno\texecutables +\tno\tseparate authors found +\tno\tsymlinks +Head analysis: +\t:1\trefs/heads/master +Merges: +""") @@ -1,7 +1,7 @@ #!/usr/bin/env python from distutils.core import setup -version = "0.9.6" +version = "0.9.8" setup(name="fastimport", description="VCS fastimport/fastexport parser", @@ -11,5 +11,22 @@ setup(name="fastimport", maintainer="Jelmer Vernooij", maintainer_email="jelmer@jelmer.uk", license="GNU GPL v2 or later", - url="htps://github.com/jelmer/python-fastimport", - packages=['fastimport', 'fastimport.tests', 'fastimport.processors']) + url="https://github.com/jelmer/python-fastimport", + packages=['fastimport', 'fastimport.tests', 'fastimport.processors'], + scripts=[ + 'bin/fast-import-query', + 'bin/fast-import-filter', + 'bin/fast-import-info', + ], + classifiers=[ + 'Development Status :: 4 - Beta', + 'License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)', + 
'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', + 'Operating System :: POSIX', + 'Topic :: Software Development :: Version Control', + ], + ) |