diff options
author | Ian Clatworthy <ian.clatworthy@internode.on.net> | 2008-02-20 16:40:06 +1000 |
---|---|---|
committer | Ian Clatworthy <ian.clatworthy@internode.on.net> | 2008-02-20 16:40:06 +1000 |
commit | 0d96c435907241f1f5e9b9dc0ce5a9b4b42ea4a6 (patch) | |
tree | 9684d32e1929547d2890d9447152872fa48b9c53 | |
parent | ac5df34d1c30adfaadaa3e85cd23ddeaa233bee0 (diff) | |
download | bzr-fastimport-0d96c435907241f1f5e9b9dc0ce5a9b4b42ea4a6.tar.gz |
identify unmarked blobs and commits by line numbers
-rw-r--r-- | commands.py | 16 |
-rw-r--r-- | parser.py | 6 |
-rw-r--r-- | processors/info_processor.py | 25 |
3 files changed, 36 insertions, 11 deletions
```diff
diff --git a/commands.py b/commands.py
index 3cf3df6..3f0e986 100644
--- a/commands.py
+++ b/commands.py
@@ -64,10 +64,16 @@ class ImportCommand(object):
 
 class BlobCommand(ImportCommand):
 
-    def __init__(self, mark, data):
+    def __init__(self, mark, data, lineno=0):
         ImportCommand.__init__(self, 'blob')
         self.mark = mark
         self.data = data
+        self.lineno = lineno
+        # Provide a unique id in case the mark is missing
+        if mark is None:
+            self.id = '@%d' % lineno
+        else:
+            self.id = ':' + mark
         self._binary = ['data']
 
 
@@ -80,7 +86,7 @@ class CheckpointCommand(ImportCommand):
 class CommitCommand(ImportCommand):
 
     def __init__(self, ref, mark, author, committer, message, parents,
-        file_iter):
+        file_iter, lineno=0):
         ImportCommand.__init__(self, 'commit')
         self.ref = ref
         self.mark = mark
@@ -89,7 +95,13 @@ class CommitCommand(ImportCommand):
         self.message = message
         self.parents = parents
         self.file_iter = file_iter
+        self.lineno = lineno
         self._binary = ['file_iter']
+        # Provide a unique id in case the mark is missing
+        if mark is None:
+            self.id = '@%d' % lineno
+        else:
+            self.id = ':' + mark
 
     def dump_str(self, names=None, child_lists=None, verbose=False):
         result = [ImportCommand.dump_str(self, names, verbose=verbose)]
diff --git a/parser.py b/parser.py
--- a/parser.py
+++ b/parser.py
@@ -323,12 +323,14 @@ class ImportParser(LineBasedParser):
 
     def _parse_blob(self):
         """Parse a blob command."""
+        lineno = self.lineno
         mark = self._get_mark_if_any()
         data = self._get_data('blob')
-        return commands.BlobCommand(mark, data)
+        return commands.BlobCommand(mark, data, lineno)
 
     def _parse_commit(self, ref):
         """Parse a commit command."""
+        lineno = self.lineno
         mark = self._get_mark_if_any()
         author = self._get_user_info('commit', 'author', False)
         committer = self._get_user_info('commit', 'committer')
@@ -345,7 +347,7 @@ class ImportParser(LineBasedParser):
         else:
             parents = []
         return commands.CommitCommand(ref, mark, author, committer, message,
-            parents, self.iter_file_commands)
+            parents, self.iter_file_commands, lineno)
 
     def _parse_file_modify(self, info):
         """Parse a filemodify command within a commit.
diff --git a/processors/info_processor.py b/processors/info_processor.py
index a6958b9..b0d430e 100644
--- a/processors/info_processor.py
+++ b/processors/info_processor.py
@@ -67,10 +67,11 @@ class InfoProcessor(processor.ImportProcessor):
         self.named_branches = []
         # Blob usage tracking
         self.blobs = {}
-        for usage in ['new', 'used', 'multi', 'unknown']:
+        for usage in ['new', 'used', 'multi', 'unknown', 'unmarked']:
             self.blobs[usage] = set()
         # Head tracking: map of commit mark to ref
         self.heads = {}
+        self.last_ids = {}
 
     def post_process(self):
         # Dump statistics
@@ -153,7 +154,10 @@ class InfoProcessor(processor.ImportProcessor):
     def blob_handler(self, cmd):
         """Process a BlobCommand."""
         self.cmd_counts[cmd.name] += 1
-        self.blobs['new'].add(":" + cmd.mark)
+        if cmd.mark is None:
+            self.blobs['unmarked'].add(cmd.id)
+        else:
+            self.blobs['new'].add(cmd.id)
 
     def checkpoint_handler(self, cmd):
         """Process a CheckpointCommand."""
@@ -179,14 +183,21 @@ class InfoProcessor(processor.ImportProcessor):
                     else:
                         self.sha_blob_references = True
         # Track the heads
-        for parent in cmd.parents:
+        if cmd.mark is None:
+            last_id = self.last_ids.get(cmd.ref)
+            if last_id is not None:
+                parents = [last_id]
+            else:
+                parents = []
+        else:
+            parents = cmd.parents
+        for parent in parents:
             try:
                 del self.heads[parent]
             except KeyError:
-                print "hmm - didn't find parent %s" % parent
-                pass
-        self.heads[":" + cmd.mark] = cmd.ref
-
+                note("hmm - didn't find parent %s" % parent)
+        self.heads[cmd.id] = cmd.ref
+        self.last_ids[cmd.ref] = cmd.id
 
     def reset_handler(self, cmd):
         """Process a ResetCommand."""
```