summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ian Clatworthy <ian.clatworthy@internode.on.net> 2008-02-20 16:40:06 +1000
committer: Ian Clatworthy <ian.clatworthy@internode.on.net> 2008-02-20 16:40:06 +1000
commit: 0d96c435907241f1f5e9b9dc0ce5a9b4b42ea4a6 (patch)
tree: 9684d32e1929547d2890d9447152872fa48b9c53
parent: ac5df34d1c30adfaadaa3e85cd23ddeaa233bee0 (diff)
download: bzr-fastimport-0d96c435907241f1f5e9b9dc0ce5a9b4b42ea4a6.tar.gz
identify unmarked blobs and commits by line numbers
-rw-r--r-- commands.py 16
-rw-r--r-- parser.py 6
-rw-r--r-- processors/info_processor.py 25
3 files changed, 36 insertions, 11 deletions
diff --git a/commands.py b/commands.py
index 3cf3df6..3f0e986 100644
--- a/commands.py
+++ b/commands.py
@@ -64,10 +64,16 @@ class ImportCommand(object):
class BlobCommand(ImportCommand):
- def __init__(self, mark, data):
+ def __init__(self, mark, data, lineno=0):
ImportCommand.__init__(self, 'blob')
self.mark = mark
self.data = data
+ self.lineno = lineno
+ # Provide a unique id in case the mark is missing
+ if mark is None:
+ self.id = '@%d' % lineno
+ else:
+ self.id = ':' + mark
self._binary = ['data']
@@ -80,7 +86,7 @@ class CheckpointCommand(ImportCommand):
class CommitCommand(ImportCommand):
def __init__(self, ref, mark, author, committer, message, parents,
- file_iter):
+ file_iter, lineno=0):
ImportCommand.__init__(self, 'commit')
self.ref = ref
self.mark = mark
@@ -89,7 +95,13 @@ class CommitCommand(ImportCommand):
self.message = message
self.parents = parents
self.file_iter = file_iter
+ self.lineno = lineno
self._binary = ['file_iter']
+ # Provide a unique id in case the mark is missing
+ if mark is None:
+ self.id = '@%d' % lineno
+ else:
+ self.id = ':' + mark
def dump_str(self, names=None, child_lists=None, verbose=False):
result = [ImportCommand.dump_str(self, names, verbose=verbose)]
diff --git a/parser.py b/parser.py
index 3e362bf..38abcea 100644
--- a/parser.py
+++ b/parser.py
@@ -323,12 +323,14 @@ class ImportParser(LineBasedParser):
def _parse_blob(self):
"""Parse a blob command."""
+ lineno = self.lineno
mark = self._get_mark_if_any()
data = self._get_data('blob')
- return commands.BlobCommand(mark, data)
+ return commands.BlobCommand(mark, data, lineno)
def _parse_commit(self, ref):
"""Parse a commit command."""
+ lineno = self.lineno
mark = self._get_mark_if_any()
author = self._get_user_info('commit', 'author', False)
committer = self._get_user_info('commit', 'committer')
@@ -345,7 +347,7 @@ class ImportParser(LineBasedParser):
else:
parents = []
return commands.CommitCommand(ref, mark, author, committer, message,
- parents, self.iter_file_commands)
+ parents, self.iter_file_commands, lineno)
def _parse_file_modify(self, info):
"""Parse a filemodify command within a commit.
diff --git a/processors/info_processor.py b/processors/info_processor.py
index a6958b9..b0d430e 100644
--- a/processors/info_processor.py
+++ b/processors/info_processor.py
@@ -67,10 +67,11 @@ class InfoProcessor(processor.ImportProcessor):
self.named_branches = []
# Blob usage tracking
self.blobs = {}
- for usage in ['new', 'used', 'multi', 'unknown']:
+ for usage in ['new', 'used', 'multi', 'unknown', 'unmarked']:
self.blobs[usage] = set()
# Head tracking: map of commit mark to ref
self.heads = {}
+ self.last_ids = {}
def post_process(self):
# Dump statistics
@@ -153,7 +154,10 @@ class InfoProcessor(processor.ImportProcessor):
def blob_handler(self, cmd):
"""Process a BlobCommand."""
self.cmd_counts[cmd.name] += 1
- self.blobs['new'].add(":" + cmd.mark)
+ if cmd.mark is None:
+ self.blobs['unmarked'].add(cmd.id)
+ else:
+ self.blobs['new'].add(cmd.id)
def checkpoint_handler(self, cmd):
"""Process a CheckpointCommand."""
@@ -179,14 +183,21 @@ class InfoProcessor(processor.ImportProcessor):
else:
self.sha_blob_references = True
# Track the heads
- for parent in cmd.parents:
+ if cmd.mark is None:
+ last_id = self.last_ids.get(cmd.ref)
+ if last_id is not None:
+ parents = [last_id]
+ else:
+ parents = []
+ else:
+ parents = cmd.parents
+ for parent in parents:
try:
del self.heads[parent]
except KeyError:
- print "hmm - didn't find parent %s" % parent
- pass
- self.heads[":" + cmd.mark] = cmd.ref
-
+ note("hmm - didn't find parent %s" % parent)
+ self.heads[cmd.id] = cmd.ref
+ self.last_ids[cmd.ref] = cmd.id
def reset_handler(self, cmd):
"""Process a ResetCommand."""