summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jelmer Vernooij <jelmer@samba.org> 2010-12-12 04:53:15 +0100
committer: Jelmer Vernooij <jelmer@samba.org> 2010-12-12 04:53:15 +0100
commit: fe09c31695fc58cd93acd0b1ed0e1a681a52ea29 (patch)
tree: c3c178dd3e89deb1b79a063c7877f0e903d3756f
parent: 86249fff9a9c45faef963731fa36fe2866986a54 (diff)
download: bzr-fastimport-fe09c31695fc58cd93acd0b1ed0e1a681a52ea29.tar.gz
Decode/encode paths and committer/author names and emails as UTF-8, as python-fastimport no longer does so.
-rw-r--r-- bzr_commit_handler.py 23
-rw-r--r-- exporter.py 21
2 files changed, 29 insertions, 15 deletions
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py
index 1df4c86..5cef958 100644
--- a/bzr_commit_handler.py
+++ b/bzr_commit_handler.py
@@ -234,8 +234,21 @@ class GenericCommitHandler(processor.CommitHandler):
"""Get a Bazaar file identifier for a path."""
return self.bzr_file_id_and_new(path)[0]
- def _format_name_email(self, name, email):
+ def _utf8_decode(self, field, value):
+ try:
+ return value.decode('utf_8')
+ except UnicodeDecodeError:
+ # The spec says fields are *typically* utf8 encoded
+ # but that isn't enforced by git-fast-export (at least)
+ self.warning("%s not in utf8 - replacing unknown "
+ "characters" % (field,))
+ return value.decode('utf_8', 'replace')
+
+ def _format_name_email(self, section, name, email):
"""Format name & email as a string."""
+ name = self._utf8_decode("%s name" % section, name)
+ email = self._utf8_decode("%s email" % section, email)
+
if email:
return "%s <%s>" % (name, email)
else:
@@ -249,7 +262,7 @@ class GenericCommitHandler(processor.CommitHandler):
committer = self.command.committer
# Perhaps 'who' being the person running the import is ok? If so,
# it might be a bit quicker and give slightly better compression?
- who = self._format_name_email(committer[0], committer[1])
+ who = self._format_name_email("committer", committer[0], committer[1])
timestamp = committer[2]
return generate_ids.gen_revision_id(who, timestamp)
@@ -260,7 +273,7 @@ class GenericCommitHandler(processor.CommitHandler):
self.branch_ref)
self._save_author_info(rev_props)
committer = self.command.committer
- who = self._format_name_email(committer[0], committer[1])
+ who = self._format_name_email("committer", committer[0], committer[1])
try:
message = self.command.message.decode("utf-8")
except UnicodeDecodeError:
@@ -300,9 +313,9 @@ class GenericCommitHandler(processor.CommitHandler):
return
if self.command.more_authors:
authors = [author] + self.command.more_authors
- author_ids = [self._format_name_email(a[0], a[1]) for a in authors]
+ author_ids = [self._format_name_email("author", a[0], a[1]) for a in authors]
elif author != self.command.committer:
- author_ids = [self._format_name_email(author[0], author[1])]
+ author_ids = [self._format_name_email("author", author[0], author[1])]
else:
return
# If we reach here, there are authors worth storing
diff --git a/exporter.py b/exporter.py
index 3f477d1..6d1a9c0 100644
--- a/exporter.py
+++ b/exporter.py
@@ -274,7 +274,7 @@ class BzrFastExporter(object):
email = ''
else:
name, email = parseaddr(user)
- return name, email
+ return name.encode("utf-8"), email.encode("utf-8")
def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
# Get the committer and author info
@@ -336,7 +336,7 @@ class BzrFastExporter(object):
# Build and return the result
return commands.CommitCommand(git_ref, mark, author_info,
- committer_info, revobj.message, from_, merges, iter(file_cmds),
+ committer_info, revobj.message.encode("utf-8"), from_, merges, iter(file_cmds),
more_authors=more_author_info, properties=properties)
def _get_revision_trees(self, parent, revision_id):
@@ -385,16 +385,16 @@ class BzrFastExporter(object):
for path, id_, kind in changes.added + my_modified + rd_modifies:
if kind == 'file':
text = tree_new.get_file_text(id_)
- file_cmds.append(commands.FileModifyCommand(path,
+ file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
helpers.kind_to_mode('file', tree_new.is_executable(id_)),
None, text))
elif kind == 'symlink':
- file_cmds.append(commands.FileModifyCommand(path,
+ file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
helpers.kind_to_mode('symlink', False),
None, tree_new.get_symlink_target(id_)))
elif kind == 'directory':
if not self.plain_format:
- file_cmds.append(commands.FileModifyCommand(path,
+ file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
helpers.kind_to_mode('directory', False),
None, None))
else:
@@ -436,7 +436,7 @@ class BzrFastExporter(object):
emit = kind != 'directory' or not self.plain_format
if newpath in deleted_paths:
if emit:
- file_cmds.append(commands.FileDeleteCommand(newpath))
+ file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8")))
deleted_paths.remove(newpath)
if (self.is_empty_dir(tree_old, oldpath)):
self.note("Skipping empty dir %s in rev %s" % (oldpath,
@@ -447,7 +447,8 @@ class BzrFastExporter(object):
renamed.append([oldpath, newpath])
old_to_new[oldpath] = newpath
if emit:
- file_cmds.append(commands.FileRenameCommand(oldpath, newpath))
+ file_cmds.append(
+ commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8")))
if text_modified or meta_modified:
modifies.append((newpath, id_, kind))
@@ -470,8 +471,8 @@ class BzrFastExporter(object):
if self.verbose:
self.note("implicitly renaming %s => %s" % (old_child_path,
new_child_path))
- file_cmds.append(commands.FileRenameCommand(old_child_path,
- new_child_path))
+ file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"),
+ new_child_path.encode("utf-8")))
# Record remaining deletes
for path, id_, kind in deletes:
@@ -480,7 +481,7 @@ class BzrFastExporter(object):
if kind == 'directory' and self.plain_format:
continue
#path = self._adjust_path_for_renames(path, renamed, revision_id)
- file_cmds.append(commands.FileDeleteCommand(path))
+ file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8")))
return file_cmds, modifies, renamed
def _adjust_path_for_renames(self, path, renamed, revision_id):