diff options
author | Jelmer Vernooij <jelmer@samba.org> | 2010-12-12 04:53:15 +0100 |
---|---|---|
committer | Jelmer Vernooij <jelmer@samba.org> | 2010-12-12 04:53:15 +0100 |
commit | fe09c31695fc58cd93acd0b1ed0e1a681a52ea29 (patch) | |
tree | c3c178dd3e89deb1b79a063c7877f0e903d3756f | |
parent | 86249fff9a9c45faef963731fa36fe2866986a54 (diff) | |
download | bzr-fastimport-fe09c31695fc58cd93acd0b1ed0e1a681a52ea29.tar.gz |
Decode and encode paths and committer/author names and emails as UTF-8 here, as python-fastimport no longer does so.
-rw-r--r-- | bzr_commit_handler.py | 23 | ||||
-rw-r--r-- | exporter.py | 21 |
2 files changed, 29 insertions, 15 deletions
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py index 1df4c86..5cef958 100644 --- a/bzr_commit_handler.py +++ b/bzr_commit_handler.py @@ -234,8 +234,21 @@ class GenericCommitHandler(processor.CommitHandler): """Get a Bazaar file identifier for a path.""" return self.bzr_file_id_and_new(path)[0] - def _format_name_email(self, name, email): + def _utf8_decode(self, field, value): + try: + return value.decode('utf_8') + except UnicodeDecodeError: + # The spec says fields are *typically* utf8 encoded + # but that isn't enforced by git-fast-export (at least) + self.warning("%s not in utf8 - replacing unknown " + "characters" % (field,)) + return value.decode('utf_8', 'replace') + + def _format_name_email(self, section, name, email): """Format name & email as a string.""" + name = self._utf8_decode("%s name" % section, name) + email = self._utf8_decode("%s email" % section, email) + if email: return "%s <%s>" % (name, email) else: @@ -249,7 +262,7 @@ class GenericCommitHandler(processor.CommitHandler): committer = self.command.committer # Perhaps 'who' being the person running the import is ok? If so, # it might be a bit quicker and give slightly better compression? 
- who = self._format_name_email(committer[0], committer[1]) + who = self._format_name_email("committer", committer[0], committer[1]) timestamp = committer[2] return generate_ids.gen_revision_id(who, timestamp) @@ -260,7 +273,7 @@ class GenericCommitHandler(processor.CommitHandler): self.branch_ref) self._save_author_info(rev_props) committer = self.command.committer - who = self._format_name_email(committer[0], committer[1]) + who = self._format_name_email("committer", committer[0], committer[1]) try: message = self.command.message.decode("utf-8") except UnicodeDecodeError: @@ -300,9 +313,9 @@ class GenericCommitHandler(processor.CommitHandler): return if self.command.more_authors: authors = [author] + self.command.more_authors - author_ids = [self._format_name_email(a[0], a[1]) for a in authors] + author_ids = [self._format_name_email("author", a[0], a[1]) for a in authors] elif author != self.command.committer: - author_ids = [self._format_name_email(author[0], author[1])] + author_ids = [self._format_name_email("author", author[0], author[1])] else: return # If we reach here, there are authors worth storing diff --git a/exporter.py b/exporter.py index 3f477d1..6d1a9c0 100644 --- a/exporter.py +++ b/exporter.py @@ -274,7 +274,7 @@ class BzrFastExporter(object): email = '' else: name, email = parseaddr(user) - return name, email + return name.encode("utf-8"), email.encode("utf-8") def _get_commit_command(self, git_ref, mark, revobj, file_cmds): # Get the committer and author info @@ -336,7 +336,7 @@ class BzrFastExporter(object): # Build and return the result return commands.CommitCommand(git_ref, mark, author_info, - committer_info, revobj.message, from_, merges, iter(file_cmds), + committer_info, revobj.message.encode("utf-8"), from_, merges, iter(file_cmds), more_authors=more_author_info, properties=properties) def _get_revision_trees(self, parent, revision_id): @@ -385,16 +385,16 @@ class BzrFastExporter(object): for path, id_, kind in changes.added + 
my_modified + rd_modifies: if kind == 'file': text = tree_new.get_file_text(id_) - file_cmds.append(commands.FileModifyCommand(path, + file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"), helpers.kind_to_mode('file', tree_new.is_executable(id_)), None, text)) elif kind == 'symlink': - file_cmds.append(commands.FileModifyCommand(path, + file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"), helpers.kind_to_mode('symlink', False), None, tree_new.get_symlink_target(id_))) elif kind == 'directory': if not self.plain_format: - file_cmds.append(commands.FileModifyCommand(path, + file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"), helpers.kind_to_mode('directory', False), None, None)) else: @@ -436,7 +436,7 @@ class BzrFastExporter(object): emit = kind != 'directory' or not self.plain_format if newpath in deleted_paths: if emit: - file_cmds.append(commands.FileDeleteCommand(newpath)) + file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8"))) deleted_paths.remove(newpath) if (self.is_empty_dir(tree_old, oldpath)): self.note("Skipping empty dir %s in rev %s" % (oldpath, @@ -447,7 +447,8 @@ class BzrFastExporter(object): renamed.append([oldpath, newpath]) old_to_new[oldpath] = newpath if emit: - file_cmds.append(commands.FileRenameCommand(oldpath, newpath)) + file_cmds.append( + commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8"))) if text_modified or meta_modified: modifies.append((newpath, id_, kind)) @@ -470,8 +471,8 @@ class BzrFastExporter(object): if self.verbose: self.note("implicitly renaming %s => %s" % (old_child_path, new_child_path)) - file_cmds.append(commands.FileRenameCommand(old_child_path, - new_child_path)) + file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"), + new_child_path.encode("utf-8"))) # Record remaining deletes for path, id_, kind in deletes: @@ -480,7 +481,7 @@ class BzrFastExporter(object): if kind == 'directory' and self.plain_format: continue 
#path = self._adjust_path_for_renames(path, renamed, revision_id) - file_cmds.append(commands.FileDeleteCommand(path)) + file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8"))) return file_cmds, modifies, renamed def _adjust_path_for_renames(self, path, renamed, revision_id): |