summary refs log tree commit diff
diff options
context:
space:
mode:
author Jelmer Vernooij <jelmer@samba.org> 2011-09-21 12:20:35 +0200
committer Jelmer Vernooij <jelmer@samba.org> 2011-09-21 12:20:35 +0200
commit 697545af3848c6580ce8596e170f9fcdb1e0f49f (patch)
tree a474148c03989818a3113a4ca3b688451d383b5c
parent 0357a7b41187f5e283140168f4826703a1ab2885 (diff)
download bzr-fastimport-697545af3848c6580ce8596e170f9fcdb1e0f49f.tar.gz
Cope with non-utf8 characters in paths when importing.
-rw-r--r-- NEWS 6
-rw-r--r-- bzr_commit_handler.py 45
-rw-r--r-- tests/test_generic_processor.py 24
3 files changed, 59 insertions, 16 deletions
diff --git a/NEWS b/NEWS
index 3f695e3..fc6b85b 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,12 @@ bzr-fastimport Release Notes
0.12 UNRELEASED
+Bug fixes
+---------
+
+* Cope with non-utf8 characters in paths when importing.
+ (Jelmer Vernooij, #838980)
+
0.11 2011-08-22
Bug fixes
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py
index 9e38741..9933db8 100644
--- a/bzr_commit_handler.py
+++ b/bzr_commit_handler.py
@@ -235,13 +235,23 @@ class GenericCommitHandler(processor.CommitHandler):
def _utf8_decode(self, field, value):
try:
- return value.decode('utf_8')
+ return value.decode('utf-8')
except UnicodeDecodeError:
# The spec says fields are *typically* utf8 encoded
# but that isn't enforced by git-fast-export (at least)
self.warning("%s not in utf8 - replacing unknown "
"characters" % (field,))
- return value.decode('utf_8', 'replace')
+ return value.decode('utf-8', 'replace')
+
+ def _decode_path(self, path):
+ try:
+ return path.decode('utf-8')
+ except UnicodeDecodeError:
+ # The spec says fields are *typically* utf8 encoded
+ # but that isn't enforced by git-fast-export (at least)
+ self.warning("path %r not in utf8 - replacing unknown "
+ "characters" % (path,))
+ return path.decode('utf-8', 'replace')
def _format_name_email(self, section, name, email):
"""Format name & email as a string."""
@@ -353,7 +363,7 @@ class GenericCommitHandler(processor.CommitHandler):
# make sure the cache used by get_lines knows that
self.data_for_commit[file_id] = ''
elif kind == 'symlink':
- ie.symlink_target = data.decode('utf8')
+ ie.symlink_target = self._decode_path(data)
# There are no lines stored for a symlink so
# make sure the cache used by get_lines knows that
self.data_for_commit[file_id] = ''
@@ -475,7 +485,8 @@ class GenericCommitHandler(processor.CommitHandler):
content = self.rev_store.get_file_text(self.parents[0], file_id)
self._modify_item(dest_path, kind, ie.executable, content, inv)
elif kind == 'symlink':
- self._modify_item(dest_path, kind, False, ie.symlink_target.encode("utf-8"), inv)
+ self._modify_item(dest_path, kind, False,
+ ie.symlink_target.encode("utf-8"), inv)
else:
self.warning("ignoring copy of %s %s - feature not yet supported",
kind, dest_path)
@@ -606,22 +617,22 @@ class InventoryCommitHandler(GenericCommitHandler):
data = filecmd.data
self.debug("modifying %s", filecmd.path)
(kind, is_executable) = mode_to_kind(filecmd.mode)
- self._modify_item(filecmd.path.decode('utf8'), kind,
+ self._modify_item(self._decode_path(filecmd.path), kind,
is_executable, data, self.inventory)
def delete_handler(self, filecmd):
self.debug("deleting %s", filecmd.path)
- self._delete_item(filecmd.path.decode('utf8'), self.inventory)
+ self._delete_item(self._decode_path(filecmd.path), self.inventory)
def copy_handler(self, filecmd):
- src_path = filecmd.src_path.decode('utf8')
- dest_path = filecmd.dest_path.decode('utf8')
+ src_path = self._decode_path(filecmd.src_path)
+ dest_path = self._decode_path(filecmd.dest_path)
self.debug("copying %s to %s", src_path, dest_path)
self._copy_item(src_path, dest_path, self.inventory)
def rename_handler(self, filecmd):
- old_path = filecmd.old_path.decode('utf8')
- new_path = filecmd.new_path.decode('utf8')
+ old_path = self._decode_path(filecmd.old_path)
+ new_path = self._decode_path(filecmd.new_path)
self.debug("renaming %s to %s", old_path, new_path)
self._rename_item(old_path, new_path, self.inventory)
@@ -887,22 +898,24 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
else:
data = filecmd.data
self.debug("modifying %s", filecmd.path)
- self._modify_item(filecmd.path.decode('utf8'), kind,
+ decoded_path = self._decode_path(filecmd.path)
+ self._modify_item(decoded_path, kind,
executable, data, self.basis_inventory)
def delete_handler(self, filecmd):
self.debug("deleting %s", filecmd.path)
- self._delete_item(filecmd.path.decode('utf8'), self.basis_inventory)
+ self._delete_item(
+ self._decode_path(filecmd.path), self.basis_inventory)
def copy_handler(self, filecmd):
- src_path = filecmd.src_path.decode("utf8")
- dest_path = filecmd.dest_path.decode("utf8")
+ src_path = self._decode_path(filecmd.src_path)
+ dest_path = self._decode_path(filecmd.dest_path)
self.debug("copying %s to %s", src_path, dest_path)
self._copy_item(src_path, dest_path, self.basis_inventory)
def rename_handler(self, filecmd):
- old_path = filecmd.old_path.decode("utf8")
- new_path = filecmd.new_path.decode("utf8")
+ old_path = self._decode_path(filecmd.old_path)
+ new_path = self._decode_path(filecmd.new_path)
self.debug("renaming %s to %s", old_path, new_path)
self._rename_item(old_path, new_path, self.basis_inventory)
diff --git a/tests/test_generic_processor.py b/tests/test_generic_processor.py
index 4fc5463..c491c51 100644
--- a/tests/test_generic_processor.py
+++ b/tests/test_generic_processor.py
@@ -1967,3 +1967,27 @@ class TestCommitCommands(TestCaseForGenericProcessor):
handler.process(command_list)
rev = branch.repository.get_revision(branch.last_revision())
self.assertEquals(u"This is a funky character: \ufffd", rev.message)
+
+
+class TestAddNonUtf8InBranch(TestCaseForGenericProcessor):
+
+ def file_command_iter(self):
+ # A add 'foo\x83'
+ def command_list():
+ committer_a = ['', 'a@elmer.com', time.time(), time.timezone]
+ def files_one():
+ yield commands.FileModifyCommand(
+ 'foo\x83', kind_to_mode('file', False), None, "content A\n")
+ yield commands.CommitCommand('head', '1', None,
+ committer_a, "commit 1", None, [], files_one)
+ return command_list
+
+ def test_add(self):
+ handler, branch = self.get_handler()
+ handler.process(self.file_command_iter())
+ branch.lock_read()
+ self.addCleanup(branch.unlock)
+ rev_a = branch.last_revision()
+ rtree_a = branch.repository.revision_tree(rev_a)
+ foo_id = rtree_a.path2id(u'foo\ufffd')
+ self.assertEqual(rev_a, rtree_a.inventory[foo_id].revision)