summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Hornung <alex.hornung@arm.com>2023-01-19 14:21:35 +0000
committerAlex Hornung <alex.hornung@arm.com>2023-01-24 17:08:53 +0000
commitffc03cfcc16965cdc81c3f7b88f650788a8fe5de (patch)
treebbc9f44f6eae9c194bf2526809bb75a416f755f3
parent944b9852c9f565ec433a808385d4ed0ad3f0abcc (diff)
downloadzuul-ffc03cfcc16965cdc81c3f7b88f650788a8fe5de.tar.gz
gerrit driver: fix bug around unicode branch names
If a branch name contains unicode characters that are more than 1-byte wide, the size in bytes of the pack record won't match the size in characters, and the pack parsing will be incorrect. Instead, treat everything as an encoded byte string until parsing is done - and only decode when handling a single, parsed, record. Change-Id: I7f1a0cc96a36129fbc04c7a8687da3f66c1eef99
-rw-r--r--tests/unit/test_gerrit.py45
-rw-r--r--zuul/driver/gerrit/gerritconnection.py9
2 files changed, 52 insertions, 2 deletions
diff --git a/tests/unit/test_gerrit.py b/tests/unit/test_gerrit.py
index 2e3057af6..2a63d5ef8 100644
--- a/tests/unit/test_gerrit.py
+++ b/tests/unit/test_gerrit.py
@@ -957,3 +957,48 @@ class TestGerritConnection(ZuulTestCase):
self.assertEqual(B.queried, 2)
self.assertEqual(A.data['status'], 'MERGED')
self.assertEqual(B.data['status'], 'MERGED')
+
+
+class TestGerritUnicodeRefs(ZuulTestCase):
+ config_file = 'zuul-gerrit-web.conf'
+ tenant_config_file = 'config/single-tenant/main.yaml'
+
+ upload_pack_data = (b'014452944ee370db5c87691e62e0f9079b6281319b4e HEAD'
+ b'\x00multi_ack thin-pack side-band side-band-64k '
+ b'ofs-delta shallow deepen-since deepen-not '
+ b'deepen-relative no-progress include-tag '
+ b'multi_ack_detailed allow-tip-sha1-in-want '
+ b'allow-reachable-sha1-in-want '
+ b'symref=HEAD:refs/heads/faster filter '
+ b'object-format=sha1 agent=git/2.37.1.gl1\n'
+ b'003d5f42665d737b3fd4ec22ca0209e6191859f09fd6 '
+ b'refs/for/faster\n'
+ b'004952944ee370db5c87691e62e0f9079b6281319b4e '
+ b'refs/heads/foo/\xf0\x9f\x94\xa5\xf0\x9f\x94\xa5'
+ b'\xf0\x9f\x94\xa5\n'
+ b'003f52944ee370db5c87691e62e0f9079b6281319b4e '
+ b'refs/heads/faster\n0000').decode("utf-8")
+
+ def test_mb_unicode_refs(self):
+ gerrit_config = {
+ 'user': 'gerrit',
+ 'server': 'localhost',
+ }
+ driver = GerritDriver()
+ gerrit = GerritConnection(driver, 'review_gerrit', gerrit_config)
+
+ def _uploadPack(project):
+ return self.upload_pack_data
+
+ self.patch(gerrit, '_uploadPack', _uploadPack)
+
+ project = gerrit.source.getProject('org/project')
+ refs = gerrit.getInfoRefs(project)
+
+ self.assertEqual(refs,
+ {'refs/for/faster':
+ '5f42665d737b3fd4ec22ca0209e6191859f09fd6',
+ 'refs/heads/foo/🔥🔥🔥':
+ '52944ee370db5c87691e62e0f9079b6281319b4e',
+ 'refs/heads/faster':
+ '52944ee370db5c87691e62e0f9079b6281319b4e'})
diff --git a/zuul/driver/gerrit/gerritconnection.py b/zuul/driver/gerrit/gerritconnection.py
index 0a1f0ee61..276365e1d 100644
--- a/zuul/driver/gerrit/gerritconnection.py
+++ b/zuul/driver/gerrit/gerritconnection.py
@@ -1643,7 +1643,10 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
def getInfoRefs(self, project: Project) -> Dict[str, str]:
try:
- data = self._uploadPack(project)
+ # Encode the UTF-8 data back to a byte array, as the size of
+ # each record in the pack is in bytes, and so the slicing must
+ # also be done on a byte-basis.
+ data = self._uploadPack(project).encode("utf-8")
except Exception:
self.log.error("Cannot get references from %s" % project)
raise # keeps error information
@@ -1662,7 +1665,9 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
plen -= 4
if len(data) - i < plen:
raise Exception("Invalid data in info/refs")
- line = data[i:i + plen]
+ # Once the pack data is sliced, we can safely decode it back
+ # into a (UTF-8) string.
+ line = data[i:i + plen].decode("utf-8")
i += plen
if not read_advertisement:
read_advertisement = True