diff options
author | Alex Hornung <alex.hornung@arm.com> | 2023-01-19 14:21:35 +0000 |
---|---|---|
committer | Alex Hornung <alex.hornung@arm.com> | 2023-01-24 17:08:53 +0000 |
commit | ffc03cfcc16965cdc81c3f7b88f650788a8fe5de (patch) | |
tree | bbc9f44f6eae9c194bf2526809bb75a416f755f3 | |
parent | 944b9852c9f565ec433a808385d4ed0ad3f0abcc (diff) | |
download | zuul-ffc03cfcc16965cdc81c3f7b88f650788a8fe5de.tar.gz |
gerrit driver: fix bug around unicode branch names
If a branch name contains Unicode characters that occupy more than one
byte when encoded as UTF-8, the size of the pack record in bytes won't
match its size in characters, and the pack parsing will be incorrect.
Instead, treat everything as an encoded byte string until parsing is
done, and only decode once a single parsed record is being handled.
Change-Id: I7f1a0cc96a36129fbc04c7a8687da3f66c1eef99
-rw-r--r-- | tests/unit/test_gerrit.py | 45 | ||||
-rw-r--r-- | zuul/driver/gerrit/gerritconnection.py | 9 |
2 files changed, 52 insertions, 2 deletions
diff --git a/tests/unit/test_gerrit.py b/tests/unit/test_gerrit.py
index 2e3057af6..2a63d5ef8 100644
--- a/tests/unit/test_gerrit.py
+++ b/tests/unit/test_gerrit.py
@@ -957,3 +957,48 @@ class TestGerritConnection(ZuulTestCase):
         self.assertEqual(B.queried, 2)
         self.assertEqual(A.data['status'], 'MERGED')
         self.assertEqual(B.data['status'], 'MERGED')
+
+
+class TestGerritUnicodeRefs(ZuulTestCase):
+    config_file = 'zuul-gerrit-web.conf'
+    tenant_config_file = 'config/single-tenant/main.yaml'
+
+    upload_pack_data = (b'014452944ee370db5c87691e62e0f9079b6281319b4e HEAD'
+                        b'\x00multi_ack thin-pack side-band side-band-64k '
+                        b'ofs-delta shallow deepen-since deepen-not '
+                        b'deepen-relative no-progress include-tag '
+                        b'multi_ack_detailed allow-tip-sha1-in-want '
+                        b'allow-reachable-sha1-in-want '
+                        b'symref=HEAD:refs/heads/faster filter '
+                        b'object-format=sha1 agent=git/2.37.1.gl1\n'
+                        b'003d5f42665d737b3fd4ec22ca0209e6191859f09fd6 '
+                        b'refs/for/faster\n'
+                        b'004952944ee370db5c87691e62e0f9079b6281319b4e '
+                        b'refs/heads/foo/\xf0\x9f\x94\xa5\xf0\x9f\x94\xa5'
+                        b'\xf0\x9f\x94\xa5\n'
+                        b'003f52944ee370db5c87691e62e0f9079b6281319b4e '
+                        b'refs/heads/faster\n0000').decode("utf-8")
+
+    def test_mb_unicode_refs(self):
+        gerrit_config = {
+            'user': 'gerrit',
+            'server': 'localhost',
+        }
+        driver = GerritDriver()
+        gerrit = GerritConnection(driver, 'review_gerrit', gerrit_config)
+
+        def _uploadPack(project):
+            return self.upload_pack_data
+
+        self.patch(gerrit, '_uploadPack', _uploadPack)
+
+        project = gerrit.source.getProject('org/project')
+        refs = gerrit.getInfoRefs(project)
+
+        self.assertEqual(refs,
+                         {'refs/for/faster':
+                          '5f42665d737b3fd4ec22ca0209e6191859f09fd6',
+                          'refs/heads/foo/🔥🔥🔥':
+                          '52944ee370db5c87691e62e0f9079b6281319b4e',
+                          'refs/heads/faster':
+                          '52944ee370db5c87691e62e0f9079b6281319b4e'})
diff --git a/zuul/driver/gerrit/gerritconnection.py b/zuul/driver/gerrit/gerritconnection.py
index 0a1f0ee61..276365e1d 100644
--- a/zuul/driver/gerrit/gerritconnection.py
+++ b/zuul/driver/gerrit/gerritconnection.py
@@ -1643,7 +1643,10 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
 
     def getInfoRefs(self, project: Project) -> Dict[str, str]:
         try:
-            data = self._uploadPack(project)
+            # Encode the UTF-8 data back to a byte array, as the size of
+            # each record in the pack is in bytes, and so the slicing must
+            # also be done on a byte-basis.
+            data = self._uploadPack(project).encode("utf-8")
         except Exception:
             self.log.error("Cannot get references from %s" % project)
             raise  # keeps error information
@@ -1662,7 +1665,9 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
                 plen -= 4
             if len(data) - i < plen:
                 raise Exception("Invalid data in info/refs")
-            line = data[i:i + plen]
+            # Once the pack data is sliced, we can safely decode it back
+            # into a (UTF-8) string.
+            line = data[i:i + plen].decode("utf-8")
             i += plen
             if not read_advertisement:
                 read_advertisement = True