summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Finucane <stephenfin@redhat.com>2020-03-14 00:18:03 +0000
committerGitHub <noreply@github.com>2020-03-14 13:18:03 +1300
commit26d31fa7c34019fad9038addf8114bbb4b656c92 (patch)
treec91df8ee9c999aeb4c964c7ad4fd137e53aef6e2
parent8fb3e0c2848d4225a7d5e13f107564646d2f5cb6 (diff)
downloadsubunit-git-26d31fa7c34019fad9038addf8114bbb4b656c92.tar.gz
Correctly handle py3 RawIOBase read()
Python 3's RawIOBase guarantees only one syscall per read(), so a single call may return fewer bytes than requested; a loop is required to accumulate the desired number of bytes or to actually reach EOF. TextIOBase.read() does issue multiple syscalls (it must, in order to correctly decode partial Unicode characters), but subunit unwraps that to get a binary stream, and at least some of the time the layering is io.TextIOBase(_io.FileIO), where _io.FileIO is a RawIOBase subclass rather than a BufferedIOBase subclass. Signed-off-by: Stephen Finucane <stephenfin@redhat.com> Partial-bug: #1813147
-rw-r--r--python/subunit/v2.py43
1 files changed, 26 insertions, 17 deletions
diff --git a/python/subunit/v2.py b/python/subunit/v2.py
index c2c63f6..e8a31d6 100644
--- a/python/subunit/v2.py
+++ b/python/subunit/v2.py
@@ -72,13 +72,32 @@ def has_nul(buffer_or_bytes):
return NUL_ELEMENT in buffer_or_bytes
+def read_exactly(stream, size):
+ """Read exactly size bytes from stream.
+
+ :param stream: A file like object to read bytes from. Must support
+ read(<count>) and return bytes.
+ :param size: The number of bytes to retrieve.
+ """
+ data = b''
+ remaining = size
+ while remaining:
+ read = stream.read(remaining)
+ if len(read) == 0:
+ raise ParseError('Short read - got %d bytes, wanted %d bytes' % (
+ len(data), size))
+ data += read
+ remaining -= len(read)
+ return data
+
+
class ParseError(Exception):
"""Used to pass error messages within the parser."""
class StreamResultToBytes(object):
"""Convert StreamResult API calls to bytes.
-
+
The StreamResult API is defined by testtools.StreamResult.
"""
@@ -276,7 +295,7 @@ class ByteStreamToStreamResult(object):
def run(self, result):
"""Parse source and emit events to result.
-
+
This is a blocking call: it will run until EOF is detected on source.
"""
self.codec.reset()
@@ -406,21 +425,12 @@ class ByteStreamToStreamResult(object):
def _parse(self, packet, result):
# 2 bytes flags, at most 3 bytes length.
- packet.append(self.source.read(5))
- if len(packet[-1]) != 5:
- raise ParseError(
- 'Short read - got %d bytes, wanted 5' % len(packet[-1]))
-
- flag_bytes = packet[-1][:2]
- flags = struct.unpack(FMT_16, flag_bytes)[0]
- length, consumed = self._parse_varint(
- packet[-1], 2, max_3_bytes=True)
- remainder = self.source.read(length - 6)
- if len(remainder) != length - 6:
- raise ParseError(
- 'Short read - got %d bytes, wanted %d bytes' % (
- len(remainder), length - 6))
+ header = read_exactly(self.source, 5)
+ packet.append(header)
+ flags = struct.unpack(FMT_16, header[:2])[0]
+ length, consumed = self._parse_varint(header, 2, max_3_bytes=True)
+ remainder = read_exactly(self.source, length - 6)
if consumed != 3:
# Avoid having to parse torn values
packet[-1] += remainder
@@ -533,4 +543,3 @@ class ByteStreamToStreamResult(object):
return utf8, length+pos
except UnicodeDecodeError:
raise ParseError('UTF8 string at offset %d is not UTF8' % (pos-2,))
-