From b1f4837430ad582db4a59b1a9a4ee7bc96280eac Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 22 Jan 2013 17:01:59 +0200 Subject: Issue #1159051: GzipFile now raises EOFError when reading a corrupted file with truncated header or footer. Added tests for reading truncated gzip and bzip2 files. --- Lib/gzip.py | 72 ++++++++++++++++++++++++--------------------------- Lib/test/test_bz2.py | 18 +++++++++++++ Lib/test/test_gzip.py | 15 +++++++++++ Misc/NEWS | 3 +++ 4 files changed, 70 insertions(+), 38 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index 6aacc9a4f9..8fb1ed06c9 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -33,9 +33,6 @@ def write32u(output, value): # or unsigned. output.write(struct.pack(" self.extrasize: - self._read(readsize) - readsize = min(self.max_read_chunk, readsize * 2) - except EOFError: - if size > self.extrasize: - size = self.extrasize + while size > self.extrasize: + if not self._read(readsize): + if size > self.extrasize: + size = self.extrasize + break + readsize = min(self.max_read_chunk, readsize * 2) offset = self.offset - self.extrastart chunk = self.extrabuf[offset: offset + size] @@ -366,12 +364,9 @@ class GzipFile(io.BufferedIOBase): if self.extrasize == 0: if self.fileobj is None: return b'' - try: - # Ensure that we don't return b"" if we haven't reached EOF. - while self.extrasize == 0: - # 1024 is the same buffering heuristic used in read() - self._read(max(n, 1024)) - except EOFError: + # Ensure that we don't return b"" if we haven't reached EOF. + # 1024 is the same buffering heuristic used in read() + while self.extrasize == 0 and self._read(max(n, 1024)): pass offset = self.offset - self.extrastart remaining = self.extrasize @@ -384,13 +379,14 @@ class GzipFile(io.BufferedIOBase): def _read(self, size=1024): if self.fileobj is None: - raise EOFError("Reached EOF") + return False if self._new_member: # If the _new_member flag is set, we have to # jump to the next member, if there is one. self._init_read() - self._read_gzip_header() + if not self._read_gzip_header(): + return False self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) self._new_member = False @@ -407,7 +403,7 @@ class GzipFile(io.BufferedIOBase): self.fileobj.prepend(self.decompress.unused_data, True) self._read_eof() self._add_read_data( uncompress ) - raise EOFError('Reached EOF') + return False uncompress = self.decompress.decompress(buf) self._add_read_data( uncompress ) @@ -423,6 +419,7 @@ class GzipFile(io.BufferedIOBase): # a new member on the next call self._read_eof() self._new_member = True + return True def _add_read_data(self, data): self.crc = zlib.crc32(data, self.crc) & 0xffffffff @@ -437,8 +434,7 @@ class GzipFile(io.BufferedIOBase): # We check the that the computed CRC and size of the # uncompressed data matches the stored values. Note that the size # stored is the true file size mod 2**32. - crc32 = read32(self.fileobj) - isize = read32(self.fileobj) # may exceed 2GB + crc32, isize = struct.unpack("