summary | refs | log | tree | commit | diff
path: root/Lib/zipfile.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/zipfile.py')
-rw-r--r-- Lib/zipfile.py 417
1 files changed, 303 insertions, 114 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index b223b4a4c1..ff64c90829 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -22,7 +22,18 @@ except ImportError:
zlib = None
crc32 = binascii.crc32
-__all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED",
+try:
+ import bz2 # We may need its compression method
+except ImportError:
+ bz2 = None
+
+try:
+ import lzma # We may need its compression method
+except ImportError:
+ lzma = None
+
+__all__ = ["BadZipFile", "BadZipfile", "error",
+ "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
"is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
class BadZipFile(Exception):
@@ -45,8 +56,17 @@ ZIP_MAX_COMMENT = (1 << 16) - 1
# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
+ZIP_BZIP2 = 12
+ZIP_LZMA = 14
# Other ZIP compression methods not supported
+DEFAULT_VERSION = 20
+ZIP64_VERSION = 45
+BZIP2_VERSION = 46
+LZMA_VERSION = 63
+# We recognize (but do not necessarily support) all features up to this version
+MAX_EXTRACT_VERSION = 63
+
# Below are some formats and associated data for reading/writing headers using
# the struct module. The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
@@ -322,8 +342,8 @@ class ZipInfo (object):
else:
# Assume everything else is unix-y
self.create_system = 3 # System which created ZIP archive
- self.create_version = 20 # Version which created ZIP archive
- self.extract_version = 20 # Version needed to extract archive
+ self.create_version = DEFAULT_VERSION # Version which created ZIP archive
+ self.extract_version = DEFAULT_VERSION # Version needed to extract archive
self.reserved = 0 # Must be zero
self.flag_bits = 0 # ZIP flag bits
self.volume = 0 # Volume number of file header
@@ -350,6 +370,7 @@ class ZipInfo (object):
extra = self.extra
+ min_version = 0
if zip64 is None:
zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
if zip64:
@@ -363,9 +384,15 @@ class ZipInfo (object):
# fall back to the ZIP64 extension
file_size = 0xffffffff
compress_size = 0xffffffff
- self.extract_version = max(45, self.extract_version)
- self.create_version = max(45, self.extract_version)
+ min_version = ZIP64_VERSION
+
+ if self.compress_type == ZIP_BZIP2:
+ min_version = max(BZIP2_VERSION, min_version)
+ elif self.compress_type == ZIP_LZMA:
+ min_version = max(LZMA_VERSION, min_version)
+ self.extract_version = max(min_version, self.extract_version)
+ self.create_version = max(min_version, self.create_version)
filename, flag_bits = self._encodeFilenameFlags()
header = struct.pack(structFileHeader, stringFileHeader,
self.extract_version, self.reserved, flag_bits,
@@ -476,6 +503,57 @@ class _ZipDecrypter:
return c
+class LZMACompressor:
+    """Compressor object used for ZIP_LZMA members.
+
+    The underlying raw LZMA1 compressor is created lazily, so the small
+    properties header built by _init() is returned together with the first
+    bytes produced by compress() or flush().
+    """
+
+    def __init__(self):
+        self._comp = None
+
+    def _init(self):
+        # Encode the default LZMA1 filter properties, then decode them again
+        # so the compressor is configured from the same bytes that are
+        # written into the header.
+        encoded = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
+        lzma1_filter = lzma._decode_filter_properties(lzma.FILTER_LZMA1,
+                                                      encoded)
+        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
+                                         filters=[lzma1_filter])
+        # Header layout: bytes 9 and 4, a little-endian 16-bit property
+        # length, then the encoded properties themselves.
+        return struct.pack('<BBH', 9, 4, len(encoded)) + encoded
+
+    def compress(self, data):
+        """Return compressed bytes for data (header first, if not yet sent)."""
+        header = b'' if self._comp is not None else self._init()
+        return header + self._comp.compress(data)
+
+    def flush(self):
+        """Finish the stream and return the remaining compressed bytes."""
+        header = b'' if self._comp is not None else self._init()
+        return header + self._comp.flush()
+
+
+class LZMADecompressor:
+    """Decompressor object used for ZIP_LZMA members.
+
+    Input is buffered until the 4-byte header and the property bytes it
+    announces have arrived (plus at least one more byte); only then is the
+    raw LZMA1 decompressor created.  The eof attribute mirrors the eof flag
+    of that underlying decompressor.
+    """
+
+    def __init__(self):
+        self._decomp = None
+        self._unconsumed = b''
+        self.eof = False
+
+    def decompress(self, data):
+        """Return decompressed bytes for data (b'' while still buffering)."""
+        if self._decomp is None:
+            self._unconsumed += data
+            pending = self._unconsumed
+            if len(pending) <= 4:
+                return b''
+            # Bytes 2-3 hold the little-endian length of the property block.
+            psize, = struct.unpack('<H', pending[2:4])
+            body_start = 4 + psize
+            if len(pending) <= body_start:
+                return b''
+
+            lzma1_filter = lzma._decode_filter_properties(
+                lzma.FILTER_LZMA1, pending[4:body_start])
+            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
+                                                 filters=[lzma1_filter])
+            # Whatever follows the header is the start of the payload.
+            data = pending[body_start:]
+            del self._unconsumed
+
+        result = self._decomp.decompress(data)
+        self.eof = self._decomp.eof
+        return result
+
+
compressor_names = {
0: 'store',
1: 'shrink',
@@ -496,6 +574,53 @@ compressor_names = {
98: 'ppmd',
}
+def _check_compression(compression):
+    """Raise RuntimeError unless the compression method can be used.
+
+    ZIP_STORED is always accepted.  ZIP_DEFLATED, ZIP_BZIP2 and ZIP_LZMA are
+    accepted only if their backing module (zlib, bz2, lzma) is available.
+    Any other value is rejected.
+    """
+    if compression == ZIP_STORED:
+        return
+    # (method, backing module or None, error raised when it is missing)
+    requirements = (
+        (ZIP_DEFLATED, zlib,
+         "Compression requires the (missing) zlib module"),
+        (ZIP_BZIP2, bz2,
+         "Compression requires the (missing) bz2 module"),
+        (ZIP_LZMA, lzma,
+         "Compression requires the (missing) lzma module"),
+    )
+    for method, module, message in requirements:
+        if compression == method:
+            if not module:
+                raise RuntimeError(message)
+            return
+    raise RuntimeError("That compression method is not supported")
+
+
+def _get_compressor(compress_type):
+    """Return a new compressor object for compress_type.
+
+    Returns None for any type other than ZIP_DEFLATED, ZIP_BZIP2 and
+    ZIP_LZMA; callers then write the data without compressing it.
+    """
+    if compress_type == ZIP_DEFLATED:
+        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
+                                zlib.DEFLATED, -15)
+    if compress_type == ZIP_BZIP2:
+        return bz2.BZ2Compressor()
+    if compress_type == ZIP_LZMA:
+        return LZMACompressor()
+    return None
+
+
+def _get_decompressor(compress_type):
+    """Return a new decompressor object for compress_type.
+
+    Returns None for ZIP_STORED.  Raises NotImplementedError for any type
+    other than ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 and ZIP_LZMA; the message
+    includes the method's name when compressor_names has one.
+    """
+    if compress_type == ZIP_STORED:
+        return None
+    if compress_type == ZIP_DEFLATED:
+        return zlib.decompressobj(-15)
+    if compress_type == ZIP_BZIP2:
+        return bz2.BZ2Decompressor()
+    if compress_type == ZIP_LZMA:
+        return LZMADecompressor()
+
+    descr = compressor_names.get(compress_type)
+    if not descr:
+        raise NotImplementedError("compression type %d" % (compress_type,))
+    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
+
class ZipExtFile(io.BufferedIOBase):
"""File-like object for reading an archive member.
@@ -518,19 +643,12 @@ class ZipExtFile(io.BufferedIOBase):
self._close_fileobj = close_fileobj
self._compress_type = zipinfo.compress_type
- self._compress_size = zipinfo.compress_size
self._compress_left = zipinfo.compress_size
+ self._left = zipinfo.file_size
- if self._compress_type == ZIP_DEFLATED:
- self._decompressor = zlib.decompressobj(-15)
- elif self._compress_type != ZIP_STORED:
- descr = compressor_names.get(self._compress_type)
- if descr:
- raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
- else:
- raise NotImplementedError("compression type %d" % (self._compress_type,))
- self._unconsumed = b''
+ self._decompressor = _get_decompressor(self._compress_type)
+ self._eof = False
self._readbuffer = b''
self._offset = 0
@@ -605,7 +723,11 @@ class ZipExtFile(io.BufferedIOBase):
"""Returns buffered bytes without advancing the position."""
if n > len(self._readbuffer) - self._offset:
chunk = self.read(n)
- self._offset -= len(chunk)
+ if len(chunk) > self._offset:
+ self._readbuffer = chunk + self._readbuffer[self._offset:]
+ self._offset = 0
+ else:
+ self._offset -= len(chunk)
# Return up to 512 bytes to reduce allocation overhead for tight loops.
return self._readbuffer[self._offset: self._offset + 512]
@@ -617,80 +739,132 @@ class ZipExtFile(io.BufferedIOBase):
"""Read and return up to n bytes.
If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
"""
- buf = b''
- if n is None:
- n = -1
- while True:
- if n < 0:
- data = self.read1(n)
- elif n > len(buf):
- data = self.read1(n - len(buf))
- else:
- return buf
- if len(data) == 0:
- return buf
+ if n is None or n < 0:
+ buf = self._readbuffer[self._offset:]
+ self._readbuffer = b''
+ self._offset = 0
+ while not self._eof:
+ buf += self._read1(self.MAX_N)
+ return buf
+
+ end = n + self._offset
+ if end < len(self._readbuffer):
+ buf = self._readbuffer[self._offset:end]
+ self._offset = end
+ return buf
+
+ n = end - len(self._readbuffer)
+ buf = self._readbuffer[self._offset:]
+ self._readbuffer = b''
+ self._offset = 0
+ while n > 0 and not self._eof:
+ data = self._read1(n)
+ if n < len(data):
+ self._readbuffer = data
+ self._offset = n
+ buf += data[:n]
+ break
buf += data
+ n -= len(data)
+ return buf
- def _update_crc(self, newdata, eof):
+ def _update_crc(self, newdata):
# Update the CRC using the given data.
if self._expected_crc is None:
# No need to compute the CRC if we don't have a reference value
return
self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
# Check the CRC if we're at the end of the file
- if eof and self._running_crc != self._expected_crc:
+ if self._eof and self._running_crc != self._expected_crc:
raise BadZipFile("Bad CRC-32 for file %r" % self.name)
def read1(self, n):
"""Read up to n bytes with at most one read() system call."""
- # Simplify algorithm (branching) by transforming negative n to large n.
- if n < 0 or n is None:
- n = self.MAX_N
-
- # Bytes available in read buffer.
- len_readbuffer = len(self._readbuffer) - self._offset
+ if n is None or n < 0:
+ buf = self._readbuffer[self._offset:]
+ self._readbuffer = b''
+ self._offset = 0
+ while not self._eof:
+ data = self._read1(self.MAX_N)
+ if data:
+ buf += data
+ break
+ return buf
- # Read from file.
- if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
- nbytes = n - len_readbuffer - len(self._unconsumed)
- nbytes = max(nbytes, self.MIN_READ_SIZE)
- nbytes = min(nbytes, self._compress_left)
+ end = n + self._offset
+ if end < len(self._readbuffer):
+ buf = self._readbuffer[self._offset:end]
+ self._offset = end
+ return buf
- data = self._fileobj.read(nbytes)
- self._compress_left -= len(data)
+ n = end - len(self._readbuffer)
+ buf = self._readbuffer[self._offset:]
+ self._readbuffer = b''
+ self._offset = 0
+ if n > 0:
+ while not self._eof:
+ data = self._read1(n)
+ if n < len(data):
+ self._readbuffer = data
+ self._offset = n
+ buf += data[:n]
+ break
+ if data:
+ buf += data
+ break
+ return buf
- if data and self._decrypter is not None:
- data = bytes(map(self._decrypter, data))
+ def _read1(self, n):
+ # Read up to n compressed bytes with at most one read() system call,
+ # decrypt and decompress them.
+ if self._eof or n <= 0:
+ return b''
- if self._compress_type == ZIP_STORED:
- self._update_crc(data, eof=(self._compress_left==0))
- self._readbuffer = self._readbuffer[self._offset:] + data
- self._offset = 0
- else:
- # Prepare deflated bytes for decompression.
- self._unconsumed += data
-
- # Handle unconsumed data.
- if (len(self._unconsumed) > 0 and n > len_readbuffer and
- self._compress_type == ZIP_DEFLATED):
- data = self._decompressor.decompress(
- self._unconsumed,
- max(n - len_readbuffer, self.MIN_READ_SIZE)
- )
-
- self._unconsumed = self._decompressor.unconsumed_tail
- eof = len(self._unconsumed) == 0 and self._compress_left == 0
- if eof:
+ # Read from file.
+ if self._compress_type == ZIP_DEFLATED:
+ ## Handle unconsumed data.
+ data = self._decompressor.unconsumed_tail
+ if n > len(data):
+ data += self._read2(n - len(data))
+ else:
+ data = self._read2(n)
+
+ if self._compress_type == ZIP_STORED:
+ self._eof = self._compress_left <= 0
+ elif self._compress_type == ZIP_DEFLATED:
+ n = max(n, self.MIN_READ_SIZE)
+ data = self._decompressor.decompress(data, n)
+ self._eof = (self._decompressor.eof or
+ self._compress_left <= 0 and
+ not self._decompressor.unconsumed_tail)
+ if self._eof:
data += self._decompressor.flush()
+ else:
+ data = self._decompressor.decompress(data)
+ self._eof = self._decompressor.eof or self._compress_left <= 0
+
+ data = data[:self._left]
+ self._left -= len(data)
+ if self._left <= 0:
+ self._eof = True
+ self._update_crc(data)
+ return data
- self._update_crc(data, eof=eof)
- self._readbuffer = self._readbuffer[self._offset:] + data
- self._offset = 0
+ def _read2(self, n):
+ if self._compress_left <= 0:
+ return b''
+
+ n = max(n, self.MIN_READ_SIZE)
+ n = min(n, self._compress_left)
+
+ data = self._fileobj.read(n)
+ self._compress_left -= len(data)
+ if not data:
+ raise EOFError
- # Read from buffer.
- data = self._readbuffer[self._offset: self._offset + n]
- self._offset += len(data)
+ if self._decrypter is not None:
+ data = bytes(map(self._decrypter, data))
return data
def close(self):
@@ -709,7 +883,8 @@ class ZipFile:
file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile.
mode: The mode can be either read "r", write "w" or append "a".
- compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
+ compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
+ ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
allowZip64: if True ZipFile will create files with ZIP64 extensions when
needed, otherwise it will raise an exception when this would
be necessary.
@@ -717,20 +892,14 @@ class ZipFile:
"""
fp = None # Set here since __del__ checks it
+ _windows_illegal_name_trans_table = None
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
"""Open the ZIP file with mode read "r", write "w" or append "a"."""
if mode not in ("r", "w", "a"):
raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
- if compression == ZIP_STORED:
- pass
- elif compression == ZIP_DEFLATED:
- if not zlib:
- raise RuntimeError(
- "Compression requires the (missing) zlib module")
- else:
- raise RuntimeError("That compression method is not supported")
+ _check_compression(compression)
self._allowZip64 = allowZip64
self._didModify = False
@@ -851,6 +1020,9 @@ class ZipFile:
(x.create_version, x.create_system, x.extract_version, x.reserved,
x.flag_bits, x.compress_type, t, d,
x.CRC, x.compress_size, x.file_size) = centdir[1:12]
+ if x.extract_version > MAX_EXTRACT_VERSION:
+ raise NotImplementedError("zip file version %.1f" %
+ (x.extract_version / 10))
x.volume, x.internal_attr, x.external_attr = centdir[15:18]
# Convert date/time code to (year, month, day, hour, min, sec)
x._raw_time = t
@@ -873,10 +1045,7 @@ class ZipFile:
def namelist(self):
"""Return a list of file names in the archive."""
- l = []
- for data in self.filelist:
- l.append(data.filename)
- return l
+ return [data.filename for data in self.filelist]
def infolist(self):
"""Return a list of class ZipInfo instances for files in the
@@ -933,10 +1102,10 @@ class ZipFile:
if not isinstance(comment, bytes):
raise TypeError("comment: expected bytes, got %s" % type(comment))
# check for valid comment length
- if len(comment) >= ZIP_MAX_COMMENT:
- if self.debug:
- print('Archive comment is too long; truncating to %d bytes'
- % ZIP_MAX_COMMENT)
+ if len(comment) > ZIP_MAX_COMMENT:
+ import warnings
+ warnings.warn('Archive comment is too long; truncating to %d bytes'
+ % ZIP_MAX_COMMENT, stacklevel=2)
comment = comment[:ZIP_MAX_COMMENT]
self._comment = comment
self._didModify = True
@@ -985,6 +1154,14 @@ class ZipFile:
if fheader[_FH_EXTRA_FIELD_LENGTH]:
zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
+ if zinfo.flag_bits & 0x20:
+ # Zip 2.7: compressed patched data
+ raise NotImplementedError("compressed patched data (flag bit 5)")
+
+ if zinfo.flag_bits & 0x40:
+ # strong encryption
+ raise NotImplementedError("strong encryption (flag bit 6)")
+
if zinfo.flag_bits & 0x800:
# UTF-8 filename
fname_str = fname.decode("utf-8")
@@ -1056,6 +1233,21 @@ class ZipFile:
for zipinfo in members:
self.extract(zipinfo, path, pwd)
+    @classmethod
+    def _sanitize_windows_name(cls, arcname, pathsep):
+        """Replace bad characters and remove trailing dots from parts."""
+        trans = cls._windows_illegal_name_trans_table
+        if not trans:
+            # Build the translation table on first use and cache it on the
+            # class so later calls reuse it.
+            bad_chars = ':<>|"?*'
+            trans = str.maketrans(bad_chars, '_' * len(bad_chars))
+            cls._windows_illegal_name_trans_table = trans
+        # Strip trailing dots from every component ...
+        parts = [part.rstrip('.')
+                 for part in arcname.translate(trans).split(pathsep)]
+        # ... and rejoin, dropping components that ended up empty.
+        return pathsep.join(part for part in parts if part)
+
def _extract_member(self, member, targetpath, pwd):
"""Extract the ZipInfo object 'member' to a physical
file on the path targetpath.
@@ -1069,16 +1261,12 @@ class ZipFile:
# interpret absolute pathname as relative, remove drive letter or
# UNC path, redundant separators, "." and ".." components.
arcname = os.path.splitdrive(arcname)[1]
+ invalid_path_parts = ('', os.path.curdir, os.path.pardir)
arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
- if x not in ('', os.path.curdir, os.path.pardir))
+ if x not in invalid_path_parts)
if os.path.sep == '\\':
# filter illegal characters on Windows
- illegal = ':<>|"?*'
- table = str.maketrans(illegal, '_' * len(illegal))
- arcname = arcname.translate(table)
- # remove trailing dots
- arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
- arcname = os.path.sep.join(x for x in arcname if x)
+ arcname = self._sanitize_windows_name(arcname, os.path.sep)
targetpath = os.path.join(targetpath, arcname)
targetpath = os.path.normpath(targetpath)
@@ -1102,18 +1290,14 @@ class ZipFile:
def _writecheck(self, zinfo):
"""Check for errors before writing a file to the archive."""
if zinfo.filename in self.NameToInfo:
- if self.debug: # Warning for duplicate names
- print("Duplicate name:", zinfo.filename)
+ import warnings
+ warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
if self.mode not in ("w", "a"):
raise RuntimeError('write() requires mode "w" or "a"')
if not self.fp:
raise RuntimeError(
"Attempt to write ZIP archive that was already closed")
- if zinfo.compress_type == ZIP_DEFLATED and not zlib:
- raise RuntimeError(
- "Compression requires the (missing) zlib module")
- if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
- raise RuntimeError("That compression method is not supported")
+ _check_compression(zinfo.compress_type)
if zinfo.file_size > ZIP64_LIMIT:
if not self._allowZip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
@@ -1151,6 +1335,9 @@ class ZipFile:
zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00
zinfo.header_offset = self.fp.tell() # Start of header bytes
+ if zinfo.compress_type == ZIP_LZMA:
+ # Compressed data includes an end-of-stream (EOS) marker
+ zinfo.flag_bits |= 0x02
self._writecheck(zinfo)
self._didModify = True
@@ -1164,6 +1351,7 @@ class ZipFile:
self.fp.write(zinfo.FileHeader(False))
return
+ cmpr = _get_compressor(zinfo.compress_type)
with open(filename, "rb") as fp:
# Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0
@@ -1172,11 +1360,6 @@ class ZipFile:
zip64 = self._allowZip64 and \
zinfo.file_size * 1.05 > ZIP64_LIMIT
self.fp.write(zinfo.FileHeader(zip64))
- if zinfo.compress_type == ZIP_DEFLATED:
- cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
- zlib.DEFLATED, -15)
- else:
- cmpr = None
file_size = 0
while 1:
buf = fp.read(1024 * 8)
@@ -1235,13 +1418,15 @@ class ZipFile:
zinfo.header_offset = self.fp.tell() # Start of header data
if compress_type is not None:
zinfo.compress_type = compress_type
+ if zinfo.compress_type == ZIP_LZMA:
+ # Compressed data includes an end-of-stream (EOS) marker
+ zinfo.flag_bits |= 0x02
self._writecheck(zinfo)
self._didModify = True
zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
- if zinfo.compress_type == ZIP_DEFLATED:
- co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
- zlib.DEFLATED, -15)
+ co = _get_compressor(zinfo.compress_type)
+ if co:
data = co.compress(data) + co.flush()
zinfo.compress_size = len(data) # Compressed size
else:
@@ -1298,18 +1483,22 @@ class ZipFile:
header_offset = zinfo.header_offset
extra_data = zinfo.extra
+ min_version = 0
if extra:
# Append a ZIP64 field to the extra's
extra_data = struct.pack(
'<HH' + 'Q'*len(extra),
1, 8*len(extra), *extra) + extra_data
- extract_version = max(45, zinfo.extract_version)
- create_version = max(45, zinfo.create_version)
- else:
- extract_version = zinfo.extract_version
- create_version = zinfo.create_version
+ min_version = ZIP64_VERSION
+
+ if zinfo.compress_type == ZIP_BZIP2:
+ min_version = max(BZIP2_VERSION, min_version)
+ elif zinfo.compress_type == ZIP_LZMA:
+ min_version = max(LZMA_VERSION, min_version)
+ extract_version = max(min_version, zinfo.extract_version)
+ create_version = max(min_version, zinfo.create_version)
try:
filename, flag_bits = zinfo._encodeFilenameFlags()
centdir = struct.pack(structCentralDir,