diff options
author | Max Sperlich <max.sperlich@livingsocial.com> | 2013-12-11 21:35:50 -0500 |
---|---|---|
committer | Max Sperlich <max.sperlich@livingsocial.com> | 2013-12-11 21:35:50 -0500 |
commit | e09c5f040fa020bb47c6610356214c0477c206aa (patch) | |
tree | 37b2370e0cf5520c39ed231ccb5c93a5dbaade0b /numpy/lib | |
parent | bdb6f8cabf755d4d7b18a3d7e7475480ce6fc008 (diff) | |
download | numpy-e09c5f040fa020bb47c6610356214c0477c206aa.tar.gz |
MAINT: Introduced _read_bytes function
This wrapper function is now used everywhere in format.py to correctly
handle the case when fp.read returns fewer bytes than
requested.
Also added a test for the original bug: loading an array larger
than 64K from a zip file.
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/format.py | 62 | ||||
-rw-r--r-- | numpy/lib/tests/test_format.py | 8 |
2 files changed, 45 insertions, 25 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 81366c50d..4cfbbe05d 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -138,6 +138,7 @@ from __future__ import division, absolute_import, print_function import numpy import sys +import io from numpy.lib.utils import safe_eval from numpy.compat import asbytes, isfileobj, long, basestring @@ -187,10 +188,7 @@ def read_magic(fp): major : int minor : int """ - magic_str = fp.read(MAGIC_LEN) - if len(magic_str) != MAGIC_LEN: - msg = "could not read %d characters for the magic string; got %r" - raise ValueError(msg % (MAGIC_LEN, magic_str)) + magic_str = _read_bytes(fp, MAGIC_LEN, "magic string") if magic_str[:-2] != MAGIC_PREFIX: msg = "the magic string is not correct; expected %r, got %r" raise ValueError(msg % (MAGIC_PREFIX, magic_str[:-2])) @@ -322,14 +320,9 @@ def read_array_header_1_0(fp): # Read an unsigned, little-endian short int which has the length of the # header. import struct - hlength_str = fp.read(2) - if len(hlength_str) != 2: - msg = "EOF at %s before reading array header length" - raise ValueError(msg % fp.tell()) + hlength_str = _read_bytes(fp, 2, "array header length") header_length = struct.unpack('<H', hlength_str)[0] - header = fp.read(header_length) - if len(header) != header_length: - raise ValueError("EOF at %s before reading array header" % fp.tell()) + header = _read_bytes(fp, header_length, "array header") # The header is a pretty-printed string representation of a literal Python # dictionary with trailing newlines padded to a 16-byte boundary. 
The keys @@ -476,21 +469,12 @@ def read_array(fp): max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dtype.itemsize) array = numpy.empty(count, dtype=dtype) - extra_data = bytes() - i = 0 - while i < count: + for i in range(0, count, max_read_count): read_count = min(max_read_count, count - i) - data = extra_data + fp.read(int(read_count * dtype.itemsize)) - if len(data) == len(extra_data): - #Unable to read sufficient data from fp - msg = "EOF: expected %d entries, got %d entries" % (count, i) - raise ValueError(msg) - actual_count = len(data) // dtype.itemsize - if actual_count > 0: - array[i:i + actual_count] = \ - numpy.frombuffer(data, dtype=dtype, count=actual_count) - i += actual_count - extra_data = data[actual_count * dtype.itemsize:] + read_size = int(read_count * dtype.itemsize) + data = _read_bytes(fp, read_size, "array data") + array[i:i+read_count] = numpy.frombuffer(data, dtype=dtype, + count=read_count) if fortran_order: array.shape = shape[::-1] @@ -609,3 +593,31 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, mode=mode, offset=offset) return marray + + +def _read_bytes(fp, size, error_template="ran out of data"): + """ + Read from file-like object until size bytes are read. + Raises ValueError if not EOF is encountered before size bytes are read. + Non-blocking objects only supported if they derive from io objects. + + Required as e.g. ZipExtFile in python 2.6 can return less data than + requested. + """ + data = bytes() + while True: + # io files (default in python3) return None or raise on would-block, + # python2 file will truncate, probably nothing can be done about that. 
+ # note that regular files can't be non-blocking + try: + r = fp.read(size - len(data)) + data += r + if len(r) == 0 or len(data) == size: + break + except io.BlockingIOError: + pass + if len(data) != size: + msg = "EOF: reading %s, expected %d bytes got %d" + raise ValueError(msg %(error_template, size, len(data))) + else: + return data diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py index abb93fbd5..dbcdaaaa6 100644 --- a/numpy/lib/tests/test_format.py +++ b/numpy/lib/tests/test_format.py @@ -517,6 +517,14 @@ def test_memmap_roundtrip(): del ma +def test_compressed_roundtrip(): + arr = np.random.rand(200, 200) + npz_file = os.path.join(tempdir, 'compressed.npz') + np.savez_compressed(npz_file, arr=arr) + arr1 = np.load(npz_file)['arr'] + assert_array_equal(arr, arr1) + + def test_write_version_1_0(): f = BytesIO() arr = np.arange(1) |