diff options
| author | Charles Harris <charlesr.harris@gmail.com> | 2018-11-14 17:57:33 -0600 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-11-14 17:57:33 -0600 |
| commit | 13a69b5e0fc409928dc81cdce405043e7c71cd0a (patch) | |
| tree | 5270ba4e716b724e3e6116a823f11d8789b35b24 | |
| parent | 70a84c3192b4f231925e8e36f50d6a26bff3b46d (diff) | |
| parent | a2227556885c3b68a33285ae3ddb4a65d71b4497 (diff) | |
| download | numpy-13a69b5e0fc409928dc81cdce405043e7c71cd0a.tar.gz | |
Merge pull request #12358 from mattip/roundtrip-record-arrays
BUG: test, fix loading structured dtypes with padding
| -rw-r--r-- | numpy/lib/format.py | 39 | ||||
| -rw-r--r-- | numpy/lib/tests/test_format.py | 25 |
2 files changed, 62 insertions, 2 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 1ef3dca47..10945e5e8 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -259,6 +259,43 @@ def dtype_to_descr(dtype): else: return dtype.str +def descr_to_dtype(descr): + ''' + descr may be stored as dtype.descr, which is a list of + (name, format, [shape]) tuples. Offsets are not explicitly saved, rather + empty fields with name,format == '', '|Vn' are added as padding. + + This function reverses the process, eliminating the empty padding fields. + ''' + if isinstance(descr, (str, dict)): + # No padding removal needed + return numpy.dtype(descr) + + fields = [] + offset = 0 + for field in descr: + if len(field) == 2: + name, descr_str = field + dt = descr_to_dtype(descr_str) + else: + name, descr_str, shape = field + dt = numpy.dtype((descr_to_dtype(descr_str), shape)) + + # Ignore padding bytes, which will be void bytes with '' as name + # Once support for blank names is removed, only "if name == ''" needed) + is_pad = (name == '' and dt.type is numpy.void and dt.names is None) + if not is_pad: + fields.append((name, dt, offset)) + + offset += dt.itemsize + + names, formats, offsets = zip(*fields) + # names may be (title, names) tuples + nametups = (n if isinstance(n, tuple) else (None, n) for n in names) + titles, names = zip(*nametups) + return numpy.dtype({'names': names, 'formats': formats, 'titles': titles, + 'offsets': offsets, 'itemsize': offset}) + def header_data_from_array_1_0(array): """ Get the dictionary of header metadata from a numpy.ndarray. @@ -523,7 +560,7 @@ def _read_array_header(fp, version): msg = "fortran_order is not a valid bool: %r" raise ValueError(msg % (d['fortran_order'],)) try: - dtype = numpy.dtype(d['descr']) + dtype = descr_to_dtype(d['descr']) except TypeError as e: msg = "descr is not a valid dtype descriptor: %r" raise ValueError(msg % (d['descr'],)) diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py index 3185e32ac..0d7c7f7e0 100644 --- a/numpy/lib/tests/test_format.py +++ b/numpy/lib/tests/test_format.py @@ -524,6 +524,30 @@ def test_compressed_roundtrip(): assert_array_equal(arr, arr1) +# aligned +dt1 = np.dtype('i1, i4, i1', align=True) +# non-aligned, explicit offsets +dt2 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'], + 'offsets': [1, 6]}) +# nested struct-in-struct +dt3 = np.dtype({'names': ['c', 'd'], 'formats': ['i4', dt2]}) +# field with '' name +dt4 = np.dtype({'names': ['a', '', 'b'], 'formats': ['i4']*3}) +# titles +dt5 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'], + 'offsets': [1, 6], 'titles': ['aa', 'bb']}) + +@pytest.mark.parametrize("dt", [dt1, dt2, dt3, dt4, dt5]) +def test_load_padded_dtype(dt): + arr = np.zeros(3, dt) + for i in range(3): + arr[i] = i + 5 + npz_file = os.path.join(tempdir, 'aligned.npz') + np.savez(npz_file, arr=arr) + arr1 = np.load(npz_file)['arr'] + assert_array_equal(arr, arr1) + + def test_python2_python3_interoperability(): if sys.version_info[0] >= 3: fname = 'win64python2.npy' @@ -533,7 +557,6 @@ def test_python2_python3_interoperability(): data = np.load(path) assert_array_equal(data, np.ones(2)) - def test_pickle_python2_python3(): # Test that loading object arrays saved on Python 2 works both on # Python 2 and Python 3 and vice versa |
