Merge pull request #12358 from mattip/roundtrip-record-arrays

BUG: test, fix loading structured dtypes with padding
author: Charles Harris <charlesr.harris@gmail.com> 2018-11-14 17:57:33 -0600
committer: GitHub <noreply@github.com> 2018-11-14 17:57:33 -0600
commit: 13a69b5e0fc409928dc81cdce405043e7c71cd0a (patch)
tree: 5270ba4e716b724e3e6116a823f11d8789b35b24
parent: 70a84c3192b4f231925e8e36f50d6a26bff3b46d (diff)
parent: a2227556885c3b68a33285ae3ddb4a65d71b4497 (diff)
download: numpy-13a69b5e0fc409928dc81cdce405043e7c71cd0a.tar.gz
2 files changed, 62 insertions, 2 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 1ef3dca47..10945e5e8 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -259,6 +259,43 @@ def dtype_to_descr(dtype):
     else:
         return dtype.str
 
+def descr_to_dtype(descr):
+    '''
+    descr may be stored as dtype.descr, which is a list of
+    (name, format, [shape]) tuples. Offsets are not explicitly saved, rather
+    empty fields with name,format == '', '|Vn' are added as padding.
+
+    This function reverses the process, eliminating the empty padding fields.
+    '''
+    if isinstance(descr, (str, dict)):
+        # No padding removal needed
+        return numpy.dtype(descr)
+
+    fields = []
+    offset = 0
+    for field in descr:
+        if len(field) == 2:
+            name, descr_str = field
+            dt = descr_to_dtype(descr_str)
+        else:
+            name, descr_str, shape = field
+            dt = numpy.dtype((descr_to_dtype(descr_str), shape))
+
+        # Ignore padding bytes, which will be void bytes with '' as name
+        # Once support for blank names is removed, only "if name == ''" needed)
+        is_pad = (name == '' and dt.type is numpy.void and dt.names is None)
+        if not is_pad:
+            fields.append((name, dt, offset))
+
+        offset += dt.itemsize
+
+    names, formats, offsets = zip(*fields)
+    # names may be (title, names) tuples
+    nametups = (n  if isinstance(n, tuple) else (None, n) for n in names)
+    titles, names = zip(*nametups)
+    return numpy.dtype({'names': names, 'formats': formats, 'titles': titles,
+                        'offsets': offsets, 'itemsize': offset})
+
 def header_data_from_array_1_0(array):
     """ Get the dictionary of header metadata from a numpy.ndarray.
 
@@ -523,7 +560,7 @@ def _read_array_header(fp, version):
         msg = "fortran_order is not a valid bool: %r"
         raise ValueError(msg % (d['fortran_order'],))
     try:
-        dtype = numpy.dtype(d['descr'])
+        dtype = descr_to_dtype(d['descr'])
     except TypeError as e:
         msg = "descr is not a valid dtype descriptor: %r"
         raise ValueError(msg % (d['descr'],))
diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py
index 3185e32ac..0d7c7f7e0 100644
--- a/numpy/lib/tests/test_format.py
+++ b/numpy/lib/tests/test_format.py
@@ -524,6 +524,30 @@ def test_compressed_roundtrip():
     assert_array_equal(arr, arr1)
 
 
+# aligned
+dt1 = np.dtype('i1, i4, i1', align=True)
+# non-aligned, explicit offsets
+dt2 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'],
+                'offsets': [1, 6]})
+# nested struct-in-struct
+dt3 = np.dtype({'names': ['c', 'd'], 'formats': ['i4', dt2]})
+# field with '' name
+dt4 = np.dtype({'names': ['a', '', 'b'], 'formats': ['i4']*3})
+# titles
+dt5 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'],
+                'offsets': [1, 6], 'titles': ['aa', 'bb']})
+
+@pytest.mark.parametrize("dt", [dt1, dt2, dt3, dt4, dt5])
+def test_load_padded_dtype(dt):
+    arr = np.zeros(3, dt)
+    for i in range(3):
+        arr[i] = i + 5
+    npz_file = os.path.join(tempdir, 'aligned.npz')
+    np.savez(npz_file, arr=arr)
+    arr1 = np.load(npz_file)['arr']
+    assert_array_equal(arr, arr1)
+
+
 def test_python2_python3_interoperability():
     if sys.version_info[0] >= 3:
         fname = 'win64python2.npy'
@@ -533,7 +557,6 @@ def test_python2_python3_interoperability():
     data = np.load(path)
     assert_array_equal(data, np.ones(2))
 
-
 def test_pickle_python2_python3():
     # Test that loading object arrays saved on Python 2 works both on
     # Python 2 and Python 3 and vice versa
author	Charles Harris <charlesr.harris@gmail.com>	2018-11-14 17:57:33 -0600
committer	GitHub <noreply@github.com>	2018-11-14 17:57:33 -0600
commit	13a69b5e0fc409928dc81cdce405043e7c71cd0a (patch)
tree	5270ba4e716b724e3e6116a823f11d8789b35b24
parent	70a84c3192b4f231925e8e36f50d6a26bff3b46d (diff)
parent	a2227556885c3b68a33285ae3ddb4a65d71b4497 (diff)
download	numpy-13a69b5e0fc409928dc81cdce405043e7c71cd0a.tar.gz