Merge branch 'master' into master

author: Eric Wieser <wieser.eric@gmail.com> 2019-10-15 20:20:20 +0100
committer: GitHub <noreply@github.com> 2019-10-15 20:20:20 +0100
commit: 10a7a4a815105e16828fe83fb89778c3bbafe692 (patch)
tree: 2c73effc6bf4b8404e63564f78661caff034b255 /numpy/core/records.py
parent: d0731e118a5c40d866702f1b5da2be4d4f52ded9 (diff)
parent: 83da5faca3a313c5d37226b86fa781956f8d162b (diff)
download: numpy-10a7a4a815105e16828fe83fb89778c3bbafe692.tar.gz
1 files changed, 100 insertions, 84 deletions
diff --git a/numpy/core/records.py b/numpy/core/records.py
index a483871ba..a1439f9df 100644
--- a/numpy/core/records.py
+++ b/numpy/core/records.py
@@ -7,10 +7,9 @@ Most commonly, ndarrays contain elements of a single type, e.g. floats,
 integers, bools etc.  However, it is possible for elements to be combinations
 of these using structured types, such as::
 
-  >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', int), ('y', float)])
+  >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', np.int64), ('y', np.float64)])
   >>> a
-  array([(1, 2.0), (1, 2.0)],
-        dtype=[('x', '<i4'), ('y', '<f8')])
+  array([(1, 2.), (1, 2.)], dtype=[('x', '<i8'), ('y', '<f8')])
 
 Here, each element consists of two fields: x (and int), and y (a float).
 This is known as a structured array.  The different fields are analogous
@@ -21,7 +20,7 @@ one would a dictionary::
   array([1, 1])
 
   >>> a['y']
-  array([ 2.,  2.])
+  array([2., 2.])
 
 Record arrays allow us to access fields as properties::
 
@@ -31,7 +30,7 @@ Record arrays allow us to access fields as properties::
   array([1, 1])
 
   >>> ar.y
-  array([ 2.,  2.])
+  array([2., 2.])
 
 """
 from __future__ import division, absolute_import, print_function
@@ -39,10 +38,14 @@ from __future__ import division, absolute_import, print_function
 import sys
 import os
 import warnings
+from collections import Counter, OrderedDict
 
 from . import numeric as sb
 from . import numerictypes as nt
-from numpy.compat import isfileobj, bytes, long, unicode
+from numpy.compat import (
+    isfileobj, bytes, long, unicode, os_fspath, contextlib_nullcontext
+)
+from numpy.core.overrides import set_module
 from .arrayprint import get_printoptions
 
 # All of the functions allow formats to be a dtype
@@ -73,15 +76,28 @@ _byteorderconv = {'b':'>',
 
 numfmt = nt.typeDict
 
+# taken from OrderedDict recipes in the Python documentation
+# https://docs.python.org/3.3/library/collections.html#ordereddict-examples-and-recipes
+class _OrderedCounter(Counter, OrderedDict):
+    """Counter that remembers the order elements are first encountered"""
+
+    def __repr__(self):
+        return '%s(%r)' % (self.__class__.__name__, OrderedDict(self))
+
+    def __reduce__(self):
+        return self.__class__, (OrderedDict(self),)
+
+
 def find_duplicate(list):
     """Find duplication in a list, return a list of duplicated elements"""
-    dup = []
-    for i in range(len(list)):
-        if (list[i] in list[i + 1:]):
-            if (list[i] not in dup):
-                dup.append(list[i])
-    return dup
+    return [
+        item
+        for item, counts in _OrderedCounter(list).items()
+        if counts > 1
+    ]
+
 
+@set_module('numpy')
 class format_parser(object):
     """
     Class to convert formats, names, titles description to a dtype.
@@ -125,10 +141,9 @@ class format_parser(object):
 
     Examples
     --------
-    >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
+    >>> np.format_parser(['<f8', '<i4', '<a5'], ['col1', 'col2', 'col3'],
     ...                  ['T1', 'T2', 'T3']).dtype
-    dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'),
-           (('T3', 'col3'), '|S5')])
+    dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'), (('T3', 'col3'), 'S5')])
 
     `names` and/or `titles` can be empty lists. If `titles` is an empty list,
     titles will simply not appear. If `names` is empty, default field names
@@ -136,9 +151,9 @@ class format_parser(object):
 
     >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
     ...                  []).dtype
-    dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '|S5')])
-    >>> np.format_parser(['f8', 'i4', 'a5'], [], []).dtype
-    dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', '|S5')])
+    dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '<S5')])
+    >>> np.format_parser(['<f8', '<i4', '<a5'], [], []).dtype
+    dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')])
 
     """
 
@@ -148,16 +163,18 @@ class format_parser(object):
         self._createdescr(byteorder)
         self.dtype = self._descr
 
-    def _parseFormats(self, formats, aligned=0):
+    def _parseFormats(self, formats, aligned=False):
         """ Parse the field formats """
 
         if formats is None:
             raise ValueError("Need formats argument")
         if isinstance(formats, list):
-            if len(formats) < 2:
-                formats.append('')
-            formats = ','.join(formats)
-        dtype = sb.dtype(formats, aligned)
+            dtype = sb.dtype(
+                [('f{}'.format(i), format_) for i, format_ in enumerate(formats)],
+                aligned,
+            )
+        else:
+            dtype = sb.dtype(formats, aligned)
         fields = dtype.fields
         if fields is None:
             dtype = sb.dtype([('f1', dtype)], aligned)
@@ -251,8 +268,8 @@ class record(nt.void):
             except AttributeError:
                 #happens if field is Object type
                 return obj
-            if dt.fields:
-                return obj.view((self.__class__, obj.dtype.fields))
+            if dt.names is not None:
+                return obj.view((self.__class__, obj.dtype))
             return obj
         else:
             raise AttributeError("'record' object has no "
@@ -276,8 +293,8 @@ class record(nt.void):
         obj = nt.void.__getitem__(self, indx)
 
         # copy behavior of record.__getattribute__,
-        if isinstance(obj, nt.void) and obj.dtype.fields:
-            return obj.view((self.__class__, obj.dtype.fields))
+        if isinstance(obj, nt.void) and obj.dtype.names is not None:
+            return obj.view((self.__class__, obj.dtype))
         else:
             # return a single element
             return obj
@@ -287,10 +304,8 @@ class record(nt.void):
         # pretty-print all fields
         names = self.dtype.names
         maxlen = max(len(name) for name in names)
-        rows = []
         fmt = '%% %ds: %%s' % maxlen
-        for name in names:
-            rows.append(fmt % (name, getattr(self, name)))
+        rows = [fmt % (name, getattr(self, name)) for name in names]
         return "\n".join(rows)
 
 # The recarray is almost identical to a standard array (which supports
@@ -379,20 +394,19 @@ class recarray(ndarray):
     --------
     Create an array with two fields, ``x`` and ``y``:
 
-    >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', float), ('y', int)])
+    >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i8')])
     >>> x
-    array([(1.0, 2), (3.0, 4)],
-          dtype=[('x', '<f8'), ('y', '<i4')])
+    array([(1., 2), (3., 4)], dtype=[('x', '<f8'), ('y', '<i8')])
 
     >>> x['x']
-    array([ 1.,  3.])
+    array([1., 3.])
 
     View the array as a record array:
 
     >>> x = x.view(np.recarray)
 
     >>> x.x
-    array([ 1.,  3.])
+    array([1., 3.])
 
     >>> x.y
     array([2, 4])
@@ -430,7 +444,7 @@ class recarray(ndarray):
         return self
 
     def __array_finalize__(self, obj):
-        if self.dtype.type is not record and self.dtype.fields:
+        if self.dtype.type is not record and self.dtype.names is not None:
             # if self.dtype is not np.record, invoke __setattr__ which will
             # convert it to a record if it is a void dtype.
             self.dtype = self.dtype
@@ -458,7 +472,7 @@ class recarray(ndarray):
         # with void type convert it to the same dtype.type (eg to preserve
         # numpy.record type if present), since nested structured fields do not
         # inherit type. Don't do this for non-void structures though.
-        if obj.dtype.fields:
+        if obj.dtype.names is not None:
             if issubclass(obj.dtype.type, nt.void):
                 return obj.view(dtype=(self.dtype.type, obj.dtype))
             return obj
@@ -473,7 +487,7 @@ class recarray(ndarray):
 
         # Automatically convert (void) structured types to records
         # (but not non-void structures, subarrays, or non-structured voids)
-        if attr == 'dtype' and issubclass(val.type, nt.void) and val.fields:
+        if attr == 'dtype' and issubclass(val.type, nt.void) and val.names is not None:
             val = sb.dtype((record, val))
 
         newattr = attr not in self.__dict__
@@ -507,7 +521,7 @@ class recarray(ndarray):
         # copy behavior of getattr, except that here
         # we might also be returning a single element
         if isinstance(obj, ndarray):
-            if obj.dtype.fields:
+            if obj.dtype.names is not None:
                 obj = obj.view(type(self))
                 if issubclass(obj.dtype.type, nt.void):
                     return obj.view(dtype=(self.dtype.type, obj.dtype))
@@ -563,7 +577,7 @@ class recarray(ndarray):
 
         if val is None:
             obj = self.getfield(*res)
-            if obj.dtype.fields:
+            if obj.dtype.names is not None:
                 return obj
             return obj.view(ndarray)
         else:
@@ -579,7 +593,7 @@ def fromarrays(arrayList, dtype=None, shape=None, formats=None,
     >>> x3=np.array([1.1,2,3,4])
     >>> r = np.core.records.fromarrays([x1,x2,x3],names='a,b,c')
     >>> print(r[1])
-    (2, 'dd', 2.0)
+    (2, 'dd', 2.0) # may vary
     >>> x1[1]=34
     >>> r.a
     array([1, 2, 3, 4])
@@ -598,10 +612,7 @@ def fromarrays(arrayList, dtype=None, shape=None, formats=None,
         # and determine the formats.
         formats = []
         for obj in arrayList:
-            if not isinstance(obj, ndarray):
-                raise ValueError("item in the array list must be an ndarray.")
-            formats.append(obj.dtype.str)
-        formats = ','.join(formats)
+            formats.append(obj.dtype)
 
     if dtype is not None:
         descr = sb.dtype(dtype)
@@ -658,11 +669,11 @@ def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
     >>> r.col1
     array([456,   2])
     >>> r.col2
-    array(['dbe', 'de'],
-          dtype='|S3')
+    array(['dbe', 'de'], dtype='<U3')
     >>> import pickle
-    >>> print(pickle.loads(pickle.dumps(r)))
-    [(456, 'dbe', 1.2) (2, 'de', 1.3)]
+    >>> pickle.loads(pickle.dumps(r))
+    rec.array([(456, 'dbe', 1.2), (  2, 'de', 1.3)],
+              dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')])
     """
 
     if formats is None and dtype is None:  # slower
@@ -710,7 +721,7 @@ def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
     a string"""
 
     if dtype is None and formats is None:
-        raise ValueError("Must have dtype= or formats=")
+        raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")
 
     if dtype is not None:
         descr = sb.dtype(dtype)
@@ -737,9 +748,9 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
              names=None, titles=None, aligned=False, byteorder=None):
     """Create an array from binary file data
 
-    If file is a string then that file is opened, else it is assumed
-    to be a file object. The file object must support random access
-    (i.e. it must have tell and seek methods).
+    If file is a string or a path-like object then that file is opened,
+    else it is assumed to be a file object. The file object must
+    support random access (i.e. it must have tell and seek methods).
 
     >>> from tempfile import TemporaryFile
     >>> a = np.empty(10,dtype='f8,i4,a5')
@@ -749,7 +760,7 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
     >>> a = a.newbyteorder('<')
     >>> a.tofile(fd)
     >>>
-    >>> fd.seek(0)
+    >>> _ = fd.seek(0)
     >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10,
     ... byteorder='<')
     >>> print(r[5])
@@ -758,47 +769,52 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
     (10,)
     """
 
+    if dtype is None and formats is None:
+        raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")
+
     if (shape is None or shape == 0):
         shape = (-1,)
     elif isinstance(shape, (int, long)):
         shape = (shape,)
 
-    name = 0
-    if isinstance(fd, str):
-        name = 1
-        fd = open(fd, 'rb')
-    if (offset > 0):
-        fd.seek(offset, 1)
-    size = get_remaining_size(fd)
-
-    if dtype is not None:
-        descr = sb.dtype(dtype)
+    if isfileobj(fd):
+        # file already opened
+        ctx = contextlib_nullcontext(fd)
     else:
-        descr = format_parser(formats, names, titles, aligned, byteorder)._descr
+        # open file
+        ctx = open(os_fspath(fd), 'rb')
 
-    itemsize = descr.itemsize
+    with ctx as fd:
+        if (offset > 0):
+            fd.seek(offset, 1)
+        size = get_remaining_size(fd)
 
-    shapeprod = sb.array(shape).prod()
-    shapesize = shapeprod * itemsize
-    if shapesize < 0:
-        shape = list(shape)
-        shape[shape.index(-1)] = size / -shapesize
-        shape = tuple(shape)
-        shapeprod = sb.array(shape).prod()
+        if dtype is not None:
+            descr = sb.dtype(dtype)
+        else:
+            descr = format_parser(formats, names, titles, aligned, byteorder)._descr
 
-    nbytes = shapeprod * itemsize
+        itemsize = descr.itemsize
 
-    if nbytes > size:
-        raise ValueError(
-                "Not enough bytes left in file for specified shape and type")
+        shapeprod = sb.array(shape).prod(dtype=nt.intp)
+        shapesize = shapeprod * itemsize
+        if shapesize < 0:
+            shape = list(shape)
+            shape[shape.index(-1)] = size // -shapesize
+            shape = tuple(shape)
+            shapeprod = sb.array(shape).prod(dtype=nt.intp)
 
-    # create the array
-    _array = recarray(shape, descr)
-    nbytesread = fd.readinto(_array.data)
-    if nbytesread != nbytes:
-        raise IOError("Didn't read as many bytes as expected")
-    if name:
-        fd.close()
+        nbytes = shapeprod * itemsize
+
+        if nbytes > size:
+            raise ValueError(
+                    "Not enough bytes left in file for specified shape and type")
+
+        # create the array
+        _array = recarray(shape, descr)
+        nbytesread = fd.readinto(_array.data)
+        if nbytesread != nbytes:
+            raise IOError("Didn't read as many bytes as expected")
 
     return _array
author	Eric Wieser <wieser.eric@gmail.com>	2019-10-15 20:20:20 +0100
committer	GitHub <noreply@github.com>	2019-10-15 20:20:20 +0100
commit	10a7a4a815105e16828fe83fb89778c3bbafe692 (patch)
tree	2c73effc6bf4b8404e63564f78661caff034b255 /numpy/core/records.py
parent	d0731e118a5c40d866702f1b5da2be4d4f52ded9 (diff)
parent	83da5faca3a313c5d37226b86fa781956f8d162b (diff)
download	numpy-10a7a4a815105e16828fe83fb89778c3bbafe692.tar.gz