summaryrefslogtreecommitdiff
path: root/numpy/core/records.py
diff options
context:
space:
mode:
authorEric Wieser <wieser.eric@gmail.com>2019-10-15 20:20:20 +0100
committerGitHub <noreply@github.com>2019-10-15 20:20:20 +0100
commit10a7a4a815105e16828fe83fb89778c3bbafe692 (patch)
tree2c73effc6bf4b8404e63564f78661caff034b255 /numpy/core/records.py
parentd0731e118a5c40d866702f1b5da2be4d4f52ded9 (diff)
parent83da5faca3a313c5d37226b86fa781956f8d162b (diff)
downloadnumpy-10a7a4a815105e16828fe83fb89778c3bbafe692.tar.gz
Merge branch 'master' into master
Diffstat (limited to 'numpy/core/records.py')
-rw-r--r--numpy/core/records.py184
1 files changed, 100 insertions, 84 deletions
diff --git a/numpy/core/records.py b/numpy/core/records.py
index a483871ba..a1439f9df 100644
--- a/numpy/core/records.py
+++ b/numpy/core/records.py
@@ -7,10 +7,9 @@ Most commonly, ndarrays contain elements of a single type, e.g. floats,
integers, bools etc. However, it is possible for elements to be combinations
of these using structured types, such as::
- >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', int), ('y', float)])
+ >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', np.int64), ('y', np.float64)])
>>> a
- array([(1, 2.0), (1, 2.0)],
- dtype=[('x', '<i4'), ('y', '<f8')])
+ array([(1, 2.), (1, 2.)], dtype=[('x', '<i8'), ('y', '<f8')])
Here, each element consists of two fields: x (and int), and y (a float).
This is known as a structured array. The different fields are analogous
@@ -21,7 +20,7 @@ one would a dictionary::
array([1, 1])
>>> a['y']
- array([ 2., 2.])
+ array([2., 2.])
Record arrays allow us to access fields as properties::
@@ -31,7 +30,7 @@ Record arrays allow us to access fields as properties::
array([1, 1])
>>> ar.y
- array([ 2., 2.])
+ array([2., 2.])
"""
from __future__ import division, absolute_import, print_function
@@ -39,10 +38,14 @@ from __future__ import division, absolute_import, print_function
import sys
import os
import warnings
+from collections import Counter, OrderedDict
from . import numeric as sb
from . import numerictypes as nt
-from numpy.compat import isfileobj, bytes, long, unicode
+from numpy.compat import (
+ isfileobj, bytes, long, unicode, os_fspath, contextlib_nullcontext
+)
+from numpy.core.overrides import set_module
from .arrayprint import get_printoptions
# All of the functions allow formats to be a dtype
@@ -73,15 +76,28 @@ _byteorderconv = {'b':'>',
numfmt = nt.typeDict
+# taken from OrderedDict recipes in the Python documentation
+# https://docs.python.org/3.3/library/collections.html#ordereddict-examples-and-recipes
+class _OrderedCounter(Counter, OrderedDict):
+ """Counter that remembers the order elements are first encountered"""
+
+ def __repr__(self):
+ return '%s(%r)' % (self.__class__.__name__, OrderedDict(self))
+
+ def __reduce__(self):
+ return self.__class__, (OrderedDict(self),)
+
+
def find_duplicate(list):
"""Find duplication in a list, return a list of duplicated elements"""
- dup = []
- for i in range(len(list)):
- if (list[i] in list[i + 1:]):
- if (list[i] not in dup):
- dup.append(list[i])
- return dup
+ return [
+ item
+ for item, counts in _OrderedCounter(list).items()
+ if counts > 1
+ ]
+
+@set_module('numpy')
class format_parser(object):
"""
Class to convert formats, names, titles description to a dtype.
@@ -125,10 +141,9 @@ class format_parser(object):
Examples
--------
- >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
+ >>> np.format_parser(['<f8', '<i4', '<a5'], ['col1', 'col2', 'col3'],
... ['T1', 'T2', 'T3']).dtype
- dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'),
- (('T3', 'col3'), '|S5')])
+ dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'), (('T3', 'col3'), 'S5')])
`names` and/or `titles` can be empty lists. If `titles` is an empty list,
titles will simply not appear. If `names` is empty, default field names
@@ -136,9 +151,9 @@ class format_parser(object):
>>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
... []).dtype
- dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '|S5')])
- >>> np.format_parser(['f8', 'i4', 'a5'], [], []).dtype
- dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', '|S5')])
+ dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '<S5')])
+ >>> np.format_parser(['<f8', '<i4', '<a5'], [], []).dtype
+ dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')])
"""
@@ -148,16 +163,18 @@ class format_parser(object):
self._createdescr(byteorder)
self.dtype = self._descr
- def _parseFormats(self, formats, aligned=0):
+ def _parseFormats(self, formats, aligned=False):
""" Parse the field formats """
if formats is None:
raise ValueError("Need formats argument")
if isinstance(formats, list):
- if len(formats) < 2:
- formats.append('')
- formats = ','.join(formats)
- dtype = sb.dtype(formats, aligned)
+ dtype = sb.dtype(
+ [('f{}'.format(i), format_) for i, format_ in enumerate(formats)],
+ aligned,
+ )
+ else:
+ dtype = sb.dtype(formats, aligned)
fields = dtype.fields
if fields is None:
dtype = sb.dtype([('f1', dtype)], aligned)
@@ -251,8 +268,8 @@ class record(nt.void):
except AttributeError:
#happens if field is Object type
return obj
- if dt.fields:
- return obj.view((self.__class__, obj.dtype.fields))
+ if dt.names is not None:
+ return obj.view((self.__class__, obj.dtype))
return obj
else:
raise AttributeError("'record' object has no "
@@ -276,8 +293,8 @@ class record(nt.void):
obj = nt.void.__getitem__(self, indx)
# copy behavior of record.__getattribute__,
- if isinstance(obj, nt.void) and obj.dtype.fields:
- return obj.view((self.__class__, obj.dtype.fields))
+ if isinstance(obj, nt.void) and obj.dtype.names is not None:
+ return obj.view((self.__class__, obj.dtype))
else:
# return a single element
return obj
@@ -287,10 +304,8 @@ class record(nt.void):
# pretty-print all fields
names = self.dtype.names
maxlen = max(len(name) for name in names)
- rows = []
fmt = '%% %ds: %%s' % maxlen
- for name in names:
- rows.append(fmt % (name, getattr(self, name)))
+ rows = [fmt % (name, getattr(self, name)) for name in names]
return "\n".join(rows)
# The recarray is almost identical to a standard array (which supports
@@ -379,20 +394,19 @@ class recarray(ndarray):
--------
Create an array with two fields, ``x`` and ``y``:
- >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', float), ('y', int)])
+ >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i8')])
>>> x
- array([(1.0, 2), (3.0, 4)],
- dtype=[('x', '<f8'), ('y', '<i4')])
+ array([(1., 2), (3., 4)], dtype=[('x', '<f8'), ('y', '<i8')])
>>> x['x']
- array([ 1., 3.])
+ array([1., 3.])
View the array as a record array:
>>> x = x.view(np.recarray)
>>> x.x
- array([ 1., 3.])
+ array([1., 3.])
>>> x.y
array([2, 4])
@@ -430,7 +444,7 @@ class recarray(ndarray):
return self
def __array_finalize__(self, obj):
- if self.dtype.type is not record and self.dtype.fields:
+ if self.dtype.type is not record and self.dtype.names is not None:
# if self.dtype is not np.record, invoke __setattr__ which will
# convert it to a record if it is a void dtype.
self.dtype = self.dtype
@@ -458,7 +472,7 @@ class recarray(ndarray):
# with void type convert it to the same dtype.type (eg to preserve
# numpy.record type if present), since nested structured fields do not
# inherit type. Don't do this for non-void structures though.
- if obj.dtype.fields:
+ if obj.dtype.names is not None:
if issubclass(obj.dtype.type, nt.void):
return obj.view(dtype=(self.dtype.type, obj.dtype))
return obj
@@ -473,7 +487,7 @@ class recarray(ndarray):
# Automatically convert (void) structured types to records
# (but not non-void structures, subarrays, or non-structured voids)
- if attr == 'dtype' and issubclass(val.type, nt.void) and val.fields:
+ if attr == 'dtype' and issubclass(val.type, nt.void) and val.names is not None:
val = sb.dtype((record, val))
newattr = attr not in self.__dict__
@@ -507,7 +521,7 @@ class recarray(ndarray):
# copy behavior of getattr, except that here
# we might also be returning a single element
if isinstance(obj, ndarray):
- if obj.dtype.fields:
+ if obj.dtype.names is not None:
obj = obj.view(type(self))
if issubclass(obj.dtype.type, nt.void):
return obj.view(dtype=(self.dtype.type, obj.dtype))
@@ -563,7 +577,7 @@ class recarray(ndarray):
if val is None:
obj = self.getfield(*res)
- if obj.dtype.fields:
+ if obj.dtype.names is not None:
return obj
return obj.view(ndarray)
else:
@@ -579,7 +593,7 @@ def fromarrays(arrayList, dtype=None, shape=None, formats=None,
>>> x3=np.array([1.1,2,3,4])
>>> r = np.core.records.fromarrays([x1,x2,x3],names='a,b,c')
>>> print(r[1])
- (2, 'dd', 2.0)
+ (2, 'dd', 2.0) # may vary
>>> x1[1]=34
>>> r.a
array([1, 2, 3, 4])
@@ -598,10 +612,7 @@ def fromarrays(arrayList, dtype=None, shape=None, formats=None,
# and determine the formats.
formats = []
for obj in arrayList:
- if not isinstance(obj, ndarray):
- raise ValueError("item in the array list must be an ndarray.")
- formats.append(obj.dtype.str)
- formats = ','.join(formats)
+ formats.append(obj.dtype)
if dtype is not None:
descr = sb.dtype(dtype)
@@ -658,11 +669,11 @@ def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
>>> r.col1
array([456, 2])
>>> r.col2
- array(['dbe', 'de'],
- dtype='|S3')
+ array(['dbe', 'de'], dtype='<U3')
>>> import pickle
- >>> print(pickle.loads(pickle.dumps(r)))
- [(456, 'dbe', 1.2) (2, 'de', 1.3)]
+ >>> pickle.loads(pickle.dumps(r))
+ rec.array([(456, 'dbe', 1.2), ( 2, 'de', 1.3)],
+ dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')])
"""
if formats is None and dtype is None: # slower
@@ -710,7 +721,7 @@ def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
a string"""
if dtype is None and formats is None:
- raise ValueError("Must have dtype= or formats=")
+ raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")
if dtype is not None:
descr = sb.dtype(dtype)
@@ -737,9 +748,9 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
names=None, titles=None, aligned=False, byteorder=None):
"""Create an array from binary file data
- If file is a string then that file is opened, else it is assumed
- to be a file object. The file object must support random access
- (i.e. it must have tell and seek methods).
+ If file is a string or a path-like object then that file is opened,
+ else it is assumed to be a file object. The file object must
+ support random access (i.e. it must have tell and seek methods).
>>> from tempfile import TemporaryFile
>>> a = np.empty(10,dtype='f8,i4,a5')
@@ -749,7 +760,7 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
>>> a = a.newbyteorder('<')
>>> a.tofile(fd)
>>>
- >>> fd.seek(0)
+ >>> _ = fd.seek(0)
>>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10,
... byteorder='<')
>>> print(r[5])
@@ -758,47 +769,52 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
(10,)
"""
+ if dtype is None and formats is None:
+ raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")
+
if (shape is None or shape == 0):
shape = (-1,)
elif isinstance(shape, (int, long)):
shape = (shape,)
- name = 0
- if isinstance(fd, str):
- name = 1
- fd = open(fd, 'rb')
- if (offset > 0):
- fd.seek(offset, 1)
- size = get_remaining_size(fd)
-
- if dtype is not None:
- descr = sb.dtype(dtype)
+ if isfileobj(fd):
+ # file already opened
+ ctx = contextlib_nullcontext(fd)
else:
- descr = format_parser(formats, names, titles, aligned, byteorder)._descr
+ # open file
+ ctx = open(os_fspath(fd), 'rb')
- itemsize = descr.itemsize
+ with ctx as fd:
+ if (offset > 0):
+ fd.seek(offset, 1)
+ size = get_remaining_size(fd)
- shapeprod = sb.array(shape).prod()
- shapesize = shapeprod * itemsize
- if shapesize < 0:
- shape = list(shape)
- shape[shape.index(-1)] = size / -shapesize
- shape = tuple(shape)
- shapeprod = sb.array(shape).prod()
+ if dtype is not None:
+ descr = sb.dtype(dtype)
+ else:
+ descr = format_parser(formats, names, titles, aligned, byteorder)._descr
- nbytes = shapeprod * itemsize
+ itemsize = descr.itemsize
- if nbytes > size:
- raise ValueError(
- "Not enough bytes left in file for specified shape and type")
+ shapeprod = sb.array(shape).prod(dtype=nt.intp)
+ shapesize = shapeprod * itemsize
+ if shapesize < 0:
+ shape = list(shape)
+ shape[shape.index(-1)] = size // -shapesize
+ shape = tuple(shape)
+ shapeprod = sb.array(shape).prod(dtype=nt.intp)
- # create the array
- _array = recarray(shape, descr)
- nbytesread = fd.readinto(_array.data)
- if nbytesread != nbytes:
- raise IOError("Didn't read as many bytes as expected")
- if name:
- fd.close()
+ nbytes = shapeprod * itemsize
+
+ if nbytes > size:
+ raise ValueError(
+ "Not enough bytes left in file for specified shape and type")
+
+ # create the array
+ _array = recarray(shape, descr)
+ nbytesread = fd.readinto(_array.data)
+ if nbytesread != nbytes:
+ raise IOError("Didn't read as many bytes as expected")
return _array