diff options
author | Eric Wieser <wieser.eric@gmail.com> | 2019-10-15 20:20:20 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-15 20:20:20 +0100 |
commit | 10a7a4a815105e16828fe83fb89778c3bbafe692 (patch) | |
tree | 2c73effc6bf4b8404e63564f78661caff034b255 /numpy/core/records.py | |
parent | d0731e118a5c40d866702f1b5da2be4d4f52ded9 (diff) | |
parent | 83da5faca3a313c5d37226b86fa781956f8d162b (diff) | |
download | numpy-10a7a4a815105e16828fe83fb89778c3bbafe692.tar.gz |
Merge branch 'master' into master
Diffstat (limited to 'numpy/core/records.py')
-rw-r--r-- | numpy/core/records.py | 184 |
1 files changed, 100 insertions, 84 deletions
diff --git a/numpy/core/records.py b/numpy/core/records.py index a483871ba..a1439f9df 100644 --- a/numpy/core/records.py +++ b/numpy/core/records.py @@ -7,10 +7,9 @@ Most commonly, ndarrays contain elements of a single type, e.g. floats, integers, bools etc. However, it is possible for elements to be combinations of these using structured types, such as:: - >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', int), ('y', float)]) + >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', np.int64), ('y', np.float64)]) >>> a - array([(1, 2.0), (1, 2.0)], - dtype=[('x', '<i4'), ('y', '<f8')]) + array([(1, 2.), (1, 2.)], dtype=[('x', '<i8'), ('y', '<f8')]) Here, each element consists of two fields: x (and int), and y (a float). This is known as a structured array. The different fields are analogous @@ -21,7 +20,7 @@ one would a dictionary:: array([1, 1]) >>> a['y'] - array([ 2., 2.]) + array([2., 2.]) Record arrays allow us to access fields as properties:: @@ -31,7 +30,7 @@ Record arrays allow us to access fields as properties:: array([1, 1]) >>> ar.y - array([ 2., 2.]) + array([2., 2.]) """ from __future__ import division, absolute_import, print_function @@ -39,10 +38,14 @@ from __future__ import division, absolute_import, print_function import sys import os import warnings +from collections import Counter, OrderedDict from . import numeric as sb from . import numerictypes as nt -from numpy.compat import isfileobj, bytes, long, unicode +from numpy.compat import ( + isfileobj, bytes, long, unicode, os_fspath, contextlib_nullcontext +) +from numpy.core.overrides import set_module from .arrayprint import get_printoptions # All of the functions allow formats to be a dtype @@ -73,15 +76,28 @@ _byteorderconv = {'b':'>', numfmt = nt.typeDict +# taken from OrderedDict recipes in the Python documentation +# https://docs.python.org/3.3/library/collections.html#ordereddict-examples-and-recipes +class _OrderedCounter(Counter, OrderedDict): + """Counter that remembers the order elements are first encountered""" + + def __repr__(self): + return '%s(%r)' % (self.__class__.__name__, OrderedDict(self)) + + def __reduce__(self): + return self.__class__, (OrderedDict(self),) + + def find_duplicate(list): """Find duplication in a list, return a list of duplicated elements""" - dup = [] - for i in range(len(list)): - if (list[i] in list[i + 1:]): - if (list[i] not in dup): - dup.append(list[i]) - return dup + return [ + item + for item, counts in _OrderedCounter(list).items() + if counts > 1 + ] + +@set_module('numpy') class format_parser(object): """ Class to convert formats, names, titles description to a dtype. @@ -125,10 +141,9 @@ class format_parser(object): Examples -------- - >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'], + >>> np.format_parser(['<f8', '<i4', '<a5'], ['col1', 'col2', 'col3'], ... ['T1', 'T2', 'T3']).dtype - dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'), - (('T3', 'col3'), '|S5')]) + dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'), (('T3', 'col3'), 'S5')]) `names` and/or `titles` can be empty lists. If `titles` is an empty list, titles will simply not appear. If `names` is empty, default field names @@ -136,9 +151,9 @@ class format_parser(object): >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'], ... []).dtype - dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '|S5')]) - >>> np.format_parser(['f8', 'i4', 'a5'], [], []).dtype - dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', '|S5')]) + dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '<S5')]) + >>> np.format_parser(['<f8', '<i4', '<a5'], [], []).dtype + dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')]) """ @@ -148,16 +163,18 @@ class format_parser(object): self._createdescr(byteorder) self.dtype = self._descr - def _parseFormats(self, formats, aligned=0): + def _parseFormats(self, formats, aligned=False): """ Parse the field formats """ if formats is None: raise ValueError("Need formats argument") if isinstance(formats, list): - if len(formats) < 2: - formats.append('') - formats = ','.join(formats) - dtype = sb.dtype(formats, aligned) + dtype = sb.dtype( + [('f{}'.format(i), format_) for i, format_ in enumerate(formats)], + aligned, + ) + else: + dtype = sb.dtype(formats, aligned) fields = dtype.fields if fields is None: dtype = sb.dtype([('f1', dtype)], aligned) @@ -251,8 +268,8 @@ class record(nt.void): except AttributeError: #happens if field is Object type return obj - if dt.fields: - return obj.view((self.__class__, obj.dtype.fields)) + if dt.names is not None: + return obj.view((self.__class__, obj.dtype)) return obj else: raise AttributeError("'record' object has no " @@ -276,8 +293,8 @@ class record(nt.void): obj = nt.void.__getitem__(self, indx) # copy behavior of record.__getattribute__, - if isinstance(obj, nt.void) and obj.dtype.fields: - return obj.view((self.__class__, obj.dtype.fields)) + if isinstance(obj, nt.void) and obj.dtype.names is not None: + return obj.view((self.__class__, obj.dtype)) else: # return a single element return obj @@ -287,10 +304,8 @@ class record(nt.void): # pretty-print all fields names = self.dtype.names maxlen = max(len(name) for name in names) - rows = [] fmt = '%% %ds: %%s' % maxlen - for name in names: - rows.append(fmt % (name, getattr(self, name))) + rows = [fmt % (name, getattr(self, name)) for name in names] return "\n".join(rows) # The recarray is almost identical to a standard array (which supports @@ -379,20 +394,19 @@ class recarray(ndarray): -------- Create an array with two fields, ``x`` and ``y``: - >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', float), ('y', int)]) + >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i8')]) >>> x - array([(1.0, 2), (3.0, 4)], - dtype=[('x', '<f8'), ('y', '<i4')]) + array([(1., 2), (3., 4)], dtype=[('x', '<f8'), ('y', '<i8')]) >>> x['x'] - array([ 1., 3.]) + array([1., 3.]) View the array as a record array: >>> x = x.view(np.recarray) >>> x.x - array([ 1., 3.]) + array([1., 3.]) >>> x.y array([2, 4]) @@ -430,7 +444,7 @@ class recarray(ndarray): return self def __array_finalize__(self, obj): - if self.dtype.type is not record and self.dtype.fields: + if self.dtype.type is not record and self.dtype.names is not None: # if self.dtype is not np.record, invoke __setattr__ which will # convert it to a record if it is a void dtype. self.dtype = self.dtype @@ -458,7 +472,7 @@ class recarray(ndarray): # with void type convert it to the same dtype.type (eg to preserve # numpy.record type if present), since nested structured fields do not # inherit type. Don't do this for non-void structures though. - if obj.dtype.fields: + if obj.dtype.names is not None: if issubclass(obj.dtype.type, nt.void): return obj.view(dtype=(self.dtype.type, obj.dtype)) return obj @@ -473,7 +487,7 @@ class recarray(ndarray): # Automatically convert (void) structured types to records # (but not non-void structures, subarrays, or non-structured voids) - if attr == 'dtype' and issubclass(val.type, nt.void) and val.fields: + if attr == 'dtype' and issubclass(val.type, nt.void) and val.names is not None: val = sb.dtype((record, val)) newattr = attr not in self.__dict__ @@ -507,7 +521,7 @@ class recarray(ndarray): # copy behavior of getattr, except that here # we might also be returning a single element if isinstance(obj, ndarray): - if obj.dtype.fields: + if obj.dtype.names is not None: obj = obj.view(type(self)) if issubclass(obj.dtype.type, nt.void): return obj.view(dtype=(self.dtype.type, obj.dtype)) @@ -563,7 +577,7 @@ class recarray(ndarray): if val is None: obj = self.getfield(*res) - if obj.dtype.fields: + if obj.dtype.names is not None: return obj return obj.view(ndarray) else: @@ -579,7 +593,7 @@ def fromarrays(arrayList, dtype=None, shape=None, formats=None, >>> x3=np.array([1.1,2,3,4]) >>> r = np.core.records.fromarrays([x1,x2,x3],names='a,b,c') >>> print(r[1]) - (2, 'dd', 2.0) + (2, 'dd', 2.0) # may vary >>> x1[1]=34 >>> r.a array([1, 2, 3, 4]) @@ -598,10 +612,7 @@ def fromarrays(arrayList, dtype=None, shape=None, formats=None, # and determine the formats. formats = [] for obj in arrayList: - if not isinstance(obj, ndarray): - raise ValueError("item in the array list must be an ndarray.") - formats.append(obj.dtype.str) - formats = ','.join(formats) + formats.append(obj.dtype) if dtype is not None: descr = sb.dtype(dtype) @@ -658,11 +669,11 @@ def fromrecords(recList, dtype=None, shape=None, formats=None, names=None, >>> r.col1 array([456, 2]) >>> r.col2 - array(['dbe', 'de'], - dtype='|S3') + array(['dbe', 'de'], dtype='<U3') >>> import pickle - >>> print(pickle.loads(pickle.dumps(r))) - [(456, 'dbe', 1.2) (2, 'de', 1.3)] + >>> pickle.loads(pickle.dumps(r)) + rec.array([(456, 'dbe', 1.2), ( 2, 'de', 1.3)], + dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')]) """ if formats is None and dtype is None: # slower @@ -710,7 +721,7 @@ def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None, a string""" if dtype is None and formats is None: - raise ValueError("Must have dtype= or formats=") + raise TypeError("fromstring() needs a 'dtype' or 'formats' argument") if dtype is not None: descr = sb.dtype(dtype) @@ -737,9 +748,9 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None, names=None, titles=None, aligned=False, byteorder=None): """Create an array from binary file data - If file is a string then that file is opened, else it is assumed - to be a file object. The file object must support random access - (i.e. it must have tell and seek methods). + If file is a string or a path-like object then that file is opened, + else it is assumed to be a file object. The file object must + support random access (i.e. it must have tell and seek methods). >>> from tempfile import TemporaryFile >>> a = np.empty(10,dtype='f8,i4,a5') @@ -749,7 +760,7 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None, >>> a = a.newbyteorder('<') >>> a.tofile(fd) >>> - >>> fd.seek(0) + >>> _ = fd.seek(0) >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10, ... byteorder='<') >>> print(r[5]) @@ -758,47 +769,52 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None, (10,) """ + if dtype is None and formats is None: + raise TypeError("fromfile() needs a 'dtype' or 'formats' argument") + if (shape is None or shape == 0): shape = (-1,) elif isinstance(shape, (int, long)): shape = (shape,) - name = 0 - if isinstance(fd, str): - name = 1 - fd = open(fd, 'rb') - if (offset > 0): - fd.seek(offset, 1) - size = get_remaining_size(fd) - - if dtype is not None: - descr = sb.dtype(dtype) + if isfileobj(fd): + # file already opened + ctx = contextlib_nullcontext(fd) else: - descr = format_parser(formats, names, titles, aligned, byteorder)._descr + # open file + ctx = open(os_fspath(fd), 'rb') - itemsize = descr.itemsize + with ctx as fd: + if (offset > 0): + fd.seek(offset, 1) + size = get_remaining_size(fd) - shapeprod = sb.array(shape).prod() - shapesize = shapeprod * itemsize - if shapesize < 0: - shape = list(shape) - shape[shape.index(-1)] = size / -shapesize - shape = tuple(shape) - shapeprod = sb.array(shape).prod() + if dtype is not None: + descr = sb.dtype(dtype) + else: + descr = format_parser(formats, names, titles, aligned, byteorder)._descr - nbytes = shapeprod * itemsize + itemsize = descr.itemsize - if nbytes > size: - raise ValueError( - "Not enough bytes left in file for specified shape and type") + shapeprod = sb.array(shape).prod(dtype=nt.intp) + shapesize = shapeprod * itemsize + if shapesize < 0: + shape = list(shape) + shape[shape.index(-1)] = size // -shapesize + shape = tuple(shape) + shapeprod = sb.array(shape).prod(dtype=nt.intp) - # create the array - _array = recarray(shape, descr) - nbytesread = fd.readinto(_array.data) - if nbytesread != nbytes: - raise IOError("Didn't read as many bytes as expected") - if name: - fd.close() + nbytes = shapeprod * itemsize + + if nbytes > size: + raise ValueError( + "Not enough bytes left in file for specified shape and type") + + # create the array + _array = recarray(shape, descr) + nbytesread = fd.readinto(_array.data) + if nbytesread != nbytes: + raise IOError("Didn't read as many bytes as expected") return _array |