Diffstat (limited to 'numpy/lib/npyio.py')
-rw-r--r--  numpy/lib/npyio.py  340
1 file changed, 148 insertions, 192 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index d12482cb7..b8b3fe877 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -5,7 +5,7 @@ import itertools
 import warnings
 import weakref
 import contextlib
-from operator import itemgetter, index as opindex
+from operator import itemgetter, index as opindex, methodcaller
 from collections.abc import Mapping
 
 import numpy as np
@@ -14,7 +14,6 @@ from ._datasource import DataSource
 from numpy.core import overrides
 from numpy.core.multiarray import packbits, unpackbits
 from numpy.core.overrides import set_array_function_like_doc, set_module
-from numpy.core._internal import recursive
 from ._iotools import (
     LineSplitter, NameValidator, StringConverter, ConverterError,
     ConverterLockError, ConversionWarning, _is_string_like,
@@ -27,18 +26,9 @@ from numpy.compat import (
     )
 
 
-@set_module('numpy')
-def loads(*args, **kwargs):
-    # NumPy 1.15.0, 2017-12-10
-    warnings.warn(
-        "np.loads is deprecated, use pickle.loads instead",
-        DeprecationWarning, stacklevel=2)
-    return pickle.loads(*args, **kwargs)
-
-
 __all__ = [
-    'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt',
-    'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez',
+    'savetxt', 'loadtxt', 'genfromtxt',
+    'recfromtxt', 'recfromcsv', 'load', 'save', 'savez',
     'savez_compressed', 'packbits', 'unpackbits', 'fromregex',
     'DataSource'
     ]
@@ -729,36 +719,100 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
     zipf.close()
 
 
+def _floatconv(x):
+    try:
+        return float(x)  # The fastest path.
+    except ValueError:
+        if '0x' in x:  # Don't accidentally convert "a" ("0xa") to 10.
+            try:
+                return float.fromhex(x)
+            except ValueError:
+                pass
+        raise  # Raise the original exception, which makes more sense.
+
+
+_CONVERTERS = [  # These converters only ever get strs (not bytes) as input.
+    (np.bool_, lambda x: bool(int(x))),
+    (np.uint64, np.uint64),
+    (np.int64, np.int64),
+    (np.integer, lambda x: int(float(x))),
+    (np.longdouble, np.longdouble),
+    (np.floating, _floatconv),
+    (complex, lambda x: complex(x.replace('+-', '-'))),
+    (np.bytes_, methodcaller('encode', 'latin-1')),
+    (np.unicode_, str),
+]
+
+
 def _getconv(dtype):
-    """ Find the correct dtype converter. Adapted from matplotlib """
-
-    def floatconv(x):
-        x.lower()
-        if '0x' in x:
-            return float.fromhex(x)
-        return float(x)
-
-    typ = dtype.type
-    if issubclass(typ, np.bool_):
-        return lambda x: bool(int(x))
-    if issubclass(typ, np.uint64):
-        return np.uint64
-    if issubclass(typ, np.int64):
-        return np.int64
-    if issubclass(typ, np.integer):
-        return lambda x: int(float(x))
-    elif issubclass(typ, np.longdouble):
-        return np.longdouble
-    elif issubclass(typ, np.floating):
-        return floatconv
-    elif issubclass(typ, complex):
-        return lambda x: complex(asstr(x).replace('+-', '-'))
-    elif issubclass(typ, np.bytes_):
-        return asbytes
-    elif issubclass(typ, np.unicode_):
-        return asunicode
+    """
+    Find the correct dtype converter. Adapted from matplotlib.
+
+    Even when a lambda is returned, it is defined at the toplevel, to allow
+    testing for equality and enabling optimization for single-type data.
+    """
+    for base, conv in _CONVERTERS:
+        if issubclass(dtype.type, base):
+            return conv
+    return str
+
+
+# _loadtxt_flatten_dtype_internal and _loadtxt_pack_items are loadtxt helpers
+# lifted to the toplevel because recursive inner functions cause either
+# GC-dependent reference loops (because they are closures over loadtxt's
+# internal variables) or large overheads if using a manual trampoline to hide
+# the recursive calls.
+
+
+# not to be confused with the flatten_dtype we import...
+def _loadtxt_flatten_dtype_internal(dt):
+    """Unpack a structured data-type, and produce a packer function."""
+    if dt.names is None:
+        # If the dtype is flattened, return.
+        # If the dtype has a shape, the dtype occurs
+        # in the list more than once.
+        shape = dt.shape
+        if len(shape) == 0:
+            return ([dt.base], None)
+        else:
+            packing = [(shape[-1], list)]
+            if len(shape) > 1:
+                for dim in dt.shape[-2::-1]:
+                    packing = [(dim*packing[0][0], packing*dim)]
+            return ([dt.base] * int(np.prod(dt.shape)),
+                    functools.partial(_loadtxt_pack_items, packing))
+    else:
+        types = []
+        packing = []
+        for field in dt.names:
+            tp, bytes = dt.fields[field]
+            flat_dt, flat_packer = _loadtxt_flatten_dtype_internal(tp)
+            types.extend(flat_dt)
+            flat_packing = flat_packer.args[0] if flat_packer else None
+            # Avoid extra nesting for subarrays
+            if tp.ndim > 0:
+                packing.extend(flat_packing)
+            else:
+                packing.append((len(flat_dt), flat_packing))
+        return (types, functools.partial(_loadtxt_pack_items, packing))
+
+
+def _loadtxt_pack_items(packing, items):
+    """Pack items into nested lists based on re-packing info."""
+    if packing is None:
+        return items[0]
+    elif packing is tuple:
+        return tuple(items)
+    elif packing is list:
+        return list(items)
     else:
-        return asstr
+        start = 0
+        ret = []
+        for length, subpacking in packing:
+            ret.append(
+                _loadtxt_pack_items(subpacking, items[start:start+length]))
+            start += length
+        return tuple(ret)
 
 
 # amount of lines loadtxt reads in one chunk, can be overridden for testing
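The new converter table and the lifted packer helpers compose as follows; a minimal sketch, assuming a numpy build that contains this patch (all three imported names are private to numpy.lib.npyio, not public API):

    import numpy as np
    from numpy.lib.npyio import _getconv, _loadtxt_flatten_dtype_internal

    # A structured dtype: one scalar field plus one (2,)-subarray field.
    dt = np.dtype([('x', np.int32), ('pos', np.float64, (2,))])

    # Flatten into per-column base dtypes and a packer for the nesting.
    types, packer = _loadtxt_flatten_dtype_internal(dt)
    # types == [dtype('int32'), dtype('float64'), dtype('float64')]

    convs = [_getconv(t) for t in types]    # int(float(x)), _floatconv, _floatconv
    items = [c(v) for c, v in zip(convs, ['1', '2.5', '0x1p-1'])]
    packer(items)    # -> (1, [2.5, 0.5]); the hex float is handled by _floatconv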
@@ -783,10 +837,11 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
 
     Parameters
     ----------
-    fname : file, str, or pathlib.Path
-        File, filename, or generator to read.  If the filename extension is
-        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
-        generators should return byte strings.
+    fname : file, str, pathlib.Path, list of str, generator
+        File, filename, list, or generator to read.  If the filename
+        extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note
+        that generators must return bytes or strings. The strings
+        in a list or produced by a generator are treated as lines.
     dtype : data-type, optional
         Data-type of the resulting array; default: float.  If this is a
         structured data-type, the resulting array will be 1-dimensional, and
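The broadened `fname` handling documented above is public behaviour and can be sketched directly:

    import numpy as np

    # Strings in a list are treated as lines.
    a = np.loadtxt(["1 2", "3 4"])            # array([[1., 2.], [3., 4.]])

    # A generator may yield bytes (or str); each item is one line.
    def gen():
        yield b"5 6"
        yield b"7 8"
    b = np.loadtxt(gen())                     # array([[5., 6.], [7., 8.]])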
@@ -915,60 +970,11 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
 
     # Nested functions used by loadtxt.
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-    # not to be confused with the flatten_dtype we import...
-    @recursive
-    def flatten_dtype_internal(self, dt):
-        """Unpack a structured data-type, and produce re-packing info."""
-        if dt.names is None:
-            # If the dtype is flattened, return.
-            # If the dtype has a shape, the dtype occurs
-            # in the list more than once.
-            shape = dt.shape
-            if len(shape) == 0:
-                return ([dt.base], None)
-            else:
-                packing = [(shape[-1], list)]
-                if len(shape) > 1:
-                    for dim in dt.shape[-2::-1]:
-                        packing = [(dim*packing[0][0], packing*dim)]
-                return ([dt.base] * int(np.prod(dt.shape)), packing)
-        else:
-            types = []
-            packing = []
-            for field in dt.names:
-                tp, bytes = dt.fields[field]
-                flat_dt, flat_packing = self(tp)
-                types.extend(flat_dt)
-                # Avoid extra nesting for subarrays
-                if tp.ndim > 0:
-                    packing.extend(flat_packing)
-                else:
-                    packing.append((len(flat_dt), flat_packing))
-            return (types, packing)
-
-    @recursive
-    def pack_items(self, items, packing):
-        """Pack items into nested lists based on re-packing info."""
-        if packing is None:
-            return items[0]
-        elif packing is tuple:
-            return tuple(items)
-        elif packing is list:
-            return list(items)
-        else:
-            start = 0
-            ret = []
-            for length, subpacking in packing:
-                ret.append(self(items[start:start+length], subpacking))
-                start += length
-            return tuple(ret)
-
     def split_line(line):
         """Chop off comments, strip, and split at delimiter. """
         line = _decode_line(line, encoding=encoding)
-
-        if comments is not None:
-            line = regex_comments.split(line, maxsplit=1)[0]
+        for comment in comments:  # Much faster than using a single regex.
+            line = line.split(comment, 1)[0]
         line = line.strip('\r\n')
         return line.split(delimiter) if line else []
@@ -993,16 +999,13 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
                 continue
             if usecols:
                 vals = [vals[j] for j in usecols]
-            if len(vals) != N:
+            if len(vals) != ncols:
                 line_num = i + skiprows + 1
                 raise ValueError("Wrong number of columns at line %d"
                                  % line_num)
-
-            # Convert each value according to its column and store
-            items = [conv(val) for (conv, val) in zip(converters, vals)]
-
-            # Then pack it according to the dtype's nesting
-            items = pack_items(items, packing)
+            # Convert each value according to its column, then pack it
+            # according to the dtype's nesting
+            items = packer(convert_row(vals))
             X.append(items)
             if len(X) > chunk_size:
                 yield X
@@ -1023,9 +1026,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         if isinstance(comments, (str, bytes)):
             comments = [comments]
         comments = [_decode_line(x) for x in comments]
-        # Compile regex for comments beforehand
-        comments = (re.escape(comment) for comment in comments)
-        regex_comments = re.compile('|'.join(comments))
+    else:
+        comments = []
 
     if delimiter is not None:
         delimiter = _decode_line(delimiter)
@@ -1060,7 +1062,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
     dtype = np.dtype(dtype)
     defconv = _getconv(dtype)
 
-    dtype_types, packing = flatten_dtype_internal(dtype)
+    dtype_types, packer = _loadtxt_flatten_dtype_internal(dtype)
 
     fown = False
     try:
@@ -1076,7 +1078,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             fencoding = getattr(fname, 'encoding', 'latin1')
     except TypeError as e:
         raise ValueError(
-            'fname must be a string, file handle, or generator'
+            f"fname must be a string, filehandle, list of strings,\n"
+            f"or generator. Got {type(fname)} instead."
         ) from e
 
     # input may be a python2 io stream
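The reworked `split_line` above replaces the precompiled alternation regex with one plain `str.split` per comment token; a standalone sketch of that loop, with a hypothetical two-token `comments` list:

    comments = ["#", "//"]                    # hypothetical comment tokens
    line = "1 2 // x # y"
    for comment in comments:                  # the same loop as in split_line
        line = line.split(comment, 1)[0]
    line.split()                              # -> ['1', '2']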
@@ -1093,32 +1096,32 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
     for i in range(skiprows):
         next(fh)
 
-    # Read until we find a line with some values, and use
-    # it to estimate the number of columns, N.
-    first_vals = None
-    try:
-        while not first_vals:
-            first_line = next(fh)
-            first_vals = split_line(first_line)
-    except StopIteration:
-        # End of lines reached
+    # Read until we find a line with some values, and use it to determine
+    # the need for decoding and estimate the number of columns.
+    for first_line in fh:
+        ncols = len(usecols or split_line(first_line))
+        if ncols:
+            break
+    else:  # End of lines reached
         first_line = ''
-        first_vals = []
+        ncols = len(usecols or [])
         warnings.warn('loadtxt: Empty input file: "%s"' % fname,
                       stacklevel=2)
-    N = len(usecols or first_vals)
 
-    # Now that we know N, create the default converters list, and
+    # Now that we know ncols, create the default converters list, and
     # set packing, if necessary.
     if len(dtype_types) > 1:
         # We're dealing with a structured array, each field of
         # the dtype matches a column
         converters = [_getconv(dt) for dt in dtype_types]
     else:
-        # All fields have the same dtype
-        converters = [defconv for i in range(N)]
-        if N > 1:
-            packing = [(N, tuple)]
+        # All fields have the same dtype; use specialized packers which are
+        # much faster than those using _loadtxt_pack_items.
+        converters = [defconv for i in range(ncols)]
+        if ncols == 1:
+            packer = itemgetter(0)
+        else:
+            def packer(row): return row
 
     # By preference, use the converters specified by the user
     for i, conv in (user_converters or {}).items():
@@ -1130,18 +1133,26 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             continue
         if byte_converters:
             # converters may use decode to workaround numpy's old
-            # behaviour, so encode the string again before passing to
-            # the user converter
-            def tobytes_first(x, conv):
-                if type(x) is bytes:
-                    return conv(x)
+            # behaviour, so encode the string again (converters are only
+            # called with strings) before passing to the user converter.
+            def tobytes_first(conv, x):
                 return conv(x.encode("latin1"))
-            converters[i] = functools.partial(tobytes_first, conv=conv)
+            converters[i] = functools.partial(tobytes_first, conv)
         else:
            converters[i] = conv
 
-    converters = [conv if conv is not bytes else
-                  lambda x: x.encode(fencoding) for conv in converters]
+    fencode = methodcaller("encode", fencoding)
+    converters = [conv if conv is not bytes else fencode
+                  for conv in converters]
+    if len(set(converters)) == 1:
+        # Optimize single-type data. Note that this is only reached if
+        # `_getconv` returns equal callables (i.e. not local lambdas) on
+        # equal dtypes.
+        def convert_row(vals, _conv=converters[0]):
+            return [*map(_conv, vals)]
+    else:
+        def convert_row(vals):
+            return [conv(val) for conv, val in zip(converters, vals)]
 
     # read data in chunks and fill it into an array via resize
     # over-allocating and shrinking the array later may be faster but is
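The `len(set(converters)) == 1` fast path above relies on `_getconv` returning the same toplevel callable for dtypes that share a converter; a sketch, again using this patch's private helpers:

    import numpy as np
    from numpy.lib.npyio import _getconv, _floatconv

    # Both floating dtypes map to the identical toplevel converter...
    convs = [_getconv(np.dtype(t)) for t in (np.float64, np.float32)]
    assert len(set(convs)) == 1 and convs[0] is _floatconv

    # ...so loadtxt can take the cheaper single-converter map() path:
    [*map(convs[0], "0.5 1.5 0x1p-1".split())]    # [0.5, 1.5, 0.5]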
@@ -1581,8 +1592,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
     ----------
     fname : file, str, pathlib.Path, list of str, generator
         File, filename, list, or generator to read.  If the filename
-        extension is `.gz` or `.bz2`, the file is first decompressed. Note
-        that generators must return byte strings. The strings
+        extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note
+        that generators must return bytes or strings. The strings
         in a list or produced by a generator are treated as lines.
     dtype : dtype, optional
         Data type of the resulting array.
@@ -1801,8 +1812,9 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
             fhd = iter(fid)
     except TypeError as e:
         raise TypeError(
-            "fname must be a string, filehandle, list of strings, "
-            "or generator. Got %s instead." % type(fname)) from e
+            f"fname must be a string, filehandle, list of strings,\n"
+            f"or generator. Got {type(fname)} instead."
+        ) from e
 
     with fid_ctx:
         split_line = LineSplitter(delimiter=delimiter, comments=comments,
@@ -2292,62 +2304,6 @@ _genfromtxt_with_like = array_function_dispatch(
 )(genfromtxt)
 
 
-def ndfromtxt(fname, **kwargs):
-    """
-    Load ASCII data stored in a file and return it as a single array.
-
-    .. deprecated:: 1.17
-        ndfromtxt` is a deprecated alias of `genfromtxt` which
-        overwrites the ``usemask`` argument with `False` even when
-        explicitly called as ``ndfromtxt(..., usemask=True)``.
-        Use `genfromtxt` instead.
-
-    Parameters
-    ----------
-    fname, kwargs : For a description of input parameters, see `genfromtxt`.
-
-    See Also
-    --------
-    numpy.genfromtxt : generic function.
-
-    """
-    kwargs['usemask'] = False
-    # Numpy 1.17
-    warnings.warn(
-        "np.ndfromtxt is a deprecated alias of np.genfromtxt, "
-        "prefer the latter.",
-        DeprecationWarning, stacklevel=2)
-    return genfromtxt(fname, **kwargs)
-
-
-def mafromtxt(fname, **kwargs):
-    """
-    Load ASCII data stored in a text file and return a masked array.
-
-    .. deprecated:: 1.17
-        np.mafromtxt is a deprecated alias of `genfromtxt` which
-        overwrites the ``usemask`` argument with `True` even when
-        explicitly called as ``mafromtxt(..., usemask=False)``.
-        Use `genfromtxt` instead.
-
-    Parameters
-    ----------
-    fname, kwargs : For a description of input parameters, see `genfromtxt`.
-
-    See Also
-    --------
-    numpy.genfromtxt : generic function to load ASCII data.
-
-    """
-    kwargs['usemask'] = True
-    # Numpy 1.17
-    warnings.warn(
-        "np.mafromtxt is a deprecated alias of np.genfromtxt, "
-        "prefer the latter.",
-        DeprecationWarning, stacklevel=2)
-    return genfromtxt(fname, **kwargs)
-
-
 def recfromtxt(fname, **kwargs):
     """
     Load ASCII data from a file and return it in a record array.
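The aliases removed above have direct replacements, as their deprecation notes already stated; a sketch with a hypothetical input file:

    import numpy as np
    import pickle

    fname = "data.txt"                        # hypothetical input file

    # np.ndfromtxt(fname)  ->  plain ndarray output:
    a = np.genfromtxt(fname, usemask=False)

    # np.mafromtxt(fname)  ->  masked-array output:
    m = np.genfromtxt(fname, usemask=True)

    # np.loads was a thin deprecated wrapper; use the stdlib directly:
    buf = pickle.dumps([1, 2, 3])
    obj = pickle.loads(buf)                   # [1, 2, 3]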