author    Charles Harris <charlesr.harris@gmail.com>   2017-11-26 10:30:17 -0700
committer GitHub <noreply@github.com>                  2017-11-26 10:30:17 -0700
commit    8c441fae4bf1b101444d06fc5dcfdd29e66a9ba4 (patch)
tree      e8ea159c9d9d3eed3bedc0ea702732119dc0b5c4 /numpy
parent    b13c70551f1ebc5222e9b11da61e2b886fbbef5e (diff)
parent    1d97b3aafdca2722bbe2f0c10a96544121c8f78b (diff)
download  numpy-8c441fae4bf1b101444d06fc5dcfdd29e66a9ba4.tar.gz
Merge pull request #10054 from charris/gh-4208
ENH: Add encoding option to numpy text IO.
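For orientation, a minimal sketch of what the new keyword enables, pieced together from the docstrings added in this diff; the file name is illustrative, and omitting encoding keeps the backward-compatible 'bytes' default:

    import numpy as np

    # Write a small unicode array as UTF-8 text; `encoding` is the
    # keyword this PR threads through savetxt/loadtxt/genfromtxt.
    a = np.array([u'\u03d6', u'abc'], dtype=np.unicode_)
    np.savetxt('example.csv', a, fmt='%s', encoding='UTF-8')

    # Read it back as a unicode array instead of latin1 byte strings.
    b = np.loadtxt('example.csv', dtype=np.unicode_, encoding='UTF-8')

    # The default encoding='bytes' preserves the old behaviour: byte
    # arrays where possible, latin1-encoded strings passed to converters.
    legacy = np.loadtxt('example.csv', dtype='S')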
Diffstat (limited to 'numpy')
-rw-r--r--  numpy/lib/_datasource.py         | 149
-rw-r--r--  numpy/lib/_iotools.py            | 106
-rw-r--r--  numpy/lib/npyio.py               | 392
-rw-r--r--  numpy/lib/tests/test__iotools.py | 131
-rw-r--r--  numpy/lib/tests/test_io.py       | 425
5 files changed, 919 insertions(+), 284 deletions(-)
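Before the per-file diffs, a rough sketch of the _datasource side of the change (transparent xz/lzma handling plus encoding/newline threading). This assumes Python 3 with the lzma module available and a local data.csv.xz; bz2/xz text mode stays unsupported on Python 2:

    import numpy as np

    # np.lib._datasource.open dispatches on file extension; after this
    # change the supported keys are None, '.gz', '.bz2', '.xz' and
    # '.lzma', and text mode with an explicit encoding is passed
    # through to the compression opener.
    with np.lib._datasource.open('data.csv.xz', 'rt', encoding='UTF-8') as f:
        header = f.readline()

    # loadtxt now goes through the same machinery, so compressed text
    # files can be read directly with a declared encoding.
    data = np.loadtxt('data.csv.xz', encoding='UTF-8', skiprows=1)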
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py index 3affc5195..6f1295f09 100644 --- a/numpy/lib/_datasource.py +++ b/numpy/lib/_datasource.py @@ -15,7 +15,7 @@ DataSource files can originate locally or remotely: - URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt' DataSource files can also be compressed or uncompressed. Currently only -gzip and bz2 are supported. +gzip, bz2 and xz are supported. Example:: @@ -38,13 +38,99 @@ from __future__ import division, absolute_import, print_function import os import sys import shutil +import io _open = open +def _check_mode(mode, encoding, newline): + """Check mode and that encoding and newline are compatible. + + Parameters + ---------- + mode : str + File open mode. + encoding : str + File encoding. + newline : str + Newline for text files. + + """ + if "t" in mode: + if "b" in mode: + raise ValueError("Invalid mode: %r" % (mode,)) + else: + if encoding is not None: + raise ValueError("Argument 'encoding' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + +def _python2_bz2open(fn, mode, encoding, newline): + """Wrapper to open bz2 in text mode. + + Parameters + ---------- + fn : str + File name + mode : {'r', 'w'} + File mode. Note that bz2 Text files are not supported. + encoding : str + Ignored, text bz2 files not supported in Python2. + newline : str + Ignored, text bz2 files not supported in Python2. + """ + import bz2 + + _check_mode(mode, encoding, newline) + + if "t" in mode: + # BZ2File is missing necessary functions for TextIOWrapper + raise ValueError("bz2 text files not supported in python2") + else: + return bz2.BZ2File(fn, mode) + +def _python2_gzipopen(fn, mode, encoding, newline): + """ Wrapper to open gzip in text mode. + + Parameters + ---------- + fn : str, bytes, file + File path or opened file. + mode : str + File mode. The actual files are opened as binary, but will decoded + using the specified `encoding` and `newline`. + encoding : str + Encoding to be used when reading/writing as text. + newline : str + Newline to be used when reading/writing as text. + + """ + import gzip + # gzip is lacking read1 needed for TextIOWrapper + class GzipWrap(gzip.GzipFile): + def read1(self, n): + return self.read(n) + + _check_mode(mode, encoding, newline) + + gz_mode = mode.replace("t", "") + + if isinstance(fn, (str, bytes)): + binary_file = GzipWrap(fn, gz_mode) + elif hasattr(fn, "read") or hasattr(fn, "write"): + binary_file = GzipWrap(None, gz_mode, fileobj=fn) + else: + raise TypeError("filename must be a str or bytes object, or a file") + + if "t" in mode: + return io.TextIOWrapper(binary_file, encoding, newline=newline) + else: + return binary_file + # Using a class instead of a module-level dictionary # to reduce the initial 'import numpy' overhead by -# deferring the import of bz2 and gzip until needed +# deferring the import of lzma, bz2 and gzip until needed # TODO: .zip support, .tar support? class _FileOpeners(object): @@ -55,7 +141,7 @@ class _FileOpeners(object): supported file format. Attribute lookup is implemented in such a way that an instance of `_FileOpeners` itself can be indexed with the keys of that dictionary. Currently uncompressed files as well as files - compressed with ``gzip`` or ``bz2`` compression are supported. + compressed with ``gzip``, ``bz2`` or ``xz`` compression are supported. 
Notes ----- @@ -65,7 +151,7 @@ class _FileOpeners(object): Examples -------- >>> np.lib._datasource._file_openers.keys() - [None, '.bz2', '.gz'] + [None, '.bz2', '.gz', '.xz', '.lzma'] >>> np.lib._datasource._file_openers['.gz'] is gzip.open True @@ -73,21 +159,39 @@ class _FileOpeners(object): def __init__(self): self._loaded = False - self._file_openers = {None: open} + self._file_openers = {None: io.open} def _load(self): if self._loaded: return + try: import bz2 - self._file_openers[".bz2"] = bz2.BZ2File + if sys.version_info[0] >= 3: + self._file_openers[".bz2"] = bz2.open + else: + self._file_openers[".bz2"] = _python2_bz2open except ImportError: pass + try: import gzip - self._file_openers[".gz"] = gzip.open + if sys.version_info[0] >= 3: + self._file_openers[".gz"] = gzip.open + else: + self._file_openers[".gz"] = _python2_gzipopen except ImportError: pass + + try: + import lzma + self._file_openers[".xz"] = lzma.open + self._file_openers[".lzma"] = lzma.open + except (ImportError, AttributeError): + # There are incompatible backports of lzma that do not have the + # lzma.open attribute, so catch that as well as ImportError. + pass + self._loaded = True def keys(self): @@ -102,7 +206,7 @@ class _FileOpeners(object): ------- keys : list The keys are None for uncompressed files and the file extension - strings (i.e. ``'.gz'``, ``'.bz2'``) for supported compression + strings (i.e. ``'.gz'``, ``'.xz'``) for supported compression methods. """ @@ -115,7 +219,7 @@ class _FileOpeners(object): _file_openers = _FileOpeners() -def open(path, mode='r', destpath=os.curdir): +def open(path, mode='r', destpath=os.curdir, encoding=None, newline=None): """ Open `path` with `mode` and return the file object. @@ -134,6 +238,11 @@ def open(path, mode='r', destpath=os.curdir): Path to the directory where the source file gets downloaded to for use. If `destpath` is None, a temporary directory will be created. The default path is the current directory. + encoding : {None, str}, optional + Open text file with given encoding. The default encoding will be + what `io.open` uses. + newline : {None, str}, optional + Newline to use when reading text file. Returns ------- @@ -148,7 +257,7 @@ def open(path, mode='r', destpath=os.curdir): """ ds = DataSource(destpath) - return ds.open(path, mode) + return ds.open(path, mode, encoding=encoding, newline=newline) class DataSource (object): @@ -458,7 +567,7 @@ class DataSource (object): return False return False - def open(self, path, mode='r'): + def open(self, path, mode='r', encoding=None, newline=None): """ Open and return file-like object. @@ -473,6 +582,11 @@ class DataSource (object): Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to append. Available modes depend on the type of object specified by `path`. Default is 'r'. + encoding : {None, str}, optional + Open text file with given encoding. The default encoding will be + what `io.open` uses. + newline : {None, str}, optional + Newline to use when reading text file. Returns ------- @@ -496,7 +610,8 @@ class DataSource (object): _fname, ext = self._splitzipext(found) if ext == 'bz2': mode.replace("+", "") - return _file_openers[ext](found, mode=mode) + return _file_openers[ext](found, mode=mode, + encoding=encoding, newline=newline) else: raise IOError("%s not found." 
% path) @@ -619,7 +734,7 @@ class Repository (DataSource): """ return DataSource.exists(self, self._fullpath(path)) - def open(self, path, mode='r'): + def open(self, path, mode='r', encoding=None, newline=None): """ Open and return file-like object prepending Repository base URL. @@ -636,6 +751,11 @@ class Repository (DataSource): Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to append. Available modes depend on the type of object specified by `path`. Default is 'r'. + encoding : {None, str}, optional + Open text file with given encoding. The default encoding will be + what `io.open` uses. + newline : {None, str}, optional + Newline to use when reading text file. Returns ------- @@ -643,7 +763,8 @@ class Repository (DataSource): File object. """ - return DataSource.open(self, self._fullpath(path), mode) + return DataSource.open(self, self._fullpath(path), mode, + encoding=encoding, newline=newline) def listdir(self): """ diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index 1874c2e97..27143e5c6 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -8,7 +8,7 @@ __docformat__ = "restructuredtext en" import sys import numpy as np import numpy.core.numeric as nx -from numpy.compat import asbytes, bytes, asbytes_nested, basestring +from numpy.compat import asbytes, asunicode, bytes, asbytes_nested, basestring if sys.version_info[0] >= 3: from builtins import bool, int, float, complex, object, str @@ -17,15 +17,30 @@ else: from __builtin__ import bool, int, float, complex, object, unicode, str -if sys.version_info[0] >= 3: - def _bytes_to_complex(s): - return complex(s.decode('ascii')) +def _decode_line(line, encoding=None): + """Decode bytes from binary input streams. - def _bytes_to_name(s): - return s.decode('ascii') -else: - _bytes_to_complex = complex - _bytes_to_name = str + Defaults to decoding from 'latin1'. That differs from the behavior of + np.compat.asunicode that decodes from 'ascii'. + + Parameters + ---------- + line : str or bytes + Line to be decoded. + + Returns + ------- + decoded_line : unicode + Unicode in Python 2, a str (unicode) in Python 3. + + """ + if type(line) is bytes: + if encoding is None: + line = line.decode('latin1') + else: + line = line.decode(encoding) + + return line def _is_string_like(obj): @@ -189,12 +204,10 @@ class LineSplitter(object): return lambda input: [_.strip() for _ in method(input)] # - def __init__(self, delimiter=None, comments=b'#', autostrip=True): + def __init__(self, delimiter=None, comments='#', autostrip=True, encoding=None): self.comments = comments # Delimiter is a character - if isinstance(delimiter, unicode): - delimiter = delimiter.encode('ascii') - if (delimiter is None) or _is_bytes_like(delimiter): + if (delimiter is None) or isinstance(delimiter, basestring): delimiter = delimiter or None _handyman = self._delimited_splitter # Delimiter is a list of field widths @@ -213,12 +226,14 @@ class LineSplitter(object): self._handyman = self.autostrip(_handyman) else: self._handyman = _handyman + self.encoding = encoding # def _delimited_splitter(self, line): + """Chop off comments, strip, and split at delimiter. 
""" if self.comments is not None: line = line.split(self.comments)[0] - line = line.strip(b" \r\n") + line = line.strip(" \r\n") if not line: return [] return line.split(self.delimiter) @@ -227,7 +242,7 @@ class LineSplitter(object): def _fixedwidth_splitter(self, line): if self.comments is not None: line = line.split(self.comments)[0] - line = line.strip(b"\r\n") + line = line.strip("\r\n") if not line: return [] fixed = self.delimiter @@ -245,7 +260,7 @@ class LineSplitter(object): # def __call__(self, line): - return self._handyman(line) + return self._handyman(_decode_line(line, self.encoding)) class NameValidator(object): @@ -434,9 +449,9 @@ def str2bool(value): """ value = value.upper() - if value == b'TRUE': + if value == 'TRUE': return True - elif value == b'FALSE': + elif value == 'FALSE': return False else: raise ValueError("Invalid boolean") @@ -510,8 +525,10 @@ class StringConverter(object): Value to return by default, that is, when the string to be converted is flagged as missing. If not given, `StringConverter` tries to supply a reasonable default value. - missing_values : sequence of str, optional - Sequence of strings indicating a missing value. + missing_values : {None, sequence of str}, optional + ``None`` or sequence of strings indicating a missing value. If ``None`` + then missing values are indicated by empty entries. The default is + ``None``. locked : bool, optional Whether the StringConverter should be locked to prevent automatic upgrade or not. Default is False. @@ -527,9 +544,10 @@ class StringConverter(object): _mapper.append((nx.int64, int, -1)) _mapper.extend([(nx.floating, float, nx.nan), - (nx.complexfloating, _bytes_to_complex, nx.nan + 0j), + (nx.complexfloating, complex, nx.nan + 0j), (nx.longdouble, nx.longdouble, nx.nan), - (nx.string_, bytes, b'???')]) + (nx.unicode_, asunicode, '???'), + (nx.string_, asbytes, '???')]) (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper) @@ -601,11 +619,6 @@ class StringConverter(object): def __init__(self, dtype_or_func=None, default=None, missing_values=None, locked=False): - # Convert unicode (for Py3) - if isinstance(missing_values, unicode): - missing_values = asbytes(missing_values) - elif isinstance(missing_values, (list, tuple)): - missing_values = asbytes_nested(missing_values) # Defines a lock for upgrade self._locked = bool(locked) # No input dtype: minimal initialization @@ -631,7 +644,7 @@ class StringConverter(object): # None if default is None: try: - default = self.func(b'0') + default = self.func('0') except ValueError: default = None dtype = self._getdtype(default) @@ -676,11 +689,11 @@ class StringConverter(object): self.func = lambda x: int(float(x)) # Store the list of strings corresponding to missing values. if missing_values is None: - self.missing_values = set([b'']) + self.missing_values = set(['']) else: - if isinstance(missing_values, bytes): - missing_values = missing_values.split(b",") - self.missing_values = set(list(missing_values) + [b'']) + if isinstance(missing_values, basestring): + missing_values = missing_values.split(",") + self.missing_values = set(list(missing_values) + ['']) # self._callingfunction = self._strict_call self.type = self._dtypeortype(dtype) @@ -801,7 +814,7 @@ class StringConverter(object): self.iterupgrade(value) def update(self, func, default=None, testing_value=None, - missing_values=b'', locked=False): + missing_values='', locked=False): """ Set StringConverter attributes directly. 
@@ -817,8 +830,9 @@ class StringConverter(object): A string representing a standard input value of the converter. This string is used to help defining a reasonable default value. - missing_values : sequence of str, optional - Sequence of strings indicating a missing value. + missing_values : {sequence of str, None}, optional + Sequence of strings indicating a missing value. If ``None``, then + the existing `missing_values` are cleared. The default is `''`. locked : bool, optional Whether the StringConverter should be locked to prevent automatic upgrade or not. Default is False. @@ -832,25 +846,29 @@ class StringConverter(object): """ self.func = func self._locked = locked + # Don't reset the default to None if we can avoid it if default is not None: self.default = default self.type = self._dtypeortype(self._getdtype(default)) else: try: - tester = func(testing_value or b'1') + tester = func(testing_value or '1') except (TypeError, ValueError): tester = None self.type = self._dtypeortype(self._getdtype(tester)) - # Add the missing values to the existing set - if missing_values is not None: - if _is_bytes_like(missing_values): - self.missing_values.add(missing_values) - elif hasattr(missing_values, '__iter__'): - for val in missing_values: - self.missing_values.add(val) + + # Add the missing values to the existing set or clear it. + if missing_values is None: + # Clear all missing values even though the ctor initializes it to + # set(['']) when the argument is None. + self.missing_values = set() else: - self.missing_values = [] + if not np.iterable(missing_values): + missing_values = [missing_values] + if not all(isinstance(v, basestring) for v in missing_values): + raise TypeError("missing_values must be strings or unicode") + self.missing_values.update(missing_values) def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs): diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 6de5940d7..e4d827334 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1,5 +1,6 @@ from __future__ import division, absolute_import, print_function +import io import sys import os import re @@ -15,11 +16,12 @@ from numpy.core.multiarray import packbits, unpackbits from ._iotools import ( LineSplitter, NameValidator, StringConverter, ConverterError, ConverterLockError, ConversionWarning, _is_string_like, - has_nested_fields, flatten_dtype, easy_dtype, _bytes_to_name + has_nested_fields, flatten_dtype, easy_dtype, _decode_line ) from numpy.compat import ( - asbytes, asstr, asbytes_nested, bytes, basestring, unicode, is_pathlib_path + asbytes, asstr, asunicode, asbytes_nested, bytes, basestring, unicode, + is_pathlib_path ) if sys.version_info[0] >= 3: @@ -294,7 +296,7 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, used in Python 3. encoding : str, optional What encoding to use when reading Python 2 strings. Only useful when - loading Python 2 generated pickled files on Python 3, which includes + loading Python 2 generated pickled files in Python 3, which includes npy/npz files containing object arrays. Values other than 'latin1', 'ASCII', and 'bytes' are not allowed, as they can corrupt numerical data. 
Default: 'ASCII' @@ -731,8 +733,8 @@ def _getconv(dtype): def floatconv(x): x.lower() - if b'0x' in x: - return float.fromhex(asstr(x)) + if '0x' in x: + return float.fromhex(x) return float(x) typ = dtype.type @@ -752,13 +754,17 @@ def _getconv(dtype): return lambda x: complex(asstr(x)) elif issubclass(typ, np.bytes_): return asbytes + elif issubclass(typ, np.unicode_): + return asunicode else: return asstr +# amount of lines loadtxt reads in one chunk, can be overriden for testing +_loadtxt_chunksize = 50000 def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, - ndmin=0): + ndmin=0, encoding='bytes'): """ Load data from a text file. @@ -776,13 +782,13 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, each row will be interpreted as an element of the array. In this case, the number of columns used must match the number of fields in the data-type. - comments : str or sequence, optional + comments : str or sequence of str, optional The characters or list of characters used to indicate the start of a - comment; - default: '#'. + comment. For backwards compatibility, byte strings will be decoded as + 'latin1'. The default is '#'. delimiter : str, optional - The string used to separate values. By default, this is any - whitespace. + The string used to separate values. For backwards compatibility, byte + strings will be decoded as 'latin1'. The default is whitespace. converters : dict, optional A dictionary mapping column number to a function that will convert that column to a float. E.g., if column 0 is a date string: @@ -791,18 +797,15 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, ``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None. skiprows : int, optional Skip the first `skiprows` lines; default: 0. - usecols : int or sequence, optional Which columns to read, with 0 being the first. For example, usecols = (1,4,5) will extract the 2nd, 5th and 6th columns. The default, None, results in all columns being read. - .. versionadded:: 1.11.0 - - Also when a single column has to be read it is possible to use - an integer instead of a tuple. E.g ``usecols = 3`` reads the - fourth column the same way as `usecols = (3,)`` would. - + .. versionchanged:: 1.11.0 + When a single column has to be read it is possible to use + an integer instead of a tuple. E.g ``usecols = 3`` reads the + fourth column the same way as `usecols = (3,)`` would. unpack : bool, optional If True, the returned array is transposed, so that arguments may be unpacked using ``x, y, z = loadtxt(...)``. When used with a structured @@ -813,6 +816,15 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, Legal values: 0 (default), 1 or 2. .. versionadded:: 1.6.0 + encoding : str, optional + Encoding used to decode the inputfile. Does not apply to input streams. + The special value 'bytes' enables backward compatibility workarounds + that ensures you receive byte arrays as results if possible and passes + latin1 encoded strings to converters. Override this value to receive + unicode arrays and pass strings as input to converters. If set to None + the system default is used. The default value is 'bytes'. + + .. 
versionadded:: 1.14.0 Returns ------- @@ -861,16 +873,22 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, # Type conversions for Py3 convenience if comments is not None: if isinstance(comments, (basestring, bytes)): - comments = [asbytes(comments)] - else: - comments = [asbytes(comment) for comment in comments] - + comments = [comments] + comments = [_decode_line(x) for x in comments] # Compile regex for comments beforehand comments = (re.escape(comment) for comment in comments) - regex_comments = re.compile(b'|'.join(comments)) - user_converters = converters + regex_comments = re.compile('|'.join(comments)) + if delimiter is not None: - delimiter = asbytes(delimiter) + delimiter = _decode_line(delimiter) + + user_converters = converters + + if encoding == 'bytes': + encoding = None + byte_converters = True + else: + byte_converters = False if usecols is not None: # Allow usecols to be a single int or a sequence of ints @@ -896,22 +914,24 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if is_pathlib_path(fname): fname = str(fname) if _is_string_like(fname): + fh = np.lib._datasource.open(fname, 'rt', encoding=encoding) + fencoding = getattr(fh, 'encoding', 'latin1') + fh = iter(fh) fown = True - if fname.endswith('.gz'): - import gzip - fh = iter(gzip.GzipFile(fname)) - elif fname.endswith('.bz2'): - import bz2 - fh = iter(bz2.BZ2File(fname)) - elif sys.version_info[0] == 2: - fh = iter(open(fname, 'U')) - else: - fh = iter(open(fname)) else: fh = iter(fname) + fencoding = getattr(fname, 'encoding', 'latin1') except TypeError: raise ValueError('fname must be a string, file handle, or generator') - X = [] + + # input may be a python2 io stream + if encoding is not None: + fencoding = encoding + # we must assume local encoding + # TOOD emit portability warning? + elif fencoding is None: + import locale + fencoding = locale.getpreferredencoding() # not to be confused with the flatten_dtype we import... def flatten_dtype_internal(dt): @@ -960,21 +980,53 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, return tuple(ret) def split_line(line): - """Chop off comments, strip, and split at delimiter. + """Chop off comments, strip, and split at delimiter. """ + line = _decode_line(line, encoding=encoding) - Note that although the file is opened as text, this function - returns bytes. - - """ - line = asbytes(line) if comments is not None: - line = regex_comments.split(asbytes(line), maxsplit=1)[0] - line = line.strip(b'\r\n') + line = regex_comments.split(line, maxsplit=1)[0] + line = line.strip('\r\n') if line: return line.split(delimiter) else: return [] + def read_data(chunk_size): + """Parse each line, including the first. + + The file read, `fh`, is a global defined above. + + Parameters + ---------- + chunk_size : int + At most `chunk_size` lines are read at a time, with iteration + until all lines are read. 
+ + """ + X = [] + for i, line in enumerate(itertools.chain([first_line], fh)): + vals = split_line(line) + if len(vals) == 0: + continue + if usecols: + vals = [vals[j] for j in usecols] + if len(vals) != N: + line_num = i + skiprows + 1 + raise ValueError("Wrong number of columns at line %d" + % line_num) + + # Convert each value according to its column and store + items = [conv(val) for (conv, val) in zip(converters, vals)] + + # Then pack it according to the dtype's nesting + items = pack_items(items, packing) + X.append(items) + if len(X) > chunk_size: + yield X + X = [] + if X: + yield X + try: # Make sure we're dealing with a proper dtype dtype = np.dtype(dtype) @@ -1017,30 +1069,42 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, except ValueError: # Unused converter specified continue - converters[i] = conv - - # Parse each line, including the first - for i, line in enumerate(itertools.chain([first_line], fh)): - vals = split_line(line) - if len(vals) == 0: - continue - if usecols: - vals = [vals[j] for j in usecols] - if len(vals) != N: - line_num = i + skiprows + 1 - raise ValueError("Wrong number of columns at line %d" - % line_num) - - # Convert each value according to its column and store - items = [conv(val) for (conv, val) in zip(converters, vals)] - # Then pack it according to the dtype's nesting - items = pack_items(items, packing) - X.append(items) + if byte_converters: + # converters may use decode to workaround numpy's old behaviour, + # so encode the string again before passing to the user converter + def tobytes_first(x, conv): + if type(x) is bytes: + return conv(x) + return conv(x.encode("latin1")) + import functools + converters[i] = functools.partial(tobytes_first, conv=conv) + else: + converters[i] = conv + + converters = [conv if conv is not bytes else + lambda x: x.encode(fencoding) for conv in converters] + + # read data in chunks and fill it into an array via resize + # over-allocating and shrinking the array later may be faster but is + # probably not relevant compared to the cost of actually reading and + # converting the data + X = None + for x in read_data(_loadtxt_chunksize): + if X is None: + X = np.array(x, dtype) + else: + nshape = list(X.shape) + pos = nshape[0] + nshape[0] += len(x) + X.resize(nshape) + X[pos:, ...] = x finally: if fown: fh.close() - X = np.array(X, dtype) + if X is None: + X = np.array([], dtype) + # Multicolumn data are returned with shape (1, N, M), i.e. # (1, 1, M) for a single row - remove the singleton dimension there if X.ndim == 3 and X.shape[:2] == (1, 1): @@ -1072,7 +1136,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', - footer='', comments='# '): + footer='', comments='# ', encoding=None): """ Save an array to a text file. @@ -1116,6 +1180,13 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', ``numpy.loadtxt``. .. versionadded:: 1.7.0 + encoding : {None, str}, optional + Encoding used to encode the outputfile. Does not apply to output + streams. If the encoding is something other than 'bytes' or 'latin1' + you will not be able to load the file in NumPy versions < 1.14. Default + is 'latin1'. + + .. versionadded:: 1.14.0 See Also @@ -1190,21 +1261,53 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', fmt = asstr(fmt) delimiter = asstr(delimiter) + class WriteWrap(object): + """Convert to unicode in py2 or to bytes on bytestream inputs. 
+ + """ + def __init__(self, fh, encoding): + self.fh = fh + self.encoding = encoding + self.do_write = self.first_write + + def close(self): + self.fh.close() + + def write(self, v): + self.do_write(v) + + def write_bytes(self, v): + if isinstance(v, bytes): + self.fh.write(v) + else: + self.fh.write(v.encode(self.encoding)) + + def write_normal(self, v): + self.fh.write(asunicode(v)) + + def first_write(self, v): + try: + self.write_normal(v) + self.write = self.write_normal + except TypeError: + # input is probably a bytestream + self.write_bytes(v) + self.write = self.write_bytes + own_fh = False if is_pathlib_path(fname): fname = str(fname) if _is_string_like(fname): + # datasource doesn't support creating a new file ... + open(fname, 'wt').close() + fh = np.lib._datasource.open(fname, 'wt', encoding=encoding) own_fh = True - if fname.endswith('.gz'): - import gzip - fh = gzip.open(fname, 'wb') - else: - if sys.version_info[0] >= 3: - fh = open(fname, 'wb') - else: - fh = open(fname, 'w') + # need to convert str to unicode for text io output + if sys.version_info[0] == 2: + fh = WriteWrap(fh, encoding or 'latin1') elif hasattr(fname, 'write'): - fh = fname + # wrap to handle byte output streams + fh = WriteWrap(fname, encoding or 'latin1') else: raise ValueError('fname must be a string or file handle') @@ -1254,31 +1357,33 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', if len(header) > 0: header = header.replace('\n', '\n' + comments) - fh.write(asbytes(comments + header + newline)) + fh.write(comments + header + newline) if iscomplex_X: for row in X: row2 = [] for number in row: row2.append(number.real) row2.append(number.imag) - fh.write(asbytes(format % tuple(row2) + newline)) + fh.write(format % tuple(row2) + newline) else: for row in X: try: - fh.write(asbytes(format % tuple(row) + newline)) + v = format % tuple(row) + newline except TypeError: raise TypeError("Mismatch between array dtype ('%s') and " "format specifier ('%s')" % (str(X.dtype), format)) + fh.write(v) + if len(footer) > 0: footer = footer.replace('\n', '\n' + comments) - fh.write(asbytes(comments + footer + newline)) + fh.write(comments + footer + newline) finally: if own_fh: fh.close() -def fromregex(file, regexp, dtype): +def fromregex(file, regexp, dtype, encoding=None): """ Construct an array from a text file, using regular expression parsing. @@ -1295,6 +1400,10 @@ def fromregex(file, regexp, dtype): Groups in the regular expression correspond to fields in the dtype. dtype : dtype or list of dtypes Dtype for the structured array. + encoding : str, optional + Encoding used to decode the inputfile. Does not apply to input streams. + + .. versionadded:: 1.14.0 Returns ------- @@ -1335,16 +1444,22 @@ def fromregex(file, regexp, dtype): """ own_fh = False if not hasattr(file, "read"): - file = open(file, 'rb') + file = np.lib._datasource.open(file, 'rt', encoding=encoding) own_fh = True try: - if not hasattr(regexp, 'match'): - regexp = re.compile(asbytes(regexp)) if not isinstance(dtype, np.dtype): dtype = np.dtype(dtype) - seq = regexp.findall(file.read()) + content = file.read() + if isinstance(content, bytes) and not isinstance(regexp, bytes): + regexp = asbytes(regexp) + elif not isinstance(content, bytes) and isinstance(regexp, bytes): + regexp = asstr(regexp) + + if not hasattr(regexp, 'match'): + regexp = re.compile(regexp) + seq = regexp.findall(content) if seq and not isinstance(seq[0], tuple): # Only one group is in the regexp. 
# Create the new array as a single data-type and then @@ -1372,7 +1487,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, names=None, excludelist=None, deletechars=None, replace_space='_', autostrip=False, case_sensitive=True, defaultfmt="f%i", unpack=None, usemask=False, loose=True, - invalid_raise=True, max_rows=None): + invalid_raise=True, max_rows=None, encoding='bytes'): """ Load data from a text file, with missing values handled as specified. @@ -1460,6 +1575,15 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, to read the entire file. .. versionadded:: 1.10.0 + encoding : str, optional + Encoding used to decode the inputfile. Does not apply when `fname` is + a file object. The special value 'bytes' enables backward compatibility + workarounds that ensure that you receive byte arrays when possible + and passes latin1 encoded strings to converters. Override this value to + receive unicode arrays and pass strings as input to converters. If set + to None the system default is used. The default value is 'bytes'. + + .. versionadded:: 1.14.0 Returns ------- @@ -1536,15 +1660,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if max_rows < 1: raise ValueError("'max_rows' must be at least 1.") - # Py3 data conversions to bytes, for convenience - if comments is not None: - comments = asbytes(comments) - if isinstance(delimiter, unicode): - delimiter = asbytes(delimiter) - if isinstance(missing_values, (unicode, list, tuple)): - missing_values = asbytes_nested(missing_values) - - # if usemask: from numpy.ma import MaskedArray, make_mask_descr # Check the input dictionary of converters @@ -1554,16 +1669,19 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, "The input argument 'converter' should be a valid dictionary " "(got '%s' instead)" % type(user_converters)) + if encoding == 'bytes': + encoding = None + byte_converters = True + else: + byte_converters = False + # Initialize the filehandle, the LineSplitter and the NameValidator own_fhd = False try: if is_pathlib_path(fname): fname = str(fname) if isinstance(fname, basestring): - if sys.version_info[0] == 2: - fhd = iter(np.lib._datasource.open(fname, 'rbU')) - else: - fhd = iter(np.lib._datasource.open(fname, 'rb')) + fhd = iter(np.lib._datasource.open(fname, 'rt', encoding=encoding)) own_fhd = True else: fhd = iter(fname) @@ -1573,7 +1691,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, "or generator. Got %s instead." 
% type(fname)) split_line = LineSplitter(delimiter=delimiter, comments=comments, - autostrip=autostrip)._handyman + autostrip=autostrip, encoding=encoding) validate_names = NameValidator(excludelist=excludelist, deletechars=deletechars, case_sensitive=case_sensitive, @@ -1587,15 +1705,15 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, first_values = None try: while not first_values: - first_line = next(fhd) + first_line = _decode_line(next(fhd), encoding) if names is True: if comments in first_line: first_line = ( - b''.join(first_line.split(comments)[1:])) + ''.join(first_line.split(comments)[1:])) first_values = split_line(first_line) except StopIteration: # return an empty array if the datafile is empty - first_line = b'' + first_line = '' first_values = [] warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) @@ -1618,9 +1736,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Check the names and overwrite the dtype.names if needed if names is True: - names = validate_names([_bytes_to_name(_.strip()) - for _ in first_values]) - first_line = b'' + names = validate_names([str(_.strip()) for _ in first_values]) + first_line = '' elif _is_string_like(names): names = validate_names([_.strip() for _ in names.split(',')]) elif names: @@ -1657,9 +1774,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Process the missing values ............................... # Rename missing_values for convenience user_missing_values = missing_values or () + if isinstance(user_missing_values, bytes): + user_missing_values = user_missing_values.decode('latin1') # Define the list of missing_values (one column: one list) - missing_values = [list([b'']) for _ in range(nbcols)] + missing_values = [list(['']) for _ in range(nbcols)] # We have a dictionary: process it field by field if isinstance(user_missing_values, dict): @@ -1698,8 +1817,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if value not in entry: entry.append(value) # We have a string : apply it to all entries - elif isinstance(user_missing_values, bytes): - user_value = user_missing_values.split(b",") + elif isinstance(user_missing_values, basestring): + user_value = user_missing_values.split(",") for entry in missing_values: entry.extend(user_value) # We have something else: apply it to all entries @@ -1787,16 +1906,29 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, testing_value = first_values[j] else: testing_value = None - converters[i].update(conv, locked=True, + if conv is bytes: + user_conv = asbytes + elif byte_converters: + # converters may use decode to workaround numpy's old behaviour, + # so encode the string again before passing to the user converter + def tobytes_first(x, conv): + if type(x) is bytes: + return conv(x) + return conv(x.encode("latin1")) + import functools + user_conv = functools.partial(tobytes_first, conv=conv) + else: + user_conv = conv + converters[i].update(user_conv, locked=True, testing_value=testing_value, default=filling_values[i], missing_values=missing_values[i],) - uc_update.append((i, conv)) + uc_update.append((i, user_conv)) # Make sure we have the corrected keys in user_converters... user_converters.update(uc_update) # Fixme: possible error as following variable never used. - #miss_chars = [_.missing_values for _ in converters] + # miss_chars = [_.missing_values for _ in converters] # Initialize the output lists ... # ... 
rows @@ -1908,16 +2040,42 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, column_types = [conv.type for conv in converters] # Find the columns with strings... strcolidx = [i for (i, v) in enumerate(column_types) - if v in (type('S'), np.string_)] + if v == np.unicode_] + + type_str = np.unicode_ + if byte_converters and strcolidx: + # convert strings back to bytes for backward compatibility + warnings.warn( + "Reading unicode strings without specifying the encoding " + "argument is deprecated. Set the encoding, use None for the " + "system default.", + np.VisibleDeprecationWarning, stacklevel=2) + def encode_unicode_cols(row_tup): + row = list(row_tup) + for i in strcolidx: + row[i] = row[i].encode('latin1') + return tuple(row) + + try: + data = [encode_unicode_cols(r) for r in data] + type_str = np.bytes_ + except UnicodeEncodeError: + pass + + # ... and take the largest number of chars. for i in strcolidx: - column_types[i] = "|S%i" % max(len(row[i]) for row in data) + max_line_length = max(len(row[i]) for row in data) + column_types[i] = np.dtype((type_str, max_line_length)) # if names is None: # If the dtype is uniform, don't define names, else use '' base = set([c.type for c in converters if c._checked]) if len(base) == 1: - (ddtype, mdtype) = (list(base)[0], bool) + if strcolidx: + (ddtype, mdtype) = (type_str, bool) + else: + (ddtype, mdtype) = (list(base)[0], bool) else: ddtype = [(defaultfmt % i, dt) for (i, dt) in enumerate(column_types)] @@ -1966,8 +2124,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Keep the dtype of the current converter if i in user_converters: ishomogeneous &= (ttype == dtype.type) - if ttype == np.string_: - ttype = "|S%i" % max(len(row[i]) for row in data) + if np.issubdtype(ttype, np.character): + ttype = (ttype, max(len(row[i]) for row in data)) descr.append(('', ttype)) else: descr.append(('', dtype)) @@ -1990,9 +2148,9 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Try to take care of the missing data we missed names = output.dtype.names if usemask and names: - for (name, conv) in zip(names or (), converters): + for (name, conv) in zip(names, converters): missing_values = [conv(_) for _ in conv.missing_values - if _ != b''] + if _ != ''] for mval in missing_values: outputmask[name] |= (output[name] == mval) # Construct the final array diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py index 03192896c..54fac8da4 100644 --- a/numpy/lib/tests/test__iotools.py +++ b/numpy/lib/tests/test__iotools.py @@ -12,6 +12,7 @@ from numpy.lib._iotools import ( LineSplitter, NameValidator, StringConverter, has_nested_fields, easy_dtype, flatten_dtype ) +from numpy.compat import unicode class TestLineSplitter(object): @@ -19,61 +20,61 @@ class TestLineSplitter(object): def test_no_delimiter(self): "Test LineSplitter w/o delimiter" - strg = b" 1 2 3 4 5 # test" + strg = " 1 2 3 4 5 # test" test = LineSplitter()(strg) - assert_equal(test, [b'1', b'2', b'3', b'4', b'5']) + assert_equal(test, ['1', '2', '3', '4', '5']) test = LineSplitter('')(strg) - assert_equal(test, [b'1', b'2', b'3', b'4', b'5']) + assert_equal(test, ['1', '2', '3', '4', '5']) def test_space_delimiter(self): "Test space delimiter" - strg = b" 1 2 3 4 5 # test" - test = LineSplitter(b' ')(strg) - assert_equal(test, [b'1', b'2', b'3', b'4', b'', b'5']) - test = LineSplitter(b' ')(strg) - assert_equal(test, [b'1 2 3 4', b'5']) + strg = " 1 2 3 4 5 # test" + test = LineSplitter(' ')(strg) + assert_equal(test, 
['1', '2', '3', '4', '', '5']) + test = LineSplitter(' ')(strg) + assert_equal(test, ['1 2 3 4', '5']) def test_tab_delimiter(self): "Test tab delimiter" - strg = b" 1\t 2\t 3\t 4\t 5 6" - test = LineSplitter(b'\t')(strg) - assert_equal(test, [b'1', b'2', b'3', b'4', b'5 6']) - strg = b" 1 2\t 3 4\t 5 6" - test = LineSplitter(b'\t')(strg) - assert_equal(test, [b'1 2', b'3 4', b'5 6']) + strg = " 1\t 2\t 3\t 4\t 5 6" + test = LineSplitter('\t')(strg) + assert_equal(test, ['1', '2', '3', '4', '5 6']) + strg = " 1 2\t 3 4\t 5 6" + test = LineSplitter('\t')(strg) + assert_equal(test, ['1 2', '3 4', '5 6']) def test_other_delimiter(self): "Test LineSplitter on delimiter" - strg = b"1,2,3,4,,5" - test = LineSplitter(b',')(strg) - assert_equal(test, [b'1', b'2', b'3', b'4', b'', b'5']) + strg = "1,2,3,4,,5" + test = LineSplitter(',')(strg) + assert_equal(test, ['1', '2', '3', '4', '', '5']) # - strg = b" 1,2,3,4,,5 # test" - test = LineSplitter(b',')(strg) - assert_equal(test, [b'1', b'2', b'3', b'4', b'', b'5']) + strg = " 1,2,3,4,,5 # test" + test = LineSplitter(',')(strg) + assert_equal(test, ['1', '2', '3', '4', '', '5']) def test_constant_fixed_width(self): "Test LineSplitter w/ fixed-width fields" - strg = b" 1 2 3 4 5 # test" + strg = " 1 2 3 4 5 # test" test = LineSplitter(3)(strg) - assert_equal(test, [b'1', b'2', b'3', b'4', b'', b'5', b'']) + assert_equal(test, ['1', '2', '3', '4', '', '5', '']) # - strg = b" 1 3 4 5 6# test" + strg = " 1 3 4 5 6# test" test = LineSplitter(20)(strg) - assert_equal(test, [b'1 3 4 5 6']) + assert_equal(test, ['1 3 4 5 6']) # - strg = b" 1 3 4 5 6# test" + strg = " 1 3 4 5 6# test" test = LineSplitter(30)(strg) - assert_equal(test, [b'1 3 4 5 6']) + assert_equal(test, ['1 3 4 5 6']) def test_variable_fixed_width(self): - strg = b" 1 3 4 5 6# test" + strg = " 1 3 4 5 6# test" test = LineSplitter((3, 6, 6, 3))(strg) - assert_equal(test, [b'1', b'3', b'4 5', b'6']) + assert_equal(test, ['1', '3', '4 5', '6']) # - strg = b" 1 3 4 5 6# test" + strg = " 1 3 4 5 6# test" test = LineSplitter((6, 6, 9))(strg) - assert_equal(test, [b'1', b'3 4', b'5 6']) + assert_equal(test, ['1', '3 4', '5 6']) # ----------------------------------------------------------------------------- @@ -133,10 +134,7 @@ class TestNameValidator(object): def _bytes_to_date(s): - if sys.version_info[0] >= 3: - return date(*time.strptime(s.decode('latin1'), "%Y-%m-%d")[:3]) - else: - return date(*time.strptime(s, "%Y-%m-%d")[:3]) + return date(*time.strptime(s, "%Y-%m-%d")[:3]) class TestStringConverter(object): @@ -155,39 +153,45 @@ class TestStringConverter(object): assert_equal(converter._status, 0) # test int - assert_equal(converter.upgrade(b'0'), 0) + assert_equal(converter.upgrade('0'), 0) assert_equal(converter._status, 1) - # On systems where integer defaults to 32-bit, the statuses will be + # On systems where long defaults to 32-bit, the statuses will be # offset by one, so we check for this here. 
import numpy.core.numeric as nx - status_offset = int(nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize) + status_offset = int(nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize) # test int > 2**32 - assert_equal(converter.upgrade(b'17179869184'), 17179869184) + assert_equal(converter.upgrade('17179869184'), 17179869184) assert_equal(converter._status, 1 + status_offset) # test float - assert_allclose(converter.upgrade(b'0.'), 0.0) + assert_allclose(converter.upgrade('0.'), 0.0) assert_equal(converter._status, 2 + status_offset) # test complex - assert_equal(converter.upgrade(b'0j'), complex('0j')) + assert_equal(converter.upgrade('0j'), complex('0j')) assert_equal(converter._status, 3 + status_offset) # test str - assert_equal(converter.upgrade(b'a'), b'a') - assert_equal(converter._status, len(converter._mapper) - 1) + # note that the longdouble type has been skipped, so the + # _status increases by 2. Everything should succeed with + # unicode conversion (5). + for s in ['a', u'a', b'a']: + res = converter.upgrade(s) + assert_(type(res) is unicode) + assert_equal(res, u'a') + assert_equal(converter._status, 5 + status_offset) def test_missing(self): "Tests the use of missing values." - converter = StringConverter(missing_values=(b'missing', - b'missed')) - converter.upgrade(b'0') - assert_equal(converter(b'0'), 0) - assert_equal(converter(b''), converter.default) - assert_equal(converter(b'missing'), converter.default) - assert_equal(converter(b'missed'), converter.default) + converter = StringConverter(missing_values=('missing', + 'missed')) + converter.upgrade('0') + assert_equal(converter('0'), 0) + assert_equal(converter(''), converter.default) + assert_equal(converter('missing'), converter.default) + assert_equal(converter('missed'), converter.default) try: converter('miss') except ValueError: @@ -198,58 +202,59 @@ class TestStringConverter(object): dateparser = _bytes_to_date StringConverter.upgrade_mapper(dateparser, date(2000, 1, 1)) convert = StringConverter(dateparser, date(2000, 1, 1)) - test = convert(b'2001-01-01') + test = convert('2001-01-01') assert_equal(test, date(2001, 1, 1)) - test = convert(b'2009-01-01') + test = convert('2009-01-01') assert_equal(test, date(2009, 1, 1)) - test = convert(b'') + test = convert('') assert_equal(test, date(2000, 1, 1)) def test_string_to_object(self): "Make sure that string-to-object functions are properly recognized" + old_mapper = StringConverter._mapper[:] # copy of list conv = StringConverter(_bytes_to_date) - assert_equal(conv._mapper[-2][0](0), 0j) + assert_equal(conv._mapper, old_mapper) assert_(hasattr(conv, 'default')) def test_keep_default(self): "Make sure we don't lose an explicit default" - converter = StringConverter(None, missing_values=b'', + converter = StringConverter(None, missing_values='', default=-999) - converter.upgrade(b'3.14159265') + converter.upgrade('3.14159265') assert_equal(converter.default, -999) assert_equal(converter.type, np.dtype(float)) # converter = StringConverter( - None, missing_values=b'', default=0) - converter.upgrade(b'3.14159265') + None, missing_values='', default=0) + converter.upgrade('3.14159265') assert_equal(converter.default, 0) assert_equal(converter.type, np.dtype(float)) def test_keep_default_zero(self): "Check that we don't lose a default of 0" converter = StringConverter(int, default=0, - missing_values=b"N/A") + missing_values="N/A") assert_equal(converter.default, 0) def test_keep_missing_values(self): "Check that we're not losing missing values" converter = 
StringConverter(int, default=0, - missing_values=b"N/A") + missing_values="N/A") assert_equal( - converter.missing_values, set([b'', b'N/A'])) + converter.missing_values, set(['', 'N/A'])) def test_int64_dtype(self): "Check that int64 integer types can be specified" converter = StringConverter(np.int64, default=0) - val = b"-9223372036854775807" + val = "-9223372036854775807" assert_(converter(val) == -9223372036854775807) - val = b"9223372036854775807" + val = "9223372036854775807" assert_(converter(val) == 9223372036854775807) def test_uint64_dtype(self): "Check that uint64 integer types can be specified" converter = StringConverter(np.uint64, default=0) - val = b"9223372043271415339" + val = "9223372043271415339" assert_(converter(val) == 9223372043271415339) diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 6f7fcc54c..75a8e4968 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -8,8 +8,11 @@ from tempfile import NamedTemporaryFile import time import warnings import gc -from io import BytesIO +import io +from io import BytesIO, StringIO from datetime import datetime +import locale +import re import numpy as np import numpy.ma as ma @@ -17,9 +20,9 @@ from numpy.lib._iotools import ConverterError, ConversionWarning from numpy.compat import asbytes, bytes, unicode, Path from numpy.ma.testutils import assert_equal from numpy.testing import ( - run_module_suite, assert_warns, assert_, assert_raises_regex, - assert_raises, assert_allclose, assert_array_equal, temppath, dec, IS_PYPY, - suppress_warnings + run_module_suite, assert_warns, assert_, SkipTest, + assert_raises_regex, assert_raises, assert_allclose, + assert_array_equal, temppath, tempdir, dec, IS_PYPY, suppress_warnings ) @@ -44,6 +47,16 @@ class TextIO(BytesIO): MAJVER, MINVER = sys.version_info[:2] IS_64BIT = sys.maxsize > 2**32 +try: + import bz2 + HAS_BZ2 = True +except ImportError: + HAS_BZ2 = False +try: + import lzma + HAS_LZMA = True +except ImportError: + HAS_LZMA = False def strptime(s, fmt=None): @@ -52,10 +65,9 @@ def strptime(s, fmt=None): 2.5. 
""" - if sys.version_info[0] >= 3: - return datetime(*time.strptime(s.decode('latin1'), fmt)[:3]) - else: - return datetime(*time.strptime(s, fmt)[:3]) + if type(s) == bytes: + s = s.decode("latin1") + return datetime(*time.strptime(s, fmt)[:3]) class RoundtripTest(object): @@ -144,7 +156,7 @@ class RoundtripTest(object): a = np.array([1, 2, 3, 4], int) self.roundtrip(a) - @np.testing.dec.knownfailureif(sys.platform == 'win32', "Fail on Win32") + @dec.knownfailureif(sys.platform == 'win32', "Fail on Win32") def test_mmap(self): a = np.array([[1, 2.5], [4, 7.3]]) self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'}) @@ -188,8 +200,8 @@ class TestSavezLoad(RoundtripTest): self.arr_reloaded.fid.close() os.remove(self.arr_reloaded.fid.name) - @np.testing.dec.skipif(not IS_64BIT, "Works only with 64bit systems") - @np.testing.dec.slow + @dec.skipif(not IS_64BIT, "Works only with 64bit systems") + @dec.slow def test_big_arrays(self): L = (1 << 31) + 100000 a = np.empty(L, dtype=np.uint8) @@ -265,7 +277,7 @@ class TestSavezLoad(RoundtripTest): fp.seek(0) assert_(not fp.closed) - @np.testing.dec.skipif(IS_PYPY, "context manager required on PyPy") + @dec.skipif(IS_PYPY, "context manager required on PyPy") def test_closing_fid(self): # Test that issue #1517 (too many opened files) remains closed # It might be a "weak" test since failed to get triggered on @@ -331,8 +343,8 @@ class TestSaveTxt(object): def test_0D_3D(self): c = BytesIO() - assert_raises(ValueError, np.savetxt, c, np.array(1)) - assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]])) + assert_raises(ValueError, np.savetxt, c, np.array(1)) + assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]])) def test_record(self): @@ -466,8 +478,134 @@ class TestSaveTxt(object): b = np.loadtxt(w) assert_array_equal(a, b) + def test_unicode(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode) + with tempdir() as tmpdir: + # set encoding as on windows it may not be unicode even on py3 + np.savetxt(os.path.join(tmpdir, 'test.csv'), a, fmt=['%s'], + encoding='UTF-8') + + def test_unicode_roundtrip(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode) + # our gz wrapper support encoding + suffixes = ['', '.gz'] + # stdlib 2 versions do not support encoding + if MAJVER > 2: + if HAS_BZ2: + suffixes.append('.bz2') + if HAS_LZMA: + suffixes.extend(['.xz', '.lzma']) + with tempdir() as tmpdir: + for suffix in suffixes: + np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a, + fmt=['%s'], encoding='UTF-16-LE') + b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix), + encoding='UTF-16-LE', dtype=np.unicode) + assert_array_equal(a, b) + + def test_unicode_bytestream(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode) + s = BytesIO() + np.savetxt(s, a, fmt=['%s'], encoding='UTF-8') + s.seek(0) + assert_equal(s.read().decode('UTF-8'), utf8 + '\n') + + def test_unicode_stringstream(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode) + s = StringIO() + np.savetxt(s, a, fmt=['%s'], encoding='UTF-8') + s.seek(0) + assert_equal(s.read(), utf8 + '\n') + + +class LoadTxtBase(object): + def check_compressed(self, fopen, suffixes): + # Test that we can load data from a compressed file + wanted = np.arange(6).reshape((2, 3)) + linesep = ('\n', '\r\n', '\r') + for sep in linesep: + data = '0 1 2' + sep + '3 4 5' + for suffix in suffixes: + with temppath(suffix=suffix) as name: + with fopen(name, mode='wt', 
encoding='UTF-32-LE') as f: + f.write(data) + res = self.loadfunc(name, encoding='UTF-32-LE') + assert_array_equal(res, wanted) + with fopen(name, "rt", encoding='UTF-32-LE') as f: + res = self.loadfunc(f) + assert_array_equal(res, wanted) + + # Python2 .open does not support encoding + @dec.skipif(MAJVER == 2) + def test_compressed_gzip(self): + self.check_compressed(gzip.open, ('.gz',)) + + @dec.skipif(MAJVER == 2 or not HAS_BZ2) + def test_compressed_gzip(self): + self.check_compressed(bz2.open, ('.bz2',)) + + @dec.skipif(MAJVER == 2 or not HAS_LZMA) + def test_compressed_gzip(self): + self.check_compressed(lzma.open, ('.xz', '.lzma')) + + def test_encoding(self): + with temppath() as path: + with open(path, "wb") as f: + f.write('0.\n1.\n2.'.encode("UTF-16")) + x = self.loadfunc(path, encoding="UTF-16") + assert_array_equal(x, [0., 1., 2.]) + + def test_stringload(self): + # umlaute + nonascii = b'\xc3\xb6\xc3\xbc\xc3\xb6'.decode("UTF-8") + with temppath() as path: + with open(path, "wb") as f: + f.write(nonascii.encode("UTF-16")) + x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode) + assert_array_equal(x, nonascii) + + def test_binary_decode(self): + utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04' + v = self.loadfunc(BytesIO(utf16), dtype=np.unicode, encoding='UTF-16') + assert_array_equal(v, np.array(utf16.decode('UTF-16').split())) + + def test_converters_decode(self): + # test converters that decode strings + c = TextIO() + c.write(b'\xcf\x96') + c.seek(0) + x = self.loadfunc(c, dtype=np.unicode, + converters={0: lambda x: x.decode('UTF-8')}) + a = np.array([b'\xcf\x96'.decode('UTF-8')]) + assert_array_equal(x, a) + + def test_converters_nodecode(self): + # test native string converters enabled by setting an encoding + utf8 = b'\xcf\x96'.decode('UTF-8') + with temppath() as path: + with io.open(path, 'wt', encoding='UTF-8') as f: + f.write(utf8) + x = self.loadfunc(path, dtype=np.unicode, + converters={0: lambda x: x + 't'}, + encoding='UTF-8') + a = np.array([utf8 + 't']) + assert_array_equal(x, a) + + +class TestLoadTxt(LoadTxtBase): + loadfunc = staticmethod(np.loadtxt) + + def setUp(self): + # lower chunksize for testing + self.orig_chunk = np.lib.npyio._loadtxt_chunksize + np.lib.npyio._loadtxt_chunksize = 1 + def tearDown(self): + np.lib.npyio._loadtxt_chunksize = self.orig_chunk -class TestLoadTxt(object): def test_record(self): c = TextIO() c.write('1 2\n3 4') @@ -869,9 +1007,24 @@ class TestLoadTxt(object): dt = np.dtype([('x', int), ('a', 'S10'), ('y', int)]) np.loadtxt(c, delimiter=',', dtype=dt, comments=None) # Should succeed + @dec.skipif(locale.getpreferredencoding() == 'ANSI_X3.4-1968') + def test_binary_load(self): + butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\ + b"20,2,3,\xc3\x95scar\n\r" + sutf8 = butf8.decode("UTF-8").replace("\r", "").splitlines() + with temppath() as path: + with open(path, "wb") as f: + f.write(butf8) + with open(path, "rb") as f: + x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode) + assert_array_equal(x, sutf8) + # test broken latin1 conversion people now rely on + with open(path, "rb") as f: + x = np.loadtxt(f, encoding="UTF-8", dtype="S") + x = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello', b'20,2,3,\xc3\x95scar'] + assert_array_equal(x, np.array(x, dtype="S")) class Testfromregex(object): - # np.fromregex expects files opened in binary mode. 
def test_record(self): c = TextIO() c.write('1.312 foo\n1.534 bar\n4.444 qux') @@ -904,12 +1057,29 @@ class Testfromregex(object): a = np.array([(1312,), (1534,), (4444,)], dtype=dt) assert_array_equal(x, a) + def test_record_unicode(self): + utf8 = b'\xcf\x96' + with temppath() as path: + with open(path, 'wb') as f: + f.write(b'1.312 foo' + utf8 + b' \n1.534 bar\n4.444 qux') + + dt = [('num', np.float64), ('val', 'U4')] + x = np.fromregex(path, r"(?u)([0-9.]+)\s+(\w+)", dt, encoding='UTF-8') + a = np.array([(1.312, 'foo' + utf8.decode('UTF-8')), (1.534, 'bar'), + (4.444, 'qux')], dtype=dt) + assert_array_equal(x, a) + + regexp = re.compile(r"([0-9.]+)\s+(\w+)", re.UNICODE) + x = np.fromregex(path, regexp, dt, encoding='UTF-8') + assert_array_equal(x, a) + #####-------------------------------------------------------------------------- -class TestFromTxt(object): - # +class TestFromTxt(LoadTxtBase): + loadfunc = staticmethod(np.genfromtxt) + def test_record(self): # Test w/ explicit dtype data = TextIO('1 2\n3 4') @@ -1012,7 +1182,10 @@ class TestFromTxt(object): def test_header(self): # Test retrieving a header data = TextIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0') - test = np.ndfromtxt(data, dtype=None, names=True) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.ndfromtxt(data, dtype=None, names=True) + assert_(w[0].category is np.VisibleDeprecationWarning) control = {'gender': np.array([b'M', b'F']), 'age': np.array([64.0, 25.0]), 'weight': np.array([75.0, 60.0])} @@ -1023,7 +1196,10 @@ class TestFromTxt(object): def test_auto_dtype(self): # Test the automatic definition of the output dtype data = TextIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False') - test = np.ndfromtxt(data, dtype=None) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.ndfromtxt(data, dtype=None) + assert_(w[0].category is np.VisibleDeprecationWarning) control = [np.array([b'A', b'BCD']), np.array([64, 25]), np.array([75.0, 60.0]), @@ -1069,7 +1245,10 @@ F 35 58.330000 M 33 21.99 """) # The # is part of the first name and should be deleted automatically. 
@@ -1069,7 +1245,10 @@ F 35 58.330000
 M 33 21.99
 """)
         # The # is part of the first name and should be deleted automatically.
-        test = np.genfromtxt(data, names=True, dtype=None)
+        with warnings.catch_warnings(record=True) as w:
+            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+            test = np.genfromtxt(data, names=True, dtype=None)
+            assert_(w[0].category is np.VisibleDeprecationWarning)
         ctrl = np.array([('M', 21, 72.1), ('F', 35, 58.33), ('M', 33, 21.99)],
                         dtype=[('gender', '|S1'), ('age', int), ('weight', float)])
         assert_equal(test, ctrl)
@@ -1080,14 +1259,20 @@ M 21 72.100000
 F 35 58.330000
 M 33 21.99
 """)
-        test = np.genfromtxt(data, names=True, dtype=None)
+        with warnings.catch_warnings(record=True) as w:
+            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+            test = np.genfromtxt(data, names=True, dtype=None)
+            assert_(w[0].category is np.VisibleDeprecationWarning)
         assert_equal(test, ctrl)
 
     def test_autonames_and_usecols(self):
         # Tests names and usecols
         data = TextIO('A B C D\n aaaa 121 45 9.1')
-        test = np.ndfromtxt(data, usecols=('A', 'C', 'D'),
-                            names=True, dtype=None)
+        with warnings.catch_warnings(record=True) as w:
+            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+            test = np.ndfromtxt(data, usecols=('A', 'C', 'D'),
+                                names=True, dtype=None)
+            assert_(w[0].category is np.VisibleDeprecationWarning)
         control = np.array(('aaaa', 45, 9.1),
                            dtype=[('A', '|S4'), ('C', int), ('D', float)])
         assert_equal(test, control)
@@ -1104,8 +1289,12 @@ M 33 21.99
     def test_converters_with_usecols_and_names(self):
         # Tests names and usecols
         data = TextIO('A B C D\n aaaa 121 45 9.1')
-        test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True,
-                            dtype=None, converters={'C': lambda s: 2 * int(s)})
+        with warnings.catch_warnings(record=True) as w:
+            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+            test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True,
+                                dtype=None,
+                                converters={'C': lambda s: 2 * int(s)})
+            assert_(w[0].category is np.VisibleDeprecationWarning)
         control = np.array(('aaaa', 90, 9.1),
                            dtype=[('A', '|S4'), ('C', int), ('D', float)])
         assert_equal(test, control)
@@ -1225,6 +1414,18 @@ M 33 21.99
                            dtype=[('', '|S10'), ('', float)])
         assert_equal(test, control)
 
+    def test_utf8_userconverters_with_explicit_dtype(self):
+        utf8 = b'\xcf\x96'
+        with temppath() as path:
+            with open(path, 'wb') as f:
+                f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip')
+            test = np.genfromtxt(path, delimiter=",", names=None, dtype=float,
+                                 usecols=(2, 3), converters={2: np.unicode},
+                                 encoding='UTF-8')
+        control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)],
+                           dtype=[('', '|U11'), ('', float)])
+        assert_equal(test, control)
+
     def test_spacedelimiter(self):
         # Test space delimiter
         data = TextIO("1 2 3 4 5\n6 7 8 9 10")
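``test_utf8_userconverters_with_explicit_dtype`` above also checks a subtle
point: once an ``encoding`` is passed, user converters receive already-decoded
``str`` values, so ``bytes.decode`` calls inside converters become
unnecessary. A sketch under that assumption (inline bytes, illustrative)::

    import numpy as np
    from io import BytesIO

    raw = b'skip,skip,2001-01-01\xcf\x96,1.0,skip'
    # The column-2 converter gets a str, not bytes; genfromtxt upcasts
    # the requested float dtype to a structured (unicode, float) result.
    test = np.genfromtxt(BytesIO(raw), delimiter=',', dtype=float,
                         usecols=(2, 3), converters={2: np.unicode},
                         encoding='UTF-8')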
@@ -1551,11 +1752,17 @@ M 33 21.99
         # Test autostrip
         data = "01/01/2003  , 1.3, abcde"
         kwargs = dict(delimiter=",", dtype=None)
-        mtest = np.ndfromtxt(TextIO(data), **kwargs)
+        with warnings.catch_warnings(record=True) as w:
+            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+            mtest = np.ndfromtxt(TextIO(data), **kwargs)
+            assert_(w[0].category is np.VisibleDeprecationWarning)
         ctrl = np.array([('01/01/2003  ', 1.3, ' abcde')],
                         dtype=[('f0', '|S12'), ('f1', float), ('f2', '|S8')])
         assert_equal(mtest, ctrl)
-        mtest = np.ndfromtxt(TextIO(data), autostrip=True, **kwargs)
+        with warnings.catch_warnings(record=True) as w:
+            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+            mtest = np.ndfromtxt(TextIO(data), autostrip=True, **kwargs)
+            assert_(w[0].category is np.VisibleDeprecationWarning)
         ctrl = np.array([('01/01/2003', 1.3, 'abcde')],
                         dtype=[('f0', '|S10'), ('f1', float), ('f2', '|S5')])
         assert_equal(mtest, ctrl)
@@ -1675,13 +1882,127 @@ M 33 21.99
     def test_comments_is_none(self):
         # Github issue 329 (None was previously being converted to 'None').
-        test = np.genfromtxt(TextIO("test1,testNonetherestofthedata"),
-                             dtype=None, comments=None, delimiter=',')
+        with warnings.catch_warnings(record=True) as w:
+            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+            test = np.genfromtxt(TextIO("test1,testNonetherestofthedata"),
+                                 dtype=None, comments=None, delimiter=',')
+            assert_(w[0].category is np.VisibleDeprecationWarning)
         assert_equal(test[1], b'testNonetherestofthedata')
-        test = np.genfromtxt(TextIO("test1, testNonetherestofthedata"),
-                             dtype=None, comments=None, delimiter=',')
+        with warnings.catch_warnings(record=True) as w:
+            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+            test = np.genfromtxt(TextIO("test1, testNonetherestofthedata"),
+                                 dtype=None, comments=None, delimiter=',')
+            assert_(w[0].category is np.VisibleDeprecationWarning)
         assert_equal(test[1], b' testNonetherestofthedata')
 
+    def test_latin1(self):
+        latin1 = b'\xf6\xfc\xf6'
+        norm = b"norm1,norm2,norm3\n"
+        enc = b"test1,testNonethe" + latin1 + b",test3\n"
+        s = norm + enc + norm
+        with warnings.catch_warnings(record=True) as w:
+            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+            test = np.genfromtxt(TextIO(s),
+                                 dtype=None, comments=None, delimiter=',')
+            assert_(w[0].category is np.VisibleDeprecationWarning)
+        assert_equal(test[1, 0], b"test1")
+        assert_equal(test[1, 1], b"testNonethe" + latin1)
+        assert_equal(test[1, 2], b"test3")
+        test = np.genfromtxt(TextIO(s),
+                             dtype=None, comments=None, delimiter=',',
+                             encoding='latin1')
+        assert_equal(test[1, 0], u"test1")
+        assert_equal(test[1, 1], u"testNonethe" + latin1.decode('latin1'))
+        assert_equal(test[1, 2], u"test3")
+
+        with warnings.catch_warnings(record=True) as w:
+            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+            test = np.genfromtxt(TextIO(b"0,testNonethe" + latin1),
+                                 dtype=None, comments=None, delimiter=',')
+            assert_(w[0].category is np.VisibleDeprecationWarning)
+        assert_equal(test['f0'], 0)
+        assert_equal(test['f1'], b"testNonethe" + latin1)
+
+    def test_binary_decode_autodtype(self):
+        utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04'
+        v = self.loadfunc(BytesIO(utf16), dtype=None, encoding='UTF-16')
+        assert_array_equal(v, np.array(utf16.decode('UTF-16').split()))
+
+    def test_utf8_byte_encoding(self):
+        utf8 = b"\xcf\x96"
+        norm = b"norm1,norm2,norm3\n"
+        enc = b"test1,testNonethe" + utf8 + b",test3\n"
+        s = norm + enc + norm
+        with warnings.catch_warnings(record=True) as w:
+            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+            test = np.genfromtxt(TextIO(s),
+                                 dtype=None, comments=None, delimiter=',')
+            assert_(w[0].category is np.VisibleDeprecationWarning)
+        ctl = np.array([
+            [b'norm1', b'norm2', b'norm3'],
+            [b'test1', b'testNonethe' + utf8, b'test3'],
+            [b'norm1', b'norm2', b'norm3']])
+        assert_array_equal(test, ctl)
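``test_latin1`` above pins down both behaviors side by side: the legacy bytes
fallback (which now warns) and the new decoded path. The same contrast as a
sketch (inline bytes, illustrative)::

    import numpy as np
    from io import BytesIO

    s = b'norm1,norm2,norm3\ntest1,testNonethe\xf6\xfc\xf6,test3\n'
    # encoding='latin1' gives unicode fields; with no encoding the same
    # call returns byte strings and emits a VisibleDeprecationWarning.
    test = np.genfromtxt(BytesIO(s), dtype=None, delimiter=',',
                         encoding='latin1')
    # test[1, 1] == u'testNonethe\xf6\xfc\xf6'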
+
+    def test_utf8_file(self):
+        utf8 = b"\xcf\x96"
+        with temppath() as path:
+            with open(path, "wb") as f:
+                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
+            test = np.genfromtxt(path, dtype=None, comments=None,
+                                 delimiter=',', encoding="UTF-8")
+            ctl = np.array([
+                ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
+                ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
+                dtype=np.unicode)
+            assert_array_equal(test, ctl)
+
+            # test a mixed dtype
+            with open(path, "wb") as f:
+                f.write(b"0,testNonethe" + utf8)
+            test = np.genfromtxt(path, dtype=None, comments=None,
+                                 delimiter=',', encoding="UTF-8")
+            assert_equal(test['f0'], 0)
+            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8"))
+
+    def test_utf8_file_nodtype_unicode(self):
+        # bytes encoding with non-latin1 -> unicode upcast
+        utf8 = u'\u03d6'
+        latin1 = u'\xf6\xfc\xf6'
+
+        # skip test if cannot encode utf8 test string with preferred
+        # encoding. The preferred encoding is assumed to be the default
+        # encoding of io.open. Will need to change this for PyTest, maybe
+        # using pytest.mark.xfail(raises=***).
+        try:
+            import locale
+            encoding = locale.getpreferredencoding()
+            utf8.encode(encoding)
+        except (UnicodeError, ImportError):
+            raise SkipTest('Skipping test_utf8_file_nodtype_unicode, '
+                           'unable to encode utf8 in preferred encoding')
+
+        with temppath() as path:
+            with io.open(path, "wt") as f:
+                f.write(u"norm1,norm2,norm3\n")
+                f.write(u"norm1," + latin1 + u",norm3\n")
+                f.write(u"test1,testNonethe" + utf8 + u",test3\n")
+            with warnings.catch_warnings(record=True) as w:
+                warnings.filterwarnings('always', '',
+                                        np.VisibleDeprecationWarning)
+                test = np.genfromtxt(path, dtype=None, comments=None,
+                                     delimiter=',')
+                # Check for warning when encoding not specified.
+                assert_(w[0].category is np.VisibleDeprecationWarning)
+            ctl = np.array([
+                ["norm1", "norm2", "norm3"],
+                ["norm1", latin1, "norm3"],
+                ["test1", "testNonethe" + utf8, "test3"]],
+                dtype=np.unicode)
+            assert_array_equal(test, ctl)
+
     def test_recfromtxt(self):
         #
         data = TextIO('A,B\n0,1\n2,3')
@@ -1793,11 +2114,7 @@ M 33 21.99
         # Test that we can load data from a filename as well as a file
         # object
         tgt = np.arange(6).reshape((2, 3))
-        if sys.version_info[0] >= 3:
-            # python 3k is known to fail for '\r'
-            linesep = ('\n', '\r\n')
-        else:
-            linesep = ('\n', '\r\n', '\r')
+        linesep = ('\n', '\r\n', '\r')
 
         for sep in linesep:
             data = '0 1 2' + sep + '3 4 5'
@@ -1807,6 +2124,22 @@ M 33 21.99
             res = np.genfromtxt(name)
             assert_array_equal(res, tgt)
 
+    def test_gft_from_gzip(self):
+        # Test that we can load data from a gzipped file
+        wanted = np.arange(6).reshape((2, 3))
+        linesep = ('\n', '\r\n', '\r')
+
+        for sep in linesep:
+            data = '0 1 2' + sep + '3 4 5'
+            s = BytesIO()
+            with gzip.GzipFile(fileobj=s, mode='w') as g:
+                g.write(asbytes(data))
+
+            with temppath(suffix='.gz') as name:
+                with open(name, 'wb') as f:
+                    f.write(s.getvalue())
+                assert_array_equal(np.genfromtxt(name), wanted)
+
     def test_gft_using_generator(self):
         # gft doesn't work with unicode.
         def count():
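``test_gft_from_gzip`` relies on ``np.genfromtxt`` handing ``*.gz`` paths to
the datasource layer, which opens them transparently. A sketch under that
assumption (temporary path illustrative)::

    import gzip
    import os
    from tempfile import mkstemp
    import numpy as np

    fd, path = mkstemp(suffix='.gz')
    # Compress two whitespace-delimited rows into the .gz file.
    with os.fdopen(fd, 'wb') as f:
        with gzip.GzipFile(fileobj=f, mode='w') as g:
            g.write(b'0 1 2\n3 4 5')
    arr = np.genfromtxt(path)   # [[0., 1., 2.], [3., 4., 5.]]
    os.remove(path)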
@@ -1844,7 +2177,7 @@ M 33 21.99
 
 class TestPathUsage(object):
     # Test that pathlib.Path can be used
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_loadtxt(self):
         with temppath(suffix='.txt') as path:
             path = Path(path)
@@ -1853,7 +2186,7 @@ class TestPathUsage(object):
             x = np.loadtxt(path)
             assert_array_equal(x, a)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_save_load(self):
         # Test that pathlib.Path instances can be used with np.save/np.load.
         with temppath(suffix='.npy') as path:
@@ -1863,7 +2196,7 @@ class TestPathUsage(object):
             data = np.load(path)
             assert_array_equal(data, a)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_savez_load(self):
         # Test that pathlib.Path instances can be used with savez.
         with temppath(suffix='.npz') as path:
@@ -1872,7 +2205,7 @@ class TestPathUsage(object):
         with np.load(path) as data:
             assert_array_equal(data['lab'], 'place holder')
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_savez_compressed_load(self):
         # Test that pathlib.Path instances can be used with savez_compressed.
         with temppath(suffix='.npz') as path:
@@ -1882,7 +2215,7 @@ class TestPathUsage(object):
             assert_array_equal(data['lab'], 'place holder')
             data.close()
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_genfromtxt(self):
         with temppath(suffix='.txt') as path:
             path = Path(path)
@@ -1891,7 +2224,7 @@ class TestPathUsage(object):
             data = np.genfromtxt(path)
             assert_array_equal(a, data)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_ndfromtxt(self):
         # Test outputting a standard ndarray
         with temppath(suffix='.txt') as path:
@@ -1903,7 +2236,7 @@ class TestPathUsage(object):
             test = np.ndfromtxt(path, dtype=int)
             assert_array_equal(test, control)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_mafromtxt(self):
         # From `test_fancy_dtype_alt` above
         with temppath(suffix='.txt') as path:
@@ -1915,7 +2248,7 @@ class TestPathUsage(object):
             control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)])
             assert_equal(test, control)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_recfromtxt(self):
         with temppath(suffix='.txt') as path:
             path = Path(path)
@@ -1929,7 +2262,7 @@ class TestPathUsage(object):
             assert_(isinstance(test, np.recarray))
             assert_equal(test, control)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_recfromcsv(self):
         with temppath(suffix='.txt') as path:
             path = Path(path)
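The decorator change in ``TestPathUsage`` (``np.testing.dec`` to the
module-level ``dec`` import) is cosmetic; the substance of the class is that
``pathlib.Path`` objects are accepted wherever these routines take a filename.
A sketch (temporary directory illustrative)::

    from pathlib import Path
    from tempfile import mkdtemp
    import numpy as np

    path = Path(mkdtemp()) / 'data.txt'
    a = np.array([[1.5, 2.0], [3.0, 4.5]])
    np.savetxt(path, a)                    # Path accepted like a str path
    assert (np.loadtxt(path) == a).all()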