MAINT: Various minor code cleanups.

Minor cleanups of old code to reflect more modern usage.
author: Charles Harris <charlesr.harris@gmail.com> 2017-11-21 10:10:26 -0700
committer: Charles Harris <charlesr.harris@gmail.com> 2017-11-24 16:51:53 -0700
commit: 1d97b3aafdca2722bbe2f0c10a96544121c8f78b (patch)
tree: d70280d678745fdd419fdd17586336f766f279b6 /numpy/lib
parent: d9ca11117f37d48d07818a3aae3641c023454269 (diff)
download: numpy-1d97b3aafdca2722bbe2f0c10a96544121c8f78b.tar.gz
4 files changed, 90 insertions, 64 deletions
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py
index aec84865f..6f1295f09 100644
--- a/numpy/lib/_datasource.py
+++ b/numpy/lib/_datasource.py
@@ -238,6 +238,11 @@ def open(path, mode='r', destpath=os.curdir, encoding=None, newline=None):
         Path to the directory where the source file gets downloaded to for
         use.  If `destpath` is None, a temporary directory will be created.
         The default path is the current directory.
+    encoding : {None, str}, optional
+        Open text file with given encoding. The default encoding will be
+        what `io.open` uses.
+    newline : {None, str}, optional
+        Newline to use when reading text file.
 
     Returns
     -------
@@ -577,6 +582,11 @@ class DataSource (object):
             Mode to open `path`.  Mode 'r' for reading, 'w' for writing,
             'a' to append. Available modes depend on the type of object
             specified by `path`. Default is 'r'.
+        encoding : {None, str}, optional
+            Open text file with given encoding. The default encoding will be
+            what `io.open` uses.
+        newline : {None, str}, optional
+            Newline to use when reading text file.
 
         Returns
         -------
@@ -741,6 +751,11 @@ class Repository (DataSource):
             Mode to open `path`.  Mode 'r' for reading, 'w' for writing,
             'a' to append. Available modes depend on the type of object
             specified by `path`. Default is 'r'.
+        encoding : {None, str}, optional
+            Open text file with given encoding. The default encoding will be
+            what `io.open` uses.
+        newline : {None, str}, optional
+            Newline to use when reading text file.
 
         Returns
         -------
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index b7db77f32..27143e5c6 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -861,8 +861,8 @@ class StringConverter(object):
         # Add the missing values to the existing set or clear it.
         if missing_values is None:
             # Clear all missing values even though the ctor initializes it to
-            # {''} when the argument is None.
-            self.missing_values = {}
+            # set(['']) when the argument is None.
+            self.missing_values = set()
         else:
             if not np.iterable(missing_values):
                 missing_values = [missing_values]
diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py
index b25b42f8c..54fac8da4 100644
--- a/numpy/lib/tests/test__iotools.py
+++ b/numpy/lib/tests/test__iotools.py
@@ -12,6 +12,7 @@ from numpy.lib._iotools import (
     LineSplitter, NameValidator, StringConverter,
     has_nested_fields, easy_dtype, flatten_dtype
     )
+from numpy.compat import unicode
 
 
 class TestLineSplitter(object):
@@ -155,10 +156,10 @@ class TestStringConverter(object):
         assert_equal(converter.upgrade('0'), 0)
         assert_equal(converter._status, 1)
 
-        # On systems where integer defaults to 32-bit, the statuses will be
+        # On systems where long defaults to 32-bit, the statuses will be
         # offset by one, so we check for this here.
         import numpy.core.numeric as nx
-        status_offset = int(nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize)
+        status_offset = int(nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize)
 
         # test int > 2**32
         assert_equal(converter.upgrade('17179869184'), 17179869184)
@@ -172,9 +173,15 @@ class TestStringConverter(object):
         assert_equal(converter.upgrade('0j'), complex('0j'))
         assert_equal(converter._status, 3 + status_offset)
 
-        # test str TODO
-        #assert_equal(converter.upgrade(b'a'), b'a')
-        #assert_equal(converter._status, len(converter._mapper) - 1)
+        # test str
+        # Note that the longdouble type has been skipped, so the
+        # _status increases by 2. Every input should convert
+        # successfully as unicode (status 5 plus status_offset).
+        for s in ['a', u'a', b'a']:
+            res = converter.upgrade(s)
+            assert_(type(res) is unicode)
+            assert_equal(res, u'a')
+            assert_equal(converter._status, 5 + status_offset)
 
     def test_missing(self):
         "Tests the use of missing values."
@@ -204,8 +211,9 @@ class TestStringConverter(object):
 
     def test_string_to_object(self):
         "Make sure that string-to-object functions are properly recognized"
+        old_mapper = StringConverter._mapper[:]  # copy of list
         conv = StringConverter(_bytes_to_date)
-        assert_equal(conv._mapper[-3][0](0), 0j)
+        assert_equal(conv._mapper, old_mapper)
         assert_(hasattr(conv, 'default'))
 
     def test_keep_default(self):
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 35c37c7be..75a8e4968 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -20,19 +20,11 @@ from numpy.lib._iotools import ConverterError, ConversionWarning
 from numpy.compat import asbytes, bytes, unicode, Path
 from numpy.ma.testutils import assert_equal
 from numpy.testing import (
-    run_module_suite, assert_warns, assert_,
+    run_module_suite, assert_warns, assert_, SkipTest,
     assert_raises_regex, assert_raises, assert_allclose,
     assert_array_equal, temppath, tempdir, dec, IS_PYPY, suppress_warnings
 )
 
-def can_encode(v):
-    """ check if bytes can be decoded with default encoding """
-    try:
-        v.encode(locale.getpreferredencoding())
-        return False # no skipping
-    except UnicodeEncodeError:
-        return True
-
 
 class TextIO(BytesIO):
     """Helper IO class.
@@ -164,7 +156,7 @@ class RoundtripTest(object):
         a = np.array([1, 2, 3, 4], int)
         self.roundtrip(a)
 
-    @np.testing.dec.knownfailureif(sys.platform == 'win32', "Fail on Win32")
+    @dec.knownfailureif(sys.platform == 'win32', "Fail on Win32")
     def test_mmap(self):
         a = np.array([[1, 2.5], [4, 7.3]])
         self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'})
@@ -208,8 +200,8 @@ class TestSavezLoad(RoundtripTest):
                 self.arr_reloaded.fid.close()
                 os.remove(self.arr_reloaded.fid.name)
 
-    @np.testing.dec.skipif(not IS_64BIT, "Works only with 64bit systems")
-    @np.testing.dec.slow
+    @dec.skipif(not IS_64BIT, "Works only with 64bit systems")
+    @dec.slow
     def test_big_arrays(self):
         L = (1 << 31) + 100000
         a = np.empty(L, dtype=np.uint8)
@@ -285,7 +277,7 @@ class TestSavezLoad(RoundtripTest):
                 fp.seek(0)
                 assert_(not fp.closed)
 
-    @np.testing.dec.skipif(IS_PYPY, "context manager required on PyPy")
+    @dec.skipif(IS_PYPY, "context manager required on PyPy")
     def test_closing_fid(self):
         # Test that issue #1517 (too many opened files) remains closed
         # It might be a "weak" test since failed to get triggered on
@@ -351,8 +343,8 @@ class TestSaveTxt(object):
 
     def test_0D_3D(self):
         c = BytesIO()
-        assert_raises(ValueError, np.savetxt, c, np.array(1)) 
-        assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]])) 
+        assert_raises(ValueError, np.savetxt, c, np.array(1))
+        assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]]))
 
 
     def test_record(self):
@@ -530,7 +522,7 @@ class TestSaveTxt(object):
         assert_equal(s.read(), utf8 + '\n')
 
 
-class LoadTxtBase:
+class LoadTxtBase(object):
     def check_compressed(self, fopen, suffixes):
         # Test that we can load data from a compressed file
         wanted = np.arange(6).reshape((2, 3))
@@ -541,23 +533,22 @@ class LoadTxtBase:
                 with temppath(suffix=suffix) as name:
                     with fopen(name, mode='wt', encoding='UTF-32-LE') as f:
                         f.write(data)
-                    res = getattr(np, self.loadfunc)(name,
-                                                     encoding='UTF-32-LE')
+                    res = self.loadfunc(name, encoding='UTF-32-LE')
                     assert_array_equal(res, wanted)
-                    res = getattr(np, self.loadfunc)(
-                                 fopen(name, "rt", encoding='UTF-32-LE'))
+                    with fopen(name, "rt",  encoding='UTF-32-LE') as f:
+                        res = self.loadfunc(f)
                     assert_array_equal(res, wanted)
 
     # Python2 .open does not support encoding
-    @np.testing.dec.skipif(MAJVER == 2)
+    @dec.skipif(MAJVER == 2)
     def test_compressed_gzip(self):
         self.check_compressed(gzip.open, ('.gz',))
 
-    @np.testing.dec.skipif(MAJVER == 2 or not HAS_BZ2)
+    @dec.skipif(MAJVER == 2 or not HAS_BZ2)
     def test_compressed_gzip(self):
         self.check_compressed(bz2.open, ('.bz2',))
 
-    @np.testing.dec.skipif(MAJVER == 2 or not HAS_LZMA)
+    @dec.skipif(MAJVER == 2 or not HAS_LZMA)
     def test_compressed_gzip(self):
         self.check_compressed(lzma.open, ('.xz', '.lzma'))
 
@@ -565,7 +556,7 @@ class LoadTxtBase:
         with temppath() as path:
             with open(path, "wb") as f:
                 f.write('0.\n1.\n2.'.encode("UTF-16"))
-            x = getattr(np, self.loadfunc)(path, encoding="UTF-16")
+            x = self.loadfunc(path, encoding="UTF-16")
             assert_array_equal(x, [0., 1., 2.])
 
     def test_stringload(self):
@@ -574,13 +565,12 @@ class LoadTxtBase:
         with temppath() as path:
             with open(path, "wb") as f:
                 f.write(nonascii.encode("UTF-16"))
-            x = getattr(np, self.loadfunc)(path, encoding="UTF-16", dtype=np.unicode)
+            x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode)
             assert_array_equal(x, nonascii)
 
     def test_binary_decode(self):
         utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04'
-        v = getattr(np, self.loadfunc)(BytesIO(utf16), dtype=np.unicode,
-                                       encoding='UTF-16')
+        v = self.loadfunc(BytesIO(utf16), dtype=np.unicode, encoding='UTF-16')
         assert_array_equal(v, np.array(utf16.decode('UTF-16').split()))
 
     def test_converters_decode(self):
@@ -588,8 +578,8 @@ class LoadTxtBase:
         c = TextIO()
         c.write(b'\xcf\x96')
         c.seek(0)
-        x = getattr(np, self.loadfunc)(c, dtype=np.unicode,
-                       converters={0: lambda x: x.decode('UTF-8')})
+        x = self.loadfunc(c, dtype=np.unicode,
+                          converters={0: lambda x: x.decode('UTF-8')})
         a = np.array([b'\xcf\x96'.decode('UTF-8')])
         assert_array_equal(x, a)
 
@@ -599,15 +589,16 @@ class LoadTxtBase:
         with temppath() as path:
             with io.open(path, 'wt', encoding='UTF-8') as f:
                 f.write(utf8)
-            x = getattr(np, self.loadfunc)(path, dtype=np.unicode,
-                                           converters={0: lambda x: x + 't'},
-                                           encoding='UTF-8')
+            x = self.loadfunc(path, dtype=np.unicode,
+                              converters={0: lambda x: x + 't'},
+                              encoding='UTF-8')
             a = np.array([utf8 + 't'])
             assert_array_equal(x, a)
 
 
 class TestLoadTxt(LoadTxtBase):
-    loadfunc = 'loadtxt'
+    loadfunc = staticmethod(np.loadtxt)
+
     def setUp(self):
         # lower chunksize for testing
         self.orig_chunk = np.lib.npyio._loadtxt_chunksize
@@ -1016,7 +1007,7 @@ class TestLoadTxt(LoadTxtBase):
         dt = np.dtype([('x', int), ('a', 'S10'), ('y', int)])
         np.loadtxt(c, delimiter=',', dtype=dt, comments=None)  # Should succeed
 
-    @np.testing.dec.skipif(locale.getpreferredencoding() == 'ANSI_X3.4-1968')
+    @dec.skipif(locale.getpreferredencoding() == 'ANSI_X3.4-1968')
     def test_binary_load(self):
         butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\
                 b"20,2,3,\xc3\x95scar\n\r"
@@ -1087,7 +1078,8 @@ class Testfromregex(object):
 
 
 class TestFromTxt(LoadTxtBase):
-    loadfunc = 'genfromtxt'
+    loadfunc = staticmethod(np.genfromtxt)
+
     def test_record(self):
         # Test w/ explicit dtype
         data = TextIO('1 2\n3 4')
@@ -1933,8 +1925,7 @@ M   33  21.99
 
     def test_binary_decode_autodtype(self):
         utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04'
-        v = getattr(np, self.loadfunc)(BytesIO(utf16), dtype=None,
-                                       encoding='UTF-16')
+        v = self.loadfunc(BytesIO(utf16), dtype=None, encoding='UTF-16')
         assert_array_equal(v, np.array(utf16.decode('UTF-16').split()))
 
     def test_utf8_byte_encoding(self):
@@ -1975,28 +1966,40 @@ M   33  21.99
             assert_equal(test['f0'], 0)
             assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8"))
 
-    @np.testing.dec.skipif(can_encode(b"\xcf\x96".decode('UTF-8')))
+
     def test_utf8_file_nodtype_unicode(self):
         # bytes encoding with non-latin1 -> unicode upcast
-        utf8 = b"\xcf\x96"
-        latin1 = b"\xf6\xfc\xf6"
+        utf8 = u'\u03d6'
+        latin1 = u'\xf6\xfc\xf6'
+
+        # Skip the test if the utf8 test string cannot be encoded with the
+        # preferred encoding. The preferred encoding is assumed to be the
+        # default encoding of io.open. This will need to change for pytest,
+        # maybe using pytest.mark.xfail(raises=***).
+        try:
+            import locale
+            encoding = locale.getpreferredencoding()
+            utf8.encode(encoding)
+        except (UnicodeError, ImportError):
+            raise SkipTest('Skipping test_utf8_file_nodtype_unicode, '
+                           'unable to encode utf8 in preferred encoding') 
+
         with temppath() as path:
-            with io.open(path, "wt",
-                         encoding=locale.getpreferredencoding()) as f:
+            with io.open(path, "wt") as f:
                 f.write(u"norm1,norm2,norm3\n")
-                f.write(u"norm1," + latin1.decode("latin1") + u",norm3\n")
-                f.write(u"test1,testNonethe" + utf8.decode("UTF-8") +
-                        u",test3\n")
+                f.write(u"norm1," + latin1 + u",norm3\n")
+                f.write(u"test1,testNonethe" + utf8 + u",test3\n")
             with warnings.catch_warnings(record=True) as w:
                 warnings.filterwarnings('always', '',
                                         np.VisibleDeprecationWarning)
                 test = np.genfromtxt(path, dtype=None, comments=None,
                                      delimiter=',')
+                # Check for warning when encoding not specified.
                 assert_(w[0].category is np.VisibleDeprecationWarning)
             ctl = np.array([
                      ["norm1", "norm2", "norm3"],
-                     ["norm1", latin1.decode("latin1"), "norm3"],
-                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
+                     ["norm1", latin1, "norm3"],
+                     ["test1", "testNonethe" + utf8, "test3"]],
                      dtype=np.unicode)
             assert_array_equal(test, ctl)
 
@@ -2174,7 +2177,7 @@ M   33  21.99
 
 class TestPathUsage(object):
     # Test that pathlib.Path can be used
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_loadtxt(self):
         with temppath(suffix='.txt') as path:
             path = Path(path)
@@ -2183,7 +2186,7 @@ class TestPathUsage(object):
             x = np.loadtxt(path)
             assert_array_equal(x, a)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_save_load(self):
         # Test that pathlib.Path instances can be used with savez.
         with temppath(suffix='.npy') as path:
@@ -2193,7 +2196,7 @@ class TestPathUsage(object):
             data = np.load(path)
             assert_array_equal(data, a)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_savez_load(self):
         # Test that pathlib.Path instances can be used with savez.
         with temppath(suffix='.npz') as path:
@@ -2202,7 +2205,7 @@ class TestPathUsage(object):
             with np.load(path) as data:
                 assert_array_equal(data['lab'], 'place holder')
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_savez_compressed_load(self):
         # Test that pathlib.Path instances can be used with savez.
         with temppath(suffix='.npz') as path:
@@ -2212,7 +2215,7 @@ class TestPathUsage(object):
             assert_array_equal(data['lab'], 'place holder')
             data.close()
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_genfromtxt(self):
         with temppath(suffix='.txt') as path:
             path = Path(path)
@@ -2221,7 +2224,7 @@ class TestPathUsage(object):
             data = np.genfromtxt(path)
             assert_array_equal(a, data)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_ndfromtxt(self):
         # Test outputing a standard ndarray
         with temppath(suffix='.txt') as path:
@@ -2233,7 +2236,7 @@ class TestPathUsage(object):
             test = np.ndfromtxt(path, dtype=int)
             assert_array_equal(test, control)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_mafromtxt(self):
         # From `test_fancy_dtype_alt` above
         with temppath(suffix='.txt') as path:
@@ -2245,7 +2248,7 @@ class TestPathUsage(object):
             control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)])
             assert_equal(test, control)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_recfromtxt(self):
         with temppath(suffix='.txt') as path:
             path = Path(path)
@@ -2259,7 +2262,7 @@ class TestPathUsage(object):
             assert_(isinstance(test, np.recarray))
             assert_equal(test, control)
 
-    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    @dec.skipif(Path is None, "No pathlib.Path")
     def test_recfromcsv(self):
         with temppath(suffix='.txt') as path:
             path = Path(path)
author	Charles Harris <charlesr.harris@gmail.com>	2017-11-21 10:10:26 -0700
committer	Charles Harris <charlesr.harris@gmail.com>	2017-11-24 16:51:53 -0700
commit	1d97b3aafdca2722bbe2f0c10a96544121c8f78b (patch)
tree	d70280d678745fdd419fdd17586336f766f279b6 /numpy/lib
parent	d9ca11117f37d48d07818a3aae3641c023454269 (diff)
download	numpy-1d97b3aafdca2722bbe2f0c10a96544121c8f78b.tar.gz