diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2017-11-21 10:10:26 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2017-11-24 16:51:53 -0700 |
commit | 1d97b3aafdca2722bbe2f0c10a96544121c8f78b (patch) | |
tree | d70280d678745fdd419fdd17586336f766f279b6 /numpy/lib | |
parent | d9ca11117f37d48d07818a3aae3641c023454269 (diff) | |
download | numpy-1d97b3aafdca2722bbe2f0c10a96544121c8f78b.tar.gz |
MAINT: Various minor code cleanups.
Minor cleanups of old code to reflect more modern usage.
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/_datasource.py | 15 |
-rw-r--r-- | numpy/lib/_iotools.py | 4 |
-rw-r--r-- | numpy/lib/tests/test__iotools.py | 20 |
-rw-r--r-- | numpy/lib/tests/test_io.py | 115 |
4 files changed, 90 insertions, 64 deletions
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py index aec84865f..6f1295f09 100644 --- a/numpy/lib/_datasource.py +++ b/numpy/lib/_datasource.py @@ -238,6 +238,11 @@ def open(path, mode='r', destpath=os.curdir, encoding=None, newline=None): Path to the directory where the source file gets downloaded to for use. If `destpath` is None, a temporary directory will be created. The default path is the current directory. + encoding : {None, str}, optional + Open text file with given encoding. The default encoding will be + what `io.open` uses. + newline : {None, str}, optional + Newline to use when reading text file. Returns ------- @@ -577,6 +582,11 @@ class DataSource (object): Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to append. Available modes depend on the type of object specified by `path`. Default is 'r'. + encoding : {None, str}, optional + Open text file with given encoding. The default encoding will be + what `io.open` uses. + newline : {None, str}, optional + Newline to use when reading text file. Returns ------- @@ -741,6 +751,11 @@ class Repository (DataSource): Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to append. Available modes depend on the type of object specified by `path`. Default is 'r'. + encoding : {None, str}, optional + Open text file with given encoding. The default encoding will be + what `io.open` uses. + newline : {None, str}, optional + Newline to use when reading text file. Returns ------- diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index b7db77f32..27143e5c6 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -861,8 +861,8 @@ class StringConverter(object): # Add the missing values to the existing set or clear it. if missing_values is None: # Clear all missing values even though the ctor initializes it to - # {''} when the argument is None. - self.missing_values = {} + # set(['']) when the argument is None. 
+ self.missing_values = set() else: if not np.iterable(missing_values): missing_values = [missing_values] diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py index b25b42f8c..54fac8da4 100644 --- a/numpy/lib/tests/test__iotools.py +++ b/numpy/lib/tests/test__iotools.py @@ -12,6 +12,7 @@ from numpy.lib._iotools import ( LineSplitter, NameValidator, StringConverter, has_nested_fields, easy_dtype, flatten_dtype ) +from numpy.compat import unicode class TestLineSplitter(object): @@ -155,10 +156,10 @@ class TestStringConverter(object): assert_equal(converter.upgrade('0'), 0) assert_equal(converter._status, 1) - # On systems where integer defaults to 32-bit, the statuses will be + # On systems where long defaults to 32-bit, the statuses will be # offset by one, so we check for this here. import numpy.core.numeric as nx - status_offset = int(nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize) + status_offset = int(nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize) # test int > 2**32 assert_equal(converter.upgrade('17179869184'), 17179869184) @@ -172,9 +173,15 @@ class TestStringConverter(object): assert_equal(converter.upgrade('0j'), complex('0j')) assert_equal(converter._status, 3 + status_offset) - # test str TODO - #assert_equal(converter.upgrade(b'a'), b'a') - #assert_equal(converter._status, len(converter._mapper) - 1) + # test str + # note that the longdouble type has been skipped, so the + # _status increases by 2. Everything should succeed with + # unicode conversion (5). + for s in ['a', u'a', b'a']: + res = converter.upgrade(s) + assert_(type(res) is unicode) + assert_equal(res, u'a') + assert_equal(converter._status, 5 + status_offset) def test_missing(self): "Tests the use of missing values." 
@@ -204,8 +211,9 @@ class TestStringConverter(object): def test_string_to_object(self): "Make sure that string-to-object functions are properly recognized" + old_mapper = StringConverter._mapper[:] # copy of list conv = StringConverter(_bytes_to_date) - assert_equal(conv._mapper[-3][0](0), 0j) + assert_equal(conv._mapper, old_mapper) assert_(hasattr(conv, 'default')) def test_keep_default(self): diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 35c37c7be..75a8e4968 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -20,19 +20,11 @@ from numpy.lib._iotools import ConverterError, ConversionWarning from numpy.compat import asbytes, bytes, unicode, Path from numpy.ma.testutils import assert_equal from numpy.testing import ( - run_module_suite, assert_warns, assert_, + run_module_suite, assert_warns, assert_, SkipTest, assert_raises_regex, assert_raises, assert_allclose, assert_array_equal, temppath, tempdir, dec, IS_PYPY, suppress_warnings ) -def can_encode(v): - """ check if bytes can be decoded with default encoding """ - try: - v.encode(locale.getpreferredencoding()) - return False # no skipping - except UnicodeEncodeError: - return True - class TextIO(BytesIO): """Helper IO class. 
@@ -164,7 +156,7 @@ class RoundtripTest(object): a = np.array([1, 2, 3, 4], int) self.roundtrip(a) - @np.testing.dec.knownfailureif(sys.platform == 'win32', "Fail on Win32") + @dec.knownfailureif(sys.platform == 'win32', "Fail on Win32") def test_mmap(self): a = np.array([[1, 2.5], [4, 7.3]]) self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'}) @@ -208,8 +200,8 @@ class TestSavezLoad(RoundtripTest): self.arr_reloaded.fid.close() os.remove(self.arr_reloaded.fid.name) - @np.testing.dec.skipif(not IS_64BIT, "Works only with 64bit systems") - @np.testing.dec.slow + @dec.skipif(not IS_64BIT, "Works only with 64bit systems") + @dec.slow def test_big_arrays(self): L = (1 << 31) + 100000 a = np.empty(L, dtype=np.uint8) @@ -285,7 +277,7 @@ class TestSavezLoad(RoundtripTest): fp.seek(0) assert_(not fp.closed) - @np.testing.dec.skipif(IS_PYPY, "context manager required on PyPy") + @dec.skipif(IS_PYPY, "context manager required on PyPy") def test_closing_fid(self): # Test that issue #1517 (too many opened files) remains closed # It might be a "weak" test since failed to get triggered on @@ -351,8 +343,8 @@ class TestSaveTxt(object): def test_0D_3D(self): c = BytesIO() - assert_raises(ValueError, np.savetxt, c, np.array(1)) - assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]])) + assert_raises(ValueError, np.savetxt, c, np.array(1)) + assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]])) def test_record(self): @@ -530,7 +522,7 @@ class TestSaveTxt(object): assert_equal(s.read(), utf8 + '\n') -class LoadTxtBase: +class LoadTxtBase(object): def check_compressed(self, fopen, suffixes): # Test that we can load data from a compressed file wanted = np.arange(6).reshape((2, 3)) @@ -541,23 +533,22 @@ class LoadTxtBase: with temppath(suffix=suffix) as name: with fopen(name, mode='wt', encoding='UTF-32-LE') as f: f.write(data) - res = getattr(np, self.loadfunc)(name, - encoding='UTF-32-LE') + res = self.loadfunc(name, encoding='UTF-32-LE') 
assert_array_equal(res, wanted) - res = getattr(np, self.loadfunc)( - fopen(name, "rt", encoding='UTF-32-LE')) + with fopen(name, "rt", encoding='UTF-32-LE') as f: + res = self.loadfunc(f) assert_array_equal(res, wanted) # Python2 .open does not support encoding - @np.testing.dec.skipif(MAJVER == 2) + @dec.skipif(MAJVER == 2) def test_compressed_gzip(self): self.check_compressed(gzip.open, ('.gz',)) - @np.testing.dec.skipif(MAJVER == 2 or not HAS_BZ2) + @dec.skipif(MAJVER == 2 or not HAS_BZ2) def test_compressed_gzip(self): self.check_compressed(bz2.open, ('.bz2',)) - @np.testing.dec.skipif(MAJVER == 2 or not HAS_LZMA) + @dec.skipif(MAJVER == 2 or not HAS_LZMA) def test_compressed_gzip(self): self.check_compressed(lzma.open, ('.xz', '.lzma')) @@ -565,7 +556,7 @@ class LoadTxtBase: with temppath() as path: with open(path, "wb") as f: f.write('0.\n1.\n2.'.encode("UTF-16")) - x = getattr(np, self.loadfunc)(path, encoding="UTF-16") + x = self.loadfunc(path, encoding="UTF-16") assert_array_equal(x, [0., 1., 2.]) def test_stringload(self): @@ -574,13 +565,12 @@ class LoadTxtBase: with temppath() as path: with open(path, "wb") as f: f.write(nonascii.encode("UTF-16")) - x = getattr(np, self.loadfunc)(path, encoding="UTF-16", dtype=np.unicode) + x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode) assert_array_equal(x, nonascii) def test_binary_decode(self): utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04' - v = getattr(np, self.loadfunc)(BytesIO(utf16), dtype=np.unicode, - encoding='UTF-16') + v = self.loadfunc(BytesIO(utf16), dtype=np.unicode, encoding='UTF-16') assert_array_equal(v, np.array(utf16.decode('UTF-16').split())) def test_converters_decode(self): @@ -588,8 +578,8 @@ class LoadTxtBase: c = TextIO() c.write(b'\xcf\x96') c.seek(0) - x = getattr(np, self.loadfunc)(c, dtype=np.unicode, - converters={0: lambda x: x.decode('UTF-8')}) + x = self.loadfunc(c, dtype=np.unicode, + converters={0: lambda x: x.decode('UTF-8')}) a = np.array([b'\xcf\x96'.decode('UTF-8')]) 
assert_array_equal(x, a) @@ -599,15 +589,16 @@ class LoadTxtBase: with temppath() as path: with io.open(path, 'wt', encoding='UTF-8') as f: f.write(utf8) - x = getattr(np, self.loadfunc)(path, dtype=np.unicode, - converters={0: lambda x: x + 't'}, - encoding='UTF-8') + x = self.loadfunc(path, dtype=np.unicode, + converters={0: lambda x: x + 't'}, + encoding='UTF-8') a = np.array([utf8 + 't']) assert_array_equal(x, a) class TestLoadTxt(LoadTxtBase): - loadfunc = 'loadtxt' + loadfunc = staticmethod(np.loadtxt) + def setUp(self): # lower chunksize for testing self.orig_chunk = np.lib.npyio._loadtxt_chunksize @@ -1016,7 +1007,7 @@ class TestLoadTxt(LoadTxtBase): dt = np.dtype([('x', int), ('a', 'S10'), ('y', int)]) np.loadtxt(c, delimiter=',', dtype=dt, comments=None) # Should succeed - @np.testing.dec.skipif(locale.getpreferredencoding() == 'ANSI_X3.4-1968') + @dec.skipif(locale.getpreferredencoding() == 'ANSI_X3.4-1968') def test_binary_load(self): butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\ b"20,2,3,\xc3\x95scar\n\r" @@ -1087,7 +1078,8 @@ class Testfromregex(object): class TestFromTxt(LoadTxtBase): - loadfunc = 'genfromtxt' + loadfunc = staticmethod(np.genfromtxt) + def test_record(self): # Test w/ explicit dtype data = TextIO('1 2\n3 4') @@ -1933,8 +1925,7 @@ M 33 21.99 def test_binary_decode_autodtype(self): utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04' - v = getattr(np, self.loadfunc)(BytesIO(utf16), dtype=None, - encoding='UTF-16') + v = self.loadfunc(BytesIO(utf16), dtype=None, encoding='UTF-16') assert_array_equal(v, np.array(utf16.decode('UTF-16').split())) def test_utf8_byte_encoding(self): @@ -1975,28 +1966,40 @@ M 33 21.99 assert_equal(test['f0'], 0) assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) - @np.testing.dec.skipif(can_encode(b"\xcf\x96".decode('UTF-8'))) + def test_utf8_file_nodtype_unicode(self): # bytes encoding with non-latin1 -> unicode upcast - utf8 = b"\xcf\x96" - latin1 = b"\xf6\xfc\xf6" + utf8 = u'\u03d6' + latin1 = 
u'\xf6\xfc\xf6' + + # skip test if cannot encode utf8 test string with preferred + # encoding. The preferred encoding is assumed to be the default + # encoding of io.open. Will need to change this for PyTest, maybe + # using pytest.mark.xfail(raises=***). + try: + import locale + encoding = locale.getpreferredencoding() + utf8.encode(encoding) + except (UnicodeError, ImportError): + raise SkipTest('Skipping test_utf8_file_nodtype_unicode, ' + 'unable to encode utf8 in preferred encoding') + with temppath() as path: - with io.open(path, "wt", - encoding=locale.getpreferredencoding()) as f: + with io.open(path, "wt") as f: f.write(u"norm1,norm2,norm3\n") - f.write(u"norm1," + latin1.decode("latin1") + u",norm3\n") - f.write(u"test1,testNonethe" + utf8.decode("UTF-8") + - u",test3\n") + f.write(u"norm1," + latin1 + u",norm3\n") + f.write(u"test1,testNonethe" + utf8 + u",test3\n") with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) test = np.genfromtxt(path, dtype=None, comments=None, delimiter=',') + # Check for warning when encoding not specified. 
assert_(w[0].category is np.VisibleDeprecationWarning) ctl = np.array([ ["norm1", "norm2", "norm3"], - ["norm1", latin1.decode("latin1"), "norm3"], - ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]], + ["norm1", latin1, "norm3"], + ["test1", "testNonethe" + utf8, "test3"]], dtype=np.unicode) assert_array_equal(test, ctl) @@ -2174,7 +2177,7 @@ M 33 21.99 class TestPathUsage(object): # Test that pathlib.Path can be used - @np.testing.dec.skipif(Path is None, "No pathlib.Path") + @dec.skipif(Path is None, "No pathlib.Path") def test_loadtxt(self): with temppath(suffix='.txt') as path: path = Path(path) @@ -2183,7 +2186,7 @@ class TestPathUsage(object): x = np.loadtxt(path) assert_array_equal(x, a) - @np.testing.dec.skipif(Path is None, "No pathlib.Path") + @dec.skipif(Path is None, "No pathlib.Path") def test_save_load(self): # Test that pathlib.Path instances can be used with savez. with temppath(suffix='.npy') as path: @@ -2193,7 +2196,7 @@ class TestPathUsage(object): data = np.load(path) assert_array_equal(data, a) - @np.testing.dec.skipif(Path is None, "No pathlib.Path") + @dec.skipif(Path is None, "No pathlib.Path") def test_savez_load(self): # Test that pathlib.Path instances can be used with savez. with temppath(suffix='.npz') as path: @@ -2202,7 +2205,7 @@ class TestPathUsage(object): with np.load(path) as data: assert_array_equal(data['lab'], 'place holder') - @np.testing.dec.skipif(Path is None, "No pathlib.Path") + @dec.skipif(Path is None, "No pathlib.Path") def test_savez_compressed_load(self): # Test that pathlib.Path instances can be used with savez. 
with temppath(suffix='.npz') as path: @@ -2212,7 +2215,7 @@ class TestPathUsage(object): assert_array_equal(data['lab'], 'place holder') data.close() - @np.testing.dec.skipif(Path is None, "No pathlib.Path") + @dec.skipif(Path is None, "No pathlib.Path") def test_genfromtxt(self): with temppath(suffix='.txt') as path: path = Path(path) @@ -2221,7 +2224,7 @@ class TestPathUsage(object): data = np.genfromtxt(path) assert_array_equal(a, data) - @np.testing.dec.skipif(Path is None, "No pathlib.Path") + @dec.skipif(Path is None, "No pathlib.Path") def test_ndfromtxt(self): # Test outputing a standard ndarray with temppath(suffix='.txt') as path: @@ -2233,7 +2236,7 @@ class TestPathUsage(object): test = np.ndfromtxt(path, dtype=int) assert_array_equal(test, control) - @np.testing.dec.skipif(Path is None, "No pathlib.Path") + @dec.skipif(Path is None, "No pathlib.Path") def test_mafromtxt(self): # From `test_fancy_dtype_alt` above with temppath(suffix='.txt') as path: @@ -2245,7 +2248,7 @@ class TestPathUsage(object): control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)]) assert_equal(test, control) - @np.testing.dec.skipif(Path is None, "No pathlib.Path") + @dec.skipif(Path is None, "No pathlib.Path") def test_recfromtxt(self): with temppath(suffix='.txt') as path: path = Path(path) @@ -2259,7 +2262,7 @@ class TestPathUsage(object): assert_(isinstance(test, np.recarray)) assert_equal(test, control) - @np.testing.dec.skipif(Path is None, "No pathlib.Path") + @dec.skipif(Path is None, "No pathlib.Path") def test_recfromcsv(self): with temppath(suffix='.txt') as path: path = Path(path) |