diff options
Diffstat (limited to 'numpy/lib/tests/test_io.py')
-rw-r--r-- | numpy/lib/tests/test_io.py | 620 |
1 files changed, 445 insertions, 175 deletions
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 68b2018cd..333891d46 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -4,7 +4,7 @@ import sys import gzip import os import threading -from tempfile import mkstemp, NamedTemporaryFile +from tempfile import NamedTemporaryFile import time import warnings import gc @@ -13,16 +13,14 @@ from datetime import datetime import numpy as np import numpy.ma as ma -from numpy.lib._iotools import (ConverterError, ConverterLockError, - ConversionWarning) -from numpy.compat import asbytes, asbytes_nested, bytes, asstr -from nose import SkipTest -from numpy.ma.testutils import ( - TestCase, assert_equal, assert_array_equal, - assert_raises, assert_raises_regex, run_module_suite +from numpy.lib._iotools import ConverterError, ConversionWarning +from numpy.compat import asbytes, bytes, unicode, Path +from numpy.ma.testutils import assert_equal +from numpy.testing import ( + TestCase, run_module_suite, assert_warns, assert_, + assert_raises_regex, assert_raises, assert_allclose, + assert_array_equal, temppath, dec, IS_PYPY, suppress_warnings ) -from numpy.testing import assert_warns, assert_, build_err_msg -from numpy.testing.utils import tempdir class TextIO(BytesIO): @@ -49,8 +47,9 @@ IS_64BIT = sys.maxsize > 2**32 def strptime(s, fmt=None): - """This function is available in the datetime module only - from Python >= 2.5. + """ + This function is available in the datetime module only from Python >= + 2.5. """ if sys.version_info[0] >= 3: @@ -158,6 +157,7 @@ class RoundtripTest(object): a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) self.check_roundtrips(a) + @dec.slow def test_format_2_0(self): dt = [(("%d" % i) * 100, float) for i in range(500)] a = np.ones(1000, dtype=dt) @@ -194,13 +194,13 @@ class TestSavezLoad(RoundtripTest, TestCase): def test_big_arrays(self): L = (1 << 31) + 100000 a = np.empty(L, dtype=np.uint8) - with tempdir(prefix="numpy_test_big_arrays_") as tmpdir: - tmp = os.path.join(tmpdir, "file.npz") + with temppath(prefix="numpy_test_big_arrays_", suffix=".npz") as tmp: np.savez(tmp, a=a) del a npfile = np.load(tmp) - a = npfile['a'] + a = npfile['a'] # Should succeed npfile.close() + del a # Avoid pyflakes unused variable warning. def test_multiple_arrays(self): a = np.array([[1, 2], [3, 4]], float) @@ -216,7 +216,7 @@ class TestSavezLoad(RoundtripTest, TestCase): l = np.load(c) assert_equal(a, l['file_a']) assert_equal(b, l['file_b']) - + def test_BagObj(self): a = np.array([[1, 2], [3, 4]], float) b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex) @@ -233,16 +233,12 @@ class TestSavezLoad(RoundtripTest, TestCase): # and savez functions in multithreaded environment def writer(error_list): - fd, tmp = mkstemp(suffix='.npz') - os.close(fd) - try: + with temppath(suffix='.npz') as tmp: arr = np.random.randn(500, 500) try: np.savez(tmp, arr=arr) except OSError as err: error_list.append(err) - finally: - os.remove(tmp) errors = [] threads = [threading.Thread(target=writer, args=(errors,)) @@ -258,40 +254,27 @@ class TestSavezLoad(RoundtripTest, TestCase): def test_not_closing_opened_fid(self): # Test that issue #2178 is fixed: # verify could seek on 'loaded' file - - fd, tmp = mkstemp(suffix='.npz') - os.close(fd) - try: - fp = open(tmp, 'wb') - np.savez(fp, data='LOVELY LOAD') - fp.close() - - fp = open(tmp, 'rb', 10000) - fp.seek(0) - assert_(not fp.closed) - _ = np.load(fp)['data'] - assert_(not fp.closed) - # must not get closed by .load(opened fp) - fp.seek(0) - assert_(not fp.closed) - - finally: - fp.close() - os.remove(tmp) - + with temppath(suffix='.npz') as tmp: + with open(tmp, 'wb') as fp: + np.savez(fp, data='LOVELY LOAD') + with open(tmp, 'rb', 10000) as fp: + fp.seek(0) + assert_(not fp.closed) + np.load(fp)['data'] + # fp must not get closed by .load + assert_(not fp.closed) + fp.seek(0) + assert_(not fp.closed) + + @np.testing.dec.skipif(IS_PYPY, "context manager required on PyPy") def test_closing_fid(self): # Test that issue #1517 (too many opened files) remains closed # It might be a "weak" test since failed to get triggered on # e.g. Debian sid of 2012 Jul 05 but was reported to # trigger the failure on Ubuntu 10.04: # http://projects.scipy.org/numpy/ticket/1517#comment:2 - fd, tmp = mkstemp(suffix='.npz') - os.close(fd) - - try: - fp = open(tmp, 'wb') - np.savez(fp, data='LOVELY LOAD') - fp.close() + with temppath(suffix='.npz') as tmp: + np.savez(tmp, data='LOVELY LOAD') # We need to check if the garbage collector can properly close # numpy npz file returned by np.load when their reference count # goes to zero. Python 3 running in debug mode raises a @@ -299,24 +282,22 @@ class TestSavezLoad(RoundtripTest, TestCase): # collector, so we catch the warnings. Because ResourceWarning # is unknown in Python < 3.x, we take the easy way out and # catch all warnings. - with warnings.catch_warnings(): - warnings.simplefilter("ignore") + with suppress_warnings() as sup: + sup.filter(Warning) # TODO: specify exact message for i in range(1, 1025): try: np.load(tmp)["data"] except Exception as e: msg = "Failed to load data from a file: %s" % e raise AssertionError(msg) - finally: - os.remove(tmp) def test_closing_zipfile_after_load(self): - # Check that zipfile owns file and can close it. - # This needs to pass a file name to load for the - # test. - with tempdir(prefix="numpy_test_closing_zipfile_after_load_") as tmpdir: - fd, tmp = mkstemp(suffix='.npz', dir=tmpdir) - os.close(fd) + # Check that zipfile owns file and can close it. This needs to + # pass a file name to load for the test. On windows failure will + # cause a second error will be raised when the attempt to remove + # the open file is made. + prefix = 'numpy_test_closing_zipfile_after_load_' + with temppath(suffix='.npz', prefix=prefix) as tmp: np.savez(tmp, lab='place holder') data = np.load(tmp) fp = data.zip.fp @@ -390,9 +371,8 @@ class TestSaveTxt(TestCase): assert_raises(ValueError, np.savetxt, c, a, fmt=99) def test_header_footer(self): - """ - Test the functionality of the header and footer keyword argument. - """ + # Test the functionality of the header and footer keyword argument. + c = BytesIO() a = np.array([(1, 2), (3, 4)], dtype=np.int) test_header_footer = 'Test header / footer' @@ -425,15 +405,11 @@ class TestSaveTxt(TestCase): asbytes('1 2\n3 4\n' + commentstr + test_header_footer + '\n')) def test_file_roundtrip(self): - f, name = mkstemp() - os.close(f) - try: + with temppath() as name: a = np.array([(1, 2), (3, 4)]) np.savetxt(name, a) b = np.loadtxt(name) assert_array_equal(a, b) - finally: - os.unlink(name) def test_complex_arrays(self): ncols = 2 @@ -553,15 +529,49 @@ class TestLoadTxt(TestCase): a = np.array([[2, -999], [7, 9]], int) assert_array_equal(x, a) - def test_comments(self): + def test_comments_unicode(self): c = TextIO() c.write('# comment\n1,2,3,5\n') c.seek(0) x = np.loadtxt(c, dtype=int, delimiter=',', - comments='#') + comments=unicode('#')) + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + def test_comments_byte(self): + c = TextIO() + c.write('# comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + comments=b'#') + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + def test_comments_multiple(self): + c = TextIO() + c.write('# comment\n1,2,3\n@ comment2\n4,5,6 // comment3') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + comments=['#', '@', '//']) + a = np.array([[1, 2, 3], [4, 5, 6]], int) + assert_array_equal(x, a) + + def test_comments_multi_chars(self): + c = TextIO() + c.write('/* comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + comments='/*') a = np.array([1, 2, 3, 5], int) assert_array_equal(x, a) + # Check that '/*' is not transformed to ['/', '*'] + c = TextIO() + c.write('*/ comment\n1,2,3,5\n') + c.seek(0) + assert_raises(ValueError, np.loadtxt, c, dtype=int, delimiter=',', + comments='/*') + def test_skiprows(self): c = TextIO() c.write('comment\n1,2,3,5\n') @@ -599,6 +609,29 @@ class TestLoadTxt(TestCase): x = np.loadtxt(c, dtype=float, usecols=np.array([1, 2])) assert_array_equal(x, a[:, 1:]) + # Testing with an integer instead of a sequence + for int_type in [int, np.int8, np.int16, + np.int32, np.int64, np.uint8, np.uint16, + np.uint32, np.uint64]: + to_read = int_type(1) + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=to_read) + assert_array_equal(x, a[:, 1]) + + # Testing with some crazy custom integer type + class CrazyInt(object): + def __index__(self): + return 1 + + crazy_int = CrazyInt() + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=crazy_int) + assert_array_equal(x, a[:, 1]) + + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=(crazy_int,)) + assert_array_equal(x, a[:, 1]) + # Checking with dtypes defined converters. data = '''JOE 70.1 25.3 BOB 60.5 27.9 @@ -610,6 +643,21 @@ class TestLoadTxt(TestCase): assert_equal(arr['stid'], [b"JOE", b"BOB"]) assert_equal(arr['temp'], [25.3, 27.9]) + # Testing non-ints in usecols + c.seek(0) + bogus_idx = 1.5 + assert_raises_regex( + TypeError, + '^usecols must be.*%s' % type(bogus_idx), + np.loadtxt, c, usecols=bogus_idx + ) + + assert_raises_regex( + TypeError, + '^usecols must be.*%s' % type(bogus_idx), + np.loadtxt, c, usecols=[0, bogus_idx, 0] + ) + def test_fancy_dtype(self): c = TextIO() c.write('1,2,3.0\n4,5,6.0\n') @@ -639,9 +687,8 @@ class TestLoadTxt(TestCase): assert_array_equal(x, a) def test_empty_file(self): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", - message="loadtxt: Empty input file:") + with suppress_warnings() as sup: + sup.filter(message="loadtxt: Empty input file:") c = TextIO() x = np.loadtxt(c) assert_equal(x.shape, (0,)) @@ -663,9 +710,7 @@ class TestLoadTxt(TestCase): assert_array_equal(data, [33, 66]) def test_dtype_with_object(self): - "Test using an explicit dtype with an object" - from datetime import date - import time + # Test using an explicit dtype with an object data = """ 1; 2001-01-01 2; 2002-01-31 """ ndtype = [('idx', int), ('code', np.object)] @@ -694,16 +739,33 @@ class TestLoadTxt(TestCase): res = np.loadtxt(c, dtype=np.int64) assert_equal(res, tgt) - def test_universal_newline(self): - f, name = mkstemp() - os.write(f, b'1 21\r3 42\r') - os.close(f) + def test_from_float_hex(self): + # IEEE doubles and floats only, otherwise the float32 + # conversion may fail. + tgt = np.logspace(-10, 10, 5).astype(np.float32) + tgt = np.hstack((tgt, -tgt)).astype(np.float) + inp = '\n'.join(map(float.hex, tgt)) + c = TextIO() + c.write(inp) + for dt in [np.float, np.float32]: + c.seek(0) + res = np.loadtxt(c, dtype=dt) + assert_equal(res, tgt, err_msg="%s" % dt) + + def test_from_complex(self): + tgt = (complex(1, 1), complex(1, -1)) + c = TextIO() + c.write("%s %s" % tgt) + c.seek(0) + res = np.loadtxt(c, dtype=np.complex) + assert_equal(res, tgt) - try: + def test_universal_newline(self): + with temppath() as name: + with open(name, 'w') as f: + f.write('1 21\r3 42\r') data = np.loadtxt(name) - assert_array_equal(data, [[1, 21], [3, 42]]) - finally: - os.unlink(name) + assert_array_equal(data, [[1, 21], [3, 42]]) def test_empty_field_after_tab(self): c = TextIO() @@ -763,9 +825,8 @@ class TestLoadTxt(TestCase): assert_(x.shape == (3,)) # Test ndmin kw with empty file. - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", - message="loadtxt: Empty input file:") + with suppress_warnings() as sup: + sup.filter(message="loadtxt: Empty input file:") f = TextIO() assert_(np.loadtxt(f, ndmin=2).shape == (0, 1,)) assert_(np.loadtxt(f, ndmin=1).shape == (0,)) @@ -786,6 +847,14 @@ class TestLoadTxt(TestCase): # Check for exception and that exception contains line number assert_raises_regex(ValueError, "3", np.loadtxt, c) + def test_none_as_string(self): + # gh-5155, None should work as string when format demands it + c = TextIO() + c.write('100,foo,200\n300,None,400') + c.seek(0) + dt = np.dtype([('x', int), ('a', 'S10'), ('y', int)]) + np.loadtxt(c, delimiter=',', dtype=dt, comments=None) # Should succeed + class Testfromregex(TestCase): # np.fromregex expects files opened in binary mode. @@ -828,15 +897,13 @@ class Testfromregex(TestCase): class TestFromTxt(TestCase): # def test_record(self): - "Test w/ explicit dtype" + # Test w/ explicit dtype data = TextIO('1 2\n3 4') -# data.seek(0) test = np.ndfromtxt(data, dtype=[('x', np.int32), ('y', np.int32)]) control = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) assert_equal(test, control) # data = TextIO('M 64.0 75.0\nF 25.0 60.0') -# data.seek(0) descriptor = {'names': ('gender', 'age', 'weight'), 'formats': ('S1', 'i4', 'f4')} control = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)], @@ -845,7 +912,7 @@ class TestFromTxt(TestCase): assert_equal(test, control) def test_array(self): - "Test outputing a standard ndarray" + # Test outputing a standard ndarray data = TextIO('1 2\n3 4') control = np.array([[1, 2], [3, 4]], dtype=int) test = np.ndfromtxt(data, dtype=int) @@ -857,7 +924,7 @@ class TestFromTxt(TestCase): assert_array_equal(test, control) def test_1D(self): - "Test squeezing to 1D" + # Test squeezing to 1D control = np.array([1, 2, 3, 4], int) # data = TextIO('1\n2\n3\n4\n') @@ -869,7 +936,7 @@ class TestFromTxt(TestCase): assert_array_equal(test, control) def test_comments(self): - "Test the stripping of comments" + # Test the stripping of comments control = np.array([1, 2, 3, 5], int) # Comment on its own line data = TextIO('# comment\n1,2,3,5\n') @@ -881,7 +948,7 @@ class TestFromTxt(TestCase): assert_equal(test, control) def test_skiprows(self): - "Test row skipping" + # Test row skipping control = np.array([1, 2, 3, 5], int) kwargs = dict(dtype=int, delimiter=',') # @@ -905,8 +972,8 @@ class TestFromTxt(TestCase): assert_equal(test, ctrl) def test_skip_footer_with_invalid(self): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore") + with suppress_warnings() as sup: + sup.filter(ConversionWarning) basestr = '1 1\n2 2\n3 3\n4 4\n5 \n6 \n7 \n' # Footer too small to get rid of all invalid values assert_raises(ValueError, np.genfromtxt, @@ -929,7 +996,7 @@ class TestFromTxt(TestCase): assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.]])) def test_header(self): - "Test retrieving a header" + # Test retrieving a header data = TextIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0') test = np.ndfromtxt(data, dtype=None, names=True) control = {'gender': np.array([b'M', b'F']), @@ -940,7 +1007,7 @@ class TestFromTxt(TestCase): assert_equal(test['weight'], control['weight']) def test_auto_dtype(self): - "Test the automatic definition of the output dtype" + # Test the automatic definition of the output dtype data = TextIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False') test = np.ndfromtxt(data, dtype=None) control = [np.array([b'A', b'BCD']), @@ -953,14 +1020,14 @@ class TestFromTxt(TestCase): assert_equal(test['f%i' % i], ctrl) def test_auto_dtype_uniform(self): - "Tests whether the output dtype can be uniformized" + # Tests whether the output dtype can be uniformized data = TextIO('1 2 3 4\n5 6 7 8\n') test = np.ndfromtxt(data, dtype=None) control = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) assert_equal(test, control) def test_fancy_dtype(self): - "Check that a nested dtype isn't MIA" + # Check that a nested dtype isn't MIA data = TextIO('1,2,3.0\n4,5,6.0\n') fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) test = np.ndfromtxt(data, dtype=fancydtype, delimiter=',') @@ -968,7 +1035,7 @@ class TestFromTxt(TestCase): assert_equal(test, control) def test_names_overwrite(self): - "Test overwriting the names of the dtype" + # Test overwriting the names of the dtype descriptor = {'names': ('g', 'a', 'w'), 'formats': ('S1', 'i4', 'f4')} data = TextIO(b'M 64.0 75.0\nF 25.0 60.0') @@ -980,7 +1047,7 @@ class TestFromTxt(TestCase): assert_equal(test, control) def test_commented_header(self): - "Check that names can be retrieved even if the line is commented out." + # Check that names can be retrieved even if the line is commented out. data = TextIO(""" #gender age weight M 21 72.100000 @@ -1003,7 +1070,7 @@ M 33 21.99 assert_equal(test, ctrl) def test_autonames_and_usecols(self): - "Tests names and usecols" + # Tests names and usecols data = TextIO('A B C D\n aaaa 121 45 9.1') test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True, dtype=None) @@ -1012,7 +1079,7 @@ M 33 21.99 assert_equal(test, control) def test_converters_with_usecols(self): - "Test the combination user-defined converters and usecol" + # Test the combination user-defined converters and usecol data = TextIO('1,2,3,,5\n6,7,8,9,10\n') test = np.ndfromtxt(data, dtype=int, delimiter=',', converters={3: lambda s: int(s or - 999)}, @@ -1021,7 +1088,7 @@ M 33 21.99 assert_equal(test, control) def test_converters_with_usecols_and_names(self): - "Tests names and usecols" + # Tests names and usecols data = TextIO('A B C D\n aaaa 121 45 9.1') test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True, dtype=None, converters={'C': lambda s: 2 * int(s)}) @@ -1030,7 +1097,7 @@ M 33 21.99 assert_equal(test, control) def test_converters_cornercases(self): - "Test the conversion to datetime." + # Test the conversion to datetime. converter = { 'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')} data = TextIO('2009-02-03 12:00:00Z, 72214.0') @@ -1041,7 +1108,7 @@ M 33 21.99 assert_equal(test, control) def test_converters_cornercases2(self): - "Test the conversion to datetime64." + # Test the conversion to datetime64. converter = { 'date': lambda s: np.datetime64(strptime(s, '%Y-%m-%d %H:%M:%SZ'))} data = TextIO('2009-02-03 12:00:00Z, 72214.0') @@ -1052,7 +1119,7 @@ M 33 21.99 assert_equal(test, control) def test_unused_converter(self): - "Test whether unused converters are forgotten" + # Test whether unused converters are forgotten data = TextIO("1 21\n 3 42\n") test = np.ndfromtxt(data, usecols=(1,), converters={0: lambda s: int(s, 16)}) @@ -1077,7 +1144,7 @@ M 33 21.99 assert_raises(ConverterError, np.genfromtxt, s, **kwargs) def test_tricky_converter_bug1666(self): - "Test some corner case" + # Test some corner cases s = TextIO('q1,2\nq3,4') cnv = lambda s: float(s[1:]) test = np.genfromtxt(s, delimiter=',', converters={0: cnv}) @@ -1099,22 +1166,20 @@ M 33 21.99 def test_dtype_with_converters_and_usecols(self): dstr = "1,5,-1,1:1\n2,8,-1,1:n\n3,3,-2,m:n\n" dmap = {'1:1':0, '1:n':1, 'm:1':2, 'm:n':3} - dtyp = [('E1','i4'),('E2','i4'),('E3','i2'),('N', 'i1')] + dtyp = [('e1','i4'),('e2','i4'),('e3','i2'),('n', 'i1')] conv = {0: int, 1: int, 2: int, 3: lambda r: dmap[r.decode()]} test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',', names=None, converters=conv) control = np.rec.array([[1,5,-1,0], [2,8,-1,1], [3,3,-2,3]], dtype=dtyp) assert_equal(test, control) - dtyp = [('E1','i4'),('E2','i4'),('N', 'i1')] + dtyp = [('e1','i4'),('e2','i4'),('n', 'i1')] test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',', usecols=(0,1,3), names=None, converters=conv) control = np.rec.array([[1,5,0], [2,8,1], [3,3,3]], dtype=dtyp) assert_equal(test, control) def test_dtype_with_object(self): - "Test using an explicit dtype with an object" - from datetime import date - import time + # Test using an explicit dtype with an object data = """ 1; 2001-01-01 2; 2002-01-31 """ ndtype = [('idx', int), ('code', np.object)] @@ -1126,7 +1191,7 @@ M 33 21.99 [(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))], dtype=ndtype) assert_equal(test, control) - # + ndtype = [('nest', [('idx', int), ('code', np.object)])] try: test = np.genfromtxt(TextIO(data), delimiter=";", @@ -1138,7 +1203,7 @@ M 33 21.99 raise AssertionError(errmsg) def test_userconverters_with_explicit_dtype(self): - "Test user_converters w/ explicit (standard) dtype" + # Test user_converters w/ explicit (standard) dtype data = TextIO('skip,skip,2001-01-01,1.0,skip') test = np.genfromtxt(data, delimiter=",", names=None, dtype=float, usecols=(2, 3), converters={2: bytes}) @@ -1147,7 +1212,7 @@ M 33 21.99 assert_equal(test, control) def test_spacedelimiter(self): - "Test space delimiter" + # Test space delimiter data = TextIO("1 2 3 4 5\n6 7 8 9 10") test = np.ndfromtxt(data) control = np.array([[1., 2., 3., 4., 5.], @@ -1155,7 +1220,7 @@ M 33 21.99 assert_equal(test, control) def test_integer_delimiter(self): - "Test using an integer for delimiter" + # Test using an integer for delimiter data = " 1 2 3\n 4 5 67\n890123 4" test = np.genfromtxt(TextIO(data), delimiter=3) control = np.array([[1, 2, 3], [4, 5, 67], [890, 123, 4]]) @@ -1169,7 +1234,7 @@ M 33 21.99 assert_equal(test, control) def test_missing_with_tabs(self): - "Test w/ a delimiter tab" + # Test w/ a delimiter tab txt = "1\t2\t3\n\t2\t\n1\t\t3" test = np.genfromtxt(TextIO(txt), delimiter="\t", usemask=True,) @@ -1179,7 +1244,7 @@ M 33 21.99 assert_equal(test.mask, ctrl_m) def test_usecols(self): - "Test the selection of columns" + # Test the selection of columns # Select 1 column control = np.array([[1, 2], [3, 4]], float) data = TextIO() @@ -1200,7 +1265,7 @@ M 33 21.99 assert_equal(test, control[:, 1:]) def test_usecols_as_css(self): - "Test giving usecols with a comma-separated string" + # Test giving usecols with a comma-separated string data = "1 2 3\n4 5 6" test = np.genfromtxt(TextIO(data), names="a, b, c", usecols="a, c") @@ -1208,7 +1273,7 @@ M 33 21.99 assert_equal(test, ctrl) def test_usecols_with_structured_dtype(self): - "Test usecols with an explicit structured dtype" + # Test usecols with an explicit structured dtype data = TextIO("JOE 70.1 25.3\nBOB 60.5 27.9") names = ['stid', 'temp'] dtypes = ['S4', 'f8'] @@ -1218,12 +1283,12 @@ M 33 21.99 assert_equal(test['temp'], [25.3, 27.9]) def test_usecols_with_integer(self): - "Test usecols with an integer" + # Test usecols with an integer test = np.genfromtxt(TextIO(b"1 2 3\n4 5 6"), usecols=0) assert_equal(test, np.array([1., 4.])) def test_usecols_with_named_columns(self): - "Test usecols with named columns" + # Test usecols with named columns ctrl = np.array([(1, 3), (4, 6)], dtype=[('a', float), ('c', float)]) data = "1 2 3\n4 5 6" kwargs = dict(names="a, b, c") @@ -1234,16 +1299,15 @@ M 33 21.99 assert_equal(test, ctrl) def test_empty_file(self): - "Test that an empty file raises the proper warning." - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", - message="genfromtxt: Empty input file:") + # Test that an empty file raises the proper warning. + with suppress_warnings() as sup: + sup.filter(message="genfromtxt: Empty input file:") data = TextIO() test = np.genfromtxt(data) assert_equal(test, np.array([])) def test_fancy_dtype_alt(self): - "Check that a nested dtype isn't MIA" + # Check that a nested dtype isn't MIA data = TextIO('1,2,3.0\n4,5,6.0\n') fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) test = np.mafromtxt(data, dtype=fancydtype, delimiter=',') @@ -1309,7 +1373,7 @@ M 33 21.99 assert_equal(test, control) def test_user_filling_values(self): - "Test with missing and filling values" + # Test with missing and filling values ctrl = np.array([(0, 3), (4, -999)], dtype=[('a', int), ('b', int)]) data = "N/A, 2, 3\n4, ,???" kwargs = dict(delimiter=",", @@ -1347,7 +1411,7 @@ M 33 21.99 assert_equal(test.mask, control.mask) def test_with_masked_column_uniform(self): - "Test masked column" + # Test masked column data = TextIO('1 2 3\n4 5 6\n') test = np.genfromtxt(data, dtype=None, missing_values='2,5', usemask=True) @@ -1355,7 +1419,7 @@ M 33 21.99 assert_equal(test, control) def test_with_masked_column_various(self): - "Test masked column" + # Test masked column data = TextIO('True 2 3\nFalse 5 6\n') test = np.genfromtxt(data, dtype=None, missing_values='2,5', usemask=True) @@ -1365,7 +1429,7 @@ M 33 21.99 assert_equal(test, control) def test_invalid_raise(self): - "Test invalid raise" + # Test invalid raise data = ["1, 1, 1, 1, 1"] * 50 for i in range(5): data[10 * i] = "2, 2, 2, 2 2" @@ -1373,8 +1437,8 @@ M 33 21.99 mdata = TextIO("\n".join(data)) # kwargs = dict(delimiter=",", dtype=None, names=True) - # XXX: is there a better way to get the return value of the callable in - # assert_warns ? + # XXX: is there a better way to get the return value of the + # callable in assert_warns ? ret = {} def f(_ret={}): @@ -1389,7 +1453,7 @@ M 33 21.99 delimiter=",", names=True) def test_invalid_raise_with_usecols(self): - "Test invalid_raise with usecols" + # Test invalid_raise with usecols data = ["1, 1, 1, 1, 1"] * 50 for i in range(5): data[10 * i] = "2, 2, 2, 2 2" @@ -1397,8 +1461,8 @@ M 33 21.99 mdata = TextIO("\n".join(data)) kwargs = dict(delimiter=",", dtype=None, names=True, invalid_raise=False) - # XXX: is there a better way to get the return value of the callable in - # assert_warns ? + # XXX: is there a better way to get the return value of the + # callable in assert_warns ? ret = {} def f(_ret={}): @@ -1416,7 +1480,7 @@ M 33 21.99 assert_equal(mtest, control) def test_inconsistent_dtype(self): - "Test inconsistent dtype" + # Test inconsistent dtype data = ["1, 1, 1, 1, -1.1"] * 50 mdata = TextIO("\n".join(data)) @@ -1426,7 +1490,7 @@ M 33 21.99 assert_raises(ValueError, np.genfromtxt, mdata, **kwargs) def test_default_field_format(self): - "Test default format" + # Test default format data = "0, 1, 2.3\n4, 5, 6.7" mtest = np.ndfromtxt(TextIO(data), delimiter=",", dtype=None, defaultfmt="f%02i") @@ -1435,7 +1499,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_single_dtype_wo_names(self): - "Test single dtype w/o names" + # Test single dtype w/o names data = "0, 1, 2.3\n4, 5, 6.7" mtest = np.ndfromtxt(TextIO(data), delimiter=",", dtype=float, defaultfmt="f%02i") @@ -1443,7 +1507,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_single_dtype_w_explicit_names(self): - "Test single dtype w explicit names" + # Test single dtype w explicit names data = "0, 1, 2.3\n4, 5, 6.7" mtest = np.ndfromtxt(TextIO(data), delimiter=",", dtype=float, names="a, b, c") @@ -1452,7 +1516,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_single_dtype_w_implicit_names(self): - "Test single dtype w implicit names" + # Test single dtype w implicit names data = "a, b, c\n0, 1, 2.3\n4, 5, 6.7" mtest = np.ndfromtxt(TextIO(data), delimiter=",", dtype=float, names=True) @@ -1461,7 +1525,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_easy_structured_dtype(self): - "Test easy structured dtype" + # Test easy structured dtype data = "0, 1, 2.3\n4, 5, 6.7" mtest = np.ndfromtxt(TextIO(data), delimiter=",", dtype=(int, float, float), defaultfmt="f_%02i") @@ -1470,7 +1534,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_autostrip(self): - "Test autostrip" + # Test autostrip data = "01/01/2003 , 1.3, abcde" kwargs = dict(delimiter=",", dtype=None) mtest = np.ndfromtxt(TextIO(data), **kwargs) @@ -1483,7 +1547,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_replace_space(self): - "Test the 'replace_space' option" + # Test the 'replace_space' option txt = "A.A, B (B), C:C\n1, 2, 3.14" # Test default: replace ' ' by '_' and delete non-alphanum chars test = np.genfromtxt(TextIO(txt), @@ -1506,8 +1570,32 @@ M 33 21.99 ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) assert_equal(test, ctrl) + def test_replace_space_known_dtype(self): + # Test the 'replace_space' (and related) options when dtype != None + txt = "A.A, B (B), C:C\n1, 2, 3" + # Test default: replace ' ' by '_' and delete non-alphanum chars + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int) + ctrl_dtype = [("AA", int), ("B_B", int), ("CC", int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no replace, no delete + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int, + replace_space='', deletechars='') + ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no delete (spaces are replaced by _) + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int, + deletechars='') + ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + def test_incomplete_names(self): - "Test w/ incomplete names" + # Test w/ incomplete names data = "A,,C\n0,1,2\n3,4,5" kwargs = dict(delimiter=",", names=True) # w/ dtype=None @@ -1521,7 +1609,7 @@ M 33 21.99 test = np.ndfromtxt(TextIO(data), **kwargs) def test_names_auto_completion(self): - "Make sure that names are properly completed" + # Make sure that names are properly completed data = "1 2 3\n 4 5 6" test = np.genfromtxt(TextIO(data), dtype=(int, float, int), names="a") @@ -1530,7 +1618,7 @@ M 33 21.99 assert_equal(test, ctrl) def test_names_with_usecols_bug1636(self): - "Make sure we pick up the right names w/ usecols" + # Make sure we pick up the right names w/ usecols data = "A,B,C,D,E\n0,1,2,3,4\n0,1,2,3,4\n0,1,2,3,4" ctrl_names = ("A", "C", "E") test = np.genfromtxt(TextIO(data), @@ -1549,7 +1637,7 @@ M 33 21.99 assert_equal(test.dtype.names, ctrl_names) def test_fixed_width_names(self): - "Test fix-width w/ names" + # Test fix-width w/ names data = " A B C\n 0 1 2.3\n 45 67 9." kwargs = dict(delimiter=(5, 5, 4), names=True, dtype=None) ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)], @@ -1564,7 +1652,7 @@ M 33 21.99 assert_equal(test, ctrl) def test_filling_values(self): - "Test missing values" + # Test missing values data = b"1, 2, 3\n1, , 5\n0, 6, \n" kwargs = dict(delimiter=",", dtype=None, filling_values=-999) ctrl = np.array([[1, 2, 3], [1, -999, 5], [0, 6, -999]], dtype=int) @@ -1633,9 +1721,64 @@ M 33 21.99 self.assertTrue(isinstance(test, np.recarray)) assert_equal(test, control) + def test_max_rows(self): + # Test the `max_rows` keyword argument. + data = '1 2\n3 4\n5 6\n7 8\n9 10\n' + txt = TextIO(data) + a1 = np.genfromtxt(txt, max_rows=3) + a2 = np.genfromtxt(txt) + assert_equal(a1, [[1, 2], [3, 4], [5, 6]]) + assert_equal(a2, [[7, 8], [9, 10]]) + + # max_rows must be at least 1. + assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=0) + + # An input with several invalid rows. + data = '1 1\n2 2\n0 \n3 3\n4 4\n5 \n6 \n7 \n' + + test = np.genfromtxt(TextIO(data), max_rows=2) + control = np.array([[1., 1.], [2., 2.]]) + assert_equal(test, control) + + # Test keywords conflict + assert_raises(ValueError, np.genfromtxt, TextIO(data), skip_footer=1, + max_rows=4) + + # Test with invalid value + assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=4) + + # Test with invalid not raise + with suppress_warnings() as sup: + sup.filter(ConversionWarning) + + test = np.genfromtxt(TextIO(data), max_rows=4, invalid_raise=False) + control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]) + assert_equal(test, control) + + test = np.genfromtxt(TextIO(data), max_rows=5, invalid_raise=False) + control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]) + assert_equal(test, control) + + # Structured array with field names. + data = 'a b\n#c d\n1 1\n2 2\n#0 \n3 3\n4 4\n5 5\n' + + # Test with header, names and comments + txt = TextIO(data) + test = np.genfromtxt(txt, skip_header=1, max_rows=3, names=True) + control = np.array([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)], + dtype=[('c', '<f8'), ('d', '<f8')]) + assert_equal(test, control) + # To continue reading the same "file", don't use skip_header or + # names, and use the previously determined dtype. + test = np.genfromtxt(txt, max_rows=None, dtype=test.dtype) + control = np.array([(4.0, 4.0), (5.0, 5.0)], + dtype=[('c', '<f8'), ('d', '<f8')]) + assert_equal(test, control) + def test_gft_using_filename(self): - # Test that we can load data from a filename as well as a file object - wanted = np.arange(6).reshape((2, 3)) + # Test that we can load data from a filename as well as a file + # object + tgt = np.arange(6).reshape((2, 3)) if sys.version_info[0] >= 3: # python 3k is known to fail for '\r' linesep = ('\n', '\r\n') @@ -1644,15 +1787,11 @@ M 33 21.99 for sep in linesep: data = '0 1 2' + sep + '3 4 5' - f, name = mkstemp() - # We can't use NamedTemporaryFile on windows, because we cannot - # reopen the file. - try: - os.write(f, asbytes(data)) - assert_array_equal(np.genfromtxt(name), wanted) - finally: - os.close(f) - os.unlink(name) + with temppath() as name: + with open(name, 'w') as f: + f.write(data) + res = np.genfromtxt(name) + assert_array_equal(res, tgt) def test_gft_using_generator(self): # gft doesn't work with unicode. @@ -1663,6 +1802,133 @@ M 33 21.99 res = np.genfromtxt(count()) assert_array_equal(res, np.arange(10)) + def test_auto_dtype_largeint(self): + # Regression test for numpy/numpy#5635 whereby large integers could + # cause OverflowErrors. + + # Test the automatic definition of the output dtype + # + # 2**66 = 73786976294838206464 => should convert to float + # 2**34 = 17179869184 => should convert to int64 + # 2**10 = 1024 => should convert to int (int32 on 32-bit systems, + # int64 on 64-bit systems) + + data = TextIO('73786976294838206464 17179869184 1024') + + test = np.ndfromtxt(data, dtype=None) + + assert_equal(test.dtype.names, ['f0', 'f1', 'f2']) + + assert_(test.dtype['f0'] == np.float) + assert_(test.dtype['f1'] == np.int64) + assert_(test.dtype['f2'] == np.integer) + + assert_allclose(test['f0'], 73786976294838206464.) + assert_equal(test['f1'], 17179869184) + assert_equal(test['f2'], 1024) + + +class TestPathUsage(TestCase): + # Test that pathlib.Path can be used + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_loadtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + a = np.array([[1.1, 2], [3, 4]]) + np.savetxt(path, a) + x = np.loadtxt(path) + assert_array_equal(x, a) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_save_load(self): + # Test that pathlib.Path instances can be used with savez. + with temppath(suffix='.npy') as path: + path = Path(path) + a = np.array([[1, 2], [3, 4]], int) + np.save(path, a) + data = np.load(path) + assert_array_equal(data, a) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_savez_load(self): + # Test that pathlib.Path instances can be used with savez. + with temppath(suffix='.npz') as path: + path = Path(path) + np.savez(path, lab='place holder') + with np.load(path) as data: + assert_array_equal(data['lab'], 'place holder') + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_savez_compressed_load(self): + # Test that pathlib.Path instances can be used with savez. + with temppath(suffix='.npz') as path: + path = Path(path) + np.savez_compressed(path, lab='place holder') + data = np.load(path) + assert_array_equal(data['lab'], 'place holder') + data.close() + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_genfromtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + a = np.array([(1, 2), (3, 4)]) + np.savetxt(path, a) + data = np.genfromtxt(path) + assert_array_equal(a, data) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_ndfromtxt(self): + # Test outputing a standard ndarray + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'1 2\n3 4') + + control = np.array([[1, 2], [3, 4]], dtype=int) + test = np.ndfromtxt(path, dtype=int) + assert_array_equal(test, control) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_mafromtxt(self): + # From `test_fancy_dtype_alt` above + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'1,2,3.0\n4,5,6.0\n') + + test = np.mafromtxt(path, delimiter=',') + control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)]) + assert_equal(test, control) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_recfromtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'A,B\n0,1\n2,3') + + kwargs = dict(delimiter=",", missing_values="N/A", names=True) + test = np.recfromtxt(path, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', np.int), ('B', np.int)]) + self.assertTrue(isinstance(test, np.recarray)) + assert_equal(test, control) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_recfromcsv(self): + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'A,B\n0,1\n2,3') + + kwargs = dict(missing_values="N/A", names=True, case_sensitive=True) + test = np.recfromcsv(path, dtype=None, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', np.int), ('B', np.int)]) + self.assertTrue(isinstance(test, np.recarray)) + assert_equal(test, control) + def test_gzip_load(): a = np.random.random((5, 5)) @@ -1688,16 +1954,15 @@ def test_gzip_loadtxt(): g = gzip.GzipFile(fileobj=s, mode='w') g.write(b'1 2 3\n') g.close() + s.seek(0) + with temppath(suffix='.gz') as name: + with open(name, 'wb') as f: + f.write(s.read()) + res = np.loadtxt(name) + s.close() - f, name = mkstemp(suffix='.gz') - try: - os.write(f, s.read()) - s.close() - assert_array_equal(np.loadtxt(name), [1, 2, 3]) - finally: - os.close(f) - os.unlink(name) + assert_array_equal(res, [1, 2, 3]) def test_gzip_loadtxt_from_string(): @@ -1746,12 +2011,17 @@ def test_load_refcount(): np.savez(f, [1, 2, 3]) f.seek(0) - gc.collect() - n_before = len(gc.get_objects()) - np.load(f) - n_after = len(gc.get_objects()) - - assert_equal(n_before, n_after) + assert_(gc.isenabled()) + gc.disable() + try: + gc.collect() + np.load(f) + # gc.collect returns the number of unreachable objects in cycles that + # were found -- we are checking that no cycles were created by np.load + n_objects_in_cycles = gc.collect() + finally: + gc.enable() + assert_equal(n_objects_in_cycles, 0) if __name__ == "__main__": run_module_suite() |