summaryrefslogtreecommitdiff
path: root/numpy/lib/tests/test_io.py
diff options
context:
space:
mode:
authorRoss Barnowski <rossbar@berkeley.edu>2022-01-06 14:51:41 -0800
committerSebastian Berg <sebastian@sipsolutions.net>2022-01-14 20:07:07 -0600
commit66a61b03658f3c9f312505dcf7eab07e4cf91ac6 (patch)
treea0532017a98ef007eadbeb2ea0f78e10c6bb56ab /numpy/lib/tests/test_io.py
parent3f2b8d38805d082459d5fc8cfd747291c5ed32d2 (diff)
downloadnumpy-66a61b03658f3c9f312505dcf7eab07e4cf91ac6.tar.gz
Port over tests from npreadtext test suite
- Add test for parsing scientific notation. - Add multiple-char comment test. - Port over tests for structured dtypes. - Add tests for exceptions on skiprows/max_rows. - port over ndmin tests. - Make structured data reusable, add unpack tests. - Port over delimiter tests. - Port over maxrows test w/ various dtypes. - Port over test of exception msg on parse failure. - Port over test for converters w/neg indices. - Port over usecols tests - Port over unicode tests. - Port over more converter tests. - Port over test for large rows. - Port over test for string-len discovery. - Port over float conversion accuracy test. - Port over bool test. - Add test for implicit float->int conversion. - Port over complex parsing tests. - Port over tests for reading from generator. - Port over object cleanup test. - Port over bytes incompat test. - Port over converters tests. Co-authored-by: Warren Weckesser <warren.weckesser@gmail.com> Co-authored-by: Sebastian Berg <sebastian@sipsolutions.net>
Diffstat (limited to 'numpy/lib/tests/test_io.py')
-rw-r--r--numpy/lib/tests/test_io.py484
1 files changed, 484 insertions, 0 deletions
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index a3f2ec411..1884349db 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -2396,6 +2396,13 @@ M 33 21.99
assert_equal(test['f1'], 17179869184)
assert_equal(test['f2'], 1024)
+ def test_unpack_float_data(self):
+ txt = TextIO("1,2,3\n4,5,6\n7,8,9\n0.0,1.0,2.0")
+ a, b, c = np.loadtxt(txt, delimiter=",", unpack=True)
+ assert_array_equal(a, np.array([1.0, 4.0, 7.0, 0.0]))
+ assert_array_equal(b, np.array([2.0, 5.0, 8.0, 1.0]))
+ assert_array_equal(c, np.array([3.0, 6.0, 9.0, 2.0]))
+
def test_unpack_structured(self):
# Regression test for gh-4341
# Unpacking should work on structured arrays
@@ -2682,3 +2689,480 @@ def test_load_refcount():
with assert_no_gc_cycles():
x = np.loadtxt(TextIO("0 1 2 3"), dtype=dt)
assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt))
+
+
+def test_loadtxt_scientific_notation():
+ """Test that both 'e' and 'E' are parsed correctly."""
+ data = TextIO(
+ (
+ "1.0e-1,2.0E1,3.0\n"
+ "4.0e-2,5.0E-1,6.0\n"
+ "7.0e-3,8.0E1,9.0\n"
+ "0.0e-4,1.0E-1,2.0"
+ )
+ )
+ expected = np.array(
+ [[0.1, 20., 3.0], [0.04, 0.5, 6], [0.007, 80., 9], [0, 0.1, 2]]
+ )
+ assert_array_equal(np.loadtxt(data, delimiter=","), expected)
+
+
+@pytest.mark.parametrize("comment", ["..", "//", "@-", "this is a comment:"])
+def test_loadtxt_comment_multiple_chars(comment):
+ content = "# IGNORE\n1.5, 2.5# ABC\n3.0,4.0# XXX\n5.5,6.0\n"
+ txt = TextIO(content.replace("#", comment))
+ a = np.loadtxt(txt, delimiter=",", comments=comment)
+ assert_equal(a, [[1.5, 2.5], [3.0, 4.0], [5.5, 6.0]])
+
+
+@pytest.fixture
+def mixed_types_structured():
+ """
+ Fixture providing hetergeneous input data with a structured dtype, along
+ with the associated structured array.
+ """
+ data = TextIO(
+ (
+ "1000;2.4;alpha;-34\n"
+ "2000;3.1;beta;29\n"
+ "3500;9.9;gamma;120\n"
+ "4090;8.1;delta;0\n"
+ "5001;4.4;epsilon;-99\n"
+ "6543;7.8;omega;-1\n"
+ )
+ )
+ dtype = np.dtype(
+ [('f0', np.uint16), ('f1', np.float64), ('f2', 'S7'), ('f3', np.int8)]
+ )
+ expected = np.array(
+ [
+ (1000, 2.4, "alpha", -34),
+ (2000, 3.1, "beta", 29),
+ (3500, 9.9, "gamma", 120),
+ (4090, 8.1, "delta", 0),
+ (5001, 4.4, "epsilon", -99),
+ (6543, 7.8, "omega", -1)
+ ],
+ dtype=dtype
+ )
+ return data, dtype, expected
+
+
+@pytest.mark.parametrize('skiprows', [0, 1, 2, 3])
+def test_loadtxt_structured_dtype_and_skiprows_no_empty_lines(
+ skiprows, mixed_types_structured
+ ):
+ data, dtype, expected = mixed_types_structured
+ a = np.loadtxt(data, dtype=dtype, delimiter=";", skiprows=skiprows)
+ assert_array_equal(a, expected[skiprows:])
+
+
+def test_loadtxt_unpack_structured(mixed_types_structured):
+ data, dtype, expected = mixed_types_structured
+
+ a, b, c, d = np.loadtxt(data, dtype=dtype, delimiter=";", unpack=True)
+ assert_array_equal(a, expected["f0"])
+ assert_array_equal(b, expected["f1"])
+ assert_array_equal(c, expected["f2"])
+ assert_array_equal(d, expected["f3"])
+
+
+def test_loadtxt_structured_dtype_with_shape():
+ dtype = np.dtype([("a", "u1", 2), ("b", "u1", 2)])
+ data = TextIO("0,1,2,3\n6,7,8,9\n")
+ expected = np.array([((0, 1), (2, 3)), ((6, 7), (8, 9))], dtype=dtype)
+ assert_array_equal(np.loadtxt(data, delimiter=",", dtype=dtype), expected)
+
+
+def test_loadtxt_structured_dtype_with_multi_shape():
+ dtype = np.dtype([("a", "u1", (2, 2))])
+ data = TextIO("0 1 2 3\n")
+ expected = np.array([(((0, 1), (2, 3)),)], dtype=dtype)
+ assert_array_equal(np.loadtxt(data, dtype=dtype), expected)
+
+
+def test_loadtxt_nested_structured_subarray():
+ # Test from gh-16678
+ point = np.dtype([('x', float), ('y', float)])
+ dt = np.dtype([('code', int), ('points', point, (2,))])
+ data = TextIO("100,1,2,3,4\n200,5,6,7,8\n")
+ expected = np.array(
+ [
+ (100, [(1., 2.), (3., 4.)]),
+ (200, [(5., 6.), (7., 8.)]),
+ ],
+ dtype=dt
+ )
+ assert_array_equal(np.loadtxt(data, dtype=dt, delimiter=","), expected)
+
+
+def test_loadtxt_structured_dtype_offsets():
+ # An aligned structured dtype will have additional padding
+ dt = np.dtype("i1, i4, i1, i4, i1, i4", align=True)
+ data = TextIO("1,2,3,4,5,6\n7,8,9,10,11,12\n")
+ expected = np.array([(1, 2, 3, 4, 5, 6), (7, 8, 9, 10, 11, 12)], dtype=dt)
+ assert_array_equal(np.loadtxt(data, delimiter=",", dtype=dt), expected)
+
+
+@pytest.mark.parametrize("param", ("skiprows", "max_rows"))
+def test_loadtxt_exception_negative_row_limits(param):
+ """skiprows and max_rows should raise for negative parameters."""
+ with pytest.raises(ValueError, match="argument must be nonnegative"):
+ np.loadtxt("foo.bar", **{param: -3})
+
+
+@pytest.mark.parametrize("param", ("skiprows", "max_rows"))
+def test_loadtxt_exception_noninteger_row_limits(param):
+ with pytest.raises(TypeError, match="argument must be an integer"):
+ np.loadtxt("foo.bar", **{param: 1.0})
+
+
+@pytest.mark.parametrize(
+ "data, shape",
+ [
+ ("1 2 3 4 5\n", (1, 5)), # Single row
+ ("1\n2\n3\n4\n5\n", (5, 1)), # Single column
+ ]
+)
+def test_loadtxt_ndmin_single_row_or_col(data, shape):
+ arr = np.array([1, 2, 3, 4, 5])
+ arr2d = arr.reshape(shape)
+
+ assert_array_equal(np.loadtxt(TextIO(data), dtype=int), arr)
+ assert_array_equal(np.loadtxt(TextIO(data), dtype=int, ndmin=0), arr)
+ assert_array_equal(np.loadtxt(TextIO(data), dtype=int, ndmin=1), arr)
+ assert_array_equal(np.loadtxt(TextIO(data), dtype=int, ndmin=2), arr2d)
+
+
+@pytest.mark.parametrize("badval", [-1, 3, None, "plate of shrimp"])
+def test_loadtxt_bad_ndmin(badval):
+ with pytest.raises(ValueError, match="Illegal value of ndmin keyword"):
+ np.loadtxt("foo.bar", ndmin=badval)
+
+
+@pytest.mark.parametrize(
+ "ws",
+ (
+ "\t", # tab
+ "\u2003", # em
+ "\u00A0", # non-break
+ "\u3000", # ideographic space
+ )
+)
+def test_loadtxt_blank_lines_spaces_delimit(ws):
+ txt = StringIO(
+ f"1 2{ws}30\n\n4 5 60\n {ws} \n7 8 {ws} 90\n # comment\n3 2 1"
+ )
+ # NOTE: It is unclear that the ` # comment` should succeed. Except
+ # for delimiter=None, which should use any whitespace (and maybe
+ # should just be implemented closer to Python
+ expected = np.array([[1, 2, 30], [4, 5, 60], [7, 8, 90], [3, 2, 1]])
+ assert_equal(
+ np.loadtxt(txt, dtype=int, delimiter='', comments="#"), expected
+ )
+
+
+def test_loadtxt_blank_lines_normal_delimiter():
+ txt = StringIO('1,2,30\n\n4,5,60\n\n7,8,90\n# comment\n3,2,1')
+ expected = np.array([[1, 2, 30], [4, 5, 60], [7, 8, 90], [3, 2, 1]])
+ assert_equal(
+ np.loadtxt(txt, dtype=int, delimiter=',', comments="#"), expected
+ )
+
+
+@pytest.mark.parametrize("dtype", (float, object))
+def test_loadtxt_maxrows_no_blank_lines(dtype):
+ txt = TextIO("1.5,2.5\n3.0,4.0\n5.5,6.0")
+ res = np.loadtxt(txt, dtype=dtype, delimiter=",", max_rows=2)
+ assert_equal(res.dtype, dtype)
+ assert_equal(res, np.array([["1.5", "2.5"], ["3.0", "4.0"]], dtype=dtype))
+
+
+@pytest.mark.parametrize("dtype", (np.dtype("f8"), np.dtype("i2")))
+def test_loadtxt_exception_message_bad_values(dtype):
+ txt = TextIO("1.0,2.0\n3.0,XXX\n5.5,6.0")
+ msg = f"could not convert string .XXX. to {dtype} at row 1, column 2"
+ with pytest.raises(ValueError, match=msg):
+ np.loadtxt(txt, dtype=dtype, delimiter=",")
+
+
+def test_loadtxt_converters_negative_indices():
+ txt = TextIO('1.5,2.5\n3.0,XXX\n5.5,6.0')
+ conv = {-1: lambda s: np.nan if s == 'XXX' else float(s)}
+ expected = np.array([[1.5, 2.5], [3.0, np.nan], [5.5, 6.0]])
+ res = np.loadtxt(
+ txt, dtype=np.float64, delimiter=",", converters=conv, encoding=None
+ )
+ assert_equal(res, expected)
+
+
+def test_loadtxt_converters_negative_indices_with_usecols():
+ txt = TextIO('1.5,2.5,3.5\n3.0,4.0,XXX\n5.5,6.0,7.5\n')
+ conv = {-1: lambda s: np.nan if s == 'XXX' else float(s)}
+ expected = np.array([[1.5, 3.5], [3.0, np.nan], [5.5, 7.5]])
+ res = np.loadtxt(
+ txt,
+ dtype=np.float64,
+ delimiter=",",
+ converters=conv,
+ usecols=[0, -1],
+ encoding=None,
+ )
+ assert_equal(res, expected)
+
+
+def test_loadtxt_ragged_usecols():
+ # usecols, and negative ones, work even with varying number of columns.
+ txt = TextIO("0,0,XXX\n0,XXX,0,XXX\n0,XXX,XXX,0,XXX\n")
+ expected = np.array([[0, 0], [0, 0], [0, 0]])
+ res = np.loadtxt(txt, dtype=float, delimiter=",", usecols=[0, -2])
+ assert_equal(res, expected)
+
+
+def test_loadtxt_empty_usecols():
+ txt = TextIO("0,0,XXX\n0,XXX,0,XXX\n0,XXX,XXX,0,XXX\n")
+ res = np.loadtxt(txt, dtype=np.dtype([]), delimiter=",", usecols=[])
+ assert res.shape == (3,)
+ assert res.dtype == np.dtype([])
+
+
+@pytest.mark.parametrize("c1", ["a", "の", "🫕"])
+@pytest.mark.parametrize("c2", ["a", "の", "🫕"])
+def test_loadtxt_large_unicode_characters(c1, c2):
+ # c1 and c2 span ascii, 16bit and 32bit range.
+ txt = StringIO(f"a,{c1},c,1.0\ne,{c2},2.0,g")
+ res = np.loadtxt(txt, dtype=np.dtype('U12'), delimiter=",")
+ expected = np.array(
+ [f"a,{c1},c,1.0".split(","), f"e,{c2},2.0,g".split(",")],
+ dtype=np.dtype('U12')
+ )
+ assert_equal(res, expected)
+
+
+def test_loadtxt_unicode_with_converter():
+ txt = StringIO("cat,dog\nαβγ,δεζ\nabc,def\n")
+ conv = {0: lambda s: s.upper()}
+ res = np.loadtxt(
+ txt,
+ dtype=np.dtype("U12"),
+ converters=conv,
+ delimiter=",",
+ encoding=None
+ )
+ expected = np.array([['CAT', 'dog'], ['ΑΒΓ', 'δεζ'], ['ABC', 'def']])
+ assert_equal(res, expected)
+
+
+def test_loadtxt_converter_with_structured_dtype():
+ txt = TextIO('1.5,2.5,Abc\n3.0,4.0,dEf\n5.5,6.0,ghI\n')
+ dt = np.dtype([('m', np.int32), ('r', np.float32), ('code', 'U8')])
+ conv = {0: lambda s: int(10*float(s)), -1: lambda s: s.upper()}
+ res = np.loadtxt(txt, dtype=dt, delimiter=",", converters=conv)
+ expected = np.array(
+ [(15, 2.5, 'ABC'), (30, 4.0, 'DEF'), (55, 6.0, 'GHI')], dtype=dt
+ )
+ assert_equal(res, expected)
+
+
+def test_loadtxt_converter_with_unicode_dtype():
+ """
+ With the default 'bytes' encoding, tokens are encoded prior to being passed
+ to the converter. This means that the output of the converter may be bytes
+ instead of unicode as expected by `read_rows`.
+
+ This test checks that outputs from the above scenario are properly decoded
+ prior to parsing by `read_rows`.
+ """
+ txt = StringIO('abc,def\nrst,xyz')
+ conv = bytes.upper
+ res = np.loadtxt(txt, dtype=np.dtype("U3"), converters=conv, delimiter=",")
+ expected = np.array([['ABC', 'DEF'], ['RST', 'XYZ']])
+ assert_equal(res, expected)
+
+
+def test_loadtxt_read_huge_row():
+ row = "1.5, 2.5," * 50000
+ row = row[:-1] + "\n"
+ txt = TextIO(row * 2)
+ res = np.loadtxt(txt, delimiter=",", dtype=float)
+ assert_equal(res, np.tile([1.5, 2.5], (2, 50000)))
+
+
+@pytest.mark.parametrize(
+ ("given_dtype", "expected_dtype"),
+ [
+ ("S", np.dtype("S5")),
+ ("U", np.dtype("U5")),
+ ],
+)
+def test_loadtxt_string_no_length_given(given_dtype, expected_dtype):
+ """
+ The given dtype is just 'S' or 'U' with no length. In these cases, the
+ length of the resulting dtype is determined by the longest string found
+ in the file.
+ """
+ txt = TextIO("AAA,5-1\nBBBBB,0-3\nC,4-9\n")
+ res = np.loadtxt(txt, dtype=given_dtype, delimiter=",")
+ expected = np.array(
+ [['AAA', '5-1'], ['BBBBB', '0-3'], ['C', '4-9']], dtype=expected_dtype
+ )
+ assert_equal(res, expected)
+ assert_equal(res.dtype, expected_dtype)
+
+
+def test_loadtxt_float_conversion():
+ """
+ Some tests that the conversion to float64 works as accurately as the Python
+ built-in `float` function. In a naive version of the float parser, these
+ strings resulted in values that were off by an ULP or two.
+ """
+ strings = [
+ '0.9999999999999999',
+ '9876543210.123456',
+ '5.43215432154321e+300',
+ '0.901',
+ '0.333',
+ ]
+ txt = TextIO('\n'.join(strings))
+ res = np.loadtxt(txt)
+ expected = np.array([float(s) for s in strings])
+ assert_equal(res, expected)
+
+
+def test_loadtxt_bool():
+ # Simple test for bool via integer
+ txt = TextIO("1, 0\n10, -1")
+ res = np.loadtxt(txt, dtype=bool, delimiter=",")
+ assert res.dtype == bool
+ assert_array_equal(res, [[True, False], [True, True]])
+ # Make sure we use only 1 and 0 on the byte level:
+ assert_array_equal(res.view(np.uint8), [[1, 0], [1, 1]])
+
+
+@pytest.mark.parametrize(
+ "dtype",
+ (
+ np.int8,
+ np.int16,
+ np.int32,
+ np.int64,
+ np.uint8,
+ np.uint16,
+ np.uint32,
+ np.uint64,
+ )
+)
+def test_loadtxt_implicit_cast_float_to_int(dtype):
+ """
+ Currently the parser_config flag `allow_flot_for_int` is hardcoded to be
+ true. This means that if the parsing of an integer value fails, the code
+ will attempt to parse it as a float, then cast the float value to an
+ integer. This flag is only used when an explicit dtype is given.
+ """
+ txt = TextIO("1.0, 2.1, 3.7\n4, 5, 6")
+ res = np.loadtxt(txt, dtype=dtype, delimiter=",")
+ expected = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype)
+ assert_equal(res, expected)
+
+
+@pytest.mark.parametrize("dtype", (np.complex64, np.complex128))
+@pytest.mark.parametrize("with_parens", (False, True))
+def test_loadtxt_complex_parsing(dtype, with_parens):
+ s = "(1.0-2.5j),3.75,(7+-5.0j)\n(4),(-19e2j),(0)"
+ if not with_parens:
+ s = s.replace("(", "").replace(")", "")
+
+ res = np.loadtxt(TextIO(s), dtype=dtype, delimiter=",")
+ expected = np.array(
+ [[1.0-2.5j, 3.75, 7-5j], [4.0, -1900j, 0]], dtype=dtype
+ )
+ assert_equal(res, expected)
+
+
+def test_loadtxt_read_from_generator():
+ def gen():
+ for i in range(4):
+ yield f"{i},{2*i},{i**2}"
+
+ res = np.loadtxt(gen(), dtype=int, delimiter=",")
+ expected = np.array([[0, 0, 0], [1, 2, 1], [2, 4, 4], [3, 6, 9]])
+ assert_equal(res, expected)
+
+
+def test_loadtxt_read_from_generator_multitype():
+ def gen():
+ for i in range(3):
+ yield f"{i} {i / 4}"
+
+ res = np.loadtxt(gen(), dtype="i, d", delimiter=" ")
+ expected = np.array([(0, 0.0), (1, 0.25), (2, 0.5)], dtype="i, d")
+ assert_equal(res, expected)
+
+
+def test_loadtxt_read_from_bad_generator():
+ def gen():
+ for entry in ["1,2", b"3, 5", 12738]:
+ yield entry
+
+ with pytest.raises(
+ TypeError, match=r"non-string returned while reading data"
+ ):
+ np.loadtxt(gen(), dtype="i, i", delimiter=",")
+
+
+@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
+def test_loadtxt_object_cleanup_on_read_error():
+ sentinel = object()
+
+ already_read = 0
+ def conv(x):
+ nonlocal already_read
+ if already_read > 4999:
+ raise ValueError("failed half-way through!")
+ already_read += 1
+ return sentinel
+
+ txt = TextIO("x\n" * 10000)
+
+ with pytest.raises(ValueError, match="at row 5000, column 1"):
+ np.loadtxt(txt, dtype=object, converters={0: conv})
+
+ assert sys.getrefcount(sentinel) == 2
+
+
+def test_loadtxt_character_not_bytes_compatible():
+ """Test exception when a character cannot be encoded as 'S'."""
+ data = StringIO("–") # == \u2013
+ with pytest.raises(ValueError):
+ np.loadtxt(data, dtype="S5")
+
+
+@pytest.mark.parametrize("conv", (0, [float], ""))
+def test_loadtxt_invalid_converter(conv):
+ msg = (
+ "converters must be a dictionary mapping columns to converter "
+ "functions or a single callable."
+ )
+ with pytest.raises(TypeError, match=msg):
+ np.loadtxt(TextIO("1 2\n3 4"), converters=conv)
+
+
+def test_loadtxt_converters_dict_raises_non_integer_key():
+ with pytest.raises(TypeError, match="keys of the converters dict"):
+ np.loadtxt(TextIO("1 2\n3 4"), converters={"a": int})
+ with pytest.raises(TypeError, match="keys of the converters dict"):
+ np.loadtxt(TextIO("1 2\n3 4"), converters={"a": int}, usecols=0)
+
+
+@pytest.mark.parametrize("bad_col_ind", (3, -3))
+def test_loadtxt_converters_dict_raises_non_col_key(bad_col_ind):
+ data = TextIO("1 2\n3 4")
+ with pytest.raises(ValueError, match="converter specified for column"):
+ np.loadtxt(data, converters={bad_col_ind: int})
+
+
+def test_loadtxt_converters_dict_raises_val_not_callable():
+ with pytest.raises(
+ TypeError, match="values of the converters dictionary must be callable"
+ ):
+ np.loadtxt(StringIO("1 2\n3 4"), converters={0: 1})