diff options
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/io.py | 42 |
-rw-r--r-- | numpy/lib/tests/test_io.py | 72 |
2 files changed, 85 insertions, 29 deletions
diff --git a/numpy/lib/io.py b/numpy/lib/io.py index d16432814..3903f779e 100644 --- a/numpy/lib/io.py +++ b/numpy/lib/io.py @@ -362,22 +362,44 @@ def savetxt(fname, X, fmt='%.18e',delimiter=' '): X.shape = origShape import re -def fromregex(file, regexp, **kwds): +def fromregex(file, regexp, dtype): """Construct a record array from a text file, using regular-expressions parsing. - Groups in the regular exespression are converted to fields. + Array is constructed from all matches of the regular expression + in the file. Groups in the regular expression are converted to fields. + + Parameters + ---------- + file : str or file + File name or file object to read + regexp : str or regexp + Regular expression to use to parse the file + dtype : dtype or dtype list + Dtype for the record array + + Example + ------- + >>> import numpy as np + >>> f = open('test.dat', 'w') + >>> f.write("1312 foo\n1534 bar\n 444 qux") + >>> f.close() + >>> np.fromregex('test.dat', r"(\d+)\s+(...)", [('num', np.int64), ('key', 'S3')]) + array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')], + dtype=[('num', '<i8'), ('key', '|S3')]) + """ if not hasattr(file, "read"): file = open(file,'r') if not hasattr(regexp, 'match'): regexp = re.compile(regexp) - + if not isinstance(dtype, np.dtype): + dtype = np.dtype(dtype) + seq = regexp.findall(file.read()) - dtypelist = [] - for key, value in kwds.values(): - dtypelist.append((key, value)) - format = np.dtype(dtypelist) - output = array(seq, dtype=format) + if seq and not isinstance(seq[0], tuple): + # make sure np.array doesn't interpret strings as binary data + # by always producing a list of tuples + seq = [(x,) for x in seq] + output = np.array(seq, dtype=dtype) return output - - + diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index cd92491ba..9f7a585ef 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -2,27 +2,29 @@ from numpy.testing import * import numpy as np import StringIO -class 
Testsavetxt(NumpyTestCase): +class TestSaveTxt(NumpyTestCase): def test_array(self): a =np.array( [[1,2],[3,4]], float) c = StringIO.StringIO() np.savetxt(c, a) c.seek(0) - assert(c.readlines(), ['1.000000000000000000e+00 2.000000000000000000e+00\n', '3.000000000000000000e+00 4.000000000000000000e+00\n']) + assert(c.readlines(), + ['1.000000000000000000e+00 2.000000000000000000e+00\n', + '3.000000000000000000e+00 4.000000000000000000e+00\n']) a =np.array( [[1,2],[3,4]], int) c = StringIO.StringIO() np.savetxt(c, a) c.seek(0) assert(c.readlines(), ['1 2\n', '3 4\n']) - + def test_1D(self): a = np.array([1,2,3,4], int) c = StringIO.StringIO() np.savetxt(c, a, fmt='%d') c.seek(0) assert(c.readlines(), ['1\n', '2\n', '3\n', '4\n']) - + def test_record(self): a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) c = StringIO.StringIO() @@ -30,9 +32,8 @@ class Testsavetxt(NumpyTestCase): c.seek(0) assert(c.readlines(), ['1 2\n', '3 4\n']) - -class Testloadtxt(NumpyTestCase): +class TestLoadTxt(NumpyTestCase): def test_record(self): c = StringIO.StringIO() c.write('1 2\n3 4') @@ -40,31 +41,33 @@ class Testloadtxt(NumpyTestCase): x = np.loadtxt(c, dtype=[('x', np.int32), ('y', np.int32)]) a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) assert_array_equal(x, a) - + d = StringIO.StringIO() d.write('M 64.0 75.0\nF 25.0 60.0') d.seek(0) - mydescriptor = {'names': ('gender','age','weight'), 'formats': ('S1', - 'i4', 'f4')} - b = np.array([('M', 64.0, 75.0),('F', 25.0, 60.0)], dtype=mydescriptor) + mydescriptor = {'names': ('gender','age','weight'), + 'formats': ('S1', + 'i4', 'f4')} + b = np.array([('M', 64.0, 75.0), + ('F', 25.0, 60.0)], dtype=mydescriptor) y = np.loadtxt(d, dtype=mydescriptor) assert_array_equal(y, b) - + def test_array(self): c = StringIO.StringIO() c.write('1 2\n3 4') - + c.seek(0) x = np.loadtxt(c, dtype=int) a = np.array([[1,2],[3,4]], int) assert_array_equal(x, a) - + c.seek(0) x = np.loadtxt(c, dtype=float) a = 
np.array([[1,2],[3,4]], float) assert_array_equal(x, a) - + def test_1D(self): c = StringIO.StringIO() c.write('1\n2\n3\n4\n') @@ -72,15 +75,15 @@ class Testloadtxt(NumpyTestCase): x = np.loadtxt(c, dtype=int) a = np.array([1,2,3,4], int) assert_array_equal(x, a) - + c = StringIO.StringIO() c.write('1,2,3,4\n') c.seek(0) x = np.loadtxt(c, dtype=int, delimiter=',') a = np.array([1,2,3,4], int) assert_array_equal(x, a) - - + + def test_missing(self): c = StringIO.StringIO() c.write('1,2,3,,5\n') @@ -89,7 +92,38 @@ class Testloadtxt(NumpyTestCase): converters={3:lambda s: int(s or -999)}) a = np.array([1,2,3,-999,5], int) assert_array_equal(x, a) - - + +class Testfromregex(NumpyTestCase): + def test_record(self): + c = StringIO.StringIO() + c.write('1.312 foo\n1.534 bar\n4.444 qux') + c.seek(0) + + dt = [('num', np.float64), ('val', 'S3')] + x = np.fromregex(c, r"([0-9.]+)\s+(...)", dt) + a = np.array([(1.312, 'foo'), (1.534, 'bar'), (4.444, 'qux')], dtype=dt) + assert_array_equal(x, a) + + def test_record_2(self): + return # pass this test until #736 is resolved + c = StringIO.StringIO() + c.write('1312 foo\n1534 bar\n4444 qux') + c.seek(0) + + dt = [('num', np.int32), ('val', 'S3')] + x = np.fromregex(c, r"(\d+)\s+(...)", dt) + a = np.array([(1312, 'foo'), (1534, 'bar'), (4444, 'qux')], dtype=dt) + assert_array_equal(x, a) + + def test_record_3(self): + c = StringIO.StringIO() + c.write('1312 foo\n1534 bar\n4444 qux') + c.seek(0) + + dt = [('num', np.float64)] + x = np.fromregex(c, r"(\d+)\s+...", dt) + a = np.array([(1312,), (1534,), (4444,)], dtype=dt) + assert_array_equal(x, a) + if __name__ == "__main__": NumpyTest().run() |