diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/lib/_iotools.py | 7 | ||||
-rw-r--r-- | numpy/lib/io.py | 48 | ||||
-rw-r--r-- | numpy/lib/tests/test__iotools.py | 11 | ||||
-rw-r--r-- | numpy/lib/tests/test_io.py | 40 |
4 files changed, 90 insertions, 16 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index 398ed07a4..c69bd84dc 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -535,7 +535,10 @@ class StringConverter: for (i, (deftype, func, default_def)) in enumerate(self._mapper): if np.issubdtype(ttype, deftype): _status = i - self.default = default or default_def + if default is None: + self.default = default_def + else: + self.default = default break if _status == -1: # We never found a match in the _mapper... @@ -552,6 +555,8 @@ class StringConverter: if missing_values is None: self.missing_values = set(['']) else: + if isinstance(missing_values, basestring): + missing_values = missing_values.split(",") self.missing_values = set(list(missing_values) + ['']) # self._callingfunction = self._strict_call diff --git a/numpy/lib/io.py b/numpy/lib/io.py index 1dfded236..da3296ae4 100644 --- a/numpy/lib/io.py +++ b/numpy/lib/io.py @@ -1068,7 +1068,10 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Check the columns to use if usecols is not None: - usecols = list(usecols) + try: + usecols = list(usecols) + except TypeError: + usecols = [usecols,] nbcols = len(usecols or first_values) # Check the names and overwrite the dtype.names if needed @@ -1085,11 +1088,23 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if dtype is not None: dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names) - # If usecols is a list of names, convert to a list of indices + if usecols: for (i, current) in enumerate(usecols): + # if usecols is a list of names, convert to a list of indices if _is_string_like(current): usecols[i] = names.index(current) + elif current < 0: + usecols[i] = current + len(first_values) + # If the dtype is not None, make sure we update it + if (dtype is not None) and (len(dtype) > nbcols): + descr = dtype.descr + dtype = np.dtype([descr[_] for _ in usecols]) + names = list(dtype.names) + # If the dtype is None, update the names + elif names is not None: + names = [names[_] for _ in usecols] + # Process the missing values ............................... # Rename missing_values for convenience @@ -1100,11 +1115,16 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # We have a dictionary: process it field by field if isinstance(user_missing_values, dict): - # Loop on the items + # Loop on the items for (key, val) in user_missing_values.items(): - # Make sure the key is an index + # Is the key a string ? if _is_string_like(key): - key = names.index(key) + try: + # Transform it into an integer + key = names.index(key) + except ValueError: + # We couldn't find it: the name must have been dropped, then + continue # Redefine the key as needed if it's a column number if usecols: try: @@ -1156,9 +1176,13 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # We have a dictionary : update each entry individually if isinstance(user_filling_values, dict): for (key, val) in user_filling_values.items(): - # Make sure the key is an index if _is_string_like(key): - key = names.index(key) + try: + # Transform it into an integer + key = names.index(key) + except ValueError: + # We couldn't find it: the name must have been dropped, then + continue # Redefine the key if it's a column number and usecols is defined if usecols: try: @@ -1204,8 +1228,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, for (i, conv) in user_converters.items(): # If the converter is specified by column names, use the index instead if _is_string_like(i): - i = names.index(i) - if usecols: + try: + i = names.index(i) + except ValueError: + continue + elif usecols: try: i = usecols.index(i) except ValueError: @@ -1220,9 +1247,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, miss_chars = [_.missing_values for _ in converters] - # Reset the names to match the usecols - if (not first_line) and usecols: - names = [names[_] for _ in usecols] # Initialize the output lists ... # ... rows diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py index ed0f4dc63..11ce4047b 100644 --- a/numpy/lib/tests/test__iotools.py +++ b/numpy/lib/tests/test__iotools.py @@ -191,7 +191,16 @@ class TestStringConverter(TestCase): converter.upgrade('3.14159265') assert_equal(converter.default, 0) assert_equal(converter.type, np.dtype(float)) - + # + def test_keep_default_zero(self): + "Check that we don't lose a default of 0" + converter = StringConverter(int, default=0, missing_values="N/A") + assert_equal(converter.default, 0) + # + def test_keep_missing_values(self): + "Check that we're not losing missing values" + converter = StringConverter(int, default=0, missing_values="N/A") + assert_equal(converter.missing_values, set(['', 'N/A'])) #------------------------------------------------------------------------------- diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index bb6833451..3357b517a 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -722,7 +722,7 @@ M 33 21.99 def test_usecols(self): "Test the selection of columns" # Select 1 column - control = np.array( [[1, 2], [3, 4]], float) + control = np.array([[1, 2], [3, 4]], float) data = StringIO.StringIO() np.savetxt(data, control) data.seek(0) @@ -739,7 +739,9 @@ M 33 21.99 data.seek(0) test = np.ndfromtxt(data, dtype=float, usecols=np.array([1, 2])) assert_equal(test, control[:, 1:]) - # Checking with dtypes defined converters. + + def test_usecols_with_structured_dtype(self): + "Test usecols with an explicit structured dtype" data = StringIO.StringIO("""JOE 70.1 25.3\nBOB 60.5 27.9""") names = ['stid', 'temp'] dtypes = ['S4', 'f8'] @@ -747,6 +749,22 @@ M 33 21.99 assert_equal(test['stid'], ["JOE", "BOB"]) assert_equal(test['temp'], [25.3, 27.9]) + def test_usecols_with_integer(self): + "Test usecols with an integer" + test = np.genfromtxt(StringIO.StringIO("1 2 3\n4 5 6"), usecols=0) + assert_equal(test, np.array([1., 4.])) + + def test_usecols_with_named_columns(self): + "Test usecols with named columns" + ctrl = np.array([(1, 3), (4, 6)], dtype=[('a', float), ('c', float)]) + data = "1 2 3\n4 5 6" + kwargs = dict(names="a, b, c") + test = np.genfromtxt(StringIO.StringIO(data), usecols=(0, -1), **kwargs) + assert_equal(test, ctrl) + test = np.genfromtxt(StringIO.StringIO(data), + usecols=('a', 'c'), **kwargs) + assert_equal(test, ctrl) + def test_empty_file(self): "Test that an empty file raises the proper exception" @@ -821,6 +839,24 @@ M 33 21.99 dtype=mdtype) assert_equal(test, control) + def test_user_filling_values(self): + "Test with missing and filling values" + ctrl = np.array([(0, 3), (4, -999)], dtype=[('a', int), ('b', int)]) + data = "N/A, 2, 3\n4, ,???" + kwargs = dict(delimiter=",", + dtype=int, + names="a,b,c", + missing_values={0:"N/A", 'b':" ", 2:"???"}, + filling_values={0:0, 'b':0, 2:-999}) + test = np.genfromtxt(StringIO.StringIO(data), **kwargs) + ctrl = np.array([(0, 2, 3), (4, 0, -999)], + dtype=[(_, int) for _ in "abc"]) + assert_equal(test, ctrl) + # + test = np.genfromtxt(StringIO.StringIO(data), usecols=(0, -1), **kwargs) + ctrl = np.array([(0, 3), (4, -999)], dtype=[(_, int) for _ in "ac"]) + assert_equal(test, ctrl) + def test_withmissing_float(self): data = StringIO.StringIO('A,B\n0,1.5\n2,-999.00') |