summary refs log tree commit diff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/lib/_iotools.py 7
-rw-r--r-- numpy/lib/io.py 48
-rw-r--r-- numpy/lib/tests/test__iotools.py 11
-rw-r--r-- numpy/lib/tests/test_io.py 40
4 files changed, 90 insertions, 16 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 398ed07a4..c69bd84dc 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -535,7 +535,10 @@ class StringConverter:
for (i, (deftype, func, default_def)) in enumerate(self._mapper):
if np.issubdtype(ttype, deftype):
_status = i
- self.default = default or default_def
+ if default is None:
+ self.default = default_def
+ else:
+ self.default = default
break
if _status == -1:
# We never found a match in the _mapper...
@@ -552,6 +555,8 @@ class StringConverter:
if missing_values is None:
self.missing_values = set([''])
else:
+ if isinstance(missing_values, basestring):
+ missing_values = missing_values.split(",")
self.missing_values = set(list(missing_values) + [''])
#
self._callingfunction = self._strict_call
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
index 1dfded236..da3296ae4 100644
--- a/numpy/lib/io.py
+++ b/numpy/lib/io.py
@@ -1068,7 +1068,10 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
# Check the columns to use
if usecols is not None:
- usecols = list(usecols)
+ try:
+ usecols = list(usecols)
+ except TypeError:
+ usecols = [usecols,]
nbcols = len(usecols or first_values)
# Check the names and overwrite the dtype.names if needed
@@ -1085,11 +1088,23 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
if dtype is not None:
dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names)
- # If usecols is a list of names, convert to a list of indices
+
if usecols:
for (i, current) in enumerate(usecols):
+ # if usecols is a list of names, convert to a list of indices
if _is_string_like(current):
usecols[i] = names.index(current)
+ elif current < 0:
+ usecols[i] = current + len(first_values)
+ # If the dtype is not None, make sure we update it
+ if (dtype is not None) and (len(dtype) > nbcols):
+ descr = dtype.descr
+ dtype = np.dtype([descr[_] for _ in usecols])
+ names = list(dtype.names)
+ # If the dtype is None, update the names
+ elif names is not None:
+ names = [names[_] for _ in usecols]
+
# Process the missing values ...............................
# Rename missing_values for convenience
@@ -1100,11 +1115,16 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
# We have a dictionary: process it field by field
if isinstance(user_missing_values, dict):
- # Loop on the items
+ # Loop on the items
for (key, val) in user_missing_values.items():
- # Make sure the key is an index
+ # Is the key a string ?
if _is_string_like(key):
- key = names.index(key)
+ try:
+ # Transform it into an integer
+ key = names.index(key)
+ except ValueError:
+ # We couldn't find it: the name must have been dropped, then
+ continue
# Redefine the key as needed if it's a column number
if usecols:
try:
@@ -1156,9 +1176,13 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
# We have a dictionary : update each entry individually
if isinstance(user_filling_values, dict):
for (key, val) in user_filling_values.items():
- # Make sure the key is an index
if _is_string_like(key):
- key = names.index(key)
+ try:
+ # Transform it into an integer
+ key = names.index(key)
+ except ValueError:
+ # We couldn't find it: the name must have been dropped, then
+ continue
# Redefine the key if it's a column number and usecols is defined
if usecols:
try:
@@ -1204,8 +1228,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
for (i, conv) in user_converters.items():
# If the converter is specified by column names, use the index instead
if _is_string_like(i):
- i = names.index(i)
- if usecols:
+ try:
+ i = names.index(i)
+ except ValueError:
+ continue
+ elif usecols:
try:
i = usecols.index(i)
except ValueError:
@@ -1220,9 +1247,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
miss_chars = [_.missing_values for _ in converters]
- # Reset the names to match the usecols
- if (not first_line) and usecols:
- names = [names[_] for _ in usecols]
# Initialize the output lists ...
# ... rows
diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py
index ed0f4dc63..11ce4047b 100644
--- a/numpy/lib/tests/test__iotools.py
+++ b/numpy/lib/tests/test__iotools.py
@@ -191,7 +191,16 @@ class TestStringConverter(TestCase):
converter.upgrade('3.14159265')
assert_equal(converter.default, 0)
assert_equal(converter.type, np.dtype(float))
-
+ #
+ def test_keep_default_zero(self):
+ "Check that we don't lose a default of 0"
+ converter = StringConverter(int, default=0, missing_values="N/A")
+ assert_equal(converter.default, 0)
+ #
+ def test_keep_missing_values(self):
+ "Check that we're not losing missing values"
+ converter = StringConverter(int, default=0, missing_values="N/A")
+ assert_equal(converter.missing_values, set(['', 'N/A']))
#-------------------------------------------------------------------------------
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index bb6833451..3357b517a 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -722,7 +722,7 @@ M 33 21.99
def test_usecols(self):
"Test the selection of columns"
# Select 1 column
- control = np.array( [[1, 2], [3, 4]], float)
+ control = np.array([[1, 2], [3, 4]], float)
data = StringIO.StringIO()
np.savetxt(data, control)
data.seek(0)
@@ -739,7 +739,9 @@ M 33 21.99
data.seek(0)
test = np.ndfromtxt(data, dtype=float, usecols=np.array([1, 2]))
assert_equal(test, control[:, 1:])
- # Checking with dtypes defined converters.
+
+ def test_usecols_with_structured_dtype(self):
+ "Test usecols with an explicit structured dtype"
data = StringIO.StringIO("""JOE 70.1 25.3\nBOB 60.5 27.9""")
names = ['stid', 'temp']
dtypes = ['S4', 'f8']
@@ -747,6 +749,22 @@ M 33 21.99
assert_equal(test['stid'], ["JOE", "BOB"])
assert_equal(test['temp'], [25.3, 27.9])
+ def test_usecols_with_integer(self):
+ "Test usecols with an integer"
+ test = np.genfromtxt(StringIO.StringIO("1 2 3\n4 5 6"), usecols=0)
+ assert_equal(test, np.array([1., 4.]))
+
+ def test_usecols_with_named_columns(self):
+ "Test usecols with named columns"
+ ctrl = np.array([(1, 3), (4, 6)], dtype=[('a', float), ('c', float)])
+ data = "1 2 3\n4 5 6"
+ kwargs = dict(names="a, b, c")
+ test = np.genfromtxt(StringIO.StringIO(data), usecols=(0, -1), **kwargs)
+ assert_equal(test, ctrl)
+ test = np.genfromtxt(StringIO.StringIO(data),
+ usecols=('a', 'c'), **kwargs)
+ assert_equal(test, ctrl)
+
def test_empty_file(self):
"Test that an empty file raises the proper exception"
@@ -821,6 +839,24 @@ M 33 21.99
dtype=mdtype)
assert_equal(test, control)
+ def test_user_filling_values(self):
+ "Test with missing and filling values"
+ ctrl = np.array([(0, 3), (4, -999)], dtype=[('a', int), ('b', int)])
+ data = "N/A, 2, 3\n4, ,???"
+ kwargs = dict(delimiter=",",
+ dtype=int,
+ names="a,b,c",
+ missing_values={0:"N/A", 'b':" ", 2:"???"},
+ filling_values={0:0, 'b':0, 2:-999})
+ test = np.genfromtxt(StringIO.StringIO(data), **kwargs)
+ ctrl = np.array([(0, 2, 3), (4, 0, -999)],
+ dtype=[(_, int) for _ in "abc"])
+ assert_equal(test, ctrl)
+ #
+ test = np.genfromtxt(StringIO.StringIO(data), usecols=(0, -1), **kwargs)
+ ctrl = np.array([(0, 3), (4, -999)], dtype=[(_, int) for _ in "ac"])
+ assert_equal(test, ctrl)
+
def test_withmissing_float(self):
data = StringIO.StringIO('A,B\n0,1.5\n2,-999.00')