Diffstat (limited to 'numpy/lib')
-rw-r--r--  numpy/lib/_iotools.py             469
-rw-r--r--  numpy/lib/io.py                   476
-rw-r--r--  numpy/lib/tests/test__iotools.py  140
-rw-r--r--  numpy/lib/tests/test_io.py        358
4 files changed, 1436 insertions, 7 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
new file mode 100644
index 000000000..3f3c6655e
--- /dev/null
+++ b/numpy/lib/_iotools.py
@@ -0,0 +1,469 @@
+"""
+A collection of functions designed to help with I/O for ASCII files.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import numpy as np
+import numpy.core.numeric as nx
+from __builtin__ import bool, int, long, float, complex, object, unicode, str
+
+
+def _is_string_like(obj):
+ """
+ Check whether obj behaves like a string.
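+
+ For example:
+
+ >>> _is_string_like('abc'), _is_string_like(1)
+ (True, False)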
+ """
+ try:
+ obj + ''
+ except (TypeError, ValueError):
+ return False
+ return True
+
+
+def _to_filehandle(fname, flag='r', return_opened=False):
+ """
+ Returns the filehandle corresponding to a string or a file.
+ If the string ends in '.gz' or '.bz2', the file is automatically
+ decompressed.
+
+ Parameters
+ ----------
+ fname : string, filehandle
+ Name of the file whose filehandle must be returned.
+ flag : string, optional
+ Flag indicating the status of the file ('r' for read, 'w' for write).
+ return_opened : boolean, optional
+ Whether to return the opening status of the file.
+ """
+ if _is_string_like(fname):
+ if fname.endswith('.gz'):
+ import gzip
+ fhd = gzip.open(fname, flag)
+ elif fname.endswith('.bz2'):
+ import bz2
+ fhd = bz2.BZ2File(fname)
+ else:
+ fhd = file(fname, flag)
+ opened = True
+ elif hasattr(fname, 'seek'):
+ fhd = fname
+ opened = False
+ else:
+ raise ValueError('fname must be a string or file handle')
+ if return_opened:
+ return fhd, opened
+ return fhd
+
+
+def flatten_dtype(ndtype):
+ """
+ Unpack a structured data-type.
+
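+ Examples
+ --------
+ A small doctest-style sketch (dtype reprs may differ slightly across
+ numpy versions):
+
+ >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
+ >>> flatten_dtype(dt)
+ [dtype('|S4'), dtype('float64'), dtype('float64')]
+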
+ """
+ names = ndtype.names
+ if names is None:
+ return [ndtype]
+ else:
+ types = []
+ for field in names:
+ (typ, _) = ndtype.fields[field]
+ flat_dt = flatten_dtype(typ)
+ types.extend(flat_dt)
+ return types
+
+
+
+class LineSplitter:
+ """
+ Defines a function to split a string at a given delimiter or at given places.
+
+ Parameters
+ ----------
+ comments : string, optional
+ Character used to mark the beginning of a comment. Default is '#'.
+ delimiter : var, optional
+ If a string, character used to delimit consecutive fields.
+ If an integer or a sequence of integers, width(s) of each field.
+ autostrip : boolean, optional
+ Whether to strip each individual field. Default is True.
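+
+ Examples
+ --------
+ A short sketch of the splitting modes (expected outputs follow from the
+ implementation below):
+
+ >>> LineSplitter(',')("1, 2, 3  # comment")
+ ['1', '2', '3']
+ >>> LineSplitter(3)("123456")
+ ['123', '456']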
+ """
+
+ def autostrip(self, method):
+ "Wrapper to strip each member of the output of `method`."
+ return lambda input: [_.strip() for _ in method(input)]
+ #
+ def __init__(self, delimiter=None, comments='#', autostrip=True):
+ self.comments = comments
+ # Delimiter is a character
+ if (delimiter is None) or _is_string_like(delimiter):
+ delimiter = delimiter or None
+ _handyman = self._delimited_splitter
+ # Delimiter is a list of field widths
+ elif hasattr(delimiter, '__iter__'):
+ _handyman = self._variablewidth_splitter
+ idx = np.cumsum([0]+list(delimiter))
+ delimiter = [slice(i,j) for (i,j) in zip(idx[:-1], idx[1:])]
+ # Delimiter is a single integer
+ elif int(delimiter):
+ (_handyman, delimiter) = (self._fixedwidth_splitter, int(delimiter))
+ else:
+ (_handyman, delimiter) = (self._delimited_splitter, None)
+ self.delimiter = delimiter
+ if autostrip:
+ self._handyman = self.autostrip(_handyman)
+ else:
+ self._handyman = _handyman
+ #
+ def _delimited_splitter(self, line):
+ line = line.split(self.comments)[0].strip()
+ if not line:
+ return []
+ return line.split(self.delimiter)
+ #
+ def _fixedwidth_splitter(self, line):
+ line = line.split(self.comments)[0]
+ if not line:
+ return []
+ fixed = self.delimiter
+ slices = [slice(i, i+fixed) for i in range(len(line))[::fixed]]
+ return [line[s] for s in slices]
+ #
+ def _variablewidth_splitter(self, line):
+ line = line.split(self.comments)[0]
+ if not line:
+ return []
+ slices = self.delimiter
+ return [line[s] for s in slices]
+ #
+ def __call__(self, line):
+ return self._handyman(line)
+
+
+
+class NameValidator:
+ """
+ Validates a list of strings to use as field names.
+ The strings are stripped of any non-alphanumeric character, and spaces
+ are replaced by `_`. If the optional input parameter `case_sensitive`
+ is False, the strings are set to upper case.
+
+ During instantiation, the user can define a list of names to exclude, as
+ well as a list of invalid characters. Names in the exclusion list
+ have an underscore appended.
+
+ Once an instance has been created, it can be called with a list of names
+ and a list of valid names will be created.
+ The `__call__` method accepts an optional keyword, `default`, that sets
+ the default name in case of ambiguity. By default, `default = 'f'`, so
+ that names will default to `f0`, `f1`, and so on.
+
+ Parameters
+ ----------
+ excludelist : sequence, optional
+ A list of names to exclude. This list is appended to the default list
+ ['return','file','print']. Excluded names have an underscore appended:
+ for example, `file` would become `file_`.
+ deletechars : string, optional
+ A string combining invalid characters that must be deleted from the names.
+ case_sensitive : {True, False, 'upper', 'lower'}, optional
+ If True, field names are case-sensitive.
+ If False or 'upper', field names are converted to upper case.
+ If 'lower', field names are converted to lower case.
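+
+ Examples
+ --------
+ A short sketch (outputs inferred from the rules above):
+
+ >>> validator = NameValidator()
+ >>> validator(['file', 'a b', 'a b'])
+ ['file_', 'a_b', 'a_b_1']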
+ """
+ #
+ defaultexcludelist = ['return','file','print']
+ defaultdeletechars = set("""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")
+ #
+ def __init__(self, excludelist=None, deletechars=None, case_sensitive=None):
+ #
+ if excludelist is None:
+ excludelist = []
+ excludelist.extend(self.defaultexcludelist)
+ self.excludelist = excludelist
+ #
+ if deletechars is None:
+ delete = self.defaultdeletechars
+ else:
+ delete = set(deletechars)
+ delete.add('"')
+ self.deletechars = delete
+
+ if (case_sensitive is None) or (case_sensitive is True):
+ self.case_converter = lambda x: x
+ elif (case_sensitive is False) or ('u' in case_sensitive):
+ self.case_converter = lambda x: x.upper()
+ elif 'l' in case_sensitive:
+ self.case_converter = lambda x: x.lower()
+ else:
+ self.case_converter = lambda x: x
+ #
+ def validate(self, names, default='f'):
+ #
+ if names is None:
+ return
+ #
+ validatednames = []
+ seen = dict()
+ #
+ deletechars = self.deletechars
+ excludelist = self.excludelist
+ #
+ case_converter = self.case_converter
+ #
+ for i, item in enumerate(names):
+ item = case_converter(item)
+ item = item.strip().replace(' ', '_')
+ item = ''.join([c for c in item if c not in deletechars])
+ if not len(item):
+ item = '%s%d' % (default, i)
+ elif item in excludelist:
+ item += '_'
+ cnt = seen.get(item, 0)
+ if cnt > 0:
+ validatednames.append(item + '_%d' % cnt)
+ else:
+ validatednames.append(item)
+ seen[item] = cnt+1
+ return validatednames
+ #
+ def __call__(self, names, default='f'):
+ return self.validate(names, default)
+
+
+
+def str2bool(value):
+ """
+ Transform a string representing a boolean into the corresponding boolean.
+
+ Raises
+ ------
+ ValueError
+ If the string is not 'True' or 'False' (case-insensitive).
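+
+ Examples
+ --------
+ >>> str2bool('TRUE')
+ True
+ >>> str2bool('false')
+ False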
+ """
+ value = value.upper()
+ if value == 'TRUE':
+ return True
+ elif value == 'FALSE':
+ return False
+ else:
+ raise ValueError("Invalid boolean")
+
+
+
+class StringConverter:
+ """
+ Factory class for functions transforming a string into another object
+ (int, float, etc.).
+
+ After initialization, an instance can be called to transform a string
+ into another object. If the string is recognized as representing a missing
+ value, a default value is returned.
+
+ Parameters
+ ----------
+ dtype_or_func : {None, dtype, function}, optional
+ Input data type, used to define a basic function and a default value
+ for missing data. For example, when `dtype` is float, the :attr:`func`
+ attribute is set to ``float`` and the default value to `np.nan`.
+ Alternatively, a function used to convert a string to another object.
+ In the latter case, it is recommended to give an associated default
+ value as input.
+ default : {None, var}, optional
+ Value to return by default, that is, when the string to be converted
+ is flagged as missing.
+ missing_values : {sequence}, optional
+ Sequence of strings indicating a missing value.
+ locked : {boolean}, optional
+ Whether the StringConverter should be locked to prevent automatic
+ upgrade or not.
+
+ Attributes
+ ----------
+ func : function
+ Function used for the conversion
+ default : var
+ Default value to return when the input corresponds to a missing value.
+ type : type
+ Type of the output.
+ _status : integer
+ Integer representing the order of the conversion.
+ _mapper : sequence of tuples
+ Sequence of tuples (dtype, function, default value) to evaluate in order.
+ _locked : boolean
+ Whether the StringConverter is locked, thereby preventing any
+ automatic upgrade.
+
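+ Examples
+ --------
+ A minimal usage sketch (the default for the int converter follows from
+ `_mapper`):
+
+ >>> convert = StringConverter(int, default=-1)
+ >>> convert('42')
+ 42
+ >>> convert('')
+ -1
+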
+ """
+ #
+ _mapper = [(nx.bool_, str2bool, None),
+ (nx.integer, int, -1),
+ (nx.floating, float, nx.nan),
+ (complex, complex, nx.nan+0j),
+ (nx.string_, str, '???')]
+ (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
+ #
+ @classmethod
+ def _getsubdtype(cls, val):
+ """Returns the type of the dtype of the input variable."""
+ return np.array(val).dtype.type
+ #
+ @classmethod
+ def upgrade_mapper(cls, func, default=None):
+ """
+ Upgrade the mapper of a StringConverter by adding a new function and its
+ corresponding default.
+
+ The input function (or sequence of functions) and its associated default
+ value (if any) are inserted in penultimate position of the mapper.
+ The corresponding type is estimated from the dtype of the default value.
+
+ Parameters
+ ----------
+ func : var
+ Function, or sequence of functions.
+ default : var, optional
+ Default value associated with the function(s).
+
+ Examples
+ --------
+ >>> import dateutil.parser
+ >>> import datetime
+ >>> dateparser = dateutil.parser.parse
+ >>> defaultdate = datetime.date(2000, 1, 1)
+ >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
+ """
+ # Func is a single function
+ if hasattr(func, '__call__'):
+ cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
+ return
+ elif hasattr(func, '__iter__'):
+ if isinstance(func[0], (tuple, list)):
+ for _ in func:
+ cls._mapper.insert(-1, _)
+ return
+ if default is None:
+ default = [None] * len(func)
+ else:
+ default = list(default)
+ default.extend([None] * (len(func)-len(default)))
+ for (fct, dft) in zip(func, default):
+ cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))
+ #
+ def __init__(self, dtype_or_func=None, default=None, missing_values=None,
+ locked=False):
+ # Defines a lock for upgrade
+ self._locked = bool(locked)
+ # No input dtype: minimal initialization
+ if dtype_or_func is None:
+ self.func = str2bool
+ self._status = 0
+ self.default = default
+ ttype = np.bool
+ else:
+ # Is the input a np.dtype ?
+ try:
+ self.func = None
+ ttype = np.dtype(dtype_or_func).type
+ except TypeError:
+ # dtype_or_func must be a function, then
+ if not hasattr(dtype_or_func, '__call__'):
+ errmsg = "The input argument `dtype` is neither a function"\
+ " or a dtype (got '%s' instead)"
+ raise TypeError(errmsg % type(dtype_or_func))
+ # Set the function
+ self.func = dtype_or_func
+ # If we don't have a default, try to guess it or set it to None
+ if default is None:
+ try:
+ default = self.func('0')
+ except ValueError:
+ default = None
+ ttype = self._getsubdtype(default)
+ # Set the status according to the dtype
+ for (i, (deftype, func, default_def)) in enumerate(self._mapper):
+ if np.issubdtype(ttype, deftype):
+ self._status = i
+ self.default = default or default_def
+ break
+ # If the input was a dtype, set the function to the last we saw
+ if self.func is None:
+ self.func = func
+ # If the status is 1 (int), change the function to something more robust
+ if self.func == self._mapper[1][1]:
+ self.func = lambda x : int(float(x))
+ # Store the list of strings corresponding to missing values.
+ if missing_values is None:
+ self.missing_values = set([''])
+ else:
+ self.missing_values = set(list(missing_values) + [''])
+ #
+ self._callingfunction = self._strict_call
+ self.type = ttype
+ #
+ def _loose_call(self, value):
+ try:
+ return self.func(value)
+ except ValueError:
+ return self.default
+ #
+ def _strict_call(self, value):
+ try:
+ return self.func(value)
+ except ValueError:
+ if value.strip() in self.missing_values:
+ return self.default
+ raise ValueError("Cannot convert string '%s'" % value)
+ #
+ def __call__(self, value):
+ return self._callingfunction(value)
+ #
+ def upgrade(self, value):
+ """
+ Tries to find the best converter for `value`, by testing different
+ converters in order.
+ The order in which the converters are tested is read from the
+ :attr:`_status` attribute of the instance.
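+
+ For example (statuses follow the default `_mapper` order):
+
+ >>> convert = StringConverter()
+ >>> convert._status
+ 0
+ >>> convert.upgrade('10')
+ >>> convert._status
+ 1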
+ """
+ try:
+ self._strict_call(value)
+ except ValueError:
+ # Raise an exception if we locked the converter...
+ if self._locked:
+ raise ValueError("Converter is locked and cannot be upgraded")
+ _statusmax = len(self._mapper)
+ # Complain if we have already reached the maximum
+ if self._status == _statusmax:
+ raise ValueError("Could not find a valid conversion function")
+ elif self._status < _statusmax - 1:
+ self._status += 1
+ (self.type, self.func, self.default) = self._mapper[self._status]
+ self.upgrade(value)
+ #
+ def update(self, func, default=None, missing_values='', locked=False):
+ """
+ Sets the :attr:`func` and :attr:`default` attributes directly.
+
+ Parameters
+ ----------
+ func : function
+ Conversion function.
+ default : {var}, optional
+ Default value to return when a missing value is encountered.
+ missing_values : {var}, optional
+ Sequence of strings representing missing values.
+ locked : {False, True}, optional
+ Whether the status should be locked to prevent automatic upgrade.
+ """
+ self.func = func
+ self._locked = locked
+ # Don't reset the default to None if we can avoid it
+ if default is not None:
+ self.default = default
+ # Add the missing values to the existing set
+ if missing_values is not None:
+ if _is_string_like(missing_values):
+ self.missing_values.add(missing_values)
+ elif hasattr(missing_values, '__iter__'):
+ for val in missing_values:
+ self.missing_values.add(val)
+ else:
+ self.missing_values = []
+ # Update the type
+ self.type = self._getsubdtype(func('0'))
+
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
index e9a012db1..303796d5f 100644
--- a/numpy/lib/io.py
+++ b/numpy/lib/io.py
@@ -1,4 +1,5 @@
__all__ = ['savetxt', 'loadtxt',
+ 'genfromtxt', 'ndfromtxt', 'mafromtxt', 'recfromtxt', 'recfromcsv',
'load', 'loads',
'save', 'savez',
'packbits', 'unpackbits',
@@ -15,7 +16,11 @@ from cPickle import load as _cload, loads
from _datasource import DataSource
from _compiled_base import packbits, unpackbits
+from _iotools import LineSplitter, NameValidator, StringConverter, \
+ _is_string_like, flatten_dtype
+
_file = file
+_string_like = _is_string_like
class BagObj(object):
"""A simple class that converts attribute lookups to
@@ -264,10 +269,6 @@ def _getconv(dtype):
return str
-def _string_like(obj):
- try: obj + ''
- except (TypeError, ValueError): return 0
- return 1
def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
skiprows=0, usecols=None, unpack=False):
@@ -342,7 +343,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
if usecols is not None:
usecols = list(usecols)
- if _string_like(fname):
+ if _is_string_like(fname):
if fname.endswith('.gz'):
import gzip
fh = gzip.open(fname)
@@ -520,7 +521,7 @@ def savetxt(fname, X, fmt='%.18e',delimiter=' '):
"""
- if _string_like(fname):
+ if _is_string_like(fname):
if fname.endswith('.gz'):
import gzip
fh = gzip.open(fname,'wb')
@@ -608,3 +609,466 @@ def fromregex(file, regexp, dtype):
seq = [(x,) for x in seq]
output = np.array(seq, dtype=dtype)
return output
+
+
+
+
+#####--------------------------------------------------------------------------
+#---- --- ASCII functions ---
+#####--------------------------------------------------------------------------
+
+
+
+def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
+ converters=None, missing='', missing_values=None, usecols=None,
+ names=None, excludelist=None, deletechars=None,
+ case_sensitive=True, unpack=None, usemask=False, loose=True):
+ """
+ Load data from a text file.
+
+ Each line past the first `skiprows` ones is split at the `delimiter`
+ character, and characters following the `comments` character are discarded.
+
+ Parameters
+ ----------
+ fname : file or string
+ File or filename to read. If the filename extension is `.gz` or `.bz2`,
+ the file is first decompressed.
+ dtype : data-type
+ Data type of the resulting array. If this is a flexible data-type,
+ the resulting array will be 1-dimensional, and each row will be
+ interpreted as an element of the array. In this case, the number
+ of columns used must match the number of fields in the data-type,
+ and the names of each field will be set by the corresponding name
+ of the dtype.
+ If None, the dtypes will be determined by the contents of each
+ column, individually.
+ comments : {string}, optional
+ The character used to indicate the start of a comment.
+ All the characters occurring on a line after a comment are discarded.
+ delimiter : {string}, optional
+ The string used to separate values. By default, any consecutive
+ whitespace acts as the delimiter.
+ skiprows : {int}, optional
+ Number of lines to skip at the beginning of the file.
+ converters : {None, dictionary}, optional
+ A dictionary mapping column number to a function that will convert
+ values in the column to a number. Converters can also be used to
+ provide a default value for missing data:
+ ``converters = {3: lambda s: float(s or 0)}``.
+ missing : {string}, optional
+ A string representing a missing value, irrespective of the column where
+ it appears (e.g., `'missing'` or `'unused'`).
+ missing_values : {None, dictionary}, optional
+ A dictionary mapping a column number to a string indicating whether the
+ corresponding field should be masked.
+ usecols : {None, sequence}, optional
+ Which columns to read, with 0 being the first. For example,
+ ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
+ names : {None, True, string, sequence}, optional
+ If `names` is True, the field names are read from the first valid line
+ after the first `skiprows` lines.
+ If `names` is a sequence or a single string of comma-separated names,
+ the names will be used to define the field names in a flexible dtype.
+ If `names` is None, the names of the dtype fields will be used, if any.
+ excludelist : {sequence}, optional
+ A list of names to exclude. This list is appended to the default list
+ ['return','file','print']. Excluded names have an underscore appended:
+ for example, `file` would become `file_`.
+ deletechars : {string}, optional
+ A string combining invalid characters that must be deleted from the names.
+ case_sensitive : {True, False, 'upper', 'lower'}, optional
+ If True, field names are case-sensitive.
+ If False or 'upper', field names are converted to upper case.
+ If 'lower', field names are converted to lower case.
+ unpack : {bool}, optional
+ If True, the returned array is transposed, so that arguments may be
+ unpacked using ``x, y, z = genfromtxt(...)``.
+ usemask : {bool}, optional
+ If True, returns a masked array.
+ If False, returns a regular ndarray.
+
+ Returns
+ -------
+ out : ndarray or MaskedArray
+ Data read from the text file.
+
+ Notes
+ -----
+ * When spaces are used as delimiters, or when no delimiter has been given
+ as input, there should not be any missing data between two fields.
+ * When the variables are named (either by a flexible dtype or with `names`),
+ there must not be any header in the file (else a :exc:`ValueError`
+ exception is raised).
+
+ See Also
+ --------
+ numpy.loadtxt : equivalent function when no data is missing.
+
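+ Examples
+ --------
+ A minimal sketch using StringIO as a stand-in for a file:
+
+ >>> from StringIO import StringIO
+ >>> data = StringIO("1 2\\n3 4")
+ >>> np.genfromtxt(data, dtype=None)
+ array([[1, 2],
+        [3, 4]])
+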
+ """
+ #
+ if usemask:
+ from numpy.ma import MaskedArray, make_mask_descr
+ # Check the input dictionary of converters
+ user_converters = converters or {}
+ if not isinstance(user_converters, dict):
+ errmsg = "The input argument 'converter' should be a valid dictionary "\
+ "(got '%s' instead)"
+ raise TypeError(errmsg % type(user_converters))
+ # Check the input dictionary of missing values
+ user_missing_values = missing_values or {}
+ if not isinstance(user_missing_values, dict):
+ errmsg = "The input argument 'missing_values' should be a valid "\
+ "dictionary (got '%s' instead)"
+ raise TypeError(errmsg % type(missing_values))
+ defmissing = [_.strip() for _ in missing.split(',')] + ['']
+
+ # Initialize the filehandle, the LineSplitter and the NameValidator
+# fhd = _to_filehandle(fname)
+ if isinstance(fname, basestring):
+ fhd = np.lib._datasource.open(fname)
+ elif not hasattr(fname, 'read'):
+ raise TypeError("The input should be a string or a filehandle. "\
+ "(got %s instead)" % type(fname))
+ else:
+ fhd = fname
+ split_line = LineSplitter(delimiter=delimiter, comments=comments,
+ autostrip=False)._handyman
+ validate_names = NameValidator(excludelist=excludelist,
+ deletechars=deletechars,
+ case_sensitive=case_sensitive)
+
+ # Get the first valid lines after the first skiprows ones
+ for i in xrange(skiprows):
+ fhd.readline()
+ first_values = None
+ while not first_values:
+ first_line = fhd.readline()
+ if first_line == '':
+ raise IOError('End-of-file reached before encountering data.')
+ first_values = split_line(first_line)
+
+ # Check the columns to use
+ if usecols is not None:
+ usecols = list(usecols)
+ nbcols = len(usecols or first_values)
+
+ # Check the names and overwrite the dtype.names if needed
+ if dtype is not None:
+ dtype = np.dtype(dtype)
+ dtypenames = getattr(dtype, 'names', None)
+ if names is True:
+ names = validate_names([_.strip() for _ in first_values])
+ first_line = ''
+ elif _is_string_like(names):
+ names = validate_names([_.strip() for _ in names.split(',')])
+ elif names:
+ names = validate_names(names)
+ elif dtypenames:
+ dtype.names = validate_names(dtypenames)
+ if names and dtypenames:
+ dtype.names = names
+
+ # If usecols is a list of names, convert to a list of indices
+ if usecols:
+ for (i, current) in enumerate(usecols):
+ if _is_string_like(current):
+ usecols[i] = names.index(current)
+
+ # If user_missing_values has names as keys, transform them to indices
+ missing_values = {}
+ for (key, val) in user_missing_values.iteritems():
+ # If val is a list, flatten it. In any case, add missing &'' to the list
+ if isinstance(val, (list, tuple)):
+ val = [str(_) for _ in val]
+ else:
+ val = [str(val),]
+ val.extend(defmissing)
+ if _is_string_like(key):
+ try:
+ missing_values[names.index(key)] = val
+ except ValueError:
+ pass
+ else:
+ missing_values[key] = val
+
+
+ # Initialize the default converters
+ if dtype is None:
+ # Note: we can't use a [...]*nbcols, as we would end up with nbcols
+ # ... references to the same converter, instead of nbcols different ones.
+ converters = [StringConverter(None,
+ missing_values=missing_values.get(_, defmissing))
+ for _ in range(nbcols)]
+ else:
+ flatdtypes = flatten_dtype(dtype)
+ # Initialize the converters
+ if len(flatdtypes) > 1:
+ # Flexible type : get a converter from each dtype
+ converters = [StringConverter(dt,
+ missing_values=missing_values.get(i, defmissing),
+ locked=True)
+ for (i, dt) in enumerate(flatdtypes)]
+ else:
+ # Set to a default converter (but w/ different missing values)
+ converters = [StringConverter(dtype,
+ missing_values=missing_values.get(_, defmissing),
+ locked=True)
+ for _ in range(nbcols)]
+ missing_values = [_.missing_values for _ in converters]
+
+ # Update the converters to use the user-defined ones
+ for (i, conv) in user_converters.iteritems():
+ # If the converter is specified by column names, use the index instead
+ if _is_string_like(i):
+ i = names.index(i)
+ if usecols:
+ try:
+ i = usecols.index(i)
+ except ValueError:
+ # Unused converter specified
+ continue
+ converters[i].update(conv, default=None,
+ missing_values=missing_values[i],
+ locked=True)
+
+ # Reset the names to match the usecols
+ if (not first_line) and usecols:
+ names = [names[_] for _ in usecols]
+
+ rows = []
+ append_to_rows = rows.append
+ if usemask:
+ masks = []
+ append_to_masks = masks.append
+ # Parse each line
+ for line in itertools.chain([first_line,], fhd):
+ values = split_line(line)
+ # Skip an empty line
+ if len(values) == 0:
+ continue
+ # Select only the columns we need
+ if usecols:
+ values = [values[_] for _ in usecols]
+ # Check whether we need to update the converter
+ if dtype is None:
+ for (converter, item) in zip(converters, values):
+ converter.upgrade(item)
+ # Store the values
+ append_to_rows(tuple(values))
+ if usemask:
+ append_to_masks(tuple([val.strip() in mss
+ for (val, mss) in zip(values,
+ missing_values)]))
+
+ # Convert each value according to the converter:
+ # We want to modify the list in place to avoid creating a new one...
+ if loose:
+ conversionfuncs = [conv._loose_call for conv in converters]
+ else:
+ conversionfuncs = [conv._strict_call for conv in converters]
+ for (i, vals) in enumerate(rows):
+ rows[i] = tuple([convert(val)
+ for (convert, val) in zip(conversionfuncs, vals)])
+
+ # Reset the dtype
+ data = rows
+ if dtype is None:
+ # Get the dtypes from the first row
+ coldtypes = [np.array(val).dtype for val in data[0]]
+ # Find the columns with strings, and take the largest number of chars.
+ strcolidx = [i for (i, v) in enumerate(coldtypes) if v.char == 'S']
+ for i in strcolidx:
+ coldtypes[i] = "|S%i" % max(len(row[i]) for row in data)
+ #
+ if names is None:
+ # If the dtype is uniform, don't define names, else use ''
+ base = coldtypes[0]
+ if np.all([(dt == base) for dt in coldtypes]):
+ (ddtype, mdtype) = (base, np.bool)
+ else:
+ ddtype = [('', dt) for dt in coldtypes]
+ mdtype = [('', np.bool) for dt in coldtypes]
+ else:
+ ddtype = zip(names, coldtypes)
+ mdtype = zip(names, [np.bool] * len(coldtypes))
+ output = np.array(data, dtype=ddtype)
+ if usemask:
+ outputmask = np.array(masks, dtype=mdtype)
+ else:
+ # Overwrite the initial dtype names if needed
+ if names and dtype.names:
+ dtype.names = names
+ flatdtypes = flatten_dtype(dtype)
+ # Case #1. We have a structured type
+ if len(flatdtypes) > 1:
+ # Nested dtype, e.g. [('a', int), ('b', [('b0', int), ('b1', 'f4')])]
+ # First, create the array using a flattened dtype:
+ # [('a', int), ('b1', int), ('b2', float)]
+ # Then, view the array using the specified dtype.
+ rows = np.array(data, dtype=[('', t) for t in flatdtypes])
+ output = rows.view(dtype)
+ # Now, process the rowmasks the same way
+ if usemask:
+ rowmasks = np.array(masks,
+ dtype=np.dtype([('', np.bool)
+ for t in flatdtypes]))
+ # Construct the new dtype
+ mdtype = make_mask_descr(dtype)
+ outputmask = rowmasks.view(mdtype)
+ # Case #2. We have a basic dtype
+ else:
+ # We used some user-defined converters
+ if user_converters:
+ ishomogeneous = True
+ descr = []
+ for (i, ttype) in enumerate([conv.type for conv in converters]):
+ # Keep the dtype of the current converter
+ if i in user_converters:
+ ishomogeneous &= (ttype == dtype.type)
+ if ttype == np.string_:
+ ttype = "|S%i" % max(len(row[i]) for row in data)
+ descr.append(('', ttype))
+ else:
+ descr.append(('', dtype))
+ if not ishomogeneous:
+ dtype = np.dtype(descr)
+ #
+ output = np.array(data, dtype)
+ if usemask:
+ if dtype.names:
+ mdtype = [(_, np.bool) for _ in dtype.names]
+ else:
+ mdtype = np.bool
+ outputmask = np.array(masks, dtype=mdtype)
+ # Try to take care of the missing data we missed
+ if usemask and output.dtype.names:
+ for (name, conv) in zip(names or (), converters):
+ missing_values = [conv(_) for _ in conv.missing_values if _ != '']
+ for mval in missing_values:
+ outputmask[name] |= (output[name] == mval)
+ # Construct the final array
+ if usemask:
+ output = output.view(MaskedArray)
+ output._mask = outputmask
+ if unpack:
+ return output.squeeze().T
+ return output.squeeze()
+
+
+
+def ndfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
+ converters=None, missing='', missing_values=None,
+ usecols=None, unpack=None, names=None,
+ excludelist=None, deletechars=None, case_sensitive=True,):
+ """
+ Load ASCII data stored in `fname` and return an ndarray.
+
+ Complete description of all the optional input parameters is available in
+ the docstring of the `genfromtxt` function.
+
+ See Also
+ --------
+ numpy.genfromtxt : generic function.
+
+ """
+ kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter,
+ skiprows=skiprows, converters=converters,
+ missing=missing, missing_values=missing_values,
+ usecols=usecols, unpack=unpack, names=names,
+ excludelist=excludelist, deletechars=deletechars,
+ case_sensitive=case_sensitive, usemask=False)
+ return genfromtxt(fname, **kwargs)
+
+def mafromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
+ converters=None, missing='', missing_values=None,
+ usecols=None, unpack=None, names=None,
+ excludelist=None, deletechars=None, case_sensitive=True,):
+ """
+ Load ASCII data stored in `fname` and return a MaskedArray.
+
+ Complete description of all the optional input parameters is available in
+ the docstring of the `genfromtxt` function.
+
+ See Also
+ --------
+ numpy.genfromtxt : generic function.
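+
+ Examples
+ --------
+ A minimal sketch (mask reprs may vary slightly across numpy versions):
+
+ >>> from StringIO import StringIO
+ >>> test = mafromtxt(StringIO("1,2\\n3,N/A"), delimiter=',', missing='N/A')
+ >>> test.mask
+ array([[False, False],
+        [False,  True]], dtype=bool)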
+ """
+ kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter,
+ skiprows=skiprows, converters=converters,
+ missing=missing, missing_values=missing_values,
+ usecols=usecols, unpack=unpack, names=names,
+ excludelist=excludelist, deletechars=deletechars,
+ case_sensitive=case_sensitive,
+ usemask=True)
+ return genfromtxt(fname, **kwargs)
+
+
+def recfromtxt(fname, dtype=None, comments='#', delimiter=None, skiprows=0,
+ converters=None, missing='', missing_values=None,
+ usecols=None, unpack=None, names=None,
+ excludelist=None, deletechars=None, case_sensitive=True,
+ usemask=False):
+ """
+ Load ASCII data stored in `fname` and return a standard recarray (if
+ `usemask=False`) or a MaskedRecords (if `usemask=True`).
+
+ Complete description of all the optional input parameters is available in
+ the docstring of the `genfromtxt` function.
+
+ See Also
+ --------
+ numpy.genfromtxt : generic function
+
+ Warnings
+ --------
+ * By default, `dtype=None`, which means that the dtype of the output array
+ will be determined from the data.
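+
+ Examples
+ --------
+ A minimal sketch:
+
+ >>> from StringIO import StringIO
+ >>> test = recfromtxt(StringIO("1 2\\n3 4"), names="a,b")
+ >>> test.a
+ array([1, 3])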
+ """
+ kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter,
+ skiprows=skiprows, converters=converters,
+ missing=missing, missing_values=missing_values,
+ usecols=usecols, unpack=unpack, names=names,
+ excludelist=excludelist, deletechars=deletechars,
+ case_sensitive=case_sensitive, usemask=usemask)
+ output = genfromtxt(fname, **kwargs)
+ if usemask:
+ from numpy.ma.mrecords import MaskedRecords
+ output = output.view(MaskedRecords)
+ else:
+ output = output.view(np.recarray)
+ return output
+
+
+def recfromcsv(fname, dtype=None, comments='#', skiprows=0,
+ converters=None, missing='', missing_values=None,
+ usecols=None, unpack=None, names=True,
+ excludelist=None, deletechars=None, case_sensitive='lower',
+ usemask=False):
+ """
+ Load ASCII data stored in a comma-separated file and return a recarray
+ (if `usemask=False`) or a MaskedRecords (if `usemask=True`).
+
+ Complete description of all the optional input parameters is available in
+ the docstring of the `genfromtxt` function.
+
+ See Also
+ --------
+ numpy.genfromtxt : generic function
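+
+ Examples
+ --------
+ A minimal sketch (`names=True` and lower-cased field names are the
+ defaults here):
+
+ >>> from StringIO import StringIO
+ >>> test = recfromcsv(StringIO("A,B\\n1,2\\n3,4"))
+ >>> test.a
+ array([1, 3])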
+ """
+ kwargs = dict(dtype=dtype, comments=comments, delimiter=",",
+ skiprows=skiprows, converters=converters,
+ missing=missing, missing_values=missing_values,
+ usecols=usecols, unpack=unpack, names=names,
+ excludelist=excludelist, deletechars=deletechars,
+ case_sensitive=case_sensitive, usemask=usemask)
+ output = genfromtxt(fname, **kwargs)
+ if usemask:
+ from numpy.ma.mrecords import MaskedRecords
+ output = output.view(MaskedRecords)
+ else:
+ output = output.view(np.recarray)
+ return output
+
diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py
new file mode 100644
index 000000000..3c4bc9bb6
--- /dev/null
+++ b/numpy/lib/tests/test__iotools.py
@@ -0,0 +1,140 @@
+
+import StringIO
+
+import numpy as np
+from numpy.lib._iotools import LineSplitter, NameValidator, StringConverter
+from numpy.testing import *
+
+class TestLineSplitter(TestCase):
+ "Tests the LineSplitter class."
+ #
+ def test_no_delimiter(self):
+ "Test LineSplitter w/o delimiter"
+ strg = " 1 2 3 4 5 # test"
+ test = LineSplitter()(strg)
+ assert_equal(test, ['1', '2', '3', '4', '5'])
+ test = LineSplitter('')(strg)
+ assert_equal(test, ['1', '2', '3', '4', '5'])
+
+ def test_space_delimiter(self):
+ "Test space delimiter"
+ strg = " 1 2 3 4 5 # test"
+ test = LineSplitter(' ')(strg)
+ assert_equal(test, ['1', '2', '3', '4', '', '5'])
+ test = LineSplitter(' ')(strg)
+ assert_equal(test, ['1 2 3 4', '5'])
+
+ def test_tab_delimiter(self):
+ "Test tab delimiter"
+ strg= " 1\t 2\t 3\t 4\t 5 6"
+ test = LineSplitter('\t')(strg)
+ assert_equal(test, ['1', '2', '3', '4', '5 6'])
+ strg= " 1 2\t 3 4\t 5 6"
+ test = LineSplitter('\t')(strg)
+ assert_equal(test, ['1 2', '3 4', '5 6'])
+
+ def test_other_delimiter(self):
+ "Test LineSplitter on delimiter"
+ strg = "1,2,3,4,,5"
+ test = LineSplitter(',')(strg)
+ assert_equal(test, ['1', '2', '3', '4', '', '5'])
+ #
+ strg = " 1,2,3,4,,5 # test"
+ test = LineSplitter(',')(strg)
+ assert_equal(test, ['1', '2', '3', '4', '', '5'])
+
+ def test_constant_fixed_width(self):
+ "Test LineSplitter w/ fixed-width fields"
+ strg = " 1 2 3 4 5 # test"
+ test = LineSplitter(3)(strg)
+ assert_equal(test, ['1', '2', '3', '4', '', '5', ''])
+ #
+ strg = " 1 3 4 5 6# test"
+ test = LineSplitter(20)(strg)
+ assert_equal(test, ['1 3 4 5 6'])
+ #
+ strg = " 1 3 4 5 6# test"
+ test = LineSplitter(30)(strg)
+ assert_equal(test, ['1 3 4 5 6'])
+
+ def test_variable_fixed_width(self):
+ strg = " 1 3 4 5 6# test"
+ test = LineSplitter((3,6,6,3))(strg)
+ assert_equal(test, ['1', '3', '4 5', '6'])
+ #
+ strg = " 1 3 4 5 6# test"
+ test = LineSplitter((6,6,9))(strg)
+ assert_equal(test, ['1', '3 4', '5 6'])
+
+
+#-------------------------------------------------------------------------------
+
+class TestNameValidator(TestCase):
+ #
+ def test_case_sensitivity(self):
+ "Test case sensitivity"
+ names = ['A', 'a', 'b', 'c']
+ test = NameValidator().validate(names)
+ assert_equal(test, ['A', 'a', 'b', 'c'])
+ test = NameValidator(case_sensitive=False).validate(names)
+ assert_equal(test, ['A', 'A_1', 'B', 'C'])
+ test = NameValidator(case_sensitive='upper').validate(names)
+ assert_equal(test, ['A', 'A_1', 'B', 'C'])
+ test = NameValidator(case_sensitive='lower').validate(names)
+ assert_equal(test, ['a', 'a_1', 'b', 'c'])
+ #
+ def test_excludelist(self):
+ "Test excludelist"
+ names = ['dates', 'data', 'Other Data', 'mask']
+ validator = NameValidator(excludelist = ['dates', 'data', 'mask'])
+ test = validator.validate(names)
+ assert_equal(test, ['dates_', 'data_', 'Other_Data', 'mask_'])
+
+
+#-------------------------------------------------------------------------------
+
+class TestStringConverter(TestCase):
+ "Test StringConverter"
+ #
+ def test_creation(self):
+ "Test creation of a StringConverter"
+ converter = StringConverter(int, -99999)
+ assert_equal(converter._status, 1)
+ assert_equal(converter.default, -99999)
+ #
+ def test_upgrade(self):
+ "Tests the upgrade method."
+ converter = StringConverter()
+ assert_equal(converter._status, 0)
+ converter.upgrade('0')
+ assert_equal(converter._status, 1)
+ converter.upgrade('0.')
+ assert_equal(converter._status, 2)
+ converter.upgrade('0j')
+ assert_equal(converter._status, 3)
+ converter.upgrade('a')
+ assert_equal(converter._status, len(converter._mapper)-1)
+ #
+ def test_missing(self):
+ "Tests the use of missing values."
+ converter = StringConverter(missing_values=('missing','missed'))
+ converter.upgrade('0')
+ assert_equal(converter('0'), 0)
+ assert_equal(converter(''), converter.default)
+ assert_equal(converter('missing'), converter.default)
+ assert_equal(converter('missed'), converter.default)
+ assert_raises(ValueError, converter, 'miss')
+ #
+ def test_upgrademapper(self):
+ "Tests updatemapper"
+ import dateutil.parser
+ import datetime
+ dateparser = dateutil.parser.parse
+ StringConverter.upgrade_mapper(dateparser, datetime.date(2000,1,1))
+ convert = StringConverter(dateparser, datetime.date(2000, 1, 1))
+ test = convert('2001-01-01')
+ assert_equal(test, datetime.datetime(2001, 1, 1, 0, 0, 0))
+
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 6ccfa818c..58eb7f129 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -1,5 +1,8 @@
-from numpy.testing import *
+
import numpy as np
+import numpy.ma as ma
+from numpy.ma.testutils import *
+
import StringIO
from tempfile import NamedTemporaryFile
@@ -355,5 +358,358 @@ class Testfromregex(TestCase):
assert_array_equal(x, a)
+#####--------------------------------------------------------------------------
+
+
+class TestFromTxt(TestCase):
+ #
+ def test_record(self):
+ "Test w/ explicit dtype"
+ data = StringIO.StringIO('1 2\n3 4')
+# data.seek(0)
+ test = np.ndfromtxt(data, dtype=[('x', np.int32), ('y', np.int32)])
+ control = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
+ assert_equal(test, control)
+ #
+ data = StringIO.StringIO('M 64.0 75.0\nF 25.0 60.0')
+# data.seek(0)
+ descriptor = {'names': ('gender','age','weight'),
+ 'formats': ('S1', 'i4', 'f4')}
+ control = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)],
+ dtype=descriptor)
+ test = np.ndfromtxt(data, dtype=descriptor)
+ assert_equal(test, control)
+
+ def test_array(self):
+ "Test outputing a standard ndarray"
+ data = StringIO.StringIO('1 2\n3 4')
+ control = np.array([[1,2],[3,4]], dtype=int)
+ test = np.ndfromtxt(data, dtype=int)
+ assert_array_equal(test, control)
+ #
+ data.seek(0)
+ control = np.array([[1,2],[3,4]], dtype=float)
+ test = np.loadtxt(data, dtype=float)
+ assert_array_equal(test, control)
+
+ def test_1D(self):
+ "Test squeezing to 1D"
+ control = np.array([1, 2, 3, 4], int)
+ #
+ data = StringIO.StringIO('1\n2\n3\n4\n')
+ test = np.ndfromtxt(data, dtype=int)
+ assert_array_equal(test, control)
+ #
+ data = StringIO.StringIO('1,2,3,4\n')
+ test = np.ndfromtxt(data, dtype=int, delimiter=',')
+ assert_array_equal(test, control)
+
+ def test_comments(self):
+ "Test the stripping of comments"
+ control = np.array([1, 2, 3, 5], int)
+ # Comment on its own line
+ data = StringIO.StringIO('# comment\n1,2,3,5\n')
+ test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#')
+ assert_equal(test, control)
+ # Comment at the end of a line
+ data = StringIO.StringIO('1,2,3,5# comment\n')
+ test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#')
+ assert_equal(test, control)
+
+ def test_skiprows(self):
+ "Test row skipping"
+ control = np.array([1, 2, 3, 5], int)
+ #
+ data = StringIO.StringIO('comment\n1,2,3,5\n')
+ test = np.ndfromtxt(data, dtype=int, delimiter=',', skiprows=1)
+ assert_equal(test, control)
+ #
+ data = StringIO.StringIO('# comment\n1,2,3,5\n')
+ test = np.loadtxt(data, dtype=int, delimiter=',', skiprows=1)
+ assert_equal(test, control)
+
+ def test_header(self):
+ "Test retrieving a header"
+ data = StringIO.StringIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0')
+ test = np.ndfromtxt(data, dtype=None, names=True)
+ control = {'gender': np.array(['M', 'F']),
+ 'age': np.array([64.0, 25.0]),
+ 'weight': np.array([75.0, 60.0])}
+ assert_equal(test['gender'], control['gender'])
+ assert_equal(test['age'], control['age'])
+ assert_equal(test['weight'], control['weight'])
+
+ def test_auto_dtype(self):
+ "Test the automatic definition of the output dtype"
+ data = StringIO.StringIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False')
+ test = np.ndfromtxt(data, dtype=None)
+ control = [np.array(['A', 'BCD']),
+ np.array([64, 25]),
+ np.array([75.0, 60.0]),
+ np.array([3+4j, 5+6j]),
+ np.array([True, False]),]
+ assert_equal(test.dtype.names, ['f0','f1','f2','f3','f4'])
+ for (i, ctrl) in enumerate(control):
+ assert_equal(test['f%i' % i], ctrl)
+
+
+ def test_auto_dtype_uniform(self):
+ "Tests whether the output dtype can be uniformized"
+ data = StringIO.StringIO('1 2 3 4\n5 6 7 8\n')
+ test = np.ndfromtxt(data, dtype=None)
+ control = np.array([[1,2,3,4],[5,6,7,8]])
+ assert_equal(test, control)
+
+
+ def test_fancy_dtype(self):
+ "Check that a nested dtype isn't MIA"
+ data = StringIO.StringIO('1,2,3.0\n4,5,6.0\n')
+ fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
+ test = np.ndfromtxt(data, dtype=fancydtype, delimiter=',')
+ control = np.array([(1,(2,3.0)),(4,(5,6.0))], dtype=fancydtype)
+ assert_equal(test, control)
+
+
+ def test_names_overwrite(self):
+ "Test overwriting the names of the dtype"
+ descriptor = {'names': ('g','a','w'),
+ 'formats': ('S1', 'i4', 'f4')}
+ data = StringIO.StringIO('M 64.0 75.0\nF 25.0 60.0')
+ names = ('gender','age','weight')
+ test = np.ndfromtxt(data, dtype=descriptor, names=names)
+ descriptor['names'] = names
+ control = np.array([('M', 64.0, 75.0),
+ ('F', 25.0, 60.0)], dtype=descriptor)
+ assert_equal(test, control)
+
+
+ def test_autonames_and_usecols(self):
+ "Tests names and usecols"
+ data = StringIO.StringIO('A B C D\n aaaa 121 45 9.1')
+ test = np.ndfromtxt(data, usecols=('A', 'C', 'D'),
+ names=True, dtype=None)
+ control = np.array(('aaaa', 45, 9.1),
+ dtype=[('A', '|S4'), ('C', int), ('D', float)])
+ assert_equal(test, control)
+
+
+ def test_converters_with_usecols(self):
+ "Test the combination user-defined converters and usecol"
+ data = StringIO.StringIO('1,2,3,,5\n6,7,8,9,10\n')
+ test = np.ndfromtxt(data, dtype=int, delimiter=',',
+ converters={3:lambda s: int(s or -999)},
+ usecols=(1, 3, ))
+ control = np.array([[2, -999], [7, 9]], int)
+ assert_equal(test, control)
+
+ def test_converters_with_usecols_and_names(self):
+ "Tests names and usecols"
+ data = StringIO.StringIO('A B C D\n aaaa 121 45 9.1')
+ test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True,
+ dtype=None, converters={'C':lambda s: 2 * int(s)})
+ control = np.array(('aaaa', 90, 9.1),
+ dtype=[('A', '|S4'), ('C', int), ('D', float)])
+ assert_equal(test, control)
+
+
+ def test_unused_converter(self):
+ "Test whether unused converters are forgotten"
+ data = StringIO.StringIO("1 21\n 3 42\n")
+ test = np.ndfromtxt(data, usecols=(1,),
+ converters={0: lambda s: int(s, 16)})
+ assert_equal(test, [21, 42])
+ #
+ data.seek(0)
+ test = np.ndfromtxt(data, usecols=(1,),
+ converters={1: lambda s: int(s, 16)})
+ assert_equal(test, [33, 66])
+
+
+ def test_dtype_with_converters(self):
+ dstr = "2009; 23; 46"
+ test = np.ndfromtxt(StringIO.StringIO(dstr,),
+ delimiter=";", dtype=float, converters={0:str})
+ control = np.array([('2009', 23., 46)],
+ dtype=[('f0','|S4'), ('f1', float), ('f2', float)])
+ assert_equal(test, control)
+ test = np.ndfromtxt(StringIO.StringIO(dstr,),
+ delimiter=";", dtype=float, converters={0:float})
+ control = np.array([2009., 23., 46],)
+ assert_equal(test, control)
+
+
+ def test_spacedelimiter(self):
+ "Test space delimiter"
+ data = StringIO.StringIO("1 2 3 4 5\n6 7 8 9 10")
+ test = np.ndfromtxt(data)
+ control = np.array([[ 1., 2., 3., 4., 5.],
+ [ 6., 7., 8., 9.,10.]])
+ assert_equal(test, control)
+
+
+ def test_missing(self):
+ data = StringIO.StringIO('1,2,3,,5\n')
+ test = np.ndfromtxt(data, dtype=int, delimiter=',', \
+ converters={3:lambda s: int(s or -999)})
+ control = np.array([1, 2, 3, -999, 5], int)
+ assert_equal(test, control)
+
+
+ def test_usecols(self):
+ "Test the selection of columns"
+ # Select 1 column
+ control = np.array( [[1, 2], [3, 4]], float)
+ data = StringIO.StringIO()
+ np.savetxt(data, control)
+ data.seek(0)
+ test = np.ndfromtxt(data, dtype=float, usecols=(1,))
+ assert_equal(test, control[:, 1])
+ #
+ control = np.array( [[1, 2, 3], [3, 4, 5]], float)
+ data = StringIO.StringIO()
+ np.savetxt(data, control)
+ data.seek(0)
+ test = np.ndfromtxt(data, dtype=float, usecols=(1, 2))
+ assert_equal(test, control[:, 1:])
+ # Testing with arrays instead of tuples.
+ data.seek(0)
+ test = np.ndfromtxt(data, dtype=float, usecols=np.array([1, 2]))
+ assert_equal(test, control[:, 1:])
+ # Checking with dtypes defined converters.
+ data = StringIO.StringIO("""JOE 70.1 25.3\nBOB 60.5 27.9""")
+ names = ['stid', 'temp']
+ dtypes = ['S4', 'f8']
+ test = np.ndfromtxt(data, usecols=(0, 2), dtype=zip(names, dtypes))
+ assert_equal(test['stid'], ["JOE", "BOB"])
+ assert_equal(test['temp'], [25.3, 27.9])
+
+
+ def test_empty_file(self):
+ "Test that an empty file raises the proper exception"
+ data = StringIO.StringIO()
+ assert_raises(IOError, np.ndfromtxt, data)
+
+
+ def test_fancy_dtype_alt(self):
+ "Check that a nested dtype isn't MIA"
+ data = StringIO.StringIO('1,2,3.0\n4,5,6.0\n')
+ fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
+ test = np.mafromtxt(data, dtype=fancydtype, delimiter=',')
+ control = ma.array([(1,(2,3.0)),(4,(5,6.0))], dtype=fancydtype)
+ assert_equal(test, control)
+
+
+ def test_withmissing(self):
+ data = StringIO.StringIO('A,B\n0,1\n2,N/A')
+ test = np.mafromtxt(data, dtype=None, delimiter=',', missing='N/A',
+ names=True)
+ control = ma.array([(0, 1), (2, -1)],
+ mask=[(False, False), (False, True)],
+ dtype=[('A', np.int), ('B', np.int)])
+ assert_equal(test, control)
+ assert_equal(test.mask, control.mask)
+ #
+ data.seek(0)
+ test = np.mafromtxt(data, delimiter=',', missing='N/A', names=True)
+ control = ma.array([(0, 1), (2, -1)],
+ mask=[[False, False], [False, True]],)
+ assert_equal(test, control)
+ assert_equal(test.mask, control.mask)
+
+
+ def test_user_missing_values(self):
+ datastr = "A, B, C\n0, 0., 0j\n1, N/A, 1j\n-9, 2.2, N/A\n3, -99, 3j"
+ data = StringIO.StringIO(datastr)
+ basekwargs = dict(dtype=None, delimiter=',', names=True, missing='N/A')
+ mdtype = [('A', int), ('B', float), ('C', complex)]
+ #
+ test = np.mafromtxt(data, **basekwargs)
+ control = ma.array([( 0, 0.0, 0j), (1, -999, 1j),
+ ( -9, 2.2, -999j), (3, -99, 3j)],
+ mask=[(0, 0, 0), (0, 1, 0), (0, 0, 1), (0, 0, 0)],
+ dtype=mdtype)
+ assert_equal(test, control)
+ #
+ data.seek(0)
+ test = np.mafromtxt(data,
+ missing_values={0:-9, 1:-99, 2:-999j}, **basekwargs)
+ control = ma.array([( 0, 0.0, 0j), (1, -999, 1j),
+ ( -9, 2.2, -999j), (3, -99, 3j)],
+ mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)],
+ dtype=mdtype)
+ assert_equal(test, control)
+ #
+ data.seek(0)
+ test = np.mafromtxt(data,
+ missing_values={0:-9, 'B':-99, 'C':-999j},
+ **basekwargs)
+ control = ma.array([( 0, 0.0, 0j), (1, -999, 1j),
+ ( -9, 2.2, -999j), (3, -99, 3j)],
+ mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)],
+ dtype=mdtype)
+ assert_equal(test, control)
+
+
+ def test_withmissing_float(self):
+ data = StringIO.StringIO('A,B\n0,1.5\n2,-999.00')
+ test = np.mafromtxt(data, dtype=None, delimiter=',', missing='-999.0',
+ names=True,)
+ control = ma.array([(0, 1.5), (2, -1.)],
+ mask=[(False, False), (False, True)],
+ dtype=[('A', np.int), ('B', np.float)])
+ assert_equal(test, control)
+ assert_equal(test.mask, control.mask)
+
+
+ def test_recfromtxt(self):
+ #
+ data = StringIO.StringIO('A,B\n0,1\n2,3')
+ test = np.recfromtxt(data, delimiter=',', missing='N/A', names=True)
+ control = np.array([(0, 1), (2, 3)],
+ dtype=[('A', np.int), ('B', np.int)])
+ self.failUnless(isinstance(test, np.recarray))
+ assert_equal(test, control)
+ #
+ data = StringIO.StringIO('A,B\n0,1\n2,N/A')
+ test = np.recfromtxt(data, dtype=None, delimiter=',', missing='N/A',
+ names=True, usemask=True)
+ control = ma.array([(0, 1), (2, -1)],
+ mask=[(False, False), (False, True)],
+ dtype=[('A', np.int), ('B', np.int)])
+ assert_equal(test, control)
+ assert_equal(test.mask, control.mask)
+ assert_equal(test.A, [0, 2])
+
+
+ def test_recfromcsv(self):
+ #
+ data = StringIO.StringIO('A,B\n0,1\n2,3')
+ test = np.recfromcsv(data, missing='N/A',
+ names=True, case_sensitive=True)
+ control = np.array([(0, 1), (2, 3)],
+ dtype=[('A', np.int), ('B', np.int)])
+ self.failUnless(isinstance(test, np.recarray))
+ assert_equal(test, control)
+ #
+ data = StringIO.StringIO('A,B\n0,1\n2,N/A')
+ test = np.recfromcsv(data, dtype=None, missing='N/A',
+ names=True, case_sensitive=True, usemask=True)
+ control = ma.array([(0, 1), (2, -1)],
+ mask=[(False, False), (False, True)],
+ dtype=[('A', np.int), ('B', np.int)])
+ assert_equal(test, control)
+ assert_equal(test.mask, control.mask)
+ assert_equal(test.A, [0, 2])
+ #
+ data = StringIO.StringIO('A,B\n0,1\n2,3')
+ test = np.recfromcsv(data, missing='N/A',)
+ control = np.array([(0, 1), (2, 3)],
+ dtype=[('a', np.int), ('b', np.int)])
+ self.failUnless(isinstance(test, np.recarray))
+ assert_equal(test, control)
+
+
+
+
if __name__ == "__main__":
run_module_suite()