1 files changed, 74 insertions, 14 deletions
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py
index 3affc5195..ad939df3f 100644
--- a/numpy/lib/_datasource.py
+++ b/numpy/lib/_datasource.py
@@ -15,7 +15,7 @@ DataSource files can originate locally or remotely:
 - URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt'
 
 DataSource files can also be compressed or uncompressed.  Currently only
-gzip and bz2 are supported.
+gzip, bz2 and xz are supported.
 
 Example::
 
@@ -38,13 +38,59 @@ from __future__ import division, absolute_import, print_function
 import os
 import sys
 import shutil
+import io
 
 _open = open
 
+def _check_mode(mode, encoding, newline):
+    """Raise ValueError for an invalid mode/encoding/newline combination."""
+    is_text = "t" in mode
+    if is_text and "b" in mode:
+        raise ValueError("Invalid mode: %r" % (mode,))
+    if not is_text and encoding is not None:
+        raise ValueError("Argument 'encoding' not supported in binary mode")
+    if not is_text and newline is not None:
+        raise ValueError("Argument 'newline' not supported in binary mode")
+
+def _python2_bz2open(fn, mode, encoding, newline):
+    """Open a bz2 file on Python 2; text modes are rejected."""
+    import bz2
+
+    _check_mode(mode, encoding, newline)
+
+    # BZ2File is missing necessary functions for TextIOWrapper, so a
+    # text-mode request is refused rather than wrapped.
+    if "t" in mode:
+        raise ValueError("bz2 text files not supported in python2")
+    return bz2.BZ2File(fn, mode)
+
+def _python2_gzipopen(fn, mode, encoding, newline):
+    """Open a gzip file (path or file object) on Python 2, text or binary."""
+    import gzip
+    # gzip is lacking read1 needed for TextIOWrapper
+    class GzipWrap(gzip.GzipFile):
+        def read1(self, n):
+            return self.read(n)
+
+    _check_mode(mode, encoding, newline)
+
+    gz_mode = mode.replace("t", "")  # GzipFile gets the mode without "t"
+    if isinstance(fn, (str, bytes, type(u""))):  # also unicode on Python 2
+        binary_file = GzipWrap(fn, gz_mode)
+    elif hasattr(fn, "read") or hasattr(fn, "write"):
+        binary_file = GzipWrap(None, gz_mode, fileobj=fn)
+    else:
+        raise TypeError("filename must be a str or bytes object, or a file")
+
+    if "t" in mode:
+        return io.TextIOWrapper(binary_file, encoding, newline=newline)
+    else:
+        return binary_file
+
 
 # Using a class instead of a module-level dictionary
 # to reduce the initial 'import numpy' overhead by
-# deferring the import of bz2 and gzip until needed
+# deferring the import of lzma, bz2 and gzip until needed
 
 # TODO: .zip support, .tar support?
 class _FileOpeners(object):
@@ -55,7 +101,7 @@ class _FileOpeners(object):
     supported file format. Attribute lookup is implemented in such a way
     that an instance of `_FileOpeners` itself can be indexed with the keys
     of that dictionary. Currently uncompressed files as well as files
-    compressed with ``gzip`` or ``bz2`` compression are supported.
+    compressed with ``gzip``, ``bz2`` or ``xz`` compression are supported.
 
     Notes
     -----
@@ -65,7 +111,7 @@ class _FileOpeners(object):
     Examples
     --------
     >>> np.lib._datasource._file_openers.keys()
-    [None, '.bz2', '.gz']
+    [None, '.bz2', '.gz', '.xz', '.lzma']
     >>> np.lib._datasource._file_openers['.gz'] is gzip.open
     True
 
@@ -73,19 +119,31 @@ class _FileOpeners(object):
 
     def __init__(self):
         self._loaded = False
-        self._file_openers = {None: open}
+        self._file_openers = {None: io.open}
 
     def _load(self):
         if self._loaded:
             return
         try:
             import bz2
-            self._file_openers[".bz2"] = bz2.BZ2File
+            if sys.version_info[0] >= 3:
+                self._file_openers[".bz2"] = bz2.open
+            else:
+                self._file_openers[".bz2"] = _python2_bz2open
         except ImportError:
             pass
         try:
             import gzip
-            self._file_openers[".gz"] = gzip.open
+            if sys.version_info[0] >= 3:
+                self._file_openers[".gz"] = gzip.open
+            else:
+                self._file_openers[".gz"] = _python2_gzipopen
+        except ImportError:
+            pass
+        try:
+            import lzma
+            self._file_openers[".xz"] = lzma.open
+            self._file_openers[".lzma"] = lzma.open
         except ImportError:
             pass
         self._loaded = True
@@ -102,7 +160,7 @@ class _FileOpeners(object):
         -------
         keys : list
             The keys are None for uncompressed files and the file extension
-            strings (i.e. ``'.gz'``, ``'.bz2'``) for supported compression
+            strings (e.g. ``'.gz'``, ``'.xz'``) for supported compression
             methods.
 
         """
@@ -115,7 +173,7 @@ class _FileOpeners(object):
 
 _file_openers = _FileOpeners()
 
-def open(path, mode='r', destpath=os.curdir):
+def open(path, mode='r', destpath=os.curdir, encoding=None, newline=None):
     """
     Open `path` with `mode` and return the file object.
 
@@ -148,7 +206,7 @@ def open(path, mode='r', destpath=os.curdir):
     """
 
     ds = DataSource(destpath)
-    return ds.open(path, mode)
+    return ds.open(path, mode, encoding=encoding, newline=newline)
 
 
 class DataSource (object):
@@ -458,7 +516,7 @@ class DataSource (object):
                 return False
         return False
 
-    def open(self, path, mode='r'):
+    def open(self, path, mode='r', encoding=None, newline=None):
         """
         Open and return file-like object.
 
@@ -496,7 +554,8 @@ class DataSource (object):
             _fname, ext = self._splitzipext(found)
             if ext == 'bz2':
                 mode.replace("+", "")
-            return _file_openers[ext](found, mode=mode)
+            return _file_openers[ext](found, mode=mode,
+                                      encoding=encoding, newline=newline)
         else:
             raise IOError("%s not found." % path)
 
@@ -619,7 +678,7 @@ class Repository (DataSource):
         """
         return DataSource.exists(self, self._fullpath(path))
 
-    def open(self, path, mode='r'):
+    def open(self, path, mode='r', encoding=None, newline=None):
         """
         Open and return file-like object prepending Repository base URL.
 
@@ -643,7 +702,8 @@ class Repository (DataSource):
             File object.
 
         """
-        return DataSource.open(self, self._fullpath(path), mode)
+        return DataSource.open(self, self._fullpath(path), mode,
+                               encoding=encoding, newline=newline)
 
     def listdir(self):
         """