summary | refs | log | tree | commit | diff
path: root/numpy/lib/_datasource.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/_datasource.py')
-rw-r--r-- numpy/lib/_datasource.py 88
1 files changed, 74 insertions, 14 deletions
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py
index 3affc5195..ad939df3f 100644
--- a/numpy/lib/_datasource.py
+++ b/numpy/lib/_datasource.py
@@ -15,7 +15,7 @@ DataSource files can originate locally or remotely:
- URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt'
DataSource files can also be compressed or uncompressed. Currently only
-gzip and bz2 are supported.
+gzip, bz2 and xz are supported.
Example::
@@ -38,13 +38,59 @@ from __future__ import division, absolute_import, print_function
import os
import sys
import shutil
+import io
_open = open
+def _check_mode(mode, encoding, newline):
+ """ Raise ValueError if mode mixes "t" with "b", or if encoding/newline is given without "t" """
+ if "t" in mode:
+ if "b" in mode:  # text and binary flags cannot be combined
+ raise ValueError("Invalid mode: %r" % (mode,))
+ else:  # indentation is collapsed in this listing; the messages below imply this pairs with the "t" check
+ if encoding is not None:  # no "t" flag is treated as binary mode
+ raise ValueError("Argument 'encoding' not supported in binary mode")
+ if newline is not None:
+ raise ValueError("Argument 'newline' not supported in binary mode")
+
+def _python2_bz2open(fn, mode, encoding, newline):
+ """ Python 2 opener for bz2 files: returns bz2.BZ2File for binary modes, raises ValueError for text mode """
+ import bz2  # function-scope import: bz2 is only loaded when this opener is called
+
+ _check_mode(mode, encoding, newline)  # validates the mode/encoding/newline combination first
+
+ if "t" in mode:
+ # BZ2File is missing necessary functions for TextIOWrapper
+ raise ValueError("bz2 text files not supported in python2")
+ else:
+ return bz2.BZ2File(fn, mode)  # encoding and newline are not passed on in binary mode
+
+def _python2_gzipopen(fn, mode, encoding, newline):
+ """ Python 2 opener for gzip files; wraps the stream in io.TextIOWrapper when mode contains "t" """
+ import gzip  # function-scope import: gzip is only loaded when this opener is called
+ # gzip is lacking read1 needed for TextIOWrapper
+ class GzipWrap(gzip.GzipFile):
+ def read1(self, n):  # supplies the missing method by delegating to read(n)
+ return self.read(n)
+
+ _check_mode(mode, encoding, newline)  # validates the mode/encoding/newline combination first
+
+ gz_mode = mode.replace("t", "")  # the gzip layer is opened with the text flag removed
+ if isinstance(fn, (str, bytes)):  # NOTE(review): on Python 2 a unicode filename appears to fall through to the TypeError below - confirm
+ binary_file = GzipWrap(fn, gz_mode)  # fn is used as a filename
+ elif hasattr(fn, "read") or hasattr(fn, "write"):
+ binary_file = GzipWrap(None, gz_mode, fileobj=fn)  # fn is used as an already-open file object
+ else:
+ raise TypeError("filename must be a str or bytes object, or a file")
+
+ if "t" in mode:
+ return io.TextIOWrapper(binary_file, encoding, newline=newline)  # text layer on top of the gzip stream
+ else:
+ return binary_file
+
# Using a class instead of a module-level dictionary
# to reduce the initial 'import numpy' overhead by
-# deferring the import of bz2 and gzip until needed
+# deferring the import of lzma, bz2 and gzip until needed
# TODO: .zip support, .tar support?
class _FileOpeners(object):
@@ -55,7 +101,7 @@ class _FileOpeners(object):
supported file format. Attribute lookup is implemented in such a way
that an instance of `_FileOpeners` itself can be indexed with the keys
of that dictionary. Currently uncompressed files as well as files
- compressed with ``gzip`` or ``bz2`` compression are supported.
+ compressed with ``gzip``, ``bz2`` or ``xz`` compression are supported.
Notes
-----
@@ -65,7 +111,7 @@ class _FileOpeners(object):
Examples
--------
>>> np.lib._datasource._file_openers.keys()
- [None, '.bz2', '.gz']
+ [None, '.bz2', '.gz', '.xz', '.lzma']
>>> np.lib._datasource._file_openers['.gz'] is gzip.open
True
@@ -73,19 +119,31 @@ class _FileOpeners(object):
def __init__(self):
self._loaded = False
- self._file_openers = {None: open}
+ self._file_openers = {None: io.open}
def _load(self):
if self._loaded:
return
try:
import bz2
- self._file_openers[".bz2"] = bz2.BZ2File
+ if sys.version_info[0] >= 3:
+ self._file_openers[".bz2"] = bz2.open
+ else:
+ self._file_openers[".bz2"] = _python2_bz2open
except ImportError:
pass
try:
import gzip
- self._file_openers[".gz"] = gzip.open
+ if sys.version_info[0] >= 3:
+ self._file_openers[".gz"] = gzip.open
+ else:
+ self._file_openers[".gz"] = _python2_gzipopen
+ except ImportError:
+ pass
+ try:
+ import lzma
+ self._file_openers[".xz"] = lzma.open
+ self._file_openers[".lzma"] = lzma.open
except ImportError:
pass
self._loaded = True
@@ -102,7 +160,7 @@ class _FileOpeners(object):
-------
keys : list
The keys are None for uncompressed files and the file extension
- strings (i.e. ``'.gz'``, ``'.bz2'``) for supported compression
+ strings (e.g. ``'.gz'``, ``'.xz'``) for supported compression
methods.
"""
@@ -115,7 +173,7 @@ class _FileOpeners(object):
_file_openers = _FileOpeners()
-def open(path, mode='r', destpath=os.curdir):
+def open(path, mode='r', destpath=os.curdir, encoding=None, newline=None):
"""
Open `path` with `mode` and return the file object.
@@ -148,7 +206,7 @@ def open(path, mode='r', destpath=os.curdir):
"""
ds = DataSource(destpath)
- return ds.open(path, mode)
+ return ds.open(path, mode, encoding=encoding, newline=newline)
class DataSource (object):
@@ -458,7 +516,7 @@ class DataSource (object):
return False
return False
- def open(self, path, mode='r'):
+ def open(self, path, mode='r', encoding=None, newline=None):
"""
Open and return file-like object.
@@ -496,7 +554,8 @@ class DataSource (object):
_fname, ext = self._splitzipext(found)
if ext == 'bz2':
mode.replace("+", "")
- return _file_openers[ext](found, mode=mode)
+ return _file_openers[ext](found, mode=mode,
+ encoding=encoding, newline=newline)
else:
raise IOError("%s not found." % path)
@@ -619,7 +678,7 @@ class Repository (DataSource):
"""
return DataSource.exists(self, self._fullpath(path))
- def open(self, path, mode='r'):
+ def open(self, path, mode='r', encoding=None, newline=None):
"""
Open and return file-like object prepending Repository base URL.
@@ -643,7 +702,8 @@ class Repository (DataSource):
File object.
"""
- return DataSource.open(self, self._fullpath(path), mode)
+ return DataSource.open(self, self._fullpath(path), mode,
+ encoding=encoding, newline=newline)
def listdir(self):
"""