summaryrefslogtreecommitdiff
path: root/numpy/lib/_datasource.py
diff options
context:
space:
mode:
author Charles Harris <charlesr.harris@gmail.com> 2014-07-31 14:30:10 -0600
committer Charles Harris <charlesr.harris@gmail.com> 2014-07-31 14:30:10 -0600
commit ae7c942ced535fb39384aefeb8d32df92fb15988 (patch)
tree d06ae19daed6c32522e3a06fb27afb4490302d0d /numpy/lib/_datasource.py
parent 2ad538899928c249af456d93f250ebbd7535dcff (diff)
parent 01b0d7e82211b581aaff925e3ccc36cff9ac1895 (diff)
download numpy-ae7c942ced535fb39384aefeb8d32df92fb15988.tar.gz
Merge pull request #4929 from juliantaylor/charris-pep8-numpy-lib
Charris pep8 numpy lib
Diffstat (limited to 'numpy/lib/_datasource.py')
-rw-r--r-- numpy/lib/_datasource.py 138
1 files changed, 74 insertions, 64 deletions
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py
index 96d9af905..338c8b331 100644
--- a/numpy/lib/_datasource.py
+++ b/numpy/lib/_datasource.py
@@ -1,19 +1,21 @@
"""A file interface for handling local and remote data files.
-The goal of datasource is to abstract some of the file system operations when
-dealing with data files so the researcher doesn't have to know all the
+
+The goal of datasource is to abstract some of the file system operations
+when dealing with data files so the researcher doesn't have to know all the
low-level details. Through datasource, a researcher can obtain and use a
file with one function call, regardless of location of the file.
DataSource is meant to augment standard python libraries, not replace them.
-It should work seemlessly with standard file IO operations and the os module.
+It should work seamlessly with standard file IO operations and the os
+module.
DataSource files can originate locally or remotely:
- local files : '/home/guido/src/local/data.txt'
- URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt'
-DataSource files can also be compressed or uncompressed. Currently only gzip
-and bz2 are supported.
+DataSource files can also be compressed or uncompressed. Currently only
+gzip and bz2 are supported.
Example::
@@ -33,14 +35,13 @@ Example::
"""
from __future__ import division, absolute_import, print_function
-__docformat__ = "restructuredtext en"
-
import os
import sys
-from shutil import rmtree, copyfile, copyfileobj
+import shutil
_open = open
+
# Using a class instead of a module-level dictionary
# to reduce the initial 'import numpy' overhead by
# deferring the import of bz2 and gzip until needed
@@ -51,9 +52,9 @@ class _FileOpeners(object):
Container for different methods to open (un-)compressed files.
`_FileOpeners` contains a dictionary that holds one method for each
- supported file format. Attribute lookup is implemented in such a way that
- an instance of `_FileOpeners` itself can be indexed with the keys of that
- dictionary. Currently uncompressed files as well as files
+ supported file format. Attribute lookup is implemented in such a way
+ that an instance of `_FileOpeners` itself can be indexed with the keys
+ of that dictionary. Currently uncompressed files as well as files
compressed with ``gzip`` or ``bz2`` compression are supported.
Notes
@@ -69,9 +70,11 @@ class _FileOpeners(object):
True
"""
+
def __init__(self):
self._loaded = False
self._file_openers = {None: open}
+
def _load(self):
if self._loaded:
return
@@ -105,6 +108,7 @@ class _FileOpeners(object):
"""
self._load()
return list(self._file_openers.keys())
+
def __getitem__(self, key):
self._load()
return self._file_openers[key]
@@ -115,8 +119,8 @@ def open(path, mode='r', destpath=os.curdir):
"""
Open `path` with `mode` and return the file object.
- If ``path`` is an URL, it will be downloaded, stored in the `DataSource`
- `destpath` directory and opened from there.
+If ``path`` is a URL, it will be downloaded, stored in the
+ `DataSource` `destpath` directory and opened from there.
Parameters
----------
@@ -124,12 +128,12 @@ def open(path, mode='r', destpath=os.curdir):
Local file path or URL to open.
mode : str, optional
Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to
- append. Available modes depend on the type of object specified by path.
- Default is 'r'.
+ append. Available modes depend on the type of object specified by
+ path. Default is 'r'.
destpath : str, optional
- Path to the directory where the source file gets downloaded to for use.
- If `destpath` is None, a temporary directory will be created. The
- default path is the current directory.
+ Path to the directory where the source file gets downloaded to for
+ use. If `destpath` is None, a temporary directory will be created.
+ The default path is the current directory.
Returns
-------
@@ -154,15 +158,15 @@ class DataSource (object):
A generic data source file (file, http, ftp, ...).
DataSources can be local files or remote files/URLs. The files may
- also be compressed or uncompressed. DataSource hides some of the low-level
- details of downloading the file, allowing you to simply pass in a valid
- file path (or URL) and obtain a file object.
+ also be compressed or uncompressed. DataSource hides some of the
+ low-level details of downloading the file, allowing you to simply pass
+ in a valid file path (or URL) and obtain a file object.
Parameters
----------
destpath : str or None, optional
- Path to the directory where the source file gets downloaded to for use.
- If `destpath` is None, a temporary directory will be created.
+ Path to the directory where the source file gets downloaded to for
+ use. If `destpath` is None, a temporary directory will be created.
The default path is the current directory.
Notes
@@ -202,17 +206,18 @@ class DataSource (object):
self._destpath = os.path.abspath(destpath)
self._istmpdest = False
else:
- import tempfile # deferring import to improve startup time
+ import tempfile # deferring import to improve startup time
self._destpath = tempfile.mkdtemp()
self._istmpdest = True
def __del__(self):
# Remove temp directories
if self._istmpdest:
- rmtree(self._destpath)
+ shutil.rmtree(self._destpath)
def _iszip(self, filename):
"""Test if the filename is a zip file by looking at the file extension.
+
"""
fname, ext = os.path.splitext(filename)
return ext in _file_openers.keys()
@@ -294,7 +299,7 @@ class DataSource (object):
openedurl = urlopen(path)
f = _open(upath, 'wb')
try:
- copyfileobj(openedurl, f)
+ shutil.copyfileobj(openedurl, f)
finally:
f.close()
openedurl.close()
@@ -307,13 +312,12 @@ class DataSource (object):
def _findfile(self, path):
"""Searches for ``path`` and returns full path if found.
- If path is an URL, _findfile will cache a local copy and return
- the path to the cached file.
- If path is a local file, _findfile will return a path to that local
- file.
+If path is a URL, _findfile will cache a local copy and return the
+ path to the cached file. If path is a local file, _findfile will
+ return a path to that local file.
- The search will include possible compressed versions of the file and
- return the first occurence found.
+ The search will include possible compressed versions of the file
+and return the first occurrence found.
"""
@@ -392,7 +396,7 @@ class DataSource (object):
# Note: os.path.join treats '/' as os.sep on Windows
path = path.lstrip(os.sep).lstrip('/')
path = path.lstrip(os.pardir).lstrip('..')
- drive, path = os.path.splitdrive(path) # for Windows
+ drive, path = os.path.splitdrive(path) # for Windows
return path
def exists(self, path):
@@ -404,7 +408,8 @@ class DataSource (object):
- a local file.
- a remote URL that has been downloaded and stored locally in the
`DataSource` directory.
- - a remote URL that has not been downloaded, but is valid and accessible.
+ - a remote URL that has not been downloaded, but is valid and
+ accessible.
Parameters
----------
@@ -418,10 +423,10 @@ class DataSource (object):
Notes
-----
- When `path` is an URL, `exists` will return True if it's either stored
- locally in the `DataSource` directory, or is a valid remote URL.
- `DataSource` does not discriminate between the two, the file is accessible
- if it exists in either location.
+When `path` is a URL, `exists` will return True if it's either
+ stored locally in the `DataSource` directory, or is a valid remote
+ URL. `DataSource` does not discriminate between the two, the file
+ is accessible if it exists in either location.
"""
# We import this here because importing urllib2 is slow and
@@ -457,17 +462,17 @@ class DataSource (object):
"""
Open and return file-like object.
- If `path` is an URL, it will be downloaded, stored in the `DataSource`
- directory and opened from there.
+If `path` is a URL, it will be downloaded, stored in the
+ `DataSource` directory and opened from there.
Parameters
----------
path : str
Local file path or URL to open.
mode : {'r', 'w', 'a'}, optional
- Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to
- append. Available modes depend on the type of object specified by
- `path`. Default is 'r'.
+ Mode to open `path`. Mode 'r' for reading, 'w' for writing,
+ 'a' to append. Available modes depend on the type of object
+ specified by `path`. Default is 'r'.
Returns
-------
@@ -500,12 +505,14 @@ class Repository (DataSource):
"""
Repository(baseurl, destpath='.')
- A data repository where multiple DataSource's share a base URL/directory.
+A data repository where multiple DataSources share a base
+ URL/directory.
- `Repository` extends `DataSource` by prepending a base URL (or directory)
- to all the files it handles. Use `Repository` when you will be working
- with multiple files from one base URL. Initialize `Repository` with the
- base URL, then refer to each file by its filename only.
+ `Repository` extends `DataSource` by prepending a base URL (or
+ directory) to all the files it handles. Use `Repository` when you will
+ be working with multiple files from one base URL. Initialize
+ `Repository` with the base URL, then refer to each file by its filename
+ only.
Parameters
----------
@@ -513,8 +520,8 @@ class Repository (DataSource):
Path to the local directory or remote location that contains the
data files.
destpath : str or None, optional
- Path to the directory where the source file gets downloaded to for use.
- If `destpath` is None, a temporary directory will be created.
+ Path to the directory where the source file gets downloaded to for
+ use. If `destpath` is None, a temporary directory will be created.
The default path is the current directory.
Examples
@@ -566,8 +573,9 @@ class Repository (DataSource):
Parameters
----------
path : str
- Can be a local file or a remote URL. This may, but does not have
- to, include the `baseurl` with which the `Repository` was initialized.
+ Can be a local file or a remote URL. This may, but does not
+ have to, include the `baseurl` with which the `Repository` was
+ initialized.
Returns
-------
@@ -592,8 +600,9 @@ class Repository (DataSource):
Parameters
----------
path : str
- Can be a local file or a remote URL. This may, but does not have
- to, include the `baseurl` with which the `Repository` was initialized.
+ Can be a local file or a remote URL. This may, but does not
+ have to, include the `baseurl` with which the `Repository` was
+ initialized.
Returns
-------
@@ -602,10 +611,10 @@ class Repository (DataSource):
Notes
-----
- When `path` is an URL, `exists` will return True if it's either stored
- locally in the `DataSource` directory, or is a valid remote URL.
- `DataSource` does not discriminate between the two, the file is accessible
- if it exists in either location.
+When `path` is a URL, `exists` will return True if it's either
+ stored locally in the `DataSource` directory, or is a valid remote
+ URL. `DataSource` does not discriminate between the two, the file
+ is accessible if it exists in either location.
"""
return DataSource.exists(self, self._fullpath(path))
@@ -614,18 +623,19 @@ class Repository (DataSource):
"""
Open and return file-like object prepending Repository base URL.
- If `path` is an URL, it will be downloaded, stored in the DataSource
- directory and opened from there.
+If `path` is a URL, it will be downloaded, stored in the
+ DataSource directory and opened from there.
Parameters
----------
path : str
Local file path or URL to open. This may, but does not have to,
- include the `baseurl` with which the `Repository` was initialized.
+ include the `baseurl` with which the `Repository` was
+ initialized.
mode : {'r', 'w', 'a'}, optional
- Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to
- append. Available modes depend on the type of object specified by
- `path`. Default is 'r'.
+ Mode to open `path`. Mode 'r' for reading, 'w' for writing,
+ 'a' to append. Available modes depend on the type of object
+ specified by `path`. Default is 'r'.
Returns
-------