diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2014-07-31 14:30:10 -0600 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2014-07-31 14:30:10 -0600 |
commit | ae7c942ced535fb39384aefeb8d32df92fb15988 (patch) | |
tree | d06ae19daed6c32522e3a06fb27afb4490302d0d /numpy/lib/_datasource.py | |
parent | 2ad538899928c249af456d93f250ebbd7535dcff (diff) | |
parent | 01b0d7e82211b581aaff925e3ccc36cff9ac1895 (diff) | |
download | numpy-ae7c942ced535fb39384aefeb8d32df92fb15988.tar.gz |
Merge pull request #4929 from juliantaylor/charris-pep8-numpy-lib
Charris pep8 numpy lib
Diffstat (limited to 'numpy/lib/_datasource.py')
-rw-r--r-- | numpy/lib/_datasource.py | 138 |
1 file changed, 74 insertions, 64 deletions
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py index 96d9af905..338c8b331 100644 --- a/numpy/lib/_datasource.py +++ b/numpy/lib/_datasource.py @@ -1,19 +1,21 @@ """A file interface for handling local and remote data files. -The goal of datasource is to abstract some of the file system operations when -dealing with data files so the researcher doesn't have to know all the + +The goal of datasource is to abstract some of the file system operations +when dealing with data files so the researcher doesn't have to know all the low-level details. Through datasource, a researcher can obtain and use a file with one function call, regardless of location of the file. DataSource is meant to augment standard python libraries, not replace them. -It should work seemlessly with standard file IO operations and the os module. +It should work seemlessly with standard file IO operations and the os +module. DataSource files can originate locally or remotely: - local files : '/home/guido/src/local/data.txt' - URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt' -DataSource files can also be compressed or uncompressed. Currently only gzip -and bz2 are supported. +DataSource files can also be compressed or uncompressed. Currently only +gzip and bz2 are supported. Example:: @@ -33,14 +35,13 @@ Example:: """ from __future__ import division, absolute_import, print_function -__docformat__ = "restructuredtext en" - import os import sys -from shutil import rmtree, copyfile, copyfileobj +import shutil _open = open + # Using a class instead of a module-level dictionary # to reduce the inital 'import numpy' overhead by # deferring the import of bz2 and gzip until needed @@ -51,9 +52,9 @@ class _FileOpeners(object): Container for different methods to open (un-)compressed files. `_FileOpeners` contains a dictionary that holds one method for each - supported file format. 
Attribute lookup is implemented in such a way that - an instance of `_FileOpeners` itself can be indexed with the keys of that - dictionary. Currently uncompressed files as well as files + supported file format. Attribute lookup is implemented in such a way + that an instance of `_FileOpeners` itself can be indexed with the keys + of that dictionary. Currently uncompressed files as well as files compressed with ``gzip`` or ``bz2`` compression are supported. Notes @@ -69,9 +70,11 @@ class _FileOpeners(object): True """ + def __init__(self): self._loaded = False self._file_openers = {None: open} + def _load(self): if self._loaded: return @@ -105,6 +108,7 @@ class _FileOpeners(object): """ self._load() return list(self._file_openers.keys()) + def __getitem__(self, key): self._load() return self._file_openers[key] @@ -115,8 +119,8 @@ def open(path, mode='r', destpath=os.curdir): """ Open `path` with `mode` and return the file object. - If ``path`` is an URL, it will be downloaded, stored in the `DataSource` - `destpath` directory and opened from there. + If ``path`` is an URL, it will be downloaded, stored in the + `DataSource` `destpath` directory and opened from there. Parameters ---------- @@ -124,12 +128,12 @@ def open(path, mode='r', destpath=os.curdir): Local file path or URL to open. mode : str, optional Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to - append. Available modes depend on the type of object specified by path. - Default is 'r'. + append. Available modes depend on the type of object specified by + path. Default is 'r'. destpath : str, optional - Path to the directory where the source file gets downloaded to for use. - If `destpath` is None, a temporary directory will be created. The - default path is the current directory. + Path to the directory where the source file gets downloaded to for + use. If `destpath` is None, a temporary directory will be created. + The default path is the current directory. 
Returns ------- @@ -154,15 +158,15 @@ class DataSource (object): A generic data source file (file, http, ftp, ...). DataSources can be local files or remote files/URLs. The files may - also be compressed or uncompressed. DataSource hides some of the low-level - details of downloading the file, allowing you to simply pass in a valid - file path (or URL) and obtain a file object. + also be compressed or uncompressed. DataSource hides some of the + low-level details of downloading the file, allowing you to simply pass + in a valid file path (or URL) and obtain a file object. Parameters ---------- destpath : str or None, optional - Path to the directory where the source file gets downloaded to for use. - If `destpath` is None, a temporary directory will be created. + Path to the directory where the source file gets downloaded to for + use. If `destpath` is None, a temporary directory will be created. The default path is the current directory. Notes @@ -202,17 +206,18 @@ class DataSource (object): self._destpath = os.path.abspath(destpath) self._istmpdest = False else: - import tempfile # deferring import to improve startup time + import tempfile # deferring import to improve startup time self._destpath = tempfile.mkdtemp() self._istmpdest = True def __del__(self): # Remove temp directories if self._istmpdest: - rmtree(self._destpath) + shutil.rmtree(self._destpath) def _iszip(self, filename): """Test if the filename is a zip file by looking at the file extension. + """ fname, ext = os.path.splitext(filename) return ext in _file_openers.keys() @@ -294,7 +299,7 @@ class DataSource (object): openedurl = urlopen(path) f = _open(upath, 'wb') try: - copyfileobj(openedurl, f) + shutil.copyfileobj(openedurl, f) finally: f.close() openedurl.close() @@ -307,13 +312,12 @@ class DataSource (object): def _findfile(self, path): """Searches for ``path`` and returns full path if found. - If path is an URL, _findfile will cache a local copy and return - the path to the cached file. 
- If path is a local file, _findfile will return a path to that local - file. + If path is an URL, _findfile will cache a local copy and return the + path to the cached file. If path is a local file, _findfile will + return a path to that local file. - The search will include possible compressed versions of the file and - return the first occurence found. + The search will include possible compressed versions of the file + and return the first occurence found. """ @@ -392,7 +396,7 @@ class DataSource (object): # Note: os.path.join treats '/' as os.sep on Windows path = path.lstrip(os.sep).lstrip('/') path = path.lstrip(os.pardir).lstrip('..') - drive, path = os.path.splitdrive(path) # for Windows + drive, path = os.path.splitdrive(path) # for Windows return path def exists(self, path): @@ -404,7 +408,8 @@ class DataSource (object): - a local file. - a remote URL that has been downloaded and stored locally in the `DataSource` directory. - - a remote URL that has not been downloaded, but is valid and accessible. + - a remote URL that has not been downloaded, but is valid and + accessible. Parameters ---------- @@ -418,10 +423,10 @@ class DataSource (object): Notes ----- - When `path` is an URL, `exists` will return True if it's either stored - locally in the `DataSource` directory, or is a valid remote URL. - `DataSource` does not discriminate between the two, the file is accessible - if it exists in either location. + When `path` is an URL, `exists` will return True if it's either + stored locally in the `DataSource` directory, or is a valid remote + URL. `DataSource` does not discriminate between the two, the file + is accessible if it exists in either location. """ # We import this here because importing urllib2 is slow and @@ -457,17 +462,17 @@ class DataSource (object): """ Open and return file-like object. - If `path` is an URL, it will be downloaded, stored in the `DataSource` - directory and opened from there. 
+ If `path` is an URL, it will be downloaded, stored in the + `DataSource` directory and opened from there. Parameters ---------- path : str Local file path or URL to open. mode : {'r', 'w', 'a'}, optional - Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to - append. Available modes depend on the type of object specified by - `path`. Default is 'r'. + Mode to open `path`. Mode 'r' for reading, 'w' for writing, + 'a' to append. Available modes depend on the type of object + specified by `path`. Default is 'r'. Returns ------- @@ -500,12 +505,14 @@ class Repository (DataSource): """ Repository(baseurl, destpath='.') - A data repository where multiple DataSource's share a base URL/directory. + A data repository where multiple DataSource's share a base + URL/directory. - `Repository` extends `DataSource` by prepending a base URL (or directory) - to all the files it handles. Use `Repository` when you will be working - with multiple files from one base URL. Initialize `Repository` with the - base URL, then refer to each file by its filename only. + `Repository` extends `DataSource` by prepending a base URL (or + directory) to all the files it handles. Use `Repository` when you will + be working with multiple files from one base URL. Initialize + `Repository` with the base URL, then refer to each file by its filename + only. Parameters ---------- @@ -513,8 +520,8 @@ class Repository (DataSource): Path to the local directory or remote location that contains the data files. destpath : str or None, optional - Path to the directory where the source file gets downloaded to for use. - If `destpath` is None, a temporary directory will be created. + Path to the directory where the source file gets downloaded to for + use. If `destpath` is None, a temporary directory will be created. The default path is the current directory. Examples @@ -566,8 +573,9 @@ class Repository (DataSource): Parameters ---------- path : str - Can be a local file or a remote URL. 
This may, but does not have - to, include the `baseurl` with which the `Repository` was initialized. + Can be a local file or a remote URL. This may, but does not + have to, include the `baseurl` with which the `Repository` was + initialized. Returns ------- @@ -592,8 +600,9 @@ class Repository (DataSource): Parameters ---------- path : str - Can be a local file or a remote URL. This may, but does not have - to, include the `baseurl` with which the `Repository` was initialized. + Can be a local file or a remote URL. This may, but does not + have to, include the `baseurl` with which the `Repository` was + initialized. Returns ------- @@ -602,10 +611,10 @@ class Repository (DataSource): Notes ----- - When `path` is an URL, `exists` will return True if it's either stored - locally in the `DataSource` directory, or is a valid remote URL. - `DataSource` does not discriminate between the two, the file is accessible - if it exists in either location. + When `path` is an URL, `exists` will return True if it's either + stored locally in the `DataSource` directory, or is a valid remote + URL. `DataSource` does not discriminate between the two, the file + is accessible if it exists in either location. """ return DataSource.exists(self, self._fullpath(path)) @@ -614,18 +623,19 @@ class Repository (DataSource): """ Open and return file-like object prepending Repository base URL. - If `path` is an URL, it will be downloaded, stored in the DataSource - directory and opened from there. + If `path` is an URL, it will be downloaded, stored in the + DataSource directory and opened from there. Parameters ---------- path : str Local file path or URL to open. This may, but does not have to, - include the `baseurl` with which the `Repository` was initialized. + include the `baseurl` with which the `Repository` was + initialized. mode : {'r', 'w', 'a'}, optional - Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to - append. 
Available modes depend on the type of object specified by - `path`. Default is 'r'. + Mode to open `path`. Mode 'r' for reading, 'w' for writing, + 'a' to append. Available modes depend on the type of object + specified by `path`. Default is 'r'. Returns ------- |