diff options
author | Jordan Cook <jordan.cook@pioneer.com> | 2021-04-02 22:55:52 -0500 |
---|---|---|
committer | Jordan Cook <jordan.cook@pioneer.com> | 2021-04-21 10:51:14 -0500 |
commit | f893f283957000be0aae7cf802ed341541379542 (patch) | |
tree | d7d1b1d33a3319811e6b899e0e014fa77108699f | |
parent | cdf07cc8aa68b410020182b4127aa8394ce7b7d4 (diff) | |
download | requests-cache-f893f283957000be0aae7cf802ed341541379542.tar.gz |
Add a filesystem backend
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | docs/user_guide.rst | 2 | ||||
-rw-r--r-- | pyproject.toml | 1 | ||||
-rw-r--r-- | requests_cache/backends/__init__.py | 5 | ||||
-rw-r--r-- | requests_cache/backends/filesystem.py | 87 | ||||
-rw-r--r-- | requests_cache/backends/sqlite.py | 2 | ||||
-rw-r--r-- | tests/integration/test_backends.py | 4 | ||||
-rw-r--r-- | tests/integration/test_filesystem.py | 32 | ||||
-rw-r--r-- | tests/integration/test_thread_safety.py | 2 |
10 files changed, 133 insertions, 5 deletions
@@ -4,6 +4,7 @@ *.egg-info build/ dist/ +http_cache/ venv/ # Editors @@ -24,7 +24,7 @@ See full project documentation at: https://requests-cache.readthedocs.io [expiration](https://requests-cache.readthedocs.io/en/latest/user_guide.html#cache-expiration) and other [behavior](https://requests-cache.readthedocs.io/en/latest/user_guide.html#cache-options) * **Persistence:** Includes several [storage backends](https://requests-cache.readthedocs.io/en/latest/user_guide.html#cache-backends): - SQLite, Redis, MongoDB, and DynamoDB. + SQLite, Redis, MongoDB, GridFS, DynamoDB, and filesystem. * **Compatibility:** Can be used alongside [other popular libraries based on requests](https://requests-cache.readthedocs.io/en/latest/advanced_usage.html#library-compatibility) diff --git a/docs/user_guide.rst b/docs/user_guide.rst index e068c4c..1c6619f 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -116,6 +116,7 @@ the ``backend`` parameter for either :py:class:`.CachedSession` or :py:func:`.in * ``'mongodb'``: `MongoDB <https://www.mongodb.com>`_ database (requires ``pymongo``) * ``'gridfs'``: `GridFS <https://docs.mongodb.com/manual/core/gridfs/>`_ collections on a MongoDB database (requires ``pymongo``) * ``'dynamodb'``: `Amazon DynamoDB <https://aws.amazon.com/dynamodb>`_ database (requires ``boto3``) +* ``'filesystem'``: Stores responses as files on the local filesystem * ``'memory'`` : A non-persistent cache that just stores responses in memory A backend can be specified either by name, class or instance: @@ -143,6 +144,7 @@ The ``cache_name`` parameter will be used as follows depending on the backend: * ``dynamodb``: Table name * ``mongodb`` and ``gridfs``: Database name * ``redis``: Namespace, meaning all keys will be prefixed with ``'<cache_name>:'`` +* ``filesystem``: Cache directory Cache Options ------------- diff --git a/pyproject.toml b/pyproject.toml index c2c5a0e..46d6ec3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ branch = true 
# TODO: Add option for compression?
from contextlib import contextmanager
from os import listdir, makedirs, unlink
from os.path import abspath, expanduser, isabs, join
from pathlib import Path
from pickle import PickleError
from shutil import rmtree
from tempfile import gettempdir
from typing import Union

from . import BaseCache, BaseStorage
from .sqlite import DbDict


class FileCache(BaseCache):
    """Backend that stores cached responses as files on the local filesystem. Response paths will be
    in the format ``<cache_name>/<cache_key>``. Redirects are stored in a SQLite database
    (``<cache_name>/redirects.sqlite``) in the same directory.

    Args:
        cache_name: Base directory for cache files
        use_temp: Store cache files in a temp directory (e.g., ``/tmp/http_cache/``).
            Note: if ``cache_name`` is an absolute path, this option will be ignored.
    """

    def __init__(self, cache_name: Union[Path, str] = 'http_cache', use_temp: bool = False, **kwargs):
        super().__init__(**kwargs)
        cache_dir = _get_cache_dir(cache_name, use_temp)
        self.responses = FileDict(cache_dir, **kwargs)
        # The redirects DB shares the cache directory; FileDict filters out
        # SQLite files so this is never mistaken for a cached response.
        self.redirects = DbDict(join(cache_dir, 'redirects.sqlite'), 'redirects', **kwargs)


class FileDict(BaseStorage):
    """A dictionary-like interface to files on the local filesystem.

    Keys map directly to filenames under ``cache_dir``; values are serialized with the
    storage class's configured serializer (pickle by default).
    """

    def __init__(self, cache_dir, **kwargs):
        kwargs.setdefault('suppress_warnings', True)
        super().__init__(**kwargs)
        self.cache_dir = cache_dir
        makedirs(self.cache_dir, exist_ok=True)

    @contextmanager
    def _try_io(self, ignore_errors: bool = False):
        """Attempt an I/O operation, and either ignore errors or re-raise them as KeyErrors"""
        try:
            yield
        except (IOError, OSError, PickleError) as e:
            if not ignore_errors:
                # Chain the original error so the underlying cause stays visible
                raise KeyError(e) from e

    @staticmethod
    def _is_cache_file(filename: str) -> bool:
        """Return True if ``filename`` is a cached response file.

        Excludes the redirects SQLite database (plus any ``-journal``/``-wal``
        siblings) that FileCache stores in the same directory. Without this
        filter, ``__iter__``/``__len__``/``paths()`` would report
        ``redirects.sqlite`` as a response key, and reading it would attempt to
        deserialize a SQLite database.
        """
        return '.sqlite' not in filename

    def __getitem__(self, key):
        # A missing file surfaces as KeyError via _try_io, matching dict semantics
        with self._try_io():
            with open(join(self.cache_dir, str(key)), 'rb') as f:
                return self.deserialize(f.read())

    def __delitem__(self, key):
        with self._try_io():
            unlink(join(self.cache_dir, str(key)))

    def __setitem__(self, key, value):
        with self._try_io():
            with open(join(self.cache_dir, str(key)), 'wb') as f:
                f.write(self.serialize(value))

    def __iter__(self):
        for filename in listdir(self.cache_dir):
            if self._is_cache_file(filename):
                yield filename

    def __len__(self):
        return sum(1 for filename in listdir(self.cache_dir) if self._is_cache_file(filename))

    def clear(self):
        """Remove the cache directory and all its contents, then recreate it empty"""
        with self._try_io(ignore_errors=True):
            rmtree(self.cache_dir)
            # exist_ok guards against a concurrent writer recreating the dir
            # between rmtree and makedirs
            makedirs(self.cache_dir, exist_ok=True)

    def paths(self):
        """Get file paths to all cached responses"""
        for key in self:
            yield join(self.cache_dir, key)


def _get_cache_dir(cache_dir: Union[Path, str], use_temp: bool) -> str:
    """Resolve the cache directory (optionally relative to the system temp dir) and create it.

    An absolute ``cache_dir`` takes precedence over ``use_temp``.
    """
    if use_temp and not isabs(cache_dir):
        cache_dir = join(gettempdir(), cache_dir)
    cache_dir = abspath(expanduser(str(cache_dir)))
    makedirs(cache_dir, exist_ok=True)
    return cache_dir
import unittest
from os.path import isfile
from shutil import rmtree

import pytest

from requests_cache.backends import FileDict
from tests.integration.test_backends import BaseStorageTestCase


class FilesystemTestCase(BaseStorageTestCase, unittest.TestCase):
    """Integration tests for FileDict, the filesystem response store.

    Reuses the shared BaseStorageTestCase suite, plus filesystem-specific checks.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, storage_class=FileDict, picklable=True, **kwargs)

    def tearDown(self):
        # ignore_errors: if a test fails before the cache dir is created,
        # an unconditional rmtree would raise FileNotFoundError here and
        # mask the original test failure
        rmtree(self.NAMESPACE, ignore_errors=True)

    def test_set_get(self):
        """Basic round-trip through files on disk, and KeyError for a missing key"""
        cache = self.storage_class(self.NAMESPACE)
        cache['key'] = 'value'
        assert list(cache.keys()) == ['key']
        assert list(cache.values()) == ['value']

        with pytest.raises(KeyError):
            cache[4]

    def test_paths(self):
        """paths() should yield an existing file for every stored response"""
        cache = self.storage_class(self.NAMESPACE)
        for i in range(10):
            cache[f'key_{i}'] = f'value_{i}'

        for path in cache.paths():
            assert isfile(path)