summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author     Jordan Cook <jordan.cook@pioneer.com>  2021-04-02 22:55:52 -0500
committer  Jordan Cook <jordan.cook@pioneer.com>  2021-04-21 10:51:14 -0500
commit     f893f283957000be0aae7cf802ed341541379542 (patch)
tree       d7d1b1d33a3319811e6b899e0e014fa77108699f
parent     cdf07cc8aa68b410020182b4127aa8394ce7b7d4 (diff)
download   requests-cache-f893f283957000be0aae7cf802ed341541379542.tar.gz
Add a filesystem backend
-rw-r--r--  .gitignore                              1
-rw-r--r--  README.md                               2
-rw-r--r--  docs/user_guide.rst                     2
-rw-r--r--  pyproject.toml                          1
-rw-r--r--  requests_cache/backends/__init__.py     5
-rw-r--r--  requests_cache/backends/filesystem.py  87
-rw-r--r--  requests_cache/backends/sqlite.py       2
-rw-r--r--  tests/integration/test_backends.py      4
-rw-r--r--  tests/integration/test_filesystem.py   32
-rw-r--r--  tests/integration/test_thread_safety.py 2
10 files changed, 133 insertions, 5 deletions
diff --git a/.gitignore b/.gitignore
index bf1cf69..ff5da6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
*.egg-info
build/
dist/
+http_cache/
venv/
# Editors
diff --git a/README.md b/README.md
index 6a50e97..494f90b 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ See full project documentation at: https://requests-cache.readthedocs.io
[expiration](https://requests-cache.readthedocs.io/en/latest/user_guide.html#cache-expiration)
and other [behavior](https://requests-cache.readthedocs.io/en/latest/user_guide.html#cache-options)
* **Persistence:** Includes several [storage backends](https://requests-cache.readthedocs.io/en/latest/user_guide.html#cache-backends):
- SQLite, Redis, MongoDB, and DynamoDB.
+ SQLite, Redis, MongoDB, GridFS, DynamoDB, and filesystem.
* **Compatibility:** Can be used alongside
[other popular libraries based on requests](https://requests-cache.readthedocs.io/en/latest/advanced_usage.html#library-compatibility)
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index e068c4c..1c6619f 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -116,6 +116,7 @@ the ``backend`` parameter for either :py:class:`.CachedSession` or :py:func:`.in
* ``'mongodb'``: `MongoDB <https://www.mongodb.com>`_ database (requires ``pymongo``)
* ``'gridfs'``: `GridFS <https://docs.mongodb.com/manual/core/gridfs/>`_ collections on a MongoDB database (requires ``pymongo``)
* ``'dynamodb'``: `Amazon DynamoDB <https://aws.amazon.com/dynamodb>`_ database (requires ``boto3``)
+* ``'filesystem'``: Stores responses as files on the local filesystem
* ``'memory'`` : A non-persistent cache that just stores responses in memory
A backend can be specified either by name, class or instance:
@@ -143,6 +144,7 @@ The ``cache_name`` parameter will be used as follows depending on the backend:
* ``dynamodb``: Table name
* ``mongodb`` and ``gridfs``: Database name
* ``redis``: Namespace, meaning all keys will be prefixed with ``'<cache_name>:'``
+* ``filesystem``: Cache directory
Cache Options
-------------
diff --git a/pyproject.toml b/pyproject.toml
index c2c5a0e..46d6ec3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,6 +13,7 @@ branch = true
source = ['requests_cache']
omit = [
'requests_cache/__init__.py',
+ 'requests_cache/backends/__init__.py',
]
[tool.isort]
diff --git a/requests_cache/backends/__init__.py b/requests_cache/backends/__init__.py
index f64f051..2b165a2 100644
--- a/requests_cache/backends/__init__.py
+++ b/requests_cache/backends/__init__.py
@@ -72,10 +72,15 @@ try:
from .sqlite import DbCache, DbDict, DbPickleDict
except ImportError as e:
DbCache = DbDict = DbPickleDict = get_placeholder_backend(e) # type: ignore
+try:
+ from .filesystem import FileCache, FileDict
+except ImportError as e:
+ FileCache = FileDict = get_placeholder_backend(e) # type: ignore
BACKEND_CLASSES = {
'dynamodb': DynamoDbCache,
+ 'filesystem': FileCache,
'gridfs': GridFSCache,
'memory': BaseCache,
'mongo': MongoCache,
diff --git a/requests_cache/backends/filesystem.py b/requests_cache/backends/filesystem.py
new file mode 100644
index 0000000..c85751f
--- /dev/null
+++ b/requests_cache/backends/filesystem.py
@@ -0,0 +1,87 @@
+# TODO: Add option for compression?
+from contextlib import contextmanager
+from os import listdir, makedirs, unlink
+from os.path import abspath, expanduser, isabs, join
+from pathlib import Path
+from pickle import PickleError
+from shutil import rmtree
+from tempfile import gettempdir
+from typing import Union
+
+from . import BaseCache, BaseStorage
+from .sqlite import DbDict
+
+
+class FileCache(BaseCache):
+ """Backend that stores cached responses as files on the local filesystem. Response paths will be
+ in the format ``<cache_name>/<cache_key>``. Redirects are stored in a SQLite database.
+
+ Args:
+ cache_name: Base directory for cache files
+ use_temp: Store cache files in a temp directory (e.g., ``/tmp/http_cache/``).
+ Note: if ``cache_name`` is an absolute path, this option will be ignored.
+ """
+
+ def __init__(self, cache_name: Union[Path, str] = 'http_cache', use_temp: bool = False, **kwargs):
+ super().__init__(**kwargs)
+ cache_dir = _get_cache_dir(cache_name, use_temp)
+ self.responses = FileDict(cache_dir, **kwargs)
+ self.redirects = DbDict(join(cache_dir, 'redirects.sqlite'), 'redirects', **kwargs)
+
+
+class FileDict(BaseStorage):
+ """A dictionary-like interface to files on the local filesystem"""
+
+ def __init__(self, cache_dir, **kwargs):
+ kwargs.setdefault('suppress_warnings', True)
+ super().__init__(**kwargs)
+ self.cache_dir = cache_dir
+ makedirs(self.cache_dir, exist_ok=True)
+
+ @contextmanager
+ def _try_io(self, ignore_errors: bool = False):
+ """Attempt an I/O operation, and either ignore errors or re-raise them as KeyErrors"""
+ try:
+ yield
+ except (IOError, OSError, PickleError) as e:
+ if not ignore_errors:
+ raise KeyError(e)
+
+ def __getitem__(self, key):
+ with self._try_io():
+ with open(join(self.cache_dir, str(key)), 'rb') as f:
+ return self.deserialize(f.read())
+
+ def __delitem__(self, key):
+ with self._try_io():
+ unlink(join(self.cache_dir, str(key)))
+
+ def __setitem__(self, key, value):
+ with self._try_io():
+ with open(join(self.cache_dir, str(key)), 'wb') as f:
+ f.write(self.serialize(value))
+
+ def __iter__(self):
+ for filename in listdir(self.cache_dir):
+ yield filename
+
+ def __len__(self):
+ return len(listdir(self.cache_dir))
+
+ def clear(self):
+ with self._try_io(ignore_errors=True):
+ rmtree(self.cache_dir)
+ makedirs(self.cache_dir)
+
+ def paths(self):
+ """Get file paths to all cached responses"""
+ for key in self:
+ yield join(self.cache_dir, key)
+
+
+def _get_cache_dir(cache_dir: Union[Path, str], use_temp: bool) -> str:
+ if use_temp and not isabs(cache_dir):
+ cache_dir = join(gettempdir(), cache_dir)
+ cache_dir = abspath(expanduser(str(cache_dir)))
+ makedirs(cache_dir, exist_ok=True)
+ return cache_dir
diff --git a/requests_cache/backends/sqlite.py b/requests_cache/backends/sqlite.py
index df08cf8..470d00c 100644
--- a/requests_cache/backends/sqlite.py
+++ b/requests_cache/backends/sqlite.py
@@ -136,7 +136,7 @@ class DbDict(BaseStorage):
def clear(self):
with self.connection(True) as con:
- con.execute("drop table `%s`" % self.table_name)
+ con.execute("drop table if exists `%s`" % self.table_name)
con.execute("create table `%s` (key PRIMARY KEY, value)" % self.table_name)
con.execute("vacuum")
diff --git a/tests/integration/test_backends.py b/tests/integration/test_backends.py
index d84eac3..ca9c6e2 100644
--- a/tests/integration/test_backends.py
+++ b/tests/integration/test_backends.py
@@ -72,7 +72,7 @@ class BaseStorageTestCase:
def test_clear_and_work_again(self):
d1 = self.storage_class(self.NAMESPACE)
- d2 = self.storage_class(self.NAMESPACE, connection=d1.connection)
+ d2 = self.storage_class(self.NAMESPACE, connection=getattr(d1, 'connection', None))
d1.clear()
d2.clear()
@@ -87,7 +87,7 @@ class BaseStorageTestCase:
def test_same_settings(self):
d1 = self.storage_class(self.NAMESPACE)
- d2 = self.storage_class(self.NAMESPACE, connection=d1.connection)
+ d2 = self.storage_class(self.NAMESPACE, connection=getattr(d1, 'connection', None))
d1.clear()
d2.clear()
d1['key_1'] = 1
diff --git a/tests/integration/test_filesystem.py b/tests/integration/test_filesystem.py
new file mode 100644
index 0000000..72301c9
--- /dev/null
+++ b/tests/integration/test_filesystem.py
@@ -0,0 +1,32 @@
+import pytest
+import unittest
+from os.path import isfile
+from shutil import rmtree
+
+from requests_cache.backends import FileDict
+from tests.integration.test_backends import BaseStorageTestCase
+
+
+class FilesystemTestCase(BaseStorageTestCase, unittest.TestCase):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, storage_class=FileDict, picklable=True, **kwargs)
+
+ def tearDown(self):
+ rmtree(self.NAMESPACE)
+
+ def test_set_get(self):
+ cache = self.storage_class(self.NAMESPACE)
+ cache['key'] = 'value'
+ assert list(cache.keys()) == ['key']
+ assert list(cache.values()) == ['value']
+
+ with pytest.raises(KeyError):
+ cache[4]
+
+ def test_paths(self):
+ cache = self.storage_class(self.NAMESPACE)
+ for i in range(10):
+ cache[f'key_{i}'] = f'value_{i}'
+
+ for path in cache.paths():
+ assert isfile(path)
diff --git a/tests/integration/test_thread_safety.py b/tests/integration/test_thread_safety.py
index e9f8aca..a62133a 100644
--- a/tests/integration/test_thread_safety.py
+++ b/tests/integration/test_thread_safety.py
@@ -18,7 +18,7 @@ N_ITERATIONS = 4 * MULTIPLIER
def test_caching_with_threads(backend, iteration):
"""Run a multi-threaded stress test for each backend"""
start = time()
- session = CachedSession(backend=backend, **AWS_OPTIONS)
+ session = CachedSession(backend=backend, use_temp=True, **AWS_OPTIONS)
session.cache.clear()
url = httpbin('anything')