diff options
author | Jordan Cook <JWCook@users.noreply.github.com> | 2021-12-01 13:01:34 -0600 |
---|---|---|
committer | Jordan Cook <jordan.cook@pioneer.com> | 2021-12-02 10:01:14 -0600 |
commit | 5e11e7ffcc7e7b918a2dcba629a26a1b0ed4de33 (patch) | |
tree | 6a66bfcfde0cc97df4053173fbe219772205ce92 | |
parent | 45640082b02d595baa6ca378166fdc2daf2cd025 (diff) | |
parent | 838db690b0c06e9a03b15ea38c6856a69485d1fe (diff) | |
download | requests-cache-5e11e7ffcc7e7b918a2dcba629a26a1b0ed4de33.tar.gz |
Merge pull request #466 from JWCook/redis
Add TTL support for Redis backend
-rw-r--r-- | HISTORY.md | 1 | ||||
-rw-r--r-- | docs/user_guide/headers.md | 4 | ||||
-rw-r--r-- | requests_cache/backends/__init__.py | 4 | ||||
-rw-r--r-- | requests_cache/backends/redis.py | 124 | ||||
-rwxr-xr-x | requests_cache/models/response.py | 8 | ||||
-rw-r--r-- | requests_cache/serializers/__init__.py | 1 | ||||
-rw-r--r-- | requests_cache/serializers/cattrs.py | 4 | ||||
-rw-r--r-- | requests_cache/serializers/preconf.py | 3 | ||||
-rw-r--r-- | tests/integration/base_storage_test.py | 7 | ||||
-rw-r--r-- | tests/integration/test_redis.py | 10 |
10 files changed, 135 insertions, 31 deletions
@@ -11,6 +11,7 @@ **Backends:** * Filesystem and SQLite backends: Add better error message if parent path exists but isn't a directory +* Redis: Improve performance by using native Redis TTL for cache expiration **Other features:** * Support `expire_after` param for `CachedSession.send()` diff --git a/docs/user_guide/headers.md b/docs/user_guide/headers.md index 8373214..9fdfb8e 100644 --- a/docs/user_guide/headers.md +++ b/docs/user_guide/headers.md @@ -32,10 +32,10 @@ True, True ``` ## Cache-Control -`Cache-Control` request headers will be used if present. This is mainly useful for patching an +`Cache-Control` **request** headers will be used if present. This is mainly useful for patching an existing library that sets request headers. -`Cache-Control` response headers are an opt-in feature. If enabled, these will take priority over +`Cache-Control` **response** headers are an opt-in feature. If enabled, these will take priority over any other `expire_after` values. See {ref}`precedence` for the full order of precedence. To enable this behavior, use the `cache_control` option: diff --git a/requests_cache/backends/__init__.py b/requests_cache/backends/__init__.py index 0c525fe..385e6a8 100644 --- a/requests_cache/backends/__init__.py +++ b/requests_cache/backends/__init__.py @@ -42,9 +42,9 @@ try: except ImportError as e: MongoCache = MongoDict = MongoPickleDict = get_placeholder_class(e) # type: ignore try: - from .redis import RedisCache, RedisDict + from .redis import RedisCache, RedisDict, RedisHashDict except ImportError as e: - RedisCache = RedisDict = get_placeholder_class(e) # type: ignore + RedisCache = RedisDict = RedisHashDict = get_placeholder_class(e) # type: ignore try: # Note: Heroku doesn't support SQLite due to ephemeral storage from .sqlite import SQLiteCache, SQLiteDict, SQLitePickleDict diff --git a/requests_cache/backends/redis.py b/requests_cache/backends/redis.py index 41d908a..bc8bc15 100644 --- a/requests_cache/backends/redis.py +++ b/requests_cache/backends/redis.py @@ -9,7 +9,7 @@ applications. Persistence ^^^^^^^^^^^ Redis operates on data in memory, and by default also persists data to snapshots on disk. This is -optimized for performance with a minor risk of data loss, which is usually the best configuration +optimized for performance, with a minor risk of data loss, and is usually the best configuration for a cache. If you need different behavior, the frequency and type of persistence can be customized or disabled entirely. See `Redis Persistence <https://redis.io/topics/persistence>`_ for details. @@ -31,6 +31,7 @@ API Reference :classes-only: :nosignatures: """ +from logging import getLogger from typing import Iterable from redis import Redis, StrictRedis @@ -39,6 +40,8 @@ from .._utils import get_valid_kwargs from ..cache_keys import decode, encode from . import BaseCache, BaseStorage +logger = getLogger(__name__) + class RedisCache(BaseCache): """Redis cache backend @@ -51,51 +54,136 @@ class RedisCache(BaseCache): def __init__(self, namespace='http_cache', connection: Redis = None, **kwargs): super().__init__(**kwargs) - self.responses = RedisDict(namespace, 'responses', connection=connection, **kwargs) - self.redirects = RedisDict( + self.responses = RedisDict(namespace, connection=connection, **kwargs) + self.redirects = RedisHashDict( namespace, 'redirects', connection=self.responses.connection, **kwargs ) class RedisDict(BaseStorage): - """A dictionary-like interface for Redis operations + """A dictionary-like interface for Redis operations. **Notes:** - * In order to deal with how Redis stores data, all keys will be encoded and all values will - be serialized. - * The full hash name will be ``namespace:collection_name`` + * All keys will be encoded as bytes, and all values will be serialized + * Supports TTL """ - def __init__(self, namespace, collection_name='http_cache', connection=None, **kwargs): + def __init__(self, namespace: str, collection_name: str = None, connection=None, **kwargs): super().__init__(**kwargs) connection_kwargs = get_valid_kwargs(Redis, kwargs) self.connection = connection or StrictRedis(**connection_kwargs) - self._self_key = f'{namespace}:{collection_name}' + self.namespace = namespace + + def _bkey(self, key: str) -> bytes: + """Get a full hash key as bytes""" + return encode(f'{self.namespace}:{key}') + + def _bkeys(self, keys: Iterable[str]): + return [self._bkey(key) for key in keys] + + def __contains__(self, key) -> bool: + return bool(self.connection.exists(self._bkey(key))) def __getitem__(self, key): - result = self.connection.hget(self._self_key, encode(key)) + result = self.connection.get(self._bkey(key)) if result is None: raise KeyError return self.serializer.loads(result) def __setitem__(self, key, item): - self.connection.hset(self._self_key, encode(key), self.serializer.dumps(item)) + """Save an item to the cache, optionally with TTL""" + if getattr(item, 'ttl', None): + self.connection.setex(self._bkey(key), item.ttl, self.serializer.dumps(item)) + else: + self.connection.set(self._bkey(key), self.serializer.dumps(item)) def __delitem__(self, key): - if not self.connection.hdel(self._self_key, encode(key)): + if not self.connection.delete(self._bkey(key)): raise KeyError + def __iter__(self): + yield from self.keys() + def __len__(self): - return self.connection.hlen(self._self_key) + return len(list(self.keys())) + + def bulk_delete(self, keys: Iterable[str]): + """Delete multiple keys from the cache, without raising errors for missing keys""" + if keys: + self.connection.delete(*self._bkeys(keys)) + + def clear(self): + self.bulk_delete(self.keys()) + + def keys(self): + return [ + decode(key).replace(f'{self.namespace}:', '') + for key in self.connection.keys(f'{self.namespace}:*') + ] + + def items(self): + return [(k, self[k]) for k in self.keys()] + + def values(self): + return [self.serializer.loads(v) for v in self.connection.mget(*self._bkeys(self.keys()))] + + +class RedisHashDict(BaseStorage): + """A dictionary-like interface for operations on a single Redis hash + + **Notes:** + * All keys will be encoded as bytes, and all values will be serialized + * Items will be stored in a hash named ``namespace:collection_name`` + """ + + def __init__( + self, namespace: str = 'http_cache', collection_name: str = None, connection=None, **kwargs + ): + super().__init__(**kwargs) + connection_kwargs = get_valid_kwargs(Redis, kwargs) + self.connection = connection or StrictRedis(**connection_kwargs) + self._hash_key = f'{namespace}-{collection_name}' + + def __contains__(self, key): + return self.connection.hexists(self._hash_key, encode(key)) + + def __getitem__(self, key): + result = self.connection.hget(self._hash_key, encode(key)) + if result is None: + raise KeyError + return self.serializer.loads(result) + + def __setitem__(self, key, item): + self.connection.hset(self._hash_key, encode(key), self.serializer.dumps(item)) + + def __delitem__(self, key): + if not self.connection.hdel(self._hash_key, encode(key)): + raise KeyError def __iter__(self): - for key in self.connection.hkeys(self._self_key): - yield decode(key) + yield from self.keys() + + def __len__(self): + return self.connection.hlen(self._hash_key) def bulk_delete(self, keys: Iterable[str]): - """Delete multiple keys from the cache. Does not raise errors for missing keys.""" + """Delete multiple keys from the cache, without raising errors for missing keys""" if keys: - self.connection.hdel(self._self_key, *[encode(key) for key in keys]) + self.connection.hdel(self._hash_key, *[encode(key) for key in keys]) def clear(self): - self.connection.delete(self._self_key) + self.connection.delete(self._hash_key) + + def keys(self): + return [decode(key) for key in self.connection.hkeys(self._hash_key)] + + def items(self): + """Get all ``(key, value)`` pairs in the hash""" + return [ + (decode(k), self.serializer.loads(v)) + for k, v in self.connection.hgetall(self._hash_key).items() + ] + + def values(self): + """Get all values in the hash""" + return [self.serializer.loads(v) for v in self.connection.hvals(self._hash_key)] diff --git a/requests_cache/models/response.py b/requests_cache/models/response.py index ec65034..561d6c8 100755 --- a/requests_cache/models/response.py +++ b/requests_cache/models/response.py @@ -96,6 +96,14 @@ class CachedResponse(Response): return self.expires is not None and datetime.utcnow() >= self.expires @property + def ttl(self) -> Optional[int]: + """Get time to expiration in seconds""" + if self.expires is None or self.is_expired: + return None + delta = self.expires - datetime.utcnow() + return int(delta.total_seconds()) + + @property def next(self) -> Optional[PreparedRequest]: """Returns a PreparedRequest for the next request in a redirect chain, if there is one.""" return self._next.prepare() if self._next else None diff --git a/requests_cache/serializers/__init__.py b/requests_cache/serializers/__init__.py index 085f5cb..08ac11c 100644 --- a/requests_cache/serializers/__init__.py +++ b/requests_cache/serializers/__init__.py @@ -5,6 +5,7 @@ from .cattrs import CattrStage from .pipeline import SerializerPipeline, Stage from .preconf import ( bson_serializer, + dict_serializer, json_serializer, pickle_serializer, safe_pickle_serializer, diff --git a/requests_cache/serializers/cattrs.py b/requests_cache/serializers/cattrs.py index 522f0db..b28acd0 100644 --- a/requests_cache/serializers/cattrs.py +++ b/requests_cache/serializers/cattrs.py @@ -1,6 +1,6 @@ """ -Utilities to break down :py:class:`.CachedResponse` objects into python builtin types using -`cattrs <https://cattrs.readthedocs.io>`_. This does the majority of the work needed for any +Utilities to break down :py:class:`.CachedResponse` objects into a dict of python builtin types +using `cattrs <https://cattrs.readthedocs.io>`_. This does the majority of the work needed for any serialization format. .. automodsumm:: requests_cache.serializers.cattrs diff --git a/requests_cache/serializers/preconf.py b/requests_cache/serializers/preconf.py index cf9aeee..dce7e60 100644 --- a/requests_cache/serializers/preconf.py +++ b/requests_cache/serializers/preconf.py @@ -23,7 +23,8 @@ from .._utils import get_placeholder_class from .cattrs import CattrStage from .pipeline import SerializerPipeline, Stage -base_stage = CattrStage() #: Base stage for all serializer pipelines +base_stage = CattrStage() #: Base stage for all serializer pipelines (or standalone dict serializer) +dict_serializer = base_stage #: Partial serializer that unstructures responses into dicts bson_preconf_stage = CattrStage(bson_preconf.make_converter) #: Pre-serialization steps for BSON json_preconf_stage = CattrStage(json_preconf.make_converter) #: Pre-serialization steps for JSON msgpack_preconf_stage = CattrStage(msgpack.make_converter) #: Pre-serialization steps for msgpack diff --git a/tests/integration/base_storage_test.py b/tests/integration/base_storage_test.py index 9a8e3e0..e89931c 100644 --- a/tests/integration/base_storage_test.py +++ b/tests/integration/base_storage_test.py @@ -23,10 +23,9 @@ class BaseStorageTest: cache.clear() return cache - def tearDown(self): - for i in range(self.num_instances): - self.init_cache(i, clear=True) - super().tearDown() + def teardown_class(cls): + for i in range(cls.num_instances): + cls().init_cache(index=i, clear=True) def test_basic_methods(self): """Test basic dict methods with multiple cache instances: diff --git a/tests/integration/test_redis.py b/tests/integration/test_redis.py index bdabc26..08f1ee0 100644 --- a/tests/integration/test_redis.py +++ b/tests/integration/test_redis.py @@ -2,7 +2,7 @@ from unittest.mock import patch import pytest -from requests_cache.backends.redis import RedisCache, RedisDict +from requests_cache.backends.redis import RedisCache, RedisDict, RedisHashDict from tests.conftest import fail_if_no_connection from tests.integration.base_cache_test import BaseCacheTest from tests.integration.base_storage_test import BaseStorageTest @@ -19,7 +19,7 @@ def ensure_connection(): class TestRedisDict(BaseStorageTest): storage_class = RedisDict - picklable = True + num_instances = 1 # Only supports a single instance, since it stores items under top-level keys @patch('requests_cache.backends.redis.StrictRedis') def test_connection_kwargs(self, mock_redis): @@ -28,5 +28,11 @@ class TestRedisDict(BaseStorageTest): mock_redis.assert_called_with(username='user', password='pass') +class TestRedisHashDict(TestRedisDict): + storage_class = RedisHashDict + num_instances: int = 10 # Supports multiple instances, since this stores items under hash keys + picklable = True + + class TestRedisCache(BaseCacheTest): backend_class = RedisCache |