Merge pull request #466 from JWCook/redis

Add TTL support for Redis backend
author: Jordan Cook <JWCook@users.noreply.github.com> 2021-12-01 13:01:34 -0600
committer: Jordan Cook <jordan.cook@pioneer.com> 2021-12-02 10:01:14 -0600
commit: 5e11e7ffcc7e7b918a2dcba629a26a1b0ed4de33 (patch)
tree: 6a66bfcfde0cc97df4053173fbe219772205ce92
parent: 45640082b02d595baa6ca378166fdc2daf2cd025 (diff)
parent: 838db690b0c06e9a03b15ea38c6856a69485d1fe (diff)
download: requests-cache-5e11e7ffcc7e7b918a2dcba629a26a1b0ed4de33.tar.gz
10 files changed, 135 insertions, 31 deletions
diff --git a/HISTORY.md b/HISTORY.md
index ce4a577..e6dcfc1 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -11,6 +11,7 @@
 
 **Backends:**
 * Filesystem and SQLite backends: Add better error message if parent path exists but isn't a directory
+* Redis: Improve performance by using native Redis TTL for cache expiration
 
 **Other features:**
 * Support `expire_after` param for `CachedSession.send()`
diff --git a/docs/user_guide/headers.md b/docs/user_guide/headers.md
index 8373214..9fdfb8e 100644
--- a/docs/user_guide/headers.md
+++ b/docs/user_guide/headers.md
@@ -32,10 +32,10 @@ True, True
 ```
 
 ## Cache-Control
-`Cache-Control` request headers will be used if present. This is mainly useful for patching an
+`Cache-Control` **request** headers will be used if present. This is mainly useful for patching an
 existing library that sets request headers.
 
-`Cache-Control` response headers are an opt-in feature. If enabled, these will take priority over
+`Cache-Control` **response** headers are an opt-in feature. If enabled, these will take priority over
 any other `expire_after` values. See {ref}`precedence` for the full order of precedence.
 
 To enable this behavior, use the `cache_control` option:
diff --git a/requests_cache/backends/__init__.py b/requests_cache/backends/__init__.py
index 0c525fe..385e6a8 100644
--- a/requests_cache/backends/__init__.py
+++ b/requests_cache/backends/__init__.py
@@ -42,9 +42,9 @@ try:
 except ImportError as e:
     MongoCache = MongoDict = MongoPickleDict = get_placeholder_class(e)  # type: ignore
 try:
-    from .redis import RedisCache, RedisDict
+    from .redis import RedisCache, RedisDict, RedisHashDict
 except ImportError as e:
-    RedisCache = RedisDict = get_placeholder_class(e)  # type: ignore
+    RedisCache = RedisDict = RedisHashDict = get_placeholder_class(e)  # type: ignore
 try:
     # Note: Heroku doesn't support SQLite due to ephemeral storage
     from .sqlite import SQLiteCache, SQLiteDict, SQLitePickleDict
diff --git a/requests_cache/backends/redis.py b/requests_cache/backends/redis.py
index 41d908a..bc8bc15 100644
--- a/requests_cache/backends/redis.py
+++ b/requests_cache/backends/redis.py
@@ -9,7 +9,7 @@ applications.
 Persistence
 ^^^^^^^^^^^
 Redis operates on data in memory, and by default also persists data to snapshots on disk. This is
-optimized for performance with a minor risk of data loss, which is usually the best configuration
+optimized for performance, with a minor risk of data loss, and is usually the best configuration
 for a cache. If you need different behavior, the frequency and type of persistence can be customized
 or disabled entirely. See `Redis Persistence <https://redis.io/topics/persistence>`_ for details.
 
@@ -31,6 +31,7 @@ API Reference
    :classes-only:
    :nosignatures:
 """
+from logging import getLogger
 from typing import Iterable
 
 from redis import Redis, StrictRedis
@@ -39,6 +40,8 @@ from .._utils import get_valid_kwargs
 from ..cache_keys import decode, encode
 from . import BaseCache, BaseStorage
 
+logger = getLogger(__name__)
+
 
 class RedisCache(BaseCache):
     """Redis cache backend
@@ -51,51 +54,136 @@ class RedisCache(BaseCache):
 
     def __init__(self, namespace='http_cache', connection: Redis = None, **kwargs):
         super().__init__(**kwargs)
-        self.responses = RedisDict(namespace, 'responses', connection=connection, **kwargs)
-        self.redirects = RedisDict(
+        self.responses = RedisDict(namespace, connection=connection, **kwargs)
+        self.redirects = RedisHashDict(
             namespace, 'redirects', connection=self.responses.connection, **kwargs
         )
 
 
 class RedisDict(BaseStorage):
-    """A dictionary-like interface for Redis operations
+    """A dictionary-like interface for Redis operations.
 
     **Notes:**
-        * In order to deal with how Redis stores data, all keys will be encoded and all values will
-          be serialized.
-        * The full hash name will be ``namespace:collection_name``
+        * All keys will be encoded as bytes, and all values will be serialized
+        * Supports TTL: items with a ``ttl`` attribute are saved with a native Redis expiration
     """
 
-    def __init__(self, namespace, collection_name='http_cache', connection=None, **kwargs):
+    def __init__(self, namespace: str, collection_name: str = None, connection=None, **kwargs):
         super().__init__(**kwargs)
         connection_kwargs = get_valid_kwargs(Redis, kwargs)
         self.connection = connection or StrictRedis(**connection_kwargs)
-        self._self_key = f'{namespace}:{collection_name}'
+        self.namespace = namespace
+
+    def _bkey(self, key: str) -> bytes:
+        """Get a full key (``namespace:key``) as bytes"""
+        return encode(f'{self.namespace}:{key}')
+
+    def _bkeys(self, keys: Iterable[str]):
+        return [self._bkey(key) for key in keys]
+
+    def __contains__(self, key) -> bool:
+        return bool(self.connection.exists(self._bkey(key)))
 
     def __getitem__(self, key):
-        result = self.connection.hget(self._self_key, encode(key))
+        result = self.connection.get(self._bkey(key))
         if result is None:
             raise KeyError
         return self.serializer.loads(result)
 
     def __setitem__(self, key, item):
-        self.connection.hset(self._self_key, encode(key), self.serializer.dumps(item))
+        """Save an item to the cache, optionally with TTL"""
+        if getattr(item, 'ttl', None):
+            self.connection.setex(self._bkey(key), item.ttl, self.serializer.dumps(item))
+        else:
+            self.connection.set(self._bkey(key), self.serializer.dumps(item))
 
     def __delitem__(self, key):
-        if not self.connection.hdel(self._self_key, encode(key)):
+        if not self.connection.delete(self._bkey(key)):
             raise KeyError
 
+    def __iter__(self):
+        yield from self.keys()
+
     def __len__(self):
-        return self.connection.hlen(self._self_key)
+        return len(list(self.keys()))
+
+    def bulk_delete(self, keys: Iterable[str]):
+        """Delete multiple keys from the cache, without raising errors for missing keys"""
+        if keys:
+            self.connection.delete(*self._bkeys(keys))
+
+    def clear(self):
+        self.bulk_delete(self.keys())
+
+    def keys(self):
+        return [
+            decode(key).replace(f'{self.namespace}:', '')
+            for key in self.connection.keys(f'{self.namespace}:*')
+        ]
+
+    def items(self):
+        return [(k, self[k]) for k in self.keys()]
+
+    def values(self):
+        return [self.serializer.loads(v) for v in self.connection.mget(*self._bkeys(self.keys()))]
+
+
+class RedisHashDict(BaseStorage):
+    """A dictionary-like interface for operations on a single Redis hash
+
+    **Notes:**
+        * All keys will be encoded as bytes, and all values will be serialized
+        * Items will be stored in a hash named ``namespace-collection_name``
+    """
+
+    def __init__(
+        self, namespace: str = 'http_cache', collection_name: str = None, connection=None, **kwargs
+    ):
+        super().__init__(**kwargs)
+        connection_kwargs = get_valid_kwargs(Redis, kwargs)
+        self.connection = connection or StrictRedis(**connection_kwargs)
+        self._hash_key = f'{namespace}-{collection_name}'
+
+    def __contains__(self, key):
+        return self.connection.hexists(self._hash_key, encode(key))
+
+    def __getitem__(self, key):
+        result = self.connection.hget(self._hash_key, encode(key))
+        if result is None:
+            raise KeyError
+        return self.serializer.loads(result)
+
+    def __setitem__(self, key, item):
+        self.connection.hset(self._hash_key, encode(key), self.serializer.dumps(item))
+
+    def __delitem__(self, key):
+        if not self.connection.hdel(self._hash_key, encode(key)):
+            raise KeyError
 
     def __iter__(self):
-        for key in self.connection.hkeys(self._self_key):
-            yield decode(key)
+        yield from self.keys()
+
+    def __len__(self):
+        return self.connection.hlen(self._hash_key)
 
     def bulk_delete(self, keys: Iterable[str]):
-        """Delete multiple keys from the cache. Does not raise errors for missing keys."""
+        """Delete multiple keys from the cache, without raising errors for missing keys"""
         if keys:
-            self.connection.hdel(self._self_key, *[encode(key) for key in keys])
+            self.connection.hdel(self._hash_key, *[encode(key) for key in keys])
 
     def clear(self):
-        self.connection.delete(self._self_key)
+        self.connection.delete(self._hash_key)
+
+    def keys(self):
+        return [decode(key) for key in self.connection.hkeys(self._hash_key)]
+
+    def items(self):
+        """Get all ``(key, value)`` pairs in the hash"""
+        return [
+            (decode(k), self.serializer.loads(v))
+            for k, v in self.connection.hgetall(self._hash_key).items()
+        ]
+
+    def values(self):
+        """Get all values in the hash"""
+        return [self.serializer.loads(v) for v in self.connection.hvals(self._hash_key)]
diff --git a/requests_cache/models/response.py b/requests_cache/models/response.py
index ec65034..561d6c8 100755
--- a/requests_cache/models/response.py
+++ b/requests_cache/models/response.py
@@ -96,6 +96,14 @@ class CachedResponse(Response):
         return self.expires is not None and datetime.utcnow() >= self.expires
 
     @property
+    def ttl(self) -> Optional[int]:
+        """Get time to expiration in seconds (``None`` if not set or already expired)"""
+        if self.expires is None or self.is_expired:
+            return None
+        delta = self.expires - datetime.utcnow()
+        return int(delta.total_seconds())
+
+    @property
     def next(self) -> Optional[PreparedRequest]:
         """Returns a PreparedRequest for the next request in a redirect chain, if there is one."""
         return self._next.prepare() if self._next else None
diff --git a/requests_cache/serializers/__init__.py b/requests_cache/serializers/__init__.py
index 085f5cb..08ac11c 100644
--- a/requests_cache/serializers/__init__.py
+++ b/requests_cache/serializers/__init__.py
@@ -5,6 +5,7 @@ from .cattrs import CattrStage
 from .pipeline import SerializerPipeline, Stage
 from .preconf import (
     bson_serializer,
+    dict_serializer,
     json_serializer,
     pickle_serializer,
     safe_pickle_serializer,
diff --git a/requests_cache/serializers/cattrs.py b/requests_cache/serializers/cattrs.py
index 522f0db..b28acd0 100644
--- a/requests_cache/serializers/cattrs.py
+++ b/requests_cache/serializers/cattrs.py
@@ -1,6 +1,6 @@
 """
-Utilities to break down :py:class:`.CachedResponse` objects into python builtin types using
-`cattrs <https://cattrs.readthedocs.io>`_. This does the majority of the work needed for any
+Utilities to break down :py:class:`.CachedResponse` objects into a dict of python builtin types
+using `cattrs <https://cattrs.readthedocs.io>`_. This does the majority of the work needed for any
 serialization format.
 
 .. automodsumm:: requests_cache.serializers.cattrs
diff --git a/requests_cache/serializers/preconf.py b/requests_cache/serializers/preconf.py
index cf9aeee..dce7e60 100644
--- a/requests_cache/serializers/preconf.py
+++ b/requests_cache/serializers/preconf.py
@@ -23,7 +23,8 @@ from .._utils import get_placeholder_class
 from .cattrs import CattrStage
 from .pipeline import SerializerPipeline, Stage
 
-base_stage = CattrStage()  #: Base stage for all serializer pipelines
+base_stage = CattrStage()  #: Base stage for all serializer pipelines (or standalone dict serializer)
+dict_serializer = base_stage  #: Partial serializer that unstructures responses into dicts
 bson_preconf_stage = CattrStage(bson_preconf.make_converter)  #: Pre-serialization steps for BSON
 json_preconf_stage = CattrStage(json_preconf.make_converter)  #: Pre-serialization steps for JSON
 msgpack_preconf_stage = CattrStage(msgpack.make_converter)  #: Pre-serialization steps for msgpack
diff --git a/tests/integration/base_storage_test.py b/tests/integration/base_storage_test.py
index 9a8e3e0..e89931c 100644
--- a/tests/integration/base_storage_test.py
+++ b/tests/integration/base_storage_test.py
@@ -23,10 +23,9 @@ class BaseStorageTest:
             cache.clear()
         return cache
 
-    def tearDown(self):
-        for i in range(self.num_instances):
-            self.init_cache(i, clear=True)
-        super().tearDown()
+    def teardown_class(cls):
+        for i in range(cls.num_instances):
+            cls().init_cache(index=i, clear=True)
 
     def test_basic_methods(self):
         """Test basic dict methods with multiple cache instances:
diff --git a/tests/integration/test_redis.py b/tests/integration/test_redis.py
index bdabc26..08f1ee0 100644
--- a/tests/integration/test_redis.py
+++ b/tests/integration/test_redis.py
@@ -2,7 +2,7 @@ from unittest.mock import patch
 
 import pytest
 
-from requests_cache.backends.redis import RedisCache, RedisDict
+from requests_cache.backends.redis import RedisCache, RedisDict, RedisHashDict
 from tests.conftest import fail_if_no_connection
 from tests.integration.base_cache_test import BaseCacheTest
 from tests.integration.base_storage_test import BaseStorageTest
@@ -19,7 +19,7 @@ def ensure_connection():
 
 class TestRedisDict(BaseStorageTest):
     storage_class = RedisDict
-    picklable = True
+    num_instances = 1  # Only supports a single instance, since it stores items under top-level keys
 
     @patch('requests_cache.backends.redis.StrictRedis')
     def test_connection_kwargs(self, mock_redis):
@@ -28,5 +28,11 @@ class TestRedisDict(BaseStorageTest):
         mock_redis.assert_called_with(username='user', password='pass')
 
 
+class TestRedisHashDict(TestRedisDict):
+    storage_class = RedisHashDict
+    num_instances: int = 10  # Supports multiple instances, since this stores items under hash keys
+    picklable = True
+
+
 class TestRedisCache(BaseCacheTest):
     backend_class = RedisCache
author	Jordan Cook <JWCook@users.noreply.github.com>	2021-12-01 13:01:34 -0600
committer	Jordan Cook <jordan.cook@pioneer.com>	2021-12-02 10:01:14 -0600
commit	5e11e7ffcc7e7b918a2dcba629a26a1b0ed4de33 (patch)
tree	6a66bfcfde0cc97df4053173fbe219772205ce92
parent	45640082b02d595baa6ca378166fdc2daf2cd025 (diff)
parent	838db690b0c06e9a03b15ea38c6856a69485d1fe (diff)
download	requests-cache-5e11e7ffcc7e7b918a2dcba629a26a1b0ed4de33.tar.gz