summary | refs | log | tree | commit | diff
path: root/requests_cache
diff options
context:
space:
mode:
author: Jordan Cook <jordan.cook@pioneer.com> 2021-08-12 17:05:02 -0500
committer: Jordan Cook <jordan.cook@pioneer.com> 2021-08-14 21:58:30 -0500
commit: 9e93370be7f5f9c1a08d64175117667a97f9d6be (patch)
tree: 79e9483a3f413cb6218a6d77ad3b944618059320 /requests_cache
parent: 8c77c9086f229cf381f9f7b96d659ec5ca7af0cb (diff)
download: requests-cache-9e93370be7f5f9c1a08d64175117667a97f9d6be.tar.gz
Add support for Last-Modified + If-Modified-Since headers
Diffstat (limited to 'requests_cache')
-rw-r--r-- requests_cache/backends/base.py 21
-rw-r--r-- requests_cache/cache_control.py 47
-rw-r--r-- requests_cache/cache_keys.py 1
-rwxr-xr-x requests_cache/models/response.py 4
-rw-r--r-- requests_cache/session.py 83
5 files changed, 76 insertions, 80 deletions
diff --git a/requests_cache/backends/base.py b/requests_cache/backends/base.py
index 7ee7ccd..9cfcd3a 100644
--- a/requests_cache/backends/base.py
+++ b/requests_cache/backends/base.py
@@ -44,7 +44,7 @@ class BaseCache:
yield response.url
def save_response(self, response: AnyResponse, cache_key: str = None, expires: datetime = None):
- """Save response to cache
+ """Save a response to the cache
Args:
cache_key: Cache key for this response; will otherwise be generated based on request
@@ -55,24 +55,15 @@ class BaseCache:
cached_response = CachedResponse.from_response(response, cache_key=cache_key, expires=expires)
cached_response.request = remove_ignored_params(cached_response.request, self.ignored_parameters)
self.responses[cache_key] = cached_response
-
- def save_redirect(self, request: AnyRequest, response_key: str):
- """
- Map a redirect request to a response. This makes it possible to associate many keys with a
- single response.
-
- Args:
- request: Request object for redirect URL
- response_key: Cache key which can be found in ``responses``
- """
- self.redirects[self.create_key(request)] = response_key
+ for r in response.history:
+ self.redirects[self.create_key(r.request)] = cache_key
def get_response(self, key: str, default=None) -> CachedResponse:
- """Retrieves response for `key` if it's stored in cache, otherwise returns `default`
+ """Retrieve a response from the cache, if it exists
Args:
- key: Key of resource
- default: Value to return if `key` is not in cache
+ key: Cache key for the response
+ default: Value to return if `key` is not in the cache
"""
try:
if key not in self.responses:
diff --git a/requests_cache/cache_control.py b/requests_cache/cache_control.py
index ff76e8e..a35220c 100644
--- a/requests_cache/cache_control.py
+++ b/requests_cache/cache_control.py
@@ -27,9 +27,16 @@ logger = getLogger(__name__)
@define
class CacheActions:
- """A dataclass that contains info on specific actions to take for a given cache item.
- This is determined by a combination of cache settings and request + response headers.
- If multiple sources are provided, they will be used in the following order of precedence:
+ """A class that translates cache settings and headers into specific actions to take for a
+ given cache item. Actions include:
+
+ * Reading from the cache
+ * Writing to the cache
+ * Setting cache expiration
+ * Adding request headers
+
+ If multiple sources provide an expiration time, they will be used in the following order of
+ precedence:
1. Cache-Control request headers (if enabled)
2. Cache-Control response headers (if enabled)
@@ -38,9 +45,9 @@ class CacheActions:
5. Per-session expiration
"""
+ add_request_headers: Dict = field(factory=dict)
cache_control: bool = field(default=False)
cache_key: str = field(default=None)
- etag: str = field(default=None)
expire_after: ExpirationTime = field(default=None)
skip_read: bool = field(default=False)
skip_write: bool = field(default=False)
@@ -105,9 +112,23 @@ class CacheActions:
"""Convert the user/header-provided expiration value to a datetime"""
return get_expiration_datetime(self.expire_after)
+ # TODO: Behavior if no other expiration method was specified (expire_after=-1)?
+ def update_from_cached_response(self, response: CachedResponse):
+ """Used after fetching a cached response, but before potentially sending a new request.
+ Check for relevant cache headers on a cached response, and set corresponding request headers.
+ """
+ if not self.cache_control or not response or not response.is_expired:
+ return
+
+ self.add_request_headers['If-None-Match'] = response.headers.get('ETag')
+ self.add_request_headers['If-Modified-Since'] = response.headers.get('Last-Modified')
+ self.add_request_headers = {k: v for k, v in self.add_request_headers.items() if v}
+
def update_from_response(self, response: Response):
- """Update expiration + actions based on response headers, if not previously set by request"""
- if not self.cache_control:
+ """Used after receiving a new response but before saving it to the cache.
+ Update expiration + actions based on response headers, if not previously set.
+ """
+ if not self.cache_control or not response:
return
directives = get_cache_directives(response.headers)
@@ -115,18 +136,6 @@ class CacheActions:
self.expire_after = coalesce(self.expires, directives.get('max-age'), directives.get('expires'))
self.skip_write = self.skip_write or do_not_cache or 'no-store' in directives
- # TODO: Behavior if no other expiration method was specified (expire_after=-1)?
- def update_from_cached_response(self, response: CachedResponse):
- """Check for ETags on cached response"""
- if self.cache_control and response and response.is_expired and response.etag:
- self.etag = response.etag
-
- def __str__(self):
- return (
- f'Expire after: {self.expire_after} | Skip read: {self.skip_read} | '
- f'Skip write: {self.skip_write}'
- )
-
def coalesce(*values: Any, default=None) -> Any:
"""Get the first non-``None`` value in a list of values"""
@@ -182,7 +191,7 @@ def get_url_expiration(
def has_cache_headers(headers: Mapping) -> bool:
- """Determine if headers contain cache directives **that we currently support**"""
+ """Determine if headers contain supported cache directives"""
has_cache_control = any([d in headers.get('Cache-Control', '') for d in CACHE_DIRECTIVES])
return has_cache_control or bool(headers.get('Expires'))
diff --git a/requests_cache/cache_keys.py b/requests_cache/cache_keys.py
index 3ded784..a045f6e 100644
--- a/requests_cache/cache_keys.py
+++ b/requests_cache/cache_keys.py
@@ -1,3 +1,4 @@
+# TODO: Ignore If-None-Match and If-Modified-Since headers by default
from __future__ import annotations
import hashlib
diff --git a/requests_cache/models/response.py b/requests_cache/models/response.py
index 2bbe9d8..2fb0ea9 100755
--- a/requests_cache/models/response.py
+++ b/requests_cache/models/response.py
@@ -85,10 +85,6 @@ class CachedResponse(Response):
pass
@property
- def etag(self) -> Optional[str]:
- return self.headers.get('ETag')
-
- @property
def from_cache(self) -> bool:
return True
diff --git a/requests_cache/session.py b/requests_cache/session.py
index ae6d117..3c8d248 100644
--- a/requests_cache/session.py
+++ b/requests_cache/session.py
@@ -12,7 +12,7 @@ from urllib3 import filepost
from .backends import BackendSpecifier, get_valid_kwargs, init_backend
from .cache_control import CacheActions, ExpirationTime
from .cache_keys import normalize_dict
-from .models import AnyResponse, set_response_defaults
+from .models import AnyResponse, CachedResponse, set_response_defaults
ALL_METHODS = ['GET', 'HEAD', 'OPTIONS', 'POST', 'PUT', 'PATCH', 'DELETE']
@@ -121,84 +121,83 @@ class CacheMixin(MIXIN_BASE):
)
# Attempt to fetch a cached response
- response: Optional[AnyResponse] = None
+ cached_response: Optional[CachedResponse] = None
if not (self._disabled or actions.skip_read):
- response = self.cache.get_response(cache_key)
- actions.update_from_cached_response(response)
- is_expired = getattr(response, 'is_expired', False)
+ cached_response = self.cache.get_response(cache_key)
+ actions.update_from_cached_response(cached_response)
+ is_expired = getattr(cached_response, 'is_expired', False)
- # If the response is expired, missing, or the cache is disabled then fetch a new response
- if response is None:
+ # If the response is expired, missing, or the cache is disabled, then fetch a new response
+ if cached_response is None:
response = self._send_and_cache(request, actions, **kwargs)
elif is_expired and self.old_data_on_error:
- response = self._resend_and_ignore(request, actions, **kwargs) or response
- elif is_expired and actions.etag:
- response = self._send_and_check_etag(request, actions, **kwargs) or response
+ response = self._resend_and_ignore(request, actions, cached_response, **kwargs)
elif is_expired:
- response = self._resend(request, actions, **kwargs)
-
- # Dispatch any hooks here, because they are removed before pickling
- response = dispatch_hook('response', request.hooks, response, **kwargs)
- if TYPE_CHECKING:
- assert response is not None
+ response = self._resend(request, actions, cached_response, **kwargs)
+ else:
+ response = cached_response
- # If the request has been filtered out, delete previously cached response if it exists
+ # If the request has been filtered out, delete the previously cached response if it exists
if not self.filter_fn(response):
logger.debug(f'Deleting filtered response for URL: {response.url}')
self.cache.delete(cache_key)
return response
- # Cache redirect history
- for r in response.history:
- self.cache.save_redirect(r.request, cache_key)
- return response
+ # Dispatch any hooks here, because they are removed before pickling
+ return dispatch_hook('response', request.hooks, response, **kwargs)
+
+ def _send_and_cache(
+ self,
+ request: PreparedRequest,
+ actions: CacheActions,
+ cached_response: CachedResponse = None,
+ **kwargs,
+ ) -> AnyResponse:
+ """Send the request and cache the response, unless disabled by settings or headers.
- def _send_and_cache(self, request: PreparedRequest, actions: CacheActions, **kwargs):
- """Send the request and cache the response, unless disabled by settings or headers"""
+ If applicable, also add request headers to check if the remote resource has been modified.
+ If we get a 304 Not Modified response, return the expired cache item.
+ """
+ request.headers.update(actions.add_request_headers)
response = super().send(request, **kwargs)
actions.update_from_response(response)
if self._is_cacheable(response, actions):
logger.debug(f'Skipping cache write for URL: {request.url}')
self.cache.save_response(response, actions.cache_key, actions.expires)
+ elif cached_response and response.status_code == 304:
+ logger.debug(f'Response for URL {request.url} has not been modified; using cached response')
+ return cached_response
return set_response_defaults(response, actions.cache_key)
- def _send_and_check_etag(self, request: PreparedRequest, actions: CacheActions, **kwargs):
- """Send a request with an ETag to check if the content has changed"""
- request.headers['If-None-Match'] = actions.etag
- response = self._send_and_cache(request, actions, **kwargs)
-
- if response.status_code == 304:
- logger.debug(f'Response with ETag {actions.etag} has not been modified')
- return None
- return response
-
- def _resend(self, request: PreparedRequest, actions: CacheActions, **kwargs) -> AnyResponse:
+ def _resend(
+ self, request: PreparedRequest, actions: CacheActions, cached_response: CachedResponse, **kwargs
+ ) -> AnyResponse:
"""Attempt to resend the request and cache the new response. If the request fails, delete
the expired cache item.
"""
logger.debug('Expired response; attempting to re-send request')
try:
- return self._send_and_cache(request, actions, **kwargs)
+ return self._send_and_cache(request, actions, cached_response, **kwargs)
except Exception:
self.cache.delete(actions.cache_key)
raise
def _resend_and_ignore(
- self, request: PreparedRequest, actions: CacheActions, **kwargs
- ) -> Optional[AnyResponse]:
+ self, request: PreparedRequest, actions: CacheActions, cached_response: CachedResponse, **kwargs
+ ) -> AnyResponse:
"""Attempt to resend the request and cache the new response. If there are any errors, ignore
- them and and return ``None``.
+ them and return the expired cache item.
"""
# Attempt to send the request and cache the new response
logger.debug('Expired response; attempting to re-send request')
try:
- response = self._send_and_cache(request, actions, **kwargs)
+ response = self._send_and_cache(request, actions, cached_response, **kwargs)
response.raise_for_status()
return response
- except Exception as e:
- logger.warning('Request failed; using stale cache data: %s', e)
- return None
+ except Exception:
+ logger.warning(f'Request for URL {request.url} failed; using cached response', exc_info=True)
+ return cached_response
def _is_cacheable(self, response: Response, actions: CacheActions) -> bool:
"""Perform all checks needed to determine if the given response should be saved to the cache"""