diff options
author | Jordan Cook <jordan.cook@pioneer.com> | 2021-08-12 17:05:02 -0500 |
---|---|---|
committer | Jordan Cook <jordan.cook@pioneer.com> | 2021-08-14 21:58:30 -0500 |
commit | 9e93370be7f5f9c1a08d64175117667a97f9d6be (patch) | |
tree | 79e9483a3f413cb6218a6d77ad3b944618059320 /requests_cache | |
parent | 8c77c9086f229cf381f9f7b96d659ec5ca7af0cb (diff) | |
download | requests-cache-9e93370be7f5f9c1a08d64175117667a97f9d6be.tar.gz |
Add support for Last-Modified + If-Modified-Since headers
Diffstat (limited to 'requests_cache')
-rw-r--r-- | requests_cache/backends/base.py | 21 |
-rw-r--r-- | requests_cache/cache_control.py | 47 |
-rw-r--r-- | requests_cache/cache_keys.py | 1 |
-rwxr-xr-x | requests_cache/models/response.py | 4 |
-rw-r--r-- | requests_cache/session.py | 83 |
5 files changed, 76 insertions, 80 deletions
diff --git a/requests_cache/backends/base.py b/requests_cache/backends/base.py index 7ee7ccd..9cfcd3a 100644 --- a/requests_cache/backends/base.py +++ b/requests_cache/backends/base.py @@ -44,7 +44,7 @@ class BaseCache: yield response.url def save_response(self, response: AnyResponse, cache_key: str = None, expires: datetime = None): - """Save response to cache + """Save a response to the cache Args: cache_key: Cache key for this response; will otherwise be generated based on request @@ -55,24 +55,15 @@ class BaseCache: cached_response = CachedResponse.from_response(response, cache_key=cache_key, expires=expires) cached_response.request = remove_ignored_params(cached_response.request, self.ignored_parameters) self.responses[cache_key] = cached_response - - def save_redirect(self, request: AnyRequest, response_key: str): - """ - Map a redirect request to a response. This makes it possible to associate many keys with a - single response. - - Args: - request: Request object for redirect URL - response_key: Cache key which can be found in ``responses`` - """ - self.redirects[self.create_key(request)] = response_key + for r in response.history: + self.redirects[self.create_key(r.request)] = cache_key def get_response(self, key: str, default=None) -> CachedResponse: - """Retrieves response for `key` if it's stored in cache, otherwise returns `default` + """Retrieve a response from the cache, if it exists Args: - key: Key of resource - default: Value to return if `key` is not in cache + key: Cache key for the response + default: Value to return if `key` is not in the cache """ try: if key not in self.responses: diff --git a/requests_cache/cache_control.py b/requests_cache/cache_control.py index ff76e8e..a35220c 100644 --- a/requests_cache/cache_control.py +++ b/requests_cache/cache_control.py @@ -27,9 +27,16 @@ logger = getLogger(__name__) @define class CacheActions: - """A dataclass that contains info on specific actions to take for a given cache item. 
- This is determined by a combination of cache settings and request + response headers. - If multiple sources are provided, they will be used in the following order of precedence: + """A class that translates cache settings and headers into specific actions to take for a + given cache item. Actions include: + + * Reading from the cache + * Writing to the cache + * Setting cache expiration + * Adding request headers + + If multiple sources provide an expiration time, they will be used in the following order of + precedence: 1. Cache-Control request headers (if enabled) 2. Cache-Control response headers (if enabled) @@ -38,9 +45,9 @@ class CacheActions: 5. Per-session expiration """ + add_request_headers: Dict = field(factory=dict) cache_control: bool = field(default=False) cache_key: str = field(default=None) - etag: str = field(default=None) expire_after: ExpirationTime = field(default=None) skip_read: bool = field(default=False) skip_write: bool = field(default=False) @@ -105,9 +112,23 @@ class CacheActions: """Convert the user/header-provided expiration value to a datetime""" return get_expiration_datetime(self.expire_after) + # TODO: Behavior if no other expiration method was specified (expire_after=-1)? + def update_from_cached_response(self, response: CachedResponse): + """Used after fetching a cached response, but before potentially sending a new request. + Check for relevant cache headers on a cached response, and set corresponding request headers. 
+ """ + if not self.cache_control or not response or not response.is_expired: + return + + self.add_request_headers['If-None-Match'] = response.headers.get('ETag') + self.add_request_headers['If-Modified-Since'] = response.headers.get('Last-Modified') + self.add_request_headers = {k: v for k, v in self.add_request_headers.items() if v} + def update_from_response(self, response: Response): - """Update expiration + actions based on response headers, if not previously set by request""" - if not self.cache_control: + """Used after receiving a new response but before saving it to the cache. + Update expiration + actions based on response headers, if not previously set. + """ + if not self.cache_control or not response: return directives = get_cache_directives(response.headers) @@ -115,18 +136,6 @@ class CacheActions: self.expire_after = coalesce(self.expires, directives.get('max-age'), directives.get('expires')) self.skip_write = self.skip_write or do_not_cache or 'no-store' in directives - # TODO: Behavior if no other expiration method was specified (expire_after=-1)? 
- def update_from_cached_response(self, response: CachedResponse): - """Check for ETags on cached response""" - if self.cache_control and response and response.is_expired and response.etag: - self.etag = response.etag - - def __str__(self): - return ( - f'Expire after: {self.expire_after} | Skip read: {self.skip_read} | ' - f'Skip write: {self.skip_write}' - ) - def coalesce(*values: Any, default=None) -> Any: """Get the first non-``None`` value in a list of values""" @@ -182,7 +191,7 @@ def get_url_expiration( def has_cache_headers(headers: Mapping) -> bool: - """Determine if headers contain cache directives **that we currently support**""" + """Determine if headers contain supported cache directives""" has_cache_control = any([d in headers.get('Cache-Control', '') for d in CACHE_DIRECTIVES]) return has_cache_control or bool(headers.get('Expires')) diff --git a/requests_cache/cache_keys.py b/requests_cache/cache_keys.py index 3ded784..a045f6e 100644 --- a/requests_cache/cache_keys.py +++ b/requests_cache/cache_keys.py @@ -1,3 +1,4 @@ +# TODO: Ignore If-None-Match and If-Modified-Since headers by default from __future__ import annotations import hashlib diff --git a/requests_cache/models/response.py b/requests_cache/models/response.py index 2bbe9d8..2fb0ea9 100755 --- a/requests_cache/models/response.py +++ b/requests_cache/models/response.py @@ -85,10 +85,6 @@ class CachedResponse(Response): pass @property - def etag(self) -> Optional[str]: - return self.headers.get('ETag') - - @property def from_cache(self) -> bool: return True diff --git a/requests_cache/session.py b/requests_cache/session.py index ae6d117..3c8d248 100644 --- a/requests_cache/session.py +++ b/requests_cache/session.py @@ -12,7 +12,7 @@ from urllib3 import filepost from .backends import BackendSpecifier, get_valid_kwargs, init_backend from .cache_control import CacheActions, ExpirationTime from .cache_keys import normalize_dict -from .models import AnyResponse, set_response_defaults +from .models 
import AnyResponse, CachedResponse, set_response_defaults ALL_METHODS = ['GET', 'HEAD', 'OPTIONS', 'POST', 'PUT', 'PATCH', 'DELETE'] @@ -121,84 +121,83 @@ class CacheMixin(MIXIN_BASE): ) # Attempt to fetch a cached response - response: Optional[AnyResponse] = None + cached_response: Optional[CachedResponse] = None if not (self._disabled or actions.skip_read): - response = self.cache.get_response(cache_key) - actions.update_from_cached_response(response) - is_expired = getattr(response, 'is_expired', False) + cached_response = self.cache.get_response(cache_key) + actions.update_from_cached_response(cached_response) + is_expired = getattr(cached_response, 'is_expired', False) - # If the response is expired, missing, or the cache is disabled then fetch a new response - if response is None: + # If the response is expired, missing, or the cache is disabled, then fetch a new response + if cached_response is None: response = self._send_and_cache(request, actions, **kwargs) elif is_expired and self.old_data_on_error: - response = self._resend_and_ignore(request, actions, **kwargs) or response - elif is_expired and actions.etag: - response = self._send_and_check_etag(request, actions, **kwargs) or response + response = self._resend_and_ignore(request, actions, cached_response, **kwargs) elif is_expired: - response = self._resend(request, actions, **kwargs) - - # Dispatch any hooks here, because they are removed before pickling - response = dispatch_hook('response', request.hooks, response, **kwargs) - if TYPE_CHECKING: - assert response is not None + response = self._resend(request, actions, cached_response, **kwargs) + else: + response = cached_response - # If the request has been filtered out, delete previously cached response if it exists + # If the request has been filtered out, delete the previously cached response if it exists if not self.filter_fn(response): logger.debug(f'Deleting filtered response for URL: {response.url}') self.cache.delete(cache_key) return 
response - # Cache redirect history - for r in response.history: - self.cache.save_redirect(r.request, cache_key) - return response + # Dispatch any hooks here, because they are removed before pickling + return dispatch_hook('response', request.hooks, response, **kwargs) + + def _send_and_cache( + self, + request: PreparedRequest, + actions: CacheActions, + cached_response: CachedResponse = None, + **kwargs, + ) -> AnyResponse: + """Send the request and cache the response, unless disabled by settings or headers. - def _send_and_cache(self, request: PreparedRequest, actions: CacheActions, **kwargs): - """Send the request and cache the response, unless disabled by settings or headers""" + If applicable, also add request headers to check if the remote resource has been modified. + If we get a 304 Not Modified response, return the expired cache item. + """ + request.headers.update(actions.add_request_headers) response = super().send(request, **kwargs) actions.update_from_response(response) if self._is_cacheable(response, actions): logger.debug(f'Skipping cache write for URL: {request.url}') self.cache.save_response(response, actions.cache_key, actions.expires) + elif cached_response and response.status_code == 304: + logger.debug(f'Response for URL {request.url} has not been modified; using cached response') + return cached_response return set_response_defaults(response, actions.cache_key) - def _send_and_check_etag(self, request: PreparedRequest, actions: CacheActions, **kwargs): - """Send a request with an ETag to check if the content has changed""" - request.headers['If-None-Match'] = actions.etag - response = self._send_and_cache(request, actions, **kwargs) - - if response.status_code == 304: - logger.debug(f'Response with ETag {actions.etag} has not been modified') - return None - return response - - def _resend(self, request: PreparedRequest, actions: CacheActions, **kwargs) -> AnyResponse: + def _resend( + self, request: PreparedRequest, actions: CacheActions, 
cached_response: CachedResponse, **kwargs + ) -> AnyResponse: """Attempt to resend the request and cache the new response. If the request fails, delete the expired cache item. """ logger.debug('Expired response; attempting to re-send request') try: - return self._send_and_cache(request, actions, **kwargs) + return self._send_and_cache(request, actions, cached_response, **kwargs) except Exception: self.cache.delete(actions.cache_key) raise def _resend_and_ignore( - self, request: PreparedRequest, actions: CacheActions, **kwargs - ) -> Optional[AnyResponse]: + self, request: PreparedRequest, actions: CacheActions, cached_response: CachedResponse, **kwargs + ) -> AnyResponse: """Attempt to resend the request and cache the new response. If there are any errors, ignore - them and and return ``None``. + them and and return the expired cache item. """ # Attempt to send the request and cache the new response logger.debug('Expired response; attempting to re-send request') try: - response = self._send_and_cache(request, actions, **kwargs) + response = self._send_and_cache(request, actions, cached_response, **kwargs) response.raise_for_status() return response - except Exception as e: - logger.warning('Request failed; using stale cache data: %s', e) - return None + except Exception: + logger.warning(f'Request for URL {request.url} failed; using cached response', exc_info=True) + return cached_response def _is_cacheable(self, response: Response, actions: CacheActions) -> bool: """Perform all checks needed to determine if the given response should be saved to the cache""" |