diff options
author | Jordan Cook <JWCook@users.noreply.github.com> | 2021-10-10 14:05:53 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-10 14:05:53 -0500 |
commit | 73cc36133b7eb3d4f714b2199042d7b6fc6bd902 (patch) | |
tree | 185169048d1b0b584fdef39c9505f8ea6255b7c3 | |
parent | e42f6753d017b8d9f9a176c7bc957b13e17f5df5 (diff) | |
parent | d9ea0f1391ce3fdcf9e38bda1979ed7877e5b44a (diff) | |
download | requests-cache-73cc36133b7eb3d4f714b2199042d7b6fc6bd902.tar.gz |
Merge pull request #430 from JWCook/cache-control
Fix behavior for cache_control=True with expire_after, and support revalidation with max-age=0
-rw-r--r-- | HISTORY.md | 2 | ||||
-rw-r--r-- | requests_cache/cache_control.py | 56 | ||||
-rw-r--r-- | requests_cache/session.py | 29 | ||||
-rw-r--r-- | tests/integration/base_cache_test.py | 28 | ||||
-rw-r--r-- | tests/unit/test_cache_control.py | 82 |
5 files changed, 144 insertions, 53 deletions
@@ -7,6 +7,8 @@ * Make per-request expiration thread-safe for both `CachedSession.request()` and `CachedSession.send()` * Handle some additional corner cases when normalizing request data * Some micro-optimizations for request matching +* Fix issue with cache headers not being used correctly if `cache_control=True` is used with an `expire_after` value +* Support immediate expiration + revalidation for `Cache-Control: max-age=0` and `Expires: 0` * Fix license metadata as shown on PyPI ## 0.8.1 (2021-09-15) diff --git a/requests_cache/cache_control.py b/requests_cache/cache_control.py index 10a6e4e..2cad7b4 100644 --- a/requests_cache/cache_control.py +++ b/requests_cache/cache_control.py @@ -24,6 +24,7 @@ if TYPE_CHECKING: from .models import CachedResponse __all__ = ['DO_NOT_CACHE', 'CacheActions'] + # May be set by either headers or expire_after param to disable caching DO_NOT_CACHE = 0 # Supported Cache-Control directives @@ -41,10 +42,10 @@ class CacheActions: """A class that translates cache settings and headers into specific actions to take for a given cache item. Actions include: - * Reading from the cache - * Writing to the cache - * Setting cache expiration - * Adding request headers + * Read from the cache + * Write to the cache + * Set cache expiration + * Add headers for conditional requests If multiple sources provide an expiration time, they will be used in the following order of precedence: @@ -54,14 +55,17 @@ class CacheActions: 3. Per-request expiration 4. Per-URL expiration 5. Per-session expiration + + See :ref:`headers` for more details about behavior. """ cache_control: bool = field(default=False) cache_key: str = field(default=None) expire_after: ExpirationTime = field(default=None) - request_headers: Dict[str, str] = field(factory=dict) + request_directives: Dict[str, str] = field(factory=dict) skip_read: bool = field(default=False) skip_write: bool = field(default=False) + validation_headers: Dict[str, str] = field(factory=dict) @classmethod def from_request( @@ -75,22 +79,30 @@ class CacheActions: **kwargs, ): """Initialize from request info and cache settings""" - # Check expire_after values in order of precedence directives = get_cache_directives(request.headers) + logger.debug(f'Cache directives from request headers: {directives}') + + # Check expiration values in order of precedence expire_after = coalesce( directives.get('max-age'), request_expire_after, get_url_expiration(request.url, urls_expire_after), session_expire_after, ) - do_not_cache = expire_after == DO_NOT_CACHE + + # Check conditions for reading from the cache, based on request headers and possibly + # expire_after options. With cache_control=True, don't consider expire_after options, since + # these can be overridden by response headers. + check_expiration = directives.get('max-age') if cache_control else expire_after + do_not_cache = check_expiration == DO_NOT_CACHE or 'no-store' in directives return cls( cache_control=cache_control, cache_key=cache_key, expire_after=expire_after, - skip_read=do_not_cache or 'no-store' in directives or 'no-cache' in directives, - skip_write=do_not_cache or 'no-store' in directives, + request_directives=directives, + skip_read=do_not_cache or 'no-cache' in directives, + skip_write=do_not_cache, ) @property @@ -109,25 +121,39 @@ class CacheActions: return if response.headers.get('ETag'): - self.request_headers['If-None-Match'] = response.headers['ETag'] + self.validation_headers['If-None-Match'] = response.headers['ETag'] if response.headers.get('Last-Modified'): - self.request_headers['If-Modified-Since'] = response.headers['Last-Modified'] - self.request_headers = {k: v for k, v in self.request_headers.items() if v} + self.validation_headers['If-Modified-Since'] = response.headers['Last-Modified'] def update_from_response(self, response: Response): """Update expiration + actions based on response headers, if not previously set. Used after receiving a new response but before saving it to the cache. """ - if not self.cache_control or not response: + if not response or not self.cache_control: return directives = get_cache_directives(response.headers) - do_not_cache = directives.get('max-age') == DO_NOT_CACHE + logger.debug(f'Cache directives from response headers: {directives}') + + # Check expiration headers self.expire_after = coalesce( directives.get('max-age'), directives.get('expires'), self.expire_after ) - self.skip_write = self.skip_write or do_not_cache or 'no-store' in directives + + # If expiration is 0 and there's a validator, save it to the cache and revalidate on use + has_validator = response.headers.get('ETag') or response.headers.get('Last-Modified') + if self.expire_after == DO_NOT_CACHE and has_validator: + self.expire_after = datetime.utcnow() + + # Check conditions for writing to the cache + self.skip_write = any( + [ + self.expire_after == DO_NOT_CACHE, + 'no-store' in directives, + self.skip_write, + ] + ) def coalesce(*values: Any, default=None) -> Any: diff --git a/requests_cache/session.py b/requests_cache/session.py index a2234be..22bfd50 100644 --- a/requests_cache/session.py +++ b/requests_cache/session.py @@ -16,7 +16,7 @@ from contextlib import contextmanager from logging import getLogger from threading import RLock -from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, Optional +from typing import TYPE_CHECKING, Callable, Dict, Iterable, Optional from requests import PreparedRequest, Response from requests import Session as OriginalSession @@ -78,9 +78,6 @@ class CacheMixin(MIXIN_BASE): self, method: str, url: str, - params: Dict = None, - data: Any = None, - json: Dict = None, headers: Dict = None, expire_after: ExpirationTime = None, **kwargs, @@ -116,15 +113,7 @@ class CacheMixin(MIXIN_BASE): headers['Cache-Control'] = f'max-age={get_expiration_seconds(expire_after)}' with patch_form_boundary(**kwargs): - return super().request( - method, - url, - params=params, - data=data, - json=json, - headers=headers, - **kwargs, - ) + return super().request(method, url, headers=headers, **kwargs) def send( self, request: PreparedRequest, expire_after: ExpirationTime = None, **kwargs @@ -182,10 +171,10 @@ class CacheMixin(MIXIN_BASE): ) -> AnyResponse: """Send the request and cache the response, unless disabled by settings or headers. - If applicable, also add request headers to check if the remote resource has been modified. - If we get a 304 Not Modified response, return the expired cache item. + If applicable, also add headers to make a conditional request. If we get a 304 Not Modified + response, return the stale cache item. """ - request.headers.update(actions.request_headers) + request.headers.update(actions.validation_headers) response = super().send(request, **kwargs) actions.update_from_response(response) @@ -202,9 +191,9 @@ class CacheMixin(MIXIN_BASE): self, request: PreparedRequest, actions: CacheActions, cached_response: CachedResponse, **kwargs ) -> AnyResponse: """Attempt to resend the request and cache the new response. If the request fails, delete - the expired cache item. + the stale cache item. """ - logger.debug('Expired response; attempting to re-send request') + logger.debug('Stale response; attempting to re-send request') try: return self._send_and_cache(request, actions, cached_response, **kwargs) except Exception: @@ -215,10 +204,10 @@ class CacheMixin(MIXIN_BASE): self, request: PreparedRequest, actions: CacheActions, cached_response: CachedResponse, **kwargs ) -> AnyResponse: """Attempt to resend the request and cache the new response. If there are any errors, ignore - them and and return the expired cache item. + them and and return the stale cache item. """ # Attempt to send the request and cache the new response - logger.debug('Expired response; attempting to re-send request') + logger.debug('Stale response; attempting to re-send request') try: response = self._send_and_cache(request, actions, cached_response, **kwargs) response.raise_for_status() diff --git a/tests/integration/base_cache_test.py b/tests/integration/base_cache_test.py index b435b4f..8f1c5f2 100644 --- a/tests/integration/base_cache_test.py +++ b/tests/integration/base_cache_test.py @@ -5,11 +5,12 @@ from io import BytesIO from threading import Thread from time import sleep, time from typing import Dict, Type +from unittest.mock import MagicMock, patch from urllib.parse import parse_qs, urlparse import pytest import requests -from requests.models import PreparedRequest +from requests import PreparedRequest, Session from requests_cache import ALL_METHODS, CachedResponse, CachedSession from requests_cache.backends.base import BaseCache @@ -168,7 +169,7 @@ class BaseCacheTest: ({'ETag': ETAG, 'Last-Modified': LAST_MODIFIED}, True), ], ) - def test_304_not_modified(self, cached_response_headers, expected_from_cache): + def test_conditional_request(self, cached_response_headers, expected_from_cache): """Test behavior of ETag and Last-Modified headers and 304 responses. When a cached response contains one of these headers, corresponding request headers should @@ -184,6 +185,29 @@ class BaseCacheTest: response = session.get(httpbin('cache')) assert response.from_cache == expected_from_cache + def test_conditional_request__max_age_0(self): + """With both max-age=0 and a validator, the response should be saved and revalidated on next + request + """ + url = httpbin('response-headers') + response_headers = {'Cache-Control': 'max-age=0', 'ETag': ETAG} + session = self.init_session(cache_control=True) + + # This endpoint returns request params as response headers + session.get(url, params=response_headers) + + # It doesn't respond to conditional requests, but let's just pretend it does + with patch.object(Session, 'send', return_value=MagicMock(status_code=304)): + response = session.get(url, params=response_headers) + + assert response.from_cache is True + assert response.is_expired is True + + # cache_key = list(session.cache.responses.keys())[0] + # response = session.cache.responses[cache_key] + # response.request.url = httpbin(f'etag/{ETAG}') + # response = session.get(httpbin('response-headers'), params=response_headers) + @pytest.mark.parametrize('stream', [True, False]) def test_response_decode(self, stream): """Test that gzip-compressed raw responses (including streamed responses) can be manually diff --git a/tests/unit/test_cache_control.py b/tests/unit/test_cache_control.py index 3f105ec..2388f3d 100644 --- a/tests/unit/test_cache_control.py +++ b/tests/unit/test_cache_control.py @@ -88,19 +88,19 @@ def test_init_from_headers(headers, expected_expiration): @pytest.mark.parametrize( 'url, request_expire_after, expected_expiration', [ - # ('img.site_1.com', None, timedelta(hours=12)), - # ('img.site_1.com', 60, 60), - # ('http://img.site.com/base/', None, 1), - # ('https://img.site.com/base/img.jpg', None, 1), - # ('site_2.com/resource_1', None, timedelta(hours=20)), - # ('http://site_2.com/resource_1/index.html', None, timedelta(hours=20)), - # ('http://site_2.com/resource_2/', None, timedelta(days=7)), - # ('http://site_2.com/static/', None, -1), + ('img.site_1.com', None, timedelta(hours=12)), + ('img.site_1.com', 60, 60), + ('http://img.site.com/base/', None, 1), + ('https://img.site.com/base/img.jpg', None, 1), + ('site_2.com/resource_1', None, timedelta(hours=20)), + ('http://site_2.com/resource_1/index.html', None, timedelta(hours=20)), + ('http://site_2.com/resource_2/', None, timedelta(days=7)), + ('http://site_2.com/static/', None, -1), ('http://site_2.com/static/img.jpg', None, -1), - # ('site_2.com', None, 1), - # ('site_2.com', 60, 60), - # ('some_other_site.com', None, 1), - # ('some_other_site.com', 60, 60), + ('site_2.com', None, 1), + ('site_2.com', 60, 60), + ('some_other_site.com', None, 1), + ('some_other_site.com', 60, 60), ], ) def test_init_from_settings(url, request_expire_after, expected_expiration): @@ -125,7 +125,42 @@ def test_init_from_settings(url, request_expire_after, expected_expiration): @pytest.mark.parametrize( - 'response_headers, expected_request_headers', + 'cache_control, headers, expire_after, expected_expiration, expected_skip_read', + [ + (False, {'Cache-Control': 'max-age=60'}, 1, 60, False), + (False, {}, 1, 1, False), + (False, {}, 0, 0, True), + (True, {'Cache-Control': 'max-age=60'}, 1, 60, False), + (True, {'Cache-Control': 'max-age=0'}, 1, 0, True), + (True, {'Cache-Control': 'no-store'}, 1, 1, True), + (True, {'Cache-Control': 'no-cache'}, 1, 1, True), + (True, {}, 1, 1, False), + (True, {}, 0, 0, False), + ], +) +def test_init_from_settings_and_headers( + cache_control, headers, expire_after, expected_expiration, expected_skip_read +): + """Test behavior with both cache settings and request headers. The only variation in behavior + with cache_control=True is that expire_after=0 should *not* cause the cache read to be skipped. + """ + request = MagicMock( + url='https://img.site.com/base/img.jpg', + headers=headers, + ) + + actions = CacheActions.from_request( + cache_key='key', + cache_control=cache_control, + request=request, + session_expire_after=expire_after, + ) + assert actions.expire_after == expected_expiration + assert actions.skip_read == expected_skip_read + + +@pytest.mark.parametrize( + 'response_headers, expected_validation_headers', [ ({}, {}), ({'ETag': ETAG}, {'If-None-Match': ETAG}), @@ -136,7 +171,7 @@ def test_init_from_settings(url, request_expire_after, expected_expiration): ), ], ) -def test_update_from_cached_response(response_headers, expected_request_headers): +def test_update_from_cached_response(response_headers, expected_validation_headers): """Test that conditional request headers are added if the cached response is expired""" actions = CacheActions.from_request( cache_key='key', @@ -145,7 +180,7 @@ def test_update_from_cached_response(response_headers, expected_request_headers) cached_response = CachedResponse(headers=response_headers, expires=datetime.now() - timedelta(1)) actions.update_from_cached_response(cached_response) - assert actions.request_headers == expected_request_headers + assert actions.validation_headers == expected_validation_headers def test_update_from_cached_response__ignored(): @@ -159,7 +194,7 @@ def test_update_from_cached_response__ignored(): ) actions.update_from_cached_response(cached_response) - assert actions.request_headers == {} + assert actions.validation_headers == {} @pytest.mark.parametrize( @@ -203,6 +238,21 @@ def test_update_from_response__ignored(): assert actions.expire_after is None +@patch('requests_cache.cache_control.datetime') +def test_update_from_response__revalidate(mock_datetime): + """If expiration is 0 and there's a validator the response should be cached and revalidated""" + url = 'https://img.site.com/base/img.jpg' + actions = CacheActions.from_request( + cache_key='key', + request=MagicMock(url=url), + cache_control=True, + ) + actions.update_from_response( + MagicMock(url=url, headers={'Cache-Control': 'max-age=0', 'ETag': ETAG}) + ) + assert actions.expire_after == mock_datetime.utcnow() + + @pytest.mark.parametrize('directive', IGNORED_DIRECTIVES) def test_ignored_headers(directive): """Ensure that currently unimplemented Cache-Control headers do not affect behavior""" |