diff options
author | Jordan Cook <jordan.cook@pioneer.com> | 2022-04-17 13:49:34 -0500 |
---|---|---|
committer | Jordan Cook <jordan.cook@pioneer.com> | 2022-04-17 19:25:20 -0500 |
commit | 7ebf9df7ae2534bad66dc4f102993f5fb6d789b2 (patch) | |
tree | 91fb12d84ea6a39fb46fd865baec32a283b9e51b /requests_cache/policy | |
parent | 451b5dfca5b3c60dd52098902ce37125d4a1a45f (diff) | |
download | requests-cache-7ebf9df7ae2534bad66dc4f102993f5fb6d789b2.tar.gz |
Move all cache policy-related modules to separate 'policy' subpackage
Diffstat (limited to 'requests_cache/policy')
-rw-r--r-- | requests_cache/policy/__init__.py | 5 | ||||
-rw-r--r-- | requests_cache/policy/actions.py | 299 | ||||
-rw-r--r-- | requests_cache/policy/expiration.py | 103 | ||||
-rw-r--r-- | requests_cache/policy/settings.py | 60 |
4 files changed, 467 insertions, 0 deletions
diff --git a/requests_cache/policy/__init__.py b/requests_cache/policy/__init__.py
new file mode 100644
index 0000000..9d4f7d6
--- /dev/null
+++ b/requests_cache/policy/__init__.py
@@ -0,0 +1,5 @@
+# flake8: noqa: E402,F401
+# isort: skip_file
+from .expiration import *
+from .settings import *
+from .actions import *
diff --git a/requests_cache/policy/actions.py b/requests_cache/policy/actions.py
new file mode 100644
index 0000000..ba333a6
--- /dev/null
+++ b/requests_cache/policy/actions.py
@@ -0,0 +1,299 @@
+"""Internal utilities for determining cache expiration and other cache actions.
+
+.. automodsumm:: requests_cache.policy.actions
+   :classes-only:
+   :nosignatures:
+
+.. automodsumm:: requests_cache.policy.actions
+   :functions-only:
+   :nosignatures:
+"""
+from datetime import datetime
+from logging import getLogger
+from typing import Dict, MutableMapping, Optional, Tuple, Union
+
+from attr import define, field
+from requests import PreparedRequest, Response
+from requests.models import CaseInsensitiveDict
+
+from .._utils import coalesce, try_int
+from ..models import CachedResponse
+from .expiration import (
+    DO_NOT_CACHE,
+    EXPIRE_IMMEDIATELY,
+    NEVER_EXPIRE,
+    ExpirationTime,
+    get_expiration_datetime,
+    get_expiration_seconds,
+    get_url_expiration,
+)
+from .settings import CacheSettings
+
+__all__ = ['CacheActions']
+
+CacheDirective = Union[None, bool, int, str]
+HeaderDict = MutableMapping[str, str]
+logger = getLogger(__name__)
+
+
+# TODO: Add custom __rich_repr__ to exclude default values to make logs cleaner (w/ RichHandler)
+@define
+class CacheActions:
+    """Translates cache settings and headers into specific actions to take for a given cache item.
+    This class defines the caching policy, and resulting actions are handled in
+    :py:meth:`CachedSession.send`.
+
+    .. rubric:: Notes
+
+    * See :ref:`precedence` for behavior if multiple sources provide an expiration
+    * See :ref:`headers` for more details about header behavior
+    * The following arguments/properties are the outputs of this class:
+
+    Args:
+        cache_key: The cache key created based on the initial request
+        error_504: Indicates the request cannot be fulfilled based on cache settings
+        expire_after: User or header-provided expiration value
+        send_request: Send a new request
+        resend_request: Send a new request to refresh a stale cache item
+        skip_read: Skip reading from the cache
+        skip_write: Skip writing to the cache
+    """
+
+    # Outputs
+    cache_key: str = field(default=None)
+    error_504: bool = field(default=False)
+    expire_after: ExpirationTime = field(default=None)
+    resend_request: bool = field(default=False)
+    send_request: bool = field(default=False)
+    skip_read: bool = field(default=False)
+    skip_write: bool = field(default=False)
+
+    # Inputs/internal attributes
+    _settings: CacheSettings = field(default=None, repr=False, init=False)
+    _validation_headers: Dict[str, str] = field(factory=dict, repr=False, init=False)
+    # TODO: It would be nice to not need these temp variables
+    _only_if_cached: bool = field(default=False)
+    _refresh: bool = field(default=False)
+
+    @classmethod
+    def from_request(cls, cache_key: str, request: PreparedRequest, settings: CacheSettings = None):
+        """Initialize from request info and cache settings.
+
+        Note on refreshing: `must-revalidate` isn't a standard request header, but is used here to
+        indicate a user-requested refresh. Typically that's only used in response headers, and
+        `max-age=0` would be used by a client to request a refresh. However, this would conflict
+        with the `expire_after` option provided in :py:meth:`.CachedSession.request`.
+        """
+        directives = get_cache_directives(request.headers)
+        logger.debug(f'Cache directives from request headers: {directives}')
+
+        # Merge relevant headers with session + request settings
+        settings = settings or CacheSettings()
+        only_if_cached = settings.only_if_cached or 'only-if-cached' in directives
+        expire_immediately = directives.get('max-age') == EXPIRE_IMMEDIATELY
+        refresh = expire_immediately or 'must-revalidate' in directives
+        force_refresh = 'no-cache' in directives
+
+        # Check expiration values in order of precedence
+        expire_after = coalesce(
+            directives.get('max-age'),
+            get_url_expiration(request.url, settings.urls_expire_after),
+            settings.expire_after,
+        )
+
+        # Check and log conditions for reading from the cache
+        read_criteria = {
+            'disabled cache': settings.disabled,
+            'disabled method': str(request.method) not in settings.allowable_methods,
+            'disabled by headers': 'no-store' in directives,
+            'disabled by refresh': force_refresh,
+            'disabled by expiration': expire_after == DO_NOT_CACHE,
+        }
+        _log_cache_criteria('read', read_criteria)
+
+        actions = cls(
+            cache_key=cache_key,
+            expire_after=expire_after,
+            only_if_cached=only_if_cached,
+            refresh=refresh,
+            skip_read=any(read_criteria.values()),
+            skip_write='no-store' in directives,
+        )
+        actions._settings = settings
+        return actions
+
+    @property
+    def expires(self) -> Optional[datetime]:
+        """Convert the user/header-provided expiration value to a datetime"""
+        return get_expiration_datetime(self.expire_after)
+
+    def update_from_cached_response(self, cached_response: CachedResponse):
+        """Check for relevant cache headers from a cached response, and set headers for a
+        conditional request, if possible.
+
+        Used after fetching a cached response, but before potentially sending a new request.
+        """
+        # Determine if we need to send a new request or respond with an error
+        is_expired = getattr(cached_response, 'is_expired', False)
+        invalid_response = cached_response is None or is_expired
+        if invalid_response and self._only_if_cached and not self._settings.stale_if_error:
+            self.error_504 = True
+        elif cached_response is None:
+            self.send_request = True
+        elif is_expired and not (self._only_if_cached and self._settings.stale_if_error):
+            self.resend_request = True
+
+        if cached_response is not None:
+            self._update_validation_headers(cached_response)
+        logger.debug(f'Post-read cache actions: {self}')
+
+    def _update_validation_headers(self, response: CachedResponse):
+        """If needed, get validation headers based on a cached response. Revalidation may be
+        triggered by a stale response, request headers, or cached response headers.
+        """
+        directives = get_cache_directives(response.headers)
+        revalidate = _has_validator(response.headers) and (
+            response.is_expired
+            or self._refresh
+            or 'no-cache' in directives
+            or 'must-revalidate' in directives
+            and directives.get('max-age') == 0
+        )
+
+        # Add the appropriate validation headers, if needed
+        if revalidate:
+            if response.headers.get('ETag'):
+                self._validation_headers['If-None-Match'] = response.headers['ETag']
+            if response.headers.get('Last-Modified'):
+                self._validation_headers['If-Modified-Since'] = response.headers['Last-Modified']
+            self.send_request = True
+            self.resend_request = False
+
+    def update_from_response(self, response: Response):
+        """Update expiration + actions based on headers and other details from a new response.
+
+        Used after receiving a new response, but before saving it to the cache.
+        """
+        if self._settings.cache_control:
+            self._update_from_response_headers(response)
+
+        # If "expired" but there's a validator, save it to the cache and revalidate on use
+        do_not_cache = self.expire_after == DO_NOT_CACHE
+        expire_immediately = self.expire_after == EXPIRE_IMMEDIATELY
+        has_validator = _has_validator(response.headers)
+
+        # Apply filter callback, if any
+        callback = self._settings.filter_fn
+        filtered_out = callback is not None and not callback(response)
+
+        # Check and log conditions for writing to the cache
+        write_criteria = {
+            'disabled cache': self._settings.disabled,
+            'disabled method': str(response.request.method) not in self._settings.allowable_methods,
+            'disabled status': response.status_code not in self._settings.allowable_codes,
+            'disabled by filter': filtered_out,
+            'disabled by headers': self.skip_write,
+            'disabled by expiration': do_not_cache or (expire_immediately and not has_validator),
+        }
+        self.skip_write = any(write_criteria.values())
+        _log_cache_criteria('write', write_criteria)
+
+    def _update_from_response_headers(self, response: Response):
+        """Check response headers for expiration and other cache directives"""
+        directives = get_cache_directives(response.headers)
+        logger.debug(f'Cache directives from response headers: {directives}')
+
+        if directives.get('immutable'):
+            self.expire_after = NEVER_EXPIRE
+        else:
+            self.expire_after = coalesce(
+                directives.get('max-age'),
+                directives.get('expires'),
+                self.expire_after,
+            )
+        self.skip_write = self.skip_write or 'no-store' in directives
+
+    def update_request(self, request: PreparedRequest) -> PreparedRequest:
+        """Apply validation headers (if any) before sending a request"""
+        request.headers.update(self._validation_headers)
+        return request
+
+    def update_revalidated_response(
+        self, response: Response, cached_response: CachedResponse
+    ) -> CachedResponse:
+        """After revalidation, update the cached response's headers and reset its expiration"""
+        logger.debug(
+            f'Response for URL {response.request.url} has not been modified; '
+            'updating and using cached response'
+        )
+        cached_response.expires = self.expires
+        cached_response.headers.update(response.headers)
+        self.update_from_response(cached_response)
+        return cached_response
+
+
+def append_directive(headers: HeaderDict, directive: str) -> HeaderDict:
+    """Append a Cache-Control directive to existing headers (if any)"""
+    directives = headers['Cache-Control'].split(',') if headers.get('Cache-Control') else []
+    directives.append(directive)
+    headers['Cache-Control'] = ','.join(directives)
+    return headers
+
+
+def get_cache_directives(headers: HeaderDict) -> Dict[str, CacheDirective]:
+    """Get all Cache-Control directives as a dict. Handles duplicate headers (with
+    CaseInsensitiveDict) and comma-separated lists.
+    Key-only directives are returned as ``{key: True}``.
+    """
+    if not headers:
+        return {}
+
+    kv_directives: Dict[str, CacheDirective] = {}
+    if headers.get('Cache-Control'):
+        cache_directives = headers['Cache-Control'].split(',')
+        kv_directives = dict(_split_kv_directive(value) for value in cache_directives)
+
+    if 'Expires' in headers:
+        kv_directives['expires'] = headers['Expires']
+    return kv_directives
+
+
+def _split_kv_directive(header_value: str) -> Tuple[str, CacheDirective]:
+    """Split a cache directive into a ``(key, int)`` pair, if possible; otherwise just
+    ``(key, True)``. Keys are lowercased, since directive names are case-insensitive.
+    """
+    header_value = header_value.strip()
+    if '=' in header_value:
+        k, v = header_value.split('=', 1)
+        return k.strip().lower(), try_int(v)
+    else:
+        return header_value.lower(), True
+
+
+def set_request_headers(
+    headers: Optional[HeaderDict], expire_after, only_if_cached, refresh, force_refresh
+):
+    """Translate keyword arguments into equivalent request headers, to be handled in CacheActions"""
+    headers = CaseInsensitiveDict(headers)
+    if expire_after is not None:
+        headers = append_directive(headers, f'max-age={get_expiration_seconds(expire_after)}')
+    if only_if_cached:
+        headers = append_directive(headers, 'only-if-cached')
+    if refresh:
+        headers = append_directive(headers, 'must-revalidate')
+    if force_refresh:
+        headers = append_directive(headers, 'no-cache')
+    return headers
+
+
+def _has_validator(headers: HeaderDict) -> bool:
+    return bool(headers.get('ETag') or headers.get('Last-Modified'))
+
+
+def _log_cache_criteria(operation: str, criteria: Dict):
+    """Log details on any failed checks for cache read or write"""
+    if any(criteria.values()):
+        status = ', '.join([k for k, v in criteria.items() if v])
+    else:
+        status = 'Passed'
+    logger.debug(f'Pre-{operation} cache checks: {status}')
diff --git a/requests_cache/policy/expiration.py b/requests_cache/policy/expiration.py
new file mode 100644
index 0000000..7219718
--- /dev/null
+++ b/requests_cache/policy/expiration.py
@@ -0,0 +1,103 @@
+"""Utility functions used for converting expiration values"""
+from datetime import datetime, timedelta, timezone
+from email.utils import parsedate_to_datetime
+from fnmatch import fnmatch
+from logging import getLogger
+from math import ceil
+from typing import Dict, Optional, Union
+
+from .._utils import try_int
+
+__all__ = ['DO_NOT_CACHE', 'EXPIRE_IMMEDIATELY', 'NEVER_EXPIRE', 'get_expiration_datetime']
+
+# Special expiration values that may be set by either headers or keyword args
+DO_NOT_CACHE = 0x0D0E0200020704  # Per RFC 4824
+EXPIRE_IMMEDIATELY = 0
+NEVER_EXPIRE = -1
+
+ExpirationTime = Union[None, int, float, str, datetime, timedelta]
+ExpirationPatterns = Dict[str, ExpirationTime]
+
+logger = getLogger(__name__)
+
+
+def get_expiration_datetime(expire_after: ExpirationTime) -> Optional[datetime]:
+    """Convert an expiration value in any supported format to an absolute datetime"""
+    # Never expire (or do not cache, in which case expiration won't be used)
+    if expire_after is None or expire_after in [NEVER_EXPIRE, DO_NOT_CACHE]:
+        return None
+    # Expire immediately
+    elif try_int(expire_after) == EXPIRE_IMMEDIATELY:
+        return to_utc(datetime.now(timezone.utc))
+    # Already a datetime or datetime str
+    if isinstance(expire_after, str):
+        return parse_http_date(expire_after)
+    elif isinstance(expire_after, datetime):
+        return to_utc(expire_after)
+
+    # Otherwise, it must be a timedelta or time in seconds
+    if not isinstance(expire_after, timedelta):
+        expire_after = timedelta(seconds=expire_after)
+    return to_utc(datetime.now(timezone.utc)) + expire_after
+
+
+def get_expiration_seconds(expire_after: ExpirationTime) -> int:
+    """Convert an expiration value in any supported format to an expiration time in seconds"""
+    if expire_after == DO_NOT_CACHE:
+        return DO_NOT_CACHE
+    expires, now = get_expiration_datetime(expire_after), to_utc(datetime.now(timezone.utc))
+    return ceil((expires - now).total_seconds()) if expires else NEVER_EXPIRE
+
+
+def get_url_expiration(
+    url: Optional[str], urls_expire_after: Optional[ExpirationPatterns] = None
+) -> ExpirationTime:
+    """Check for a matching per-URL expiration, if any"""
+    if not url:
+        return None
+
+    for pattern, expire_after in (urls_expire_after or {}).items():
+        if url_match(url, pattern):
+            logger.debug(f'URL {url} matched pattern "{pattern}": {expire_after}')
+            return expire_after
+    return None
+
+
+def parse_http_date(value: str) -> Optional[datetime]:
+    """Attempt to parse an HTTP (RFC 5322-compatible) timestamp"""
+    try:
+        expire_after = parsedate_to_datetime(value)
+        return to_utc(expire_after)
+    except (TypeError, ValueError):
+        logger.debug(f'Failed to parse timestamp: {value}')
+        return None
+
+
+def to_utc(dt: datetime) -> datetime:
+    """All internal datetimes are UTC and timezone-naive. Convert any user/header-provided
+    datetimes to the same format.
+    """
+    if dt.tzinfo:
+        dt = dt.astimezone(timezone.utc)
+        dt = dt.replace(tzinfo=None)
+    return dt
+
+
+def url_match(url: str, pattern: str) -> bool:
+    """Determine if a URL matches a pattern
+
+    Args:
+        url: URL to test. Its base URL (without protocol) will be used.
+        pattern: Glob pattern to match against. A recursive wildcard will be added if not present
+
+    Example:
+        >>> url_match('https://httpbin.org/delay/1', 'httpbin.org/delay')
+        True
+        >>> url_match('https://httpbin.org/stream/1', 'httpbin.org/*/1')
+        True
+        >>> url_match('https://httpbin.org/stream/2', 'httpbin.org/*/1')
+        False
+    """
+    url = url.split('://')[-1]
+    pattern = pattern.split('://')[-1].rstrip('*') + '**'
+    return fnmatch(url, pattern)
diff --git a/requests_cache/policy/settings.py b/requests_cache/policy/settings.py
new file mode 100644
index 0000000..e23fd50
--- /dev/null
+++ b/requests_cache/policy/settings.py
@@ -0,0 +1,60 @@
+from typing import Callable, Dict, Iterable, Optional, Union
+
+from attr import define, field
+from requests import Response
+
+from .._utils import get_valid_kwargs
+from .expiration import ExpirationTime
+
+ALL_METHODS = ('GET', 'HEAD', 'OPTIONS', 'POST', 'PUT', 'PATCH', 'DELETE')
+DEFAULT_CACHE_NAME = 'http_cache'
+DEFAULT_METHODS = ('GET', 'HEAD')
+DEFAULT_STATUS_CODES = (200,)
+
+# Default params and/or headers that are excluded from cache keys and redacted from cached responses
+DEFAULT_IGNORED_PARAMS = ('Authorization', 'X-API-KEY', 'access_token', 'api_key')
+
+# Signatures for user-provided callbacks
+FilterCallback = Callable[[Response], bool]
+KeyCallback = Callable[..., str]
+
+
+@define
+class CacheSettings:
+    """Class used internally to store settings that affect caching behavior. This allows settings
+    to be used across multiple modules, but exposed to the user in a single property
+    (:py:attr:`.CachedSession.settings`). These values can safely be modified after initialization.
+    See :py:class:`.CachedSession` and :ref:`user-guide` for usage details.
+    """
+
+    allowable_codes: Iterable[int] = field(default=DEFAULT_STATUS_CODES)
+    allowable_methods: Iterable[str] = field(default=DEFAULT_METHODS)
+    cache_control: bool = field(default=False)
+    disabled: bool = field(default=False)
+    expire_after: ExpirationTime = field(default=None)
+    filter_fn: Optional[FilterCallback] = field(default=None)
+    ignored_parameters: Iterable[str] = field(default=DEFAULT_IGNORED_PARAMS)
+    key_fn: Optional[KeyCallback] = field(default=None)
+    match_headers: Union[Iterable[str], bool] = field(default=False)
+    only_if_cached: bool = field(default=False)
+    stale_if_error: bool = field(default=False)
+    urls_expire_after: Dict[str, ExpirationTime] = field(factory=dict)
+
+    @classmethod
+    def from_kwargs(cls, **kwargs):
+        """Constructor with some additional steps:
+
+        * Handle some deprecated argument names
+        * Ignore invalid settings, for easier initialization from mixed ``**kwargs``
+        """
+        kwargs = cls._rename_kwargs(kwargs)
+        kwargs = get_valid_kwargs(cls.__init__, kwargs)
+        return cls(**kwargs)
+
+    @staticmethod
+    def _rename_kwargs(kwargs):
+        if 'old_data_on_error' in kwargs:
+            kwargs['stale_if_error'] = kwargs.pop('old_data_on_error')
+        if 'include_get_headers' in kwargs:
+            kwargs['match_headers'] = kwargs.pop('include_get_headers')
+        return kwargs