summaryrefslogtreecommitdiff
path: root/requests_cache/policy
diff options
context:
space:
mode:
authorJordan Cook <jordan.cook@pioneer.com>2022-04-17 13:49:34 -0500
committerJordan Cook <jordan.cook@pioneer.com>2022-04-17 19:25:20 -0500
commit7ebf9df7ae2534bad66dc4f102993f5fb6d789b2 (patch)
tree91fb12d84ea6a39fb46fd865baec32a283b9e51b /requests_cache/policy
parent451b5dfca5b3c60dd52098902ce37125d4a1a45f (diff)
downloadrequests-cache-7ebf9df7ae2534bad66dc4f102993f5fb6d789b2.tar.gz
Move all cache policy-related modules to separate 'policy' subpackage
Diffstat (limited to 'requests_cache/policy')
-rw-r--r--requests_cache/policy/__init__.py5
-rw-r--r--requests_cache/policy/actions.py299
-rw-r--r--requests_cache/policy/expiration.py103
-rw-r--r--requests_cache/policy/settings.py60
4 files changed, 467 insertions, 0 deletions
diff --git a/requests_cache/policy/__init__.py b/requests_cache/policy/__init__.py
new file mode 100644
index 0000000..9d4f7d6
--- /dev/null
+++ b/requests_cache/policy/__init__.py
@@ -0,0 +1,5 @@
+# flake8: noqa: E402,F401
+# isort: skip_file
+from .expiration import *
+from .settings import *
+from .actions import *
diff --git a/requests_cache/policy/actions.py b/requests_cache/policy/actions.py
new file mode 100644
index 0000000..ba333a6
--- /dev/null
+++ b/requests_cache/policy/actions.py
@@ -0,0 +1,299 @@
+"""Internal utilities for determining cache expiration and other cache actions.
+
+.. automodsumm:: requests_cache.policy.actions
+ :classes-only:
+ :nosignatures:
+
+.. automodsumm:: requests_cache.policy.actions
+ :functions-only:
+ :nosignatures:
+"""
+from datetime import datetime
+from logging import getLogger
+from typing import Dict, MutableMapping, Optional, Tuple, Union
+
+from attr import define, field
+from requests import PreparedRequest, Response
+from requests.models import CaseInsensitiveDict
+
+from .._utils import coalesce, try_int
+from ..models import CachedResponse
+from .expiration import (
+ DO_NOT_CACHE,
+ EXPIRE_IMMEDIATELY,
+ NEVER_EXPIRE,
+ ExpirationTime,
+ get_expiration_datetime,
+ get_expiration_seconds,
+ get_url_expiration,
+)
+from .settings import CacheSettings
+
# Names exported by ``from .actions import *``
__all__ = ['CacheActions']

# Value of one parsed cache directive, as produced by ``_split_kv_directive``
CacheDirective = Union[None, bool, int, str]
# Any mutable mapping of header names to header values
HeaderDict = MutableMapping[str, str]
# Module-level logger, named after this module
logger = getLogger(__name__)
+
+
# TODO: Add custom __rich_repr__ to exclude default values to make logs cleaner (w/ RichHandler)
@define
class CacheActions:
    """Translates cache settings and headers into specific actions to take for a given cache item.
    This class defines the caching policy, and resulting actions are handled in
    :py:meth:`CachedSession.send`.

    .. rubric:: Notes

    * See :ref:`precedence` for behavior if multiple sources provide an expiration
    * See :ref:`headers` for more details about header behavior
    * The following arguments/properties are the outputs of this class:

    Args:
        cache_key: The cache key created based on the initial request
        error_504: Indicates the request cannot be fulfilled based on cache settings
        expire_after: User or header-provided expiration value
        send_request: Send a new request
        resend_request: Send a new request to refresh a stale cache item
        skip_read: Skip reading from the cache
        skip_write: Skip writing to the cache
    """

    # Outputs
    cache_key: str = field(default=None)
    error_504: bool = field(default=False)
    expire_after: ExpirationTime = field(default=None)
    resend_request: bool = field(default=False)
    send_request: bool = field(default=False)
    skip_read: bool = field(default=False)
    skip_write: bool = field(default=False)

    # Inputs/internal attributes
    # ``_settings`` is not an init argument; ``from_request`` assigns it after construction
    _settings: CacheSettings = field(default=None, repr=False, init=False)
    # Conditional request headers collected by ``_update_validation_headers``
    _validation_headers: Dict[str, str] = field(factory=dict, repr=False, init=False)
    # TODO: It would be nice to not need these temp variables
    _only_if_cached: bool = field(default=False)
    _refresh: bool = field(default=False)

    @classmethod
    def from_request(
        cls, cache_key: str, request: PreparedRequest, settings: Optional[CacheSettings] = None
    ) -> 'CacheActions':
        """Initialize from request info and cache settings.

        Note on refreshing: `must-revalidate` isn't a standard request header, but is used here to
        indicate a user-requested refresh. Typically that's only used in response headers, and
        `max-age=0` would be used by a client to request a refresh. However, this would conflict
        with the `expire_after` option provided in :py:meth:`.CachedSession.request`.

        Args:
            cache_key: Cache key for this request
            request: The request whose headers are checked for cache directives
            settings: Cache settings to apply; a default ``CacheSettings()`` is used if omitted

        Returns:
            A new instance with ``expire_after``, ``skip_read`` and ``skip_write`` populated
        """
        directives = get_cache_directives(request.headers)
        logger.debug(f'Cache directives from request headers: {directives}')

        # Merge relevant headers with session + request settings
        settings = settings or CacheSettings()
        only_if_cached = settings.only_if_cached or 'only-if-cached' in directives
        expire_immediately = directives.get('max-age') == EXPIRE_IMMEDIATELY
        refresh = expire_immediately or 'must-revalidate' in directives
        force_refresh = 'no-cache' in directives

        # Check expiration values in order of precedence
        expire_after = coalesce(
            directives.get('max-age'),
            get_url_expiration(request.url, settings.urls_expire_after),
            settings.expire_after,
        )

        # Check and log conditions for reading from the cache
        read_criteria = {
            'disabled cache': settings.disabled,
            'disabled method': str(request.method) not in settings.allowable_methods,
            'disabled by headers': 'no-store' in directives,
            'disabled by refresh': force_refresh,
            'disabled by expiration': expire_after == DO_NOT_CACHE,
        }
        _log_cache_criteria('read', read_criteria)

        actions = cls(
            cache_key=cache_key,
            expire_after=expire_after,
            only_if_cached=only_if_cached,
            refresh=refresh,
            skip_read=any(read_criteria.values()),
            skip_write='no-store' in directives,
        )
        actions._settings = settings
        return actions

    @property
    def expires(self) -> Optional[datetime]:
        """Convert the user/header-provided expiration value to a datetime"""
        return get_expiration_datetime(self.expire_after)

    def update_from_cached_response(self, cached_response: Optional[CachedResponse]):
        """Check for relevant cache headers from a cached response, and set headers for a
        conditional request, if possible.

        Used after fetching a cached response, but before potentially sending a new request.

        With ``only-if-cached``, no request flag is ever set here: either a usable cached
        response exists (a fresh one, or a stale one when ``stale_if_error`` is enabled), or
        ``error_504`` is set. A missing response can never be served stale, so it always results
        in ``error_504`` in that mode.

        Args:
            cached_response: The response read from the cache, or ``None`` on a cache miss
        """
        is_missing = cached_response is None
        is_expired = getattr(cached_response, 'is_expired', False)
        # Settings are only consulted when a stale response is the only thing available
        stale_allowed = self._only_if_cached and is_expired and self._settings.stale_if_error

        # Determine if we need to send a new request or respond with an error
        if self._only_if_cached and (is_missing or is_expired) and not stale_allowed:
            self.error_504 = True
        elif is_missing:
            self.send_request = True
        elif is_expired and not stale_allowed:
            self.resend_request = True

        # Revalidation would set ``send_request``, which ``only-if-cached`` must never do
        if not is_missing and not self._only_if_cached:
            self._update_validation_headers(cached_response)
        logger.debug(f'Post-read cache actions: {self}')

    def _update_validation_headers(self, response: CachedResponse):
        """If needed, get validation headers based on a cached response. Revalidation may be
        triggered by a stale response, request headers, or cached response headers.

        When revalidation applies, ``send_request`` is set and ``resend_request`` is cleared, so
        the conditional request replaces a plain refresh.
        """
        directives = get_cache_directives(response.headers)
        revalidate = _has_validator(response.headers) and (
            response.is_expired
            or self._refresh
            or 'no-cache' in directives
            # ``must-revalidate`` only forces revalidation together with ``max-age=0``
            or ('must-revalidate' in directives and directives.get('max-age') == 0)
        )

        # Add the appropriate validation headers, if needed
        if revalidate:
            if response.headers.get('ETag'):
                self._validation_headers['If-None-Match'] = response.headers['ETag']
            if response.headers.get('Last-Modified'):
                self._validation_headers['If-Modified-Since'] = response.headers['Last-Modified']
            self.send_request = True
            self.resend_request = False

    def update_from_response(self, response: Response):
        """Update expiration + actions based on headers and other details from a new response.

        Used after receiving a new response, but before saving it to the cache.
        """
        if self._settings.cache_control:
            self._update_from_response_headers(response)

        # If "expired" but there's a validator, save it to the cache and revalidate on use
        do_not_cache = self.expire_after == DO_NOT_CACHE
        expire_immediately = self.expire_after == EXPIRE_IMMEDIATELY
        has_validator = _has_validator(response.headers)

        # Apply filter callback, if any
        callback = self._settings.filter_fn
        filtered_out = callback is not None and not callback(response)

        # Check and log conditions for writing to the cache
        write_criteria = {
            'disabled cache': self._settings.disabled,
            'disabled method': str(response.request.method) not in self._settings.allowable_methods,
            'disabled status': response.status_code not in self._settings.allowable_codes,
            'disabled by filter': filtered_out,
            # Carries over any earlier ``no-store`` decision (request or response headers)
            'disabled by headers': self.skip_write,
            'disabled by expiration': do_not_cache or (expire_immediately and not has_validator),
        }
        self.skip_write = any(write_criteria.values())
        _log_cache_criteria('write', write_criteria)

    def _update_from_response_headers(self, response: Response):
        """Check response headers for expiration and other cache directives"""
        directives = get_cache_directives(response.headers)
        logger.debug(f'Cache directives from response headers: {directives}')

        if directives.get('immutable'):
            self.expire_after = NEVER_EXPIRE
        else:
            # Response headers take precedence over the value derived from the request
            self.expire_after = coalesce(
                directives.get('max-age'),
                directives.get('expires'),
                self.expire_after,
            )
        self.skip_write = self.skip_write or 'no-store' in directives

    def update_request(self, request: PreparedRequest) -> PreparedRequest:
        """Apply validation headers (if any) before sending a request"""
        request.headers.update(self._validation_headers)
        return request

    def update_revalidated_response(
        self, response: Response, cached_response: CachedResponse
    ) -> CachedResponse:
        """After revalidation, update the cached response's headers and reset its expiration"""
        logger.debug(
            f'Response for URL {response.request.url} has not been modified; '
            'updating and using cached response'
        )
        cached_response.expires = self.expires
        cached_response.headers.update(response.headers)
        self.update_from_response(cached_response)
        return cached_response
+
+
def append_directive(headers: HeaderDict, directive: str) -> HeaderDict:
    """Add one more directive to the ``Cache-Control`` header, creating the header when it is
    missing or empty.

    The mapping is modified in place and is also returned.
    """
    current = headers.get('Cache-Control')
    parts = current.split(',') if current else []
    headers['Cache-Control'] = ','.join([*parts, directive])
    return headers
+
+
+def get_cache_directives(headers: HeaderDict) -> Dict[str, CacheDirective]:
+ """Get all Cache-Control directives as a dict. Handles duplicate headers (with
+ CaseInsensitiveDict) and comma-separated lists.
+ Key-only directives are returned as ``{key: True}``.
+ """
+ if not headers:
+ return {}
+
+ kv_directives: Dict[str, CacheDirective] = {}
+ if headers.get('Cache-Control'):
+ cache_directives = headers['Cache-Control'].split(',')
+ kv_directives = dict([_split_kv_directive(value) for value in cache_directives])
+
+ if 'Expires' in headers:
+ kv_directives['expires'] = headers['Expires']
+ return kv_directives
+
+
+def _split_kv_directive(header_value: str) -> Tuple[str, CacheDirective]:
+ """Split a cache directive into a ``(key, int)`` pair, if possible; otherwise just
+ ``(key, True)``.
+ """
+ header_value = header_value.strip()
+ if '=' in header_value:
+ k, v = header_value.split('=', 1)
+ return k, try_int(v)
+ else:
+ return header_value, True
+
+
def set_request_headers(
    headers: Optional[HeaderDict], expire_after, only_if_cached, refresh, force_refresh
):
    """Express per-request keyword arguments as ``Cache-Control`` request directives, so that
    CacheActions can handle them the same way as user-provided headers.

    Returns a new ``CaseInsensitiveDict`` built from ``headers``, with directives appended in
    the order: ``max-age``, ``only-if-cached``, ``must-revalidate``, ``no-cache``.
    """
    merged = CaseInsensitiveDict(headers)
    if expire_after is not None:
        merged = append_directive(merged, f'max-age={get_expiration_seconds(expire_after)}')

    # Boolean options that each map to a single key-only directive
    flag_directives = (
        (only_if_cached, 'only-if-cached'),
        (refresh, 'must-revalidate'),
        (force_refresh, 'no-cache'),
    )
    for enabled, directive in flag_directives:
        if enabled:
            merged = append_directive(merged, directive)
    return merged
+
+
+def _has_validator(headers: HeaderDict) -> bool:
+ return bool(headers.get('ETag') or headers.get('Last-Modified'))
+
+
def _log_cache_criteria(operation: str, criteria: Dict):
    """Write a debug message naming every cache read/write check that failed, or ``Passed``
    if none did.
    """
    failed = [name for name, is_failed in criteria.items() if is_failed]
    status = ', '.join(failed) if failed else 'Passed'
    logger.debug(f'Pre-{operation} cache checks: {status}')
diff --git a/requests_cache/policy/expiration.py b/requests_cache/policy/expiration.py
new file mode 100644
index 0000000..7219718
--- /dev/null
+++ b/requests_cache/policy/expiration.py
@@ -0,0 +1,103 @@
+"""Utility functions used for converting expiration values"""
+from datetime import datetime, timedelta, timezone
+from email.utils import parsedate_to_datetime
+from fnmatch import fnmatch
+from logging import getLogger
+from math import ceil
+from typing import Dict, Optional, Union
+
+from .._utils import try_int
+
# Names exported by ``from .expiration import *``
__all__ = ['DO_NOT_CACHE', 'EXPIRE_IMMEDIATELY', 'NEVER_EXPIRE', 'get_expiration_datetime']

# Special expiration values that may be set by either headers or keyword args
# NOTE(review): DO_NOT_CACHE looks like an arbitrary sentinel, presumably chosen so it cannot
# collide with a real expiration in seconds -- confirm.
DO_NOT_CACHE = 0x0D0E0200020704  # Per RFC 4824
EXPIRE_IMMEDIATELY = 0
NEVER_EXPIRE = -1

# Any value accepted as an expiration
ExpirationTime = Union[None, int, float, str, datetime, timedelta]
# Mapping of URL glob pattern to the expiration used for matching URLs (see ``url_match``)
ExpirationPatterns = Dict[str, ExpirationTime]

# Module-level logger, named after this module
logger = getLogger(__name__)
+
+
def get_expiration_datetime(expire_after: ExpirationTime) -> Optional[datetime]:
    """Turn an expiration value of any supported type into an absolute, naive UTC datetime.

    Returns ``None`` for values meaning "never expire" or "do not cache", and for strings that
    cannot be parsed as an HTTP date.
    """
    # No expiration to compute: never expire, or not cached at all
    if expire_after is None or expire_after in (NEVER_EXPIRE, DO_NOT_CACHE):
        return None

    # Zero (as a number or numeric string) means "already expired"
    if try_int(expire_after) == EXPIRE_IMMEDIATELY:
        return datetime.utcnow()

    # Absolute values: an HTTP date string or a datetime
    if isinstance(expire_after, str):
        return parse_http_date(expire_after)
    if isinstance(expire_after, datetime):
        return to_utc(expire_after)

    # Relative values: a timedelta, or a number of seconds
    if isinstance(expire_after, timedelta):
        delta = expire_after
    else:
        delta = timedelta(seconds=expire_after)
    return datetime.utcnow() + delta
+
+
def get_expiration_seconds(expire_after: ExpirationTime) -> int:
    """Turn an expiration value of any supported type into a number of seconds from now.

    ``DO_NOT_CACHE`` is passed through unchanged, and a value with no expiration datetime is
    reported as ``NEVER_EXPIRE``. Fractional seconds are rounded up.
    """
    if expire_after == DO_NOT_CACHE:
        return DO_NOT_CACHE

    expires = get_expiration_datetime(expire_after)
    if not expires:
        return NEVER_EXPIRE

    remaining = expires - datetime.utcnow()
    return ceil(remaining.total_seconds())
+
+
def get_url_expiration(
    url: Optional[str], urls_expire_after: ExpirationPatterns = None
) -> ExpirationTime:
    """Look up the expiration for the first URL pattern that matches ``url``.

    Returns ``None`` if there is no URL, no patterns, or no matching pattern.
    """
    if not url:
        return None

    patterns = urls_expire_after or {}
    for pattern, expire_after in patterns.items():
        if not url_match(url, pattern):
            continue
        logger.debug(f'URL {url} matched pattern "{pattern}": {expire_after}')
        return expire_after
    return None
+
+
def parse_http_date(value: str) -> Optional[datetime]:
    """Parse an HTTP (RFC 5322-compatible) timestamp into a naive UTC datetime.

    Returns ``None`` (after logging a debug message) if the value cannot be parsed.
    """
    try:
        return to_utc(parsedate_to_datetime(value))
    except (TypeError, ValueError):
        logger.debug(f'Failed to parse timestamp: {value}')
    return None
+
+
def to_utc(dt: datetime):
    """Normalize a user/header-provided datetime to the internal format: UTC and
    timezone-naive.

    A timezone-aware datetime is converted to UTC and stripped of its tzinfo; a naive datetime
    is returned unchanged.
    """
    if not dt.tzinfo:
        return dt
    return dt.astimezone(timezone.utc).replace(tzinfo=None)
+
+
def _strip_scheme(value: str) -> str:
    """Remove a leading ``scheme://`` prefix, ignoring any ``://`` that appears later (for
    example inside a query string).
    """
    scheme, separator, remainder = value.partition('://')
    # Only treat the text before the first '://' as a scheme if it can't be part of a path/query
    if separator and not any(char in scheme for char in '/?#'):
        return remainder
    return value


def url_match(url: str, pattern: str) -> bool:
    """Determine if a URL matches a pattern

    Args:
        url: URL to test. Its base URL (without protocol) will be used.
        pattern: Glob pattern to match against. A recursive wildcard will be added if not present

    Returns:
        ``True`` if the URL (minus protocol) matches the pattern (minus protocol)

    Example:
        >>> url_match('https://httpbin.org/delay/1', 'httpbin.org/delay')
        True
        >>> url_match('https://httpbin.org/stream/1', 'httpbin.org/*/1')
        True
        >>> url_match('https://httpbin.org/stream/2', 'httpbin.org/*/1')
        False
        >>> url_match('https://httpbin.org/redirect-to?url=https://example.com', 'httpbin.org')
        True
    """
    url = _strip_scheme(url)
    # Normalize any trailing wildcards so the pattern always matches everything under it
    pattern = _strip_scheme(pattern).rstrip('*') + '**'
    return fnmatch(url, pattern)
diff --git a/requests_cache/policy/settings.py b/requests_cache/policy/settings.py
new file mode 100644
index 0000000..e23fd50
--- /dev/null
+++ b/requests_cache/policy/settings.py
@@ -0,0 +1,60 @@
+from typing import Callable, Dict, Iterable, Union
+
+from attr import define, field
+from requests import Response
+
+from .._utils import get_valid_kwargs
+from .expiration import ExpirationTime
+
# HTTP method names; ``DEFAULT_METHODS`` is the default for ``CacheSettings.allowable_methods``
ALL_METHODS = ('GET', 'HEAD', 'OPTIONS', 'POST', 'PUT', 'PATCH', 'DELETE')
DEFAULT_CACHE_NAME = 'http_cache'
DEFAULT_METHODS = ('GET', 'HEAD')
# Default for ``CacheSettings.allowable_codes``
DEFAULT_STATUS_CODES = (200,)

# Default params and/or headers that are excluded from cache keys and redacted from cached responses
DEFAULT_IGNORED_PARAMS = ('Authorization', 'X-API-KEY', 'access_token', 'api_key')

# Signatures for user-provided callbacks
# FilterCallback takes a response and returns a bool; KeyCallback returns a string key
FilterCallback = Callable[[Response], bool]
KeyCallback = Callable[..., str]
+
+
@define
class CacheSettings:
    """Internal container for every setting that affects caching behavior. Keeping them in one
    object lets multiple modules share them while exposing them to the user through a single
    property (:py:attr:`.CachedSession.settings`). These values can safely be modified after
    initialization.
    See :py:class:`.CachedSession` and :ref:`user-guide` for usage details.
    """

    allowable_codes: Iterable[int] = DEFAULT_STATUS_CODES
    allowable_methods: Iterable[str] = DEFAULT_METHODS
    cache_control: bool = False
    disabled: bool = False
    expire_after: ExpirationTime = None
    filter_fn: FilterCallback = None
    ignored_parameters: Iterable[str] = DEFAULT_IGNORED_PARAMS
    key_fn: KeyCallback = None
    match_headers: Union[Iterable[str], bool] = False
    only_if_cached: bool = False
    stale_if_error: bool = False
    # A factory gives each instance its own dict
    urls_expire_after: Dict[str, ExpirationTime] = field(factory=dict)

    @classmethod
    def from_kwargs(cls, **kwargs):
        """Alternate constructor for use with mixed ``**kwargs``:

        * Deprecated argument names are translated to their current names
        * Keyword arguments that are not valid settings are dropped
        """
        renamed = cls._rename_kwargs(kwargs)
        return cls(**get_valid_kwargs(cls.__init__, renamed))

    @staticmethod
    def _rename_kwargs(kwargs):
        """Move values given under deprecated names to their current names (in place)"""
        deprecated_names = (
            ('old_data_on_error', 'stale_if_error'),
            ('include_get_headers', 'match_headers'),
        )
        for old_name, new_name in deprecated_names:
            if old_name in kwargs:
                kwargs[new_name] = kwargs.pop(old_name)
        return kwargs