diff options
author | Simon Leiner <simon@leiner.me> | 2023-03-14 18:23:40 +0100 |
---|---|---|
committer | Jordan Cook <jordan.cook.git@proton.me> | 2023-03-24 18:50:18 -0500 |
commit | 94331ef108ad160faddb48cfc6a259c8a2497c99 (patch) | |
tree | ff52bb7d9d7536e2ba139806b3532ed3401d56f0 | |
parent | 812301ab85a3b54387dcebd7d65407ace2589b9f (diff) | |
download | requests-cache-94331ef108ad160faddb48cfc6a259c8a2497c99.tar.gz |
Allow regexes for URL expiration patterns
This allows for more fine-grained control over URL patterns than
globbing in the rare cases where that is needed.
-rw-r--r-- | HISTORY.md | 3 | ||||
-rw-r--r-- | docs/user_guide/expiration.md | 12 | ||||
-rw-r--r-- | requests_cache/policy/__init__.py | 8 | ||||
-rw-r--r-- | requests_cache/policy/expiration.py | 19 | ||||
-rw-r--r-- | requests_cache/policy/settings.py | 4 | ||||
-rw-r--r-- | tests/unit/policy/test_expiration.py | 4 |
6 files changed, 36 insertions, 14 deletions
@@ -1,5 +1,8 @@ # History +## Unreleased +* Add support for regular expressions when using `urls_expire_after` + ## 1.0.0 (2023-03-01) [See all unreleased issues and PRs](https://github.com/requests-cache/requests-cache/milestone/10?closed=1) diff --git a/docs/user_guide/expiration.md b/docs/user_guide/expiration.md index d4b2fa5..9b6b50b 100644 --- a/docs/user_guide/expiration.md +++ b/docs/user_guide/expiration.md @@ -65,23 +65,25 @@ Examples: ``` (url-patterns)= -## Expiration With URL Patterns -You can use `urls_expire_after` to set different expiration values based on URL glob patterns: ```python >>> urls_expire_after = { ... '*.site_1.com': 30, ... 'site_2.com/resource_1': 60 * 2, ... 'site_2.com/resource_2': 60 * 60 * 24, +... re.compile(r'site_2.com/resource_\d'): 60 * 60 * 24 * 7, +... 'site_2.com/resource_*': 60 * 60, ... 'site_2.com/static': NEVER_EXPIRE, ... } >>> session = CachedSession(urls_expire_after=urls_expire_after) ``` **Notes:** -- `urls_expire_after` should be a dict in the format `{'pattern': expire_after}` +- `urls_expire_after` should be a dict in the format `{'pattern': expire_after}` - `expire_after` accepts the same types as `CachedSession.settings.expire_after` -- Patterns will match request **base URLs without the protocol**, so the pattern `site.com/resource/` - is equivalent to `http*://site.com/resource/**` +- **Glob patterns** will match request **base URLs without the protocol**, so the pattern `site.com/resource/` + is equivalent to `http*://site.com/resource/**`. + For **regex patterns**, the **whole URL** will be matched, so you _can_ put restrictions on the protocol, e.g. + `re.compile(r'https://site.com/.*')`. 
- If there is more than one match, the first match will be used in the order they are defined - If no patterns match a request, `CachedSession.settings.expire_after` will be used as a default - See {ref}`url-filtering` for an example of using `urls_expire_after` as an allowlist diff --git a/requests_cache/policy/__init__.py b/requests_cache/policy/__init__.py index dbd5bab..48b384a 100644 --- a/requests_cache/policy/__init__.py +++ b/requests_cache/policy/__init__.py @@ -4,12 +4,16 @@ additional settings and features specific to requests-cache. # flake8: noqa: E402,F401 # isort: skip_file from datetime import datetime, timedelta -from typing import Callable, Dict, Union, MutableMapping +from typing import Callable, Dict, Pattern as RegexPattern, Union, MutableMapping from requests import Response ExpirationTime = Union[None, int, float, str, datetime, timedelta] -ExpirationPatterns = Dict[str, ExpirationTime] +ExpirationPattern = Union[ # Either a glob expression as str or a compiled regex pattern + str, + RegexPattern, +] +ExpirationPatterns = Dict[ExpirationPattern, ExpirationTime] FilterCallback = Callable[[Response], bool] KeyCallback = Callable[..., str] HeaderDict = MutableMapping[str, str] diff --git a/requests_cache/policy/expiration.py b/requests_cache/policy/expiration.py index 1041e36..1350951 100644 --- a/requests_cache/policy/expiration.py +++ b/requests_cache/policy/expiration.py @@ -5,9 +5,10 @@ from fnmatch import fnmatch from logging import getLogger from math import ceil from typing import Optional +from typing import Pattern as RegexPattern from .._utils import try_int -from . import ExpirationPatterns, ExpirationTime +from . 
import ExpirationPattern, ExpirationPatterns, ExpirationTime # Special expiration values that may be set by either headers or keyword args DO_NOT_CACHE = 0x0D0E0200020704 # Per RFC 4824 @@ -89,7 +90,7 @@ def _to_utc(dt: datetime): return dt -def _url_match(url: str, pattern: str) -> bool: +def _url_match(url: str, pattern: ExpirationPattern) -> bool: """Determine if a URL matches a pattern Args: @@ -103,7 +104,15 @@ def _url_match(url: str, pattern: str) -> bool: True >>> url_match('https://httpbin.org/stream/2', 'httpbin.org/*/1') False + >>> url_match('https://httpbin.org/stream/2', re.compile('httpbin.org/*/\\d+')) + True + >>> url_match('https://httpbin.org/stream/x', re.compile('httpbin.org/*/\\d+')) + False """ - url = url.split('://')[-1] - pattern = pattern.split('://')[-1].rstrip('*') + '**' - return fnmatch(url, pattern) + if isinstance(pattern, RegexPattern): + match = pattern.search(url) + return match is not None + else: + url = url.split('://')[-1] + pattern = pattern.split('://')[-1].rstrip('*') + '**' + return fnmatch(url, pattern) diff --git a/requests_cache/policy/settings.py b/requests_cache/policy/settings.py index 8e8e26b..7c4dce8 100644 --- a/requests_cache/policy/settings.py +++ b/requests_cache/policy/settings.py @@ -4,7 +4,7 @@ from attr import define, field from .._utils import get_valid_kwargs from ..models import RichMixin -from . import ExpirationTime, FilterCallback, KeyCallback +from . 
import ExpirationPattern, ExpirationTime, FilterCallback, KeyCallback ALL_METHODS = ('GET', 'HEAD', 'OPTIONS', 'POST', 'PUT', 'PATCH', 'DELETE') DEFAULT_CACHE_NAME = 'http_cache' @@ -36,7 +36,7 @@ class CacheSettings(RichMixin): only_if_cached: bool = field(default=False) stale_if_error: Union[bool, ExpirationTime] = field(default=False) stale_while_revalidate: Union[bool, ExpirationTime] = field(default=False) - urls_expire_after: Dict[str, ExpirationTime] = field(factory=dict) + urls_expire_after: Dict[ExpirationPattern, ExpirationTime] = field(factory=dict) @classmethod def from_kwargs(cls, **kwargs): diff --git a/tests/unit/policy/test_expiration.py b/tests/unit/policy/test_expiration.py index 54eb2a2..0a453db 100644 --- a/tests/unit/policy/test_expiration.py +++ b/tests/unit/policy/test_expiration.py @@ -1,3 +1,4 @@ +import re from datetime import datetime, timedelta, timezone from unittest.mock import patch @@ -56,6 +57,8 @@ def test_get_expiration_datetime__httpdate(): ('http://site_2.com/resource_1/index.html', 60 * 60 * 2), ('http://site_2.com/resource_2/', 60 * 60 * 24), ('http://site_2.com/static/', -1), + ('http://site_2.com/api/resource/123', 60 * 60 * 24 * 7), + ('http://site_2.com/api/resource/xyz', None), ('http://site_2.com/static/img.jpg', -1), ('site_2.com', None), ('some_other_site.com', None), @@ -67,6 +70,7 @@ def test_get_url_expiration(url, expected_expire_after, mock_session): '*.site_1.com': 60 * 60, 'site_2.com/resource_1': 60 * 60 * 2, 'site_2.com/resource_2': 60 * 60 * 24, + re.compile(r'site_2\.com/api/resource/\d+'): 60 * 60 * 24 * 7, 'site_2.com/static': -1, } assert get_url_expiration(url, urls_expire_after) == expected_expire_after |