summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan Cook <jordan.cook.git@proton.me>2023-03-18 09:35:26 -0700
committerJordan Cook <jordan.cook.git@proton.me>2023-03-24 18:50:18 -0500
commit7c71b5a5145136039c5a9cb6f202bcccc8fb078a (patch)
treeff52bb7d9d7536e2ba139806b3532ed3401d56f0
parent812301ab85a3b54387dcebd7d65407ace2589b9f (diff)
parent94331ef108ad160faddb48cfc6a259c8a2497c99 (diff)
downloadrequests-cache-7c71b5a5145136039c5a9cb6f202bcccc8fb078a.tar.gz
Merge pull request #804 from sleiner-forks/feat/url-match-regex
Allow regexes for URL expiration patterns
-rw-r--r--HISTORY.md3
-rw-r--r--docs/user_guide/expiration.md12
-rw-r--r--requests_cache/policy/__init__.py8
-rw-r--r--requests_cache/policy/expiration.py19
-rw-r--r--requests_cache/policy/settings.py4
-rw-r--r--tests/unit/policy/test_expiration.py4
6 files changed, 36 insertions, 14 deletions
diff --git a/HISTORY.md b/HISTORY.md
index b0174c0..b173cff 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,8 @@
# History
+## Unreleased
+* Add support for regular expressions when using `urls_expire_after`
+
## 1.0.0 (2023-03-01)
[See all unreleased issues and PRs](https://github.com/requests-cache/requests-cache/milestone/10?closed=1)
diff --git a/docs/user_guide/expiration.md b/docs/user_guide/expiration.md
index d4b2fa5..9b6b50b 100644
--- a/docs/user_guide/expiration.md
+++ b/docs/user_guide/expiration.md
@@ -65,23 +65,25 @@ Examples:
```
(url-patterns)=
-## Expiration With URL Patterns
-You can use `urls_expire_after` to set different expiration values based on URL glob patterns:
```python
>>> urls_expire_after = {
... '*.site_1.com': 30,
... 'site_2.com/resource_1': 60 * 2,
... 'site_2.com/resource_2': 60 * 60 * 24,
+... re.compile(r'site_2.com/resource_\d'): 60 * 60 * 24 * 7,
+... 'site_2.com/resource_*': 60 * 60,
... 'site_2.com/static': NEVER_EXPIRE,
... }
>>> session = CachedSession(urls_expire_after=urls_expire_after)
```
**Notes:**
-- `urls_expire_after` should be a dict in the format `{'pattern': expire_after}`
+- `urls_expire_after` should be a dict in the format `{'pattern': expire_after}`
- `expire_after` accepts the same types as `CachedSession.settings.expire_after`
-- Patterns will match request **base URLs without the protocol**, so the pattern `site.com/resource/`
- is equivalent to `http*://site.com/resource/**`
+- **Glob patterns** will match request **base URLs without the protocol**, so the pattern `site.com/resource/`
+ is equivalent to `http*://site.com/resource/**`.
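+ For **regex patterns**, the pattern is searched for anywhere within the **whole URL** (including the protocol), so you _can_ put restrictions on the protocol, e.g.
+ `re.compile(r'https://site.com/.*')`. Anchor the pattern with `^` and `$` if it must match the entire URL.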
- If there is more than one match, the first match will be used in the order they are defined
- If no patterns match a request, `CachedSession.settings.expire_after` will be used as a default
- See {ref}`url-filtering` for an example of using `urls_expire_after` as an allowlist
diff --git a/requests_cache/policy/__init__.py b/requests_cache/policy/__init__.py
index dbd5bab..48b384a 100644
--- a/requests_cache/policy/__init__.py
+++ b/requests_cache/policy/__init__.py
@@ -4,12 +4,16 @@ additional settings and features specific to requests-cache.
# flake8: noqa: E402,F401
# isort: skip_file
from datetime import datetime, timedelta
-from typing import Callable, Dict, Union, MutableMapping
+from typing import Callable, Dict, Pattern as RegexPattern, Union, MutableMapping
from requests import Response
ExpirationTime = Union[None, int, float, str, datetime, timedelta]
-ExpirationPatterns = Dict[str, ExpirationTime]
+ExpirationPattern = Union[ # Either a glob expression as str or a compiled regex pattern
+ str,
+ RegexPattern,
+]
+ExpirationPatterns = Dict[ExpirationPattern, ExpirationTime]
FilterCallback = Callable[[Response], bool]
KeyCallback = Callable[..., str]
HeaderDict = MutableMapping[str, str]
diff --git a/requests_cache/policy/expiration.py b/requests_cache/policy/expiration.py
index 1041e36..1350951 100644
--- a/requests_cache/policy/expiration.py
+++ b/requests_cache/policy/expiration.py
@@ -5,9 +5,10 @@ from fnmatch import fnmatch
from logging import getLogger
from math import ceil
from typing import Optional
+from typing import Pattern as RegexPattern
from .._utils import try_int
-from . import ExpirationPatterns, ExpirationTime
+from . import ExpirationPattern, ExpirationPatterns, ExpirationTime
# Special expiration values that may be set by either headers or keyword args
DO_NOT_CACHE = 0x0D0E0200020704 # Per RFC 4824
@@ -89,7 +90,7 @@ def _to_utc(dt: datetime):
return dt
-def _url_match(url: str, pattern: str) -> bool:
+def _url_match(url: str, pattern: ExpirationPattern) -> bool:
"""Determine if a URL matches a pattern
Args:
@@ -103,7 +104,15 @@ def _url_match(url: str, pattern: str) -> bool:
True
>>> url_match('https://httpbin.org/stream/2', 'httpbin.org/*/1')
False
+ >>> url_match('https://httpbin.org/stream/2', re.compile('httpbin.org/.*/\\d+'))
+ True
+ >>> url_match('https://httpbin.org/stream/x', re.compile('httpbin.org/.*/\\d+'))
+ False
"""
- url = url.split('://')[-1]
- pattern = pattern.split('://')[-1].rstrip('*') + '**'
- return fnmatch(url, pattern)
+ if isinstance(pattern, RegexPattern):
+ match = pattern.search(url)
+ return match is not None
+ else:
+ url = url.split('://')[-1]
+ pattern = pattern.split('://')[-1].rstrip('*') + '**'
+ return fnmatch(url, pattern)
diff --git a/requests_cache/policy/settings.py b/requests_cache/policy/settings.py
index 8e8e26b..7c4dce8 100644
--- a/requests_cache/policy/settings.py
+++ b/requests_cache/policy/settings.py
@@ -4,7 +4,7 @@ from attr import define, field
from .._utils import get_valid_kwargs
from ..models import RichMixin
-from . import ExpirationTime, FilterCallback, KeyCallback
+from . import ExpirationPattern, ExpirationTime, FilterCallback, KeyCallback
ALL_METHODS = ('GET', 'HEAD', 'OPTIONS', 'POST', 'PUT', 'PATCH', 'DELETE')
DEFAULT_CACHE_NAME = 'http_cache'
@@ -36,7 +36,7 @@ class CacheSettings(RichMixin):
only_if_cached: bool = field(default=False)
stale_if_error: Union[bool, ExpirationTime] = field(default=False)
stale_while_revalidate: Union[bool, ExpirationTime] = field(default=False)
- urls_expire_after: Dict[str, ExpirationTime] = field(factory=dict)
+ urls_expire_after: Dict[ExpirationPattern, ExpirationTime] = field(factory=dict)
@classmethod
def from_kwargs(cls, **kwargs):
diff --git a/tests/unit/policy/test_expiration.py b/tests/unit/policy/test_expiration.py
index 54eb2a2..0a453db 100644
--- a/tests/unit/policy/test_expiration.py
+++ b/tests/unit/policy/test_expiration.py
@@ -1,3 +1,4 @@
+import re
from datetime import datetime, timedelta, timezone
from unittest.mock import patch
@@ -56,6 +57,8 @@ def test_get_expiration_datetime__httpdate():
('http://site_2.com/resource_1/index.html', 60 * 60 * 2),
('http://site_2.com/resource_2/', 60 * 60 * 24),
('http://site_2.com/static/', -1),
+ ('http://site_2.com/api/resource/123', 60 * 60 * 24 * 7),
+ ('http://site_2.com/api/resource/xyz', None),
('http://site_2.com/static/img.jpg', -1),
('site_2.com', None),
('some_other_site.com', None),
@@ -67,6 +70,7 @@ def test_get_url_expiration(url, expected_expire_after, mock_session):
'*.site_1.com': 60 * 60,
'site_2.com/resource_1': 60 * 60 * 2,
'site_2.com/resource_2': 60 * 60 * 24,
+ re.compile(r'site_2\.com/api/resource/\d+'): 60 * 60 * 24 * 7,
'site_2.com/static': -1,
}
assert get_url_expiration(url, urls_expire_after) == expected_expire_after