summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan Cook <jordan.cook.git@proton.me>2022-09-29 11:47:49 -0500
committerJordan Cook <jordan.cook.git@proton.me>2022-09-29 12:05:13 -0500
commit5204d487fea1d0b89e9eecdbc4a38621d2d4dbe4 (patch)
tree5a7683e4cb01359a151742223c4d3a4301d75f50
parent8af0552b3f1011b5c004d345da4d94e48a40e4fe (diff)
downloadrequests-cache-5204d487fea1d0b89e9eecdbc4a38621d2d4dbe4.tar.gz
Match whether ignored_parameters are present in a request (without matching content)
-rw-r--r--HISTORY.md8
-rw-r--r--requests_cache/cache_keys.py21
-rw-r--r--tests/integration/base_cache_test.py24
-rw-r--r--tests/unit/test_cache_keys.py2
-rw-r--r--tests/unit/test_session.py33
5 files changed, 57 insertions, 31 deletions
diff --git a/HISTORY.md b/HISTORY.md
index c0bae25..919c1fc 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -54,8 +54,12 @@
**Request matching & filtering:**
* Add serializer name to cache keys to avoid errors due to switching serializers
* Always skip both cache read and write for requests excluded by `allowable_methods` (previously only skipped write)
-* Ignore and redact common authentication headers and request parameters by default. This provides some default recommended values for `ignored_parameters`, to avoid accidentally storing common credentials (e.g., OAuth tokens) in the cache. This will have no effect if you are already setting `ignored_parameters`.
-* Support distinct matching for requests that differ by duplicate request params (e.g, `a=1` vs `?a=1&a=2`)
+* Ignore and redact common authentication headers and request parameters by default. This provides
+ some default recommended values for `ignored_parameters`, to avoid accidentally storing common
+ credentials in the cache. This will have no effect if `ignored_parameters` is already set.
+* Support distinct matching for requests that differ only by the presence of a parameter in
+ `ignored_parameters` (e.g., for a request sent both with and without authentication)
+* Support distinct matching for requests that differ only by duplicate request params (e.g., `?a=1` vs `?a=1&a=2`)
**Cache convenience methods:**
* Add `expired` and `invalid` arguments to `BaseCache.delete()` (to replace `remove_expired_responses()`)
diff --git a/requests_cache/cache_keys.py b/requests_cache/cache_keys.py
index 1aeed7a..fa60619 100644
--- a/requests_cache/cache_keys.py
+++ b/requests_cache/cache_keys.py
@@ -40,7 +40,7 @@ __all__ = [
if TYPE_CHECKING:
from .models import AnyPreparedRequest, AnyRequest, CachedResponse
-# Maximum JSON request body size that will be normalized
+# Maximum JSON request body size that will be filtered and normalized
MAX_NORM_BODY_SIZE = 10 * 1024 * 1024
KVList = List[Tuple[str, str]]
@@ -160,15 +160,16 @@ def normalize_url(url: str, ignored_parameters: ParamList) -> str:
def normalize_body(request: AnyPreparedRequest, ignored_parameters: ParamList) -> bytes:
"""Normalize and filter a request body if possible, depending on Content-Type"""
- original_body = request.body or b''
+ if not request.body:
+ return b''
content_type = request.headers.get('Content-Type')
# Filter and sort params if possible
- filtered_body: Union[str, bytes] = original_body
+ filtered_body: Union[str, bytes] = request.body
if content_type == 'application/json':
- filtered_body = normalize_json_body(original_body, ignored_parameters)
+ filtered_body = normalize_json_body(request.body, ignored_parameters)
elif content_type == 'application/x-www-form-urlencoded':
- filtered_body = normalize_params(original_body, ignored_parameters)
+ filtered_body = normalize_params(request.body, ignored_parameters)
return encode(filtered_body)
@@ -224,14 +225,18 @@ def filter_sort_json(data: Union[List, Mapping], ignored_parameters: ParamList):
def filter_sort_dict(
data: Mapping[str, str], ignored_parameters: ParamList = None
) -> Dict[str, str]:
- return {k: v for k, v in sorted(data.items()) if k not in set(ignored_parameters or [])}
+ # Note: The values of any ignored_parameters present are replaced with a placeholder instead of
+ # removing the parameter, so the cache key still reflects whether the parameter was present.
+ ignored_parameters = set(ignored_parameters or [])
+ return {k: ('REDACTED' if k in ignored_parameters else v) for k, v in sorted(data.items())}
def filter_sort_multidict(data: KVList, ignored_parameters: ParamList = None) -> KVList:
- return [(k, v) for k, v in sorted(data) if k not in set(ignored_parameters or [])]
+ ignored_parameters = set(ignored_parameters or [])
+ return [(k, 'REDACTED' if k in ignored_parameters else v) for k, v in sorted(data)]
def filter_sort_list(data: List, ignored_parameters: ParamList = None) -> List:
if not ignored_parameters:
return sorted(data)
- return [k for k in sorted(data) if k not in set(ignored_parameters or [])]
+ return [k for k in sorted(data) if k not in set(ignored_parameters)]
diff --git a/tests/integration/base_cache_test.py b/tests/integration/base_cache_test.py
index 5d69ea8..546f6c5 100644
--- a/tests/integration/base_cache_test.py
+++ b/tests/integration/base_cache_test.py
@@ -313,7 +313,7 @@ class BaseCacheTest:
assert response.from_cache is False
response = session.request(method, url, headers={"Authorization": "<Secret Key>"})
assert response.from_cache is True
- assert response.request.headers.get('Authorization') is None
+ assert response.request.headers.get('Authorization') == 'REDACTED'
@pytest.mark.parametrize('method', HTTPBIN_METHODS)
def test_filter_request_query_parameters(self, method):
@@ -325,23 +325,25 @@ class BaseCacheTest:
assert response.from_cache is True
query = urlparse(response.request.url).query
query_dict = parse_qs(query)
- assert 'api_key' not in query_dict
+ assert query_dict['api_key'] == ['REDACTED']
@pytest.mark.parametrize('post_type', ['data', 'json'])
def test_filter_request_post_data(self, post_type):
method = 'POST'
url = httpbin(method.lower())
+ body = {"api_key": "<Secret Key>"}
+ headers = {}
+ if post_type == 'data':
+ body = json.dumps(body)
+ headers = {'Content-Type': 'application/json'}
session = self.init_session(ignored_parameters=['api_key'])
- response = session.request(method, url, **{post_type: {"api_key": "<Secret Key>"}})
- assert response.from_cache is False
- response = session.request(method, url, **{post_type: {"api_key": "<Secret Key>"}})
+
+ response = session.request(method, url, headers=headers, **{post_type: body})
+ response = session.request(method, url, headers=headers, **{post_type: body})
assert response.from_cache is True
- if post_type == 'data':
- body = parse_qs(response.request.body)
- assert "api_key" not in body
- elif post_type == 'json':
- body = json.loads(response.request.body)
- assert "api_key" not in body
+
+ parsed_body = json.loads(response.request.body)
+ assert parsed_body['api_key'] == 'REDACTED'
@pytest.mark.parametrize('executor_class', [ThreadPoolExecutor, ProcessPoolExecutor])
@pytest.mark.parametrize('iteration', range(N_ITERATIONS))
diff --git a/tests/unit/test_cache_keys.py b/tests/unit/test_cache_keys.py
index cb76f52..40cca7f 100644
--- a/tests/unit/test_cache_keys.py
+++ b/tests/unit/test_cache_keys.py
@@ -69,7 +69,7 @@ def test_normalize_request__json_body():
headers={'Content-Type': 'application/json'},
)
norm_request = normalize_request(request, ignored_parameters=['param_2'])
- assert norm_request.body == b'{"param_1": "value_1"}'
+ assert norm_request.body == b'{"param_1": "value_1", "param_2": "REDACTED"}'
def test_normalize_request__json_body_list():
diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py
index 5151a7d..dda449a 100644
--- a/tests/unit/test_session.py
+++ b/tests/unit/test_session.py
@@ -143,10 +143,18 @@ def test_all_methods__ignored_parameters__redacted(field, method, mock_session):
mock_session.request(method, MOCKED_URL, **{field: params_1})
cached_response = mock_session.request(method, MOCKED_URL, **{field: params_1})
- assert 'ignored' not in cached_response.url
- assert 'ignored' not in cached_response.request.url
- assert 'ignored' not in cached_response.request.headers
- assert 'ignored' not in cached_response.request.body.decode('utf-8')
+ request_url = cached_response.request.url
+ headers = cached_response.request.headers
+ body = cached_response.request.body.decode('utf-8')
+
+ assert 'ignored' not in cached_response.url or 'ignored=REDACTED' in cached_response.url
+ assert 'ignored' not in request_url or 'ignored=REDACTED' in request_url
+ assert 'ignored' not in headers or headers['ignored'] == 'REDACTED'
+ if field == 'data':
+ assert 'ignored=REDACTED' in body
+ elif field == 'json':
+ body = json.loads(body)
+ assert body['ignored'] == 'REDACTED'
# Variations of relevant request arguments
@@ -502,12 +510,19 @@ def test_default_ignored_parameters(mock_session):
params={'access_token': 'token'},
headers={'Authorization': 'Bearer token'},
)
- response = mock_session.get(MOCKED_URL)
-
+ response = mock_session.get(
+ MOCKED_URL,
+ params={'access_token': 'token'},
+ headers={'Authorization': 'Bearer token'},
+ )
assert response.from_cache is True
- assert 'access_token' not in response.url
- assert 'access_token' not in response.request.url
- assert 'Authorization' not in response.request.headers
+
+ unauthenticated_response = mock_session.get(MOCKED_URL)
+ assert unauthenticated_response.from_cache is False
+
+ assert 'access_token=REDACTED' in response.url
+ assert 'access_token=REDACTED' in response.request.url
+ assert response.request.headers['Authorization'] == 'REDACTED'
@patch_normalize_url