diff options
author | Jordan Cook <jordan.cook.git@proton.me> | 2022-09-29 11:47:49 -0500 |
---|---|---|
committer | Jordan Cook <jordan.cook.git@proton.me> | 2022-09-29 12:05:13 -0500 |
commit | 5204d487fea1d0b89e9eecdbc4a38621d2d4dbe4 (patch) | |
tree | 5a7683e4cb01359a151742223c4d3a4301d75f50 | |
parent | 8af0552b3f1011b5c004d345da4d94e48a40e4fe (diff) | |
download | requests-cache-5204d487fea1d0b89e9eecdbc4a38621d2d4dbe4.tar.gz |
Match on whether ignored_parameters are present in a request (without matching their values)
-rw-r--r-- | HISTORY.md | 8 | ||||
-rw-r--r-- | requests_cache/cache_keys.py | 21 | ||||
-rw-r--r-- | tests/integration/base_cache_test.py | 24 | ||||
-rw-r--r-- | tests/unit/test_cache_keys.py | 2 | ||||
-rw-r--r-- | tests/unit/test_session.py | 33 |
5 files changed, 57 insertions, 31 deletions
@@ -54,8 +54,12 @@ **Request matching & filtering:** * Add serializer name to cache keys to avoid errors due to switching serializers * Always skip both cache read and write for requests excluded by `allowable_methods` (previously only skipped write) -* Ignore and redact common authentication headers and request parameters by default. This provides some default recommended values for `ignored_parameters`, to avoid accidentally storing common credentials (e.g., OAuth tokens) in the cache. This will have no effect if you are already setting `ignored_parameters`. -* Support distinct matching for requests that differ by duplicate request params (e.g, `a=1` vs `?a=1&a=2`) +* Ignore and redact common authentication headers and request parameters by default. This provides + some default recommended values for `ignored_parameters`, to avoid accidentally storing common + credentials in the cache. This will have no effect if `ignored_parameters` is already set. +* Support distinct matching for requests that differ only by a parameter in `ignored_parameters` + (e.g., for a request sent both with and without authentication) +* Support distinct matching for requests that differ only by duplicate request params (e.g, `a=1` vs `?a=1&a=2`) **Cache convenience methods:** * Add `expired` and `invalid` arguments to `BaseCache.delete()` (to replace `remove_expired_responses()`) diff --git a/requests_cache/cache_keys.py b/requests_cache/cache_keys.py index 1aeed7a..fa60619 100644 --- a/requests_cache/cache_keys.py +++ b/requests_cache/cache_keys.py @@ -40,7 +40,7 @@ __all__ = [ if TYPE_CHECKING: from .models import AnyPreparedRequest, AnyRequest, CachedResponse -# Maximum JSON request body size that will be normalized +# Maximum JSON request body size that will be filtered and normalized MAX_NORM_BODY_SIZE = 10 * 1024 * 1024 KVList = List[Tuple[str, str]] @@ -160,15 +160,16 @@ def normalize_url(url: str, ignored_parameters: ParamList) -> str: def normalize_body(request: 
AnyPreparedRequest, ignored_parameters: ParamList) -> bytes: """Normalize and filter a request body if possible, depending on Content-Type""" - original_body = request.body or b'' + if not request.body: + return b'' content_type = request.headers.get('Content-Type') # Filter and sort params if possible - filtered_body: Union[str, bytes] = original_body + filtered_body: Union[str, bytes] = request.body if content_type == 'application/json': - filtered_body = normalize_json_body(original_body, ignored_parameters) + filtered_body = normalize_json_body(request.body, ignored_parameters) elif content_type == 'application/x-www-form-urlencoded': - filtered_body = normalize_params(original_body, ignored_parameters) + filtered_body = normalize_params(request.body, ignored_parameters) return encode(filtered_body) @@ -224,14 +225,18 @@ def filter_sort_json(data: Union[List, Mapping], ignored_parameters: ParamList): def filter_sort_dict( data: Mapping[str, str], ignored_parameters: ParamList = None ) -> Dict[str, str]: - return {k: v for k, v in sorted(data.items()) if k not in set(ignored_parameters or [])} + # Note: Any ignored_parameters present will have their values replaced instead of removing the + # parameter, so the cache key will still match whether the parameter was present or not. 
+ ignored_parameters = set(ignored_parameters or []) + return {k: ('REDACTED' if k in ignored_parameters else v) for k, v in sorted(data.items())} def filter_sort_multidict(data: KVList, ignored_parameters: ParamList = None) -> KVList: - return [(k, v) for k, v in sorted(data) if k not in set(ignored_parameters or [])] + ignored_parameters = set(ignored_parameters or []) + return [(k, 'REDACTED' if k in ignored_parameters else v) for k, v in sorted(data)] def filter_sort_list(data: List, ignored_parameters: ParamList = None) -> List: if not ignored_parameters: return sorted(data) - return [k for k in sorted(data) if k not in set(ignored_parameters or [])] + return [k for k in sorted(data) if k not in set(ignored_parameters)] diff --git a/tests/integration/base_cache_test.py b/tests/integration/base_cache_test.py index 5d69ea8..546f6c5 100644 --- a/tests/integration/base_cache_test.py +++ b/tests/integration/base_cache_test.py @@ -313,7 +313,7 @@ class BaseCacheTest: assert response.from_cache is False response = session.request(method, url, headers={"Authorization": "<Secret Key>"}) assert response.from_cache is True - assert response.request.headers.get('Authorization') is None + assert response.request.headers.get('Authorization') == 'REDACTED' @pytest.mark.parametrize('method', HTTPBIN_METHODS) def test_filter_request_query_parameters(self, method): @@ -325,23 +325,25 @@ class BaseCacheTest: assert response.from_cache is True query = urlparse(response.request.url).query query_dict = parse_qs(query) - assert 'api_key' not in query_dict + assert query_dict['api_key'] == ['REDACTED'] @pytest.mark.parametrize('post_type', ['data', 'json']) def test_filter_request_post_data(self, post_type): method = 'POST' url = httpbin(method.lower()) + body = {"api_key": "<Secret Key>"} + headers = {} + if post_type == 'data': + body = json.dumps(body) + headers = {'Content-Type': 'application/json'} session = self.init_session(ignored_parameters=['api_key']) - response = 
session.request(method, url, **{post_type: {"api_key": "<Secret Key>"}}) - assert response.from_cache is False - response = session.request(method, url, **{post_type: {"api_key": "<Secret Key>"}}) + + response = session.request(method, url, headers=headers, **{post_type: body}) + response = session.request(method, url, headers=headers, **{post_type: body}) assert response.from_cache is True - if post_type == 'data': - body = parse_qs(response.request.body) - assert "api_key" not in body - elif post_type == 'json': - body = json.loads(response.request.body) - assert "api_key" not in body + + parsed_body = json.loads(response.request.body) + assert parsed_body['api_key'] == 'REDACTED' @pytest.mark.parametrize('executor_class', [ThreadPoolExecutor, ProcessPoolExecutor]) @pytest.mark.parametrize('iteration', range(N_ITERATIONS)) diff --git a/tests/unit/test_cache_keys.py b/tests/unit/test_cache_keys.py index cb76f52..40cca7f 100644 --- a/tests/unit/test_cache_keys.py +++ b/tests/unit/test_cache_keys.py @@ -69,7 +69,7 @@ def test_normalize_request__json_body(): headers={'Content-Type': 'application/json'}, ) norm_request = normalize_request(request, ignored_parameters=['param_2']) - assert norm_request.body == b'{"param_1": "value_1"}' + assert norm_request.body == b'{"param_1": "value_1", "param_2": "REDACTED"}' def test_normalize_request__json_body_list(): diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index 5151a7d..dda449a 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -143,10 +143,18 @@ def test_all_methods__ignored_parameters__redacted(field, method, mock_session): mock_session.request(method, MOCKED_URL, **{field: params_1}) cached_response = mock_session.request(method, MOCKED_URL, **{field: params_1}) - assert 'ignored' not in cached_response.url - assert 'ignored' not in cached_response.request.url - assert 'ignored' not in cached_response.request.headers - assert 'ignored' not in 
cached_response.request.body.decode('utf-8') + request_url = cached_response.request.url + headers = cached_response.request.headers + body = cached_response.request.body.decode('utf-8') + + assert 'ignored' not in cached_response.url or 'ignored=REDACTED' in cached_response.url + assert 'ignored' not in request_url or 'ignored=REDACTED' in request_url + assert 'ignored' not in headers or headers['ignored'] == 'REDACTED' + if field == 'data': + assert 'ignored=REDACTED' in body + elif field == 'json': + body = json.loads(body) + assert body['ignored'] == 'REDACTED' # Variations of relevant request arguments @@ -502,12 +510,19 @@ def test_default_ignored_parameters(mock_session): params={'access_token': 'token'}, headers={'Authorization': 'Bearer token'}, ) - response = mock_session.get(MOCKED_URL) - + response = mock_session.get( + MOCKED_URL, + params={'access_token': 'token'}, + headers={'Authorization': 'Bearer token'}, + ) assert response.from_cache is True - assert 'access_token' not in response.url - assert 'access_token' not in response.request.url - assert 'Authorization' not in response.request.headers + + unauthenticated_response = mock_session.get(MOCKED_URL) + assert unauthenticated_response.from_cache is False + + assert 'access_token=REDACTED' in response.url + assert 'access_token=REDACTED' in response.request.url + assert response.request.headers['Authorization'] == 'REDACTED' @patch_normalize_url |