diff options
author | Jordan Cook <jordan.cook@pioneer.com> | 2021-08-14 21:19:17 -0500 |
---|---|---|
committer | Jordan Cook <jordan.cook@pioneer.com> | 2021-08-14 21:58:30 -0500 |
commit | f7754797b120c0166705230e7f05a48cb8976fbb (patch) | |
tree | 80c98fbe7f596539f562b7669e00338e11ff68b3 | |
parent | 7f6389747924c33999eb1a0dc9184dd5511a0e35 (diff) | |
download | requests-cache-f7754797b120c0166705230e7f05a48cb8976fbb.tar.gz |
Add docs + example script for custom cache keys
-rw-r--r-- | HISTORY.md | 1 | ||||
-rw-r--r-- | docs/advanced_usage.md | 40 | ||||
-rw-r--r-- | docs/examples.md | 13 | ||||
-rw-r--r-- | examples/custom_cache_keys.py | 64 |
4 files changed, 117 insertions, 1 deletion
@@ -7,6 +7,7 @@
   (e.g., request headers manually set by the client)
 * Use `cattrs` for serialization by default, which enables a more forwards-compatible serialization format
   (e.g., less prone to invalidation due to future updates)
+* Add support for custom cache key callbacks
 * Drop support for python 3.6
 * Note: Any bugfixes for 0.8.x that also apply to 0.7.x will be backported
 * Remove deprecated `core` module
diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md
index 971af2e..48fce2f 100644
--- a/docs/advanced_usage.md
+++ b/docs/advanced_usage.md
@@ -95,7 +95,7 @@ to both new responses (on write) and previously cached responses (on read):
 >>> from sys import getsizeof
 >>> from requests_cache import CachedSession
 
->>> def filter_by_size(response):
+>>> def filter_by_size(response: Response) -> bool:
 >>>     """Don't cache responses with a body over 1 MB"""
 >>>     return getsizeof(response.content) <= 1024 * 1024
 
@@ -103,6 +103,44 @@ to both new responses (on write) and previously cached responses (on read):
 ```
 :::
 
+## Custom Cache Keys
+A cache key is a hash or other value based on request details that identifies a response in the cache.
+This determines a cached response's uniqueness. For example, the option `ignored_parameters=['foo']`
+will exclude the `foo` request parameter from the cache key, meaning these three requests will all
+use the same cached response:
+```python
+>>> session = CachedSession(ignored_parameters=['foo'])
+>>> response = session.get('https://example.com') # cache miss
+>>> response = session.get('https://example.com?foo=bar') # cache hit
+>>> response = session.get('https://example.com?foo=qux') # cache hit
+```
+
+If you want more control over this behavior, you can provide your own function to generate cache keys,
+which will take a {py:class}`~requests.PreparedRequest` plus optional keyword args, and return a string:
+```python
+def create_key(request: requests.PreparedRequest, **kwargs) -> str:
+    """Generate a custom cache key for the given request"""
+```
+`**kwargs` includes key-related {py:class}`.BaseCache` settings, and any other keyword args passed
+to {py:meth}`.CachedSession.send()`.
+See {py:func}`.create_key` for the reference implementation, and see the rest of the {py:mod}`.cache_keys`
+module for some potentially useful helper functions.
+
+You can then pass this function via the `key_fn` param:
+```python
+session = CachedSession(key_fn=create_key)
+```
+
+```{tip}
+Generally, if you include less info in your cache keys, you will have more cache hits and use less
+storage space, but risk getting incorrect response data back. For example, if you exclude all request
+parameters, you will get the same cached response back for any combination of request parameters.
+```
+```{warning}
+If you provide a custom key function for a non-empty cache, any responses previously cached with a
+different key function will likely be unused.
+```
+
 ## Custom Backends
 If the built-in {py:mod}`Cache Backends <requests_cache.backends>` don't suit your needs,
 you can create your own by making subclasses of {py:class}`.BaseCache` and {py:class}`.BaseStorage`:
diff --git a/docs/examples.md b/docs/examples.md
index a014d3e..74fc68b 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -81,3 +81,16 @@ These can also be found in the
 :lines: 1,6-
 ```
 :::
+
+## Custom cache key function
+```{include} ../examples/custom_cache_keys.py
+:start-line: 2
+:end-line: 15
+```
+
+:::{admonition} Example code
+:class: toggle
+```{literalinclude} ../examples/custom_cache_keys.py
+:lines: 1,17-
+```
+:::
diff --git a/examples/custom_cache_keys.py b/examples/custom_cache_keys.py
new file mode 100644
index 0000000..5ba6dc0
--- /dev/null
+++ b/examples/custom_cache_keys.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+"""
+Example of a custom cache key function that caches a new response if the version of requests-cache,
+requests, or urllib3 changes.
+
+This generally isn't needed, since anything that causes a deserialization error will simply result
+in a new request being sent and cached. But you might want to include a library version in your cache
+key if, for example, you suspect a change in the library does not cause errors but **results in
+different response content**.
+
+This uses info from {py:func}`requests.help.info`. You can also preview this info from the command
+line to see what else is available:
+```bash
+python -m requests.help
+```
+"""
+from hashlib import sha256
+from unittest.mock import patch
+
+from requests import PreparedRequest
+from requests.help import info as get_requests_info
+
+import requests_cache
+from requests_cache import CachedSession
+from requests_cache.cache_keys import create_key
+
+
+def create_custom_key(request: PreparedRequest, **kwargs) -> str:
+    """Make a custom cache key that includes library versions"""
+    # Start with the default key created by requests-cache
+    base_key = create_key(request, **kwargs)
+    key = sha256()
+    key.update(base_key.encode('utf-8'))
+
+    # Add versions of requests-cache, requests, and urllib3
+    requests_info = get_requests_info()
+    for lib in ['requests', 'urllib3']:
+        key.update(requests_info[lib]['version'].encode('utf-8'))
+    key.update(requests_cache.__version__.encode('utf-8'))
+
+    return key.hexdigest()
+
+
+def test_cache_key():
+    """Test that the custom cache keys are working as expected"""
+    session = CachedSession('key-test', key_fn=create_custom_key)
+    session.cache.clear()
+    session.get('https://httpbin.org/get')
+    response = session.get('https://httpbin.org/get')
+    assert response.from_cache is True
+
+    # Simulate a major version change
+    new_versions = {
+        'requests': {'version': '3.0.0'},
+        'urllib3': {'version': '2.0.0'},
+    }
+    with patch('__main__.get_requests_info', return_value=new_versions):
+        # A new request will be sent since the cache key no longer matches
+        response = session.get('https://httpbin.org/get')
+        assert response.from_cache is False
+
+
+if __name__ == '__main__':
+    test_cache_key()
|