diff options
-rw-r--r-- | requests_cache/models/__init__.py | 2 | ||||
-rwxr-xr-x | requests_cache/models/response.py | 5 | ||||
-rw-r--r-- | requests_cache/serializers/__init__.py | 2 | ||||
-rw-r--r-- | requests_cache/serializers/cattrs.py | 107 | ||||
-rw-r--r-- | requests_cache/serializers/content_decoder.py | 52 | ||||
-rw-r--r-- | requests_cache/serializers/pipeline.py | 4 | ||||
-rw-r--r-- | tests/integration/base_cache_test.py | 7 |
7 files changed, 71 insertions, 108 deletions
diff --git a/requests_cache/models/__init__.py b/requests_cache/models/__init__.py index 28825bc..93d8279 100644 --- a/requests_cache/models/__init__.py +++ b/requests_cache/models/__init__.py @@ -7,7 +7,7 @@ from requests import PreparedRequest, Request, Response from .base import RichMixin from .raw_response import CachedHTTPResponse from .request import CachedRequest -from .response import CachedResponse, OriginalResponse +from .response import CachedResponse, DecodedContent, OriginalResponse AnyResponse = Union[OriginalResponse, CachedResponse] AnyRequest = Union[Request, PreparedRequest, CachedRequest] diff --git a/requests_cache/models/response.py b/requests_cache/models/response.py index e3acdf9..1149d42 100755 --- a/requests_cache/models/response.py +++ b/requests_cache/models/response.py @@ -3,7 +3,7 @@ from __future__ import annotations from datetime import datetime, timedelta, timezone from logging import getLogger from time import time -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, Dict, List, Optional, Union import attr from attr import define, field @@ -19,6 +19,7 @@ if TYPE_CHECKING: from ..policy.actions import CacheActions DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S %Z' # Format used for __str__ only +DecodedContent = Union[Dict, str, None] logger = getLogger(__name__) @@ -63,7 +64,7 @@ class CachedResponse(RichMixin, BaseResponse): """A class that emulates :py:class:`requests.Response`, optimized for serialization""" _content: bytes = field(default=None) - _decoded_content: str = field(default=None) + _decoded_content: DecodedContent = field(default=None) _next: Optional[CachedRequest] = field(default=None) cookies: RequestsCookieJar = field(factory=RequestsCookieJar) created_at: datetime = field(factory=datetime.utcnow) diff --git a/requests_cache/serializers/__init__.py b/requests_cache/serializers/__init__.py index 36197e1..6328ea7 100644 --- a/requests_cache/serializers/__init__.py +++ b/requests_cache/serializers/__init__.py @@ -21,7 +21,7 @@ class that raises an ``ImportError`` at initialization time instead of at import # flake8: noqa: F401 from typing import Union -from .cattrs import CattrStage, DecodedBodyStage +from .cattrs import CattrStage from .pipeline import SerializerPipeline, Stage from .preconf import ( bson_document_serializer, diff --git a/requests_cache/serializers/cattrs.py b/requests_cache/serializers/cattrs.py index 6138d42..c124830 100644 --- a/requests_cache/serializers/cattrs.py +++ b/requests_cache/serializers/cattrs.py @@ -21,7 +21,7 @@ from requests.exceptions import JSONDecodeError from requests.structures import CaseInsensitiveDict from urllib3._collections import HTTPHeaderDict -from ..models import CachedResponse +from ..models import CachedResponse, DecodedContent from .pipeline import Stage try: @@ -33,67 +33,37 @@ except ImportError: class CattrStage(Stage): """Base serializer class that does pre/post-processing with ``cattrs``. This can be used either on its own, or as a stage within a :py:class:`.SerializerPipeline`. + + Args: + factory: A callable that returns a ``cattrs`` converter to start from instead of a new + ``GenConverter``. Mainly useful for preconf converters. + decode_content: Save response body in human-readable format, if possible + + Notes on ``decode_content`` option: + + * Response body will be decoded into a human-readable format (if possible) during serialization, + and re-encoded during deserialization to reconstruct the original response. + * Supported Content-Types are ``application/json`` and ``text/*``. All other types will be saved as-is. + * Decoded responses are saved in a separate ``_decoded_content`` attribute, to ensure that + ``_content`` is always binary. """ - def __init__(self, factory: Callable[..., GenConverter] = None, **kwargs): + def __init__( + self, factory: Callable[..., GenConverter] = None, decode_content: bool = True, **kwargs + ): self.converter = init_converter(factory, **kwargs) + self.decode_content = decode_content def dumps(self, value: CachedResponse) -> Dict: if not isinstance(value, CachedResponse): return value - return self.converter.unstructure(value) + response_dict = self.converter.unstructure(value) + return _decode_content(value, response_dict) if self.decode_content else response_dict def loads(self, value: Dict) -> CachedResponse: if not isinstance(value, MutableMapping): return value - return self.converter.structure(value, cl=CachedResponse) - - -class DecodedBodyStage(CattrStage): - """Converter that decodes the response body into a human-readable format (if possible) when - serializing, and re-encodes it to reconstruct the original response. Supported Content-Types - are ``application/json`` and ``text/*``. All other types will be saved as-is. - - Notes: - - * This needs access to the response object for decoding, so this is used _instead_ of - CattrStage, not before/after it. - * Decoded responses are saved in a separate ``_decoded_content`` attribute, to ensure that - ``_content`` is always binary. - """ - - def dumps(self, value: CachedResponse) -> Dict: - response_dict = super().dumps(value) - - # Decode body as JSON - if value.headers.get('Content-Type') == 'application/json': - try: - response_dict['_decoded_content'] = value.json() - response_dict.pop('_content', None) - except JSONDecodeError: - pass - - # Decode body as text - if value.headers.get('Content-Type', '').startswith('text/'): - response_dict['_decoded_content'] = value.text - response_dict.pop('_content', None) - - # Otherwise, it is most likely a binary body - return response_dict - - def loads(self, value: Dict) -> CachedResponse: - # Re-encode JSON and text bodies - if isinstance(value.get('_decoded_content'), dict): - value['_decoded_content'] = json.dumps(value['_decoded_content']) - - if isinstance(value.get('_decoded_content'), str): - response = super().loads(value) - response._content = response._decoded_content.encode('utf-8') - response._decoded_content = '' - response.encoding = 'utf-8' # Set encoding explicitly so requests doesn't have to guess - return response - else: - return super().loads(value) + return _encode_content(self.converter.structure(value, cl=CachedResponse)) def init_converter( @@ -134,6 +104,11 @@ def init_converter( converter.register_unstructure_hook(HTTPHeaderDict, dict) converter.register_structure_hook(HTTPHeaderDict, lambda obj, cls: HTTPHeaderDict(obj)) + # Convert decoded JSON body back to string + converter.register_structure_hook( + DecodedContent, lambda obj, cls: json.dumps(obj) if isinstance(obj, dict) else obj + ) + # Resolve forward references (required for CachedResponse.history) converter.register_unstructure_hook_func( lambda cls: cls.__class__ is ForwardRef, @@ -143,6 +118,7 @@ def init_converter( lambda cls: cls.__class__ is ForwardRef, lambda obj, cls: converter.structure(obj, cls.__forward_value__), ) + return converter @@ -156,6 +132,35 @@ def make_decimal_timedelta_converter(**kwargs) -> GenConverter: return converter +def _decode_content(response: CachedResponse, response_dict: Dict) -> Dict: + """Decode response body into a human-readable format, if possible""" + # Decode body as JSON + if response.headers.get('Content-Type') == 'application/json': + try: + response_dict['_decoded_content'] = response.json() + response_dict.pop('_content', None) + except JSONDecodeError: + pass + + # Decode body as text + if response.headers.get('Content-Type', '').startswith('text/'): + response_dict['_decoded_content'] = response.text + response_dict.pop('_content', None) + + # Otherwise, it is most likely a binary body + return response_dict + + +def _encode_content(response: CachedResponse) -> CachedResponse: + """Re-encode response body if saved as JSON or text; has no effect for a binary response body""" + if isinstance(response._decoded_content, str): + response._content = response._decoded_content.encode('utf-8') + response._decoded_content = None + response.encoding = 'utf-8' # Set encoding explicitly so requests doesn't have to guess + response.headers['Content-Length'] = str(len(response._content)) # Size may have changed + return response + + def _to_datetime(obj, cls) -> datetime: if isinstance(obj, str): obj = datetime.fromisoformat(obj) diff --git a/requests_cache/serializers/content_decoder.py b/requests_cache/serializers/content_decoder.py deleted file mode 100644 index 71bc3fd..0000000 --- a/requests_cache/serializers/content_decoder.py +++ /dev/null @@ -1,52 +0,0 @@ -import json -from typing import Dict - -from requests.exceptions import JSONDecodeError - -from requests_cache.models.response import CachedResponse -from requests_cache.serializers.cattrs import CattrStage - - -class DecodeBodyStage(CattrStage): - """Converter that decodes the response body into a human-readable format when serializing - (if possible), and re-encodes it to reconstruct the original response. Supported Content-Types - are ``application/json`` and ``text/*``. All other types will be saved as-is. - - This needs access to the CachedResponse object for decoding, so this is used _instead_ of - CattrStage, not before/after it. - """ - - def dumps(self, value: CachedResponse) -> Dict: - response_dict = super().dumps(value) - # Decode body as JSON - if value.headers.get('Content-Type') == 'application/json': - try: - response_dict['content'] = value.json() - response_dict.pop('_content', None) - except JSONDecodeError: - pass - - # Decode body as text - if value.headers.get('Content-Type', '').startswith('text/'): - response_dict['content'] = value.text - response_dict.pop('_content', None) - - # Otherwise, it is most likely a binary body - return response_dict - - def loads(self, value: Dict) -> CachedResponse: - if value.get('content'): - value['_content'] = value.pop('content') - value.setdefault('_content', None) - - # Re-encode JSON and text bodies - if isinstance(value['_content'], dict): - value['_content'] = json.dumps(value['_content']) - if isinstance(value['_content'], str): - value['_content'] = value['_content'].encode('utf-8') - response = super().loads(value) - # Since we know the encoding, set that explicitly so requests doesn't have to guess it - response.encoding = 'utf-8' - return response - else: - return super().loads(value) diff --git a/requests_cache/serializers/pipeline.py b/requests_cache/serializers/pipeline.py index 8f4a521..589a36c 100644 --- a/requests_cache/serializers/pipeline.py +++ b/requests_cache/serializers/pipeline.py @@ -33,6 +33,10 @@ class Stage: class SerializerPipeline: """A pipeline of stages chained together to serialize and deserialize response objects. + Note: Typically, the first stage should be a :py:class:`.CattrStage`, since this does the + majority of the non-format-specific work to unstructure a response object into a dict (and + vice versa). + Args: stages: A sequence of :py:class:`Stage` objects, or any objects with ``dumps()`` and ``loads()`` methods diff --git a/tests/integration/base_cache_test.py b/tests/integration/base_cache_test.py index c77885e..0b93a8c 100644 --- a/tests/integration/base_cache_test.py +++ b/tests/integration/base_cache_test.py @@ -108,7 +108,12 @@ class BaseCacheTest: r2 = session.get(httpbin(response_format)) assert r1.from_cache is False assert r2.from_cache is True - assert r1.content == r2.content + + # For JSON responses, variations like whitespace won't be preserved + if r1.text.startswith('{'): + assert r1.json() == r2.json() + else: + assert r1.content == r2.content def test_response_no_duplicate_read(self): """Ensure that response data is read only once per request, whether it's cached or not""" |