summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- requests_cache/models/__init__.py 2
-rwxr-xr-x requests_cache/models/response.py 5
-rw-r--r-- requests_cache/serializers/__init__.py 2
-rw-r--r-- requests_cache/serializers/cattrs.py 107
-rw-r--r-- requests_cache/serializers/content_decoder.py 52
-rw-r--r-- requests_cache/serializers/pipeline.py 4
-rw-r--r-- tests/integration/base_cache_test.py 7
7 files changed, 71 insertions, 108 deletions
diff --git a/requests_cache/models/__init__.py b/requests_cache/models/__init__.py
index 28825bc..93d8279 100644
--- a/requests_cache/models/__init__.py
+++ b/requests_cache/models/__init__.py
@@ -7,7 +7,7 @@ from requests import PreparedRequest, Request, Response
from .base import RichMixin
from .raw_response import CachedHTTPResponse
from .request import CachedRequest
-from .response import CachedResponse, OriginalResponse
+from .response import CachedResponse, DecodedContent, OriginalResponse
AnyResponse = Union[OriginalResponse, CachedResponse]
AnyRequest = Union[Request, PreparedRequest, CachedRequest]
diff --git a/requests_cache/models/response.py b/requests_cache/models/response.py
index e3acdf9..1149d42 100755
--- a/requests_cache/models/response.py
+++ b/requests_cache/models/response.py
@@ -3,7 +3,7 @@ from __future__ import annotations
from datetime import datetime, timedelta, timezone
from logging import getLogger
from time import time
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, Dict, List, Optional, Union
import attr
from attr import define, field
@@ -19,6 +19,7 @@ if TYPE_CHECKING:
from ..policy.actions import CacheActions
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S %Z' # Format used for __str__ only
+DecodedContent = Union[Dict, str, None]
logger = getLogger(__name__)
@@ -63,7 +64,7 @@ class CachedResponse(RichMixin, BaseResponse):
"""A class that emulates :py:class:`requests.Response`, optimized for serialization"""
_content: bytes = field(default=None)
- _decoded_content: str = field(default=None)
+ _decoded_content: DecodedContent = field(default=None)
_next: Optional[CachedRequest] = field(default=None)
cookies: RequestsCookieJar = field(factory=RequestsCookieJar)
created_at: datetime = field(factory=datetime.utcnow)
diff --git a/requests_cache/serializers/__init__.py b/requests_cache/serializers/__init__.py
index 36197e1..6328ea7 100644
--- a/requests_cache/serializers/__init__.py
+++ b/requests_cache/serializers/__init__.py
@@ -21,7 +21,7 @@ class that raises an ``ImportError`` at initialization time instead of at import
# flake8: noqa: F401
from typing import Union
-from .cattrs import CattrStage, DecodedBodyStage
+from .cattrs import CattrStage
from .pipeline import SerializerPipeline, Stage
from .preconf import (
bson_document_serializer,
diff --git a/requests_cache/serializers/cattrs.py b/requests_cache/serializers/cattrs.py
index 6138d42..c124830 100644
--- a/requests_cache/serializers/cattrs.py
+++ b/requests_cache/serializers/cattrs.py
@@ -21,7 +21,7 @@ from requests.exceptions import JSONDecodeError
from requests.structures import CaseInsensitiveDict
from urllib3._collections import HTTPHeaderDict
-from ..models import CachedResponse
+from ..models import CachedResponse, DecodedContent
from .pipeline import Stage
try:
@@ -33,67 +33,37 @@ except ImportError:
class CattrStage(Stage):
"""Base serializer class that does pre/post-processing with ``cattrs``. This can be used either
on its own, or as a stage within a :py:class:`.SerializerPipeline`.
+
+ Args:
+ factory: A callable that returns a ``cattrs`` converter to start from instead of a new
+ ``GenConverter``. Mainly useful for preconf converters.
+ decode_content: Save response body in human-readable format, if possible
+
+ Notes on ``decode_content`` option:
+
+ * Response body will be decoded into a human-readable format (if possible) during serialization,
+ and re-encoded during deserialization to reconstruct the original response.
+ * Supported Content-Types are ``application/json`` and ``text/*``. All other types will be saved as-is.
+ * Decoded responses are saved in a separate ``_decoded_content`` attribute, to ensure that
+ ``_content`` is always binary.
"""
- def __init__(self, factory: Callable[..., GenConverter] = None, **kwargs):
+ def __init__(
+ self, factory: Callable[..., GenConverter] = None, decode_content: bool = True, **kwargs
+ ):
self.converter = init_converter(factory, **kwargs)
+ self.decode_content = decode_content
def dumps(self, value: CachedResponse) -> Dict:
if not isinstance(value, CachedResponse):
return value
- return self.converter.unstructure(value)
+ response_dict = self.converter.unstructure(value)
+ return _decode_content(value, response_dict) if self.decode_content else response_dict
def loads(self, value: Dict) -> CachedResponse:
if not isinstance(value, MutableMapping):
return value
- return self.converter.structure(value, cl=CachedResponse)
-
-
-class DecodedBodyStage(CattrStage):
- """Converter that decodes the response body into a human-readable format (if possible) when
- serializing, and re-encodes it to reconstruct the original response. Supported Content-Types
- are ``application/json`` and ``text/*``. All other types will be saved as-is.
-
- Notes:
-
- * This needs access to the response object for decoding, so this is used _instead_ of
- CattrStage, not before/after it.
- * Decoded responses are saved in a separate ``_decoded_content`` attribute, to ensure that
- ``_content`` is always binary.
- """
-
- def dumps(self, value: CachedResponse) -> Dict:
- response_dict = super().dumps(value)
-
- # Decode body as JSON
- if value.headers.get('Content-Type') == 'application/json':
- try:
- response_dict['_decoded_content'] = value.json()
- response_dict.pop('_content', None)
- except JSONDecodeError:
- pass
-
- # Decode body as text
- if value.headers.get('Content-Type', '').startswith('text/'):
- response_dict['_decoded_content'] = value.text
- response_dict.pop('_content', None)
-
- # Otherwise, it is most likely a binary body
- return response_dict
-
- def loads(self, value: Dict) -> CachedResponse:
- # Re-encode JSON and text bodies
- if isinstance(value.get('_decoded_content'), dict):
- value['_decoded_content'] = json.dumps(value['_decoded_content'])
-
- if isinstance(value.get('_decoded_content'), str):
- response = super().loads(value)
- response._content = response._decoded_content.encode('utf-8')
- response._decoded_content = ''
- response.encoding = 'utf-8' # Set encoding explicitly so requests doesn't have to guess
- return response
- else:
- return super().loads(value)
+ return _encode_content(self.converter.structure(value, cl=CachedResponse))
def init_converter(
@@ -134,6 +104,11 @@ def init_converter(
converter.register_unstructure_hook(HTTPHeaderDict, dict)
converter.register_structure_hook(HTTPHeaderDict, lambda obj, cls: HTTPHeaderDict(obj))
+ # Convert decoded JSON body back to string
+ converter.register_structure_hook(
+ DecodedContent, lambda obj, cls: json.dumps(obj) if isinstance(obj, dict) else obj
+ )
+
# Resolve forward references (required for CachedResponse.history)
converter.register_unstructure_hook_func(
lambda cls: cls.__class__ is ForwardRef,
@@ -143,6 +118,7 @@ def init_converter(
lambda cls: cls.__class__ is ForwardRef,
lambda obj, cls: converter.structure(obj, cls.__forward_value__),
)
+
return converter
@@ -156,6 +132,35 @@ def make_decimal_timedelta_converter(**kwargs) -> GenConverter:
return converter
+def _decode_content(response: CachedResponse, response_dict: Dict) -> Dict:
+ """Decode response body into a human-readable format, if possible"""
+ # Decode body as JSON
+ if response.headers.get('Content-Type') == 'application/json':
+ try:
+ response_dict['_decoded_content'] = response.json()
+ response_dict.pop('_content', None)
+ except JSONDecodeError:
+ pass
+
+ # Decode body as text
+ if response.headers.get('Content-Type', '').startswith('text/'):
+ response_dict['_decoded_content'] = response.text
+ response_dict.pop('_content', None)
+
+ # Otherwise, it is most likely a binary body
+ return response_dict
+
+
+def _encode_content(response: CachedResponse) -> CachedResponse:
+ """Re-encode response body if saved as JSON or text; has no effect for a binary response body"""
+ if isinstance(response._decoded_content, str):
+ response._content = response._decoded_content.encode('utf-8')
+ response._decoded_content = None
+ response.encoding = 'utf-8' # Set encoding explicitly so requests doesn't have to guess
+ response.headers['Content-Length'] = str(len(response._content)) # Size may have changed
+ return response
+
+
def _to_datetime(obj, cls) -> datetime:
if isinstance(obj, str):
obj = datetime.fromisoformat(obj)
diff --git a/requests_cache/serializers/content_decoder.py b/requests_cache/serializers/content_decoder.py
deleted file mode 100644
index 71bc3fd..0000000
--- a/requests_cache/serializers/content_decoder.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import json
-from typing import Dict
-
-from requests.exceptions import JSONDecodeError
-
-from requests_cache.models.response import CachedResponse
-from requests_cache.serializers.cattrs import CattrStage
-
-
-class DecodeBodyStage(CattrStage):
- """Converter that decodes the response body into a human-readable format when serializing
- (if possible), and re-encodes it to reconstruct the original response. Supported Content-Types
- are ``application/json`` and ``text/*``. All other types will be saved as-is.
-
- This needs access to the CachedResponse object for decoding, so this is used _instead_ of
- CattrStage, not before/after it.
- """
-
- def dumps(self, value: CachedResponse) -> Dict:
- response_dict = super().dumps(value)
- # Decode body as JSON
- if value.headers.get('Content-Type') == 'application/json':
- try:
- response_dict['content'] = value.json()
- response_dict.pop('_content', None)
- except JSONDecodeError:
- pass
-
- # Decode body as text
- if value.headers.get('Content-Type', '').startswith('text/'):
- response_dict['content'] = value.text
- response_dict.pop('_content', None)
-
- # Otherwise, it is most likely a binary body
- return response_dict
-
- def loads(self, value: Dict) -> CachedResponse:
- if value.get('content'):
- value['_content'] = value.pop('content')
- value.setdefault('_content', None)
-
- # Re-encode JSON and text bodies
- if isinstance(value['_content'], dict):
- value['_content'] = json.dumps(value['_content'])
- if isinstance(value['_content'], str):
- value['_content'] = value['_content'].encode('utf-8')
- response = super().loads(value)
- # Since we know the encoding, set that explicitly so requests doesn't have to guess it
- response.encoding = 'utf-8'
- return response
- else:
- return super().loads(value)
diff --git a/requests_cache/serializers/pipeline.py b/requests_cache/serializers/pipeline.py
index 8f4a521..589a36c 100644
--- a/requests_cache/serializers/pipeline.py
+++ b/requests_cache/serializers/pipeline.py
@@ -33,6 +33,10 @@ class Stage:
class SerializerPipeline:
"""A pipeline of stages chained together to serialize and deserialize response objects.
+ Note: Typically, the first stage should be a :py:class:`.CattrStage`, since this does the
+ majority of the non-format-specific work to unstructure a response object into a dict (and
+ vice versa).
+
Args:
stages: A sequence of :py:class:`Stage` objects, or any objects with ``dumps()`` and
``loads()`` methods
diff --git a/tests/integration/base_cache_test.py b/tests/integration/base_cache_test.py
index c77885e..0b93a8c 100644
--- a/tests/integration/base_cache_test.py
+++ b/tests/integration/base_cache_test.py
@@ -108,7 +108,12 @@ class BaseCacheTest:
r2 = session.get(httpbin(response_format))
assert r1.from_cache is False
assert r2.from_cache is True
- assert r1.content == r2.content
+
+ # For JSON responses, variations like whitespace won't be preserved
+ if r1.text.startswith('{'):
+ assert r1.json() == r2.json()
+ else:
+ assert r1.content == r2.content
def test_response_no_duplicate_read(self):
"""Ensure that response data is read only once per request, whether it's cached or not"""