7 files changed, 71 insertions, 108 deletions
diff --git a/requests_cache/models/__init__.py b/requests_cache/models/__init__.py
index 28825bc..93d8279 100644
--- a/requests_cache/models/__init__.py
+++ b/requests_cache/models/__init__.py
@@ -7,7 +7,7 @@ from requests import PreparedRequest, Request, Response
 from .base import RichMixin
 from .raw_response import CachedHTTPResponse
 from .request import CachedRequest
-from .response import CachedResponse, OriginalResponse
+from .response import CachedResponse, DecodedContent, OriginalResponse
 
 AnyResponse = Union[OriginalResponse, CachedResponse]
 AnyRequest = Union[Request, PreparedRequest, CachedRequest]
diff --git a/requests_cache/models/response.py b/requests_cache/models/response.py
index e3acdf9..1149d42 100755
--- a/requests_cache/models/response.py
+++ b/requests_cache/models/response.py
@@ -3,7 +3,7 @@ from __future__ import annotations
 from datetime import datetime, timedelta, timezone
 from logging import getLogger
 from time import time
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, Dict, List, Optional, Union
 
 import attr
 from attr import define, field
@@ -19,6 +19,7 @@ if TYPE_CHECKING:
     from ..policy.actions import CacheActions
 
 DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S %Z'  # Format used for __str__ only
+DecodedContent = Union[Dict, str, None]
 logger = getLogger(__name__)
 
 
@@ -63,7 +64,7 @@ class CachedResponse(RichMixin, BaseResponse):
     """A class that emulates :py:class:`requests.Response`, optimized for serialization"""
 
     _content: bytes = field(default=None)
-    _decoded_content: str = field(default=None)
+    _decoded_content: DecodedContent = field(default=None)
     _next: Optional[CachedRequest] = field(default=None)
     cookies: RequestsCookieJar = field(factory=RequestsCookieJar)
     created_at: datetime = field(factory=datetime.utcnow)
diff --git a/requests_cache/serializers/__init__.py b/requests_cache/serializers/__init__.py
index 36197e1..6328ea7 100644
--- a/requests_cache/serializers/__init__.py
+++ b/requests_cache/serializers/__init__.py
@@ -21,7 +21,7 @@ class that raises an ``ImportError`` at initialization time instead of at import
 # flake8: noqa: F401
 from typing import Union
 
-from .cattrs import CattrStage, DecodedBodyStage
+from .cattrs import CattrStage
 from .pipeline import SerializerPipeline, Stage
 from .preconf import (
     bson_document_serializer,
diff --git a/requests_cache/serializers/cattrs.py b/requests_cache/serializers/cattrs.py
index 6138d42..c124830 100644
--- a/requests_cache/serializers/cattrs.py
+++ b/requests_cache/serializers/cattrs.py
@@ -21,7 +21,7 @@ from requests.exceptions import JSONDecodeError
 from requests.structures import CaseInsensitiveDict
 from urllib3._collections import HTTPHeaderDict
 
-from ..models import CachedResponse
+from ..models import CachedResponse, DecodedContent
 from .pipeline import Stage
 
 try:
@@ -33,67 +33,37 @@ except ImportError:
 class CattrStage(Stage):
     """Base serializer class that does pre/post-processing with  ``cattrs``. This can be used either
     on its own, or as a stage within a :py:class:`.SerializerPipeline`.
+
+    Args:
+        factory: A callable that returns a ``cattrs`` converter to start from instead of a new
+            ``GenConverter``. Mainly useful for preconf converters.
+        decode_content: Save response body in human-readable format if possible (default ``True``)
+
+    Notes on ``decode_content`` option:
+
+    * Response body will be decoded into a human-readable format (if possible) during serialization,
+      and re-encoded during deserialization to reconstruct the original response.
+    * Supported Content-Types are ``application/json`` and ``text/*``. All other types will be saved as-is.
+    * Decoded responses are saved in a separate ``_decoded_content`` attribute, to ensure that
+      ``_content`` is always binary.
     """
 
-    def __init__(self, factory: Callable[..., GenConverter] = None, **kwargs):
+    def __init__(
+        self, factory: Callable[..., GenConverter] = None, decode_content: bool = True, **kwargs
+    ):
         self.converter = init_converter(factory, **kwargs)
+        self.decode_content = decode_content
 
     def dumps(self, value: CachedResponse) -> Dict:
         if not isinstance(value, CachedResponse):
             return value
-        return self.converter.unstructure(value)
+        response_dict = self.converter.unstructure(value)
+        return _decode_content(value, response_dict) if self.decode_content else response_dict
 
     def loads(self, value: Dict) -> CachedResponse:
         if not isinstance(value, MutableMapping):
             return value
-        return self.converter.structure(value, cl=CachedResponse)
-
-
-class DecodedBodyStage(CattrStage):
-    """Converter that decodes the response body into a human-readable format (if possible) when
-    serializing, and re-encodes it to reconstruct the original response. Supported Content-Types
-    are ``application/json`` and ``text/*``. All other types will be saved as-is.
-
-    Notes:
-
-    * This needs access to the response object for decoding, so this is used _instead_ of
-      CattrStage, not before/after it.
-    * Decoded responses are saved in a separate ``_decoded_content`` attribute, to ensure that
-      ``_content`` is always binary.
-    """
-
-    def dumps(self, value: CachedResponse) -> Dict:
-        response_dict = super().dumps(value)
-
-        # Decode body as JSON
-        if value.headers.get('Content-Type') == 'application/json':
-            try:
-                response_dict['_decoded_content'] = value.json()
-                response_dict.pop('_content', None)
-            except JSONDecodeError:
-                pass
-
-        # Decode body as text
-        if value.headers.get('Content-Type', '').startswith('text/'):
-            response_dict['_decoded_content'] = value.text
-            response_dict.pop('_content', None)
-
-        # Otherwise, it is most likely a binary body
-        return response_dict
-
-    def loads(self, value: Dict) -> CachedResponse:
-        # Re-encode JSON and text bodies
-        if isinstance(value.get('_decoded_content'), dict):
-            value['_decoded_content'] = json.dumps(value['_decoded_content'])
-
-        if isinstance(value.get('_decoded_content'), str):
-            response = super().loads(value)
-            response._content = response._decoded_content.encode('utf-8')
-            response._decoded_content = ''
-            response.encoding = 'utf-8'  # Set encoding explicitly so requests doesn't have to guess
-            return response
-        else:
-            return super().loads(value)
+        return _encode_content(self.converter.structure(value, cl=CachedResponse))
 
 
 def init_converter(
@@ -134,6 +104,11 @@ def init_converter(
     converter.register_unstructure_hook(HTTPHeaderDict, dict)
     converter.register_structure_hook(HTTPHeaderDict, lambda obj, cls: HTTPHeaderDict(obj))
 
+    # Convert decoded JSON body back to string
+    converter.register_structure_hook(
+        DecodedContent, lambda obj, cls: json.dumps(obj) if isinstance(obj, dict) else obj
+    )
+
     # Resolve forward references (required for CachedResponse.history)
     converter.register_unstructure_hook_func(
         lambda cls: cls.__class__ is ForwardRef,
@@ -143,6 +118,7 @@ def init_converter(
         lambda cls: cls.__class__ is ForwardRef,
         lambda obj, cls: converter.structure(obj, cls.__forward_value__),
     )
+
     return converter
 
 
@@ -156,6 +132,35 @@ def make_decimal_timedelta_converter(**kwargs) -> GenConverter:
     return converter
 
 
+def _decode_content(response: CachedResponse, response_dict: Dict) -> Dict:
+    """Decode a JSON-object or text body into ``_decoded_content``; other bodies are untouched"""
+    # Match on the media type only, so parameters like '; charset=utf-8' don't prevent decoding
+    content_type = response.headers.get('Content-Type', '').split(';')[0].strip().lower()
+    decoded = response.text if content_type.startswith('text/') else None
+    if content_type == 'application/json':
+        try:
+            body = response.json()
+            # Only a dict root is turned back into a string on load; a list or scalar root
+            # would lose its body, so it keeps its raw bytes instead
+            decoded = body if isinstance(body, dict) else None
+        except JSONDecodeError:
+            pass
+    # Mutates and returns ``response_dict``; undecoded (likely binary) bodies stay in ``_content``
+    if decoded is not None:
+        response_dict['_decoded_content'] = decoded
+        response_dict.pop('_content', None)
+    return response_dict
+
+
+def _encode_content(response: CachedResponse) -> CachedResponse:
+    """Rebuild the binary body from decoded JSON/text content; a binary response is unchanged"""
+    decoded = response._decoded_content
+    if isinstance(decoded, str):
+        response._content, response._decoded_content = decoded.encode('utf-8'), None
+        response.encoding = 'utf-8'  # Declare the encoding up front so requests needn't detect it
+        response.headers['Content-Length'] = str(len(response._content))  # Body size may differ
+    return response
+
+
 def _to_datetime(obj, cls) -> datetime:
     if isinstance(obj, str):
         obj = datetime.fromisoformat(obj)
diff --git a/requests_cache/serializers/content_decoder.py b/requests_cache/serializers/content_decoder.py
deleted file mode 100644
index 71bc3fd..0000000
--- a/requests_cache/serializers/content_decoder.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import json
-from typing import Dict
-
-from requests.exceptions import JSONDecodeError
-
-from requests_cache.models.response import CachedResponse
-from requests_cache.serializers.cattrs import CattrStage
-
-
-class DecodeBodyStage(CattrStage):
-    """Converter that decodes the response body into a human-readable format when serializing
-    (if possible), and re-encodes it to reconstruct the original response. Supported Content-Types
-    are ``application/json`` and ``text/*``. All other types will be saved as-is.
-
-    This needs access to the CachedResponse object for decoding, so this is used _instead_ of
-    CattrStage, not before/after it.
-    """
-
-    def dumps(self, value: CachedResponse) -> Dict:
-        response_dict = super().dumps(value)
-        # Decode body as JSON
-        if value.headers.get('Content-Type') == 'application/json':
-            try:
-                response_dict['content'] = value.json()
-                response_dict.pop('_content', None)
-            except JSONDecodeError:
-                pass
-
-        # Decode body as text
-        if value.headers.get('Content-Type', '').startswith('text/'):
-            response_dict['content'] = value.text
-            response_dict.pop('_content', None)
-
-        # Otherwise, it is most likely a binary body
-        return response_dict
-
-    def loads(self, value: Dict) -> CachedResponse:
-        if value.get('content'):
-            value['_content'] = value.pop('content')
-        value.setdefault('_content', None)
-
-        # Re-encode JSON and text bodies
-        if isinstance(value['_content'], dict):
-            value['_content'] = json.dumps(value['_content'])
-        if isinstance(value['_content'], str):
-            value['_content'] = value['_content'].encode('utf-8')
-            response = super().loads(value)
-            # Since we know the encoding, set that explicitly so requests doesn't have to guess it
-            response.encoding = 'utf-8'
-            return response
-        else:
-            return super().loads(value)
diff --git a/requests_cache/serializers/pipeline.py b/requests_cache/serializers/pipeline.py
index 8f4a521..589a36c 100644
--- a/requests_cache/serializers/pipeline.py
+++ b/requests_cache/serializers/pipeline.py
@@ -33,6 +33,10 @@ class Stage:
 class SerializerPipeline:
     """A pipeline of stages chained together to serialize and deserialize response objects.
 
+    Note: Typically, the first stage should be a :py:class:`.CattrStage`, since this does the
+    majority of the non-format-specific work to unstructure a response object into a dict (and
+    vice versa).
+
     Args:
         stages: A sequence of :py:class:`Stage` objects, or any objects with ``dumps()`` and
             ``loads()`` methods
diff --git a/tests/integration/base_cache_test.py b/tests/integration/base_cache_test.py
index c77885e..0b93a8c 100644
--- a/tests/integration/base_cache_test.py
+++ b/tests/integration/base_cache_test.py
@@ -108,7 +108,12 @@ class BaseCacheTest:
         r2 = session.get(httpbin(response_format))
         assert r1.from_cache is False
         assert r2.from_cache is True
-        assert r1.content == r2.content
+
+        # For JSON responses, variations like whitespace won't be preserved
+        if r1.text.startswith('{'):
+            assert r1.json() == r2.json()
+        else:
+            assert r1.content == r2.content
 
     def test_response_no_duplicate_read(self):
         """Ensure that response data is read only once per request, whether it's cached or not"""