From 3b7849df68884a2e0add040c2ac1ae7503f69368 Mon Sep 17 00:00:00 2001 From: Jordan Cook Date: Sat, 11 Jun 2022 21:30:11 -0500 Subject: Remove HTTPResponse attributes from the cache, and re-construct CachedResponse.raw after deserialization --- requests_cache/models/raw_response.py | 51 +++++++++++++++++++++-------------- requests_cache/models/response.py | 12 +++------ requests_cache/serializers/cattrs.py | 4 --- 3 files changed, 35 insertions(+), 32 deletions(-) (limited to 'requests_cache') diff --git a/requests_cache/models/raw_response.py b/requests_cache/models/raw_response.py index f94a543..5850b2a 100644 --- a/requests_cache/models/raw_response.py +++ b/requests_cache/models/raw_response.py @@ -1,6 +1,6 @@ from io import BytesIO from logging import getLogger -from typing import Mapping +from typing import TYPE_CHECKING from attr import define, field, fields_dict from requests import Response @@ -15,50 +15,49 @@ from . import RichMixin logger = getLogger(__name__) +if TYPE_CHECKING: + from . import CachedResponse + + @define(auto_attribs=False, repr=False, slots=False) class CachedHTTPResponse(RichMixin, HTTPResponse): - """A serializable dataclass that emulates :py:class:`~urllib3.response.HTTPResponse`. - Supports streaming requests and generator usage. + """A wrapper class that emulates :py:class:`~urllib3.response.HTTPResponse`. - The only action this doesn't support is explicitly calling :py:meth:`.read` with - ``decode_content=False``. + This enables consistent behavior for streaming requests and generator usage in the following + cases: + * On an original response, after reading its content to write to the cache + * On a cached response """ decode_content: bool = field(default=None) - # These headers are redundant and not serialized; copied in init and CachedResponse post-init - headers: HTTPHeaderDict = None # type: ignore + headers: HTTPHeaderDict = field(factory=HTTPHeaderDict) reason: str = field(default=None) request_url: str = field(default=None) status: int = field(default=0) strict: int = field(default=0) version: int = field(default=0) - def __init__(self, *args, body: bytes = None, headers: Mapping = None, **kwargs): + def __init__(self, body: bytes = None, **kwargs): """First initialize via HTTPResponse, then via attrs""" kwargs = {k: v for k, v in kwargs.items() if v is not None} super().__init__(body=BytesIO(body or b''), preload_content=False, **kwargs) - self._body = body - self.headers = HTTPHeaderDict(headers) - self.__attrs_init__(*args, **kwargs) # type: ignore # False positive in mypy 0.920+? + self.__attrs_init__(**kwargs) # type: ignore # False positive in mypy 0.920+? @classmethod - def from_response(cls, original_response: Response): + def from_response(cls, response: Response): """Create a CachedHTTPResponse based on an original response""" # Copy basic attributes - raw = original_response.raw - copy_attrs = list(fields_dict(cls).keys()) + ['headers'] - kwargs = {k: getattr(raw, k, None) for k in copy_attrs} - - # Note: _request_url is not available in urllib <=1.21 - kwargs['request_url'] = getattr(raw, '_request_url', None) + raw = response.raw + kwargs = {k: getattr(raw, k, None) for k in fields_dict(cls).keys()} + kwargs['request_url'] = raw._request_url # Copy response data and restore response object to its original state if hasattr(raw, '_fp') and not is_fp_closed(raw._fp): body = raw.read(decode_content=False) kwargs['body'] = body raw._fp = BytesIO(body) - original_response.content # This property reads, decodes, and stores response content + response.content # This property reads, decodes, and stores response content # After reading, reset file pointer on original raw response raw._fp = BytesIO(body) @@ -67,6 +66,18 @@ class CachedHTTPResponse(RichMixin, HTTPResponse): return cls(**kwargs) # type: ignore # False positive in mypy 0.920+? + @classmethod + def from_cached_response(cls, response: 'CachedResponse'): + """Create a CachedHTTPResponse based on a cached response""" + obj = cls( + headers=HTTPHeaderDict(response.headers), + reason=response.reason, + status=response.status_code, + request_url=response.request.url, + ) + obj.reset(response._content) + return obj + def release_conn(self): """No-op for compatibility""" @@ -74,7 +85,7 @@ class CachedHTTPResponse(RichMixin, HTTPResponse): """Simplified reader for cached content that emulates :py:meth:`urllib3.response.HTTPResponse.read()` """ - if 'content-encoding' in self.headers and decode_content is False: + if 'Content-Encoding' in self.headers and decode_content is False: logger.warning('read(decode_content=False) is not supported for cached responses') data = self._fp.read(amt) diff --git a/requests_cache/models/response.py b/requests_cache/models/response.py index 68df763..c1f85e1 100755 --- a/requests_cache/models/response.py +++ b/requests_cache/models/response.py @@ -10,7 +10,6 @@ from attr import define, field from requests import PreparedRequest, Response from requests.cookies import RequestsCookieJar from requests.structures import CaseInsensitiveDict -from urllib3._collections import HTTPHeaderDict from ..policy.expiration import ExpirationTime, get_expiration_datetime from . import CachedHTTPResponse, CachedRequest, RichMixin @@ -73,18 +72,15 @@ class CachedResponse(RichMixin, BaseResponse): expires: Optional[datetime] = field(default=None) headers: CaseInsensitiveDict = field(factory=CaseInsensitiveDict) history: List['CachedResponse'] = field(factory=list) # type: ignore - raw: CachedHTTPResponse = field(factory=CachedHTTPResponse, repr=False) + raw: CachedHTTPResponse = None # type: ignore # Not serialized; populated from CachedResponse attrs reason: str = field(default=None) request: CachedRequest = field(factory=CachedRequest) # type: ignore status_code: int = field(default=0) url: str = field(default=None) def __attrs_post_init__(self): - """Re-initialize raw response body after deserialization""" - if self.raw._body is None and self._content is not None: - self.raw.reset(self._content) - if not self.raw.headers: - self.raw.headers = HTTPHeaderDict(self.headers) + """Re-initialize raw (urllib3) response after deserialization""" + self.raw = self.raw or CachedHTTPResponse.from_cached_response(self) @classmethod def from_response(cls, response: Response, **kwargs): @@ -101,8 +97,8 @@ class CachedResponse(RichMixin, BaseResponse): setattr(obj, k, getattr(response, k, None)) # Store request, raw response, and next response (if it's a redirect response) - obj.request = CachedRequest.from_request(response.request) obj.raw = CachedHTTPResponse.from_response(response) + obj.request = CachedRequest.from_request(response.request) obj._next = CachedRequest.from_request(response.next) if response.next else None # Store response body, which will have been read & decoded by requests.Response by now diff --git a/requests_cache/serializers/cattrs.py b/requests_cache/serializers/cattrs.py index befc4b5..e9eb991 100644 --- a/requests_cache/serializers/cattrs.py +++ b/requests_cache/serializers/cattrs.py @@ -19,7 +19,6 @@ from cattr import GenConverter from requests.cookies import RequestsCookieJar, cookiejar_from_dict from requests.exceptions import JSONDecodeError from requests.structures import CaseInsensitiveDict -from urllib3._collections import HTTPHeaderDict from ..models import CachedResponse, DecodedContent from .pipeline import Stage @@ -102,9 +101,6 @@ def init_converter( converter.register_structure_hook( CaseInsensitiveDict, lambda obj, cls: CaseInsensitiveDict(obj) ) - converter.register_unstructure_hook(HTTPHeaderDict, dict) - converter.register_structure_hook(HTTPHeaderDict, lambda obj, cls: HTTPHeaderDict(obj)) - # Convert decoded JSON body back to string converter.register_structure_hook( DecodedContent, lambda obj, cls: json.dumps(obj) if isinstance(obj, dict) else obj -- cgit v1.2.1