diff options
Diffstat (limited to 'django/utils/encoding.py')
-rw-r--r-- | django/utils/encoding.py | 53 |
1 files changed, 31 insertions, 22 deletions
diff --git a/django/utils/encoding.py b/django/utils/encoding.py index 19eb150ad7..89eac79dd4 100644 --- a/django/utils/encoding.py +++ b/django/utils/encoding.py @@ -13,10 +13,14 @@ class DjangoUnicodeDecodeError(UnicodeDecodeError): super().__init__(*args) def __str__(self): - return '%s. You passed in %r (%s)' % (super().__str__(), self.obj, type(self.obj)) + return "%s. You passed in %r (%s)" % ( + super().__str__(), + self.obj, + type(self.obj), + ) -def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): +def smart_str(s, encoding="utf-8", strings_only=False, errors="strict"): """ Return a string representing 's'. Treat bytestrings using the 'encoding' codec. @@ -30,7 +34,13 @@ def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): _PROTECTED_TYPES = ( - type(None), int, float, Decimal, datetime.datetime, datetime.date, datetime.time, + type(None), + int, + float, + Decimal, + datetime.datetime, + datetime.date, + datetime.time, ) @@ -43,7 +53,7 @@ def is_protected_type(obj): return isinstance(obj, _PROTECTED_TYPES) -def force_str(s, encoding='utf-8', strings_only=False, errors='strict'): +def force_str(s, encoding="utf-8", strings_only=False, errors="strict"): """ Similar to smart_str(), except that lazy instances are resolved to strings, rather than kept as lazy objects. @@ -65,7 +75,7 @@ def force_str(s, encoding='utf-8', strings_only=False, errors='strict'): return s -def smart_bytes(s, encoding='utf-8', strings_only=False, errors='strict'): +def smart_bytes(s, encoding="utf-8", strings_only=False, errors="strict"): """ Return a bytestring version of 's', encoded as specified in 'encoding'. @@ -77,7 +87,7 @@ def smart_bytes(s, encoding='utf-8', strings_only=False, errors='strict'): return force_bytes(s, encoding, strings_only, errors) -def force_bytes(s, encoding='utf-8', strings_only=False, errors='strict'): +def force_bytes(s, encoding="utf-8", strings_only=False, errors="strict"): """ Similar to smart_bytes, except that lazy instances are resolved to strings, rather than kept as lazy objects. @@ -86,10 +96,10 @@ def force_bytes(s, encoding='utf-8', strings_only=False, errors='strict'): """ # Handle the common case first for performance reasons. if isinstance(s, bytes): - if encoding == 'utf-8': + if encoding == "utf-8": return s else: - return s.decode('utf-8', errors).encode(encoding, errors) + return s.decode("utf-8", errors).encode(encoding, errors) if strings_only and is_protected_type(s): return s if isinstance(s, memoryview): @@ -136,15 +146,14 @@ _hextobyte = { (fmt % char).encode(): bytes((char,)) for ascii_range in _ascii_ranges for char in ascii_range - for fmt in ['%02x', '%02X'] + for fmt in ["%02x", "%02X"] } # And then everything above 128, because bytes ≥ 128 are part of multibyte # Unicode characters. -_hexdig = '0123456789ABCDEFabcdef' -_hextobyte.update({ - (a + b).encode(): bytes.fromhex(a + b) - for a in _hexdig[8:] for b in _hexdig -}) +_hexdig = "0123456789ABCDEFabcdef" +_hextobyte.update( + {(a + b).encode(): bytes.fromhex(a + b) for a in _hexdig[8:] for b in _hexdig} +) def uri_to_iri(uri): @@ -164,7 +173,7 @@ def uri_to_iri(uri): # second block, decode the first 2 bytes if they represent a hex code to # decode. The rest of the block is the part after '%AB', not containing # any '%'. Add that to the output without further processing. - bits = uri.split(b'%') + bits = uri.split(b"%") if len(bits) == 1: iri = uri else: @@ -177,9 +186,9 @@ def uri_to_iri(uri): append(hextobyte[item[:2]]) append(item[2:]) else: - append(b'%') + append(b"%") append(item) - iri = b''.join(parts) + iri = b"".join(parts) return repercent_broken_unicode(iri).decode() @@ -202,7 +211,7 @@ def escape_uri_path(path): def punycode(domain): """Return the Punycode of the given domain if it's non-ASCII.""" - return domain.encode('idna').decode('ascii') + return domain.encode("idna").decode("ascii") def repercent_broken_unicode(path): @@ -217,8 +226,8 @@ def repercent_broken_unicode(path): except UnicodeDecodeError as e: # CVE-2019-14235: A recursion shouldn't be used since the exception # handling uses massive amounts of memory - repercent = quote(path[e.start:e.end], safe=b"/#%[]=:;$&()+,!?*@'~") - path = path[:e.start] + repercent.encode() + path[e.end:] + repercent = quote(path[e.start : e.end], safe=b"/#%[]=:;$&()+,!?*@'~") + path = path[: e.start] + repercent.encode() + path[e.end :] else: return path @@ -245,10 +254,10 @@ def get_system_encoding(): #10335 and #5846. """ try: - encoding = locale.getdefaultlocale()[1] or 'ascii' + encoding = locale.getdefaultlocale()[1] or "ascii" codecs.lookup(encoding) except Exception: - encoding = 'ascii' + encoding = "ascii" return encoding |