summary | refs | log | tree | commit | diff
path: root/django/utils/encoding.py
diff options
context:
space:
mode:
Diffstat (limited to 'django/utils/encoding.py')
-rw-r--r-- django/utils/encoding.py 53
1 files changed, 31 insertions, 22 deletions
diff --git a/django/utils/encoding.py b/django/utils/encoding.py
index 19eb150ad7..89eac79dd4 100644
--- a/django/utils/encoding.py
+++ b/django/utils/encoding.py
@@ -13,10 +13,14 @@ class DjangoUnicodeDecodeError(UnicodeDecodeError):
super().__init__(*args)
def __str__(self):
- return '%s. You passed in %r (%s)' % (super().__str__(), self.obj, type(self.obj))
+ return "%s. You passed in %r (%s)" % (
+ super().__str__(),
+ self.obj,
+ type(self.obj),
+ )
-def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
+def smart_str(s, encoding="utf-8", strings_only=False, errors="strict"):
"""
Return a string representing 's'. Treat bytestrings using the 'encoding'
codec.
@@ -30,7 +34,13 @@ def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
_PROTECTED_TYPES = (
- type(None), int, float, Decimal, datetime.datetime, datetime.date, datetime.time,
+ type(None),
+ int,
+ float,
+ Decimal,
+ datetime.datetime,
+ datetime.date,
+ datetime.time,
)
@@ -43,7 +53,7 @@ def is_protected_type(obj):
return isinstance(obj, _PROTECTED_TYPES)
-def force_str(s, encoding='utf-8', strings_only=False, errors='strict'):
+def force_str(s, encoding="utf-8", strings_only=False, errors="strict"):
"""
Similar to smart_str(), except that lazy instances are resolved to
strings, rather than kept as lazy objects.
@@ -65,7 +75,7 @@ def force_str(s, encoding='utf-8', strings_only=False, errors='strict'):
return s
-def smart_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
+def smart_bytes(s, encoding="utf-8", strings_only=False, errors="strict"):
"""
Return a bytestring version of 's', encoded as specified in 'encoding'.
@@ -77,7 +87,7 @@ def smart_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
return force_bytes(s, encoding, strings_only, errors)
-def force_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
+def force_bytes(s, encoding="utf-8", strings_only=False, errors="strict"):
"""
Similar to smart_bytes, except that lazy instances are resolved to
strings, rather than kept as lazy objects.
@@ -86,10 +96,10 @@ def force_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
"""
# Handle the common case first for performance reasons.
if isinstance(s, bytes):
- if encoding == 'utf-8':
+ if encoding == "utf-8":
return s
else:
- return s.decode('utf-8', errors).encode(encoding, errors)
+ return s.decode("utf-8", errors).encode(encoding, errors)
if strings_only and is_protected_type(s):
return s
if isinstance(s, memoryview):
@@ -136,15 +146,14 @@ _hextobyte = {
(fmt % char).encode(): bytes((char,))
for ascii_range in _ascii_ranges
for char in ascii_range
- for fmt in ['%02x', '%02X']
+ for fmt in ["%02x", "%02X"]
}
# And then everything above 128, because bytes ≥ 128 are part of multibyte
# Unicode characters.
-_hexdig = '0123456789ABCDEFabcdef'
-_hextobyte.update({
- (a + b).encode(): bytes.fromhex(a + b)
- for a in _hexdig[8:] for b in _hexdig
-})
+_hexdig = "0123456789ABCDEFabcdef"
+_hextobyte.update(
+ {(a + b).encode(): bytes.fromhex(a + b) for a in _hexdig[8:] for b in _hexdig}
+)
def uri_to_iri(uri):
@@ -164,7 +173,7 @@ def uri_to_iri(uri):
# second block, decode the first 2 bytes if they represent a hex code to
# decode. The rest of the block is the part after '%AB', not containing
# any '%'. Add that to the output without further processing.
- bits = uri.split(b'%')
+ bits = uri.split(b"%")
if len(bits) == 1:
iri = uri
else:
@@ -177,9 +186,9 @@ def uri_to_iri(uri):
append(hextobyte[item[:2]])
append(item[2:])
else:
- append(b'%')
+ append(b"%")
append(item)
- iri = b''.join(parts)
+ iri = b"".join(parts)
return repercent_broken_unicode(iri).decode()
@@ -202,7 +211,7 @@ def escape_uri_path(path):
def punycode(domain):
"""Return the Punycode of the given domain if it's non-ASCII."""
- return domain.encode('idna').decode('ascii')
+ return domain.encode("idna").decode("ascii")
def repercent_broken_unicode(path):
@@ -217,8 +226,8 @@ def repercent_broken_unicode(path):
except UnicodeDecodeError as e:
# CVE-2019-14235: A recursion shouldn't be used since the exception
# handling uses massive amounts of memory
- repercent = quote(path[e.start:e.end], safe=b"/#%[]=:;$&()+,!?*@'~")
- path = path[:e.start] + repercent.encode() + path[e.end:]
+ repercent = quote(path[e.start : e.end], safe=b"/#%[]=:;$&()+,!?*@'~")
+ path = path[: e.start] + repercent.encode() + path[e.end :]
else:
return path
@@ -245,10 +254,10 @@ def get_system_encoding():
#10335 and #5846.
"""
try:
- encoding = locale.getdefaultlocale()[1] or 'ascii'
+ encoding = locale.getdefaultlocale()[1] or "ascii"
codecs.lookup(encoding)
except Exception:
- encoding = 'ascii'
+ encoding = "ascii"
return encoding