summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJingxin Zhu <jz1371@nyu.edu>2018-01-25 21:41:08 -0800
committerBenjamin Peterson <benjamin@python.org>2018-01-25 21:41:08 -0800
commitdb3d0d678b422614d5de0f9ed76a2112a28c7b19 (patch)
tree2e163adc4c12d998ff56c28870e5b2189d565cc6
parent5fb43eb68b7edaedb08589dfa7229e717a78642d (diff)
downloadsix-git-db3d0d678b422614d5de0f9ed76a2112a28c7b19.tar.gz
add ensure_binary/str/text helper functions (#204)
-rw-r--r--documentation/index.rst18
-rw-r--r--six.py59
-rw-r--r--test_six.py58
3 files changed, 135 insertions, 0 deletions
diff --git a/documentation/index.rst b/documentation/index.rst
index 68b2fd7..86c3a03 100644
--- a/documentation/index.rst
+++ b/documentation/index.rst
@@ -433,6 +433,24 @@ string data in all Python versions.
a bytes object iterator in Python 3.
+.. function:: ensure_binary(s, encoding='utf-8', errors='strict')
+
+ A helper function to ensure output is :data:`binary_type`. ``encoding``, ``errors``
+ are the same as :meth:`py3:str.encode`
+
+
+.. function:: ensure_str(s, encoding='utf-8', errors='strict')
+
+ A helper function to ensure output is ``str``. ``encoding``, ``errors``
+ are the same as :meth:`py3:str.encode`
+
+
+.. function:: ensure_text(s, encoding='utf-8', errors='strict')
+
+ A helper function to ensure output is :data:`text_type`. ``encoding``, ``errors``
+ are the same as :meth:`py3:str.encode`
+
+
.. data:: StringIO
This is a fake file object for textual data. It's an alias for
diff --git a/six.py b/six.py
index d9642ec..6ecf614 100644
--- a/six.py
+++ b/six.py
@@ -848,6 +848,65 @@ def add_metaclass(metaclass):
return wrapper
+def ensure_binary(s, encoding='utf-8', errors='strict'):
+    """Coerce ``s`` to ``six.binary_type``, encoding text when necessary.
+
+    Text input (``unicode`` on Python 2, ``str`` on Python 3) is encoded
+    with ``encoding`` and ``errors``; binary input (``str`` on Python 2,
+    ``bytes`` on Python 3) is handed back unchanged.
+
+    Raises ``TypeError`` when ``s`` is neither text nor binary.
+    """
+    # Guard clauses: each accepted type returns immediately.
+    if isinstance(s, text_type):
+        return s.encode(encoding, errors)
+    if isinstance(s, binary_type):
+        return s
+
+    # Anything else is a caller error.
+    raise TypeError("not expecting type '%s'" % type(s))
+
+
+def ensure_str(s, encoding='utf-8', errors='strict'):
+    """Coerce ``s`` to the native ``str`` type of the running Python.
+
+    Python 2: ``unicode`` is encoded to ``str``; ``str`` passes through.
+    Python 3: ``bytes`` is decoded to ``str``; ``str`` passes through.
+    ``encoding`` and ``errors`` are forwarded to the encode/decode call.
+
+    Raises ``TypeError`` when ``s`` is neither text nor binary.
+    """
+    is_text = isinstance(s, text_type)
+    is_binary = isinstance(s, binary_type)
+    if not (is_text or is_binary):
+        raise TypeError("not expecting type '%s'" % type(s))
+    if PY2 and is_text:
+        return s.encode(encoding, errors)
+    if PY3 and is_binary:
+        return s.decode(encoding, errors)
+    return s
+
+
+def ensure_text(s, encoding='utf-8', errors='strict'):
+    """Coerce ``s`` to ``six.text_type``, decoding binary when necessary.
+
+    Binary input (``str`` on Python 2, ``bytes`` on Python 3) is decoded
+    with ``encoding`` and ``errors``; text input (``unicode`` on Python 2,
+    ``str`` on Python 3) is handed back unchanged.
+
+    Raises ``TypeError`` when ``s`` is neither text nor binary.
+    """
+    # Guard clauses: each accepted type returns immediately.
+    if isinstance(s, binary_type):
+        return s.decode(encoding, errors)
+    if isinstance(s, text_type):
+        return s
+
+    # Anything else is a caller error.
+    raise TypeError("not expecting type '%s'" % type(s))
+
+
+
def python_2_unicode_compatible(klass):
"""
A decorator that defines __unicode__ and __str__ methods under Python 2.
diff --git a/test_six.py b/test_six.py
index 872b087..980cdf3 100644
--- a/test_six.py
+++ b/test_six.py
@@ -932,3 +932,61 @@ def test_python_2_unicode_compatible():
assert str(my_test) == six.u("hello")
assert getattr(six.moves.builtins, 'bytes', str)(my_test) == six.b("hello")
+
+
+class EnsureTests:
+    """Tests for six.ensure_binary, six.ensure_str and six.ensure_text."""
+    # NOTE(review): not named ``Test*``; confirm pytest collects this class.
+    UNICODE_EMOJI = six.u("\U0001F600")  # grinning face emoji
+    BINARY_EMOJI = b"\xf0\x9f\x98\x80"  # UTF-8 encoding of the emoji above
+
+    def test_ensure_binary_raise_type_error(self):
+        with py.test.raises(TypeError):
+            six.ensure_binary(8)
+
+    def test_errors_and_encoding(self):
+        six.ensure_binary(self.UNICODE_EMOJI, encoding='latin-1', errors='ignore')
+        with py.test.raises(UnicodeEncodeError):
+            six.ensure_binary(self.UNICODE_EMOJI, encoding='latin-1', errors='strict')
+
+    def test_ensure_binary_raise(self):
+        converted_unicode = six.ensure_binary(self.UNICODE_EMOJI, encoding='utf-8', errors='strict')
+        converted_binary = six.ensure_binary(self.BINARY_EMOJI, encoding="utf-8", errors='strict')
+        if six.PY2:
+            # PY2: unicode -> str
+            assert converted_unicode == self.BINARY_EMOJI and isinstance(converted_unicode, str)
+            # PY2: str -> str
+            assert converted_binary == self.BINARY_EMOJI and isinstance(converted_binary, str)
+        else:
+            # PY3: str -> bytes
+            assert converted_unicode == self.BINARY_EMOJI and isinstance(converted_unicode, bytes)
+            # PY3: bytes -> bytes
+            assert converted_binary == self.BINARY_EMOJI and isinstance(converted_binary, bytes)
+
+    def test_ensure_str(self):
+        converted_unicode = six.ensure_str(self.UNICODE_EMOJI, encoding='utf-8', errors='strict')
+        converted_binary = six.ensure_str(self.BINARY_EMOJI, encoding="utf-8", errors='strict')
+        if six.PY2:
+            # PY2: unicode -> str
+            assert converted_unicode == self.BINARY_EMOJI and isinstance(converted_unicode, str)
+            # PY2: str -> str
+            assert converted_binary == self.BINARY_EMOJI and isinstance(converted_binary, str)
+        else:
+            # PY3: str -> str
+            assert converted_unicode == self.UNICODE_EMOJI and isinstance(converted_unicode, str)
+            # PY3: bytes -> str
+            assert converted_binary == self.UNICODE_EMOJI and isinstance(converted_binary, str)
+
+    def test_ensure_text(self):
+        converted_unicode = six.ensure_text(self.UNICODE_EMOJI, encoding='utf-8', errors='strict')
+        converted_binary = six.ensure_text(self.BINARY_EMOJI, encoding="utf-8", errors='strict')
+        if six.PY2:
+            # PY2: unicode -> unicode
+            assert converted_unicode == self.UNICODE_EMOJI and isinstance(converted_unicode, six.text_type)
+            # PY2: str -> unicode
+            assert converted_binary == self.UNICODE_EMOJI and isinstance(converted_binary, six.text_type)
+        else:
+            # PY3: str -> str
+            assert converted_unicode == self.UNICODE_EMOJI and isinstance(converted_unicode, str)
+            # PY3: bytes -> str
+            assert converted_binary == self.UNICODE_EMOJI and isinstance(converted_binary, str)