summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEli Collins <elic@assurancetechnologies.com>2020-05-12 12:11:57 -0400
committerEli Collins <elic@assurancetechnologies.com>2020-05-12 12:11:57 -0400
commitc8d8ad8e90dac480b8c5ab1019266b34026b9b30 (patch)
treeeeb7d97b312e6a69f662a6aa833c3c91ec36c01c
parentcfdce7ee2911d90c2b4204d67e741ad9dd6b4e69 (diff)
downloadpasslib-c8d8ad8e90dac480b8c5ab1019266b34026b9b30.tar.gz
bugfix: bcrypt os_crypt backend: fix some more crypt.crypt() utf8 encoding issues
which were causing it to fail to generate a hash. bcrypt ------ * _norm_digest(): fixed some PasslibValueError()s being thrown by os_crypt backend during UT fuzz verifier. These were due to non-UTF8 input being provided to crypt.crypt()... even though secret itself was UTF8 safe! This was because secret was being truncated/repeated as part of various backend bug workarounds; and the truncate/repeat operations weren't being done in manner that respected UTF8 character boundaries. This has now been fixed via _require_valid_utf8_bytes flag (which has been set for os_crypt backend), that enables utf8-safe mode of operation. utils ----- * added utf8_truncate() and utf8_repeat_string() helpers, for bcrypt fixes above. * simplified repeat_string() internals
-rw-r--r--docs/history/1.7.rst2
-rw-r--r--passlib/handlers/bcrypt.py36
-rw-r--r--passlib/tests/test_utils.py119
-rw-r--r--passlib/utils/__init__.py91
4 files changed, 237 insertions, 11 deletions
diff --git a/docs/history/1.7.rst b/docs/history/1.7.rst
index 12eb363..6497550 100644
--- a/docs/history/1.7.rst
+++ b/docs/history/1.7.rst
@@ -62,6 +62,8 @@ Other Changes
These can't be passed through, due to limitation in stdlib's :func:`!crypt.crypt`.
(Prior to this release, it would it incorrectly raise :exc:`~passlib.exc.MissingBackendError` instead).
+ Also improved legacy bcrypt format workarounds, to support a few more UTF8 edge cases than before.
+
* Modified some internals to help run on FIPS systems (:issue:`116`):
In particular, when MD5 hash is not available, :class:`~passlib.hash.hex_md5`
diff --git a/passlib/handlers/bcrypt.py b/passlib/handlers/bcrypt.py
index 5e5838a..323f2a0 100644
--- a/passlib/handlers/bcrypt.py
+++ b/passlib/handlers/bcrypt.py
@@ -27,7 +27,8 @@ _builtin_bcrypt = None # dynamically imported by _load_backend_builtin()
from passlib.crypto.digest import compile_hmac
from passlib.exc import PasslibHashWarning, PasslibSecurityWarning, PasslibSecurityError
from passlib.utils import safe_crypt, repeat_string, to_bytes, parse_version, \
- rng, getrandstr, test_crypt, to_unicode
+ rng, getrandstr, test_crypt, to_unicode, \
+ utf8_truncate, utf8_repeat_string
from passlib.utils.binary import bcrypt64
from passlib.utils.compat import get_unbound_method_function
from passlib.utils.compat import u, uascii_to_str, unicode, str_to_uascii, PY3, error_from
@@ -158,6 +159,7 @@ class _BcryptCommon(uh.SubclassBackendMixin, uh.TruncateMixin, uh.HasManyIdents,
_lacks_2y_support = False
_lacks_2b_support = False
_fallback_ident = IDENT_2A
+ _require_valid_utf8_bytes = False
#===================================================================
# formatting
@@ -486,8 +488,18 @@ class _BcryptCommon(uh.SubclassBackendMixin, uh.TruncateMixin, uh.HasManyIdents,
@classmethod
def _norm_digest_args(cls, secret, ident, new=False):
# make sure secret is unicode
+ require_valid_utf8_bytes = cls._require_valid_utf8_bytes
if isinstance(secret, unicode):
secret = secret.encode("utf-8")
+ elif require_valid_utf8_bytes:
+ # if backend requires utf8 bytes (os_crypt);
+ # make sure input actually is utf8, or don't bother enabling utf-8 specific helpers.
+ try:
+ secret.decode("utf-8")
+ except UnicodeDecodeError:
+ # XXX: could just throw PasswordValueError here, backend will just do that
+ # when _calc_digest() is actually called.
+ require_valid_utf8_bytes = False
# check max secret size
uh.validate_secret(secret)
@@ -508,7 +520,15 @@ class _BcryptCommon(uh.SubclassBackendMixin, uh.TruncateMixin, uh.HasManyIdents,
# bcrypt only uses first 72 bytes anyways.
# NOTE: not needed for 2y/2b, but might use 2a as fallback for them.
if cls._has_2a_wraparound_bug and len(secret) >= 255:
- secret = secret[:72]
+ if require_valid_utf8_bytes:
+ # backend requires valid utf8 bytes, so truncate secret to nearest valid segment.
+ # want to do this in constant time to not give away info about secret.
+ # NOTE: this only works because bcrypt will ignore everything past
+ # secret[71], so padding to include a full utf8 sequence
+ # won't break anything about the final output.
+ secret = utf8_truncate(secret, 72)
+ else:
+ secret = secret[:72]
# special case handling for variants (ordered most common first)
if ident == IDENT_2A:
@@ -535,7 +555,13 @@ class _BcryptCommon(uh.SubclassBackendMixin, uh.TruncateMixin, uh.HasManyIdents,
# we can fake $2$ behavior using the 2A/2Y/2B algorithm
# by repeating the password until it's at least 72 chars in length.
if secret:
- secret = repeat_string(secret, 72)
+ if require_valid_utf8_bytes:
+ # NOTE: this only works because bcrypt will ignore everything past
+ # secret[71], so padding to include a full utf8 sequence
+ # won't break anything about the final output.
+ secret = utf8_repeat_string(secret, 72)
+ else:
+ secret = repeat_string(secret, 72)
ident = cls._fallback_ident
elif ident == IDENT_2X:
@@ -745,6 +771,10 @@ class _OsCryptBackend(_BcryptCommon):
backend which uses :func:`crypt.crypt`
"""
+ #: set flag to ensure _prepare_digest_args() doesn't create invalid utf8 string
+ #: when truncating bytes.
+ _require_valid_utf8_bytes = PY3
+
@classmethod
def _load_backend_mixin(mixin_cls, name, dryrun):
if not test_crypt("test", TEST_HASH_2A):
diff --git a/passlib/tests/test_utils.py b/passlib/tests/test_utils.py
index 3fd0001..f3a275b 100644
--- a/passlib/tests/test_utils.py
+++ b/passlib/tests/test_utils.py
@@ -413,6 +413,125 @@ class MiscTest(TestCase):
self.assertEqual(splitcomma(" a , b"), ['a', 'b'])
self.assertEqual(splitcomma(" a, b, "), ['a', 'b'])
+ def test_utf8_truncate(self):
+ """
+ utf8_truncate()
+ """
+ from passlib.utils import utf8_truncate
+
+ #
+ # run through a bunch of reference strings,
+ # and make sure they truncate properly across all possible indexes
+ #
+ for source in [
+ # empty string
+ b"",
+ # strings w/ only single-byte chars
+ b"1",
+ b"123",
+ b'\x1a',
+ b'\x1a' * 10,
+ b'\x7f',
+ b'\x7f' * 10,
+ # strings w/ properly formed UTF8 continuation sequences
+ b'a\xc2\xa0\xc3\xbe\xc3\xbe',
+ b'abcdefghjusdfaoiu\xc2\xa0\xc3\xbe\xc3\xbedsfioauweoiruer',
+ ]:
+ source.decode("utf-8") # sanity check - should always be valid UTF8
+
+ end = len(source)
+ for idx in range(end + 16):
+ prefix = "source=%r index=%r: " % (source, idx)
+
+ result = utf8_truncate(source, idx)
+
+ # result should always be valid utf-8
+ result.decode("utf-8")
+
+ # result should never be larger than source
+ self.assertLessEqual(len(result), end, msg=prefix)
+
+ # result should always be in range(idx, idx+4)
+ self.assertGreaterEqual(len(result), min(idx, end), msg=prefix)
+ self.assertLess(len(result), min(idx + 4, end + 1), msg=prefix)
+
+ # should be strict prefix of source
+ self.assertEqual(result, source[:len(result)], msg=prefix)
+
+ #
+ # malformed utf8 --
+ # strings w/ only initial chars (should cut just like single-byte chars)
+ #
+ for source in [
+ b'\xca',
+ b'\xca' * 10,
+ # also test null bytes (not valid utf8, but this func should treat them like ascii)
+ b'\x00',
+ b'\x00' * 10,
+ ]:
+ end = len(source)
+ for idx in range(end + 16):
+ prefix = "source=%r index=%r: " % (source, idx)
+ result = utf8_truncate(source, idx)
+ self.assertEqual(result, source[:idx], msg=prefix)
+
+ #
+ # malformed utf8 --
+ # strings w/ only continuation chars (should cut at index+3)
+ #
+ for source in [
+ b'\xaa',
+ b'\xaa' * 10,
+ ]:
+ end = len(source)
+ for idx in range(end + 16):
+ prefix = "source=%r index=%r: " % (source, idx)
+ result = utf8_truncate(source, idx)
+ self.assertEqual(result, source[:idx+3], msg=prefix)
+
+ #
+ # string w/ some invalid utf8 --
+ # * \xaa byte is too many continuation byte after \xff start byte
+ # * \xab byte doesn't have preceding start byte
+ # XXX: could also test continuation bytes w/o start byte, WITHIN the string.
+ # but think this covers edges well enough...
+ #
+ source = b'MN\xff\xa0\xa1\xa2\xaaOP\xab'
+
+ self.assertEqual(utf8_truncate(source, 0), b'') # index="M", stops there
+
+ self.assertEqual(utf8_truncate(source, 1), b'M') # index="N", stops there
+
+ self.assertEqual(utf8_truncate(source, 2), b'MN') # index="\xff", stops there
+
+ self.assertEqual(utf8_truncate(source, 3),
+ b'MN\xff\xa0\xa1\xa2') # index="\xa0", runs out after index+3="\xa2"
+
+ self.assertEqual(utf8_truncate(source, 4),
+ b'MN\xff\xa0\xa1\xa2\xaa') # index="\xa1", runs out after index+3="\xaa"
+
+ self.assertEqual(utf8_truncate(source, 5),
+ b'MN\xff\xa0\xa1\xa2\xaa') # index="\xa2", stops before "O"
+
+ self.assertEqual(utf8_truncate(source, 6),
+ b'MN\xff\xa0\xa1\xa2\xaa') # index="\xaa", stops before "O"
+
+ self.assertEqual(utf8_truncate(source, 7),
+ b'MN\xff\xa0\xa1\xa2\xaa') # index="O", stops there
+
+ self.assertEqual(utf8_truncate(source, 8),
+ b'MN\xff\xa0\xa1\xa2\xaaO') # index="P", stops there
+
+ self.assertEqual(utf8_truncate(source, 9),
+ b'MN\xff\xa0\xa1\xa2\xaaOP\xab') # index="\xab", runs out at end
+
+ self.assertEqual(utf8_truncate(source, 10),
+ b'MN\xff\xa0\xa1\xa2\xaaOP\xab') # index=end
+
+ self.assertEqual(utf8_truncate(source, 11),
+ b'MN\xff\xa0\xa1\xa2\xaaOP\xab') # index=end+1
+
+
#=============================================================================
# byte/unicode helpers
#=============================================================================
diff --git a/passlib/utils/__init__.py b/passlib/utils/__init__.py
index cb8872c..386ef65 100644
--- a/passlib/utils/__init__.py
+++ b/passlib/utils/__init__.py
@@ -55,7 +55,7 @@ from passlib.utils.decor import (
classproperty,
hybrid_method,
)
-from passlib.exc import ExpectedStringError
+from passlib.exc import ExpectedStringError, ExpectedTypeError
from passlib.utils.compat import (add_doc, join_bytes, join_byte_values,
join_byte_elems, irange, imap, PY3, u,
join_unicode, unicode, byte_elem_value, nextgetter,
@@ -574,13 +574,20 @@ def xor_bytes(left, right):
return int_to_bytes(bytes_to_int(left) ^ bytes_to_int(right), len(left))
def repeat_string(source, size):
- """repeat or truncate <source> string, so it has length <size>"""
- cur = len(source)
- if size > cur:
- mult = (size+cur-1)//cur
- return (source*mult)[:size]
- else:
- return source[:size]
+ """
+ repeat or truncate <source> string, so it has length <size>
+ """
+ mult = 1 + (size - 1) // len(source)
+ return (source * mult)[:size]
+
+
+def utf8_repeat_string(source, size):
+ """
+ variant of repeat_string() which truncates to nearest UTF8 boundary.
+ """
+ mult = 1 + (size - 1) // len(source)
+ return utf8_truncate(source * mult, size)
+
_BNULL = b"\x00"
_UNULL = u("\x00")
@@ -595,6 +602,74 @@ def right_pad_string(source, size, pad=None):
else:
return source[:size]
+
+def utf8_truncate(source, index):
+ """
+ helper to truncate UTF8 byte string to nearest character boundary ON OR AFTER <index>.
+ returned prefix will always have length of at least <index>, and will stop on the
+ first byte that's not a UTF8 continuation byte (128 - 191 inclusive).
+ since utf8 should never take more than 4 bytes to encode known unicode values,
+ we can stop after ``index+3`` is reached.
+
+ :param bytes source:
+ :param int index:
+ :rtype: bytes
+ """
+ # general approach:
+ #
+ # * UTF8 bytes will have high two bits (0xC0) as one of:
+ # 00 -- ascii char
+ # 01 -- ascii char
+ # 10 -- continuation of multibyte char
+ # 11 -- start of multibyte char.
+ # thus we can cut on anything where high bits aren't "10" (0x80; continuation byte)
+ #
+ # * UTF8 characters SHOULD always be 1 to 4 bytes, though they may be unbounded.
+ # so we just keep going until first non-continuation byte is encountered, or end of str.
+ # this should work predictably even for malformed/non UTF8 inputs.
+
+ if not isinstance(source, bytes):
+ raise ExpectedTypeError(source, bytes, "source")
+
+ # validate index
+ end = len(source)
+ if index < 0:
+ index = max(0, index + end)
+ if index >= end:
+ return source
+
+ # can stop search after 4 bytes, won't ever have longer utf8 sequence.
+ end = min(index + 3, end)
+
+ # loop until we find non-continuation byte
+ while index < end:
+ if byte_elem_value(source[index]) & 0xC0 != 0x80:
+ # found single-char byte, or start-char byte.
+ break
+ # else: found continuation byte.
+ index += 1
+ else:
+ assert index == end
+
+ # truncate at final index
+ result = source[:index]
+
+ def sanity_check():
+ # try to decode source
+ try:
+ text = source.decode("utf-8")
+ except UnicodeDecodeError:
+ # if source isn't valid utf8, byte level match is enough
+ return True
+
+ # validate that result was cut on character boundary
+ assert text.startswith(result.decode("utf-8"))
+ return True
+
+ assert sanity_check()
+
+ return result
+
#=============================================================================
# encoding helpers
#=============================================================================