diff options
author | Eli Collins <elic@assurancetechnologies.com> | 2020-05-12 12:11:57 -0400 |
---|---|---|
committer | Eli Collins <elic@assurancetechnologies.com> | 2020-05-12 12:11:57 -0400 |
commit | c8d8ad8e90dac480b8c5ab1019266b34026b9b30 (patch) | |
tree | eeb7d97b312e6a69f662a6aa833c3c91ec36c01c | |
parent | cfdce7ee2911d90c2b4204d67e741ad9dd6b4e69 (diff) | |
download | passlib-c8d8ad8e90dac480b8c5ab1019266b34026b9b30.tar.gz |
bugfix: bcrypt os_crypt backend: fix some more crypt.crypt() utf8 encoding issues
which were causing it to fail to generate a hash.
bcrypt
------
* _norm_digest(): fixed some PasslibValueError()s being thrown by os_crypt
backend during UT fuzz verifier. These were due to non-UTF8 input
being provided to crypt.crypt()... even though secret itself was UTF8 safe!
This was because secret was being truncated/repeated as part of
various backend bug workarounds; and the truncate/repeat operations
weren't being done in manner that respected UTF8 character boundaries.
This has now been fixed via _require_valid_utf8_bytes flag
(which has been set for os_crypt backend), that enables
utf8-safe mode of operation.
utils
-----
* added utf8_truncate() and utf8_repeat_string() helpers, for bcrypt fixes above.
* simplified repeat_string() internals
-rw-r--r-- | docs/history/1.7.rst | 2 | ||||
-rw-r--r-- | passlib/handlers/bcrypt.py | 36 | ||||
-rw-r--r-- | passlib/tests/test_utils.py | 119 | ||||
-rw-r--r-- | passlib/utils/__init__.py | 91 |
4 files changed, 237 insertions, 11 deletions
diff --git a/docs/history/1.7.rst b/docs/history/1.7.rst index 12eb363..6497550 100644 --- a/docs/history/1.7.rst +++ b/docs/history/1.7.rst @@ -62,6 +62,8 @@ Other Changes These can't be passed through, due to limitation in stdlib's :func:`!crypt.crypt`. (Prior to this release, it would it incorrectly raise :exc:`~passlib.exc.MissingBackendError` instead). + Also improved legacy bcrypt format workarounds, to support a few more UTF8 edge cases than before. + * Modified some internals to help run on FIPS systems (:issue:`116`): In particular, when MD5 hash is not available, :class:`~passlib.hash.hex_md5` diff --git a/passlib/handlers/bcrypt.py b/passlib/handlers/bcrypt.py index 5e5838a..323f2a0 100644 --- a/passlib/handlers/bcrypt.py +++ b/passlib/handlers/bcrypt.py @@ -27,7 +27,8 @@ _builtin_bcrypt = None # dynamically imported by _load_backend_builtin() from passlib.crypto.digest import compile_hmac from passlib.exc import PasslibHashWarning, PasslibSecurityWarning, PasslibSecurityError from passlib.utils import safe_crypt, repeat_string, to_bytes, parse_version, \ - rng, getrandstr, test_crypt, to_unicode + rng, getrandstr, test_crypt, to_unicode, \ + utf8_truncate, utf8_repeat_string from passlib.utils.binary import bcrypt64 from passlib.utils.compat import get_unbound_method_function from passlib.utils.compat import u, uascii_to_str, unicode, str_to_uascii, PY3, error_from @@ -158,6 +159,7 @@ class _BcryptCommon(uh.SubclassBackendMixin, uh.TruncateMixin, uh.HasManyIdents, _lacks_2y_support = False _lacks_2b_support = False _fallback_ident = IDENT_2A + _require_valid_utf8_bytes = False #=================================================================== # formatting @@ -486,8 +488,18 @@ class _BcryptCommon(uh.SubclassBackendMixin, uh.TruncateMixin, uh.HasManyIdents, @classmethod def _norm_digest_args(cls, secret, ident, new=False): # make sure secret is unicode + require_valid_utf8_bytes = cls._require_valid_utf8_bytes if isinstance(secret, unicode): secret = secret.encode("utf-8") + elif require_valid_utf8_bytes: + # if backend requires utf8 bytes (os_crypt); + # make sure input actually is utf8, or don't bother enabling utf-8 specific helpers. + try: + secret.decode("utf-8") + except UnicodeDecodeError: + # XXX: could just throw PasswordValueError here, backend will just do that + # when _calc_digest() is actually called. + require_valid_utf8_bytes = False # check max secret size uh.validate_secret(secret) @@ -508,7 +520,15 @@ class _BcryptCommon(uh.SubclassBackendMixin, uh.TruncateMixin, uh.HasManyIdents, # bcrypt only uses first 72 bytes anyways. # NOTE: not needed for 2y/2b, but might use 2a as fallback for them. if cls._has_2a_wraparound_bug and len(secret) >= 255: - secret = secret[:72] + if require_valid_utf8_bytes: + # backend requires valid utf8 bytes, so truncate secret to nearest valid segment. + # want to do this in constant time to not give away info about secret. + # NOTE: this only works because bcrypt will ignore everything past + # secret[71], so padding to include a full utf8 sequence + # won't break anything about the final output. + secret = utf8_truncate(secret, 72) + else: + secret = secret[:72] # special case handling for variants (ordered most common first) if ident == IDENT_2A: @@ -535,7 +555,13 @@ class _BcryptCommon(uh.SubclassBackendMixin, uh.TruncateMixin, uh.HasManyIdents, # we can fake $2$ behavior using the 2A/2Y/2B algorithm # by repeating the password until it's at least 72 chars in length. if secret: - secret = repeat_string(secret, 72) + if require_valid_utf8_bytes: + # NOTE: this only works because bcrypt will ignore everything past + # secret[71], so padding to include a full utf8 sequence + # won't break anything about the final output. + secret = utf8_repeat_string(secret, 72) + else: + secret = repeat_string(secret, 72) ident = cls._fallback_ident elif ident == IDENT_2X: @@ -745,6 +771,10 @@ class _OsCryptBackend(_BcryptCommon): backend which uses :func:`crypt.crypt` """ + #: set flag to ensure _prepare_digest_args() doesn't create invalid utf8 string + #: when truncating bytes. + _require_valid_utf8_bytes = PY3 + @classmethod def _load_backend_mixin(mixin_cls, name, dryrun): if not test_crypt("test", TEST_HASH_2A): diff --git a/passlib/tests/test_utils.py b/passlib/tests/test_utils.py index 3fd0001..f3a275b 100644 --- a/passlib/tests/test_utils.py +++ b/passlib/tests/test_utils.py @@ -413,6 +413,125 @@ class MiscTest(TestCase): self.assertEqual(splitcomma(" a , b"), ['a', 'b']) self.assertEqual(splitcomma(" a, b, "), ['a', 'b']) + def test_utf8_truncate(self): + """ + utf8_truncate() + """ + from passlib.utils import utf8_truncate + + # + # run through a bunch of reference strings, + # and make sure they truncate properly across all possible indexes + # + for source in [ + # empty string + b"", + # strings w/ only single-byte chars + b"1", + b"123", + b'\x1a', + b'\x1a' * 10, + b'\x7f', + b'\x7f' * 10, + # strings w/ properly formed UTF8 continuation sequences + b'a\xc2\xa0\xc3\xbe\xc3\xbe', + b'abcdefghjusdfaoiu\xc2\xa0\xc3\xbe\xc3\xbedsfioauweoiruer', + ]: + source.decode("utf-8") # sanity check - should always be valid UTF8 + + end = len(source) + for idx in range(end + 16): + prefix = "source=%r index=%r: " % (source, idx) + + result = utf8_truncate(source, idx) + + # result should always be valid utf-8 + result.decode("utf-8") + + # result should never be larger than source + self.assertLessEqual(len(result), end, msg=prefix) + + # result should always be in range(idx, idx+4) + self.assertGreaterEqual(len(result), min(idx, end), msg=prefix) + self.assertLess(len(result), min(idx + 4, end + 1), msg=prefix) + + # should be strict prefix of source + self.assertEqual(result, source[:len(result)], msg=prefix) + + # + # malformed utf8 -- + # strings w/ only initial chars (should cut just like single-byte chars) + # + for source in [ + b'\xca', + b'\xca' * 10, + # also test null bytes (not valid utf8, but this func should treat them like ascii) + b'\x00', + b'\x00' * 10, + ]: + end = len(source) + for idx in range(end + 16): + prefix = "source=%r index=%r: " % (source, idx) + result = utf8_truncate(source, idx) + self.assertEqual(result, source[:idx], msg=prefix) + + # + # malformed utf8 -- + # strings w/ only continuation chars (should cut at index+3) + # + for source in [ + b'\xaa', + b'\xaa' * 10, + ]: + end = len(source) + for idx in range(end + 16): + prefix = "source=%r index=%r: " % (source, idx) + result = utf8_truncate(source, idx) + self.assertEqual(result, source[:idx+3], msg=prefix) + + # + # string w/ some invalid utf8 -- + # * \xaa byte is too many continuation byte after \xff start byte + # * \xab byte doesn't have preceding start byte + # XXX: could also test continuation bytes w/o start byte, WITHIN the string. + # but think this covers edges well enough... + # + source = b'MN\xff\xa0\xa1\xa2\xaaOP\xab' + + self.assertEqual(utf8_truncate(source, 0), b'') # index="M", stops there + + self.assertEqual(utf8_truncate(source, 1), b'M') # index="N", stops there + + self.assertEqual(utf8_truncate(source, 2), b'MN') # index="\xff", stops there + + self.assertEqual(utf8_truncate(source, 3), + b'MN\xff\xa0\xa1\xa2') # index="\xa0", runs out after index+3="\xa2" + + self.assertEqual(utf8_truncate(source, 4), + b'MN\xff\xa0\xa1\xa2\xaa') # index="\xa1", runs out after index+3="\xaa" + + self.assertEqual(utf8_truncate(source, 5), + b'MN\xff\xa0\xa1\xa2\xaa') # index="\xa2", stops before "O" + + self.assertEqual(utf8_truncate(source, 6), + b'MN\xff\xa0\xa1\xa2\xaa') # index="\xaa", stops before "O" + + self.assertEqual(utf8_truncate(source, 7), + b'MN\xff\xa0\xa1\xa2\xaa') # index="O", stops there + + self.assertEqual(utf8_truncate(source, 8), + b'MN\xff\xa0\xa1\xa2\xaaO') # index="P", stops there + + self.assertEqual(utf8_truncate(source, 9), + b'MN\xff\xa0\xa1\xa2\xaaOP\xab') # index="\xab", runs out at end + + self.assertEqual(utf8_truncate(source, 10), + b'MN\xff\xa0\xa1\xa2\xaaOP\xab') # index=end + + self.assertEqual(utf8_truncate(source, 11), + b'MN\xff\xa0\xa1\xa2\xaaOP\xab') # index=end+1 + + #============================================================================= # byte/unicode helpers #============================================================================= diff --git a/passlib/utils/__init__.py b/passlib/utils/__init__.py index cb8872c..386ef65 100644 --- a/passlib/utils/__init__.py +++ b/passlib/utils/__init__.py @@ -55,7 +55,7 @@ from passlib.utils.decor import ( classproperty, hybrid_method, ) -from passlib.exc import ExpectedStringError +from passlib.exc import ExpectedStringError, ExpectedTypeError from passlib.utils.compat import (add_doc, join_bytes, join_byte_values, join_byte_elems, irange, imap, PY3, u, join_unicode, unicode, byte_elem_value, nextgetter, @@ -574,13 +574,20 @@ def xor_bytes(left, right): return int_to_bytes(bytes_to_int(left) ^ bytes_to_int(right), len(left)) def repeat_string(source, size): - """repeat or truncate <source> string, so it has length <size>""" - cur = len(source) - if size > cur: - mult = (size+cur-1)//cur - return (source*mult)[:size] - else: - return source[:size] + """ + repeat or truncate <source> string, so it has length <size> + """ + mult = 1 + (size - 1) // len(source) + return (source * mult)[:size] + + +def utf8_repeat_string(source, size): + """ + variant of repeat_string() which truncates to nearest UTF8 boundary. + """ + mult = 1 + (size - 1) // len(source) + return utf8_truncate(source * mult, size) + _BNULL = b"\x00" _UNULL = u("\x00") @@ -595,6 +602,74 @@ def right_pad_string(source, size, pad=None): else: return source[:size] + +def utf8_truncate(source, index): + """ + helper to truncate UTF8 byte string to nearest character boundary ON OR AFTER <index>. + returned prefix will always have length of at least <index>, and will stop on the + first byte that's not a UTF8 continuation byte (128 - 191 inclusive). + since utf8 should never take more than 4 bytes to encode known unicode values, + we can stop after ``index+3`` is reached. + + :param bytes source: + :param int index: + :rtype: bytes + """ + # general approach: + # + # * UTF8 bytes will have high two bits (0xC0) as one of: + # 00 -- ascii char + # 01 -- ascii char + # 10 -- continuation of multibyte char + # 11 -- start of multibyte char. + # thus we can cut on anything where high bits aren't "10" (0x80; continuation byte) + # + # * UTF8 characters SHOULD always be 1 to 4 bytes, though they may be unbounded. + # so we just keep going until first non-continuation byte is encountered, or end of str. + # this should work predictably even for malformed/non UTF8 inputs. + + if not isinstance(source, bytes): + raise ExpectedTypeError(source, bytes, "source") + + # validate index + end = len(source) + if index < 0: + index = max(0, index + end) + if index >= end: + return source + + # can stop search after 4 bytes, won't ever have longer utf8 sequence. + end = min(index + 3, end) + + # loop until we find non-continuation byte + while index < end: + if byte_elem_value(source[index]) & 0xC0 != 0x80: + # found single-char byte, or start-char byte. + break + # else: found continuation byte. + index += 1 + else: + assert index == end + + # truncate at final index + result = source[:index] + + def sanity_check(): + # try to decode source + try: + text = source.decode("utf-8") + except UnicodeDecodeError: + # if source isn't valid utf8, byte level match is enough + return True + + # validate that result was cut on character boundary + assert text.startswith(result.decode("utf-8")) + return True + + assert sanity_check() + + return result + #============================================================================= # encoding helpers #============================================================================= |