summaryrefslogtreecommitdiff
path: root/passlib
diff options
context:
space:
mode:
authorEli Collins <elic@assurancetechnologies.com>2012-04-10 14:21:16 -0400
committerEli Collins <elic@assurancetechnologies.com>2012-04-10 14:21:16 -0400
commitd70e36e2fb1fc2b4e33d237c1a72cc6bc8802d52 (patch)
tree96ccbbe5e755dfafb9f51ac63a99737ac8921808 /passlib
parentd1aaad2741b18bf582e938c03230a84e158a7445 (diff)
downloadpasslib-d70e36e2fb1fc2b4e33d237c1a72cc6bc8802d52.tar.gz
md5_crypt / sha2-crypt cleanup
* tried to clarify documentation & alg for builtin md5_crypt / sha2-crypt backends * replaced regex parser in sha2-crypt with index-based one - less redundant, and should be faster.
Diffstat (limited to 'passlib')
-rw-r--r--passlib/handlers/md5_crypt.py276
-rw-r--r--passlib/handlers/sha2_crypt.py714
-rw-r--r--passlib/utils/__init__.py9
-rw-r--r--passlib/utils/handlers.py2
4 files changed, 489 insertions, 512 deletions
diff --git a/passlib/handlers/md5_crypt.py b/passlib/handlers/md5_crypt.py
index 3237b42..f55e7c7 100644
--- a/passlib/handlers/md5_crypt.py
+++ b/passlib/handlers/md5_crypt.py
@@ -9,7 +9,7 @@ import logging; log = logging.getLogger(__name__)
from warnings import warn
#site
#libs
-from passlib.utils import classproperty, h64, safe_crypt, test_crypt
+from passlib.utils import classproperty, h64, safe_crypt, test_crypt, repeat_bytes
from passlib.utils.compat import b, bytes, irange, unicode, u
import passlib.utils.handlers as uh
#pkg
@@ -26,148 +26,187 @@ B_NULL = b("\x00")
B_MD5_MAGIC = b("$1$")
B_APR_MAGIC = b("$apr1$")
-_OFFSETS = [
- (0, 3), (3, 2), (3, 3), (2, 1), (3, 2), (3, 3), (2, 3),
- (1, 2), (3, 3), (2, 3), (3, 0), (3, 3), (2, 3), (3, 2),
- (1, 3), (2, 3), (3, 2), (3, 1), (2, 3), (3, 2), (3, 3),
- ]
-
-def extend(source, size_ref):
- "helper which repeats <source> so it's the same length as <size_ref>"
- m,d = divmod(len(size_ref), len(source))
- if d:
- return source*m + source[:d]
- else:
- return source*m
-
-def _raw_md5_crypt(password, salt, apr=False):
+# pre-calculated offsets used to speed up C digest stage (see notes below).
+# sequence generated using the following:
+ ##perms_order = "p,pp,ps,psp,sp,spp".split(",")
+ ##def offset(i):
+ ## key = (("p" if i % 2 else "") + ("s" if i % 3 else "") +
+ ## ("p" if i % 7 else "") + ("" if i % 2 else "p"))
+ ## return perms_order.index(key)
+ ##_c_digest_offsets = [(offset(i), offset(i+1)) for i in range(0,42,2)]
+_c_digest_offsets = (
+ (0, 3), (5, 1), (5, 3), (1, 2), (5, 1), (5, 3), (1, 3),
+ (4, 1), (5, 3), (1, 3), (5, 0), (5, 3), (1, 3), (5, 1),
+ (4, 3), (1, 3), (5, 1), (5, 2), (1, 3), (5, 1), (5, 3),
+ )
+
+# map used to transpose bytes when encoding final digest
+_transpose_map = (12, 6, 0, 13, 7, 1, 14, 8, 2, 15, 9, 3, 5, 10, 4, 11)
+
+def _raw_md5_crypt(pwd, salt, use_apr=False):
"""perform raw md5-crypt calculation
- :arg password:
- password, bytes or unicode (encoded to utf-8)
-
- :arg salt:
- salt portion of hash, bytes or unicode (encoded to ascii),
- clipped to max 8 bytes.
+ this function provides a pure-python implementation of the internals
+ for the MD5-Crypt algorithms; it doesn't handle any of the
+ parsing/validation of the hash strings themselves.
- :param apr:
- flag to use apache variant
+ :arg pwd: password chars/bytes to encrypt
+ :arg salt: salt chars to use
+ :arg use_apr: use apache variant
:returns:
- encoded checksum as unicode
+ encoded checksum chars
"""
- #NOTE: regarding 'apr' format:
+ # NOTE: regarding 'apr' format:
# really, apache? you had to invent a whole new "$apr1$" format,
# when all you did was change the ident incorporated into the hash?
# would love to find webpage explaining why just using a portable
# implementation of $1$ wasn't sufficient. *nothing* else was changed.
- #validate password
- #FIXME: can't find definitive policy on how md5-crypt handles non-ascii.
- if isinstance(password, unicode):
- password = password.encode("utf-8")
+ #=====================================================================
+ # init & validate inputs
+ #=====================================================================
- #validate salt
- if isinstance(salt, unicode):
- salt = salt.encode("ascii")
- if len(salt) > 8:
- salt = salt[:8]
+ #validate secret
+ if isinstance(pwd, unicode):
+ # XXX: not sure what official unicode policy is, using this as default
+ pwd = pwd.encode("utf-8")
+ elif not isinstance(pwd, bytes):
+ raise TypeError("password must be bytes or unicode")
+ pwd_len = len(pwd)
- #primary hash = password+id+salt+...
- if apr:
+ #validate salt
+ assert isinstance(salt, unicode), "salt not unicode"
+ salt = salt.encode("ascii")
+ assert len(salt) < 9, "salt too large"
+ # NOTE: spec says salts larger than 8 bytes should be truncated,
+ # instead of causing an error. this function assumes that's been
+ # taken care of by the handler class.
+
+ # load APR specific constants
+ if use_apr:
magic = B_APR_MAGIC
else:
magic = B_MD5_MAGIC
- a_hash = md5(password + magic + salt)
-
- # primary hash - add len(password) chars of tmp hash,
- # where temp hash is md5(password+salt+password)
- b = md5(password + salt + password).digest()
- a_hash.update(extend(b, password))
- # primary hash - add null chars & first char of password !?!
+ #=====================================================================
+ # digest B - used as subinput to digest A
+ #=====================================================================
+ db = md5(pwd + salt + pwd).digest()
+
+ #=====================================================================
+ # digest A - used to initialize first round of digest C
+ #=====================================================================
+ # start out with pwd + magic + salt
+ a_ctx = md5(pwd + magic + salt)
+ a_ctx_update = a_ctx.update
+
+ # add pwd_len bytes of b, repeating b as many times as needed.
+ a_ctx_update(repeat_bytes(db, pwd_len))
+
+ # add null chars & first char of password
+ # NOTE: this may have historically been a bug,
+ # where they meant to use db[0] instead of B_NULL,
+ # but the original code memclear'ed db,
+ # and now all implementations have to use this.
+ i = pwd_len
+ evenchar = pwd[:1]
+ while i:
+ a_ctx_update(B_NULL if i & 1 else evenchar)
+ i >>= 1
+
+ # finish A
+ da = a_ctx.digest()
+
+ #=====================================================================
+ # digest C - for a 1000 rounds, combine A, S, and P
+ # digests in various ways; in order to burn CPU time.
+ #=====================================================================
+
+ # NOTE: the original MD5-Crypt implementation performs the C digest
+ # calculation using the following loop:
#
- # this may have historically been a bug,
- # where they meant to use tmp[0] instead of '\x00',
- # but the code memclear'ed the buffer,
- # and now all implementations have to use this.
+ ##dc = da
+ ##i = 0
+ ##while i < rounds:
+ ## tmp_ctx = md5(pwd if i & 1 else dc)
+ ## if i % 3:
+ ## tmp_ctx.update(salt)
+ ## if i % 7:
+ ## tmp_ctx.update(pwd)
+ ## tmp_ctx.update(dc if i & 1 else pwd)
+ ## dc = tmp_ctx.digest()
+ ## i += 1
#
- # sha-crypt replaced this step with
- # something more useful, anyways
- idx = len(password)
- evenchar = password[:1]
- while idx > 0:
- a_hash.update(B_NULL if idx & 1 else evenchar)
- idx >>= 1
- a = a_hash.digest()
-
- #next:
- # do 1000 rounds of md5 to make things harder.
- # each round we do digest of round-specific content,
- # where content is formed from concatenation of...
- # secret if round % 2 else result
- # salt if round % 3
- # secret if round % 7
- # result if round % 2 else secret
+ # The code Passlib uses (below) implements an equivalent algorithm,
+ # it's just been heavily optimized to pre-calculate a large number
+ # of things beforehand. It works off of a couple of observations
+ # about the original algorithm:
#
- # NOTE: instead of doing this directly, this implementation precomputes
- # most of the data ahead of time. (see sha2_crypt for details, it
- # uses the same alg & optimization).
+ # 1. each round is a combination of 'dc', 'salt', and 'pwd'; determined
+ # by the whether 'i' a multiple of 2,3, and/or 7.
+ # 2. since lcm(2,3,7)==42, the series of combinations will repeat
+ # every 42 rounds.
+ # 3. even rounds 0-40 consist of 'hash(dc + round-specific-constant)';
+ # while odd rounds 1-41 consist of hash(round-specific-constant + dc)
#
- p = password
- s = salt
- p_p = p*2
- s_p = s+p
- evens = [p, s_p, p_p, s_p+p]
- odds = [p, p+s, p_p, p+s_p]
- data = [(evens[e], odds[o]) for e,o in _OFFSETS]
-
- # perform 23 blocks of 42 rounds each
- c = a
- i = 0
- while i < 23:
+ # Using these observations, the following code...
+ # * calculates the round-specific combination of salt & pwd for each round 0-41
+ # * runs through as many 42-round blocks as possible
+ # * runs through as many pairs of rounds as possible for remaining rounds
+ # * performs once last round if the total rounds should be odd.
+ #
+ # this cuts out a lot of the control overhead incurred when running the
+ # original loop 40,000+ times in python, resulting in ~20% increase in
+ # speed under CPython (though still 2x slower than glibc crypt)
+
+ # prepare the 6 combinations of pwd & salt which are needed
+ # (order of 'perms' must match how _c_digest_offsets was generated)
+ pwd_pwd = pwd+pwd
+ pwd_salt = pwd+salt
+ perms = [pwd, pwd_pwd, pwd_salt, pwd_salt+pwd, salt+pwd, salt+pwd_pwd]
+
+ # build up list of even-round & odd-round constants,
+ # and store in 21-element list as (even,odd) pairs.
+ data = [ (perms[even], perms[odd]) for even, odd in _c_digest_offsets]
+
+ # perform 23 blocks of 42 rounds each (for a total of 966 rounds)
+ dc = da
+ blocks = 23
+ while blocks:
for even, odd in data:
- c = md5(odd + md5(c + even).digest()).digest()
- i += 1
+ dc = md5(odd + md5(dc + even).digest()).digest()
+ blocks -= 1
- # perform 34 additional rounds, 2 at a time; for a total of 1000 rounds.
+ # perform 17 more pairs of rounds (34 more rounds, for a total of 1000)
for even, odd in data[:17]:
- c = md5(odd + md5(c + even).digest()).digest()
-
- #encode resulting hash
- return h64.encode_transposed_bytes(c, _chk_offsets).decode("ascii")
+ dc = md5(odd + md5(dc + even).digest()).digest()
-_chk_offsets = (
- 12,6,0,
- 13,7,1,
- 14,8,2,
- 15,9,3,
- 5,10,4,
- 11,
-)
+ #=====================================================================
+ # encode digest using appropriate transpose map
+ #=====================================================================
+ return h64.encode_transposed_bytes(dc, _transpose_map).decode("ascii")
#=========================================================
-#handler
+# handler
#=========================================================
-class _Md5Common(uh.HasSalt, uh.GenericHandler):
+class _MD5_Common(uh.HasSalt, uh.GenericHandler):
"common code for md5_crypt and apr_md5_crypt"
#=========================================================
- #algorithm information
+ # class attrs
#=========================================================
- #--GenericHandler--
- #name in subclass
+ #name - set in subclass
setting_kwds = ("salt", "salt_size")
- #ident in subclass
+ #ident - set in subclass
checksum_size = 22
checksum_chars = uh.HASH64_CHARS
- #--HasSalt--
min_salt_size = 0
max_salt_size = 8
salt_chars = uh.HASH64_CHARS
#=========================================================
- #internal helpers
+ # methods
#=========================================================
@classmethod
@@ -178,19 +217,13 @@ class _Md5Common(uh.HasSalt, uh.GenericHandler):
def to_string(self):
return uh.render_mc2(self.ident, self.salt, self.checksum)
- #=========================================================
- #primary interface
- #=========================================================
- # calc_checksum defined in subclass
+ # _calc_checksum() - provided by subclass
#=========================================================
#eoc
#=========================================================
-#=========================================================
-#handler
-#=========================================================
-class md5_crypt(uh.HasManyBackends, _Md5Common):
+class md5_crypt(uh.HasManyBackends, _MD5_Common):
"""This class implements the MD5-Crypt password hash, and follows the :ref:`password-hash-api`.
It supports a variable-length salt.
@@ -210,13 +243,13 @@ class md5_crypt(uh.HasManyBackends, _Md5Common):
You can see which backend is in use by calling the :meth:`get_backend()` method.
"""
#=========================================================
- #algorithm information
+ # class attrs
#=========================================================
name = "md5_crypt"
ident = u("$1$")
#=========================================================
- #primary interface
+ # methods
#=========================================================
#FIXME: can't find definitive policy on how md5-crypt handles non-ascii.
# all backends currently coerce -> utf-8
@@ -240,13 +273,10 @@ class md5_crypt(uh.HasManyBackends, _Md5Common):
return self._calc_checksum_builtin(secret)
#=========================================================
- #eoc
+ # eoc
#=========================================================
-#=========================================================
-#apache variant of md5-crypt
-#=========================================================
-class apr_md5_crypt(_Md5Common):
+class apr_md5_crypt(_MD5_Common):
"""This class implements the Apr-MD5-Crypt password hash, and follows the :ref:`password-hash-api`.
It supports a variable-length salt.
@@ -259,21 +289,21 @@ class apr_md5_crypt(_Md5Common):
If specified, it must be 0-8 characters, drawn from the regexp range ``[./0-9A-Za-z]``.
"""
#=========================================================
- #algorithm information
+ # class attrs
#=========================================================
name = "apr_md5_crypt"
ident = u("$apr1$")
#=========================================================
- #primary interface
+ # methods
#=========================================================
def _calc_checksum(self, secret):
- return _raw_md5_crypt(secret, self.salt, apr=True)
+ return _raw_md5_crypt(secret, self.salt, use_apr=True)
#=========================================================
- #eoc
+ # eoc
#=========================================================
#=========================================================
-#eof
+# eof
#=========================================================
diff --git a/passlib/handlers/sha2_crypt.py b/passlib/handlers/sha2_crypt.py
index 9adb28c..dc68226 100644
--- a/passlib/handlers/sha2_crypt.py
+++ b/passlib/handlers/sha2_crypt.py
@@ -1,377 +1,364 @@
-"""passlib.handlers.sha2_crypt - SHA256/512-CRYPT"""
+"""passlib.handlers.sha2_crypt - SHA256-Crypt / SHA512-Crypt"""
#=========================================================
#imports
#=========================================================
#core
-from hashlib import sha256, sha512
-import re
+import hashlib
import logging; log = logging.getLogger(__name__)
from warnings import warn
#site
#libs
-from passlib.utils import classproperty, h64, safe_crypt, test_crypt
+from passlib.utils import classproperty, h64, safe_crypt, test_crypt, repeat_bytes
from passlib.utils.compat import b, bytes, byte_elem_value, irange, u, \
- uascii_to_str, unicode
+ uascii_to_str, unicode, lmap
import passlib.utils.handlers as uh
#pkg
#local
__all__ = [
- "SHA256Crypt",
- "SHA512Crypt",
+ "sha512_crypt",
+ "sha256_crypt",
]
-#=========================================================
-#pure-python backend (shared between sha256-crypt & sha512-crypt)
-#=========================================================
-INVALID_SALT_VALUES = b("\x00$")
-
-##_OFFSETS = [((i % 3 > 0) + (i % 7 > 0)*2,
-## ((i + 1) % 3 > 0) + ((i + 1) % 7 > 0)*2)
-## for i in irange(0, 42, 2) ]
-_OFFSETS = [
- (0, 3), (3, 2), (3, 3), (2, 1), (3, 2), (3, 3), (2, 3),
- (1, 2), (3, 3), (2, 3), (3, 0), (3, 3), (2, 3), (3, 2),
- (1, 3), (2, 3), (3, 2), (3, 1), (2, 3), (3, 2), (3, 3),
- ]
-
-def extend(source, size_ref):
- "helper which repeats <source> so it's the same length as <size_ref>"
- m,d = divmod(len(size_ref), len(source))
- if d:
- return source*m + source[:d]
- else:
- return source*m
+#==============================================================
+# pure-python backend, used by both sha256_crypt & sha512_crypt
+# when crypt.crypt() backend is not available.
+#==============================================================
+
+# pre-calculated offsets used to speed up C digest stage (see notes below).
+# sequence generated using the following:
+ ##perms_order = "p,pp,ps,psp,sp,spp".split(",")
+ ##def offset(i):
+ ## key = (("p" if i % 2 else "") + ("s" if i % 3 else "") +
+ ## ("p" if i % 7 else "") + ("" if i % 2 else "p"))
+ ## return perms_order.index(key)
+ ##_c_digest_offsets = [(offset(i), offset(i+1)) for i in range(0,42,2)]
+_c_digest_offsets = (
+ (0, 3), (5, 1), (5, 3), (1, 2), (5, 1), (5, 3), (1, 3),
+ (4, 1), (5, 3), (1, 3), (5, 0), (5, 3), (1, 3), (5, 1),
+ (4, 3), (1, 3), (5, 1), (5, 2), (1, 3), (5, 1), (5, 3),
+ )
+
+# map used to transpose bytes when encoding final sha256_crypt digest
+_256_transpose_map = (
+ 20, 10, 0, 11, 1, 21, 2, 22, 12, 23, 13, 3, 14, 4, 24, 5,
+ 25, 15, 26, 16, 6, 17, 7, 27, 8, 28, 18, 29, 19, 9, 30, 31,
+)
+
+# map used to transpose bytes when encoding final sha512_crypt digest
+_512_transpose_map = (
+ 42, 21, 0, 1, 43, 22, 23, 2, 44, 45, 24, 3, 4, 46, 25, 26,
+ 5, 47, 48, 27, 6, 7, 49, 28, 29, 8, 50, 51, 30, 9, 10, 52,
+ 31, 32, 11, 53, 54, 33, 12, 13, 55, 34, 35, 14, 56, 57, 36, 15,
+ 16, 58, 37, 38, 17, 59, 60, 39, 18, 19, 61, 40, 41, 20, 62, 63,
+)
-def _raw_sha_crypt(secret, salt, rounds, hash):
- """perform raw sha crypt
+def _raw_sha2_crypt(pwd, salt, rounds, use_512=False):
+ """perform raw sha256-crypt / sha512-crypt
- :arg secret: password to encode (if unicode, encoded to utf-8)
- :arg salt: salt string to use (required)
- :arg rounds: int rounds
- :arg hash: hash constructor function for sha-256 or sha-512
+ this function provides a pure-python implementation of the internals
+ for the SHA256-Crypt and SHA512-Crypt algorithms; it doesn't
+ handle any of the parsing/validation of the hash strings themselves.
- :returns:
- Returns tuple of ``(unencoded checksum, normalized salt, normalized rounds)``.
+ :arg pwd: password chars/bytes to encrypt
+ :arg salt: salt chars to use
+ :arg rounds: linear rounds cost
+ :arg use_512: use sha512-crypt instead of sha256-crypt mode
+ :returns:
+ encoded checksum chars
"""
- #validate secret
- if not isinstance(secret, bytes):
- raise TypeError("secret must be encoded as bytes")
-
- #validate rounds
- # XXX: this should be taken care of by handler,
- # change this to an assertion?
- if rounds < 1000:
- rounds = 1000
- if rounds > 999999999: #pragma: no cover
- rounds = 999999999
-
- #validate salt
- if not isinstance(salt, bytes):
- raise TypeError("salt must be encoded as bytes")
- if any(c in salt for c in INVALID_SALT_VALUES):
- raise ValueError("invalid chars in salt")
- if len(salt) > 16:
- salt = salt[:16]
-
- #calc digest B
- b = hash(secret + salt + secret).digest()
-
- #begin digest A
- a_hash = hash(secret + salt + extend(b, secret))
-
- #for each bit in slen, add B or SECRET
- i = len(secret)
- while i > 0:
- if i & 1:
- a_hash.update(b)
- else:
- a_hash.update(secret)
+ #=====================================================================
+ # init & validate inputs
+ #=====================================================================
+
+ # validate secret
+ if isinstance(pwd, unicode):
+ # XXX: not sure what official unicode policy is, using this as default
+ pwd = pwd.encode("utf-8")
+ elif not isinstance(pwd, bytes):
+ raise TypeError("password must be bytes or unicode")
+ pwd_len = len(pwd)
+
+ # validate rounds
+ assert 1000 <= rounds <= 999999999, "invalid rounds"
+ # NOTE: spec says out-of-range rounds should be clipped, instead of
+ # causing an error. this function assumes that's been taken care of
+ # by the handler class.
+
+ # validate salt
+ assert isinstance(salt, unicode), "salt not unicode"
+ salt = salt.encode("ascii")
+ salt_len = len(salt)
+ assert salt_len < 17, "salt too large"
+ # NOTE: spec says salts larger than 16 bytes should be truncated,
+ # instead of causing an error. this function assumes that's been
+ # taken care of by the handler class.
+
+ # load sha256/512 specific constants
+ if use_512:
+ hash_const = hashlib.sha512
+ hash_len = 64
+ transpose_map = _512_transpose_map
+ else:
+ hash_const = hashlib.sha256
+ hash_len = 32
+ transpose_map = _256_transpose_map
+
+ #=====================================================================
+ # digest B - used as subinput to digest A
+ #=====================================================================
+ db = hash_const(pwd + salt + pwd).digest()
+
+ #=====================================================================
+ # digest A - used to initialize first round of digest C
+ #=====================================================================
+ # start out with pwd + salt
+ a_ctx = hash_const(pwd + salt)
+ a_ctx_update = a_ctx.update
+
+ # add pwd_len bytes of b, repeating b as many times as needed.
+ a_ctx_update(repeat_bytes(db, pwd_len))
+
+ # for each bit in pwd_len: add b if it's 1, or pwd if it's 0
+ i = pwd_len
+ while i:
+ a_ctx_update(db if i & 1 else pwd)
i >>= 1
- #finish A
- a = a_hash.digest()
-
- #calc DP - hash of password, extended to size of password
- tmp = hash(secret * len(secret))
- dp = extend(tmp.digest(), secret)
-
- #calc DS - hash of salt, extended to size of salt
- tmp = hash(salt * (16+byte_elem_value(a[0])))
- ds = extend(tmp.digest(), salt)
-
+ # finish A
+ da = a_ctx.digest()
+
+ #=====================================================================
+ # digest P from password - used instead of password itself
+ # when calculating digest C.
+ #=====================================================================
+ if pwd_len < 64:
+ # method this is faster under python, but uses O(pwd_len**2) memory
+ # so we don't use it for larger passwords, to avoid a potential DOS.
+ dp = repeat_bytes(hash_const(pwd * pwd_len).digest(), pwd_len)
+ else:
+ tmp_ctx = hash_const(pwd)
+ tmp_ctx_update = tmp_ctx.update
+ i = pwd_len-1
+ while i:
+ tmp_ctx_update(pwd)
+ i -= 1
+ dp = repeat_bytes(tmp_ctx.digest(), pwd_len)
+ assert len(dp) == pwd_len
+
+ #=====================================================================
+ # digest S - used instead of salt itself when calculating digest C
+ #=====================================================================
+ ds = hash_const(salt * (16 + byte_elem_value(da[0]))).digest()[:salt_len]
+ assert len(ds) == salt_len, "salt_len somehow > hash_len!"
+
+ #=====================================================================
+ # digest C - for a variable number of rounds, combine A, S, and P
+ # digests in various ways; in order to burn CPU time.
+ #=====================================================================
+
+ # NOTE: the original SHA256/512-Crypt specification performs the C digest
+ # calculation using the following loop:
+ #
+ ##dc = da
+ ##i = 0
+ ##while i < rounds:
+ ## tmp_ctx = hash_const(dp if i & 1 else dc)
+ ## if i % 3:
+ ## tmp_ctx.update(ds)
+ ## if i % 7:
+ ## tmp_ctx.update(dp)
+ ## tmp_ctx.update(dc if i & 1 else dp)
+ ## dc = tmp_ctx.digest()
+ ## i += 1
#
- # calc digest C
+ # The code Passlib uses (below) implements an equivalent algorithm,
+ # it's just been heavily optimized to pre-calculate a large number
+ # of things beforehand. It works off of a couple of observations
+ # about the original algorithm:
#
- # NOTE: The code below is quite different in appearance from how the
- # specification performs this step. the original algorithm was that:
- # C starts out set to A
- # for i in [0,rounds), the next value of C is calculated as the digest of:
- # if i%2>0 then DP else C
- # +
- # if i%3>0 then DS else ""
- # +
- # if i%7>0 then DP else ""
- # +
- # if i%2>0 then C else DP
+ # 1. each round is a combination of 'dc', 'ds', and 'dp'; determined
+ # by the whether 'i' a multiple of 2,3, and/or 7.
+ # 2. since lcm(2,3,7)==42, the series of combinations will repeat
+ # every 42 rounds.
+ # 3. even rounds 0-40 consist of 'hash(dc + round-specific-constant)';
+ # while odd rounds 1-41 consist of hash(round-specific-constant + dc)
#
- # This algorithm can be seen as a series of paired even/odd rounds,
- # with each pair performing 'C = md5(odd_data + md5(C + even_data))',
- # where even_data & odd_data cycle through a fixed series of
- # combinations of DP & DS, repeating every 42 rounds (since lcm(2,3,7)==42)
+ # Using these observations, the following code...
+ # * calculates the round-specific combination of ds & dp for each round 0-41
+ # * runs through as many 42-round blocks as possible
+ # * runs through as many pairs of rounds as possible for remaining rounds
+ # * performs once last round if the total rounds should be odd.
#
- # This code takes advantage of these facts: it precalculates all possible
- # combinations, and then orders them into 21 pairs of even,odd values.
- # this allows the brunt of C stage to be performed in 42-round blocks,
- # with minimal branching/concatenation overhead.
-
- # build array containing 42-round pattern as pairs of even & odd data.
- dp_dp = dp*2
- ds_dp = ds+dp
- evens = [dp, ds_dp, dp_dp, ds_dp+dp]
- odds = [dp, dp+ds, dp_dp, dp+ds_dp]
- data = [(evens[e], odds[o]) for e,o in _OFFSETS]
+ # this cuts out a lot of the control overhead incurred when running the
+ # original loop 40,000+ times in python, resulting in ~20% increase in
+ # speed under CPython (though still 2x slower than glibc crypt)
+
+ # prepare the 6 combinations of ds & dp which are needed
+ # (order of 'perms' must match how _c_digest_offsets was generated)
+ dp_dp = dp+dp
+ dp_ds = dp+ds
+ perms = [dp, dp_dp, dp_ds, dp_ds+dp, ds+dp, ds+dp_dp]
+
+ # build up list of even-round & odd-round constants,
+ # and store in 21-element list as (even,odd) pairs.
+ data = [ (perms[even], perms[odd]) for even, odd in _c_digest_offsets]
# perform as many full 42-round blocks as possible
- c = a
+ dc = da
blocks, tail = divmod(rounds, 42)
- i = 0
- while i < blocks:
+ while blocks:
for even, odd in data:
- c = hash(odd + hash(c + even).digest()).digest()
- i += 1
+ dc = hash_const(odd + hash_const(dc + even).digest()).digest()
+ blocks -= 1
# perform any leftover rounds
if tail:
# perform any pairs of rounds
pairs = tail>>1
for even, odd in data[:pairs]:
- c = hash(odd + hash(c + even).digest()).digest()
- # if rounds was odd, do one last round
+ dc = hash_const(odd + hash_const(dc + even).digest()).digest()
+
+ # if rounds was odd, do one last round (since we started at 0,
+ # last round will be an even-numbered round)
if tail & 1:
- c = hash(c + data[pairs][0]).digest()
-
- #return unencoded result, along w/ normalized config values
- return c, salt, rounds
-
-def _raw_sha256_crypt(secret, salt, rounds):
- "perform raw sha256-crypt; returns encoded checksum, normalized salt & rounds"
- #run common crypt routine
- result, salt, rounds = _raw_sha_crypt(secret, salt, rounds, sha256)
- out = h64.encode_transposed_bytes(result, _256_offsets)
- assert len(out) == 43, "wrong length: %r" % (out,)
- return out, salt, rounds
-
-_256_offsets = (
- 20, 10, 0,
- 11, 1, 21,
- 2, 22, 12,
- 23, 13, 3,
- 14, 4, 24,
- 5, 25, 15,
- 26, 16, 6,
- 17, 7, 27,
- 8, 28, 18,
- 29, 19, 9,
- 30, 31,
-)
+ dc = hash_const(dc + data[pairs][0]).digest()
-def _raw_sha512_crypt(secret, salt, rounds):
- "perform raw sha512-crypt; returns encoded checksum, normalized salt & rounds"
- #run common crypt routine
- result, salt, rounds = _raw_sha_crypt(secret, salt, rounds, sha512)
-
- ###encode result
- out = h64.encode_transposed_bytes(result, _512_offsets)
- assert len(out) == 86, "wrong length: %r" % (out,)
- return out, salt, rounds
-
-_512_offsets = (
- 42, 21, 0,
- 1, 43, 22,
- 23, 2, 44,
- 45, 24, 3,
- 4, 46, 25,
- 26, 5, 47,
- 48, 27, 6,
- 7, 49, 28,
- 29, 8, 50,
- 51, 30, 9,
- 10, 52, 31,
- 32, 11, 53,
- 54, 33, 12,
- 13, 55, 34,
- 35, 14, 56,
- 57, 36, 15,
- 16, 58, 37,
- 38, 17, 59,
- 60, 39, 18,
- 19, 61, 40,
- 41, 20, 62,
- 63,
-)
+ #=====================================================================
+ # encode digest using appropriate transpose map
+ #=====================================================================
+ return h64.encode_transposed_bytes(dc, transpose_map).decode("ascii")
#=========================================================
-#handler
+# handlers
#=========================================================
-class sha256_crypt(uh.HasManyBackends, uh.HasRounds, uh.HasSalt, uh.GenericHandler):
- """This class implements the SHA256-Crypt password hash, and follows the :ref:`password-hash-api`.
-
- It supports a variable-length salt, and a variable number of rounds.
-
- The :meth:`encrypt()` and :meth:`genconfig` methods accept the following optional keywords:
-
- :param salt:
- Optional salt string.
- If not specified, one will be autogenerated (this is recommended).
- If specified, it must be 0-16 characters, drawn from the regexp range ``[./0-9A-Za-z]``.
-
- :param rounds:
- Optional number of rounds to use.
- Defaults to 40000, must be between 1000 and 999999999, inclusive.
-
- :param implicit_rounds:
- this is an internal option which generally doesn't need to be touched.
-
- this flag determines whether the hash should omit the rounds parameter
- when encoding it to a string; this is only permitted by the spec for rounds=5000,
- and the flag is ignored otherwise. the spec requires the two different
- encodings be preserved as they are, instead of normalizing them.
-
- It will use the first available of two possible backends:
-
- * stdlib :func:`crypt()`, if the host OS supports SHA256-Crypt.
- * a pure python implementation of SHA256-Crypt built into passlib.
-
- You can see which backend is in use by calling the :meth:`get_backend()` method.
- """
+_UROUNDS = u("rounds=")
+_UDOLLAR = u("$")
+_UZERO = u("0")
+class _SHA2_Common(uh.HasManyBackends, uh.HasRounds, uh.HasSalt,
+ uh.GenericHandler):
+ "class containing common code shared by sha256_crypt & sha512_crypt"
#=========================================================
- #algorithm information
+ # class attrs
#=========================================================
- #--GenericHandler--
- name = "sha256_crypt"
+ # name - set by subclass
setting_kwds = ("salt", "rounds", "implicit_rounds", "salt_size")
- ident = u("$5$")
+ # ident - set by subclass
checksum_chars = uh.HASH64_CHARS
+ # checksum_size - set by subclass
- #--HasSalt--
min_salt_size = 0
max_salt_size = 16
- #TODO: allow salt charset 0-255 except for "\x00\n:$"
salt_chars = uh.HASH64_CHARS
- #--HasRounds--
- default_rounds = 40000 #current passlib default
- min_rounds = 1000 #other bounds set by spec
- max_rounds = 999999999
+ default_rounds = 40000 # current passlib default
+ min_rounds = 1000 # bounds set by spec
+ max_rounds = 999999999 # bounds set by spec
rounds_cost = "linear"
- #=========================================================
- #init
- #=========================================================
- def __init__(self, implicit_rounds=None, **kwds):
- if implicit_rounds is None:
- implicit_rounds = True
- self.implicit_rounds = implicit_rounds
- super(sha256_crypt, self).__init__(**kwds)
+ _cdb_use_512 = False # flag for _calc_digest_builtin()
+ _rounds_prefix = None # ident + _UROUNDS
#=========================================================
- #parsing
+ # methods
#=========================================================
+ implicit_rounds = True
- #: regexp used to parse hashes
- _hash_regex = re.compile(u(r"""
- ^
- \$5
- (\$rounds=(?P<rounds>\d+))?
- \$
- (
- (?P<salt1>[^:$]*)
- |
- (?P<salt2>[^:$]{0,16})
- \$
- (?P<chk>[A-Za-z0-9./]{43})?
- )
- $
- """), re.X)
+ def __init__(self, implicit_rounds=None, **kwds):
+ if implicit_rounds is not None:
+ self.implicit_rounds = implicit_rounds
+ super(_SHA2_Common, self).__init__(**kwds)
@classmethod
def from_string(cls, hash):
+ # basic format this parses -
+ # $5$[rounds=<rounds>$]<salt>[$<checksum>]
+
+ # TODO: this *could* use uh.parse_mc3(), except that the rounds
+ # portion has a slightly different grammar.
+
+ # convert to unicode, check for ident prefix, split on dollar signs.
if not hash:
raise uh.exc.MissingHashError(cls)
if isinstance(hash, bytes):
- hash = hash.decode("ascii")
- m = cls._hash_regex.match(hash)
- if not m:
+ hash = hash.decode('ascii')
+ ident = cls.ident
+ if not hash.startswith(ident):
raise uh.exc.InvalidHashError(cls)
- rounds, salt1, salt2, chk = m.group("rounds", "salt1", "salt2", "chk")
- if rounds and rounds.startswith(u("0")):
- raise uh.exc.ZeroPaddedRoundsError(cls)
+ assert len(ident) == 3
+ parts = hash[3:].split(_UDOLLAR)
+
+ # extract rounds value
+ if parts[0].startswith(_UROUNDS):
+ assert len(_UROUNDS) == 7
+ rounds = parts.pop(0)[7:]
+ if rounds.startswith(_UZERO) and rounds != _UZERO:
+ raise uh.exc.ZeroPaddedRoundsError(cls)
+ rounds = int(rounds)
+ implicit_rounds = False
+ else:
+ rounds = 5000
+ implicit_rounds = True
+
+ # rest should be salt and checksum
+ if len(parts) == 2:
+ salt, chk = parts
+ elif len(parts) == 1:
+ salt = parts[0]
+ chk = None
+ else:
+ raise uh.exc.MalformedHashError(cls)
+
+ # return new object
return cls(
- implicit_rounds=not rounds,
- rounds=int(rounds) if rounds else 5000,
- salt=salt1 or salt2,
- checksum=chk,
- relaxed=not chk, # NOTE: relaxing parsing for config strings,
- # since SHA2-Crypt specification treats them this
- # way (at least for the rounds value)
- )
+ rounds=rounds,
+ salt=salt,
+ checksum=chk or None,
+ implicit_rounds=implicit_rounds,
+ relaxed=not chk, # NOTE: relaxing parsing for config strings
+ # so that out-of-range rounds are clipped,
+ # since SHA2-Crypt spec treats them this way.
+ )
def to_string(self):
- chk = self.checksum or u('')
- rounds = self.rounds
- if rounds == 5000 and self.implicit_rounds:
- hash = u("$5$%s$%s") % (self.salt, chk)
+ if self.rounds == 5000 and self.implicit_rounds:
+ hash = u("%s%s$%s") % (self.ident, self.salt,
+ self.checksum or u(''))
else:
- hash = u("$5$rounds=%d$%s$%s") % (rounds, self.salt, chk)
+ hash = u("%srounds=%d$%s$%s") % (self.ident, self.rounds,
+ self.salt, self.checksum or u(''))
return uascii_to_str(hash)
#=========================================================
- #backend
+ # backends
#=========================================================
backends = ("os_crypt", "builtin")
_has_backend_builtin = True
- @classproperty
- def _has_backend_os_crypt(cls):
- return test_crypt("test", "$5$rounds=1000$test$QmQADEXMG8POI5W"
- "Dsaeho0P36yK3Tcrgboabng6bkb/")
+ # _has_backend_os_crypt - provided by subclass
def _calc_checksum_builtin(self, secret):
- if isinstance(secret, unicode):
- secret = secret.encode("utf-8")
- checksum, salt, rounds = _raw_sha256_crypt(secret,
- self.salt.encode("ascii"),
- self.rounds)
- assert salt == self.salt.encode("ascii"), \
- "class doesn't agree w/ builtin backend: salt %r != %r" % (salt, self.salt.encode("ascii"))
- assert rounds == self.rounds, \
- "class doesn't agree w/ builtin backend: rounds %r != %r" % (rounds, self.rounds)
- return checksum.decode("ascii")
+ return _raw_sha2_crypt(secret, self.salt, self.rounds,
+ self._cdb_use_512)
def _calc_checksum_os_crypt(self, secret):
hash = safe_crypt(secret, self.to_string())
if hash:
- #NOTE: avoiding full parsing routine via from_string().checksum,
+ # NOTE: avoiding full parsing routine via from_string().checksum,
# and just extracting the bit we need.
- assert hash.startswith(u("$5$"))
- chk = hash[-43:]
- assert u('$') not in chk
- return chk
+ cs = self.checksum_size
+ return hash[-cs:]
else:
return self._calc_checksum_builtin(secret)
#=========================================================
- #eoc
+ # eoc
#=========================================================
-#=========================================================
-#sha 512 crypt
-#=========================================================
-class sha512_crypt(uh.HasManyBackends, uh.HasRounds, uh.HasSalt, uh.GenericHandler):
- """This class implements the SHA512-Crypt password hash, and follows the :ref:`password-hash-api`.
+class sha256_crypt(_SHA2_Common):
+ """This class implements the SHA256-Crypt password hash, and follows the :ref:`password-hash-api`.
It supports a variable-length salt, and a variable number of rounds.
@@ -396,98 +383,76 @@ class sha512_crypt(uh.HasManyBackends, uh.HasRounds, uh.HasSalt, uh.GenericHandl
It will use the first available of two possible backends:
- * stdlib :func:`crypt()`, if the host OS supports SHA512-Crypt.
- * a pure python implementation of SHA512-Crypt built into passlib.
+ * stdlib :func:`crypt()`, if the host OS supports SHA256-Crypt.
+ * a pure python implementation of SHA256-Crypt built into passlib.
You can see which backend is in use by calling the :meth:`get_backend()` method.
"""
-
#=========================================================
- #algorithm information
+ # class attrs
#=========================================================
- name = "sha512_crypt"
- ident = u("$6$")
- checksum_chars = uh.HASH64_CHARS
-
- setting_kwds = ("salt", "rounds", "implicit_rounds", "salt_size")
-
- min_salt_size = 0
- max_salt_size = 16
- #TODO: allow salt charset 0-255 except for "\x00\n:$"
- salt_chars = uh.HASH64_CHARS
-
- default_rounds = 40000 #current passlib default
- min_rounds = 1000
- max_rounds = 999999999
- rounds_cost = "linear"
+ name = "sha256_crypt"
+ ident = u("$5$")
+ checksum_size = 43
#=========================================================
- #init
+ # backends
#=========================================================
- def __init__(self, implicit_rounds=None, **kwds):
- if implicit_rounds is None:
- implicit_rounds = True
- self.implicit_rounds = implicit_rounds
- super(sha512_crypt, self).__init__(**kwds)
+ @classproperty
+ def _has_backend_os_crypt(cls):
+ return test_crypt("test", "$5$rounds=1000$test$QmQADEXMG8POI5W"
+ "Dsaeho0P36yK3Tcrgboabng6bkb/")
#=========================================================
- #parsing
+ #eoc
#=========================================================
- #: regexp used to parse hashes
- _hash_regex = re.compile(u(r"""
- ^
- \$6
- (\$rounds=(?P<rounds>\d+))?
- \$
- (
- (?P<salt1>[^:$\n]*)
- |
- (?P<salt2>[^:$\n]{0,16})
- (
- \$
- (?P<chk>[A-Za-z0-9./]{86})?
- )?
- )
- $
- """), re.X)
+#=========================================================
+#sha 512 crypt
+#=========================================================
+class sha512_crypt(_SHA2_Common):
+ """This class implements the SHA512-Crypt password hash, and follows the :ref:`password-hash-api`.
- @classmethod
- def from_string(cls, hash):
- if not hash:
- raise uh.exc.MissingHashError()
- if isinstance(hash, bytes):
- hash = hash.decode("ascii")
- m = cls._hash_regex.match(hash)
- if not m:
- raise uh.exc.InvalidHashError(cls)
- rounds, salt1, salt2, chk = m.group("rounds", "salt1", "salt2", "chk")
- if rounds and rounds.startswith("0"):
- raise uh.exc.ZeroPaddedRoundsError(cls)
- return cls(
- implicit_rounds = not rounds,
- rounds=int(rounds) if rounds else 5000,
- salt=salt1 or salt2,
- checksum=chk,
- relaxed=not chk, # NOTE: relaxing parsing for config strings,
- # since SHA2-Crypt specification treats them this
- # way (at least for the rounds value)
- )
+ It supports a variable-length salt, and a variable number of rounds.
- def to_string(self):
- if self.rounds == 5000 and self.implicit_rounds:
- hash = u("$6$%s$%s") % (self.salt, self.checksum or u(''))
- else:
- hash = u("$6$rounds=%d$%s$%s") % (self.rounds, self.salt, self.checksum or u(''))
- return uascii_to_str(hash)
+ The :meth:`encrypt()` and :meth:`genconfig` methods accept the following optional keywords:
+
+ :param salt:
+ Optional salt string.
+ If not specified, one will be autogenerated (this is recommended).
+ If specified, it must be 0-16 characters, drawn from the regexp range ``[./0-9A-Za-z]``.
+
+ :param rounds:
+ Optional number of rounds to use.
+ Defaults to 40000, must be between 1000 and 999999999, inclusive.
+
+ :param implicit_rounds:
+ this is an internal option which generally doesn't need to be touched.
+
+ this flag determines whether the hash should omit the rounds parameter
+ when encoding it to a string; this is only permitted by the spec for rounds=5000,
+ and the flag is ignored otherwise. the spec requires the two different
+ encodings be preserved as they are, instead of normalizing them.
+
+ It will use the first available of two possible backends:
+
+ * stdlib :func:`crypt()`, if the host OS supports SHA512-Crypt.
+ * a pure python implementation of SHA512-Crypt built into passlib.
+
+ You can see which backend is in use by calling the :meth:`get_backend()` method.
+ """
#=========================================================
- #backend
+ # class attrs
#=========================================================
- backends = ("os_crypt", "builtin")
-
- _has_backend_builtin = True
+ name = "sha512_crypt"
+ ident = u("$6$")
+ checksum_size = 86
+ _cdb_use_512 = True
+ #=========================================================
+ # backend
+ #=========================================================
@classproperty
def _has_backend_os_crypt(cls):
return test_crypt("test", "$6$rounds=1000$test$2M/Lx6Mtobqj"
@@ -495,37 +460,10 @@ class sha512_crypt(uh.HasManyBackends, uh.HasRounds, uh.HasSalt, uh.GenericHandl
"yWeBdRDx4DU.1H3eGmse6pgsOgDisWBG"
"I5c7TZauS0")
- # NOTE: testing w/ HashTimer shows 64-bit linux's crypt to be ~2.6x faster
- # than builtin (627253 vs 238152 rounds/sec)
-
- def _calc_checksum_builtin(self, secret):
- if isinstance(secret, unicode):
- secret = secret.encode("utf-8")
- checksum, salt, rounds = _raw_sha512_crypt(secret,
- self.salt.encode("ascii"),
- self.rounds)
- assert salt == self.salt.encode("ascii"), \
- "class doesn't agree w/ builtin backend: salt %r != %r" % (salt, self.salt.encode("ascii"))
- assert rounds == self.rounds, \
- "class doesn't agree w/ builtin backend: rounds %r != %r" % (rounds, self.rounds)
- return checksum.decode("ascii")
-
- def _calc_checksum_os_crypt(self, secret):
- hash = safe_crypt(secret, self.to_string())
- if hash:
- #NOTE: avoiding full parsing routine via from_string().checksum,
- # and just extracting the bit we need.
- assert hash.startswith(u("$6$"))
- chk = hash[-86:]
- assert u('$') not in chk
- return chk
- else:
- return self._calc_checksum_builtin(secret)
-
#=========================================================
- #eoc
+ # eoc
#=========================================================
#=========================================================
-#eof
+# eof
#=========================================================
diff --git a/passlib/utils/__init__.py b/passlib/utils/__init__.py
index e8e5b27..785afe0 100644
--- a/passlib/utils/__init__.py
+++ b/passlib/utils/__init__.py
@@ -481,6 +481,15 @@ def int_to_bytes(value, count):
for s in irange(8*count-8,-8,-8)
)
+def repeat_bytes(source, size):
+ "repeat or truncate <source> bytes, so it has length <size>"
+ cur = len(source)
+ if size <= cur:
+ return source[:size]
+ else:
+ mult = (size+cur-1)//cur
+ return (source*mult)[:size]
+
#=============================================================================
# encoding helpers
#=============================================================================
diff --git a/passlib/utils/handlers.py b/passlib/utils/handlers.py
index abd975d..ea8674c 100644
--- a/passlib/utils/handlers.py
+++ b/passlib/utils/handlers.py
@@ -113,7 +113,7 @@ def parse_mc3(hash, prefix, sep=_UDOLLAR, rounds_base=10,
default_rounds=None, handler=None):
"""parse hash using 3-part modular crypt format.
- this expects a hash of the format :samp:`{prefix}[{rounds}$]{salt}[${checksum}]`,
+ this expects a hash of the format :samp:`{prefix}[{rounds}]${salt}[${checksum}]`,
such as sha1_crypt, and parses it into rounds / salt / checksum portions.
tries to convert the rounds to an integer,
and throws error if it has zero-padding.