summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEli Collins <elic@assurancetechnologies.com>2016-11-22 17:24:24 -0500
committerEli Collins <elic@assurancetechnologies.com>2016-11-22 17:24:24 -0500
commit0b4dd0be304bf2facd0f3c3f88b61353b8f0a473 (patch)
tree4711f3838f2c31395f15a613b5f37a3b4e7fcd92
parent4f03b94b1c26468400c4839c0932f411e6667fe7 (diff)
downloadpasslib-0b4dd0be304bf2facd0f3c3f88b61353b8f0a473.tar.gz
passlib.utils: relocated a bunch of binary encoding helpers to .utils.binary submodule
-rw-r--r--docs/history/1.5.rst2
-rw-r--r--docs/history/1.6.rst2
-rw-r--r--docs/lib/passlib.hash.bigcrypt.rst8
-rw-r--r--docs/lib/passlib.hash.bsdi_crypt.rst8
-rw-r--r--docs/lib/passlib.hash.cisco_pix.rst4
-rw-r--r--docs/lib/passlib.hash.crypt16.rst8
-rw-r--r--docs/lib/passlib.hash.des_crypt.rst8
-rw-r--r--docs/lib/passlib.hash.dlitz_pbkdf2_sha1.rst4
-rw-r--r--docs/lib/passlib.hash.md5_crypt.rst4
-rw-r--r--docs/lib/passlib.hash.pbkdf2_digest.rst6
-rw-r--r--docs/lib/passlib.hash.scram.rst4
-rw-r--r--docs/lib/passlib.hash.sha1_crypt.rst2
-rw-r--r--docs/lib/passlib.hash.sha256_crypt.rst2
-rw-r--r--docs/lib/passlib.ifc.rst2
-rw-r--r--docs/lib/passlib.utils.binary.rst87
-rw-r--r--docs/lib/passlib.utils.rst71
-rw-r--r--passlib/context.py3
-rw-r--r--passlib/crypto/_blowfish/__init__.py3
-rw-r--r--passlib/handlers/argon2.py3
-rw-r--r--passlib/handlers/bcrypt.py3
-rw-r--r--passlib/handlers/cisco.py3
-rw-r--r--passlib/handlers/des_crypt.py3
-rw-r--r--passlib/handlers/django.py3
-rw-r--r--passlib/handlers/md5_crypt.py3
-rw-r--r--passlib/handlers/pbkdf2.py3
-rw-r--r--passlib/handlers/phpass.py2
-rw-r--r--passlib/handlers/scram.py4
-rw-r--r--passlib/handlers/scrypt.py3
-rw-r--r--passlib/handlers/sha1_crypt.py3
-rw-r--r--passlib/handlers/sha2_crypt.py3
-rw-r--r--passlib/handlers/sun_md5_crypt.py3
-rw-r--r--passlib/tests/test_utils.py12
-rw-r--r--passlib/tests/utils.py2
-rw-r--r--passlib/totp.py25
-rw-r--r--passlib/utils/__init__.py713
-rw-r--r--passlib/utils/binary.py884
-rw-r--r--passlib/utils/handlers.py23
37 files changed, 1059 insertions, 867 deletions
diff --git a/docs/history/1.5.rst b/docs/history/1.5.rst
index 006bf99..575e81b 100644
--- a/docs/history/1.5.rst
+++ b/docs/history/1.5.rst
@@ -57,7 +57,7 @@
.. currentmodule:: passlib.hash
* *bugfix:* :class:`django_des_crypt` now accepts all
- :data:`hash64 <passlib.utils.h64>` characters in its salts;
+ :data:`hash64 <passlib.utils.binary.h64>` characters in its salts;
previously it accepted only lower-case hexadecimal characters (:issue:`22`).
* Additional unittests added for all
diff --git a/docs/history/1.6.rst b/docs/history/1.6.rst
index 6f3fac5..6b88c4b 100644
--- a/docs/history/1.6.rst
+++ b/docs/history/1.6.rst
@@ -392,7 +392,7 @@ Internal Changes
.. currentmodule:: passlib.utils
* :mod:`!passlib.utils.h64` has been replaced by an instance of the
- new :class:`~passlib.utils.Base64Engine` class. This instance is
+ new :class:`~passlib.utils.binary.Base64Engine` class. This instance is
imported under the same name, and has (mostly) the same interface;
but should be faster, more flexible, and better unit-tested.
diff --git a/docs/lib/passlib.hash.bigcrypt.rst b/docs/lib/passlib.hash.bigcrypt.rst
index fa9542b..cd603c7 100644
--- a/docs/lib/passlib.hash.bigcrypt.rst
+++ b/docs/lib/passlib.hash.bigcrypt.rst
@@ -25,11 +25,11 @@ Format
An example hash (of the string ``passphrase``) is ``S/8NbAAlzbYO66hAa9XZyWy2``.
A bigcrypt hash string has the format :samp:`{salt}{checksum_1}{checksum_2...}{checksum_n}` for some integer :samp:`{n}>0`, where:
-* :samp:`{salt}` is the salt, stored as a 2 character :data:`hash64 <passlib.utils.h64>`-encoded
+* :samp:`{salt}` is the salt, stored as a 2 character :data:`hash64 <passlib.utils.binary.h64>`-encoded
12-bit integer (``S/`` in the example).
* each :samp:`{checksum_i}` is a separate checksum, stored as an 11 character
- :data:`hash64-big <passlib.utils.h64big>`-encoded 64-bit integer (``8NbAAlzbYO6`` and ``6hAa9XZyWy2``
+ :data:`hash64-big <passlib.utils.binary.h64big>`-encoded 64-bit integer (``8NbAAlzbYO6`` and ``6hAa9XZyWy2``
in the example).
* the integer :samp:`n` (the number of checksums) is determined by the formula
@@ -65,7 +65,7 @@ The bigcrypt algorithm is designed to re-use the original des-crypt algorithm:
4. The 2 character salt string is decoded to a 12-bit integer salt value;
The salt string uses little-endian
- :data:`hash64 <passlib.utils.h64>` encoding.
+ :data:`hash64 <passlib.utils.binary.h64>` encoding.
5. 25 repeated rounds of modified DES encryption are performed;
starting with a null input block,
@@ -79,7 +79,7 @@ The bigcrypt algorithm is designed to re-use the original des-crypt algorithm:
lsb-padded with 2 zero bits.
7. The resulting 66-bit integer is encoded in big-endian order
- using the :data:`hash64-big <passlib.utils.h64big>` format.
+ using the :data:`hash64-big <passlib.utils.binary.h64big>` format.
This forms the first checksum segment.
8. For each additional block of 8 bytes in the padded password (from step 2),
diff --git a/docs/lib/passlib.hash.bsdi_crypt.rst b/docs/lib/passlib.hash.bsdi_crypt.rst
index 137a7c6..cb82797 100644
--- a/docs/lib/passlib.hash.bsdi_crypt.rst
+++ b/docs/lib/passlib.hash.bsdi_crypt.rst
@@ -52,7 +52,7 @@ A bsdi_crypt hash string consists of a 21 character string of the form :samp:`_{
All characters except the underscore prefix are drawn from ``[./0-9A-Za-z]``.
* ``_`` - the underscore is used to distinguish this scheme from others, such as des-crypt.
-* :samp:`{rounds}` is the number of rounds, stored as a 4 character :data:`hash64 <passlib.utils.h64>`-encoded 24-bit integer (``EQ0.`` in the example).
+* :samp:`{rounds}` is the number of rounds, stored as a 4 character :data:`hash64 <passlib.utils.binary.h64>`-encoded 24-bit integer (``EQ0.`` in the example).
* :samp:`{salt}` is the salt, stored as as a 4 character hash64-encoded 24-bit integer (``jzhS`` in the example).
* :samp:`{checksum}` is the checksum, stored as an 11 character hash64-encoded 64-bit integer (``VeUyoSqLupI`` in the example).
@@ -68,11 +68,11 @@ The checksum is formed by a modified version of the DES cipher in encrypt mode:
1. Given a password string, a salt string, and rounds string.
2. The 4 character rounds string is decoded to a 24-bit integer rounds value;
- The rounds string uses little-endian :data:`hash64 <passlib.utils.h64>`
+ The rounds string uses little-endian :data:`hash64 <passlib.utils.binary.h64>`
encoding.
3. The 4 character salt string is decoded to a 24-bit integer salt value;
- The salt string uses little-endian :data:`hash64 <passlib.utils.h64>`
+ The salt string uses little-endian :data:`hash64 <passlib.utils.binary.h64>`
encoding.
4. The password is NULL-padded on the end to the smallest non-zero multiple of 8 bytes.
@@ -103,7 +103,7 @@ The checksum is formed by a modified version of the DES cipher in encrypt mode:
lsb-padded with 2 zero bits.
9. The resulting 66-bit integer is encoded in big-endian order
- using the :data:`hash64-big <passlib.utils.h64big>` format.
+ using the :data:`hash64-big <passlib.utils.binary.h64big>` format.
.. _bsdi-crypt-security-issues:
diff --git a/docs/lib/passlib.hash.cisco_pix.rst b/docs/lib/passlib.hash.cisco_pix.rst
index 9ad9fdd..d879748 100644
--- a/docs/lib/passlib.hash.cisco_pix.rst
+++ b/docs/lib/passlib.hash.cisco_pix.rst
@@ -81,7 +81,7 @@ Interface
Format & Algorithm
==================
Cisco PIX / ASA hashes consist of a 12 byte digest, encoded as a 16 character
-:data:`HASH64 <passlib.utils.h64>`-encoded string. An example
+:data:`HASH64 <passlib.utils.binary.h64>`-encoded string. An example
hash (of ``"password"``) is ``"NuLKvvWGg.x9HEKO"``.
The PIX / ASA digests are calculated as follows:
@@ -110,7 +110,7 @@ The PIX / ASA digests are calculated as follows:
5. Discard every 4th byte of the 16-byte MD5 hash, starting
with the 4th byte.
-6. Encode the 12-byte result using :data:`HASH64 <passlib.utils.h64>`.
+6. Encode the 12-byte result using :data:`HASH64 <passlib.utils.binary.h64>`.
Security Issues
===============
diff --git a/docs/lib/passlib.hash.crypt16.rst b/docs/lib/passlib.hash.crypt16.rst
index 187c320..fc904d3 100644
--- a/docs/lib/passlib.hash.crypt16.rst
+++ b/docs/lib/passlib.hash.crypt16.rst
@@ -24,11 +24,11 @@ An example hash (of the string ``passphrase``) is ``aaX/UmCcBrceQ0kQGGWKTbuE``.
A crypt16 hash string has the format :samp:`{salt}{checksum_1}{checksum_2}`, where:
* :samp:`{salt}` is the salt, stored as a 2 character
- :data:`hash64 <passlib.utils.h64>`-encoded 12-bit integer (``aa`` in the
+ :data:`hash64 <passlib.utils.binary.h64>`-encoded 12-bit integer (``aa`` in the
example).
* each :samp:`{checksum_i}` is a separate checksum, stored as an 11 character
- :data:`hash64-big <passlib.utils.h64big>`-encoded 64-bit integer
+ :data:`hash64-big <passlib.utils.binary.h64big>`-encoded 64-bit integer
(``X/UmCcBrceQ`` and ``0kQGGWKTbuE`` in the example).
.. note::
@@ -47,7 +47,7 @@ The crypt16 algorithm uses a weakened version of the des-crypt algorithm:
1. Given a password string and a salt string.
2. The 2 character salt string is decoded to a 12-bit integer salt value;
- The salt string uses little-endian :data:`hash64 <passlib.utils.h64>`
+ The salt string uses little-endian :data:`hash64 <passlib.utils.binary.h64>`
encoding.
3. If the password is larger than 16 bytes, the end is truncated to 16 bytes.
@@ -71,7 +71,7 @@ The crypt16 algorithm uses a weakened version of the des-crypt algorithm:
lsb-padded with 2 zero bits.
7. The resulting 66-bit integer is encoded in big-endian order
- using the :data:`hash64-big <passlib.utils.h64big>` format.
+ using the :data:`hash64-big <passlib.utils.binary.h64big>` format.
This is the first checksum segment.
8. The second checksum segment is created by repeating
diff --git a/docs/lib/passlib.hash.des_crypt.rst b/docs/lib/passlib.hash.des_crypt.rst
index dfd61f5..7aa6528 100644
--- a/docs/lib/passlib.hash.des_crypt.rst
+++ b/docs/lib/passlib.hash.des_crypt.rst
@@ -41,7 +41,7 @@ Interface
Format
======
A des-crypt hash string consists of 13 characters, drawn from ``[./0-9A-Za-z]``.
-The first 2 characters form a :data:`hash64 <passlib.utils.h64>`-encoded
+The first 2 characters form a :data:`hash64 <passlib.utils.binary.h64>`-encoded
12 bit integer used as the salt, with the remaining characters
forming a hash64-encoded 64-bit integer checksum.
@@ -59,7 +59,7 @@ The checksum is formed by a modified version of the DES cipher in encrypt mode:
1. Given a password string and a salt string.
2. The 2 character salt string is decoded to a 12-bit integer salt value;
- The salt string uses little-endian :data:`hash64 <passlib.utils.h64>`
+ The salt string uses little-endian :data:`hash64 <passlib.utils.binary.h64>`
encoding.
3. If the password is less than 8 bytes, it's NULL padded at the end to 8 bytes.
@@ -86,7 +86,7 @@ The checksum is formed by a modified version of the DES cipher in encrypt mode:
lsb-padded with 2 zero bits.
7. The resulting 66-bit integer is encoded in big-endian order using the
- :data:`hash64-big <passlib.utils.h64big>` format.
+ :data:`hash64-big <passlib.utils.binary.h64big>` format.
Security Issues
===============
@@ -117,7 +117,7 @@ This implementation of des-crypt differs from others in a few ways:
* Restricted salt string character set:
The underlying algorithm expects salt strings to use the
- :data:`hash64 <passlib.utils.HASH64_CHARS>` character set to encode
+ :data:`hash64 <passlib.utils.binary.HASH64_CHARS>` character set to encode
a 12-bit integer. Many implementations of des-crypt will
accept a salt containing other characters, but
vary wildly in how they are handled, including errors and implementation-specific value mappings.
diff --git a/docs/lib/passlib.hash.dlitz_pbkdf2_sha1.rst b/docs/lib/passlib.hash.dlitz_pbkdf2_sha1.rst
index 270cd75..e32b525 100644
--- a/docs/lib/passlib.hash.dlitz_pbkdf2_sha1.rst
+++ b/docs/lib/passlib.hash.dlitz_pbkdf2_sha1.rst
@@ -44,11 +44,11 @@ where:
stored as lowercase hexadecimal number with no zero-padding (in the example: ``2710`` or 10000 iterations).
* :samp:`{salt}` is the salt string, which can be any number of characters,
- drawn from the :data:`hash64 charset <passlib.utils.HASH64_CHARS>`
+ drawn from the :data:`hash64 charset <passlib.utils.binary.HASH64_CHARS>`
(``.pPqsEwHD7MiECU0`` in the example).
* :samp:`{checksum}` is 32 characters, which encode
- the resulting 24-byte PBKDF2 derived key using :func:`~passlib.utils.ab64_encode`
+ the resulting 24-byte PBKDF2 derived key using :func:`~passlib.utils.binary.ab64_encode`
(``b8TQ5AMQemtlaSgegw5Je.JBE3QQhLbO`` in the example).
In order to generate the checksum, the password is first encoded into UTF-8 if it's unicode.
diff --git a/docs/lib/passlib.hash.md5_crypt.rst b/docs/lib/passlib.hash.md5_crypt.rst
index 3251f12..844b6b6 100644
--- a/docs/lib/passlib.hash.md5_crypt.rst
+++ b/docs/lib/passlib.hash.md5_crypt.rst
@@ -146,7 +146,7 @@ The MD5-Crypt algorithm [#f1]_ calculates a checksum as follows:
following order: ``12,6,0,13,7,1,14,8,2,15,9,3,5,10,4,11``.
18. Encode the resulting 16 byte string into a 22 character
- :data:`hash64 <passlib.utils.h64>`-encoded string
+ :data:`hash64 <passlib.utils.binary.h64>`-encoded string
(the 2 msb bits encoded by the last hash64 character are used as 0 padding).
This results in the portion of the md5 crypt hash string referred to as :samp:`{checksum}` in the format section.
@@ -172,7 +172,7 @@ Passlib's implementation of md5-crypt differs from the reference implementation
The underlying algorithm can unambiguously handle salt strings
which contain any possible byte value besides ``\x00`` and ``$``.
However, Passlib strictly limits salts to the
- :data:`hash64 <passlib.utils.HASH64_CHARS>` character set,
+ :data:`hash64 <passlib.utils.binary.HASH64_CHARS>` character set,
as nearly all implementations of md5-crypt generate
and expect salts containing those characters,
but may have unexpected behaviors for other character values.
diff --git a/docs/lib/passlib.hash.pbkdf2_digest.rst b/docs/lib/passlib.hash.pbkdf2_digest.rst
index ff82c4f..10e68ba 100644
--- a/docs/lib/passlib.hash.pbkdf2_digest.rst
+++ b/docs/lib/passlib.hash.pbkdf2_digest.rst
@@ -84,10 +84,10 @@ follow the same format, :samp:`$pbkdf2-{digest}${rounds}${salt}${checksum}`.
this is encoded as a positive decimal number with no zero-padding
(``6400`` in the example).
-* :samp:`{salt}` - this is the :func:`adapted base64 encoding <passlib.utils.ab64_encode>`
+* :samp:`{salt}` - this is the :func:`adapted base64 encoding <passlib.utils.binary.ab64_encode>`
of the raw salt bytes passed into the PBKDF2 function.
-* :samp:`{checksum}` - this is the :func:`adapted base64 encoding <passlib.utils.ab64_encode>`
+* :samp:`{checksum}` - this is the :func:`adapted base64 encoding <passlib.utils.binary.ab64_encode>`
of the raw derived key bytes returned from the PBKDF2 function.
Each scheme uses the digest size of its specific hash algorithm (:samp:`{digest}`)
as the size of the raw derived key. This is enlarged
@@ -99,7 +99,7 @@ The password is encoded into UTF-8 if not already encoded,
and run through :func:`~passlib.crypto.digest.pbkdf2_hmac`
along with the decoded salt, the number of rounds,
and a prf built from HMAC + the respective message digest.
-The result is then encoded using :func:`~passlib.utils.ab64_encode`.
+The result is then encoded using :func:`~passlib.utils.binary.ab64_encode`.
.. rubric:: Footnotes
diff --git a/docs/lib/passlib.hash.scram.rst b/docs/lib/passlib.hash.scram.rst
index 9a6a70e..a1f63b6 100644
--- a/docs/lib/passlib.hash.scram.rst
+++ b/docs/lib/passlib.hash.scram.rst
@@ -119,13 +119,13 @@ An scram hash string has the format :samp:`$scram${rounds}${salt}${alg1}={digest
zero-padding not allowed. this value must be in ``range(1, 2**32)``.
* :samp:`{salt}` is a base64 salt string (``.Z/znnNOKWUsBaCU`` in the example),
- encoded using :func:`~passlib.utils.ab64_encode`.
+ encoded using :func:`~passlib.utils.binary.ab64_encode`.
* :samp:`{alg}` is a lowercase IANA hash function name [#hnames]_, which should
match the digest in the SCRAM mechanism name.
* :samp:`{digest}` is a base64 digest for the specific algorithm,
- encoded using :func:`~passlib.utils.ab64_encode`.
+ encoded using :func:`~passlib.utils.binary.ab64_encode`.
Digests for ``sha-1``, ``sha-256``, and ``sha-512`` are present in the example.
* There will always be one or more :samp:`{alg}={digest}` pairs, separated by a
diff --git a/docs/lib/passlib.hash.sha1_crypt.rst b/docs/lib/passlib.hash.sha1_crypt.rst
index 88c205e..f8be4d8 100644
--- a/docs/lib/passlib.hash.sha1_crypt.rst
+++ b/docs/lib/passlib.hash.sha1_crypt.rst
@@ -88,7 +88,7 @@ in a few ways:
The underlying algorithm can unambiguously handle salt strings
which contain any possible byte value besides ``\x00`` and ``$``.
However, Passlib strictly limits salts to the
- :data:`hash64 <passlib.utils.HASH64_CHARS>` character set,
+ :data:`hash64 <passlib.utils.binary.HASH64_CHARS>` character set,
as nearly all implementations of sha1-crypt generate
and expect salts containing those characters.
diff --git a/docs/lib/passlib.hash.sha256_crypt.rst b/docs/lib/passlib.hash.sha256_crypt.rst
index 4c098ba..b1e23e8 100644
--- a/docs/lib/passlib.hash.sha256_crypt.rst
+++ b/docs/lib/passlib.hash.sha256_crypt.rst
@@ -115,7 +115,7 @@ and other implementations, in a few ways:
The underlying algorithm can unambiguously handle salt strings
which contain any possible byte value besides ``\x00`` and ``$``.
However, Passlib strictly limits salts to the
- :data:`hash64 <passlib.utils.HASH64_CHARS>` character set,
+ :data:`hash64 <passlib.utils.binary.HASH64_CHARS>` character set,
as nearly all implementations of sha256-crypt generate
and expect salts containing those characters,
but may have unexpected behaviors for other character values.
diff --git a/docs/lib/passlib.ifc.rst b/docs/lib/passlib.ifc.rst
index 7612a95..8118a4c 100644
--- a/docs/lib/passlib.ifc.rst
+++ b/docs/lib/passlib.ifc.rst
@@ -604,7 +604,7 @@ and the following attributes should be defined:
in a salt string.
For most :ref:`modular-crypt-format` hashes,
- this is equal to :data:`passlib.utils.HASH64_CHARS`.
+ this is equal to :data:`passlib.utils.binary.HASH64_CHARS`.
For the rare hashes where the ``salt`` parameter must be specified
in bytes, this will be a placeholder :class:`!bytes` object containing
all 256 possible byte values.
diff --git a/docs/lib/passlib.utils.binary.rst b/docs/lib/passlib.utils.binary.rst
new file mode 100644
index 0000000..0c5847d
--- /dev/null
+++ b/docs/lib/passlib.utils.binary.rst
@@ -0,0 +1,87 @@
+=====================================================
+:mod:`passlib.utils.binary` - Binary Helper Functions
+=====================================================
+
+.. module:: passlib.utils.binary
+ :synopsis: internal helpers for binary data
+
+.. warning::
+
+ This module is primarily used as an internal support module.
+ Its interface has not been finalized yet, and may be changed somewhat
+ between major releases of Passlib, as the internal code is cleaned up
+ and simplified.
+
+Constants
+=========
+
+Base64 Encoding
+===============
+
+Base64Engine Class
+------------------
+Passlib has to deal with a number of different Base64 encodings,
+with varying endianness, as well as wildly different character <-> value
+mappings. This is all encapsulated in the :class:`Base64Engine` class,
+which provides common encoding actions for an arbitrary base64-style encoding
+scheme. There are also a couple of predefined instances which are commonly
+used by the hashes in Passlib.
+
+.. autoclass:: Base64Engine
+
+Common Character Maps
+---------------------
+.. data:: BASE64_CHARS
+
+ Character map used by standard MIME-compatible Base64 encoding scheme.
+
+.. data:: HASH64_CHARS
+
+ Base64 character map used by a number of hash formats;
+ the ordering is wildly different from the standard base64 character map.
+
+ This encoding system appears to have originated with
+ :class:`~passlib.hash.des_crypt`, but is used by
+ :class:`~passlib.hash.md5_crypt`, :class:`~passlib.hash.sha256_crypt`,
+ and others. Within Passlib, this encoding is referred to as the "hash64" encoding,
+ to distinguish it from normal base64 and others.
+
+.. data:: BCRYPT_CHARS
+
+ Base64 character map used by :class:`~passlib.hash.bcrypt`.
+ The ordering is wildly different from both the standard base64 character map,
+ and the common hash64 character map.
+
+Predefined Instances
+--------------------
+.. data:: h64
+
+ Predefined instance of :class:`Base64Engine` which uses
+ the :data:`!HASH64_CHARS` character map and little-endian encoding.
+ (see :data:`HASH64_CHARS` for more details).
+
+.. data:: h64big
+
+ Predefined variant of :data:`h64` which uses big-endian encoding.
+ This is mainly used by :class:`~passlib.hash.des_crypt`.
+
+.. versionchanged:: 1.6
+ Previous versions of Passlib contained
+ a module named :mod:`!passlib.utils.h64`; As of Passlib 1.6 this
+ was replaced by the ``h64`` and ``h64big`` instances of
+ the :class:`Base64Engine` class;
+ the interface remains mostly unchanged.
+
+
+Other
+-----
+.. autofunction:: ab64_encode
+.. autofunction:: ab64_decode
+.. autofunction:: b32_encode
+.. autofunction:: b32_decode
+
+..
+ .. data:: AB64_CHARS
+
+ Variant of standard Base64 character map used by some
+ custom Passlib hashes (see :func:`ab64_encode`).
diff --git a/docs/lib/passlib.utils.rst b/docs/lib/passlib.utils.rst
index fc08f13..6f05703 100644
--- a/docs/lib/passlib.utils.rst
+++ b/docs/lib/passlib.utils.rst
@@ -108,75 +108,6 @@ Encoding Helpers
.. autofunction:: to_unicode
.. autofunction:: to_native_str
-Base64 Encoding
-===============
-
-Base64Engine Class
-------------------
-Passlib has to deal with a number of different Base64 encodings,
-with varying endianness, as well as wildly different character <-> value
-mappings. This is all encapsulated in the :class:`Base64Engine` class,
-which provides common encoding actions for an arbitrary base64-style encoding
-scheme. There are also a couple of predefined instances which are commonly
-used by the hashes in Passlib.
-
-.. autoclass:: Base64Engine
-
-Common Character Maps
----------------------
-.. data:: BASE64_CHARS
-
- Character map used by standard MIME-compatible Base64 encoding scheme.
-
-.. data:: HASH64_CHARS
-
- Base64 character map used by a number of hash formats;
- the ordering is wildly different from the standard base64 character map.
-
- This encoding system appears to have originated with
- :class:`~passlib.hash.des_crypt`, but is used by
- :class:`~passlib.hash.md5_crypt`, :class:`~passlib.hash.sha256_crypt`,
- and others. Within Passlib, this encoding is referred as the "hash64" encoding,
- to distinguish it from normal base64 and others.
-
-.. data:: BCRYPT_CHARS
-
- Base64 character map used by :class:`~passlib.hash.bcrypt`.
- The ordering is wildly different from both the standard base64 character map,
- and the common hash64 character map.
-
-Predefined Instances
---------------------
-.. data:: h64
-
- Predefined instance of :class:`Base64Engine` which uses
- the :data:`!HASH64_CHARS` character map and little-endian encoding.
- (see :data:`HASH64_CHARS` for more details).
-
-.. data:: h64big
-
- Predefined variant of :data:`h64` which uses big-endian encoding.
- This is mainly used by :class:`~passlib.hash.des_crypt`.
-
-.. versionchanged:: 1.6
- Previous versions of Passlib contained
- a module named :mod:`!passlib.utils.h64`; As of Passlib 1.6 this
- was replaced by the the ``h64`` and ``h64big`` instances of
- the :class:`Base64Engine` class;
- the interface remains mostly unchanged.
-
-
-Other
------
-.. autofunction:: ab64_encode
-.. autofunction:: ab64_decode
-
-..
- .. data:: AB64_CHARS
-
- Variant of standard Base64 character map used by some
- custom Passlib hashes (see :func:`ab64_encode`).
-
..
Host OS
=======
@@ -215,8 +146,10 @@ There are also a few sub modules which provide additional utility functions:
:maxdepth: 1
passlib.utils.handlers
+ passlib.utils.binary
passlib.utils.des
passlib.utils.pbkdf2
..
+ passlib.utils.decor
passlib.utils.compat
diff --git a/passlib/context.py b/passlib/context.py
index 98cee6c..e9d1e23 100644
--- a/passlib/context.py
+++ b/passlib/context.py
@@ -15,8 +15,9 @@ from passlib.exc import ExpectedStringError, ExpectedTypeError, PasslibConfigWar
from passlib.registry import get_crypt_handler, _validate_handler_name
from passlib.utils import (handlers as uh, to_bytes,
to_unicode, splitcomma,
- as_bool, timer, rng, getrandstr, BASE64_CHARS,
+ as_bool, timer, rng, getrandstr,
)
+from passlib.utils.binary import BASE64_CHARS
from passlib.utils.compat import (iteritems, num_types, irange,
PY2, PY3, unicode, SafeConfigParser,
NativeStringIO, BytesIO,
diff --git a/passlib/crypto/_blowfish/__init__.py b/passlib/crypto/_blowfish/__init__.py
index 57cb9f2..1aa1c85 100644
--- a/passlib/crypto/_blowfish/__init__.py
+++ b/passlib/crypto/_blowfish/__init__.py
@@ -54,7 +54,8 @@ released under the BSD license::
from itertools import chain
import struct
# pkg
-from passlib.utils import bcrypt64, getrandbytes, rng
+from passlib.utils import getrandbytes, rng
+from passlib.utils.binary import bcrypt64
from passlib.utils.compat import BytesIO, unicode, u, native_string_types
from passlib.crypto._blowfish.unrolled import BlowfishEngine
# local
diff --git a/passlib/handlers/argon2.py b/passlib/handlers/argon2.py
index 2cee4b3..578c2c5 100644
--- a/passlib/handlers/argon2.py
+++ b/passlib/handlers/argon2.py
@@ -28,7 +28,8 @@ _argon2pure = None # dynamically imported by _load_backend_argon2pure()
# pkg
from passlib import exc
from passlib.crypto.digest import MAX_UINT32
-from passlib.utils import to_bytes, b64s_encode, b64s_decode
+from passlib.utils import to_bytes
+from passlib.utils.binary import b64s_encode, b64s_decode
from passlib.utils.compat import u, unicode, bascii_to_str
import passlib.utils.handlers as uh
# local
diff --git a/passlib/handlers/bcrypt.py b/passlib/handlers/bcrypt.py
index 8f8cd62..117bc64 100644
--- a/passlib/handlers/bcrypt.py
+++ b/passlib/handlers/bcrypt.py
@@ -25,8 +25,9 @@ _bcryptor = None # dynamically imported by _load_backend_bcryptor()
# pkg
_builtin_bcrypt = None # dynamically imported by _load_backend_builtin()
from passlib.exc import PasslibHashWarning, PasslibSecurityWarning, PasslibSecurityError
-from passlib.utils import bcrypt64, safe_crypt, repeat_string, to_bytes, parse_version, \
+from passlib.utils import safe_crypt, repeat_string, to_bytes, parse_version, \
rng, getrandstr, test_crypt, to_unicode
+from passlib.utils.binary import bcrypt64
from passlib.utils.compat import u, uascii_to_str, unicode, str_to_uascii
import passlib.utils.handlers as uh
diff --git a/passlib/handlers/cisco.py b/passlib/handlers/cisco.py
index fb1082b..186c247 100644
--- a/passlib/handlers/cisco.py
+++ b/passlib/handlers/cisco.py
@@ -9,7 +9,8 @@ import logging; log = logging.getLogger(__name__)
from warnings import warn
# site
# pkg
-from passlib.utils import h64, right_pad_string, to_unicode
+from passlib.utils import right_pad_string, to_unicode
+from passlib.utils.binary import h64
from passlib.utils.compat import unicode, u, join_byte_values, \
join_byte_elems, iter_byte_values, uascii_to_str
import passlib.utils.handlers as uh
diff --git a/passlib/handlers/des_crypt.py b/passlib/handlers/des_crypt.py
index ad08d75..9561ab4 100644
--- a/passlib/handlers/des_crypt.py
+++ b/passlib/handlers/des_crypt.py
@@ -8,7 +8,8 @@ import logging; log = logging.getLogger(__name__)
from warnings import warn
# site
# pkg
-from passlib.utils import h64, h64big, safe_crypt, test_crypt, to_unicode
+from passlib.utils import safe_crypt, test_crypt, to_unicode
+from passlib.utils.binary import h64, h64big
from passlib.utils.compat import byte_elem_value, u, uascii_to_str, unicode, suppress_cause
from passlib.crypto.des import des_encrypt_int_block
import passlib.utils.handlers as uh
diff --git a/passlib/handlers/django.py b/passlib/handlers/django.py
index c531d88..88906f5 100644
--- a/passlib/handlers/django.py
+++ b/passlib/handlers/django.py
@@ -11,7 +11,8 @@ import logging; log = logging.getLogger(__name__)
# pkg
from passlib.handlers.bcrypt import _wrapped_bcrypt
from passlib.hash import argon2, bcrypt, pbkdf2_sha1, pbkdf2_sha256
-from passlib.utils import to_unicode, rng, getrandstr, BASE64_CHARS
+from passlib.utils import to_unicode, rng, getrandstr
+from passlib.utils.binary import BASE64_CHARS
from passlib.utils.compat import str_to_uascii, uascii_to_str, unicode, u
from passlib.crypto.digest import pbkdf2_hmac
import passlib.utils.handlers as uh
diff --git a/passlib/handlers/md5_crypt.py b/passlib/handlers/md5_crypt.py
index 1fb019f..fc1fb1d 100644
--- a/passlib/handlers/md5_crypt.py
+++ b/passlib/handlers/md5_crypt.py
@@ -7,7 +7,8 @@ from hashlib import md5
import logging; log = logging.getLogger(__name__)
# site
# pkg
-from passlib.utils import h64, safe_crypt, test_crypt, repeat_string
+from passlib.utils import safe_crypt, test_crypt, repeat_string
+from passlib.utils.binary import h64
from passlib.utils.compat import unicode, u
import passlib.utils.handlers as uh
# local
diff --git a/passlib/handlers/pbkdf2.py b/passlib/handlers/pbkdf2.py
index 5d9943f..274278d 100644
--- a/passlib/handlers/pbkdf2.py
+++ b/passlib/handlers/pbkdf2.py
@@ -8,7 +8,8 @@ from base64 import b64encode, b64decode
import logging; log = logging.getLogger(__name__)
# site
# pkg
-from passlib.utils import ab64_decode, ab64_encode, to_unicode
+from passlib.utils import to_unicode
+from passlib.utils.binary import ab64_decode, ab64_encode
from passlib.utils.compat import str_to_bascii, u, uascii_to_str, unicode
from passlib.crypto.digest import pbkdf2_hmac
import passlib.utils.handlers as uh
diff --git a/passlib/handlers/phpass.py b/passlib/handlers/phpass.py
index 4c458c8..6736f0f 100644
--- a/passlib/handlers/phpass.py
+++ b/passlib/handlers/phpass.py
@@ -13,7 +13,7 @@ from hashlib import md5
import logging; log = logging.getLogger(__name__)
# site
# pkg
-from passlib.utils import h64
+from passlib.utils.binary import h64
from passlib.utils.compat import u, uascii_to_str, unicode
import passlib.utils.handlers as uh
# local
diff --git a/passlib/handlers/scram.py b/passlib/handlers/scram.py
index 1a9b6d5..87bfabd 100644
--- a/passlib/handlers/scram.py
+++ b/passlib/handlers/scram.py
@@ -6,8 +6,8 @@
import logging; log = logging.getLogger(__name__)
# site
# pkg
-from passlib.utils import ab64_decode, ab64_encode, consteq, saslprep, \
- to_native_str, splitcomma
+from passlib.utils import consteq, saslprep, to_native_str, splitcomma
+from passlib.utils.binary import ab64_decode, ab64_encode
from passlib.utils.compat import bascii_to_str, iteritems, u, native_string_types
from passlib.crypto.digest import pbkdf2_hmac, norm_hash_name
import passlib.utils.handlers as uh
diff --git a/passlib/handlers/scrypt.py b/passlib/handlers/scrypt.py
index 1d0d893..1686fda 100644
--- a/passlib/handlers/scrypt.py
+++ b/passlib/handlers/scrypt.py
@@ -8,7 +8,8 @@ import logging; log = logging.getLogger(__name__)
# site
# pkg
from passlib.crypto import scrypt as _scrypt
-from passlib.utils import h64, to_bytes, b64s_decode, b64s_encode
+from passlib.utils import h64, to_bytes
+from passlib.utils.binary import h64, b64s_decode, b64s_encode
from passlib.utils.compat import u, bascii_to_str, suppress_cause
from passlib.utils.decor import classproperty
import passlib.utils.handlers as uh
diff --git a/passlib/handlers/sha1_crypt.py b/passlib/handlers/sha1_crypt.py
index b66ee17..d3e972c 100644
--- a/passlib/handlers/sha1_crypt.py
+++ b/passlib/handlers/sha1_crypt.py
@@ -9,7 +9,8 @@
import logging; log = logging.getLogger(__name__)
# site
# pkg
-from passlib.utils import h64, safe_crypt, test_crypt
+from passlib.utils import safe_crypt, test_crypt
+from passlib.utils.binary import h64
from passlib.utils.compat import u, unicode, irange
from passlib.crypto.digest import compile_hmac
import passlib.utils.handlers as uh
diff --git a/passlib/handlers/sha2_crypt.py b/passlib/handlers/sha2_crypt.py
index 99c947b..d12e3f1 100644
--- a/passlib/handlers/sha2_crypt.py
+++ b/passlib/handlers/sha2_crypt.py
@@ -7,8 +7,9 @@ import hashlib
import logging; log = logging.getLogger(__name__)
# site
# pkg
-from passlib.utils import h64, safe_crypt, test_crypt, \
+from passlib.utils import safe_crypt, test_crypt, \
repeat_string, to_unicode
+from passlib.utils.binary import h64
from passlib.utils.compat import byte_elem_value, u, \
uascii_to_str, unicode
import passlib.utils.handlers as uh
diff --git a/passlib/handlers/sun_md5_crypt.py b/passlib/handlers/sun_md5_crypt.py
index c0b2d33..0eeb4e7 100644
--- a/passlib/handlers/sun_md5_crypt.py
+++ b/passlib/handlers/sun_md5_crypt.py
@@ -17,7 +17,8 @@ import logging; log = logging.getLogger(__name__)
from warnings import warn
# site
# pkg
-from passlib.utils import h64, to_unicode
+from passlib.utils import to_unicode
+from passlib.utils.binary import h64
from passlib.utils.compat import byte_elem_value, irange, u, \
uascii_to_str, unicode, str_to_bascii
import passlib.utils.handlers as uh
diff --git a/passlib/tests/test_utils.py b/passlib/tests/test_utils.py
index d6a4555..3fd0001 100644
--- a/passlib/tests/test_utils.py
+++ b/passlib/tests/test_utils.py
@@ -549,7 +549,7 @@ class Base64EngineTest(TestCase):
# NOTE: most Base64Engine testing done via _Base64Test subclasses below.
def test_constructor(self):
- from passlib.utils import Base64Engine, AB64_CHARS
+ from passlib.utils.binary import Base64Engine, AB64_CHARS
# bad charmap type
self.assertRaises(TypeError, Base64Engine, 1)
@@ -562,7 +562,7 @@ class Base64EngineTest(TestCase):
def test_ab64_decode(self):
"""ab64_decode()"""
- from passlib.utils import ab64_decode
+ from passlib.utils.binary import ab64_decode
# accept bytes or unicode
self.assertEqual(ab64_decode(b"abc"), hb("69b7"))
@@ -590,7 +590,7 @@ class Base64EngineTest(TestCase):
def test_ab64_encode(self):
"""ab64_encode()"""
- from passlib.utils import ab64_encode
+ from passlib.utils.binary import ab64_encode
# accept bytes
self.assertEqual(ab64_encode(hb("69b7")), b"abc")
@@ -609,7 +609,7 @@ class Base64EngineTest(TestCase):
def test_b64s_decode(self):
"""b64s_decode()"""
- from passlib.utils import b64s_decode
+ from passlib.utils.binary import b64s_decode
# accept bytes or unicode
self.assertEqual(b64s_decode(b"abc"), hb("69b7"))
@@ -632,7 +632,7 @@ class Base64EngineTest(TestCase):
def test_b64s_encode(self):
"""b64s_encode()"""
- from passlib.utils import b64s_encode
+ from passlib.utils.binary import b64s_encode
# accept bytes
self.assertEqual(b64s_encode(hb("69b7")), b"abc")
@@ -966,7 +966,7 @@ class _Base64Test(TestCase):
# NOTE: testing H64 & H64Big should be sufficient to verify
# that Base64Engine() works in general.
-from passlib.utils import h64, h64big
+from passlib.utils.binary import h64, h64big
class H64_Test(_Base64Test):
"""test H64 codec functions"""
diff --git a/passlib/tests/utils.py b/passlib/tests/utils.py
index d974a41..89ff2e7 100644
--- a/passlib/tests/utils.py
+++ b/passlib/tests/utils.py
@@ -1347,7 +1347,7 @@ class HandlerCase(TestCase):
def prepare_salt(self, salt):
"""prepare generated salt"""
if self.fuzz_salts_need_bcrypt_repair:
- from passlib.utils import bcrypt64
+ from passlib.utils.binary import bcrypt64
salt = bcrypt64.repair_unused(salt)
return salt
diff --git a/passlib/totp.py b/passlib/totp.py
index a2e91e1..f0edf36 100644
--- a/passlib/totp.py
+++ b/passlib/totp.py
@@ -36,7 +36,8 @@ except ImportError:
from passlib import exc
from passlib.exc import TokenError, MalformedTokenError, InvalidTokenError, UsedTokenError
from passlib.utils import (to_unicode, to_bytes, consteq,
- getrandbytes, rng, SequenceMixin, xor_bytes, getrandstr, BASE64_CHARS)
+ getrandbytes, rng, SequenceMixin, xor_bytes, getrandstr)
+from passlib.utils.binary import BASE64_CHARS, b32encode, b32decode
from passlib.utils.compat import (u, unicode, native_string_types, bascii_to_str, int_types, num_types,
irange, byte_elem_value, UnicodeIO, suppress_cause)
from passlib.utils.decor import hybrid_method, memoized_property
@@ -118,28 +119,6 @@ def group_string(value, sep="-"):
# encoding helpers
#-----------------------------------------------------------------------------
-def b32encode(key):
- """
- wrapper around :func:`base64.b32encode` which strips padding,
- and returns a native string.
- """
- # NOTE: using upper case by default here, since base32 has less ambiguity
- # in that case ('i & l' are visually more similar than 'I & L')
- return bascii_to_str(base64.b32encode(key).rstrip(b"="))
-
-def b32decode(key):
- """
- wrapper around :func:`base64.b32decode`
- which handles common mistyped chars, and inserts padding.
- """
- if isinstance(key, unicode):
- key = key.encode("ascii")
- # XXX: could correct '1' -> 'I', but could be a mistyped lower-case 'l', so leaving it alone.
- key = key.replace(b"8", b"B") # replace commonly mistyped char
- key = key.replace(b"0", b"O") # ditto
- pad = -len(key) % 8 # pad things so final string is multiple of 8
- return base64.b32decode(key + b"=" * pad, True)
-
def _decode_bytes(key, format):
"""
internal TOTP() helper --
diff --git a/passlib/utils/__init__.py b/passlib/utils/__init__.py
index 701557d..d94af47 100644
--- a/passlib/utils/__init__.py
+++ b/passlib/utils/__init__.py
@@ -34,6 +34,12 @@ import types
from warnings import warn
# site
# pkg
+from passlib.utils.binary import (
+ # [remove these aliases in 2.0]
+ BASE64_CHARS, AB64_CHARS, HASH64_CHARS, BCRYPT_CHARS,
+ Base64Engine, LazyBase64Engine, h64, h64big, bcrypt64,
+ ab64_encode, ab64_decode, b64s_encode, b64s_decode
+)
from passlib.utils.decor import (
# [remove these aliases in 2.0]
deprecated_function,
@@ -71,11 +77,6 @@ __all__ = [
'to_unicode',
'to_native_str',
- # base64 helpers
- "BASE64_CHARS", "HASH64_CHARS", "BCRYPT_CHARS", "AB64_CHARS",
- "Base64Engine", "h64", "h64big",
- "ab64_encode", "ab64_decode",
-
# host OS
'has_crypt',
'test_crypt',
@@ -716,708 +717,6 @@ def as_bool(value, none=None, param="boolean"):
return bool(value)
#=============================================================================
-# base64-variant encoding
-#=============================================================================
-
-class Base64Engine(object):
- """Provides routines for encoding/decoding base64 data using
- arbitrary character mappings, selectable endianness, etc.
-
- :arg charmap:
- A string of 64 unique characters,
- which will be used to encode successive 6-bit chunks of data.
- A character's position within the string should correspond
- to its 6-bit value.
-
- :param big:
- Whether the encoding should be big-endian (default False).
-
- .. note::
- This class does not currently handle base64's padding characters
- in any way what so ever.
-
- Raw Bytes <-> Encoded Bytes
- ===========================
- The following methods convert between raw bytes,
- and strings encoded using the engine's specific base64 variant:
-
- .. automethod:: encode_bytes
- .. automethod:: decode_bytes
- .. automethod:: encode_transposed_bytes
- .. automethod:: decode_transposed_bytes
-
- ..
- .. automethod:: check_repair_unused
- .. automethod:: repair_unused
-
- Integers <-> Encoded Bytes
- ==========================
- The following methods allow encoding and decoding
- unsigned integers to and from the engine's specific base64 variant.
- Endianess is determined by the engine's ``big`` constructor keyword.
-
- .. automethod:: encode_int6
- .. automethod:: decode_int6
-
- .. automethod:: encode_int12
- .. automethod:: decode_int12
-
- .. automethod:: encode_int24
- .. automethod:: decode_int24
-
- .. automethod:: encode_int64
- .. automethod:: decode_int64
-
- Informational Attributes
- ========================
- .. attribute:: charmap
-
- unicode string containing list of characters used in encoding;
- position in string matches 6bit value of character.
-
- .. attribute:: bytemap
-
- bytes version of :attr:`charmap`
-
- .. attribute:: big
-
- boolean flag indicating this using big-endian encoding.
- """
-
- #===================================================================
- # instance attrs
- #===================================================================
- # public config
- bytemap = None # charmap as bytes
- big = None # little or big endian
-
- # filled in by init based on charmap.
- # (byte elem: single byte under py2, 8bit int under py3)
- _encode64 = None # maps 6bit value -> byte elem
- _decode64 = None # maps byte elem -> 6bit value
-
- # helpers filled in by init based on endianness
- _encode_bytes = None # throws IndexError if bad value (shouldn't happen)
- _decode_bytes = None # throws KeyError if bad char.
-
- #===================================================================
- # init
- #===================================================================
- def __init__(self, charmap, big=False):
- # validate charmap, generate encode64/decode64 helper functions.
- if isinstance(charmap, unicode):
- charmap = charmap.encode("latin-1")
- elif not isinstance(charmap, bytes):
- raise ExpectedStringError(charmap, "charmap")
- if len(charmap) != 64:
- raise ValueError("charmap must be 64 characters in length")
- if len(set(charmap)) != 64:
- raise ValueError("charmap must not contain duplicate characters")
- self.bytemap = charmap
- self._encode64 = charmap.__getitem__
- lookup = dict((value, idx) for idx, value in enumerate(charmap))
- self._decode64 = lookup.__getitem__
-
- # validate big, set appropriate helper functions.
- self.big = big
- if big:
- self._encode_bytes = self._encode_bytes_big
- self._decode_bytes = self._decode_bytes_big
- else:
- self._encode_bytes = self._encode_bytes_little
- self._decode_bytes = self._decode_bytes_little
-
- # TODO: support padding character
- ##if padding is not None:
- ## if isinstance(padding, unicode):
- ## padding = padding.encode("latin-1")
- ## elif not isinstance(padding, bytes):
- ## raise TypeError("padding char must be unicode or bytes")
- ## if len(padding) != 1:
- ## raise ValueError("padding must be single character")
- ##self.padding = padding
-
- @property
- def charmap(self):
- """charmap as unicode"""
- return self.bytemap.decode("latin-1")
-
- #===================================================================
- # encoding byte strings
- #===================================================================
- def encode_bytes(self, source):
- """encode bytes to base64 string.
-
- :arg source: byte string to encode.
- :returns: byte string containing encoded data.
- """
- if not isinstance(source, bytes):
- raise TypeError("source must be bytes, not %s" % (type(source),))
- chunks, tail = divmod(len(source), 3)
- if PY3:
- next_value = nextgetter(iter(source))
- else:
- next_value = nextgetter(ord(elem) for elem in source)
- gen = self._encode_bytes(next_value, chunks, tail)
- out = join_byte_elems(imap(self._encode64, gen))
- ##if tail:
- ## padding = self.padding
- ## if padding:
- ## out += padding * (3-tail)
- return out
-
- def _encode_bytes_little(self, next_value, chunks, tail):
- """helper used by encode_bytes() to handle little-endian encoding"""
- #
- # output bit layout:
- #
- # first byte: v1 543210
- #
- # second byte: v1 ....76
- # +v2 3210..
- #
- # third byte: v2 ..7654
- # +v3 10....
- #
- # fourth byte: v3 765432
- #
- idx = 0
- while idx < chunks:
- v1 = next_value()
- v2 = next_value()
- v3 = next_value()
- yield v1 & 0x3f
- yield ((v2 & 0x0f)<<2)|(v1>>6)
- yield ((v3 & 0x03)<<4)|(v2>>4)
- yield v3>>2
- idx += 1
- if tail:
- v1 = next_value()
- if tail == 1:
- # note: 4 msb of last byte are padding
- yield v1 & 0x3f
- yield v1>>6
- else:
- assert tail == 2
- # note: 2 msb of last byte are padding
- v2 = next_value()
- yield v1 & 0x3f
- yield ((v2 & 0x0f)<<2)|(v1>>6)
- yield v2>>4
-
- def _encode_bytes_big(self, next_value, chunks, tail):
- """helper used by encode_bytes() to handle big-endian encoding"""
- #
- # output bit layout:
- #
- # first byte: v1 765432
- #
- # second byte: v1 10....
- # +v2 ..7654
- #
- # third byte: v2 3210..
- # +v3 ....76
- #
- # fourth byte: v3 543210
- #
- idx = 0
- while idx < chunks:
- v1 = next_value()
- v2 = next_value()
- v3 = next_value()
- yield v1>>2
- yield ((v1&0x03)<<4)|(v2>>4)
- yield ((v2&0x0f)<<2)|(v3>>6)
- yield v3 & 0x3f
- idx += 1
- if tail:
- v1 = next_value()
- if tail == 1:
- # note: 4 lsb of last byte are padding
- yield v1>>2
- yield (v1&0x03)<<4
- else:
- assert tail == 2
- # note: 2 lsb of last byte are padding
- v2 = next_value()
- yield v1>>2
- yield ((v1&0x03)<<4)|(v2>>4)
- yield ((v2&0x0f)<<2)
-
- #===================================================================
- # decoding byte strings
- #===================================================================
-
- def decode_bytes(self, source):
- """decode bytes from base64 string.
-
- :arg source: byte string to decode.
- :returns: byte string containing decoded data.
- """
- if not isinstance(source, bytes):
- raise TypeError("source must be bytes, not %s" % (type(source),))
- ##padding = self.padding
- ##if padding:
- ## # TODO: add padding size check?
- ## source = source.rstrip(padding)
- chunks, tail = divmod(len(source), 4)
- if tail == 1:
- # only 6 bits left, can't encode a whole byte!
- raise ValueError("input string length cannot be == 1 mod 4")
- next_value = nextgetter(imap(self._decode64, source))
- try:
- return join_byte_values(self._decode_bytes(next_value, chunks, tail))
- except KeyError as err:
- raise ValueError("invalid character: %r" % (err.args[0],))
-
- def _decode_bytes_little(self, next_value, chunks, tail):
- """helper used by decode_bytes() to handle little-endian encoding"""
- #
- # input bit layout:
- #
- # first byte: v1 ..543210
- # +v2 10......
- #
- # second byte: v2 ....5432
- # +v3 3210....
- #
- # third byte: v3 ......54
- # +v4 543210..
- #
- idx = 0
- while idx < chunks:
- v1 = next_value()
- v2 = next_value()
- v3 = next_value()
- v4 = next_value()
- yield v1 | ((v2 & 0x3) << 6)
- yield (v2>>2) | ((v3 & 0xF) << 4)
- yield (v3>>4) | (v4<<2)
- idx += 1
- if tail:
- # tail is 2 or 3
- v1 = next_value()
- v2 = next_value()
- yield v1 | ((v2 & 0x3) << 6)
- # NOTE: if tail == 2, 4 msb of v2 are ignored (should be 0)
- if tail == 3:
- # NOTE: 2 msb of v3 are ignored (should be 0)
- v3 = next_value()
- yield (v2>>2) | ((v3 & 0xF) << 4)
-
- def _decode_bytes_big(self, next_value, chunks, tail):
- """helper used by decode_bytes() to handle big-endian encoding"""
- #
- # input bit layout:
- #
- # first byte: v1 543210..
- # +v2 ......54
- #
- # second byte: v2 3210....
- # +v3 ....5432
- #
- # third byte: v3 10......
- # +v4 ..543210
- #
- idx = 0
- while idx < chunks:
- v1 = next_value()
- v2 = next_value()
- v3 = next_value()
- v4 = next_value()
- yield (v1<<2) | (v2>>4)
- yield ((v2&0xF)<<4) | (v3>>2)
- yield ((v3&0x3)<<6) | v4
- idx += 1
- if tail:
- # tail is 2 or 3
- v1 = next_value()
- v2 = next_value()
- yield (v1<<2) | (v2>>4)
- # NOTE: if tail == 2, 4 lsb of v2 are ignored (should be 0)
- if tail == 3:
- # NOTE: 2 lsb of v3 are ignored (should be 0)
- v3 = next_value()
- yield ((v2&0xF)<<4) | (v3>>2)
-
- #===================================================================
- # encode/decode helpers
- #===================================================================
-
- # padmap2/3 - dict mapping last char of string ->
- # equivalent char with no padding bits set.
-
- def __make_padset(self, bits):
- """helper to generate set of valid last chars & bytes"""
- pset = set(c for i,c in enumerate(self.bytemap) if not i & bits)
- pset.update(c for i,c in enumerate(self.charmap) if not i & bits)
- return frozenset(pset)
-
- @memoized_property
- def _padinfo2(self):
- """mask to clear padding bits, and valid last bytes (for strings 2 % 4)"""
- # 4 bits of last char unused (lsb for big, msb for little)
- bits = 15 if self.big else (15<<2)
- return ~bits, self.__make_padset(bits)
-
- @memoized_property
- def _padinfo3(self):
- """mask to clear padding bits, and valid last bytes (for strings 3 % 4)"""
- # 2 bits of last char unused (lsb for big, msb for little)
- bits = 3 if self.big else (3<<4)
- return ~bits, self.__make_padset(bits)
-
- def check_repair_unused(self, source):
- """helper to detect & clear invalid unused bits in last character.
-
- :arg source:
- encoded data (as ascii bytes or unicode).
-
- :returns:
- `(True, result)` if the string was repaired,
- `(False, source)` if the string was ok as-is.
- """
- # figure out how many padding bits there are in last char.
- tail = len(source) & 3
- if tail == 2:
- mask, padset = self._padinfo2
- elif tail == 3:
- mask, padset = self._padinfo3
- elif not tail:
- return False, source
- else:
- raise ValueError("source length must != 1 mod 4")
-
- # check if last char is ok (padset contains bytes & unicode versions)
- last = source[-1]
- if last in padset:
- return False, source
-
- # we have dirty bits - repair the string by decoding last char,
- # clearing the padding bits via <mask>, and encoding new char.
- if isinstance(source, unicode):
- cm = self.charmap
- last = cm[cm.index(last) & mask]
- assert last in padset, "failed to generate valid padding char"
- else:
- # NOTE: this assumes ascii-compat encoding, and that
- # all chars used by encoding are 7-bit ascii.
- last = self._encode64(self._decode64(last) & mask)
- assert last in padset, "failed to generate valid padding char"
- if PY3:
- last = bytes([last])
- return True, source[:-1] + last
-
- def repair_unused(self, source):
- return self.check_repair_unused(source)[1]
-
- ##def transcode(self, source, other):
- ## return ''.join(
- ## other.charmap[self.charmap.index(char)]
- ## for char in source
- ## )
-
- ##def random_encoded_bytes(self, size, random=None, unicode=False):
- ## "return random encoded string of given size"
- ## data = getrandstr(random or rng,
- ## self.charmap if unicode else self.bytemap, size)
- ## return self.repair_unused(data)
-
- #===================================================================
- # transposed encoding/decoding
- #===================================================================
- def encode_transposed_bytes(self, source, offsets):
- """encode byte string, first transposing source using offset list"""
- if not isinstance(source, bytes):
- raise TypeError("source must be bytes, not %s" % (type(source),))
- tmp = join_byte_elems(source[off] for off in offsets)
- return self.encode_bytes(tmp)
-
- def decode_transposed_bytes(self, source, offsets):
- """decode byte string, then reverse transposition described by offset list"""
- # NOTE: if transposition does not use all bytes of source,
- # the original can't be recovered... and join_byte_elems() will throw
- # an error because 1+ values in <buf> will be None.
- tmp = self.decode_bytes(source)
- buf = [None] * len(offsets)
- for off, char in zip(offsets, tmp):
- buf[off] = char
- return join_byte_elems(buf)
-
- #===================================================================
- # integer decoding helpers - mainly used by des_crypt family
- #===================================================================
- def _decode_int(self, source, bits):
- """decode base64 string -> integer
-
- :arg source: base64 string to decode.
- :arg bits: number of bits in resulting integer.
-
- :raises ValueError:
- * if the string contains invalid base64 characters.
- * if the string is not long enough - it must be at least
- ``int(ceil(bits/6))`` in length.
-
- :returns:
- a integer in the range ``0 <= n < 2**bits``
- """
- if not isinstance(source, bytes):
- raise TypeError("source must be bytes, not %s" % (type(source),))
- big = self.big
- pad = -bits % 6
- chars = (bits+pad)/6
- if len(source) != chars:
- raise ValueError("source must be %d chars" % (chars,))
- decode = self._decode64
- out = 0
- try:
- for c in source if big else reversed(source):
- out = (out<<6) + decode(c)
- except KeyError:
- raise ValueError("invalid character in string: %r" % (c,))
- if pad:
- # strip padding bits
- if big:
- out >>= pad
- else:
- out &= (1<<bits)-1
- return out
-
- #---------------------------------------------------------------
- # optimized versions for common integer sizes
- #---------------------------------------------------------------
-
- def decode_int6(self, source):
- """decode single character -> 6 bit integer"""
- if not isinstance(source, bytes):
- raise TypeError("source must be bytes, not %s" % (type(source),))
- if len(source) != 1:
- raise ValueError("source must be exactly 1 byte")
- if PY3:
- # convert to 8bit int before doing lookup
- source = source[0]
- try:
- return self._decode64(source)
- except KeyError:
- raise ValueError("invalid character")
-
- def decode_int12(self, source):
- """decodes 2 char string -> 12-bit integer"""
- if not isinstance(source, bytes):
- raise TypeError("source must be bytes, not %s" % (type(source),))
- if len(source) != 2:
- raise ValueError("source must be exactly 2 bytes")
- decode = self._decode64
- try:
- if self.big:
- return decode(source[1]) + (decode(source[0])<<6)
- else:
- return decode(source[0]) + (decode(source[1])<<6)
- except KeyError:
- raise ValueError("invalid character")
-
- def decode_int24(self, source):
- """decodes 4 char string -> 24-bit integer"""
- if not isinstance(source, bytes):
- raise TypeError("source must be bytes, not %s" % (type(source),))
- if len(source) != 4:
- raise ValueError("source must be exactly 4 bytes")
- decode = self._decode64
- try:
- if self.big:
- return decode(source[3]) + (decode(source[2])<<6)+ \
- (decode(source[1])<<12) + (decode(source[0])<<18)
- else:
- return decode(source[0]) + (decode(source[1])<<6)+ \
- (decode(source[2])<<12) + (decode(source[3])<<18)
- except KeyError:
- raise ValueError("invalid character")
-
- def decode_int30(self, source):
- """decode 5 char string -> 30 bit integer"""
- return self._decode_int(source, 30)
-
- def decode_int64(self, source):
- """decode 11 char base64 string -> 64-bit integer
-
- this format is used primarily by des-crypt & variants to encode
- the DES output value used as a checksum.
- """
- return self._decode_int(source, 64)
-
- #===================================================================
- # integer encoding helpers - mainly used by des_crypt family
- #===================================================================
- def _encode_int(self, value, bits):
- """encode integer into base64 format
-
- :arg value: non-negative integer to encode
- :arg bits: number of bits to encode
-
- :returns:
- a string of length ``int(ceil(bits/6.0))``.
- """
- assert value >= 0, "caller did not sanitize input"
- pad = -bits % 6
- bits += pad
- if self.big:
- itr = irange(bits-6, -6, -6)
- # shift to add lsb padding.
- value <<= pad
- else:
- itr = irange(0, bits, 6)
- # padding is msb, so no change needed.
- return join_byte_elems(imap(self._encode64,
- ((value>>off) & 0x3f for off in itr)))
-
- #---------------------------------------------------------------
- # optimized versions for common integer sizes
- #---------------------------------------------------------------
-
- def encode_int6(self, value):
- """encodes 6-bit integer -> single hash64 character"""
- if value < 0 or value > 63:
- raise ValueError("value out of range")
- if PY3:
- return self.bytemap[value:value+1]
- else:
- return self._encode64(value)
-
- def encode_int12(self, value):
- """encodes 12-bit integer -> 2 char string"""
- if value < 0 or value > 0xFFF:
- raise ValueError("value out of range")
- raw = [value & 0x3f, (value>>6) & 0x3f]
- if self.big:
- raw = reversed(raw)
- return join_byte_elems(imap(self._encode64, raw))
-
- def encode_int24(self, value):
- """encodes 24-bit integer -> 4 char string"""
- if value < 0 or value > 0xFFFFFF:
- raise ValueError("value out of range")
- raw = [value & 0x3f, (value>>6) & 0x3f,
- (value>>12) & 0x3f, (value>>18) & 0x3f]
- if self.big:
- raw = reversed(raw)
- return join_byte_elems(imap(self._encode64, raw))
-
- def encode_int30(self, value):
- """decode 5 char string -> 30 bit integer"""
- if value < 0 or value > 0x3fffffff:
- raise ValueError("value out of range")
- return self._encode_int(value, 30)
-
- def encode_int64(self, value):
- """encode 64-bit integer -> 11 char hash64 string
-
- this format is used primarily by des-crypt & variants to encode
- the DES output value used as a checksum.
- """
- if value < 0 or value > 0xffffffffffffffff:
- raise ValueError("value out of range")
- return self._encode_int(value, 64)
-
- #===================================================================
- # eof
- #===================================================================
-
-class LazyBase64Engine(Base64Engine):
- """Base64Engine which delays initialization until it's accessed"""
- _lazy_opts = None
-
- def __init__(self, *args, **kwds):
- self._lazy_opts = (args, kwds)
-
- def _lazy_init(self):
- args, kwds = self._lazy_opts
- super(LazyBase64Engine, self).__init__(*args, **kwds)
- del self._lazy_opts
- self.__class__ = Base64Engine
-
- def __getattribute__(self, attr):
- if not attr.startswith("_"):
- self._lazy_init()
- return object.__getattribute__(self, attr)
-
-# common charmaps
-BASE64_CHARS = u("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
-AB64_CHARS = u("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789./")
-HASH64_CHARS = u("./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
-BCRYPT_CHARS = u("./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
-
-# common variants
-h64 = LazyBase64Engine(HASH64_CHARS)
-h64big = LazyBase64Engine(HASH64_CHARS, big=True)
-bcrypt64 = LazyBase64Engine(BCRYPT_CHARS, big=True)
-
-#=============================================================================
-# adapted-base64 encoding
-#=============================================================================
-_BASE64_STRIP = b"=\n"
-_BASE64_PAD1 = b"="
-_BASE64_PAD2 = b"=="
-
-# XXX: Passlib 1.8/1.9 -- deprecate everything that's using ab64_encode(),
-# have it start outputing b64s_encode() instead? can use a64_decode() to retain backwards compat.
-
-def ab64_encode(data):
- """
- encode using shortened base64 format which omits padding & whitespace.
- uses custom ``./`` altchars.
-
- it is primarily used by Passlib's custom pbkdf2 hashes.
- """
- return b64s_encode(data).replace(b"+", b".")
-
-def ab64_decode(data):
- """
- decode from shortened base64 format which omits padding & whitespace.
- uses custom ``./`` altchars, but supports decoding normal ``+/`` altchars as well.
-
- it is primarily used by Passlib's custom pbkdf2 hashes.
- """
- if isinstance(data, unicode):
- # needs bytes for replace() call, but want to accept ascii-unicode ala a2b_base64()
- try:
- data = data.encode("ascii")
- except UnicodeEncodeError:
- raise suppress_cause(ValueError("string argument should contain only ASCII characters"))
- return b64s_decode(data.replace(b".", b"+"))
-
-def b64s_encode(data):
- """
- encode using shortened base64 format which omits padding & whitespace.
- uses default ``+/`` altchars.
- """
- return b2a_base64(data).rstrip(_BASE64_STRIP)
-
-def b64s_decode(data):
- """
- decode from shortened base64 format which omits padding & whitespace.
- uses default ``+/`` altchars.
- """
- if isinstance(data, unicode):
- # needs bytes for replace() call, but want to accept ascii-unicode ala a2b_base64()
- try:
- data = data.encode("ascii")
- except UnicodeEncodeError:
- raise suppress_cause(ValueError("string argument should contain only ASCII characters"))
- off = len(data) & 3
- if off == 0:
- pass
- elif off == 2:
- data += _BASE64_PAD2
- elif off == 3:
- data += _BASE64_PAD1
- else: # off == 1
- raise ValueError("invalid base64 input")
- try:
- return a2b_base64(data)
- except _BinAsciiError as err:
- raise suppress_cause(TypeError(err))
-
-#=============================================================================
# host OS helpers
#=============================================================================
diff --git a/passlib/utils/binary.py b/passlib/utils/binary.py
new file mode 100644
index 0000000..521b64a
--- /dev/null
+++ b/passlib/utils/binary.py
@@ -0,0 +1,884 @@
+"""
+passlib.utils.binary - binary data encoding/decoding/manipulation
+"""
+#=============================================================================
+# imports
+#=============================================================================
+# core
+from __future__ import absolute_import, division, print_function
+from base64 import (
+ b64encode,
+ b64decode,
+ b32decode as _b32decode,
+ b32encode as _b32encode,
+)
+from binascii import b2a_base64, a2b_base64, Error as _BinAsciiError
+import logging
+log = logging.getLogger(__name__)
+# site
+# pkg
+from passlib import exc
+from passlib.utils.compat import (
+ PY3, bascii_to_str,
+ irange, imap, iter_byte_chars, join_byte_values, join_byte_elems,
+ nextgetter, suppress_cause,
+ u, unicode, unicode_or_bytes_types,
+)
+from passlib.utils.decor import memoized_property
+# from passlib.utils import BASE64_CHARS, HASH64_CHARS
+# local
+__all__ = [
+ # constants
+ "BASE64_CHARS", "PADDED_BASE64_CHARS",
+ "AB64_CHARS",
+ "HASH64_CHARS",
+ "BCRYPT_CHARS",
+ "HEX_CHARS", "LOWER_HEX_CHARS", "UPPER_HEX_CHARS",
+
+ "ALL_BYTE_VALUES",
+
+ # misc
+ "compile_byte_translation",
+
+ # base64
+ 'ab64_encode', 'ab64_decode',
+ 'b64s_encode', 'b64s_decode',
+
+ # base32
+ "b32encode", "b32decode",
+
+ # custom encodings
+ 'Base64Engine',
+ 'LazyBase64Engine',
+ 'h64',
+ 'h64big',
+ 'bcrypt64',
+]
+
+#=============================================================================
+# constant strings
+#=============================================================================
+
+#-------------------------------------------------------------
+# common salt_chars & checksum_chars values
+#-------------------------------------------------------------
+
+#: standard base64 charmap
+BASE64_CHARS = u("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
+
+#: alt base64 charmap -- "." instead of "+"
+AB64_CHARS = u("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789./")
+
+#: charmap used by HASH64 encoding.
+HASH64_CHARS = u("./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
+
+#: charmap used by BCrypt
+BCRYPT_CHARS = u("./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
+
+#: std base64 chars + padding char
+PADDED_BASE64_CHARS = BASE64_CHARS + u("=")
+
+#: all hex chars
+HEX_CHARS = u("0123456789abcdefABCDEF")
+
+#: upper case hex chars
+UPPER_HEX_CHARS = u("0123456789ABCDEF")
+
+#: lower case hex chars
+LOWER_HEX_CHARS = u("0123456789abcdef")
+
+#-------------------------------------------------------------
+# byte strings
+#-------------------------------------------------------------
+
+#: special byte string containing all possible byte values
+#: NOTE: for efficiency, this is treated as singleton by some of the code
+ALL_BYTE_VALUES = join_byte_values(irange(256))
+
+#: some string constants we reuse
+B_EMPTY = b''
+B_NULL = b'\x00'
+B_EQUAL = b'='
+
+#=============================================================================
+# byte translation
+#=============================================================================
+
+#: base list used to compile byte translations
+_TRANSLATE_SOURCE = list(iter_byte_chars(ALL_BYTE_VALUES))
+
+def compile_byte_translation(mapping, source=None):
+ """
+ return a 256-byte string for translating bytes using specified mapping.
+ bytes not specified by mapping will be left alone.
+
+ :param mapping:
+ dict mapping input byte (str or int) -> output byte (str or int).
+
+ :param source:
+ optional existing byte translation string to use as base.
+ (must be 256-length byte string). defaults to identity mapping.
+
+ :returns:
+ 256-length byte string for passing to bytes().translate.
+ """
+ if source is None:
+ target = _TRANSLATE_SOURCE[:]
+ else:
+ assert isinstance(source, bytes) and len(source) == 256
+ target = list(iter_byte_chars(source))
+ for k, v in mapping.items():
+ if isinstance(k, unicode_or_bytes_types):
+ k = ord(k)
+ assert isinstance(k, int) and 0 <= k < 256
+ if isinstance(v, unicode):
+ v = v.encode("ascii")
+ assert isinstance(v, bytes) and len(v) == 1
+ target[k] = v
+ return B_EMPTY.join(target)
+
+#=============================================================================
+# unpadding / stripped base64 encoding
+#=============================================================================
+def b64s_encode(data):
+ """
+ encode using shortened base64 format which omits padding & whitespace.
+ uses default ``+/`` altchars.
+ """
+ return b2a_base64(data).rstrip(_BASE64_STRIP)
+
+def b64s_decode(data):
+ """
+ decode from shortened base64 format which omits padding & whitespace.
+ uses default ``+/`` altchars.
+ """
+ if isinstance(data, unicode):
+ # needs bytes for the padding concatenation below, but want to accept ascii-unicode ala a2b_base64()
+ try:
+ data = data.encode("ascii")
+ except UnicodeEncodeError:
+ raise suppress_cause(ValueError("string argument should contain only ASCII characters"))
+ off = len(data) & 3
+ if off == 0:
+ pass
+ elif off == 2:
+ data += _BASE64_PAD2
+ elif off == 3:
+ data += _BASE64_PAD1
+ else: # off == 1
+ raise ValueError("invalid base64 input")
+ try:
+ return a2b_base64(data)
+ except _BinAsciiError as err:
+ raise suppress_cause(TypeError(err))
+
+#=============================================================================
+# adapted-base64 encoding
+#=============================================================================
+_BASE64_STRIP = b"=\n"
+_BASE64_PAD1 = b"="
+_BASE64_PAD2 = b"=="
+
+# XXX: Passlib 1.8/1.9 -- deprecate everything that's using ab64_encode(),
+# have it start outputting b64s_encode() instead? can use ab64_decode() to retain backwards compat.
+
+def ab64_encode(data):
+ """
+ encode using shortened base64 format which omits padding & whitespace.
+ uses custom ``./`` altchars.
+
+ it is primarily used by Passlib's custom pbkdf2 hashes.
+ """
+ return b64s_encode(data).replace(b"+", b".")
+
+def ab64_decode(data):
+ """
+ decode from shortened base64 format which omits padding & whitespace.
+ uses custom ``./`` altchars, but supports decoding normal ``+/`` altchars as well.
+
+ it is primarily used by Passlib's custom pbkdf2 hashes.
+ """
+ if isinstance(data, unicode):
+ # needs bytes for replace() call, but want to accept ascii-unicode ala a2b_base64()
+ try:
+ data = data.encode("ascii")
+ except UnicodeEncodeError:
+ raise suppress_cause(ValueError("string argument should contain only ASCII characters"))
+ return b64s_decode(data.replace(b".", b"+"))
+
+#=============================================================================
+# base32 codec
+#=============================================================================
+
+def b32encode(source):
+ """
+ wrapper around :func:`base64.b32encode` which strips padding,
+ and returns a native string.
+ """
+ # NOTE: using upper case by default here, since 'I & L' are less
+ # visually ambiguous than 'i & l'
+ return bascii_to_str(_b32encode(source).rstrip(B_EQUAL))
+
+#: byte translation map to replace common mistyped base32 chars.
+#: XXX: could correct '1' -> 'I', but could be a mistyped lower-case 'l', so leaving it alone.
+_b32_translate = compile_byte_translation({"8": "B", "0": "O"})
+
+#: helper to add padding
+_b32_decode_pad = B_EQUAL * 8
+
+def b32decode(source):
+ """
+ wrapper around :func:`base64.b32decode`
+ which handles common mistyped chars.
+ padding optional, ignored if present.
+ """
+ # encode & correct for typos
+ if isinstance(source, unicode):
+ source = source.encode("ascii")
+ source = source.translate(_b32_translate)
+
+ # pad things so final string is multiple of 8
+ remainder = len(source) & 0x7
+ if remainder:
+ source += _b32_decode_pad[:-remainder]
+
+ # XXX: py27 stdlib's version of this has some inefficiencies,
+ # could look into using optimized version.
+ return _b32decode(source, True)
+
+#=============================================================================
+# base64-variant encoding
+#=============================================================================
+
+class Base64Engine(object):
+ """Provides routines for encoding/decoding base64 data using
+ arbitrary character mappings, selectable endianness, etc.
+
+ :arg charmap:
+ A string of 64 unique characters,
+ which will be used to encode successive 6-bit chunks of data.
+ A character's position within the string should correspond
+ to its 6-bit value.
+
+ :param big:
+ Whether the encoding should be big-endian (default False).
+
+ .. note::
+ This class does not currently handle base64's padding characters
+ in any way whatsoever.
+
+ Raw Bytes <-> Encoded Bytes
+ ===========================
+ The following methods convert between raw bytes,
+ and strings encoded using the engine's specific base64 variant:
+
+ .. automethod:: encode_bytes
+ .. automethod:: decode_bytes
+ .. automethod:: encode_transposed_bytes
+ .. automethod:: decode_transposed_bytes
+
+ ..
+ .. automethod:: check_repair_unused
+ .. automethod:: repair_unused
+
+ Integers <-> Encoded Bytes
+ ==========================
+ The following methods allow encoding and decoding
+ unsigned integers to and from the engine's specific base64 variant.
+ Endianness is determined by the engine's ``big`` constructor keyword.
+
+ .. automethod:: encode_int6
+ .. automethod:: decode_int6
+
+ .. automethod:: encode_int12
+ .. automethod:: decode_int12
+
+ .. automethod:: encode_int24
+ .. automethod:: decode_int24
+
+ .. automethod:: encode_int64
+ .. automethod:: decode_int64
+
+ Informational Attributes
+ ========================
+ .. attribute:: charmap
+
+ unicode string containing list of characters used in encoding;
+ position in string matches 6bit value of character.
+
+ .. attribute:: bytemap
+
+ bytes version of :attr:`charmap`
+
+ .. attribute:: big
+
+ boolean flag indicating whether this engine uses big-endian encoding.
+ """
+
+ #===================================================================
+ # instance attrs
+ #===================================================================
+ # public config
+ bytemap = None # charmap as bytes
+ big = None # little or big endian
+
+ # filled in by init based on charmap.
+ # (byte elem: single byte under py2, 8bit int under py3)
+ _encode64 = None # maps 6bit value -> byte elem
+ _decode64 = None # maps byte elem -> 6bit value
+
+ # helpers filled in by init based on endianness
+ _encode_bytes = None # throws IndexError if bad value (shouldn't happen)
+ _decode_bytes = None # throws KeyError if bad char.
+
+ #===================================================================
+ # init
+ #===================================================================
+ def __init__(self, charmap, big=False):
+ # validate charmap, generate encode64/decode64 helper functions.
+ if isinstance(charmap, unicode):
+ charmap = charmap.encode("latin-1")
+ elif not isinstance(charmap, bytes):
+ raise exc.ExpectedStringError(charmap, "charmap")
+ if len(charmap) != 64:
+ raise ValueError("charmap must be 64 characters in length")
+ if len(set(charmap)) != 64:
+ raise ValueError("charmap must not contain duplicate characters")
+ self.bytemap = charmap
+ self._encode64 = charmap.__getitem__
+ lookup = dict((value, idx) for idx, value in enumerate(charmap))
+ self._decode64 = lookup.__getitem__
+
+ # validate big, set appropriate helper functions.
+ self.big = big
+ if big:
+ self._encode_bytes = self._encode_bytes_big
+ self._decode_bytes = self._decode_bytes_big
+ else:
+ self._encode_bytes = self._encode_bytes_little
+ self._decode_bytes = self._decode_bytes_little
+
+ # TODO: support padding character
+ ##if padding is not None:
+ ## if isinstance(padding, unicode):
+ ## padding = padding.encode("latin-1")
+ ## elif not isinstance(padding, bytes):
+ ## raise TypeError("padding char must be unicode or bytes")
+ ## if len(padding) != 1:
+ ## raise ValueError("padding must be single character")
+ ##self.padding = padding
+
+ @property
+ def charmap(self):
+ """charmap as unicode"""
+ return self.bytemap.decode("latin-1")
+
+ #===================================================================
+ # encoding byte strings
+ #===================================================================
+ def encode_bytes(self, source):
+ """encode bytes to base64 string.
+
+ :arg source: byte string to encode.
+ :returns: byte string containing encoded data.
+ """
+ if not isinstance(source, bytes):
+ raise TypeError("source must be bytes, not %s" % (type(source),))
+ chunks, tail = divmod(len(source), 3)
+ if PY3:
+ next_value = nextgetter(iter(source))
+ else:
+ next_value = nextgetter(ord(elem) for elem in source)
+ gen = self._encode_bytes(next_value, chunks, tail)
+ out = join_byte_elems(imap(self._encode64, gen))
+ ##if tail:
+ ## padding = self.padding
+ ## if padding:
+ ## out += padding * (3-tail)
+ return out
+
+ def _encode_bytes_little(self, next_value, chunks, tail):
+ """helper used by encode_bytes() to handle little-endian encoding"""
+ #
+ # output bit layout:
+ #
+ # first byte: v1 543210
+ #
+ # second byte: v1 ....76
+ # +v2 3210..
+ #
+ # third byte: v2 ..7654
+ # +v3 10....
+ #
+ # fourth byte: v3 765432
+ #
+ idx = 0
+ while idx < chunks:
+ v1 = next_value()
+ v2 = next_value()
+ v3 = next_value()
+ yield v1 & 0x3f
+ yield ((v2 & 0x0f)<<2)|(v1>>6)
+ yield ((v3 & 0x03)<<4)|(v2>>4)
+ yield v3>>2
+ idx += 1
+ if tail:
+ v1 = next_value()
+ if tail == 1:
+ # note: 4 msb of last byte are padding
+ yield v1 & 0x3f
+ yield v1>>6
+ else:
+ assert tail == 2
+ # note: 2 msb of last byte are padding
+ v2 = next_value()
+ yield v1 & 0x3f
+ yield ((v2 & 0x0f)<<2)|(v1>>6)
+ yield v2>>4
+
+ def _encode_bytes_big(self, next_value, chunks, tail):
+ """helper used by encode_bytes() to handle big-endian encoding"""
+ #
+ # output bit layout:
+ #
+ # first byte: v1 765432
+ #
+ # second byte: v1 10....
+ # +v2 ..7654
+ #
+ # third byte: v2 3210..
+ # +v3 ....76
+ #
+ # fourth byte: v3 543210
+ #
+ idx = 0
+ while idx < chunks:
+ v1 = next_value()
+ v2 = next_value()
+ v3 = next_value()
+ yield v1>>2
+ yield ((v1&0x03)<<4)|(v2>>4)
+ yield ((v2&0x0f)<<2)|(v3>>6)
+ yield v3 & 0x3f
+ idx += 1
+ if tail:
+ v1 = next_value()
+ if tail == 1:
+ # note: 4 lsb of last byte are padding
+ yield v1>>2
+ yield (v1&0x03)<<4
+ else:
+ assert tail == 2
+ # note: 2 lsb of last byte are padding
+ v2 = next_value()
+ yield v1>>2
+ yield ((v1&0x03)<<4)|(v2>>4)
+ yield ((v2&0x0f)<<2)
+
+ #===================================================================
+ # decoding byte strings
+ #===================================================================
+
+ def decode_bytes(self, source):
+ """decode bytes from base64 string.
+
+ :arg source: byte string to decode.
+ :returns: byte string containing decoded data.
+ """
+ if not isinstance(source, bytes):
+ raise TypeError("source must be bytes, not %s" % (type(source),))
+ ##padding = self.padding
+ ##if padding:
+ ## # TODO: add padding size check?
+ ## source = source.rstrip(padding)
+ chunks, tail = divmod(len(source), 4)
+ if tail == 1:
+ # only 6 bits left, can't decode a whole byte!
+ raise ValueError("input string length cannot be == 1 mod 4")
+ next_value = nextgetter(imap(self._decode64, source))
+ try:
+ return join_byte_values(self._decode_bytes(next_value, chunks, tail))
+ except KeyError as err:
+ raise ValueError("invalid character: %r" % (err.args[0],))
+
+ def _decode_bytes_little(self, next_value, chunks, tail):
+ """helper used by decode_bytes() to handle little-endian encoding"""
+ #
+ # input bit layout:
+ #
+ # first byte: v1 ..543210
+ # +v2 10......
+ #
+ # second byte: v2 ....5432
+ # +v3 3210....
+ #
+ # third byte: v3 ......54
+ # +v4 543210..
+ #
+ idx = 0
+ while idx < chunks:
+ v1 = next_value()
+ v2 = next_value()
+ v3 = next_value()
+ v4 = next_value()
+ yield v1 | ((v2 & 0x3) << 6)
+ yield (v2>>2) | ((v3 & 0xF) << 4)
+ yield (v3>>4) | (v4<<2)
+ idx += 1
+ if tail:
+ # tail is 2 or 3
+ v1 = next_value()
+ v2 = next_value()
+ yield v1 | ((v2 & 0x3) << 6)
+ # NOTE: if tail == 2, 4 msb of v2 are ignored (should be 0)
+ if tail == 3:
+ # NOTE: 2 msb of v3 are ignored (should be 0)
+ v3 = next_value()
+ yield (v2>>2) | ((v3 & 0xF) << 4)
+
+ def _decode_bytes_big(self, next_value, chunks, tail):
+ """helper used by decode_bytes() to handle big-endian encoding"""
+ #
+ # input bit layout:
+ #
+ # first byte: v1 543210..
+ # +v2 ......54
+ #
+ # second byte: v2 3210....
+ # +v3 ....5432
+ #
+ # third byte: v3 10......
+ # +v4 ..543210
+ #
+ idx = 0
+ while idx < chunks:
+ v1 = next_value()
+ v2 = next_value()
+ v3 = next_value()
+ v4 = next_value()
+ yield (v1<<2) | (v2>>4)
+ yield ((v2&0xF)<<4) | (v3>>2)
+ yield ((v3&0x3)<<6) | v4
+ idx += 1
+ if tail:
+ # tail is 2 or 3
+ v1 = next_value()
+ v2 = next_value()
+ yield (v1<<2) | (v2>>4)
+ # NOTE: if tail == 2, 4 lsb of v2 are ignored (should be 0)
+ if tail == 3:
+ # NOTE: 2 lsb of v3 are ignored (should be 0)
+ v3 = next_value()
+ yield ((v2&0xF)<<4) | (v3>>2)
+
+ #===================================================================
+ # encode/decode helpers
+ #===================================================================
+
+ # padmap2/3 - dict mapping last char of string ->
+ # equivalent char with no padding bits set.
+
+ def __make_padset(self, bits):
+ """helper to generate set of valid last chars & bytes"""
+ pset = set(c for i,c in enumerate(self.bytemap) if not i & bits)
+ pset.update(c for i,c in enumerate(self.charmap) if not i & bits)
+ return frozenset(pset)
+
+ @memoized_property
+ def _padinfo2(self):
+ """mask to clear padding bits, and valid last bytes (for strings 2 % 4)"""
+ # 4 bits of last char unused (lsb for big, msb for little)
+ bits = 15 if self.big else (15<<2)
+ return ~bits, self.__make_padset(bits)
+
+ @memoized_property
+ def _padinfo3(self):
+ """mask to clear padding bits, and valid last bytes (for strings 3 % 4)"""
+ # 2 bits of last char unused (lsb for big, msb for little)
+ bits = 3 if self.big else (3<<4)
+ return ~bits, self.__make_padset(bits)
+
+ def check_repair_unused(self, source):
+ """helper to detect & clear invalid unused bits in last character.
+
+ :arg source:
+ encoded data (as ascii bytes or unicode).
+
+ :returns:
+ `(True, result)` if the string was repaired,
+ `(False, source)` if the string was ok as-is.
+ """
+ # figure out how many padding bits there are in last char.
+ tail = len(source) & 3
+ if tail == 2:
+ mask, padset = self._padinfo2
+ elif tail == 3:
+ mask, padset = self._padinfo3
+ elif not tail:
+ return False, source
+ else:
+ raise ValueError("source length must != 1 mod 4")
+
+ # check if last char is ok (padset contains bytes & unicode versions)
+ last = source[-1]
+ if last in padset:
+ return False, source
+
+ # we have dirty bits - repair the string by decoding last char,
+ # clearing the padding bits via <mask>, and encoding new char.
+ if isinstance(source, unicode):
+ cm = self.charmap
+ last = cm[cm.index(last) & mask]
+ assert last in padset, "failed to generate valid padding char"
+ else:
+ # NOTE: this assumes ascii-compat encoding, and that
+ # all chars used by encoding are 7-bit ascii.
+ last = self._encode64(self._decode64(last) & mask)
+ assert last in padset, "failed to generate valid padding char"
+ if PY3:
+ last = bytes([last])
+ return True, source[:-1] + last
+
+ def repair_unused(self, source):
+ return self.check_repair_unused(source)[1]
+
+ ##def transcode(self, source, other):
+ ## return ''.join(
+ ## other.charmap[self.charmap.index(char)]
+ ## for char in source
+ ## )
+
+ ##def random_encoded_bytes(self, size, random=None, unicode=False):
+ ## "return random encoded string of given size"
+ ## data = getrandstr(random or rng,
+ ## self.charmap if unicode else self.bytemap, size)
+ ## return self.repair_unused(data)
+
+ #===================================================================
+ # transposed encoding/decoding
+ #===================================================================
+ def encode_transposed_bytes(self, source, offsets):
+ """encode byte string, first transposing source using offset list"""
+ if not isinstance(source, bytes):
+ raise TypeError("source must be bytes, not %s" % (type(source),))
+ tmp = join_byte_elems(source[off] for off in offsets)
+ return self.encode_bytes(tmp)
+
+ def decode_transposed_bytes(self, source, offsets):
+ """decode byte string, then reverse transposition described by offset list"""
+ # NOTE: if transposition does not use all bytes of source,
+ # the original can't be recovered... and join_byte_elems() will throw
+ # an error because 1+ values in <buf> will be None.
+ tmp = self.decode_bytes(source)
+ buf = [None] * len(offsets)
+ for off, char in zip(offsets, tmp):
+ buf[off] = char
+ return join_byte_elems(buf)
+
+ #===================================================================
+ # integer decoding helpers - mainly used by des_crypt family
+ #===================================================================
+ def _decode_int(self, source, bits):
+ """decode base64 string -> integer
+
+ :arg source: base64 string to decode.
+ :arg bits: number of bits in resulting integer.
+
+ :raises ValueError:
+ * if the string contains invalid base64 characters.
+ * if the string is not the right size - it must be exactly
+ ``int(ceil(bits/6.0))`` chars in length.
+
+ :returns:
+ an integer in the range ``0 <= n < 2**bits``
+ """
+ if not isinstance(source, bytes):
+ raise TypeError("source must be bytes, not %s" % (type(source),))
+ big = self.big
+ pad = -bits % 6
+ chars = (bits+pad)//6 # floor division keeps the char count an int
+ if len(source) != chars:
+ raise ValueError("source must be %d chars" % (chars,))
+ decode = self._decode64
+ out = 0
+ try:
+ for c in source if big else reversed(source):
+ out = (out<<6) + decode(c)
+ except KeyError:
+ raise ValueError("invalid character in string: %r" % (c,))
+ if pad:
+ # strip padding bits
+ if big:
+ out >>= pad
+ else:
+ out &= (1<<bits)-1
+ return out
+
+ #---------------------------------------------------------------
+ # optimized versions for common integer sizes
+ #---------------------------------------------------------------
+
+ def decode_int6(self, source):
+ """decode single character -> 6 bit integer"""
+ if not isinstance(source, bytes):
+ raise TypeError("source must be bytes, not %s" % (type(source),))
+ if len(source) != 1:
+ raise ValueError("source must be exactly 1 byte")
+ if PY3:
+ # convert to 8bit int before doing lookup
+ source = source[0]
+ try:
+ return self._decode64(source)
+ except KeyError:
+ raise ValueError("invalid character")
+
+ def decode_int12(self, source):
+ """decodes 2 char string -> 12-bit integer"""
+ if not isinstance(source, bytes):
+ raise TypeError("source must be bytes, not %s" % (type(source),))
+ if len(source) != 2:
+ raise ValueError("source must be exactly 2 bytes")
+ decode = self._decode64
+ try:
+ if self.big:
+ return decode(source[1]) + (decode(source[0])<<6)
+ else:
+ return decode(source[0]) + (decode(source[1])<<6)
+ except KeyError:
+ raise ValueError("invalid character")
+
+ def decode_int24(self, source):
+ """decodes 4 char string -> 24-bit integer"""
+ if not isinstance(source, bytes):
+ raise TypeError("source must be bytes, not %s" % (type(source),))
+ if len(source) != 4:
+ raise ValueError("source must be exactly 4 bytes")
+ decode = self._decode64
+ try:
+ if self.big:
+ return decode(source[3]) + (decode(source[2])<<6)+ \
+ (decode(source[1])<<12) + (decode(source[0])<<18)
+ else:
+ return decode(source[0]) + (decode(source[1])<<6)+ \
+ (decode(source[2])<<12) + (decode(source[3])<<18)
+ except KeyError:
+ raise ValueError("invalid character")
+
+ def decode_int30(self, source):
+ """decode 5 char string -> 30 bit integer"""
+ return self._decode_int(source, 30)
+
+ def decode_int64(self, source):
+ """decode 11 char base64 string -> 64-bit integer
+
+ this format is used primarily by des-crypt & variants to encode
+ the DES output value used as a checksum.
+ """
+ return self._decode_int(source, 64)
+
+ #===================================================================
+ # integer encoding helpers - mainly used by des_crypt family
+ #===================================================================
+ def _encode_int(self, value, bits):
+ """encode integer into base64 format
+
+ :arg value: non-negative integer to encode
+ :arg bits: number of bits to encode
+
+ :returns:
+ a string of length ``int(ceil(bits/6.0))``.
+ """
+ assert value >= 0, "caller did not sanitize input"
+ pad = -bits % 6
+ bits += pad
+ if self.big:
+ itr = irange(bits-6, -6, -6)
+ # shift to add lsb padding.
+ value <<= pad
+ else:
+ itr = irange(0, bits, 6)
+ # padding is msb, so no change needed.
+ return join_byte_elems(imap(self._encode64,
+ ((value>>off) & 0x3f for off in itr)))
+
+ #---------------------------------------------------------------
+ # optimized versions for common integer sizes
+ #---------------------------------------------------------------
+
+ def encode_int6(self, value):
+ """encodes 6-bit integer -> single hash64 character"""
+ if value < 0 or value > 63:
+ raise ValueError("value out of range")
+ if PY3:
+ return self.bytemap[value:value+1]
+ else:
+ return self._encode64(value)
+
+ def encode_int12(self, value):
+ """encodes 12-bit integer -> 2 char string"""
+ if value < 0 or value > 0xFFF:
+ raise ValueError("value out of range")
+ raw = [value & 0x3f, (value>>6) & 0x3f]
+ if self.big:
+ raw = reversed(raw)
+ return join_byte_elems(imap(self._encode64, raw))
+
+ def encode_int24(self, value):
+ """encodes 24-bit integer -> 4 char string"""
+ if value < 0 or value > 0xFFFFFF:
+ raise ValueError("value out of range")
+ raw = [value & 0x3f, (value>>6) & 0x3f,
+ (value>>12) & 0x3f, (value>>18) & 0x3f]
+ if self.big:
+ raw = reversed(raw)
+ return join_byte_elems(imap(self._encode64, raw))
+
+ def encode_int30(self, value):
+ """encode 30-bit integer -> 5 char string"""
+ if value < 0 or value > 0x3fffffff:
+ raise ValueError("value out of range")
+ return self._encode_int(value, 30)
+
+ def encode_int64(self, value):
+ """encode 64-bit integer -> 11 char hash64 string
+
+ this format is used primarily by des-crypt & variants to encode
+ the DES output value used as a checksum.
+ """
+ if value < 0 or value > 0xffffffffffffffff:
+ raise ValueError("value out of range")
+ return self._encode_int(value, 64)
+
+ #===================================================================
+ # eof
+ #===================================================================
+
+class LazyBase64Engine(Base64Engine):
+ """Base64Engine which delays initialization until it's accessed"""
+ _lazy_opts = None
+
+ def __init__(self, *args, **kwds):
+ self._lazy_opts = (args, kwds)
+
+ def _lazy_init(self):
+ args, kwds = self._lazy_opts
+ super(LazyBase64Engine, self).__init__(*args, **kwds)
+ del self._lazy_opts
+ self.__class__ = Base64Engine
+
+ def __getattribute__(self, attr):
+ if not attr.startswith("_"):
+ self._lazy_init()
+ return object.__getattribute__(self, attr)
+
+#-------------------------------------------------------------
+# common variants
+#-------------------------------------------------------------
+
+h64 = LazyBase64Engine(HASH64_CHARS)
+h64big = LazyBase64Engine(HASH64_CHARS, big=True)
+bcrypt64 = LazyBase64Engine(BCRYPT_CHARS, big=True)
+
+#=============================================================================
+# eof
+#=============================================================================
diff --git a/passlib/utils/handlers.py b/passlib/utils/handlers.py
index 876edec..0c103e1 100644
--- a/passlib/utils/handlers.py
+++ b/passlib/utils/handlers.py
@@ -18,10 +18,15 @@ from passlib.ifc import PasswordHash
from passlib.registry import get_crypt_handler
from passlib.utils import (
consteq, getrandstr, getrandbytes,
- BASE64_CHARS, HASH64_CHARS, rng, to_native_str,
+ rng, to_native_str,
is_crypt_handler, to_unicode,
MAX_PASSWORD_SIZE, accepts_keyword, as_bool,
update_mixin_classes)
+from passlib.utils.binary import (
+ BASE64_CHARS, HASH64_CHARS, PADDED_BASE64_CHARS,
+ HEX_CHARS, UPPER_HEX_CHARS, LOWER_HEX_CHARS,
+ ALL_BYTE_VALUES,
+)
from passlib.utils.compat import join_byte_values, irange, u, native_string_types, \
uascii_to_str, join_unicode, unicode, str_to_uascii, \
join_unicode, unicode_or_bytes_types, PY2, int_types
@@ -47,23 +52,15 @@ __all__ = [
# other helpers
'PrefixWrapper',
+
+ # TODO: a bunch of other things are commonly assumed in this namespace
+ # (e.g. HEX_CHARS etc); need to audit uses and update this list.
]
#=============================================================================
# constants
#=============================================================================
-# common salt_chars & checksum_chars values
-# (BASE64_CHARS, HASH64_CHARS imported above)
-PADDED_BASE64_CHARS = BASE64_CHARS + u("=")
-HEX_CHARS = u("0123456789abcdefABCDEF")
-UPPER_HEX_CHARS = u("0123456789ABCDEF")
-LOWER_HEX_CHARS = u("0123456789abcdef")
-
-# special byte string containing all possible byte values
-# XXX: treated as singleton by some of the code for efficiency.
-ALL_BYTE_VALUES = join_byte_values(irange(256))
-
# deprecated aliases - will be removed after passlib 1.8
H64_CHARS = HASH64_CHARS
B64_CHARS = BASE64_CHARS
@@ -821,7 +818,7 @@ class GenericHandler(MinimalHandler):
if value is None:
return None
if isinstance(value, bytes):
- from passlib.utils import ab64_encode
+ from passlib.utils.binary import ab64_encode
value = ab64_encode(value).decode("ascii")
elif not isinstance(value, unicode):
value = unicode(value)