Adds verification tests for int2bytes and bytes2int

* There is a bug in the older int2bytes implementation; I have already raised an issue for it on Bitbucket (#11). The pkcs1 file verification test fails if the behavior of int2bytes is corrected.
author: Yesudeep Mangalapilly <yesudeep@gmail.com> 2011-08-12 13:06:51 +0530
committer: Yesudeep Mangalapilly <yesudeep@gmail.com> 2011-08-12 13:06:51 +0530
commit: b7341b658d6d2285676ce0b3c287383ad69a293e (patch)
tree: 2610f797f057feedec88d13d9ed7d16efe5c4232 /rsa
parent: a5673890cec3d7518726b15d4cecc17c0b55aa4a (diff)
download: rsa-b7341b658d6d2285676ce0b3c287383ad69a293e.tar.gz
2 files changed, 130 insertions, 48 deletions
diff --git a/rsa/_compat.py b/rsa/_compat.py
index 162f47e..b01fd3e 100644
--- a/rsa/_compat.py
+++ b/rsa/_compat.py
@@ -19,9 +19,31 @@
 
 from __future__ import absolute_import
 
+import sys
 from struct import pack
 
 try:
+    MAX_INT = sys.maxsize
+except AttributeError:
+    MAX_INT = sys.maxint
+
+MAX_INT64 = (1 << 63) - 1
+MAX_INT32 = (1 << 31) - 1
+MAX_INT16 = (1 << 15) - 1
+
+# Determine the word size of the processor.
+if MAX_INT == MAX_INT64:
+    # 64-bit processor.
+    MACHINE_WORD_SIZE = 64
+elif MAX_INT == MAX_INT32:
+    # 32-bit processor.
+    MACHINE_WORD_SIZE = 32
+else:
+    # Else we just assume 64-bit processor keeping up with modern times.
+    MACHINE_WORD_SIZE = 64
+
+
+try:
     # < Python3
     unicode_type = unicode
     have_python3 = False
@@ -97,3 +119,42 @@ def byte(num):
         A single byte.
     """
     return pack("B", num)
+
+
+def get_machine_alignment(num, force_arch=64,
+                          _machine_word_size=MACHINE_WORD_SIZE):
+    """
+    Returns alignment details for the given number based on the platform
+    Python is running on.
+
+    :param num:
+        Unsigned integral number.
+    :param force_arch:
+        If you don't want to use 64-bit unsigned chunks, set this to
+        anything other than 64. 32-bit chunks will be preferred then.
+        Default 64 will be used when on a 64-bit machine.
+    :param _machine_word_size:
+        (Internal) The machine word size used for alignment.
+    :returns:
+        4-tuple::
+
+            (word_bits, word_bytes,
+             max_uint, packing_format_type)
+    """
+    max_uint64 = 0xffffffffffffffff
+    max_uint32 = 0xffffffff
+    max_uint16 = 0xffff
+    max_uint8 = 0xff
+
+    if force_arch == 64 and _machine_word_size >= 64 and num > max_uint32:
+        # 64-bit unsigned integer.
+        return 64, 8, max_uint64, "Q"
+    elif num > max_uint16:
+        # 32-bit unsigned integer
+        return 32, 4, max_uint32, "L"
+    elif num > max_uint8:
+        # 16-bit unsigned integer.
+        return 16, 2, max_uint16, "H"
+    else:
+        # 8-bit unsigned integer.
+        return 8, 1, max_uint8, "B"
diff --git a/rsa/transform.py b/rsa/transform.py
index 8d3a461..5318dbf 100644
--- a/rsa/transform.py
+++ b/rsa/transform.py
@@ -21,11 +21,11 @@ From bytes to a number, number to bytes, etc.
 
 from __future__ import absolute_import
 
-import types
 import binascii
 from struct import pack
 from rsa import common
-from rsa._compat import is_integer, b, byte
+from rsa._compat import is_integer, b, byte, get_machine_alignment
+
 
 ZERO_BYTE = b('\x00')
 
@@ -45,8 +45,8 @@ def bytes2int(raw_bytes):
     return int(binascii.hexlify(raw_bytes), 16)
 
 
-def old_int2bytes(number, block_size=0):
-    r'''Converts a number to a string of bytes.
+def _int2bytes(number, block_size=0):
+    """Converts a number to a string of bytes.
 
     @param number: the number to convert
     @param block_size: the number of bytes to output. If the number encoded to
@@ -56,24 +56,22 @@ def old_int2bytes(number, block_size=0):
     @throws OverflowError when block_size is given and the number takes up more
         bytes than fit into the block.
 
-
-    >>> old_int2bytes(123456789)
+    >>> _int2bytes(123456789)
     b'\x07[\xcd\x15'
     >>> bytes2int(int2bytes(123456789))
     123456789
 
-    >>> old_int2bytes(123456789, 6)
+    >>> _int2bytes(123456789, 6)
     b'\x00\x00\x07[\xcd\x15'
     >>> bytes2int(int2bytes(123456789, 128))
     123456789
 
-    >>> old_int2bytes(123456789, 3)
+    >>> _int2bytes(123456789, 3)
     Traceback (most recent call last):
     ...
     OverflowError: Needed 4 bytes for number, but block size is 3
 
-    '''
-
+    """
     # Type checking
     if not is_integer(number):
         raise TypeError("You must pass an integer for 'number', not %s" %
@@ -104,70 +102,93 @@ def old_int2bytes(number, block_size=0):
     return padding + b('').join(raw_bytes)
 
 
-def int2bytes(number, block_size=None):
-    """Converts a number to a string of bytes.
-
-    @param number: the number to convert
-    @param block_size: the number of bytes to output. If the number encoded to
-        bytes is less than this, the block will be zero-padded. When not given,
-        the returned block is not padded.
 
-    @throws OverflowError when block_size is given and the number takes up more
+def int2bytes(number, chunk_size=0,
+                     _zero_byte=ZERO_BYTE,
+                     _get_machine_alignment=get_machine_alignment):
+    """
+    Convert an integer to bytes (base-256 representation)::
+
+        int2bytes(n:int, chunk_size:int) : string
+
+    .. WARNING::
+        Does not preserve leading zeros if you don't specify a chunk size.
+
+    Usage::
+    
+        >>> int2bytes(123456789)
+        b'\x07[\xcd\x15'
+        >>> bytes2int(int2bytes(123456789))
+        123456789
+
+        >>> int2bytes(123456789, 6)
+        b'\x00\x00\x07[\xcd\x15'
+        >>> bytes2int(int2bytes(123456789, 128))
+        123456789
+
+        >>> int2bytes(123456789, 3)
+        Traceback (most recent call last):
+        ...
+        OverflowError: Need 4 bytes for number, but chunk size is 3
+
+    :param number:
+        Integer value
+    :param chunk_size:
+        If optional chunk size is given and greater than zero, pad the front of
+        the byte string with binary zeros so that the length is a multiple of
+        ``chunk_size``. Raises an OverflowError if the chunk_size is not
+        sufficient to represent the integer.
+    :returns:
+        Raw bytes (base-256 representation).
+    :raises:
+        ``OverflowError`` when chunk_size is given and the number takes up more
         bytes than fit into the block.
-
-
-    >>> int2bytes(123456789)
-    '\x07[\xcd\x15'
-    >>> bytes2int(int2bytes(123456789))
-    123456789
-
-    >>> int2bytes(123456789, 6)
-    '\x00\x00\x07[\xcd\x15'
-    >>> bytes2int(int2bytes(123456789, 128))
-    123456789
-
-    >>> int2bytes(123456789, 3)
-    Traceback (most recent call last):
-    ...
-    OverflowError: Need 4 bytes for number, but block size is 3
     """
-    # Type checking
-    if not is_integer(number):
-        raise TypeError("You must pass an integer for 'number', not %s" %
-            type(number).__name__)
+    # Machine word-aligned implementation.
+    # ~19x faster than naive implementation on 32-bit processors.
+    # ~33x faster than naive implementation on 64-bit processors.
+    # ~50x faster on 64-bit pypy 1.5
+
+    # Don't need to raise TypeError ourselves. The code does that already
+    # if a bad type is passed in as argument.
 
     if number < 0:
         raise ValueError('Number must be unsigned integer: %d' % number)
 
     raw_bytes = b('')
     if not number:
-        raw_bytes = ZERO_BYTE
+        # 0 == '\x00'
+        raw_bytes = _zero_byte
 
+    # Align packing to machine word size.
     num = number
+    word_bits, word_bytes, max_uint, pack_type = _get_machine_alignment(num)
+    pack_format = ">" + pack_type
     while num > 0:
-        raw_bytes = pack('>I', num & 0xffffffff) + raw_bytes
-        num >>= 32
+        raw_bytes = pack(pack_format, num & max_uint) + raw_bytes
+        num >>= word_bits
 
     # Count the number of zero prefix bytes.
     zero_leading = 0
     for zero_leading, x in enumerate(raw_bytes):
-        if x != ZERO_BYTE[0]:
+        if x != _zero_byte[0]:
             break
 
-    if block_size is not None and block_size > 0:
+    if chunk_size > 0:
         # Bounds checking. We're not doing this up-front because the
         # most common use case is not specifying a chunk size. In the worst
         # case, the number will already have been converted to bytes above.
+        #length = count * word_bytes
         length = len(raw_bytes)
         bytes_needed = length - zero_leading
-        if bytes_needed > block_size:
+        if bytes_needed > chunk_size:
             raise OverflowError(
-                "Need %d bytes for number, but block size is %d" %
-                (bytes_needed, block_size)
+                "Need %d bytes for number, but chunk size is %d" %
+                (bytes_needed, chunk_size)
             )
-        remainder = length % block_size
+        remainder = length % chunk_size
         if remainder:
-            raw_bytes = (block_size - remainder) * ZERO_BYTE + raw_bytes
+            raw_bytes = (chunk_size - remainder) * _zero_byte + raw_bytes
     else:
         raw_bytes = raw_bytes[zero_leading:]
     return raw_bytes
author	Yesudeep Mangalapilly <yesudeep@gmail.com>	2011-08-12 13:06:51 +0530
committer	Yesudeep Mangalapilly <yesudeep@gmail.com>	2011-08-12 13:06:51 +0530
commit	b7341b658d6d2285676ce0b3c287383ad69a293e (patch)
tree	2610f797f057feedec88d13d9ed7d16efe5c4232 /rsa
parent	a5673890cec3d7518726b15d4cecc17c0b55aa4a (diff)
download	rsa-b7341b658d6d2285676ce0b3c287383ad69a293e.tar.gz