From b7341b658d6d2285676ce0b3c287383ad69a293e Mon Sep 17 00:00:00 2001
From: Yesudeep Mangalapilly <yesudeep@gmail.com>
Date: Fri, 12 Aug 2011 13:06:51 +0530
Subject: Adds verification tests for int2bytes and bytes2int

* There is a bug in the older int2bytes implementation.
  I've raised an issue on bitbucket for that already.
  #11

 The pkcs1 file verification test fails if the behavior for
 int2bytes is corrected.
---
 rsa/_compat.py          |  61 +++++++++++++++++++++++++
 rsa/transform.py        | 117 ++++++++++++++++++++++++++++--------------------
 speed.sh                |  11 ++---
 tests/test_transform.py |  55 ++++++++++++++++++-----
 4 files changed, 181 insertions(+), 63 deletions(-)

diff --git a/rsa/_compat.py b/rsa/_compat.py
index 162f47e..b01fd3e 100644
--- a/rsa/_compat.py
+++ b/rsa/_compat.py
@@ -19,8 +19,30 @@
 
 from __future__ import absolute_import
 
+import sys
 from struct import pack
 
+try:
+    MAX_INT = sys.maxsize
+except AttributeError:
+    MAX_INT = sys.maxint
+
+MAX_INT64 = (1 << 63) - 1
+MAX_INT32 = (1 << 31) - 1
+MAX_INT16 = (1 << 15) - 1
+
+# Determine the word size of the processor.
+if MAX_INT == MAX_INT64:
+    # 64-bit processor.
+    MACHINE_WORD_SIZE = 64
+elif MAX_INT == MAX_INT32:
+    # 32-bit processor.
+    MACHINE_WORD_SIZE = 32
+else:
+    # Else we just assume 64-bit processor keeping up with modern times.
+    MACHINE_WORD_SIZE = 64
+
+
 try:
     # < Python3
     unicode_type = unicode
@@ -97,3 +119,42 @@ def byte(num):
         A single byte.
     """
     return pack("B", num)
+
+
+def get_machine_alignment(num, force_arch=64,
+                          _machine_word_size=MACHINE_WORD_SIZE):
+    """
+    Returns alignment details for the given number based on the platform
+    Python is running on.
+
+    :param num:
+        Unsigned integral number.
+    :param force_arch:
+        If you don't want to use 64-bit unsigned chunks, set this to
+        anything other than 64. 32-bit chunks will be preferred then.
+        Default 64 will be used when on a 64-bit machine.
+    :param _machine_word_size:
+        (Internal) The machine word size used for alignment.
+    :returns:
+        4-tuple::
+
+            (word_bits, word_bytes,
+             max_uint, packing_format_type)
+    """
+    max_uint64 = 0xffffffffffffffff
+    max_uint32 = 0xffffffff
+    max_uint16 = 0xffff
+    max_uint8 = 0xff
+
+    if force_arch == 64 and _machine_word_size >= 64 and num > max_uint32:
+        # 64-bit unsigned integer.
+        return 64, 8, max_uint64, "Q"
+    elif num > max_uint16:
+        # 32-bit unsigned integer
+        return 32, 4, max_uint32, "L"
+    elif num > max_uint8:
+        # 16-bit unsigned integer.
+        return 16, 2, max_uint16, "H"
+    else:
+        # 8-bit unsigned integer.
+        return 8, 1, max_uint8, "B"
diff --git a/rsa/transform.py b/rsa/transform.py
index 8d3a461..5318dbf 100644
--- a/rsa/transform.py
+++ b/rsa/transform.py
@@ -21,11 +21,11 @@ From bytes to a number, number to bytes, etc.
 
 from __future__ import absolute_import
 
-import types
 import binascii
 from struct import pack
 from rsa import common
-from rsa._compat import is_integer, b, byte
+from rsa._compat import is_integer, b, byte, get_machine_alignment
+
 
 ZERO_BYTE = b('\x00')
 
@@ -45,8 +45,8 @@ def bytes2int(raw_bytes):
     return int(binascii.hexlify(raw_bytes), 16)
 
 
-def old_int2bytes(number, block_size=0):
-    r'''Converts a number to a string of bytes.
+def _int2bytes(number, block_size=0):
+    """Converts a number to a string of bytes.
 
     @param number: the number to convert
     @param block_size: the number of bytes to output. If the number encoded to
@@ -56,24 +56,22 @@ def old_int2bytes(number, block_size=0):
     @throws OverflowError when block_size is given and the number takes up more
         bytes than fit into the block.
 
-
-    >>> old_int2bytes(123456789)
+    >>> _int2bytes(123456789)
     b'\x07[\xcd\x15'
     >>> bytes2int(int2bytes(123456789))
     123456789
 
-    >>> old_int2bytes(123456789, 6)
+    >>> _int2bytes(123456789, 6)
     b'\x00\x00\x07[\xcd\x15'
     >>> bytes2int(int2bytes(123456789, 128))
     123456789
 
-    >>> old_int2bytes(123456789, 3)
+    >>> _int2bytes(123456789, 3)
     Traceback (most recent call last):
     ...
     OverflowError: Needed 4 bytes for number, but block size is 3
 
-    '''
-
+    """
     # Type checking
     if not is_integer(number):
         raise TypeError("You must pass an integer for 'number', not %s" %
@@ -104,70 +102,93 @@ def old_int2bytes(number, block_size=0):
     return padding + b('').join(raw_bytes)
 
 
-def int2bytes(number, block_size=None):
-    """Converts a number to a string of bytes.
-
-    @param number: the number to convert
-    @param block_size: the number of bytes to output. If the number encoded to
-        bytes is less than this, the block will be zero-padded. When not given,
-        the returned block is not padded.
 
-    @throws OverflowError when block_size is given and the number takes up more
+def int2bytes(number, chunk_size=0,
+                     _zero_byte=ZERO_BYTE,
+                     _get_machine_alignment=get_machine_alignment):
+    """
+    Convert a integer to bytes (base-256 representation)::
+
+        int2bytes(n:int, chunk_size:int) : string
+
+    .. WARNING:
+        Does not preserve leading zeros if you don't specify a chunk size.
+
+    Usage::
+    
+        >>> int2bytes(123456789)
+        b'\x07[\xcd\x15'
+        >>> bytes2int(int2bytes(123456789))
+        123456789
+
+        >>> int2bytes(123456789, 6)
+        b'\x00\x00\x07[\xcd\x15'
+        >>> bytes2int(int2bytes(123456789, 128))
+        123456789
+
+        >>> int2bytes(123456789, 3)
+        Traceback (most recent call last):
+        ...
+        OverflowError: Need 4 bytes for number, but block size is 3
+
+    :param number:
+        Integer value
+    :param chunk_size:
+        If optional chunk size is given and greater than zero, pad the front of
+        the byte string with binary zeros so that the length is a multiple of
+        ``chunk_size``. Raises an OverflowError if the chunk_size is not
+        sufficient to represent the integer.
+    :returns:
+        Raw bytes (base-256 representation).
+    :raises:
+        ``OverflowError`` when block_size is given and the number takes up more
         bytes than fit into the block.
-
-
-    >>> int2bytes(123456789)
-    '\x07[\xcd\x15'
-    >>> bytes2int(int2bytes(123456789))
-    123456789
-
-    >>> int2bytes(123456789, 6)
-    '\x00\x00\x07[\xcd\x15'
-    >>> bytes2int(int2bytes(123456789, 128))
-    123456789
-
-    >>> int2bytes(123456789, 3)
-    Traceback (most recent call last):
-    ...
-    OverflowError: Need 4 bytes for number, but block size is 3
     """
-    # Type checking
-    if not is_integer(number):
-        raise TypeError("You must pass an integer for 'number', not %s" %
-            type(number).__name__)
+    # Machine word-aligned implementation.
+    # ~19x faster than naive implementation on 32-bit processors.
+    # ~33x faster than naive implementation on 64-bit processors.
+    # ~50x faster on 64-bit pypy 1.5
+
+    # Don't need to raise TypeError ourselves. The code does that already
+    # if a bad type is passed in as argument.
 
     if number < 0:
         raise ValueError('Number must be unsigned integer: %d' % number)
 
     raw_bytes = b('')
     if not number:
-        raw_bytes = ZERO_BYTE
+        # 0 == '\x00'
+        raw_bytes = _zero_byte
 
+    # Align packing to machine word size.
     num = number
+    word_bits, word_bytes, max_uint, pack_type = _get_machine_alignment(num)
+    pack_format = ">" + pack_type
     while num > 0:
-        raw_bytes = pack('>I', num & 0xffffffff) + raw_bytes
-        num >>= 32
+        raw_bytes = pack(pack_format, num & max_uint) + raw_bytes
+        num >>= word_bits
 
     # Count the number of zero prefix bytes.
     zero_leading = 0
     for zero_leading, x in enumerate(raw_bytes):
-        if x != ZERO_BYTE[0]:
+        if x != _zero_byte[0]:
             break
 
-    if block_size is not None and block_size > 0:
+    if chunk_size > 0:
         # Bounds checking. We're not doing this up-front because the
         # most common use case is not specifying a chunk size. In the worst
         # case, the number will already have been converted to bytes above.
+        #length = count * word_bytes
         length = len(raw_bytes)
         bytes_needed = length - zero_leading
-        if bytes_needed > block_size:
+        if bytes_needed > chunk_size:
             raise OverflowError(
-                "Need %d bytes for number, but block size is %d" %
-                (bytes_needed, block_size)
+                "Need %d bytes for number, but chunk size is %d" %
+                (bytes_needed, chunk_size)
             )
-        remainder = length % block_size
+        remainder = length % chunk_size
         if remainder:
-            raw_bytes = (block_size - remainder) * ZERO_BYTE + raw_bytes
+            raw_bytes = (chunk_size - remainder) * _zero_byte + raw_bytes
     else:
         raw_bytes = raw_bytes[zero_leading:]
     return raw_bytes
diff --git a/speed.sh b/speed.sh
index 17f1ea4..a9a1431 100755
--- a/speed.sh
+++ b/speed.sh
@@ -3,13 +3,14 @@
 echo "int2bytes speed test"
 echo "python2.5"
 python2.5 -mtimeit -s'from rsa.transform import int2bytes; n = 1<<4096' 'int2bytes(n)'
-python2.5 -mtimeit -s'from rsa.transform import old_int2bytes; n = 1<<4096' 'old_int2bytes(n)'
+python2.5 -mtimeit -s'from rsa.transform import _int2bytes; n = 1<<4096' '_int2bytes(n)'
 echo "python2.6"
-python2.6 -mtimeit -s'from rsa.transform import int2bytes; n = 1<<4096' 'int2bytes(n)'
-python2.6 -mtimeit -s'from rsa.transform import old_int2bytes; n = 1<<4096' 'old_int2bytes(n)'
+python2.6 -mtimeit -s'from rsa.transform import int2bytes; n = 1<<4096' 'int2bytes(n, 516)'
+python2.6 -mtimeit -s'from rsa.transform import _int2bytes; n = 1<<4096' '_int2bytes(n, 516)'
 echo "python2.7"
 python2.7 -mtimeit -s'from rsa.transform import int2bytes; n = 1<<4096' 'int2bytes(n)'
-python2.7 -mtimeit -s'from rsa.transform import old_int2bytes; n = 1<<4096' 'old_int2bytes(n)'
+python2.7 -mtimeit -s'from rsa.transform import _int2bytes; n = 1<<4096' '_int2bytes(n)'
 echo "python3.2"
 python3 -mtimeit -s'from rsa.transform import int2bytes; n = 1<<4096' 'int2bytes(n)'
-python3 -mtimeit -s'from rsa.transform import old_int2bytes; n = 1<<4096' 'old_int2bytes(n)'
+python3 -mtimeit -s'from rsa.transform import _int2bytes; n = 1<<4096' '_int2bytes(n)'
+
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 9bd3c6d..ffd9ec8 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -3,30 +3,65 @@
 
 import unittest2
 from rsa._compat import b
-from rsa.transform import int2bytes, old_int2bytes
+from rsa.transform import int2bytes, bytes2int, _int2bytes
 
 
-class Test_integer_to_bytes(unittest2.TestCase):
+class Test_int2bytes(unittest2.TestCase):
+    def test_accuracy(self):
+        self.assertEqual(int2bytes(123456789), b('\x07[\xcd\x15'))
+        self.assertEqual(_int2bytes(123456789), b('\x07[\xcd\x15'))
+
+    def test_codec_identity(self):
+        self.assertEqual(bytes2int(int2bytes(123456789, 128)), 123456789)
+        self.assertEqual(bytes2int(_int2bytes(123456789, 128)), 123456789)
+
     def test_chunk_size(self):
-        self.assertEqual(int2bytes(123456789, 6),
-                         b('\x00\x00\x07[\xcd\x15'))
+        self.assertEqual(int2bytes(123456789, 6), b('\x00\x00\x07[\xcd\x15'))
         self.assertEqual(int2bytes(123456789, 7),
                          b('\x00\x00\x00\x07[\xcd\x15'))
-        self.assertEqual(old_int2bytes(123456789, 6),
+
+        self.assertEqual(_int2bytes(123456789, 6),
                          b('\x00\x00\x07[\xcd\x15'))
-        self.assertEqual(old_int2bytes(123456789, 7),
+        self.assertEqual(_int2bytes(123456789, 7),
                          b('\x00\x00\x00\x07[\xcd\x15'))
 
+    def test_zero(self):
+        self.assertEqual(int2bytes(0, 4), b('\x00') * 4)
+        self.assertEqual(int2bytes(0, 7), b('\x00') * 7)
+        self.assertEqual(int2bytes(0), b('\x00'))
+
+        self.assertEqual(_int2bytes(0, 4), b('\x00') * 4)
+        self.assertEqual(_int2bytes(0, 7), b('\x00') * 7)
+        self.assertEqual(_int2bytes(0), b('\x00'))
+
+    def test_correctness_against_base_implementation(self):
+        # Slow test.
+        values = [
+            1 << 512,
+            1 << 8192,
+            1 << 77,
+        ]
+        for value in values:
+            self.assertEqual(int2bytes(value), _int2bytes(value),
+                             "Boom %d" % value)
+            self.assertEqual(bytes2int(int2bytes(value)),
+                             value,
+                             "Boom %d" % value)
+            self.assertEqual(bytes2int(_int2bytes(value)),
+                             value,
+                             "Boom %d" % value)
+
     def test_raises_OverflowError_when_chunk_size_is_insufficient(self):
         self.assertRaises(OverflowError, int2bytes, 123456789, 3)
         self.assertRaises(OverflowError, int2bytes, 299999999999, 4)
-        self.assertRaises(OverflowError, old_int2bytes, 123456789, 3)
-        self.assertRaises(OverflowError, old_int2bytes, 299999999999, 4)
+
+        self.assertRaises(OverflowError, _int2bytes, 123456789, 3)
+        self.assertRaises(OverflowError, _int2bytes, 299999999999, 4)
 
     def test_raises_ValueError_when_negative_integer(self):
         self.assertRaises(ValueError, int2bytes, -1)
-        self.assertRaises(ValueError, old_int2bytes, -1)
+        self.assertRaises(ValueError, _int2bytes, -1)
 
     def test_raises_TypeError_when_not_integer(self):
         self.assertRaises(TypeError, int2bytes, None)
-        self.assertRaises(TypeError, old_int2bytes, None)
+        self.assertRaises(TypeError, _int2bytes, None)
-- 
cgit v1.2.1