From b7341b658d6d2285676ce0b3c287383ad69a293e Mon Sep 17 00:00:00 2001 From: Yesudeep Mangalapilly Date: Fri, 12 Aug 2011 13:06:51 +0530 Subject: Adds verification tests for int2bytes and bytes2int * There is a bug in the older int2bytes implementation. I've raised an issue on bitbucket for that already. #11 The pkcs1 file verification test fails if the behavior for int2bytes is corrected. --- rsa/_compat.py | 61 +++++++++++++++++++++++++ rsa/transform.py | 117 ++++++++++++++++++++++++++++-------------------- speed.sh | 11 ++--- tests/test_transform.py | 55 ++++++++++++++++++----- 4 files changed, 181 insertions(+), 63 deletions(-) diff --git a/rsa/_compat.py b/rsa/_compat.py index 162f47e..b01fd3e 100644 --- a/rsa/_compat.py +++ b/rsa/_compat.py @@ -19,8 +19,30 @@ from __future__ import absolute_import +import sys from struct import pack +try: + MAX_INT = sys.maxsize +except AttributeError: + MAX_INT = sys.maxint + +MAX_INT64 = (1 << 63) - 1 +MAX_INT32 = (1 << 31) - 1 +MAX_INT16 = (1 << 15) - 1 + +# Determine the word size of the processor. +if MAX_INT == MAX_INT64: + # 64-bit processor. + MACHINE_WORD_SIZE = 64 +elif MAX_INT == MAX_INT32: + # 32-bit processor. + MACHINE_WORD_SIZE = 32 +else: + # Else we just assume 64-bit processor keeping up with modern times. + MACHINE_WORD_SIZE = 64 + + try: # < Python3 unicode_type = unicode @@ -97,3 +119,42 @@ def byte(num): A single byte. """ return pack("B", num) + + +def get_machine_alignment(num, force_arch=64, + _machine_word_size=MACHINE_WORD_SIZE): + """ + Returns alignment details for the given number based on the platform + Python is running on. + + :param num: + Unsigned integral number. + :param force_arch: + If you don't want to use 64-bit unsigned chunks, set this to + anything other than 64. 32-bit chunks will be preferred then. + Default 64 will be used when on a 64-bit machine. + :param _machine_word_size: + (Internal) The machine word size used for alignment. + :returns: + 4-tuple:: + + (word_bits, word_bytes, + max_uint, packing_format_type) + """ + max_uint64 = 0xffffffffffffffff + max_uint32 = 0xffffffff + max_uint16 = 0xffff + max_uint8 = 0xff + + if force_arch == 64 and _machine_word_size >= 64 and num > max_uint32: + # 64-bit unsigned integer. + return 64, 8, max_uint64, "Q" + elif num > max_uint16: + # 32-bit unsigned integer + return 32, 4, max_uint32, "L" + elif num > max_uint8: + # 16-bit unsigned integer. + return 16, 2, max_uint16, "H" + else: + # 8-bit unsigned integer. + return 8, 1, max_uint8, "B" diff --git a/rsa/transform.py b/rsa/transform.py index 8d3a461..5318dbf 100644 --- a/rsa/transform.py +++ b/rsa/transform.py @@ -21,11 +21,11 @@ From bytes to a number, number to bytes, etc. from __future__ import absolute_import -import types import binascii from struct import pack from rsa import common -from rsa._compat import is_integer, b, byte +from rsa._compat import is_integer, b, byte, get_machine_alignment + ZERO_BYTE = b('\x00') @@ -45,8 +45,8 @@ def bytes2int(raw_bytes): return int(binascii.hexlify(raw_bytes), 16) -def old_int2bytes(number, block_size=0): - r'''Converts a number to a string of bytes. +def _int2bytes(number, block_size=0): + """Converts a number to a string of bytes. @param number: the number to convert @param block_size: the number of bytes to output. If the number encoded to @@ -56,24 +56,22 @@ def old_int2bytes(number, block_size=0): @throws OverflowError when block_size is given and the number takes up more bytes than fit into the block. - - >>> old_int2bytes(123456789) + >>> _int2bytes(123456789) b'\x07[\xcd\x15' >>> bytes2int(int2bytes(123456789)) 123456789 - >>> old_int2bytes(123456789, 6) + >>> _int2bytes(123456789, 6) b'\x00\x00\x07[\xcd\x15' >>> bytes2int(int2bytes(123456789, 128)) 123456789 - >>> old_int2bytes(123456789, 3) + >>> _int2bytes(123456789, 3) Traceback (most recent call last): ... OverflowError: Needed 4 bytes for number, but block size is 3 - ''' - + """ # Type checking if not is_integer(number): raise TypeError("You must pass an integer for 'number', not %s" % @@ -104,70 +102,93 @@ def old_int2bytes(number, block_size=0): return padding + b('').join(raw_bytes) -def int2bytes(number, block_size=None): - """Converts a number to a string of bytes. - - @param number: the number to convert - @param block_size: the number of bytes to output. If the number encoded to - bytes is less than this, the block will be zero-padded. When not given, - the returned block is not padded. - @throws OverflowError when block_size is given and the number takes up more +def int2bytes(number, chunk_size=0, + _zero_byte=ZERO_BYTE, + _get_machine_alignment=get_machine_alignment): + """ + Convert a integer to bytes (base-256 representation):: + + int2bytes(n:int, chunk_size:int) : string + + .. WARNING: + Does not preserve leading zeros if you don't specify a chunk size. + + Usage:: + + >>> int2bytes(123456789) + b'\x07[\xcd\x15' + >>> bytes2int(int2bytes(123456789)) + 123456789 + + >>> int2bytes(123456789, 6) + b'\x00\x00\x07[\xcd\x15' + >>> bytes2int(int2bytes(123456789, 128)) + 123456789 + + >>> int2bytes(123456789, 3) + Traceback (most recent call last): + ... + OverflowError: Need 4 bytes for number, but block size is 3 + + :param number: + Integer value + :param chunk_size: + If optional chunk size is given and greater than zero, pad the front of + the byte string with binary zeros so that the length is a multiple of + ``chunk_size``. Raises an OverflowError if the chunk_size is not + sufficient to represent the integer. + :returns: + Raw bytes (base-256 representation). + :raises: + ``OverflowError`` when block_size is given and the number takes up more bytes than fit into the block. - - - >>> int2bytes(123456789) - '\x07[\xcd\x15' - >>> bytes2int(int2bytes(123456789)) - 123456789 - - >>> int2bytes(123456789, 6) - '\x00\x00\x07[\xcd\x15' - >>> bytes2int(int2bytes(123456789, 128)) - 123456789 - - >>> int2bytes(123456789, 3) - Traceback (most recent call last): - ... - OverflowError: Need 4 bytes for number, but block size is 3 """ - # Type checking - if not is_integer(number): - raise TypeError("You must pass an integer for 'number', not %s" % - type(number).__name__) + # Machine word-aligned implementation. + # ~19x faster than naive implementation on 32-bit processors. + # ~33x faster than naive implementation on 64-bit processors. + # ~50x faster on 64-bit pypy 1.5 + + # Don't need to raise TypeError ourselves. The code does that already + # if a bad type is passed in as argument. if number < 0: raise ValueError('Number must be unsigned integer: %d' % number) raw_bytes = b('') if not number: - raw_bytes = ZERO_BYTE + # 0 == '\x00' + raw_bytes = _zero_byte + # Align packing to machine word size. num = number + word_bits, word_bytes, max_uint, pack_type = _get_machine_alignment(num) + pack_format = ">" + pack_type while num > 0: - raw_bytes = pack('>I', num & 0xffffffff) + raw_bytes - num >>= 32 + raw_bytes = pack(pack_format, num & max_uint) + raw_bytes + num >>= word_bits # Count the number of zero prefix bytes. zero_leading = 0 for zero_leading, x in enumerate(raw_bytes): - if x != ZERO_BYTE[0]: + if x != _zero_byte[0]: break - if block_size is not None and block_size > 0: + if chunk_size > 0: # Bounds checking. We're not doing this up-front because the # most common use case is not specifying a chunk size. In the worst # case, the number will already have been converted to bytes above. + #length = count * word_bytes length = len(raw_bytes) bytes_needed = length - zero_leading - if bytes_needed > block_size: + if bytes_needed > chunk_size: raise OverflowError( - "Need %d bytes for number, but block size is %d" % - (bytes_needed, block_size) + "Need %d bytes for number, but chunk size is %d" % + (bytes_needed, chunk_size) ) - remainder = length % block_size + remainder = length % chunk_size if remainder: - raw_bytes = (block_size - remainder) * ZERO_BYTE + raw_bytes + raw_bytes = (chunk_size - remainder) * _zero_byte + raw_bytes else: raw_bytes = raw_bytes[zero_leading:] return raw_bytes diff --git a/speed.sh b/speed.sh index 17f1ea4..a9a1431 100755 --- a/speed.sh +++ b/speed.sh @@ -3,13 +3,14 @@ echo "int2bytes speed test" echo "python2.5" python2.5 -mtimeit -s'from rsa.transform import int2bytes; n = 1<<4096' 'int2bytes(n)' -python2.5 -mtimeit -s'from rsa.transform import old_int2bytes; n = 1<<4096' 'old_int2bytes(n)' +python2.5 -mtimeit -s'from rsa.transform import _int2bytes; n = 1<<4096' '_int2bytes(n)' echo "python2.6" -python2.6 -mtimeit -s'from rsa.transform import int2bytes; n = 1<<4096' 'int2bytes(n)' -python2.6 -mtimeit -s'from rsa.transform import old_int2bytes; n = 1<<4096' 'old_int2bytes(n)' +python2.6 -mtimeit -s'from rsa.transform import int2bytes; n = 1<<4096' 'int2bytes(n, 516)' +python2.6 -mtimeit -s'from rsa.transform import _int2bytes; n = 1<<4096' '_int2bytes(n, 516)' echo "python2.7" python2.7 -mtimeit -s'from rsa.transform import int2bytes; n = 1<<4096' 'int2bytes(n)' -python2.7 -mtimeit -s'from rsa.transform import old_int2bytes; n = 1<<4096' 'old_int2bytes(n)' +python2.7 -mtimeit -s'from rsa.transform import _int2bytes; n = 1<<4096' '_int2bytes(n)' echo "python3.2" python3 -mtimeit -s'from rsa.transform import int2bytes; n = 1<<4096' 'int2bytes(n)' -python3 -mtimeit -s'from rsa.transform import old_int2bytes; n = 1<<4096' 'old_int2bytes(n)' +python3 -mtimeit -s'from rsa.transform import _int2bytes; n = 1<<4096' '_int2bytes(n)' + diff --git a/tests/test_transform.py b/tests/test_transform.py index 9bd3c6d..ffd9ec8 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -3,30 +3,65 @@ import unittest2 from rsa._compat import b -from rsa.transform import int2bytes, old_int2bytes +from rsa.transform import int2bytes, bytes2int, _int2bytes -class Test_integer_to_bytes(unittest2.TestCase): +class Test_int2bytes(unittest2.TestCase): + def test_accuracy(self): + self.assertEqual(int2bytes(123456789), b('\x07[\xcd\x15')) + self.assertEqual(_int2bytes(123456789), b('\x07[\xcd\x15')) + + def test_codec_identity(self): + self.assertEqual(bytes2int(int2bytes(123456789, 128)), 123456789) + self.assertEqual(bytes2int(_int2bytes(123456789, 128)), 123456789) + def test_chunk_size(self): - self.assertEqual(int2bytes(123456789, 6), - b('\x00\x00\x07[\xcd\x15')) + self.assertEqual(int2bytes(123456789, 6), b('\x00\x00\x07[\xcd\x15')) self.assertEqual(int2bytes(123456789, 7), b('\x00\x00\x00\x07[\xcd\x15')) - self.assertEqual(old_int2bytes(123456789, 6), + + self.assertEqual(_int2bytes(123456789, 6), b('\x00\x00\x07[\xcd\x15')) - self.assertEqual(old_int2bytes(123456789, 7), + self.assertEqual(_int2bytes(123456789, 7), b('\x00\x00\x00\x07[\xcd\x15')) + def test_zero(self): + self.assertEqual(int2bytes(0, 4), b('\x00') * 4) + self.assertEqual(int2bytes(0, 7), b('\x00') * 7) + self.assertEqual(int2bytes(0), b('\x00')) + + self.assertEqual(_int2bytes(0, 4), b('\x00') * 4) + self.assertEqual(_int2bytes(0, 7), b('\x00') * 7) + self.assertEqual(_int2bytes(0), b('\x00')) + + def test_correctness_against_base_implementation(self): + # Slow test. + values = [ + 1 << 512, + 1 << 8192, + 1 << 77, + ] + for value in values: + self.assertEqual(int2bytes(value), _int2bytes(value), + "Boom %d" % value) + self.assertEqual(bytes2int(int2bytes(value)), + value, + "Boom %d" % value) + self.assertEqual(bytes2int(_int2bytes(value)), + value, + "Boom %d" % value) + def test_raises_OverflowError_when_chunk_size_is_insufficient(self): self.assertRaises(OverflowError, int2bytes, 123456789, 3) self.assertRaises(OverflowError, int2bytes, 299999999999, 4) - self.assertRaises(OverflowError, old_int2bytes, 123456789, 3) - self.assertRaises(OverflowError, old_int2bytes, 299999999999, 4) + + self.assertRaises(OverflowError, _int2bytes, 123456789, 3) + self.assertRaises(OverflowError, _int2bytes, 299999999999, 4) def test_raises_ValueError_when_negative_integer(self): self.assertRaises(ValueError, int2bytes, -1) - self.assertRaises(ValueError, old_int2bytes, -1) + self.assertRaises(ValueError, _int2bytes, -1) def test_raises_TypeError_when_not_integer(self): self.assertRaises(TypeError, int2bytes, None) - self.assertRaises(TypeError, old_int2bytes, None) + self.assertRaises(TypeError, _int2bytes, None) -- cgit v1.2.1