summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sybren A. Stüvel <sybren@stuvel.eu> 2011-07-31 20:47:49 +0200
committer Sybren A. Stüvel <sybren@stuvel.eu> 2011-07-31 20:47:49 +0200
commit 3ad13edad1f0da543f4e7b38aff62f7c91b58052 (patch)
tree 864f5a2722340061553407cdbcb33972cf44d7bc
parent 5e7aa172647a6571abd28bb174b7a1f62e2f81f5 (diff)
download rsa-3ad13edad1f0da543f4e7b38aff62f7c91b58052.tar.gz
Made hashing efficient for large files
-rw-r--r-- rsa/bigfile.py 83
-rw-r--r-- rsa/pkcs1.py 30
-rw-r--r-- rsa/varblock.py (renamed from rsa/blocks.py) 54
-rw-r--r-- tests/test_bigfile.py 37
-rw-r--r-- tests/test_varblock.py (renamed from tests/test_blocks.py) 45
5 files changed, 155 insertions, 94 deletions
diff --git a/rsa/bigfile.py b/rsa/bigfile.py
new file mode 100644
index 0000000..02c0b53
--- /dev/null
+++ b/rsa/bigfile.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011 Sybren A. Stüvel <sybren@stuvel.eu>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Large file support
+
+ - break a file into smaller blocks, and encrypt them, and store the
+ encrypted blocks in another file.
+
+ - take such an encrypted file, decrypt its blocks, and reconstruct the
+ original file.
+
+The encrypted file format is as follows, where || denotes byte concatenation:
+
+ FILE := VERSION || BLOCK || BLOCK ...
+
+ BLOCK := LENGTH || DATA
+
+ LENGTH := varint-encoded length of the subsequent data. Varint comes from
+ Google Protobuf, and encodes an integer into a variable number of bytes.
+ Each byte uses the 7 lowest bits to encode the value. The highest bit set
+ to 1 indicates the next byte is also part of the varint. The last byte will
+ have this bit set to 0.
+
+This file format is called the VARBLOCK format, in line with the varint format
+used to denote the block sizes.
+
+'''
+
+from rsa import key, common, pkcs1, varblock
+
+def encrypt_bigfile(infile, outfile, pub_key):
+ '''Encrypts a file, writing it to 'outfile' in VARBLOCK format.
+
+ :param infile: file-like object to read the cleartext from
+ :param outfile: file-like object to write the crypto in VARBLOCK format to
+ :param pub_key: :py:class:`rsa.PublicKey` to encrypt with
+
+ '''
+
+ if not isinstance(pub_key, key.PublicKey):
+ raise TypeError('Public key required, but got %r' % pub_key)
+
+ key_bytes = common.bit_size(pub_key.n) // 8
+ blocksize = key_bytes - 11 # keep space for PKCS#1 padding
+
+ # Write the version number to the VARBLOCK file
+ outfile.write(chr(varblock.VARBLOCK_VERSION))
+
+ # Encrypt and write each block
+ for block in varblock.yield_fixedblocks(infile, blocksize):
+ crypto = pkcs1.encrypt(block, pub_key)
+
+ varblock.write_varint(outfile, len(crypto))
+ outfile.write(crypto)
+
+def decrypt_bigfile(infile, outfile, priv_key):
+ '''Decrypts an encrypted VARBLOCK file, writing it to 'outfile'
+
+ :param infile: file-like object to read the crypto in VARBLOCK format from
+ :param outfile: file-like object to write the cleartext to
+ :param priv_key: :py:class:`rsa.PrivateKey` to decrypt with
+
+ '''
+
+ if not isinstance(priv_key, key.PrivateKey):
+ raise TypeError('Private key required, but got %r' % priv_key)
+
+ for block in varblock.yield_varblocks(infile):
+ cleartext = pkcs1.decrypt(block, priv_key)
+ outfile.write(cleartext)
+
diff --git a/rsa/pkcs1.py b/rsa/pkcs1.py
index 7612b27..b81629e 100644
--- a/rsa/pkcs1.py
+++ b/rsa/pkcs1.py
@@ -31,7 +31,7 @@ SUCH INFORMATION to your users.
import hashlib
import os
-from rsa import common, transform, core
+from rsa import common, transform, core, varblock
# ASN.1 codes that describe the hash algorithm used.
HASH_ASN1 = {
@@ -224,7 +224,9 @@ def sign(message, priv_key, hash):
Hashes the message, then signs the hash with the given key. This is known
as a "detached signature", because the message itself isn't altered.
- :param message: the message to sign
+ :param message: the message to sign. Can be an 8-bit string or a file-like
+ object. If ``message`` has a ``read()`` method, it is assumed to be a
+ file-like object.
:param priv_key: the :py:class:`rsa.PrivateKey` to sign with
:param hash: the hash method used on the message. Use 'MD5', 'SHA-1',
'SHA-256', 'SHA-384' or 'SHA-512'.
@@ -258,7 +260,9 @@ def verify(message, signature, pub_key):
The hash method is detected automatically from the signature.
- :param message: the signed message
+ :param message: the signed message. Can be an 8-bit string or a file-like
+ object. If ``message`` has a ``read()`` method, it is assumed to be a
+ file-like object.
:param signature: the signature block, as created with ``sign(...)``.
:param pub_key: the :py:class:`rsa.PublicKey` of the person signing the message.
:raise VerificationError: when the signature doesn't match the message.
@@ -289,14 +293,30 @@ def verify(message, signature, pub_key):
raise VerificationError('Verification failed')
def _hash(message, method_name):
- '''Returns the message digest.'''
+ '''Returns the message digest.
+
+ :param message: the signed message. Can be an 8-bit string or a file-like
+ object. If ``message`` has a ``read()`` method, it is assumed to be a
+ file-like object.
+ :param method_name: the hash method, must be a key of
+ :py:const:`HASH_METHODS`.
+
+ '''
if method_name not in HASH_METHODS:
raise ValueError('Invalid hash method: %s' % method_name)
method = HASH_METHODS[method_name]
hasher = method()
- hasher.update(message)
+
+ if hasattr(message, 'read') and hasattr(message.read, '__call__'):
+ # read as 1K blocks
+ for block in varblock.yield_fixedblocks(message, 1024):
+ hasher.update(block)
+ else:
+ # hash the message object itself.
+ hasher.update(message)
+
return hasher.digest()
diff --git a/rsa/blocks.py b/rsa/varblock.py
index fed247e..b8bd899 100644
--- a/rsa/blocks.py
+++ b/rsa/varblock.py
@@ -13,15 +13,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-'''Large file support
+'''VARBLOCK file support
- - break a file into smaller blocks, and encrypt them, and store the
- encrypted blocks in another file.
-
- - take such an encrypted files, decrypt its blocks, and reconstruct the
- original file.
-
-The encrypted file format is as follows, where || denotes byte concatenation:
+The VARBLOCK file format is as follows, where || denotes byte concatenation:
FILE := VERSION || BLOCK || BLOCK ...
@@ -38,8 +32,6 @@ used to denote the block sizes.
'''
-from rsa import key, common, pkcs1
-
VARBLOCK_VERSION = 1
def read_varint(infile):
@@ -155,45 +147,3 @@ def yield_fixedblocks(infile, blocksize):
if read_bytes < blocksize:
break
-
-def encrypt_bigfile(infile, outfile, pub_key):
- '''Encrypts a file, writing it to 'outfile' in VARBLOCK format.
-
- :param infile: file-like object to read the cleartext from
- :param outfile: file-like object to write the crypto in VARBLOCK format to
- :param pub_key: :py:class:`rsa.PublicKey` to encrypt with
-
- '''
-
- if not isinstance(pub_key, key.PublicKey):
- raise TypeError('Public key required, but got %r' % pub_key)
-
- key_bytes = common.bit_size(pub_key.n) // 8
- blocksize = key_bytes - 11 # keep space for PKCS#1 padding
-
- # Write the version number to the VARBLOCK file
- outfile.write(chr(VARBLOCK_VERSION))
-
- # Encrypt and write each block
- for block in yield_fixedblocks(infile, blocksize):
- crypto = pkcs1.encrypt(block, pub_key)
-
- write_varint(outfile, len(crypto))
- outfile.write(crypto)
-
-def decrypt_bigfile(infile, outfile, priv_key):
- '''Decrypts an encrypted VARBLOCK file, writing it to 'outfile'
-
- :param infile: file-like object to read the crypto in VARBLOCK format from
- :param outfile: file-like object to write the cleartext to
- :param priv_key: :py:class:`rsa.PrivateKey` to decrypt with
-
- '''
-
- if not isinstance(priv_key, key.PrivateKey):
- raise TypeError('Private key required, but got %r' % priv_key)
-
- for block in yield_varblocks(infile):
- cleartext = pkcs1.decrypt(block, priv_key)
- outfile.write(cleartext)
-
diff --git a/tests/test_bigfile.py b/tests/test_bigfile.py
new file mode 100644
index 0000000..ffca5b0
--- /dev/null
+++ b/tests/test_bigfile.py
@@ -0,0 +1,37 @@
+'''Tests bigfile operations.'''
+
+from StringIO import StringIO
+import unittest
+
+import rsa
+from rsa import bigfile, varblock
+
+class BigfileTest(unittest.TestCase):
+
+ def test_encrypt_decrypt_bigfile(self):
+
+ # Expected block size + 11 bytes padding
+ pub_key, priv_key = rsa.newkeys((6 + 11) * 8)
+
+ # Encrypt the file
+ message = '123456Sybren'
+ infile = StringIO(message)
+ outfile = StringIO()
+
+ bigfile.encrypt_bigfile(infile, outfile, pub_key)
+
+ # Test
+ crypto = outfile.getvalue()
+
+ cryptfile = StringIO(crypto)
+ clearfile = StringIO()
+
+ bigfile.decrypt_bigfile(cryptfile, clearfile, priv_key)
+ self.assertEquals(clearfile.getvalue(), message)
+
+ # We have 2x6 bytes in the message, so that should result in two
+ # blocks.
+ cryptfile.seek(0)
+ varblocks = list(varblock.yield_varblocks(cryptfile))
+ self.assertEqual(2, len(varblocks))
+
diff --git a/tests/test_blocks.py b/tests/test_varblock.py
index 22d6500..d8addb4 100644
--- a/tests/test_blocks.py
+++ b/tests/test_varblock.py
@@ -1,10 +1,10 @@
-'''Tests block operations.'''
+'''Tests varblock operations.'''
from StringIO import StringIO
import unittest
import rsa
-from rsa import blocks
+from rsa import varblock
class VarintTest(unittest.TestCase):
@@ -13,7 +13,7 @@ class VarintTest(unittest.TestCase):
encoded = '\xac\x02crummy'
infile = StringIO(encoded)
- (decoded, read) = blocks.read_varint(infile)
+ (decoded, read) = varblock.read_varint(infile)
# Test the returned values
self.assertEqual(300, decoded)
@@ -27,7 +27,7 @@ class VarintTest(unittest.TestCase):
encoded = '\x00crummy'
infile = StringIO(encoded)
- (decoded, read) = blocks.read_varint(infile)
+ (decoded, read) = varblock.read_varint(infile)
# Test the returned values
self.assertEqual(0, decoded)
@@ -41,7 +41,7 @@ class VarintTest(unittest.TestCase):
expected = '\xac\x02'
outfile = StringIO()
- written = blocks.write_varint(outfile, 300)
+ written = varblock.write_varint(outfile, 300)
# Test the returned values
self.assertEqual(expected, outfile.getvalue())
@@ -51,7 +51,7 @@ class VarintTest(unittest.TestCase):
def test_write_zero(self):
outfile = StringIO()
- written = blocks.write_varint(outfile, 0)
+ written = varblock.write_varint(outfile, 0)
# Test the returned values
self.assertEqual('\x00', outfile.getvalue())
@@ -63,7 +63,7 @@ class VarblockTest(unittest.TestCase):
def test_yield_varblock(self):
infile = StringIO('\x01\x0512345\x06Sybren')
- varblocks = list(blocks.yield_varblocks(infile))
+ varblocks = list(varblock.yield_varblocks(infile))
self.assertEqual(['12345', 'Sybren'], varblocks)
class FixedblockTest(unittest.TestCase):
@@ -72,35 +72,6 @@ class FixedblockTest(unittest.TestCase):
infile = StringIO('123456Sybren')
- fixedblocks = list(blocks.yield_fixedblocks(infile, 6))
+ fixedblocks = list(varblock.yield_fixedblocks(infile, 6))
self.assertEqual(['123456', 'Sybren'], fixedblocks)
-class BigfileTest(unittest.TestCase):
-
- def test_encrypt_decrypt_bigfile(self):
-
- # Expected block size + 11 bytes padding
- pub_key, priv_key = rsa.newkeys((6 + 11) * 8)
-
- # Encrypt the file
- message = '123456Sybren'
- infile = StringIO(message)
- outfile = StringIO()
-
- blocks.encrypt_bigfile(infile, outfile, pub_key)
-
- # Test
- crypto = outfile.getvalue()
-
- cryptfile = StringIO(crypto)
- clearfile = StringIO()
-
- blocks.decrypt_bigfile(cryptfile, clearfile, priv_key)
- self.assertEquals(clearfile.getvalue(), message)
-
- # We have 2x6 bytes in the message, so that should result in two
- # blocks.
- cryptfile.seek(0)
- varblocks = list(blocks.yield_varblocks(cryptfile))
- self.assertEqual(2, len(varblocks))
-