cleanup: Move modules to "lib/Crypto" subdirectory.

This will avoid the previous situation where scripts like the old "test.py" get included accidentally in a release. It also frees us to put additional build scripts in the top-level directory of the source tree.
author: Dwayne C. Litzenberger <dlitz@dlitz.net> 2009-02-28 13:14:53 -0500
committer: Dwayne C. Litzenberger <dlitz@dlitz.net> 2009-02-28 13:14:53 -0500
commit: ff8a657a8dd688551c59b4bbf7be33510992ee46 (patch)
tree: fee3a96bc95fdfda34c18c2714a75105a713ad50 /lib/Crypto/Protocol
parent: d1c4875e1f220652fe7ff8358f56dee3b2aba31b (diff)
download: pycrypto-ff8a657a8dd688551c59b4bbf7be33510992ee46.tar.gz
3 files changed, 541 insertions, 0 deletions
diff --git a/lib/Crypto/Protocol/AllOrNothing.py b/lib/Crypto/Protocol/AllOrNothing.py
new file mode 100644
index 0000000..795d602
--- /dev/null
+++ b/lib/Crypto/Protocol/AllOrNothing.py
@@ -0,0 +1,295 @@
+"""This file implements all-or-nothing package transformations.
+
+An all-or-nothing package transformation is one in which some text is
+transformed into message blocks, such that all blocks must be obtained before
+the reverse transformation can be applied.  Thus, if any blocks are corrupted
+or lost, the original message cannot be reproduced.
+
+An all-or-nothing package transformation is not encryption, although a block
+cipher algorithm is used.  The encryption key is randomly generated and is
+extractable from the message blocks.
+
+This class implements the All-Or-Nothing package transformation algorithm
+described in:
+
+Ronald L. Rivest.  "All-Or-Nothing Encryption and The Package Transform"
+http://theory.lcs.mit.edu/~rivest/fusion.pdf
+
+"""
+
+__revision__ = "$Id$"
+
+import operator
+import string
+from Crypto.Util.number import bytes_to_long, long_to_bytes
+
+
+
+class AllOrNothing:
+    """Class implementing the All-or-Nothing package transform.
+
+    digest() turns a string into a list of message blocks; undigest() turns
+    the complete list of blocks back into the original string.
+
+    Methods for subclassing:
+
+        _inventkey(key_size):
+            Returns a randomly generated key.  Subclasses can use this to
+            implement better random key generating algorithms.  The default
+            algorithm is probably not very cryptographically secure.
+
+    """
+
+    def __init__(self, ciphermodule, mode=None, IV=None):
+        """AllOrNothing(ciphermodule, mode=None, IV=None)
+
+        ciphermodule is a module implementing the cipher algorithm to
+        use.  It must provide the PEP272 interface.
+
+        Note that the encryption key is randomly generated
+        automatically when needed.  Optional arguments mode and IV are
+        passed directly through to the ciphermodule.new() method; they
+        are the feedback mode and initialization vector to use.  All
+        three arguments must be the same for the object used to create
+        the digest, and to undigest'ify the message blocks.
+        """
+
+        self.__ciphermodule = ciphermodule
+        self.__mode = mode
+        self.__IV = IV
+        self.__key_size = ciphermodule.key_size
+        # A reported key_size of 0 is replaced by a fixed 16-byte key.
+        # NOTE(review): presumably 0 marks a variable-key-length cipher --
+        # confirm against the cipher modules.
+        if self.__key_size == 0:
+            self.__key_size = 16
+
+    # Character repeated key_size times to build K0, the fixed key that is
+    # used to encrypt the hash blocks.
+    __K0digit = chr(0x69)
+
+    def digest(self, text):
+        """digest(text:string) : [string]
+
+        Perform the All-or-Nothing package transform on the given
+        string.  Output is a list of message blocks describing the
+        transformed text, where each block is a string of bit length equal
+        to the ciphermodule's block_size.
+
+        The returned list holds one block per block of padded text, then
+        one block carrying the number of pad bytes, then one final block
+        carrying the session key XOR'd with all the hash blocks.
+        """
+
+        # generate a random session key and K0, the key used to encrypt the
+        # hash blocks.  Rivest calls this a fixed, publicly-known encryption
+        # key, but says nothing about the security implications of this key or
+        # how to choose it.
+        key = self._inventkey(self.__key_size)
+        K0 = self.__K0digit * self.__key_size
+
+        # we need two cipher objects here, one that is used to encrypt the
+        # message blocks and one that is used to encrypt the hashes.  The
+        # former uses the randomly generated key, while the latter uses the
+        # well-known key.
+        mcipher = self.__newcipher(key)
+        hcipher = self.__newcipher(K0)
+
+        # Pad the text so that its length is a multiple of the cipher's
+        # block_size.  Pad with trailing spaces, which will be eliminated in
+        # the undigest() step.  At least one pad byte is always added (a
+        # full block when the text is already aligned); undigest() relies
+        # on this because it strips the padding with text[:-padbytes].
+        block_size = self.__ciphermodule.block_size
+        padbytes = block_size - (len(text) % block_size)
+        text = text + ' ' * padbytes
+
+        # Run through the algorithm:
+        # s: number of message blocks (size of text / block_size)
+        # input sequence: m1, m2, ... ms
+        # random key K' (`key' in the code)
+        # Compute output sequence: m'1, m'2, ... m's' for s' = s + 1
+        # Let m'i = mi ^ E(K', i) for i = 1, 2, 3, ..., s
+        # Let m's' = K' ^ h1 ^ h2 ^ ... hs
+        # where hi = E(K0, m'i ^ i) for i = 1, 2, ... s
+        #
+        # The one complication I add is that the last message block is hard
+        # coded to the number of padbytes added, so that these can be stripped
+        # during the undigest() step
+        s = len(text) / block_size
+        blocks = []
+        hashes = []
+        for i in range(1, s+1):
+            start = (i-1) * block_size
+            end = start + block_size
+            mi = text[start:end]
+            assert len(mi) == block_size
+            cipherblock = mcipher.encrypt(long_to_bytes(i, block_size))
+            mticki = bytes_to_long(mi) ^ bytes_to_long(cipherblock)
+            blocks.append(mticki)
+            # calculate the hash block for this block
+            hi = hcipher.encrypt(long_to_bytes(mticki ^ i, block_size))
+            hashes.append(bytes_to_long(hi))
+
+        # Add the padbytes length as a message block
+        i = i + 1
+        cipherblock = mcipher.encrypt(long_to_bytes(i, block_size))
+        mticki = padbytes ^ bytes_to_long(cipherblock)
+        blocks.append(mticki)
+
+        # calculate this block's hash
+        hi = hcipher.encrypt(long_to_bytes(mticki ^ i, block_size))
+        hashes.append(bytes_to_long(hi))
+
+        # Now calculate the last message block of the sequence 1..s'.  This
+        # will contain the random session key XOR'd with all the hash blocks,
+        # so that for undigest(), once all the hash blocks are calculated, the
+        # session key can be trivially extracted.  Calculating all the hash
+        # blocks requires that all the message blocks be received, thus the
+        # All-or-Nothing algorithm succeeds.
+        mtick_stick = bytes_to_long(key) ^ reduce(operator.xor, hashes)
+        blocks.append(mtick_stick)
+
+        # we convert the blocks to strings since in Python, byte sequences are
+        # always represented as strings.  This is more consistent with the
+        # model that encryption and hash algorithms always operate on strings.
+        return [long_to_bytes(i,self.__ciphermodule.block_size) for i in blocks]
+
+
+    def undigest(self, blocks):
+        """undigest(blocks : [string]) : string
+
+        Perform the reverse package transformation on a list of message
+        blocks.  Note that the ciphermodule used for both transformations
+        must be the same.  blocks is a list of strings of bit length
+        equal to the ciphermodule's block_size.
+
+        Returns the original text with the padding added by digest()
+        removed.  Raises ValueError if fewer than 2 blocks are given.
+        """
+
+        # better have at least 2 blocks, for the padbytes package and the hash
+        # block accumulator
+        if len(blocks) < 2:
+            raise ValueError("List must be at least length 2.")
+
+        # blocks is a list of strings.  We need to deal with them as long
+        # integers
+        blocks = map(bytes_to_long, blocks)
+        block_size = self.__ciphermodule.block_size
+
+        # Calculate the well-known key, to which the hash blocks are
+        # encrypted, and create the hash cipher.
+        K0 = self.__K0digit * self.__key_size
+        hcipher = self.__newcipher(K0)
+
+        # Since we have all the blocks (or this method would have been called
+        # prematurely), we can calculate all the hash blocks.
+        hashes = []
+        for i in range(1, len(blocks)):
+            mticki = blocks[i-1] ^ i
+            # Pad to block_size exactly as digest() does; without the pad a
+            # value with leading zero bytes yields a short cipher input and
+            # therefore a different (or failing) hash computation.
+            hi = hcipher.encrypt(long_to_bytes(mticki, block_size))
+            hashes.append(bytes_to_long(hi))
+
+        # now we can calculate K' (key).  remember the last block contains
+        # m's' which we don't include here
+        key = blocks[-1] ^ reduce(operator.xor, hashes)
+
+        # and now we can create the cipher object.  The key is padded back to
+        # key_size so that a session key starting with zero bytes is not
+        # silently shortened.
+        mcipher = self.__newcipher(long_to_bytes(key, self.__key_size))
+
+        # And we can now decode the original message blocks
+        parts = []
+        for i in range(1, len(blocks)):
+            cipherblock = mcipher.encrypt(long_to_bytes(i, block_size))
+            mi = blocks[i-1] ^ bytes_to_long(cipherblock)
+            parts.append(mi)
+
+        # The last message block contains the number of pad bytes appended to
+        # the original text string, such that its length was an even multiple
+        # of the cipher's block_size.  This number should be small enough that
+        # the conversion from long integer to integer should never overflow
+        padbytes = int(parts[-1])
+        # Each text block is restored to its full block_size width so that
+        # blocks beginning with NUL bytes keep them.
+        text = ''.join([long_to_bytes(part, block_size) for part in parts[:-1]])
+        return text[:-padbytes]
+
+    def _inventkey(self, key_size):
+        """_inventkey(key_size:int) : string
+
+        Return key_size bytes drawn from a freshly created RandomPool, to be
+        used as the session key.  Subclasses may override this method to
+        supply a better source of random keys.
+        """
+        # TBD: Not a very secure algorithm.  Eventually, I'd like to use JHy's
+        # kernelrand module
+        from Crypto.Util import randpool
+        # TBD: key_size * 2 to work around possible bug in RandomPool?
+        pool = randpool.RandomPool(key_size * 2)
+        # keep feeding events to the pool until it reports enough entropy
+        while key_size > pool.entropy:
+            pool.add_event()
+
+        # we now have enough entropy in the pool to get a key_size'd key
+        return pool.get_bytes(key_size)
+
+    def __newcipher(self, key):
+        """Create a cipher object from the configured cipher module.
+
+        mode and IV are only passed to ciphermodule.new() when they were
+        given to the constructor, so that the module's own defaults apply
+        otherwise.
+        """
+        if self.__mode is None and self.__IV is None:
+            return self.__ciphermodule.new(key)
+        elif self.__IV is None:
+            return self.__ciphermodule.new(key, self.__mode)
+        else:
+            return self.__ciphermodule.new(key, self.__mode, self.__IV)
+
+
+
+if __name__ == '__main__':
+    # Command-line self test: package this module's docstring with
+    # AllOrNothing.digest(), print the resulting blocks, then check that a
+    # second AllOrNothing object can undigest them back to the same text.
+    import sys
+    import getopt
+    import base64
+
+    usagemsg = '''\
+Test module usage: %(program)s [-c cipher] [-l] [-h]
+
+Where:
+    --cipher module
+    -c module
+        Cipher module to use.  Default: %(ciphermodule)s
+
+    --aslong
+    -l
+        Print the encoded message blocks as long integers instead of base64
+        encoded strings
+
+    --help
+    -h
+        Print this help message
+'''
+
+    ciphermodule = 'AES'
+    aslong = 0
+
+    def usage(code, msg=None):
+        # Print an optional error message followed by the usage text, then
+        # exit with the given status code.
+        if msg:
+            print msg
+        print usagemsg % {'program': sys.argv[0],
+                          'ciphermodule': ciphermodule}
+        sys.exit(code)
+
+    try:
+        # -h/--help must be declared here, otherwise getopt rejects them and
+        # the help branch below can never run.
+        opts, args = getopt.getopt(sys.argv[1:],
+                                   'c:lh', ['cipher=', 'aslong', 'help'])
+    except getopt.error, msg:
+        usage(1, msg)
+
+    if args:
+        usage(1, 'Too many arguments')
+
+    for opt, arg in opts:
+        if opt in ('-h', '--help'):
+            usage(0)
+        elif opt in ('-c', '--cipher'):
+            ciphermodule = arg
+        elif opt in ('-l', '--aslong'):
+            aslong = 1
+
+    # ugly hack to force __import__ to give us the end-path module
+    module = __import__('Crypto.Cipher.'+ciphermodule, None, None, ['new'])
+
+    a = AllOrNothing(module)
+    print 'Original text:\n=========='
+    print __doc__
+    print '=========='
+    msgblocks = a.digest(__doc__)
+    print 'message blocks:'
+    for i, blk in map(None, range(len(msgblocks)), msgblocks):
+        # base64 adds a trailing newline
+        print '    %3d' % i,
+        if aslong:
+            print bytes_to_long(blk)
+        else:
+            print base64.encodestring(blk)[:-1]
+    #
+    # get a new undigest-only object so there's no leakage
+    b = AllOrNothing(module)
+    text = b.undigest(msgblocks)
+    if text == __doc__:
+        print 'They match!'
+    else:
+        print 'They differ!'
diff --git a/lib/Crypto/Protocol/Chaffing.py b/lib/Crypto/Protocol/Chaffing.py
new file mode 100644
index 0000000..8da997f
--- /dev/null
+++ b/lib/Crypto/Protocol/Chaffing.py
@@ -0,0 +1,229 @@
+"""This file implements the chaffing algorithm.
+
+Winnowing and chaffing is a technique for enhancing privacy without requiring
+strong encryption.  In short, the technique takes a set of authenticated
+message blocks (the wheat) and adds a number of chaff blocks which have
+randomly chosen data and MAC fields.  This means that to an adversary, the
+chaff blocks look as valid as the wheat blocks, and so the authentication
+would have to be performed on every block.  By tailoring the number of chaff
+blocks added to the message, the sender can make breaking the message
+computationally infeasible.  There are many other interesting properties of
+the winnow/chaff technique.
+
+For example, say Alice is sending a message to Bob.  She packetizes the
+message and performs an all-or-nothing transformation on the packets.  Then
+she authenticates each packet with a message authentication code (MAC).  The
+MAC is a hash of the data packet, and there is a secret key which she must
+share with Bob (key distribution is an exercise left to the reader).  She then
+adds a serial number to each packet, and sends the packets to Bob.
+
+Bob receives the packets, and using the shared secret authentication key,
+authenticates the MACs for each packet.  Those packets that have bad MACs are
+simply discarded.  The remainder are sorted by serial number, and passed
+through the reverse all-or-nothing transform.  The transform means that an
+eavesdropper (say Eve) must acquire all the packets before any of the data can
+be read.  If even one packet is missing, the data is useless.
+
+There's one twist: by adding chaff packets, Alice and Bob can make Eve's job
+much harder, since Eve now has to break the shared secret key, or try every
+combination of wheat and chaff packet to read any of the message.  The cool
+thing is that Bob doesn't need to add any additional code; the chaff packets
+are already filtered out because their MACs don't match (in all likelihood --
+since the data and MACs for the chaff packets are randomly chosen it is
+possible, but very unlikely that a chaff MAC will match the chaff data).  And
+Alice need not even be the party adding the chaff!  She could be completely
+unaware that a third party, say Charles, is adding chaff packets to her
+messages as they are transmitted.
+
+For more information on winnowing and chaffing see this paper:
+
+Ronald L. Rivest, "Chaffing and Winnowing: Confidentiality without Encryption"
+http://theory.lcs.mit.edu/~rivest/chaffing.txt
+
+"""
+
+__revision__ = "$Id$"
+
+from Crypto.Util.number import bytes_to_long
+
+class Chaff:
+    """Class implementing the chaff adding algorithm.
+
+    Methods for subclasses:
+
+            _randnum(size):
+                Returns a randomly generated number with a byte-length equal
+                to size.  Subclasses can use this to implement better random
+                data and MAC generating algorithms.  The default algorithm is
+                probably not very cryptographically secure.  It is most
+                important that the chaff data does not contain any patterns
+                that can be used to discern it from wheat data without running
+                the MAC.
+
+    """
+
+    def __init__(self, factor=1.0, blocksper=1):
+        """Chaff(factor:float, blocksper:int)
+
+        factor is the number of message blocks to add chaff to,
+        expressed as a fraction between 0.0 and 1.0.  blocksper is
+        the number of chaff blocks to include for each block being
+        chaffed.  Thus the defaults add one chaff block to every
+        message block.  By changing the defaults, you can adjust how
+        computationally difficult it could be for an adversary to
+        brute-force crack the message.  The difficulty is expressed
+        as:
+
+            pow(blocksper, int(factor * number-of-blocks))
+
+        For ease of implementation, when factor < 1.0, only the first
+        int(factor*number-of-blocks) message blocks are chaffed.
+
+        Raises ValueError if factor is outside 0.0..1.0 or if blocksper
+        is negative.
+        """
+
+        if not (0.0<=factor<=1.0):
+            raise ValueError("'factor' must be between 0.0 and 1.0")
+        if blocksper < 0:
+            raise ValueError("'blocksper' must be zero or more")
+
+        self.__factor = factor
+        self.__blocksper = blocksper
+
+
+    def chaff(self, blocks):
+        """chaff( [(serial-number:int, data:string, MAC:string)] )
+        : [(int, string, string)]
+
+        Add chaff to message blocks.  blocks is a list of 3-tuples of the
+        form (serial-number, data, MAC).
+
+        Chaff is created by choosing a random number of the same
+        byte-length as data, and another random number of the same
+        byte-length as MAC.  The message block's serial number is
+        placed on the chaff block and all the packet's chaff blocks
+        are randomly interspersed with the single wheat block.  This
+        method then returns a list of 3-tuples of the same form.
+        Chaffed blocks will contain multiple instances of 3-tuples
+        with the same serial number, but the only way to figure out
+        which blocks are wheat and which are chaff is to perform the
+        MAC hash and compare values.
+        """
+
+        chaffedblocks = []
+
+        # count is the number of blocks to add chaff to.  blocksper is the
+        # number of chaff blocks to add per message block that is being
+        # chaffed.
+        count = len(blocks) * self.__factor
+        blocksper = range(self.__blocksper)
+        for i, wheat in map(None, range(len(blocks)), blocks):
+            # it shouldn't matter which of the n blocks we add chaff to, so for
+            # ease of implementation, we'll just add them to the first count
+            # blocks
+            if i < count:
+                serial, data, mac = wheat
+                datasize = len(data)
+                macsize = len(mac)
+                # addwheat stays true until the wheat block has been emitted
+                addwheat = 1
+                # add chaff to this block
+                for j in blocksper:
+                    chaffdata = self._randnum(datasize)
+                    chaffmac = self._randnum(macsize)
+                    chaff = (serial, chaffdata, chaffmac)
+                    # mix up the order: if bit 6 (mask 0x40) of a fresh
+                    # random number is set, put the wheat on the list before
+                    # this chaff block
+                    if addwheat and bytes_to_long(self._randnum(16)) & 0x40:
+                        chaffedblocks.append(wheat)
+                        addwheat = 0
+                    chaffedblocks.append(chaff)
+                # the wheat was never selected above (or blocksper is 0), so
+                # it goes after all of its chaff
+                if addwheat:
+                    chaffedblocks.append(wheat)
+            else:
+                # just add the wheat
+                chaffedblocks.append(wheat)
+        return chaffedblocks
+
+    def _randnum(self, size):
+        """_randnum(size:int) : string
+
+        Return size bytes drawn from a freshly created RandomPool.
+        Subclasses may override this method to supply better random data.
+        """
+        # TBD: Not a very secure algorithm.
+        # TBD: size * 2 to work around possible bug in RandomPool
+        from Crypto.Util import randpool
+        pool = randpool.RandomPool(size * 2)
+        # Feed events to the pool until it reports enough entropy.  Waiting
+        # without adding anything would never change pool.entropy and could
+        # spin forever.
+        while size > pool.entropy:
+            pool.add_event()
+
+        # we now have enough entropy in the pool to get size bytes of random
+        # data... well, probably
+        return pool.get_bytes(size)
+
+
+
+if __name__ == '__main__':
+    # Demo: split a text into packets, MAC each packet, add chaff, then
+    # winnow the chaff out again by re-checking every MAC.
+    text = """\
+We hold these truths to be self-evident, that all men are created equal, that
+they are endowed by their Creator with certain unalienable Rights, that among
+these are Life, Liberty, and the pursuit of Happiness. That to secure these
+rights, Governments are instituted among Men, deriving their just powers from
+the consent of the governed. That whenever any Form of Government becomes
+destructive of these ends, it is the Right of the People to alter or to
+abolish it, and to institute new Government, laying its foundation on such
+principles and organizing its powers in such form, as to them shall seem most
+likely to effect their Safety and Happiness.
+"""
+    print 'Original text:\n=========='
+    print text
+    print '=========='
+
+    # cut the text into packets of at most 40 characters
+    size = 40
+    blocks = [text[start:start+size] for start in range(0, len(text), size)]
+
+    # compute one HMAC-SHA per packet, all under the same (obvious) key
+    print 'Calculating MACs...'
+    from Crypto.Hash import HMAC, SHA
+    key = 'Jefferson'
+    macs = []
+    for block in blocks:
+        macs.append(HMAC.new(key, block, digestmod=SHA).digest())
+
+    assert len(blocks) == len(macs)
+
+    # build the (serial-number, data, MAC) tuples the chaffing step expects
+    m = map(None, range(len(blocks)), blocks, macs)
+    print m
+    source = [(serial, data, mac) for serial, data, mac in m]
+
+    # mix chaff into the packets
+    print 'Adding chaff...'
+    chaffer = Chaff(factor=0.5, blocksper=2)
+    chaffed = chaffer.chaff(source)
+
+    from base64 import encodestring
+
+    # list every chaffed packet, marking and collecting those whose MAC
+    # verifies (the wheat)
+
+    wheat = []
+    print 'chaffed message blocks:'
+    for serial, data, mac in chaffed:
+        # a packet is wheat exactly when its recomputed MAC matches
+        if HMAC.new(key, data, digestmod=SHA).digest() == mac:
+            wheat.append(data)
+            tag = '-->'
+        else:
+            tag = '   '
+        # base64 adds a trailing newline, which is cut off here
+        print tag, '%3d' % serial, repr(data), encodestring(mac)[:-1]
+
+    # reassemble the wheat and compare it with what we started from
+    print 'Undigesting wheat...'
+    if "".join(wheat) == text:
+        print 'They match!'
+    else:
+        print 'They differ!'
diff --git a/lib/Crypto/Protocol/__init__.py b/lib/Crypto/Protocol/__init__.py
new file mode 100644
index 0000000..516558a
--- /dev/null
+++ b/lib/Crypto/Protocol/__init__.py
@@ -0,0 +1,17 @@
+
+"""Cryptographic protocols
+
+Implements various cryptographic protocols.  (Don't expect to find
+network protocols here.)
+
+Crypto.Protocol.AllOrNothing   Transforms a message into a set of message
+                               blocks, such that the blocks can be
+                               recombined to get the message back.
+
+Crypto.Protocol.Chaffing       Takes a set of authenticated message blocks
+                               (the wheat) and adds a number of
+                               randomly generated blocks (the chaff).
+"""
+
+__all__ = ['AllOrNothing', 'Chaffing']
+__revision__ = "$Id$"
author	Dwayne C. Litzenberger <dlitz@dlitz.net>	2009-02-28 13:14:53 -0500
committer	Dwayne C. Litzenberger <dlitz@dlitz.net>	2009-02-28 13:14:53 -0500
commit	ff8a657a8dd688551c59b4bbf7be33510992ee46 (patch)
tree	fee3a96bc95fdfda34c18c2714a75105a713ad50 /lib/Crypto/Protocol
parent	d1c4875e1f220652fe7ff8358f56dee3b2aba31b (diff)
download	pycrypto-ff8a657a8dd688551c59b4bbf7be33510992ee46.tar.gz