summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog10
-rw-r--r--Makefile.in6
-rw-r--r--serpent-decrypt.c411
-rw-r--r--serpent-encrypt.c415
-rw-r--r--serpent-internal.h75
-rw-r--r--serpent-set-key.c351
-rw-r--r--serpent.c857
7 files changed, 1266 insertions, 859 deletions
diff --git a/ChangeLog b/ChangeLog
index dc0ede72..6360e693 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2011-06-06 Niels Möller <nisse@lysator.liu.se>
+ * Makefile.in (DISTFILES): Added serpent-internal.h.
+ (nettle_SOURCES): Replaced serpent.c by serpent-set-key.c,
+ serpent-encrypt.c, and serpent-decrypt.c.
+
+ * serpent.c: Replaced by several new files.
+ * serpent-set-key.c: New file.
+ * serpent-encrypt.c: New file.
+ * serpent-decrypt.c: New file.
+ * serpent-internal.h: New file.
+
* serpent.c [HAVE_NATIVE_64_BIT]: Process two blocks at a time in
parallel. Measured speedup of 10%--25% (higher for encryption) on
x86_64.
diff --git a/Makefile.in b/Makefile.in
index 6b4948b5..3859c15e 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -71,7 +71,8 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c \
sha1.c sha1-compress.c sha1-meta.c \
sha256.c sha256-compress.c sha224-meta.c sha256-meta.c \
sha512.c sha512-compress.c sha384-meta.c sha512-meta.c \
- serpent.c serpent-meta.c \
+ serpent-set-key.c serpent-encrypt.c serpent-decrypt.c \
+ serpent-meta.c \
twofish.c twofish-meta.c \
yarrow256.c yarrow_key_event.c \
buffer.c buffer-init.c realloc.c \
@@ -127,7 +128,8 @@ DISTFILES = $(SOURCES) $(HEADERS) .bootstrap aclocal.m4 configure.ac \
config.h.in config.m4.in config.make.in Makefile.in \
README AUTHORS COPYING COPYING.LIB INSTALL NEWS TODO ChangeLog \
memxor.c $(des_headers) descore.README \
- aes-internal.h camellia-internal.h cast128_sboxes.h desinfo.h desCode.h \
+ aes-internal.h camellia-internal.h serpent-internal.h \
+ cast128_sboxes.h desinfo.h desCode.h \
nettle-internal.h nettle-write.h prime-list.h \
asm.m4 \
nettle.texinfo nettle.info nettle.html nettle.pdf sha-example.c
diff --git a/serpent-decrypt.c b/serpent-decrypt.c
new file mode 100644
index 00000000..6814a280
--- /dev/null
+++ b/serpent-decrypt.c
@@ -0,0 +1,411 @@
+/* serpent-decrypt.c
+ *
+ * The serpent block cipher.
+ *
+ * For more details on this algorithm, see the Serpent website at
+ * http://www.cl.cam.ac.uk/~rja14/serpent.html
+ */
+
+/* nettle, low-level cryptographics library
+ *
+ * Copyright (C) 2011 Niels Möller
+ * Copyright (C) 2010, 2011 Simon Josefsson
+ * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+ *
+ * The nettle library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ *
+ * The nettle library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the nettle library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+/* This file is derived from cipher/serpent.c in Libgcrypt v1.4.6.
+   The adaptation to Nettle was made by Simon Josefsson on 2010-12-07
+ with final touches on 2011-05-30. Changes include replacing
+ libgcrypt with nettle in the license template, renaming
+ serpent_context to serpent_ctx, renaming u32 to uint32_t, removing
+ libgcrypt stubs and selftests, modifying entry function prototypes,
+ using FOR_BLOCKS to iterate through data in encrypt/decrypt, using
+ LE_READ_UINT32 and LE_WRITE_UINT32 to access data in
+ encrypt/decrypt, and running indent on the code. */
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <limits.h>
+
+#include "serpent.h"
+
+#include "macros.h"
+#include "serpent-internal.h"
+
+/* These are the S-Boxes of Serpent.  They are copied from Serpent's
+ reference implementation (the optimized one, contained in
+ `floppy2') and are therefore:
+
+ Copyright (C) 1998 Ross Anderson, Eli Biham, Lars Knudsen.
+
+ To quote the Serpent homepage
+ (http://www.cl.cam.ac.uk/~rja14/serpent.html):
+
+ "Serpent is now completely in the public domain, and we impose no
+ restrictions on its use. This was announced on the 21st August at
+ the First AES Candidate Conference. The optimised implementations
+ in the submission package are now under the GNU PUBLIC LICENSE
+ (GPL), although some comments in the code still say otherwise. You
+ are welcome to use Serpent for any application." */
+
+/* FIXME: Except when used within the key schedule, the inputs are not
+ used after the substitution, and hence we could allow them to be
+ destroyed. Can this freedom be used to optimize the sboxes? */
+
+#define SBOX0_INVERSE(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t06, t08, t09, t10; \
+ type t12, t13, t14, t15, t17, t18, t01; \
+ t01 = c ^ d ; \
+ t02 = a | b ; \
+ t03 = b | c ; \
+ t04 = c & t01; \
+ t05 = t02 ^ t01; \
+ t06 = a | t04; \
+ y = ~ t05; \
+ t08 = b ^ d ; \
+ t09 = t03 & t08; \
+ t10 = d | y ; \
+ x = t09 ^ t06; \
+ t12 = a | t05; \
+ t13 = x ^ t12; \
+ t14 = t03 ^ t10; \
+ t15 = a ^ c ; \
+ z = t14 ^ t13; \
+ t17 = t05 & t13; \
+ t18 = t14 | t17; \
+ w = t15 ^ t18; \
+ } while (0)
+
+#define SBOX1_INVERSE(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t06, t07, t08; \
+ type t09, t10, t11, t14, t15, t17, t01; \
+ t01 = a ^ b ; \
+ t02 = b | d ; \
+ t03 = a & c ; \
+ t04 = c ^ t02; \
+ t05 = a | t04; \
+ t06 = t01 & t05; \
+ t07 = d | t03; \
+ t08 = b ^ t06; \
+ t09 = t07 ^ t06; \
+ t10 = t04 | t03; \
+ t11 = d & t08; \
+ y = ~ t09; \
+ x = t10 ^ t11; \
+ t14 = a | y ; \
+ t15 = t06 ^ x ; \
+ z = t01 ^ t04; \
+ t17 = c ^ t15; \
+ w = t14 ^ t17; \
+ } while (0)
+
+#define SBOX2_INVERSE(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t06, t07, t08, t09; \
+ type t10, t11, t12, t15, t16, t17, t01; \
+ t01 = a ^ d ; \
+ t02 = c ^ d ; \
+ t03 = a & c ; \
+ t04 = b | t02; \
+ w = t01 ^ t04; \
+ t06 = a | c ; \
+ t07 = d | w ; \
+ t08 = ~ d ; \
+ t09 = b & t06; \
+ t10 = t08 | t03; \
+ t11 = b & t07; \
+ t12 = t06 & t02; \
+ z = t09 ^ t10; \
+ x = t12 ^ t11; \
+ t15 = c & z ; \
+ t16 = w ^ x ; \
+ t17 = t10 ^ t15; \
+ y = t16 ^ t17; \
+ } while (0)
+
+#define SBOX3_INVERSE(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t06, t07, t09; \
+ type t11, t12, t13, t14, t16, t01; \
+ t01 = c | d ; \
+ t02 = a | d ; \
+ t03 = c ^ t02; \
+ t04 = b ^ t02; \
+ t05 = a ^ d ; \
+ t06 = t04 & t03; \
+ t07 = b & t01; \
+ y = t05 ^ t06; \
+ t09 = a ^ t03; \
+ w = t07 ^ t03; \
+ t11 = w | t05; \
+ t12 = t09 & t11; \
+ t13 = a & y ; \
+ t14 = t01 ^ t05; \
+ x = b ^ t12; \
+ t16 = b | t13; \
+ z = t14 ^ t16; \
+ } while (0)
+
+#define SBOX4_INVERSE(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t06, t07, t09; \
+ type t10, t11, t12, t13, t15, t01; \
+ t01 = b | d ; \
+ t02 = c | d ; \
+ t03 = a & t01; \
+ t04 = b ^ t02; \
+ t05 = c ^ d ; \
+ t06 = ~ t03; \
+ t07 = a & t04; \
+ x = t05 ^ t07; \
+ t09 = x | t06; \
+ t10 = a ^ t07; \
+ t11 = t01 ^ t09; \
+ t12 = d ^ t04; \
+ t13 = c | t10; \
+ z = t03 ^ t12; \
+ t15 = a ^ t04; \
+ y = t11 ^ t13; \
+ w = t15 ^ t09; \
+ } while (0)
+
+#define SBOX5_INVERSE(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t07, t08, t09; \
+ type t10, t12, t13, t15, t16, t01; \
+ t01 = a & d ; \
+ t02 = c ^ t01; \
+ t03 = a ^ d ; \
+ t04 = b & t02; \
+ t05 = a & c ; \
+ w = t03 ^ t04; \
+ t07 = a & w ; \
+ t08 = t01 ^ w ; \
+ t09 = b | t05; \
+ t10 = ~ b ; \
+ x = t08 ^ t09; \
+ t12 = t10 | t07; \
+ t13 = w | x ; \
+ z = t02 ^ t12; \
+ t15 = t02 ^ t13; \
+ t16 = b ^ d ; \
+ y = t16 ^ t15; \
+ } while (0)
+
+#define SBOX6_INVERSE(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t06, t07, t08, t09; \
+ type t12, t13, t14, t15, t16, t17, t01; \
+ t01 = a ^ c ; \
+ t02 = ~ c ; \
+ t03 = b & t01; \
+ t04 = b | t02; \
+ t05 = d | t03; \
+ t06 = b ^ d ; \
+ t07 = a & t04; \
+ t08 = a | t02; \
+ t09 = t07 ^ t05; \
+ x = t06 ^ t08; \
+ w = ~ t09; \
+ t12 = b & w ; \
+ t13 = t01 & t05; \
+ t14 = t01 ^ t12; \
+ t15 = t07 ^ t13; \
+ t16 = d | t02; \
+ t17 = a ^ x ; \
+ z = t17 ^ t15; \
+ y = t16 ^ t14; \
+ } while (0)
+
+#define SBOX7_INVERSE(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t06, t07, t08, t09; \
+ type t10, t11, t13, t14, t15, t16, t01; \
+ t01 = a & b ; \
+ t02 = a | b ; \
+ t03 = c | t01; \
+ t04 = d & t02; \
+ z = t03 ^ t04; \
+ t06 = b ^ t04; \
+ t07 = d ^ z ; \
+ t08 = ~ t07; \
+ t09 = t06 | t08; \
+ t10 = b ^ d ; \
+ t11 = a | d ; \
+ x = a ^ t09; \
+ t13 = c ^ t06; \
+ t14 = c & t11; \
+ t15 = d | x ; \
+ t16 = t01 | t10; \
+ w = t13 ^ t15; \
+ y = t14 ^ t16; \
+ } while (0)
+
+/* In-place inverse linear transformation. */
+#define LINEAR_TRANSFORMATION_INVERSE(x0,x1,x2,x3) \
+ do { \
+ x2 = ROL32 (x2, 10); \
+ x0 = ROL32 (x0, 27); \
+ x2 = x2 ^ x3 ^ (x1 << 7); \
+ x0 = x0 ^ x1 ^ x3; \
+ x3 = ROL32 (x3, 25); \
+ x1 = ROL32 (x1, 31); \
+ x3 = x3 ^ x2 ^ (x0 << 3); \
+ x1 = x1 ^ x0 ^ x2; \
+ x2 = ROL32 (x2, 29); \
+ x0 = ROL32 (x0, 19); \
+ } while (0)
+
+/* Round inputs are x0,x1,x2,x3 (destroyed), and round outputs are
+ y0,y1,y2,y3. */
+#define ROUND_INVERSE(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
+ do { \
+ LINEAR_TRANSFORMATION_INVERSE (x0,x1,x2,x3); \
+ SBOX##which##_INVERSE(uint32_t, x0,x1,x2,x3, y0,y1,y2,y3); \
+ KEYXOR(y0,y1,y2,y3, subkey); \
+ } while (0)
+
+#if HAVE_NATIVE_64_BIT
+
+/* In-place inverse linear transformation. */
+#define LINEAR_TRANSFORMATION64_INVERSE(x0,x1,x2,x3) \
+ do { \
+ x2 = ROL64 (x2, 10); \
+ x0 = ROL64 (x0, 27); \
+ x2 = x2 ^ x3 ^ RSHIFT64(x1, 7); \
+ x0 = x0 ^ x1 ^ x3; \
+ x3 = ROL64 (x3, 25); \
+ x1 = ROL64 (x1, 31); \
+ x3 = x3 ^ x2 ^ RSHIFT64(x0, 3); \
+ x1 = x1 ^ x0 ^ x2; \
+ x2 = ROL64 (x2, 29); \
+ x0 = ROL64 (x0, 19); \
+ } while (0)
+
+#define ROUND64_INVERSE(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
+ do { \
+ LINEAR_TRANSFORMATION64_INVERSE (x0,x1,x2,x3); \
+ SBOX##which##_INVERSE(uint64_t, x0,x1,x2,x3, y0,y1,y2,y3); \
+ KEYXOR64(y0,y1,y2,y3, subkey); \
+ } while (0)
+
+#endif /* HAVE_NATIVE_64_BIT */
+
+void
+serpent_decrypt (const struct serpent_ctx *ctx,
+ unsigned length, uint8_t * dst, const uint8_t * src)
+{
+ assert( !(length % SERPENT_BLOCK_SIZE));
+
+#if HAVE_NATIVE_64_BIT
+ if (length & SERPENT_BLOCK_SIZE)
+#else
+ while (length >= SERPENT_BLOCK_SIZE)
+#endif
+ {
+ uint32_t x0,x1,x2,x3, y0,y1,y2,y3;
+ unsigned k;
+
+ x0 = LE_READ_UINT32 (src);
+ x1 = LE_READ_UINT32 (src + 4);
+ x2 = LE_READ_UINT32 (src + 8);
+ x3 = LE_READ_UINT32 (src + 12);
+
+ /* Inverse of special round */
+ KEYXOR (x0,x1,x2,x3, ctx->keys[32]);
+ SBOX7_INVERSE (uint32_t, x0,x1,x2,x3, y0,y1,y2,y3);
+ KEYXOR (y0,y1,y2,y3, ctx->keys[31]);
+
+ k = 24;
+ goto start32;
+ while (k > 0)
+ {
+ k -= 8;
+ ROUND_INVERSE (7, ctx->keys[k+7], x0,x1,x2,x3, y0,y1,y2,y3);
+ start32:
+ ROUND_INVERSE (6, ctx->keys[k+6], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND_INVERSE (5, ctx->keys[k+5], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND_INVERSE (4, ctx->keys[k+4], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND_INVERSE (3, ctx->keys[k+3], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND_INVERSE (2, ctx->keys[k+2], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND_INVERSE (1, ctx->keys[k+1], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND_INVERSE (0, ctx->keys[k], y0,y1,y2,y3, x0,x1,x2,x3);
+ }
+
+ LE_WRITE_UINT32 (dst, x0);
+ LE_WRITE_UINT32 (dst + 4, x1);
+ LE_WRITE_UINT32 (dst + 8, x2);
+ LE_WRITE_UINT32 (dst + 12, x3);
+
+ src += SERPENT_BLOCK_SIZE;
+ dst += SERPENT_BLOCK_SIZE;
+ length -= SERPENT_BLOCK_SIZE;
+ }
+#if HAVE_NATIVE_64_BIT
+ FOR_BLOCKS(length, dst, src, 2*SERPENT_BLOCK_SIZE)
+ {
+ uint64_t x0,x1,x2,x3, y0,y1,y2,y3;
+ unsigned k;
+
+ x0 = LE_READ_UINT32 (src);
+ x1 = LE_READ_UINT32 (src + 4);
+ x2 = LE_READ_UINT32 (src + 8);
+ x3 = LE_READ_UINT32 (src + 12);
+
+ x0 <<= 32; x0 |= LE_READ_UINT32 (src + 16);
+ x1 <<= 32; x1 |= LE_READ_UINT32 (src + 20);
+ x2 <<= 32; x2 |= LE_READ_UINT32 (src + 24);
+ x3 <<= 32; x3 |= LE_READ_UINT32 (src + 28);
+
+ /* Inverse of special round */
+ KEYXOR64 (x0,x1,x2,x3, ctx->keys[32]);
+ SBOX7_INVERSE (uint64_t, x0,x1,x2,x3, y0,y1,y2,y3);
+ KEYXOR64 (y0,y1,y2,y3, ctx->keys[31]);
+
+ k = 24;
+ goto start64;
+ while (k > 0)
+ {
+ k -= 8;
+ ROUND64_INVERSE (7, ctx->keys[k+7], x0,x1,x2,x3, y0,y1,y2,y3);
+ start64:
+ ROUND64_INVERSE (6, ctx->keys[k+6], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND64_INVERSE (5, ctx->keys[k+5], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND64_INVERSE (4, ctx->keys[k+4], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND64_INVERSE (3, ctx->keys[k+3], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND64_INVERSE (2, ctx->keys[k+2], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND64_INVERSE (1, ctx->keys[k+1], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND64_INVERSE (0, ctx->keys[k], y0,y1,y2,y3, x0,x1,x2,x3);
+ }
+
+ LE_WRITE_UINT32 (dst + 16, x0);
+ LE_WRITE_UINT32 (dst + 20, x1);
+ LE_WRITE_UINT32 (dst + 24, x2);
+ LE_WRITE_UINT32 (dst + 28, x3);
+ x0 >>= 32; LE_WRITE_UINT32 (dst, x0);
+ x1 >>= 32; LE_WRITE_UINT32 (dst + 4, x1);
+ x2 >>= 32; LE_WRITE_UINT32 (dst + 8, x2);
+ x3 >>= 32; LE_WRITE_UINT32 (dst + 12, x3);
+ }
+#endif /* HAVE_NATIVE_64_BIT */
+}
diff --git a/serpent-encrypt.c b/serpent-encrypt.c
new file mode 100644
index 00000000..91ba13f5
--- /dev/null
+++ b/serpent-encrypt.c
@@ -0,0 +1,415 @@
+/* serpent-encrypt.c
+ *
+ * The serpent block cipher.
+ *
+ * For more details on this algorithm, see the Serpent website at
+ * http://www.cl.cam.ac.uk/~rja14/serpent.html
+ */
+
+/* nettle, low-level cryptographics library
+ *
+ * Copyright (C) 2011 Niels Möller
+ * Copyright (C) 2010, 2011 Simon Josefsson
+ * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+ *
+ * The nettle library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ *
+ * The nettle library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the nettle library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+/* This file is derived from cipher/serpent.c in Libgcrypt v1.4.6.
+   The adaptation to Nettle was made by Simon Josefsson on 2010-12-07
+ with final touches on 2011-05-30. Changes include replacing
+ libgcrypt with nettle in the license template, renaming
+ serpent_context to serpent_ctx, renaming u32 to uint32_t, removing
+ libgcrypt stubs and selftests, modifying entry function prototypes,
+ using FOR_BLOCKS to iterate through data in encrypt/decrypt, using
+ LE_READ_UINT32 and LE_WRITE_UINT32 to access data in
+ encrypt/decrypt, and running indent on the code. */
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <limits.h>
+
+#include "serpent.h"
+
+#include "macros.h"
+#include "serpent-internal.h"
+
+/* These are the S-Boxes of Serpent.  They are copied from Serpent's
+ reference implementation (the optimized one, contained in
+ `floppy2') and are therefore:
+
+ Copyright (C) 1998 Ross Anderson, Eli Biham, Lars Knudsen.
+
+ To quote the Serpent homepage
+ (http://www.cl.cam.ac.uk/~rja14/serpent.html):
+
+ "Serpent is now completely in the public domain, and we impose no
+ restrictions on its use. This was announced on the 21st August at
+ the First AES Candidate Conference. The optimised implementations
+ in the submission package are now under the GNU PUBLIC LICENSE
+ (GPL), although some comments in the code still say otherwise. You
+ are welcome to use Serpent for any application." */
+
+/* FIXME: Except when used within the key schedule, the inputs are not
+ used after the substitution, and hence we could allow them to be
+ destroyed. Can this freedom be used to optimize the sboxes? */
+
+/* S0: 3 8 15 1 10 6 5 11 14 13 4 2 7 0 9 12 */
+#define SBOX0(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t05, t06, t07, t08, t09; \
+ type t11, t12, t13, t14, t15, t17, t01; \
+ t01 = b ^ c ; \
+ t02 = a | d ; \
+ t03 = a ^ b ; \
+ z = t02 ^ t01; \
+ t05 = c | z ; \
+ t06 = a ^ d ; \
+ t07 = b | c ; \
+ t08 = d & t05; \
+ t09 = t03 & t07; \
+ y = t09 ^ t08; \
+ t11 = t09 & y ; \
+ t12 = c ^ d ; \
+ t13 = t07 ^ t11; \
+ t14 = b & t06; \
+ t15 = t06 ^ t13; \
+ w = ~ t15; \
+ t17 = w ^ t14; \
+ x = t12 ^ t17; \
+ } while (0)
+
+/* S1: 15 12 2 7 9 0 5 10 1 11 14 8 6 13 3 4 */
+#define SBOX1(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t06, t07, t08; \
+ type t10, t11, t12, t13, t16, t17, t01; \
+ t01 = a | d ; \
+ t02 = c ^ d ; \
+ t03 = ~ b ; \
+ t04 = a ^ c ; \
+ t05 = a | t03; \
+ t06 = d & t04; \
+ t07 = t01 & t02; \
+ t08 = b | t06; \
+ y = t02 ^ t05; \
+ t10 = t07 ^ t08; \
+ t11 = t01 ^ t10; \
+ t12 = y ^ t11; \
+ t13 = b & d ; \
+ z = ~ t10; \
+ x = t13 ^ t12; \
+ t16 = t10 | x ; \
+ t17 = t05 & t16; \
+ w = c ^ t17; \
+ } while (0)
+
+/* S2: 8 6 7 9 3 12 10 15 13 1 14 4 0 11 5 2 */
+#define SBOX2(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t05, t06, t07, t08; \
+ type t09, t10, t12, t13, t14, t01; \
+ t01 = a | c ; \
+ t02 = a ^ b ; \
+ t03 = d ^ t01; \
+ w = t02 ^ t03; \
+ t05 = c ^ w ; \
+ t06 = b ^ t05; \
+ t07 = b | t05; \
+ t08 = t01 & t06; \
+ t09 = t03 ^ t07; \
+ t10 = t02 | t09; \
+ x = t10 ^ t08; \
+ t12 = a | d ; \
+ t13 = t09 ^ x ; \
+ t14 = b ^ t13; \
+ z = ~ t09; \
+ y = t12 ^ t14; \
+ } while (0)
+
+/* S3: 0 15 11 8 12 9 6 3 13 1 2 4 10 7 5 14 */
+#define SBOX3(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t06, t07, t08; \
+ type t09, t10, t11, t13, t14, t15, t01; \
+ t01 = a ^ c ; \
+ t02 = a | d ; \
+ t03 = a & d ; \
+ t04 = t01 & t02; \
+ t05 = b | t03; \
+ t06 = a & b ; \
+ t07 = d ^ t04; \
+ t08 = c | t06; \
+ t09 = b ^ t07; \
+ t10 = d & t05; \
+ t11 = t02 ^ t10; \
+ z = t08 ^ t09; \
+ t13 = d | z ; \
+ t14 = a | t07; \
+ t15 = b & t13; \
+ y = t08 ^ t11; \
+ w = t14 ^ t15; \
+ x = t05 ^ t04; \
+ } while (0)
+
+/* S4: 1 15 8 3 12 0 11 6 2 5 4 10 9 14 7 13 */
+#define SBOX4(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t06, t08, t09; \
+ type t10, t11, t12, t13, t14, t15, t16, t01; \
+ t01 = a | b ; \
+ t02 = b | c ; \
+ t03 = a ^ t02; \
+ t04 = b ^ d ; \
+ t05 = d | t03; \
+ t06 = d & t01; \
+ z = t03 ^ t06; \
+ t08 = z & t04; \
+ t09 = t04 & t05; \
+ t10 = c ^ t06; \
+ t11 = b & c ; \
+ t12 = t04 ^ t08; \
+ t13 = t11 | t03; \
+ t14 = t10 ^ t09; \
+ t15 = a & t05; \
+ t16 = t11 | t12; \
+ y = t13 ^ t08; \
+ x = t15 ^ t16; \
+ w = ~ t14; \
+ } while (0)
+
+/* S5: 15 5 2 11 4 10 9 12 0 3 14 8 13 6 7 1 */
+#define SBOX5(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t07, t08, t09; \
+ type t10, t11, t12, t13, t14, t01; \
+ t01 = b ^ d ; \
+ t02 = b | d ; \
+ t03 = a & t01; \
+ t04 = c ^ t02; \
+ t05 = t03 ^ t04; \
+ w = ~ t05; \
+ t07 = a ^ t01; \
+ t08 = d | w ; \
+ t09 = b | t05; \
+ t10 = d ^ t08; \
+ t11 = b | t07; \
+ t12 = t03 | w ; \
+ t13 = t07 | t10; \
+ t14 = t01 ^ t11; \
+ y = t09 ^ t13; \
+ x = t07 ^ t08; \
+ z = t12 ^ t14; \
+ } while (0)
+
+/* S6: 7 2 12 5 8 4 6 11 14 9 1 15 13 3 10 0 */
+#define SBOX6(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t07, t08, t09, t10; \
+ type t11, t12, t13, t15, t17, t18, t01; \
+ t01 = a & d ; \
+ t02 = b ^ c ; \
+ t03 = a ^ d ; \
+ t04 = t01 ^ t02; \
+ t05 = b | c ; \
+ x = ~ t04; \
+ t07 = t03 & t05; \
+ t08 = b & x ; \
+ t09 = a | c ; \
+ t10 = t07 ^ t08; \
+ t11 = b | d ; \
+ t12 = c ^ t11; \
+ t13 = t09 ^ t10; \
+ y = ~ t13; \
+ t15 = x & t03; \
+ z = t12 ^ t07; \
+ t17 = a ^ b ; \
+ t18 = y ^ t15; \
+ w = t17 ^ t18; \
+ } while (0)
+
+/* S7: 1 13 15 0 14 8 2 11 7 4 12 10 9 3 5 6 */
+#define SBOX7(type, a, b, c, d, w, x, y, z) \
+ do { \
+ type t02, t03, t04, t05, t06, t08, t09, t10; \
+ type t11, t13, t14, t15, t16, t17, t01; \
+ t01 = a & c ; \
+ t02 = ~ d ; \
+ t03 = a & t02; \
+ t04 = b | t01; \
+ t05 = a & b ; \
+ t06 = c ^ t04; \
+ z = t03 ^ t06; \
+ t08 = c | z ; \
+ t09 = d | t05; \
+ t10 = a ^ t08; \
+ t11 = t04 & z ; \
+ x = t09 ^ t10; \
+ t13 = b ^ x ; \
+ t14 = t01 ^ x ; \
+ t15 = c ^ t05; \
+ t16 = t11 | t13; \
+ t17 = t02 | t14; \
+ w = t15 ^ t17; \
+ y = a ^ t16; \
+ } while (0)
+
+/* In-place linear transformation. */
+#define LINEAR_TRANSFORMATION(x0,x1,x2,x3) \
+ do { \
+ x0 = ROL32 (x0, 13); \
+ x2 = ROL32 (x2, 3); \
+ x1 = x1 ^ x0 ^ x2; \
+ x3 = x3 ^ x2 ^ (x0 << 3); \
+ x1 = ROL32 (x1, 1); \
+ x3 = ROL32 (x3, 7); \
+ x0 = x0 ^ x1 ^ x3; \
+ x2 = x2 ^ x3 ^ (x1 << 7); \
+ x0 = ROL32 (x0, 5); \
+ x2 = ROL32 (x2, 22); \
+ } while (0)
+
+/* Round inputs are x0,x1,x2,x3 (destroyed), and round outputs are
+ y0,y1,y2,y3. */
+#define ROUND(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
+ do { \
+ KEYXOR(x0,x1,x2,x3, subkey); \
+ SBOX##which(uint32_t, x0,x1,x2,x3, y0,y1,y2,y3); \
+ LINEAR_TRANSFORMATION(y0,y1,y2,y3); \
+ } while (0)
+
+#if HAVE_NATIVE_64_BIT
+
+#define LINEAR_TRANSFORMATION64(x0,x1,x2,x3) \
+ do { \
+ x0 = ROL64 (x0, 13); \
+ x2 = ROL64 (x2, 3); \
+ x1 = x1 ^ x0 ^ x2; \
+ x3 = x3 ^ x2 ^ RSHIFT64(x0, 3); \
+ x1 = ROL64 (x1, 1); \
+ x3 = ROL64 (x3, 7); \
+ x0 = x0 ^ x1 ^ x3; \
+ x2 = x2 ^ x3 ^ RSHIFT64(x1, 7); \
+ x0 = ROL64 (x0, 5); \
+ x2 = ROL64 (x2, 22); \
+ } while (0)
+
+#define ROUND64(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
+ do { \
+ KEYXOR64(x0,x1,x2,x3, subkey); \
+ SBOX##which(uint64_t, x0,x1,x2,x3, y0,y1,y2,y3); \
+ LINEAR_TRANSFORMATION64(y0,y1,y2,y3); \
+ } while (0)
+
+#endif /* HAVE_NATIVE_64_BIT */
+
+void
+serpent_encrypt (const struct serpent_ctx *ctx,
+ unsigned length, uint8_t * dst, const uint8_t * src)
+{
+ assert( !(length % SERPENT_BLOCK_SIZE));
+
+#if HAVE_NATIVE_64_BIT
+ if (length & SERPENT_BLOCK_SIZE)
+#else
+ while (length >= SERPENT_BLOCK_SIZE)
+#endif
+ {
+ uint32_t x0,x1,x2,x3, y0,y1,y2,y3;
+ unsigned k;
+
+ x0 = LE_READ_UINT32 (src);
+ x1 = LE_READ_UINT32 (src + 4);
+ x2 = LE_READ_UINT32 (src + 8);
+ x3 = LE_READ_UINT32 (src + 12);
+
+ for (k = 0; ; k += 8)
+ {
+ ROUND (0, ctx->keys[k+0], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND (1, ctx->keys[k+1], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND (2, ctx->keys[k+2], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND (3, ctx->keys[k+3], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND (4, ctx->keys[k+4], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND (5, ctx->keys[k+5], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND (6, ctx->keys[k+6], x0,x1,x2,x3, y0,y1,y2,y3);
+ if (k == 24)
+ break;
+ ROUND (7, ctx->keys[k+7], y0,y1,y2,y3, x0,x1,x2,x3);
+ }
+
+ /* Special final round, using two subkeys. */
+ KEYXOR (y0,y1,y2,y3, ctx->keys[31]);
+ SBOX7 (uint32_t, y0,y1,y2,y3, x0,x1,x2,x3);
+ KEYXOR (x0,x1,x2,x3, ctx->keys[32]);
+
+ LE_WRITE_UINT32 (dst, x0);
+ LE_WRITE_UINT32 (dst + 4, x1);
+ LE_WRITE_UINT32 (dst + 8, x2);
+ LE_WRITE_UINT32 (dst + 12, x3);
+
+ src += SERPENT_BLOCK_SIZE;
+ dst += SERPENT_BLOCK_SIZE;
+ length -= SERPENT_BLOCK_SIZE;
+ }
+#if HAVE_NATIVE_64_BIT
+ FOR_BLOCKS(length, dst, src, 2*SERPENT_BLOCK_SIZE)
+ {
+ uint64_t x0,x1,x2,x3, y0,y1,y2,y3;
+ unsigned k;
+
+ x0 = LE_READ_UINT32 (src);
+ x1 = LE_READ_UINT32 (src + 4);
+ x2 = LE_READ_UINT32 (src + 8);
+ x3 = LE_READ_UINT32 (src + 12);
+
+ x0 <<= 32; x0 |= LE_READ_UINT32 (src + 16);
+ x1 <<= 32; x1 |= LE_READ_UINT32 (src + 20);
+ x2 <<= 32; x2 |= LE_READ_UINT32 (src + 24);
+ x3 <<= 32; x3 |= LE_READ_UINT32 (src + 28);
+
+ for (k = 0; ; k += 8)
+ {
+ ROUND64 (0, ctx->keys[k+0], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND64 (1, ctx->keys[k+1], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND64 (2, ctx->keys[k+2], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND64 (3, ctx->keys[k+3], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND64 (4, ctx->keys[k+4], x0,x1,x2,x3, y0,y1,y2,y3);
+ ROUND64 (5, ctx->keys[k+5], y0,y1,y2,y3, x0,x1,x2,x3);
+ ROUND64 (6, ctx->keys[k+6], x0,x1,x2,x3, y0,y1,y2,y3);
+ if (k == 24)
+ break;
+ ROUND64 (7, ctx->keys[k+7], y0,y1,y2,y3, x0,x1,x2,x3);
+ }
+
+ /* Special final round, using two subkeys. */
+ KEYXOR64 (y0,y1,y2,y3, ctx->keys[31]);
+ SBOX7 (uint64_t, y0,y1,y2,y3, x0,x1,x2,x3);
+ KEYXOR64 (x0,x1,x2,x3, ctx->keys[32]);
+
+ LE_WRITE_UINT32 (dst + 16, x0);
+ LE_WRITE_UINT32 (dst + 20, x1);
+ LE_WRITE_UINT32 (dst + 24, x2);
+ LE_WRITE_UINT32 (dst + 28, x3);
+ x0 >>= 32; LE_WRITE_UINT32 (dst, x0);
+ x1 >>= 32; LE_WRITE_UINT32 (dst + 4, x1);
+ x2 >>= 32; LE_WRITE_UINT32 (dst + 8, x2);
+ x3 >>= 32; LE_WRITE_UINT32 (dst + 12, x3);
+ }
+#endif /* HAVE_NATIVE_64_BIT */
+}
diff --git a/serpent-internal.h b/serpent-internal.h
new file mode 100644
index 00000000..66d5af49
--- /dev/null
+++ b/serpent-internal.h
@@ -0,0 +1,75 @@
+/* serpent-internal.h
+ *
+ * The serpent block cipher.
+ *
+ * For more details on this algorithm, see the Serpent website at
+ * http://www.cl.cam.ac.uk/~rja14/serpent.html
+ */
+
+/* nettle, low-level cryptographics library
+ *
+ * Copyright (C) 2011 Niels Möller
+ * Copyright (C) 2010, 2011 Simon Josefsson
+ * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+ *
+ * The nettle library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ *
+ * The nettle library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the nettle library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+/* This file is derived from cipher/serpent.c in Libgcrypt v1.4.6.
+   The adaptation to Nettle was made by Simon Josefsson on 2010-12-07
+ with final touches on 2011-05-30. Changes include replacing
+ libgcrypt with nettle in the license template, renaming
+ serpent_context to serpent_ctx, renaming u32 to uint32_t, removing
+ libgcrypt stubs and selftests, modifying entry function prototypes,
+ using FOR_BLOCKS to iterate through data in encrypt/decrypt, using
+ LE_READ_UINT32 and LE_WRITE_UINT32 to access data in
+ encrypt/decrypt, and running indent on the code. */
+
+#ifndef NETTLE_SERPENT_INTERNAL_H_INCLUDED
+#define NETTLE_SERPENT_INTERNAL_H_INCLUDED
+
+/* FIXME: Unify ROL macros used here, in camellia.c and cast128.c. */
+#define ROL32(x,n) ((((x))<<(n)) | (((x))>>(32-(n))))
+
+#define KEYXOR(x0,x1,x2,x3, subkey) \
+ do { \
+ (x0) ^= (subkey)[0]; \
+ (x1) ^= (subkey)[1]; \
+ (x2) ^= (subkey)[2]; \
+ (x3) ^= (subkey)[3]; \
+ } while (0)
+
+#if HAVE_NATIVE_64_BIT
+/* Operate independently on both halves of a 64-bit word. */
+#define ROL64(x,n) \
+ (((x) << (n) & ~(((1L << (n))-1) << 32)) \
+ |(((x) >> (32-(n))) & ~(((1L << (32-(n)))-1) << (n))))
+
+#define KEYXOR64(x0,x1,x2,x3, subkey) \
+ do { \
+ uint64_t _sk; \
+ _sk = (subkey)[0]; _sk |= _sk << 32; (x0) ^= _sk; \
+ _sk = (subkey)[1]; _sk |= _sk << 32; (x1) ^= _sk; \
+ _sk = (subkey)[2]; _sk |= _sk << 32; (x2) ^= _sk; \
+ _sk = (subkey)[3]; _sk |= _sk << 32; (x3) ^= _sk; \
+ } while (0)
+
+#define RSHIFT64(x,n) \
+ ( ((x) << (n)) & ~(((1L << n) - 1) << 32))
+#endif /* HAVE_NATIVE_64_BIT */
+
+#endif /* NETTLE_SERPENT_INTERNAL_H_INCLUDED */
+
diff --git a/serpent-set-key.c b/serpent-set-key.c
new file mode 100644
index 00000000..d03f50eb
--- /dev/null
+++ b/serpent-set-key.c
@@ -0,0 +1,351 @@
+/* serpent-set-key.c
+ *
+ * The serpent block cipher.
+ *
+ * For more details on this algorithm, see the Serpent website at
+ * http://www.cl.cam.ac.uk/~rja14/serpent.html
+ */
+
+/* nettle, low-level cryptographics library
+ *
+ * Copyright (C) 2011 Niels Möller
+ * Copyright (C) 2010, 2011 Simon Josefsson
+ * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+ *
+ * The nettle library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ *
+ * The nettle library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the nettle library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+/* This file is derived from cipher/serpent.c in Libgcrypt v1.4.6.
+ The adaption to Nettle was made by Simon Josefsson on 2010-12-07
+ with final touches on 2011-05-30. Changes include replacing
+ libgcrypt with nettle in the license template, renaming
+ serpent_context to serpent_ctx, renaming u32 to uint32_t, removing
+ libgcrypt stubs and selftests, modifying entry function prototypes,
+ using FOR_BLOCKS to iterate through data in encrypt/decrypt, using
+ LE_READ_UINT32 and LE_WRITE_UINT32 to access data in
+ encrypt/decrypt, and running indent on the code. */
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <limits.h>
+
+#include "serpent.h"
+
+#include "macros.h"
+#include "serpent-internal.h"
+
+/* Magic number, used during generation of the subkeys. */
+#define PHI 0x9E3779B9
+
+/* These are the S-Boxes of Serpent. They are copied from Serpents
+ reference implementation (the optimized one, contained in
+ `floppy2') and are therefore:
+
+ Copyright (C) 1998 Ross Anderson, Eli Biham, Lars Knudsen.
+
+ To quote the Serpent homepage
+ (http://www.cl.cam.ac.uk/~rja14/serpent.html):
+
+ "Serpent is now completely in the public domain, and we impose no
+ restrictions on its use. This was announced on the 21st August at
+ the First AES Candidate Conference. The optimised implementations
+ in the submission package are now under the GNU PUBLIC LICENSE
+ (GPL), although some comments in the code still say otherwise. You
+ are welcome to use Serpent for any application." */
+
/* FIXME: Except when used within the key schedule, the inputs are not
   used after the substitution, and hence we could allow them to be
   destroyed. Can this freedom be used to optimize the sboxes? */

/* Each SBOXn macro below implements one of the eight 4-bit Serpent
   S-boxes as a branch-free boolean circuit, applied bitslice-fashion
   to whole words.  Inputs are a,b,c,d; outputs are w,x,y,z; t01,
   t02, ... are temporaries.  The circuits are purely bitwise, so the
   type argument may be any unsigned word type: uint32_t for the key
   schedule and single-block processing, uint64_t for the two-block
   HAVE_NATIVE_64_BIT paths. */
#define SBOX0(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t05, t06, t07, t08, t09; \
    type t11, t12, t13, t14, t15, t17, t01; \
    t01 = b ^ c ; \
    t02 = a | d ; \
    t03 = a ^ b ; \
    z = t02 ^ t01; \
    t05 = c | z ; \
    t06 = a ^ d ; \
    t07 = b | c ; \
    t08 = d & t05; \
    t09 = t03 & t07; \
    y = t09 ^ t08; \
    t11 = t09 & y ; \
    t12 = c ^ d ; \
    t13 = t07 ^ t11; \
    t14 = b & t06; \
    t15 = t06 ^ t13; \
    w = ~ t15; \
    t17 = w ^ t14; \
    x = t12 ^ t17; \
  } while (0)

#define SBOX1(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t06, t07, t08; \
    type t10, t11, t12, t13, t16, t17, t01; \
    t01 = a | d ; \
    t02 = c ^ d ; \
    t03 = ~ b ; \
    t04 = a ^ c ; \
    t05 = a | t03; \
    t06 = d & t04; \
    t07 = t01 & t02; \
    t08 = b | t06; \
    y = t02 ^ t05; \
    t10 = t07 ^ t08; \
    t11 = t01 ^ t10; \
    t12 = y ^ t11; \
    t13 = b & d ; \
    z = ~ t10; \
    x = t13 ^ t12; \
    t16 = t10 | x ; \
    t17 = t05 & t16; \
    w = c ^ t17; \
  } while (0)

#define SBOX2(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t05, t06, t07, t08; \
    type t09, t10, t12, t13, t14, t01; \
    t01 = a | c ; \
    t02 = a ^ b ; \
    t03 = d ^ t01; \
    w = t02 ^ t03; \
    t05 = c ^ w ; \
    t06 = b ^ t05; \
    t07 = b | t05; \
    t08 = t01 & t06; \
    t09 = t03 ^ t07; \
    t10 = t02 | t09; \
    x = t10 ^ t08; \
    t12 = a | d ; \
    t13 = t09 ^ x ; \
    t14 = b ^ t13; \
    z = ~ t09; \
    y = t12 ^ t14; \
  } while (0)

#define SBOX3(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t06, t07, t08; \
    type t09, t10, t11, t13, t14, t15, t01; \
    t01 = a ^ c ; \
    t02 = a | d ; \
    t03 = a & d ; \
    t04 = t01 & t02; \
    t05 = b | t03; \
    t06 = a & b ; \
    t07 = d ^ t04; \
    t08 = c | t06; \
    t09 = b ^ t07; \
    t10 = d & t05; \
    t11 = t02 ^ t10; \
    z = t08 ^ t09; \
    t13 = d | z ; \
    t14 = a | t07; \
    t15 = b & t13; \
    y = t08 ^ t11; \
    w = t14 ^ t15; \
    x = t05 ^ t04; \
  } while (0)

#define SBOX4(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t06, t08, t09; \
    type t10, t11, t12, t13, t14, t15, t16, t01; \
    t01 = a | b ; \
    t02 = b | c ; \
    t03 = a ^ t02; \
    t04 = b ^ d ; \
    t05 = d | t03; \
    t06 = d & t01; \
    z = t03 ^ t06; \
    t08 = z & t04; \
    t09 = t04 & t05; \
    t10 = c ^ t06; \
    t11 = b & c ; \
    t12 = t04 ^ t08; \
    t13 = t11 | t03; \
    t14 = t10 ^ t09; \
    t15 = a & t05; \
    t16 = t11 | t12; \
    y = t13 ^ t08; \
    x = t15 ^ t16; \
    w = ~ t14; \
  } while (0)

#define SBOX5(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t07, t08, t09; \
    type t10, t11, t12, t13, t14, t01; \
    t01 = b ^ d ; \
    t02 = b | d ; \
    t03 = a & t01; \
    t04 = c ^ t02; \
    t05 = t03 ^ t04; \
    w = ~ t05; \
    t07 = a ^ t01; \
    t08 = d | w ; \
    t09 = b | t05; \
    t10 = d ^ t08; \
    t11 = b | t07; \
    t12 = t03 | w ; \
    t13 = t07 | t10; \
    t14 = t01 ^ t11; \
    y = t09 ^ t13; \
    x = t07 ^ t08; \
    z = t12 ^ t14; \
  } while (0)

#define SBOX6(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t07, t08, t09, t10; \
    type t11, t12, t13, t15, t17, t18, t01; \
    t01 = a & d ; \
    t02 = b ^ c ; \
    t03 = a ^ d ; \
    t04 = t01 ^ t02; \
    t05 = b | c ; \
    x = ~ t04; \
    t07 = t03 & t05; \
    t08 = b & x ; \
    t09 = a | c ; \
    t10 = t07 ^ t08; \
    t11 = b | d ; \
    t12 = c ^ t11; \
    t13 = t09 ^ t10; \
    y = ~ t13; \
    t15 = x & t03; \
    z = t12 ^ t07; \
    t17 = a ^ b ; \
    t18 = y ^ t15; \
    w = t17 ^ t18; \
  } while (0)

#define SBOX7(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t06, t08, t09, t10; \
    type t11, t13, t14, t15, t16, t17, t01; \
    t01 = a & c ; \
    t02 = ~ d ; \
    t03 = a & t02; \
    t04 = b | t01; \
    t05 = a & b ; \
    t06 = c ^ t04; \
    z = t03 ^ t06; \
    t08 = c | z ; \
    t09 = d | t05; \
    t10 = a ^ t08; \
    t11 = t04 & z ; \
    x = t09 ^ t10; \
    t13 = b ^ x ; \
    t14 = t01 ^ x ; \
    t15 = c ^ t05; \
    t16 = t11 | t13; \
    t17 = t02 | t14; \
    w = t15 ^ t17; \
    y = a ^ t16; \
  } while (0)
+
/* Key schedule */
/* One step of the key schedule recurrence: mixes w[i] with three
   other words of the 8-word circular buffer, the PHI constant and
   the running subkey-word counter, then rotates the result left by
   11 bits.  Note: Increments k.  All macro arguments are fully
   parenthesized (the original left w bare in two of the four
   accesses). */
#define KS_RECURRENCE(w, i, k) \
  do { \
    uint32_t _wn = (w)[(i)] ^ (w)[((i)+3)&7] ^ (w)[((i)+5)&7] \
      ^ (w)[((i)+7)&7] ^ PHI ^ (k)++; \
    ((w)[(i)] = ROL32(_wn, 11)); \
  } while (0)

/* Produce one 128-bit subkey at *keys: advance four words of the
   recurrence, then substitute them through S-box s.  Note:
   Increments k four times and keys once. */
#define KS(keys, s, w, i, k) \
  do { \
    KS_RECURRENCE(w, (i), (k)); \
    KS_RECURRENCE(w, (i)+1, (k)); \
    KS_RECURRENCE(w, (i)+2, (k)); \
    KS_RECURRENCE(w, (i)+3, (k)); \
    SBOX##s(uint32_t, (w)[(i)],(w)[(i)+1],(w)[(i)+2],(w)[(i)+3], \
	    (*keys)[0],(*keys)[1],(*keys)[2],(*keys)[3]); \
    (keys)++; \
  } while (0)
+
+/* Pad user key and convert to an array of 8 uint32_t. */
+static void
+serpent_key_pad (const uint8_t *key, unsigned int key_length,
+ uint32_t *w)
+{
+ unsigned int i;
+
+ assert (key_length <= SERPENT_MAX_KEY_SIZE);
+
+ for (i = 0; key_length >= 4; key_length -=4, key += 4)
+ w[i++] = LE_READ_UINT32(key);
+
+ if (i < 8)
+ {
+ /* Key must be padded according to the Serpent specification.
+ "aabbcc" -> "aabbcc0100...00" -> 0x01ccbbaa. */
+ uint32_t pad = 0x01;
+
+ while (key_length > 0)
+ pad = pad << 8 | key[--key_length];
+
+ w[i++] = pad;
+
+ while (i < 8)
+ w[i++] = 0;
+ }
+}
+
/* Initialize CTX for the given key.  LENGTH is the key size in
   octets (at most SERPENT_MAX_KEY_SIZE; serpent_key_pad reads the
   key byte-wise and pads shorter keys), despite the "bits" wording
   in the file this code was derived from. */
void
serpent_set_key (struct serpent_ctx *ctx,
		 unsigned length, const uint8_t * key)
{
  uint32_t w[8];
  uint32_t (*keys)[4];
  unsigned k;

  serpent_key_pad (key, length, w);

  /* Derive the 33 subkeys from KEY and store them in SUBKEYS. We do
     the recurrence in the key schedule using W as a circular buffer
     of just 8 uint32_t. */

  /* FIXME: Would be better to invoke SBOX with scalar variables as
     arguments, no arrays. To do that, unpack w into separate
     variables, use temporary variables as the SBOX destination. */

  keys = ctx->keys;
  k = 0;
  /* Each KS emits one subkey and advances k by 4; the S-boxes are
     applied in the repeating order 3,2,1,0,7,6,5,4.  33 subkeys are
     needed, so the loop exits right after the 33rd KS, when k
     reaches 4*33 = 132. */
  for (;;)
    {
      KS(keys, 3, w, 0, k);
      if (k == 132)
	break;
      KS(keys, 2, w, 4, k);
      KS(keys, 1, w, 0, k);
      KS(keys, 0, w, 4, k);
      KS(keys, 7, w, 0, k);
      KS(keys, 6, w, 4, k);
      KS(keys, 5, w, 0, k);
      KS(keys, 4, w, 4, k);
    }
  assert (keys == ctx->keys + 33);
}
diff --git a/serpent.c b/serpent.c
deleted file mode 100644
index eaf38f51..00000000
--- a/serpent.c
+++ /dev/null
@@ -1,857 +0,0 @@
-/* serpent.c
- *
- * The serpent block cipher.
- *
- * For more details on this algorithm, see the Serpent website at
- * http://www.cl.cam.ac.uk/~rja14/serpent.html
- */
-
-/* nettle, low-level cryptographics library
- *
- * Copyright (C) 2011 Niels Möller
- * Copyright (C) 2010, 2011 Simon Josefsson
- * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
- *
- * The nettle library is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at your
- * option) any later version.
- *
- * The nettle library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- * License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with the nettle library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- */
-
-/* This file is derived from cipher/serpent.c in Libgcrypt v1.4.6.
- The adaption to Nettle was made by Simon Josefsson on 2010-12-07
- with final touches on 2011-05-30. Changes include replacing
- libgcrypt with nettle in the license template, renaming
- serpent_context to serpent_ctx, renaming u32 to uint32_t, removing
- libgcrypt stubs and selftests, modifying entry function prototypes,
- using FOR_BLOCKS to iterate through data in encrypt/decrypt, using
- LE_READ_UINT32 and LE_WRITE_UINT32 to access data in
- encrypt/decrypt, and running indent on the code. */
-
-#if HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <assert.h>
-#include <limits.h>
-
-#include "serpent.h"
-
-#include "macros.h"
-
-/* Magic number, used during generating of the subkeys. */
-#define PHI 0x9E3779B9
-
-/* FIXME: Unify ROL macros used here, in camellia.c and cast128.c. */
-#define ROL32(x,n) ((((x))<<(n)) | (((x))>>(32-(n))))
-
-/* These are the S-Boxes of Serpent. They are copied from Serpents
- reference implementation (the optimized one, contained in
- `floppy2') and are therefore:
-
- Copyright (C) 1998 Ross Anderson, Eli Biham, Lars Knudsen.
-
- To quote the Serpent homepage
- (http://www.cl.cam.ac.uk/~rja14/serpent.html):
-
- "Serpent is now completely in the public domain, and we impose no
- restrictions on its use. This was announced on the 21st August at
- the First AES Candidate Conference. The optimised implementations
- in the submission package are now under the GNU PUBLIC LICENSE
- (GPL), although some comments in the code still say otherwise. You
- are welcome to use Serpent for any application." */
-
-/* FIXME: Except when used within the key schedule, the inputs are not
- used after the substitution, and hence we could allow them to be
- destroyed. Can this freedom be used to optimize the sboxes? */
-#define SBOX0(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t05, t06, t07, t08, t09; \
- type t11, t12, t13, t14, t15, t17, t01; \
- t01 = b ^ c ; \
- t02 = a | d ; \
- t03 = a ^ b ; \
- z = t02 ^ t01; \
- t05 = c | z ; \
- t06 = a ^ d ; \
- t07 = b | c ; \
- t08 = d & t05; \
- t09 = t03 & t07; \
- y = t09 ^ t08; \
- t11 = t09 & y ; \
- t12 = c ^ d ; \
- t13 = t07 ^ t11; \
- t14 = b & t06; \
- t15 = t06 ^ t13; \
- w = ~ t15; \
- t17 = w ^ t14; \
- x = t12 ^ t17; \
- } while (0)
-
-#define SBOX0_INVERSE(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t06, t08, t09, t10; \
- type t12, t13, t14, t15, t17, t18, t01; \
- t01 = c ^ d ; \
- t02 = a | b ; \
- t03 = b | c ; \
- t04 = c & t01; \
- t05 = t02 ^ t01; \
- t06 = a | t04; \
- y = ~ t05; \
- t08 = b ^ d ; \
- t09 = t03 & t08; \
- t10 = d | y ; \
- x = t09 ^ t06; \
- t12 = a | t05; \
- t13 = x ^ t12; \
- t14 = t03 ^ t10; \
- t15 = a ^ c ; \
- z = t14 ^ t13; \
- t17 = t05 & t13; \
- t18 = t14 | t17; \
- w = t15 ^ t18; \
- } while (0)
-
-#define SBOX1(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t06, t07, t08; \
- type t10, t11, t12, t13, t16, t17, t01; \
- t01 = a | d ; \
- t02 = c ^ d ; \
- t03 = ~ b ; \
- t04 = a ^ c ; \
- t05 = a | t03; \
- t06 = d & t04; \
- t07 = t01 & t02; \
- t08 = b | t06; \
- y = t02 ^ t05; \
- t10 = t07 ^ t08; \
- t11 = t01 ^ t10; \
- t12 = y ^ t11; \
- t13 = b & d ; \
- z = ~ t10; \
- x = t13 ^ t12; \
- t16 = t10 | x ; \
- t17 = t05 & t16; \
- w = c ^ t17; \
- } while (0)
-
-#define SBOX1_INVERSE(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t06, t07, t08; \
- type t09, t10, t11, t14, t15, t17, t01; \
- t01 = a ^ b ; \
- t02 = b | d ; \
- t03 = a & c ; \
- t04 = c ^ t02; \
- t05 = a | t04; \
- t06 = t01 & t05; \
- t07 = d | t03; \
- t08 = b ^ t06; \
- t09 = t07 ^ t06; \
- t10 = t04 | t03; \
- t11 = d & t08; \
- y = ~ t09; \
- x = t10 ^ t11; \
- t14 = a | y ; \
- t15 = t06 ^ x ; \
- z = t01 ^ t04; \
- t17 = c ^ t15; \
- w = t14 ^ t17; \
- } while (0)
-
-#define SBOX2(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t05, t06, t07, t08; \
- type t09, t10, t12, t13, t14, t01; \
- t01 = a | c ; \
- t02 = a ^ b ; \
- t03 = d ^ t01; \
- w = t02 ^ t03; \
- t05 = c ^ w ; \
- t06 = b ^ t05; \
- t07 = b | t05; \
- t08 = t01 & t06; \
- t09 = t03 ^ t07; \
- t10 = t02 | t09; \
- x = t10 ^ t08; \
- t12 = a | d ; \
- t13 = t09 ^ x ; \
- t14 = b ^ t13; \
- z = ~ t09; \
- y = t12 ^ t14; \
- } while (0)
-
-#define SBOX2_INVERSE(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t06, t07, t08, t09; \
- type t10, t11, t12, t15, t16, t17, t01; \
- t01 = a ^ d ; \
- t02 = c ^ d ; \
- t03 = a & c ; \
- t04 = b | t02; \
- w = t01 ^ t04; \
- t06 = a | c ; \
- t07 = d | w ; \
- t08 = ~ d ; \
- t09 = b & t06; \
- t10 = t08 | t03; \
- t11 = b & t07; \
- t12 = t06 & t02; \
- z = t09 ^ t10; \
- x = t12 ^ t11; \
- t15 = c & z ; \
- t16 = w ^ x ; \
- t17 = t10 ^ t15; \
- y = t16 ^ t17; \
- } while (0)
-
-#define SBOX3(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t06, t07, t08; \
- type t09, t10, t11, t13, t14, t15, t01; \
- t01 = a ^ c ; \
- t02 = a | d ; \
- t03 = a & d ; \
- t04 = t01 & t02; \
- t05 = b | t03; \
- t06 = a & b ; \
- t07 = d ^ t04; \
- t08 = c | t06; \
- t09 = b ^ t07; \
- t10 = d & t05; \
- t11 = t02 ^ t10; \
- z = t08 ^ t09; \
- t13 = d | z ; \
- t14 = a | t07; \
- t15 = b & t13; \
- y = t08 ^ t11; \
- w = t14 ^ t15; \
- x = t05 ^ t04; \
- } while (0)
-
-#define SBOX3_INVERSE(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t06, t07, t09; \
- type t11, t12, t13, t14, t16, t01; \
- t01 = c | d ; \
- t02 = a | d ; \
- t03 = c ^ t02; \
- t04 = b ^ t02; \
- t05 = a ^ d ; \
- t06 = t04 & t03; \
- t07 = b & t01; \
- y = t05 ^ t06; \
- t09 = a ^ t03; \
- w = t07 ^ t03; \
- t11 = w | t05; \
- t12 = t09 & t11; \
- t13 = a & y ; \
- t14 = t01 ^ t05; \
- x = b ^ t12; \
- t16 = b | t13; \
- z = t14 ^ t16; \
- } while (0)
-
-#define SBOX4(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t06, t08, t09; \
- type t10, t11, t12, t13, t14, t15, t16, t01; \
- t01 = a | b ; \
- t02 = b | c ; \
- t03 = a ^ t02; \
- t04 = b ^ d ; \
- t05 = d | t03; \
- t06 = d & t01; \
- z = t03 ^ t06; \
- t08 = z & t04; \
- t09 = t04 & t05; \
- t10 = c ^ t06; \
- t11 = b & c ; \
- t12 = t04 ^ t08; \
- t13 = t11 | t03; \
- t14 = t10 ^ t09; \
- t15 = a & t05; \
- t16 = t11 | t12; \
- y = t13 ^ t08; \
- x = t15 ^ t16; \
- w = ~ t14; \
- } while (0)
-
-#define SBOX4_INVERSE(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t06, t07, t09; \
- type t10, t11, t12, t13, t15, t01; \
- t01 = b | d ; \
- t02 = c | d ; \
- t03 = a & t01; \
- t04 = b ^ t02; \
- t05 = c ^ d ; \
- t06 = ~ t03; \
- t07 = a & t04; \
- x = t05 ^ t07; \
- t09 = x | t06; \
- t10 = a ^ t07; \
- t11 = t01 ^ t09; \
- t12 = d ^ t04; \
- t13 = c | t10; \
- z = t03 ^ t12; \
- t15 = a ^ t04; \
- y = t11 ^ t13; \
- w = t15 ^ t09; \
- } while (0)
-
-#define SBOX5(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t07, t08, t09; \
- type t10, t11, t12, t13, t14, t01; \
- t01 = b ^ d ; \
- t02 = b | d ; \
- t03 = a & t01; \
- t04 = c ^ t02; \
- t05 = t03 ^ t04; \
- w = ~ t05; \
- t07 = a ^ t01; \
- t08 = d | w ; \
- t09 = b | t05; \
- t10 = d ^ t08; \
- t11 = b | t07; \
- t12 = t03 | w ; \
- t13 = t07 | t10; \
- t14 = t01 ^ t11; \
- y = t09 ^ t13; \
- x = t07 ^ t08; \
- z = t12 ^ t14; \
- } while (0)
-
-#define SBOX5_INVERSE(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t07, t08, t09; \
- type t10, t12, t13, t15, t16, t01; \
- t01 = a & d ; \
- t02 = c ^ t01; \
- t03 = a ^ d ; \
- t04 = b & t02; \
- t05 = a & c ; \
- w = t03 ^ t04; \
- t07 = a & w ; \
- t08 = t01 ^ w ; \
- t09 = b | t05; \
- t10 = ~ b ; \
- x = t08 ^ t09; \
- t12 = t10 | t07; \
- t13 = w | x ; \
- z = t02 ^ t12; \
- t15 = t02 ^ t13; \
- t16 = b ^ d ; \
- y = t16 ^ t15; \
- } while (0)
-
-#define SBOX6(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t07, t08, t09, t10; \
- type t11, t12, t13, t15, t17, t18, t01; \
- t01 = a & d ; \
- t02 = b ^ c ; \
- t03 = a ^ d ; \
- t04 = t01 ^ t02; \
- t05 = b | c ; \
- x = ~ t04; \
- t07 = t03 & t05; \
- t08 = b & x ; \
- t09 = a | c ; \
- t10 = t07 ^ t08; \
- t11 = b | d ; \
- t12 = c ^ t11; \
- t13 = t09 ^ t10; \
- y = ~ t13; \
- t15 = x & t03; \
- z = t12 ^ t07; \
- t17 = a ^ b ; \
- t18 = y ^ t15; \
- w = t17 ^ t18; \
- } while (0)
-
-#define SBOX6_INVERSE(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t06, t07, t08, t09; \
- type t12, t13, t14, t15, t16, t17, t01; \
- t01 = a ^ c ; \
- t02 = ~ c ; \
- t03 = b & t01; \
- t04 = b | t02; \
- t05 = d | t03; \
- t06 = b ^ d ; \
- t07 = a & t04; \
- t08 = a | t02; \
- t09 = t07 ^ t05; \
- x = t06 ^ t08; \
- w = ~ t09; \
- t12 = b & w ; \
- t13 = t01 & t05; \
- t14 = t01 ^ t12; \
- t15 = t07 ^ t13; \
- t16 = d | t02; \
- t17 = a ^ x ; \
- z = t17 ^ t15; \
- y = t16 ^ t14; \
- } while (0)
-
-#define SBOX7(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t05, t06, t08, t09, t10; \
- type t11, t13, t14, t15, t16, t17, t01; \
- t01 = a & c ; \
- t02 = ~ d ; \
- t03 = a & t02; \
- t04 = b | t01; \
- t05 = a & b ; \
- t06 = c ^ t04; \
- z = t03 ^ t06; \
- t08 = c | z ; \
- t09 = d | t05; \
- t10 = a ^ t08; \
- t11 = t04 & z ; \
- x = t09 ^ t10; \
- t13 = b ^ x ; \
- t14 = t01 ^ x ; \
- t15 = c ^ t05; \
- t16 = t11 | t13; \
- t17 = t02 | t14; \
- w = t15 ^ t17; \
- y = a ^ t16; \
- } while (0)
-
-#define SBOX7_INVERSE(type, a, b, c, d, w, x, y, z) \
- do { \
- type t02, t03, t04, t06, t07, t08, t09; \
- type t10, t11, t13, t14, t15, t16, t01; \
- t01 = a & b ; \
- t02 = a | b ; \
- t03 = c | t01; \
- t04 = d & t02; \
- z = t03 ^ t04; \
- t06 = b ^ t04; \
- t07 = d ^ z ; \
- t08 = ~ t07; \
- t09 = t06 | t08; \
- t10 = b ^ d ; \
- t11 = a | d ; \
- x = a ^ t09; \
- t13 = c ^ t06; \
- t14 = c & t11; \
- t15 = d | x ; \
- t16 = t01 | t10; \
- w = t13 ^ t15; \
- y = t14 ^ t16; \
- } while (0)
-
-/* In-place linear transformation. */
-#define LINEAR_TRANSFORMATION(x0,x1,x2,x3) \
- do { \
- x0 = ROL32 (x0, 13); \
- x2 = ROL32 (x2, 3); \
- x1 = x1 ^ x0 ^ x2; \
- x3 = x3 ^ x2 ^ (x0 << 3); \
- x1 = ROL32 (x1, 1); \
- x3 = ROL32 (x3, 7); \
- x0 = x0 ^ x1 ^ x3; \
- x2 = x2 ^ x3 ^ (x1 << 7); \
- x0 = ROL32 (x0, 5); \
- x2 = ROL32 (x2, 22); \
- } while (0)
-
-/* In-place inverse linear transformation. */
-#define LINEAR_TRANSFORMATION_INVERSE(x0,x1,x2,x3) \
- do { \
- x2 = ROL32 (x2, 10); \
- x0 = ROL32 (x0, 27); \
- x2 = x2 ^ x3 ^ (x1 << 7); \
- x0 = x0 ^ x1 ^ x3; \
- x3 = ROL32 (x3, 25); \
- x1 = ROL32 (x1, 31); \
- x3 = x3 ^ x2 ^ (x0 << 3); \
- x1 = x1 ^ x0 ^ x2; \
- x2 = ROL32 (x2, 29); \
- x0 = ROL32 (x0, 19); \
- } while (0)
-
-#define KEYXOR(x0,x1,x2,x3, subkey) \
- do { \
- (x0) ^= (subkey)[0]; \
- (x1) ^= (subkey)[1]; \
- (x2) ^= (subkey)[2]; \
- (x3) ^= (subkey)[3]; \
- } while (0)
-
-/* Round inputs are x0,x1,x2,x3 (destroyed), and round outputs are
- y0,y1,y2,y3. */
-#define ROUND(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
- do { \
- KEYXOR(x0,x1,x2,x3, subkey); \
- SBOX##which(uint32_t, x0,x1,x2,x3, y0,y1,y2,y3); \
- LINEAR_TRANSFORMATION(y0,y1,y2,y3); \
- } while (0)
-
-/* Round inputs are x0,x1,x2,x3 (destroyed), and round outputs are
- y0,y1,y2,y3. */
-#define ROUND_INVERSE(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
- do { \
- LINEAR_TRANSFORMATION_INVERSE (x0,x1,x2,x3); \
- SBOX##which##_INVERSE(uint32_t, x0,x1,x2,x3, y0,y1,y2,y3); \
- KEYXOR(y0,y1,y2,y3, subkey); \
- } while (0)
-
-#if HAVE_NATIVE_64_BIT
-/* Operate independently on both halves of a 64-bit word. */
-#define ROL64(x,n) \
- (((x) << (n) & ~(((1L << (n))-1) << 32)) \
- |(((x) >> (32-(n))) & ~(((1L << (32-(n)))-1) << (n))))
-
-#define KEYXOR64(x0,x1,x2,x3, subkey) \
- do { \
- uint64_t _sk; \
- _sk = (subkey)[0]; _sk |= _sk << 32; (x0) ^= _sk; \
- _sk = (subkey)[1]; _sk |= _sk << 32; (x1) ^= _sk; \
- _sk = (subkey)[2]; _sk |= _sk << 32; (x2) ^= _sk; \
- _sk = (subkey)[3]; _sk |= _sk << 32; (x3) ^= _sk; \
- } while (0)
-
-#define RSHIFT64(x,n) \
- ( ((x) << (n)) & ~(((1L << n) - 1) << 32))
-
-#define LINEAR_TRANSFORMATION64(x0,x1,x2,x3) \
- do { \
- x0 = ROL64 (x0, 13); \
- x2 = ROL64 (x2, 3); \
- x1 = x1 ^ x0 ^ x2; \
- x3 = x3 ^ x2 ^ RSHIFT64(x0, 3); \
- x1 = ROL64 (x1, 1); \
- x3 = ROL64 (x3, 7); \
- x0 = x0 ^ x1 ^ x3; \
- x2 = x2 ^ x3 ^ RSHIFT64(x1, 7); \
- x0 = ROL64 (x0, 5); \
- x2 = ROL64 (x2, 22); \
- } while (0)
-
-/* In-place inverse linear transformation. */
-#define LINEAR_TRANSFORMATION64_INVERSE(x0,x1,x2,x3) \
- do { \
- x2 = ROL64 (x2, 10); \
- x0 = ROL64 (x0, 27); \
- x2 = x2 ^ x3 ^ RSHIFT64(x1, 7); \
- x0 = x0 ^ x1 ^ x3; \
- x3 = ROL64 (x3, 25); \
- x1 = ROL64 (x1, 31); \
- x3 = x3 ^ x2 ^ RSHIFT64(x0, 3); \
- x1 = x1 ^ x0 ^ x2; \
- x2 = ROL64 (x2, 29); \
- x0 = ROL64 (x0, 19); \
- } while (0)
-
-#define ROUND64(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
- do { \
- KEYXOR64(x0,x1,x2,x3, subkey); \
- SBOX##which(uint64_t, x0,x1,x2,x3, y0,y1,y2,y3); \
- LINEAR_TRANSFORMATION64(y0,y1,y2,y3); \
- } while (0)
-
-#define ROUND64_INVERSE(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
- do { \
- LINEAR_TRANSFORMATION64_INVERSE (x0,x1,x2,x3); \
- SBOX##which##_INVERSE(uint64_t, x0,x1,x2,x3, y0,y1,y2,y3); \
- KEYXOR64(y0,y1,y2,y3, subkey); \
- } while (0)
-
-#endif
-
-/* Key schedule */
-/* Note: Increments k */
-#define KS_RECURRENCE(w, i, k) \
- do { \
- uint32_t _wn = (w)[(i)] ^ (w)[((i)+3)&7] ^ w[((i)+5)&7] \
- ^ w[((i)+7)&7] ^ PHI ^ (k)++; \
- ((w)[(i)] = ROL32(_wn, 11)); \
- } while (0)
-
-/* Note: Increments k four times and keys once */
-#define KS(keys, s, w, i, k) \
- do { \
- KS_RECURRENCE(w, (i), (k)); \
- KS_RECURRENCE(w, (i)+1, (k)); \
- KS_RECURRENCE(w, (i)+2, (k)); \
- KS_RECURRENCE(w, (i)+3, (k)); \
- SBOX##s(uint32_t, w[(i)],w[(i)+1],w[(i)+2],w[(i)+3], \
- (*keys)[0],(*keys)[1],(*keys)[2],(*keys)[3]); \
- (keys)++; \
- } while (0)
-
-/* Pad user key and convert to an array of 8 uint32_t. */
-static void
-serpent_key_pad (const uint8_t *key, unsigned int key_length,
- uint32_t *w)
-{
- unsigned int i;
-
- assert (key_length <= SERPENT_MAX_KEY_SIZE);
-
- for (i = 0; key_length >= 4; key_length -=4, key += 4)
- w[i++] = LE_READ_UINT32(key);
-
- if (i < 8)
- {
- /* Key must be padded according to the Serpent specification.
- "aabbcc" -> "aabbcc0100...00" -> 0x01ccbbaa. */
- uint32_t pad = 0x01;
-
- while (key_length > 0)
- pad = pad << 8 | key[--key_length];
-
- w[i++] = pad;
-
- while (i < 8)
- w[i++] = 0;
- }
-}
-
-/* Initialize CONTEXT with the key KEY of KEY_LENGTH bits. */
-void
-serpent_set_key (struct serpent_ctx *ctx,
- unsigned length, const uint8_t * key)
-{
- uint32_t w[8];
- uint32_t (*keys)[4];
- unsigned k;
-
- serpent_key_pad (key, length, w);
-
- /* Derive the 33 subkeys from KEY and store them in SUBKEYS. We do
- the recurrence in the key schedule using W as a circular buffer
- of just 8 uint32_t. */
-
- /* FIXME: Would be better to invoke SBOX with scalar variables as
- arguments, no arrays. To do that, unpack w into separate
- variables, use temporary variables as the SBOX destination. */
-
- keys = ctx->keys;
- k = 0;
- for (;;)
- {
- KS(keys, 3, w, 0, k);
- if (k == 132)
- break;
- KS(keys, 2, w, 4, k);
- KS(keys, 1, w, 0, k);
- KS(keys, 0, w, 4, k);
- KS(keys, 7, w, 0, k);
- KS(keys, 6, w, 4, k);
- KS(keys, 5, w, 0, k);
- KS(keys, 4, w, 4, k);
- }
- assert (keys == ctx->keys + 33);
-}
-
-void
-serpent_encrypt (const struct serpent_ctx *ctx,
- unsigned length, uint8_t * dst, const uint8_t * src)
-{
- assert( !(length % SERPENT_BLOCK_SIZE));
-
-#if HAVE_NATIVE_64_BIT
- if (length & SERPENT_BLOCK_SIZE)
-#else
- while (length >= SERPENT_BLOCK_SIZE)
-#endif
- {
- uint32_t x0,x1,x2,x3, y0,y1,y2,y3;
- unsigned k;
-
- x0 = LE_READ_UINT32 (src);
- x1 = LE_READ_UINT32 (src + 4);
- x2 = LE_READ_UINT32 (src + 8);
- x3 = LE_READ_UINT32 (src + 12);
-
- for (k = 0; ; k += 8)
- {
- ROUND (0, ctx->keys[k+0], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND (1, ctx->keys[k+1], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND (2, ctx->keys[k+2], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND (3, ctx->keys[k+3], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND (4, ctx->keys[k+4], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND (5, ctx->keys[k+5], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND (6, ctx->keys[k+6], x0,x1,x2,x3, y0,y1,y2,y3);
- if (k == 24)
- break;
- ROUND (7, ctx->keys[k+7], y0,y1,y2,y3, x0,x1,x2,x3);
- }
-
- /* Special final round, using two subkeys. */
- KEYXOR (y0,y1,y2,y3, ctx->keys[31]);
- SBOX7 (uint32_t, y0,y1,y2,y3, x0,x1,x2,x3);
- KEYXOR (x0,x1,x2,x3, ctx->keys[32]);
-
- LE_WRITE_UINT32 (dst, x0);
- LE_WRITE_UINT32 (dst + 4, x1);
- LE_WRITE_UINT32 (dst + 8, x2);
- LE_WRITE_UINT32 (dst + 12, x3);
-
- src += SERPENT_BLOCK_SIZE;
- dst += SERPENT_BLOCK_SIZE;
- length -= SERPENT_BLOCK_SIZE;
- }
-#if HAVE_NATIVE_64_BIT
- FOR_BLOCKS(length, dst, src, 2*SERPENT_BLOCK_SIZE)
- {
- uint64_t x0,x1,x2,x3, y0,y1,y2,y3;
- unsigned k;
-
- x0 = LE_READ_UINT32 (src);
- x1 = LE_READ_UINT32 (src + 4);
- x2 = LE_READ_UINT32 (src + 8);
- x3 = LE_READ_UINT32 (src + 12);
-
- x0 <<= 32; x0 |= LE_READ_UINT32 (src + 16);
- x1 <<= 32; x1 |= LE_READ_UINT32 (src + 20);
- x2 <<= 32; x2 |= LE_READ_UINT32 (src + 24);
- x3 <<= 32; x3 |= LE_READ_UINT32 (src + 28);
-
- for (k = 0; ; k += 8)
- {
- ROUND64 (0, ctx->keys[k+0], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND64 (1, ctx->keys[k+1], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND64 (2, ctx->keys[k+2], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND64 (3, ctx->keys[k+3], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND64 (4, ctx->keys[k+4], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND64 (5, ctx->keys[k+5], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND64 (6, ctx->keys[k+6], x0,x1,x2,x3, y0,y1,y2,y3);
- if (k == 24)
- break;
- ROUND64 (7, ctx->keys[k+7], y0,y1,y2,y3, x0,x1,x2,x3);
- }
-
- /* Special final round, using two subkeys. */
- KEYXOR64 (y0,y1,y2,y3, ctx->keys[31]);
- SBOX7 (uint64_t, y0,y1,y2,y3, x0,x1,x2,x3);
- KEYXOR64 (x0,x1,x2,x3, ctx->keys[32]);
-
- LE_WRITE_UINT32 (dst + 16, x0);
- LE_WRITE_UINT32 (dst + 20, x1);
- LE_WRITE_UINT32 (dst + 24, x2);
- LE_WRITE_UINT32 (dst + 28, x3);
- x0 >>= 32; LE_WRITE_UINT32 (dst, x0);
- x1 >>= 32; LE_WRITE_UINT32 (dst + 4, x1);
- x2 >>= 32; LE_WRITE_UINT32 (dst + 8, x2);
- x3 >>= 32; LE_WRITE_UINT32 (dst + 12, x3);
- }
-#endif /* HAVE_NATIVE_64_BIT */
-}
-
-void
-serpent_decrypt (const struct serpent_ctx *ctx,
- unsigned length, uint8_t * dst, const uint8_t * src)
-{
- assert( !(length % SERPENT_BLOCK_SIZE));
-
-#if HAVE_NATIVE_64_BIT
- if (length & SERPENT_BLOCK_SIZE)
-#else
- while (length >= SERPENT_BLOCK_SIZE)
-#endif
- {
- uint32_t x0,x1,x2,x3, y0,y1,y2,y3;
- unsigned k;
-
- x0 = LE_READ_UINT32 (src);
- x1 = LE_READ_UINT32 (src + 4);
- x2 = LE_READ_UINT32 (src + 8);
- x3 = LE_READ_UINT32 (src + 12);
-
- /* Inverse of special round */
- KEYXOR (x0,x1,x2,x3, ctx->keys[32]);
- SBOX7_INVERSE (uint32_t, x0,x1,x2,x3, y0,y1,y2,y3);
- KEYXOR (y0,y1,y2,y3, ctx->keys[31]);
-
- k = 24;
- goto start32;
- while (k > 0)
- {
- k -= 8;
- ROUND_INVERSE (7, ctx->keys[k+7], x0,x1,x2,x3, y0,y1,y2,y3);
- start32:
- ROUND_INVERSE (6, ctx->keys[k+6], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND_INVERSE (5, ctx->keys[k+5], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND_INVERSE (4, ctx->keys[k+4], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND_INVERSE (3, ctx->keys[k+3], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND_INVERSE (2, ctx->keys[k+2], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND_INVERSE (1, ctx->keys[k+1], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND_INVERSE (0, ctx->keys[k], y0,y1,y2,y3, x0,x1,x2,x3);
- }
-
- LE_WRITE_UINT32 (dst, x0);
- LE_WRITE_UINT32 (dst + 4, x1);
- LE_WRITE_UINT32 (dst + 8, x2);
- LE_WRITE_UINT32 (dst + 12, x3);
-
- src += SERPENT_BLOCK_SIZE;
- dst += SERPENT_BLOCK_SIZE;
- length -= SERPENT_BLOCK_SIZE;
- }
-#if HAVE_NATIVE_64_BIT
- FOR_BLOCKS(length, dst, src, 2*SERPENT_BLOCK_SIZE)
- {
- uint64_t x0,x1,x2,x3, y0,y1,y2,y3;
- unsigned k;
-
- x0 = LE_READ_UINT32 (src);
- x1 = LE_READ_UINT32 (src + 4);
- x2 = LE_READ_UINT32 (src + 8);
- x3 = LE_READ_UINT32 (src + 12);
-
- x0 <<= 32; x0 |= LE_READ_UINT32 (src + 16);
- x1 <<= 32; x1 |= LE_READ_UINT32 (src + 20);
- x2 <<= 32; x2 |= LE_READ_UINT32 (src + 24);
- x3 <<= 32; x3 |= LE_READ_UINT32 (src + 28);
-
- /* Inverse of special round */
- KEYXOR64 (x0,x1,x2,x3, ctx->keys[32]);
- SBOX7_INVERSE (uint64_t, x0,x1,x2,x3, y0,y1,y2,y3);
- KEYXOR64 (y0,y1,y2,y3, ctx->keys[31]);
-
- k = 24;
- goto start64;
- while (k > 0)
- {
- k -= 8;
- ROUND64_INVERSE (7, ctx->keys[k+7], x0,x1,x2,x3, y0,y1,y2,y3);
- start64:
- ROUND64_INVERSE (6, ctx->keys[k+6], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND64_INVERSE (5, ctx->keys[k+5], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND64_INVERSE (4, ctx->keys[k+4], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND64_INVERSE (3, ctx->keys[k+3], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND64_INVERSE (2, ctx->keys[k+2], y0,y1,y2,y3, x0,x1,x2,x3);
- ROUND64_INVERSE (1, ctx->keys[k+1], x0,x1,x2,x3, y0,y1,y2,y3);
- ROUND64_INVERSE (0, ctx->keys[k], y0,y1,y2,y3, x0,x1,x2,x3);
- }
-
- LE_WRITE_UINT32 (dst + 16, x0);
- LE_WRITE_UINT32 (dst + 20, x1);
- LE_WRITE_UINT32 (dst + 24, x2);
- LE_WRITE_UINT32 (dst + 28, x3);
- x0 >>= 32; LE_WRITE_UINT32 (dst, x0);
- x1 >>= 32; LE_WRITE_UINT32 (dst + 4, x1);
- x2 >>= 32; LE_WRITE_UINT32 (dst + 8, x2);
- x3 >>= 32; LE_WRITE_UINT32 (dst + 12, x3);
- }
-#endif /* HAVE_NATIVE_64_BIT */
-}