summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2023-04-13 19:11:25 +0200
committerNiels Möller <nisse@lysator.liu.se>2023-04-13 19:11:25 +0200
commit5251c8fe842b2121b19e384f862d35ccd2053cc0 (patch)
tree27abcf710858501686a9ab81287464bd63d67eee
parent76ad2fa6fe68de4361aabb1a596fa3332050d5c4 (diff)
downloadnettle-5251c8fe842b2121b19e384f862d35ccd2053cc0.tar.gz
Rewrite of table-based ghash code, for side-channel silence.
-rw-r--r--ChangeLog23
-rw-r--r--Makefile.in3
-rw-r--r--examples/nettle-benchmark.c14
-rw-r--r--gcmdata.c84
-rw-r--r--ghash-set-key.c53
-rw-r--r--ghash-update.c83
-rw-r--r--testsuite/gcm-test.c15
-rw-r--r--x86_64/ghash-update.asm183
8 files changed, 144 insertions, 314 deletions
diff --git a/ChangeLog b/ChangeLog
index 67986932..41f814fe 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,26 @@
+2023-04-13 Niels Möller <nisse@lysator.liu.se>
+
+ * ghash-update.c (gcm_gf_mul): Rewrite to avoid side-channel
+ leakage. Now processes the message bits one at a time, using
+ tabulated values of the key premultiplied by appropriate powers of
+ x, so that the table is accessed in a fixed sequential order.
+ Performance penalty, on x86_64, is roughly 3 times.
+ (shift_table): Deleted table.
+ (gcm_gf_shift_8): Deleted function.
+ * ghash-set-key.c (_ghash_set_key): Rewrite table generation.
+ * gcmdata.c: Deleted.
+ * Makefile.in: Delete references to gcmdata.
+
+ * x86_64/ghash-update.asm: Rewritten, similar side-channel silent
+ method as the C implementation, with same table layout, but using
+ sse2 instructions.
+
+ * testsuite/gcm-test.c (test_ghash_internal): Add valgrind
+ annotations, to verify that the ghash implementation makes no
+ data-dependent branches or memory accesses.
+
+ * examples/nettle-benchmark.c (bench_ghash_update): New function.
+
2023-04-03 Niels Möller <nisse@lysator.liu.se>
From Mamone Tarsha:
diff --git a/Makefile.in b/Makefile.in
index 081337a8..2464e17e 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -250,7 +250,7 @@ INSTALL_HEADERS = $(HEADERS) version.h @IF_MINI_GMP@ mini-gmp.h
SOURCES = $(nettle_SOURCES) $(hogweed_SOURCES) \
$(getopt_SOURCES) $(internal_SOURCES) \
$(OPT_SOURCES) \
- aesdata.c desdata.c twofishdata.c shadata.c gcmdata.c eccdata.c
+ aesdata.c desdata.c twofishdata.c shadata.c eccdata.c
# NOTE: This list must include all source files, with no duplicates,
# independently of which source files are included in the build.
@@ -683,7 +683,6 @@ clean-here:
desdata$(EXEEXT_FOR_BUILD) \
twofishdata$(EXEEXT_FOR_BUILD) \
shadata$(EXEEXT_FOR_BUILD) \
- gcmdata$(EXEEXT_FOR_BUILD) \
eccdata$(EXEEXT_FOR_BUILD) eccdata.stamp
-rm -rf .lib libnettle.stamp libhogweed.stamp
diff --git a/examples/nettle-benchmark.c b/examples/nettle-benchmark.c
index 613b1dcf..04c3e0f2 100644
--- a/examples/nettle-benchmark.c
+++ b/examples/nettle-benchmark.c
@@ -56,6 +56,7 @@
#include "des.h"
#include "eax.h"
#include "gcm.h"
+#include "ghash-internal.h"
#include "memxor.h"
#include "salsa20.h"
#include "salsa20-internal.h"
@@ -875,10 +876,22 @@ bench_sha3_permute(void)
TIME_CYCLES (t, sha3_permute (&state));
printf("sha3_permute: %.2f cycles (%.2f / round)\n", t, t / 24.0);
}
+static void
+bench_ghash_update(void)
+{
+ struct gcm_key key;
+ union nettle_block16 state;
+ const uint8_t data[160];
+ double t;
+
+ TIME_CYCLES (t, _ghash_update (&key, &state, 10, data));
+ printf("ghash_update: %.2f cycles / block\n", t / 10.0);
+}
#else
#define bench_sha1_compress()
#define bench_salsa20_core()
#define bench_sha3_permute()
+#define bench_ghash_update()
#endif
#if WITH_OPENSSL
@@ -986,6 +999,7 @@ main(int argc, char **argv)
bench_sha1_compress();
bench_salsa20_core();
bench_sha3_permute();
+ bench_ghash_update();
printf("\n");
header();
diff --git a/gcmdata.c b/gcmdata.c
deleted file mode 100644
index 2d57b46a..00000000
--- a/gcmdata.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/* gcmdata.c
-
- Galois counter mode, specified by NIST,
- http://csrc.nist.gov/publications/nistpubs/800-38D/SP-800-38D.pdf
-
- Generation of fixed multiplication tables.
-
- Copyright (C) 2011 Niels Möller
-
- This file is part of GNU Nettle.
-
- GNU Nettle is free software: you can redistribute it and/or
- modify it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
- or
-
- * the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your
- option) any later version.
-
- or both in parallel, as here.
-
- GNU Nettle is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received copies of the GNU General Public License and
- the GNU Lesser General Public License along with this program. If
- not, see http://www.gnu.org/licenses/.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#define GHASH_POLYNOMIAL 0xE1
-
-
-/* When x is shifted out over the block edge, add multiples of the
- defining polynomial to eliminate each bit. */
-static unsigned
-reduce(unsigned x)
-{
- unsigned p = GHASH_POLYNOMIAL << 1;
- unsigned y = 0;
- for (; x; x >>= 1, p <<= 1)
- if (x & 1)
- y ^= p;
- return y;
-}
-
-int
-main(int argc, char **argv)
-{
- unsigned i;
- printf("4-bit table:\n");
-
- for (i = 0; i<16; i++)
- {
- unsigned x;
- if (i && !(i%8))
- printf("\n");
-
- x = reduce(i << 4);
- printf("W(%02x,%02x),", x >> 8, x & 0xff);
- }
- printf("\n\n");
- printf("8-bit table:\n");
- for (i = 0; i<256; i++)
- {
- unsigned x;
- if (i && !(i%8))
- printf("\n");
-
- x = reduce(i);
- printf("W(%02x,%02x),", x >> 8, x & 0xff);
- }
- printf("\n");
- return EXIT_SUCCESS;
-}
diff --git a/ghash-set-key.c b/ghash-set-key.c
index 0e91afcb..da1c90f0 100644
--- a/ghash-set-key.c
+++ b/ghash-set-key.c
@@ -51,25 +51,46 @@ _nettle_ghash_set_key_c (struct gcm_key *ctx, const union nettle_block16 *key);
#define _nettle_ghash_set_key _nettle_ghash_set_key_c
#endif
+#if GCM_TABLE_BITS < 7
+# error Unsupported table size.
+#endif
+
/* Implements a lookup table for processors without carryless-mul
instruction. */
void
_ghash_set_key (struct gcm_key *ctx, const union nettle_block16 *key)
{
- /* Middle element if GCM_TABLE_BITS > 0, otherwise the first
- element */
- unsigned i = (1<<GCM_TABLE_BITS)/2;
- block16_zero (&ctx->h[0]);
- ctx->h[i] = *key;
-
- /* Algorithm 3 from the gcm paper. First do powers of two, then do
- the rest by adding. */
- while (i /= 2)
- block16_mulx_ghash (&ctx->h[i], &ctx->h[2*i]);
- for (i = 2; i < 1<<GCM_TABLE_BITS; i *= 2)
- {
- unsigned j;
- for (j = 1; j < i; j++)
- block16_xor3 (&ctx->h[i+j], &ctx->h[i], &ctx->h[j]);
- }
+ /* Table elements hold the key, premultiplied by all needed powers
+ of x. Element ordering follows the order bits are processed in
+ _ghash_update, alternating u64[0] and u64[1] bits, starting from
+ the least significant end. In the gcm bit order, bits (left to
+ right) correspond to x powers (the numbers) like
+
+ |0...7|8...15|...|56...63|64...71|72...79|...|120...127|
+
+ where | indicates the byte boundaries. On little endian, these
+ bits are in u64 like
+
+ u64[0]: | 56...63 48...55 40...47 32...39 24...31 16...23 8...15 0...7|
+ u64[1]: |120...127 112...129 104...111 96...103 88...95 80...87 72...79 64...71|
+
+ With big-endian, we instead get
+
+ u64[0]: |0...63|
+ u64[1]: |64...127|
+ */
+#if WORDS_BIGENDIAN
+# define INDEX_PERMUTE 63
+#else
+# define INDEX_PERMUTE 7
+#endif
+ unsigned i;
+
+ block16_set (&ctx->h[2*INDEX_PERMUTE], key);
+ for (i = 1; i < 64; i++)
+ block16_mulx_ghash(&ctx->h[2*(i ^ INDEX_PERMUTE)], &ctx->h[2*((i-1) ^ INDEX_PERMUTE)]);
+
+ block16_mulx_ghash(&ctx->h[2*INDEX_PERMUTE + 1], &ctx->h[2*(63^INDEX_PERMUTE)]);
+ for (i = 1; i < 64; i++)
+ block16_mulx_ghash(&ctx->h[2*(i ^ INDEX_PERMUTE)+1], &ctx->h[2*((i-1) ^ INDEX_PERMUTE)+1]);
}
diff --git a/ghash-update.c b/ghash-update.c
index 6eb19d80..bdeaa38d 100644
--- a/ghash-update.c
+++ b/ghash-update.c
@@ -44,7 +44,7 @@
#include "ghash-internal.h"
#include "block-internal.h"
-#if GCM_TABLE_BITS != 8
+#if GCM_TABLE_BITS < 7
# error Unsupported table size.
#endif
@@ -54,83 +54,26 @@ const uint8_t *
_nettle_ghash_update_c (const struct gcm_key *ctx, union nettle_block16 *state,
size_t blocks, const uint8_t *data);
#define _nettle_ghash_update _nettle_ghash_update_c
-
-#endif
-#if WORDS_BIGENDIAN
-# define W(left,right) (0x##left##right)
-#else
-# define W(left,right) (0x##right##left)
#endif
-static const uint16_t
-shift_table[0x100] = {
- W(00,00),W(01,c2),W(03,84),W(02,46),W(07,08),W(06,ca),W(04,8c),W(05,4e),
- W(0e,10),W(0f,d2),W(0d,94),W(0c,56),W(09,18),W(08,da),W(0a,9c),W(0b,5e),
- W(1c,20),W(1d,e2),W(1f,a4),W(1e,66),W(1b,28),W(1a,ea),W(18,ac),W(19,6e),
- W(12,30),W(13,f2),W(11,b4),W(10,76),W(15,38),W(14,fa),W(16,bc),W(17,7e),
- W(38,40),W(39,82),W(3b,c4),W(3a,06),W(3f,48),W(3e,8a),W(3c,cc),W(3d,0e),
- W(36,50),W(37,92),W(35,d4),W(34,16),W(31,58),W(30,9a),W(32,dc),W(33,1e),
- W(24,60),W(25,a2),W(27,e4),W(26,26),W(23,68),W(22,aa),W(20,ec),W(21,2e),
- W(2a,70),W(2b,b2),W(29,f4),W(28,36),W(2d,78),W(2c,ba),W(2e,fc),W(2f,3e),
- W(70,80),W(71,42),W(73,04),W(72,c6),W(77,88),W(76,4a),W(74,0c),W(75,ce),
- W(7e,90),W(7f,52),W(7d,14),W(7c,d6),W(79,98),W(78,5a),W(7a,1c),W(7b,de),
- W(6c,a0),W(6d,62),W(6f,24),W(6e,e6),W(6b,a8),W(6a,6a),W(68,2c),W(69,ee),
- W(62,b0),W(63,72),W(61,34),W(60,f6),W(65,b8),W(64,7a),W(66,3c),W(67,fe),
- W(48,c0),W(49,02),W(4b,44),W(4a,86),W(4f,c8),W(4e,0a),W(4c,4c),W(4d,8e),
- W(46,d0),W(47,12),W(45,54),W(44,96),W(41,d8),W(40,1a),W(42,5c),W(43,9e),
- W(54,e0),W(55,22),W(57,64),W(56,a6),W(53,e8),W(52,2a),W(50,6c),W(51,ae),
- W(5a,f0),W(5b,32),W(59,74),W(58,b6),W(5d,f8),W(5c,3a),W(5e,7c),W(5f,be),
- W(e1,00),W(e0,c2),W(e2,84),W(e3,46),W(e6,08),W(e7,ca),W(e5,8c),W(e4,4e),
- W(ef,10),W(ee,d2),W(ec,94),W(ed,56),W(e8,18),W(e9,da),W(eb,9c),W(ea,5e),
- W(fd,20),W(fc,e2),W(fe,a4),W(ff,66),W(fa,28),W(fb,ea),W(f9,ac),W(f8,6e),
- W(f3,30),W(f2,f2),W(f0,b4),W(f1,76),W(f4,38),W(f5,fa),W(f7,bc),W(f6,7e),
- W(d9,40),W(d8,82),W(da,c4),W(db,06),W(de,48),W(df,8a),W(dd,cc),W(dc,0e),
- W(d7,50),W(d6,92),W(d4,d4),W(d5,16),W(d0,58),W(d1,9a),W(d3,dc),W(d2,1e),
- W(c5,60),W(c4,a2),W(c6,e4),W(c7,26),W(c2,68),W(c3,aa),W(c1,ec),W(c0,2e),
- W(cb,70),W(ca,b2),W(c8,f4),W(c9,36),W(cc,78),W(cd,ba),W(cf,fc),W(ce,3e),
- W(91,80),W(90,42),W(92,04),W(93,c6),W(96,88),W(97,4a),W(95,0c),W(94,ce),
- W(9f,90),W(9e,52),W(9c,14),W(9d,d6),W(98,98),W(99,5a),W(9b,1c),W(9a,de),
- W(8d,a0),W(8c,62),W(8e,24),W(8f,e6),W(8a,a8),W(8b,6a),W(89,2c),W(88,ee),
- W(83,b0),W(82,72),W(80,34),W(81,f6),W(84,b8),W(85,7a),W(87,3c),W(86,fe),
- W(a9,c0),W(a8,02),W(aa,44),W(ab,86),W(ae,c8),W(af,0a),W(ad,4c),W(ac,8e),
- W(a7,d0),W(a6,12),W(a4,54),W(a5,96),W(a0,d8),W(a1,1a),W(a3,5c),W(a2,9e),
- W(b5,e0),W(b4,22),W(b6,64),W(b7,a6),W(b2,e8),W(b3,2a),W(b1,6c),W(b0,ae),
- W(bb,f0),W(ba,32),W(b8,74),W(b9,b6),W(bc,f8),W(bd,3a),W(bf,7c),W(be,be),
-};
-#undef W
-
-static void
-gcm_gf_shift_8(union nettle_block16 *x)
-{
- uint64_t reduce;
-
- /* Shift uses big-endian representation. */
-#if WORDS_BIGENDIAN
- reduce = shift_table[x->u64[1] & 0xff];
- x->u64[1] = (x->u64[1] >> 8) | ((x->u64[0] & 0xff) << 56);
- x->u64[0] = (x->u64[0] >> 8) ^ (reduce << 48);
-#else /* ! WORDS_BIGENDIAN */
- reduce = shift_table[(x->u64[1] >> 56) & 0xff];
- x->u64[1] = (x->u64[1] << 8) | (x->u64[0] >> 56);
- x->u64[0] = (x->u64[0] << 8) ^ reduce;
-#endif /* ! WORDS_BIGENDIAN */
-}
-
static void
gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table)
{
- union nettle_block16 Z;
+ uint64_t x0 = x->u64[0];
+ uint64_t x1 = x->u64[1];
+ uint64_t r0 = 0;
+ uint64_t r1 = 0;
unsigned i;
-
- Z = table[x->b[GCM_BLOCK_SIZE-1]];
-
- for (i = GCM_BLOCK_SIZE-2; i > 0; i--)
+ for (i = 0; i < 64; i++, x0 >>= 1, x1 >>= 1)
{
- gcm_gf_shift_8(&Z);
- block16_xor(&Z, &table[x->b[i]]);
+ uint64_t m0 = -(x0 & 1);
+ uint64_t m1 = -(x1 & 1);
+ r0 ^= m0 & table[2*i].u64[0];
+ r1 ^= m0 & table[2*i].u64[1];
+ r0 ^= m1 & table[2*i+1].u64[0];
+ r1 ^= m1 & table[2*i+1].u64[1];
}
- gcm_gf_shift_8(&Z);
- block16_xor3(x, &Z, &table[x->b[0]]);
+ x->u64[0] = r0; x->u64[1] = r1;
}
const uint8_t *
diff --git a/testsuite/gcm-test.c b/testsuite/gcm-test.c
index d70cdd1e..bc555d60 100644
--- a/testsuite/gcm-test.c
+++ b/testsuite/gcm-test.c
@@ -6,6 +6,13 @@
#include "gcm.h"
#include "ghash-internal.h"
+#if HAVE_VALGRIND_MEMCHECK_H
+# include <valgrind/memcheck.h>
+#else
+# define VALGRIND_MAKE_MEM_UNDEFINED(p, n)
+# define VALGRIND_MAKE_MEM_DEFINED(p, n)
+#endif
+
static void
test_gcm_hash (const struct tstring *msg, const struct tstring *ref)
{
@@ -42,11 +49,19 @@ test_ghash_internal (const struct tstring *key,
struct gcm_key gcm_key;
union nettle_block16 state;
+ /* Use VALGRIND_MAKE_MEM_DEFINED to mark inputs as "undefined", to
+ get valgrind to warn about any branches or memory accesses
+ depending on secret data. */
memcpy (state.b, key->data, GCM_BLOCK_SIZE);
+ VALGRIND_MAKE_MEM_UNDEFINED (&state, sizeof(state));
_ghash_set_key (&gcm_key, &state);
memcpy (state.b, iv->data, GCM_BLOCK_SIZE);
+ VALGRIND_MAKE_MEM_UNDEFINED (&state, sizeof(state));
+ VALGRIND_MAKE_MEM_UNDEFINED (message->data, message->length);
_ghash_update (&gcm_key, &state, message->length / GCM_BLOCK_SIZE, message->data);
+ VALGRIND_MAKE_MEM_DEFINED (&state, sizeof(state));
+ VALGRIND_MAKE_MEM_DEFINED (message->data, message->length);
if (!MEMEQ(GCM_BLOCK_SIZE, state.b, digest->data))
{
fprintf (stderr, "gcm_hash (internal) failed\n");
diff --git a/x86_64/ghash-update.asm b/x86_64/ghash-update.asm
index f3a66210..b3417e45 100644
--- a/x86_64/ghash-update.asm
+++ b/x86_64/ghash-update.asm
@@ -36,15 +36,14 @@ define(`KEY', `%rdi')
define(`XP', `%rsi')
define(`BLOCKS', `%rdx')
define(`SRC', `%rcx')
-define(`X0', `%rax')
-define(`X1', `%rbx')
-define(`CNT', `%ebp')
-define(`T0', `%r8')
-define(`T1', `%r9')
-define(`T2', `%r10')
-define(`Z0', `%r11')
-define(`Z1', `%r12')
-define(`SHIFT_TABLE', `%r13')
+define(`CNT', `%rax')
+define(`KEY32', `%r8')
+define(`X', `%xmm0')
+define(`R', `%xmm1')
+define(`M0', `%xmm2')
+define(`M1', `%xmm3')
+define(`M2', `%xmm4')
+define(`M3', `%xmm5')
.file "ghash-update.asm"
@@ -55,150 +54,50 @@ define(`SHIFT_TABLE', `%r13')
.text
ALIGN(16)
PROLOGUE(_nettle_ghash_update)
- W64_ENTRY(4, 0)
- push %rbx
- push %rbp
- push %r12
- push %r13
+ W64_ENTRY(4, 6)
sub $1, BLOCKS
- lea .Lshift_table(%rip), SHIFT_TABLE
- mov (XP), X0
- mov 8(XP), X1
+ movups (XP), X
jc .Ldone
-ALIGN(16)
-.Lblock_loop:
-
- xor (SRC), X0
- xor 8(SRC), X1
-
-.Lblock_mul:
- rol $8, X1
- movzbl LREG(X1), XREG(T1)
- shl $4, T1
- mov (KEY, T1), Z0
- mov 8(KEY, T1), Z1
-
- C shift Z1, Z0, transforming
- C +-----------------------+-----------------------+
- C |15 14 13 12 11 10 09 08|07 06 05 04 03 02 01 00|
- C +-----------------------+-----------------------+
- C into
- C +-----------------------+-----------------------+
- C |14 13 12 11 10 09 08 07|06 05 04 03 02 01 00 |
- C +-----------------------+-----------------+-----+
- C xor |T[15]|
- C +-----+
-
- mov $7, CNT
+ C Table offset corresponding to 32 bits.
+ lea 1024(KEY), KEY32
ALIGN(16)
-.Loop_X1:
- mov Z1, T1
- shr $56, T1
- shl $8, Z1
- mov Z0, T0
- shl $8, Z0
- shr $56, T0
- movzwl (SHIFT_TABLE, T1, 2), XREG(T1)
- xor T1, Z0
- rol $8, X1
- movzbl LREG(X1), XREG(T2)
- shl $4, T2
- xor (KEY, T2), Z0
- add T0, Z1
- xor 8(KEY, T2), Z1
- decl CNT
- jne .Loop_X1
-
- mov $7, CNT
-
+.Lblock_loop:
+ C Unaligned input
+ movups (SRC), M0
+ pxor M0, X
+ pxor R, R
+ mov $992, CNT
ALIGN(16)
-.Loop_X0:
- mov Z1, T1
- shr $56, T1
- shl $8, Z1
- mov Z0, T0
- shl $8, Z0
- shr $56, T0
- movzwl (SHIFT_TABLE, T1, 2), XREG(T1)
- xor T1, Z0
- rol $8, X0
- movzbl LREG(X0), XREG(T2)
- shl $4, T2
- xor (KEY, T2), Z0
- add T0, Z1
- xor 8(KEY, T2), Z1
- decl CNT
- jne .Loop_X0
-
- mov Z1, T1
- shr $56, T1
- shl $8, Z1
- mov Z0, T0
- shl $8, Z0
- shr $56, T0
- movzwl (SHIFT_TABLE, T1, 2), XREG(T1)
- xor T1, Z0
- rol $8, X0
- movzbl LREG(X0), XREG(T2)
- shl $4, T2
- mov (KEY, T2), X0
- xor Z0, X0
- add T0, Z1
- mov 8(KEY, T2), X1
- xor Z1, X1
+.Loop_bit:
+ movdqa X, M3
+ psrad $31, M3
+ pshufd $0x00, M3, M0
+ pshufd $0x55, M3, M1
+ pshufd $0xaa, M3, M2
+ pshufd $0xff, M3, M3
+ pslld $1, X
+ pand (KEY, CNT), M0
+ pand (KEY32, CNT), M1
+ pand 16(KEY, CNT), M2
+ pand 16(KEY32, CNT), M3
+ pxor M0, M1
+ pxor M2, M3
+ pxor M1, R
+ pxor M3, R
+
+ sub $32, CNT
+ jnc .Loop_bit
+
+ movaps R, X
add $16, SRC
sub $1, BLOCKS
jnc .Lblock_loop
.Ldone:
- mov X0, (XP)
- mov X1, 8(XP)
+ movups X, (XP)
mov SRC, %rax
- pop %r13
- pop %r12
- pop %rbp
- pop %rbx
- W64_EXIT(4, 0)
+ W64_EXIT(4, 6)
ret
EPILOGUE(_nettle_ghash_update)
-
-define(`W', `0x$2$1')
- RODATA
- ALIGN(2)
-C NOTE: Sun/Oracle assembler doesn't support ".short".
-C Using ".value" seems more portable.
-.Lshift_table:
-.value W(00,00),W(01,c2),W(03,84),W(02,46),W(07,08),W(06,ca),W(04,8c),W(05,4e)
-.value W(0e,10),W(0f,d2),W(0d,94),W(0c,56),W(09,18),W(08,da),W(0a,9c),W(0b,5e)
-.value W(1c,20),W(1d,e2),W(1f,a4),W(1e,66),W(1b,28),W(1a,ea),W(18,ac),W(19,6e)
-.value W(12,30),W(13,f2),W(11,b4),W(10,76),W(15,38),W(14,fa),W(16,bc),W(17,7e)
-.value W(38,40),W(39,82),W(3b,c4),W(3a,06),W(3f,48),W(3e,8a),W(3c,cc),W(3d,0e)
-.value W(36,50),W(37,92),W(35,d4),W(34,16),W(31,58),W(30,9a),W(32,dc),W(33,1e)
-.value W(24,60),W(25,a2),W(27,e4),W(26,26),W(23,68),W(22,aa),W(20,ec),W(21,2e)
-.value W(2a,70),W(2b,b2),W(29,f4),W(28,36),W(2d,78),W(2c,ba),W(2e,fc),W(2f,3e)
-.value W(70,80),W(71,42),W(73,04),W(72,c6),W(77,88),W(76,4a),W(74,0c),W(75,ce)
-.value W(7e,90),W(7f,52),W(7d,14),W(7c,d6),W(79,98),W(78,5a),W(7a,1c),W(7b,de)
-.value W(6c,a0),W(6d,62),W(6f,24),W(6e,e6),W(6b,a8),W(6a,6a),W(68,2c),W(69,ee)
-.value W(62,b0),W(63,72),W(61,34),W(60,f6),W(65,b8),W(64,7a),W(66,3c),W(67,fe)
-.value W(48,c0),W(49,02),W(4b,44),W(4a,86),W(4f,c8),W(4e,0a),W(4c,4c),W(4d,8e)
-.value W(46,d0),W(47,12),W(45,54),W(44,96),W(41,d8),W(40,1a),W(42,5c),W(43,9e)
-.value W(54,e0),W(55,22),W(57,64),W(56,a6),W(53,e8),W(52,2a),W(50,6c),W(51,ae)
-.value W(5a,f0),W(5b,32),W(59,74),W(58,b6),W(5d,f8),W(5c,3a),W(5e,7c),W(5f,be)
-.value W(e1,00),W(e0,c2),W(e2,84),W(e3,46),W(e6,08),W(e7,ca),W(e5,8c),W(e4,4e)
-.value W(ef,10),W(ee,d2),W(ec,94),W(ed,56),W(e8,18),W(e9,da),W(eb,9c),W(ea,5e)
-.value W(fd,20),W(fc,e2),W(fe,a4),W(ff,66),W(fa,28),W(fb,ea),W(f9,ac),W(f8,6e)
-.value W(f3,30),W(f2,f2),W(f0,b4),W(f1,76),W(f4,38),W(f5,fa),W(f7,bc),W(f6,7e)
-.value W(d9,40),W(d8,82),W(da,c4),W(db,06),W(de,48),W(df,8a),W(dd,cc),W(dc,0e)
-.value W(d7,50),W(d6,92),W(d4,d4),W(d5,16),W(d0,58),W(d1,9a),W(d3,dc),W(d2,1e)
-.value W(c5,60),W(c4,a2),W(c6,e4),W(c7,26),W(c2,68),W(c3,aa),W(c1,ec),W(c0,2e)
-.value W(cb,70),W(ca,b2),W(c8,f4),W(c9,36),W(cc,78),W(cd,ba),W(cf,fc),W(ce,3e)
-.value W(91,80),W(90,42),W(92,04),W(93,c6),W(96,88),W(97,4a),W(95,0c),W(94,ce)
-.value W(9f,90),W(9e,52),W(9c,14),W(9d,d6),W(98,98),W(99,5a),W(9b,1c),W(9a,de)
-.value W(8d,a0),W(8c,62),W(8e,24),W(8f,e6),W(8a,a8),W(8b,6a),W(89,2c),W(88,ee)
-.value W(83,b0),W(82,72),W(80,34),W(81,f6),W(84,b8),W(85,7a),W(87,3c),W(86,fe)
-.value W(a9,c0),W(a8,02),W(aa,44),W(ab,86),W(ae,c8),W(af,0a),W(ad,4c),W(ac,8e)
-.value W(a7,d0),W(a6,12),W(a4,54),W(a5,96),W(a0,d8),W(a1,1a),W(a3,5c),W(a2,9e)
-.value W(b5,e0),W(b4,22),W(b6,64),W(b7,a6),W(b2,e8),W(b3,2a),W(b1,6c),W(b0,ae)
-.value W(bb,f0),W(ba,32),W(b8,74),W(b9,b6),W(bc,f8),W(bd,3a),W(bf,7c),W(be,be)