summaryrefslogtreecommitdiff
path: root/include/crypto/gf128mul.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 15:53:46 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 15:53:46 -0700
commit5a0387a8a8efb90ae7fea1e2e5c62de3efa74691 (patch)
tree9e5bbbafe7fea01c843d86c7c3d40f29f962c474 /include/crypto/gf128mul.h
parent204f144c9fcac355843412b6ba1150086488a208 (diff)
parent929562b144783b9212625305eadcbbd800809643 (diff)
downloadlinux-rt-5a0387a8a8efb90ae7fea1e2e5c62de3efa74691.tar.gz
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "Here is the crypto update for 4.12: API: - Add batch registration for acomp/scomp - Change acomp testing to non-unique compressed result - Extend algorithm name limit to 128 bytes - Require setkey before accept(2) in algif_aead Algorithms: - Add support for deflate rfc1950 (zlib) Drivers: - Add accelerated crct10dif for powerpc - Add crc32 in stm32 - Add sha384/sha512 in ccp - Add 3des/gcm(aes) for v5 devices in ccp - Add Queue Interface (QI) backend support in caam - Add new Exynos RNG driver - Add ThunderX ZIP driver - Add driver for hardware random generator on MT7623 SoC" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (101 commits) crypto: stm32 - Fix OF module alias information crypto: algif_aead - Require setkey before accept(2) crypto: scomp - add support for deflate rfc1950 (zlib) crypto: scomp - allow registration of multiple scomps crypto: ccp - Change ISR handler method for a v5 CCP crypto: ccp - Change ISR handler method for a v3 CCP crypto: crypto4xx - rename ce_ring_contol to ce_ring_control crypto: testmgr - Allow ecb(cipher_null) in FIPS mode Revert "crypto: arm64/sha - Add constant operand modifier to ASM_EXPORT" crypto: ccp - Disable interrupts early on unload crypto: ccp - Use only the relevant interrupt bits hwrng: mtk - Add driver for hardware random generator on MT7623 SoC dt-bindings: hwrng: Add Mediatek hardware random generator bindings crypto: crct10dif-vpmsum - Fix missing preempt_disable() crypto: testmgr - replace compression known answer test crypto: acomp - allow registration of multiple acomps hwrng: n2 - Use devm_kcalloc() in n2rng_probe() crypto: chcr - Fix error handling related to 'chcr_alloc_shash' padata: get_next is never NULL crypto: exynos - Add new Exynos RNG driver ...
Diffstat (limited to 'include/crypto/gf128mul.h')
-rw-r--r--include/crypto/gf128mul.h87
1 files changed, 70 insertions, 17 deletions
diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h
index 592d47e565a8..0977fb18ff68 100644
--- a/include/crypto/gf128mul.h
+++ b/include/crypto/gf128mul.h
@@ -43,12 +43,13 @@
---------------------------------------------------------------------------
Issue Date: 31/01/2006
- An implementation of field multiplication in Galois Field GF(128)
+ An implementation of field multiplication in Galois Field GF(2^128)
*/
#ifndef _CRYPTO_GF128MUL_H
#define _CRYPTO_GF128MUL_H
+#include <asm/byteorder.h>
#include <crypto/b128ops.h>
#include <linux/slab.h>
@@ -65,7 +66,7 @@
* are left and the lsb's are right. char b[16] is an array and b[0] is
* the first octet.
*
- * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000
+ * 10000000 00000000 00000000 00000000 .... 00000000 00000000 00000000
* b[0] b[1] b[2] b[3] b[13] b[14] b[15]
*
* Every bit is a coefficient of some power of X. We can store the bits
@@ -85,15 +86,17 @@
* Both of the above formats are easy to implement on big-endian
* machines.
*
- * EME (which is patent encumbered) uses the ble format (bits are stored
- * in big endian order and the bytes in little endian). The above buffer
- * represents X^7 in this case and the primitive polynomial is b[0] = 0x87.
+ * XTS and EME (the latter of which is patent encumbered) use the ble
+ * format (bits are stored in big endian order and the bytes in little
+ * endian). The above buffer represents X^7 in this case and the
+ * primitive polynomial is b[0] = 0x87.
*
* The common machine word-size is smaller than 128 bits, so to make
* an efficient implementation we must split into machine word sizes.
- * This file uses one 32bit for the moment. Machine endianness comes into
- * play. The lle format in relation to machine endianness is discussed
- * below by the original author of gf128mul Dr Brian Gladman.
+ * This implementation uses 64-bit words for the moment. Machine
+ * endianness comes into play. The lle format in relation to machine
+ * endianness is discussed below by the original author of gf128mul Dr
+ * Brian Gladman.
*
* Let's look at the bbe and ble format on a little endian machine.
*
@@ -127,10 +130,10 @@
* machines this will automatically aligned to wordsize and on a 64-bit
* machine also.
*/
-/* Multiply a GF128 field element by x. Field elements are held in arrays
- of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower
- indexed bits placed in the more numerically significant bit positions
- within bytes.
+/* Multiply a GF(2^128) field element by x. Field elements are
+ held in arrays of bytes in which field bits 8n..8n + 7 are held in
+ byte[n], with lower indexed bits placed in the more numerically
+ significant bit positions within bytes.
On little endian machines the bit indexes translate into the bit
positions within four 32-bit words in the following way
@@ -161,8 +164,58 @@ void gf128mul_lle(be128 *a, const be128 *b);
void gf128mul_bbe(be128 *a, const be128 *b);
-/* multiply by x in ble format, needed by XTS */
-void gf128mul_x_ble(be128 *a, const be128 *b);
+/*
+ * The following functions multiply a field element by x in
+ * the polynomial field representation. They use 64-bit word operations
+ * to gain speed but compensate for machine endianness and hence work
+ * correctly on both styles of machine.
+ *
+ * They are defined here for performance.
+ */
+
+static inline u64 gf128mul_mask_from_bit(u64 x, int which)
+{
+ /* a constant-time version of 'x & ((u64)1 << which) ? (u64)-1 : 0' */
+ return ((s64)(x << (63 - which)) >> 63);
+}
+
+static inline void gf128mul_x_lle(be128 *r, const be128 *x)
+{
+ u64 a = be64_to_cpu(x->a);
+ u64 b = be64_to_cpu(x->b);
+
+ /* equivalent to gf128mul_table_le[(b << 7) & 0xff] << 48
+ * (see crypto/gf128mul.c): */
+ u64 _tt = gf128mul_mask_from_bit(b, 0) & ((u64)0xe1 << 56);
+
+ r->b = cpu_to_be64((b >> 1) | (a << 63));
+ r->a = cpu_to_be64((a >> 1) ^ _tt);
+}
+
+static inline void gf128mul_x_bbe(be128 *r, const be128 *x)
+{
+ u64 a = be64_to_cpu(x->a);
+ u64 b = be64_to_cpu(x->b);
+
+ /* equivalent to gf128mul_table_be[a >> 63] (see crypto/gf128mul.c): */
+ u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87;
+
+ r->a = cpu_to_be64((a << 1) | (b >> 63));
+ r->b = cpu_to_be64((b << 1) ^ _tt);
+}
+
+/* needed by XTS */
+static inline void gf128mul_x_ble(le128 *r, const le128 *x)
+{
+ u64 a = le64_to_cpu(x->a);
+ u64 b = le64_to_cpu(x->b);
+
+ /* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */
+ u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87;
+
+ r->a = cpu_to_le64((a << 1) | (b >> 63));
+ r->b = cpu_to_le64((b << 1) ^ _tt);
+}
/* 4k table optimization */
@@ -172,8 +225,8 @@ struct gf128mul_4k {
struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g);
struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g);
-void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t);
-void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t);
+void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t);
+void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t);
static inline void gf128mul_free_4k(struct gf128mul_4k *t)
{
@@ -194,6 +247,6 @@ struct gf128mul_64k {
*/
struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g);
void gf128mul_free_64k(struct gf128mul_64k *t);
-void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t);
+void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t);
#endif /* _CRYPTO_GF128MUL_H */