summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jim Plank <plank@cs.utk.edu> 2013-12-31 20:08:18 -0500
committer Jim Plank <plank@cs.utk.edu> 2013-12-31 20:08:18 -0500
commit fb0bbdcf62a8cc82268207285a2f94808672dfdb (patch)
tree 344dabc8a3b177852a24c830989e05a5370c3f7d
parent 8900c0e635bc809f2667647eb71156b0732112d3 (diff)
download gf-complete-fb0bbdcf62a8cc82268207285a2f94808672dfdb.tar.gz
Fixed the problem with PCLMUL and gf_complete.h. Removed
ARCH_64 from everything but 128/GROUP/SSE. Fortunately, no one ever uses that.
-rw-r--r-- include/gf_complete.h 8
-rw-r--r-- src/gf_w128.c 21
-rw-r--r-- src/gf_w16.c 16
-rw-r--r-- src/gf_w32.c 16
-rw-r--r-- src/gf_w4.c 4
-rw-r--r-- src/gf_w64.c 14
-rw-r--r-- src/gf_w8.c 14
7 files changed, 45 insertions, 48 deletions
diff --git a/include/gf_complete.h b/include/gf_complete.h
index ef685f9..57b439e 100644
--- a/include/gf_complete.h
+++ b/include/gf_complete.h
@@ -24,14 +24,8 @@
#include <emmintrin.h>
#endif
-#ifdef INTEL_PCLMUL
+#ifdef INTEL_SSE4_PCLMUL
#include <wmmintrin.h>
- #ifdef INTEL_SSE4
- #define INTEL_SSE4_PCLMUL
- #endif
- #ifdef INTEL_SSSE3
- #define INTEL_SSSE3_PCLMUL
- #endif
#endif
diff --git a/src/gf_w128.c b/src/gf_w128.c
index fc08a3f..c888f44 100644
--- a/src/gf_w128.c
+++ b/src/gf_w128.c
@@ -91,7 +91,7 @@ int xor)
gf_val_128_t d128;
uint64_t c128[2];
gf_region_data rd;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a,b;
__m128i result0,result1;
__m128i prim_poly;
@@ -296,7 +296,7 @@ gf_w128_shift_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_12
void
gf_w128_clm_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a,b;
__m128i result0,result1;
@@ -382,7 +382,7 @@ gf_w128_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_
void
gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
-#if defined(INTEL_SSE4) && defined(ARCH_64)
+#if defined(INTEL_SSE4)
int i;
__m128i a, b, pp, one, prod, amask, l_middle_one, u_middle_one;
/*John: pmask is always the highest bit set, and the rest zeros. amask changes, it's a countdown.*/
@@ -440,7 +440,7 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
void
gf_w128_sse_bytwo_b_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
-#if defined(INTEL_SSE4) && defined(ARCH_64)
+#if defined(INTEL_SSE4)
__m128i a, b, lmask, hmask, pp, c, middle_one;
gf_internal_t *h;
uint64_t topbit, middlebit;
@@ -987,7 +987,7 @@ void gf_w128_group_m_init(gf_t *gf, gf_val_128_t b128)
static
void gf_w128_group_m_sse_init(gf_t *gf, gf_val_128_t b128)
{
-#if defined(INTEL_SSE4) && defined(ARCH_64)
+#if defined(INTEL_SSE4)
int i, j;
int g_m;
uint64_t lbit, middlebit;
@@ -1277,7 +1277,7 @@ static
void
gf_w128_group_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
-#if defined(INTEL_SSE4) && defined(ARCH_64)
+#if defined(INTEL_SSE4)
int i;
int i_r, i_m, t_m;
int mask_m, mask_r, mask_s;
@@ -1706,7 +1706,7 @@ int gf_w128_composite_init(gf_t *gf)
static
int gf_w128_cfm_init(gf_t *gf)
{
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
gf->inverse.w128 = gf_w128_euclid;
gf->multiply.w128 = gf_w128_clm_multiply;
gf->multiply_region.w128 = gf_w128_clm_multiply_region_from_single;
@@ -1779,7 +1779,7 @@ void gf_w128_group_r_init(gf_t *gf)
static
void gf_w128_group_r_sse_init(gf_t *gf)
{
-#if defined(INTEL_SSE4) && defined(ARCH_64)
+#if defined(INTEL_SSE4)
int i, j;
int g_r;
uint64_t pp;
@@ -1814,7 +1814,7 @@ int gf_w128_split_init(gf_t *gf)
h = (gf_internal_t *) gf->scratch;
gf->multiply.w128 = gf_w128_bytwo_p_multiply;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
if (!(h->region_type & GF_REGION_NOSSE)){
gf->multiply.w128 = gf_w128_clm_multiply;
}
@@ -1880,6 +1880,9 @@ int gf_w128_group_init(gf_t *gf)
gf->inverse.w128 = gf_w128_euclid;
gf->multiply_region.w128 = gf_w128_group_multiply_region;
+ /* JSP: I've got a problem compiling here -- something about "vmovq", and
+ I don't have the time to chase it down right now. */
+
#if defined(INTEL_SSE4) && defined(ARCH_64)
if(!(scratch->region_type & GF_REGION_NOSSE))
{
diff --git a/src/gf_w16.c b/src/gf_w16.c
index 2d73034..454c6cc 100644
--- a/src/gf_w16.c
+++ b/src/gf_w16.c
@@ -133,7 +133,7 @@ gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val
uint16_t *s16;
uint16_t *d16;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -197,7 +197,7 @@ gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val
uint16_t *s16;
uint16_t *d16;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -266,7 +266,7 @@ gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val
uint16_t *s16;
uint16_t *d16;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -448,7 +448,7 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -495,7 +495,7 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -535,7 +535,7 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -611,7 +611,7 @@ int gf_w16_cfm_init(gf_t *gf)
/*Ben: Determining how many reductions to do */
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
if ((0xfe00 & h->prim_poly) == 0) {
gf->multiply.w32 = gf_w16_clm_multiply_2;
gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_2;
@@ -739,7 +739,7 @@ int gf_w16_log_init(gf_t *gf)
if (check) {
if (h->mult_type != GF_MULT_LOG_TABLE) {
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
return gf_w16_cfm_init(gf);
#endif
return gf_w16_shift_init(gf);
diff --git a/src/gf_w32.c b/src/gf_w32.c
index e2fb0f9..03f285f 100644
--- a/src/gf_w32.c
+++ b/src/gf_w32.c
@@ -125,7 +125,7 @@ void
gf_w32_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
int i;
uint32_t *s32;
@@ -175,7 +175,7 @@ void
gf_w32_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
int i;
uint32_t *s32;
@@ -229,7 +229,7 @@ static
void
gf_w32_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
int i;
uint32_t *s32;
uint32_t *d32;
@@ -409,7 +409,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -453,7 +453,7 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -492,7 +492,7 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -565,7 +565,7 @@ int gf_w32_cfm_init(gf_t *gf)
/*Ben: We also check to see if the prim poly will work for pclmul */
/*Ben: Check to see how many reduction steps it will take*/
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
if ((0xfffe0000 & h->prim_poly) == 0){
gf->multiply.w32 = gf_w32_clm_multiply_2;
gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_2;
@@ -2176,7 +2176,7 @@ int gf_w32_split_init(gf_t *gf)
int i, j, exp, ispclmul, issse3;
ispclmul = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
ispclmul = 1;
#endif
diff --git a/src/gf_w4.c b/src/gf_w4.c
index 3e00cd2..2504ec6 100644
--- a/src/gf_w4.c
+++ b/src/gf_w4.c
@@ -182,7 +182,7 @@ gf_w4_clm_multiply (gf_t *gf, gf_val_32_t a4, gf_val_32_t b4)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -1967,7 +1967,7 @@ int gf_w4_cfm_init(gf_t *gf)
h = (gf_internal_t *) gf->scratch;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
gf->multiply.w32 = gf_w4_clm_multiply;
return 1;
#endif
diff --git a/src/gf_w64.c b/src/gf_w64.c
index b8baa8f..73bf164 100644
--- a/src/gf_w64.c
+++ b/src/gf_w64.c
@@ -96,7 +96,7 @@ xor)
gf_val_64_t *s64, *d64, *top;
gf_region_data rd;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result, r1;
__m128i prim_poly;
@@ -187,7 +187,7 @@ xor)
gf_val_64_t *s64, *d64, *top;
gf_region_data rd;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result, r1;
__m128i prim_poly;
@@ -385,7 +385,7 @@ gf_w64_clm_multiply_2 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
{
gf_val_64_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -427,7 +427,7 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
{
gf_val_64_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -466,7 +466,7 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
void
gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
{
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
gf_internal_t *h;
int i, j, k;
uint8_t *s8, *d8, *dtop;
@@ -759,7 +759,7 @@ int gf_w64_cfm_init(gf_t *gf)
gf->inverse.w64 = gf_w64_euclid;
gf->multiply_region.w64 = gf_w64_multiply_region_from_single;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
if ((0xfffffffe00000000ULL & h->prim_poly) == 0){
gf->multiply.w64 = gf_w64_clm_multiply_2;
gf->multiply_region.w64 = gf_w64_clm_multiply_region_from_single_2;
@@ -2030,7 +2030,7 @@ int gf_w64_split_init(gf_t *gf)
gf->multiply.w64 = gf_w64_bytwo_p_multiply;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
if ((!(h->region_type & GF_REGION_NOSSE) &&
(h->arg1 == 64 || h->arg2 == 64)) ||
h->mult_type == GF_MULT_DEFAULT){
diff --git a/src/gf_w8.c b/src/gf_w8.c
index da34968..7661aad 100644
--- a/src/gf_w8.c
+++ b/src/gf_w8.c
@@ -211,7 +211,7 @@ gf_w8_clm_multiply_2 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -257,7 +257,7 @@ gf_w8_clm_multiply_3 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -296,7 +296,7 @@ gf_w8_clm_multiply_4 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -373,7 +373,7 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -432,7 +432,7 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -495,7 +495,7 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@@ -592,7 +592,7 @@ int gf_w8_cfm_init(gf_t *gf)
h = (gf_internal_t *) gf->scratch;
-#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
+#if defined(INTEL_SSE4_PCLMUL)
if ((0xe0 & h->prim_poly) == 0){
gf->multiply.w32 = gf_w8_clm_multiply_2;
gf->multiply_region.w32 = gf_w8_clm_multiply_region_from_single_2;