summary refs log tree commit diff
diff options
context:
space:
mode:
author Jim Plank <plank@cs.utk.edu> 2014-01-01 11:00:40 -0500
committer Jim Plank <plank@cs.utk.edu> 2014-01-01 11:00:40 -0500
commit f0c32c94bcc2cc5037cb714c75f8ec0ad2e1e2ef (patch)
tree d4303855be08819a93dd10a2b0a7fd93521679db
parent fb0bbdcf62a8cc82268207285a2f94808672dfdb (diff)
download gf-complete-f0c32c94bcc2cc5037cb714c75f8ec0ad2e1e2ef.tar.gz
Removed GROUP/128/SSE. It wasn't compiling, and it needed an overhaul.
I'll do it someday when I'm bored.
-rw-r--r-- include/gf_int.h 1
-rw-r--r-- src/gf.c 2
-rw-r--r-- src/gf_int.h 1
-rw-r--r-- src/gf_w128.c 298
-rw-r--r-- tools/gf_methods.c 4
5 files changed, 3 insertions, 303 deletions
diff --git a/include/gf_int.h b/include/gf_int.h
index ab9281d..9221569 100644
--- a/include/gf_int.h
+++ b/include/gf_int.h
@@ -144,7 +144,6 @@ typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */
GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */
GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */
GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */
- GF_E_GR_SSE4, /* Mult == GROUP, w == 128, No SSE4 */
GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */
GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */
GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */
diff --git a/src/gf.c b/src/gf.c
index 09a668e..a443f17 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -75,7 +75,6 @@ void gf_error()
case GF_E_GR_W_48: s = "With -m GROUP, w cannot be 4 or 8."; break;
case GF_E_GR_W_16: s = "With -m GROUP, w == 16, arg1 and arg2 must be 4."; break;
case GF_E_GR_128A: s = "With -m GROUP, w == 128, arg1 must be 4, and arg2 in { 4,8,16 }."; break;
- case GF_E_GR_SSE4: s = "With -m GROUP, w == 128, you need SSE4."; break;
case GF_E_GR_A_27: s = "With -m GROUP, arg1 and arg2 must be <= 27."; break;
case GF_E_GR_AR_W: s = "With -m GROUP, arg1 and arg2 must be <= w."; break;
case GF_E_GR____J: s = "Cannot use GROUP with -r ALTMAP|SSE|NOSSE."; break;
@@ -320,7 +319,6 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type,
if (w == 16 && (arg1 != 4 || arg2 != 4)) { _gf_errno = GF_E_GR_W_16; return 0; }
if (w == 128 && (arg1 != 4 ||
(arg2 != 4 && arg2 != 8 && arg2 != 16))) { _gf_errno = GF_E_GR_128A; return 0; }
- if (w == 128 && !sse4) { _gf_errno = GF_E_GR_SSE4; return 0; }
if (arg1 > 27 || arg2 > 27) { _gf_errno = GF_E_GR_A_27; return 0; }
if (arg1 > w || arg2 > w) { _gf_errno = GF_E_GR_AR_W; return 0; }
if (raltmap || rsse || rnosse) { _gf_errno = GF_E_GR____J; return 0; }
diff --git a/src/gf_int.h b/src/gf_int.h
index ab9281d..9221569 100644
--- a/src/gf_int.h
+++ b/src/gf_int.h
@@ -144,7 +144,6 @@ typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */
GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */
GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */
GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */
- GF_E_GR_SSE4, /* Mult == GROUP, w == 128, No SSE4 */
GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */
GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */
GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */
diff --git a/src/gf_w128.c b/src/gf_w128.c
index c888f44..fae9f5c 100644
--- a/src/gf_w128.c
+++ b/src/gf_w128.c
@@ -984,45 +984,6 @@ void gf_w128_group_m_init(gf_t *gf, gf_val_128_t b128)
return;
}
-static
-void gf_w128_group_m_sse_init(gf_t *gf, gf_val_128_t b128)
-{
-#if defined(INTEL_SSE4)
- int i, j;
- int g_m;
- uint64_t lbit, middlebit;
- gf_internal_t *scratch;
- gf_group_tables_t *gt;
- scratch = (gf_internal_t *) gf->scratch;
- gt = scratch->private;
- g_m = scratch->arg1;
-
- __m128i *table = (__m128i *)(gt->m_table), b, a, ubit, prim_poly;
- prim_poly = _mm_insert_epi64(_mm_setzero_si128(), scratch->prim_poly, 0);
- b = _mm_loadu_si128((__m128i *)(b128));
-
- table[0] = _mm_setzero_si128();
- table[1] = table[0];
- table[1] = _mm_insert_epi64(table[1],b128[0],1);
- table[1] = _mm_insert_epi64(table[1],b128[1],0);
- lbit = 1;
- lbit <<= 63;
- ubit = _mm_set_epi32(0, 1, 0, 0);
- for (i = 2; i < (1 << g_m); i <<= 1) {
- a = table[(i >> 1)];
- middlebit = (_mm_extract_epi64(a, 0x0) & lbit);
- a = _mm_slli_epi64(a, 1);
- if (middlebit) a = _mm_xor_si128(a, ubit);
- table[i] = a;
- if (_mm_extract_epi64(table[i >> 1], 0x1) & lbit) table[i] = _mm_xor_si128(table[i], prim_poly);
- for (j = 0; j < i; j++) {
- table[i + j] = _mm_xor_si128(table[i], table[j]);
- }
- }
- return;
-#endif
-}
-
void
gf_w128_group_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
@@ -1095,88 +1056,6 @@ gf_w128_group_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_
c128[1] = p_i[1];
}
-void
-gf_w128_group_sse_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
-{
-#if defined(INTEL_SSE4) && defined(ARCH_64)
- int i,j;
- int i_r, i_m, t_m;
- int mask_m, mask_r, mask_s;
- int g_m, g_r;
- uint32_t shiftbits;
- uint64_t a[2], tbit = 1;
- tbit <<= 63;
- gf_internal_t *scratch;
- gf_group_tables_t *gt;
- __m128i p_i, *m_table, *r_table, zero;
-
- zero = _mm_setzero_si128();
- scratch = (gf_internal_t *) gf->scratch;
- gt = scratch->private;
- m_table = (__m128i *)(gt->m_table);
- r_table = (__m128i *)(gt->r_table);
- g_m = scratch->arg1;
- g_r = scratch->arg2;
-
- mask_m = (1 << g_m) - 1;
- mask_r = (1 << g_r) - 1;
- mask_s = mask_m << (32-g_m); /*sets g_m leftmost bits to 1*/
- if (b128[0] != _mm_extract_epi64(m_table[1], 1) || b128[1] != _mm_extract_epi64(m_table[1], 0)) {
- gf_w128_group_m_sse_init(gf, b128);
- }
-
- p_i = zero;
- a[0] = a128[0];
- a[1] = a128[1];
-
- t_m = 0;
- i_r = 0;
-
- /* Top 64 bits */
- for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
- i_m = (a[0] >> (i * g_m)) & mask_m;
- i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r;
-
- shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
- shiftbits >>= 32-g_m;
- p_i = _mm_slli_epi64(p_i, g_m);
- p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
-
- p_i = _mm_xor_si128(p_i, m_table[i_m]);
- t_m += g_m;
- if (t_m == g_r) {
- p_i = _mm_xor_si128(p_i, r_table[i_r]);
- t_m = 0;
- i_r = 0;
- } else {
- i_r <<= g_m;
- }
- }
-
- for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
- i_m = (a[1] >> (i * g_m)) & mask_m;
- i_r ^= (((uint64_t)_mm_extract_epi64(p_i,1)) >> (64 - g_m)) & mask_r;
-
- shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
- shiftbits >>= 32-g_m;
- p_i = _mm_slli_epi64(p_i, g_m);
- p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
-
- p_i = _mm_xor_si128(p_i, m_table[i_m]);
- t_m += g_m;
- if (t_m == g_r) {
- p_i = _mm_xor_si128(p_i, r_table[i_r]);
- t_m = 0;
- i_r = 0;
- } else {
- i_r <<= g_m;
- }
- }
- c128[0] = _mm_extract_epi64(p_i, 1);
- c128[1] = _mm_extract_epi64(p_i, 0);
-#endif
-}
-
static
void
gf_w128_group_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
@@ -1273,160 +1152,6 @@ gf_w128_group_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val,
}
}
-static
-void
-gf_w128_group_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
-{
-#if defined(INTEL_SSE4)
- int i;
- int i_r, i_m, t_m;
- int mask_m, mask_r, mask_s;
- int g_m, g_r;
- uint32_t shiftbits;
- uint64_t a[2];
- gf_internal_t *scratch;
- gf_group_tables_t *gt;
- gf_region_data rd;
- uint64_t *a128, *c128, *top;
- __m128i *m_table, *r_table, p_i, zero;
- zero = _mm_setzero_si128();
- /* We only do this to check on alignment. */
- gf_set_region_data(&rd, gf, src, dest, bytes, 0, xor, 8);
-
- if (val[0] == 0) {
- if (val[1] == 0) { gf_multby_zero(dest, bytes, xor); return; }
- if (val[1] == 1) { gf_multby_one(src, dest, bytes, xor); return; }
- }
-
- scratch = (gf_internal_t *) gf->scratch;
- gt = scratch->private;
- m_table = (__m128i *)(gt->m_table);
- r_table = (__m128i *)(gt->r_table);
- g_m = scratch->arg1;
- g_r = scratch->arg2;
-
- mask_m = (1 << g_m) - 1;
- mask_r = (1 << g_r) - 1;
- mask_s = mask_m << (32-g_m);
-
- if (val[0] != _mm_extract_epi64(m_table[1], 1) || val[1] != _mm_extract_epi64(m_table[1], 0)) {
- gf_w128_group_m_sse_init(gf, val);
- }
-
- a128 = (uint64_t *) src;
- c128 = (uint64_t *) dest;
- top = (uint64_t *) rd.d_top;
-
- if (xor){
- while (c128 < top) {
- p_i = zero;
- a[0] = a128[0];
- a[1] = a128[1];
-
- t_m = 0;
- i_r = 0;
- /* Top 64 bits */
- for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
- i_m = (a[0] >> (i * g_m)) & mask_m;
- i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r;
-
- shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
- shiftbits >>= 32-g_m;
- p_i = _mm_slli_epi64(p_i, g_m);
- p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
- p_i = _mm_xor_si128(p_i, m_table[i_m]);
- t_m += g_m;
- if (t_m == g_r) {
- p_i = _mm_xor_si128(p_i, r_table[i_r]);
- t_m = 0;
- i_r = 0;
- } else {
- i_r <<= g_m;
- }
- }
-
- for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
- i_m = (a[1] >> (i * g_m)) & mask_m;
- i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r;
-
- shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
- shiftbits >>= 32-g_m;
- p_i = _mm_slli_epi64(p_i, g_m);
- p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
-
- p_i = _mm_xor_si128(p_i, m_table[i_m]);
- t_m += g_m;
- if (t_m == g_r) {
- p_i = _mm_xor_si128(p_i, r_table[i_r]);
- t_m = 0;
- i_r = 0;
- } else {
- i_r <<= g_m;
- }
- }
-
- c128[0] ^= _mm_extract_epi64(p_i, 1);
- c128[1] ^= _mm_extract_epi64(p_i, 0);
- a128 += 2;
- c128 += 2;
- }
- }else{
- while (c128 < top) {
- p_i = zero;
- a[0] = a128[0];
- a[1] = a128[1];
-
- t_m = 0;
- i_r = 0;
- /* Top 64 bits */
- for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
- i_m = (a[0] >> (i * g_m)) & mask_m;
- i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r;
-
- shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
- shiftbits >>= 32-g_m;
- p_i = _mm_slli_epi64(p_i, g_m);
- p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
- p_i = _mm_xor_si128(p_i, m_table[i_m]);
- t_m += g_m;
- if (t_m == g_r) {
- p_i = _mm_xor_si128(p_i, r_table[i_r]);
- t_m = 0;
- i_r = 0;
- } else {
- i_r <<= g_m;
- }
- }
-
- for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
- i_m = (a[1] >> (i * g_m)) & mask_m;
- i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r;
-
- shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
- shiftbits >>= 32-g_m;
- p_i = _mm_slli_epi64(p_i, g_m);
- p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
-
- p_i = _mm_xor_si128(p_i, m_table[i_m]);
- t_m += g_m;
- if (t_m == g_r) {
- p_i = _mm_xor_si128(p_i, r_table[i_r]);
- t_m = 0;
- i_r = 0;
- } else {
- i_r <<= g_m;
- }
- }
-
- c128[0] = _mm_extract_epi64(p_i, 1);
- c128[1] = _mm_extract_epi64(p_i, 0);
- a128 += 2;
- c128 += 2;
- }
- }
-#endif
-}
-
/* a^-1 -> b */
void
gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128)
@@ -1880,28 +1605,7 @@ int gf_w128_group_init(gf_t *gf)
gf->inverse.w128 = gf_w128_euclid;
gf->multiply_region.w128 = gf_w128_group_multiply_region;
- /* JSP: I've got a problem compiling here -- something about "vmovq", and
- I don't have the time to chase it down right now. */
-
- #if defined(INTEL_SSE4) && defined(ARCH_64)
- if(!(scratch->region_type & GF_REGION_NOSSE))
- {
- if ((g_m != 4) && ((g_r != 4) || (g_r != 8)))
- return 0;
- gt->r_table = (void *)(((uint64_t)gt->r_table + 15) & (~0xfULL)); /* aligns gt->r_table on a 16-bit boundary*/
- gt->m_table = gt->r_table + 2*size_r;
- gt->m_table[2] = 0;
- gt->m_table[3] = 0;
- gf->multiply.w128 = gf_w128_group_sse_multiply;
- gf->multiply_region.w128 = gf_w128_group_sse_multiply_region;
- gf_w128_group_r_sse_init(gf);
- }
- else
- gf_w128_group_r_init(gf);
- #else
- if(scratch->region_type & GF_REGION_SSE) return 0;
- else gf_w128_group_r_init(gf);
- #endif
+ gf_w128_group_r_init(gf);
return 1;
}
diff --git a/tools/gf_methods.c b/tools/gf_methods.c
index 903d8b0..3afb438 100644
--- a/tools/gf_methods.c
+++ b/tools/gf_methods.c
@@ -20,8 +20,8 @@
#define BNMULTS (8)
static char *BMULTS[BNMULTS] = { "CARRY_FREE", "GROUP48",
"TABLE", "LOG", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE" };
-#define NMULTS (15)
-static char *MULTS[NMULTS] = { "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
+#define NMULTS (16)
+static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
"TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2",
"SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" };