diff options
author | Jim Plank <plank@cs.utk.edu> | 2014-01-01 11:00:40 -0500 |
---|---|---|
committer | Jim Plank <plank@cs.utk.edu> | 2014-01-01 11:00:40 -0500 |
commit | f0c32c94bcc2cc5037cb714c75f8ec0ad2e1e2ef (patch) | |
tree | d4303855be08819a93dd10a2b0a7fd93521679db | |
parent | fb0bbdcf62a8cc82268207285a2f94808672dfdb (diff) | |
download | gf-complete-f0c32c94bcc2cc5037cb714c75f8ec0ad2e1e2ef.tar.gz |
Removed GROUP/128/SSE. It wasn't compiling, and it needed an overhaul.
I'll do it someday when I'm bored.
-rw-r--r-- | include/gf_int.h | 1 | ||||
-rw-r--r-- | src/gf.c | 2 | ||||
-rw-r--r-- | src/gf_int.h | 1 | ||||
-rw-r--r-- | src/gf_w128.c | 298 | ||||
-rw-r--r-- | tools/gf_methods.c | 4 |
5 files changed, 3 insertions, 303 deletions
diff --git a/include/gf_int.h b/include/gf_int.h index ab9281d..9221569 100644 --- a/include/gf_int.h +++ b/include/gf_int.h @@ -144,7 +144,6 @@ typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */ GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */ GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */ GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */ - GF_E_GR_SSE4, /* Mult == GROUP, w == 128, No SSE4 */ GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */ GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */ GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */ @@ -75,7 +75,6 @@ void gf_error() case GF_E_GR_W_48: s = "With -m GROUP, w cannot be 4 or 8."; break; case GF_E_GR_W_16: s = "With -m GROUP, w == 16, arg1 and arg2 must be 4."; break; case GF_E_GR_128A: s = "With -m GROUP, w == 128, arg1 must be 4, and arg2 in { 4,8,16 }."; break; - case GF_E_GR_SSE4: s = "With -m GROUP, w == 128, you need SSE4."; break; case GF_E_GR_A_27: s = "With -m GROUP, arg1 and arg2 must be <= 27."; break; case GF_E_GR_AR_W: s = "With -m GROUP, arg1 and arg2 must be <= w."; break; case GF_E_GR____J: s = "Cannot use GROUP with -r ALTMAP|SSE|NOSSE."; break; @@ -320,7 +319,6 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, if (w == 16 && (arg1 != 4 || arg2 != 4)) { _gf_errno = GF_E_GR_W_16; return 0; } if (w == 128 && (arg1 != 4 || (arg2 != 4 && arg2 != 8 && arg2 != 16))) { _gf_errno = GF_E_GR_128A; return 0; } - if (w == 128 && !sse4) { _gf_errno = GF_E_GR_SSE4; return 0; } if (arg1 > 27 || arg2 > 27) { _gf_errno = GF_E_GR_A_27; return 0; } if (arg1 > w || arg2 > w) { _gf_errno = GF_E_GR_AR_W; return 0; } if (raltmap || rsse || rnosse) { _gf_errno = GF_E_GR____J; return 0; } diff --git a/src/gf_int.h b/src/gf_int.h index ab9281d..9221569 100644 --- a/src/gf_int.h +++ b/src/gf_int.h @@ -144,7 +144,6 @@ typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */ GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */ GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */ GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */ - GF_E_GR_SSE4, /* Mult == GROUP, w == 128, No SSE4 */ GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */ GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */ GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */ diff --git a/src/gf_w128.c b/src/gf_w128.c index c888f44..fae9f5c 100644 --- a/src/gf_w128.c +++ b/src/gf_w128.c @@ -984,45 +984,6 @@ void gf_w128_group_m_init(gf_t *gf, gf_val_128_t b128) return; } -static -void gf_w128_group_m_sse_init(gf_t *gf, gf_val_128_t b128) -{ -#if defined(INTEL_SSE4) - int i, j; - int g_m; - uint64_t lbit, middlebit; - gf_internal_t *scratch; - gf_group_tables_t *gt; - scratch = (gf_internal_t *) gf->scratch; - gt = scratch->private; - g_m = scratch->arg1; - - __m128i *table = (__m128i *)(gt->m_table), b, a, ubit, prim_poly; - prim_poly = _mm_insert_epi64(_mm_setzero_si128(), scratch->prim_poly, 0); - b = _mm_loadu_si128((__m128i *)(b128)); - - table[0] = _mm_setzero_si128(); - table[1] = table[0]; - table[1] = _mm_insert_epi64(table[1],b128[0],1); - table[1] = _mm_insert_epi64(table[1],b128[1],0); - lbit = 1; - lbit <<= 63; - ubit = _mm_set_epi32(0, 1, 0, 0); - for (i = 2; i < (1 << g_m); i <<= 1) { - a = table[(i >> 1)]; - middlebit = (_mm_extract_epi64(a, 0x0) & lbit); - a = _mm_slli_epi64(a, 1); - if (middlebit) a = _mm_xor_si128(a, ubit); - table[i] = a; - if (_mm_extract_epi64(table[i >> 1], 0x1) & lbit) table[i] = _mm_xor_si128(table[i], prim_poly); - for (j = 0; j < i; j++) { - table[i + j] = _mm_xor_si128(table[i], table[j]); - } - } - return; -#endif -} - void gf_w128_group_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) { @@ -1095,88 +1056,6 @@ gf_w128_group_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_ c128[1] = p_i[1]; } -void -gf_w128_group_sse_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) -{ -#if defined(INTEL_SSE4) && defined(ARCH_64) - int i,j; - int i_r, i_m, t_m; - int mask_m, mask_r, mask_s; - int g_m, g_r; - uint32_t shiftbits; - uint64_t a[2], tbit = 1; - tbit <<= 63; - gf_internal_t *scratch; - gf_group_tables_t *gt; - __m128i p_i, *m_table, *r_table, zero; - - zero = _mm_setzero_si128(); - scratch = (gf_internal_t *) gf->scratch; - gt = scratch->private; - m_table = (__m128i *)(gt->m_table); - r_table = (__m128i *)(gt->r_table); - g_m = scratch->arg1; - g_r = scratch->arg2; - - mask_m = (1 << g_m) - 1; - mask_r = (1 << g_r) - 1; - mask_s = mask_m << (32-g_m); /*sets g_m leftmost bits to 1*/ - if (b128[0] != _mm_extract_epi64(m_table[1], 1) || b128[1] != _mm_extract_epi64(m_table[1], 0)) { - gf_w128_group_m_sse_init(gf, b128); - } - - p_i = zero; - a[0] = a128[0]; - a[1] = a128[1]; - - t_m = 0; - i_r = 0; - - /* Top 64 bits */ - for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) { - i_m = (a[0] >> (i * g_m)) & mask_m; - i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r; - - shiftbits = _mm_extract_epi32(p_i, 1) & mask_s; - shiftbits >>= 32-g_m; - p_i = _mm_slli_epi64(p_i, g_m); - p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2)); - - p_i = _mm_xor_si128(p_i, m_table[i_m]); - t_m += g_m; - if (t_m == g_r) { - p_i = _mm_xor_si128(p_i, r_table[i_r]); - t_m = 0; - i_r = 0; - } else { - i_r <<= g_m; - } - } - - for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) { - i_m = (a[1] >> (i * g_m)) & mask_m; - i_r ^= (((uint64_t)_mm_extract_epi64(p_i,1)) >> (64 - g_m)) & mask_r; - - shiftbits = _mm_extract_epi32(p_i, 1) & mask_s; - shiftbits >>= 32-g_m; - p_i = _mm_slli_epi64(p_i, g_m); - p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2)); - - p_i = _mm_xor_si128(p_i, m_table[i_m]); - t_m += g_m; - if (t_m == g_r) { - p_i = _mm_xor_si128(p_i, r_table[i_r]); - t_m = 0; - i_r = 0; - } else { - i_r <<= g_m; - } - } - c128[0] = _mm_extract_epi64(p_i, 1); - c128[1] = _mm_extract_epi64(p_i, 0); -#endif -} - static void gf_w128_group_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor) @@ -1273,160 +1152,6 @@ gf_w128_group_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, } } -static -void -gf_w128_group_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor) -{ -#if defined(INTEL_SSE4) - int i; - int i_r, i_m, t_m; - int mask_m, mask_r, mask_s; - int g_m, g_r; - uint32_t shiftbits; - uint64_t a[2]; - gf_internal_t *scratch; - gf_group_tables_t *gt; - gf_region_data rd; - uint64_t *a128, *c128, *top; - __m128i *m_table, *r_table, p_i, zero; - zero = _mm_setzero_si128(); - /* We only do this to check on alignment. */ - gf_set_region_data(&rd, gf, src, dest, bytes, 0, xor, 8); - - if (val[0] == 0) { - if (val[1] == 0) { gf_multby_zero(dest, bytes, xor); return; } - if (val[1] == 1) { gf_multby_one(src, dest, bytes, xor); return; } - } - - scratch = (gf_internal_t *) gf->scratch; - gt = scratch->private; - m_table = (__m128i *)(gt->m_table); - r_table = (__m128i *)(gt->r_table); - g_m = scratch->arg1; - g_r = scratch->arg2; - - mask_m = (1 << g_m) - 1; - mask_r = (1 << g_r) - 1; - mask_s = mask_m << (32-g_m); - - if (val[0] != _mm_extract_epi64(m_table[1], 1) || val[1] != _mm_extract_epi64(m_table[1], 0)) { - gf_w128_group_m_sse_init(gf, val); - } - - a128 = (uint64_t *) src; - c128 = (uint64_t *) dest; - top = (uint64_t *) rd.d_top; - - if (xor){ - while (c128 < top) { - p_i = zero; - a[0] = a128[0]; - a[1] = a128[1]; - - t_m = 0; - i_r = 0; - /* Top 64 bits */ - for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) { - i_m = (a[0] >> (i * g_m)) & mask_m; - i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r; - - shiftbits = _mm_extract_epi32(p_i, 1) & mask_s; - shiftbits >>= 32-g_m; - p_i = _mm_slli_epi64(p_i, g_m); - p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2)); - p_i = _mm_xor_si128(p_i, m_table[i_m]); - t_m += g_m; - if (t_m == g_r) { - p_i = _mm_xor_si128(p_i, r_table[i_r]); - t_m = 0; - i_r = 0; - } else { - i_r <<= g_m; - } - } - - for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) { - i_m = (a[1] >> (i * g_m)) & mask_m; - i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r; - - shiftbits = _mm_extract_epi32(p_i, 1) & mask_s; - shiftbits >>= 32-g_m; - p_i = _mm_slli_epi64(p_i, g_m); - p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2)); - - p_i = _mm_xor_si128(p_i, m_table[i_m]); - t_m += g_m; - if (t_m == g_r) { - p_i = _mm_xor_si128(p_i, r_table[i_r]); - t_m = 0; - i_r = 0; - } else { - i_r <<= g_m; - } - } - - c128[0] ^= _mm_extract_epi64(p_i, 1); - c128[1] ^= _mm_extract_epi64(p_i, 0); - a128 += 2; - c128 += 2; - } - }else{ - while (c128 < top) { - p_i = zero; - a[0] = a128[0]; - a[1] = a128[1]; - - t_m = 0; - i_r = 0; - /* Top 64 bits */ - for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) { - i_m = (a[0] >> (i * g_m)) & mask_m; - i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r; - - shiftbits = _mm_extract_epi32(p_i, 1) & mask_s; - shiftbits >>= 32-g_m; - p_i = _mm_slli_epi64(p_i, g_m); - p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2)); - p_i = _mm_xor_si128(p_i, m_table[i_m]); - t_m += g_m; - if (t_m == g_r) { - p_i = _mm_xor_si128(p_i, r_table[i_r]); - t_m = 0; - i_r = 0; - } else { - i_r <<= g_m; - } - } - - for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) { - i_m = (a[1] >> (i * g_m)) & mask_m; - i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r; - - shiftbits = _mm_extract_epi32(p_i, 1) & mask_s; - shiftbits >>= 32-g_m; - p_i = _mm_slli_epi64(p_i, g_m); - p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2)); - - p_i = _mm_xor_si128(p_i, m_table[i_m]); - t_m += g_m; - if (t_m == g_r) { - p_i = _mm_xor_si128(p_i, r_table[i_r]); - t_m = 0; - i_r = 0; - } else { - i_r <<= g_m; - } - } - - c128[0] = _mm_extract_epi64(p_i, 1); - c128[1] = _mm_extract_epi64(p_i, 0); - a128 += 2; - c128 += 2; - } - } -#endif -} - /* a^-1 -> b */ void gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128) @@ -1880,28 +1605,7 @@ int gf_w128_group_init(gf_t *gf) gf->inverse.w128 = gf_w128_euclid; gf->multiply_region.w128 = gf_w128_group_multiply_region; - /* JSP: I've got a problem compiling here -- something about "vmovq", and - I don't have the time to chase it down right now. */ - - #if defined(INTEL_SSE4) && defined(ARCH_64) - if(!(scratch->region_type & GF_REGION_NOSSE)) - { - if ((g_m != 4) && ((g_r != 4) || (g_r != 8))) - return 0; - gt->r_table = (void *)(((uint64_t)gt->r_table + 15) & (~0xfULL)); /* aligns gt->r_table on a 16-bit boundary*/ - gt->m_table = gt->r_table + 2*size_r; - gt->m_table[2] = 0; - gt->m_table[3] = 0; - gf->multiply.w128 = gf_w128_group_sse_multiply; - gf->multiply_region.w128 = gf_w128_group_sse_multiply_region; - gf_w128_group_r_sse_init(gf); - } - else - gf_w128_group_r_init(gf); - #else - if(scratch->region_type & GF_REGION_SSE) return 0; - else gf_w128_group_r_init(gf); - #endif + gf_w128_group_r_init(gf); return 1; } diff --git a/tools/gf_methods.c b/tools/gf_methods.c index 903d8b0..3afb438 100644 --- a/tools/gf_methods.c +++ b/tools/gf_methods.c @@ -20,8 +20,8 @@ #define BNMULTS (8) static char *BMULTS[BNMULTS] = { "CARRY_FREE", "GROUP48", "TABLE", "LOG", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE" }; -#define NMULTS (15) -static char *MULTS[NMULTS] = { "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b", +#define NMULTS (16) +static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b", "TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2", "SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" }; |