From 568df90edc6ae07744de45de8665fb86ce6c84ee Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Fri, 19 Sep 2014 12:30:57 +0200 Subject: simd: rename the region flags from SSE to SIMD SSE is not the only supported SIMD instruction set. Keep the old names for backward compatibility. --- include/gf_complete.h | 2 ++ include/gf_int.h | 10 +++--- src/gf.c | 91 ++++++++++++++++++++++++++------------------------- src/gf_method.c | 10 ++++-- src/gf_w128.c | 6 ++-- src/gf_w16.c | 14 ++++---- src/gf_w32.c | 14 ++++---- src/gf_w4.c | 12 +++---- src/gf_w64.c | 16 ++++----- src/gf_w8.c | 12 +++---- tools/gf_methods.c | 2 +- 11 files changed, 99 insertions(+), 90 deletions(-) diff --git a/include/gf_complete.h b/include/gf_complete.h index 5806625..e8ea2ca 100644 --- a/include/gf_complete.h +++ b/include/gf_complete.h @@ -61,7 +61,9 @@ typedef enum {GF_MULT_DEFAULT, #define GF_REGION_DOUBLE_TABLE (0x1) #define GF_REGION_QUAD_TABLE (0x2) #define GF_REGION_LAZY (0x4) +#define GF_REGION_SIMD (0x8) #define GF_REGION_SSE (0x8) +#define GF_REGION_NOSIMD (0x10) #define GF_REGION_NOSSE (0x10) #define GF_REGION_ALTMAP (0x20) #define GF_REGION_CAUCHY (0x40) diff --git a/include/gf_int.h b/include/gf_int.h index 98294cc..32866f4 100644 --- a/include/gf_int.h +++ b/include/gf_int.h @@ -113,7 +113,7 @@ typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */ GF_E_DIVCOMP, /* Mult == Composite && Div != Default */ GF_E_CAUCOMP, /* Mult == Composite && Reg == CAUCHY */ GF_E_DOUQUAD, /* Reg == DOUBLE && Reg == QUAD */ - GF_E_SSE__NO, /* Reg == SSE && Reg == NOSSE */ + GF_E_SIMD_NO, /* Reg == SIMD && Reg == NOSIMD */ GF_E_CAUCHYB, /* Reg == CAUCHY && Other Reg */ GF_E_CAUGT32, /* Reg == CAUCHY && w > 32*/ GF_E_ARG1SET, /* Arg1 != 0 && Mult \notin COMPOSITE/SPLIT/GROUP */ @@ -129,9 +129,9 @@ typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */ GF_E_QUAD__J, /* Reg == QUAD && other Reg */ GF_E_LAZY__X, /* Reg == LAZY && not DOUBLE or QUAD*/ GF_E_ALTSHIF, /* Mult == Shift && Reg == ALTMAP */ - GF_E_SSESHIF, /* Mult == Shift && Reg == SSE|NOSSE */ + GF_E_SSESHIF, /* Mult == Shift && Reg == SIMD|NOSIMD */ GF_E_ALT_CFM, /* Mult == CARRY_FREE && Reg == ALTMAP */ - GF_E_SSE_CFM, /* Mult == CARRY_FREE && Reg == SSE|NOSSE */ + GF_E_SSE_CFM, /* Mult == CARRY_FREE && Reg == SIMD|NOSIMD */ GF_E_PCLMULX, /* Mult == Carry_Free && No PCLMUL */ GF_E_ALT_BY2, /* Mult == Bytwo_x && Reg == ALTMAP */ GF_E_BY2_SSE, /* Mult == Bytwo_x && Reg == SSE && No SSE2 */ @@ -148,7 +148,7 @@ typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */ GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */ GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */ GF_E_TABLE_W, /* Mult == TABLE, w too big */ - GF_E_TAB_SSE, /* Mult == TABLE, SSE|NOSSE only apply to w == 4 */ + GF_E_TAB_SSE, /* Mult == TABLE, SIMD|NOSIMD only apply to w == 4 */ GF_E_TABSSE3, /* Mult == TABLE, Need SSSE3 for SSE */ GF_E_TAB_ALT, /* Mult == TABLE, Reg == ALTMAP */ GF_E_SP128AR, /* Mult == SPLIT, w=128, Bad arg1/arg2 */ @@ -172,7 +172,7 @@ typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */ GF_E_SP_8__A, /* Mult == SPLIT, w=8, no ALTMAP */ GF_E_SP_SSE3, /* Mult == SPLIT, Need SSSE3 for SSE */ GF_E_COMP_A2, /* Mult == COMP, arg1 must be = 2 */ - GF_E_COMP_SS, /* Mult == COMP, SSE|NOSSE */ + GF_E_COMP_SS, /* Mult == COMP, SIMD|NOSIMD */ GF_E_COMP__W, /* Mult == COMP, Bad w. */ GF_E_UNKFLAG, /* Unknown flag in create_from.... */ GF_E_UNKNOWN, /* Unknown mult_type. */ diff --git a/src/gf.c b/src/gf.c index 10c9b3c..ca6a7f8 100644 --- a/src/gf.c +++ b/src/gf.c @@ -41,7 +41,7 @@ void gf_error() case GF_E_MDEFARG: s = "If multiplication method == default, can't use arg1/arg2."; break; case GF_E_DIVCOMP: s = "Cannot change the division technique with -m COMPOSITE."; break; case GF_E_DOUQUAD: s = "Cannot specify -r DOUBLE and -r QUAD."; break; - case GF_E_SSE__NO: s = "Cannot specify -r SSE and -r NOSSE."; break; + case GF_E_SIMD_NO: s = "Cannot specify -r SIMD and -r NOSIMD."; break; case GF_E_CAUCHYB: s = "Cannot specify -r CAUCHY and any other -r."; break; case GF_E_CAUCOMP: s = "Cannot specify -m COMPOSITE and -r CAUCHY."; break; case GF_E_CAUGT32: s = "Cannot specify -r CAUCHY with w > 32."; break; @@ -51,23 +51,23 @@ void gf_error() case GF_E_BAD___W: s = "W must be 1-32, 64 or 128."; break; case GF_E_DOUBLET: s = "Can only specify -r DOUBLE with -m TABLE."; break; case GF_E_DOUBLEW: s = "Can only specify -r DOUBLE w = 4 or w = 8."; break; - case GF_E_DOUBLEJ: s = "Cannot specify -r DOUBLE with -r ALTMAP|SSE|NOSSE."; break; + case GF_E_DOUBLEJ: s = "Cannot specify -r DOUBLE with -r ALTMAP|SIMD|NOSIMD."; break; case GF_E_DOUBLEL: s = "Can only specify -r DOUBLE -r LAZY with w = 8"; break; case GF_E_QUAD__T: s = "Can only specify -r QUAD with -m TABLE."; break; case GF_E_QUAD__W: s = "Can only specify -r QUAD w = 4."; break; - case GF_E_QUAD__J: s = "Cannot specify -r QUAD with -r ALTMAP|SSE|NOSSE."; break; + case GF_E_QUAD__J: s = "Cannot specify -r QUAD with -r ALTMAP|SIMD|NOSIMD."; break; case GF_E_BADPOLY: s = "Bad primitive polynomial (high bits set)."; break; case GF_E_COMP_PP: s = "Bad primitive polynomial -- bigger than sub-field."; break; case GF_E_LAZY__X: s = "If -r LAZY, then -r must be DOUBLE or QUAD."; break; case GF_E_ALTSHIF: s = "Cannot specify -m SHIFT and -r ALTMAP."; break; - case GF_E_SSESHIF: s = "Cannot specify -m SHIFT and -r SSE|NOSSE."; break; + case GF_E_SSESHIF: s = "Cannot specify -m SHIFT and -r SIMD|NOSIMD."; break; case GF_E_ALT_CFM: s = "Cannot specify -m CARRY_FREE and -r ALTMAP."; break; - case GF_E_SSE_CFM: s = "Cannot specify -m CARRY_FREE and -r SSE|NOSSE."; break; + case GF_E_SSE_CFM: s = "Cannot specify -m CARRY_FREE and -r SIMD|NOSIMD."; break; case GF_E_PCLMULX: s = "Specified -m CARRY_FREE, but PCLMUL is not supported."; break; case GF_E_ALT_BY2: s = "Cannot specify -m BYTWO_x and -r ALTMAP."; break; - case GF_E_BY2_SSE: s = "Specified -m BYTWO_x -r SSE, but SSE2 is not supported."; break; + case GF_E_BY2_SSE: s = "Specified -m BYTWO_x -r SIMD, but SSE2 is not supported."; break; case GF_E_LOGBADW: s = "With Log Tables, w must be <= 27."; break; - case GF_E_LOG___J: s = "Cannot use Log tables with -r ALTMAP|SSE|NOSSE."; break; + case GF_E_LOG___J: s = "Cannot use Log tables with -r ALTMAP|SIMD|NOSIMD."; break; case GF_E_LOGPOLY: s = "Cannot use Log tables because the polynomial is not primitive."; break; case GF_E_ZERBADW: s = "With -m LOG_ZERO, w must be 8 or 16."; break; case GF_E_ZEXBADW: s = "With -m LOG_ZERO_EXT, w must be 8."; break; @@ -77,33 +77,33 @@ void gf_error() case GF_E_GR_128A: s = "With -m GROUP, w == 128, arg1 must be 4, and arg2 in { 4,8,16 }."; break; case GF_E_GR_A_27: s = "With -m GROUP, arg1 and arg2 must be <= 27."; break; case GF_E_GR_AR_W: s = "With -m GROUP, arg1 and arg2 must be <= w."; break; - case GF_E_GR____J: s = "Cannot use GROUP with -r ALTMAP|SSE|NOSSE."; break; + case GF_E_GR____J: s = "Cannot use GROUP with -r ALTMAP|SIMD|NOSIMD."; break; case GF_E_TABLE_W: s = "With -m TABLE, w must be < 15, or == 16."; break; - case GF_E_TAB_SSE: s = "With -m TABLE, SSE|NOSSE only applies to w=4."; break; - case GF_E_TABSSE3: s = "With -m TABLE, -r SSE, you need SSSE3 supported."; break; + case GF_E_TAB_SSE: s = "With -m TABLE, SIMD|NOSIMD only applies to w=4."; break; + case GF_E_TABSSE3: s = "With -m TABLE, -r SIMD, you need SSSE3 supported."; break; case GF_E_TAB_ALT: s = "With -m TABLE, you cannot use ALTMAP."; break; case GF_E_SP128AR: s = "With -m SPLIT, w=128, bad arg1/arg2."; break; - case GF_E_SP128AL: s = "With -m SPLIT, w=128, -r SSE requires -r ALTMAP."; break; + case GF_E_SP128AL: s = "With -m SPLIT, w=128, -r SIMD requires -r ALTMAP."; break; case GF_E_SP128AS: s = "With -m SPLIT, w=128, ALTMAP needs SSSE3 supported."; break; case GF_E_SP128_A: s = "With -m SPLIT, w=128, -r ALTMAP only with arg1/arg2 = 4/128."; break; - case GF_E_SP128_S: s = "With -m SPLIT, w=128, -r SSE|NOSSE only with arg1/arg2 = 4/128."; break; + case GF_E_SP128_S: s = "With -m SPLIT, w=128, -r SIMD|NOSIMD only with arg1/arg2 = 4/128."; break; case GF_E_SPLIT_W: s = "With -m SPLIT, w must be in {8, 16, 32, 64, 128}."; break; case GF_E_SP_16AR: s = "With -m SPLIT, w=16, Bad arg1/arg2."; break; case GF_E_SP_16_A: s = "With -m SPLIT, w=16, -r ALTMAP only with arg1/arg2 = 4/16."; break; - case GF_E_SP_16_S: s = "With -m SPLIT, w=16, -r SSE|NOSSE only with arg1/arg2 = 4/16."; break; + case GF_E_SP_16_S: s = "With -m SPLIT, w=16, -r SIMD|NOSIMD only with arg1/arg2 = 4/16."; break; case GF_E_SP_32AR: s = "With -m SPLIT, w=32, Bad arg1/arg2."; break; case GF_E_SP_32AS: s = "With -m SPLIT, w=32, -r ALTMAP needs SSSE3 supported."; break; case GF_E_SP_32_A: s = "With -m SPLIT, w=32, -r ALTMAP only with arg1/arg2 = 4/32."; break; - case GF_E_SP_32_S: s = "With -m SPLIT, w=32, -r SSE|NOSSE only with arg1/arg2 = 4/32."; break; + case GF_E_SP_32_S: s = "With -m SPLIT, w=32, -r SIMD|NOSIMD only with arg1/arg2 = 4/32."; break; case GF_E_SP_64AR: s = "With -m SPLIT, w=64, Bad arg1/arg2."; break; case GF_E_SP_64AS: s = "With -m SPLIT, w=64, -r ALTMAP needs SSSE3 supported."; break; case GF_E_SP_64_A: s = "With -m SPLIT, w=64, -r ALTMAP only with arg1/arg2 = 4/64."; break; - case GF_E_SP_64_S: s = "With -m SPLIT, w=64, -r SSE|NOSSE only with arg1/arg2 = 4/64."; break; + case GF_E_SP_64_S: s = "With -m SPLIT, w=64, -r SIMD|NOSIMD only with arg1/arg2 = 4/64."; break; case GF_E_SP_8_AR: s = "With -m SPLIT, w=8, Bad arg1/arg2."; break; case GF_E_SP_8__A: s = "With -m SPLIT, w=8, Can't have -r ALTMAP."; break; - case GF_E_SP_SSE3: s = "With -m SPLIT, Need SSSE3 support for SSE."; break; + case GF_E_SP_SSE3: s = "With -m SPLIT, Need SSSE3 support for SIMD."; break; case GF_E_COMP_A2: s = "With -m COMPOSITE, arg1 must equal 2."; break; - case GF_E_COMP_SS: s = "With -m COMPOSITE, -r SSE and -r NOSSE do not apply."; break; + case GF_E_COMP_SS: s = "With -m COMPOSITE, -r SIMD and -r NOSIMD do not apply."; break; case GF_E_COMP__W: s = "With -m COMPOSITE, w must be 8, 16, 32, 64 or 128."; break; case GF_E_UNKFLAG: s = "Unknown method flag - should be -m, -d, -r or -p."; break; case GF_E_UNKNOWN: s = "Unknown multiplication type."; break; @@ -182,14 +182,14 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, int sse3 = 0; int sse2 = 0; int pclmul = 0; - int rdouble, rquad, rlazy, rsse, rnosse, raltmap, rcauchy, tmp; + int rdouble, rquad, rlazy, rsimd, rnosimd, raltmap, rcauchy, tmp; gf_internal_t *sub; rdouble = (region_type & GF_REGION_DOUBLE_TABLE); rquad = (region_type & GF_REGION_QUAD_TABLE); rlazy = (region_type & GF_REGION_LAZY); - rsse = (region_type & GF_REGION_SSE); - rnosse = (region_type & GF_REGION_NOSSE); + rsimd = (region_type & GF_REGION_SIMD); + rnosimd = (region_type & GF_REGION_NOSIMD); raltmap = (region_type & GF_REGION_ALTMAP); rcauchy = (region_type & GF_REGION_CAUCHY); @@ -201,7 +201,8 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, } tmp = ( GF_REGION_DOUBLE_TABLE | GF_REGION_QUAD_TABLE | GF_REGION_LAZY | - GF_REGION_SSE | GF_REGION_NOSSE | GF_REGION_ALTMAP | GF_REGION_CAUCHY ); + GF_REGION_SIMD | GF_REGION_NOSIMD | GF_REGION_ALTMAP | + GF_REGION_CAUCHY ); if (region_type & (~tmp)) { _gf_errno = GF_E_UNK_REG; return 0; } #ifdef INTEL_SSE2 @@ -230,7 +231,7 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, return 1; } - if (rsse && rnosse) { _gf_errno = GF_E_SSE__NO; return 0; } + if (rsimd && rnosimd) { _gf_errno = GF_E_SIMD_NO; return 0; } if (rcauchy && w > 32) { _gf_errno = GF_E_CAUGT32; return 0; } if (rcauchy && region_type != GF_REGION_CAUCHY) { _gf_errno = GF_E_CAUCHYB; return 0; } if (rcauchy && mult_type == GF_MULT_COMPOSITE) { _gf_errno = GF_E_CAUCOMP; return 0; } @@ -252,7 +253,7 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, if (rquad) { _gf_errno = GF_E_DOUQUAD; return 0; } if (mult_type != GF_MULT_TABLE) { _gf_errno = GF_E_DOUBLET; return 0; } if (w != 4 && w != 8) { _gf_errno = GF_E_DOUBLEW; return 0; } - if (rsse || rnosse || raltmap) { _gf_errno = GF_E_DOUBLEJ; return 0; } + if (rsimd || rnosimd || raltmap) { _gf_errno = GF_E_DOUBLEJ; return 0; } if (rlazy && w == 4) { _gf_errno = GF_E_DOUBLEL; return 0; } return 1; } @@ -260,7 +261,7 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, if (rquad) { if (mult_type != GF_MULT_TABLE) { _gf_errno = GF_E_QUAD__T; return 0; } if (w != 4) { _gf_errno = GF_E_QUAD__W; return 0; } - if (rsse || rnosse || raltmap) { _gf_errno = GF_E_QUAD__J; return 0; } + if (rsimd || rnosimd || raltmap) { _gf_errno = GF_E_QUAD__J; return 0; } return 1; } @@ -268,7 +269,7 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, if (mult_type == GF_MULT_SHIFT) { if (raltmap) { _gf_errno = GF_E_ALTSHIF; return 0; } - if (rsse || rnosse) { _gf_errno = GF_E_SSESHIF; return 0; } + if (rsimd || rnosimd) { _gf_errno = GF_E_SSESHIF; return 0; } return 1; } @@ -281,7 +282,7 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, if (w == 32 && (poly & 0xfe000000)) { _gf_errno = GF_E_CF32POL; return 0; } if (w == 64 && (poly & 0xfffe000000000000ULL)) { _gf_errno = GF_E_CF64POL; return 0; } if (raltmap) { _gf_errno = GF_E_ALT_CFM; return 0; } - if (rsse || rnosse) { _gf_errno = GF_E_SSE_CFM; return 0; } + if (rsimd || rnosimd) { _gf_errno = GF_E_SSE_CFM; return 0; } if (!pclmul) { _gf_errno = GF_E_PCLMULX; return 0; } return 1; } @@ -290,21 +291,21 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, if (w != 4 && w != 8 && w != 16 && w != 32 && w != 64 && w != 128) { _gf_errno = GF_E_CFM___W; return 0; } if (raltmap) { _gf_errno = GF_E_ALT_CFM; return 0; } - if (rsse || rnosse) { _gf_errno = GF_E_SSE_CFM; return 0; } + if (rsimd || rnosimd) { _gf_errno = GF_E_SSE_CFM; return 0; } if (!pclmul) { _gf_errno = GF_E_PCLMULX; return 0; } return 1; } if (mult_type == GF_MULT_BYTWO_p || mult_type == GF_MULT_BYTWO_b) { if (raltmap) { _gf_errno = GF_E_ALT_BY2; return 0; } - if (rsse && !sse2) { _gf_errno = GF_E_BY2_SSE; return 0; } + if (rsimd && !sse2) { _gf_errno = GF_E_BY2_SSE; return 0; } return 1; } if (mult_type == GF_MULT_LOG_TABLE || mult_type == GF_MULT_LOG_ZERO || mult_type == GF_MULT_LOG_ZERO_EXT ) { if (w > 27) { _gf_errno = GF_E_LOGBADW; return 0; } - if (raltmap || rsse || rnosse) { _gf_errno = GF_E_LOG___J; return 0; } + if (raltmap || rsimd || rnosimd) { _gf_errno = GF_E_LOG___J; return 0; } if (mult_type == GF_MULT_LOG_TABLE) return 1; @@ -324,14 +325,14 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, (arg2 != 4 && arg2 != 8 && arg2 != 16))) { _gf_errno = GF_E_GR_128A; return 0; } if (arg1 > 27 || arg2 > 27) { _gf_errno = GF_E_GR_A_27; return 0; } if (arg1 > w || arg2 > w) { _gf_errno = GF_E_GR_AR_W; return 0; } - if (raltmap || rsse || rnosse) { _gf_errno = GF_E_GR____J; return 0; } + if (raltmap || rsimd || rnosimd) { _gf_errno = GF_E_GR____J; return 0; } return 1; } if (mult_type == GF_MULT_TABLE) { if (w != 16 && w >= 15) { _gf_errno = GF_E_TABLE_W; return 0; } - if (w != 4 && (rsse || rnosse)) { _gf_errno = GF_E_TAB_SSE; return 0; } - if (rsse && !sse3) { _gf_errno = GF_E_TABSSE3; return 0; } + if (w != 4 && (rsimd || rnosimd)) { _gf_errno = GF_E_TAB_SSE; return 0; } + if (rsimd && !sse3) { _gf_errno = GF_E_TABSSE3; return 0; } if (raltmap) { _gf_errno = GF_E_TAB_ALT; return 0; } return 1; } @@ -344,46 +345,46 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, } if (w == 8) { if (arg1 != 4 || arg2 != 8) { _gf_errno = GF_E_SP_8_AR; return 0; } - if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; } + if (rsimd && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; } if (raltmap) { _gf_errno = GF_E_SP_8__A; return 0; } } else if (w == 16) { if ((arg1 == 8 && arg2 == 8) || (arg1 == 8 && arg2 == 16)) { - if (rsse || rnosse) { _gf_errno = GF_E_SP_16_S; return 0; } + if (rsimd || rnosimd) { _gf_errno = GF_E_SP_16_S; return 0; } if (raltmap) { _gf_errno = GF_E_SP_16_A; return 0; } } else if (arg1 == 4 && arg2 == 16) { - if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; } + if (rsimd && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; } } else { _gf_errno = GF_E_SP_16AR; return 0; } } else if (w == 32) { if ((arg1 == 8 && arg2 == 8) || (arg1 == 8 && arg2 == 32) || (arg1 == 16 && arg2 == 32)) { - if (rsse || rnosse) { _gf_errno = GF_E_SP_32_S; return 0; } + if (rsimd || rnosimd) { _gf_errno = GF_E_SP_32_S; return 0; } if (raltmap) { _gf_errno = GF_E_SP_32_A; return 0; } } else if (arg1 == 4 && arg2 == 32) { - if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; } + if (rsimd && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; } if (raltmap && !sse3) { _gf_errno = GF_E_SP_32AS; return 0; } - if (raltmap && rnosse) { _gf_errno = GF_E_SP_32AS; return 0; } + if (raltmap && rnosimd) { _gf_errno = GF_E_SP_32AS; return 0; } } else { _gf_errno = GF_E_SP_32AR; return 0; } } else if (w == 64) { if ((arg1 == 8 && arg2 == 8) || (arg1 == 8 && arg2 == 64) || (arg1 == 16 && arg2 == 64)) { - if (rsse || rnosse) { _gf_errno = GF_E_SP_64_S; return 0; } + if (rsimd || rnosimd) { _gf_errno = GF_E_SP_64_S; return 0; } if (raltmap) { _gf_errno = GF_E_SP_64_A; return 0; } } else if (arg1 == 4 && arg2 == 64) { - if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; } + if (rsimd && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; } if (raltmap && !sse3) { _gf_errno = GF_E_SP_64AS; return 0; } - if (raltmap && rnosse) { _gf_errno = GF_E_SP_64AS; return 0; } + if (raltmap && rnosimd) { _gf_errno = GF_E_SP_64AS; return 0; } } else { _gf_errno = GF_E_SP_64AR; return 0; } } else if (w == 128) { if (arg1 == 8 && arg2 == 128) { - if (rsse || rnosse) { _gf_errno = GF_E_SP128_S; return 0; } + if (rsimd || rnosimd) { _gf_errno = GF_E_SP128_S; return 0; } if (raltmap) { _gf_errno = GF_E_SP128_A; return 0; } } else if (arg1 == 4 && arg2 == 128) { - if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; } + if (rsimd && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; } if (raltmap && !sse3) { _gf_errno = GF_E_SP128AS; return 0; } - if (raltmap && rnosse) { _gf_errno = GF_E_SP128AS; return 0; } + if (raltmap && rnosimd) { _gf_errno = GF_E_SP128AS; return 0; } } else { _gf_errno = GF_E_SP128AR; return 0; } } else { _gf_errno = GF_E_SPLIT_W; return 0; } return 1; @@ -395,7 +396,7 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, if (w < 128 && (poly >> (w/2)) != 0) { _gf_errno = GF_E_COMP_PP; return 0; } if (divide_type != GF_DIVIDE_DEFAULT) { _gf_errno = GF_E_DIVCOMP; return 0; } if (arg1 != 2) { _gf_errno = GF_E_COMP_A2; return 0; } - if (rsse || rnosse) { _gf_errno = GF_E_COMP_SS; return 0; } + if (rsimd || rnosimd) { _gf_errno = GF_E_COMP_SS; return 0; } if (base != NULL) { sub = (gf_internal_t *) base->scratch; if (sub->w != w/2) { _gf_errno = GF_E_BASE__W; return 0; } diff --git a/src/gf_method.c b/src/gf_method.c index 2548a63..2210305 100644 --- a/src/gf_method.c +++ b/src/gf_method.c @@ -121,11 +121,17 @@ int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting) } else if (strcmp(argv[starting], "LAZY") == 0) { region_type |= GF_REGION_LAZY; starting++; + } else if (strcmp(argv[starting], "SIMD") == 0) { + region_type |= GF_REGION_SIMD; + starting++; + } else if (strcmp(argv[starting], "NOSIMD") == 0) { + region_type |= GF_REGION_NOSIMD; + starting++; } else if (strcmp(argv[starting], "SSE") == 0) { - region_type |= GF_REGION_SSE; + region_type |= GF_REGION_SIMD; starting++; } else if (strcmp(argv[starting], "NOSSE") == 0) { - region_type |= GF_REGION_NOSSE; + region_type |= GF_REGION_NOSIMD; starting++; } else if (strcmp(argv[starting], "CAUCHY") == 0) { region_type |= GF_REGION_CAUCHY; diff --git a/src/gf_w128.c b/src/gf_w128.c index 66f9422..190f6b0 100644 --- a/src/gf_w128.c +++ b/src/gf_w128.c @@ -1527,7 +1527,7 @@ int gf_w128_split_init(gf_t *gf) gf->multiply.w128 = gf_w128_bytwo_p_multiply; #if defined(INTEL_SSE4_PCLMUL) - if (!(h->region_type & GF_REGION_NOSSE)){ + if (!(h->region_type & GF_REGION_NOSIMD)){ gf->multiply.w128 = gf_w128_clm_multiply; } #endif @@ -1546,7 +1546,7 @@ int gf_w128_split_init(gf_t *gf) if((h->region_type & GF_REGION_ALTMAP)) { #ifdef INTEL_SSE4 - if(!(h->region_type & GF_REGION_NOSSE)) + if(!(h->region_type & GF_REGION_NOSIMD)) gf->multiply_region.w128 = gf_w128_split_4_128_sse_altmap_multiply_region; else return 0; @@ -1556,7 +1556,7 @@ int gf_w128_split_init(gf_t *gf) } else { #ifdef INTEL_SSE4 - if(!(h->region_type & GF_REGION_NOSSE)) + if(!(h->region_type & GF_REGION_NOSIMD)) gf->multiply_region.w128 = gf_w128_split_4_128_sse_multiply_region; else gf->multiply_region.w128 = gf_w128_split_4_128_multiply_region; diff --git a/src/gf_w16.c b/src/gf_w16.c index c4cd22d..0904115 100644 --- a/src/gf_w16.c +++ b/src/gf_w16.c @@ -1327,14 +1327,14 @@ int gf_w16_split_init(gf_t *gf) } else if ((h->arg1 == 4 && h->arg2 == 16) || (h->arg2 == 4 && h->arg1 == 16)) { if (issse3) { - if(h->region_type & GF_REGION_ALTMAP && h->region_type & GF_REGION_NOSSE) + if(h->region_type & GF_REGION_ALTMAP && h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w16_split_4_16_lazy_nosse_altmap_multiply_region; - else if(h->region_type & GF_REGION_NOSSE) + else if(h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w16_split_4_16_lazy_multiply_region; else if(h->region_type & GF_REGION_ALTMAP) gf->multiply_region.w32 = gf_w16_split_4_16_lazy_sse_altmap_multiply_region; } else { - if(h->region_type & GF_REGION_SSE) + if(h->region_type & GF_REGION_SIMD) return 0; else if(h->region_type & GF_REGION_ALTMAP) gf->multiply_region.w32 = gf_w16_split_4_16_lazy_nosse_altmap_multiply_region; @@ -1884,25 +1884,25 @@ int gf_w16_bytwo_init(gf_t *gf) if (h->mult_type == GF_MULT_BYTWO_p) { gf->multiply.w32 = gf_w16_bytwo_p_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w16_bytwo_p_nosse_multiply_region; else gf->multiply_region.w32 = gf_w16_bytwo_p_sse_multiply_region; #else gf->multiply_region.w32 = gf_w16_bytwo_p_nosse_multiply_region; - if(h->region_type & GF_REGION_SSE) + if(h->region_type & GF_REGION_SIMD) return 0; #endif } else { gf->multiply.w32 = gf_w16_bytwo_b_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w16_bytwo_b_nosse_multiply_region; else gf->multiply_region.w32 = gf_w16_bytwo_b_sse_multiply_region; #else gf->multiply_region.w32 = gf_w16_bytwo_b_nosse_multiply_region; - if(h->region_type & GF_REGION_SSE) + if(h->region_type & GF_REGION_SIMD) return 0; #endif } diff --git a/src/gf_w32.c b/src/gf_w32.c index 5ec2aa7..8e7c741 100644 --- a/src/gf_w32.c +++ b/src/gf_w32.c @@ -1434,25 +1434,25 @@ int gf_w32_bytwo_init(gf_t *gf) if (h->mult_type == GF_MULT_BYTWO_p) { gf->multiply.w32 = gf_w32_bytwo_p_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w32_bytwo_p_nosse_multiply_region; else gf->multiply_region.w32 = gf_w32_bytwo_p_sse_multiply_region; #else gf->multiply_region.w32 = gf_w32_bytwo_p_nosse_multiply_region; - if(h->region_type & GF_REGION_SSE) + if(h->region_type & GF_REGION_SIMD) return 0; #endif } else { gf->multiply.w32 = gf_w32_bytwo_b_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w32_bytwo_b_nosse_multiply_region; else gf->multiply_region.w32 = gf_w32_bytwo_b_sse_multiply_region; #else gf->multiply_region.w32 = gf_w32_bytwo_b_nosse_multiply_region; - if(h->region_type & GF_REGION_SSE) + if(h->region_type & GF_REGION_SIMD) return 0; #endif } @@ -2335,13 +2335,13 @@ int gf_w32_split_init(gf_t *gf) ld2 = (struct gf_split_2_32_lazy_data *) h->private; ld2->last_value = 0; #ifdef INTEL_SSSE3 - if (!(h->region_type & GF_REGION_NOSSE)) + if (!(h->region_type & GF_REGION_NOSIMD)) gf->multiply_region.w32 = gf_w32_split_2_32_lazy_sse_multiply_region; else gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region; #else gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region; - if(h->region_type & GF_REGION_SSE) return 0; + if(h->region_type & GF_REGION_SIMD) return 0; #endif return 1; } @@ -2352,7 +2352,7 @@ int gf_w32_split_init(gf_t *gf) (issse3 && h->mult_type == GF_REGION_DEFAULT)) { ld4 = (struct gf_split_4_32_lazy_data *) h->private; ld4->last_value = 0; - if ((h->region_type & GF_REGION_NOSSE) || !issse3) { + if ((h->region_type & GF_REGION_NOSIMD) || !issse3) { gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region; } else if (h->region_type & GF_REGION_ALTMAP) { gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_altmap_multiply_region; diff --git a/src/gf_w4.c b/src/gf_w4.c index 6bc79d0..f098323 100644 --- a/src/gf_w4.c +++ b/src/gf_w4.c @@ -490,13 +490,13 @@ int gf_w4_single_table_init(gf_t *gf) gf->divide.w32 = gf_w4_single_table_divide; gf->multiply.w32 = gf_w4_single_table_multiply; #ifdef INTEL_SSSE3 - if(h->region_type & (GF_REGION_NOSSE | GF_REGION_CAUCHY)) + if(h->region_type & (GF_REGION_NOSIMD | GF_REGION_CAUCHY)) gf->multiply_region.w32 = gf_w4_single_table_multiply_region; else gf->multiply_region.w32 = gf_w4_single_table_sse_multiply_region; #else gf->multiply_region.w32 = gf_w4_single_table_multiply_region; - if (h->region_type & GF_REGION_SSE) return 0; + if (h->region_type & GF_REGION_SIMD) return 0; #endif return 1; @@ -1905,25 +1905,25 @@ int gf_w4_bytwo_init(gf_t *gf) if (h->mult_type == GF_MULT_BYTWO_p) { gf->multiply.w32 = gf_w4_bytwo_p_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region; else gf->multiply_region.w32 = gf_w4_bytwo_p_sse_multiply_region; #else gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region; - if (h->region_type & GF_REGION_SSE) + if (h->region_type & GF_REGION_SIMD) return 0; #endif } else { gf->multiply.w32 = gf_w4_bytwo_b_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region; else gf->multiply_region.w32 = gf_w4_bytwo_b_sse_multiply_region; #else gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region; - if (h->region_type & GF_REGION_SSE) + if (h->region_type & GF_REGION_SIMD) return 0; #endif } diff --git a/src/gf_w64.c b/src/gf_w64.c index fdc4a7c..fe1c75d 100644 --- a/src/gf_w64.c +++ b/src/gf_w64.c @@ -1488,25 +1488,25 @@ int gf_w64_bytwo_init(gf_t *gf) if (h->mult_type == GF_MULT_BYTWO_p) { gf->multiply.w64 = gf_w64_bytwo_p_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w64 = gf_w64_bytwo_p_nosse_multiply_region; else gf->multiply_region.w64 = gf_w64_bytwo_p_sse_multiply_region; #else gf->multiply_region.w64 = gf_w64_bytwo_p_nosse_multiply_region; - if(h->region_type & GF_REGION_SSE) + if(h->region_type & GF_REGION_SIMD) return 0; #endif } else { gf->multiply.w64 = gf_w64_bytwo_b_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w64 = gf_w64_bytwo_b_nosse_multiply_region; else gf->multiply_region.w64 = gf_w64_bytwo_b_sse_multiply_region; #else gf->multiply_region.w64 = gf_w64_bytwo_b_nosse_multiply_region; - if(h->region_type & GF_REGION_SSE) + if(h->region_type & GF_REGION_SIMD) return 0; #endif } @@ -2006,7 +2006,7 @@ int gf_w64_split_init(gf_t *gf) gf->multiply.w64 = gf_w64_bytwo_p_multiply; #if defined(INTEL_SSE4_PCLMUL) - if ((!(h->region_type & GF_REGION_NOSSE) && + if ((!(h->region_type & GF_REGION_NOSIMD) && (h->arg1 == 64 || h->arg2 == 64)) || h->mult_type == GF_MULT_DEFAULT){ @@ -2045,7 +2045,7 @@ int gf_w64_split_init(gf_t *gf) d4 = (struct gf_split_4_64_lazy_data *) h->private; d4->last_value = 0; - if((h->region_type & GF_REGION_ALTMAP) && (h->region_type & GF_REGION_NOSSE)) return 0; + if((h->region_type & GF_REGION_ALTMAP) && (h->region_type & GF_REGION_NOSIMD)) return 0; if(h->region_type & GF_REGION_ALTMAP) { #ifdef INTEL_SSSE3 @@ -2057,13 +2057,13 @@ int gf_w64_split_init(gf_t *gf) else //no altmap { #ifdef INTEL_SSE4 - if(h->region_type & GF_REGION_NOSSE) + if(h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w64 = gf_w64_split_4_64_lazy_multiply_region; else gf->multiply_region.w64 = gf_w64_split_4_64_lazy_sse_multiply_region; #else gf->multiply_region.w64 = gf_w64_split_4_64_lazy_multiply_region; - if(h->region_type & GF_REGION_SSE) + if(h->region_type & GF_REGION_SIMD) return 0; #endif } diff --git a/src/gf_w8.c b/src/gf_w8.c index 67fd688..bc4f5d1 100644 --- a/src/gf_w8.c +++ b/src/gf_w8.c @@ -1180,13 +1180,13 @@ int gf_w8_split_init(gf_t *gf) gf->multiply.w32 = gf_w8_split_multiply; #ifdef INTEL_SSSE3 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w8_split_multiply_region; else gf->multiply_region.w32 = gf_w8_split_multiply_region_sse; #else gf->multiply_region.w32 = gf_w8_split_multiply_region; - if(h->region_type & GF_REGION_SSE) + if(h->region_type & GF_REGION_SIMD) return 0; #endif @@ -2259,25 +2259,25 @@ int gf_w8_bytwo_init(gf_t *gf) if (h->mult_type == GF_MULT_BYTWO_p) { gf->multiply.w32 = gf_w8_bytwo_p_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w8_bytwo_p_nosse_multiply_region; else gf->multiply_region.w32 = gf_w8_bytwo_p_sse_multiply_region; #else gf->multiply_region.w32 = gf_w8_bytwo_p_nosse_multiply_region; - if(h->region_type & GF_REGION_SSE) + if(h->region_type & GF_REGION_SIMD) return 0; #endif } else { gf->multiply.w32 = gf_w8_bytwo_b_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w8_bytwo_b_nosse_multiply_region; else gf->multiply_region.w32 = gf_w8_bytwo_b_sse_multiply_region; #else gf->multiply_region.w32 = gf_w8_bytwo_b_nosse_multiply_region; - if(h->region_type & GF_REGION_SSE) + if(h->region_type & GF_REGION_SIMD) return 0; #endif } diff --git a/tools/gf_methods.c b/tools/gf_methods.c index 43589ac..c7d3d58 100644 --- a/tools/gf_methods.c +++ b/tools/gf_methods.c @@ -28,7 +28,7 @@ static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "CARRY_FREE_GK", "GROUP44" /* Make sure CAUCHY is last */ #define NREGIONS (7) -static char *REGIONS[NREGIONS] = { "DOUBLE", "QUAD", "LAZY", "SSE", "NOSSE", +static char *REGIONS[NREGIONS] = { "DOUBLE", "QUAD", "LAZY", "SIMD", "NOSIMD", "ALTMAP", "CAUCHY" }; #define BNREGIONS (4) -- cgit v1.2.1