diff options
Diffstat (limited to 'src/gf_w4.c')
-rw-r--r-- | src/gf_w4.c | 80 |
1 file changed, 24 insertions, 56 deletions
diff --git a/src/gf_w4.c b/src/gf_w4.c index 6bc79d0..0e86aa8 100644 --- a/src/gf_w4.c +++ b/src/gf_w4.c @@ -11,49 +11,7 @@ #include "gf_int.h" #include <stdio.h> #include <stdlib.h> - -#define GF_FIELD_WIDTH 4 -#define GF_DOUBLE_WIDTH (GF_FIELD_WIDTH*2) -#define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) -#define GF_MULT_GROUP_SIZE (GF_FIELD_SIZE-1) - -/* ------------------------------------------------------------ - JSP: Each implementation has its own data, which is allocated - at one time as part of the handle. For that reason, it - shouldn't be hierarchical -- i.e. one should be able to - allocate it with one call to malloc. */ - -struct gf_logtable_data { - uint8_t log_tbl[GF_FIELD_SIZE]; - uint8_t antilog_tbl[GF_FIELD_SIZE * 2]; - uint8_t *antilog_tbl_div; -}; - -struct gf_single_table_data { - uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE]; - uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; -}; - -struct gf_double_table_data { - uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; - uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE*GF_FIELD_SIZE]; -}; -struct gf_quad_table_data { - uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; - uint16_t mult[GF_FIELD_SIZE][(1<<16)]; -}; - -struct gf_quad_table_lazy_data { - uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; - uint8_t smult[GF_FIELD_SIZE][GF_FIELD_SIZE]; - uint16_t mult[(1 << 16)]; -}; - -struct gf_bytwo_data { - uint64_t prim_poly; - uint64_t mask1; - uint64_t mask2; -}; +#include "gf_w4.h" #define AB2(ip, am1 ,am2, b, t1, t2) {\ t1 = (b << 1) & am1;\ @@ -489,14 +447,18 @@ int gf_w4_single_table_init(gf_t *gf) gf->inverse.w32 = NULL; gf->divide.w32 = gf_w4_single_table_divide; gf->multiply.w32 = gf_w4_single_table_multiply; - #ifdef INTEL_SSSE3 - if(h->region_type & (GF_REGION_NOSSE | GF_REGION_CAUCHY)) + #if defined(INTEL_SSSE3) || defined(ARM_NEON) + if(h->region_type & (GF_REGION_NOSIMD | GF_REGION_CAUCHY)) gf->multiply_region.w32 = gf_w4_single_table_multiply_region; else + #if defined(INTEL_SSSE3) gf->multiply_region.w32 = 
gf_w4_single_table_sse_multiply_region; + #elif defined(ARM_NEON) + gf_w4_neon_single_table_init(gf); + #endif #else gf->multiply_region.w32 = gf_w4_single_table_multiply_region; - if (h->region_type & GF_REGION_SSE) return 0; + if (h->region_type & GF_REGION_SIMD) return 0; #endif return 1; @@ -774,16 +736,16 @@ int gf_w4_table_init(gf_t *gf) { int rt; gf_internal_t *h; - int issse3 = 0; + int simd = 0; -#ifdef INTEL_SSSE3 - issse3 = 1; +#if defined(INTEL_SSSE3) || defined(ARM_NEON) + simd = 1; #endif h = (gf_internal_t *) gf->scratch; rt = (h->region_type); - if (h->mult_type == GF_MULT_DEFAULT && !issse3) rt |= GF_REGION_DOUBLE_TABLE; + if (h->mult_type == GF_MULT_DEFAULT && !simd) rt |= GF_REGION_DOUBLE_TABLE; if (rt & GF_REGION_DOUBLE_TABLE) { return gf_w4_double_table_init(gf); @@ -1905,25 +1867,25 @@ int gf_w4_bytwo_init(gf_t *gf) if (h->mult_type == GF_MULT_BYTWO_p) { gf->multiply.w32 = gf_w4_bytwo_p_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region; else gf->multiply_region.w32 = gf_w4_bytwo_p_sse_multiply_region; #else gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region; - if (h->region_type & GF_REGION_SSE) + if (h->region_type & GF_REGION_SIMD) return 0; #endif } else { gf->multiply.w32 = gf_w4_bytwo_b_multiply; #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSSE) + if (h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region; else gf->multiply_region.w32 = gf_w4_bytwo_b_sse_multiply_region; #else gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region; - if (h->region_type & GF_REGION_SSE) + if (h->region_type & GF_REGION_SIMD) return 0; #endif } @@ -1937,6 +1899,8 @@ int gf_w4_cfm_init(gf_t *gf) #if defined(INTEL_SSE4_PCLMUL) gf->multiply.w32 = gf_w4_clm_multiply; return 1; +#elif defined(ARM_NEON) + return gf_w4_neon_cfm_init(gf); #endif return 0; } @@ -1953,11 +1917,14 
@@ int gf_w4_shift_init(gf_t *gf) int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) { - int issse3 = 0; + int issse3 = 0, isneon = 0; #ifdef INTEL_SSSE3 issse3 = 1; #endif +#ifdef ARM_NEON + isneon = 1; +#endif switch(mult_type) { @@ -1971,7 +1938,8 @@ int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1 return sizeof(gf_internal_t) + sizeof(struct gf_single_table_data) + 64; } - if (mult_type == GF_MULT_DEFAULT && !issse3) region_type = GF_REGION_DOUBLE_TABLE; + if (mult_type == GF_MULT_DEFAULT && !(issse3 || isneon)) + region_type = GF_REGION_DOUBLE_TABLE; if (region_type & GF_REGION_DOUBLE_TABLE) { return sizeof(gf_internal_t) + sizeof(struct gf_double_table_data) + 64; |