summary | refs | log | tree | commit | diff
path: root/src/gf_w4.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gf_w4.c')
-rw-r--r-- src/gf_w4.c 80
1 files changed, 24 insertions, 56 deletions
diff --git a/src/gf_w4.c b/src/gf_w4.c
index 6bc79d0..0e86aa8 100644
--- a/src/gf_w4.c
+++ b/src/gf_w4.c
@@ -11,49 +11,7 @@
#include "gf_int.h"
#include <stdio.h>
#include <stdlib.h>
-
-#define GF_FIELD_WIDTH 4
-#define GF_DOUBLE_WIDTH (GF_FIELD_WIDTH*2)
-#define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH)
-#define GF_MULT_GROUP_SIZE (GF_FIELD_SIZE-1)
-
-/* ------------------------------------------------------------
- JSP: Each implementation has its own data, which is allocated
- at one time as part of the handle. For that reason, it
- shouldn't be hierarchical -- i.e. one should be able to
- allocate it with one call to malloc. */
-
-struct gf_logtable_data {
- uint8_t log_tbl[GF_FIELD_SIZE];
- uint8_t antilog_tbl[GF_FIELD_SIZE * 2];
- uint8_t *antilog_tbl_div;
-};
-
-struct gf_single_table_data {
- uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
- uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE];
-};
-
-struct gf_double_table_data {
- uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE];
- uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE*GF_FIELD_SIZE];
-};
-struct gf_quad_table_data {
- uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE];
- uint16_t mult[GF_FIELD_SIZE][(1<<16)];
-};
-
-struct gf_quad_table_lazy_data {
- uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE];
- uint8_t smult[GF_FIELD_SIZE][GF_FIELD_SIZE];
- uint16_t mult[(1 << 16)];
-};
-
-struct gf_bytwo_data {
- uint64_t prim_poly;
- uint64_t mask1;
- uint64_t mask2;
-};
+#include "gf_w4.h"
#define AB2(ip, am1 ,am2, b, t1, t2) {\
t1 = (b << 1) & am1;\
@@ -489,14 +447,18 @@ int gf_w4_single_table_init(gf_t *gf)
gf->inverse.w32 = NULL;
gf->divide.w32 = gf_w4_single_table_divide;
gf->multiply.w32 = gf_w4_single_table_multiply;
- #ifdef INTEL_SSSE3
- if(h->region_type & (GF_REGION_NOSSE | GF_REGION_CAUCHY))
+ #if defined(INTEL_SSSE3) || defined(ARM_NEON)
+ if(h->region_type & (GF_REGION_NOSIMD | GF_REGION_CAUCHY))
gf->multiply_region.w32 = gf_w4_single_table_multiply_region;
else
+ #if defined(INTEL_SSSE3)
gf->multiply_region.w32 = gf_w4_single_table_sse_multiply_region;
+ #elif defined(ARM_NEON)
+ gf_w4_neon_single_table_init(gf);
+ #endif
#else
gf->multiply_region.w32 = gf_w4_single_table_multiply_region;
- if (h->region_type & GF_REGION_SSE) return 0;
+ if (h->region_type & GF_REGION_SIMD) return 0;
#endif
return 1;
@@ -774,16 +736,16 @@ int gf_w4_table_init(gf_t *gf)
{
int rt;
gf_internal_t *h;
- int issse3 = 0;
+ int simd = 0;
-#ifdef INTEL_SSSE3
- issse3 = 1;
+#if defined(INTEL_SSSE3) || defined(ARM_NEON)
+ simd = 1;
#endif
h = (gf_internal_t *) gf->scratch;
rt = (h->region_type);
- if (h->mult_type == GF_MULT_DEFAULT && !issse3) rt |= GF_REGION_DOUBLE_TABLE;
+ if (h->mult_type == GF_MULT_DEFAULT && !simd) rt |= GF_REGION_DOUBLE_TABLE;
if (rt & GF_REGION_DOUBLE_TABLE) {
return gf_w4_double_table_init(gf);
@@ -1905,25 +1867,25 @@ int gf_w4_bytwo_init(gf_t *gf)
if (h->mult_type == GF_MULT_BYTWO_p) {
gf->multiply.w32 = gf_w4_bytwo_p_multiply;
#ifdef INTEL_SSE2
- if (h->region_type & GF_REGION_NOSSE)
+ if (h->region_type & GF_REGION_NOSIMD)
gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region;
else
gf->multiply_region.w32 = gf_w4_bytwo_p_sse_multiply_region;
#else
gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region;
- if (h->region_type & GF_REGION_SSE)
+ if (h->region_type & GF_REGION_SIMD)
return 0;
#endif
} else {
gf->multiply.w32 = gf_w4_bytwo_b_multiply;
#ifdef INTEL_SSE2
- if (h->region_type & GF_REGION_NOSSE)
+ if (h->region_type & GF_REGION_NOSIMD)
gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region;
else
gf->multiply_region.w32 = gf_w4_bytwo_b_sse_multiply_region;
#else
gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region;
- if (h->region_type & GF_REGION_SSE)
+ if (h->region_type & GF_REGION_SIMD)
return 0;
#endif
}
@@ -1937,6 +1899,8 @@ int gf_w4_cfm_init(gf_t *gf)
#if defined(INTEL_SSE4_PCLMUL)
gf->multiply.w32 = gf_w4_clm_multiply;
return 1;
+#elif defined(ARM_NEON)
+ return gf_w4_neon_cfm_init(gf);
#endif
return 0;
}
@@ -1953,11 +1917,14 @@ int gf_w4_shift_init(gf_t *gf)
int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
- int issse3 = 0;
+ int issse3 = 0, isneon = 0;
#ifdef INTEL_SSSE3
issse3 = 1;
#endif
+#ifdef ARM_NEON
+ isneon = 1;
+#endif
switch(mult_type)
{
@@ -1971,7 +1938,8 @@ int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1
return sizeof(gf_internal_t) + sizeof(struct gf_single_table_data) + 64;
}
- if (mult_type == GF_MULT_DEFAULT && !issse3) region_type = GF_REGION_DOUBLE_TABLE;
+ if (mult_type == GF_MULT_DEFAULT && !(issse3 || isneon))
+ region_type = GF_REGION_DOUBLE_TABLE;
if (region_type & GF_REGION_DOUBLE_TABLE) {
return sizeof(gf_internal_t) + sizeof(struct gf_double_table_data) + 64;