diff options
Diffstat (limited to 'src/gf_w16.c')
-rw-r--r-- | src/gf_w16.c | 61 |
1 file changed, 11 insertions, 50 deletions
diff --git a/src/gf_w16.c b/src/gf_w16.c index 0904115..ce47849 100644 --- a/src/gf_w16.c +++ b/src/gf_w16.c @@ -11,54 +11,7 @@ #include "gf_int.h" #include <stdio.h> #include <stdlib.h> - -#define GF_FIELD_WIDTH (16) -#define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) -#define GF_MULT_GROUP_SIZE GF_FIELD_SIZE-1 - -#define GF_BASE_FIELD_WIDTH (8) -#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) - -struct gf_w16_logtable_data { - uint16_t log_tbl[GF_FIELD_SIZE]; - uint16_t antilog_tbl[GF_FIELD_SIZE * 2]; - uint16_t inv_tbl[GF_FIELD_SIZE]; - uint16_t *d_antilog; -}; - -struct gf_w16_zero_logtable_data { - int log_tbl[GF_FIELD_SIZE]; - uint16_t _antilog_tbl[GF_FIELD_SIZE * 4]; - uint16_t *antilog_tbl; - uint16_t inv_tbl[GF_FIELD_SIZE]; -}; - -struct gf_w16_lazytable_data { - uint16_t log_tbl[GF_FIELD_SIZE]; - uint16_t antilog_tbl[GF_FIELD_SIZE * 2]; - uint16_t inv_tbl[GF_FIELD_SIZE]; - uint16_t *d_antilog; - uint16_t lazytable[GF_FIELD_SIZE]; -}; - -struct gf_w16_bytwo_data { - uint64_t prim_poly; - uint64_t mask1; - uint64_t mask2; -}; - -struct gf_w16_split_8_8_data { - uint16_t tables[3][256][256]; -}; - -struct gf_w16_group_4_4_data { - uint16_t reduce[16]; - uint16_t shift[16]; -}; - -struct gf_w16_composite_data { - uint8_t *mult_table; -}; +#include "gf_w16.h" #define AB2(ip, am1 ,am2, b, t1, t2) {\ t1 = (b << 1) & am1;\ @@ -1264,6 +1217,7 @@ int gf_w16_split_init(gf_t *gf) gf_internal_t *h; struct gf_w16_split_8_8_data *d8; int i, j, exp, issse3; + int isneon = 0; uint32_t p, basep; h = (gf_internal_t *) gf->scratch; @@ -1273,6 +1227,9 @@ int gf_w16_split_init(gf_t *gf) #else issse3 = 0; #endif +#ifdef ARM_NEON + isneon = 1; +#endif if (h->arg1 == 8 && h->arg2 == 8) { d8 = (struct gf_w16_split_8_8_data *) h->private; @@ -1317,6 +1274,10 @@ int gf_w16_split_init(gf_t *gf) if (issse3) { gf->multiply_region.w32 = gf_w16_split_4_16_lazy_sse_multiply_region; + } else if (isneon) { +#ifdef ARM_NEON + gf_w16_neon_split_init(gf); +#endif } else { 
gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region; } @@ -1326,12 +1287,12 @@ int gf_w16_split_init(gf_t *gf) gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region; } else if ((h->arg1 == 4 && h->arg2 == 16) || (h->arg2 == 4 && h->arg1 == 16)) { - if (issse3) { + if (issse3 || isneon) { if(h->region_type & GF_REGION_ALTMAP && h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w16_split_4_16_lazy_nosse_altmap_multiply_region; else if(h->region_type & GF_REGION_NOSIMD) gf->multiply_region.w32 = gf_w16_split_4_16_lazy_multiply_region; - else if(h->region_type & GF_REGION_ALTMAP) + else if(h->region_type & GF_REGION_ALTMAP && issse3) gf->multiply_region.w32 = gf_w16_split_4_16_lazy_sse_altmap_multiply_region; } else { if(h->region_type & GF_REGION_SIMD) |