summaryrefslogtreecommitdiff
path: root/src/gf_w32.c
diff options
context:
space:
mode:
authorbassamtabbara <bassam.tabbara@quantum.com>2016-09-14 20:22:27 +0000
committerbassamtabbara <bassam.tabbara@quantum.com>2016-09-14 20:22:27 +0000
commita6847973cba329ae079d3bd26341a4ec2906f012 (patch)
treecbdb3947d9d86f2fa7d9cee84d3b773e1bb8f2b2 /src/gf_w32.c
parent185295f247698f727fd3bb11c4795e1741bb359e (diff)
parent0690ba86a81faff99a3383b5907ddc02a317eea0 (diff)
downloadgf-complete-a6847973cba329ae079d3bd26341a4ec2906f012.tar.gz
Merge branch 'simd-runtime-detection' into 'master'
Support for runtime detection of SIMD. This merge request adds support for runtime SIMD detection. The idea is that you would build gf-complete with full SIMD support, and gf_init will select the appropriate function at runtime based on the capabilities of the target machine. This would eliminate the need to build different versions of the code for different processors (you still need to build for different archs). Ceph, for example, has 3-4 flavors of jerasure on Intel (and does not support PCLMUL optimizations as a result of using too many binaries). Numerous libraries have followed a similar approach, including zlib. When reviewing this merge request I recommend that you look at each of the 5 commits independently. The first 3 commits don't change the existing logic. Instead, they add debugging functions and test scripts that facilitate testing of the 4th commit. The 4th commit is where all the new logic goes, along with tests. The 5th commit fixes build scripts. I've tested this on x86_64, arm, and aarch64 using QEMU. Numerous tests have been added that help this code and could help with future testing of gf-complete. Also, I've compared the functions selected with the old code (prior to runtime SIMD support) with the new code and all functions are identical. Here's a gist with the test results prior to SIMD extensions: https://gist.github.com/bassamtabbara/d9a6dcf0a749b7ab01bc2953a359edec. See merge request !18
Diffstat (limited to 'src/gf_w32.c')
-rw-r--r--src/gf_w32.c295
1 files changed, 141 insertions, 154 deletions
diff --git a/src/gf_w32.c b/src/gf_w32.c
index 854a6e4..bb22894 100644
--- a/src/gf_w32.c
+++ b/src/gf_w32.c
@@ -13,6 +13,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "gf_w32.h"
+#include "gf_cpu.h"
#define MM_PRINT32(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 4) printf(" %02x%02x%02x%02x", blah[15-ii], blah[14-ii], blah[13-ii], blah[12-ii]); printf("\n"); }
@@ -347,6 +348,8 @@ uint32_t gf_w32_matrix (gf_t *gf, uint32_t b)
extra memory.
*/
+#if defined(INTEL_SSE4_PCLMUL)
+
static
inline
gf_val_32_t
@@ -354,8 +357,6 @@ gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i w;
@@ -378,9 +379,9 @@ gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
/* Extracts 32 bit value from result. */
rv = ((gf_val_32_t)_mm_extract_epi32(result, 0));
-#endif
return rv;
}
+#endif
#if defined(INTEL_SSE4_PCLMUL)
@@ -435,6 +436,8 @@ gf_w32_cfmgk_multiply_region_from_single(gf_t *gf, void *src, void *dest, uint32
#endif
+#if defined(INTEL_SSE4_PCLMUL)
+
static
inline
gf_val_32_t
@@ -442,8 +445,6 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -476,9 +477,11 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
/* Extracts 32 bit value from result. */
rv = ((gf_val_32_t)_mm_extract_epi32(result, 0));
-#endif
return rv;
}
+#endif
+
+#if defined(INTEL_SSE4_PCLMUL)
static
inline
@@ -487,8 +490,6 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -515,9 +516,11 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
/* Extracts 32 bit value from result. */
rv = ((gf_val_32_t)_mm_extract_epi32(result, 0));
-#endif
return rv;
}
+#endif
+
+#if defined(INTEL_SSE4_PCLMUL)
static
inline
@@ -526,8 +529,6 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -556,9 +557,9 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
/* Extracts 32 bit value from result. */
rv = ((gf_val_32_t)_mm_extract_epi32(result, 0));
-#endif
return rv;
}
+#endif
static
@@ -589,33 +590,35 @@ gf_w32_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32)
static
int gf_w32_cfmgk_init(gf_t *gf)
{
- gf->inverse.w32 = gf_w32_euclid;
- gf->multiply_region.w32 = gf_w32_multiply_region_from_single;
+ SET_FUNCTION(gf,inverse,w32,gf_w32_euclid)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_multiply_region_from_single)
#if defined(INTEL_SSE4_PCLMUL)
- gf_internal_t *h;
+ if (gf_cpu_supports_intel_pclmul) {
+ gf_internal_t *h;
- h = (gf_internal_t *) gf->scratch;
- gf->multiply.w32 = gf_w32_cfmgk_multiply;
- gf->multiply_region.w32 = gf_w32_cfmgk_multiply_region_from_single;
+ h = (gf_internal_t *) gf->scratch;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_cfmgk_multiply)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_cfmgk_multiply_region_from_single)
- uint64_t *q_plus = (uint64_t *) h->private;
- uint64_t *g_star = (uint64_t *) h->private + 1;
+ uint64_t *q_plus = (uint64_t *) h->private;
+ uint64_t *g_star = (uint64_t *) h->private + 1;
- uint64_t tmp = h->prim_poly << 32;
- *q_plus = 1ULL << 32;
+ uint64_t tmp = h->prim_poly << 32;
+ *q_plus = 1ULL << 32;
- int i;
- for(i = 63; i >= 32; i--)
- if((1ULL << i) & tmp)
- {
- *q_plus |= 1ULL << (i-32);
- tmp ^= h->prim_poly << (i-32);
- }
+ int i;
+ for(i = 63; i >= 32; i--)
+ if((1ULL << i) & tmp)
+ {
+ *q_plus |= 1ULL << (i-32);
+ tmp ^= h->prim_poly << (i-32);
+ }
- *g_star = h->prim_poly & ((1ULL << 32) - 1);
+ *g_star = h->prim_poly & ((1ULL << 32) - 1);
- return 1;
+ return 1;
+ }
#endif
return 0;
@@ -624,30 +627,32 @@ int gf_w32_cfmgk_init(gf_t *gf)
static
int gf_w32_cfm_init(gf_t *gf)
{
- gf->inverse.w32 = gf_w32_euclid;
- gf->multiply_region.w32 = gf_w32_multiply_region_from_single;
+ SET_FUNCTION(gf,inverse,w32,gf_w32_euclid)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_multiply_region_from_single)
/*Ben: We also check to see if the prim poly will work for pclmul */
/*Ben: Check to see how many reduction steps it will take*/
#if defined(INTEL_SSE4_PCLMUL)
- gf_internal_t *h;
-
- h = (gf_internal_t *) gf->scratch;
-
- if ((0xfffe0000 & h->prim_poly) == 0){
- gf->multiply.w32 = gf_w32_clm_multiply_2;
- gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_2;
- }else if ((0xffc00000 & h->prim_poly) == 0){
- gf->multiply.w32 = gf_w32_clm_multiply_3;
- gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_3;
- }else if ((0xfe000000 & h->prim_poly) == 0){
- gf->multiply.w32 = gf_w32_clm_multiply_4;
- gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_4;
- } else {
- return 0;
+ if (gf_cpu_supports_intel_pclmul) {
+ gf_internal_t *h;
+
+ h = (gf_internal_t *) gf->scratch;
+
+ if ((0xfffe0000 & h->prim_poly) == 0){
+ SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_2)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_2)
+ }else if ((0xffc00000 & h->prim_poly) == 0){
+ SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_3)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_3)
+ }else if ((0xfe000000 & h->prim_poly) == 0){
+ SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_4)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_4)
+ } else {
+ return 0;
+ }
+ return 1;
}
- return 1;
#endif
return 0;
@@ -656,9 +661,9 @@ int gf_w32_cfm_init(gf_t *gf)
static
int gf_w32_shift_init(gf_t *gf)
{
- gf->inverse.w32 = gf_w32_euclid;
- gf->multiply_region.w32 = gf_w32_multiply_region_from_single;
- gf->multiply.w32 = gf_w32_shift_multiply;
+ SET_FUNCTION(gf,inverse,w32,gf_w32_euclid)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_multiply_region_from_single)
+ SET_FUNCTION(gf,multiply,w32,gf_w32_shift_multiply)
return 1;
}
@@ -1380,32 +1385,34 @@ int gf_w32_bytwo_init(gf_t *gf)
}
if (h->mult_type == GF_MULT_BYTWO_p) {
- gf->multiply.w32 = gf_w32_bytwo_p_multiply;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_p_multiply)
#ifdef INTEL_SSE2
- if (h->region_type & GF_REGION_NOSIMD)
- gf->multiply_region.w32 = gf_w32_bytwo_p_nosse_multiply_region;
- else
- gf->multiply_region.w32 = gf_w32_bytwo_p_sse_multiply_region;
- #else
- gf->multiply_region.w32 = gf_w32_bytwo_p_nosse_multiply_region;
- if(h->region_type & GF_REGION_SIMD)
- return 0;
+ if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) {
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_p_sse_multiply_region)
+ } else {
+ #endif
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_p_nosse_multiply_region)
+ if(h->region_type & GF_REGION_SIMD)
+ return 0;
+ #ifdef INTEL_SSE2
+ }
#endif
} else {
- gf->multiply.w32 = gf_w32_bytwo_b_multiply;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_b_multiply)
#ifdef INTEL_SSE2
- if (h->region_type & GF_REGION_NOSIMD)
- gf->multiply_region.w32 = gf_w32_bytwo_b_nosse_multiply_region;
- else
- gf->multiply_region.w32 = gf_w32_bytwo_b_sse_multiply_region;
- #else
- gf->multiply_region.w32 = gf_w32_bytwo_b_nosse_multiply_region;
+ if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) {
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_b_sse_multiply_region)
+ } else {
+ #endif
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_b_nosse_multiply_region)
if(h->region_type & GF_REGION_SIMD)
return 0;
+ #ifdef INTEL_SSE2
+ }
#endif
}
- gf->inverse.w32 = gf_w32_euclid;
+ SET_FUNCTION(gf,inverse,w32,gf_w32_euclid)
return 1;
}
@@ -1755,11 +1762,11 @@ gf_w32_split_4_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint32_t
gf_do_final_region_alignment(&rd);
}
+#ifdef INTEL_SSSE3
static
void
gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_internal_t *h;
int i, j, k;
uint32_t pp, v, *s32, *d32, *top;
@@ -1942,16 +1949,15 @@ gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
}
gf_do_final_region_alignment(&rd);
-
-#endif
}
+#endif
+#ifdef INTEL_SSSE3
static
void
gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_internal_t *h;
int i, j, k;
uint32_t pp, v, *s32, *d32, *top, tmp_table[16];
@@ -2216,9 +2222,8 @@ gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
}
}
gf_do_final_region_alignment(&rd);
-
-#endif
}
+#endif
static
int gf_w32_split_init(gf_t *gf)
@@ -2230,29 +2235,13 @@ int gf_w32_split_init(gf_t *gf)
struct gf_split_8_32_lazy_data *d32;
struct gf_split_16_32_lazy_data *d16;
uint32_t p, basep;
- int i, j, exp, ispclmul, issse3;
- int isneon = 0;
-
-#if defined(INTEL_SSE4_PCLMUL)
- ispclmul = 1;
-#else
- ispclmul = 0;
-#endif
-
-#ifdef INTEL_SSSE3
- issse3 = 1;
-#else
- issse3 = 0;
-#endif
-#ifdef ARM_NEON
- isneon = 1;
-#endif
+ int i, j, exp;
h = (gf_internal_t *) gf->scratch;
/* Defaults */
- gf->inverse.w32 = gf_w32_euclid;
+ SET_FUNCTION(gf,inverse,w32,gf_w32_euclid)
/* JSP: First handle single multiplication:
If args == 8, then we're doing split 8 8.
@@ -2261,17 +2250,19 @@ int gf_w32_split_init(gf_t *gf)
*/
if (h->arg1 == 8 && h->arg2 == 8) {
- gf->multiply.w32 = gf_w32_split_8_8_multiply;
- } else if (ispclmul) {
+ SET_FUNCTION(gf,multiply,w32,gf_w32_split_8_8_multiply)
+#if defined(INTEL_SSE4_PCLMUL)
+ } else if (gf_cpu_supports_intel_pclmul) {
if ((0xfffe0000 & h->prim_poly) == 0){
- gf->multiply.w32 = gf_w32_clm_multiply_2;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_2)
} else if ((0xffc00000 & h->prim_poly) == 0){
- gf->multiply.w32 = gf_w32_clm_multiply_3;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_3)
} else if ((0xfe000000 & h->prim_poly) == 0){
- gf->multiply.w32 = gf_w32_clm_multiply_4;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_4)
}
+#endif
} else {
- gf->multiply.w32 = gf_w32_bytwo_p_multiply;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_p_multiply)
}
/* Easy cases: 16/32 and 2/32 */
@@ -2279,7 +2270,7 @@ int gf_w32_split_init(gf_t *gf)
if ((h->arg1 == 16 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 16)) {
d16 = (struct gf_split_16_32_lazy_data *) h->private;
d16->last_value = 0;
- gf->multiply_region.w32 = gf_w32_split_16_32_lazy_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_16_32_lazy_multiply_region)
return 1;
}
@@ -2287,33 +2278,39 @@ int gf_w32_split_init(gf_t *gf)
ld2 = (struct gf_split_2_32_lazy_data *) h->private;
ld2->last_value = 0;
#ifdef INTEL_SSSE3
- if (!(h->region_type & GF_REGION_NOSIMD))
- gf->multiply_region.w32 = gf_w32_split_2_32_lazy_sse_multiply_region;
- else
- gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region;
- #else
- gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region;
- if(h->region_type & GF_REGION_SIMD) return 0;
+ if (gf_cpu_supports_intel_ssse3 && !(h->region_type & GF_REGION_NOSIMD)) {
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_2_32_lazy_sse_multiply_region)
+ } else {
+ #endif
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_2_32_lazy_multiply_region)
+ if(h->region_type & GF_REGION_SIMD) return 0;
+ #ifdef INTEL_SSSE3
+ }
#endif
return 1;
}
/* 4/32 or Default + SSE - There is no ALTMAP/NOSSE. */
+
if ((h->arg1 == 4 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 4) ||
- ((issse3 || isneon) && h->mult_type == GF_REGION_DEFAULT)) {
+ ((gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon) && h->mult_type == GF_REGION_DEFAULT)) {
ld4 = (struct gf_split_4_32_lazy_data *) h->private;
ld4->last_value = 0;
- if ((h->region_type & GF_REGION_NOSIMD) || !(issse3 || isneon)) {
- gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region;
- } else if (isneon) {
+ if ((h->region_type & GF_REGION_NOSIMD) || !(gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon)) {
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_multiply_region)
+ } else if (gf_cpu_supports_arm_neon) {
#ifdef ARM_NEON
gf_w32_neon_split_init(gf);
#endif
} else if (h->region_type & GF_REGION_ALTMAP) {
- gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_altmap_multiply_region;
+#ifdef INTEL_SSSE3
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_sse_altmap_multiply_region)
+#endif
} else {
- gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_multiply_region;
+#ifdef INTEL_SSSE3
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_sse_multiply_region)
+#endif
}
return 1;
}
@@ -2324,7 +2321,7 @@ int gf_w32_split_init(gf_t *gf)
h->mult_type == GF_MULT_DEFAULT) {
d32 = (struct gf_split_8_32_lazy_data *) h->private;
d32->last_value = 0;
- gf->multiply_region.w32 = gf_w32_split_8_32_lazy_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_8_32_lazy_multiply_region)
return 1;
}
@@ -2333,8 +2330,8 @@ int gf_w32_split_init(gf_t *gf)
if (h->arg1 == 8 && h->arg2 == 8) {
d8 = (struct gf_w32_split_8_8_data *) h->private;
d8->last_value = 0;
- gf->multiply.w32 = gf_w32_split_8_8_multiply;
- gf->multiply_region.w32 = gf_w32_split_8_32_lazy_multiply_region;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_split_8_8_multiply)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_8_32_lazy_multiply_region)
basep = 1;
for (exp = 0; exp < 7; exp++) {
for (j = 0; j < 256; j++) d8->tables[exp][0][j] = 0;
@@ -2407,14 +2404,14 @@ int gf_w32_group_init(gf_t *gf)
}
if (g_s == g_r) {
- gf->multiply.w32 = gf_w32_group_s_equals_r_multiply;
- gf->multiply_region.w32 = gf_w32_group_s_equals_r_multiply_region;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_group_s_equals_r_multiply)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_group_s_equals_r_multiply_region)
} else {
- gf->multiply.w32 = gf_w32_group_multiply;
- gf->multiply_region.w32 = gf_w32_group_multiply_region;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_group_multiply)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_group_multiply_region)
}
- gf->divide.w32 = NULL;
- gf->inverse.w32 = gf_w32_euclid;
+ SET_FUNCTION(gf,divide,w32,NULL)
+ SET_FUNCTION(gf,inverse,w32,gf_w32_euclid)
return 1;
}
@@ -2666,18 +2663,18 @@ int gf_w32_composite_init(gf_t *gf)
cd->alog = gf_w16_get_mult_alog_table(h->base_gf);
if (h->region_type & GF_REGION_ALTMAP) {
- gf->multiply_region.w32 = gf_w32_composite_multiply_region_alt;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_composite_multiply_region_alt)
} else {
- gf->multiply_region.w32 = gf_w32_composite_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w32_composite_multiply_region)
}
if (cd->log == NULL) {
- gf->multiply.w32 = gf_w32_composite_multiply_recursive;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_composite_multiply_recursive)
} else {
- gf->multiply.w32 = gf_w32_composite_multiply_inline;
+ SET_FUNCTION(gf,multiply,w32,gf_w32_composite_multiply_inline)
}
- gf->divide.w32 = NULL;
- gf->inverse.w32 = gf_w32_composite_inverse;
+ SET_FUNCTION(gf,divide,w32,NULL)
+ SET_FUNCTION(gf,inverse,w32,gf_w32_composite_inverse)
return 1;
}
@@ -2686,16 +2683,6 @@ int gf_w32_composite_init(gf_t *gf)
int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
- int issse3 = 0;
- int isneon = 0;
-
-#ifdef INTEL_SSSE3
- issse3 = 1;
-#endif
-#ifdef ARM_NEON
- isneon = 1;
-#endif
-
switch(mult_type)
{
case GF_MULT_BYTWO_p:
@@ -2720,7 +2707,7 @@ int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg
return sizeof(gf_internal_t) + sizeof(struct gf_split_2_32_lazy_data) + 64;
}
if ((arg1 == 8 && arg2 == 32) || (arg2 == 8 && arg1 == 32) ||
- (mult_type == GF_MULT_DEFAULT && !(issse3 || isneon))) {
+ (mult_type == GF_MULT_DEFAULT && !(gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon))) {
return sizeof(gf_internal_t) + sizeof(struct gf_split_8_32_lazy_data) + 64;
}
if ((arg1 == 4 && arg2 == 32) ||
@@ -2776,10 +2763,10 @@ int gf_w32_init(gf_t *gf)
if(h->mult_type != GF_MULT_COMPOSITE) h->prim_poly &= 0xffffffff;
- gf->multiply.w32 = NULL;
- gf->divide.w32 = NULL;
- gf->inverse.w32 = NULL;
- gf->multiply_region.w32 = NULL;
+ SET_FUNCTION(gf,multiply,w32,NULL)
+ SET_FUNCTION(gf,divide,w32,NULL)
+ SET_FUNCTION(gf,inverse,w32,NULL)
+ SET_FUNCTION(gf,multiply_region,w32,NULL)
switch(h->mult_type) {
case GF_MULT_CARRY_FREE: if (gf_w32_cfm_init(gf) == 0) return 0; break;
@@ -2794,30 +2781,30 @@ int gf_w32_init(gf_t *gf)
default: return 0;
}
if (h->divide_type == GF_DIVIDE_EUCLID) {
- gf->divide.w32 = gf_w32_divide_from_inverse;
- gf->inverse.w32 = gf_w32_euclid;
+ SET_FUNCTION(gf,divide,w32,gf_w32_divide_from_inverse)
+ SET_FUNCTION(gf,inverse,w32,gf_w32_euclid)
} else if (h->divide_type == GF_DIVIDE_MATRIX) {
- gf->divide.w32 = gf_w32_divide_from_inverse;
- gf->inverse.w32 = gf_w32_matrix;
+ SET_FUNCTION(gf,divide,w32,gf_w32_divide_from_inverse)
+ SET_FUNCTION(gf,inverse,w32,gf_w32_matrix)
}
if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) {
- gf->divide.w32 = gf_w32_divide_from_inverse;
+ SET_FUNCTION(gf,divide,w32,gf_w32_divide_from_inverse)
}
if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) {
- gf->inverse.w32 = gf_w32_inverse_from_divide;
+ SET_FUNCTION(gf,inverse,w32,gf_w32_inverse_from_divide)
}
if (h->region_type == GF_REGION_CAUCHY) {
- gf->extract_word.w32 = gf_wgen_extract_word;
- gf->multiply_region.w32 = gf_wgen_cauchy_region;
+ SET_FUNCTION(gf,extract_word,w32,gf_wgen_extract_word)
+ SET_FUNCTION(gf,multiply_region,w32,gf_wgen_cauchy_region)
} else if (h->region_type & GF_REGION_ALTMAP) {
if (h->mult_type == GF_MULT_COMPOSITE) {
- gf->extract_word.w32 = gf_w32_composite_extract_word;
+ SET_FUNCTION(gf,extract_word,w32,gf_w32_composite_extract_word)
} else {
- gf->extract_word.w32 = gf_w32_split_extract_word;
+ SET_FUNCTION(gf,extract_word,w32,gf_w32_split_extract_word)
}
} else {
- gf->extract_word.w32 = gf_w32_extract_word;
+ SET_FUNCTION(gf,extract_word,w32,gf_w32_extract_word)
}
return 1;
}