diff options
author | bassamtabbara <bassam.tabbara@quantum.com> | 2016-09-14 20:22:27 +0000 |
---|---|---|
committer | bassamtabbara <bassam.tabbara@quantum.com> | 2016-09-14 20:22:27 +0000 |
commit | a6847973cba329ae079d3bd26341a4ec2906f012 (patch) | |
tree | cbdb3947d9d86f2fa7d9cee84d3b773e1bb8f2b2 /src/gf_w32.c | |
parent | 185295f247698f727fd3bb11c4795e1741bb359e (diff) | |
parent | 0690ba86a81faff99a3383b5907ddc02a317eea0 (diff) | |
download | gf-complete-a6847973cba329ae079d3bd26341a4ec2906f012.tar.gz |
Merge branch 'simd-runtime-detection' into 'master'
Support for runtime detection of SIMD
This merge request adds support for runtime SIMD detection. The idea is that you would build gf-complete with full SIMD support, and gf_init will select the appropriate function at runtime based on the capabilities of the target machine. This would eliminate the need to build different versions of the code for different processors (you still need to build for different archs). Ceph for example has 3-4 flavors of jerasure on Intel (and does not support PCLMUL optimizations as a result of using too many binaries). Numerous libraries have followed a similar approach, including zlib.
When reviewing this merge request I recommend that you look at each of the 5 commits independently. The first 3 commits don't change the existing logic. Instead they add debugging functions and test scripts that facilitate testing of the 4th and 5th commits. The 4th commit is where all the new logic goes along with tests. The 5th commit fixes build scripts.
I've tested this on x86_64, arm, and aarch64 using QEMU. Numerous tests have been added that help test this code and could help with future testing of gf-complete. Also I've compared the functions selected with the old code (prior to runtime SIMD support) with the new code and all functions are identical. Here's a gist with the test results prior to SIMD extensions: https://gist.github.com/bassamtabbara/d9a6dcf0a749b7ab01bc2953a359edec.
See merge request !18
Diffstat (limited to 'src/gf_w32.c')
-rw-r--r-- | src/gf_w32.c | 295 |
1 files changed, 141 insertions, 154 deletions
diff --git a/src/gf_w32.c b/src/gf_w32.c index 854a6e4..bb22894 100644 --- a/src/gf_w32.c +++ b/src/gf_w32.c @@ -13,6 +13,7 @@ #include <stdio.h> #include <stdlib.h> #include "gf_w32.h" +#include "gf_cpu.h" #define MM_PRINT32(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 4) printf(" %02x%02x%02x%02x", blah[15-ii], blah[14-ii], blah[13-ii], blah[12-ii]); printf("\n"); } @@ -347,6 +348,8 @@ uint32_t gf_w32_matrix (gf_t *gf, uint32_t b) extra memory. */ +#if defined(INTEL_SSE4_PCLMUL) + static inline gf_val_32_t @@ -354,8 +357,6 @@ gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i w; @@ -378,9 +379,9 @@ gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) /* Extracts 32 bit value from result. */ rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif #if defined(INTEL_SSE4_PCLMUL) @@ -435,6 +436,8 @@ gf_w32_cfmgk_multiply_region_from_single(gf_t *gf, void *src, void *dest, uint32 #endif +#if defined(INTEL_SSE4_PCLMUL) + static inline gf_val_32_t @@ -442,8 +445,6 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -476,9 +477,11 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) /* Extracts 32 bit value from result. */ rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif + +#if defined(INTEL_SSE4_PCLMUL) static inline @@ -487,8 +490,6 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -515,9 +516,11 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) /* Extracts 32 bit value from result. 
*/ rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif + +#if defined(INTEL_SSE4_PCLMUL) static inline @@ -526,8 +529,6 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -556,9 +557,9 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) /* Extracts 32 bit value from result. */ rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif static @@ -589,33 +590,35 @@ gf_w32_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32) static int gf_w32_cfmgk_init(gf_t *gf) { - gf->inverse.w32 = gf_w32_euclid; - gf->multiply_region.w32 = gf_w32_multiply_region_from_single; + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_multiply_region_from_single) #if defined(INTEL_SSE4_PCLMUL) - gf_internal_t *h; + if (gf_cpu_supports_intel_pclmul) { + gf_internal_t *h; - h = (gf_internal_t *) gf->scratch; - gf->multiply.w32 = gf_w32_cfmgk_multiply; - gf->multiply_region.w32 = gf_w32_cfmgk_multiply_region_from_single; + h = (gf_internal_t *) gf->scratch; + SET_FUNCTION(gf,multiply,w32,gf_w32_cfmgk_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_cfmgk_multiply_region_from_single) - uint64_t *q_plus = (uint64_t *) h->private; - uint64_t *g_star = (uint64_t *) h->private + 1; + uint64_t *q_plus = (uint64_t *) h->private; + uint64_t *g_star = (uint64_t *) h->private + 1; - uint64_t tmp = h->prim_poly << 32; - *q_plus = 1ULL << 32; + uint64_t tmp = h->prim_poly << 32; + *q_plus = 1ULL << 32; - int i; - for(i = 63; i >= 32; i--) - if((1ULL << i) & tmp) - { - *q_plus |= 1ULL << (i-32); - tmp ^= h->prim_poly << (i-32); - } + int i; + for(i = 63; i >= 32; i--) + if((1ULL << i) & tmp) + { + *q_plus |= 1ULL << (i-32); + tmp ^= h->prim_poly << (i-32); + } - *g_star = h->prim_poly & ((1ULL << 32) - 1); + *g_star = h->prim_poly & ((1ULL << 32) - 1); - return 1; + return 
1; + } #endif return 0; @@ -624,30 +627,32 @@ int gf_w32_cfmgk_init(gf_t *gf) static int gf_w32_cfm_init(gf_t *gf) { - gf->inverse.w32 = gf_w32_euclid; - gf->multiply_region.w32 = gf_w32_multiply_region_from_single; + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_multiply_region_from_single) /*Ben: We also check to see if the prim poly will work for pclmul */ /*Ben: Check to see how many reduction steps it will take*/ #if defined(INTEL_SSE4_PCLMUL) - gf_internal_t *h; - - h = (gf_internal_t *) gf->scratch; - - if ((0xfffe0000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_2; - gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_2; - }else if ((0xffc00000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_3; - gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_3; - }else if ((0xfe000000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_4; - gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_4; - } else { - return 0; + if (gf_cpu_supports_intel_pclmul) { + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + + if ((0xfffe0000 & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_2) + }else if ((0xffc00000 & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_3) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_3) + }else if ((0xfe000000 & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_4) + } else { + return 0; + } + return 1; } - return 1; #endif return 0; @@ -656,9 +661,9 @@ int gf_w32_cfm_init(gf_t *gf) static int gf_w32_shift_init(gf_t *gf) { - gf->inverse.w32 = gf_w32_euclid; - gf->multiply_region.w32 = gf_w32_multiply_region_from_single; - gf->multiply.w32 = gf_w32_shift_multiply; + 
SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_multiply_region_from_single) + SET_FUNCTION(gf,multiply,w32,gf_w32_shift_multiply) return 1; } @@ -1380,32 +1385,34 @@ int gf_w32_bytwo_init(gf_t *gf) } if (h->mult_type == GF_MULT_BYTWO_p) { - gf->multiply.w32 = gf_w32_bytwo_p_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_p_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w32_bytwo_p_nosse_multiply_region; - else - gf->multiply_region.w32 = gf_w32_bytwo_p_sse_multiply_region; - #else - gf->multiply_region.w32 = gf_w32_bytwo_p_nosse_multiply_region; - if(h->region_type & GF_REGION_SIMD) - return 0; + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { + SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_p_sse_multiply_region) + } else { + #endif + SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_p_nosse_multiply_region) + if(h->region_type & GF_REGION_SIMD) + return 0; + #ifdef INTEL_SSE2 + } #endif } else { - gf->multiply.w32 = gf_w32_bytwo_b_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_b_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w32_bytwo_b_nosse_multiply_region; - else - gf->multiply_region.w32 = gf_w32_bytwo_b_sse_multiply_region; - #else - gf->multiply_region.w32 = gf_w32_bytwo_b_nosse_multiply_region; + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { + SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_b_sse_multiply_region) + } else { + #endif + SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_b_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; + #ifdef INTEL_SSE2 + } #endif } - gf->inverse.w32 = gf_w32_euclid; + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) return 1; } @@ -1755,11 +1762,11 @@ gf_w32_split_4_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint32_t gf_do_final_region_alignment(&rd); } +#ifdef INTEL_SSSE3 static 
void gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) { -#ifdef INTEL_SSSE3 gf_internal_t *h; int i, j, k; uint32_t pp, v, *s32, *d32, *top; @@ -1942,16 +1949,15 @@ gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des } gf_do_final_region_alignment(&rd); - -#endif } +#endif +#ifdef INTEL_SSSE3 static void gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) { -#ifdef INTEL_SSSE3 gf_internal_t *h; int i, j, k; uint32_t pp, v, *s32, *d32, *top, tmp_table[16]; @@ -2216,9 +2222,8 @@ gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint } } gf_do_final_region_alignment(&rd); - -#endif } +#endif static int gf_w32_split_init(gf_t *gf) @@ -2230,29 +2235,13 @@ int gf_w32_split_init(gf_t *gf) struct gf_split_8_32_lazy_data *d32; struct gf_split_16_32_lazy_data *d16; uint32_t p, basep; - int i, j, exp, ispclmul, issse3; - int isneon = 0; - -#if defined(INTEL_SSE4_PCLMUL) - ispclmul = 1; -#else - ispclmul = 0; -#endif - -#ifdef INTEL_SSSE3 - issse3 = 1; -#else - issse3 = 0; -#endif -#ifdef ARM_NEON - isneon = 1; -#endif + int i, j, exp; h = (gf_internal_t *) gf->scratch; /* Defaults */ - gf->inverse.w32 = gf_w32_euclid; + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) /* JSP: First handle single multiplication: If args == 8, then we're doing split 8 8. 
@@ -2261,17 +2250,19 @@ int gf_w32_split_init(gf_t *gf) */ if (h->arg1 == 8 && h->arg2 == 8) { - gf->multiply.w32 = gf_w32_split_8_8_multiply; - } else if (ispclmul) { + SET_FUNCTION(gf,multiply,w32,gf_w32_split_8_8_multiply) +#if defined(INTEL_SSE4_PCLMUL) + } else if (gf_cpu_supports_intel_pclmul) { if ((0xfffe0000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_2; + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_2) } else if ((0xffc00000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_3; + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_3) } else if ((0xfe000000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_4; + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_4) } +#endif } else { - gf->multiply.w32 = gf_w32_bytwo_p_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_p_multiply) } /* Easy cases: 16/32 and 2/32 */ @@ -2279,7 +2270,7 @@ int gf_w32_split_init(gf_t *gf) if ((h->arg1 == 16 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 16)) { d16 = (struct gf_split_16_32_lazy_data *) h->private; d16->last_value = 0; - gf->multiply_region.w32 = gf_w32_split_16_32_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_16_32_lazy_multiply_region) return 1; } @@ -2287,33 +2278,39 @@ int gf_w32_split_init(gf_t *gf) ld2 = (struct gf_split_2_32_lazy_data *) h->private; ld2->last_value = 0; #ifdef INTEL_SSSE3 - if (!(h->region_type & GF_REGION_NOSIMD)) - gf->multiply_region.w32 = gf_w32_split_2_32_lazy_sse_multiply_region; - else - gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region; - #else - gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region; - if(h->region_type & GF_REGION_SIMD) return 0; + if (gf_cpu_supports_intel_ssse3 && !(h->region_type & GF_REGION_NOSIMD)) { + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_2_32_lazy_sse_multiply_region) + } else { + #endif + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_2_32_lazy_multiply_region) + if(h->region_type & 
GF_REGION_SIMD) return 0; + #ifdef INTEL_SSSE3 + } #endif return 1; } /* 4/32 or Default + SSE - There is no ALTMAP/NOSSE. */ + if ((h->arg1 == 4 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 4) || - ((issse3 || isneon) && h->mult_type == GF_REGION_DEFAULT)) { + ((gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon) && h->mult_type == GF_REGION_DEFAULT)) { ld4 = (struct gf_split_4_32_lazy_data *) h->private; ld4->last_value = 0; - if ((h->region_type & GF_REGION_NOSIMD) || !(issse3 || isneon)) { - gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region; - } else if (isneon) { + if ((h->region_type & GF_REGION_NOSIMD) || !(gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon)) { + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_multiply_region) + } else if (gf_cpu_supports_arm_neon) { #ifdef ARM_NEON gf_w32_neon_split_init(gf); #endif } else if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_altmap_multiply_region; +#ifdef INTEL_SSSE3 + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_sse_altmap_multiply_region) +#endif } else { - gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_multiply_region; +#ifdef INTEL_SSSE3 + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_sse_multiply_region) +#endif } return 1; } @@ -2324,7 +2321,7 @@ int gf_w32_split_init(gf_t *gf) h->mult_type == GF_MULT_DEFAULT) { d32 = (struct gf_split_8_32_lazy_data *) h->private; d32->last_value = 0; - gf->multiply_region.w32 = gf_w32_split_8_32_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_8_32_lazy_multiply_region) return 1; } @@ -2333,8 +2330,8 @@ int gf_w32_split_init(gf_t *gf) if (h->arg1 == 8 && h->arg2 == 8) { d8 = (struct gf_w32_split_8_8_data *) h->private; d8->last_value = 0; - gf->multiply.w32 = gf_w32_split_8_8_multiply; - gf->multiply_region.w32 = gf_w32_split_8_32_lazy_multiply_region; + SET_FUNCTION(gf,multiply,w32,gf_w32_split_8_8_multiply) + 
SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_8_32_lazy_multiply_region) basep = 1; for (exp = 0; exp < 7; exp++) { for (j = 0; j < 256; j++) d8->tables[exp][0][j] = 0; @@ -2407,14 +2404,14 @@ int gf_w32_group_init(gf_t *gf) } if (g_s == g_r) { - gf->multiply.w32 = gf_w32_group_s_equals_r_multiply; - gf->multiply_region.w32 = gf_w32_group_s_equals_r_multiply_region; + SET_FUNCTION(gf,multiply,w32,gf_w32_group_s_equals_r_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_group_s_equals_r_multiply_region) } else { - gf->multiply.w32 = gf_w32_group_multiply; - gf->multiply_region.w32 = gf_w32_group_multiply_region; + SET_FUNCTION(gf,multiply,w32,gf_w32_group_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_group_multiply_region) } - gf->divide.w32 = NULL; - gf->inverse.w32 = gf_w32_euclid; + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) return 1; } @@ -2666,18 +2663,18 @@ int gf_w32_composite_init(gf_t *gf) cd->alog = gf_w16_get_mult_alog_table(h->base_gf); if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w32 = gf_w32_composite_multiply_region_alt; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_composite_multiply_region_alt) } else { - gf->multiply_region.w32 = gf_w32_composite_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_composite_multiply_region) } if (cd->log == NULL) { - gf->multiply.w32 = gf_w32_composite_multiply_recursive; + SET_FUNCTION(gf,multiply,w32,gf_w32_composite_multiply_recursive) } else { - gf->multiply.w32 = gf_w32_composite_multiply_inline; + SET_FUNCTION(gf,multiply,w32,gf_w32_composite_multiply_inline) } - gf->divide.w32 = NULL; - gf->inverse.w32 = gf_w32_composite_inverse; + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,gf_w32_composite_inverse) return 1; } @@ -2686,16 +2683,6 @@ int gf_w32_composite_init(gf_t *gf) int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) { - int issse3 = 0; - int isneon = 0; - -#ifdef 
INTEL_SSSE3 - issse3 = 1; -#endif -#ifdef ARM_NEON - isneon = 1; -#endif - switch(mult_type) { case GF_MULT_BYTWO_p: @@ -2720,7 +2707,7 @@ int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg return sizeof(gf_internal_t) + sizeof(struct gf_split_2_32_lazy_data) + 64; } if ((arg1 == 8 && arg2 == 32) || (arg2 == 8 && arg1 == 32) || - (mult_type == GF_MULT_DEFAULT && !(issse3 || isneon))) { + (mult_type == GF_MULT_DEFAULT && !(gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon))) { return sizeof(gf_internal_t) + sizeof(struct gf_split_8_32_lazy_data) + 64; } if ((arg1 == 4 && arg2 == 32) || @@ -2776,10 +2763,10 @@ int gf_w32_init(gf_t *gf) if(h->mult_type != GF_MULT_COMPOSITE) h->prim_poly &= 0xffffffff; - gf->multiply.w32 = NULL; - gf->divide.w32 = NULL; - gf->inverse.w32 = NULL; - gf->multiply_region.w32 = NULL; + SET_FUNCTION(gf,multiply,w32,NULL) + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,multiply_region,w32,NULL) switch(h->mult_type) { case GF_MULT_CARRY_FREE: if (gf_w32_cfm_init(gf) == 0) return 0; break; @@ -2794,30 +2781,30 @@ int gf_w32_init(gf_t *gf) default: return 0; } if (h->divide_type == GF_DIVIDE_EUCLID) { - gf->divide.w32 = gf_w32_divide_from_inverse; - gf->inverse.w32 = gf_w32_euclid; + SET_FUNCTION(gf,divide,w32,gf_w32_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) } else if (h->divide_type == GF_DIVIDE_MATRIX) { - gf->divide.w32 = gf_w32_divide_from_inverse; - gf->inverse.w32 = gf_w32_matrix; + SET_FUNCTION(gf,divide,w32,gf_w32_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w32_matrix) } if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) { - gf->divide.w32 = gf_w32_divide_from_inverse; + SET_FUNCTION(gf,divide,w32,gf_w32_divide_from_inverse) } if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) { - gf->inverse.w32 = gf_w32_inverse_from_divide; + SET_FUNCTION(gf,inverse,w32,gf_w32_inverse_from_divide) } if (h->region_type == 
GF_REGION_CAUCHY) { - gf->extract_word.w32 = gf_wgen_extract_word; - gf->multiply_region.w32 = gf_wgen_cauchy_region; + SET_FUNCTION(gf,extract_word,w32,gf_wgen_extract_word) + SET_FUNCTION(gf,multiply_region,w32,gf_wgen_cauchy_region) } else if (h->region_type & GF_REGION_ALTMAP) { if (h->mult_type == GF_MULT_COMPOSITE) { - gf->extract_word.w32 = gf_w32_composite_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w32_composite_extract_word) } else { - gf->extract_word.w32 = gf_w32_split_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w32_split_extract_word) } } else { - gf->extract_word.w32 = gf_w32_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w32_extract_word) } return 1; } |