diff options
author | Janne Grunau <j@jannau.net> | 2014-09-17 16:10:25 +0200 |
---|---|---|
committer | Janne Grunau <j@jannau.net> | 2014-10-24 14:53:57 +0200 |
commit | 474010a91d35fef5ca7dea77205b6a5c7e68c3e9 (patch) | |
tree | 456270652db9787d9bb013f896cb507475480a72 /include | |
parent | bec15359de5273d06673c43b8e73c70f97396041 (diff) | |
download | gf-complete-474010a91d35fef5ca7dea77205b6a5c7e68c3e9.tar.gz |
arm: NEON optimisations for gf_w16
Optimisations for the 4,16 split table region multiplications.
Selected time_tool.sh 16 -A -B results for a 1.7 GHz Cortex-A9:
Region Best (MB/s): 532.14 W-Method: 16 -m SPLIT 16 4 -r SIMD -
Region Best (MB/s): 212.34 W-Method: 16 -m SPLIT 16 4 -r NOSIMD -
Region Best (MB/s): 801.36 W-Method: 16 -m SPLIT 16 4 -r SIMD -r ALTMAP -
Region Best (MB/s): 93.20 W-Method: 16 -m SPLIT 16 4 -r NOSIMD -r ALTMAP -
Region Best (MB/s): 273.99 W-Method: 16 -m SPLIT 16 8 -
Region Best (MB/s): 270.81 W-Method: 16 -m SPLIT 8 8 -
Region Best (MB/s): 70.42 W-Method: 16 -m COMPOSITE 2 - -
Region Best (MB/s): 393.54 W-Method: 16 -m COMPOSITE 2 - -r ALTMAP -
Diffstat (limited to 'include')
-rw-r--r-- | include/gf_w16.h | 66 |
1 file changed, 66 insertions, 0 deletions
diff --git a/include/gf_w16.h b/include/gf_w16.h new file mode 100644 index 0000000..fb4c0e9 --- /dev/null +++ b/include/gf_w16.h @@ -0,0 +1,66 @@ +/* + * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic + * James S. Plank, Ethan L. Miller, Kevin M. Greenan, + * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. + * + * gf_w16.h + * + * Defines and data structures for 16-bit Galois fields + */ + +#ifndef GF_COMPLETE_GF_W16_H +#define GF_COMPLETE_GF_W16_H + +#include <stdint.h> + +#define GF_FIELD_WIDTH (16) +#define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) +#define GF_MULT_GROUP_SIZE GF_FIELD_SIZE-1 + +#define GF_BASE_FIELD_WIDTH (8) +#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) + +struct gf_w16_logtable_data { + uint16_t log_tbl[GF_FIELD_SIZE]; + uint16_t antilog_tbl[GF_FIELD_SIZE * 2]; + uint16_t inv_tbl[GF_FIELD_SIZE]; + uint16_t *d_antilog; +}; + +struct gf_w16_zero_logtable_data { + int log_tbl[GF_FIELD_SIZE]; + uint16_t _antilog_tbl[GF_FIELD_SIZE * 4]; + uint16_t *antilog_tbl; + uint16_t inv_tbl[GF_FIELD_SIZE]; +}; + +struct gf_w16_lazytable_data { + uint16_t log_tbl[GF_FIELD_SIZE]; + uint16_t antilog_tbl[GF_FIELD_SIZE * 2]; + uint16_t inv_tbl[GF_FIELD_SIZE]; + uint16_t *d_antilog; + uint16_t lazytable[GF_FIELD_SIZE]; +}; + +struct gf_w16_bytwo_data { + uint64_t prim_poly; + uint64_t mask1; + uint64_t mask2; +}; + +struct gf_w16_split_8_8_data { + uint16_t tables[3][256][256]; +}; + +struct gf_w16_group_4_4_data { + uint16_t reduce[16]; + uint16_t shift[16]; +}; + +struct gf_w16_composite_data { + uint8_t *mult_table; +}; + +void gf_w16_neon_split_init(gf_t *gf); + +#endif /* GF_COMPLETE_GF_W16_H */ |