summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJanne Grunau <j@jannau.net>2014-09-17 16:10:25 +0200
committerJanne Grunau <j@jannau.net>2014-10-24 14:53:57 +0200
commit474010a91d35fef5ca7dea77205b6a5c7e68c3e9 (patch)
tree456270652db9787d9bb013f896cb507475480a72 /include
parentbec15359de5273d06673c43b8e73c70f97396041 (diff)
downloadgf-complete-474010a91d35fef5ca7dea77205b6a5c7e68c3e9.tar.gz
arm: NEON optimisations for gf_w16
Optimisations for the 4,16 split table region multiplications.

Selected time_tool.sh 16 -A -B results for a 1.7 GHz cortex-a9:
Region Best (MB/s): 532.14 W-Method: 16 -m SPLIT 16 4 -r SIMD -
Region Best (MB/s): 212.34 W-Method: 16 -m SPLIT 16 4 -r NOSIMD -
Region Best (MB/s): 801.36 W-Method: 16 -m SPLIT 16 4 -r SIMD -r ALTMAP -
Region Best (MB/s): 93.20 W-Method: 16 -m SPLIT 16 4 -r NOSIMD -r ALTMAP -
Region Best (MB/s): 273.99 W-Method: 16 -m SPLIT 16 8 -
Region Best (MB/s): 270.81 W-Method: 16 -m SPLIT 8 8 -
Region Best (MB/s): 70.42 W-Method: 16 -m COMPOSITE 2 - -
Region Best (MB/s): 393.54 W-Method: 16 -m COMPOSITE 2 - -r ALTMAP -
Diffstat (limited to 'include')
-rw-r--r--include/gf_w16.h66
1 files changed, 66 insertions, 0 deletions
diff --git a/include/gf_w16.h b/include/gf_w16.h
new file mode 100644
index 0000000..fb4c0e9
--- /dev/null
+++ b/include/gf_w16.h
@@ -0,0 +1,66 @@
+/*
+ * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
+ * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
+ * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
+ *
+ * gf_w16.h
+ *
+ * Defines and data structures for 16-bit Galois fields
+ */
+
+#ifndef GF_COMPLETE_GF_W16_H
+#define GF_COMPLETE_GF_W16_H
+
+#include <stdint.h>
+
+#define GF_FIELD_WIDTH (16) /* bits per element of the 16-bit field this header describes */
+#define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) /* 1 << 16 = 65536 */
+#define GF_MULT_GROUP_SIZE GF_FIELD_SIZE-1 /* NOTE(review): expands textually to (1 << (16))-1 with no outer parentheses, so 2*GF_MULT_GROUP_SIZE or -GF_MULT_GROUP_SIZE do not mean 2*65535 or -65535; audit every use before parenthesizing */
+
+#define GF_BASE_FIELD_WIDTH (8) /* presumably the width of a base field used by composite methods -- TODO confirm against gf_w16.c */
+#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) /* 1 << 8 = 256 */
+
+struct gf_w16_logtable_data { /* Fixed-size table set; by its name, state for a log/antilog-table method -- the code that fills and reads it is outside this header */
+ uint16_t log_tbl[GF_FIELD_SIZE]; /* 65536 entries, one per 16-bit value */
+ uint16_t antilog_tbl[GF_FIELD_SIZE * 2]; /* twice the field size: 131072 entries */
+ uint16_t inv_tbl[GF_FIELD_SIZE]; /* 65536 entries; presumably multiplicative inverses -- TODO confirm */
+ uint16_t *d_antilog; /* presumably points into antilog_tbl at some offset -- confirm where it is assigned */
+};
+
+struct gf_w16_zero_logtable_data { /* Variant of the log-table set with a wider log type and a larger antilog backing array; usage is outside this header */
+ int log_tbl[GF_FIELD_SIZE]; /* int rather than uint16_t, unlike the other log tables here; presumably so entries can fall outside 0..65535 -- TODO confirm */
+ uint16_t _antilog_tbl[GF_FIELD_SIZE * 4]; /* backing storage: four times the field size, 262144 entries */
+ uint16_t *antilog_tbl; /* presumably points into _antilog_tbl so that offset indices stay in bounds -- confirm where it is assigned */
+ uint16_t inv_tbl[GF_FIELD_SIZE]; /* 65536 entries; presumably multiplicative inverses -- TODO confirm */
+};
+
+struct gf_w16_lazytable_data { /* Same four leading members as gf_w16_logtable_data, plus one more field-sized table */
+ uint16_t log_tbl[GF_FIELD_SIZE]; /* 65536 entries, one per 16-bit value */
+ uint16_t antilog_tbl[GF_FIELD_SIZE * 2]; /* twice the field size: 131072 entries */
+ uint16_t inv_tbl[GF_FIELD_SIZE]; /* 65536 entries; presumably multiplicative inverses -- TODO confirm */
+ uint16_t *d_antilog; /* presumably points into antilog_tbl at some offset -- confirm where it is assigned */
+ uint16_t lazytable[GF_FIELD_SIZE]; /* 65536 entries; presumably a per-multiplier product table built on demand -- TODO confirm */
+};
+
+struct gf_w16_bytwo_data { /* Three 64-bit constants; by its name, state for a multiply-by-two method -- usage is outside this header */
+ uint64_t prim_poly; /* presumably the primitive polynomial, possibly replicated across the 64-bit word -- TODO confirm */
+ uint64_t mask1; /* meaning not visible from this header; see the code that initializes it */
+ uint64_t mask2; /* meaning not visible from this header; see the code that initializes it */
+};
+
+struct gf_w16_split_8_8_data { /* By its name, lookup tables for a method that splits operands into 8-bit halves -- TODO confirm */
+ uint16_t tables[3][256][256]; /* 3 * 256 * 256 = 196608 uint16_t entries embedded in the struct, so it is large; which index selects what is not visible here */
+};
+
+struct gf_w16_group_4_4_data { /* Two 16-entry tables; 16 = 2^4, which fits the 4-bit grouping suggested by the name -- TODO confirm */
+ uint16_t reduce[16]; /* presumably reduction values indexed by a 4-bit quantity -- TODO confirm */
+ uint16_t shift[16]; /* presumably shifted multiples indexed by a 4-bit quantity -- TODO confirm */
+};
+
+struct gf_w16_composite_data { /* Holds a single pointer; by its name, state for a composite-field method -- usage is outside this header */
+ uint8_t *mult_table; /* pointer only: size, ownership and whether it may be NULL are not visible from this header */
+};
+
+void gf_w16_neon_split_init(gf_t *gf); /* Declaration only; the definition is outside this header. NOTE(review): gf_t is not declared here or by <stdint.h>, so includers must make it visible before including this file -- confirm include order */
+
+#endif /* GF_COMPLETE_GF_W16_H */