summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kevin Greenan <kmgreen2@gmail.com> 2014-04-09 10:39:47 -0700
committer Kevin Greenan <kmgreen2@gmail.com> 2014-04-09 10:39:47 -0700
commit 522670550c3bafb622e0ea54817de45468e2ab8d (patch)
tree a603d25dec061e9d31168f8cf582fe5fe76090fe
parent 89a983239a42de475327383d2e75ff2d6384028c (diff)
parent 3ab94b1234df2d9f8c0616536bed370e897ff236 (diff)
download gf-complete-522670550c3bafb622e0ea54817de45468e2ab8d.tar.gz
Merged in dachary/gf-complete/wip-compilation-warnings-v1 (pull request #9)
backport compilation warnings to v1
-rw-r--r-- examples/gf_example_2.c 4
-rw-r--r-- examples/gf_example_5.c 1
-rw-r--r-- examples/gf_example_6.c 1
-rw-r--r-- examples/gf_example_7.c 1
-rw-r--r-- src/gf.c 43
-rw-r--r-- src/gf_general.c 7
-rw-r--r-- src/gf_general.h 61
-rw-r--r-- src/gf_int.h 200
-rw-r--r-- src/gf_method.c 3
-rw-r--r-- src/gf_rand.h 22
-rw-r--r-- src/gf_w128.c 61
-rw-r--r-- src/gf_w16.c 76
-rw-r--r-- src/gf_w32.c 89
-rw-r--r-- src/gf_w4.c 203
-rw-r--r-- src/gf_w64.c 90
-rw-r--r-- src/gf_w8.c 100
-rw-r--r-- src/gf_wgen.c 13
-rw-r--r-- test/gf_unit.c 20
-rw-r--r-- tools/gf_add.c 2
-rw-r--r-- tools/gf_inline_time.c 5
-rw-r--r-- tools/gf_methods.c 2
-rw-r--r-- tools/gf_poly.c 6
-rw-r--r-- tools/gf_time.c 15
23 files changed, 303 insertions, 722 deletions
diff --git a/examples/gf_example_2.c b/examples/gf_example_2.c
index e98774a..576d9a5 100644
--- a/examples/gf_example_2.c
+++ b/examples/gf_example_2.c
@@ -28,8 +28,8 @@ int main(int argc, char **argv)
{
uint32_t a, b, c;
uint8_t *r1, *r2;
- uint16_t *r16;
- uint32_t *r32;
+ uint16_t *r16 = NULL;
+ uint32_t *r32 = NULL;
int w, i;
gf_t gf;
diff --git a/examples/gf_example_5.c b/examples/gf_example_5.c
index 8e7dd4e..da6e9ca 100644
--- a/examples/gf_example_5.c
+++ b/examples/gf_example_5.c
@@ -74,4 +74,5 @@ int main(int argc, char **argv)
gf.extract_word.w32(&gf, a, 30*2, i+15),
gf.extract_word.w32(&gf, b, 30*2, i+15));
}
+ return 0;
}
diff --git a/examples/gf_example_6.c b/examples/gf_example_6.c
index 54cdf83..800a35f 100644
--- a/examples/gf_example_6.c
+++ b/examples/gf_example_6.c
@@ -80,4 +80,5 @@ int main(int argc, char **argv)
gf.extract_word.w32(&gf, a, 30*4, i+15),
gf.extract_word.w32(&gf, b, 30*4, i+15));
}
+ return 0;
}
diff --git a/examples/gf_example_7.c b/examples/gf_example_7.c
index cd5c44b..ee07d53 100644
--- a/examples/gf_example_7.c
+++ b/examples/gf_example_7.c
@@ -71,4 +71,5 @@ int main(int argc, char **argv)
gf.extract_word.w32(&gf, a, 3, i),
gf.extract_word.w32(&gf, b, 3, i));
}
+ return 0;
}
diff --git a/src/gf.c b/src/gf.c
index a443f17..701739b 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -179,13 +179,11 @@ uint64_t gf_composite_get_default_poly(gf_t *base)
int gf_error_check(int w, int mult_type, int region_type, int divide_type,
int arg1, int arg2, uint64_t poly, gf_t *base)
{
- int sse4 = 0;
int sse3 = 0;
int sse2 = 0;
int pclmul = 0;
int rdouble, rquad, rlazy, rsse, rnosse, raltmap, rcauchy, tmp;
- uint64_t pp;
- gf_internal_t *sub, *subsub, *subsubsub;
+ gf_internal_t *sub;
rdouble = (region_type & GF_REGION_DOUBLE_TABLE);
rquad = (region_type & GF_REGION_QUAD_TABLE);
@@ -214,10 +212,6 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type,
sse3 = 1;
#endif
-#ifdef INTEL_SSE4
- sse4 = 1;
-#endif
-
#ifdef INTEL_SSE4_PCLMUL
pclmul = 1;
#endif
@@ -488,7 +482,7 @@ int gf_init_hard(gf_t *gf, int w, int mult_type,
h->arg2 = arg2;
h->base_gf = base_gf;
h->private = (void *) gf->scratch;
- h->private += (sizeof(gf_internal_t));
+ h->private = (char*)h->private + (sizeof(gf_internal_t));
gf->extract_word.w32 = NULL;
switch(w) {
@@ -525,7 +519,7 @@ void gf_alignment_error(char *s, int a)
static
void gf_invert_binary_matrix(uint32_t *mat, uint32_t *inv, int rows) {
- int cols, i, j, k;
+ int cols, i, j;
uint32_t tmp;
cols = rows;
@@ -594,7 +588,7 @@ uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp)
void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base)
{
uint64_t a, prod;
- int j, xor;
+ int xor;
uint64_t *s64, *d64, *top;
s64 = rd->s_start;
@@ -693,8 +687,8 @@ static void gf_slow_multiply_region(gf_region_data *rd, void *src, void *dest, v
fprintf(stderr, "Error: gf_slow_multiply_region: w=%d not implemented.\n", h->w);
exit(1);
}
- src += wb;
- dest += wb;
+ src = (char*)src + wb;
+ dest = (char*)dest + wb;
}
}
@@ -773,8 +767,7 @@ void gf_set_region_data(gf_region_data *rd,
int xor,
int align)
{
- uint8_t *s8, *d8;
- gf_internal_t *h;
+ gf_internal_t *h = NULL;
int wb;
uint32_t a;
unsigned long uls, uld;
@@ -802,7 +795,7 @@ void gf_set_region_data(gf_region_data *rd,
if (align == -1) { /* JSP: This is cauchy. Error check bytes, then set up the pointers
so that there are no alignment regions. */
- if (bytes % h->w != 0) {
+ if (h != NULL && bytes % h->w != 0) {
fprintf(stderr, "Error in region multiply operation.\n");
fprintf(stderr, "The size must be a multiple of %d bytes.\n", h->w);
exit(1);
@@ -810,8 +803,8 @@ void gf_set_region_data(gf_region_data *rd,
rd->s_start = src;
rd->d_start = dest;
- rd->s_top = src + bytes;
- rd->d_top = src + bytes;
+ rd->s_top = (char*)src + bytes;
+ rd->d_top = (char*)src + bytes;
return;
}
@@ -840,12 +833,12 @@ void gf_set_region_data(gf_region_data *rd,
uls %= a;
if (uls != 0) uls = (a-uls);
- rd->s_start = rd->src + uls;
- rd->d_start = rd->dest + uls;
+ rd->s_start = (char*)rd->src + uls;
+ rd->d_start = (char*)rd->dest + uls;
bytes -= uls;
bytes -= (bytes % align);
- rd->s_top = rd->s_start + bytes;
- rd->d_top = rd->d_start + bytes;
+ rd->s_top = (char*)rd->s_start + bytes;
+ rd->d_top = (char*)rd->d_start + bytes;
}
@@ -856,7 +849,7 @@ void gf_do_initial_region_alignment(gf_region_data *rd)
void gf_do_final_region_alignment(gf_region_data *rd)
{
- gf_slow_multiply_region(rd, rd->s_top, rd->d_top, rd->src+rd->bytes);
+ gf_slow_multiply_region(rd, rd->s_top, rd->d_top, (char*)rd->src+rd->bytes);
}
void gf_multby_zero(void *dest, int bytes, int xor)
@@ -897,9 +890,8 @@ void gf_multby_one(void *src, void *dest, int bytes, int xor)
__m128i ms, md;
#endif
unsigned long uls, uld;
- uint8_t *s8, *d8, *dtop8;
+ uint8_t *s8, *d8;
uint64_t *s64, *d64, *dtop64;
- int abytes;
gf_region_data rd;
if (!xor) {
@@ -910,6 +902,7 @@ void gf_multby_one(void *src, void *dest, int bytes, int xor)
uld = (unsigned long) dest;
#ifdef INTEL_SSE2
+ int abytes;
s8 = (uint8_t *) src;
d8 = (uint8_t *) dest;
if (uls % 16 == uld % 16) {
@@ -1025,7 +1018,7 @@ static void gf_unaligned_xor(void *src, void *dest, int bytes)
}
d8 = (uint8_t *) d64;
- while (d8 < (uint8_t *) (dest+bytes)) {
+ while (d8 < (uint8_t *) ((char*)dest+bytes)) {
*d8 ^= *s8;
d8++;
s8++;
diff --git a/src/gf_general.c b/src/gf_general.c
index d9d1700..c410598 100644
--- a/src/gf_general.c
+++ b/src/gf_general.c
@@ -267,7 +267,6 @@ void gf_general_do_region_check(gf_t *gf, gf_general_t *a, void *orig_a, void *o
int w, words, i;
gf_general_t oa, ot, ft, sb;
char sa[50], soa[50], sot[50], sft[50], ssb[50];
- uint8_t *p;
h = (gf_internal_t *) gf->scratch;
w = h->w;
@@ -327,7 +326,7 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
uint64_t *r64;
int i;
- top = rb+size;
+ top = (char*)rb+size;
/* If w is 8, 16, 32, 64 or 128, fill the regions with random bytes.
However, don't allow for zeros in rb, because that will screw up
@@ -366,7 +365,7 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
r64[1] = g.w128[1];
break;
}
- rb += (w/8);
+ rb = (char*)rb + (w/8);
}
} else if (w == 4) {
r8a = (uint8_t *) ra;
@@ -408,7 +407,7 @@ int gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, cha
h = (gf_internal_t *) gf->scratch;
w = h->w;
- top = ra + size;
+ top = (char*)ra + size;
if (w == 8 || w == 4) {
r8a = (uint8_t *) ra;
diff --git a/src/gf_general.h b/src/gf_general.h
deleted file mode 100644
index 9a5de52..0000000
--- a/src/gf_general.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
- * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
- * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
- *
- * gf_general.h
- *
- * This file has helper routines for doing basic GF operations with any
- * legal value of w. The problem is that w <= 32, w=64 and w=128 all have
- * different data types, which is a pain. The procedures in this file try
- * to alleviate that pain. They are used in gf_unit and gf_time.
- */
-
-#pragma once
-
-#include <stdio.h>
-#include <getopt.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-#include <time.h>
-
-#include "gf_complete.h"
-
-typedef union {
- uint32_t w32;
- uint64_t w64;
- uint64_t w128[2];
-} gf_general_t;
-
-void gf_general_set_zero(gf_general_t *v, int w);
-void gf_general_set_one(gf_general_t *v, int w);
-void gf_general_set_two(gf_general_t *v, int w);
-
-int gf_general_is_zero(gf_general_t *v, int w);
-int gf_general_is_one(gf_general_t *v, int w);
-int gf_general_are_equal(gf_general_t *v1, gf_general_t *v2, int w);
-
-void gf_general_val_to_s(gf_general_t *v, int w, char *s, int hex);
-int gf_general_s_to_val(gf_general_t *v, int w, char *s, int hex);
-
-void gf_general_set_random(gf_general_t *v, int w, int zero_ok);
-
-void gf_general_add(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
-void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
-void gf_general_divide(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
-void gf_general_inverse(gf_t *gf, gf_general_t *a, gf_general_t *b);
-
-void gf_general_do_region_multiply(gf_t *gf, gf_general_t *a,
- void *ra, void *rb,
- int bytes, int xor);
-
-void gf_general_do_region_check(gf_t *gf, gf_general_t *a,
- void *orig_a, void *orig_target, void *final_target,
- int bytes, int xor);
-
-
-/* Which is M, D or I for multiply, divide or inverse. */
-
-void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size);
-int gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, char which);
diff --git a/src/gf_int.h b/src/gf_int.h
deleted file mode 100644
index 9221569..0000000
--- a/src/gf_int.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
- * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
- * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
- *
- * gf_int.h
- *
- * Internal code for Galois field routines. This is not meant for
- * users to include, but for the internal GF files to use.
- */
-
-#pragma once
-
-#include "gf_complete.h"
-
-#include <string.h>
-
-extern void timer_start (double *t);
-extern double timer_split (const double *t);
-extern void galois_fill_random (void *buf, int len, unsigned int seed);
-
-typedef struct {
- int mult_type;
- int region_type;
- int divide_type;
- int w;
- uint64_t prim_poly;
- int free_me;
- int arg1;
- int arg2;
- gf_t *base_gf;
- void *private;
-} gf_internal_t;
-
-extern int gf_w4_init (gf_t *gf);
-extern int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_w8_init (gf_t *gf);
-extern int gf_w8_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_w16_init (gf_t *gf);
-extern int gf_w16_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_w32_init (gf_t *gf);
-extern int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_w64_init (gf_t *gf);
-extern int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_w128_init (gf_t *gf);
-extern int gf_w128_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_wgen_init (gf_t *gf);
-extern int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-void gf_wgen_cauchy_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor);
-gf_val_32_t gf_wgen_extract_word(gf_t *gf, void *start, int bytes, int index);
-
-extern void gf_alignment_error(char *s, int a);
-
-extern uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp);
-
-/* This returns the correct default for prim_poly when base is used as the base
- field for COMPOSITE. It returns 0 if we don't have a default prim_poly. */
-
-extern uint64_t gf_composite_get_default_poly(gf_t *base);
-
-/* This structure lets you define a region multiply. It helps because you can handle
- unaligned portions of the data with the procedures below, which really cleans
- up the code. */
-
-typedef struct {
- gf_t *gf;
- void *src;
- void *dest;
- int bytes;
- uint64_t val;
- int xor;
- int align; /* The number of bytes to which to align. */
- void *s_start; /* The start and the top of the aligned region. */
- void *d_start;
- void *s_top;
- void *d_top;
-} gf_region_data;
-
-/* This lets you set up one of these in one call. It also sets the start/top pointers. */
-
-void gf_set_region_data(gf_region_data *rd,
- gf_t *gf,
- void *src,
- void *dest,
- int bytes,
- uint64_t val,
- int xor,
- int align);
-
-/* This performs gf->multiply.32() on all of the unaligned bytes in the beginning of the region */
-
-extern void gf_do_initial_region_alignment(gf_region_data *rd);
-
-/* This performs gf->multiply.32() on all of the unaligned bytes in the end of the region */
-
-extern void gf_do_final_region_alignment(gf_region_data *rd);
-
-extern void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base);
-
-extern void gf_multby_zero(void *dest, int bytes, int xor);
-extern void gf_multby_one(void *src, void *dest, int bytes, int xor);
-
-typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */
- GF_E_MDEFREG, /* Reg != Default && Mult == Default */
- GF_E_MDEFARG, /* Args != Default && Mult == Default */
- GF_E_DIVCOMP, /* Mult == Composite && Div != Default */
- GF_E_CAUCOMP, /* Mult == Composite && Reg == CAUCHY */
- GF_E_DOUQUAD, /* Reg == DOUBLE && Reg == QUAD */
- GF_E_SSE__NO, /* Reg == SSE && Reg == NOSSE */
- GF_E_CAUCHYB, /* Reg == CAUCHY && Other Reg */
- GF_E_CAUGT32, /* Reg == CAUCHY && w > 32*/
- GF_E_ARG1SET, /* Arg1 != 0 && Mult \notin COMPOSITE/SPLIT/GROUP */
- GF_E_ARG2SET, /* Arg2 != 0 && Mult \notin SPLIT/GROUP */
- GF_E_MATRIXW, /* Div == MATRIX && w > 32 */
- GF_E_BAD___W, /* Illegal w */
- GF_E_DOUBLET, /* Reg == DOUBLE && Mult != TABLE */
- GF_E_DOUBLEW, /* Reg == DOUBLE && w \notin {4,8} */
- GF_E_DOUBLEJ, /* Reg == DOUBLE && other Reg */
- GF_E_DOUBLEL, /* Reg == DOUBLE & LAZY but w = 4 */
- GF_E_QUAD__T, /* Reg == QUAD && Mult != TABLE */
- GF_E_QUAD__W, /* Reg == QUAD && w != 4 */
- GF_E_QUAD__J, /* Reg == QUAD && other Reg */
- GF_E_LAZY__X, /* Reg == LAZY && not DOUBLE or QUAD*/
- GF_E_ALTSHIF, /* Mult == Shift && Reg == ALTMAP */
- GF_E_SSESHIF, /* Mult == Shift && Reg == SSE|NOSSE */
- GF_E_ALT_CFM, /* Mult == CARRY_FREE && Reg == ALTMAP */
- GF_E_SSE_CFM, /* Mult == CARRY_FREE && Reg == SSE|NOSSE */
- GF_E_PCLMULX, /* Mult == Carry_Free && No PCLMUL */
- GF_E_ALT_BY2, /* Mult == Bytwo_x && Reg == ALTMAP */
- GF_E_BY2_SSE, /* Mult == Bytwo_x && Reg == SSE && No SSE2 */
- GF_E_LOGBADW, /* Mult == LOGx, w too big*/
- GF_E_LOG___J, /* Mult == LOGx, && Reg == SSE|ALTMAP|NOSSE */
- GF_E_ZERBADW, /* Mult == LOG_ZERO, w \notin {8,16} */
- GF_E_ZEXBADW, /* Mult == LOG_ZERO_EXT, w != 8 */
- GF_E_LOGPOLY, /* Mult == LOG & poly not primitive */
- GF_E_GR_ARGX, /* Mult == GROUP, Bad arg1/2 */
- GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */
- GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */
- GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */
- GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */
- GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */
- GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */
- GF_E_TABLE_W, /* Mult == TABLE, w too big */
- GF_E_TAB_SSE, /* Mult == TABLE, SSE|NOSSE only apply to w == 4 */
- GF_E_TABSSE3, /* Mult == TABLE, Need SSSE3 for SSE */
- GF_E_TAB_ALT, /* Mult == TABLE, Reg == ALTMAP */
- GF_E_SP128AR, /* Mult == SPLIT, w=128, Bad arg1/arg2 */
- GF_E_SP128AL, /* Mult == SPLIT, w=128, SSE requires ALTMAP */
- GF_E_SP128AS, /* Mult == SPLIT, w=128, ALTMAP requires SSE */
- GF_E_SP128_A, /* Mult == SPLIT, w=128, SSE only with 4/128 */
- GF_E_SP128_S, /* Mult == SPLIT, w=128, ALTMAP only with 4/128 */
- GF_E_SPLIT_W, /* Mult == SPLIT, Bad w (8, 16, 32, 64, 128) */
- GF_E_SP_16AR, /* Mult == SPLIT, w=16, Bad arg1/arg2 */
- GF_E_SP_16_A, /* Mult == SPLIT, w=16, ALTMAP only with 4/16 */
- GF_E_SP_16_S, /* Mult == SPLIT, w=16, SSE only with 4/16 */
- GF_E_SP_32AR, /* Mult == SPLIT, w=32, Bad arg1/arg2 */
- GF_E_SP_32AS, /* Mult == SPLIT, w=32, ALTMAP requires SSE */
- GF_E_SP_32_A, /* Mult == SPLIT, w=32, ALTMAP only with 4/32 */
- GF_E_SP_32_S, /* Mult == SPLIT, w=32, SSE only with 4/32 */
- GF_E_SP_64AR, /* Mult == SPLIT, w=64, Bad arg1/arg2 */
- GF_E_SP_64AS, /* Mult == SPLIT, w=64, ALTMAP requires SSE */
- GF_E_SP_64_A, /* Mult == SPLIT, w=64, ALTMAP only with 4/64 */
- GF_E_SP_64_S, /* Mult == SPLIT, w=64, SSE only with 4/64 */
- GF_E_SP_8_AR, /* Mult == SPLIT, w=8, Bad arg1/arg2 */
- GF_E_SP_8__A, /* Mult == SPLIT, w=8, no ALTMAP */
- GF_E_SP_SSE3, /* Mult == SPLIT, Need SSSE3 for SSE */
- GF_E_COMP_A2, /* Mult == COMP, arg1 must be = 2 */
- GF_E_COMP_SS, /* Mult == COMP, SSE|NOSSE */
- GF_E_COMP__W, /* Mult == COMP, Bad w. */
- GF_E_UNKFLAG, /* Unknown flag in create_from.... */
- GF_E_UNKNOWN, /* Unknown mult_type. */
- GF_E_UNK_REG, /* Unknown region_type. */
- GF_E_UNK_DIV, /* Unknown divide_type. */
- GF_E_CFM___W, /* Mult == CFM, Bad w. */
- GF_E_CFM4POL, /* Mult == CFM & Prim Poly has high bits set. */
- GF_E_CFM8POL, /* Mult == CFM & Prim Poly has high bits set. */
- GF_E_CF16POL, /* Mult == CFM & Prim Poly has high bits set. */
- GF_E_CF32POL, /* Mult == CFM & Prim Poly has high bits set. */
- GF_E_CF64POL, /* Mult == CFM & Prim Poly has high bits set. */
- GF_E_FEWARGS, /* Too few args in argc/argv. */
- GF_E_BADPOLY, /* Bad primitive polynomial -- too many bits set. */
- GF_E_COMP_PP, /* Bad primitive polynomial -- bigger than sub-field. */
- GF_E_COMPXPP, /* Can't derive a default pp for composite field. */
- GF_E_BASE__W, /* Composite -- Base field is the wrong size. */
- GF_E_TWOMULT, /* In create_from... two -m's. */
- GF_E_TWO_DIV, /* In create_from... two -d's. */
- GF_E_POLYSPC, /* Bad numbera after -p. */
- GF_E_SPLITAR, /* Ran out of arguments in SPLIT */
- GF_E_SPLITNU, /* Arguments not integers in SPLIT. */
- GF_E_GROUPAR, /* Ran out of arguments in GROUP */
- GF_E_GROUPNU, /* Arguments not integers in GROUP. */
- GF_E_DEFAULT } gf_error_type_t;
-
diff --git a/src/gf_method.c b/src/gf_method.c
index 36ec3c4..a7bcacf 100644
--- a/src/gf_method.c
+++ b/src/gf_method.c
@@ -21,10 +21,9 @@
int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
{
int mult_type, divide_type, region_type;
- int arg1, arg2, subrg_size;
+ int arg1, arg2;
uint64_t prim_poly;
gf_t *base;
- char *crt, *x, *y;
mult_type = GF_MULT_DEFAULT;
region_type = GF_REGION_DEFAULT;
diff --git a/src/gf_rand.h b/src/gf_rand.h
deleted file mode 100644
index 24294ad..0000000
--- a/src/gf_rand.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
- * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
- * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
- *
- * gf_rand.h
- *
- * Random number generation, using the "Mother of All" random number generator. */
-
-#pragma once
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-/* These are all pretty self-explanatory */
-uint32_t MOA_Random_32();
-uint64_t MOA_Random_64();
-void MOA_Random_128(uint64_t *x);
-uint32_t MOA_Random_W(int w, int zero_ok);
-void MOA_Fill_Random_Region (void *reg, int size); /* reg should be aligned to 4 bytes, but
- size can be anything. */
-void MOA_Seed(uint32_t seed);
diff --git a/src/gf_w128.c b/src/gf_w128.c
index fae9f5c..881df00 100644
--- a/src/gf_w128.c
+++ b/src/gf_w128.c
@@ -81,6 +81,7 @@ int xor)
}
}
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w128_clm_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes,
@@ -89,9 +90,7 @@ int xor)
int i;
gf_val_128_t s128;
gf_val_128_t d128;
- uint64_t c128[2];
gf_region_data rd;
-#if defined(INTEL_SSE4_PCLMUL)
__m128i a,b;
__m128i result0,result1;
__m128i prim_poly;
@@ -106,8 +105,6 @@ int xor)
if (val[1] == 1) { gf_multby_one(src, dest, bytes, xor); return; }
}
- set_zero(c128, 0);
-
s128 = (gf_val_128_t) src;
d128 = (gf_val_128_t) dest;
@@ -184,8 +181,8 @@ int xor)
d128[i+1] = (uint64_t)_mm_extract_epi64(result1,0);
}
}
-#endif
}
+#endif
/*
* Some w128 notes:
@@ -384,7 +381,7 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
{
#if defined(INTEL_SSE4)
int i;
- __m128i a, b, pp, one, prod, amask, l_middle_one, u_middle_one;
+ __m128i a, b, pp, prod, amask, u_middle_one;
/*John: pmask is always the highest bit set, and the rest zeros. amask changes, it's a countdown.*/
uint32_t topbit, middlebit, pmask; /* this is used as a boolean value */
gf_internal_t *h;
@@ -400,7 +397,6 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
pmask = 0x80000000;
amask = _mm_insert_epi32(prod, 0x80000000, 0x3);
u_middle_one = _mm_insert_epi32(prod, 1, 0x2);
- l_middle_one = _mm_insert_epi32(prod, 1 << 31, 0x1);
for (i = 0; i < 64; i++) {
topbit = (_mm_extract_epi32(prod, 0x3) & pmask);
@@ -599,13 +595,13 @@ gf_w128_split_4_128_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_
}
}
+#ifdef INTEL_SSSE3
static
void
gf_w128_split_4_128_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, k, tindex;
+ int i, j, k;
uint64_t pp, v[2], s, *s64, *d64, *top;
__m128i p, tables[32][16];
struct gf_w128_split_4_128_data *ld;
@@ -624,7 +620,7 @@ gf_w128_split_4_128_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_
/* Doing this instead of gf_do_initial_region_alignment() because that doesn't hold 128-bit vals */
- gf_w128_multiply_region_from_single(gf, src, dest, val, (rd.s_start-src), xor);
+ gf_w128_multiply_region_from_single(gf, src, dest, val, ((char*)rd.s_start-(char*)src), xor);
s64 = (uint64_t *) rd.s_start;
d64 = (uint64_t *) rd.d_start;
@@ -694,18 +690,18 @@ gf_w128_split_4_128_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_
/* Doing this instead of gf_do_final_region_alignment() because that doesn't hold 128-bit vals */
- gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, (src+bytes)-rd.s_top, xor);
-#endif
+ gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, ((char*)src+bytes)-(char*)rd.s_top, xor);
}
+#endif
+#ifdef INTEL_SSSE3
static
void
gf_w128_split_4_128_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, k, tindex;
- uint64_t pp, v[2], s, *s64, *d64, *top;
+ int i, j, k;
+ uint64_t pp, v[2], *s64, *d64, *top;
__m128i si, tables[32][16], p[16], v0, mask1;
struct gf_w128_split_4_128_data *ld;
uint8_t btable[16];
@@ -724,7 +720,7 @@ gf_w128_split_4_128_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest,
/* Doing this instead of gf_do_initial_region_alignment() because that doesn't hold 128-bit vals */
- gf_w128_multiply_region_from_single(gf, src, dest, val, (rd.s_start-src), xor);
+ gf_w128_multiply_region_from_single(gf, src, dest, val, ((char*)rd.s_start-(char*)src), xor);
s64 = (uint64_t *) rd.s_start;
d64 = (uint64_t *) rd.d_start;
@@ -804,9 +800,9 @@ gf_w128_split_4_128_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest,
}
/* Doing this instead of gf_do_final_region_alignment() because that doesn't hold 128-bit vals */
- gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, (src+bytes)-rd.s_top, xor);
-#endif
+ gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, ((char*)src+bytes)-(char*)rd.s_top, xor);
}
+#endif
static
void
@@ -886,7 +882,7 @@ gf_w128_split_8_128_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_
void
gf_w128_bytwo_b_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
- uint64_t bmask, pp, vmask;
+ uint64_t bmask, pp;
gf_internal_t *h;
uint64_t a[2], c[2], b[2], *s64, *d64, *top;
gf_region_data rd;
@@ -987,7 +983,7 @@ void gf_w128_group_m_init(gf_t *gf, gf_val_128_t b128)
void
gf_w128_group_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
- int i,j;
+ int i;
/* index_r, index_m, total_m (if g_r > g_m) */
int i_r, i_m, t_m;
int mask_m, mask_r;
@@ -1162,11 +1158,12 @@ gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128)
uint64_t c_i[2];
uint64_t *b;
uint64_t one = 1;
- uint64_t buf, buf1;
/* This needs to return some sort of error (in b128?) */
if (a128[0] == 0 && a128[1] == 0) return;
+ b = (uint64_t *) b128;
+
e_im1[0] = 0;
e_im1[1] = ((gf_internal_t *) (gf->scratch))->prim_poly;
e_i[0] = a128[0];
@@ -1240,7 +1237,6 @@ gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128)
d_i = d_ip1;
}
- b = (uint64_t *) b128;
b[0] = y_i[0];
b[1] = y_i[1];
return;
@@ -1326,7 +1322,6 @@ static
void
gf_w128_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
- unsigned long uls, uld;
gf_internal_t *h = (gf_internal_t *) gf->scratch;
gf_t *base_gf = h->base_gf;
uint64_t b0 = val[1];
@@ -1381,14 +1376,13 @@ gf_w128_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_12
gf_internal_t *h = (gf_internal_t *) gf->scratch; gf_t *base_gf = h->base_gf;
gf_val_64_t val0 = val[1];
gf_val_64_t val1 = val[0];
- uint64_t *l, *hi;
uint8_t *slow, *shigh;
uint8_t *dlow, *dhigh, *top;
int sub_reg_size;
gf_region_data rd;
gf_set_region_data(&rd, gf, src, dest, bytes, 0, xor, 64);
- gf_w128_multiply_region_from_single(gf, src, dest, val, (rd.s_start-src), xor);
+ gf_w128_multiply_region_from_single(gf, src, dest, val, ((char*)rd.s_start-(char*)src), xor);
slow = (uint8_t *) rd.s_start;
dlow = (uint8_t *) rd.d_start;
@@ -1404,7 +1398,7 @@ gf_w128_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_12
base_gf->multiply_region.w64(base_gf, shigh, dhigh, base_gf->multiply.w64(base_gf, h->prim_poly, val1
), sub_reg_size, 1);
- gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, (src+bytes)-rd.s_top, xor);
+ gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, ((char*)src+bytes)-(char*)rd.s_top, xor);
}
@@ -1419,8 +1413,6 @@ int gf_w128_composite_init(gf_t *gf)
gf->multiply_region.w128 = gf_w128_composite_multiply_region;
}
- gf_internal_t *base_h = (gf_internal_t *) h->base_gf->scratch;
-
gf->multiply.w128 = gf_w128_composite_multiply;
gf->divide.w128 = gf_w128_divide_from_inverse;
gf->inverse.w128 = gf_w128_composite_inverse;
@@ -1444,8 +1436,6 @@ int gf_w128_cfm_init(gf_t *gf)
static
int gf_w128_shift_init(gf_t *gf)
{
- gf_internal_t *h;
- h = (gf_internal_t*) gf->scratch;
gf->multiply.w128 = gf_w128_shift_multiply;
gf->inverse.w128 = gf_w128_euclid;
gf->multiply_region.w128 = gf_w128_multiply_region_from_single;
@@ -1501,10 +1491,10 @@ void gf_w128_group_r_init(gf_t *gf)
return;
}
+#if 0 // defined(INTEL_SSE4)
static
void gf_w128_group_r_sse_init(gf_t *gf)
{
-#if defined(INTEL_SSE4)
int i, j;
int g_r;
uint64_t pp;
@@ -1526,8 +1516,8 @@ void gf_w128_group_r_sse_init(gf_t *gf)
}
}
return;
-#endif
}
+#endif
static
int gf_w128_split_init(gf_t *gf)
@@ -1587,12 +1577,10 @@ int gf_w128_group_init(gf_t *gf)
{
gf_internal_t *scratch;
gf_group_tables_t *gt;
- int g_m, g_r, size_r;
- long tmp;
+ int g_r, size_r;
scratch = (gf_internal_t *) gf->scratch;
gt = scratch->private;
- g_m = scratch->arg1;
g_r = scratch->arg2;
size_r = (1 << g_r);
@@ -1690,7 +1678,6 @@ void gf_w128_composite_extract_word(gf_t *gf, void *start, int bytes, int index,
int gf_w128_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
int size_m, size_r;
- int w = 128;
if (divide_type==GF_DIVIDE_MATRIX) return 0;
switch(mult_type)
@@ -1739,7 +1726,7 @@ int gf_w128_scratch_size(int mult_type, int region_type, int divide_type, int ar
int gf_w128_init(gf_t *gf)
{
- gf_internal_t *h, *h_base, *h_base_base, *h_base_base_base;
+ gf_internal_t *h;
int no_default_flag = 0;
h = (gf_internal_t *) gf->scratch;
diff --git a/src/gf_w16.c b/src/gf_w16.c
index 454c6cc..f1fb650 100644
--- a/src/gf_w16.c
+++ b/src/gf_w16.c
@@ -125,6 +125,7 @@ gf_w16_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t
gf_do_final_region_alignment(&rd);
}
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
@@ -132,8 +133,6 @@ gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val
gf_region_data rd;
uint16_t *s16;
uint16_t *d16;
-
-#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -186,9 +185,10 @@ gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
@@ -197,8 +197,6 @@ gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val
uint16_t *s16;
uint16_t *d16;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -255,9 +253,10 @@ gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
@@ -266,8 +265,6 @@ gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val
uint16_t *s16;
uint16_t *d16;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -328,8 +325,8 @@ gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
inline
@@ -453,7 +450,7 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
@@ -500,7 +497,7 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
@@ -540,7 +537,7 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
@@ -605,13 +602,13 @@ int gf_w16_shift_init(gf_t *gf)
static
int gf_w16_cfm_init(gf_t *gf)
{
+#if defined(INTEL_SSE4_PCLMUL)
gf_internal_t *h;
h = (gf_internal_t *) gf->scratch;
/*Ben: Determining how many reductions to do */
-#if defined(INTEL_SSE4_PCLMUL)
if ((0xfe00 & h->prim_poly) == 0) {
gf->multiply.w32 = gf_w16_clm_multiply_2;
gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_2;
@@ -774,9 +771,8 @@ static
void
gf_w16_split_4_16_lazy_nosse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- uint64_t i, j, a, b, c, prod;
+ uint64_t i, j, c, prod;
uint8_t *s8, *d8, *top;
- gf_internal_t *h;
uint16_t table[4][16];
gf_region_data rd;
@@ -786,8 +782,6 @@ gf_w16_split_4_16_lazy_nosse_altmap_multiply_region(gf_t *gf, void *src, void *d
gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32);
gf_do_initial_region_alignment(&rd);
- h = (gf_internal_t *) gf->scratch;
-
/*Ben: Constructs lazy multiplication table*/
for (j = 0; j < 16; j++) {
@@ -840,7 +834,6 @@ gf_w16_split_4_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_3
{
uint64_t i, j, a, c, prod;
uint16_t *s16, *d16, *top;
- gf_internal_t *h;
uint16_t table[4][16];
gf_region_data rd;
@@ -850,8 +843,6 @@ gf_w16_split_4_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_3
gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 2);
gf_do_initial_region_alignment(&rd);
- h = (gf_internal_t *) gf->scratch;
-
for (j = 0; j < 16; j++) {
for (i = 0; i < 4; i++) {
c = (j << (i*4));
@@ -880,7 +871,7 @@ static
void
gf_w16_split_8_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- uint64_t j, k, v, a, c, prod, *s64, *d64, *top64;
+ uint64_t j, k, v, a, prod, *s64, *d64, *top64;
gf_internal_t *h;
uint64_t htable[256], ltable[256];
gf_region_data rd;
@@ -966,7 +957,7 @@ gf_w16_split_8_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_3
static void
gf_w16_table_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- uint64_t j, a, c, pp;
+ uint64_t c;
gf_internal_t *h;
struct gf_w16_lazytable_data *ltd;
gf_region_data rd;
@@ -1010,12 +1001,12 @@ gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_v
{
#ifdef INTEL_SSSE3
uint64_t i, j, *s64, *d64, *top64;;
- uint64_t a, c, prod;
+ uint64_t c, prod;
uint8_t low[4][16];
uint8_t high[4][16];
gf_region_data rd;
- __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], tta, ttb, shuffler, unshuffler, lmask;
+ __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], tta, ttb, lmask;
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
@@ -1147,7 +1138,6 @@ gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
uint8_t low[4][16];
uint8_t high[4][16];
gf_region_data rd;
- struct gf_single_table_data *std;
__m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4];
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
@@ -1358,11 +1348,8 @@ issse3 = 0;
static
int gf_w16_table_init(gf_t *gf)
{
- gf_internal_t *h;
gf_w16_log_init(gf);
- h = (gf_internal_t *) gf->scratch;
-
gf->multiply_region.w32 = gf_w16_table_lazy_multiply_region;
return 1;
}
@@ -1557,15 +1544,14 @@ gf_w16_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
prod = _mm_xor_si128(prod, t1); \
v = _mm_srli_epi64(v, 1); }
+#ifdef INTEL_SSE2
static
void
gf_w16_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int i;
uint8_t *s8, *d8;
uint32_t vrev;
- uint64_t amask;
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
struct gf_w16_bytwo_data *btd;
gf_region_data rd;
@@ -1618,17 +1604,16 @@ gf_w16_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
s8 += 16;
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w16_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w16_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, m2, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
@@ -1644,16 +1629,15 @@ gf_w16_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w16_bytwo_data *
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w16_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w16_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
+ uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
@@ -1672,15 +1656,15 @@ gf_w16_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w16_bytwo_data *bt
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w16_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int itb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
@@ -1728,14 +1712,13 @@ gf_w16_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
void
gf_w16_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- int i;
uint64_t *s64, *d64, t1, t2, ta, tb, prod;
struct gf_w16_bytwo_data *btd;
gf_region_data rd;
@@ -1988,7 +1971,6 @@ gf_val_32_t
gf_w16_composite_multiply_inline(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
uint8_t b0 = b & 0x00ff;
uint8_t b1 = (b & 0xff00) >> 8;
uint8_t a0 = a & 0x00ff;
@@ -2072,7 +2054,6 @@ static
void
gf_w16_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- unsigned long uls, uld;
gf_internal_t *h = (gf_internal_t *) gf->scratch;
gf_t *base_gf = h->base_gf;
uint8_t b0 = val & 0x00ff;
@@ -2080,7 +2061,6 @@ gf_w16_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t va
uint16_t *s16, *d16, *top;
uint8_t a0, a1, a1b1, *mt;
gf_region_data rd;
- struct gf_w16_logtable_data *ltd;
struct gf_w16_composite_data *cd;
cd = (struct gf_w16_composite_data *) h->private;
@@ -2237,7 +2217,6 @@ inline
gf_val_32_t
gf_w16_group_4_4_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
- int i;
uint16_t p, l, ind, r, a16;
struct gf_w16_group_4_4_data *d44;
@@ -2270,7 +2249,6 @@ gf_w16_group_4_4_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
static
void gf_w16_group_4_4_region_multiply(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- int i;
uint16_t p, l, ind, r, a16, p16;
struct gf_w16_group_4_4_data *d44;
gf_region_data rd;
@@ -2475,10 +2453,8 @@ int gf_w16_init(gf_t *gf)
uint16_t *gf_w16_get_log_table(gf_t *gf)
{
- gf_internal_t *h;
struct gf_w16_logtable_data *ltd;
- h = (gf_internal_t *) gf->scratch;
if (gf->multiply.w32 == gf_w16_log_multiply) {
ltd = (struct gf_w16_logtable_data *) ((gf_internal_t *) gf->scratch)->private;
return (uint16_t *) ltd->log_tbl;
diff --git a/src/gf_w32.c b/src/gf_w32.c
index 03f285f..1503c72 100644
--- a/src/gf_w32.c
+++ b/src/gf_w32.c
@@ -120,13 +120,13 @@ xor)
}
}
+#if defined(INTEL_SSE4_PCLMUL)
+
static
void
gf_w32_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#if defined(INTEL_SSE4_PCLMUL)
-
int i;
uint32_t *s32;
uint32_t *d32;
@@ -167,16 +167,16 @@ gf_w32_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, uint32
d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
}
}
-#endif
}
+#endif
+
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w32_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#if defined(INTEL_SSE4_PCLMUL)
-
int i;
uint32_t *s32;
uint32_t *d32;
@@ -222,14 +222,14 @@ gf_w32_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, uint32
d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
}
}
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w32_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#if defined(INTEL_SSE4_PCLMUL)
int i;
uint32_t *s32;
uint32_t *d32;
@@ -279,8 +279,8 @@ gf_w32_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, uint32
d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
}
}
-#endif
}
+#endif
static
inline
@@ -414,7 +414,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
@@ -458,7 +458,7 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
@@ -497,7 +497,7 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
@@ -555,10 +555,6 @@ gf_w32_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32)
static
int gf_w32_cfm_init(gf_t *gf)
{
- gf_internal_t *h;
-
- h = (gf_internal_t *) gf->scratch;
-
gf->inverse.w32 = gf_w32_euclid;
gf->multiply_region.w32 = gf_w32_multiply_region_from_single;
@@ -566,6 +562,10 @@ int gf_w32_cfm_init(gf_t *gf)
/*Ben: Check to see how many reduction steps it will take*/
#if defined(INTEL_SSE4_PCLMUL)
+ gf_internal_t *h;
+
+ h = (gf_internal_t *) gf->scratch;
+
if ((0xfffe0000 & h->prim_poly) == 0){
gf->multiply.w32 = gf_w32_clm_multiply_2;
gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_2;
@@ -616,9 +616,8 @@ gf_w32_group_set_shift_tables(uint32_t *shift, uint32_t val, gf_internal_t *h)
static
void gf_w32_group_s_equals_r_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- int i;
int leftover, rs;
- uint32_t p, l, ind, r, a32;
+ uint32_t p, l, ind, a32;
int bits_left;
int g_s;
gf_region_data rd;
@@ -741,9 +740,8 @@ inline
gf_val_32_t
gf_w32_group_s_equals_r_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
- int i;
int leftover, rs;
- uint32_t p, l, ind, r, a32;
+ uint32_t p, l, ind, a32;
int bits_left;
int g_s;
@@ -781,8 +779,7 @@ inline
gf_val_32_t
gf_w32_group_4_4_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
- int i;
- uint32_t p, l, ind, r, a32;
+ uint32_t p, l, ind, a32;
struct gf_w32_group_data *d44;
gf_internal_t *h = (gf_internal_t *) gf->scratch;
@@ -832,7 +829,7 @@ gf_w32_group_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
int i;
int leftover;
- uint64_t p, l, r, mask;
+ uint64_t p, l, r;
uint32_t a32, ind;
int g_s, g_r;
struct gf_w32_group_data *gd;
@@ -986,15 +983,14 @@ gf_w32_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
prod = _mm_xor_si128(prod, t1); \
v = _mm_srli_epi64(v, 1); }
+#ifdef INTEL_SSE2
static
void
gf_w32_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int i;
uint8_t *s8, *d8;
uint32_t vrev;
- uint64_t amask;
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
struct gf_w32_bytwo_data *btd;
gf_region_data rd;
@@ -1039,14 +1035,13 @@ gf_w32_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
s8 += 16;
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
void
gf_w32_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- int i;
uint64_t *s64, *d64, t1, t2, ta, tb, prod;
struct gf_w32_bytwo_data *btd;
gf_region_data rd;
@@ -1181,14 +1176,13 @@ gf_w32_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
gf_do_final_region_alignment(&rd);
}
+#ifdef INTEL_SSE2
static
void
gf_w32_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w32_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, m2, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
@@ -1204,16 +1198,15 @@ gf_w32_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w32_bytwo_data *
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w32_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w32_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
+ uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
@@ -1232,15 +1225,15 @@ gf_w32_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w32_bytwo_data *bt
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w32_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
uint32_t itb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
@@ -1288,8 +1281,8 @@ gf_w32_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
int gf_w32_bytwo_init(gf_t *gf)
@@ -1556,14 +1549,14 @@ gf_w32_split_2_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint32_t
gf_do_final_region_alignment(&rd);
}
+#ifdef INTEL_SSSE3
static
void
gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, tindex;
- uint32_t pp, v, v2, s, *s32, *d32, *top;
+ int i, tindex;
+ uint32_t pp, v, v2, *s32, *d32, *top;
__m128i vi, si, pi, shuffler, tables[16], adder, xi, mask1, mask2;
gf_region_data rd;
@@ -1635,8 +1628,8 @@ gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
void
@@ -1699,8 +1692,8 @@ gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
{
#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, k, tindex;
- uint32_t pp, v, s, *s32, *d32, *top, *realtop;
+ int i, j, k;
+ uint32_t pp, v, *s32, *d32, *top;
__m128i si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3;
struct gf_split_4_32_lazy_data *ld;
uint8_t btable[16];
@@ -1891,9 +1884,9 @@ gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
{
#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, k, tindex;
- uint32_t pp, v, s, *s32, *d32, *top, tmp_table[16];
- __m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3, mask8;
+ int i, j, k;
+ uint32_t pp, v, *s32, *d32, *top, tmp_table[16];
+ __m128i si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3, mask8;
__m128i tv1, tv2, tv3, tv0;
uint8_t btable[16];
gf_region_data rd;
@@ -2378,7 +2371,6 @@ uint32_t
gf_w32_composite_multiply_inline(gf_t *gf, uint32_t a, uint32_t b)
{
gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
uint32_t b0 = b & 0x0000ffff;
uint32_t b1 = b >> 16;
uint32_t a0 = a & 0x0000ffff;
@@ -2620,11 +2612,8 @@ int gf_w32_composite_init(gf_t *gf)
int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
- int ss;
int issse3 = 0;
- ss = (GF_REGION_SSE | GF_REGION_NOSSE);
-
#ifdef INTEL_SSSE3
issse3 = 1;
#endif
diff --git a/src/gf_w4.c b/src/gf_w4.c
index 2504ec6..65cbf23 100644
--- a/src/gf_w4.c
+++ b/src/gf_w4.c
@@ -61,7 +61,7 @@ struct gf_bytwo_data {
t2 = ((t2 << 1) - (t2 >> (GF_FIELD_WIDTH-1))); \
b = (t1 ^ (t2 & ip));}
-#define SSE_AB2(pp, m1 ,m2, va, t1, t2) {\
+#define SSE_AB2(pp, m1, va, t1, t2) {\
t1 = _mm_and_si128(_mm_slli_epi64(va, 1), m1); \
t2 = _mm_and_si128(va, _mm_set1_epi8(0x88)); \
t2 = _mm_sub_epi64 (_mm_slli_epi64(t2, 1), _mm_srli_epi64(t2, (GF_FIELD_WIDTH-1))); \
@@ -414,14 +414,14 @@ gf_w4_single_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
#define MM_PRINT(s, r) { uint8_t blah[16]; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (i = 0; i < 16; i++) printf(" %02x", blah[i]); printf("\n"); }
+#ifdef INTEL_SSSE3
static
void
gf_w4_single_table_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_region_data rd;
uint8_t *base, *sptr, *dptr, *top;
- __m128i tl, loset, h4, r, va, th;
+ __m128i tl, loset, r, va, th;
struct gf_single_table_data *std;
@@ -460,15 +460,15 @@ gf_w4_single_table_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_3
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
int gf_w4_single_table_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_single_table_data *std;
- int a, b, prod, loga, logb;
+ int a, b, prod;
h = (gf_internal_t *) gf->scratch;
@@ -531,7 +531,6 @@ static
void
gf_w4_double_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- unsigned long uls, uld;
int i;
uint8_t *s8, *d8, *base;
gf_region_data rd;
@@ -560,7 +559,7 @@ int gf_w4_double_table_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_double_table_data *std;
- int a, b, c, prod, loga, logb, ab;
+ int a, b, c, prod, ab;
uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
h = (gf_internal_t *) gf->scratch;
@@ -687,7 +686,7 @@ int gf_w4_quad_table_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_quad_table_data *std;
- int prod, loga, logb, ab, val, a, b, c, d, va, vb, vc, vd;
+ int prod, val, a, b, c, d, va, vb, vc, vd;
uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
h = (gf_internal_t *) gf->scratch;
@@ -731,10 +730,9 @@ int gf_w4_quad_table_lazy_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_quad_table_lazy_data *std;
- int a, b, c, prod, loga, logb, ab;
+ int a, b, prod, loga, logb;
uint8_t log_tbl[GF_FIELD_SIZE];
uint8_t antilog_tbl[GF_FIELD_SIZE*2];
- uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
h = (gf_internal_t *) gf->scratch;
std = (struct gf_quad_table_lazy_data *)h->private;
@@ -911,23 +909,22 @@ gf_w4_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
}
#define BYTWO_P_ONESTEP {\
- SSE_AB2(pp, m1 ,m2, prod, t1, t2); \
+ SSE_AB2(pp, m1, prod, t1, t2); \
t1 = _mm_and_si128(v, one); \
t1 = _mm_sub_epi8(t1, one); \
t1 = _mm_and_si128(t1, ta); \
prod = _mm_xor_si128(prod, t1); \
v = _mm_srli_epi64(v, 1); }
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int i;
uint8_t *s8, *d8;
uint8_t vrev;
- uint64_t amask;
- __m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
+ __m128i pp, m1, ta, prod, t1, t2, tp, one, v;
struct gf_bytwo_data *btd;
gf_region_data rd;
@@ -950,7 +947,6 @@ gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
one = _mm_set1_epi8(1);
while (d8 < (uint8_t *) rd.d_top) {
@@ -967,8 +963,8 @@ gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
s8 += 16;
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
/*
static
@@ -1036,354 +1032,330 @@ gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
}
*/
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_load_si128 ((__m128i *)(d8));
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_4_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_4_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_load_si128 ((__m128i *)(d8));
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_3_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = va;
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
va = _mm_xor_si128(va, vb);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_3_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_5_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = va;
- SSE_AB2(pp, m1, m2, va, t1, t2);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
va = _mm_xor_si128(va, vb);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_5_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va);
- SSE_AB2(pp, m1, m2, va, t1, t2);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_7_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = va;
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(va, vb);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
va = _mm_xor_si128(va, vb);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_7_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(vb, va);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_6_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = va;
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
va = _mm_xor_si128(va, vb);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_6_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
struct gf_bytwo_data *btd;
@@ -1464,7 +1436,7 @@ gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
if (tb & 1) vb = _mm_xor_si128(vb, va);
tb >>= 1;
if (tb == 0) break;
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
}
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
@@ -1491,16 +1463,13 @@ gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
void
gf_w4_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- unsigned long uls, uld;
- int i;
- uint8_t *s8, *d8, *top;
uint64_t *s64, *d64, t1, t2, ta, tb, prod;
struct gf_bytwo_data *btd;
gf_region_data rd;
@@ -1963,10 +1932,6 @@ int gf_w4_bytwo_init(gf_t *gf)
static
int gf_w4_cfm_init(gf_t *gf)
{
- gf_internal_t *h;
-
- h = (gf_internal_t *) gf->scratch;
-
#if defined(INTEL_SSE4_PCLMUL)
gf->multiply.w32 = gf_w4_clm_multiply;
return 1;
@@ -1986,8 +1951,6 @@ int gf_w4_shift_init(gf_t *gf)
int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
- int region_tbl_size;
- int ss;
int issse3 = 0;
#ifdef INTEL_SSSE3
diff --git a/src/gf_w64.c b/src/gf_w64.c
index 73bf164..f04daf0 100644
--- a/src/gf_w64.c
+++ b/src/gf_w64.c
@@ -87,20 +87,19 @@ xor)
}
}
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w64_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int
xor)
{
- int i, size;
gf_val_64_t *s64, *d64, *top;
gf_region_data rd;
-#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result, r1;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
__m128i m1, m2, m3, m4;
gf_internal_t * h = gf->scratch;
@@ -121,7 +120,6 @@ xor)
s64 = (gf_val_64_t *) rd.s_start;
d64 = (gf_val_64_t *) rd.d_start;
top = (gf_val_64_t *) rd.d_top;
- size = bytes/sizeof(gf_val_64_t);
if (xor) {
while (d64 != top) {
@@ -175,19 +173,18 @@ xor)
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w64_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int
xor)
{
- int i, size;
gf_val_64_t *s64, *d64, *top;
gf_region_data rd;
-#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result, r1;
__m128i prim_poly;
@@ -210,7 +207,6 @@ xor)
s64 = (gf_val_64_t *) rd.s_start;
d64 = (gf_val_64_t *) rd.d_start;
top = (gf_val_64_t *) rd.d_top;
- size = bytes/sizeof(gf_val_64_t);
if (xor) {
while (d64 != top) {
@@ -263,8 +259,8 @@ xor)
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
inline
@@ -321,7 +317,7 @@ inline
gf_val_64_t
gf_w64_shift_multiply (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
{
- uint64_t pl, pr, ppl, ppr, i, pp, a, bl, br, one, lbit;
+ uint64_t pl, pr, ppl, ppr, i, a, bl, br, one, lbit;
gf_internal_t *h;
h = (gf_internal_t *) gf->scratch;
@@ -468,9 +464,7 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
{
#if defined(INTEL_SSE4_PCLMUL)
gf_internal_t *h;
- int i, j, k;
uint8_t *s8, *d8, *dtop;
- uint64_t *s64, *d64;
gf_region_data rd;
__m128i v, b, m, prim_poly, c, fr, w, result;
@@ -492,7 +486,6 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
if (xor) {
while (d8 != dtop) {
- s64 = (uint64_t *) s8;
b = _mm_load_si128((__m128i *) s8);
result = _mm_clmulepi64_si128 (b, v, 0);
c = _mm_insert_epi32 (_mm_srli_si128 (result, 8), 0, 0);
@@ -521,7 +514,6 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
}
} else {
while (d8 < dtop) {
- s64 = (uint64_t *) s8;
b = _mm_load_si128((__m128i *) s8);
result = _mm_clmulepi64_si128 (b, v, 0);
c = _mm_insert_epi32 (_mm_srli_si128 (result, 8), 0, 0);
@@ -741,8 +733,6 @@ gf_w64_split_16_64_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint64_
static
int gf_w64_shift_init(gf_t *gf)
{
- gf_internal_t *h;
-
gf->multiply.w64 = gf_w64_shift_multiply;
gf->inverse.w64 = gf_w64_euclid;
gf->multiply_region.w64 = gf_w64_multiply_region_from_single;
@@ -752,14 +742,14 @@ int gf_w64_shift_init(gf_t *gf)
static
int gf_w64_cfm_init(gf_t *gf)
{
- gf_internal_t *h;
-
- h = (gf_internal_t *) gf->scratch;
-
gf->inverse.w64 = gf_w64_euclid;
gf->multiply_region.w64 = gf_w64_multiply_region_from_single;
#if defined(INTEL_SSE4_PCLMUL)
+ gf_internal_t *h;
+
+ h = (gf_internal_t *) gf->scratch;
+
if ((0xfffffffe00000000ULL & h->prim_poly) == 0){
gf->multiply.w64 = gf_w64_clm_multiply_2;
gf->multiply_region.w64 = gf_w64_clm_multiply_region_from_single_2;
@@ -803,7 +793,6 @@ inline
gf_val_64_t
gf_w64_group_multiply(gf_t *gf, gf_val_64_t a, gf_val_64_t b)
{
- int i;
uint64_t top, bot, mask, tp;
int g_s, g_r, lshift, rshift;
struct gf_w64_group_data *gd;
@@ -854,7 +843,7 @@ static
void gf_w64_group_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
int i, fzb;
- uint64_t a64, smask, rmask, top, bot, tp, one;
+ uint64_t a64, smask, rmask, top, bot, tp;
int lshift, rshift, g_s, g_r;
gf_region_data rd;
uint64_t *s64, *d64, *dtop;
@@ -936,9 +925,8 @@ inline
gf_val_64_t
gf_w64_group_s_equals_r_multiply(gf_t *gf, gf_val_64_t a, gf_val_64_t b)
{
- int i;
int leftover, rs;
- uint64_t p, l, ind, r, a64;
+ uint64_t p, l, ind, a64;
int bits_left;
int g_s;
@@ -974,9 +962,8 @@ gf_w64_group_s_equals_r_multiply(gf_t *gf, gf_val_64_t a, gf_val_64_t b)
static
void gf_w64_group_s_equals_r_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
- int i;
int leftover, rs;
- uint64_t p, l, ind, r, a64;
+ uint64_t p, l, ind, a64;
int bits_left;
int g_s;
gf_region_data rd;
@@ -1189,7 +1176,7 @@ static
void
gf_w64_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
- uint64_t *s64, *d64, t1, t2, ta, prod, amask, pmask, pp;
+ uint64_t *s64, *d64, ta, prod, amask, pmask, pp;
gf_region_data rd;
gf_internal_t *h;
@@ -1243,7 +1230,7 @@ static
void
gf_w64_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
- uint64_t *s64, *d64, t1, t2, ta, tb, prod, amask, bmask, pp;
+ uint64_t *s64, *d64, ta, tb, prod, bmask, pp;
gf_region_data rd;
gf_internal_t *h;
@@ -1374,14 +1361,13 @@ void gf_w64_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_
#endif
}
+#ifdef INTEL_SSE2
static
void
gf_w64_bytwo_b_sse_region_2_xor(gf_region_data *rd)
{
-#ifdef INTEL_SSE2
- int i;
uint64_t one64, amask;
- uint8_t *d8, *s8, tb;
+ uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
gf_internal_t *h;
@@ -1405,17 +1391,16 @@ gf_w64_bytwo_b_sse_region_2_xor(gf_region_data *rd)
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w64_bytwo_b_sse_region_2_noxor(gf_region_data *rd)
{
-#ifdef INTEL_SSE2
- int i;
uint64_t one64, amask;
- uint8_t *d8, *s8, tb;
+ uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va;
gf_internal_t *h;
@@ -1437,18 +1422,17 @@ gf_w64_bytwo_b_sse_region_2_noxor(gf_region_data *rd)
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w64_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
uint64_t itb, amask, one64;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
- struct gf_w32_bytwo_data *btd;
gf_region_data rd;
gf_internal_t *h;
@@ -1495,8 +1479,8 @@ gf_w64_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
@@ -1620,17 +1604,13 @@ static
void
gf_w64_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
- unsigned long uls, uld;
gf_internal_t *h = (gf_internal_t *) gf->scratch;
gf_t *base_gf = h->base_gf;
- int i=0;
uint32_t b0 = val & 0x00000000ffffffff;
uint32_t b1 = (val & 0xffffffff00000000) >> 32;
uint64_t *s64, *d64;
uint64_t *top;
uint64_t a0, a1, a1b1;
- int num_syms = bytes / 8;
- int sym_divisible = bytes % 4;
gf_region_data rd;
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
@@ -1721,14 +1701,14 @@ int gf_w64_composite_init(gf_t *gf)
return 1;
}
+#ifdef INTEL_SSSE3
static
void
gf_w64_split_4_64_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, k, tindex;
- uint64_t pp, v, s, *s64, *d64, *top;
+ int i, j, k;
+ uint64_t pp, v, *s64, *d64, *top;
__m128i si, tables[16][8], p[8], v0, mask1;
struct gf_split_4_64_lazy_data *ld;
uint8_t btable[16];
@@ -1802,18 +1782,18 @@ gf_w64_split_4_64_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#ifdef INTEL_SSE4
static
void
gf_w64_split_4_64_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE4
gf_internal_t *h;
- int i, m, j, k, tindex;
- uint64_t pp, v, s, *s64, *d64, *top;
- __m128i si, tables[16][8], p[8], st[8], mask1, mask8, mask16, t1, t2;
+ int i, j, k;
+ uint64_t pp, v, *s64, *d64, *top;
+ __m128i si, tables[16][8], p[8], st[8], mask1, mask8, mask16, t1;
struct gf_split_4_64_lazy_data *ld;
uint8_t btable[16];
gf_region_data rd;
@@ -2006,8 +1986,8 @@ gf_w64_split_4_64_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
#define GF_MULTBY_TWO(p) (((p) & GF_FIRST_BIT) ? (((p) << 1) ^ h->prim_poly) : (p) << 1);
@@ -2141,8 +2121,6 @@ int gf_w64_split_init(gf_t *gf)
int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
- int issse4;
-
switch(mult_type)
{
case GF_MULT_SHIFT:
@@ -2162,11 +2140,9 @@ int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg
* then fall through to split table scratch size code. */
#ifdef INTEL_SSE4
- issse4 = 1;
arg1 = 64;
arg2 = 4;
#else
- issse4 = 0;
arg1 = 64;
arg2 = 8;
#endif
@@ -2202,7 +2178,7 @@ int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg
int gf_w64_init(gf_t *gf)
{
- gf_internal_t *h, *h_base, *h_base_base, *h_base_base_base;
+ gf_internal_t *h;
int no_default_flag = 0;
h = (gf_internal_t *) gf->scratch;
diff --git a/src/gf_w8.c b/src/gf_w8.c
index 7661aad..89ef6a2 100644
--- a/src/gf_w8.c
+++ b/src/gf_w8.c
@@ -216,7 +216,7 @@ gf_w8_clm_multiply_2 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
@@ -262,7 +262,7 @@ gf_w8_clm_multiply_3 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
@@ -301,7 +301,7 @@ gf_w8_clm_multiply_4 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
@@ -364,6 +364,7 @@ gf_w8_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t v
gf_do_final_region_alignment(&rd);
}
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int
@@ -373,12 +374,10 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
@@ -420,9 +419,10 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int
@@ -432,12 +432,10 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
@@ -483,9 +481,10 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int
@@ -495,12 +494,10 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
@@ -550,8 +547,8 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
/* ------------------------------------------------------------
IMPLEMENTATION: SHIFT:
@@ -588,11 +585,11 @@ gf_w8_shift_multiply (gf_t *gf, uint32_t a8, uint32_t b8)
static
int gf_w8_cfm_init(gf_t *gf)
{
+#if defined(INTEL_SSE4_PCLMUL)
gf_internal_t *h;
h = (gf_internal_t *) gf->scratch;
-#if defined(INTEL_SSE4_PCLMUL)
if ((0xe0 & h->prim_poly) == 0){
gf->multiply.w32 = gf_w8_clm_multiply_2;
gf->multiply_region.w32 = gf_w8_clm_multiply_region_from_single_2;
@@ -731,7 +728,7 @@ static
gf_w8_log_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
int i;
- uint8_t lv, b, c;
+ uint8_t lv;
uint8_t *s8, *d8;
struct gf_w8_logtable_data *ltd;
@@ -760,7 +757,7 @@ static
gf_w8_logzero_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
int i;
- uint8_t lv, b, c;
+ uint8_t lv;
uint8_t *s8, *d8;
struct gf_w8_logzero_table_data *ltd;
struct gf_w8_logzero_small_table_data *std;
@@ -802,9 +799,9 @@ gf_w8_logzero_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int
int gf_w8_log_init(gf_t *gf)
{
gf_internal_t *h;
- struct gf_w8_logtable_data *ltd;
- struct gf_w8_logzero_table_data *ztd;
- struct gf_w8_logzero_small_table_data *std;
+ struct gf_w8_logtable_data *ltd = NULL;
+ struct gf_w8_logzero_table_data *ztd = NULL;
+ struct gf_w8_logzero_small_table_data *std = NULL;
uint8_t *alt;
uint8_t *inv;
int i, b;
@@ -941,6 +938,7 @@ gf_w8_default_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
return (ftd->multtable[a][b]);
}
+#ifdef INTEL_SSSE3
static
gf_val_32_t
gf_w8_default_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
@@ -950,6 +948,7 @@ gf_w8_default_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
ftd = (struct gf_w8_default_data *) ((gf_internal_t *) gf->scratch)->private;
return (ftd->divtable[a][b]);
}
+#endif
static
gf_val_32_t
@@ -976,7 +975,7 @@ static
gf_w8_double_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
uint16_t *base;
- uint32_t b, c, prod, vc, vb;
+ uint32_t b, c, vc, vb;
gf_internal_t *h;
struct gf_w8_double_table_data *dtd;
struct gf_w8_double_table_lazy_data *ltd;
@@ -1033,7 +1032,6 @@ static
gf_w8_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
int i;
- uint8_t lv, b, c;
uint8_t *s8, *d8;
struct gf_w8_single_table_data *ftd;
@@ -1055,14 +1053,13 @@ gf_w8_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, in
}
}
+#ifdef INTEL_SSSE3
static
void
gf_w8_split_multiply_region_sse(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
- uint8_t *s8, *d8, *bh, *bl, *sptr, *dptr, *top;
- __m128i tbl, loset, t1, r, va, mth, mtl;
- uint64_t altable[4];
+ uint8_t *bh, *bl, *sptr, *dptr;
+ __m128i loset, t1, r, va, mth, mtl;
struct gf_w8_half_table_data *htd;
gf_region_data rd;
@@ -1115,8 +1112,8 @@ gf_w8_split_multiply_region_sse(gf_t *gf, void *src, void *dest, gf_val_32_t val
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
/* ------------------------------------------------------------
@@ -1137,9 +1134,7 @@ static
void
gf_w8_split_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- unsigned long uls, uld;
int i;
- uint8_t lv, b, c;
uint8_t *s8, *d8;
struct gf_w8_half_table_data *htd;
@@ -1167,11 +1162,10 @@ int gf_w8_split_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_w8_half_table_data *htd;
- int a, b, pp;
+ int a, b;
h = (gf_internal_t *) gf->scratch;
htd = (struct gf_w8_half_table_data *)h->private;
- pp = h->prim_poly;
bzero(htd->high, sizeof(uint8_t)*GF_FIELD_SIZE*GF_HALF_SIZE);
bzero(htd->low, sizeof(uint8_t)*GF_FIELD_SIZE*GF_HALF_SIZE);
@@ -1325,13 +1319,13 @@ gf_w8_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_32_t
gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32);
gf_do_initial_region_alignment(&rd);
- sub_reg_size = (rd.d_top - rd.d_start) / 2;
+ sub_reg_size = ((char*)rd.d_top - (char*)rd.d_start) / 2;
base_gf->multiply_region.w32(base_gf, rd.s_start, rd.d_start, val0, sub_reg_size, xor);
- base_gf->multiply_region.w32(base_gf, rd.s_start+sub_reg_size, rd.d_start, val1, sub_reg_size, 1);
- base_gf->multiply_region.w32(base_gf, rd.s_start, rd.d_start+sub_reg_size, val1, sub_reg_size, xor);
- base_gf->multiply_region.w32(base_gf, rd.s_start+sub_reg_size, rd.d_start+sub_reg_size, val0, sub_reg_size, 1);
- base_gf->multiply_region.w32(base_gf, rd.s_start+sub_reg_size, rd.d_start+sub_reg_size, base_gf->multiply.w32(base_gf, h->prim_poly, val1), sub_reg_size, 1);
+ base_gf->multiply_region.w32(base_gf, (char*)rd.s_start+sub_reg_size, rd.d_start, val1, sub_reg_size, 1);
+ base_gf->multiply_region.w32(base_gf, rd.s_start, (char*)rd.d_start+sub_reg_size, val1, sub_reg_size, xor);
+ base_gf->multiply_region.w32(base_gf, (char*)rd.s_start+sub_reg_size, (char*)rd.d_start+sub_reg_size, val0, sub_reg_size, 1);
+ base_gf->multiply_region.w32(base_gf, (char*)rd.s_start+sub_reg_size, (char*)rd.d_start+sub_reg_size, base_gf->multiply.w32(base_gf, h->prim_poly, val1), sub_reg_size, 1);
gf_do_final_region_alignment(&rd);
}
@@ -1361,7 +1355,6 @@ gf_val_32_t
gf_w8_composite_multiply_inline(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
uint8_t b0 = b & 0x0f;
uint8_t b1 = (b & 0xf0) >> 4;
uint8_t a0 = a & 0x0f;
@@ -1674,15 +1667,14 @@ gf_w8_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
prod = _mm_xor_si128(prod, t1); \
v = _mm_srli_epi64(v, 1); }
+#ifdef INTEL_SSE2
static
void
gf_w8_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int i;
uint8_t *s8, *d8;
uint8_t vrev;
- uint64_t amask;
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
struct gf_w8_bytwo_data *btd;
gf_region_data rd;
@@ -1727,17 +1719,16 @@ gf_w8_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
s8 += 16;
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w8_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, m2, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
@@ -1753,16 +1744,15 @@ gf_w8_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w8_bytwo_data *bt
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w8_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
+ uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
@@ -1781,15 +1771,15 @@ gf_w8_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w8_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int itb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
@@ -1837,15 +1827,13 @@ gf_w8_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
void
gf_w8_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- int i;
- uint8_t *s8, *d8, *top;
uint64_t *s64, *d64, t1, t2, ta, tb, prod;
struct gf_w8_bytwo_data *btd;
gf_region_data rd;
@@ -2362,7 +2350,7 @@ int gf_w8_scratch_size(int mult_type, int region_type, int divide_type, int arg1
int gf_w8_init(gf_t *gf)
{
- gf_internal_t *h, *h_base;
+ gf_internal_t *h;
h = (gf_internal_t *) gf->scratch;
@@ -2454,11 +2442,9 @@ uint8_t *gf_w8_get_mult_table(gf_t *gf)
uint8_t *gf_w8_get_div_table(gf_t *gf)
{
- gf_internal_t *h;
struct gf_w8_default_data *ftd;
struct gf_w8_single_table_data *std;
- h = (gf_internal_t *) gf->scratch;
if (gf->multiply.w32 == gf_w8_default_multiply) {
ftd = (struct gf_w8_default_data *) ((gf_internal_t *) gf->scratch)->private;
return (uint8_t *) ftd->divtable;
diff --git a/src/gf_wgen.c b/src/gf_wgen.c
index f5e22e0..68c6bb0 100644
--- a/src/gf_wgen.c
+++ b/src/gf_wgen.c
@@ -284,9 +284,8 @@ inline
gf_val_32_t
gf_wgen_group_s_equals_r_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
- int i;
int leftover, rs;
- uint32_t p, l, ind, r, a32;
+ uint32_t p, l, ind, a32;
int bits_left;
int g_s;
int w;
@@ -362,7 +361,7 @@ gf_wgen_group_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
int i;
int leftover;
- uint64_t p, l, r, mask;
+ uint64_t p, l, r;
uint32_t a32, ind;
int g_s, g_r;
struct gf_wgen_group_data *gd;
@@ -496,7 +495,7 @@ int gf_wgen_table_8_init(gf_t *gf)
gf_internal_t *h;
int w;
struct gf_wgen_table_w8_data *std;
- uint32_t a, b, p, pp;
+ uint32_t a, b, p;
h = (gf_internal_t *) gf->scratch;
w = h->w;
@@ -557,7 +556,7 @@ int gf_wgen_table_16_init(gf_t *gf)
gf_internal_t *h;
int w;
struct gf_wgen_table_w16_data *std;
- uint32_t a, b, p, pp;
+ uint32_t a, b, p;
h = (gf_internal_t *) gf->scratch;
w = h->w;
@@ -917,11 +916,11 @@ gf_wgen_cauchy_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int byte
for (i = 0; i < h->w; i++) {
for (j = 0; j < h->w; j++) {
if (val & (1 << j)) {
- gf_multby_one(src, dest + j*rs, rs, (written & (1 << j)));
+ gf_multby_one(src, ((char*)dest) + j*rs, rs, (written & (1 << j)));
written |= (1 << j);
}
}
- src += rs;
+ src = (char*)src + rs;
val = gf->multiply.w32(gf, val, 2);
}
}
diff --git a/test/gf_unit.c b/test/gf_unit.c
index cf466fe..deaaced 100644
--- a/test/gf_unit.c
+++ b/test/gf_unit.c
@@ -70,16 +70,16 @@ int main(int argc, char **argv)
{
signal(SIGSEGV, SigHandler);
- int w, i, verbose, single, region, tested, top;
+ int w, i, verbose, single, region, top;
int s_start, d_start, bytes, xor, alignment_test;
gf_t gf, gf_def;
time_t t0;
gf_internal_t *h;
- gf_general_t *a, *b, *c, *d, *ai, *bi;
- uint8_t a8, b8, c8, *mult4, *div4, *mult8, *div8;
- uint16_t a16, b16, c16, d16, *log16, *alog16;
- char as[50], bs[50], cs[50], ds[50], ais[50], bis[50];
- uint32_t mask;
+ gf_general_t *a, *b, *c, *d;
+ uint8_t a8, b8, c8, *mult4 = NULL, *mult8 = NULL;
+ uint16_t a16, b16, c16, *log16 = NULL, *alog16 = NULL;
+ char as[50], bs[50], cs[50], ds[50];
+ uint32_t mask = 0;
char *ra, *rb, *rc, *rd, *target;
int align;
@@ -115,8 +115,6 @@ int main(int argc, char **argv)
b = (gf_general_t *) malloc(sizeof(gf_general_t));
c = (gf_general_t *) malloc(sizeof(gf_general_t));
d = (gf_general_t *) malloc(sizeof(gf_general_t));
- ai = (gf_general_t *) malloc(sizeof(gf_general_t));
- bi = (gf_general_t *) malloc(sizeof(gf_general_t));
//15 bytes extra to make sure it's 16byte aligned
ra = (char *) malloc(sizeof(char)*REGION_SIZE+15);
@@ -145,12 +143,10 @@ int main(int argc, char **argv)
problem("No default for this value of w");
if (w == 4) {
mult4 = gf_w4_get_mult_table(&gf);
- div4 = gf_w4_get_div_table(&gf);
}
if (w == 8) {
mult8 = gf_w8_get_mult_table(&gf);
- div8 = gf_w8_get_div_table(&gf);
}
if (w == 16) {
@@ -240,7 +236,6 @@ int main(int argc, char **argv)
}
}
- tested = 0;
gf_general_multiply(&gf, a, b, c);
/* If w is 4, 8 or 16, then there are inline multiplication/division methods.
@@ -285,7 +280,6 @@ int main(int argc, char **argv)
/* If this is not composite, then first test against the default: */
if (h->mult_type != GF_MULT_COMPOSITE) {
- tested = 1;
gf_general_multiply(&gf_def, a, b, d);
if (!gf_general_are_equal(c, d, w)) {
@@ -306,7 +300,6 @@ int main(int argc, char **argv)
if (gf_general_is_zero(a, w) || gf_general_is_zero(b, w) ||
gf_general_is_one(a, w) || gf_general_is_one(b, w)) {
- tested = 1;
if (((gf_general_is_zero(a, w) || gf_general_is_zero(b, w)) && !gf_general_is_zero(c, w)) ||
(gf_general_is_one(a, w) && !gf_general_are_equal(b, c, w)) ||
(gf_general_is_one(b, w) && !gf_general_are_equal(a, c, w))) {
@@ -429,4 +422,5 @@ int main(int argc, char **argv)
gf_general_do_region_check(&gf, a, rc+s_start, rd+d_start, target+d_start, bytes, xor);
}
}
+ return 0;
}
diff --git a/tools/gf_add.c b/tools/gf_add.c
index b900e69..28cc12c 100644
--- a/tools/gf_add.c
+++ b/tools/gf_add.c
@@ -62,7 +62,7 @@ void print_128(uint64_t *v)
int main(int argc, char **argv)
{
- int hex, al, bl, w;
+ int hex, w;
uint32_t a, b, c, top;
uint64_t a64, b64, c64;
uint64_t a128[2], b128[2], c128[2];
diff --git a/tools/gf_inline_time.c b/tools/gf_inline_time.c
index e64f0b3..c81e8a9 100644
--- a/tools/gf_inline_time.c
+++ b/tools/gf_inline_time.c
@@ -62,8 +62,8 @@ int main(int argc, char **argv)
int w, j, i, size, iterations;
gf_t gf;
double timer, elapsed, dnum, num;
- uint8_t *ra, *rb, *mult4, *mult8;
- uint16_t *ra16, *rb16, *log16, *alog16;
+ uint8_t *ra = NULL, *rb = NULL, *mult4, *mult8;
+ uint16_t *ra16 = NULL, *rb16 = NULL, *log16, *alog16;
time_t t0;
if (argc != 5) usage(NULL);
@@ -164,4 +164,5 @@ int main(int argc, char **argv)
printf("Inline mult: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n",
elapsed, dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed);
}
+ return 0;
}
diff --git a/tools/gf_methods.c b/tools/gf_methods.c
index 3afb438..6664bec 100644
--- a/tools/gf_methods.c
+++ b/tools/gf_methods.c
@@ -76,7 +76,7 @@ int main(int argc, char *argv[])
int listing;
char *gf_argv[50], *x;
gf_t gf;
- char divs[200], ks[10], ls[10];
+ char ls[10];
char * w_str;
if (argc != 4) usage(NULL);
diff --git a/tools/gf_poly.c b/tools/gf_poly.c
index e19706c..44a24ac 100644
--- a/tools/gf_poly.c
+++ b/tools/gf_poly.c
@@ -84,7 +84,6 @@ int gcd_one(gf_t *gf, int w, int n, gf_general_t *poly, gf_general_t *prod)
{
gf_general_t *a, *b, zero, factor, p;
int i, j, da, db;
- char buf[30];
gf_general_set_zero(&zero, w);
@@ -123,7 +122,6 @@ void x_to_q_to_i_minus_x(gf_t *gf, int w, int n, gf_general_t *poly, int logq, i
gf_general_t *product;
gf_general_t p, zero, factor;
int j, k, lq;
- char buf[20];
gf_general_set_zero(&zero, w);
product = (gf_general_t *) malloc(sizeof(gf_general_t) * n*2);
@@ -181,9 +179,9 @@ void x_to_q_to_i_minus_x(gf_t *gf, int w, int n, gf_general_t *poly, int logq, i
free(x_to_q);
}
-main(int argc, char **argv)
+int main(int argc, char **argv)
{
- int w, i, power, n, ap, success, j;
+ int w, i, power, n, ap, success;
gf_t gf;
gf_general_t *poly, *prod;
char *string, *ptr;
diff --git a/tools/gf_time.c b/tools/gf_time.c
index 2bd2d04..4becc8d 100644
--- a/tools/gf_time.c
+++ b/tools/gf_time.c
@@ -119,7 +119,7 @@ int main(int argc, char **argv)
if (!create_gf_from_argv(&gf, w, argc, argv, 6)) usage(BM);
strcpy(tests, "");
- for (i = 0; i < argv[2][i] != '\0'; i++) {
+ for (i = 0; argv[2][i] != '\0'; i++) {
switch(argv[2][i]) {
case 'A': strcat(tests, single_tests);
strcat(tests, region_tests);
@@ -163,8 +163,8 @@ int main(int argc, char **argv)
for (i = 0; i < 3; i++) {
test = single_tests[i];
if (strchr(tests, test) != NULL) {
- if (tmethods[test] == NULL) {
- printf("No %s method.\n", tstrings[test]);
+ if (tmethods[(int)test] == NULL) {
+ printf("No %s method.\n", tstrings[(int)test]);
} else {
elapsed = 0;
dnum = 0;
@@ -176,7 +176,7 @@ int main(int argc, char **argv)
elapsed += timer_split(&timer);
}
printf("%14s: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n",
- tstrings[test], elapsed,
+ tstrings[(int)test], elapsed,
dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed);
}
}
@@ -185,8 +185,8 @@ int main(int argc, char **argv)
for (i = 0; i < 4; i++) {
test = region_tests[i];
if (strchr(tests, test) != NULL) {
- if (tmethods[test] == NULL) {
- printf("No %s method.\n", tstrings[test]);
+ if (tmethods[(int)test] == NULL) {
+ printf("No %s method.\n", tstrings[(int)test]);
} else {
elapsed = 0;
@@ -204,10 +204,11 @@ int main(int argc, char **argv)
elapsed += timer_split(&timer);
}
printf("%14s: XOR: %d %10.6lf s MB: %10.3lf %10.3lf MB/s\n",
- tstrings[test], xor, elapsed,
+ tstrings[(int)test], xor, elapsed,
ds*di/1024.0/1024.0, ds*di/1024.0/1024.0/elapsed);
}
}
}
}
+ return 0;
}