summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Plank <plank@cs.utk.edu>2013-03-04 17:06:43 -0500
committerJim Plank <plank@cs.utk.edu>2013-03-04 17:06:43 -0500
commit47896e9ddcd26da694d6775bf7fcadc7feebf385 (patch)
tree379f1f45289ebc08ac6d91fc0f0ff618f9c236c4
parent4d5f453827d1d100d83d003d807edd52672c943e (diff)
downloadgf-complete-47896e9ddcd26da694d6775bf7fcadc7feebf385.tar.gz
Killing all of these junk files. They should not be in the repository.
-rwxr-xr-xjunk8
-rwxr-xr-xjunk-pick-best-outputbin28584 -> 0 bytes
-rw-r--r--junk-pick-best-output.cpp78
-rw-r--r--junk-proc.awk11
-rw-r--r--junk-save.c658
-rw-r--r--junk-w16-backup.c1585
-rw-r--r--junk-w16-timing-tests.sh12
-rw-r--r--junk-w2.eps203
-rw-r--r--junk-w32-backup.c1337
-rw-r--r--junk-w32-single-time.c16
-rw-r--r--junk-w4-out.txt60
-rw-r--r--junk-w4-timing-out.txt792
-rw-r--r--junk-w4-timing-tests.sh11
-rw-r--r--junk-w4-timing.jgr11
-rw-r--r--junk-w4.jgr6
-rw-r--r--junk-w8-timing-out.txt936
-rw-r--r--junk-w8-timing-tests.sh13
-rw-r--r--junk-w8-timing.jgr11
-rw-r--r--junk.c18
-rw-r--r--junk.ps199
-rw-r--r--junk.sh14
-rw-r--r--junk_gf_unit.c957
22 files changed, 0 insertions, 6936 deletions
diff --git a/junk b/junk
deleted file mode 100755
index 1376133..0000000
--- a/junk
+++ /dev/null
@@ -1,8 +0,0 @@
-gf_54
-gf_div
-gf_methods
-gf_mult
-gf_time
-gf_unit
-junk
-junk-pick-best-output
diff --git a/junk-pick-best-output b/junk-pick-best-output
deleted file mode 100755
index eb0db93..0000000
--- a/junk-pick-best-output
+++ /dev/null
Binary files differ
diff --git a/junk-pick-best-output.cpp b/junk-pick-best-output.cpp
deleted file mode 100644
index 4f5fed8..0000000
--- a/junk-pick-best-output.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-#include <string>
-#include <vector>
-#include <list>
-#include <algorithm>
-#include <map>
-#include <set>
-#include <iostream>
-#include <sstream>
-#include <cstdio>
-#include <cstdlib>
-using namespace std;
-
-#define VIT(i, v) for (i = 0; i < v.size(); i++)
-#define IT(it, ds) for (it = ds.begin(); it != ds.end(); it++)
-#define FUP(i, n) for (i = 0; i < n; i++)
-
-typedef map<int, string> ISmap;
-typedef map<int, int> IImap;
-typedef map<string, double> SDmap;
-
-typedef ISmap::iterator ISmit;
-typedef IImap::iterator IImit;
-typedef SDmap::iterator SDmit;
-
-typedef vector <string> SVec;
-
-void StoSVec(string &s, SVec &sv)
-{
- istringstream ss;
- string s2;
-
- ss.clear();
- ss.str(s);
- while (ss >> s2) sv.push_back(s2);
-}
-
-main()
-{
- string s, k;
- double d, b;
- int i;
- SVec sv;
- SDmap bmap;
- SDmit bmit;
-
- while (getline(cin, s)) {
- sv.clear();
- StoSVec(s, sv);
-
- if (sv[0] == "Seed:") {
- b = 0;
- for (i = 0; i < 2; i++) {
- getline(cin, s);
- sv.clear();
- StoSVec(s, sv);
- sscanf(sv[3].c_str(), "%lf", &d);
- if (d > b) b = d;
- }
- getline(cin, s);
- sv.clear();
- StoSVec(s, sv);
- k = sv[2];
- k += " ";
- k += sv[3];
- for (i = 4; i < sv.size(); i++) {
- if (sv[i] != "-") {
- k += " ";
- k += sv[i];
- }
- }
- if (bmap[k] < b) bmap[k] = b;
- }
- }
-
- IT(bmit, bmap) {
- printf("%10.4lf %s\n", bmit->second, bmit->first.c_str());
- }
-}
diff --git a/junk-proc.awk b/junk-proc.awk
deleted file mode 100644
index ed1f4cd..0000000
--- a/junk-proc.awk
+++ /dev/null
@@ -1,11 +0,0 @@
-($1 == "Seed:") { l = 0; n++; t=0 }
-{ if (l >= 1 && l <= 4) {
- t += $4
- if (l == 4) avg = t/4.0
- }
- if (l == 5) {
- printf("xaxis max %d hash_label at %d : %s\n", n+1, n, $0 )
- printf("newcurve marktype xbar marksize 1 cfill 1 1 0 pts %d %.2lf\n", n, avg);
- }
- l++
-}
diff --git a/junk-save.c b/junk-save.c
deleted file mode 100644
index 9b73025..0000000
--- a/junk-save.c
+++ /dev/null
@@ -1,658 +0,0 @@
-
-/*
- c = gf.multiply.w32(&gf, a, b);
- tested = 0;
-
-*/
- /* If this is not composite, then first test against the default: */
-
-/*
- if (h->mult_type != GF_MULT_COMPOSITE) {
- tested = 1;
- d = gf_def.multiply.w32(&gf_def, a, b);
-
- if (c != d) {
- printf("Error in single multiplication (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c);
- printf(" The default returned %x\n", d);
- exit(1);
- }
- }
-
-*/
- /* Now, we also need to double-check, in case the default is wanky, and when
- we're performing composite operations. Start with 0 and 1: */
-
-/*
- if (a == 0 || b == 0 || a == 1 || b == 1) {
- tested = 1;
- if (((a == 0 || b == 0) && c != 0) ||
- (a == 1 && c != b) ||
- (b == 1 && c != a)) {
- printf("Error in single multiplication (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x, which is clearly wrong.\n", a, b, c);
- exit(1);
- }
-
-*/
- /* If division or inverses are defined, let's test all combinations to make sure
- that the operations are consistent with each other. */
-
-/*
- } else {
- if ((c & mask) != c) {
- printf("Error in single multiplication (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x, which is too big.\n", a, b, c);
- exit(1);
- }
-
- }
- if (gf.inverse.w32 != NULL && (a != 0 || b != 0)) {
- tested = 1;
- if (a != 0) {
- ai = gf.inverse.w32(&gf, a);
-
- if (gf.multiply.w32(&gf, c, ai) != b) {
- printf("Error in single multiplication (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c);
- printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n",
- a, ai, c, ai, gf.multiply.w32(&gf, c, ai));
- exit(1);
- }
- }
- if (b != 0) {
- bi = gf.inverse.w32(&gf, b);
- if (gf.multiply.w32(&gf, c, bi) != a) {
- printf("Error in single multiplication (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c);
- printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n",
- b, bi, c, bi, gf.multiply.w32(&gf, c, bi));
- exit(1);
- }
- }
- }
- if (gf.divide.w32 != NULL && (a != 0 || b != 0)) {
- tested = 1;
-
- if (a != 0) {
- ai = gf.divide.w32(&gf, c, a);
-
- if (ai != b) {
- printf("Error in single multiplication/division (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c);
- printf(" gf.divide.w32() of %x and %x returned %x\n", c, a, ai);
- exit(1);
- }
- }
- if (b != 0) {
- bi = gf.divide.w32(&gf, c, b);
-
- if (bi != a) {
- printf("Error in single multiplication/division (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c);
- printf(" gf.divide.w32() of %x and %x returned %x\n", c, b, bi);
- exit(1);
- }
- }
- }
-
- if (!tested) problem("There is no way to test multiplication.\n");
- }
-*/
-
-/*
- if (region) {
-
- if (w == 4) {
- if (gf.multiply_region.w32 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r8b = (uint8_t *) malloc(REGION_SIZE);
- r8c = (uint8_t *) malloc(REGION_SIZE);
- r8d = (uint8_t *) malloc(REGION_SIZE);
- fill_random_region(r8b, REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (a = 0; a < 16; a++) {
- fill_random_region(r8c, REGION_SIZE);
- memcpy(r8d, r8c, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint8_t);
- gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor);
- for (i = sindex; i < eindex; i++) {
- b = (r8b[i] >> 4);
- c = (r8c[i] >> 4);
- d = (r8d[i] >> 4);
- if (!xor && gf.multiply.w32(&gf, a, b) != c) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i));
- printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) );
- printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i));
- printf(" %d %d %d %d\n", a, b, c, d);
- printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- b = (r8b[i] & 0xf);
- c = (r8c[i] & 0xf);
- d = (r8d[i] & 0xf);
- if (!xor && gf.multiply.w32(&gf, a, b) != c) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i));
- printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) );
- printf("i=%d. 0x%x 0x%x 0x%x 0x%x\n", i, a, r8b[i], r8c[i], r8d[i]);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i));
- printf(" (%d * %d ^ %d) should equal %d - equals %d\n",
- a, b, d, (gf.multiply.w32(&gf, a, b) ^ d), c);
- printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (a = 0; a < 16; a++) {
- fill_random_region(r8b, REGION_SIZE);
- memcpy(r8d, r8b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint8_t);
- gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor);
- for (i = sindex; i < eindex; i++) {
- b = (r8b[i] >> 4);
- d = (r8d[i] >> 4);
- if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0");
- if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) {
- printf("i=%d. %d %d %d\n", i, a, b, d);
- printf("i=%d. %d %d %d\n", i, a, r8b[i], r8d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- b = (r8b[i] & 0xf);
- d = (r8d[i] & 0xf);
- if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0");
- if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) {
- printf("%d %d %d\n", a, b, d);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r8b);
- free(r8c);
- free(r8d);
- }
- } else if (w == 8) {
- if (gf.multiply_region.w32 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r8b = (uint8_t *) malloc(REGION_SIZE);
- r8c = (uint8_t *) malloc(REGION_SIZE);
- r8d = (uint8_t *) malloc(REGION_SIZE);
- fill_random_region(r8b, REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (a = 0; a < 256; a++) {
- fill_random_region(r8c, REGION_SIZE);
- memcpy(r8d, r8c, REGION_SIZE);
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- sindex = 0;
- eindex = REGION_SIZE;
- } else {
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1);
- }
- size = (eindex-sindex)*sizeof(uint8_t);
- gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor);
- for (i = sindex; i < eindex; i++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- b = get_alt_map_2w8(i, (uint8_t*)r8b, REGION_SIZE / 2);
- c = get_alt_map_2w8(i, (uint8_t*)r8c, REGION_SIZE / 2);
- d = get_alt_map_2w8(i, (uint8_t*)r8d, REGION_SIZE / 2);
- } else {
- b = r8b[i];
- c = r8c[i];
- d = r8d[i];
- }
- if (!xor && gf.multiply.w32(&gf, a, b) != c) {
- printf("i=%d. %d %d %d %d\n", i, a, b, c, d);
- printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]);
- printf("%llx. Sindex: %d\n", r8b+i, sindex);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) {
- printf("i=%d. %d %d %d %d\n", i, a, b, c, d);
- printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- continue;
- }
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (a = 0; a < 256; a++) {
- fill_random_region(r8b, REGION_SIZE);
- memcpy(r8d, r8b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint8_t);
- gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor);
- for (i = sindex; i < eindex; i++) {
- b = r8b[i];
- d = r8d[i];
- if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0");
- if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) {
- printf("i=%d. %d %d %d\n", i, a, b, d);
- printf("i=%d. %d %d %d\n", i, a, r8b[i], r8d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r8b);
- free(r8c);
- free(r8d);
- }
- } else if (w == 16) {
- if (gf.multiply_region.w32 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r16b = (uint16_t *) malloc(REGION_SIZE);
- r16c = (uint16_t *) malloc(REGION_SIZE);
- r16d = (uint16_t *) malloc(REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- fill_random_region(r16b, REGION_SIZE);
- a = MOA_Random_W(w, 0);
- fill_random_region(r16c, REGION_SIZE);
- memcpy(r16d, r16c, REGION_SIZE);
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- sindex = 0;
- eindex = REGION_SIZE / sizeof(uint16_t);
- } else {
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1);
- }
- size = (eindex-sindex)*sizeof(uint16_t);
- gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16c+sindex), a, size, xor);
- ai = gf.inverse.w32(&gf, a);
- if (!xor) {
- gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), ai, size, xor);
- } else {
- gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), 1, size, xor);
- gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), ai, size, xor);
- }
-
- for (i = sindex; i < eindex; i++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- b = get_alt_map_2w16(i, (uint8_t*)r16b, size / 2);
- c = get_alt_map_2w16(i, (uint8_t*)r16c, size / 2);
- d = get_alt_map_2w16(i, (uint8_t*)r16d, size / 2);
- } else {
- b = r16b[i];
- c = r16c[i];
- d = r16d[i];
- }
- if (!xor && d != b) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i));
- printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d);
- printf("%d is the inverse of %d\n", ai, a);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && b != 0) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i));
- printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n");
- printf(" b should equal 0, but it doesn't. Probe into it.\n");
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- continue;
- }
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- a = MOA_Random_W(w, 0);
- fill_random_region(r16b, REGION_SIZE);
- memcpy(r16d, r16b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint16_t);
- gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), a, size, xor);
- ai = gf.inverse.w32(&gf, a);
- if (!xor) {
- gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, xor);
- } else {
- gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), 1, size, xor);
- gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, 0);
- }
-
- for (i = sindex; i < eindex; i++) {
- b = r16b[i];
- c = r16c[i];
- d = r16d[i];
- if (!xor && (d != b)) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i));
- printf("We did d=b; b = ba; b = b(a^-1).\n");
- printf("So, b should equal d, but it doesn't. Look into it.\n");
- printf("b = %d. d = %d. a = %d\n", b, d, a);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && d != b) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i));
- printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n");
- printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n");
- printf("So, b should equal d, but it doesn't. Look into it.\n");
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r16b);
- free(r16c);
- free(r16d);
- }
- } else if (w == 32) {
- if (gf.multiply_region.w32 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r32b = (uint32_t *) malloc(REGION_SIZE);
- r32c = (uint32_t *) malloc(REGION_SIZE);
- r32d = (uint32_t *) malloc(REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- a = MOA_Random_32();
- fill_random_region(r32b, REGION_SIZE);
- fill_random_region(r32c, REGION_SIZE);
- memcpy(r32d, r32c, REGION_SIZE);
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- sindex = 0;
- eindex = REGION_SIZE / sizeof(uint32_t);
- } else {
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1);
- }
- size = (eindex-sindex)*sizeof(uint32_t);
- gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32c+sindex), a, size, xor);
- ai = gf.inverse.w32(&gf, a);
- if (!xor) {
- gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), ai, size, xor);
- } else {
- gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), 1, size, xor);
- gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), ai, size, xor);
- }
- for (i = sindex; i < eindex; i++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- b = get_alt_map_2w32(i, (uint8_t*)r32b, size / 2);
- c = get_alt_map_2w32(i, (uint8_t*)r32c, size / 2);
- d = get_alt_map_2w32(i, (uint8_t*)r32d, size / 2);
- i++;
- } else {
- b = r32b[i];
- c = r32c[i];
- d = r32d[i];
- }
- if (!xor && d != b) {
- printf("i=%d. Addresses: b: 0x%lx\n", i, (unsigned long) (r32b+i));
- printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d);
- printf("%d is the inverse of %d\n", ai, a);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && b != 0) {
- printf("i=%d. Addresses: b: 0x%lx c: 0x%lx d: 0x%lx\n", i,
- (unsigned long) (r32b+i), (unsigned long) (r32c+i), (unsigned long) (r32d+i));
- printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n");
- printf(" b should equal 0, but it doesn't. Probe into it.\n");
- printf("a: %8x b: %8x c: %8x, d: %8x\n", a, b, c, d);
- problem("Failed buffer-constant, xor=1");
- }
-
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- continue;
- }
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- a = MOA_Random_32();
- fill_random_region(r32b, REGION_SIZE);
- memcpy(r32d, r32b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint32_t);
- gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), a, size, xor);
- ai = gf.inverse.w32(&gf, a);
- if (!xor) {
- gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, xor);
- } else {
- gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), 1, size, xor);
- gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, 0);
- }
-
- for (i = sindex; i < eindex; i++) {
- b = r32b[i];
- c = r32c[i];
- d = r32d[i];
- if (!xor && (d != b)) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r32b+i));
- printf("We did d=b; b = ba; b = b(a^-1).\n");
- printf("So, b should equal d, but it doesn't. Look into it.\n");
- printf("b = %d. d = %d. a = %d\n", b, d, a);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && d != b) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r32b+i));
- printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n");
- printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n");
- printf("So, b should equal d, but it doesn't. Look into it.\n");
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r32b);
- free(r32c);
- free(r32d);
- }
- } else if (w == 64) {
- if (gf.multiply_region.w64 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r64b = (uint64_t *) malloc(REGION_SIZE);
- r64c = (uint64_t *) malloc(REGION_SIZE);
- r64d = (uint64_t *) malloc(REGION_SIZE);
- fill_random_region(r64b, REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- a64 = MOA_Random_64();
- fill_random_region(r64c, REGION_SIZE);
- memcpy(r64d, r64c, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint64_t);
- gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64c+sindex), a64, size, xor);
- for (i = sindex; i < eindex; i++) {
- b64 = r64b[i];
- c64 = r64c[i];
- d64 = r64d[i];
- if (!xor && gf.multiply.w64(&gf, a64, b64) != c64) {
- printf("i=%d. 0x%llx 0x%llx 0x%llx should be 0x%llx\n", i, a64, b64, c64,
- gf.multiply.w64(&gf, a64, b64));
- printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i]);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && (gf.multiply.w64(&gf, a64, b64) ^ d64) != c64) {
- printf("i=%d. 0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, b64, c64, d64);
- printf("i=%d. 0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i], r64d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- a64 = MOA_Random_64();
- fill_random_region(r64b, REGION_SIZE);
- memcpy(r64d, r64b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint64_t);
- gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64b+sindex), a64, size, xor);
- for (i = sindex; i < eindex; i++) {
- b64 = r64b[i];
- d64 = r64d[i];
- if (!xor && gf.multiply.w64(&gf, a64, d64) != b64) problem("Failed buffer-constant, xor=0");
- if (xor && (gf.multiply.w64(&gf, a64, d64) ^ d64) != b64) {
- printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, b64, d64);
- printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r64b);
- free(r64c);
- free(r64d);
- }
- } else if (w == 128) {
- if (gf.multiply_region.w128 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r128b = (uint64_t *) malloc(REGION_SIZE);
- r128c = (uint64_t *) malloc(REGION_SIZE);
- r128d = (uint64_t *) malloc(REGION_SIZE);
- fill_random_region(r128b, REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- MOA_Random_128(a128);
- fill_random_region(r128c, REGION_SIZE);
- memcpy(r128d, r128c, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint64_t)*2;
- gf.multiply_region.w128(&gf, (void *) (r128b+sindex*2), (void *) (r128c+sindex*2), a128, size, xor);
- for (i = sindex; i < eindex; i++) {
- b128[0] = r128b[2*i];
- b128[1] = r128b[2*i+1];
- c128[0] = r128c[2*i];
- c128[1] = r128c[2*i+1];
- d128[0] = r128d[2*i];
- d128[1] = r128d[2*i+1];
- gf.multiply.w128(&gf, a128, b128, e128);
- if (xor) {
- e128[0] ^= d128[0];
- e128[1] ^= d128[1];
- }
- if (!xor && !GF_W128_EQUAL(c128, e128)) {
- printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx should be 0x%llx%llx\n",
- i, a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], e128[0], e128[1]);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && !GF_W128_EQUAL(e128, c128)) {
- printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx 0x%llx%llx\n", i,
- a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], d128[0], d128[1]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- MOA_Random_128(a128);
- fill_random_region(r128b, REGION_SIZE);
- memcpy(r128d, r128b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- sindex = 0;
- eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1);
- eindex = REGION_SIZE/(2*sizeof(uint64_t));
- size = (eindex-sindex)*sizeof(uint64_t)*2;
- gf.multiply_region.w128(&gf, (void *) (r128b+sindex), (void *) (r128b+sindex), a128, size, xor);
- for (i = sindex; i < eindex; i++) {
- b128[0] = r128b[2*i];
- b128[1] = r128b[2*i + 1];
- d128[0] = r128d[2*i];
- d128[1] = r128d[2*i + 1];
- gf.multiply.w128(&gf, a128, d128, e128);
- if (xor) {
- e128[0] ^= d128[0];
- e128[1] ^= d128[1];
- }
- if (!xor && !GF_W128_EQUAL(b128, e128)) problem("Failed buffer-constant, xor=0");
- if (xor && !GF_W128_EQUAL(b128, e128)) {
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r128b);
- free(r128c);
- free(r128d);
- }
- }
- }
- exit(0);
- */
-}
diff --git a/junk-w16-backup.c b/junk-w16-backup.c
deleted file mode 100644
index ad0788a..0000000
--- a/junk-w16-backup.c
+++ /dev/null
@@ -1,1585 +0,0 @@
-/*
- * gf_w16.c
- *
- * Routines for 16-bit Galois fields
- */
-
-#include "gf_int.h"
-#include <stdio.h>
-#include <stdlib.h>
-
-#define GF_FIELD_WIDTH (16)
-#define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH)
-#define GF_MULT_GROUP_SIZE GF_FIELD_SIZE-1
-
-#define GF_BASE_FIELD_WIDTH (8)
-#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH)
-#define GF_S_GF_8_2 (63)
-
-struct gf_logtable_data {
- int log_tbl[GF_FIELD_SIZE];
- gf_val_16_t antilog_tbl[GF_FIELD_SIZE * 2];
- gf_val_16_t inv_tbl[GF_FIELD_SIZE];
-};
-
-struct gf_zero_logtable_data {
- int log_tbl[GF_FIELD_SIZE];
- gf_val_16_t _antilog_tbl[GF_FIELD_SIZE * 4];
- gf_val_16_t *antilog_tbl;
- gf_val_16_t inv_tbl[GF_FIELD_SIZE];
-};
-
-struct gf_lazytable_data {
- int log_tbl[GF_FIELD_SIZE];
- gf_val_16_t antilog_tbl[GF_FIELD_SIZE * 2];
- gf_val_16_t inv_tbl[GF_FIELD_SIZE];
- gf_val_16_t lazytable[GF_FIELD_SIZE];
-};
-
-struct gf_w8_logtable_data {
- gf_val_8_t log_tbl[GF_BASE_FIELD_SIZE];
- gf_val_8_t antilog_tbl[GF_BASE_FIELD_SIZE * 2];
- gf_val_8_t *antilog_tbl_div;
-};
-
-struct gf_w8_single_table_data {
- gf_val_8_t mult[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE];
- gf_val_8_t div[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE];
-};
-
-struct gf_w8_double_table_data {
- gf_val_8_t div[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE];
- gf_val_8_t mult[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE*GF_BASE_FIELD_SIZE];
-};
-
-
-#define MM_PRINT(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 2) printf(" %02x %02x", blah[15-ii], blah[14-ii]); printf("\n"); }
-
-static
-inline
-gf_val_16_t gf_w16_inverse_from_divide (gf_t *gf, gf_val_16_t a)
-{
- return gf->divide.w16(gf, 1, a);
-}
-
-static
-inline
-gf_val_16_t gf_w16_divide_from_inverse (gf_t *gf, gf_val_16_t a, gf_val_16_t b)
-{
- b = gf->inverse.w16(gf, b);
- return gf->multiply.w16(gf, a, b);
-}
-
-static
-void
-gf_w16_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
- int i;
- gf_val_16_t *s16;
- gf_val_16_t *d16;
-
- s16 = (gf_val_16_t *) src;
- d16 = (gf_val_16_t *) dest;
-
- if (xor) {
- for (i = 0; i < bytes/2; i++) {
- d16[i] ^= gf->multiply.w16(gf, val, s16[i]);
- }
- } else {
- for (i = 0; i < bytes/2; i++) {
- d16[i] = gf->multiply.w16(gf, val, s16[i]);
- }
- }
-}
-
-static
-inline
-gf_val_16_t gf_w16_euclid (gf_t *gf, gf_val_16_t b)
-{
- gf_val_32_t e_i, e_im1, e_ip1;
- gf_val_32_t d_i, d_im1, d_ip1;
- gf_val_16_t y_i, y_im1, y_ip1;
- gf_val_16_t c_i;
-
- if (b == 0) return -1;
- e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly;
- e_i = b;
- d_im1 = 16;
- for (d_i = d_im1; ((1 << d_i) & e_i) == 0; d_i--) ;
- y_i = 1;
- y_im1 = 0;
-
- while (e_i != 1) {
-
- e_ip1 = e_im1;
- d_ip1 = d_im1;
- c_i = 0;
-
- while (d_ip1 >= d_i) {
- c_i ^= (1 << (d_ip1 - d_i));
- e_ip1 ^= (e_i << (d_ip1 - d_i));
- while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--;
- }
-
- y_ip1 = y_im1 ^ gf->multiply.w16(gf, c_i, y_i);
- y_im1 = y_i;
- y_i = y_ip1;
-
- e_im1 = e_i;
- d_im1 = d_i;
- e_i = e_ip1;
- d_i = d_ip1;
- }
-
- return y_i;
-}
-
-static
-inline
-gf_val_16_t gf_w16_matrix (gf_t *gf, gf_val_16_t b)
-{
- return gf_bitmatrix_inverse(b, 16, ((gf_internal_t *) (gf->scratch))->prim_poly);
-}
-
-/* JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only
- include it for completeness. It does have the feature that it requires no
- extra memory.
-*/
-
-static
-inline
-gf_val_16_t
-gf_w16_shift_multiply (gf_t *gf, gf_val_16_t a16, gf_val_16_t b16)
-{
- uint32_t product, i, pp, a, b;
- gf_internal_t *h;
-
- a = a16;
- b = b16;
- h = (gf_internal_t *) gf->scratch;
- pp = h->prim_poly;
-
- product = 0;
-
- for (i = 0; i < GF_FIELD_WIDTH; i++) {
- if (a & (1 << i)) product ^= (b << i);
- }
- for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) {
- if (product & (1 << i)) product ^= (pp << (i-GF_FIELD_WIDTH));
- }
- return product;
-}
-
-static
-int gf_w16_shift_init(gf_t *gf)
-{
- gf->multiply.w16 = gf_w16_shift_multiply;
- gf->inverse.w16 = gf_w16_euclid;
- gf->multiply_region.w16 = gf_w16_multiply_region_from_single;
- return 1;
-}
-
-/* KMG: GF_MULT_LOGTABLE: */
-
-static
-void
-gf_w16_log_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
- unsigned long uls, uld;
- int i;
- uint16_t lv, b, c;
- uint16_t *s16, *d16;
- int num_syms = bytes >> 1;
- int sym_divisible = bytes % 2;
-
- struct gf_logtable_data *ltd;
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2);
- if (sym_divisible) {
- gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2);
- }
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private;
- s16 = (uint16_t *) src;
- d16 = (uint16_t *) dest;
-
- lv = ltd->log_tbl[val];
-
- if (xor) {
- for (i = 0; i < num_syms; i++) {
- d16[i] ^= (s16[i] == 0 ? 0 : ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]);
- }
- } else {
- for (i = 0; i < num_syms; i++) {
- d16[i] = (s16[i] == 0 ? 0 : ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]);
- }
- }
-}
-
-static
-inline
-gf_val_16_t
-gf_w16_log_multiply(gf_t *gf, gf_val_16_t a, gf_val_16_t b)
-{
- struct gf_logtable_data *ltd;
-
- ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private;
- return (a == 0 || b == 0) ? 0 : ltd->antilog_tbl[ltd->log_tbl[a] + ltd->log_tbl[b]];
-}
-
-static
-inline
-gf_val_16_t
-gf_w16_log_divide(gf_t *gf, gf_val_16_t a, gf_val_16_t b)
-{
- int log_sum = 0;
- struct gf_logtable_data *ltd;
-
- if (a == 0 || b == 0) return 0;
- ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private;
-
- log_sum = ltd->log_tbl[a] - ltd->log_tbl[b] + (GF_MULT_GROUP_SIZE);
- return (ltd->antilog_tbl[log_sum]);
-}
-
-static
-gf_val_16_t
-gf_w16_log_inverse(gf_t *gf, gf_val_16_t a)
-{
- struct gf_logtable_data *ltd;
-
- ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private;
- return (ltd->inv_tbl[a]);
-}
-
-static
-int gf_w16_log_init(gf_t *gf)
-{
- gf_internal_t *h;
- struct gf_logtable_data *ltd;
- int i, b;
-
- h = (gf_internal_t *) gf->scratch;
- ltd = h->private;
-
- ltd->log_tbl[0] = 0;
-
- b = 1;
- for (i = 0; i < GF_MULT_GROUP_SIZE; i++) {
- ltd->log_tbl[b] = (gf_val_16_t)i;
- ltd->antilog_tbl[i] = (gf_val_16_t)b;
- ltd->antilog_tbl[i+GF_MULT_GROUP_SIZE] = (gf_val_16_t)b;
- b <<= 1;
- if (b & GF_FIELD_SIZE) {
- b = b ^ h->prim_poly;
- }
- }
- ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */
- ltd->inv_tbl[1] = 1;
- for (i = 2; i < GF_FIELD_SIZE; i++) {
- ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]];
- }
-
- gf->inverse.w16 = gf_w16_log_inverse;
- gf->divide.w16 = gf_w16_log_divide;
- gf->multiply.w16 = gf_w16_log_multiply;
- gf->multiply_region.w16 = gf_w16_log_multiply_region;
-
- return 1;
-}
-
-/* JSP: GF_MULT_SPLIT_TABLE: Using 8 multiplication tables to leverage SSE instructions.
-*/
-
-static
-void
-gf_w16_split_4_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
- uint64_t i, j, a, c, prod;
- uint16_t *s16, *d16, *top;
- gf_internal_t *h;
- uint16_t table[4][16];
-
- h = (gf_internal_t *) gf->scratch;
-
- for (j = 0; j < 16; j++) {
- for (i = 0; i < 4; i++) {
- c = (j << (i*4));
- table[i][j] = gf_w16_log_multiply(gf, c, val);
- }
- }
-
- s16 = (uint16_t *) src;
- d16 = (uint16_t *) dest;
- top = (uint16_t *) (dest+bytes);
-
- while (d16 < top) {
- a = *s16;
- prod = (xor) ? *d16 : 0;
- for (i = 0; i < 4; i++) {
- prod ^= table[i][a&0xf];
- a >>= 4;
- }
- *d16 = prod;
- s16++;
- d16++;
- }
-}
-
-static
-void
-gf_w16_split_8_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
- uint64_t j, a, c, prod, *s64, *d64, *top64;
- uint16_t *s16, *d16, *top;
- gf_internal_t *h;
- uint64_t htable[256], ltable[256];
- unsigned long uls, uld;
-
- h = (gf_internal_t *) gf->scratch;
-
- uls = ((unsigned long) src) & 0xf;
- uld = ((unsigned long) dest) & 0xf;
- if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_8_16_lazy_multiply_region", 2);
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- for (j = 0; j < 256; j++) {
- ltable[j] = gf_w16_log_multiply(gf, j, val);
- htable[j] = gf_w16_log_multiply(gf, (j<<8), val);
- }
-
- s16 = (uint16_t *) src;
- d16 = (uint16_t *) dest;
- top = (uint16_t *) (dest+bytes);
-
- if (uls != 0) {
- while (uls != 16 && d16 < top) {
- a = *s16;
- prod = (xor) ? *d16 : 0;
- prod ^= ltable[a&0xff];
- a >>= 8;
- prod ^= htable[a];
- *d16 = prod;
- s16++;
- d16++;
- uls += 2;
- }
- if (d16 == top) return;
- }
-
- uls = ((unsigned long) top) & 0xf;
- uld = ((unsigned long) top) ^ uls;
- top64 = (uint64_t *) uld;
- s64 = (uint64_t *) s16;
- d64 = (uint64_t *) d16;
-
-/* Does Unrolling Matter? -- Doesn't seem to.
- while (d64 != top64) {
- a = *s64;
-
- prod = htable[a >> 56];
- a <<= 8;
- prod ^= ltable[a >> 56];
- a <<= 8;
- prod <<= 16;
-
- prod ^= htable[a >> 56];
- a <<= 8;
- prod ^= ltable[a >> 56];
- a <<= 8;
- prod <<= 16;
-
- prod ^= htable[a >> 56];
- a <<= 8;
- prod ^= ltable[a >> 56];
- a <<= 8;
- prod <<= 16;
-
- prod ^= htable[a >> 56];
- a <<= 8;
- prod ^= ltable[a >> 56];
- prod ^= ((xor) ? *d64 : 0);
- *d64 = prod;
- *s64++;
- *d64++;
- }
-*/
-
- while (d64 != top64) {
- a = *s64;
-
- prod = 0;
- for (j = 0; j < 4; j++) {
- prod <<= 16;
- prod ^= htable[a >> 56];
- a <<= 8;
- prod ^= ltable[a >> 56];
- a <<= 8;
- }
-
- prod ^= ((xor) ? *d64 : 0);
- *d64 = prod;
- *s64++;
- *d64++;
- }
-
-
- if (uls != 0) {
- d16 = (uint16_t *) d64;
- s16 = (uint16_t *) s64;
- while (d16 < top) {
- a = *s16;
- prod = (xor) ? *d16 : 0;
- prod ^= ltable[a&0xff];
- a >>= 8;
- prod ^= htable[a];
- *d16 = prod;
- s16++;
- d16++;
- }
- }
- return;
-}
-
-static
-void
-gf_w16_table_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
- uint64_t j, a, c, prod, *s64, *d64, *top64, pp;
- uint16_t *s16, *d16, *top;
- gf_internal_t *h;
- struct gf_lazytable_data *ltd;
- unsigned long uls, uld;
-
- h = (gf_internal_t *) gf->scratch;
-
- uls = ((unsigned long) src) & 0xf;
- uld = ((unsigned long) dest) & 0xf;
- if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_table_lazy_multiply_region", 2);
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- ltd = (struct gf_lazytable_data *) h->private;
-
- ltd->lazytable[0] = 0;
- a = val;
- c = 1;
- pp = h->prim_poly;
-
- do {
- ltd->lazytable[c] = a;
- c <<= 1;
- if (c & (1 << GF_FIELD_WIDTH)) c ^= pp;
- a <<= 1;
- if (a & (1 << GF_FIELD_WIDTH)) a ^= pp;
- } while (c != 1);
-
- s16 = (uint16_t *) src;
- d16 = (uint16_t *) dest;
- top = (uint16_t *) (dest+bytes);
-
- if (uls != 0) {
- while (uls != 16 && d16 < top) {
- prod = (xor) ? *d16 : 0;
- prod ^= ltd->lazytable[*s16];
- *d16 = prod;
- s16++;
- d16++;
- uls += 2;
- }
- if (d16 == top) return;
- }
-
- uls = ((unsigned long) top) & 0xf;
- uld = ((unsigned long) top) ^ uls;
- top64 = (uint64_t *) uld;
- s64 = (uint64_t *) s16;
- d64 = (uint64_t *) d16;
-
- /* Unrolling doesn't seem to matter
- while (d64 != top64) {
- a = *s64;
-
- prod = ltd->lazytable[a >> 48];
- a <<= 16;
- prod <<= 16;
-
- prod ^= ltd->lazytable[a >> 48];
- a <<= 16;
- prod <<= 16;
-
- prod ^= ltd->lazytable[a >> 48];
- a <<= 16;
- prod <<= 16;
-
- prod ^= ltd->lazytable[a >> 48];
-
- prod ^= ((xor) ? *d64 : 0);
- *d64 = prod;
- *s64++;
- *d64++;
- }
- */
-
- while (d64 != top64) {
- a = *s64;
-
- prod = 0;
- for (j = 0; j < 4; j++) {
- prod <<= 16;
- prod ^= ltd->lazytable[a >> 48];
- a <<= 16;
- }
- prod ^= ((xor) ? *d64 : 0);
- *d64 = prod;
- *s64++;
- *d64++;
- }
-
- if (uls != 0) {
- d16 = (uint16_t *) d64;
- s16 = (uint16_t *) s64;
- while (d16 < top) {
- prod = (xor) ? *d16 : 0;
- prod ^= ltd->lazytable[*s16];
- *d16 = prod;
- s16++;
- d16++;
- }
- }
- return;
-}
-
-static
-void
-gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
-#ifdef INTEL_SSE4
- uint64_t i, j, *s64, *d64, *top64;;
- uint64_t a, c, prod;
- uint16_t *s16, *d16, *top;
- uint8_t low[4][16];
- uint8_t high[4][16];
- unsigned long uls, uld;
-
- __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], shuffler, unshuffler, tta, ttb;
-
- struct gf_single_table_data *std;
-
- uls = ((unsigned long) src) & 0xf;
- uld = ((unsigned long) dest) & 0xf;
- if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_4_16_lazy_sse_altmap_multiply_region", 2);
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- for (j = 0; j < 16; j++) {
- for (i = 0; i < 4; i++) {
- c = (j << (i*4));
- prod = gf_w16_log_multiply(gf, c, val);
- low[i][j] = (prod & 0xff);
- high[i][j] = (prod >> 8);
- }
- }
-
- s16 = (uint16_t *) src;
- d16 = (uint16_t *) dest;
- top = (uint16_t *) (dest+bytes);
-
- if (uls != 0) {
- while (uls != 16 && d16 < top) {
- a = *s16;
- prod = (xor) ? *d16 : 0;
- for (i = 0; i < 4; i++) {
- c = a & 0xf;
- prod ^= low[i][c];
- prod ^= (high[i][c] << 8);
- a >>= 4;
- }
- *d16 = prod;
- s16++;
- d16++;
- uls += 2;
- }
- if (d16 == top) return;
- }
-
- for (i = 0; i < 4; i++) {
- tlow[i] = _mm_loadu_si128((__m128i *)low[i]);
- thigh[i] = _mm_loadu_si128((__m128i *)high[i]);
- }
-
- uls = ((unsigned long) top);
- uld = ((unsigned long) d16);
- bytes = (uls - uld);
- if ((bytes & 0x1f) != 0) bytes -= (bytes & 0x1f);
-
- top64 = (uint64_t *) (uld + bytes);
- s64 = (uint64_t *) s16;
- d64 = (uint64_t *) d16;
- mask = _mm_set1_epi8 (0x0f);
- shuffler = _mm_set_epi8(0xf, 0xd, 0xb, 0x9, 7, 5, 3, 1, 0xe, 0xc, 0xa, 8, 6, 4, 2, 0);
- unshuffler = _mm_set_epi8(0xf, 7, 0xe, 6, 0xd, 5, 0xc, 4, 0xb, 3, 0xa, 2, 9, 1, 8, 0);
-
- if (xor) {
- while (d64 != top64) {
-
- ta = _mm_load_si128((__m128i *) s64);
- MM_PRINT("Ta", ta);
- tb = _mm_load_si128((__m128i *) (s64+2));
- MM_PRINT("Tb", tb);
- tta = _mm_shuffle_epi8(ta, shuffler);
- ttb = _mm_shuffle_epi8(tb, shuffler);
- ta = _mm_unpackhi_epi64(ttb, tta);
- MM_PRINT("New ta", ta);
- tb = _mm_unpacklo_epi64(ttb, tta);
- MM_PRINT("New tb", tb);
- exit(0);
-
-
- ti = _mm_and_si128 (mask, tb);
- tph = _mm_shuffle_epi8 (thigh[0], ti);
- tpl = _mm_shuffle_epi8 (tlow[0], ti);
-
- tb = _mm_srli_epi16(tb, 4);
- ti = _mm_and_si128 (mask, tb);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph);
-
- ti = _mm_and_si128 (mask, ta);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph);
-
- ta = _mm_srli_epi16(ta, 4);
- ti = _mm_and_si128 (mask, ta);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph);
-
- tta = _mm_unpackhi_epi64(tpl, tph);
- ttb = _mm_unpacklo_epi64(tpl, tph);
- ta = _mm_shuffle_epi8(tta, unshuffler);
- tb = _mm_shuffle_epi8(ttb, unshuffler);
- tta = _mm_load_si128((__m128i *) d64);
- ta = _mm_xor_si128(ta, tta);
- ttb = _mm_load_si128((__m128i *) (d64+2));
- tb = _mm_xor_si128(tb, ttb);
- _mm_store_si128 ((__m128i *)d64, ta);
- _mm_store_si128 ((__m128i *)(d64+2), tb);
-
- d64 += 4;
- s64 += 4;
-
- }
- } else {
- while (d64 != top64) {
-
- ta = _mm_load_si128((__m128i *) s64);
- tb = _mm_load_si128((__m128i *) (s64+2));
- tta = _mm_shuffle_epi8(ta, shuffler);
- ttb = _mm_shuffle_epi8(tb, shuffler);
- ta = _mm_unpackhi_epi64(ttb, tta);
- tb = _mm_unpacklo_epi64(ttb, tta);
-
- ti = _mm_and_si128 (mask, tb);
- tph = _mm_shuffle_epi8 (thigh[0], ti);
- tpl = _mm_shuffle_epi8 (tlow[0], ti);
-
- tb = _mm_srli_epi16(tb, 4);
- ti = _mm_and_si128 (mask, tb);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph);
-
- ti = _mm_and_si128 (mask, ta);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph);
-
- ta = _mm_srli_epi16(ta, 4);
- ti = _mm_and_si128 (mask, ta);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph);
-
- tta = _mm_unpackhi_epi64(tpl, tph);
- ttb = _mm_unpacklo_epi64(tpl, tph);
- ta = _mm_shuffle_epi8(tta, unshuffler);
- tb = _mm_shuffle_epi8(ttb, unshuffler);
- _mm_store_si128 ((__m128i *)d64, ta);
- _mm_store_si128 ((__m128i *)(d64+2), tb);
-
- d64 += 4;
- s64 += 4;
- }
- }
-
- d16 = (uint16_t *) d64;
- s16 = (uint16_t *) s64;
-
- while (d16 != top) {
- a = *s16;
- prod = (xor) ? *d16 : 0;
- for (i = 0; i < 4; i++) {
- c = a & 0xf;
- prod ^= low[i][c];
- prod ^= (high[i][c] << 8);
- a >>= 4;
- }
- *d16 = prod;
- s16++;
- d16++;
- }
-#endif
-}
-
-/*
-static
-void
-gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
-#ifdef INTEL_SSE4
- uint64_t i, j, *s64, *d64, *top64;;
- uint64_t a, c, prod;
- uint16_t *s16, *d16, *top;
- uint8_t low[4][16];
- uint8_t high[4][16];
- unsigned long uls, uld;
-
- __m128i mask, ta, ti, tp, tlow[4], thigh[4];
-
- struct gf_single_table_data *std;
-
- uls = ((unsigned long) src) & 0xf;
- uld = ((unsigned long) dest) & 0xf;
- if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_4_16_lazy_sse_multiply_region", 2);
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- for (j = 0; j < 16; j++) {
- for (i = 0; i < 4; i++) {
- c = (j << (i*4));
- prod = gf_w16_log_multiply(gf, c, val);
- low[i][j] = (prod & 0xff);
- high[i][j] = (prod >> 8);
- }
- }
-
- s16 = (uint16_t *) src;
- d16 = (uint16_t *) dest;
- top = (uint16_t *) (dest+bytes);
-
- if (uls != 0) {
- while (uls != 16 && d16 < top) {
- a = *s16;
- prod = (xor) ? *d16 : 0;
- for (i = 0; i < 4; i++) {
- c = a & 0xf;
- prod ^= low[i][c];
- prod ^= (high[i][c] << 8);
- a >>= 4;
- }
- *d16 = prod;
- s16++;
- d16++;
- uls += 2;
- }
- if (d16 == top) return;
- }
-
- for (i = 0; i < 4; i++) {
- tlow[i] = _mm_loadu_si128((__m128i *)low[i]);
- thigh[i] = _mm_loadu_si128((__m128i *)high[i]);
- }
-
- uls = ((unsigned long) top) & 0xf;
- uld = ((unsigned long) top) ^ uls;
- top64 = (uint64_t *) uld;
- s64 = (uint64_t *) s16;
- d64 = (uint64_t *) d16;
- mask = _mm_set1_epi16 (0x0f);
-
- if (xor) {
- while (d64 != top64) {
- ta = _mm_load_si128((__m128i *) s64);
- ti = _mm_and_si128 (mask, ta);
- tp = _mm_shuffle_epi8 (tlow[0], ti);
- ti = _mm_slli_epi16 (ti, 8);
- ti = _mm_shuffle_epi8 (thigh[0], ti);
- tp = _mm_xor_si128 (tp, ti);
-
- ta = _mm_srli_epi16(ta, 4);
- ti = _mm_and_si128 (mask, ta);
- tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tp);
- ti = _mm_slli_epi16 (ti, 8);
- ti = _mm_shuffle_epi8 (thigh[1], ti);
- tp = _mm_xor_si128 (tp, ti);
-
- ta = _mm_srli_epi16(ta, 4);
- ti = _mm_and_si128 (mask, ta);
- tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tp);
- ti = _mm_slli_epi16 (ti, 8);
- ti = _mm_shuffle_epi8 (thigh[2], ti);
- tp = _mm_xor_si128 (tp, ti);
-
- ti = _mm_srli_epi16(ta, 4);
- tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tp);
- ti = _mm_slli_epi16 (ti, 8);
- ti = _mm_shuffle_epi8 (thigh[3], ti);
- tp = _mm_xor_si128 (tp, ti);
- ti = _mm_load_si128((__m128i *)d64);
- tp = _mm_xor_si128 (tp, ti);
- _mm_store_si128 ((__m128i *)d64, tp);
- s64 += 2;
- d64 += 2;
- }
- } else {
- while (d64 != top64) {
- ta = _mm_load_si128((__m128i *) s64);
- ti = _mm_and_si128 (mask, ta);
- tp = _mm_shuffle_epi8 (tlow[0], ti);
- ti = _mm_slli_epi16 (ti, 8);
- ti = _mm_shuffle_epi8 (thigh[0], ti);
- tp = _mm_xor_si128 (tp, ti);
-
- ta = _mm_srli_epi16(ta, 4);
- ti = _mm_and_si128 (mask, ta);
- tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tp);
- ti = _mm_slli_epi16 (ti, 8);
- ti = _mm_shuffle_epi8 (thigh[1], ti);
- tp = _mm_xor_si128 (tp, ti);
-
- ta = _mm_srli_epi16(ta, 4);
- ti = _mm_and_si128 (mask, ta);
- tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tp);
- ti = _mm_slli_epi16 (ti, 8);
- ti = _mm_shuffle_epi8 (thigh[2], ti);
- tp = _mm_xor_si128 (tp, ti);
-
- ti = _mm_srli_epi16(ta, 4);
- tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tp);
- ti = _mm_slli_epi16 (ti, 8);
- ti = _mm_shuffle_epi8 (thigh[3], ti);
- tp = _mm_xor_si128 (tp, ti);
- _mm_store_si128 ((__m128i *)d64, tp);
- s64 += 2;
- d64 += 2;
- }
- }
-
- d16 = (uint16_t *) d64;
- s16 = (uint16_t *) s64;
-
- while (d16 != top) {
- a = *s16;
- prod = (xor) ? *d16 : 0;
- for (i = 0; i < 4; i++) {
- c = a & 0xf;
- prod ^= low[i][c];
- prod ^= (high[i][c] << 8);
- a >>= 4;
- }
- *d16 = prod;
- s16++;
- d16++;
- }
-#endif
-}
-*/
-
-
-static
-void
-gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
-#ifdef INTEL_SSE4
- uint64_t i, j, *s64, *d64, *top64;;
- uint64_t a, c, prod;
- uint16_t *s16, *d16, *top;
- uint8_t low[4][16];
- uint8_t high[4][16];
- unsigned long uls, uld;
-
- __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4];
-
- struct gf_single_table_data *std;
-
- uls = ((unsigned long) src) & 0xf;
- uld = ((unsigned long) dest) & 0xf;
- if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_4_16_lazy_sse_altmap_multiply_region", 2);
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- for (j = 0; j < 16; j++) {
- for (i = 0; i < 4; i++) {
- c = (j << (i*4));
- prod = gf_w16_log_multiply(gf, c, val);
- low[i][j] = (prod & 0xff);
- high[i][j] = (prod >> 8);
- }
- }
-
- s16 = (uint16_t *) src;
- d16 = (uint16_t *) dest;
- top = (uint16_t *) (dest+bytes);
-
- if (uls != 0) {
- while (uls != 16 && d16 < top) {
- a = *s16;
- prod = (xor) ? *d16 : 0;
- for (i = 0; i < 4; i++) {
- c = a & 0xf;
- prod ^= low[i][c];
- prod ^= (high[i][c] << 8);
- a >>= 4;
- }
- *d16 = prod;
- s16++;
- d16++;
- uls += 2;
- }
- if (d16 == top) return;
- }
-
- for (i = 0; i < 4; i++) {
- tlow[i] = _mm_loadu_si128((__m128i *)low[i]);
- thigh[i] = _mm_loadu_si128((__m128i *)high[i]);
- }
-
- uls = ((unsigned long) top);
- uld = ((unsigned long) d16);
- bytes = (uls - uld);
- if ((bytes & 0x1f) != 0) bytes -= (bytes & 0x1f);
-
- top64 = (uint64_t *) (uld + bytes);
- s64 = (uint64_t *) s16;
- d64 = (uint64_t *) d16;
- mask = _mm_set1_epi8 (0x0f);
-
- if (xor) {
- while (d64 != top64) {
-
- ta = _mm_load_si128((__m128i *) s64);
- tb = _mm_load_si128((__m128i *) (s64+2));
-
- ti = _mm_and_si128 (mask, tb);
- tph = _mm_shuffle_epi8 (thigh[0], ti);
- tpl = _mm_shuffle_epi8 (tlow[0], ti);
-
- tb = _mm_srli_epi16(tb, 4);
- ti = _mm_and_si128 (mask, tb);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph);
-
- ti = _mm_and_si128 (mask, ta);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph);
-
- ta = _mm_srli_epi16(ta, 4);
- ti = _mm_and_si128 (mask, ta);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph);
-
- ta = _mm_load_si128((__m128i *) d64);
- tph = _mm_xor_si128(tph, ta);
- _mm_store_si128 ((__m128i *)d64, tph);
- tb = _mm_load_si128((__m128i *) (d64+2));
- tpl = _mm_xor_si128(tpl, tb);
- _mm_store_si128 ((__m128i *)(d64+2), tpl);
-
- d64 += 4;
- s64 += 4;
- }
- } else {
- while (d64 != top64) {
-
- ta = _mm_load_si128((__m128i *) s64);
- tb = _mm_load_si128((__m128i *) (s64+2));
-
- ti = _mm_and_si128 (mask, tb);
- tph = _mm_shuffle_epi8 (thigh[0], ti);
- tpl = _mm_shuffle_epi8 (tlow[0], ti);
-
- tb = _mm_srli_epi16(tb, 4);
- ti = _mm_and_si128 (mask, tb);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph);
-
- ti = _mm_and_si128 (mask, ta);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph);
-
- ta = _mm_srli_epi16(ta, 4);
- ti = _mm_and_si128 (mask, ta);
- tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl);
- tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph);
-
- _mm_store_si128 ((__m128i *)d64, tph);
- _mm_store_si128 ((__m128i *)(d64+2), tpl);
-
- d64 += 4;
- s64 += 4;
-
- }
- }
-
- d16 = (uint16_t *) d64;
- s16 = (uint16_t *) s64;
-
- while (d16 != top) {
- a = *s16;
- prod = (xor) ? *d16 : 0;
- for (i = 0; i < 4; i++) {
- c = a & 0xf;
- prod ^= low[i][c];
- prod ^= (high[i][c] << 8);
- a >>= 4;
- }
- *d16 = prod;
- s16++;
- d16++;
- }
-#endif
-}
-
-static
-int gf_w16_split_init(gf_t *gf)
-{
- gf_internal_t *h;
- gf_w16_log_init(gf);
-
- h = (gf_internal_t *) gf->scratch;
- if (h->arg1 == 8 || h->arg2 == 8) {
- gf->multiply_region.w16 = gf_w16_split_8_16_lazy_multiply_region;
- } else if (h->arg1 == 4 || h->arg2 == 4) {
- if (h->region_type & GF_REGION_SSE) {
- if (h->region_type & GF_REGION_ALTMAP) {
- gf->multiply_region.w16 = gf_w16_split_4_16_lazy_sse_altmap_multiply_region;
- } else {
- gf->multiply_region.w16 = gf_w16_split_4_16_lazy_sse_multiply_region;
- }
- } else {
- gf->multiply_region.w16 = gf_w16_split_4_16_lazy_multiply_region;
- }
- }
- return 1;
-}
-
-static
-int gf_w16_table_init(gf_t *gf)
-{
- gf_internal_t *h;
- gf_w16_log_init(gf);
-
- h = (gf_internal_t *) gf->scratch;
- gf->multiply_region.w16 = NULL;
- gf->multiply_region.w16 = gf_w16_table_lazy_multiply_region;
- return 1;
-}
-
-static
-void
-gf_w16_log_zero_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
- unsigned long uls, uld;
- int i;
- uint16_t lv, b, c;
- uint16_t *s16, *d16;
- int num_syms = bytes >> 1;
- int sym_divisible = bytes % 2;
-
- struct gf_zero_logtable_data *ltd;
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2);
- if (sym_divisible) {
- gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2);
- }
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- ltd = (struct gf_zero_logtable_data*) ((gf_internal_t *) gf->scratch)->private;
- s16 = (uint16_t *) src;
- d16 = (uint16_t *) dest;
-
- lv = ltd->log_tbl[val];
-
- if (xor) {
- for (i = 0; i < num_syms; i++) {
- d16[i] ^= ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]];
- }
- } else {
- for (i = 0; i < num_syms; i++) {
- d16[i] = ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]];
- }
- }
-}
-
-static
-inline
-gf_val_16_t
-gf_w16_log_zero_multiply (gf_t *gf, gf_val_16_t a, gf_val_16_t b)
-{
- struct gf_zero_logtable_data *ltd;
-
- ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private;
- return ltd->antilog_tbl[ltd->log_tbl[a] + ltd->log_tbl[b]];
-}
-
-static
-inline
-gf_val_16_t
-gf_w16_log_zero_divide (gf_t *gf, gf_val_16_t a, gf_val_16_t b)
-{
- int log_sum = 0;
- struct gf_zero_logtable_data *ltd;
-
- if (a == 0 || b == 0) return 0;
- ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private;
-
- log_sum = ltd->log_tbl[a] - ltd->log_tbl[b] + (GF_MULT_GROUP_SIZE);
- return (ltd->antilog_tbl[log_sum]);
-}
-
-static
-gf_val_16_t
-gf_w16_log_zero_inverse (gf_t *gf, gf_val_16_t a)
-{
- struct gf_zero_logtable_data *ltd;
-
- ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private;
- return (ltd->inv_tbl[a]);
-}
-
-static
-int gf_w16_log_zero_init(gf_t *gf)
-{
- gf_internal_t *h;
- struct gf_zero_logtable_data *ltd;
- int i, b;
-
- h = (gf_internal_t *) gf->scratch;
- ltd = h->private;
-
- ltd->log_tbl[0] = (-GF_MULT_GROUP_SIZE) + 1;
-
- bzero(&(ltd->_antilog_tbl[0]), sizeof(ltd->_antilog_tbl));
-
- ltd->antilog_tbl = &(ltd->_antilog_tbl[GF_FIELD_SIZE * 2]);
-
- b = 1;
- for (i = 0; i < GF_MULT_GROUP_SIZE; i++) {
- ltd->log_tbl[b] = (gf_val_16_t)i;
- ltd->antilog_tbl[i] = (gf_val_16_t)b;
- ltd->antilog_tbl[i+GF_MULT_GROUP_SIZE] = (gf_val_16_t)b;
- b <<= 1;
- if (b & GF_FIELD_SIZE) {
- b = b ^ h->prim_poly;
- }
- }
- ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */
- ltd->inv_tbl[1] = 1;
- for (i = 2; i < GF_FIELD_SIZE; i++) {
- ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]];
- }
-
- gf->inverse.w16 = gf_w16_log_zero_inverse;
- gf->divide.w16 = gf_w16_log_zero_divide;
- gf->multiply.w16 = gf_w16_log_zero_multiply;
- gf->multiply_region.w16 = gf_w16_log_zero_multiply_region;
- return 1;
-}
-
-static
-gf_val_16_t
-gf_w16_composite_multiply(gf_t *gf, gf_val_16_t a, gf_val_16_t b)
-{
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- uint8_t b0 = b & 0x00ff;
- uint8_t b1 = (b & 0xff00) >> 8;
- uint8_t a0 = a & 0x00ff;
- uint8_t a1 = (a & 0xff00) >> 8;
- uint8_t a1b1;
-
- a1b1 = base_gf->multiply.w8(base_gf, a1, b1);
-
- return ((base_gf->multiply.w8(base_gf, a0, b0) ^ a1b1) | ((base_gf->multiply.w8(base_gf, a1, b0) ^ base_gf->multiply.w8(base_gf, a0, b1) ^ base_gf->multiply.w8(base_gf, a1b1, GF_S_GF_8_2)) << 8));
-}
-
-/*
- * Composite field division trick (explained in 2007 tech report)
- *
- * Compute a / b = a*b^-1, where p(x) = x^2 + sx + 1
- *
- * let c = b^-1
- *
- * c*b = (s*b1c1+b1c0+b0c1)x+(b1c1+b0c0)
- *
- * want (s*b1c1+b1c0+b0c1) = 0 and (b1c1+b0c0) = 1
- *
- * let d = b1c1 and d+1 = b0c0
- *
- * solve s*b1c1+b1c0+b0c1 = 0
- *
- * solution: d = (b1b0^-1)(b1b0^-1+b0b1^-1+s)^-1
- *
- * c0 = (d+1)b0^-1
- * c1 = d*b1^-1
- *
- * a / b = a * c
- */
-static
-gf_val_16_t
-gf_w16_composite_inverse(gf_t *gf, gf_val_16_t a)
-{
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- uint8_t a0 = a & 0x00ff;
- uint8_t a1 = (a & 0xff00) >> 8;
- uint8_t c0, c1, d, tmp;
- uint16_t c;
- uint8_t a0inv, a1inv;
-
- if (a0 == 0) {
- a1inv = base_gf->inverse.w8(base_gf, a1);
- c0 = base_gf->multiply.w8(base_gf, a1inv, GF_S_GF_8_2);
- c1 = a1inv;
- } else if (a1 == 0) {
- c0 = base_gf->inverse.w8(base_gf, a0);
- c1 = 0;
- } else {
- a1inv = base_gf->inverse.w8(base_gf, a1);
- a0inv = base_gf->inverse.w8(base_gf, a0);
-
- d = base_gf->multiply.w8(base_gf, a1, a0inv);
-
- tmp = (base_gf->multiply.w8(base_gf, a1, a0inv) ^ base_gf->multiply.w8(base_gf, a0, a1inv) ^ GF_S_GF_8_2);
- tmp = base_gf->inverse.w8(base_gf, tmp);
-
- d = base_gf->multiply.w8(base_gf, d, tmp);
-
- c0 = base_gf->multiply.w8(base_gf, (d^1), a0inv);
- c1 = base_gf->multiply.w8(base_gf, d, a1inv);
- }
-
- c = c0 | (c1 << 8);
-
- return c;
-}
-
-static
-gf_val_16_t
-gf_w16_composite_divide(gf_t *gf, gf_val_16_t a, gf_val_16_t b)
-{
- gf_val_16_t binv;
-
- binv = gf_w16_composite_inverse(gf, b);
-
- return gf_w16_composite_multiply(gf, a, binv);
-}
-
-static
-void
-gf_w16_composite_multiply_region_table(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
- unsigned long uls, uld;
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- int i=0;
- struct gf_w8_single_table_data * std;
- uint8_t b0 = val & 0x00ff;
- uint8_t b1 = (val & 0xff00) >> 8;
- uint16_t *s16 = (uint16_t *) src;
- uint16_t *d16 = (uint16_t *) dest;
- uint8_t a0, a1, a1b1;
- int num_syms = bytes >> 1;
- int sym_divisible = bytes % 2;
-
- struct gf_logtable_data *ltd;
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2);
- if (sym_divisible) {
- gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2);
- }
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- std = (struct gf_w8_single_table_data *) h->private;
-
- if (xor) {
- for (i = 0;i < num_syms; i++) {
- a0 = s16[i] & 0x00ff;
- a1 = (s16[i] & 0xff00) >> 8;
- a1b1 = std->mult[a1][b1];
-
- d16[i] ^= ((std->mult[a0][b0] ^ a1b1) | ((std->mult[a1][b0] ^ std->mult[a0][b1] ^ std->mult[a1b1][GF_S_GF_8_2]) << 8));
-
- }
- } else {
- for (i = 0;i < num_syms; i++) {
- a0 = s16[i] & 0x00ff;
- a1 = (s16[i] & 0xff00) >> 8;
- a1b1 = std->mult[a1][b1];
-
- d16[i] = ((std->mult[a0][b0] ^ a1b1) | ((std->mult[a1][b0] ^ std->mult[a0][b1] ^ std->mult[a1b1][GF_S_GF_8_2]) << 8));
- }
- }
-}
-
-static
-void
-gf_w16_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
- unsigned long uls, uld;
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- int i=0;
- struct gf_w8_single_table_data * std;
- uint8_t b0 = val & 0x00ff;
- uint8_t b1 = (val & 0xff00) >> 8;
- uint16_t *s16 = (uint16_t *) src;
- uint16_t *d16 = (uint16_t *) dest;
- uint8_t a0, a1, a1b1;
- int num_syms = bytes >> 1;
- int sym_divisible = bytes % 2;
-
- struct gf_logtable_data *ltd;
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2);
- if (sym_divisible) {
- gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2);
- }
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- std = (struct gf_w8_single_table_data *) h->private;
-
- if (xor) {
- for (i = 0;i < num_syms; i++) {
- a0 = s16[i] & 0x00ff;
- a1 = (s16[i] & 0xff00) >> 8;
- a1b1 = std->mult[a1][b1];
-
- d16[i] ^= ((base_gf->multiply.w8(base_gf, a0, b0) ^ a1b1) |
- ((base_gf->multiply.w8(base_gf, a1, b0) ^ base_gf->multiply.w8(base_gf, a0, b1) ^ base_gf->multiply.w8(base_gf, a1b1, GF_S_GF_8_2)) << 8));
-
- }
- } else {
- for (i = 0;i < num_syms; i++) {
- a0 = s16[i] & 0x00ff;
- a1 = (s16[i] & 0xff00) >> 8;
- a1b1 = std->mult[a1][b1];
-
- d16[i] = ((base_gf->multiply.w8(base_gf, a0, b0) ^ a1b1) |
- ((base_gf->multiply.w8(base_gf, a1, b0) ^ base_gf->multiply.w8(base_gf, a0, b1) ^ base_gf->multiply.w8(base_gf, a1b1, GF_S_GF_8_2)) << 8));
- }
- }
-}
-
-static
-void
-gf_w16_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor)
-{
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- gf_val_8_t val0 = val & 0x00ff;
- gf_val_8_t val1 = (val & 0xff00) >> 8;
- int sub_reg_size = bytes / 2;
-
- if (!xor) {
- memset(dest, 0, bytes);
- }
-
- if (bytes % 2 != 0) gf_alignment_error("gf_w8_composite_multiply_region_alt", 1);
-
- base_gf->multiply_region.w8(base_gf, src, dest, val0, sub_reg_size, xor);
- base_gf->multiply_region.w8(base_gf, src+sub_reg_size, dest, val1, sub_reg_size, 1);
- base_gf->multiply_region.w8(base_gf, src, dest+sub_reg_size, val1, sub_reg_size, xor);
- base_gf->multiply_region.w8(base_gf, src+sub_reg_size, dest+sub_reg_size, val0, sub_reg_size, 1);
- base_gf->multiply_region.w8(base_gf, src+sub_reg_size, dest+sub_reg_size, base_gf->multiply.w8(base_gf, GF_S_GF_8_2, val1), sub_reg_size, 1);
-}
-
-static
-int gf_w16_composite_init(gf_t *gf)
-{
- struct gf_w8_single_table_data * std;
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- gf_val_16_t a, b;
-
- std = (struct gf_w8_single_table_data *) h->private;
-
- for (a = 0; a < 256; a++) {
- for (b = 0; b < 256; b++) {
- std->mult[a][b] = base_gf->multiply.w8(base_gf, a, b);
- }
- }
-
- if (h->region_type & GF_REGION_ALTMAP) {
- gf->multiply_region.w16 = gf_w16_composite_multiply_region_alt;
- } else {
- if (h->region_type & GF_REGION_SINGLE_TABLE) {
- gf->multiply_region.w16 = gf_w16_composite_multiply_region_table;
- } else {
- gf->multiply_region.w16 = gf_w16_composite_multiply_region;
- }
- }
-
- gf->multiply.w16 = gf_w16_composite_multiply;
- gf->divide.w16 = gf_w16_composite_divide;
- gf->inverse.w16 = gf_w16_composite_inverse;
-
- return 1;
-}
-
-
-
-int gf_w16_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
-{
- int ss;
- int sa;
-
- ss = (GF_REGION_SSE | GF_REGION_NOSSE);
- sa = (GF_REGION_STDMAP | GF_REGION_ALTMAP);
-
- switch(mult_type)
- {
- case GF_MULT_TABLE:
- region_type |= GF_REGION_LAZY;
- if (arg1 != 0 || arg2 != 0 || region_type != GF_REGION_LAZY) return -1;
- return sizeof(gf_internal_t) + sizeof(struct gf_lazytable_data) + 64;
- break;
- case GF_MULT_LOG_TABLE:
- if (arg2 != 0) return -1;
- if (region_type != GF_REGION_DEFAULT) return -1;
- if (arg1 == 1) {
- return sizeof(gf_internal_t) + sizeof(struct gf_zero_logtable_data) + 64;
- } else if (arg1 == 0) {
- return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64;
- } else {
- return -1;
- }
- break;
- case GF_MULT_SPLIT_TABLE:
- if ((arg1 == 8 && arg2 == 16) || (arg2 == 8 && arg1 == 16)) {
- region_type |= GF_REGION_LAZY;
- if (region_type != GF_REGION_LAZY) return -1;
- return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64;
- } else if ((arg1 == 4 && arg2 == 16) || (arg2 == 4 && arg1 == 16)) {
- region_type &= (~GF_REGION_LAZY); /* Ignore GF_REGION_LAZY */
- if ((region_type & ss) == ss) return -1;
- if ((region_type & sa) == sa) return -1;
- if ((region_type & ss) == 0) region_type |= GF_REGION_SSE;
- if (region_type & GF_REGION_NOSSE) {
- if (region_type != GF_REGION_NOSSE) return -1;
- return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64;
- } else {
- if ((region_type | ss | sa) != (ss|sa)) return -1;
- return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64;
- }
- }
- return -1;
- break;
- case GF_MULT_DEFAULT:
- case GF_MULT_SHIFT:
- if (arg1 != 0 || arg2 != 0 || region_type != 0) return -1;
- return sizeof(gf_internal_t);
- break;
- case GF_MULT_COMPOSITE:
- if (region_type & ~(GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP | GF_REGION_STDMAP)) return -1;
- if ((region_type & (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) == (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) return -1;
- if (arg1 == 2 && arg2 == 8) {
- return sizeof(gf_internal_t) + sizeof(struct gf_w8_single_table_data) + 64;
- } else {
- return -1;
- }
-
- default:
- return -1;
- }
-}
-
-int gf_w16_init(gf_t *gf)
-{
- gf_internal_t *h;
-
- h = (gf_internal_t *) gf->scratch;
- if (h->prim_poly == 0) h->prim_poly = 0x1100b;
-
- gf->multiply.w16 = NULL;
- gf->divide.w16 = NULL;
- gf->inverse.w16 = NULL;
- gf->multiply_region.w16 = NULL;
-
- switch(h->mult_type) {
- case GF_MULT_LOG_TABLE:
- if (h->arg1 == 1) {
- return gf_w16_log_zero_init(gf);
- } else {
- return gf_w16_log_init(gf);
- }
- case GF_MULT_SPLIT_TABLE: return gf_w16_split_init(gf);
- case GF_MULT_TABLE: return gf_w16_table_init(gf);
- case GF_MULT_DEFAULT:
- case GF_MULT_SHIFT: if (gf_w16_shift_init(gf) == 0) return 0; break;
- case GF_MULT_COMPOSITE: if (gf_w16_composite_init(gf) == 0) return 0; break;
- default: return 0;
- }
- if (h->divide_type == GF_DIVIDE_EUCLID) {
- gf->divide.w16 = gf_w16_divide_from_inverse;
- gf->inverse.w16 = gf_w16_euclid;
- } else if (h->divide_type == GF_DIVIDE_MATRIX) {
- gf->divide.w16 = gf_w16_divide_from_inverse;
- gf->inverse.w16 = gf_w16_matrix;
- }
-
- if (gf->inverse.w16 != NULL && gf->divide.w16 == NULL) {
- gf->divide.w16 = gf_w16_divide_from_inverse;
- }
- if (gf->inverse.w16 == NULL && gf->divide.w16 != NULL) {
- gf->inverse.w16 = gf_w16_inverse_from_divide;
- }
- return 1;
-}
diff --git a/junk-w16-timing-tests.sh b/junk-w16-timing-tests.sh
deleted file mode 100644
index cea73cf..0000000
--- a/junk-w16-timing-tests.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-sh tmp-time-test.sh 16 LOG - -
-sh tmp-time-test.sh 16 LOG_ZERO - -
-sh tmp-time-test.sh 16 TABLE - -
-sh tmp-time-test.sh 16 TABLE LE,LAZY -
-sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,NOSSE -
-sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,LAZY,SSE -
-sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,LAZY,NOSSE -
-sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,SSE -
-sh tmp-time-test.sh 16 SPLIT 16 4 NOSSE -
-sh tmp-time-test.sh 16 SPLIT 16 4 LAZY,SSE -
-sh tmp-time-test.sh 16 SPLIT 16 4 LAZY,NOSSE -
-sh tmp-time-test.sh 16 SPLIT 16 4 SSE -
diff --git a/junk-w2.eps b/junk-w2.eps
deleted file mode 100644
index 1796352..0000000
--- a/junk-w2.eps
+++ /dev/null
@@ -1,203 +0,0 @@
-%!PS-Adobe-2.0 EPSF-1.2
-%%Page: 1 1
-%%BoundingBox: -40 -93 289 73
-%%EndComments
-1 setlinecap 1 setlinejoin
-0.700 setlinewidth
-0.00 setgray
-
-/Jrnd { exch cvi exch cvi dup 3 1 roll idiv mul } def
-/JDEdict 8 dict def
-JDEdict /mtrx matrix put
-/JDE {
- JDEdict begin
- /yrad exch def
- /xrad exch def
- /savematrix mtrx currentmatrix def
- xrad yrad scale
- 0 0 1 0 360 arc
- savematrix setmatrix
- end
-} def
-/JSTR {
- gsave 1 eq { gsave 1 setgray fill grestore } if
- exch neg exch neg translate
- clip
- rotate
- 4 dict begin
- pathbbox /&top exch def
- /&right exch def
- /&bottom exch def
- &right sub /&width exch def
- newpath
- currentlinewidth mul round dup
- &bottom exch Jrnd exch &top
- 4 -1 roll currentlinewidth mul setlinewidth
- { &right exch moveto &width 0 rlineto stroke } for
- end
- grestore
- newpath
-} bind def
- gsave /Times-Roman findfont 9.000000 scalefont setfont
-0.000000 0.000000 translate
-0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 288.000000 0.000000 lineto stroke
-newpath 0.000000 0.000000 moveto 0.000000 -5.000000 lineto stroke
-newpath 26.181818 0.000000 moveto 26.181818 -2.000000 lineto stroke
-newpath 52.363636 0.000000 moveto 52.363636 -5.000000 lineto stroke
-newpath 78.545456 0.000000 moveto 78.545456 -2.000000 lineto stroke
-newpath 104.727272 0.000000 moveto 104.727272 -5.000000 lineto stroke
-newpath 130.909088 0.000000 moveto 130.909088 -2.000000 lineto stroke
-newpath 157.090912 0.000000 moveto 157.090912 -5.000000 lineto stroke
-newpath 183.272720 0.000000 moveto 183.272720 -2.000000 lineto stroke
-newpath 209.454544 0.000000 moveto 209.454544 -5.000000 lineto stroke
-newpath 235.636368 0.000000 moveto 235.636368 -2.000000 lineto stroke
-newpath 261.818176 0.000000 moveto 261.818176 -5.000000 lineto stroke
-newpath 288.000000 0.000000 moveto 288.000000 -2.000000 lineto stroke
-/Times-Roman findfont 11.000000 scalefont setfont
-gsave 26.181818 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (BYTWO_p) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 52.363636 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (BYTWO_p SSE) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 78.545456 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (BYTWO_b) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 104.727272 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (BYTWO_b SSE) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 130.909088 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (TABLE SINGLE) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 157.090912 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (TABLE DOUBLE) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 183.272720 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (TABLE QUAD) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 209.454544 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (TABLE QUAD,LAZY) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 235.636368 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (TABLE SINGLE,SSE) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 261.818176 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (LOG) dup stringwidth pop pop 0 0 moveto
-show
-grestore
- grestore
-0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 0.000000 72.000000 lineto stroke
-newpath 0.000000 0.000000 moveto -5.000000 0.000000 lineto stroke
-newpath 0.000000 9.916304 moveto -2.000000 9.916304 lineto stroke
-newpath 0.000000 19.832607 moveto -5.000000 19.832607 lineto stroke
-newpath 0.000000 29.748911 moveto -2.000000 29.748911 lineto stroke
-newpath 0.000000 39.665215 moveto -5.000000 39.665215 lineto stroke
-newpath 0.000000 49.581520 moveto -2.000000 49.581520 lineto stroke
-newpath 0.000000 59.497822 moveto -5.000000 59.497822 lineto stroke
-newpath 0.000000 69.414124 moveto -2.000000 69.414124 lineto stroke
-/Times-Roman findfont 9.000000 scalefont setfont
-gsave -8.000000 0.000000 translate 0.000000 rotate
-0 -2.700000 translate (0) dup stringwidth pop neg 0 moveto
-show
-grestore
-gsave -8.000000 19.832607 translate 0.000000 rotate
-0 -2.700000 translate (2000) dup stringwidth pop neg 0 moveto
-show
-grestore
-gsave -8.000000 39.665215 translate 0.000000 rotate
-0 -2.700000 translate (4000) dup stringwidth pop neg 0 moveto
-show
-grestore
-gsave -8.000000 59.497822 translate 0.000000 rotate
-0 -2.700000 translate (6000) dup stringwidth pop neg 0 moveto
-show
-grestore
-/Times-Bold findfont 10.000000 scalefont setfont
-gsave -33.279999 36.000000 translate 90.000000 rotate
-0 0.000000 translate (MB/s) dup stringwidth pop 2 div neg 0 moveto
-show
-grestore
- grestore
- gsave
- gsave gsave 26.181818 9.564870 translate 0.000000 rotate
- newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto
- -13.090909 -9.564870 lineto
- 13.090909 -9.564870 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore grestore
- gsave gsave 52.363636 15.887009 translate 0.000000 rotate
- newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto
- -13.090909 -15.887009 lineto
- 13.090909 -15.887009 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore grestore
- gsave gsave 78.545456 20.109272 translate 0.000000 rotate
- newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto
- -13.090909 -20.109272 lineto
- 13.090909 -20.109272 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore grestore
- gsave gsave 104.727272 26.881811 translate 0.000000 rotate
- newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto
- -13.090909 -26.881811 lineto
- 13.090909 -26.881811 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore grestore
- gsave gsave 130.909088 4.538296 translate 0.000000 rotate
- newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto
- -13.090909 -4.538296 lineto
- 13.090909 -4.538296 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore grestore
- gsave gsave 157.090912 8.978618 translate 0.000000 rotate
- newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto
- -13.090909 -8.978618 lineto
- 13.090909 -8.978618 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore grestore
- gsave gsave 183.272720 13.178271 translate 0.000000 rotate
- newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto
- -13.090909 -13.178271 lineto
- 13.090909 -13.178271 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore grestore
- gsave gsave 209.454544 11.003130 translate 0.000000 rotate
- newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto
- -13.090909 -11.003130 lineto
- 13.090909 -11.003130 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore grestore
- gsave gsave 235.636368 72.000000 translate 0.000000 rotate
- newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto
- -13.090909 -72.000000 lineto
- 13.090909 -72.000000 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore grestore
- gsave gsave 261.818176 2.016877 translate 0.000000 rotate
- newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto
- -13.090909 -2.016877 lineto
- 13.090909 -2.016877 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore grestore
- grestore
--0.000000 -0.000000 translate
- grestore
diff --git a/junk-w32-backup.c b/junk-w32-backup.c
deleted file mode 100644
index d742a3f..0000000
--- a/junk-w32-backup.c
+++ /dev/null
@@ -1,1337 +0,0 @@
-/*
- * gf_w32.c
- *
- * Routines for 32-bit Galois fields
- */
-
-#define MM_PRINT32(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 4) printf(" %02x%02x%02x%02x", blah[15-ii], blah[14-ii], blah[13-ii], blah[12-ii]); printf("\n"); }
-
-#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); }
-
-#include "gf_int.h"
-#include <stdio.h>
-#include <stdlib.h>
-
-#define GF_FIELD_WIDTH (32)
-#define GF_FIRST_BIT (1 << 31)
-
-#define GF_BASE_FIELD_WIDTH (16)
-#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH)
-#define GF_BASE_FIELD_GROUP_SIZE GF_BASE_FIELD_SIZE-1
-#define GF_S_GF_16_2 (40188)
-#define GF_MULTBY_TWO(p) (((p) & GF_FIRST_BIT) ? (((p) << 1) ^ h->prim_poly) : (p) << 1);
-
-
-struct gf_w16_logtable_data {
- int log_tbl[GF_BASE_FIELD_SIZE];
- gf_val_16_t _antilog_tbl[GF_BASE_FIELD_SIZE * 4];
- gf_val_16_t *antilog_tbl;
- gf_val_16_t inv_tbl[GF_BASE_FIELD_SIZE];
-};
-
-struct gf_split_2_32_lazy_data {
- gf_val_32_t last_value;
- gf_val_32_t tables[16][4];
-};
-
-struct gf_split_8_8_data {
- gf_val_32_t tables[7][256][256];
-};
-
-struct gf_split_4_32_lazy_data {
- gf_val_32_t last_value;
- gf_val_32_t tables[8][16];
-};
-
-static
-inline
-gf_val_32_t gf_w32_inverse_from_divide (gf_t *gf, gf_val_32_t a)
-{
- return gf->divide.w32(gf, 1, a);
-}
-
-static
-inline
-gf_val_32_t gf_w32_divide_from_inverse (gf_t *gf, gf_val_32_t a, gf_val_32_t b)
-{
- b = gf->inverse.w32(gf, b);
- return gf->multiply.w32(gf, a, b);
-}
-
-static
-void
-gf_w32_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int
-xor)
-{
- int i;
- gf_val_32_t *s32;
- gf_val_32_t *d32;
-
- s32 = (gf_val_32_t *) src;
- d32 = (gf_val_32_t *) dest;
-
- if (xor) {
- for (i = 0; i < bytes/sizeof(gf_val_32_t); i++) {
- d32[i] ^= gf->multiply.w32(gf, val, s32[i]);
- }
- } else {
- for (i = 0; i < bytes/sizeof(gf_val_32_t); i++) {
- d32[i] = gf->multiply.w32(gf, val, s32[i]);
- }
- }
-}
-
-static
-inline
-gf_val_32_t gf_w32_euclid (gf_t *gf, gf_val_32_t b)
-{
- gf_val_32_t e_i, e_im1, e_ip1;
- gf_val_32_t d_i, d_im1, d_ip1;
- gf_val_32_t y_i, y_im1, y_ip1;
- gf_val_32_t c_i;
-
- if (b == 0) return -1;
- e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly;
- e_i = b;
- d_im1 = 32;
- for (d_i = d_im1-1; ((1 << d_i) & e_i) == 0; d_i--) ;
- y_i = 1;
- y_im1 = 0;
-
- while (e_i != 1) {
-
- e_ip1 = e_im1;
- d_ip1 = d_im1;
- c_i = 0;
-
- while (d_ip1 >= d_i) {
- c_i ^= (1 << (d_ip1 - d_i));
- e_ip1 ^= (e_i << (d_ip1 - d_i));
- d_ip1--;
- while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--;
- }
-
- y_ip1 = y_im1 ^ gf->multiply.w32(gf, c_i, y_i);
- y_im1 = y_i;
- y_i = y_ip1;
-
- e_im1 = e_i;
- d_im1 = d_i;
- e_i = e_ip1;
- d_i = d_ip1;
- }
-
- return y_i;
-}
-
-static
-inline
-gf_val_32_t gf_w32_matrix (gf_t *gf, gf_val_32_t b)
-{
- return gf_bitmatrix_inverse(b, 32, ((gf_internal_t *) (gf->scratch))->prim_poly);
-}
-
-/* JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only
- include it for completeness. It does have the feature that it requires no
- extra memory.
-*/
-
-static
-inline
-gf_val_32_t
-gf_w32_shift_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
-{
- uint64_t product, i, pp, a, b, one;
- gf_internal_t *h;
-
- a = a32;
- b = b32;
- h = (gf_internal_t *) gf->scratch;
- one = 1;
- pp = h->prim_poly | (one << 32);
-
- product = 0;
-
- for (i = 0; i < GF_FIELD_WIDTH; i++) {
- if (a & (one << i)) product ^= (b << i);
- }
- for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) {
- if (product & (one << i)) product ^= (pp << (i-GF_FIELD_WIDTH));
- }
- return product;
-}
-
-static
-int gf_w32_shift_init(gf_t *gf)
-{
- gf->multiply.w32 = gf_w32_shift_multiply;
- gf->inverse.w32 = gf_w32_euclid;
- gf->multiply_region.w32 = gf_w32_multiply_region_from_single;
- return 1;
-}
-
-static
-inline
-gf_val_32_t
-gf_w32_split_8_8_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
-{
- uint32_t product, i, j, mask, tb;
- gf_internal_t *h;
- struct gf_split_8_8_data *d8;
-
- h = (gf_internal_t *) gf->scratch;
- d8 = (struct gf_split_8_8_data *) h->private;
- product = 0;
- mask = 0xff;
-
- for (i = 0; i < 4; i++) {
- tb = b32;
- for (j = 0; j < 4; j++) {
- product ^= d8->tables[i+j][a32&mask][tb&mask];
- tb >>= 8;
- }
- a32 >>= 8;
- }
- return product;
-}
-
-static
-inline
-void
-gf_w32_split_8_8_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
-{
- uint32_t product, mask, tb, tv, tp;
- gf_internal_t *h;
- struct gf_split_8_8_data *d8;
- uint32_t *p00, *p01, *p02, *p03;
- uint32_t *p10, *p11, *p12, *p13;
- uint32_t *p20, *p21, *p22, *p23;
- uint32_t *p30, *p31, *p32, *p33;
- uint32_t *s32, *d32, *top;
- unsigned long uls, uld;
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7))) gf_alignment_error("gf_w32_split_8_8_multiply_region", 4);
- if (bytes % 4 != 0) {
- gf_alignment_error("gf_w32_split_8_8_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4);
- }
-
- tv = val;
- h = (gf_internal_t *) gf->scratch;
- d8 = (struct gf_split_8_8_data *) h->private;
- mask = 0xff;
-
- p00 = &(d8->tables[0][val&mask][0]);
- p01 = &(d8->tables[1][val&mask][0]);
- p02 = &(d8->tables[2][val&mask][0]);
- p03 = &(d8->tables[3][val&mask][0]);
- val >>= 8;
- p10 = &(d8->tables[1][val&mask][0]);
- p11 = &(d8->tables[2][val&mask][0]);
- p12 = &(d8->tables[3][val&mask][0]);
- p13 = &(d8->tables[4][val&mask][0]);
- val >>= 8;
- p20 = &(d8->tables[2][val&mask][0]);
- p21 = &(d8->tables[3][val&mask][0]);
- p22 = &(d8->tables[4][val&mask][0]);
- p23 = &(d8->tables[5][val&mask][0]);
- val >>= 8;
- p30 = &(d8->tables[3][val&mask][0]);
- p31 = &(d8->tables[4][val&mask][0]);
- p32 = &(d8->tables[5][val&mask][0]);
- p33 = &(d8->tables[6][val&mask][0]);
-
- s32 = (uint32_t *) src;
- d32 = (uint32_t *) dest;
- top = (d32 + (bytes/4));
-
- while (d32 < top) {
- tb = *s32;
- tp = *d32;
- product = (xor) ? (*d32) : 0;
- product ^= p00[tb&mask];
- product ^= p10[tb&mask];
- product ^= p20[tb&mask];
- product ^= p30[tb&mask];
-
- tb >>= 8;
- product ^= p01[tb&mask];
- product ^= p11[tb&mask];
- product ^= p21[tb&mask];
- product ^= p31[tb&mask];
-
- tb >>= 8;
- product ^= p02[tb&mask];
- product ^= p12[tb&mask];
- product ^= p22[tb&mask];
- product ^= p32[tb&mask];
-
- tb >>= 8;
- product ^= p03[tb&mask];
- product ^= p13[tb&mask];
- product ^= p23[tb&mask];
- product ^= p33[tb&mask];
- *d32 = product;
- s32++;
- d32++;
- }
-}
-
-static
-void
-gf_w32_split_2_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
-{
- unsigned long uls, uld;
- gf_internal_t *h;
- struct gf_split_2_32_lazy_data *ld;
- int i;
- gf_val_32_t pp, v, v2, s, *s32, *d32, *top;
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7))) gf_alignment_error("gf_w32_split_2_32_lazy_multiply_region", 4);
- if (bytes % 4 != 0) {
- gf_alignment_error("gf_w32_split_2_32_lazy_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4);
- }
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- h = (gf_internal_t *) gf->scratch;
- pp = h->prim_poly;
-
- ld = (struct gf_split_2_32_lazy_data *) h->private;
-
- if (ld->last_value != val) {
- v = val;
- for (i = 0; i < 16; i++) {
- v2 = (v << 1);
- if (v & GF_FIRST_BIT) v2 ^= pp;
- ld->tables[i][0] = 0;
- ld->tables[i][1] = v;
- ld->tables[i][2] = v2;
- ld->tables[i][3] = (v2 ^ v);
- v = (v2 << 1);
- if (v2 & GF_FIRST_BIT) v ^= pp;
- }
- }
- ld->last_value = val;
-
- s32 = (gf_val_32_t *) src;
- d32 = (gf_val_32_t *) dest;
- top = d32 + (bytes/4);
-
- while (d32 != top) {
- v = (xor) ? *d32 : 0;
- s = *s32;
- i = 0;
- while (s != 0) {
- v ^= ld->tables[i][s&3];
- s >>= 2;
- i++;
- }
- *d32 = v;
- d32++;
- s32++;
- }
-}
-
-static
-void
-gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
-{
-#ifdef INTEL_SSE4
- unsigned long uls, uld;
- gf_internal_t *h;
- int i, m, j, tindex;
- gf_val_32_t pp, v, v2, s, *s32, *d32, *top;
- __m128i vi, si, pi, shuffler, tables[16], adder, xi, mask1, mask2;
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_2_32_lazy_sse_multiply_region", 4);
- if (bytes % 4 != 0) {
- gf_alignment_error("gf_w32_split_2_32_lazy_sse_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4);
- }
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- h = (gf_internal_t *) gf->scratch;
- pp = h->prim_poly;
-
- uls &= 0xf;
-
- s32 = (gf_val_32_t *) src;
- d32 = (gf_val_32_t *) dest;
- top = d32 + (bytes/4);
-
- if (uls != 0) {
- while (uls != 16) {
- if (xor) {
- *d32 ^= gf->multiply.w32(gf, *s32, val);
- } else {
- *d32 = gf->multiply.w32(gf, *s32, val);
- }
- *s32++;
- *d32++;
- if (d32 == top) return;
- uls += 4;
- }
- }
-
- uld = (unsigned long) top;
- top = (gf_val_32_t *) (uld - (uld & 0xf));
- uld &= 0xf;
-
- v = val;
- for (i = 0; i < 16; i++) {
- v2 = (v << 1);
- if (v & GF_FIRST_BIT) v2 ^= pp;
- tables[i] = _mm_set_epi32(v2 ^ v, v2, v, 0);
- v = (v2 << 1);
- if (v2 & GF_FIRST_BIT) v ^= pp;
- }
-
- shuffler = _mm_set_epi8(0xc, 0xc, 0xc, 0xc, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0);
- adder = _mm_set_epi8(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0);
- mask1 = _mm_set1_epi8(0x3);
- mask2 = _mm_set1_epi8(0xc);
-
- while (d32 != top) {
- pi = (xor) ? _mm_load_si128 ((__m128i *) d32) : _mm_setzero_si128();
- vi = _mm_load_si128((__m128i *) s32);
-
- tindex = 0;
- for (i = 0; i < 4; i++) {
- si = _mm_shuffle_epi8(vi, shuffler);
-
- xi = _mm_and_si128(si, mask1);
- xi = _mm_slli_epi16(xi, 2);
- xi = _mm_xor_si128(xi, adder);
- pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi));
- tindex++;
-
- xi = _mm_and_si128(si, mask2);
- xi = _mm_xor_si128(xi, adder);
- pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi));
- si = _mm_srli_epi16(si, 2);
- tindex++;
-
- xi = _mm_and_si128(si, mask2);
- xi = _mm_xor_si128(xi, adder);
- pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi));
- si = _mm_srli_epi16(si, 2);
- tindex++;
-
- xi = _mm_and_si128(si, mask2);
- xi = _mm_xor_si128(xi, adder);
- pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi));
- si = _mm_srli_epi16(si, 2);
- tindex++;
-
- vi = _mm_srli_epi32(vi, 8);
- }
- _mm_store_si128((__m128i *) d32, pi);
- d32 += 4;
- s32 += 4;
- }
-
- while (uld > 0) {
- if (xor) {
- *d32 ^= gf->multiply.w32(gf, *s32, val);
- } else {
- *d32 = gf->multiply.w32(gf, *s32, val);
- }
- *s32++;
- *d32++;
- uld -= 4;
- }
-
-
-#endif
-}
-
-static
-void
-gf_w32_split_4_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
-{
- unsigned long uls, uld;
- gf_internal_t *h;
- struct gf_split_4_32_lazy_data *ld;
- int i, j, k;
- gf_val_32_t pp, v, s, *s32, *d32, *top;
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7))) gf_alignment_error("gf_w32_split_4_32_lazy_multiply_region", 4);
- if (bytes % 4 != 0) {
- gf_alignment_error("gf_w32_split_4_32_lazy_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4);
- }
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- h = (gf_internal_t *) gf->scratch;
- pp = h->prim_poly;
-
- ld = (struct gf_split_4_32_lazy_data *) h->private;
-
- if (ld->last_value != val) {
- v = val;
- for (i = 0; i < 8; i++) {
- ld->tables[i][0] = 0;
- for (j = 1; j < 16; j <<= 1) {
- for (k = 0; k < j; k++) {
- ld->tables[i][k^j] = (v ^ ld->tables[i][k]);
- }
- v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1);
- }
- }
- }
- ld->last_value = val;
-
- s32 = (gf_val_32_t *) src;
- d32 = (gf_val_32_t *) dest;
- top = d32 + (bytes/4);
-
- while (d32 != top) {
- v = (xor) ? *d32 : 0;
- s = *s32;
- i = 0;
- while (s != 0) {
- v ^= ld->tables[i][s&0xf];
- s >>= 4;
- i++;
- }
- *d32 = v;
- d32++;
- s32++;
- }
-}
-
-static
-void
-gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
-{
-#ifdef INTEL_SSE4
- unsigned long uls, uld;
- gf_internal_t *h;
- int i, m, j, k, tindex;
- gf_val_32_t pp, v, s, *s32, *d32, *top, *realtop;
- __m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3;
- __m128i tv1, tv2, tv3, tv0;
- struct gf_split_4_32_lazy_data *ld;
- uint8_t btable[16];
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region", 4);
- if (bytes % 4 != 0) {
- gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4);
- }
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- h = (gf_internal_t *) gf->scratch;
- pp = h->prim_poly;
-
- uls &= 0xf;
-
- s32 = (gf_val_32_t *) src;
- d32 = (gf_val_32_t *) dest;
- top = d32 + (bytes/4);
-
- if (uls != 0) {
- while (uls != 16) {
- if (xor) {
- *d32 ^= gf->multiply.w32(gf, *s32, val);
- } else {
- *d32 = gf->multiply.w32(gf, *s32, val);
- }
- *s32++;
- *d32++;
- if (d32 == top) return;
- uls += 4;
- }
- }
-
- uld = (unsigned long) top;
- realtop = top;
-
- /* You need the size of this region to be a multiple of 64 bytes */
- bytes = (top - d32);
- bytes -= (bytes & 0xf);
- top = (d32 + bytes);
-
- ld = (struct gf_split_4_32_lazy_data *) h->private;
-
- v = val;
- for (i = 0; i < 8; i++) {
- ld->tables[i][0] = 0;
- for (j = 1; j < 16; j <<= 1) {
- for (k = 0; k < j; k++) {
- ld->tables[i][k^j] = (v ^ ld->tables[i][k]);
- }
- v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1);
- }
- for (j = 0; j < 4; j++) {
- for (k = 0; k < 16; k++) {
- btable[k] = (uint8_t) ld->tables[i][k];
- ld->tables[i][k] >>= 8;
- }
- tables[i][j] = _mm_loadu_si128((__m128i *) btable);
- }
- }
-
- mask1 = _mm_set1_epi8(0xf);
-
- if (xor) {
- while (d32 != top) {
- p0 = _mm_load_si128 ((__m128i *) d32);
- p1 = _mm_load_si128 ((__m128i *) (d32+4));
- p2 = _mm_load_si128 ((__m128i *) (d32+8));
- p3 = _mm_load_si128 ((__m128i *) (d32+12));
-
- v0 = _mm_load_si128((__m128i *) s32); s32 += 4;
- v1 = _mm_load_si128((__m128i *) s32); s32 += 4;
- v2 = _mm_load_si128((__m128i *) s32); s32 += 4;
- v3 = _mm_load_si128((__m128i *) s32); s32 += 4;
-
- si = _mm_and_si128(v0, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[0][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[0][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[0][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[0][3], si));
-
- v0 = _mm_srli_epi32(v0, 4);
- si = _mm_and_si128(v0, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si));
-
- si = _mm_and_si128(v1, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si));
-
- v1 = _mm_srli_epi32(v1, 4);
- si = _mm_and_si128(v1, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si));
-
- si = _mm_and_si128(v2, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si));
-
- v2 = _mm_srli_epi32(v2, 4);
- si = _mm_and_si128(v2, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si));
-
- si = _mm_and_si128(v3, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[6][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[6][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[6][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[6][3], si));
-
- v3 = _mm_srli_epi32(v3, 4);
- si = _mm_and_si128(v3, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si));
-
- _mm_store_si128((__m128i *) d32, p0);
- _mm_store_si128((__m128i *) (d32+4), p1);
- _mm_store_si128((__m128i *) (d32+8), p2);
- _mm_store_si128((__m128i *) (d32+12), p3);
- d32 += 16;
- }
- } else {
- while (d32 != top) {
-
- v0 = _mm_load_si128((__m128i *) s32); s32 += 4;
- v1 = _mm_load_si128((__m128i *) s32); s32 += 4;
- v2 = _mm_load_si128((__m128i *) s32); s32 += 4;
- v3 = _mm_load_si128((__m128i *) s32); s32 += 4;
-
-
-
- si = _mm_and_si128(v0, mask1);
- p0 = _mm_shuffle_epi8(tables[0][0], si);
- p1 = _mm_shuffle_epi8(tables[0][1], si);
- p2 = _mm_shuffle_epi8(tables[0][2], si);
- p3 = _mm_shuffle_epi8(tables[0][3], si);
-
- v0 = _mm_srli_epi32(v0, 4);
- si = _mm_and_si128(v0, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si));
-
- si = _mm_and_si128(v1, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si));
-
- v1 = _mm_srli_epi32(v1, 4);
- si = _mm_and_si128(v1, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si));
-
- si = _mm_and_si128(v2, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si));
-
- v2 = _mm_srli_epi32(v2, 4);
- si = _mm_and_si128(v2, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si));
-
- si = _mm_and_si128(v3, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[6][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[6][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[6][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[6][3], si));
-
- v3 = _mm_srli_epi32(v3, 4);
- si = _mm_and_si128(v3, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si));
-
- _mm_store_si128((__m128i *) d32, p0);
- _mm_store_si128((__m128i *) (d32+4), p1);
- _mm_store_si128((__m128i *) (d32+8), p2);
- _mm_store_si128((__m128i *) (d32+12), p3);
- d32 += 16;
- }
- }
-
- while (d32 < realtop) {
- if (xor) {
- *d32 ^= gf->multiply.w32(gf, *s32, val);
- } else {
- *d32 = gf->multiply.w32(gf, *s32, val);
- }
- *s32++;
- *d32++;
- }
-
-
-#endif
-}
-
-/*
-static
-void
-gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
-{
-#ifdef INTEL_SSE4
- unsigned long uls, uld;
- gf_internal_t *h;
- int i, m, j, k, tindex;
- gf_val_32_t pp, v, s, *s32, *d32, *top, *realtop;
- __m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1;
- struct gf_split_4_32_lazy_data *ld;
- uint8_t btable[16];
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region", 4);
- if (bytes % 4 != 0) {
- gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4);
- }
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- h = (gf_internal_t *) gf->scratch;
- pp = h->prim_poly;
-
- uls &= 0xf;
-
- s32 = (gf_val_32_t *) src;
- d32 = (gf_val_32_t *) dest;
- top = d32 + (bytes/4);
-
- if (uls != 0) {
- while (uls != 16) {
- if (xor) {
- *d32 ^= gf->multiply.w32(gf, *s32, val);
- } else {
- *d32 = gf->multiply.w32(gf, *s32, val);
- }
- *s32++;
- *d32++;
- if (d32 == top) return;
- uls += 4;
- }
- }
-
- uld = (unsigned long) top;
- realtop = top;
-
- bytes = (top - d32);
- bytes -= (bytes & 0xf);
- top = (d32 + bytes);
-
- ld = (struct gf_split_4_32_lazy_data *) h->private;
-
- v = val;
- for (i = 0; i < 8; i++) {
- ld->tables[i][0] = 0;
- for (j = 1; j < 16; j <<= 1) {
- for (k = 0; k < j; k++) {
- ld->tables[i][k^j] = (v ^ ld->tables[i][k]);
- }
- v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1);
- }
- for (j = 0; j < 4; j++) {
- for (k = 0; k < 16; k++) {
- btable[k] = (uint8_t) ld->tables[i][k];
- ld->tables[i][k] >>= 8;
- }
- tables[i][j] = _mm_loadu_si128((__m128i *) btable);
- }
- }
-
- mask1 = _mm_set1_epi8(0xf);
-
- if (xor) {
- while (d32 != top) {
- p0 = _mm_load_si128 ((__m128i *) d32);
- p1 = _mm_load_si128 ((__m128i *) (d32+4));
- p2 = _mm_load_si128 ((__m128i *) (d32+8));
- p3 = _mm_load_si128 ((__m128i *) (d32+12));
-
- for (i = 0; i < 8; i++) {
- vi = _mm_load_si128((__m128i *) s32);
-
- si = _mm_and_si128(vi, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[i][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[i][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[i][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[i][3], si));
-
- i++;
- vi = _mm_srli_epi32(vi, 4);
- si = _mm_and_si128(vi, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[i][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[i][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[i][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[i][3], si));
- s32 += 4;
- }
- _mm_store_si128((__m128i *) d32, p0);
- _mm_store_si128((__m128i *) (d32+4), p1);
- _mm_store_si128((__m128i *) (d32+8), p2);
- _mm_store_si128((__m128i *) (d32+12), p3);
- d32 += 16;
- }
- } else {
- while (d32 != top) {
- for (i = 0; i < 8; i++) {
- vi = _mm_load_si128((__m128i *) s32);
-
- si = _mm_and_si128(vi, mask1);
- p0 = _mm_shuffle_epi8(tables[i][0], si);
- p1 = _mm_shuffle_epi8(tables[i][1], si);
- p2 = _mm_shuffle_epi8(tables[i][2], si);
- p3 = _mm_shuffle_epi8(tables[i][3], si);
-
- i++;
- vi = _mm_srli_epi32(vi, 4);
- si = _mm_and_si128(vi, mask1);
- p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[i][0], si));
- p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[i][1], si));
- p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[i][2], si));
- p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[i][3], si));
- s32 += 4;
- }
- _mm_store_si128((__m128i *) d32, p0);
- _mm_store_si128((__m128i *) (d32+4), p1);
- _mm_store_si128((__m128i *) (d32+8), p2);
- _mm_store_si128((__m128i *) (d32+12), p3);
- d32 += 16;
- }
- }
-
- while (d32 < realtop) {
- if (xor) {
- *d32 ^= gf->multiply.w32(gf, *s32, val);
- } else {
- *d32 = gf->multiply.w32(gf, *s32, val);
- }
- *s32++;
- *d32++;
- }
-
-
-#endif
-}
-*/
-
-static
-int gf_w32_split_init(gf_t *gf)
-{
- gf_internal_t *h;
- struct gf_split_2_32_lazy_data *ld2;
- struct gf_split_4_32_lazy_data *ld4;
- struct gf_split_8_8_data *d8;
- uint32_t p, basep;
- int i, j, exp;
-
- h = (gf_internal_t *) gf->scratch;
-
- /* Defaults */
- gf->multiply_region.w32 = gf_w32_multiply_region_from_single;
- gf->multiply.w32 = gf_w32_shift_multiply;
- gf->inverse.w32 = gf_w32_euclid;
-
- if (h->arg1 == 8 && h->arg2 == 8) {
- gf->multiply.w32 = gf_w32_split_8_8_multiply;
- gf->multiply_region.w32 = gf_w32_split_8_8_multiply_region;
- d8 = (struct gf_split_8_8_data *) h->private;
- basep = 1;
- for (exp = 0; exp < 7; exp++) {
- for (j = 0; j < 256; j++) d8->tables[exp][0][j] = 0;
- for (i = 0; i < 256; i++) d8->tables[exp][i][0] = 0;
- d8->tables[exp][1][1] = basep;
- for (i = 2; i < 256; i++) {
- if (i&1) {
- p = d8->tables[exp][i^1][1];
- d8->tables[exp][i][1] = p ^ basep;
- } else {
- p = d8->tables[exp][i>>1][1];
- d8->tables[exp][i][1] = GF_MULTBY_TWO(p);
- }
- }
- for (i = 1; i < 256; i++) {
- p = d8->tables[exp][i][1];
- for (j = 1; j < 256; j++) {
- if (j&1) {
- d8->tables[exp][i][j] = d8->tables[exp][i][j^1] ^ p;
- } else {
- d8->tables[exp][i][j] = GF_MULTBY_TWO(d8->tables[exp][i][j>>1]);
- }
- }
- }
- for (i = 0; i < 8; i++) basep = GF_MULTBY_TWO(basep);
- }
- }
- if ((h->arg1 == 2 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 2)) {
- ld2 = (struct gf_split_2_32_lazy_data *) h->private;
- ld2->last_value = 0;
- if (h->region_type & GF_REGION_SSE) {
- gf->multiply_region.w32 = gf_w32_split_2_32_lazy_sse_multiply_region;
- } else {
- gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region;
- }
- }
- if ((h->arg1 == 4 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 4)) {
- ld4 = (struct gf_split_4_32_lazy_data *) h->private;
- ld4->last_value = 0;
- if (h->region_type & GF_REGION_SSE) {
- if (h->region_type & GF_REGION_ALTMAP) {
- gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_altmap_multiply_region;
- }
- } else {
- gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region;
- }
- }
- return 1;
-}
-
-static
-gf_val_32_t
-gf_w32_composite_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
-{
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- uint16_t b0 = b & 0x0000ffff;
- uint16_t b1 = (b & 0xffff0000) >> 16;
- uint16_t a0 = a & 0x0000ffff;
- uint16_t a1 = (a & 0xffff0000) >> 16;
- uint16_t a1b1;
-
- a1b1 = base_gf->multiply.w16(base_gf, a1, b1);
-
- return ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) | ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16));
-}
-
-/*
- * Composite field division trick (explained in 2007 tech report)
- *
- * Compute a / b = a*b^-1, where p(x) = x^2 + sx + 1
- *
- * let c = b^-1
- *
- * c*b = (s*b1c1+b1c0+b0c1)x+(b1c1+b0c0)
- *
- * want (s*b1c1+b1c0+b0c1) = 0 and (b1c1+b0c0) = 1
- *
- * let d = b1c1 and d+1 = b0c0
- *
- * solve s*b1c1+b1c0+b0c1 = 0
- *
- * solution: d = (b1b0^-1)(b1b0^-1+b0b1^-1+s)^-1
- *
- * c0 = (d+1)b0^-1
- * c1 = d*b1^-1
- *
- * a / b = a * c
- */
-static
-gf_val_32_t
-gf_w32_composite_inverse(gf_t *gf, gf_val_32_t a)
-{
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- uint16_t a0 = a & 0x0000ffff;
- uint16_t a1 = (a & 0xffff0000) >> 16;
- uint16_t c0, c1, d, tmp;
- uint32_t c;
- uint16_t a0inv, a1inv;
-
- if (a0 == 0) {
- a1inv = base_gf->inverse.w16(base_gf, a1);
- c0 = base_gf->multiply.w16(base_gf, a1inv, GF_S_GF_16_2);
- c1 = a1inv;
- } else if (a1 == 0) {
- c0 = base_gf->inverse.w16(base_gf, a0);
- c1 = 0;
- } else {
- a1inv = base_gf->inverse.w16(base_gf, a1);
- a0inv = base_gf->inverse.w16(base_gf, a0);
-
- d = base_gf->multiply.w16(base_gf, a1, a0inv);
-
- tmp = (base_gf->multiply.w16(base_gf, a1, a0inv) ^ base_gf->multiply.w16(base_gf, a0, a1inv) ^ GF_S_GF_16_2);
- tmp = base_gf->inverse.w16(base_gf, tmp);
-
- d = base_gf->multiply.w16(base_gf, d, tmp);
-
- c0 = base_gf->multiply.w16(base_gf, (d^1), a0inv);
- c1 = base_gf->multiply.w16(base_gf, d, a1inv);
- }
-
- c = c0 | (c1 << 16);
-
- return c;
-}
-
-static
-gf_val_32_t
-gf_w32_composite_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
-{
- gf_val_32_t binv;
-
- binv = gf_w32_composite_inverse(gf, b);
-
- return gf_w32_composite_multiply(gf, a, binv);
-}
-
-static
-void
-gf_w32_composite_multiply_region_table(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
-{
- unsigned long uls, uld;
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- int i=0;
- struct gf_w16_logtable_data * ltd;
- uint16_t b0 = val & 0x0000ffff;
- uint16_t b1 = (val & 0xffff0000) >> 16;
- uint32_t *s32 = (uint32_t *) src;
- uint32_t *d32 = (uint32_t *) dest;
- uint16_t a0, a1, a1b1;
- int num_syms = bytes >> 2;
- int sym_divisible = bytes % 4;
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w32_buf_const_log", 2);
- if (sym_divisible) {
- gf_alignment_error("gf_w32_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2);
- }
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- ltd = (struct gf_w16_logtable_data *) h->private;
-
- if (xor) {
- for (i = 0;i < num_syms; i++) {
- a0 = s32[i] & 0x0000ffff;
- a1 = (s32[i] & 0xffff0000) >> 16;
- a1b1 = ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b1]];
-
- d32[i] ^= ((ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b0]] ^ a1b1) |
- ((ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b0]] ^ ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b1]] ^
- ltd->antilog_tbl[ltd->log_tbl[a1b1] + ltd->log_tbl[GF_S_GF_16_2]]) << 16));
-
- }
- } else {
- for (i = 0;i < num_syms; i++) {
- a0 = s32[i] & 0x0000ffff;
- a1 = (s32[i] & 0xffff0000) >> 16;
- a1b1 = ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b1]];
-
- d32[i] = ((ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b0]] ^ a1b1) |
- ((ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b0]] ^ ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b1]] ^
- ltd->antilog_tbl[ltd->log_tbl[a1b1] + ltd->log_tbl[GF_S_GF_16_2]]) << 16));
- }
- }
-}
-
-static
-void
-gf_w32_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
-{
- unsigned long uls, uld;
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- int i=0;
- struct gf_w16_logtable_data * ltd;
- uint16_t b0 = val & 0x0000ffff;
- uint16_t b1 = (val & 0xffff0000) >> 16;
- uint32_t *s32 = (uint32_t *) src;
- uint32_t *d32 = (uint32_t *) dest;
- uint16_t a0, a1, a1b1;
- int num_syms = bytes >> 2;
- int sym_divisible = bytes % 4;
-
- uls = (unsigned long) src;
- uld = (unsigned long) dest;
- if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w32_buf_const_log", 2);
- if (sym_divisible) {
- gf_alignment_error("gf_w32_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2);
- }
-
- if (val == 0) {
- if (xor) return;
- bzero(dest, bytes);
- return;
- }
-
- ltd = (struct gf_w16_logtable_data *) h->private;
-
- if (xor) {
- for (i = 0;i < num_syms; i++) {
- a0 = s32[i] & 0x0000ffff;
- a1 = (s32[i] & 0xffff0000) >> 16;
- a1b1 = base_gf->multiply.w16(base_gf, a1, b1);
-
- d32[i] ^= ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) |
- ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16));
-
- }
- } else {
- for (i = 0;i < num_syms; i++) {
- a0 = s32[i] & 0x0000ffff;
- a1 = (s32[i] & 0xffff0000) >> 16;
- a1b1 = base_gf->multiply.w16(base_gf, a1, b1);
-
- d32[i] = ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) |
- ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16));
- }
- }
-}
-
-
-
-static
-void
-gf_w32_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
-{
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- gf_val_16_t val0 = val & 0x0000ffff;
- gf_val_16_t val1 = (val & 0xffff0000) >> 16;
- int sub_reg_size = bytes / 2;
-
- if (bytes % 2 != 0) gf_alignment_error("gf_w32_composite_multiply_region_alt", 1);
- if (sub_reg_size % 2 != 0) gf_alignment_error("gf_w32_composite_multiply_region_alt", 1);
-
- if (!xor) {
- memset(dest, 0, bytes);
- }
-
- base_gf->multiply_region.w16(base_gf, src, dest, val0, sub_reg_size, xor);
- base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest, val1, sub_reg_size, 1);
- base_gf->multiply_region.w16(base_gf, src, dest+sub_reg_size, val1, sub_reg_size, xor);
- base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest+sub_reg_size, val0, sub_reg_size, 1);
- base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest+sub_reg_size, base_gf->multiply.w16(base_gf, GF_S_GF_16_2, val1), sub_reg_size, 1);
-}
-
-static
-int gf_w32_composite_init(gf_t *gf)
-{
- struct gf_w16_logtable_data *ltd;
- gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
- gf_val_32_t a, b;
- uint64_t prim_poly = ((gf_internal_t *) base_gf->scratch)->prim_poly;
- int i;
-
- ltd = (struct gf_w16_logtable_data *) h->private;
-
- ltd->log_tbl[0] = 0;
-
- bzero(&(ltd->_antilog_tbl[0]), sizeof(ltd->_antilog_tbl));
-
- ltd->antilog_tbl = &(ltd->_antilog_tbl[GF_BASE_FIELD_SIZE * 2]);
-
- b = 1;
- for (i = 0; i < GF_BASE_FIELD_GROUP_SIZE; i++) {
- ltd->log_tbl[b] = (gf_val_16_t)i;
- ltd->antilog_tbl[i] = (gf_val_16_t)b;
- ltd->antilog_tbl[i+GF_BASE_FIELD_GROUP_SIZE] = (gf_val_16_t)b;
- b <<= 1;
- if (b & GF_BASE_FIELD_SIZE) {
- b = b ^ prim_poly;
- }
- }
- ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */
- ltd->inv_tbl[1] = 1;
- for (i = 2; i < GF_BASE_FIELD_SIZE; i++) {
- ltd->inv_tbl[i] = ltd->antilog_tbl[GF_BASE_FIELD_GROUP_SIZE-ltd->log_tbl[i]];
- }
-
- if (h->region_type & GF_REGION_ALTMAP) {
- gf->multiply_region.w32 = gf_w32_composite_multiply_region_alt;
- } else {
- if (h->region_type & GF_REGION_SINGLE_TABLE) {
- gf->multiply_region.w32 = gf_w32_composite_multiply_region_table;
- } else {
- gf->multiply_region.w32 = gf_w32_composite_multiply_region;
- }
- }
-
- gf->multiply.w32 = gf_w32_composite_multiply;
- gf->divide.w32 = gf_w32_composite_divide;
- gf->inverse.w32 = gf_w32_composite_inverse;
-
- return 1;
-}
-
-int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
-{
- int ss;
-
- ss = (GF_REGION_SSE | GF_REGION_NOSSE);
- switch(mult_type)
- {
- case GF_MULT_SPLIT_TABLE:
- if (arg1 == 8 && arg2 == 8){
- if (region_type != GF_REGION_DEFAULT) return -1;
- return sizeof(gf_internal_t) + sizeof(struct gf_split_8_8_data) + 64;
- }
- if ((arg1 == 2 && arg2 == 32) || (arg2 == 2 && arg1 == 32)) {
- region_type &= (~GF_REGION_LAZY);
- if ((region_type & ss) == ss) return -1;
- if ((region_type | ss) != ss) return -1;
- return sizeof(gf_internal_t) + sizeof(struct gf_split_2_32_lazy_data) + 64;
- }
- if ((arg1 == 4 && arg2 == 32) || (arg2 == 4 && arg1 == 32)) {
- region_type &= (~GF_REGION_LAZY);
- if (region_type & GF_REGION_ALTMAP) {
- region_type &= (~GF_REGION_ALTMAP);
- if ((region_type & ss) == ss) return -1;
- if ((region_type | ss) != ss) return -1;
- return sizeof(gf_internal_t) + sizeof(struct gf_split_4_32_lazy_data) + 64;
- } else return -1;
- }
- return -1;
- case GF_MULT_DEFAULT:
- case GF_MULT_SHIFT:
- if (arg1 != 0 || arg2 != 0 || region_type != 0) return -1;
- return sizeof(gf_internal_t);
- break;
- case GF_MULT_COMPOSITE:
- if (region_type & ~(GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP | GF_REGION_STDMAP)) return -1;
- if ((region_type & (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) == (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) return -1;
- if (arg1 == 2 && arg2 == 16 || arg2 == 2 && arg1 == 16) {
- return sizeof(gf_internal_t) + sizeof(struct gf_w16_logtable_data) + 64;
- } else {
- return -1;
- }
- default:
- return -1;
- }
-}
-
-int gf_w32_init(gf_t *gf)
-{
- gf_internal_t *h;
-
- h = (gf_internal_t *) gf->scratch;
- if (h->prim_poly == 0) h->prim_poly = 0x400007;
-
- gf->multiply.w32 = NULL;
- gf->divide.w32 = NULL;
- gf->inverse.w32 = NULL;
- gf->multiply_region.w32 = NULL;
-
- switch(h->mult_type) {
- case GF_MULT_DEFAULT:
- case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break;
- case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break;
- case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break;
- default: return 0;
- }
- if (h->divide_type == GF_DIVIDE_EUCLID) {
- gf->divide.w32 = gf_w32_divide_from_inverse;
- gf->inverse.w32 = gf_w32_euclid;
- } else if (h->divide_type == GF_DIVIDE_MATRIX) {
- gf->divide.w32 = gf_w32_divide_from_inverse;
- gf->inverse.w32 = gf_w32_matrix;
- }
-
- if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) {
- gf->divide.w32 = gf_w32_divide_from_inverse;
- }
- if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) {
- gf->inverse.w32 = gf_w32_inverse_from_divide;
- }
- return 1;
-}
diff --git a/junk-w32-single-time.c b/junk-w32-single-time.c
deleted file mode 100644
index ab406b7..0000000
--- a/junk-w32-single-time.c
+++ /dev/null
@@ -1,16 +0,0 @@
-echo "SHIFT" `gf_time 32 M 0 10240 10240 SHIFT - - | tail -n 1`
-echo "GROUP 2 4" `gf_time 32 M 0 10240 10240 GROUP 2 4 - - | tail -n 1`
-echo "GROUP 3 4" `gf_time 32 M 0 10240 10240 GROUP 3 4 - - | tail -n 1`
-echo "GROUP 4 4" `gf_time 32 M 0 10240 10240 GROUP 4 4 - - | tail -n 1`
-echo "GROUP 2 8" `gf_time 32 M 0 10240 10240 GROUP 2 8 - - | tail -n 1`
-echo "GROUP 3 8" `gf_time 32 M 0 10240 10240 GROUP 3 8 - - | tail -n 1`
-echo "GROUP 4 8" `gf_time 32 M 0 10240 10240 GROUP 4 8 - - | tail -n 1`
-echo "GROUP 2 2" `gf_time 32 M 0 10240 10240 GROUP 2 2 - - | tail -n 1`
-echo "GROUP 3 3" `gf_time 32 M 0 10240 10240 GROUP 3 3 - - | tail -n 1`
-echo "BYTWO_p" `gf_time 32 M 0 10240 10240 BYTWO_p - - | tail -n 1`
-echo "BYTWO_b" `gf_time 32 M 0 10240 10240 BYTWO_b - - | tail -n 1`
-echo "SPLIT 32 2" `gf_time 32 M 0 10240 10240 SPLIT 32 2 - - | tail -n 1`
-echo "SPLIT 32 4" `gf_time 32 M 0 10240 10240 SPLIT 32 4 - - | tail -n 1`
-echo "SPLIT 32 8" `gf_time 32 M 0 10240 10240 SPLIT 32 8 - - | tail -n 1`
-echo "SPLIT 8 8" `gf_time 32 M 0 10240 10240 SPLIT 8 8 - - | tail -n 1`
-echo "COMPOSITE 2 16 -" `gf_time 32 M 0 10240 10240 COMPOSITE 2 16 - - - | tail -n 1`
diff --git a/junk-w4-out.txt b/junk-w4-out.txt
deleted file mode 100644
index 60efcdc..0000000
--- a/junk-w4-out.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-Seed: 1345648646
-Buffer-Const,s!=d,xor=0: 1.005451 s 971.268 MB/s
-Buffer-Const,s!=d,xor=1: 1.029715 s 948.382 MB/s
-Buffer-Const,s==d,xor=0: 0.989556 s 986.869 MB/s
-Buffer-Const,s==d,xor=1: 1.026105 s 951.718 MB/s
-BYTWO_p
-Seed: 1345648655
-Buffer-Const,s!=d,xor=0: 0.603574 s 1617.966 MB/s
-Buffer-Const,s!=d,xor=1: 0.612757 s 1593.720 MB/s
-Buffer-Const,s==d,xor=0: 0.599630 s 1628.609 MB/s
-Buffer-Const,s==d,xor=1: 0.622749 s 1568.149 MB/s
-BYTWO_p SSE
-Seed: 1345648662
-Buffer-Const,s!=d,xor=0: 0.487348 s 2003.831 MB/s
-Buffer-Const,s!=d,xor=1: 0.488745 s 1998.100 MB/s
-Buffer-Const,s==d,xor=0: 0.470528 s 2075.463 MB/s
-Buffer-Const,s==d,xor=1: 0.480067 s 2034.223 MB/s
-BYTWO_b
-Seed: 1345648669
-Buffer-Const,s!=d,xor=0: 0.359088 s 2719.564 MB/s
-Buffer-Const,s!=d,xor=1: 0.365816 s 2669.543 MB/s
-Buffer-Const,s==d,xor=0: 0.361701 s 2699.920 MB/s
-Buffer-Const,s==d,xor=1: 0.354540 s 2754.449 MB/s
-BYTWO_b SSE
-Seed: 1345648689
-Buffer-Const,s!=d,xor=0: 2.036338 s 479.568 MB/s
-Buffer-Const,s!=d,xor=1: 2.237701 s 436.413 MB/s
-Buffer-Const,s==d,xor=0: 2.048971 s 476.611 MB/s
-Buffer-Const,s==d,xor=1: 2.229312 s 438.056 MB/s
-TABLE SINGLE
-Seed: 1345648703
-Buffer-Const,s!=d,xor=0: 1.074082 s 909.207 MB/s
-Buffer-Const,s!=d,xor=1: 1.083797 s 901.057 MB/s
-Buffer-Const,s==d,xor=0: 1.077001 s 906.743 MB/s
-Buffer-Const,s==d,xor=1: 1.079369 s 904.753 MB/s
-TABLE DOUBLE
-Seed: 1345648712
-Buffer-Const,s!=d,xor=0: 0.743830 s 1312.884 MB/s
-Buffer-Const,s!=d,xor=1: 0.760719 s 1283.736 MB/s
-Buffer-Const,s==d,xor=0: 0.708908 s 1377.559 MB/s
-Buffer-Const,s==d,xor=1: 0.727896 s 1341.624 MB/s
-TABLE QUAD
-Seed: 1345648720
-Buffer-Const,s!=d,xor=0: 0.898810 s 1086.506 MB/s
-Buffer-Const,s!=d,xor=1: 0.876269 s 1114.455 MB/s
-Buffer-Const,s==d,xor=0: 0.872698 s 1119.015 MB/s
-Buffer-Const,s==d,xor=1: 0.873175 s 1118.404 MB/s
-TABLE QUAD,LAZY
-Seed: 1345648729
-Buffer-Const,s!=d,xor=0: 0.143798 s 6791.205 MB/s
-Buffer-Const,s!=d,xor=1: 0.151166 s 6460.201 MB/s
-Buffer-Const,s==d,xor=0: 0.123824 s 7886.721 MB/s
-Buffer-Const,s==d,xor=1: 0.123538 s 7904.940 MB/s
-TABLE SINGLE,SSE
-Seed: 1345648748
-Buffer-Const,s!=d,xor=0: 4.562493 s 214.041 MB/s
-Buffer-Const,s!=d,xor=1: 5.116838 s 190.853 MB/s
-Buffer-Const,s==d,xor=0: 4.533105 s 215.429 MB/s
-Buffer-Const,s==d,xor=1: 5.053730 s 193.236 MB/s
-LOG
diff --git a/junk-w4-timing-out.txt b/junk-w4-timing-out.txt
deleted file mode 100644
index 6daadd4..0000000
--- a/junk-w4-timing-out.txt
+++ /dev/null
@@ -1,792 +0,0 @@
-Seed: 1352748099
-Buffer-Const,s!=d,xor=0: 0.608121 s 210.484 MB/s
-Buffer-Const,s!=d,xor=1: 0.692329 s 184.883 MB/s
-1024 131072 4 LOG - -
-Seed: 1352748102
-Buffer-Const,s!=d,xor=0: 0.699226 s 183.060 MB/s
-Buffer-Const,s!=d,xor=1: 0.687310 s 186.233 MB/s
-2048 65536 4 LOG - -
-Seed: 1352748106
-Buffer-Const,s!=d,xor=0: 0.604397 s 211.781 MB/s
-Buffer-Const,s!=d,xor=1: 0.682591 s 187.521 MB/s
-4096 32768 4 LOG - -
-Seed: 1352748109
-Buffer-Const,s!=d,xor=0: 0.602384 s 212.489 MB/s
-Buffer-Const,s!=d,xor=1: 0.678849 s 188.555 MB/s
-8192 16384 4 LOG - -
-Seed: 1352748112
-Buffer-Const,s!=d,xor=0: 0.602103 s 212.588 MB/s
-Buffer-Const,s!=d,xor=1: 0.688450 s 185.925 MB/s
-16384 8192 4 LOG - -
-Seed: 1352748115
-Buffer-Const,s!=d,xor=0: 0.598464 s 213.881 MB/s
-Buffer-Const,s!=d,xor=1: 0.676076 s 189.328 MB/s
-32768 4096 4 LOG - -
-Seed: 1352748119
-Buffer-Const,s!=d,xor=0: 0.611499 s 209.322 MB/s
-Buffer-Const,s!=d,xor=1: 0.693351 s 184.611 MB/s
-65536 2048 4 LOG - -
-Seed: 1352748122
-Buffer-Const,s!=d,xor=0: 0.609786 s 209.910 MB/s
-Buffer-Const,s!=d,xor=1: 0.689794 s 185.563 MB/s
-131072 1024 4 LOG - -
-Seed: 1352748125
-Buffer-Const,s!=d,xor=0: 0.619027 s 206.776 MB/s
-Buffer-Const,s!=d,xor=1: 0.703627 s 181.915 MB/s
-262144 512 4 LOG - -
-Seed: 1352748129
-Buffer-Const,s!=d,xor=0: 0.605785 s 211.296 MB/s
-Buffer-Const,s!=d,xor=1: 0.696728 s 183.716 MB/s
-524288 256 4 LOG - -
-Seed: 1352748132
-Buffer-Const,s!=d,xor=0: 0.591555 s 216.379 MB/s
-Buffer-Const,s!=d,xor=1: 0.666735 s 191.980 MB/s
-1048576 128 4 LOG - -
-Seed: 1352748135
-Buffer-Const,s!=d,xor=0: 0.623167 s 205.403 MB/s
-Buffer-Const,s!=d,xor=1: 0.675010 s 189.627 MB/s
-2097152 64 4 LOG - -
-Seed: 1352748138
-Buffer-Const,s!=d,xor=0: 0.572467 s 223.594 MB/s
-Buffer-Const,s!=d,xor=1: 0.733714 s 174.455 MB/s
-4194304 32 4 LOG - -
-Seed: 1352748142
-Buffer-Const,s!=d,xor=0: 0.617676 s 207.228 MB/s
-Buffer-Const,s!=d,xor=1: 0.742744 s 172.334 MB/s
-8388608 16 4 LOG - -
-Seed: 1352748145
-Buffer-Const,s!=d,xor=0: 0.579833 s 220.753 MB/s
-Buffer-Const,s!=d,xor=1: 0.736355 s 173.829 MB/s
-16777216 8 4 LOG - -
-Seed: 1352748148
-Buffer-Const,s!=d,xor=0: 0.682980 s 187.414 MB/s
-Buffer-Const,s!=d,xor=1: 0.738846 s 173.243 MB/s
-33554432 4 4 LOG - -
-Seed: 1352748152
-Buffer-Const,s!=d,xor=0: 0.692141 s 184.933 MB/s
-Buffer-Const,s!=d,xor=1: 0.725968 s 176.316 MB/s
-67108864 2 4 LOG - -
-Seed: 1352748155
-Buffer-Const,s!=d,xor=0: 0.737346 s 173.596 MB/s
-Buffer-Const,s!=d,xor=1: 0.725769 s 176.365 MB/s
-134217728 1 4 LOG - -
-Seed: 1352748159
-Buffer-Const,s!=d,xor=0: 0.252694 s 506.541 MB/s
-Buffer-Const,s!=d,xor=1: 0.280102 s 456.976 MB/s
-1024 131072 4 TABLE SINGLE -
-Seed: 1352748160
-Buffer-Const,s!=d,xor=0: 0.246866 s 518.501 MB/s
-Buffer-Const,s!=d,xor=1: 0.276830 s 462.377 MB/s
-2048 65536 4 TABLE SINGLE -
-Seed: 1352748162
-Buffer-Const,s!=d,xor=0: 0.246874 s 518.482 MB/s
-Buffer-Const,s!=d,xor=1: 0.274016 s 467.125 MB/s
-4096 32768 4 TABLE SINGLE -
-Seed: 1352748164
-Buffer-Const,s!=d,xor=0: 0.247869 s 516.402 MB/s
-Buffer-Const,s!=d,xor=1: 0.271679 s 471.144 MB/s
-8192 16384 4 TABLE SINGLE -
-Seed: 1352748166
-Buffer-Const,s!=d,xor=0: 0.244581 s 523.345 MB/s
-Buffer-Const,s!=d,xor=1: 0.270779 s 472.710 MB/s
-16384 8192 4 TABLE SINGLE -
-Seed: 1352748167
-Buffer-Const,s!=d,xor=0: 0.256167 s 499.675 MB/s
-Buffer-Const,s!=d,xor=1: 0.278188 s 460.121 MB/s
-32768 4096 4 TABLE SINGLE -
-Seed: 1352748169
-Buffer-Const,s!=d,xor=0: 0.248786 s 514.498 MB/s
-Buffer-Const,s!=d,xor=1: 0.305109 s 419.522 MB/s
-65536 2048 4 TABLE SINGLE -
-Seed: 1352748171
-Buffer-Const,s!=d,xor=0: 0.249003 s 514.050 MB/s
-Buffer-Const,s!=d,xor=1: 0.276043 s 463.696 MB/s
-131072 1024 4 TABLE SINGLE -
-Seed: 1352748173
-Buffer-Const,s!=d,xor=0: 0.249019 s 514.016 MB/s
-Buffer-Const,s!=d,xor=1: 0.278464 s 459.665 MB/s
-262144 512 4 TABLE SINGLE -
-Seed: 1352748174
-Buffer-Const,s!=d,xor=0: 0.257905 s 496.308 MB/s
-Buffer-Const,s!=d,xor=1: 0.266241 s 480.767 MB/s
-524288 256 4 TABLE SINGLE -
-Seed: 1352748176
-Buffer-Const,s!=d,xor=0: 0.254655 s 502.641 MB/s
-Buffer-Const,s!=d,xor=1: 0.267730 s 478.093 MB/s
-1048576 128 4 TABLE SINGLE -
-Seed: 1352748178
-Buffer-Const,s!=d,xor=0: 0.264532 s 483.874 MB/s
-Buffer-Const,s!=d,xor=1: 0.270533 s 473.140 MB/s
-2097152 64 4 TABLE SINGLE -
-Seed: 1352748180
-Buffer-Const,s!=d,xor=0: 0.249658 s 512.702 MB/s
-Buffer-Const,s!=d,xor=1: 0.265106 s 482.826 MB/s
-4194304 32 4 TABLE SINGLE -
-Seed: 1352748181
-Buffer-Const,s!=d,xor=0: 0.244030 s 524.527 MB/s
-Buffer-Const,s!=d,xor=1: 0.301052 s 425.176 MB/s
-8388608 16 4 TABLE SINGLE -
-Seed: 1352748183
-Buffer-Const,s!=d,xor=0: 0.263009 s 486.676 MB/s
-Buffer-Const,s!=d,xor=1: 0.270075 s 473.943 MB/s
-16777216 8 4 TABLE SINGLE -
-Seed: 1352748185
-Buffer-Const,s!=d,xor=0: 0.318133 s 402.348 MB/s
-Buffer-Const,s!=d,xor=1: 0.315726 s 405.415 MB/s
-33554432 4 4 TABLE SINGLE -
-Seed: 1352748187
-Buffer-Const,s!=d,xor=0: 0.329082 s 388.961 MB/s
-Buffer-Const,s!=d,xor=1: 0.303774 s 421.366 MB/s
-67108864 2 4 TABLE SINGLE -
-Seed: 1352748189
-Buffer-Const,s!=d,xor=0: 0.373282 s 342.904 MB/s
-Buffer-Const,s!=d,xor=1: 0.299255 s 427.729 MB/s
-134217728 1 4 TABLE SINGLE -
-Seed: 1352748191
-Buffer-Const,s!=d,xor=0: 0.026432 s 4842.652 MB/s
-Buffer-Const,s!=d,xor=1: 0.028027 s 4566.976 MB/s
-1024 131072 4 TABLE SINGLE,SSE -
-Seed: 1352748192
-Buffer-Const,s!=d,xor=0: 0.020923 s 6117.629 MB/s
-Buffer-Const,s!=d,xor=1: 0.021753 s 5884.226 MB/s
-2048 65536 4 TABLE SINGLE,SSE -
-Seed: 1352748193
-Buffer-Const,s!=d,xor=0: 0.017533 s 7300.592 MB/s
-Buffer-Const,s!=d,xor=1: 0.018308 s 6991.599 MB/s
-4096 32768 4 TABLE SINGLE,SSE -
-Seed: 1352748193
-Buffer-Const,s!=d,xor=0: 0.016224 s 7889.591 MB/s
-Buffer-Const,s!=d,xor=1: 0.016537 s 7740.353 MB/s
-8192 16384 4 TABLE SINGLE,SSE -
-Seed: 1352748194
-Buffer-Const,s!=d,xor=0: 0.015627 s 8191.000 MB/s
-Buffer-Const,s!=d,xor=1: 0.016160 s 7921.020 MB/s
-16384 8192 4 TABLE SINGLE,SSE -
-Seed: 1352748195
-Buffer-Const,s!=d,xor=0: 0.015679 s 8163.599 MB/s
-Buffer-Const,s!=d,xor=1: 0.016548 s 7735.000 MB/s
-32768 4096 4 TABLE SINGLE,SSE -
-Seed: 1352748196
-Buffer-Const,s!=d,xor=0: 0.016351 s 7828.046 MB/s
-Buffer-Const,s!=d,xor=1: 0.017147 s 7464.939 MB/s
-65536 2048 4 TABLE SINGLE,SSE -
-Seed: 1352748196
-Buffer-Const,s!=d,xor=0: 0.015204 s 8418.863 MB/s
-Buffer-Const,s!=d,xor=1: 0.016621 s 7701.049 MB/s
-131072 1024 4 TABLE SINGLE,SSE -
-Seed: 1352748197
-Buffer-Const,s!=d,xor=0: 0.019366 s 6609.594 MB/s
-Buffer-Const,s!=d,xor=1: 0.020611 s 6210.405 MB/s
-262144 512 4 TABLE SINGLE,SSE -
-Seed: 1352748198
-Buffer-Const,s!=d,xor=0: 0.019287 s 6636.721 MB/s
-Buffer-Const,s!=d,xor=1: 0.020470 s 6253.155 MB/s
-524288 256 4 TABLE SINGLE,SSE -
-Seed: 1352748199
-Buffer-Const,s!=d,xor=0: 0.019210 s 6663.244 MB/s
-Buffer-Const,s!=d,xor=1: 0.021175 s 6044.754 MB/s
-1048576 128 4 TABLE SINGLE,SSE -
-Seed: 1352748199
-Buffer-Const,s!=d,xor=0: 0.035533 s 3602.314 MB/s
-Buffer-Const,s!=d,xor=1: 0.032351 s 3956.628 MB/s
-2097152 64 4 TABLE SINGLE,SSE -
-Seed: 1352748200
-Buffer-Const,s!=d,xor=0: 0.048733 s 2626.557 MB/s
-Buffer-Const,s!=d,xor=1: 0.044163 s 2898.370 MB/s
-4194304 32 4 TABLE SINGLE,SSE -
-Seed: 1352748201
-Buffer-Const,s!=d,xor=0: 0.051737 s 2474.071 MB/s
-Buffer-Const,s!=d,xor=1: 0.048826 s 2621.555 MB/s
-8388608 16 4 TABLE SINGLE,SSE -
-Seed: 1352748202
-Buffer-Const,s!=d,xor=0: 0.056330 s 2272.306 MB/s
-Buffer-Const,s!=d,xor=1: 0.029557 s 4330.617 MB/s
-16777216 8 4 TABLE SINGLE,SSE -
-Seed: 1352748203
-Buffer-Const,s!=d,xor=0: 0.066551 s 1923.338 MB/s
-Buffer-Const,s!=d,xor=1: 0.037378 s 3424.489 MB/s
-33554432 4 4 TABLE SINGLE,SSE -
-Seed: 1352748203
-Buffer-Const,s!=d,xor=0: 0.082171 s 1557.728 MB/s
-Buffer-Const,s!=d,xor=1: 0.048228 s 2654.058 MB/s
-67108864 2 4 TABLE SINGLE,SSE -
-Seed: 1352748204
-Buffer-Const,s!=d,xor=0: 0.125187 s 1022.469 MB/s
-Buffer-Const,s!=d,xor=1: 0.047497 s 2694.905 MB/s
-134217728 1 4 TABLE SINGLE,SSE -
-Seed: 1352748205
-Buffer-Const,s!=d,xor=0: 0.151542 s 844.651 MB/s
-Buffer-Const,s!=d,xor=1: 0.153138 s 835.847 MB/s
-1024 131072 4 TABLE DOUBLE -
-Seed: 1352748207
-Buffer-Const,s!=d,xor=0: 0.146267 s 875.111 MB/s
-Buffer-Const,s!=d,xor=1: 0.150025 s 853.189 MB/s
-2048 65536 4 TABLE DOUBLE -
-Seed: 1352748208
-Buffer-Const,s!=d,xor=0: 0.145038 s 882.529 MB/s
-Buffer-Const,s!=d,xor=1: 0.146365 s 874.525 MB/s
-4096 32768 4 TABLE DOUBLE -
-Seed: 1352748209
-Buffer-Const,s!=d,xor=0: 0.142601 s 897.608 MB/s
-Buffer-Const,s!=d,xor=1: 0.144650 s 884.893 MB/s
-8192 16384 4 TABLE DOUBLE -
-Seed: 1352748211
-Buffer-Const,s!=d,xor=0: 0.141861 s 902.293 MB/s
-Buffer-Const,s!=d,xor=1: 0.142722 s 896.848 MB/s
-16384 8192 4 TABLE DOUBLE -
-Seed: 1352748212
-Buffer-Const,s!=d,xor=0: 0.140131 s 913.433 MB/s
-Buffer-Const,s!=d,xor=1: 0.143035 s 894.888 MB/s
-32768 4096 4 TABLE DOUBLE -
-Seed: 1352748213
-Buffer-Const,s!=d,xor=0: 0.141368 s 905.436 MB/s
-Buffer-Const,s!=d,xor=1: 0.142083 s 900.879 MB/s
-65536 2048 4 TABLE DOUBLE -
-Seed: 1352748214
-Buffer-Const,s!=d,xor=0: 0.144412 s 886.351 MB/s
-Buffer-Const,s!=d,xor=1: 0.145837 s 877.693 MB/s
-131072 1024 4 TABLE DOUBLE -
-Seed: 1352748216
-Buffer-Const,s!=d,xor=0: 0.141466 s 904.810 MB/s
-Buffer-Const,s!=d,xor=1: 0.146338 s 874.686 MB/s
-262144 512 4 TABLE DOUBLE -
-Seed: 1352748217
-Buffer-Const,s!=d,xor=0: 0.141775 s 902.837 MB/s
-Buffer-Const,s!=d,xor=1: 0.143733 s 890.543 MB/s
-524288 256 4 TABLE DOUBLE -
-Seed: 1352748218
-Buffer-Const,s!=d,xor=0: 0.144309 s 886.984 MB/s
-Buffer-Const,s!=d,xor=1: 0.145978 s 876.843 MB/s
-1048576 128 4 TABLE DOUBLE -
-Seed: 1352748219
-Buffer-Const,s!=d,xor=0: 0.145523 s 879.584 MB/s
-Buffer-Const,s!=d,xor=1: 0.152104 s 841.530 MB/s
-2097152 64 4 TABLE DOUBLE -
-Seed: 1352748221
-Buffer-Const,s!=d,xor=0: 0.150421 s 850.944 MB/s
-Buffer-Const,s!=d,xor=1: 0.154586 s 828.018 MB/s
-4194304 32 4 TABLE DOUBLE -
-Seed: 1352748222
-Buffer-Const,s!=d,xor=0: 0.151304 s 845.978 MB/s
-Buffer-Const,s!=d,xor=1: 0.151530 s 844.720 MB/s
-8388608 16 4 TABLE DOUBLE -
-Seed: 1352748223
-Buffer-Const,s!=d,xor=0: 0.160126 s 799.369 MB/s
-Buffer-Const,s!=d,xor=1: 0.151316 s 845.910 MB/s
-16777216 8 4 TABLE DOUBLE -
-Seed: 1352748224
-Buffer-Const,s!=d,xor=0: 0.167688 s 763.323 MB/s
-Buffer-Const,s!=d,xor=1: 0.152321 s 840.331 MB/s
-33554432 4 4 TABLE DOUBLE -
-Seed: 1352748226
-Buffer-Const,s!=d,xor=0: 0.194515 s 658.047 MB/s
-Buffer-Const,s!=d,xor=1: 0.149023 s 858.929 MB/s
-67108864 2 4 TABLE DOUBLE -
-Seed: 1352748227
-Buffer-Const,s!=d,xor=0: 0.237898 s 538.046 MB/s
-Buffer-Const,s!=d,xor=1: 0.148526 s 861.802 MB/s
-134217728 1 4 TABLE DOUBLE -
-Seed: 1352748229
-Buffer-Const,s!=d,xor=0: 0.151483 s 844.979 MB/s
-Buffer-Const,s!=d,xor=1: 0.153012 s 836.535 MB/s
-1024 131072 4 TABLE DOUBLE -
-Seed: 1352748230
-Buffer-Const,s!=d,xor=0: 0.146577 s 873.259 MB/s
-Buffer-Const,s!=d,xor=1: 0.146274 s 875.070 MB/s
-2048 65536 4 TABLE DOUBLE -
-Seed: 1352748231
-Buffer-Const,s!=d,xor=0: 0.145069 s 882.341 MB/s
-Buffer-Const,s!=d,xor=1: 0.143911 s 889.436 MB/s
-4096 32768 4 TABLE DOUBLE -
-Seed: 1352748233
-Buffer-Const,s!=d,xor=0: 0.143011 s 895.035 MB/s
-Buffer-Const,s!=d,xor=1: 0.142096 s 900.798 MB/s
-8192 16384 4 TABLE DOUBLE -
-Seed: 1352748234
-Buffer-Const,s!=d,xor=0: 0.142743 s 896.719 MB/s
-Buffer-Const,s!=d,xor=1: 0.142004 s 901.383 MB/s
-16384 8192 4 TABLE DOUBLE -
-Seed: 1352748235
-Buffer-Const,s!=d,xor=0: 0.141290 s 905.940 MB/s
-Buffer-Const,s!=d,xor=1: 0.142891 s 895.785 MB/s
-32768 4096 4 TABLE DOUBLE -
-Seed: 1352748236
-Buffer-Const,s!=d,xor=0: 0.141509 s 904.534 MB/s
-Buffer-Const,s!=d,xor=1: 0.142357 s 899.150 MB/s
-65536 2048 4 TABLE DOUBLE -
-Seed: 1352748237
-Buffer-Const,s!=d,xor=0: 0.141353 s 905.532 MB/s
-Buffer-Const,s!=d,xor=1: 0.147224 s 869.422 MB/s
-131072 1024 4 TABLE DOUBLE -
-Seed: 1352748239
-Buffer-Const,s!=d,xor=0: 0.142758 s 896.623 MB/s
-Buffer-Const,s!=d,xor=1: 0.144537 s 885.585 MB/s
-262144 512 4 TABLE DOUBLE -
-Seed: 1352748240
-Buffer-Const,s!=d,xor=0: 0.141772 s 902.858 MB/s
-Buffer-Const,s!=d,xor=1: 0.145832 s 877.723 MB/s
-524288 256 4 TABLE DOUBLE -
-Seed: 1352748241
-Buffer-Const,s!=d,xor=0: 0.142111 s 900.705 MB/s
-Buffer-Const,s!=d,xor=1: 0.143957 s 889.155 MB/s
-1048576 128 4 TABLE DOUBLE -
-Seed: 1352748242
-Buffer-Const,s!=d,xor=0: 0.144863 s 883.596 MB/s
-Buffer-Const,s!=d,xor=1: 0.148948 s 859.359 MB/s
-2097152 64 4 TABLE DOUBLE -
-Seed: 1352748244
-Buffer-Const,s!=d,xor=0: 0.150453 s 850.766 MB/s
-Buffer-Const,s!=d,xor=1: 0.151897 s 842.677 MB/s
-4194304 32 4 TABLE DOUBLE -
-Seed: 1352748245
-Buffer-Const,s!=d,xor=0: 0.152495 s 839.371 MB/s
-Buffer-Const,s!=d,xor=1: 0.153424 s 834.289 MB/s
-8388608 16 4 TABLE DOUBLE -
-Seed: 1352748246
-Buffer-Const,s!=d,xor=0: 0.159227 s 803.886 MB/s
-Buffer-Const,s!=d,xor=1: 0.151101 s 847.118 MB/s
-16777216 8 4 TABLE DOUBLE -
-Seed: 1352748248
-Buffer-Const,s!=d,xor=0: 0.167903 s 762.344 MB/s
-Buffer-Const,s!=d,xor=1: 0.152000 s 842.106 MB/s
-33554432 4 4 TABLE DOUBLE -
-Seed: 1352748249
-Buffer-Const,s!=d,xor=0: 0.193370 s 661.943 MB/s
-Buffer-Const,s!=d,xor=1: 0.153193 s 835.547 MB/s
-67108864 2 4 TABLE DOUBLE -
-Seed: 1352748250
-Buffer-Const,s!=d,xor=0: 0.241834 s 529.288 MB/s
-Buffer-Const,s!=d,xor=1: 0.150811 s 848.745 MB/s
-134217728 1 4 TABLE DOUBLE -
-Seed: 1352748252
-Buffer-Const,s!=d,xor=0: 0.158047 s 809.887 MB/s
-Buffer-Const,s!=d,xor=1: 0.156660 s 817.057 MB/s
-1024 131072 4 TABLE QUAD -
-Seed: 1352748253
-Buffer-Const,s!=d,xor=0: 0.141239 s 906.264 MB/s
-Buffer-Const,s!=d,xor=1: 0.146382 s 874.422 MB/s
-2048 65536 4 TABLE QUAD -
-Seed: 1352748254
-Buffer-Const,s!=d,xor=0: 0.134986 s 948.245 MB/s
-Buffer-Const,s!=d,xor=1: 0.140656 s 910.023 MB/s
-4096 32768 4 TABLE QUAD -
-Seed: 1352748256
-Buffer-Const,s!=d,xor=0: 0.153383 s 834.514 MB/s
-Buffer-Const,s!=d,xor=1: 0.128968 s 992.498 MB/s
-8192 16384 4 TABLE QUAD -
-Seed: 1352748257
-Buffer-Const,s!=d,xor=0: 0.120985 s 1057.984 MB/s
-Buffer-Const,s!=d,xor=1: 0.121486 s 1053.618 MB/s
-16384 8192 4 TABLE QUAD -
-Seed: 1352748258
-Buffer-Const,s!=d,xor=0: 0.113212 s 1130.626 MB/s
-Buffer-Const,s!=d,xor=1: 0.116994 s 1094.076 MB/s
-32768 4096 4 TABLE QUAD -
-Seed: 1352748259
-Buffer-Const,s!=d,xor=0: 0.106910 s 1197.266 MB/s
-Buffer-Const,s!=d,xor=1: 0.109951 s 1164.152 MB/s
-65536 2048 4 TABLE QUAD -
-Seed: 1352748260
-Buffer-Const,s!=d,xor=0: 0.106585 s 1200.916 MB/s
-Buffer-Const,s!=d,xor=1: 0.119656 s 1069.735 MB/s
-131072 1024 4 TABLE QUAD -
-Seed: 1352748261
-Buffer-Const,s!=d,xor=0: 0.108813 s 1176.332 MB/s
-Buffer-Const,s!=d,xor=1: 0.109021 s 1174.081 MB/s
-262144 512 4 TABLE QUAD -
-Seed: 1352748263
-Buffer-Const,s!=d,xor=0: 0.103341 s 1238.614 MB/s
-Buffer-Const,s!=d,xor=1: 0.108952 s 1174.826 MB/s
-524288 256 4 TABLE QUAD -
-Seed: 1352748264
-Buffer-Const,s!=d,xor=0: 0.105469 s 1213.627 MB/s
-Buffer-Const,s!=d,xor=1: 0.110848 s 1154.735 MB/s
-1048576 128 4 TABLE QUAD -
-Seed: 1352748265
-Buffer-Const,s!=d,xor=0: 0.105542 s 1212.785 MB/s
-Buffer-Const,s!=d,xor=1: 0.108646 s 1178.134 MB/s
-2097152 64 4 TABLE QUAD -
-Seed: 1352748266
-Buffer-Const,s!=d,xor=0: 0.106677 s 1199.889 MB/s
-Buffer-Const,s!=d,xor=1: 0.112022 s 1142.631 MB/s
-4194304 32 4 TABLE QUAD -
-Seed: 1352748267
-Buffer-Const,s!=d,xor=0: 0.110966 s 1153.507 MB/s
-Buffer-Const,s!=d,xor=1: 0.100766 s 1270.264 MB/s
-8388608 16 4 TABLE QUAD -
-Seed: 1352748268
-Buffer-Const,s!=d,xor=0: 0.108207 s 1182.915 MB/s
-Buffer-Const,s!=d,xor=1: 0.113488 s 1127.871 MB/s
-16777216 8 4 TABLE QUAD -
-Seed: 1352748269
-Buffer-Const,s!=d,xor=0: 0.129142 s 991.157 MB/s
-Buffer-Const,s!=d,xor=1: 0.110923 s 1153.953 MB/s
-33554432 4 4 TABLE QUAD -
-Seed: 1352748270
-Buffer-Const,s!=d,xor=0: 0.156426 s 818.279 MB/s
-Buffer-Const,s!=d,xor=1: 0.110093 s 1162.652 MB/s
-67108864 2 4 TABLE QUAD -
-Seed: 1352748272
-Buffer-Const,s!=d,xor=0: 0.203508 s 628.967 MB/s
-Buffer-Const,s!=d,xor=1: 0.111907 s 1143.807 MB/s
-134217728 1 4 TABLE QUAD -
-Seed: 1352748273
-Buffer-Const,s!=d,xor=0: 8.741033 s 14.644 MB/s
-Buffer-Const,s!=d,xor=1: 8.972750 s 14.265 MB/s
-1024 131072 4 TABLE QUAD,LAZY -
-Seed: 1352748309
-Buffer-Const,s!=d,xor=0: 4.387740 s 29.172 MB/s
-Buffer-Const,s!=d,xor=1: 4.401799 s 29.079 MB/s
-2048 65536 4 TABLE QUAD,LAZY -
-Seed: 1352748327
-Buffer-Const,s!=d,xor=0: 2.255454 s 56.751 MB/s
-Buffer-Const,s!=d,xor=1: 2.243299 s 57.059 MB/s
-4096 32768 4 TABLE QUAD,LAZY -
-Seed: 1352748337
-Buffer-Const,s!=d,xor=0: 1.166870 s 109.695 MB/s
-Buffer-Const,s!=d,xor=1: 1.180004 s 108.474 MB/s
-8192 16384 4 TABLE QUAD,LAZY -
-Seed: 1352748342
-Buffer-Const,s!=d,xor=0: 0.661613 s 193.467 MB/s
-Buffer-Const,s!=d,xor=1: 0.629827 s 203.230 MB/s
-16384 8192 4 TABLE QUAD,LAZY -
-Seed: 1352748345
-Buffer-Const,s!=d,xor=0: 0.364647 s 351.024 MB/s
-Buffer-Const,s!=d,xor=1: 0.376395 s 340.069 MB/s
-32768 4096 4 TABLE QUAD,LAZY -
-Seed: 1352748348
-Buffer-Const,s!=d,xor=0: 0.226271 s 565.694 MB/s
-Buffer-Const,s!=d,xor=1: 0.234560 s 545.704 MB/s
-65536 2048 4 TABLE QUAD,LAZY -
-Seed: 1352748349
-Buffer-Const,s!=d,xor=0: 0.160475 s 797.630 MB/s
-Buffer-Const,s!=d,xor=1: 0.166329 s 769.561 MB/s
-131072 1024 4 TABLE QUAD,LAZY -
-Seed: 1352748351
-Buffer-Const,s!=d,xor=0: 0.130999 s 977.110 MB/s
-Buffer-Const,s!=d,xor=1: 0.134676 s 950.431 MB/s
-262144 512 4 TABLE QUAD,LAZY -
-Seed: 1352748352
-Buffer-Const,s!=d,xor=0: 0.110626 s 1157.057 MB/s
-Buffer-Const,s!=d,xor=1: 0.118067 s 1084.134 MB/s
-524288 256 4 TABLE QUAD,LAZY -
-Seed: 1352748353
-Buffer-Const,s!=d,xor=0: 0.105213 s 1216.581 MB/s
-Buffer-Const,s!=d,xor=1: 0.109697 s 1166.854 MB/s
-1048576 128 4 TABLE QUAD,LAZY -
-Seed: 1352748354
-Buffer-Const,s!=d,xor=0: 0.107641 s 1189.138 MB/s
-Buffer-Const,s!=d,xor=1: 0.108062 s 1184.502 MB/s
-2097152 64 4 TABLE QUAD,LAZY -
-Seed: 1352748355
-Buffer-Const,s!=d,xor=0: 0.103473 s 1237.035 MB/s
-Buffer-Const,s!=d,xor=1: 0.098362 s 1301.310 MB/s
-4194304 32 4 TABLE QUAD,LAZY -
-Seed: 1352748356
-Buffer-Const,s!=d,xor=0: 0.107058 s 1195.616 MB/s
-Buffer-Const,s!=d,xor=1: 0.097883 s 1307.687 MB/s
-8388608 16 4 TABLE QUAD,LAZY -
-Seed: 1352748357
-Buffer-Const,s!=d,xor=0: 0.116388 s 1099.769 MB/s
-Buffer-Const,s!=d,xor=1: 0.098690 s 1296.990 MB/s
-16777216 8 4 TABLE QUAD,LAZY -
-Seed: 1352748358
-Buffer-Const,s!=d,xor=0: 0.129120 s 991.325 MB/s
-Buffer-Const,s!=d,xor=1: 0.109833 s 1165.403 MB/s
-33554432 4 4 TABLE QUAD,LAZY -
-Seed: 1352748360
-Buffer-Const,s!=d,xor=0: 0.157534 s 812.524 MB/s
-Buffer-Const,s!=d,xor=1: 0.114721 s 1115.750 MB/s
-67108864 2 4 TABLE QUAD,LAZY -
-Seed: 1352748361
-Buffer-Const,s!=d,xor=0: 0.205053 s 624.229 MB/s
-Buffer-Const,s!=d,xor=1: 0.110099 s 1162.589 MB/s
-134217728 1 4 TABLE QUAD,LAZY -
-Seed: 1352748362
-Buffer-Const,s!=d,xor=0: 0.142388 s 898.955 MB/s
-Buffer-Const,s!=d,xor=1: 0.146045 s 876.440 MB/s
-1024 131072 4 BYTWO_p - -
-Seed: 1352748363
-Buffer-Const,s!=d,xor=0: 0.135040 s 947.867 MB/s
-Buffer-Const,s!=d,xor=1: 0.140142 s 913.360 MB/s
-2048 65536 4 BYTWO_p - -
-Seed: 1352748365
-Buffer-Const,s!=d,xor=0: 0.131358 s 974.437 MB/s
-Buffer-Const,s!=d,xor=1: 0.137115 s 933.525 MB/s
-4096 32768 4 BYTWO_p - -
-Seed: 1352748366
-Buffer-Const,s!=d,xor=0: 0.129772 s 986.347 MB/s
-Buffer-Const,s!=d,xor=1: 0.135098 s 947.462 MB/s
-8192 16384 4 BYTWO_p - -
-Seed: 1352748367
-Buffer-Const,s!=d,xor=0: 0.128670 s 994.795 MB/s
-Buffer-Const,s!=d,xor=1: 0.133591 s 958.145 MB/s
-16384 8192 4 BYTWO_p - -
-Seed: 1352748368
-Buffer-Const,s!=d,xor=0: 0.130064 s 984.129 MB/s
-Buffer-Const,s!=d,xor=1: 0.135170 s 946.959 MB/s
-32768 4096 4 BYTWO_p - -
-Seed: 1352748369
-Buffer-Const,s!=d,xor=0: 0.129942 s 985.052 MB/s
-Buffer-Const,s!=d,xor=1: 0.134780 s 949.695 MB/s
-65536 2048 4 BYTWO_p - -
-Seed: 1352748371
-Buffer-Const,s!=d,xor=0: 0.130649 s 979.725 MB/s
-Buffer-Const,s!=d,xor=1: 0.134556 s 951.280 MB/s
-131072 1024 4 BYTWO_p - -
-Seed: 1352748372
-Buffer-Const,s!=d,xor=0: 0.129390 s 989.255 MB/s
-Buffer-Const,s!=d,xor=1: 0.134418 s 952.257 MB/s
-262144 512 4 BYTWO_p - -
-Seed: 1352748373
-Buffer-Const,s!=d,xor=0: 0.130153 s 983.455 MB/s
-Buffer-Const,s!=d,xor=1: 0.137027 s 934.126 MB/s
-524288 256 4 BYTWO_p - -
-Seed: 1352748374
-Buffer-Const,s!=d,xor=0: 0.128065 s 999.493 MB/s
-Buffer-Const,s!=d,xor=1: 0.136548 s 937.402 MB/s
-1048576 128 4 BYTWO_p - -
-Seed: 1352748375
-Buffer-Const,s!=d,xor=0: 0.137841 s 928.608 MB/s
-Buffer-Const,s!=d,xor=1: 0.149983 s 853.428 MB/s
-2097152 64 4 BYTWO_p - -
-Seed: 1352748377
-Buffer-Const,s!=d,xor=0: 0.143009 s 895.049 MB/s
-Buffer-Const,s!=d,xor=1: 0.151799 s 843.218 MB/s
-4194304 32 4 BYTWO_p - -
-Seed: 1352748378
-Buffer-Const,s!=d,xor=0: 0.148001 s 864.859 MB/s
-Buffer-Const,s!=d,xor=1: 0.150979 s 847.802 MB/s
-8388608 16 4 BYTWO_p - -
-Seed: 1352748379
-Buffer-Const,s!=d,xor=0: 0.153637 s 833.133 MB/s
-Buffer-Const,s!=d,xor=1: 0.133152 s 961.307 MB/s
-16777216 8 4 BYTWO_p - -
-Seed: 1352748380
-Buffer-Const,s!=d,xor=0: 0.164125 s 779.894 MB/s
-Buffer-Const,s!=d,xor=1: 0.150620 s 849.821 MB/s
-33554432 4 4 BYTWO_p - -
-Seed: 1352748382
-Buffer-Const,s!=d,xor=0: 0.188526 s 678.952 MB/s
-Buffer-Const,s!=d,xor=1: 0.153114 s 835.979 MB/s
-67108864 2 4 BYTWO_p - -
-Seed: 1352748383
-Buffer-Const,s!=d,xor=0: 0.235626 s 543.234 MB/s
-Buffer-Const,s!=d,xor=1: 0.158839 s 805.847 MB/s
-134217728 1 4 BYTWO_p - -
-Seed: 1352748385
-Buffer-Const,s!=d,xor=0: 0.076323 s 1677.087 MB/s
-Buffer-Const,s!=d,xor=1: 0.077654 s 1648.345 MB/s
-1024 131072 4 BYTWO_b - -
-Seed: 1352748386
-Buffer-Const,s!=d,xor=0: 0.068027 s 1881.605 MB/s
-Buffer-Const,s!=d,xor=1: 0.070778 s 1808.462 MB/s
-2048 65536 4 BYTWO_b - -
-Seed: 1352748387
-Buffer-Const,s!=d,xor=0: 0.065722 s 1947.591 MB/s
-Buffer-Const,s!=d,xor=1: 0.068535 s 1867.669 MB/s
-4096 32768 4 BYTWO_b - -
-Seed: 1352748388
-Buffer-Const,s!=d,xor=0: 0.063732 s 2008.398 MB/s
-Buffer-Const,s!=d,xor=1: 0.066054 s 1937.805 MB/s
-8192 16384 4 BYTWO_b - -
-Seed: 1352748389
-Buffer-Const,s!=d,xor=0: 0.062660 s 2042.779 MB/s
-Buffer-Const,s!=d,xor=1: 0.065213 s 1962.793 MB/s
-16384 8192 4 BYTWO_b - -
-Seed: 1352748390
-Buffer-Const,s!=d,xor=0: 0.062758 s 2039.566 MB/s
-Buffer-Const,s!=d,xor=1: 0.066957 s 1911.668 MB/s
-32768 4096 4 BYTWO_b - -
-Seed: 1352748390
-Buffer-Const,s!=d,xor=0: 0.063058 s 2029.865 MB/s
-Buffer-Const,s!=d,xor=1: 0.065829 s 1944.424 MB/s
-65536 2048 4 BYTWO_b - -
-Seed: 1352748391
-Buffer-Const,s!=d,xor=0: 0.065844 s 1943.994 MB/s
-Buffer-Const,s!=d,xor=1: 0.065374 s 1957.968 MB/s
-131072 1024 4 BYTWO_b - -
-Seed: 1352748392
-Buffer-Const,s!=d,xor=0: 0.062168 s 2058.949 MB/s
-Buffer-Const,s!=d,xor=1: 0.068710 s 1862.906 MB/s
-262144 512 4 BYTWO_b - -
-Seed: 1352748393
-Buffer-Const,s!=d,xor=0: 0.062623 s 2043.984 MB/s
-Buffer-Const,s!=d,xor=1: 0.066550 s 1923.379 MB/s
-524288 256 4 BYTWO_b - -
-Seed: 1352748394
-Buffer-Const,s!=d,xor=0: 0.064571 s 1982.317 MB/s
-Buffer-Const,s!=d,xor=1: 0.061325 s 2087.246 MB/s
-1048576 128 4 BYTWO_b - -
-Seed: 1352748395
-Buffer-Const,s!=d,xor=0: 0.070771 s 1808.657 MB/s
-Buffer-Const,s!=d,xor=1: 0.072981 s 1753.878 MB/s
-2097152 64 4 BYTWO_b - -
-Seed: 1352748396
-Buffer-Const,s!=d,xor=0: 0.078018 s 1640.643 MB/s
-Buffer-Const,s!=d,xor=1: 0.072307 s 1770.227 MB/s
-4194304 32 4 BYTWO_b - -
-Seed: 1352748397
-Buffer-Const,s!=d,xor=0: 0.079478 s 1610.508 MB/s
-Buffer-Const,s!=d,xor=1: 0.073757 s 1735.424 MB/s
-8388608 16 4 BYTWO_b - -
-Seed: 1352748398
-Buffer-Const,s!=d,xor=0: 0.085826 s 1491.383 MB/s
-Buffer-Const,s!=d,xor=1: 0.087615 s 1460.945 MB/s
-16777216 8 4 BYTWO_b - -
-Seed: 1352748399
-Buffer-Const,s!=d,xor=0: 0.081822 s 1564.373 MB/s
-Buffer-Const,s!=d,xor=1: 0.083410 s 1534.583 MB/s
-33554432 4 4 BYTWO_b - -
-Seed: 1352748400
-Buffer-Const,s!=d,xor=0: 0.101873 s 1256.467 MB/s
-Buffer-Const,s!=d,xor=1: 0.074412 s 1720.150 MB/s
-67108864 2 4 BYTWO_b - -
-Seed: 1352748401
-Buffer-Const,s!=d,xor=0: 0.188405 s 679.387 MB/s
-Buffer-Const,s!=d,xor=1: 0.053904 s 2374.589 MB/s
-134217728 1 4 BYTWO_b - -
-Seed: 1352748403
-Buffer-Const,s!=d,xor=0: 0.092518 s 1383.520 MB/s
-Buffer-Const,s!=d,xor=1: 0.097347 s 1314.877 MB/s
-1024 131072 4 BYTWO_p SSE -
-Seed: 1352748404
-Buffer-Const,s!=d,xor=0: 0.086226 s 1484.463 MB/s
-Buffer-Const,s!=d,xor=1: 0.092092 s 1389.910 MB/s
-2048 65536 4 BYTWO_p SSE -
-Seed: 1352748405
-Buffer-Const,s!=d,xor=0: 0.082721 s 1547.370 MB/s
-Buffer-Const,s!=d,xor=1: 0.088092 s 1453.025 MB/s
-4096 32768 4 BYTWO_p SSE -
-Seed: 1352748406
-Buffer-Const,s!=d,xor=0: 0.081612 s 1568.395 MB/s
-Buffer-Const,s!=d,xor=1: 0.086144 s 1485.885 MB/s
-8192 16384 4 BYTWO_p SSE -
-Seed: 1352748407
-Buffer-Const,s!=d,xor=0: 0.080819 s 1583.783 MB/s
-Buffer-Const,s!=d,xor=1: 0.085448 s 1497.982 MB/s
-16384 8192 4 BYTWO_p SSE -
-Seed: 1352748408
-Buffer-Const,s!=d,xor=0: 0.080971 s 1580.804 MB/s
-Buffer-Const,s!=d,xor=1: 0.086504 s 1479.709 MB/s
-32768 4096 4 BYTWO_p SSE -
-Seed: 1352748409
-Buffer-Const,s!=d,xor=0: 0.080746 s 1585.214 MB/s
-Buffer-Const,s!=d,xor=1: 0.085679 s 1493.943 MB/s
-65536 2048 4 BYTWO_p SSE -
-Seed: 1352748410
-Buffer-Const,s!=d,xor=0: 0.081038 s 1579.511 MB/s
-Buffer-Const,s!=d,xor=1: 0.086381 s 1481.804 MB/s
-131072 1024 4 BYTWO_p SSE -
-Seed: 1352748411
-Buffer-Const,s!=d,xor=0: 0.079807 s 1603.873 MB/s
-Buffer-Const,s!=d,xor=1: 0.085420 s 1498.484 MB/s
-262144 512 4 BYTWO_p SSE -
-Seed: 1352748412
-Buffer-Const,s!=d,xor=0: 0.080044 s 1599.115 MB/s
-Buffer-Const,s!=d,xor=1: 0.083843 s 1526.654 MB/s
-524288 256 4 BYTWO_p SSE -
-Seed: 1352748413
-Buffer-Const,s!=d,xor=0: 0.082954 s 1543.016 MB/s
-Buffer-Const,s!=d,xor=1: 0.086807 s 1474.535 MB/s
-1048576 128 4 BYTWO_p SSE -
-Seed: 1352748414
-Buffer-Const,s!=d,xor=0: 0.090553 s 1413.536 MB/s
-Buffer-Const,s!=d,xor=1: 0.092115 s 1389.565 MB/s
-2097152 64 4 BYTWO_p SSE -
-Seed: 1352748415
-Buffer-Const,s!=d,xor=0: 0.087072 s 1470.054 MB/s
-Buffer-Const,s!=d,xor=1: 0.093465 s 1369.492 MB/s
-4194304 32 4 BYTWO_p SSE -
-Seed: 1352748416
-Buffer-Const,s!=d,xor=0: 0.097724 s 1309.812 MB/s
-Buffer-Const,s!=d,xor=1: 0.090922 s 1407.795 MB/s
-8388608 16 4 BYTWO_p SSE -
-Seed: 1352748417
-Buffer-Const,s!=d,xor=0: 0.104649 s 1223.136 MB/s
-Buffer-Const,s!=d,xor=1: 0.084963 s 1506.532 MB/s
-16777216 8 4 BYTWO_p SSE -
-Seed: 1352748418
-Buffer-Const,s!=d,xor=0: 0.112079 s 1142.050 MB/s
-Buffer-Const,s!=d,xor=1: 0.096727 s 1323.313 MB/s
-33554432 4 4 BYTWO_p SSE -
-Seed: 1352748419
-Buffer-Const,s!=d,xor=0: 0.136256 s 939.408 MB/s
-Buffer-Const,s!=d,xor=1: 0.103244 s 1239.781 MB/s
-67108864 2 4 BYTWO_p SSE -
-Seed: 1352748420
-Buffer-Const,s!=d,xor=0: 0.181231 s 706.281 MB/s
-Buffer-Const,s!=d,xor=1: 0.092887 s 1378.016 MB/s
-134217728 1 4 BYTWO_p SSE -
-Seed: 1352748422
-Buffer-Const,s!=d,xor=0: 0.107760 s 1187.825 MB/s
-Buffer-Const,s!=d,xor=1: 0.065748 s 1946.828 MB/s
-1024 131072 4 BYTWO_b SSE -
-Seed: 1352748423
-Buffer-Const,s!=d,xor=0: 0.104705 s 1222.484 MB/s
-Buffer-Const,s!=d,xor=1: 0.058541 s 2186.508 MB/s
-2048 65536 4 BYTWO_b SSE -
-Seed: 1352748424
-Buffer-Const,s!=d,xor=0: 0.098082 s 1305.026 MB/s
-Buffer-Const,s!=d,xor=1: 0.053539 s 2390.768 MB/s
-4096 32768 4 BYTWO_b SSE -
-Seed: 1352748425
-Buffer-Const,s!=d,xor=0: 0.094147 s 1359.576 MB/s
-Buffer-Const,s!=d,xor=1: 0.051867 s 2467.839 MB/s
-8192 16384 4 BYTWO_b SSE -
-Seed: 1352748426
-Buffer-Const,s!=d,xor=0: 0.092755 s 1379.975 MB/s
-Buffer-Const,s!=d,xor=1: 0.049600 s 2580.651 MB/s
-16384 8192 4 BYTWO_b SSE -
-Seed: 1352748427
-Buffer-Const,s!=d,xor=0: 0.093161 s 1373.971 MB/s
-Buffer-Const,s!=d,xor=1: 0.048734 s 2626.480 MB/s
-32768 4096 4 BYTWO_b SSE -
-Seed: 1352748428
-Buffer-Const,s!=d,xor=0: 0.092071 s 1390.227 MB/s
-Buffer-Const,s!=d,xor=1: 0.048645 s 2631.282 MB/s
-65536 2048 4 BYTWO_b SSE -
-Seed: 1352748429
-Buffer-Const,s!=d,xor=0: 0.093282 s 1372.191 MB/s
-Buffer-Const,s!=d,xor=1: 0.047374 s 2701.903 MB/s
-131072 1024 4 BYTWO_b SSE -
-Seed: 1352748430
-Buffer-Const,s!=d,xor=0: 0.094085 s 1360.479 MB/s
-Buffer-Const,s!=d,xor=1: 0.050752 s 2522.072 MB/s
-262144 512 4 BYTWO_b SSE -
-Seed: 1352748431
-Buffer-Const,s!=d,xor=0: 0.099099 s 1291.639 MB/s
-Buffer-Const,s!=d,xor=1: 0.046550 s 2749.729 MB/s
-524288 256 4 BYTWO_b SSE -
-Seed: 1352748431
-Buffer-Const,s!=d,xor=0: 0.093943 s 1362.530 MB/s
-Buffer-Const,s!=d,xor=1: 0.050178 s 2550.940 MB/s
-1048576 128 4 BYTWO_b SSE -
-Seed: 1352748432
-Buffer-Const,s!=d,xor=0: 0.121096 s 1057.011 MB/s
-Buffer-Const,s!=d,xor=1: 0.055513 s 2305.770 MB/s
-2097152 64 4 BYTWO_b SSE -
-Seed: 1352748433
-Buffer-Const,s!=d,xor=0: 0.109734 s 1166.456 MB/s
-Buffer-Const,s!=d,xor=1: 0.057743 s 2216.716 MB/s
-4194304 32 4 BYTWO_b SSE -
-Seed: 1352748434
-Buffer-Const,s!=d,xor=0: 0.117161 s 1092.513 MB/s
-Buffer-Const,s!=d,xor=1: 0.057568 s 2223.464 MB/s
-8388608 16 4 BYTWO_b SSE -
-Seed: 1352748436
-Buffer-Const,s!=d,xor=0: 0.102332 s 1250.832 MB/s
-Buffer-Const,s!=d,xor=1: 0.061185 s 2092.004 MB/s
-16777216 8 4 BYTWO_b SSE -
-Seed: 1352748437
-Buffer-Const,s!=d,xor=0: 0.173641 s 737.153 MB/s
-Buffer-Const,s!=d,xor=1: 0.054822 s 2334.830 MB/s
-33554432 4 4 BYTWO_b SSE -
-Seed: 1352748438
-Buffer-Const,s!=d,xor=0: 0.130181 s 983.246 MB/s
-Buffer-Const,s!=d,xor=1: 0.051398 s 2490.367 MB/s
-67108864 2 4 BYTWO_b SSE -
-Seed: 1352748439
-Buffer-Const,s!=d,xor=0: 0.150805 s 848.778 MB/s
-Buffer-Const,s!=d,xor=1: 0.000005 s 2330.524 MB/s
-134217728 1 4 BYTWO_b SSE -
diff --git a/junk-w4-timing-tests.sh b/junk-w4-timing-tests.sh
deleted file mode 100644
index 8ed8c08..0000000
--- a/junk-w4-timing-tests.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-sh tmp-time-test.sh 4 LOG - -
-sh tmp-time-test.sh 4 TABLE SINGLE -
-sh tmp-time-test.sh 4 TABLE SINGLE,SSE -
-sh tmp-time-test.sh 4 TABLE DOUBLE -
-sh tmp-time-test.sh 4 TABLE DOUBLE -
-sh tmp-time-test.sh 4 TABLE QUAD -
-sh tmp-time-test.sh 4 TABLE QUAD,LAZY -
-sh tmp-time-test.sh 4 BYTWO_p - -
-sh tmp-time-test.sh 4 BYTWO_b - -
-sh tmp-time-test.sh 4 BYTWO_p SSE -
-sh tmp-time-test.sh 4 BYTWO_b SSE -
diff --git a/junk-w4-timing.jgr b/junk-w4-timing.jgr
deleted file mode 100644
index 9123257..0000000
--- a/junk-w4-timing.jgr
+++ /dev/null
@@ -1,11 +0,0 @@
-newgraph
-xaxis size 4 min 0 no_auto_hash_labels
- hash_labels hjl vjc rotate -90 fontsize 11
-
-shell : junk-pick-best-output < junk-w4-timing-out.txt | sort -nr | sed 's/.............//' | awk '{ print "hash_label at ", ++l, ":", $0 }'
-
-yaxis size 1 min 0 label : MB/s
-
-newcurve marktype xbar cfill 1 1 0 marksize 1 pts
-shell : junk-pick-best-output < junk-w4-timing-out.txt | sort -nr | awk '{ print $1 }' | cat -n
-
diff --git a/junk-w4.jgr b/junk-w4.jgr
deleted file mode 100644
index e4c4a82..0000000
--- a/junk-w4.jgr
+++ /dev/null
@@ -1,6 +0,0 @@
-newgraph
-xaxis size 4 min 0 no_auto_hash_labels
- hash_labels hjl vjc rotate -90 fontsize 11
-yaxis size 1 min 0 label : MB/s
-
-shell : awk -f junk-proc.awk < junk-w4-out.txt
diff --git a/junk-w8-timing-out.txt b/junk-w8-timing-out.txt
deleted file mode 100644
index cf542be..0000000
--- a/junk-w8-timing-out.txt
+++ /dev/null
@@ -1,936 +0,0 @@
-Seed: 1352746852
-Buffer-Const,s!=d,xor=0: 0.205907 s 621.640 MB/s
-Buffer-Const,s!=d,xor=1: 0.252565 s 506.800 MB/s
-1024 131072 8 LOG - -
-Seed: 1352746854
-Buffer-Const,s!=d,xor=0: 0.206410 s 620.126 MB/s
-Buffer-Const,s!=d,xor=1: 0.251469 s 509.008 MB/s
-2048 65536 8 LOG - -
-Seed: 1352746856
-Buffer-Const,s!=d,xor=0: 0.209941 s 609.695 MB/s
-Buffer-Const,s!=d,xor=1: 0.255838 s 500.316 MB/s
-4096 32768 8 LOG - -
-Seed: 1352746857
-Buffer-Const,s!=d,xor=0: 0.206109 s 621.030 MB/s
-Buffer-Const,s!=d,xor=1: 0.262056 s 488.445 MB/s
-8192 16384 8 LOG - -
-Seed: 1352746859
-Buffer-Const,s!=d,xor=0: 0.201892 s 634.001 MB/s
-Buffer-Const,s!=d,xor=1: 0.250816 s 510.335 MB/s
-16384 8192 8 LOG - -
-Seed: 1352746860
-Buffer-Const,s!=d,xor=0: 0.201995 s 633.679 MB/s
-Buffer-Const,s!=d,xor=1: 0.254832 s 502.292 MB/s
-32768 4096 8 LOG - -
-Seed: 1352746862
-Buffer-Const,s!=d,xor=0: 0.203099 s 630.236 MB/s
-Buffer-Const,s!=d,xor=1: 0.255779 s 500.431 MB/s
-65536 2048 8 LOG - -
-Seed: 1352746864
-Buffer-Const,s!=d,xor=0: 0.200691 s 637.796 MB/s
-Buffer-Const,s!=d,xor=1: 0.256675 s 498.685 MB/s
-131072 1024 8 LOG - -
-Seed: 1352746865
-Buffer-Const,s!=d,xor=0: 0.201240 s 636.057 MB/s
-Buffer-Const,s!=d,xor=1: 0.255231 s 501.506 MB/s
-262144 512 8 LOG - -
-Seed: 1352746867
-Buffer-Const,s!=d,xor=0: 0.202006 s 633.645 MB/s
-Buffer-Const,s!=d,xor=1: 0.251845 s 508.250 MB/s
-524288 256 8 LOG - -
-Seed: 1352746868
-Buffer-Const,s!=d,xor=0: 0.203552 s 628.830 MB/s
-Buffer-Const,s!=d,xor=1: 0.255775 s 500.440 MB/s
-1048576 128 8 LOG - -
-Seed: 1352746870
-Buffer-Const,s!=d,xor=0: 0.206480 s 619.915 MB/s
-Buffer-Const,s!=d,xor=1: 0.256771 s 498.498 MB/s
-2097152 64 8 LOG - -
-Seed: 1352746872
-Buffer-Const,s!=d,xor=0: 0.210690 s 607.528 MB/s
-Buffer-Const,s!=d,xor=1: 0.260851 s 490.701 MB/s
-4194304 32 8 LOG - -
-Seed: 1352746873
-Buffer-Const,s!=d,xor=0: 0.212292 s 602.944 MB/s
-Buffer-Const,s!=d,xor=1: 0.263464 s 485.834 MB/s
-8388608 16 8 LOG - -
-Seed: 1352746875
-Buffer-Const,s!=d,xor=0: 0.217703 s 587.957 MB/s
-Buffer-Const,s!=d,xor=1: 0.260255 s 491.826 MB/s
-16777216 8 8 LOG - -
-Seed: 1352746876
-Buffer-Const,s!=d,xor=0: 0.229996 s 556.531 MB/s
-Buffer-Const,s!=d,xor=1: 0.268077 s 477.475 MB/s
-33554432 4 8 LOG - -
-Seed: 1352746878
-Buffer-Const,s!=d,xor=0: 0.255076 s 501.811 MB/s
-Buffer-Const,s!=d,xor=1: 0.268757 s 476.266 MB/s
-67108864 2 8 LOG - -
-Seed: 1352746880
-Buffer-Const,s!=d,xor=0: 0.299095 s 427.958 MB/s
-Buffer-Const,s!=d,xor=1: 0.271954 s 470.668 MB/s
-134217728 1 8 LOG - -
-Seed: 1352746882
-Buffer-Const,s!=d,xor=0: 0.198089 s 646.175 MB/s
-Buffer-Const,s!=d,xor=1: 0.199934 s 640.212 MB/s
-1024 131072 8 LOG_ZERO - -
-Seed: 1352746883
-Buffer-Const,s!=d,xor=0: 0.191693 s 667.733 MB/s
-Buffer-Const,s!=d,xor=1: 0.195976 s 653.142 MB/s
-2048 65536 8 LOG_ZERO - -
-Seed: 1352746885
-Buffer-Const,s!=d,xor=0: 0.190896 s 670.524 MB/s
-Buffer-Const,s!=d,xor=1: 0.194985 s 656.459 MB/s
-4096 32768 8 LOG_ZERO - -
-Seed: 1352746886
-Buffer-Const,s!=d,xor=0: 0.190779 s 670.933 MB/s
-Buffer-Const,s!=d,xor=1: 0.195833 s 653.617 MB/s
-8192 16384 8 LOG_ZERO - -
-Seed: 1352746887
-Buffer-Const,s!=d,xor=0: 0.188468 s 679.159 MB/s
-Buffer-Const,s!=d,xor=1: 0.192885 s 663.608 MB/s
-16384 8192 8 LOG_ZERO - -
-Seed: 1352746889
-Buffer-Const,s!=d,xor=0: 0.187547 s 682.497 MB/s
-Buffer-Const,s!=d,xor=1: 0.193131 s 662.763 MB/s
-32768 4096 8 LOG_ZERO - -
-Seed: 1352746890
-Buffer-Const,s!=d,xor=0: 0.185810 s 688.875 MB/s
-Buffer-Const,s!=d,xor=1: 0.192531 s 664.829 MB/s
-65536 2048 8 LOG_ZERO - -
-Seed: 1352746892
-Buffer-Const,s!=d,xor=0: 0.186486 s 686.379 MB/s
-Buffer-Const,s!=d,xor=1: 0.192416 s 665.226 MB/s
-131072 1024 8 LOG_ZERO - -
-Seed: 1352746893
-Buffer-Const,s!=d,xor=0: 0.187854 s 681.379 MB/s
-Buffer-Const,s!=d,xor=1: 0.193211 s 662.488 MB/s
-262144 512 8 LOG_ZERO - -
-Seed: 1352746895
-Buffer-Const,s!=d,xor=0: 0.186622 s 685.880 MB/s
-Buffer-Const,s!=d,xor=1: 0.193951 s 659.961 MB/s
-524288 256 8 LOG_ZERO - -
-Seed: 1352746896
-Buffer-Const,s!=d,xor=0: 0.193502 s 661.492 MB/s
-Buffer-Const,s!=d,xor=1: 0.194600 s 657.760 MB/s
-1048576 128 8 LOG_ZERO - -
-Seed: 1352746897
-Buffer-Const,s!=d,xor=0: 0.191789 s 667.400 MB/s
-Buffer-Const,s!=d,xor=1: 0.206557 s 619.683 MB/s
-2097152 64 8 LOG_ZERO - -
-Seed: 1352746899
-Buffer-Const,s!=d,xor=0: 0.216762 s 590.509 MB/s
-Buffer-Const,s!=d,xor=1: 0.220943 s 579.334 MB/s
-4194304 32 8 LOG_ZERO - -
-Seed: 1352746901
-Buffer-Const,s!=d,xor=0: 0.212998 s 600.944 MB/s
-Buffer-Const,s!=d,xor=1: 0.229660 s 557.346 MB/s
-8388608 16 8 LOG_ZERO - -
-Seed: 1352746902
-Buffer-Const,s!=d,xor=0: 0.225217 s 568.340 MB/s
-Buffer-Const,s!=d,xor=1: 0.208174 s 614.871 MB/s
-16777216 8 8 LOG_ZERO - -
-Seed: 1352746904
-Buffer-Const,s!=d,xor=0: 0.215686 s 593.456 MB/s
-Buffer-Const,s!=d,xor=1: 0.204155 s 626.975 MB/s
-33554432 4 8 LOG_ZERO - -
-Seed: 1352746905
-Buffer-Const,s!=d,xor=0: 0.250863 s 510.239 MB/s
-Buffer-Const,s!=d,xor=1: 0.200680 s 637.832 MB/s
-67108864 2 8 LOG_ZERO - -
-Seed: 1352746907
-Buffer-Const,s!=d,xor=0: 0.285895 s 447.717 MB/s
-Buffer-Const,s!=d,xor=1: 0.201105 s 636.484 MB/s
-134217728 1 8 LOG_ZERO - -
-Seed: 1352746909
-Buffer-Const,s!=d,xor=0: 0.154129 s 830.473 MB/s
-Buffer-Const,s!=d,xor=1: 0.200737 s 637.650 MB/s
-1024 131072 8 TABLE - -
-Seed: 1352746910
-Buffer-Const,s!=d,xor=0: 0.150785 s 848.888 MB/s
-Buffer-Const,s!=d,xor=1: 0.199187 s 642.614 MB/s
-2048 65536 8 TABLE - -
-Seed: 1352746911
-Buffer-Const,s!=d,xor=0: 0.149158 s 858.153 MB/s
-Buffer-Const,s!=d,xor=1: 0.196224 s 652.316 MB/s
-4096 32768 8 TABLE - -
-Seed: 1352746913
-Buffer-Const,s!=d,xor=0: 0.147988 s 864.936 MB/s
-Buffer-Const,s!=d,xor=1: 0.195025 s 656.325 MB/s
-8192 16384 8 TABLE - -
-Seed: 1352746914
-Buffer-Const,s!=d,xor=0: 0.146994 s 870.786 MB/s
-Buffer-Const,s!=d,xor=1: 0.193489 s 661.536 MB/s
-16384 8192 8 TABLE - -
-Seed: 1352746915
-Buffer-Const,s!=d,xor=0: 0.151192 s 846.606 MB/s
-Buffer-Const,s!=d,xor=1: 0.196197 s 652.405 MB/s
-32768 4096 8 TABLE - -
-Seed: 1352746917
-Buffer-Const,s!=d,xor=0: 0.149436 s 856.553 MB/s
-Buffer-Const,s!=d,xor=1: 0.194907 s 656.724 MB/s
-65536 2048 8 TABLE - -
-Seed: 1352746918
-Buffer-Const,s!=d,xor=0: 0.150252 s 851.900 MB/s
-Buffer-Const,s!=d,xor=1: 0.196657 s 650.878 MB/s
-131072 1024 8 TABLE - -
-Seed: 1352746920
-Buffer-Const,s!=d,xor=0: 0.152423 s 839.767 MB/s
-Buffer-Const,s!=d,xor=1: 0.196896 s 650.090 MB/s
-262144 512 8 TABLE - -
-Seed: 1352746921
-Buffer-Const,s!=d,xor=0: 0.149577 s 855.748 MB/s
-Buffer-Const,s!=d,xor=1: 0.196668 s 650.843 MB/s
-524288 256 8 TABLE - -
-Seed: 1352746922
-Buffer-Const,s!=d,xor=0: 0.151604 s 844.307 MB/s
-Buffer-Const,s!=d,xor=1: 0.198012 s 646.425 MB/s
-1048576 128 8 TABLE - -
-Seed: 1352746924
-Buffer-Const,s!=d,xor=0: 0.155570 s 822.779 MB/s
-Buffer-Const,s!=d,xor=1: 0.195111 s 656.036 MB/s
-2097152 64 8 TABLE - -
-Seed: 1352746925
-Buffer-Const,s!=d,xor=0: 0.159052 s 804.766 MB/s
-Buffer-Const,s!=d,xor=1: 0.204684 s 625.353 MB/s
-4194304 32 8 TABLE - -
-Seed: 1352746926
-Buffer-Const,s!=d,xor=0: 0.163852 s 781.193 MB/s
-Buffer-Const,s!=d,xor=1: 0.204403 s 626.215 MB/s
-8388608 16 8 TABLE - -
-Seed: 1352746928
-Buffer-Const,s!=d,xor=0: 0.174190 s 734.832 MB/s
-Buffer-Const,s!=d,xor=1: 0.202681 s 631.535 MB/s
-16777216 8 8 TABLE - -
-Seed: 1352746929
-Buffer-Const,s!=d,xor=0: 0.184380 s 694.218 MB/s
-Buffer-Const,s!=d,xor=1: 0.204282 s 626.585 MB/s
-33554432 4 8 TABLE - -
-Seed: 1352746931
-Buffer-Const,s!=d,xor=0: 0.204508 s 625.892 MB/s
-Buffer-Const,s!=d,xor=1: 0.207667 s 616.371 MB/s
-67108864 2 8 TABLE - -
-Seed: 1352746932
-Buffer-Const,s!=d,xor=0: 0.252662 s 506.606 MB/s
-Buffer-Const,s!=d,xor=1: 0.208596 s 613.626 MB/s
-134217728 1 8 TABLE - -
-Seed: 1352746934
-Buffer-Const,s!=d,xor=0: 0.870799 s 146.991 MB/s
-Buffer-Const,s!=d,xor=1: 0.888333 s 144.090 MB/s
-1024 131072 8 TABLE DOUBLE -
-Seed: 1352746938
-Buffer-Const,s!=d,xor=0: 0.808797 s 158.260 MB/s
-Buffer-Const,s!=d,xor=1: 0.812444 s 157.549 MB/s
-2048 65536 8 TABLE DOUBLE -
-Seed: 1352746942
-Buffer-Const,s!=d,xor=0: 0.724551 s 176.661 MB/s
-Buffer-Const,s!=d,xor=1: 0.733140 s 174.591 MB/s
-4096 32768 8 TABLE DOUBLE -
-Seed: 1352746946
-Buffer-Const,s!=d,xor=0: 0.622008 s 205.785 MB/s
-Buffer-Const,s!=d,xor=1: 0.636914 s 200.969 MB/s
-8192 16384 8 TABLE DOUBLE -
-Seed: 1352746949
-Buffer-Const,s!=d,xor=0: 0.454528 s 281.611 MB/s
-Buffer-Const,s!=d,xor=1: 0.467266 s 273.934 MB/s
-16384 8192 8 TABLE DOUBLE -
-Seed: 1352746952
-Buffer-Const,s!=d,xor=0: 0.285370 s 448.541 MB/s
-Buffer-Const,s!=d,xor=1: 0.292051 s 438.279 MB/s
-32768 4096 8 TABLE DOUBLE -
-Seed: 1352746954
-Buffer-Const,s!=d,xor=0: 0.193707 s 660.791 MB/s
-Buffer-Const,s!=d,xor=1: 0.202114 s 633.307 MB/s
-65536 2048 8 TABLE DOUBLE -
-Seed: 1352746955
-Buffer-Const,s!=d,xor=0: 0.147023 s 870.614 MB/s
-Buffer-Const,s!=d,xor=1: 0.151774 s 843.360 MB/s
-131072 1024 8 TABLE DOUBLE -
-Seed: 1352746957
-Buffer-Const,s!=d,xor=0: 0.127245 s 1005.930 MB/s
-Buffer-Const,s!=d,xor=1: 0.130981 s 977.243 MB/s
-262144 512 8 TABLE DOUBLE -
-Seed: 1352746958
-Buffer-Const,s!=d,xor=0: 0.112772 s 1135.034 MB/s
-Buffer-Const,s!=d,xor=1: 0.117758 s 1086.972 MB/s
-524288 256 8 TABLE DOUBLE -
-Seed: 1352746959
-Buffer-Const,s!=d,xor=0: 0.106724 s 1199.355 MB/s
-Buffer-Const,s!=d,xor=1: 0.110677 s 1156.521 MB/s
-1048576 128 8 TABLE DOUBLE -
-Seed: 1352746960
-Buffer-Const,s!=d,xor=0: 0.109126 s 1172.960 MB/s
-Buffer-Const,s!=d,xor=1: 0.115353 s 1109.641 MB/s
-2097152 64 8 TABLE DOUBLE -
-Seed: 1352746962
-Buffer-Const,s!=d,xor=0: 0.111492 s 1148.063 MB/s
-Buffer-Const,s!=d,xor=1: 0.114936 s 1113.660 MB/s
-4194304 32 8 TABLE DOUBLE -
-Seed: 1352746963
-Buffer-Const,s!=d,xor=0: 0.114727 s 1115.694 MB/s
-Buffer-Const,s!=d,xor=1: 0.112702 s 1135.740 MB/s
-8388608 16 8 TABLE DOUBLE -
-Seed: 1352746964
-Buffer-Const,s!=d,xor=0: 0.122290 s 1046.691 MB/s
-Buffer-Const,s!=d,xor=1: 0.112557 s 1137.205 MB/s
-16777216 8 8 TABLE DOUBLE -
-Seed: 1352746965
-Buffer-Const,s!=d,xor=0: 0.130774 s 978.789 MB/s
-Buffer-Const,s!=d,xor=1: 0.115443 s 1108.772 MB/s
-33554432 4 8 TABLE DOUBLE -
-Seed: 1352746966
-Buffer-Const,s!=d,xor=0: 0.152678 s 838.367 MB/s
-Buffer-Const,s!=d,xor=1: 0.112051 s 1142.337 MB/s
-67108864 2 8 TABLE DOUBLE -
-Seed: 1352746968
-Buffer-Const,s!=d,xor=0: 0.199972 s 640.090 MB/s
-Buffer-Const,s!=d,xor=1: 0.111309 s 1149.951 MB/s
-134217728 1 8 TABLE DOUBLE -
-Seed: 1352746969
-Buffer-Const,s!=d,xor=0: 12.353054 s 10.362 MB/s
-Buffer-Const,s!=d,xor=1: 12.311798 s 10.397 MB/s
-1024 131072 8 TABLE DOUBLE,LAZY -
-Seed: 1352747019
-Buffer-Const,s!=d,xor=0: 6.245450 s 20.495 MB/s
-Buffer-Const,s!=d,xor=1: 6.251623 s 20.475 MB/s
-2048 65536 8 TABLE DOUBLE,LAZY -
-Seed: 1352747045
-Buffer-Const,s!=d,xor=0: 3.157618 s 40.537 MB/s
-Buffer-Const,s!=d,xor=1: 3.147050 s 40.673 MB/s
-4096 32768 8 TABLE DOUBLE,LAZY -
-Seed: 1352747058
-Buffer-Const,s!=d,xor=0: 1.631175 s 78.471 MB/s
-Buffer-Const,s!=d,xor=1: 1.657020 s 77.247 MB/s
-8192 16384 8 TABLE DOUBLE,LAZY -
-Seed: 1352747065
-Buffer-Const,s!=d,xor=0: 0.860207 s 148.801 MB/s
-Buffer-Const,s!=d,xor=1: 0.874988 s 146.288 MB/s
-16384 8192 8 TABLE DOUBLE,LAZY -
-Seed: 1352747069
-Buffer-Const,s!=d,xor=0: 0.478988 s 267.230 MB/s
-Buffer-Const,s!=d,xor=1: 0.485077 s 263.876 MB/s
-32768 4096 8 TABLE DOUBLE,LAZY -
-Seed: 1352747072
-Buffer-Const,s!=d,xor=0: 0.291041 s 439.800 MB/s
-Buffer-Const,s!=d,xor=1: 0.294611 s 434.472 MB/s
-65536 2048 8 TABLE DOUBLE,LAZY -
-Seed: 1352747074
-Buffer-Const,s!=d,xor=0: 0.195826 s 653.643 MB/s
-Buffer-Const,s!=d,xor=1: 0.201743 s 634.472 MB/s
-131072 1024 8 TABLE DOUBLE,LAZY -
-Seed: 1352747075
-Buffer-Const,s!=d,xor=0: 0.148775 s 860.359 MB/s
-Buffer-Const,s!=d,xor=1: 0.153898 s 831.717 MB/s
-262144 512 8 TABLE DOUBLE,LAZY -
-Seed: 1352747077
-Buffer-Const,s!=d,xor=0: 0.128037 s 999.707 MB/s
-Buffer-Const,s!=d,xor=1: 0.130179 s 983.260 MB/s
-524288 256 8 TABLE DOUBLE,LAZY -
-Seed: 1352747078
-Buffer-Const,s!=d,xor=0: 0.112728 s 1135.473 MB/s
-Buffer-Const,s!=d,xor=1: 0.119275 s 1073.152 MB/s
-1048576 128 8 TABLE DOUBLE,LAZY -
-Seed: 1352747079
-Buffer-Const,s!=d,xor=0: 0.113098 s 1131.763 MB/s
-Buffer-Const,s!=d,xor=1: 0.117425 s 1090.056 MB/s
-2097152 64 8 TABLE DOUBLE,LAZY -
-Seed: 1352747080
-Buffer-Const,s!=d,xor=0: 0.113271 s 1130.033 MB/s
-Buffer-Const,s!=d,xor=1: 0.116355 s 1100.082 MB/s
-4194304 32 8 TABLE DOUBLE,LAZY -
-Seed: 1352747081
-Buffer-Const,s!=d,xor=0: 0.109173 s 1172.448 MB/s
-Buffer-Const,s!=d,xor=1: 0.114466 s 1118.239 MB/s
-8388608 16 8 TABLE DOUBLE,LAZY -
-Seed: 1352747082
-Buffer-Const,s!=d,xor=0: 0.120238 s 1064.555 MB/s
-Buffer-Const,s!=d,xor=1: 0.113906 s 1123.737 MB/s
-16777216 8 8 TABLE DOUBLE,LAZY -
-Seed: 1352747084
-Buffer-Const,s!=d,xor=0: 0.127838 s 1001.266 MB/s
-Buffer-Const,s!=d,xor=1: 0.112099 s 1141.846 MB/s
-33554432 4 8 TABLE DOUBLE,LAZY -
-Seed: 1352747085
-Buffer-Const,s!=d,xor=0: 0.154731 s 827.243 MB/s
-Buffer-Const,s!=d,xor=1: 0.111025 s 1152.893 MB/s
-67108864 2 8 TABLE DOUBLE,LAZY -
-Seed: 1352747086
-Buffer-Const,s!=d,xor=0: 0.202618 s 631.730 MB/s
-Buffer-Const,s!=d,xor=1: 0.110840 s 1154.819 MB/s
-134217728 1 8 TABLE DOUBLE,LAZY -
-Seed: 1352747087
-Buffer-Const,s!=d,xor=0: 0.400666 s 319.468 MB/s
-Buffer-Const,s!=d,xor=1: 0.408545 s 313.307 MB/s
-1024 131072 8 BYTWO_p - -
-Seed: 1352747090
-Buffer-Const,s!=d,xor=0: 0.393822 s 325.020 MB/s
-Buffer-Const,s!=d,xor=1: 0.400213 s 319.829 MB/s
-2048 65536 8 BYTWO_p - -
-Seed: 1352747092
-Buffer-Const,s!=d,xor=0: 0.388415 s 329.545 MB/s
-Buffer-Const,s!=d,xor=1: 0.396545 s 322.788 MB/s
-4096 32768 8 BYTWO_p - -
-Seed: 1352747094
-Buffer-Const,s!=d,xor=0: 0.389005 s 329.044 MB/s
-Buffer-Const,s!=d,xor=1: 0.395450 s 323.682 MB/s
-8192 16384 8 BYTWO_p - -
-Seed: 1352747096
-Buffer-Const,s!=d,xor=0: 0.385698 s 331.866 MB/s
-Buffer-Const,s!=d,xor=1: 0.395319 s 323.789 MB/s
-16384 8192 8 BYTWO_p - -
-Seed: 1352747099
-Buffer-Const,s!=d,xor=0: 0.385273 s 332.232 MB/s
-Buffer-Const,s!=d,xor=1: 0.396203 s 323.067 MB/s
-32768 4096 8 BYTWO_p - -
-Seed: 1352747101
-Buffer-Const,s!=d,xor=0: 0.387427 s 330.385 MB/s
-Buffer-Const,s!=d,xor=1: 0.394610 s 324.371 MB/s
-65536 2048 8 BYTWO_p - -
-Seed: 1352747103
-Buffer-Const,s!=d,xor=0: 0.389866 s 328.318 MB/s
-Buffer-Const,s!=d,xor=1: 0.398012 s 321.598 MB/s
-131072 1024 8 BYTWO_p - -
-Seed: 1352747105
-Buffer-Const,s!=d,xor=0: 0.389453 s 328.666 MB/s
-Buffer-Const,s!=d,xor=1: 0.397982 s 321.622 MB/s
-262144 512 8 BYTWO_p - -
-Seed: 1352747108
-Buffer-Const,s!=d,xor=0: 0.388304 s 329.638 MB/s
-Buffer-Const,s!=d,xor=1: 0.399512 s 320.391 MB/s
-524288 256 8 BYTWO_p - -
-Seed: 1352747110
-Buffer-Const,s!=d,xor=0: 0.390699 s 327.618 MB/s
-Buffer-Const,s!=d,xor=1: 0.407622 s 314.016 MB/s
-1048576 128 8 BYTWO_p - -
-Seed: 1352747112
-Buffer-Const,s!=d,xor=0: 0.398830 s 320.939 MB/s
-Buffer-Const,s!=d,xor=1: 0.401909 s 318.480 MB/s
-2097152 64 8 BYTWO_p - -
-Seed: 1352747114
-Buffer-Const,s!=d,xor=0: 0.402605 s 317.930 MB/s
-Buffer-Const,s!=d,xor=1: 0.410941 s 311.480 MB/s
-4194304 32 8 BYTWO_p - -
-Seed: 1352747117
-Buffer-Const,s!=d,xor=0: 0.404638 s 316.332 MB/s
-Buffer-Const,s!=d,xor=1: 0.406369 s 314.984 MB/s
-8388608 16 8 BYTWO_p - -
-Seed: 1352747119
-Buffer-Const,s!=d,xor=0: 0.412950 s 309.965 MB/s
-Buffer-Const,s!=d,xor=1: 0.411819 s 310.816 MB/s
-16777216 8 8 BYTWO_p - -
-Seed: 1352747121
-Buffer-Const,s!=d,xor=0: 0.417898 s 306.295 MB/s
-Buffer-Const,s!=d,xor=1: 0.412159 s 310.560 MB/s
-33554432 4 8 BYTWO_p - -
-Seed: 1352747124
-Buffer-Const,s!=d,xor=0: 0.444945 s 287.676 MB/s
-Buffer-Const,s!=d,xor=1: 0.404381 s 316.533 MB/s
-67108864 2 8 BYTWO_p - -
-Seed: 1352747126
-Buffer-Const,s!=d,xor=0: 0.494330 s 258.936 MB/s
-Buffer-Const,s!=d,xor=1: 0.412325 s 310.435 MB/s
-134217728 1 8 BYTWO_p - -
-Seed: 1352747129
-Buffer-Const,s!=d,xor=0: 0.306549 s 417.552 MB/s
-Buffer-Const,s!=d,xor=1: 0.309033 s 414.195 MB/s
-1024 131072 8 BYTWO_b - -
-Seed: 1352747131
-Buffer-Const,s!=d,xor=0: 0.297702 s 429.961 MB/s
-Buffer-Const,s!=d,xor=1: 0.297253 s 430.609 MB/s
-2048 65536 8 BYTWO_b - -
-Seed: 1352747132
-Buffer-Const,s!=d,xor=0: 0.293193 s 436.572 MB/s
-Buffer-Const,s!=d,xor=1: 0.293018 s 436.833 MB/s
-4096 32768 8 BYTWO_b - -
-Seed: 1352747134
-Buffer-Const,s!=d,xor=0: 0.294984 s 433.922 MB/s
-Buffer-Const,s!=d,xor=1: 0.290863 s 440.070 MB/s
-8192 16384 8 BYTWO_b - -
-Seed: 1352747136
-Buffer-Const,s!=d,xor=0: 0.288896 s 443.067 MB/s
-Buffer-Const,s!=d,xor=1: 0.288462 s 443.732 MB/s
-16384 8192 8 BYTWO_b - -
-Seed: 1352747138
-Buffer-Const,s!=d,xor=0: 0.290112 s 441.208 MB/s
-Buffer-Const,s!=d,xor=1: 0.288533 s 443.623 MB/s
-32768 4096 8 BYTWO_b - -
-Seed: 1352747140
-Buffer-Const,s!=d,xor=0: 0.288124 s 444.253 MB/s
-Buffer-Const,s!=d,xor=1: 0.286360 s 446.989 MB/s
-65536 2048 8 BYTWO_b - -
-Seed: 1352747142
-Buffer-Const,s!=d,xor=0: 0.292166 s 438.106 MB/s
-Buffer-Const,s!=d,xor=1: 0.288037 s 444.388 MB/s
-131072 1024 8 BYTWO_b - -
-Seed: 1352747143
-Buffer-Const,s!=d,xor=0: 0.295804 s 432.719 MB/s
-Buffer-Const,s!=d,xor=1: 0.292226 s 438.017 MB/s
-262144 512 8 BYTWO_b - -
-Seed: 1352747145
-Buffer-Const,s!=d,xor=0: 0.284928 s 449.236 MB/s
-Buffer-Const,s!=d,xor=1: 0.286746 s 446.388 MB/s
-524288 256 8 BYTWO_b - -
-Seed: 1352747147
-Buffer-Const,s!=d,xor=0: 0.295747 s 432.803 MB/s
-Buffer-Const,s!=d,xor=1: 0.291578 s 438.990 MB/s
-1048576 128 8 BYTWO_b - -
-Seed: 1352747149
-Buffer-Const,s!=d,xor=0: 0.300418 s 426.073 MB/s
-Buffer-Const,s!=d,xor=1: 0.283470 s 451.547 MB/s
-2097152 64 8 BYTWO_b - -
-Seed: 1352747151
-Buffer-Const,s!=d,xor=0: 0.310105 s 412.764 MB/s
-Buffer-Const,s!=d,xor=1: 0.306506 s 417.610 MB/s
-4194304 32 8 BYTWO_b - -
-Seed: 1352747153
-Buffer-Const,s!=d,xor=0: 0.303049 s 422.373 MB/s
-Buffer-Const,s!=d,xor=1: 0.294477 s 434.669 MB/s
-8388608 16 8 BYTWO_b - -
-Seed: 1352747155
-Buffer-Const,s!=d,xor=0: 0.318920 s 401.354 MB/s
-Buffer-Const,s!=d,xor=1: 0.292649 s 437.384 MB/s
-16777216 8 8 BYTWO_b - -
-Seed: 1352747157
-Buffer-Const,s!=d,xor=0: 0.369239 s 346.659 MB/s
-Buffer-Const,s!=d,xor=1: 0.299009 s 428.081 MB/s
-33554432 4 8 BYTWO_b - -
-Seed: 1352747159
-Buffer-Const,s!=d,xor=0: 0.370332 s 345.636 MB/s
-Buffer-Const,s!=d,xor=1: 0.292907 s 436.999 MB/s
-67108864 2 8 BYTWO_b - -
-Seed: 1352747161
-Buffer-Const,s!=d,xor=0: 0.437750 s 292.404 MB/s
-Buffer-Const,s!=d,xor=1: 0.303224 s 422.130 MB/s
-134217728 1 8 BYTWO_b - -
-Seed: 1352747163
-Buffer-Const,s!=d,xor=0: 0.199102 s 642.888 MB/s
-Buffer-Const,s!=d,xor=1: 0.198709 s 644.159 MB/s
-1024 131072 8 BYTWO_p SSE -
-Seed: 1352747164
-Buffer-Const,s!=d,xor=0: 0.188358 s 679.558 MB/s
-Buffer-Const,s!=d,xor=1: 0.190699 s 671.215 MB/s
-2048 65536 8 BYTWO_p SSE -
-Seed: 1352747166
-Buffer-Const,s!=d,xor=0: 0.184177 s 694.985 MB/s
-Buffer-Const,s!=d,xor=1: 0.186848 s 685.049 MB/s
-4096 32768 8 BYTWO_p SSE -
-Seed: 1352747167
-Buffer-Const,s!=d,xor=0: 0.189242 s 676.384 MB/s
-Buffer-Const,s!=d,xor=1: 0.186107 s 687.776 MB/s
-8192 16384 8 BYTWO_p SSE -
-Seed: 1352747169
-Buffer-Const,s!=d,xor=0: 0.179632 s 712.566 MB/s
-Buffer-Const,s!=d,xor=1: 0.182739 s 700.454 MB/s
-16384 8192 8 BYTWO_p SSE -
-Seed: 1352747170
-Buffer-Const,s!=d,xor=0: 0.199486 s 641.648 MB/s
-Buffer-Const,s!=d,xor=1: 0.187585 s 682.357 MB/s
-32768 4096 8 BYTWO_p SSE -
-Seed: 1352747172
-Buffer-Const,s!=d,xor=0: 0.181719 s 704.385 MB/s
-Buffer-Const,s!=d,xor=1: 0.183744 s 696.620 MB/s
-65536 2048 8 BYTWO_p SSE -
-Seed: 1352747173
-Buffer-Const,s!=d,xor=0: 0.179243 s 714.114 MB/s
-Buffer-Const,s!=d,xor=1: 0.181455 s 705.409 MB/s
-131072 1024 8 BYTWO_p SSE -
-Seed: 1352747174
-Buffer-Const,s!=d,xor=0: 0.178887 s 715.536 MB/s
-Buffer-Const,s!=d,xor=1: 0.180799 s 707.969 MB/s
-262144 512 8 BYTWO_p SSE -
-Seed: 1352747176
-Buffer-Const,s!=d,xor=0: 0.180232 s 710.196 MB/s
-Buffer-Const,s!=d,xor=1: 0.180657 s 708.523 MB/s
-524288 256 8 BYTWO_p SSE -
-Seed: 1352747177
-Buffer-Const,s!=d,xor=0: 0.180044 s 710.938 MB/s
-Buffer-Const,s!=d,xor=1: 0.183542 s 697.386 MB/s
-1048576 128 8 BYTWO_p SSE -
-Seed: 1352747179
-Buffer-Const,s!=d,xor=0: 0.188030 s 680.743 MB/s
-Buffer-Const,s!=d,xor=1: 0.189776 s 674.480 MB/s
-2097152 64 8 BYTWO_p SSE -
-Seed: 1352747180
-Buffer-Const,s!=d,xor=0: 0.188869 s 677.718 MB/s
-Buffer-Const,s!=d,xor=1: 0.199248 s 642.415 MB/s
-4194304 32 8 BYTWO_p SSE -
-Seed: 1352747181
-Buffer-Const,s!=d,xor=0: 0.191749 s 667.538 MB/s
-Buffer-Const,s!=d,xor=1: 0.188193 s 680.153 MB/s
-8388608 16 8 BYTWO_p SSE -
-Seed: 1352747183
-Buffer-Const,s!=d,xor=0: 0.200427 s 638.638 MB/s
-Buffer-Const,s!=d,xor=1: 0.189489 s 675.501 MB/s
-16777216 8 8 BYTWO_p SSE -
-Seed: 1352747184
-Buffer-Const,s!=d,xor=0: 0.206467 s 619.954 MB/s
-Buffer-Const,s!=d,xor=1: 0.195798 s 653.735 MB/s
-33554432 4 8 BYTWO_p SSE -
-Seed: 1352747186
-Buffer-Const,s!=d,xor=0: 0.226630 s 564.797 MB/s
-Buffer-Const,s!=d,xor=1: 0.189382 s 675.883 MB/s
-67108864 2 8 BYTWO_p SSE -
-Seed: 1352747187
-Buffer-Const,s!=d,xor=0: 0.279772 s 457.515 MB/s
-Buffer-Const,s!=d,xor=1: 0.196061 s 652.858 MB/s
-134217728 1 8 BYTWO_p SSE -
-Seed: 1352747189
-Buffer-Const,s!=d,xor=0: 0.148536 s 861.741 MB/s
-Buffer-Const,s!=d,xor=1: 0.276922 s 462.224 MB/s
-1024 131072 8 BYTWO_b SSE -
-Seed: 1352747191
-Buffer-Const,s!=d,xor=0: 0.137811 s 928.805 MB/s
-Buffer-Const,s!=d,xor=1: 0.268928 s 475.964 MB/s
-2048 65536 8 BYTWO_b SSE -
-Seed: 1352747192
-Buffer-Const,s!=d,xor=0: 0.132821 s 963.706 MB/s
-Buffer-Const,s!=d,xor=1: 0.265851 s 481.474 MB/s
-4096 32768 8 BYTWO_b SSE -
-Seed: 1352747194
-Buffer-Const,s!=d,xor=0: 0.131842 s 970.862 MB/s
-Buffer-Const,s!=d,xor=1: 0.263387 s 485.977 MB/s
-8192 16384 8 BYTWO_b SSE -
-Seed: 1352747195
-Buffer-Const,s!=d,xor=0: 0.131891 s 970.495 MB/s
-Buffer-Const,s!=d,xor=1: 0.260863 s 490.680 MB/s
-16384 8192 8 BYTWO_b SSE -
-Seed: 1352747197
-Buffer-Const,s!=d,xor=0: 0.128815 s 993.670 MB/s
-Buffer-Const,s!=d,xor=1: 0.260589 s 491.196 MB/s
-32768 4096 8 BYTWO_b SSE -
-Seed: 1352747198
-Buffer-Const,s!=d,xor=0: 0.127239 s 1005.979 MB/s
-Buffer-Const,s!=d,xor=1: 0.261076 s 490.278 MB/s
-65536 2048 8 BYTWO_b SSE -
-Seed: 1352747200
-Buffer-Const,s!=d,xor=0: 0.127946 s 1000.421 MB/s
-Buffer-Const,s!=d,xor=1: 0.266347 s 480.576 MB/s
-131072 1024 8 BYTWO_b SSE -
-Seed: 1352747201
-Buffer-Const,s!=d,xor=0: 0.129641 s 987.340 MB/s
-Buffer-Const,s!=d,xor=1: 0.261065 s 490.299 MB/s
-262144 512 8 BYTWO_b SSE -
-Seed: 1352747202
-Buffer-Const,s!=d,xor=0: 0.131109 s 976.285 MB/s
-Buffer-Const,s!=d,xor=1: 0.259368 s 493.507 MB/s
-524288 256 8 BYTWO_b SSE -
-Seed: 1352747204
-Buffer-Const,s!=d,xor=0: 0.130358 s 981.911 MB/s
-Buffer-Const,s!=d,xor=1: 0.268218 s 477.224 MB/s
-1048576 128 8 BYTWO_b SSE -
-Seed: 1352747205
-Buffer-Const,s!=d,xor=0: 0.135308 s 945.990 MB/s
-Buffer-Const,s!=d,xor=1: 0.282554 s 453.011 MB/s
-2097152 64 8 BYTWO_b SSE -
-Seed: 1352747207
-Buffer-Const,s!=d,xor=0: 0.141210 s 906.454 MB/s
-Buffer-Const,s!=d,xor=1: 0.284272 s 450.272 MB/s
-4194304 32 8 BYTWO_b SSE -
-Seed: 1352747208
-Buffer-Const,s!=d,xor=0: 0.150900 s 848.245 MB/s
-Buffer-Const,s!=d,xor=1: 0.291628 s 438.916 MB/s
-8388608 16 8 BYTWO_b SSE -
-Seed: 1352747210
-Buffer-Const,s!=d,xor=0: 0.147792 s 866.084 MB/s
-Buffer-Const,s!=d,xor=1: 0.278963 s 458.842 MB/s
-16777216 8 8 BYTWO_b SSE -
-Seed: 1352747211
-Buffer-Const,s!=d,xor=0: 0.154891 s 826.390 MB/s
-Buffer-Const,s!=d,xor=1: 0.176620 s 724.721 MB/s
-33554432 4 8 BYTWO_b SSE -
-Seed: 1352747213
-Buffer-Const,s!=d,xor=0: 0.193885 s 660.186 MB/s
-Buffer-Const,s!=d,xor=1: 0.268795 s 476.199 MB/s
-67108864 2 8 BYTWO_b SSE -
-Seed: 1352747214
-Buffer-Const,s!=d,xor=0: 0.204667 s 625.407 MB/s
-Buffer-Const,s!=d,xor=1: 0.269170 s 475.536 MB/s
-134217728 1 8 BYTWO_b SSE -
-Seed: 1352747216
-Buffer-Const,s!=d,xor=0: 1.940300 s 65.969 MB/s
-Buffer-Const,s!=d,xor=1: 2.143284 s 59.721 MB/s
-1024 131072 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747225
-Buffer-Const,s!=d,xor=0: 1.923481 s 66.546 MB/s
-Buffer-Const,s!=d,xor=1: 2.147470 s 59.605 MB/s
-2048 65536 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747234
-Buffer-Const,s!=d,xor=0: 1.916270 s 66.796 MB/s
-Buffer-Const,s!=d,xor=1: 2.139770 s 59.820 MB/s
-4096 32768 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747243
-Buffer-Const,s!=d,xor=0: 1.938715 s 66.023 MB/s
-Buffer-Const,s!=d,xor=1: 2.137380 s 59.886 MB/s
-8192 16384 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747252
-Buffer-Const,s!=d,xor=0: 1.922527 s 66.579 MB/s
-Buffer-Const,s!=d,xor=1: 2.148529 s 59.576 MB/s
-16384 8192 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747261
-Buffer-Const,s!=d,xor=0: 1.929218 s 66.348 MB/s
-Buffer-Const,s!=d,xor=1: 2.138858 s 59.845 MB/s
-32768 4096 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747270
-Buffer-Const,s!=d,xor=0: 1.921590 s 66.612 MB/s
-Buffer-Const,s!=d,xor=1: 2.137566 s 59.881 MB/s
-65536 2048 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747278
-Buffer-Const,s!=d,xor=0: 1.932345 s 66.241 MB/s
-Buffer-Const,s!=d,xor=1: 2.130586 s 60.077 MB/s
-131072 1024 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747287
-Buffer-Const,s!=d,xor=0: 1.944353 s 65.832 MB/s
-Buffer-Const,s!=d,xor=1: 2.126287 s 60.199 MB/s
-262144 512 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747296
-Buffer-Const,s!=d,xor=0: 1.921692 s 66.608 MB/s
-Buffer-Const,s!=d,xor=1: 2.128691 s 60.131 MB/s
-524288 256 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747305
-Buffer-Const,s!=d,xor=0: 1.883663 s 67.953 MB/s
-Buffer-Const,s!=d,xor=1: 2.149924 s 59.537 MB/s
-1048576 128 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747314
-Buffer-Const,s!=d,xor=0: 1.957364 s 65.394 MB/s
-Buffer-Const,s!=d,xor=1: 2.167789 s 59.046 MB/s
-2097152 64 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747323
-Buffer-Const,s!=d,xor=0: 1.958212 s 65.366 MB/s
-Buffer-Const,s!=d,xor=1: 2.159558 s 59.271 MB/s
-4194304 32 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747332
-Buffer-Const,s!=d,xor=0: 1.958506 s 65.356 MB/s
-Buffer-Const,s!=d,xor=1: 2.019473 s 63.383 MB/s
-8388608 16 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747341
-Buffer-Const,s!=d,xor=0: 1.949758 s 65.649 MB/s
-Buffer-Const,s!=d,xor=1: 2.165875 s 59.099 MB/s
-16777216 8 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747349
-Buffer-Const,s!=d,xor=0: 1.964626 s 65.152 MB/s
-Buffer-Const,s!=d,xor=1: 2.151822 s 59.484 MB/s
-33554432 4 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747358
-Buffer-Const,s!=d,xor=0: 2.045733 s 62.569 MB/s
-Buffer-Const,s!=d,xor=1: 2.177383 s 58.786 MB/s
-67108864 2 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747367
-Buffer-Const,s!=d,xor=0: 2.055240 s 62.280 MB/s
-Buffer-Const,s!=d,xor=1: 2.190975 s 58.421 MB/s
-134217728 1 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-Seed: 1352747377
-Buffer-Const,s!=d,xor=0: 0.080290 s 1594.215 MB/s
-Buffer-Const,s!=d,xor=1: 0.082083 s 1559.402 MB/s
-1024 131072 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747378
-Buffer-Const,s!=d,xor=0: 0.059030 s 2168.378 MB/s
-Buffer-Const,s!=d,xor=1: 0.064752 s 1976.763 MB/s
-2048 65536 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747379
-Buffer-Const,s!=d,xor=0: 0.050239 s 2547.829 MB/s
-Buffer-Const,s!=d,xor=1: 0.050503 s 2534.526 MB/s
-4096 32768 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747379
-Buffer-Const,s!=d,xor=0: 0.044825 s 2855.560 MB/s
-Buffer-Const,s!=d,xor=1: 0.045130 s 2836.220 MB/s
-8192 16384 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747380
-Buffer-Const,s!=d,xor=0: 0.042018 s 3046.301 MB/s
-Buffer-Const,s!=d,xor=1: 0.042297 s 3026.210 MB/s
-16384 8192 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747381
-Buffer-Const,s!=d,xor=0: 0.040955 s 3125.413 MB/s
-Buffer-Const,s!=d,xor=1: 0.041454 s 3087.754 MB/s
-32768 4096 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747382
-Buffer-Const,s!=d,xor=0: 0.040984 s 3123.195 MB/s
-Buffer-Const,s!=d,xor=1: 0.041577 s 3078.635 MB/s
-65536 2048 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747383
-Buffer-Const,s!=d,xor=0: 0.041093 s 3114.859 MB/s
-Buffer-Const,s!=d,xor=1: 0.042611 s 3003.911 MB/s
-131072 1024 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747384
-Buffer-Const,s!=d,xor=0: 0.047338 s 2703.972 MB/s
-Buffer-Const,s!=d,xor=1: 0.049673 s 2576.836 MB/s
-262144 512 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747385
-Buffer-Const,s!=d,xor=0: 0.049656 s 2577.739 MB/s
-Buffer-Const,s!=d,xor=1: 0.050634 s 2527.950 MB/s
-524288 256 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747386
-Buffer-Const,s!=d,xor=0: 0.049906 s 2564.833 MB/s
-Buffer-Const,s!=d,xor=1: 0.051381 s 2491.188 MB/s
-1048576 128 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747386
-Buffer-Const,s!=d,xor=0: 0.075184 s 1702.487 MB/s
-Buffer-Const,s!=d,xor=1: 0.070414 s 1817.825 MB/s
-2097152 64 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747387
-Buffer-Const,s!=d,xor=0: 0.108748 s 1177.034 MB/s
-Buffer-Const,s!=d,xor=1: 0.111286 s 1150.190 MB/s
-4194304 32 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747388
-Buffer-Const,s!=d,xor=0: 0.117474 s 1089.600 MB/s
-Buffer-Const,s!=d,xor=1: 0.114860 s 1114.400 MB/s
-8388608 16 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747389
-Buffer-Const,s!=d,xor=0: 0.126348 s 1013.075 MB/s
-Buffer-Const,s!=d,xor=1: 0.109330 s 1170.768 MB/s
-16777216 8 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747391
-Buffer-Const,s!=d,xor=0: 0.123002 s 1040.635 MB/s
-Buffer-Const,s!=d,xor=1: 0.110046 s 1163.148 MB/s
-33554432 4 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747392
-Buffer-Const,s!=d,xor=0: 0.159381 s 803.107 MB/s
-Buffer-Const,s!=d,xor=1: 0.120685 s 1060.611 MB/s
-67108864 2 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747393
-Buffer-Const,s!=d,xor=0: 0.196446 s 651.578 MB/s
-Buffer-Const,s!=d,xor=1: 0.121685 s 1051.896 MB/s
-134217728 1 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
-Seed: 1352747520
-Buffer-Const,s!=d,xor=0: 0.244784 s 522.910 MB/s
-Buffer-Const,s!=d,xor=1: 0.259940 s 492.421 MB/s
-1024 131072 8 SPLIT 8 4 NOSSE -
-Seed: 1352747522
-Buffer-Const,s!=d,xor=0: 0.243595 s 525.463 MB/s
-Buffer-Const,s!=d,xor=1: 0.253145 s 505.640 MB/s
-2048 65536 8 SPLIT 8 4 NOSSE -
-Seed: 1352747523
-Buffer-Const,s!=d,xor=0: 0.240463 s 532.307 MB/s
-Buffer-Const,s!=d,xor=1: 0.251567 s 508.811 MB/s
-4096 32768 8 SPLIT 8 4 NOSSE -
-Seed: 1352747525
-Buffer-Const,s!=d,xor=0: 0.240079 s 533.157 MB/s
-Buffer-Const,s!=d,xor=1: 0.255671 s 500.643 MB/s
-8192 16384 8 SPLIT 8 4 NOSSE -
-Seed: 1352747527
-Buffer-Const,s!=d,xor=0: 0.242857 s 527.059 MB/s
-Buffer-Const,s!=d,xor=1: 0.251837 s 508.264 MB/s
-16384 8192 8 SPLIT 8 4 NOSSE -
-Seed: 1352747528
-Buffer-Const,s!=d,xor=0: 0.240757 s 531.657 MB/s
-Buffer-Const,s!=d,xor=1: 0.253888 s 504.160 MB/s
-32768 4096 8 SPLIT 8 4 NOSSE -
-Seed: 1352747530
-Buffer-Const,s!=d,xor=0: 0.240586 s 532.034 MB/s
-Buffer-Const,s!=d,xor=1: 0.256642 s 498.749 MB/s
-65536 2048 8 SPLIT 8 4 NOSSE -
-Seed: 1352747532
-Buffer-Const,s!=d,xor=0: 0.238570 s 536.529 MB/s
-Buffer-Const,s!=d,xor=1: 0.254111 s 503.717 MB/s
-131072 1024 8 SPLIT 8 4 NOSSE -
-Seed: 1352747533
-Buffer-Const,s!=d,xor=0: 0.237666 s 538.572 MB/s
-Buffer-Const,s!=d,xor=1: 0.254334 s 503.275 MB/s
-262144 512 8 SPLIT 8 4 NOSSE -
-Seed: 1352747535
-Buffer-Const,s!=d,xor=0: 0.244512 s 523.491 MB/s
-Buffer-Const,s!=d,xor=1: 0.255911 s 500.174 MB/s
-524288 256 8 SPLIT 8 4 NOSSE -
-Seed: 1352747537
-Buffer-Const,s!=d,xor=0: 0.242439 s 527.968 MB/s
-Buffer-Const,s!=d,xor=1: 0.255622 s 500.740 MB/s
-1048576 128 8 SPLIT 8 4 NOSSE -
-Seed: 1352747538
-Buffer-Const,s!=d,xor=0: 0.248633 s 514.815 MB/s
-Buffer-Const,s!=d,xor=1: 0.257451 s 497.181 MB/s
-2097152 64 8 SPLIT 8 4 NOSSE -
-Seed: 1352747540
-Buffer-Const,s!=d,xor=0: 0.241531 s 529.952 MB/s
-Buffer-Const,s!=d,xor=1: 0.264452 s 484.020 MB/s
-4194304 32 8 SPLIT 8 4 NOSSE -
-Seed: 1352747542
-Buffer-Const,s!=d,xor=0: 0.255533 s 500.914 MB/s
-Buffer-Const,s!=d,xor=1: 0.248849 s 514.368 MB/s
-8388608 16 8 SPLIT 8 4 NOSSE -
-Seed: 1352747543
-Buffer-Const,s!=d,xor=0: 0.259687 s 492.902 MB/s
-Buffer-Const,s!=d,xor=1: 0.264417 s 484.084 MB/s
-16777216 8 8 SPLIT 8 4 NOSSE -
-Seed: 1352747545
-Buffer-Const,s!=d,xor=0: 0.267928 s 477.740 MB/s
-Buffer-Const,s!=d,xor=1: 0.269417 s 475.100 MB/s
-33554432 4 8 SPLIT 8 4 NOSSE -
-Seed: 1352747547
-Buffer-Const,s!=d,xor=0: 0.295526 s 433.126 MB/s
-Buffer-Const,s!=d,xor=1: 0.270747 s 472.766 MB/s
-67108864 2 8 SPLIT 8 4 NOSSE -
-Seed: 1352747549
-Buffer-Const,s!=d,xor=0: 0.342706 s 373.498 MB/s
-Buffer-Const,s!=d,xor=1: 0.266642 s 480.045 MB/s
-134217728 1 8 SPLIT 8 4 NOSSE -
-Seed: 1352747551
-Buffer-Const,s!=d,xor=0: 0.027748 s 4612.927 MB/s
-Buffer-Const,s!=d,xor=1: 0.028090 s 4556.704 MB/s
-1024 131072 8 SPLIT 8 4 SSE -
-Seed: 1352747552
-Buffer-Const,s!=d,xor=0: 0.023128 s 5534.409 MB/s
-Buffer-Const,s!=d,xor=1: 0.023134 s 5533.040 MB/s
-2048 65536 8 SPLIT 8 4 SSE -
-Seed: 1352747552
-Buffer-Const,s!=d,xor=0: 0.019114 s 6696.740 MB/s
-Buffer-Const,s!=d,xor=1: 0.019763 s 6476.596 MB/s
-4096 32768 8 SPLIT 8 4 SSE -
-Seed: 1352747553
-Buffer-Const,s!=d,xor=0: 0.017541 s 7297.119 MB/s
-Buffer-Const,s!=d,xor=1: 0.018266 s 7007.661 MB/s
-8192 16384 8 SPLIT 8 4 SSE -
-Seed: 1352747554
-Buffer-Const,s!=d,xor=0: 0.017010 s 7524.892 MB/s
-Buffer-Const,s!=d,xor=1: 0.017399 s 7356.613 MB/s
-16384 8192 8 SPLIT 8 4 SSE -
-Seed: 1352747555
-Buffer-Const,s!=d,xor=0: 0.016979 s 7538.522 MB/s
-Buffer-Const,s!=d,xor=1: 0.017508 s 7311.130 MB/s
-32768 4096 8 SPLIT 8 4 SSE -
-Seed: 1352747555
-Buffer-Const,s!=d,xor=0: 0.016780 s 7628.283 MB/s
-Buffer-Const,s!=d,xor=1: 0.017439 s 7340.018 MB/s
-65536 2048 8 SPLIT 8 4 SSE -
-Seed: 1352747556
-Buffer-Const,s!=d,xor=0: 0.017527 s 7302.876 MB/s
-Buffer-Const,s!=d,xor=1: 0.018656 s 6861.145 MB/s
-131072 1024 8 SPLIT 8 4 SSE -
-Seed: 1352747557
-Buffer-Const,s!=d,xor=0: 0.020679 s 6189.855 MB/s
-Buffer-Const,s!=d,xor=1: 0.022183 s 5770.138 MB/s
-262144 512 8 SPLIT 8 4 SSE -
-Seed: 1352747558
-Buffer-Const,s!=d,xor=0: 0.020437 s 6263.296 MB/s
-Buffer-Const,s!=d,xor=1: 0.021715 s 5894.434 MB/s
-524288 256 8 SPLIT 8 4 SSE -
-Seed: 1352747558
-Buffer-Const,s!=d,xor=0: 0.020800 s 6153.883 MB/s
-Buffer-Const,s!=d,xor=1: 0.021934 s 5835.617 MB/s
-1048576 128 8 SPLIT 8 4 SSE -
-Seed: 1352747559
-Buffer-Const,s!=d,xor=0: 0.035634 s 3592.095 MB/s
-Buffer-Const,s!=d,xor=1: 0.036323 s 3523.977 MB/s
-2097152 64 8 SPLIT 8 4 SSE -
-Seed: 1352747560
-Buffer-Const,s!=d,xor=0: 0.050565 s 2531.419 MB/s
-Buffer-Const,s!=d,xor=1: 0.048358 s 2646.914 MB/s
-4194304 32 8 SPLIT 8 4 SSE -
-Seed: 1352747561
-Buffer-Const,s!=d,xor=0: 0.053646 s 2386.008 MB/s
-Buffer-Const,s!=d,xor=1: 0.047063 s 2719.766 MB/s
-8388608 16 8 SPLIT 8 4 SSE -
-Seed: 1352747562
-Buffer-Const,s!=d,xor=0: 0.055658 s 2299.775 MB/s
-Buffer-Const,s!=d,xor=1: 0.047532 s 2692.918 MB/s
-16777216 8 8 SPLIT 8 4 SSE -
-Seed: 1352747563
-Buffer-Const,s!=d,xor=0: 0.064355 s 1988.963 MB/s
-Buffer-Const,s!=d,xor=1: 0.047547 s 2692.067 MB/s
-33554432 4 8 SPLIT 8 4 SSE -
-Seed: 1352747563
-Buffer-Const,s!=d,xor=0: 0.084876 s 1508.086 MB/s
-Buffer-Const,s!=d,xor=1: 0.048017 s 2665.721 MB/s
-67108864 2 8 SPLIT 8 4 SSE -
-Seed: 1352747564
-Buffer-Const,s!=d,xor=0: 0.121661 s 1052.104 MB/s
-Buffer-Const,s!=d,xor=1: 0.047558 s 2691.447 MB/s
-134217728 1 8 SPLIT 8 4 SSE -
diff --git a/junk-w8-timing-tests.sh b/junk-w8-timing-tests.sh
deleted file mode 100644
index 6b78dab..0000000
--- a/junk-w8-timing-tests.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-sh tmp-time-test.sh 8 LOG - -
-sh tmp-time-test.sh 8 LOG_ZERO - -
-sh tmp-time-test.sh 8 TABLE - -
-sh tmp-time-test.sh 8 TABLE DOUBLE -
-sh tmp-time-test.sh 8 TABLE DOUBLE,LAZY -
-sh tmp-time-test.sh 8 BYTWO_p - -
-sh tmp-time-test.sh 8 BYTWO_b - -
-sh tmp-time-test.sh 8 BYTWO_p SSE -
-sh tmp-time-test.sh 8 BYTWO_b SSE -
-sh tmp-time-test.sh 8 SPLIT 8 4 NOSSE -
-sh tmp-time-test.sh 8 SPLIT 8 4 SSE -
-sh tmp-time-test.sh 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - -
-sh tmp-time-test.sh 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP -
diff --git a/junk-w8-timing.jgr b/junk-w8-timing.jgr
deleted file mode 100644
index 0245111..0000000
--- a/junk-w8-timing.jgr
+++ /dev/null
@@ -1,11 +0,0 @@
-newgraph
-xaxis size 4 min 0 no_auto_hash_labels
- hash_labels hjl vjc rotate -90 fontsize 11
-
-shell : junk-pick-best-output < junk-w8-timing-out.txt | sort -nr | sed 's/.............//' | awk '{ print "hash_label at ", ++l, ":", $0 }'
-
-yaxis size 1 min 0 label : MB/s
-
-newcurve marktype xbar cfill 1 1 0 marksize 1 pts
-shell : junk-pick-best-output < junk-w8-timing-out.txt | sort -nr | awk '{ print $1 }' | cat -n
-
diff --git a/junk.c b/junk.c
deleted file mode 100644
index 4bcf81e..0000000
--- a/junk.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Multiplies four and five in GF(2^4).
- */
-
-#include <stdio.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-#include "gf_complete.h"
-
-main()
-{
- gf_t gf;
-
- gf_init_easy(&gf, 4);
- printf("%d\n", gf.multiply.w32(&gf, 5, 4));
- exit(0);
-}
diff --git a/junk.ps b/junk.ps
deleted file mode 100644
index f8e80c7..0000000
--- a/junk.ps
+++ /dev/null
@@ -1,199 +0,0 @@
-%!PS-Adobe-2.0 EPSF-1.2
-%%Page: 1 1
-%%BoundingBox: -40 -93 292 73
-%%EndComments
-180.000000 406.000000 translate
-1 setlinecap 1 setlinejoin
-0.700 setlinewidth
-0.00 setgray
-
-/Jrnd { exch cvi exch cvi dup 3 1 roll idiv mul } def
-/JDEdict 8 dict def
-JDEdict /mtrx matrix put
-/JDE {
- JDEdict begin
- /yrad exch def
- /xrad exch def
- /savematrix mtrx currentmatrix def
- xrad yrad scale
- 0 0 1 0 360 arc
- savematrix setmatrix
- end
-} def
-/JSTR {
- gsave 1 eq { gsave 1 setgray fill grestore } if
- exch neg exch neg translate
- clip
- rotate
- 4 dict begin
- pathbbox /&top exch def
- /&right exch def
- /&bottom exch def
- &right sub /&width exch def
- newpath
- currentlinewidth mul round dup
- &bottom exch Jrnd exch &top
- 4 -1 roll currentlinewidth mul setlinewidth
- { &right exch moveto &width 0 rlineto stroke } for
- end
- grestore
- newpath
-} bind def
- gsave /Times-Roman findfont 9.000000 scalefont setfont
-0.000000 0.000000 translate
-0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 288.000000 0.000000 lineto stroke
-newpath 0.000000 0.000000 moveto 0.000000 -5.000000 lineto stroke
-newpath 28.799999 0.000000 moveto 28.799999 -2.000000 lineto stroke
-newpath 57.599998 0.000000 moveto 57.599998 -5.000000 lineto stroke
-newpath 86.399994 0.000000 moveto 86.399994 -2.000000 lineto stroke
-newpath 115.199997 0.000000 moveto 115.199997 -5.000000 lineto stroke
-newpath 144.000000 0.000000 moveto 144.000000 -2.000000 lineto stroke
-newpath 172.799988 0.000000 moveto 172.799988 -5.000000 lineto stroke
-newpath 201.599991 0.000000 moveto 201.599991 -2.000000 lineto stroke
-newpath 230.399994 0.000000 moveto 230.399994 -5.000000 lineto stroke
-newpath 259.199982 0.000000 moveto 259.199982 -2.000000 lineto stroke
-newpath 288.000000 0.000000 moveto 288.000000 -5.000000 lineto stroke
-/Times-Roman findfont 11.000000 scalefont setfont
-gsave 28.799999 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (TABLE SINGLE,SSE) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 57.599998 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (BYTWO_b SSE) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 86.399994 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (BYTWO_b) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 115.199997 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (BYTWO_p SSE) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 144.000000 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (TABLE QUAD,LAZY) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 172.799988 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (TABLE QUAD) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 201.599991 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (BYTWO_p) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 230.399994 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (TABLE DOUBLE) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 259.199982 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (TABLE SINGLE) dup stringwidth pop pop 0 0 moveto
-show
-grestore
-gsave 288.000000 -8.000000 translate -90.000000 rotate
-0 -3.300000 translate (LOG) dup stringwidth pop pop 0 0 moveto
-show
-grestore
- grestore
-0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 0.000000 72.000000 lineto stroke
-newpath 0.000000 0.000000 moveto -5.000000 0.000000 lineto stroke
-newpath 0.000000 8.552223 moveto -2.000000 8.552223 lineto stroke
-newpath 0.000000 17.104446 moveto -5.000000 17.104446 lineto stroke
-newpath 0.000000 25.656670 moveto -2.000000 25.656670 lineto stroke
-newpath 0.000000 34.208893 moveto -5.000000 34.208893 lineto stroke
-newpath 0.000000 42.761116 moveto -2.000000 42.761116 lineto stroke
-newpath 0.000000 51.313339 moveto -5.000000 51.313339 lineto stroke
-newpath 0.000000 59.865562 moveto -2.000000 59.865562 lineto stroke
-newpath 0.000000 68.417786 moveto -5.000000 68.417786 lineto stroke
-/Times-Roman findfont 9.000000 scalefont setfont
-gsave -8.000000 0.000000 translate 0.000000 rotate
-0 -2.700000 translate (0) dup stringwidth pop neg 0 moveto
-show
-grestore
-gsave -8.000000 17.104446 translate 0.000000 rotate
-0 -2.700000 translate (2000) dup stringwidth pop neg 0 moveto
-show
-grestore
-gsave -8.000000 34.208893 translate 0.000000 rotate
-0 -2.700000 translate (4000) dup stringwidth pop neg 0 moveto
-show
-grestore
-gsave -8.000000 51.313339 translate 0.000000 rotate
-0 -2.700000 translate (6000) dup stringwidth pop neg 0 moveto
-show
-grestore
-gsave -8.000000 68.417786 translate 0.000000 rotate
-0 -2.700000 translate (8000) dup stringwidth pop neg 0 moveto
-show
-grestore
-/Times-Bold findfont 10.000000 scalefont setfont
-gsave -33.279999 36.000000 translate 90.000000 rotate
-0 0.000000 translate (MB/s) dup stringwidth pop 2 div neg 0 moveto
-show
-grestore
- grestore
- gsave
- gsave gsave 28.799999 72.000000 translate 0.000000 rotate
- newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto
- -14.400000 -72.000000 lineto
- 14.400000 -72.000000 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore gsave 57.599998 23.516296 translate 0.000000 rotate
- newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto
- -14.400000 -23.516296 lineto
- 14.400000 -23.516296 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore gsave 86.399994 20.308016 translate 0.000000 rotate
- newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto
- -14.400000 -20.308016 lineto
- 14.400000 -20.308016 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore gsave 115.199997 13.716681 translate 0.000000 rotate
- newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto
- -14.400000 -13.716681 lineto
- 14.400000 -13.716681 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore gsave 144.000000 11.183632 translate 0.000000 rotate
- newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto
- -14.400000 -11.183632 lineto
- 14.400000 -11.183632 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore gsave 172.799988 10.863582 translate 0.000000 rotate
- newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto
- -14.400000 -10.863582 lineto
- 14.400000 -10.863582 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore gsave 201.599991 8.547887 translate 0.000000 rotate
- newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto
- -14.400000 -8.547887 lineto
- 14.400000 -8.547887 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore gsave 230.399994 7.811883 translate 0.000000 rotate
- newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto
- -14.400000 -7.811883 lineto
- 14.400000 -7.811883 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore gsave 259.199982 4.485872 translate 0.000000 rotate
- newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto
- -14.400000 -4.485872 lineto
- 14.400000 -4.485872 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore gsave 288.000000 1.912226 translate 0.000000 rotate
- newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto
- -14.400000 -1.912226 lineto
- 14.400000 -1.912226 lineto
-closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore
-stroke
- grestore grestore
- grestore
--0.000000 -0.000000 translate
- grestore showpage
diff --git a/junk.sh b/junk.sh
deleted file mode 100644
index 8e62234..0000000
--- a/junk.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-gf_time 4 R -1 1024000 1000 - ; echo '-'
-gf_time 4 R -1 1024000 1000 SHIFT - - ; echo 'SHIFT - -'
-gf_time 4 R -1 1024000 1000 BYTWO_p - - ; echo 'BYTWO_p - -'
-gf_time 4 R -1 1024000 1000 BYTWO_p SSE - ; echo 'BYTWO_p SSE -'
-gf_time 4 R -1 1024000 1000 BYTWO_b - - ; echo 'BYTWO_b - -'
-gf_time 4 R -1 1024000 1000 BYTWO_b SSE - ; echo 'BYTWO_b SSE -'
-gf_time 4 R -1 1024000 1000 TABLE - - ; echo 'TABLE - -'
-gf_time 4 R -1 1024000 1000 TABLE SINGLE - ; echo 'TABLE SINGLE -'
-gf_time 4 R -1 1024000 1000 TABLE DOUBLE - ; echo 'TABLE DOUBLE -'
-gf_time 4 R -1 1024000 1000 TABLE QUAD - ; echo 'TABLE QUAD -'
-gf_time 4 R -1 1024000 1000 TABLE QUAD,LAZY - ; echo 'TABLE QUAD,LAZY -'
-gf_time 4 R -1 1024000 1000 TABLE SINGLE,SSE - ; echo 'TABLE SINGLE,SSE -'
-gf_time 4 R -1 1024000 1000 TABLE SINGLE,NOSSE - ; echo 'TABLE SINGLE,NOSSE -'
-gf_time 4 R -1 1024000 1000 LOG - - ; echo 'LOG - -'
diff --git a/junk_gf_unit.c b/junk_gf_unit.c
deleted file mode 100644
index 3ae45a3..0000000
--- a/junk_gf_unit.c
+++ /dev/null
@@ -1,957 +0,0 @@
-/*
- * gf_unit.c
- *
- * Performs unit testing for gf arithmetic
- */
-
-#include <stdio.h>
-#include <getopt.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-#include <time.h>
-
-#include "gf.h"
-#include "gf_int.h"
-#include "gf_method.h"
-#include "gf_rand.h"
-
-#define REGION_SIZE (65536)
-
-static
-uint8_t get_alt_map_2w8(int offset, uint8_t *buf, int region_size)
-{
- uint8_t symbol = 0;
- int bit_off = offset % 2;
-
- if (bit_off == 0) {
- symbol = buf[offset / 2] & 0x0f | ((buf[(offset / 2)+region_size] & 0x0f) << 4);
- } else {
- symbol = ((buf[offset / 2] & 0xf0) >> 4) | (buf[(offset / 2)+region_size] & 0xf0);
- }
-
- return symbol;
-}
-
-static
-uint16_t get_alt_map_2w16(int offset, uint8_t *buf, int region_size)
-{
- uint16_t symbol = 0;
-
- symbol = buf[offset] | (buf[offset+region_size] << 8);
-
- return symbol;
-}
-
-static
-uint32_t get_alt_map_2w32(int offset, uint8_t *buf, int region_size)
-{
- uint32_t symbol = 0;
- uint16_t buf_a = buf[offset] | (buf[offset + 1] << 8);
- uint16_t buf_b = buf[offset + region_size] | (buf[offset + region_size + 1] << 8);
-
- symbol = buf_a | (buf_b << 16);
-
- return symbol;
-}
-
-static
-void test_alt_map()
-{
- uint8_t* buf = (uint8_t*)malloc(sizeof(uint8_t)*REGION_SIZE);
- int i=0;
- uint8_t c=1, next_c;
-
- for (i=0; i < REGION_SIZE/2;i++) {
- if (c == 255) c = 1;
- buf[i] = c;
- buf[i+(REGION_SIZE/2)] = c;
- c++;
- }
-
-
- c = 1;
- for (i=0; i < REGION_SIZE;i++) {
- uint8_t sym_w8 = get_alt_map_2w8(i, buf, REGION_SIZE/2);
- uint8_t c_val = ((i % 2) == 0) ? (c & 0x0f) : ((c & 0xf0) >> 4);
- uint8_t exp_sym_w8 = c_val | c_val << 4;
-
- if (exp_sym_w8 != sym_w8) {
- fprintf(stderr, "Alt mapping failure (w=8,c=%d,i=%d): %u != %u\n", c, i, exp_sym_w8, sym_w8);
- exit(1);
- }
-
- if ((i % 2) == 1) {
- c++;
- }
- if (c == 255) {
- c = 1;
- }
- }
-
- c = 1;
-
- for (i=0; i < REGION_SIZE/2;i++) {
- uint16_t sym_w16 = get_alt_map_2w16(i, buf, REGION_SIZE/2);
- uint16_t exp_sym_w16 = c | c << 8;
-
- if (exp_sym_w16 != sym_w16) {
- fprintf(stderr, "Alt mapping failure (w=16,c=%d,i=%d): %u != %u\n", c, i, exp_sym_w16, sym_w16);
- exit(1);
- }
-
- c++;
- if (c == 255) {
- c = 1;
- }
- }
-
- c = 1;
- next_c = 2;
-
- for (i=0; i < REGION_SIZE/4;i++) {
- uint32_t sym_w32 = get_alt_map_2w32(i, buf, REGION_SIZE/2);
- uint32_t exp_sym_w32 = c | (next_c << 8) | c << 16 | (next_c << 24);
-
- if (exp_sym_w32 != sym_w32) {
- fprintf(stderr, "Alt mapping failure (w=32,c=%d,i=%d): %u != %u\n", c, i, exp_sym_w32, sym_w32);
- exit(1);
- }
- c++;
- next_c++;
- if (c == 255) {
- c = 1;
- next_c = 2;
- } else if (c == 254) {
- next_c = 1;
- }
- }
-
-}
-
-void fill_random_region(void *reg, int size)
-{
- uint32_t *r;
- int i;
-
- r = (uint32_t *) reg;
- for (i = 0; i < size/sizeof(uint32_t); i++) {
- r[i] = MOA_Random_32();
- }
-}
-
-void problem(char *s)
-{
- fprintf(stderr, "Unit test failed.\n");
- fprintf(stderr, "%s\n", s);
- exit(1);
-}
-
-void usage(char *s)
-{
- fprintf(stderr, "usage: gf_unit w tests seed [method] - does unit testing in GF(2^w)\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "Legal w are: 4, 8, 16, 32, 64 and 128\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "Tests may be any combination of:\n");
- fprintf(stderr, " A: All\n");
- fprintf(stderr, " S: Single operations (multiplication/division)\n");
- fprintf(stderr, " R: Region operations\n");
- fprintf(stderr, " V: Verbose Output\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "Use -1 for time(0) as a seed.\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "For method specification, type gf_methods\n");
- fprintf(stderr, "\n");
- if (s != NULL) fprintf(stderr, "%s\n", s);
- exit(1);
-}
-
-int main(int argc, char **argv)
-{
- int w, i, j, verbose, single, region, xor, off, size, sindex, eindex, tested, top;
- uint32_t a, b, c, d, ai, da, bi, mask;
- uint64_t a64, b64, c64, d64;
- uint64_t a128[2], b128[2], c128[2], d128[2], e128[2];
- gf_t gf, gf_def;
- uint8_t *r8b, *r8c, *r8d;
- uint16_t *r16b, *r16c, *r16d;
- uint32_t *r32b, *r32c, *r32d;
- uint64_t *r64b, *r64c, *r64d;
- uint64_t *r128b, *r128c, *r128d;
- time_t t0;
- gf_internal_t *h;
-
- if (argc < 4) usage(NULL);
- if (sscanf(argv[1], "%d", &w) == 0) usage("Bad w\n");
- if (sscanf(argv[3], "%ld", &t0) == 0) usage("Bad seed\n");
- if (t0 == -1) t0 = time(0);
- MOA_Seed(t0);
-
- if (w > 32 && w != 64 && w != 128) usage("Bad w");
-
- if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("Bad Method");
-
- for (i = 0; i < strlen(argv[2]); i++) {
- if (strchr("ASRV", argv[2][i]) == NULL) usage("Bad test\n");
- }
-
- h = (gf_internal_t *) gf.scratch;
- if (w <= 32) {
- mask = 0;
- for (i = 0; i < w; i++) mask |= (1 << i);
- }
-
- verbose = (strchr(argv[2], 'V') != NULL);
- single = (strchr(argv[2], 'S') != NULL || strchr(argv[2], 'A') != NULL);
- region = (strchr(argv[2], 'R') != NULL || strchr(argv[2], 'A') != NULL);
-
- if (((h->region_type & GF_REGION_ALTMAP) != 0) && (h->mult_type == GF_MULT_COMPOSITE)) {
- test_alt_map();
- }
-
- if (!gf_init_easy(&gf_def, w, GF_MULT_DEFAULT)) problem("No default for this value of w");
-
- if (verbose) printf("Seed: %ld\n", t0);
-
- if (single) {
-
- if (w <= 32) {
- if (gf.multiply.w32 == NULL) problem("No multiplication operation defined.");
- if (verbose) { printf("Testing single multiplications/divisions.\n"); fflush(stdout); }
- if (w <= 10) {
- top = (1 << w)*(1 << w);
- } else {
- top = 1000000;
- }
- for (i = 0; i < top; i++) {
- if (w <= 10) {
- a = i % (1 << w);
- b = i >> w;
- } else if (i < 10) {
- a = 0;
- b = MOA_Random_W(w, 1);
- } else if (i < 20) {
- b = 0;
- a = MOA_Random_W(w, 1);
- } else if (i < 30) {
- a = 1;
- b = MOA_Random_W(w, 1);
- } else if (i < 40) {
- b = 1;
- a = MOA_Random_W(w, 1);
- } else {
- a = MOA_Random_W(w, 1);
- b = MOA_Random_W(w, 1);
- }
-
- c = gf.multiply.w32(&gf, a, b);
- tested = 0;
-
- /* If this is not composite, then first test against the default: */
-
- if (h->mult_type != GF_MULT_COMPOSITE) {
- tested = 1;
- d = gf_def.multiply.w32(&gf_def, a, b);
-
- if (c != d) {
- printf("Error in single multiplication (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c);
- printf(" The default returned %x\n", d);
- exit(1);
- }
- }
-
- /* Now, we also need to double-check, in case the default is wanky, and when
- we're performing composite operations. Start with 0 and 1: */
-
- if (a == 0 || b == 0 || a == 1 || b == 1) {
- tested = 1;
- if (((a == 0 || b == 0) && c != 0) ||
- (a == 1 && c != b) ||
- (b == 1 && c != a)) {
- printf("Error in single multiplication (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x, which is clearly wrong.\n", a, b, c);
- exit(1);
- }
-
- /* If division or inverses are defined, let's test all combinations to make sure
- that the operations are consistent with each other. */
-
- } else {
- if ((c & mask) != c) {
- printf("Error in single multiplication (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x, which is too big.\n", a, b, c);
- exit(1);
- }
-
- }
- if (gf.inverse.w32 != NULL && (a != 0 || b != 0)) {
- tested = 1;
- if (a != 0) {
- ai = gf.inverse.w32(&gf, a);
-
- if (gf.multiply.w32(&gf, c, ai) != b) {
- printf("Error in single multiplication (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c);
- printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n",
- a, ai, c, ai, gf.multiply.w32(&gf, c, ai));
- exit(1);
- }
- }
- if (b != 0) {
- bi = gf.inverse.w32(&gf, b);
- if (gf.multiply.w32(&gf, c, bi) != a) {
- printf("Error in single multiplication (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c);
- printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n",
- b, bi, c, bi, gf.multiply.w32(&gf, c, bi));
- exit(1);
- }
- }
- }
- if (gf.divide.w32 != NULL && (a != 0 || b != 0)) {
- tested = 1;
-
- if (a != 0) {
- ai = gf.divide.w32(&gf, c, a);
-
- if (ai != b) {
- printf("Error in single multiplication/division (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c);
- printf(" gf.divide.w32() of %x and %x returned %x\n", c, a, ai);
- exit(1);
- }
- }
- if (b != 0) {
- bi = gf.divide.w32(&gf, c, b);
-
- if (bi != a) {
- printf("Error in single multiplication/division (all numbers in hex):\n\n");
- printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c);
- printf(" gf.divide.w32() of %x and %x returned %x\n", c, b, bi);
- exit(1);
- }
- }
- }
-
- if (!tested) problem("There is no way to test multiplication.\n");
- }
-
- } else if (w == 64) {
- if (verbose) { printf("Testing single multiplications/divisions.\n"); fflush(stdout); }
- if (gf.multiply.w64 == NULL) problem("No multiplication operation defined.");
- for (i = 0; i < 1000; i++) {
- for (j = 0; j < 1000; j++) {
- a64 = MOA_Random_64();
- b64 = MOA_Random_64();
- c64 = gf.multiply.w64(&gf, a64, b64);
- if ((a64 == 0 || b64 == 0) && c64 != 0) problem("Single Multiplication by zero Failed");
- if (a64 != 0 && b64 != 0) {
- d64 = (gf.divide.w64 == NULL) ? gf_def.divide.w64(&gf_def, c64, b64) : gf.divide.w64(&gf, c64, b64);
- if (d64 != a64) {
- printf("0x%llx * 0x%llx =? 0x%llx (check-a: 0x%llx)\n", a64, b64, c64, d64);
- problem("Single multiplication/division failed");
- }
- }
- }
- }
- if (gf.inverse.w64 == NULL) {
- printf("No inverse defined for this method.\n");
- } else {
- if (verbose) { printf("Testing Inversions.\n"); fflush(stdout); }
- for (i = 0; i < 1000; i++) {
- do { a64 = MOA_Random_64(); } while (a64 == 0);
- b64 = gf.inverse.w64(&gf, a64);
- if (gf.multiply.w64(&gf, a64, b64) != 1) problem("Inversion failed.\n");
- }
- }
- } else if (w == 128) {
- if (verbose) { printf("Testing single multiplications/divisions.\n"); fflush(stdout); }
- if (gf.multiply.w128 == NULL) problem("No multiplication operation defined.");
- for (i = 0; i < 500; i++) {
- for (j = 0; j < 500; j++) {
- MOA_Random_128(a128);
- MOA_Random_128(b128);
- gf.multiply.w128(&gf, a128, b128, c128);
- if ((GF_W128_IS_ZERO(a128) && GF_W128_IS_ZERO(b128)) && !(GF_W128_IS_ZERO(c128))) problem("Single Multiplication by zero Failed");
- if (!GF_W128_IS_ZERO(a128) && !GF_W128_IS_ZERO(b128)) {
- gf.divide.w128 == NULL ? gf_def.divide.w128(&gf_def, c128, b128, d128) : gf.divide.w128(&gf, c128, b128, d128);
- if (!GF_W128_EQUAL(a128, d128)) {
- printf("0x%llx 0x%llx * 0x%llx 0x%llx =? 0x%llx 0x%llx (check-a: 0x%llx 0x%llx)\n", a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], d128[0], d128[1]);
- problem("Single multiplication/division failed");
- }
- }
- }
- }
- if (gf.inverse.w128 == NULL) {
- printf("No inverse defined for this method.\n");
- } else {
- if (verbose) { printf("Testing Inversions.\n"); fflush(stdout); }
- for (i = 0; i < 1000; i++) {
- do { MOA_Random_128(a128); } while (GF_W128_IS_ZERO(a128));
- gf.inverse.w128(&gf, a128, b128);
- gf.multiply.w128(&gf, a128, b128, c128);
- if (!(c128[0] == 0 && c128[1] == 1)) problem("Inversion failed.\n");
- }
- }
-
- } else {
- problem("Value of w not implemented yet");
- }
- }
-
- if (region) {
-
- if (w == 4) {
- if (gf.multiply_region.w32 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r8b = (uint8_t *) malloc(REGION_SIZE);
- r8c = (uint8_t *) malloc(REGION_SIZE);
- r8d = (uint8_t *) malloc(REGION_SIZE);
- fill_random_region(r8b, REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (a = 0; a < 16; a++) {
- fill_random_region(r8c, REGION_SIZE);
- memcpy(r8d, r8c, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint8_t);
- gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor);
- for (i = sindex; i < eindex; i++) {
- b = (r8b[i] >> 4);
- c = (r8c[i] >> 4);
- d = (r8d[i] >> 4);
- if (!xor && gf.multiply.w32(&gf, a, b) != c) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i));
- printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) );
- printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i));
- printf(" %d %d %d %d\n", a, b, c, d);
- printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- b = (r8b[i] & 0xf);
- c = (r8c[i] & 0xf);
- d = (r8d[i] & 0xf);
- if (!xor && gf.multiply.w32(&gf, a, b) != c) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i));
- printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) );
- printf("i=%d. 0x%x 0x%x 0x%x 0x%x\n", i, a, r8b[i], r8c[i], r8d[i]);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i));
- printf(" (%d * %d ^ %d) should equal %d - equals %d\n",
- a, b, d, (gf.multiply.w32(&gf, a, b) ^ d), c);
- printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (a = 0; a < 16; a++) {
- fill_random_region(r8b, REGION_SIZE);
- memcpy(r8d, r8b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint8_t);
- gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor);
- for (i = sindex; i < eindex; i++) {
- b = (r8b[i] >> 4);
- d = (r8d[i] >> 4);
- if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0");
- if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) {
- printf("i=%d. %d %d %d\n", i, a, b, d);
- printf("i=%d. %d %d %d\n", i, a, r8b[i], r8d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- b = (r8b[i] & 0xf);
- d = (r8d[i] & 0xf);
- if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0");
- if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) {
- printf("%d %d %d\n", a, b, d);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r8b);
- free(r8c);
- free(r8d);
- }
- } else if (w == 8) {
- if (gf.multiply_region.w32 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r8b = (uint8_t *) malloc(REGION_SIZE);
- r8c = (uint8_t *) malloc(REGION_SIZE);
- r8d = (uint8_t *) malloc(REGION_SIZE);
- fill_random_region(r8b, REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (a = 0; a < 256; a++) {
- fill_random_region(r8c, REGION_SIZE);
- memcpy(r8d, r8c, REGION_SIZE);
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- sindex = 0;
- eindex = REGION_SIZE;
- } else {
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1);
- }
- size = (eindex-sindex)*sizeof(uint8_t);
- gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor);
- for (i = sindex; i < eindex; i++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- b = get_alt_map_2w8(i, (uint8_t*)r8b, REGION_SIZE / 2);
- c = get_alt_map_2w8(i, (uint8_t*)r8c, REGION_SIZE / 2);
- d = get_alt_map_2w8(i, (uint8_t*)r8d, REGION_SIZE / 2);
- } else {
- b = r8b[i];
- c = r8c[i];
- d = r8d[i];
- }
- if (!xor && gf.multiply.w32(&gf, a, b) != c) {
- printf("i=%d. %d %d %d %d\n", i, a, b, c, d);
- printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]);
- printf("%llx. Sindex: %d\n", r8b+i, sindex);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) {
- printf("i=%d. %d %d %d %d\n", i, a, b, c, d);
- printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- continue;
- }
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (a = 0; a < 256; a++) {
- fill_random_region(r8b, REGION_SIZE);
- memcpy(r8d, r8b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint8_t);
- gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor);
- for (i = sindex; i < eindex; i++) {
- b = r8b[i];
- d = r8d[i];
- if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0");
- if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) {
- printf("i=%d. %d %d %d\n", i, a, b, d);
- printf("i=%d. %d %d %d\n", i, a, r8b[i], r8d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r8b);
- free(r8c);
- free(r8d);
- }
- } else if (w == 16) {
- if (gf.multiply_region.w32 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r16b = (uint16_t *) malloc(REGION_SIZE);
- r16c = (uint16_t *) malloc(REGION_SIZE);
- r16d = (uint16_t *) malloc(REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- fill_random_region(r16b, REGION_SIZE);
- a = MOA_Random_W(w, 0);
- fill_random_region(r16c, REGION_SIZE);
- memcpy(r16d, r16c, REGION_SIZE);
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- sindex = 0;
- eindex = REGION_SIZE / sizeof(uint16_t);
- } else {
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1);
- }
- size = (eindex-sindex)*sizeof(uint16_t);
- gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16c+sindex), a, size, xor);
- ai = gf.inverse.w32(&gf, a);
- if (!xor) {
- gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), ai, size, xor);
- } else {
- gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), 1, size, xor);
- gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), ai, size, xor);
- }
-
- for (i = sindex; i < eindex; i++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- b = get_alt_map_2w16(i, (uint8_t*)r16b, size / 2);
- c = get_alt_map_2w16(i, (uint8_t*)r16c, size / 2);
- d = get_alt_map_2w16(i, (uint8_t*)r16d, size / 2);
- } else {
- b = r16b[i];
- c = r16c[i];
- d = r16d[i];
- }
- if (!xor && d != b) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i));
- printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d);
- printf("%d is the inverse of %d\n", ai, a);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && b != 0) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i));
- printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n");
- printf(" b should equal 0, but it doesn't. Probe into it.\n");
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- continue;
- }
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- a = MOA_Random_W(w, 0);
- fill_random_region(r16b, REGION_SIZE);
- memcpy(r16d, r16b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint16_t);
- gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), a, size, xor);
- ai = gf.inverse.w32(&gf, a);
- if (!xor) {
- gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, xor);
- } else {
- gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), 1, size, xor);
- gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, 0);
- }
-
- for (i = sindex; i < eindex; i++) {
- b = r16b[i];
- c = r16c[i];
- d = r16d[i];
- if (!xor && (d != b)) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i));
- printf("We did d=b; b = ba; b = b(a^-1).\n");
- printf("So, b should equal d, but it doesn't. Look into it.\n");
- printf("b = %d. d = %d. a = %d\n", b, d, a);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && d != b) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i));
- printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n");
- printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n");
- printf("So, b should equal d, but it doesn't. Look into it.\n");
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r16b);
- free(r16c);
- free(r16d);
- }
- } else if (w == 32) {
- if (gf.multiply_region.w32 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r32b = (uint32_t *) malloc(REGION_SIZE);
- r32c = (uint32_t *) malloc(REGION_SIZE);
- r32d = (uint32_t *) malloc(REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- a = MOA_Random_32();
- fill_random_region(r32b, REGION_SIZE);
- fill_random_region(r32c, REGION_SIZE);
- memcpy(r32d, r32c, REGION_SIZE);
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- sindex = 0;
- eindex = REGION_SIZE / sizeof(uint32_t);
- } else {
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1);
- }
- size = (eindex-sindex)*sizeof(uint32_t);
- gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32c+sindex), a, size, xor);
- ai = gf.inverse.w32(&gf, a);
- if (!xor) {
- gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), ai, size, xor);
- } else {
- gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), 1, size, xor);
- gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), ai, size, xor);
- }
- for (i = sindex; i < eindex; i++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- b = get_alt_map_2w32(i, (uint8_t*)r32b, size / 2);
- c = get_alt_map_2w32(i, (uint8_t*)r32c, size / 2);
- d = get_alt_map_2w32(i, (uint8_t*)r32d, size / 2);
- i++;
- } else {
- b = r32b[i];
- c = r32c[i];
- d = r32d[i];
- }
- if (!xor && d != b) {
- printf("i=%d. Addresses: b: 0x%lx\n", i, (unsigned long) (r32b+i));
- printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d);
- printf("%d is the inverse of %d\n", ai, a);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && b != 0) {
- printf("i=%d. Addresses: b: 0x%lx c: 0x%lx d: 0x%lx\n", i,
- (unsigned long) (r32b+i), (unsigned long) (r32c+i), (unsigned long) (r32d+i));
- printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n");
- printf(" b should equal 0, but it doesn't. Probe into it.\n");
- printf("a: %8x b: %8x c: %8x, d: %8x\n", a, b, c, d);
- problem("Failed buffer-constant, xor=1");
- }
-
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 &&
- (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) {
- continue;
- }
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- a = MOA_Random_32();
- fill_random_region(r32b, REGION_SIZE);
- memcpy(r32d, r32b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint32_t);
- gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), a, size, xor);
- ai = gf.inverse.w32(&gf, a);
- if (!xor) {
- gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, xor);
- } else {
- gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), 1, size, xor);
- gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, 0);
- }
-
- for (i = sindex; i < eindex; i++) {
- b = r32b[i];
- c = r32c[i];
- d = r32d[i];
- if (!xor && (d != b)) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r32b+i));
- printf("We did d=b; b = ba; b = b(a^-1).\n");
- printf("So, b should equal d, but it doesn't. Look into it.\n");
- printf("b = %d. d = %d. a = %d\n", b, d, a);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && d != b) {
- printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r32b+i));
- printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n");
- printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n");
- printf("So, b should equal d, but it doesn't. Look into it.\n");
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r32b);
- free(r32c);
- free(r32d);
- }
- } else if (w == 64) {
- if (gf.multiply_region.w64 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r64b = (uint64_t *) malloc(REGION_SIZE);
- r64c = (uint64_t *) malloc(REGION_SIZE);
- r64d = (uint64_t *) malloc(REGION_SIZE);
- fill_random_region(r64b, REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- a64 = MOA_Random_64();
- fill_random_region(r64c, REGION_SIZE);
- memcpy(r64d, r64c, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint64_t);
- gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64c+sindex), a64, size, xor);
- for (i = sindex; i < eindex; i++) {
- b64 = r64b[i];
- c64 = r64c[i];
- d64 = r64d[i];
- if (!xor && gf.multiply.w64(&gf, a64, b64) != c64) {
- printf("i=%d. 0x%llx 0x%llx 0x%llx should be 0x%llx\n", i, a64, b64, c64,
- gf.multiply.w64(&gf, a64, b64));
- printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i]);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && (gf.multiply.w64(&gf, a64, b64) ^ d64) != c64) {
- printf("i=%d. 0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, b64, c64, d64);
- printf("i=%d. 0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i], r64d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- a64 = MOA_Random_64();
- fill_random_region(r64b, REGION_SIZE);
- memcpy(r64d, r64b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint64_t);
- gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64b+sindex), a64, size, xor);
- for (i = sindex; i < eindex; i++) {
- b64 = r64b[i];
- d64 = r64d[i];
- if (!xor && gf.multiply.w64(&gf, a64, d64) != b64) problem("Failed buffer-constant, xor=0");
- if (xor && (gf.multiply.w64(&gf, a64, d64) ^ d64) != b64) {
- printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, b64, d64);
- printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64d[i]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r64b);
- free(r64c);
- free(r64d);
- }
- } else if (w == 128) {
- if (gf.multiply_region.w128 == NULL) {
- printf("No multiply_region.\n");
- } else {
- r128b = (uint64_t *) malloc(REGION_SIZE);
- r128c = (uint64_t *) malloc(REGION_SIZE);
- r128d = (uint64_t *) malloc(REGION_SIZE);
- fill_random_region(r128b, REGION_SIZE);
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src != dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- MOA_Random_128(a128);
- fill_random_region(r128c, REGION_SIZE);
- memcpy(r128d, r128c, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1);
- size = (eindex-sindex)*sizeof(uint64_t)*2;
- gf.multiply_region.w128(&gf, (void *) (r128b+sindex*2), (void *) (r128c+sindex*2), a128, size, xor);
- for (i = sindex; i < eindex; i++) {
- b128[0] = r128b[2*i];
- b128[1] = r128b[2*i+1];
- c128[0] = r128c[2*i];
- c128[1] = r128c[2*i+1];
- d128[0] = r128d[2*i];
- d128[1] = r128d[2*i+1];
- gf.multiply.w128(&gf, a128, b128, e128);
- if (xor) {
- e128[0] ^= d128[0];
- e128[1] ^= d128[1];
- }
- if (!xor && !GF_W128_EQUAL(c128, e128)) {
- printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx should be 0x%llx%llx\n",
- i, a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], e128[0], e128[1]);
- problem("Failed buffer-constant, xor=0");
- }
- if (xor && !GF_W128_EQUAL(e128, c128)) {
- printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx 0x%llx%llx\n", i,
- a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], d128[0], d128[1]);
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- for (xor = 0; xor < 2; xor++) {
- if (verbose) {
- printf("Testing buffer-constant, src == dest, xor = %d\n", xor);
- fflush(stdout);
- }
- for (j = 0; j < 1000; j++) {
- MOA_Random_128(a128);
- fill_random_region(r128b, REGION_SIZE);
- memcpy(r128d, r128b, REGION_SIZE);
- sindex = MOA_Random_W(3, 1);
- sindex = 0;
- eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1);
- eindex = REGION_SIZE/(2*sizeof(uint64_t));
- size = (eindex-sindex)*sizeof(uint64_t)*2;
- gf.multiply_region.w128(&gf, (void *) (r128b+sindex), (void *) (r128b+sindex), a128, size, xor);
- for (i = sindex; i < eindex; i++) {
- b128[0] = r128b[2*i];
- b128[1] = r128b[2*i + 1];
- d128[0] = r128d[2*i];
- d128[1] = r128d[2*i + 1];
- gf.multiply.w128(&gf, a128, d128, e128);
- if (xor) {
- e128[0] ^= d128[0];
- e128[1] ^= d128[1];
- }
- if (!xor && !GF_W128_EQUAL(b128, e128)) problem("Failed buffer-constant, xor=0");
- if (xor && !GF_W128_EQUAL(b128, e128)) {
- problem("Failed buffer-constant, xor=1");
- }
- }
- }
- }
- free(r128b);
- free(r128c);
- free(r128d);
- }
- }
- }
- exit(0);
-}